library(ggplot2)
# Load the sea temperature and salinity observations.
temp <- read.csv("Temperature.csv")

# Base salinity histogram (bin width 1). Rows with a missing Salinity value
# are dropped before plotting. The faceted plots further down reuse `splot`.
splot <- ggplot(temp[!is.na(temp$Salinity), ], aes(x = Salinity)) +
  geom_histogram(binwidth = 1, fill = "darkblue") +
  labs(
    x = "Salinity (psu)",
    y = "Counts",
    title = "Distribution of Salinity Samples"
  )
splot
Yearly salinity distributions:
# One histogram panel per Year; the title replaces the one stored in `splot`.
splot +
  facet_wrap(vars(Year)) +
  labs(title = "Distribution of Salinity Samples by Year")
Salinity distribution by year and month:
# Grid of histograms: one row per Year, one column per Month. Axis text is
# shrunk (and x labels rotated) so the tick labels fit in the small panels.
splot +
  facet_grid(rows = vars(Year), cols = vars(Month)) +
  theme(
    axis.text.x = element_text(size = 5, angle = 45),
    axis.text.y = element_text(size = 4)
  ) +
  labs(
    title = "Distribution of Salinity Samples",
    subtitle = "by Month and Year"
  )
This could also be interpreted as plotting the salinity distribution for each month:
# One histogram panel per Month, pooling all years.
splot +
  facet_wrap(vars(Month)) +
  labs(
    title = "Distribution of Salinity Samples",
    subtitle = "by Month"
  )
# Boxplot of temperature for each station, using only rows with a recorded
# Temperature. Station labels are rotated and shrunk to fit on the x axis.
tbplot <- ggplot(
  temp[!is.na(temp$Temperature), ],
  aes(x = Station, y = Temperature)
) +
  geom_boxplot() +
  labs(title = "Temperature Distributions by Station") +
  theme(axis.text.x = element_text(size = 6, angle = 45))
tbplot

# Write the plot to a PNG file in the working directory.
ggsave(filename = "TemperatureDistributionbyStation.png", plot = tbplot)
## Saving 7 x 5 in image
# Same station boxplot, with stations ordered along the x axis by their mean
# temperature. The reordering lives in the layer-level aes() so the
# plot-level mapping (x = Station) is left as it was.
tbplot2 <- ggplot(
  temp[!is.na(temp$Temperature), ],
  aes(x = Station, y = Temperature)
) +
  geom_boxplot(aes(x = reorder(Station, Temperature, FUN = mean))) +
  labs(title = "Temperature Distributions by Station") +
  theme(axis.text.x = element_text(size = 6, angle = 45))
tbplot2
Part 2
Now make some time series plots of temperature and salinity. We will need a variable that represents continuous time from the start of the observations; we can create one as a decimal date, i.e. the year plus the day number divided by 365 (where temp is the data frame holding the data):
# Continuous time axis: decimal date = year + (day number / 365).
# NOTE(review): assumes dDay3 is the day number within the year, as the
# exercise text describes; leap years are not special-cased.
temp$decdate <- temp$Year + temp$dDay3 / 365

# Temperature and salinity against time, one point layer per variable.
# The full data frame is used on purpose: pre-filtering on
# !is.na(Temperature) would also throw away salinity readings from samples
# whose temperature is missing. Each layer drops only its own unusable values.
temptime <- ggplot(data = temp, aes(x = decdate)) +
  geom_point(aes(y = Temperature, col = "Temperature")) +
  geom_point(aes(y = Salinity, col = "Salinity")) +
  ylim(0, 40) + ylab("Temperature (C), Salinity (psu)") + xlab("Years") +
  ggtitle("Temperature and Salinity of Samples over Time")
temptime
# Each geom_point() layer warns "Removed N rows containing missing values"
# for values that are NA or fall outside ylim(0, 40).
# Salinity against decimal date, one panel (and one colour) per Area.
# Only rows with a recorded Salinity are used. No geom is attached yet so the
# same base can be drawn as points and as lines.
stime <- ggplot(
  temp[!is.na(temp$Salinity), ],
  aes(x = decdate, y = Salinity, colour = factor(Area))
) +
  facet_wrap(vars(Area)) +
  labs(
    x = "Time",
    y = "Salinity (psu)",
    title = "Salinity Samples for All Areas"
  )

# Scatterplot version (displayed only, not stored).
stime + geom_point()

# Line version: stored back into `stime`, then displayed.
stime <- stime + geom_line()
stime
First, make a subset of the data, only for the OS Area. Then repeat the above methods to plot the data. Scatterplot of temperature and salinity over time:
# Keep only the samples from Area "OS".
OSdata <- subset(temp, Area == "OS")

# Temperature and salinity against time for Area OS, one point layer per
# variable. All OS rows are passed in: pre-filtering on !is.na(Temperature)
# would also discard salinity readings from samples whose temperature is
# missing. Each layer drops only its own unusable values.
OStempsal <- ggplot(data = OSdata, aes(x = decdate)) +
  geom_point(aes(y = Temperature, col = "Temperature")) +
  geom_point(aes(y = Salinity, col = "Salinity")) +
  ylim(0, 40) + ylab("Temperature (C), Salinity (psu)") + xlab("Years") +
  ggtitle("Temperature and Salinity Samples for Area OS")
OStempsal
# Each geom_point() layer warns "Removed N rows containing missing values"
# for values that are NA or fall outside ylim(0, 40).
Scatterplot of salinity for Area OS:
# Salinity against decimal date for Area OS only. The base plot is built
# from `OSdata` (the Area == "OS" subset), not the full `temp` data frame,
# so the data match the "Area OS" title. Rows with missing Salinity are
# dropped. No geom is attached yet so the base can be reused.
OSstime <- ggplot(data = subset(OSdata, !is.na(Salinity)),
                  aes(x = decdate, y = Salinity)) +
  xlab("Time") + ylab("Salinity (psu)") +
  ggtitle("Salinity Samples for Area OS")

# Scatterplot version (displayed only, not stored).
OSstime + geom_point(col = "darkgrey")
Lineplot of OS salinity:
# Add a dark grey line layer to the stored base plot, then display it.
OSstime <- OSstime +
  geom_line(colour = "darkgrey")
OSstime