Better plotting with ggplot2
Part 1
- Use ggplot() to produce a histogram of salinity values
# Attach ggplot2 for the plots below, then load the monitoring data set
# from the working directory and print a compact summary of its columns.
library(ggplot2)
temp <- read.csv(file = "Temperature.csv")
str(temp)
## 'data.frame': 8528 obs. of 16 variables:
## $ Sample : chr "DANT.19900110" "DANT.19900206" "DANT.19900308" "DANT.19900404" ...
## $ Date : int 19900110 19900206 19900308 19900404 19900509 19900620 19900718 19900815 19900919 19901017 ...
## $ DateNr : chr "10/1/90" "6/2/90" "8/3/90" "4/4/90" ...
## $ dDay1 : int 7 34 64 91 126 168 196 224 259 287 ...
## $ dDay2 : int 9 36 66 93 128 170 198 226 261 289 ...
## $ dDay3 : int 9 36 66 93 128 170 198 226 261 289 ...
## $ Station : chr "DANT" "DANT" "DANT" "DANT" ...
## $ Area : chr "WZ" "WZ" "WZ" "WZ" ...
## $ X31UE_ED50 : num 681380 681380 681380 681380 681380 ...
## $ X31UN_ED50 : num 5920571 5920571 5920571 5920571 5920571 ...
## $ Year : int 1990 1990 1990 1990 1990 1990 1990 1990 1990 1990 ...
## $ Month : int 1 2 3 4 5 6 7 8 9 10 ...
## $ Season : chr "winter" "winter" "spring" "spring" ...
## $ Salinity : num 29.2 27.4 25 28.8 33.3 ...
## $ Temperature: num 4 6 7.3 8.2 17.4 18.1 17 21 15.1 13.8 ...
## $ CHLFa : num 1.3 NA 21.1 25 10.2 6.2 7.9 7.85 13 11.8 ...
# Histogram of Salinity using 0.05-wide bins, with bars outlined in black.
salhist <- ggplot(data = temp, mapping = aes(x = Salinity))
salhist +
  geom_histogram(binwidth = 0.05, color = "black")
## Warning: Removed 798 rows containing non-finite values (stat_bin).

- Make a histogram of salinity values for each year of study, and then for each month
# Convert Year to a factor so it can drive a discrete fill/outline colour,
# then draw the salinity histogram coloured by year (default binning).
temp$Year <- as.factor(temp$Year)
salyear <- ggplot(
  data = temp,
  mapping = aes(x = Salinity, fill = Year, color = Year)
)
salyear +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 798 rows containing non-finite values (stat_bin).

# Convert Month to a factor so it can drive a discrete fill/outline colour,
# then draw the salinity histogram coloured by month (default binning).
temp$Month <- as.factor(temp$Month)
salmonth <- ggplot(
  data = temp,
  mapping = aes(x = Salinity, fill = Month, color = Month)
)
salmonth +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 798 rows containing non-finite values (stat_bin).

- Make a boxplot of temperature values for each station
# One box of Temperature values per Station, in the default station order.
tempval <- ggplot(data = temp, mapping = aes(x = Station, y = Temperature))
tempval +
  geom_boxplot()
## Warning: Removed 927 rows containing non-finite values (stat_boxplot).

- Reorganize the boxplot from low to high median temperatures
# Same boxplot, but with stations ordered by their median temperature.
# The x aesthetic is overridden on the layer: reorder() re-levels Station
# by median(Temperature), ignoring missing temperatures via na.rm = TRUE.
tempval <- ggplot(data = temp, mapping = aes(x = Station, y = Temperature))
tempval +
  geom_boxplot(
    mapping = aes(
      x = reorder(Station, X = Temperature, FUN = median, na.rm = TRUE)
    )
  )
## Warning: Removed 927 rows containing non-finite values (stat_boxplot).

Part 2
- Make a scatterplot of temperature and salinity over time
# Year was turned into a factor for the histograms above; convert it back
# to its numeric value (via character, so the level labels are used rather
# than the internal factor codes).
temp$Year <- as.numeric(as.character(temp$Year))

# Decimal date = year + fraction of the year elapsed.
# NOTE(review): assumes dDay3 is a day-of-year count; dividing by 365 is an
# approximation that ignores leap years.
temp$decdate <- temp$Year + temp$dDay3 / 365

# Temperature vs. salinity, with time shown through point colour.
# The colour mapping belongs inside aes() on the point layer; the former
# `color = Time` argument to ggplot() was outside aes(), referred to an
# undefined object and had no effect on the plot, so it has been dropped.
tempsal <- ggplot(temp, aes(x = Temperature, y = Salinity))
tempsal + geom_point(aes(color = decdate))
## Warning: Removed 963 rows containing missing values (geom_point).

- Make a scatterplot of salinity, grouped using facet_wrap() into different ‘Areas’
# Salinity against Year as points, drawn in one panel per Area.
salarea <- ggplot(data = temp, mapping = aes(x = Year, y = Salinity))
salarea +
  geom_point() +
  facet_wrap(~ Area)
## Warning: Removed 798 rows containing missing values (geom_point).

- Make a lineplot of salinity for each station, grouped into different ‘Areas’
# Salinity through time (decimal date), one panel per Area.
# Each station gets its own line: without an explicit group, geom_line()
# would join the observations of every station in an area into a single
# zig-zagging path. Colour is mapped to Station as well so the individual
# lines can be told apart. x and y are inherited from the plot-level aes().
salstat <- ggplot(temp, aes(x = decdate, y = Salinity))
salstat +
  geom_line(aes(group = Station, color = Station)) +
  facet_wrap(~ Area)
