Read in the data using fread
library(data.table)
library(ggplot2)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
##
## between, first, last
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the temperature data set into a data.table; the relative path means
# the CSV is looked up in the current working directory.
temp <- fread(input = "Temperature.csv")
Extract all winter observations
# Keep only the rows whose Season is "winter" (all columns retained).
# Uses `<-` for assignment, consistent with the rest of the script.
winter <- temp[Season == "winter"]
# Preview the first rows of the subset.
head(winter)
## Sample Date DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
## 1: DANT.19900110 19900110 10/1/90 7 9 9 DANT WZ 681379.6
## 2: DANT.19900206 19900206 6/2/90 34 36 36 DANT WZ 681379.6
## 3: DANT.19901212 19901212 12/12/90 343 345 345 DANT WZ 681379.6
## 4: DANT.19910116 19910116 1/16/1991 378 380 15 DANT WZ 681379.6
## 5: DANT.19910226 19910226 2/26/1991 419 421 56 DANT WZ 681379.6
## 6: DANT.19911219 19911219 12/19/1991 715 717 352 DANT WZ 681379.6
## 31UN_ED50 Year Month Season Salinity Temperature CHLFa
## 1: 5920571 1990 1 winter 29.19 4.0 1.30
## 2: 5920571 1990 2 winter 27.37 6.0 NA
## 3: 5920571 1990 12 winter 31.50 4.2 60.50
## 4: 5920571 1991 1 winter 20.83 -0.3 2.30
## 5: 5920571 1991 2 winter 28.06 3.9 3.52
## 6: 5920571 1991 12 winter 25.31 3.9 3.50
Extract all winter observations for zone NC
# Narrow the winter subset further to rows whose Area is "NC".
# Uses `<-` for assignment, consistent with the rest of the script.
NCwinter <- winter[Area == "NC"]
# Preview the first rows of the subset.
head(NCwinter)
## Sample Date DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
## 1: T100.19900103 19900103 3/1/90 0 2 2 T100 NC 587650.2
## 2: T100.19900205 19900205 5/2/90 33 35 35 T100 NC 587650.2
## 3: T100.19901218 19901218 12/18/1990 349 351 351 T100 NC 587650.2
## 4: T100.19910116 19910116 1/16/1991 378 380 15 T100 NC 587650.2
## 5: T100.19910205 19910205 5/2/91 398 400 35 T100 NC 587650.2
## 6: T100.19911211 19911211 11/12/91 707 709 344 T100 NC 587650.2
## 31UN_ED50 Year Month Season Salinity Temperature CHLFa
## 1: 6001110 1990 1 winter 34.82 8.5 0.30
## 2: 6001110 1990 2 winter NA NA NA
## 3: 6001110 1990 12 winter 34.80 9.2 0.40
## 4: 6001110 1991 1 winter 34.86 6.1 0.68
## 5: 6001110 1991 2 winter 34.53 5.2 0.34
## 6: 6001110 1991 12 winter 34.79 9.7 0.44
Select only the columns Area, Season and Temperature
# Column projection: keep Area, Season and Temperature for every row.
ASTdata <- temp[, list(Area, Season, Temperature)]
head(ASTdata)
## Area Season Temperature
## 1: WZ winter 4.0
## 2: WZ winter 6.0
## 3: WZ spring 7.3
## 4: WZ spring 8.2
## 5: WZ spring 17.4
## 6: WZ summer 18.1
Select only the columns Area and Temperature but only for winter observations
# Filter rows (i) and select columns (j) in a single data.table call:
# winter rows only, Area and Temperature columns only.
ATwinter <- temp[
  Season == "winter",
  list(Area, Temperature)
]
head(ATwinter)
## Area Temperature
## 1: WZ 4.0
## 2: WZ 6.0
## 3: WZ 4.2
## 4: WZ -0.3
## 5: WZ 3.9
## 6: WZ 3.9
Find the total number of observations in winter
# .N is the number of rows remaining after the winter filter is applied.
temp[Season == "winter", .N]
## [1] 1706
Calculate the mean temperature and mean salinity in winter (note that there are missing values, so you will have to use na.rm = TRUE)
# One-row summary of the winter observations; na.rm = TRUE drops missing
# values before each mean is taken.
temp[
  Season == "winter",
  list(
    m_Temp = mean(Temperature, na.rm = TRUE),
    m_Sal = mean(Salinity, na.rm = TRUE)
  )
]
## m_Temp m_Sal
## 1: 5.57162 29.15756
Find the number of observations per area in winter (the code below groups by the Area column; group by Station instead to get per-station counts)
# Row count (.N) of winter observations within each Area group.
temp[Season == "winter", .N, by = Area]
## Area N
## 1: WZ 151
## 2: GM 52
## 3: VD 247
## 4: ED 100
## 5: OS 218
## 6: WS 140
## 7: KZ 470
## 8: NZ 97
## 9: VM 50
## 10: NC 181
Find the number of observations per area per season (the code below groups by the Area column; group by Station instead to get per-station counts)
# Row count (.N) for every Area x Season combination present in the data.
temp[, .N, by = list(Area, Season)]
## Area Season N
## 1: WZ winter 151
## 2: WZ spring 266
## 3: WZ summer 267
## 4: WZ autumn 215
## 5: GM winter 52
## 6: GM spring 92
## 7: GM summer 88
## 8: GM autumn 61
## 9: VD winter 247
## 10: VD spring 163
## 11: VD summer 144
## 12: VD autumn 187
## 13: ED winter 100
## 14: ED spring 173
## 15: ED summer 178
## 16: ED autumn 141
## 17: OS winter 218
## 18: OS spring 352
## 19: OS summer 367
## 20: OS autumn 244
## 21: WS winter 140
## 22: WS spring 175
## 23: WS summer 235
## 24: WS autumn 180
## 25: KZ winter 470
## 26: KZ spring 572
## 27: KZ summer 610
## 28: KZ autumn 442
## 29: NZ winter 97
## 30: NZ spring 161
## 31: NZ summer 180
## 32: NZ autumn 126
## 33: NC spring 350
## 34: NC summer 383
## 35: NC autumn 225
## 36: VM winter 50
## 37: VM spring 92
## 38: VM summer 92
## 39: VM autumn 61
## 40: NC winter 181
## Area Season N
Estimate average temperatures by month
# Mean temperature per Month, ignoring missing temperature readings.
temp[, list(m_Temp = mean(Temperature, na.rm = TRUE)), by = Month]
## Month m_Temp
## 1: 1 5.174210
## 2: 2 4.737400
## 3: 3 6.125961
## 4: 4 8.702035
## 5: 5 12.293479
## 6: 6 15.659933
## 7: 7 18.077343
## 8: 8 19.388355
## 9: 9 16.995974
## 10: 10 13.619670
## 11: 11 9.848891
## 12: 12 6.746339
Estimate average temperatures by month by area
# Mean temperature for every Month x Area combination, ignoring missing
# temperature readings.
temp[, list(m_Temp = mean(Temperature, na.rm = TRUE)),
     by = list(Month, Area)]
## Month Area m_Temp
## 1: 1 WZ 3.377826
## 2: 2 WZ 3.925800
## 3: 3 WZ 5.818481
## 4: 4 WZ 9.270805
## 5: 5 WZ 13.398191
## ---
## 116: 1 NC 6.789808
## 117: 2 NC 5.682581
## 118: 3 NC 5.837500
## 119: 11 NC 10.978269
## 120: 12 NC 8.716957
Plot the output of the previous question with ggplot2, using the geom_line() geometry
# Monthly mean temperature per area, drawn as one line per Area.
# The Month x Area summary is computed with data.table (missing temperatures
# are dropped from each mean) and piped straight into ggplot2.
temp[, .(m_Temp = mean(Temperature, na.rm = TRUE)), by = .(Month, Area)] %>%
  ggplot(aes(x = Month, y = m_Temp, colour = Area)) +
  geom_line() +
  theme_bw() +
  # Month is numeric (1-12); label the ticks with base R's month.abb
  # ("Jan" ... "Dec") rather than a hand-typed vector.
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  xlab("Month") +
  ylab("Mean Temperature (C)") +
  # Title typo fixed: "Montly" -> "Monthly".
  ggtitle("Monthly Mean Temperature for Data Samples")