Exercise
Read the data
library(data.table)
# Read the CSV (path is relative to the working directory) with fread()
temp <- fread("Temperature.csv")
# Dimensions of the table, reported as rows then columns
dim(temp)
## [1] 8528 16
# fread() returns a data.table, which also inherits from data.frame
class(temp)
## [1] "data.table" "data.frame"
# Printing a large data.table shows only the first and last five rows
temp
## Sample Date DateNr dDay1 dDay2 dDay3 Station Area
## 1: DANT.19900110 19900110 10/1/90 7 9 9 DANT WZ
## 2: DANT.19900206 19900206 6/2/90 34 36 36 DANT WZ
## 3: DANT.19900308 19900308 8/3/90 64 66 66 DANT WZ
## 4: DANT.19900404 19900404 4/4/90 91 93 93 DANT WZ
## 5: DANT.19900509 19900509 9/5/90 126 128 128 DANT WZ
## ---
## 8524: ZUID.20050926 20050926 9/26/2005 5745 5747 268 ZUID WZ
## 8525: ZUID.20051012 20051012 12/10/05 5761 5763 284 ZUID WZ
## 8526: ZUID.20051027 20051027 10/27/2005 5776 5778 299 ZUID WZ
## 8527: ZUID.20051110 20051110 10/11/05 5790 5792 313 ZUID WZ
## 8528: ZUID.20051212 20051212 12/12/05 5822 5824 345 ZUID WZ
## 31UE_ED50 31UN_ED50 Year Month Season Salinity Temperature CHLFa
## 1: 681379.6 5920571 1990 1 winter 29.19 4.00 1.30
## 2: 681379.6 5920571 1990 2 winter 27.37 6.00 NA
## 3: 681379.6 5920571 1990 3 spring 24.99 7.30 21.10
## 4: 681379.6 5920571 1990 4 spring 28.79 8.20 25.00
## 5: 681379.6 5920571 1990 5 spring 33.28 17.40 10.20
## ---
## 8524: 733386.3 5928197 2005 9 autumn 30.91 15.47 11.40
## 8525: 733386.3 5928197 2005 10 autumn 31.18 13.45 8.30
## 8526: 733386.3 5928197 2005 10 autumn 28.67 12.09 4.56
## 8527: 733386.3 5928197 2005 11 autumn 29.53 9.03 4.94
## 8528: 733386.3 5928197 2005 12 winter 29.08 5.13 2.38
Select only the columns Area, Season, and Temperature
# Keep three columns; inside a data.table's j, list() and .() are equivalent
ans <- temp[, list(Area, Season, Temperature)]
# Show the first six rows
head(ans, n = 6L)
## Area Season Temperature
## 1: WZ winter 4.0
## 2: WZ winter 6.0
## 3: WZ spring 7.3
## 4: WZ spring 8.2
## 5: WZ spring 17.4
## 6: WZ summer 18.1
Select only the columns Area and Temperature but only for winter
observations
# i filters the rows to winter observations, j picks the two columns
ans <- temp[
  Season == "winter",
  list(Area, Temperature)
]
# Show the first six rows
head(ans, n = 6L)
## Area Temperature
## 1: WZ 4.0
## 2: WZ 6.0
## 3: WZ 4.2
## 4: WZ -0.3
## 5: WZ 3.9
## 6: WZ 3.9
Calculate the mean temperature and mean salinity in winter (note
that there are missing values, so you will have to use na.rm = TRUE)
# One-row summary of the winter observations; na.rm = TRUE drops missing
# values before averaging
ans <- temp[
  Season == "winter",
  list(
    m_temp = mean(Temperature, na.rm = TRUE),
    m_sal = mean(Salinity, na.rm = TRUE)
  )
]
ans
## m_temp m_sal
## 1: 5.57162 29.15756
Find the number of observations per station in winter
# .N is the number of rows in each group; the result column is named N
ans <- temp[Season == "winter", .N, by = Station]
ans
## Station N
## 1: DANT 50
## 2: DREI 52
## 3: G6 101
## 4: GROO 50
## 5: HAMM 55
## 6: HANS 56
## 7: HUIB 50
## 8: LODS 54
## 9: MARS 49
## 10: N02 115
## 11: N10 131
## 12: N20 50
## 13: N70 50
## 14: R03 32
## 15: SOEL 50
## 16: T004 97
## 17: T010 45
## 18: T100 45
## 19: T135 46
## 20: T175 45
## 21: T235 45
## 22: VLIS 84
## 23: W02 99
## 24: W20 47
## 25: W70 47
## 26: WISS 55
## 27: ZIJP 54
## 28: ZUID 52
## Station N
Find the number of observations per station per season
# Row count for every Station/Season combination (groups appear in order of
# first occurrence because by, unlike keyby, does not sort)
ans <- temp[, .N, by = c("Station", "Season")]
ans
## Station Season N
## 1: DANT winter 50
## 2: DANT spring 89
## 3: DANT summer 89
## 4: DANT autumn 72
## 5: DREI winter 52
## ---
## 114: ZIJP autumn 61
## 115: ZUID winter 52
## 116: ZUID spring 89
## 117: ZUID summer 89
## 118: ZUID autumn 73
Estimate average temperatures by month
# Monthly mean temperature; keyby groups and returns the result sorted by Month
ans <- temp[
  ,
  list(m_tmp = mean(Temperature, na.rm = TRUE)),
  keyby = Month
]
ans
## Month m_tmp
## 1: 1 5.174210
## 2: 2 4.737400
## 3: 3 6.125961
## 4: 4 8.702035
## 5: 5 12.293479
## 6: 6 15.659933
## 7: 7 18.077343
## 8: 8 19.388355
## 9: 9 16.995974
## 10: 10 13.619670
## 11: 11 9.848891
## 12: 12 6.746339
Estimate average temperatures by month by area
# Mean temperature for each Month/Area pair, sorted by Month and then Area
ans <- temp[
  ,
  list(m_tmp2 = mean(Temperature, na.rm = TRUE)),
  keyby = list(Month, Area)
]
ans
## Month Area m_tmp2
## 1: 1 ED 3.086333
## 2: 1 GM 4.308750
## 3: 1 KZ 5.296222
## 4: 1 NC 6.789808
## 5: 1 NZ 8.122000
## ---
## 116: 12 OS 6.075000
## 117: 12 VD 7.402778
## 118: 12 VM 5.138125
## 119: 12 WS 6.334500
## 120: 12 WZ 4.985111
Plot the output of the previous question with ggplot2, using the
geom_line() geometry
library(ggplot2)
# Mean temperature for each Month/Area pair (recomputed here so the plotting
# snippet can be run on its own); keyby returns the rows sorted by Month, Area
ans <- temp[, .(m_tmp2 = mean(Temperature, na.rm = TRUE)),
  keyby = .(Month, Area)
]

# One line per area, each in its own facet.
# Month is an integer code plotted on a continuous axis, so the default breaks
# can land on fractional values (e.g. 2.5, 7.5); pin them to whole months.
ggplot(ans, aes(x = Month, y = m_tmp2, col = Area)) +
  geom_line() +
  facet_wrap(~ Area) +
  scale_x_continuous(breaks = seq(1, 12, by = 2)) +
  ggtitle("Average Sea Temperatures by Month per Area") +
  xlab("Month") +
  ylab("Temperature")
