library(data.table)
## Warning: package 'data.table' was built under R version 4.4.3
library(ggplot2)
# Read the temperature measurements into a data.table named `temp`.
temp <- data.table::fread("./data/Temperature.csv")

# Keep only the rows whose Season is "winter" (every column is retained).

temp[Season == "winter", ]
##              Sample     Date    DateNr dDay1 dDay2 dDay3 Station   Area
##              <char>    <int>    <char> <int> <int> <int>  <char> <char>
##    1: DANT.19900110 19900110   10/1/90     7     9     9    DANT     WZ
##    2: DANT.19900206 19900206    6/2/90    34    36    36    DANT     WZ
##    3: DANT.19901212 19901212  12/12/90   343   345   345    DANT     WZ
##    4: DANT.19910116 19910116 1/16/1991   378   380    15    DANT     WZ
##    5: DANT.19910226 19910226 2/26/1991   419   421    56    DANT     WZ
##   ---                                                                  
## 1702: ZUID.20040216 20040216 2/16/2004  5157  5159    46    ZUID     WZ
## 1703: ZUID.20041208 20041208   8/12/04  5453  5455   342    ZUID     WZ
## 1704: ZUID.20050119 20050119 1/19/2005  5495  5497    18    ZUID     WZ
## 1705: ZUID.20050218 20050218 2/18/2005  5525  5527    48    ZUID     WZ
## 1706: ZUID.20051212 20051212  12/12/05  5822  5824   345    ZUID     WZ
##       31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
##           <num>     <num> <int> <int> <char>    <num>       <num> <num>
##    1:  681379.6   5920571  1990     1 winter    29.19        4.00  1.30
##    2:  681379.6   5920571  1990     2 winter    27.37        6.00    NA
##    3:  681379.6   5920571  1990    12 winter    31.50        4.20 60.50
##    4:  681379.6   5920571  1991     1 winter    20.83       -0.30  2.30
##    5:  681379.6   5920571  1991     2 winter    28.06        3.90  3.52
##   ---                                                                  
## 1702:  733386.3   5928197  2004     2 winter    24.70        4.40    NA
## 1703:  733386.3   5928197  2004    12 winter    28.67        4.72  1.52
## 1704:  733386.3   5928197  2005     1 winter    28.39        3.57  4.12
## 1705:  733386.3   5928197  2005     2 winter    29.31        1.31  6.56
## 1706:  733386.3   5928197  2005    12 winter    29.08        5.13  2.38

# Winter rows restricted to area "NC": both conditions must hold for a row.

temp[Area == "NC" & Season == "winter"]
##             Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area
##             <char>    <int>     <char> <int> <int> <int>  <char> <char>
##   1: T100.19900103 19900103     3/1/90     0     2     2    T100     NC
##   2: T100.19900205 19900205     5/2/90    33    35    35    T100     NC
##   3: T100.19901218 19901218 12/18/1990   349   351   351    T100     NC
##   4: T100.19910116 19910116  1/16/1991   378   380    15    T100     NC
##   5: T100.19910205 19910205     5/2/91   398   400    35    T100     NC
##  ---                                                                   
## 177: T235.20040209 20040209     9/2/04  5150  5152    39    T235     NC
## 178: T235.20041214 20041214 12/14/2004  5459  5461   348    T235     NC
## 179: T235.20050126 20050126  1/26/2005  5502  5504    25    T235     NC
## 180: T235.20050214 20050214  2/14/2005  5521  5523    44    T235     NC
## 181: T235.20051220 20051220 12/20/2005  5830  5832   353    T235     NC
##      31UE_ED50 31UN_ED50  Year Month Season Salinity Temperature CHLFa
##          <num>     <num> <int> <int> <char>    <num>       <num> <num>
##   1:  587650.2   6001110  1990     1 winter    34.82        8.50  0.30
##   2:  587650.2   6001110  1990     2 winter       NA          NA    NA
##   3:  587650.2   6001110  1990    12 winter    34.80        9.20  0.40
##   4:  587650.2   6001110  1991     1 winter    34.86        6.10  0.68
##   5:  587650.2   6001110  1991     2 winter    34.53        5.20  0.34
##  ---                                                                  
## 177:  510032.3   6114101  2004     2 winter       NA          NA    NA
## 178:  510032.3   6114101  2004    12 winter    34.96        8.47  0.62
## 179:  510032.3   6114101  2005     1 winter    35.09        6.44  0.62
## 180:  510032.3   6114101  2005     2 winter       NA          NA    NA
## 181:  510032.3   6114101  2005    12 winter    33.87        8.19  1.12

# Project the table onto the Area, Season and Temperature columns (all rows).

temp[, .(Area, Season, Temperature)]
##         Area Season Temperature
##       <char> <char>       <num>
##    1:     WZ winter        4.00
##    2:     WZ winter        6.00
##    3:     WZ spring        7.30
##    4:     WZ spring        8.20
##    5:     WZ spring       17.40
##   ---                          
## 8524:     WZ autumn       15.47
## 8525:     WZ autumn       13.45
## 8526:     WZ autumn       12.09
## 8527:     WZ autumn        9.03
## 8528:     WZ winter        5.13

# Area and Temperature columns, winter observations only.

temp[Season == "winter", .(Area, Temperature)]
##         Area Temperature
##       <char>       <num>
##    1:     WZ        4.00
##    2:     WZ        6.00
##    3:     WZ        4.20
##    4:     WZ       -0.30
##    5:     WZ        3.90
##   ---                   
## 1702:     WZ        4.40
## 1703:     WZ        4.72
## 1704:     WZ        3.57
## 1705:     WZ        1.31
## 1706:     WZ        5.13

# Total number of winter observations; .N is the row count of the subset.

temp[Season == "winter", .N]
## [1] 1706

# Mean temperature and mean salinity over the winter observations.
# Both columns contain missing values, so na.rm = TRUE is required;
# without it the means would be NA.

temp[
  Season == "winter",
  .(
    Temperature = mean(Temperature, na.rm = TRUE),
    Salinity = mean(Salinity, na.rm = TRUE)
  )
]
##    Temperature Salinity
##          <num>    <num>
## 1:     5.57162 29.15756

# Number of winter observations per station.
# A bare .N in j is auto-named "N" by data.table; wrapping it in parentheses
# (as in `(.N)`) loses that name and the count column comes out as "V1".

temp[Season == "winter", .N, by = Station]
##     Station     N
##      <char> <int>
##  1:    DANT    50
##  2:    DREI    52
##  3:      G6   101
##  4:    GROO    50
##  5:    HAMM    55
##  6:    HANS    56
##  7:    HUIB    50
##  8:    LODS    54
##  9:    MARS    49
## 10:     N02   115
## 11:     N10   131
## 12:     N20    50
## 13:     N70    50
## 14:     R03    32
## 15:    SOEL    50
## 16:    T004    97
## 17:    T010    45
## 18:    T100    45
## 19:    T135    46
## 20:    T175    45
## 21:    T235    45
## 22:    VLIS    84
## 23:     W02    99
## 24:     W20    47
## 25:     W70    47
## 26:    WISS    55
## 27:    ZIJP    54
## 28:    ZUID    52
##     Station     N

# Number of observations for every Station/Season combination.

temp[, .N, by = .(Station, Season)]
##      Station Season     N
##       <char> <char> <int>
##   1:    DANT winter    50
##   2:    DANT spring    89
##   3:    DANT summer    89
##   4:    DANT autumn    72
##   5:    DREI winter    52
##  ---                     
## 114:    ZIJP autumn    61
## 115:    ZUID winter    52
## 116:    ZUID spring    89
## 117:    ZUID summer    89
## 118:    ZUID autumn    73

# Average temperature for each month, ignoring missing values.

temp[, .(Temperature = mean(Temperature, na.rm = TRUE)), by = Month]
##     Month Temperature
##     <int>       <num>
##  1:     1    5.174210
##  2:     2    4.737400
##  3:     3    6.125961
##  4:     4    8.702035
##  5:     5   12.293479
##  6:     6   15.659933
##  7:     7   18.077343
##  8:     8   19.388355
##  9:     9   16.995974
## 10:    10   13.619670
## 11:    11    9.848891
## 12:    12    6.746339

# Average temperature for each Month/Area combination, ignoring missing values.

temp[, .(Temperature = mean(Temperature, na.rm = TRUE)), by = .(Month, Area)]
##      Month   Area Temperature
##      <int> <char>       <num>
##   1:     1     WZ    3.377826
##   2:     2     WZ    3.925800
##   3:     3     WZ    5.818481
##   4:     4     WZ    9.270805
##   5:     5     WZ   13.398191
##  ---                         
## 116:     1     NC    6.789808
## 117:     2     NC    5.682581
## 118:     3     NC    5.837500
## 119:    11     NC   10.978269
## 120:    12     NC    8.716957

# Plot the monthly mean temperature per area with geom_line(), one line per
# area.

# Mean temperature for every Month/Area combination, ignoring missing values.
ans <- temp[
  ,
  lapply(.SD, mean, na.rm = TRUE),
  by = c("Month", "Area"),
  .SDcols = "Temperature"
]

# Month is an integer on a continuous axis, so ggplot2's default breaks are not
# guaranteed to fall on whole months; pin one tick to each month 1-12.
ggplot(ans, aes(x = Month, y = Temperature, colour = Area)) +
  geom_line() +
  scale_x_continuous(breaks = 1:12)