Loading in data

Note: we also load ggplot2 (plus magrittr, which provides the `%>%` pipe) for the final question.

library(data.table)
## Warning: package 'data.table' was built under R version 4.0.5
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.0.5
library(magrittr)
# Read the temperature measurements into a data.table and preview
# the first six rows.
temp <- fread("Temperature.csv")
head(temp, n = 6L)
##           Sample     Date    DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
## 1: DANT.19900110 19900110   10/1/90     7     9     9    DANT   WZ  681379.6
## 2: DANT.19900206 19900206    6/2/90    34    36    36    DANT   WZ  681379.6
## 3: DANT.19900308 19900308    8/3/90    64    66    66    DANT   WZ  681379.6
## 4: DANT.19900404 19900404    4/4/90    91    93    93    DANT   WZ  681379.6
## 5: DANT.19900509 19900509    9/5/90   126   128   128    DANT   WZ  681379.6
## 6: DANT.19900620 19900620 6/20/1990   168   170   170    DANT   WZ  681379.6
##    31UN_ED50 Year Month Season Salinity Temperature CHLFa
## 1:   5920571 1990     1 winter    29.19         4.0   1.3
## 2:   5920571 1990     2 winter    27.37         6.0    NA
## 3:   5920571 1990     3 spring    24.99         7.3  21.1
## 4:   5920571 1990     4 spring    28.79         8.2  25.0
## 5:   5920571 1990     5 spring    33.28        17.4  10.2
## 6:   5920571 1990     6 summer    32.69        18.1   6.2

Extracting observations

Here we extract the observations asked for in questions 2 & 3: first all winter observations, then the winter observations from area "NC".

# Keep only the rows recorded in winter (all columns retained).
winter <- temp[Season == "winter", ]
winter
##              Sample     Date    DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
##    1: DANT.19900110 19900110   10/1/90     7     9     9    DANT   WZ  681379.6
##    2: DANT.19900206 19900206    6/2/90    34    36    36    DANT   WZ  681379.6
##    3: DANT.19901212 19901212  12/12/90   343   345   345    DANT   WZ  681379.6
##    4: DANT.19910116 19910116 1/16/1991   378   380    15    DANT   WZ  681379.6
##    5: DANT.19910226 19910226 2/26/1991   419   421    56    DANT   WZ  681379.6
##   ---                                                                          
## 1702: ZUID.20040216 20040216 2/16/2004  5157  5159    46    ZUID   WZ  733386.3
## 1703: ZUID.20041208 20041208   8/12/04  5453  5455   342    ZUID   WZ  733386.3
## 1704: ZUID.20050119 20050119 1/19/2005  5495  5497    18    ZUID   WZ  733386.3
## 1705: ZUID.20050218 20050218 2/18/2005  5525  5527    48    ZUID   WZ  733386.3
## 1706: ZUID.20051212 20051212  12/12/05  5822  5824   345    ZUID   WZ  733386.3
##       31UN_ED50 Year Month Season Salinity Temperature CHLFa
##    1:   5920571 1990     1 winter    29.19        4.00  1.30
##    2:   5920571 1990     2 winter    27.37        6.00    NA
##    3:   5920571 1990    12 winter    31.50        4.20 60.50
##    4:   5920571 1991     1 winter    20.83       -0.30  2.30
##    5:   5920571 1991     2 winter    28.06        3.90  3.52
##   ---                                                       
## 1702:   5928197 2004     2 winter    24.70        4.40    NA
## 1703:   5928197 2004    12 winter    28.67        4.72  1.52
## 1704:   5928197 2005     1 winter    28.39        3.57  4.12
## 1705:   5928197 2005     2 winter    29.31        1.31  6.56
## 1706:   5928197 2005    12 winter    29.08        5.13  2.38
# Winter rows restricted to area "NC"; a row must satisfy both conditions.
winter2 <- temp[Area == "NC" & Season == "winter", ]
winter2
##             Sample     Date     DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
##   1: T100.19900103 19900103     3/1/90     0     2     2    T100   NC  587650.2
##   2: T100.19900205 19900205     5/2/90    33    35    35    T100   NC  587650.2
##   3: T100.19901218 19901218 12/18/1990   349   351   351    T100   NC  587650.2
##   4: T100.19910116 19910116  1/16/1991   378   380    15    T100   NC  587650.2
##   5: T100.19910205 19910205     5/2/91   398   400    35    T100   NC  587650.2
##  ---                                                                           
## 177: T235.20040209 20040209     9/2/04  5150  5152    39    T235   NC  510032.3
## 178: T235.20041214 20041214 12/14/2004  5459  5461   348    T235   NC  510032.3
## 179: T235.20050126 20050126  1/26/2005  5502  5504    25    T235   NC  510032.3
## 180: T235.20050214 20050214  2/14/2005  5521  5523    44    T235   NC  510032.3
## 181: T235.20051220 20051220 12/20/2005  5830  5832   353    T235   NC  510032.3
##      31UN_ED50 Year Month Season Salinity Temperature CHLFa
##   1:   6001110 1990     1 winter    34.82        8.50  0.30
##   2:   6001110 1990     2 winter       NA          NA    NA
##   3:   6001110 1990    12 winter    34.80        9.20  0.40
##   4:   6001110 1991     1 winter    34.86        6.10  0.68
##   5:   6001110 1991     2 winter    34.53        5.20  0.34
##  ---                                                       
## 177:   6114101 2004     2 winter       NA          NA    NA
## 178:   6114101 2004    12 winter    34.96        8.47  0.62
## 179:   6114101 2005     1 winter    35.09        6.44  0.62
## 180:   6114101 2005     2 winter       NA          NA    NA
## 181:   6114101 2005    12 winter    33.87        8.19  1.12

Selecting specific columns

# Select the Area, Season and Temperature columns for every row.
ast <- temp[, list(Area, Season, Temperature)]
ast
##       Area Season Temperature
##    1:   WZ winter        4.00
##    2:   WZ winter        6.00
##    3:   WZ spring        7.30
##    4:   WZ spring        8.20
##    5:   WZ spring       17.40
##   ---                        
## 8524:   WZ autumn       15.47
## 8525:   WZ autumn       13.45
## 8526:   WZ autumn       12.09
## 8527:   WZ autumn        9.03
## 8528:   WZ winter        5.13
# Combine a row filter (winter only) with a column selection (Area, Season).
winter3 <- temp[Season == "winter", list(Area, Season)]
winter3
##       Area Season
##    1:   WZ winter
##    2:   WZ winter
##    3:   WZ winter
##    4:   WZ winter
##    5:   WZ winter
##   ---            
## 1702:   WZ winter
## 1703:   WZ winter
## 1704:   WZ winter
## 1705:   WZ winter
## 1706:   WZ winter

Total # of winter observations

# Number of winter observations: filter the rows, then count them.
winter4 <- nrow(temp[Season == "winter", ])
winter4
## [1] 1706

Calculating the mean

# Mean temperature and mean salinity over the winter observations.
# Missing values are dropped before averaging.
winter5 <- temp[
  Season == "winter",
  list(
    m_temp = mean(Temperature, na.rm = TRUE),
    m_sal = mean(Salinity, na.rm = TRUE)
  )
]
winter5
##     m_temp    m_sal
## 1: 5.57162 29.15756

Counting observations per group

# Count the winter observations recorded at each station.
winter6 <- temp[Season == "winter", .N, by = "Station"]
winter6
##     Station   N
##  1:    DANT  50
##  2:    DREI  52
##  3:      G6 101
##  4:    GROO  50
##  5:    HAMM  55
##  6:    HANS  56
##  7:    HUIB  50
##  8:    LODS  54
##  9:    MARS  49
## 10:     N02 115
## 11:     N10 131
## 12:     N20  50
## 13:     N70  50
## 14:     R03  32
## 15:    SOEL  50
## 16:    T004  97
## 17:    T010  45
## 18:    T100  45
## 19:    T135  46
## 20:    T175  45
## 21:    T235  45
## 22:    VLIS  84
## 23:     W02  99
## 24:     W20  47
## 25:     W70  47
## 26:    WISS  55
## 27:    ZIJP  54
## 28:    ZUID  52
##     Station   N
# Count the observations for every station/season combination.
st_se <- temp[, .N, by = c("Station", "Season")]
st_se
##      Station Season  N
##   1:    DANT winter 50
##   2:    DANT spring 89
##   3:    DANT summer 89
##   4:    DANT autumn 72
##   5:    DREI winter 52
##  ---                  
## 114:    ZIJP autumn 61
## 115:    ZUID winter 52
## 116:    ZUID spring 89
## 117:    ZUID summer 89
## 118:    ZUID autumn 73

Estimating averages by specific columns

# Mean temperature for each month, ignoring missing measurements.
temp_month <- temp[
  ,
  list(m_temp = mean(Temperature, na.rm = TRUE)),
  by = "Month"
]
temp_month
##     Month    m_temp
##  1:     1  5.174210
##  2:     2  4.737400
##  3:     3  6.125961
##  4:     4  8.702035
##  5:     5 12.293479
##  6:     6 15.659933
##  7:     7 18.077343
##  8:     8 19.388355
##  9:     9 16.995974
## 10:    10 13.619670
## 11:    11  9.848891
## 12:    12  6.746339
# Mean temperature for each month within each area, ignoring missing
# measurements. This table feeds the plot at the end.
temp_ma <- temp[
  ,
  list(m_temp = mean(Temperature, na.rm = TRUE)),
  by = c("Month", "Area")
]
temp_ma
##      Month Area    m_temp
##   1:     1   WZ  3.377826
##   2:     2   WZ  3.925800
##   3:     3   WZ  5.818481
##   4:     4   WZ  9.270805
##   5:     5   WZ 13.398191
##  ---                     
## 116:     1   NC  6.789808
## 117:     2   NC  5.682581
## 118:     3   NC  5.837500
## 119:    11   NC 10.978269
## 120:    12   NC  8.716957

Plotting

Finally, we plot the mean temperature per month and area (temp_ma, computed above) with ggplot2, using one panel per area.

# Line plot of mean temperature against month, one panel per area.
# Month holds whole numbers (1-12), so the x axis gets explicit whole-month
# ticks instead of automatically chosen breaks, and both axes get readable
# labels rather than the raw column names.
temp_ma %>%
  ggplot(aes(x = Month, y = m_temp)) +
  geom_line() +
  scale_x_continuous(breaks = seq(2, 12, by = 2)) +
  labs(x = "Month", y = "Mean temperature") +
  facet_wrap(~Area)