Load necessary libraries and data

library(data.table)
library(ggplot2)
# Read the raw measurements into a data.table; every later step subsets `temp`.
temp <- fread("Temperature.csv")

Extract all winter observations

# Keep only the rows whose Season is "winter"; all columns are retained.
winter <- temp[Season == "winter", ]
head(winter)
##           Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area 31UE_ED50
##           <char>    <int>     <char> <int> <int> <int>  <char> <char>     <num>
## 1: DANT.19900110 19900110    10/1/90     7     9     9    DANT     WZ  681379.6
## 2: DANT.19900206 19900206     6/2/90    34    36    36    DANT     WZ  681379.6
## 3: DANT.19901212 19901212   12/12/90   343   345   345    DANT     WZ  681379.6
## 4: DANT.19910116 19910116  1/16/1991   378   380    15    DANT     WZ  681379.6
## 5: DANT.19910226 19910226  2/26/1991   419   421    56    DANT     WZ  681379.6
## 6: DANT.19911219 19911219 12/19/1991   715   717   352    DANT     WZ  681379.6
##    31UN_ED50  Year Month Season Salinity Temperature CHLFa
##        <num> <int> <int> <char>    <num>       <num> <num>
## 1:   5920571  1990     1 winter    29.19         4.0  1.30
## 2:   5920571  1990     2 winter    27.37         6.0    NA
## 3:   5920571  1990    12 winter    31.50         4.2 60.50
## 4:   5920571  1991     1 winter    20.83        -0.3  2.30
## 5:   5920571  1991     2 winter    28.06         3.9  3.52
## 6:   5920571  1991    12 winter    25.31         3.9  3.50

Extract all winter observations for zone NC

# Winter rows restricted to area "NC" (both conditions must hold for a row).
winter_dataNC <- temp[Area == "NC" & Season == "winter", ]
head(winter_dataNC)
##           Sample     Date     DateNr dDay1 dDay2 dDay3 Station   Area 31UE_ED50
##           <char>    <int>     <char> <int> <int> <int>  <char> <char>     <num>
## 1: T100.19900103 19900103     3/1/90     0     2     2    T100     NC  587650.2
## 2: T100.19900205 19900205     5/2/90    33    35    35    T100     NC  587650.2
## 3: T100.19901218 19901218 12/18/1990   349   351   351    T100     NC  587650.2
## 4: T100.19910116 19910116  1/16/1991   378   380    15    T100     NC  587650.2
## 5: T100.19910205 19910205     5/2/91   398   400    35    T100     NC  587650.2
## 6: T100.19911211 19911211   11/12/91   707   709   344    T100     NC  587650.2
##    31UN_ED50  Year Month Season Salinity Temperature CHLFa
##        <num> <int> <int> <char>    <num>       <num> <num>
## 1:   6001110  1990     1 winter    34.82         8.5  0.30
## 2:   6001110  1990     2 winter       NA          NA    NA
## 3:   6001110  1990    12 winter    34.80         9.2  0.40
## 4:   6001110  1991     1 winter    34.86         6.1  0.68
## 5:   6001110  1991     2 winter    34.53         5.2  0.34
## 6:   6001110  1991    12 winter    34.79         9.7  0.44

Select only the columns area, season, and temperature

# Column subset by name; no row filter is applied here.
selected_columns <- temp[, c("Area", "Season", "Temperature")]
head(selected_columns)
##      Area Season Temperature
##    <char> <char>       <num>
## 1:     WZ winter         4.0
## 2:     WZ winter         6.0
## 3:     WZ spring         7.3
## 4:     WZ spring         8.2
## 5:     WZ spring        17.4
## 6:     WZ summer        18.1

Select only the columns Area and Temperature, but only for winter observations

# Filter to winter in `i` and pick the two columns in `j` in a single call.
selected_columns_winter <- temp[Season == "winter", .(Area, Temperature)]
# Preview the winter subset itself (not the all-season `selected_columns`).
head(selected_columns_winter)
##      Area Temperature
##    <char>       <num>
## 1:     WZ         4.0
## 2:     WZ         6.0
## 3:     WZ         4.2
## 4:     WZ        -0.3
## 5:     WZ         3.9
## 6:     WZ         3.9

Find the total number of observations in winter

# Number of rows in the winter subset.
nrow(temp[Season == "winter"])
## [1] 1706

Calculate the mean temperature and mean salinity in winter

# Winter means; na.rm = TRUE skips rows where the measurement is missing.
mtemp <- mean(winter[["Temperature"]], na.rm = TRUE)
mtemp
msalinity <- mean(winter[["Salinity"]], na.rm = TRUE)
msalinity
## [1] 29.15756

Find the number of observations per station in winter

# .N counts the rows in each Station group of the winter subset.
temp[Season == "winter", .N, by = Station]
##     Station     N
##      <char> <int>
##  1:    DANT    50
##  2:    DREI    52
##  3:      G6   101
##  4:    GROO    50
##  5:    HAMM    55
##  6:    HANS    56
##  7:    HUIB    50
##  8:    LODS    54
##  9:    MARS    49
## 10:     N02   115
## 11:     N10   131
## 12:     N20    50
## 13:     N70    50
## 14:     R03    32
## 15:    SOEL    50
## 16:    T004    97
## 17:    T010    45
## 18:    T100    45
## 19:    T135    46
## 20:    T175    45
## 21:    T235    45
## 22:    VLIS    84
## 23:     W02    99
## 24:     W20    47
## 25:     W70    47
## 26:    WISS    55
## 27:    ZIJP    54
## 28:    ZUID    52
##     Station     N

Find the number of observations per station per season

# One row per (Station, Season) combination, with N = number of observations.
obs_per_station_season <- temp[, .N, by = c("Station", "Season")]
head(obs_per_station_season)
##    Station Season     N
##     <char> <char> <int>
## 1:    DANT winter    50
## 2:    DANT spring    89
## 3:    DANT summer    89
## 4:    DANT autumn    72
## 5:    DREI winter    52
## 6:    DREI spring    92

Estimate average temp by month

# Mean temperature per month, ignoring missing values; keyby also sorts the
# result by Month and sets it as the key.
temp[, .(avg_temp = mean(Temperature, na.rm = TRUE)), keyby = "Month"]
## Key: <Month>
##     Month  avg_temp
##     <int>     <num>
##  1:     1  5.174210
##  2:     2  4.737400
##  3:     3  6.125961
##  4:     4  8.702035
##  5:     5 12.293479
##  6:     6 15.659933
##  7:     7 18.077343
##  8:     8 19.388355
##  9:     9 16.995974
## 10:    10 13.619670
## 11:    11  9.848891
## 12:    12  6.746339

Estimate average temp by month by area

# Mean temperature for every (Month, Area) pair, ignoring missing values.
# The result is keyed (and therefore sorted) by Month, then Area.
atemp_month_area <- temp[, .(avg_temp = mean(Temperature, na.rm = TRUE)),
  keyby = c("Month", "Area")
]
atemp_month_area
## Key: <Month, Area>
##      Month   Area avg_temp
##      <int> <char>    <num>
##   1:     1     ED 3.086333
##   2:     1     GM 4.308750
##   3:     1     KZ 5.296222
##   4:     1     NC 6.789808
##   5:     1     NZ 8.122000
##  ---                      
## 116:    12     OS 6.075000
## 117:    12     VD 7.402778
## 118:    12     VM 5.138125
## 119:    12     WS 6.334500
## 120:    12     WZ 4.985111

Plot the average temperatures by month by area using ggplot2

# Line chart of the monthly mean temperature, one coloured line per area.
# y must map to the per-group column `avg_temp`; mapping it to the scalar
# winter mean `mtemp` would draw every area as the same flat line.
ggplot(atemp_month_area, aes(x = Month, y = avg_temp, color = Area)) +
  geom_line() +
  # Month is an integer 1-12, so put a tick on every month.
  scale_x_continuous(breaks = 1:12) +
  labs(
    title = "Average Temperature by Month and Area",
    x = "Month",
    y = "Average Temperature"
  )