Read in the data using fread

library(data.table)
library(ggplot2)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:data.table':
## 
##     between, first, last
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Load the contents of Temperature.csv into a data.table named `temp`.
temp <- data.table::fread(input = "Temperature.csv")

Extract all winter observations

# Keep only the rows whose Season is "winter", then preview the first rows.
winter <- temp[Season == "winter", ]
head(winter)
##           Sample     Date     DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
## 1: DANT.19900110 19900110    10/1/90     7     9     9    DANT   WZ  681379.6
## 2: DANT.19900206 19900206     6/2/90    34    36    36    DANT   WZ  681379.6
## 3: DANT.19901212 19901212   12/12/90   343   345   345    DANT   WZ  681379.6
## 4: DANT.19910116 19910116  1/16/1991   378   380    15    DANT   WZ  681379.6
## 5: DANT.19910226 19910226  2/26/1991   419   421    56    DANT   WZ  681379.6
## 6: DANT.19911219 19911219 12/19/1991   715   717   352    DANT   WZ  681379.6
##    31UN_ED50 Year Month Season Salinity Temperature CHLFa
## 1:   5920571 1990     1 winter    29.19         4.0  1.30
## 2:   5920571 1990     2 winter    27.37         6.0    NA
## 3:   5920571 1990    12 winter    31.50         4.2 60.50
## 4:   5920571 1991     1 winter    20.83        -0.3  2.30
## 5:   5920571 1991     2 winter    28.06         3.9  3.52
## 6:   5920571 1991    12 winter    25.31         3.9  3.50

Extract all winter observations for zone NC

# Narrow the winter subset down to the rows whose Area is "NC".
NCwinter <- winter[Area == "NC", ]
head(NCwinter)
##           Sample     Date     DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
## 1: T100.19900103 19900103     3/1/90     0     2     2    T100   NC  587650.2
## 2: T100.19900205 19900205     5/2/90    33    35    35    T100   NC  587650.2
## 3: T100.19901218 19901218 12/18/1990   349   351   351    T100   NC  587650.2
## 4: T100.19910116 19910116  1/16/1991   378   380    15    T100   NC  587650.2
## 5: T100.19910205 19910205     5/2/91   398   400    35    T100   NC  587650.2
## 6: T100.19911211 19911211   11/12/91   707   709   344    T100   NC  587650.2
##    31UN_ED50 Year Month Season Salinity Temperature CHLFa
## 1:   6001110 1990     1 winter    34.82         8.5  0.30
## 2:   6001110 1990     2 winter       NA          NA    NA
## 3:   6001110 1990    12 winter    34.80         9.2  0.40
## 4:   6001110 1991     1 winter    34.86         6.1  0.68
## 5:   6001110 1991     2 winter    34.53         5.2  0.34
## 6:   6001110 1991    12 winter    34.79         9.7  0.44

Select only the columns Area, Season and Temperature

# Project the table onto three columns; no row filter is applied.
ASTdata <- temp[, .SD, .SDcols = c("Area", "Season", "Temperature")]
head(ASTdata)
##    Area Season Temperature
## 1:   WZ winter         4.0
## 2:   WZ winter         6.0
## 3:   WZ spring         7.3
## 4:   WZ spring         8.2
## 5:   WZ spring        17.4
## 6:   WZ summer        18.1

Select only the columns Area and Temperature but only for winter observations

# Filter to winter rows (i) and pick two columns (j) in a single call.
ATwinter <- temp[Season == "winter", list(Area, Temperature)]
head(ATwinter)
##    Area Temperature
## 1:   WZ         4.0
## 2:   WZ         6.0
## 3:   WZ         4.2
## 4:   WZ        -0.3
## 5:   WZ         3.9
## 6:   WZ         3.9

Find the total number of observations in winter

# Count the rows whose Season is "winter".
nrow(temp[Season == "winter"])
## [1] 1706

Calculate the mean temperature and mean salinity in winter. (Note that there are missing values, so you will have to use na.rm = TRUE.)

# Mean temperature and mean salinity over the winter rows.
# na.rm = TRUE makes mean() skip missing values instead of returning NA.
temp[
  Season == "winter",
  list(
    m_Temp = mean(Temperature, na.rm = TRUE),
    m_Sal = mean(Salinity, na.rm = TRUE)
  )
]
##     m_Temp    m_Sal
## 1: 5.57162 29.15756

Find the number of observations per area in winter (the code groups by the Area column; use by = .(Station) instead for per-station counts)

# Winter row counts, one result row per distinct value of the Area column.
temp[Season == "winter", .N, by = "Area"]
##     Area   N
##  1:   WZ 151
##  2:   GM  52
##  3:   VD 247
##  4:   ED 100
##  5:   OS 218
##  6:   WS 140
##  7:   KZ 470
##  8:   NZ  97
##  9:   VM  50
## 10:   NC 181

Find the number of observations per area per season (the code groups by the Area column; use by = .(Station, Season) instead for per-station counts)

# Row counts for every (Area, Season) combination present in the data.
temp[, .N, by = c("Area", "Season")]
##     Area Season   N
##  1:   WZ winter 151
##  2:   WZ spring 266
##  3:   WZ summer 267
##  4:   WZ autumn 215
##  5:   GM winter  52
##  6:   GM spring  92
##  7:   GM summer  88
##  8:   GM autumn  61
##  9:   VD winter 247
## 10:   VD spring 163
## 11:   VD summer 144
## 12:   VD autumn 187
## 13:   ED winter 100
## 14:   ED spring 173
## 15:   ED summer 178
## 16:   ED autumn 141
## 17:   OS winter 218
## 18:   OS spring 352
## 19:   OS summer 367
## 20:   OS autumn 244
## 21:   WS winter 140
## 22:   WS spring 175
## 23:   WS summer 235
## 24:   WS autumn 180
## 25:   KZ winter 470
## 26:   KZ spring 572
## 27:   KZ summer 610
## 28:   KZ autumn 442
## 29:   NZ winter  97
## 30:   NZ spring 161
## 31:   NZ summer 180
## 32:   NZ autumn 126
## 33:   NC spring 350
## 34:   NC summer 383
## 35:   NC autumn 225
## 36:   VM winter  50
## 37:   VM spring  92
## 38:   VM summer  92
## 39:   VM autumn  61
## 40:   NC winter 181
##     Area Season   N

Estimate average temperatures by month

# Mean temperature per Month, skipping missing temperature values.
temp[, list(m_Temp = mean(Temperature, na.rm = TRUE)), by = "Month"]
##     Month    m_Temp
##  1:     1  5.174210
##  2:     2  4.737400
##  3:     3  6.125961
##  4:     4  8.702035
##  5:     5 12.293479
##  6:     6 15.659933
##  7:     7 18.077343
##  8:     8 19.388355
##  9:     9 16.995974
## 10:    10 13.619670
## 11:    11  9.848891
## 12:    12  6.746339

Estimate average temperatures by month by area

# Mean temperature for every (Month, Area) pair, skipping missing values.
temp[, list(m_Temp = mean(Temperature, na.rm = TRUE)),
     by = c("Month", "Area")]
##      Month Area    m_Temp
##   1:     1   WZ  3.377826
##   2:     2   WZ  3.925800
##   3:     3   WZ  5.818481
##   4:     4   WZ  9.270805
##   5:     5   WZ 13.398191
##  ---                     
## 116:     1   NC  6.789808
## 117:     2   NC  5.682581
## 118:     3   NC  5.837500
## 119:    11   NC 10.978269
## 120:    12   NC  8.716957

Plot the output of the previous question with ggplot2, using the geom_line() geometry

# Line plot of the mean temperature per (Month, Area) pair, one coloured
# line per Area. The aggregation is recomputed here and piped into ggplot().
temp[, .(m_Temp = mean(Temperature, na.rm = TRUE)), by = .(Month, Area)] %>%
  ggplot(aes(x = Month, y = m_Temp, colour = Area)) +
  geom_line() +
  # Month is numeric (1-12); month.abb is base R's built-in vector of English
  # month abbreviations ("Jan" ... "Dec"), used to label each tick.
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  labs(
    title = "Monthly Mean Temperature for Data Samples",
    x = "Month",
    y = "Mean Temperature (C)"
  ) +
  theme_bw()