Load necessary libraries and data
library(data.table)
library(ggplot2)
# Read the temperature monitoring data into a data.table.
temp <- fread("Temperature.csv")
Extract all winter observations
# Keep only the rows whose Season is "winter", then preview the first rows.
winter <- temp[Season == "winter", ]
head(winter)
## Sample Date DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
## <char> <int> <char> <int> <int> <int> <char> <char> <num>
## 1: DANT.19900110 19900110 10/1/90 7 9 9 DANT WZ 681379.6
## 2: DANT.19900206 19900206 6/2/90 34 36 36 DANT WZ 681379.6
## 3: DANT.19901212 19901212 12/12/90 343 345 345 DANT WZ 681379.6
## 4: DANT.19910116 19910116 1/16/1991 378 380 15 DANT WZ 681379.6
## 5: DANT.19910226 19910226 2/26/1991 419 421 56 DANT WZ 681379.6
## 6: DANT.19911219 19911219 12/19/1991 715 717 352 DANT WZ 681379.6
## 31UN_ED50 Year Month Season Salinity Temperature CHLFa
## <num> <int> <int> <char> <num> <num> <num>
## 1: 5920571 1990 1 winter 29.19 4.0 1.30
## 2: 5920571 1990 2 winter 27.37 6.0 NA
## 3: 5920571 1990 12 winter 31.50 4.2 60.50
## 4: 5920571 1991 1 winter 20.83 -0.3 2.30
## 5: 5920571 1991 2 winter 28.06 3.9 3.52
## 6: 5920571 1991 12 winter 25.31 3.9 3.50
Extract all winter observations for zone NC
# Winter rows restricted to Area "NC"; both conditions must hold row-wise.
winter_dataNC <- temp[Area == "NC" & Season == "winter", ]
head(winter_dataNC)
## Sample Date DateNr dDay1 dDay2 dDay3 Station Area 31UE_ED50
## <char> <int> <char> <int> <int> <int> <char> <char> <num>
## 1: T100.19900103 19900103 3/1/90 0 2 2 T100 NC 587650.2
## 2: T100.19900205 19900205 5/2/90 33 35 35 T100 NC 587650.2
## 3: T100.19901218 19901218 12/18/1990 349 351 351 T100 NC 587650.2
## 4: T100.19910116 19910116 1/16/1991 378 380 15 T100 NC 587650.2
## 5: T100.19910205 19910205 5/2/91 398 400 35 T100 NC 587650.2
## 6: T100.19911211 19911211 11/12/91 707 709 344 T100 NC 587650.2
## 31UN_ED50 Year Month Season Salinity Temperature CHLFa
## <num> <int> <int> <char> <num> <num> <num>
## 1: 6001110 1990 1 winter 34.82 8.5 0.30
## 2: 6001110 1990 2 winter NA NA NA
## 3: 6001110 1990 12 winter 34.80 9.2 0.40
## 4: 6001110 1991 1 winter 34.86 6.1 0.68
## 5: 6001110 1991 2 winter 34.53 5.2 0.34
## 6: 6001110 1991 12 winter 34.79 9.7 0.44
Select only the columns Area, Season, and Temperature
# Project all rows onto the three columns of interest.
selected_columns <- temp[, list(Area, Season, Temperature)]
head(selected_columns)
## Area Season Temperature
## <char> <char> <num>
## 1: WZ winter 4.0
## 2: WZ winter 6.0
## 3: WZ spring 7.3
## 4: WZ spring 8.2
## 5: WZ spring 17.4
## 6: WZ summer 18.1
Select only the columns Area and Temperature, restricted to winter observations
# Filter to winter rows (i) and keep only Area and Temperature (j) in one step.
selected_columns_winter <- temp[Season == "winter", .(Area, Temperature)]
# Preview the winter-only subset just created (not the all-season
# `selected_columns` table).
head(selected_columns_winter)
## Area Temperature
## <char> <num>
## 1: WZ 4.0
## 2: WZ 6.0
## 3: WZ 4.2
## 4: WZ -0.3
## 5: WZ 3.9
## 6: WZ 3.9
Find the total number of observations in winter
# .N is the number of rows remaining after the winter filter.
temp[Season == "winter", .N]
## [1] 1706
Calculate the mean temperature and mean salinity in winter
# Mean winter temperature; missing readings are dropped before averaging.
mtemp <- mean(winter[["Temperature"]], na.rm = TRUE)
mtemp
## [1] 5.57162
# Mean winter salinity, again ignoring missing readings.
msalinity <- mean(winter[["Salinity"]], na.rm = TRUE)
msalinity
## [1] 29.15756
Find the number of observations per station in winter
# Count winter rows within each Station group.
temp[Season == "winter", .N, by = Station]
## Station N
## <char> <int>
## 1: DANT 50
## 2: DREI 52
## 3: G6 101
## 4: GROO 50
## 5: HAMM 55
## 6: HANS 56
## 7: HUIB 50
## 8: LODS 54
## 9: MARS 49
## 10: N02 115
## 11: N10 131
## 12: N20 50
## 13: N70 50
## 14: R03 32
## 15: SOEL 50
## 16: T004 97
## 17: T010 45
## 18: T100 45
## 19: T135 46
## 20: T175 45
## 21: T235 45
## 22: VLIS 84
## 23: W02 99
## 24: W20 47
## 25: W70 47
## 26: WISS 55
## 27: ZIJP 54
## 28: ZUID 52
## Station N
Find the number of observations per station per season
# Row count for every Station/Season combination present in the data.
obs_per_station_season <- temp[, .N, by = list(Station, Season)]
head(obs_per_station_season)
## Station Season N
## <char> <char> <int>
## 1: DANT winter 50
## 2: DANT spring 89
## 3: DANT summer 89
## 4: DANT autumn 72
## 5: DREI winter 52
## 6: DREI spring 92
Calculate the average temperature by month
# Mean temperature per Month; keyby sorts the result by Month and sets it as key.
temp[, list(avg_temp = mean(Temperature, na.rm = TRUE)), keyby = Month]
## Key: <Month>
## Month avg_temp
## <int> <num>
## 1: 1 5.174210
## 2: 2 4.737400
## 3: 3 6.125961
## 4: 4 8.702035
## 5: 5 12.293479
## 6: 6 15.659933
## 7: 7 18.077343
## 8: 8 19.388355
## 9: 9 16.995974
## 10: 10 13.619670
## 11: 11 9.848891
## 12: 12 6.746339
Calculate the average temperature by month and area
# Mean temperature for each Month/Area pair, sorted and keyed by (Month, Area).
atemp_month_area <- temp[
  ,
  list(avg_temp = mean(Temperature, na.rm = TRUE)),
  keyby = list(Month, Area)
]
atemp_month_area
## Key: <Month, Area>
## Month Area avg_temp
## <int> <char> <num>
## 1: 1 ED 3.086333
## 2: 1 GM 4.308750
## 3: 1 KZ 5.296222
## 4: 1 NC 6.789808
## 5: 1 NZ 8.122000
## ---
## 116: 12 OS 6.075000
## 117: 12 VD 7.402778
## 118: 12 VM 5.138125
## 119: 12 WS 6.334500
## 120: 12 WZ 4.985111
Plot the average temperatures by month by area using ggplot2
# Draw one line per Area showing the monthly average temperature.
# y must map to the `avg_temp` column of `atemp_month_area`; `mtemp` is a
# single overall winter mean, not a column, and would draw every area as the
# same flat line.
ggplot(atemp_month_area, aes(x = Month, y = avg_temp, color = Area)) +
  geom_line() +
  # Month is an integer 1-12, so label each month instead of fractional breaks.
  scale_x_continuous(breaks = 1:12) +
  ggtitle("Average Temperature by Month and Area") +
  xlab("Month") +
  ylab("Average Temperature")