# Load required packages
library(data.table)
library(ggplot2)

# Read in the data.
# The file is addressed through an explicit path rather than via setwd(), so
# running the script does not change the working directory of the R session.
data_dir <- "~/Downloads/Intro to R/Module 11"
temp <- fread(file.path(data_dir, "Temperature.csv"))

# Season is stored in lower case in the data ("winter", "spring", ...), so an
# exact comparison with "Winter" matches no rows. All winter filters below
# therefore compare case-insensitively via tolower().

# Extract all winter observations
winter_data <- temp[tolower(Season) == "winter"]

# Extract all winter observations for zone NC
winter_nc <- temp[tolower(Season) == "winter" & Area == "NC"]

# Select only the columns Area, Season and Temperature
cols_1 <- temp[, .(Area, Season, Temperature)]

# Select only Area and Temperature but only for winter observations
cols_2 <- temp[tolower(Season) == "winter", .(Area, Temperature)]

# Find the total number of observations in winter
n_winter <- nrow(winter_data)
if (n_winter == 0L) {
  # An empty subset would silently turn the means below into NaN.
  warning(
    "No winter observations found; check how Season is coded.",
    call. = FALSE
  )
}
cat("Number of winter observations:", n_winter, "\n\n")

# Calculate the mean temperature and mean salinity in winter
mean_temp <- mean(winter_data$Temperature, na.rm = TRUE)
mean_sal <- mean(winter_data$Salinity, na.rm = TRUE)
cat("Mean Winter Temperature:", round(mean_temp, 2), "\n")
cat("Mean Winter Salinity:", round(mean_sal, 2), "\n\n")

# Find the number of observations per station in winter
obs_per_station_winter <- winter_data[, .N, by = Station]
obs_per_station_winter
# Count the observations for every station/season combination
obs_station_season <- temp[,
  .(N = .N),
  by = c("Station", "Season")
]
obs_station_season
##      Station Season     N
##       <char> <char> <int>
##   1:    DANT winter    50
##   2:    DANT spring    89
##   3:    DANT summer    89
##   4:    DANT autumn    72
##   5:    DREI winter    52
##  ---                     
## 114:    ZIJP autumn    61
## 115:    ZUID winter    52
## 116:    ZUID spring    89
## 117:    ZUID summer    89
## 118:    ZUID autumn    73
# Mean temperature for each month (missing temperatures are ignored)
avg_temp_month <- temp[,
  list(Mean_Temp = mean(Temperature, na.rm = TRUE)),
  by = "Month"
]
avg_temp_month
##     Month Mean_Temp
##     <int>     <num>
##  1:     1  5.174210
##  2:     2  4.737400
##  3:     3  6.125961
##  4:     4  8.702035
##  5:     5 12.293479
##  6:     6 15.659933
##  7:     7 18.077343
##  8:     8 19.388355
##  9:     9 16.995974
## 10:    10 13.619670
## 11:    11  9.848891
## 12:    12  6.746339
# Mean temperature for each month within each area (missing values ignored)
avg_temp_month_area <- temp[,
  list(Mean_Temp = mean(Temperature, na.rm = TRUE)),
  by = c("Month", "Area")
]
avg_temp_month_area
##      Month   Area Mean_Temp
##      <int> <char>     <num>
##   1:     1     WZ  3.377826
##   2:     2     WZ  3.925800
##   3:     3     WZ  5.818481
##   4:     4     WZ  9.270805
##   5:     5     WZ 13.398191
##  ---                       
## 116:     1     NC  6.789808
## 117:     2     NC  5.682581
## 118:     3     NC  5.837500
## 119:    11     NC 10.978269
## 120:    12     NC  8.716957
# Plot average monthly temperature by area: one line per area.
# Month is an integer 1-12, so the x axis gets one labelled break per month
# instead of ggplot's automatic breaks for a continuous axis.
ggplot(
  avg_temp_month_area,
  aes(x = Month, y = Mean_Temp, color = Area, group = Area)
) +
  geom_line() +
  scale_x_continuous(breaks = 1:12, labels = month.abb) +
  labs(title = "Average Monthly Temperature by Area",
       x = "Month",
       y = "Average Temperature (°C)") +
  theme_minimal()