#install.packages(c("readr", "dplyr", "ggplot2", "tidyr"))
# Load packages
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
# Import Meteorology Data

# Read the whitespace-delimited station file. col_names = FALSE makes readr
# treat the first line as data and assign default column names (X1, X2, ...).
met_raw <- readr::read_table(
  file = "../data/uni.barentsburg.20107.dat",
  show_col_types = FALSE,
  col_names = FALSE
)

# Preview the first rows to check how the columns were parsed.
head(met_raw, n = 6L)
## # A tibble: 6 × 22
##      X1    X2    X3    X4    X5    X6    X7    X8    X9   X10   X11   X12   X13
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20107  1961     1    -1    -1     1  78.1  14.2 -12.7 1013. 1000. 1000.   6  
## 2 20107  1961     2    -1    -1     1  78.1  14.2 -17.2 1005. 1000. 1000.   6.4
## 3 20107  1961     3    -1    -1     1  78.1  14.2 -12.5  998. 1000. 1000.   8.2
## 4 20107  1961     4    -1    -1     1  78.1  14.2 -11.4 1013. 1000. 1000.   5.8
## 5 20107  1961     5    -1    -1     1  78.1  14.2  -3.8 1016. 1000. 1000.   8.4
## 6 20107  1961     6    -1    -1     1  78.1  14.2   2.7 1008. 1000. 1000.   8.4
## # ℹ 9 more variables: X14 <dbl>, X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>,
## #   X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# Only keep important columns

# Keep only the columns used later on, giving each a descriptive name.
# all_of() with a named vector selects and renames in a single step
# (new name = "old name").
met_data <- select(
  met_raw,
  all_of(c(
    station = "X1",
    year = "X2",
    month = "X3",
    latitude = "X7",
    longitude = "X8",
    temp = "X9",
    station_name = "X22"
  ))
)

# Preview the trimmed table.
head(met_data)
## # A tibble: 6 × 7
##   station  year month latitude longitude  temp station_name
##     <dbl> <dbl> <dbl>    <dbl>     <dbl> <dbl> <chr>       
## 1   20107  1961     1     78.1      14.2 -12.7 barentsburg 
## 2   20107  1961     2     78.1      14.2 -17.2 barentsburg 
## 3   20107  1961     3     78.1      14.2 -12.5 barentsburg 
## 4   20107  1961     4     78.1      14.2 -11.4 barentsburg 
## 5   20107  1961     5     78.1      14.2  -3.8 barentsburg 
## 6   20107  1961     6     78.1      14.2   2.7 barentsburg
# Check value ranges before cleaning: a temp maximum of 999.99 and a mean far
# above the median point to a placeholder value standing in for missing data.
summary(met_data)
##     station           year          month          latitude       longitude    
##  Min.   :20107   Min.   :1961   Min.   : 1.00   Min.   :78.07   Min.   :14.25  
##  1st Qu.:20107   1st Qu.:1971   1st Qu.: 3.75   1st Qu.:78.07   1st Qu.:14.25  
##  Median :20107   Median :1980   Median : 6.50   Median :78.07   Median :14.25  
##  Mean   :20107   Mean   :1980   Mean   : 6.50   Mean   :78.07   Mean   :14.25  
##  3rd Qu.:20107   3rd Qu.:1990   3rd Qu.: 9.25   3rd Qu.:78.07   3rd Qu.:14.25  
##  Max.   :20107   Max.   :2000   Max.   :12.00   Max.   :78.07   Max.   :14.25  
##       temp         station_name      
##  Min.   : -23.70   Length:480        
##  1st Qu.: -11.22   Class :character  
##  Median :  -3.70   Mode  :character  
##  Mean   : 161.60                     
##  3rd Qu.:   4.80                     
##  Max.   : 999.99
# Clean temperature data

met_clean <- met_data %>%
  mutate(
    # Readings of 999 or above are treated as missing (the raw maximum was
    # 999.99), so blank them out before computing any statistics.
    temp = if_else(temp >= 999, NA_real_, temp),
    # Split the record into two 20-year periods. Years outside both ranges
    # get NA because case_when() has no fallback branch here.
    period = case_when(
      year <= 1980 & year >= 1961 ~ "1961-1980",
      year <= 2000 & year >= 1981 ~ "1981-2000"
    ),
    # Factor with calendar-ordered levels so tables and plots are not sorted
    # alphabetically; month.abb is base R's "Jan" ... "Dec" vector.
    month_name = factor(month, levels = 1:12, labels = month.abb)
  )

# The maximum should now be a plausible temperature rather than 999.99.
summary(met_clean$temp)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
## -23.700 -12.725  -6.400  -6.083   0.900   7.700      80
# Number of monthly values flagged as missing.
sum(is.na(met_clean$temp))
## [1] 80
# Monthly Statistics Table

# Per-month summary of temperature: mean and median over the whole record and
# within each period, plus the standard deviation over the whole record.
# Missing temperatures are dropped from every statistic (na.rm = TRUE).
monthly_stats <- met_clean %>%
  mutate(
    # Row masks for the two periods, computed once and reused below.
    in_early = period == "1961-1980",
    in_late = period == "1981-2000"
  ) %>%
  group_by(month_name) %>%
  summarise(
    overall_mean = mean(temp, na.rm = TRUE),
    overall_median = median(temp, na.rm = TRUE),
    early_mean = mean(temp[in_early], na.rm = TRUE),
    early_median = median(temp[in_early], na.rm = TRUE),
    late_mean = mean(temp[in_late], na.rm = TRUE),
    late_median = median(temp[in_late], na.rm = TRUE),
    sd_temp = sd(temp, na.rm = TRUE)
  )

monthly_stats
## # A tibble: 12 × 8
##    month_name overall_mean overall_median early_mean early_median late_mean
##    <fct>             <dbl>          <dbl>      <dbl>        <dbl>     <dbl>
##  1 Jan             -14.1            -14.6     -14.5        -15.0    -13.4  
##  2 Feb             -14.3            -15       -15.2        -15.2    -13.1  
##  3 Mar             -14.1            -13.8     -15.0        -15.1    -12.8  
##  4 Apr             -11.3            -11.3     -11.7        -11.6    -10.6  
##  5 May              -4.08            -3.8      -4.40        -3.9     -3.58 
##  6 Jun               1.61             1.5       1.54         1.45     1.72 
##  7 Jul               5.52             5.5       5.47         5.45     5.59 
##  8 Aug               4.59             4.5       4.42         4.45     4.86 
##  9 Sep               0.479            0.5       0.38         0.4      0.631
## 10 Oct              -5.07            -4.9      -4.80        -4.1     -5.48 
## 11 Nov              -9.11            -8.5      -9.53        -9.6     -8.46 
## 12 Dec             -12.3            -12       -12.2        -11.8    -12.5  
## # ℹ 2 more variables: late_median <dbl>, sd_temp <dbl>
# Plot 1: Monthly Mean Temperatures

# Bar chart of the mean temperature for each calendar month over the record.
# Error bars extend one standard deviation (sd_temp) either side of the mean;
# the caption states this so the figure can be read on its own.
ggplot(monthly_stats, aes(x = month_name, y = overall_mean)) +
  geom_col(fill = "steelblue") +
  geom_errorbar(
    aes(
      ymin = overall_mean - sd_temp,
      ymax = overall_mean + sd_temp
    ),
    width = 0.2
  ) +
  labs(
    title = "Mean Monthly Air Temperature at Barentsburg Station (1961–2000)",
    x = "Month",
    y = "Temperature (°C)",
    caption = "Error bars: ±1 standard deviation across years"
  ) +
  theme_minimal()