# 1: Load the packages needed for the project

library(readr)
library(dplyr)
library(ggplot2)
# 2: Import the meteorology data file
# I chose the Seyakha station for this project.

weather_raw <- read_fwf(
  file = "uni.seyakha.20967.dat",
  col_positions = fwf_empty("uni.seyakha.20967.dat")
)
## Rows: 480 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## 
## chr  (2): X9, X22
## dbl (20): X1, X2, X3, X4, X5, X6, X7, X8, X10, X11, X12, X13, X14, X15, X16,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Look at the first few rows to check the data
head(weather_raw)
## # A tibble: 6 × 22
##      X1    X2    X3    X4    X5    X6    X7    X8 X9      X10   X11   X12   X13
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 20967  1961     1    -1    -1     1  70.2  72.6 -25.4 1015. 1000. 1000.   6  
## 2 20967  1961     2    -1    -1     1  70.2  72.6 -24.1 1007. 1000. 1000.   6.8
## 3 20967  1961     3    -1    -1     1  70.2  72.6 -12.6 1004. 1000. 1000.   7.6
## 4 20967  1961     4    -1    -1     1  70.2  72.6 -13.3 1006  1000. 1000.   6.6
## 5 20967  1961     5    -1    -1     1  70.2  72.6 -8.0  1010. 1000. 1000.   9  
## 6 20967  1961     6    -1    -1     1  70.2  72.6 .6    1011. 1000. 1000.   8.7
## # ℹ 9 more variables: X14 <dbl>, X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>,
## #   X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 3: Check the column names and structure
# This helps me figure out which columns contain year, month, latitude, longitude, and air temperature.

names(weather_raw)
##  [1] "X1"  "X2"  "X3"  "X4"  "X5"  "X6"  "X7"  "X8"  "X9"  "X10" "X11" "X12"
## [13] "X13" "X14" "X15" "X16" "X17" "X18" "X19" "X20" "X21" "X22"
head(weather_raw)
## # A tibble: 6 × 22
##      X1    X2    X3    X4    X5    X6    X7    X8 X9      X10   X11   X12   X13
##   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 20967  1961     1    -1    -1     1  70.2  72.6 -25.4 1015. 1000. 1000.   6  
## 2 20967  1961     2    -1    -1     1  70.2  72.6 -24.1 1007. 1000. 1000.   6.8
## 3 20967  1961     3    -1    -1     1  70.2  72.6 -12.6 1004. 1000. 1000.   7.6
## 4 20967  1961     4    -1    -1     1  70.2  72.6 -13.3 1006  1000. 1000.   6.6
## 5 20967  1961     5    -1    -1     1  70.2  72.6 -8.0  1010. 1000. 1000.   9  
## 6 20967  1961     6    -1    -1     1  70.2  72.6 .6    1011. 1000. 1000.   8.7
## # ℹ 9 more variables: X14 <dbl>, X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>,
## #   X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 4: Rename the important columns
# These are the columns I need for the project:
# station, year, month, day, latitude, longitude, and temperature.

weather <- weather_raw %>%
  rename(
    station = X1,
    year = X2,
    month = X3,
    day = X4,
    latitude = X7,
    longitude = X8,
    temperature_c = X9
  ) %>%
  mutate(
    temperature_c = as.numeric(temperature_c)
  )

head(weather)
## # A tibble: 6 × 22
##   station  year month   day    X5    X6 latitude longitude temperature_c   X10
##     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>     <dbl>         <dbl> <dbl>
## 1   20967  1961     1    -1    -1     1     70.2      72.6         -25.4 1015.
## 2   20967  1961     2    -1    -1     1     70.2      72.6         -24.1 1007.
## 3   20967  1961     3    -1    -1     1     70.2      72.6         -12.6 1004.
## 4   20967  1961     4    -1    -1     1     70.2      72.6         -13.3 1006 
## 5   20967  1961     5    -1    -1     1     70.2      72.6          -8   1010.
## 6   20967  1961     6    -1    -1     1     70.2      72.6           0.6 1011.
## # ℹ 12 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## #   X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 5: Clean the temperature data
# Missing temperature values are coded as 99.99 or 999.99 so I removed them from the data.

weather_clean <- weather %>%
  filter(
    temperature_c != 99.99,
    temperature_c != 999.99,
    !is.na(temperature_c)
  )

head(weather_clean)
## # A tibble: 6 × 22
##   station  year month   day    X5    X6 latitude longitude temperature_c   X10
##     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>     <dbl>         <dbl> <dbl>
## 1   20967  1961     1    -1    -1     1     70.2      72.6         -25.4 1015.
## 2   20967  1961     2    -1    -1     1     70.2      72.6         -24.1 1007.
## 3   20967  1961     3    -1    -1     1     70.2      72.6         -12.6 1004.
## 4   20967  1961     4    -1    -1     1     70.2      72.6         -13.3 1006 
## 5   20967  1961     5    -1    -1     1     70.2      72.6          -8   1010.
## 6   20967  1961     6    -1    -1     1     70.2      72.6           0.6 1011.
## # ℹ 12 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## #   X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 6: Create the two time periods
# I am comparing 1961-1980 with 1981-2000.

weather_clean <- weather_clean %>%
  mutate(
    period = case_when(
      year >= 1961 & year <= 1980 ~ "1961-1980",
      year >= 1981 & year <= 2000 ~ "1981-2000",
      TRUE ~ "Other"
    )
  ) %>%
  filter(period != "Other")

table(weather_clean$period)
## 
## 1961-1980 1981-2000 
##       240       240
# 7: Calculate monthly summary statistics
# This gives the mean, median, and standard deviation of temperature for each month in each time period.

monthly_summary <- weather_clean %>%
  group_by(period, month) %>%
  summarise(
    mean_temp = mean(temperature_c, na.rm = TRUE),
    median_temp = median(temperature_c, na.rm = TRUE),
    sd_temp = sd(temperature_c, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  )

monthly_summary
## # A tibble: 24 × 6
##    period    month mean_temp median_temp sd_temp     n
##    <chr>     <dbl>     <dbl>       <dbl>   <dbl> <int>
##  1 1961-1980     1    -26.7       -26.0     2.76    20
##  2 1961-1980     2    -26.6       -26.2     5.44    20
##  3 1961-1980     3    -22.4       -22.0     4.81    20
##  4 1961-1980     4    -16.1       -16.6     3.67    20
##  5 1961-1980     5     -7.43       -7.3     1.78    20
##  6 1961-1980     6      1.04        1       1.23    20
##  7 1961-1980     7      7.27        7.4     1.65    20
##  8 1961-1980     8      7.44        8       1.48    20
##  9 1961-1980     9      3.29        3.05    1.29    20
## 10 1961-1980    10     -6.69       -6.85    2.95    20
## # ℹ 14 more rows
# 8: Create graph
# This graph compares monthly mean air temperature between the two time periods.

ggplot(monthly_summary, aes(x = month, y = mean_temp, fill = period)) +
  geom_col(position = "dodge") +
  scale_x_continuous(
    breaks = 1:12,
    labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
  ) +
  labs(
    title = "Monthly Mean Air Temperature by Time Period",
    subtitle = "Seyakha Russian Arctic Weather Station Data",
    x = "Month",
    y = "Mean Air Temperature (°C)",
    fill = "Time Period",
    caption = "Figure 1. This graph compares average monthly air temperature from 1961-1980 and 1981-2000."
  ) +
  theme_minimal()

# 9: Graph with standard deviation
# This graph shows monthly mean temperature with standard deviation bars.

ggplot(monthly_summary, aes(x = month, y = mean_temp, fill = period)) +
  geom_col(position = position_dodge(width = 0.9)) +
  geom_errorbar(
    aes(ymin = mean_temp - sd_temp, ymax = mean_temp + sd_temp),
    position = position_dodge(width = 0.9),
    width = 0.2
  ) +
  scale_x_continuous(
    breaks = 1:12,
    labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
               "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
  ) +
  labs(
    title = "Monthly Mean Air Temperature with Standard Deviation",
    subtitle = "Seyakha Russian Arctic Weather Station Data",
    x = "Month",
    y = "Mean Air Temperature (°C)",
    fill = "Time Period",
    caption = "Figure 2. This graph shows monthly mean air temperature with standard deviation bars for 1961-1980 and 1981-2000."
  ) +
  theme_minimal()