# 1: Load the packages needed for the project
library(readr)
library(dplyr)
library(ggplot2)
# 2: Import the meteorology data file
# I chose the Seyakha station for this project.
weather_raw <- read_fwf(
file = "uni.seyakha.20967.dat",
col_positions = fwf_empty("uni.seyakha.20967.dat")
)
## Rows: 480 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
##
## chr (2): X9, X22
## dbl (20): X1, X2, X3, X4, X5, X6, X7, X8, X10, X11, X12, X13, X14, X15, X16,...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Look at the first few rows to check the data
head(weather_raw)
## # A tibble: 6 × 22
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 20967 1961 1 -1 -1 1 70.2 72.6 -25.4 1015. 1000. 1000. 6
## 2 20967 1961 2 -1 -1 1 70.2 72.6 -24.1 1007. 1000. 1000. 6.8
## 3 20967 1961 3 -1 -1 1 70.2 72.6 -12.6 1004. 1000. 1000. 7.6
## 4 20967 1961 4 -1 -1 1 70.2 72.6 -13.3 1006 1000. 1000. 6.6
## 5 20967 1961 5 -1 -1 1 70.2 72.6 -8.0 1010. 1000. 1000. 9
## 6 20967 1961 6 -1 -1 1 70.2 72.6 .6 1011. 1000. 1000. 8.7
## # ℹ 9 more variables: X14 <dbl>, X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>,
## # X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 3: Check the column names and structure
# This helps me figure out which columns contain year, month, latitude, longitude, and air temperature.
names(weather_raw)
## [1] "X1" "X2" "X3" "X4" "X5" "X6" "X7" "X8" "X9" "X10" "X11" "X12"
## [13] "X13" "X14" "X15" "X16" "X17" "X18" "X19" "X20" "X21" "X22"
head(weather_raw)
## # A tibble: 6 × 22
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 20967 1961 1 -1 -1 1 70.2 72.6 -25.4 1015. 1000. 1000. 6
## 2 20967 1961 2 -1 -1 1 70.2 72.6 -24.1 1007. 1000. 1000. 6.8
## 3 20967 1961 3 -1 -1 1 70.2 72.6 -12.6 1004. 1000. 1000. 7.6
## 4 20967 1961 4 -1 -1 1 70.2 72.6 -13.3 1006 1000. 1000. 6.6
## 5 20967 1961 5 -1 -1 1 70.2 72.6 -8.0 1010. 1000. 1000. 9
## 6 20967 1961 6 -1 -1 1 70.2 72.6 .6 1011. 1000. 1000. 8.7
## # ℹ 9 more variables: X14 <dbl>, X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>,
## # X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 4: Rename the important columns
# These are the columns I need for the project:
# station, year, month, day, latitude, longitude, and temperature.
weather <- weather_raw %>%
rename(
station = X1,
year = X2,
month = X3,
day = X4,
latitude = X7,
longitude = X8,
temperature_c = X9
) %>%
mutate(
temperature_c = as.numeric(temperature_c)
)
head(weather)
## # A tibble: 6 × 22
## station year month day X5 X6 latitude longitude temperature_c X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20967 1961 1 -1 -1 1 70.2 72.6 -25.4 1015.
## 2 20967 1961 2 -1 -1 1 70.2 72.6 -24.1 1007.
## 3 20967 1961 3 -1 -1 1 70.2 72.6 -12.6 1004.
## 4 20967 1961 4 -1 -1 1 70.2 72.6 -13.3 1006
## 5 20967 1961 5 -1 -1 1 70.2 72.6 -8 1010.
## 6 20967 1961 6 -1 -1 1 70.2 72.6 0.6 1011.
## # ℹ 12 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## # X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 5: Clean the temperature data
# Missing temperature values are coded as 99.99 or 999.99 so I removed them from the data.
weather_clean <- weather %>%
filter(
temperature_c != 99.99,
temperature_c != 999.99,
!is.na(temperature_c)
)
head(weather_clean)
## # A tibble: 6 × 22
## station year month day X5 X6 latitude longitude temperature_c X10
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20967 1961 1 -1 -1 1 70.2 72.6 -25.4 1015.
## 2 20967 1961 2 -1 -1 1 70.2 72.6 -24.1 1007.
## 3 20967 1961 3 -1 -1 1 70.2 72.6 -12.6 1004.
## 4 20967 1961 4 -1 -1 1 70.2 72.6 -13.3 1006
## 5 20967 1961 5 -1 -1 1 70.2 72.6 -8 1010.
## 6 20967 1961 6 -1 -1 1 70.2 72.6 0.6 1011.
## # ℹ 12 more variables: X11 <dbl>, X12 <dbl>, X13 <dbl>, X14 <dbl>, X15 <dbl>,
## # X16 <dbl>, X17 <dbl>, X18 <dbl>, X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# 6: Create the two time periods
# I am comparing 1961-1980 with 1981-2000.
weather_clean <- weather_clean %>%
mutate(
period = case_when(
year >= 1961 & year <= 1980 ~ "1961-1980",
year >= 1981 & year <= 2000 ~ "1981-2000",
TRUE ~ "Other"
)
) %>%
filter(period != "Other")
table(weather_clean$period)
##
## 1961-1980 1981-2000
## 240 240
# 7: Calculate monthly summary statistics
# This gives the mean, median, and standard deviation of temperature for each month in each time period.
monthly_summary <- weather_clean %>%
group_by(period, month) %>%
summarise(
mean_temp = mean(temperature_c, na.rm = TRUE),
median_temp = median(temperature_c, na.rm = TRUE),
sd_temp = sd(temperature_c, na.rm = TRUE),
n = n(),
.groups = "drop"
)
monthly_summary
## # A tibble: 24 × 6
## period month mean_temp median_temp sd_temp n
## <chr> <dbl> <dbl> <dbl> <dbl> <int>
## 1 1961-1980 1 -26.7 -26.0 2.76 20
## 2 1961-1980 2 -26.6 -26.2 5.44 20
## 3 1961-1980 3 -22.4 -22.0 4.81 20
## 4 1961-1980 4 -16.1 -16.6 3.67 20
## 5 1961-1980 5 -7.43 -7.3 1.78 20
## 6 1961-1980 6 1.04 1 1.23 20
## 7 1961-1980 7 7.27 7.4 1.65 20
## 8 1961-1980 8 7.44 8 1.48 20
## 9 1961-1980 9 3.29 3.05 1.29 20
## 10 1961-1980 10 -6.69 -6.85 2.95 20
## # ℹ 14 more rows
# 8: Create graph
# This graph compares monthly mean air temperature between the two time periods.
ggplot(monthly_summary, aes(x = month, y = mean_temp, fill = period)) +
geom_col(position = "dodge") +
scale_x_continuous(
breaks = 1:12,
labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
) +
labs(
title = "Monthly Mean Air Temperature by Time Period",
subtitle = "Seyakha Russian Arctic Weather Station Data",
x = "Month",
y = "Mean Air Temperature (°C)",
fill = "Time Period",
caption = "Figure 1. This graph compares average monthly air temperature from 1961-1980 and 1981-2000."
) +
theme_minimal()

# 9: Graph with standard deviation
# This graph shows monthly mean temperature with standard deviation bars.
ggplot(monthly_summary, aes(x = month, y = mean_temp, fill = period)) +
geom_col(position = position_dodge(width = 0.9)) +
geom_errorbar(
aes(ymin = mean_temp - sd_temp, ymax = mean_temp + sd_temp),
position = position_dodge(width = 0.9),
width = 0.2
) +
scale_x_continuous(
breaks = 1:12,
labels = c("Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
) +
labs(
title = "Monthly Mean Air Temperature with Standard Deviation",
subtitle = "Seyakha Russian Arctic Weather Station Data",
x = "Month",
y = "Mean Air Temperature (°C)",
fill = "Time Period",
caption = "Figure 2. This graph shows monthly mean air temperature with standard deviation bars for 1961-1980 and 1981-2000."
) +
theme_minimal()
