#install.packages(c("readr", "dplyr", "ggplot2", "tidyr"))
# Load packages
library(readr)
## Warning: package 'readr' was built under R version 4.4.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(tidyr)
## Warning: package 'tidyr' was built under R version 4.4.3
# Import Meteorology Data
# The file is read without a header row, so readr falls back to its default
# column names (X1, X2, ...). The column-specification message is silenced.
met_raw <- read_table(
  file = "../data/uni.barentsburg.20107.dat",
  show_col_types = FALSE,
  col_names = FALSE
)
# Preview the first rows of the raw table
head(met_raw)
## # A tibble: 6 × 22
## X1 X2 X3 X4 X5 X6 X7 X8 X9 X10 X11 X12 X13
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 20107 1961 1 -1 -1 1 78.1 14.2 -12.7 1013. 1000. 1000. 6
## 2 20107 1961 2 -1 -1 1 78.1 14.2 -17.2 1005. 1000. 1000. 6.4
## 3 20107 1961 3 -1 -1 1 78.1 14.2 -12.5 998. 1000. 1000. 8.2
## 4 20107 1961 4 -1 -1 1 78.1 14.2 -11.4 1013. 1000. 1000. 5.8
## 5 20107 1961 5 -1 -1 1 78.1 14.2 -3.8 1016. 1000. 1000. 8.4
## 6 20107 1961 6 -1 -1 1 78.1 14.2 2.7 1008. 1000. 1000. 8.4
## # ℹ 9 more variables: X14 <dbl>, X15 <dbl>, X16 <dbl>, X17 <dbl>, X18 <dbl>,
## # X19 <dbl>, X20 <dbl>, X21 <dbl>, X22 <chr>
# Only keep important columns
# select() both subsets and renames: each `new = old` pair maps one of
# readr's default X* names to a descriptive name.
met_data <- select(
  met_raw,
  station = X1,
  year = X2,
  month = X3,
  latitude = X7,
  longitude = X8,
  temp = X9,
  station_name = X22
)
# Preview the trimmed table
head(met_data)
## # A tibble: 6 × 7
## station year month latitude longitude temp station_name
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 20107 1961 1 78.1 14.2 -12.7 barentsburg
## 2 20107 1961 2 78.1 14.2 -17.2 barentsburg
## 3 20107 1961 3 78.1 14.2 -12.5 barentsburg
## 4 20107 1961 4 78.1 14.2 -11.4 barentsburg
## 5 20107 1961 5 78.1 14.2 -3.8 barentsburg
## 6 20107 1961 6 78.1 14.2 2.7 barentsburg
# Per-column summary; used to check value ranges before cleaning (an
# implausibly large temp maximum points to a missing-value sentinel)
summary(met_data)
## station year month latitude longitude
## Min. :20107 Min. :1961 Min. : 1.00 Min. :78.07 Min. :14.25
## 1st Qu.:20107 1st Qu.:1971 1st Qu.: 3.75 1st Qu.:78.07 1st Qu.:14.25
## Median :20107 Median :1980 Median : 6.50 Median :78.07 Median :14.25
## Mean :20107 Mean :1980 Mean : 6.50 Mean :78.07 Mean :14.25
## 3rd Qu.:20107 3rd Qu.:1990 3rd Qu.: 9.25 3rd Qu.:78.07 3rd Qu.:14.25
## Max. :20107 Max. :2000 Max. :12.00 Max. :78.07 Max. :14.25
## temp station_name
## Min. : -23.70 Length:480
## 1st Qu.: -11.22 Class :character
## Median : -3.70 Mode :character
## Mean : 161.60
## 3rd Qu.: 4.80
## Max. : 999.99
# Clean temperature data
met_clean <- mutate(
  met_data,
  # Readings of 999 or more are treated as missing and become NA
  temp = if_else(temp >= 999, NA_real_, temp),
  # Label each row with its 20-year period; years outside both ranges get NA
  period = case_when(
    between(year, 1961, 1980) ~ "1961-1980",
    between(year, 1981, 2000) ~ "1981-2000"
  ),
  # Ordered month factor so tables and plots run Jan..Dec, not alphabetically
  month_name = factor(
    month,
    levels = 1:12,
    labels = c(
      "Jan", "Feb", "Mar", "Apr", "May", "Jun",
      "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
    )
  )
)
# Distribution of the cleaned temperatures, including the NA count
summary(met_clean$temp)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -23.700 -12.725 -6.400 -6.083 0.900 7.700 80
# Number of temperature values that are NA after cleaning
sum(is.na(met_clean$temp))
## [1] 80
# Monthly Statistics Table
# One row per calendar month: mean/median over all years and over each
# 20-year period, plus the standard deviation over all years.
monthly_stats <- met_clean %>%
  # Split temp into one helper column per period. Rows belonging to the other
  # period (or with no period) become NA and are dropped by na.rm = TRUE below.
  mutate(
    temp_early = if_else(period == "1961-1980", temp, NA_real_),
    temp_late = if_else(period == "1981-2000", temp, NA_real_)
  ) %>%
  group_by(month_name) %>%
  summarise(
    overall_mean = mean(temp, na.rm = TRUE),
    overall_median = median(temp, na.rm = TRUE),
    early_mean = mean(temp_early, na.rm = TRUE),
    early_median = median(temp_early, na.rm = TRUE),
    late_mean = mean(temp_late, na.rm = TRUE),
    late_median = median(temp_late, na.rm = TRUE),
    sd_temp = sd(temp, na.rm = TRUE)
  )
# Print the summary table
monthly_stats
## # A tibble: 12 × 8
## month_name overall_mean overall_median early_mean early_median late_mean
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Jan -14.1 -14.6 -14.5 -15.0 -13.4
## 2 Feb -14.3 -15 -15.2 -15.2 -13.1
## 3 Mar -14.1 -13.8 -15.0 -15.1 -12.8
## 4 Apr -11.3 -11.3 -11.7 -11.6 -10.6
## 5 May -4.08 -3.8 -4.40 -3.9 -3.58
## 6 Jun 1.61 1.5 1.54 1.45 1.72
## 7 Jul 5.52 5.5 5.47 5.45 5.59
## 8 Aug 4.59 4.5 4.42 4.45 4.86
## 9 Sep 0.479 0.5 0.38 0.4 0.631
## 10 Oct -5.07 -4.9 -4.80 -4.1 -5.48
## 11 Nov -9.11 -8.5 -9.53 -9.6 -8.46
## 12 Dec -12.3 -12 -12.2 -11.8 -12.5
## # ℹ 2 more variables: late_median <dbl>, sd_temp <dbl>
# Plot 1: Monthly Mean Temperatures
# Bars show the all-years mean for each month; the error bars extend one
# standard deviation (sd_temp) above and below that mean.
ggplot(
  data = monthly_stats,
  mapping = aes(x = month_name, y = overall_mean)
) +
  geom_col(fill = "steelblue") +
  geom_errorbar(
    mapping = aes(
      ymin = overall_mean - sd_temp,
      ymax = overall_mean + sd_temp
    ),
    width = 0.2
  ) +
  labs(
    title = "Mean Monthly Air Temperature at Barentsburg Station (1961–2000)",
    x = "Month",
    y = "Temperature (°C)"
  ) +
  theme_minimal()
