This project analyzes monthly air temperature data from the Cuprapca weather station and compares it with the Gyda weather station. The main goal is to examine seasonal temperature patterns from 1961–2000 and compare earlier years (1961–1980) with later years (1981–2000).
The data used in this project comes from the NSIDC meteorology dataset. The two stations analyzed are:
Both datasets include monthly temperature measurements, which were cleaned and organized in R for statistical analysis and visualization.
#Dowload data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.cuprapca.24768.dat"
download.file(url,destfile='data.dat',method='curl')
cuprapca <- read.table('data.dat',sep="",na.strings="999.99")
#Clean Data
cuprapca_clean <- cuprapca %>%
select(
year = V2,
month = V3,
latitude= V7,
longitude =V8,
temp = V9
)
cuprapca_clean <- cuprapca_clean %>%
filter(!is.na(temp))
The data was divided into three time periods:
For each month, I calculated the mean temperature, median temperature, and standard deviation.
#Divide data into time periods for analysis
early_data <- cuprapca_clean %>%
filter(year >= 1961 & year <= 1980)
late_data <- cuprapca_clean %>%
filter(year >= 1981 & year <= 2000)
full <- cuprapca_clean %>%
filter(year >= 1961 & year <= 2000)
#Calculate stats
early_stats <- early_data %>%
group_by(month) %>%
summarise(
early_mean = mean(temp),
early_median = median(temp)
)
late_stats <- late_data %>%
group_by(month) %>%
summarise(
late_mean = mean(temp),
late_median = median(temp)
)
full_stats <- full %>%
group_by(month) %>%
summarise(
full_mean = mean(temp),
full_median = median(temp),
sd = sd(temp)
)
#join all stats into a final table
final_table <- full_stats %>%
left_join(early_stats, by = "month") %>%
left_join(late_stats, by = "month")
knitr::kable(final_table, caption = "Table 1. Monthly temperature statistics for the Cuprapca station.")
| month | full_mean | full_median | sd | early_mean | early_median | late_mean | late_median |
|---|---|---|---|---|---|---|---|
| 1 | -42.2325 | -42.70 | 3.701863 | -42.760 | -43.45 | -41.705 | -42.00 |
| 2 | -36.7500 | -36.80 | 3.180066 | -38.385 | -38.40 | -35.115 | -35.30 |
| 3 | -23.0975 | -23.60 | 2.900176 | -23.940 | -24.10 | -22.255 | -23.05 |
| 4 | -6.8150 | -7.15 | 2.230965 | -7.610 | -7.75 | -6.020 | -6.00 |
| 5 | 6.2100 | 6.40 | 1.788109 | 5.705 | 5.25 | 6.715 | 7.10 |
| 6 | 14.9575 | 14.90 | 1.594524 | 14.625 | 14.60 | 15.290 | 15.10 |
| 7 | 17.9675 | 18.00 | 1.688025 | 18.055 | 18.00 | 17.880 | 17.90 |
| 8 | 13.8450 | 13.85 | 1.698106 | 14.045 | 14.00 | 13.645 | 13.80 |
| 9 | 5.1325 | 5.15 | 1.558827 | 5.065 | 5.20 | 5.200 | 5.15 |
| 10 | -9.7650 | -9.70 | 2.484367 | -9.805 | -9.60 | -9.725 | -9.80 |
| 11 | -31.1500 | -31.65 | 3.235381 | -31.450 | -32.20 | -30.850 | -30.50 |
| 12 | -40.8100 | -41.45 | 3.555696 | -41.125 | -41.05 | -40.495 | -41.60 |
#Montly mean temperature
bars <- full_stats
# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10
# Shift values so all bars go up
bars_shift <- bars$full_mean - ymin
# Plot
barplot(bars_shift,
yaxt = "n",
names.arg = bars$month,
col = "steelblue",
main = "Monthly Mean Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Create matrix with mean and median
bars <- cbind(full_stats$full_mean, full_stats$full_median)
# Define ymin
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = full_stats$month,
col = c("steelblue", "orange"),
main = "Mean vs Median Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("Mean", "Median"),
fill = c("steelblue", "orange"))
Early “1961–1980” vs Late “1981–2000”
#Early vs Late plot
# Combine early and late means into a matrix
bars <- cbind(early_stats$early_mean, late_stats$late_mean)
# Define ymin
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = early_stats$month,
col = c("steelblue", "tomato"),
main = "Early vs Late Temperature Comparison",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("1961–1980", "1981–2000"),
fill = c("steelblue", "tomato"))
# Mean and sd plot
bars <- full_stats$full_mean
sd_vals <- full_stats$sd
# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10
# Shift values
bars_shift <- bars - ymin
# Plot bars
bp <- barplot(bars_shift,
yaxt = "n",
names.arg = full_stats$month,
col = "steelblue",
main = "Monthly Mean Temperature with SD",
xlab = "Month",
ylab = "Temp (°C)")
# Add error bars (SD)
arrows(x0 = bp,
y0 = (bars - sd_vals) - ymin,
x1 = bp,
y1 = (bars + sd_vals) - ymin,
angle = 90,
code = 3,
length = 0.05)
# Create y-axis labels in original scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# t-test comparing monthly means
t_test <- t.test(
early_stats$early_mean,
late_stats$late_mean,
paired = TRUE
)
print(t_test)
##
## Paired t-test
##
## data: early_stats$early_mean and late_stats$late_mean
## t = -2.9245, df = 11, p-value = 0.01383
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.4816802 -0.2091531
## sample estimates:
## mean difference
## -0.8454167
#download data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.gyda.20963.dat"
download.file(url,destfile = "gyda.dat",method = "curl")
gyda <- read.table("gyda.dat",sep = "",na.strings = "999.99")
#Clean Data
gyda_clean <- gyda %>%
select(
year = V2,
month = V3,
latitude = V7,
longitude = V8,
temp = V9
)
gyda_clean <- gyda_clean %>%
filter(!is.na(temp))
#Divide into time periods for analysis
early_data_gyda <- gyda_clean %>%
filter(year >= 1961 & year <= 1980)
late_data_gyda <- gyda_clean %>%
filter(year >= 1981 & year <= 2000)
full_gyda <- gyda_clean %>%
filter(year >= 1961 & year <= 2000)
#data stats
early_stats_gyda <- early_data_gyda %>%
group_by(month) %>%
summarise(
early_mean = mean(temp),
early_median = median(temp)
)
late_stats_gyda <- late_data_gyda %>%
group_by(month) %>%
summarise(
late_mean = mean(temp),
late_median = median(temp)
)
full_stats_gyda <- full_gyda %>%
group_by(month) %>%
summarise(
full_mean = mean(temp),
full_median = median(temp),
sd = sd(temp)
)
#join all stats into a final table
final_table_gyda <- full_stats_gyda %>%
left_join(early_stats_gyda, by = "month") %>%
left_join(late_stats_gyda, by = "month")
knitr::kable(final_table_gyda, caption = "Table 2. Monthly temperature statistics for the Gyda station.")
| month | full_mean | full_median | sd | early_mean | early_median | late_mean | late_median |
|---|---|---|---|---|---|---|---|
| 1 | -28.271053 | -28.45 | 4.338384 | -29.735 | -29.35 | -26.644444 | -26.75 |
| 2 | -28.302632 | -28.20 | 5.101271 | -28.910 | -28.50 | -27.627778 | -27.45 |
| 3 | -23.173684 | -22.15 | 4.276313 | -24.360 | -24.70 | -21.855556 | -20.80 |
| 4 | -17.413158 | -18.35 | 4.325548 | -17.485 | -17.60 | -17.333333 | -18.85 |
| 5 | -7.765790 | -7.85 | 1.864356 | -8.455 | -8.20 | -7.000000 | -6.90 |
| 6 | 2.044737 | 1.85 | 1.907694 | 1.500 | 1.65 | 2.650000 | 2.15 |
| 7 | 10.297368 | 10.40 | 2.174979 | 9.810 | 9.45 | 10.838889 | 10.65 |
| 8 | 8.476316 | 8.35 | 1.403169 | 8.315 | 7.95 | 8.655556 | 8.55 |
| 9 | 2.852632 | 2.50 | 1.472218 | 2.540 | 2.30 | 3.200000 | 3.10 |
| 10 | -8.600000 | -8.40 | 3.484948 | -9.050 | -8.80 | -8.100000 | -7.95 |
| 11 | -19.615790 | -20.10 | 5.605669 | -19.915 | -19.65 | -19.283333 | -20.15 |
| 12 | -24.707895 | -24.65 | 4.332491 | -24.560 | -25.30 | -24.872222 | -24.20 |
# Use full_stats directly
bars <- full_stats_gyda
# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars$full_mean - ymin
# Plot
barplot(bars_shift,
yaxt = "n",
names.arg = bars$month,
col = "mediumpurple2",
main = "Gyda Monthly Mean Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
## Plot 6: Mean vs. Median Temperature
# Create matrix with mean and median
bars <- cbind(full_stats_gyda$full_mean,
full_stats_gyda$full_median)
# Define ymin using BOTH columns
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = full_stats_gyda$month,
col = c("mediumpurple2", "goldenrod2"),
main = "Gyda Mean vs Median Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("Mean", "Median"),
fill = c("mediumpurple2", "goldenrod2"))
Early include the years (1961 - 1980) Late include the years (1981 - 2000)
# Combine early and late means into a matrix
bars <- cbind(early_stats_gyda$early_mean,
late_stats_gyda$late_mean)
# Define ymin using BOTH datasets
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = early_stats_gyda$month,
col = c("mediumpurple2", "darkturquoise"),
main = "Gyda Early vs Late Temperature Comparison",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("1961–1980", "1981–2000"),
fill = c("mediumpurple2", "darkturquoise"))
# Use mean and sd
bars <- full_stats_gyda$full_mean
sd_vals <- full_stats_gyda$sd
# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10
# Shift values
bars_shift <- bars - ymin
# Plot bars
bp <- barplot(bars_shift,
yaxt = "n",
names.arg = full_stats_gyda$month,
col = "mediumpurple2",
main = "Gyda Monthly Mean Temperature with SD",
xlab = "Month",
ylab = "Temp (°C)")
# Add error bars (SD)
arrows(x0 = bp,
y0 = (bars - sd_vals) - ymin,
x1 = bp,
y1 = (bars + sd_vals) - ymin,
angle = 90,
code = 3,
length = 0.05)
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Paired t-test comparing monthly means
t_test_gyda <- t.test(
early_stats_gyda$early_mean,
late_stats_gyda$late_mean,
paired = TRUE
)
print(t_test_gyda)
##
## Paired t-test
##
## data: early_stats_gyda$early_mean and late_stats_gyda$late_mean
## t = -3.9153, df = 11, p-value = 0.002412
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.6835785 -0.4718845
## sample estimates:
## mean difference
## -1.077731
# Add station names
cuprapca_compare <- cuprapca_clean %>%
mutate(station = "Cuprapca")
gyda_compare <- gyda_clean %>%
mutate(station = "Gyda")
# Combine both stations
both_stations <- bind_rows(cuprapca_compare, gyda_compare)
boxplot(temp ~ station,
data = both_stations,
col = c("steelblue", "mediumpurple2"),
main = "Temperature Comparison Between Stations",
xlab = "Station",
ylab = "Temperature (°C)",
ylim = c(-60, 30))
# Define colors for stations
station_colors <- c("steelblue", "mediumpurple2")
# Side-by-side monthly boxplots
boxplot(temp ~ interaction(month, station),
data = both_stations,
col = rep(station_colors, 12),
xaxt = "n",
main = "Monthly Temperature Comparison by Station",
xlab = "Month",
ylab = "Temperature (°C)")
# Add month labels
axis(1,
at = seq(1.5, 24, by = 2),
labels = 1:12)
# Add legend
legend("topright",
legend = c("Cuprapca", "Gyda"),
fill = station_colors)
station_monthly_means <- both_stations %>%
group_by(station, month) %>%
summarise(
mean_temp = mean(temp, na.rm = TRUE),
median_temp = median(temp, na.rm = TRUE),
sd_temp = sd(temp, na.rm = TRUE),
.groups = "drop"
)
knitr::kable(station_monthly_means, caption = "Table 3. Monthly mean, median, and standard deviation temperatures for Cuprapca and Gyda.")
| station | month | mean_temp | median_temp | sd_temp |
|---|---|---|---|---|
| Cuprapca | 1 | -42.232500 | -42.70 | 3.701863 |
| Cuprapca | 2 | -36.750000 | -36.80 | 3.180066 |
| Cuprapca | 3 | -23.097500 | -23.60 | 2.900176 |
| Cuprapca | 4 | -6.815000 | -7.15 | 2.230965 |
| Cuprapca | 5 | 6.210000 | 6.40 | 1.788109 |
| Cuprapca | 6 | 14.957500 | 14.90 | 1.594524 |
| Cuprapca | 7 | 17.967500 | 18.00 | 1.688025 |
| Cuprapca | 8 | 13.845000 | 13.85 | 1.698106 |
| Cuprapca | 9 | 5.132500 | 5.15 | 1.558827 |
| Cuprapca | 10 | -9.765000 | -9.70 | 2.484367 |
| Cuprapca | 11 | -31.150000 | -31.65 | 3.235381 |
| Cuprapca | 12 | -40.810000 | -41.45 | 3.555696 |
| Gyda | 1 | -28.271053 | -28.45 | 4.338384 |
| Gyda | 2 | -28.302632 | -28.20 | 5.101271 |
| Gyda | 3 | -23.173684 | -22.15 | 4.276313 |
| Gyda | 4 | -17.413158 | -18.35 | 4.325548 |
| Gyda | 5 | -7.765790 | -7.85 | 1.864356 |
| Gyda | 6 | 2.044737 | 1.85 | 1.907694 |
| Gyda | 7 | 10.297368 | 10.40 | 2.174979 |
| Gyda | 8 | 8.476316 | 8.35 | 1.403169 |
| Gyda | 9 | 2.852632 | 2.50 | 1.472218 |
| Gyda | 10 | -8.600000 | -8.40 | 3.484948 |
| Gyda | 11 | -19.615790 | -20.10 | 5.605669 |
| Gyda | 12 | -24.707895 | -24.65 | 4.332491 |
monthly_t_tests <- both_stations %>%
group_by(month) %>%
summarise(
p_value = t.test(temp ~ station)$p.value,
cuprapca_mean = mean(temp[station == "Cuprapca"], na.rm = TRUE),
gyda_mean = mean(temp[station == "Gyda"], na.rm = TRUE)
)
knitr::kable (monthly_t_tests, caption = "Table 4. Montly t tests for Cuprapca and Gyda.")
| month | p_value | cuprapca_mean | gyda_mean |
|---|---|---|---|
| 1 | 0.0000000 | -42.2325 | -28.271053 |
| 2 | 0.0000000 | -36.7500 | -28.302632 |
| 3 | 0.9272871 | -23.0975 | -23.173684 |
| 4 | 0.0000000 | -6.8150 | -17.413158 |
| 5 | 0.0000000 | 6.2100 | -7.765790 |
| 6 | 0.0000000 | 14.9575 | 2.044737 |
| 7 | 0.0000000 | 17.9675 | 10.297368 |
| 8 | 0.0000000 | 13.8450 | 8.476316 |
| 9 | 0.0000000 | 5.1325 | 2.852632 |
| 10 | 0.0952586 | -9.7650 | -8.600000 |
| 11 | 0.0000000 | -31.1500 | -19.615790 |
| 12 | 0.0000000 | -40.8100 | -24.707895 |