This project analyzes monthly air temperature data from the Cuprapca weather station and compares it with the Gyda weather station. The main goal is to examine seasonal temperature patterns from 1961–2000 and compare earlier years (1961–1980) with later years (1981–2000).
The data used in this project comes from the NSIDC meteorology dataset. The two stations analyzed are:
Both datasets include monthly temperature measurements, which were cleaned and organized in R for statistical analysis and visualization.
#Dowload data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.cuprapca.24768.dat"
download.file(url,destfile='data.dat',method='curl')
cuprapca <- read.table('data.dat',sep="",na.strings="999.99")
#Clean Data
cuprapca_clean <- cuprapca %>%
select(
year = V2,
month = V3,
latitude= V7,
longitude =V8,
temp = V9
)
cuprapca_clean <- cuprapca_clean %>%
filter(!is.na(temp))
The data was divided into three time periods:
For each month, I calculated the mean temperature, median temperature, and standard deviation.
#Divide data into time periods for analysis
early_data <- cuprapca_clean %>%
filter(year >= 1961 & year <= 1980)
late_data <- cuprapca_clean %>%
filter(year >= 1981 & year <= 2000)
full <- cuprapca_clean %>%
filter(year >= 1961 & year <= 2000)
#Calculate stats
early_stats <- early_data %>%
group_by(month) %>%
summarise(
early_mean = mean(temp),
early_median = median(temp)
)
late_stats <- late_data %>%
group_by(month) %>%
summarise(
late_mean = mean(temp),
late_median = median(temp)
)
full_stats <- full %>%
group_by(month) %>%
summarise(
full_mean = mean(temp),
full_median = median(temp),
sd = sd(temp)
)
#join all stats into a final table
final_table <- full_stats %>%
left_join(early_stats, by = "month") %>%
left_join(late_stats, by = "month")
knitr::kable(final_table, caption = "Table 1. Monthly temperature statistics for the Cuprapca station.")
| month | full_mean | full_median | sd | early_mean | early_median | late_mean | late_median |
|---|---|---|---|---|---|---|---|
| 1 | -42.2325 | -42.70 | 3.701863 | -42.760 | -43.45 | -41.705 | -42.00 |
| 2 | -36.7500 | -36.80 | 3.180066 | -38.385 | -38.40 | -35.115 | -35.30 |
| 3 | -23.0975 | -23.60 | 2.900176 | -23.940 | -24.10 | -22.255 | -23.05 |
| 4 | -6.8150 | -7.15 | 2.230965 | -7.610 | -7.75 | -6.020 | -6.00 |
| 5 | 6.2100 | 6.40 | 1.788109 | 5.705 | 5.25 | 6.715 | 7.10 |
| 6 | 14.9575 | 14.90 | 1.594524 | 14.625 | 14.60 | 15.290 | 15.10 |
| 7 | 17.9675 | 18.00 | 1.688025 | 18.055 | 18.00 | 17.880 | 17.90 |
| 8 | 13.8450 | 13.85 | 1.698106 | 14.045 | 14.00 | 13.645 | 13.80 |
| 9 | 5.1325 | 5.15 | 1.558827 | 5.065 | 5.20 | 5.200 | 5.15 |
| 10 | -9.7650 | -9.70 | 2.484367 | -9.805 | -9.60 | -9.725 | -9.80 |
| 11 | -31.1500 | -31.65 | 3.235381 | -31.450 | -32.20 | -30.850 | -30.50 |
| 12 | -40.8100 | -41.45 | 3.555696 | -41.125 | -41.05 | -40.495 | -41.60 |
#Montly mean temperature
bars <- full_stats
# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10
# Shift values so all bars go up
bars_shift <- bars$full_mean - ymin
# Plot
barplot(bars_shift,
yaxt = "n",
names.arg = bars$month,
col = "steelblue",
main = "Monthly Mean Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
Plot 1. Monthly Mean Temperature at Cuprapca
Station.
This bar plot shows the average monthly temperature at the Cuprapca
station from 1961–2000. The pattern shows strong seasonality, with the
coldest temperatures occurring in winter months and the warmest
temperatures occurring during summer.
# Create matrix with mean and median
bars <- cbind(full_stats$full_mean, full_stats$full_median)
# Define ymin
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = full_stats$month,
col = c("steelblue", "orange"),
main = "Mean vs Median Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("Mean", "Median"),
fill = c("steelblue", "orange"))
Plot 2. Mean vs. Median Monthly Temperature at Cuprapca
Station.
This plot compares the mean and median temperature for each month.
Similar mean and median values suggest that the monthly temperature data
is fairly balanced, while larger differences may indicate months with
more extreme values.
Early “1961–1980” vs Late “1981–2000”
#Early vs Late plot
# Combine early and late means into a matrix
bars <- cbind(early_stats$early_mean, late_stats$late_mean)
# Define ymin
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = early_stats$month,
col = c("steelblue", "tomato"),
main = "Early vs Late Temperature Comparison",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("1961–1980", "1981–2000"),
fill = c("steelblue", "tomato"))
Plot 3. Early vs. Late Monthly Temperature Comparison at
Cuprapca Station.
This figure compares monthly mean temperatures between 1961–1980 and
1981–2000. The side-by-side bars make it possible to see whether the
later period was warmer or cooler for each month.
# Mean and sd plot
bars <- full_stats$full_mean
sd_vals <- full_stats$sd
# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10
# Shift values
bars_shift <- bars - ymin
# Plot bars
bp <- barplot(bars_shift,
yaxt = "n",
names.arg = full_stats$month,
col = "steelblue",
main = "Monthly Mean Temperature with SD",
xlab = "Month",
ylab = "Temp (°C)")
# Add error bars (SD)
arrows(x0 = bp,
y0 = (bars - sd_vals) - ymin,
x1 = bp,
y1 = (bars + sd_vals) - ymin,
angle = 90,
code = 3,
length = 0.05)
# Create y-axis labels in original scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
Plot 4. Monthly Mean Temperature with Standard Deviation at
Cuprapca Station.
This plot shows monthly mean temperatures with standard deviation bars.
The standard deviation represents how much temperature varied within
each month across the full study period.
# t-test comparing monthly means
t_test <- t.test(
early_stats$early_mean,
late_stats$late_mean,
paired = TRUE
)
print(t_test)
##
## Paired t-test
##
## data: early_stats$early_mean and late_stats$late_mean
## t = -2.9245, df = 11, p-value = 0.01383
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.4816802 -0.2091531
## sample estimates:
## mean difference
## -0.8454167
A paired t-test was used to compare the monthly mean temperatures from the early period (1961–1980) and the late period (1981–2000). This test was chosen because each month in the early period is compared directly with the same month in the later period.
The p-value from the test was used to determine whether the difference between the two periods was statistically significant. A p-value below 0.05 would suggest that the temperature difference between the two periods is statistically significant.
#download data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.gyda.20963.dat"
download.file(url,destfile = "gyda.dat",method = "curl")
gyda <- read.table("gyda.dat",sep = "",na.strings = "999.99")
#Clean Data
gyda_clean <- gyda %>%
select(
year = V2,
month = V3,
latitude = V7,
longitude = V8,
temp = V9
)
gyda_clean <- gyda_clean %>%
filter(!is.na(temp))
#Divide into time periods for analysis
early_data_gyda <- gyda_clean %>%
filter(year >= 1961 & year <= 1980)
late_data_gyda <- gyda_clean %>%
filter(year >= 1981 & year <= 2000)
full_gyda <- gyda_clean %>%
filter(year >= 1961 & year <= 2000)
#data stats
early_stats_gyda <- early_data_gyda %>%
group_by(month) %>%
summarise(
early_mean = mean(temp),
early_median = median(temp)
)
late_stats_gyda <- late_data_gyda %>%
group_by(month) %>%
summarise(
late_mean = mean(temp),
late_median = median(temp)
)
full_stats_gyda <- full_gyda %>%
group_by(month) %>%
summarise(
full_mean = mean(temp),
full_median = median(temp),
sd = sd(temp)
)
#join all stats into a final table
final_table_gyda <- full_stats_gyda %>%
left_join(early_stats_gyda, by = "month") %>%
left_join(late_stats_gyda, by = "month")
knitr::kable(final_table_gyda, caption = "Table 2. Monthly temperature statistics for the Gyda station.")
| month | full_mean | full_median | sd | early_mean | early_median | late_mean | late_median |
|---|---|---|---|---|---|---|---|
| 1 | -28.271053 | -28.45 | 4.338384 | -29.735 | -29.35 | -26.644444 | -26.75 |
| 2 | -28.302632 | -28.20 | 5.101271 | -28.910 | -28.50 | -27.627778 | -27.45 |
| 3 | -23.173684 | -22.15 | 4.276313 | -24.360 | -24.70 | -21.855556 | -20.80 |
| 4 | -17.413158 | -18.35 | 4.325548 | -17.485 | -17.60 | -17.333333 | -18.85 |
| 5 | -7.765790 | -7.85 | 1.864356 | -8.455 | -8.20 | -7.000000 | -6.90 |
| 6 | 2.044737 | 1.85 | 1.907694 | 1.500 | 1.65 | 2.650000 | 2.15 |
| 7 | 10.297368 | 10.40 | 2.174979 | 9.810 | 9.45 | 10.838889 | 10.65 |
| 8 | 8.476316 | 8.35 | 1.403169 | 8.315 | 7.95 | 8.655556 | 8.55 |
| 9 | 2.852632 | 2.50 | 1.472218 | 2.540 | 2.30 | 3.200000 | 3.10 |
| 10 | -8.600000 | -8.40 | 3.484948 | -9.050 | -8.80 | -8.100000 | -7.95 |
| 11 | -19.615790 | -20.10 | 5.605669 | -19.915 | -19.65 | -19.283333 | -20.15 |
| 12 | -24.707895 | -24.65 | 4.332491 | -24.560 | -25.30 | -24.872222 | -24.20 |
# Use full_stats directly
bars <- full_stats_gyda
# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars$full_mean - ymin
# Plot
barplot(bars_shift,
yaxt = "n",
names.arg = bars$month,
col = "mediumpurple2",
main = "Gyda Monthly Mean Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
Plot 5. Monthly Mean Temperature at Gyda
Station.
This bar plot shows the average monthly temperature at the Gyda station
from 1961–2000. The graph demonstrates clear seasonal temperature
variation, with the coldest temperatures occurring during winter months
and warmer temperatures occurring during summer months.
# Create matrix with mean and median
bars <- cbind(full_stats_gyda$full_mean,
full_stats_gyda$full_median)
# Define ymin using BOTH columns
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = full_stats_gyda$month,
col = c("mediumpurple2", "goldenrod2"),
main = "Gyda Mean vs Median Temperature",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("Mean", "Median"),
fill = c("mediumpurple2", "goldenrod2"))
Figure 6. Mean vs. Median Monthly Temperature at Gyda
Station.
This figure compares the monthly mean and median temperatures at the
Gyda station. The similarity between the mean and median values for most
months suggests relatively consistent temperature distributions, while
larger differences may indicate months with more variability or extreme
temperature values.
Early include the years (1961 - 1980) Late include the years (1981 - 2000)
# Combine early and late means into a matrix
bars <- cbind(early_stats_gyda$early_mean,
late_stats_gyda$late_mean)
# Define ymin using BOTH datasets
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10
# Shift values so all bars go upward
bars_shift <- bars - ymin
# Plot side-by-side bars
barplot(t(bars_shift),
beside = TRUE,
yaxt = "n",
names.arg = early_stats_gyda$month,
col = c("mediumpurple2", "darkturquoise"),
main = "Gyda Early vs Late Temperature Comparison",
xlab = "Month",
ylab = "Temp (°C)")
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
# Add legend
legend("topright",
legend = c("1961–1980", "1981–2000"),
fill = c("mediumpurple2", "darkturquoise"))
Plot 7. Early vs. Late Monthly Temperature Comparison at Gyda
Station.
This plot compares monthly mean temperatures between the earlier period
(1961–1980) and the later period (1981–2000) at the Gyda station. The
side-by-side bars make it possible to identify changes in temperature
patterns over time and evaluate whether the later period experienced
warming.
# Use mean and sd
bars <- full_stats_gyda$full_mean
sd_vals <- full_stats_gyda$sd
# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10
# Shift values
bars_shift <- bars - ymin
# Plot bars
bp <- barplot(bars_shift,
yaxt = "n",
names.arg = full_stats_gyda$month,
col = "mediumpurple2",
main = "Gyda Monthly Mean Temperature with SD",
xlab = "Month",
ylab = "Temp (°C)")
# Add error bars (SD)
arrows(x0 = bp,
y0 = (bars - sd_vals) - ymin,
x1 = bp,
y1 = (bars + sd_vals) - ymin,
angle = 90,
code = 3,
length = 0.05)
# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)
axis(2, at = yticks, labels = yticks + ymin)
Plot 8. Monthly Mean Temperature with Standard Deviation at
Gyda Station.
This visualization displays the monthly mean temperatures at the Gyda
station along with standard deviation error bars. The error bars
represent variability in temperature observations for each month
throughout the study period.
# Paired t-test comparing monthly means
t_test_gyda <- t.test(
early_stats_gyda$early_mean,
late_stats_gyda$late_mean,
paired = TRUE
)
print(t_test_gyda)
##
## Paired t-test
##
## data: early_stats_gyda$early_mean and late_stats_gyda$late_mean
## t = -3.9153, df = 11, p-value = 0.002412
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -1.6835785 -0.4718845
## sample estimates:
## mean difference
## -1.077731
# Add station names
cuprapca_compare <- cuprapca_clean %>%
mutate(station = "Cuprapca")
gyda_compare <- gyda_clean %>%
mutate(station = "Gyda")
# Combine both stations
both_stations <- bind_rows(cuprapca_compare, gyda_compare)
boxplot(temp ~ station,
data = both_stations,
col = c("steelblue", "mediumpurple2"),
main = "Temperature Comparison Between Stations",
xlab = "Station",
ylab = "Temperature (°C)",
ylim = c(-60, 30))
Plot 9. Temperature Distribution Comparison Between Cuprapca
and Gyda.
This boxplot compares the overall temperature distribution between the
two stations. Differences in the center and spread of the boxes help
show whether one station generally experienced warmer or colder
conditions.
# Define colors for stations
station_colors <- c("steelblue", "mediumpurple2")
# Side-by-side monthly boxplots
boxplot(temp ~ interaction(month, station),
data = both_stations,
col = rep(station_colors, 12),
xaxt = "n",
main = "Monthly Temperature Comparison by Station",
xlab = "Month",
ylab = "Temperature (°C)")
# Add month labels
axis(1,
at = seq(1.5, 24, by = 2),
labels = 1:12)
# Add legend
legend("topright",
legend = c("Cuprapca", "Gyda"),
fill = station_colors)
Plot 10. Monthly Temperature Comparison Between Cuprapca and
Gyda.
This figure compares temperatures by month for both stations. Each month
includes boxplots for Cuprapca and Gyda, allowing seasonal differences
between the two stations to be compared more clearly.
station_monthly_means <- both_stations %>%
group_by(station, month) %>%
summarise(
mean_temp = mean(temp, na.rm = TRUE),
median_temp = median(temp, na.rm = TRUE),
sd_temp = sd(temp, na.rm = TRUE),
.groups = "drop"
)
knitr::kable(station_monthly_means, caption = "Table 3. Monthly mean, median, and standard deviation temperatures for Cuprapca and Gyda.")
| station | month | mean_temp | median_temp | sd_temp |
|---|---|---|---|---|
| Cuprapca | 1 | -42.232500 | -42.70 | 3.701863 |
| Cuprapca | 2 | -36.750000 | -36.80 | 3.180066 |
| Cuprapca | 3 | -23.097500 | -23.60 | 2.900176 |
| Cuprapca | 4 | -6.815000 | -7.15 | 2.230965 |
| Cuprapca | 5 | 6.210000 | 6.40 | 1.788109 |
| Cuprapca | 6 | 14.957500 | 14.90 | 1.594524 |
| Cuprapca | 7 | 17.967500 | 18.00 | 1.688025 |
| Cuprapca | 8 | 13.845000 | 13.85 | 1.698106 |
| Cuprapca | 9 | 5.132500 | 5.15 | 1.558827 |
| Cuprapca | 10 | -9.765000 | -9.70 | 2.484367 |
| Cuprapca | 11 | -31.150000 | -31.65 | 3.235381 |
| Cuprapca | 12 | -40.810000 | -41.45 | 3.555696 |
| Gyda | 1 | -28.271053 | -28.45 | 4.338384 |
| Gyda | 2 | -28.302632 | -28.20 | 5.101271 |
| Gyda | 3 | -23.173684 | -22.15 | 4.276313 |
| Gyda | 4 | -17.413158 | -18.35 | 4.325548 |
| Gyda | 5 | -7.765790 | -7.85 | 1.864356 |
| Gyda | 6 | 2.044737 | 1.85 | 1.907694 |
| Gyda | 7 | 10.297368 | 10.40 | 2.174979 |
| Gyda | 8 | 8.476316 | 8.35 | 1.403169 |
| Gyda | 9 | 2.852632 | 2.50 | 1.472218 |
| Gyda | 10 | -8.600000 | -8.40 | 3.484948 |
| Gyda | 11 | -19.615790 | -20.10 | 5.605669 |
| Gyda | 12 | -24.707895 | -24.65 | 4.332491 |
monthly_t_tests <- both_stations %>%
group_by(month) %>%
summarise(
p_value = t.test(temp ~ station)$p.value,
cuprapca_mean = mean(temp[station == "Cuprapca"], na.rm = TRUE),
gyda_mean = mean(temp[station == "Gyda"], na.rm = TRUE)
)
knitr::kable (monthly_t_tests, caption = "Table 4. Montly t tests for Cuprapca and Gyda.")
| month | p_value | cuprapca_mean | gyda_mean |
|---|---|---|---|
| 1 | 0.0000000 | -42.2325 | -28.271053 |
| 2 | 0.0000000 | -36.7500 | -28.302632 |
| 3 | 0.9272871 | -23.0975 | -23.173684 |
| 4 | 0.0000000 | -6.8150 | -17.413158 |
| 5 | 0.0000000 | 6.2100 | -7.765790 |
| 6 | 0.0000000 | 14.9575 | 2.044737 |
| 7 | 0.0000000 | 17.9675 | 10.297368 |
| 8 | 0.0000000 | 13.8450 | 8.476316 |
| 9 | 0.0000000 | 5.1325 | 2.852632 |
| 10 | 0.0952586 | -9.7650 | -8.600000 |
| 11 | 0.0000000 | -31.1500 | -19.615790 |
| 12 | 0.0000000 | -40.8100 | -24.707895 |