Final R Project: Meteorology Temperature Analysis

This project analyzes monthly air temperature data from the Cuprapca weather station and compares it with the Gyda weather station. The main goal is to examine seasonal temperature patterns from 1961–2000 and compare earlier years (1961–1980) with later years (1981–2000).

The data used in this project comes from the NSIDC meteorology dataset. The two stations analyzed are:

Both datasets include monthly temperature measurements, which were cleaned and organized in R for statistical analysis and visualization.

Cuprapca Weather Station

#Dowload data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.cuprapca.24768.dat"
download.file(url,destfile='data.dat',method='curl')

cuprapca <- read.table('data.dat',sep="",na.strings="999.99")

#Clean Data
cuprapca_clean <- cuprapca %>%
  select(
    year = V2,
    month = V3,
    latitude= V7,
    longitude =V8,
    temp = V9
  )

cuprapca_clean <- cuprapca_clean %>%
  filter(!is.na(temp))

Prepare data and calculate temperature statistics

The data was divided into three time periods:

  • Full period: 1961–2000
  • Early period: 1961–1980
  • Late period: 1981–2000

For each month, I calculated the mean temperature, median temperature, and standard deviation.

#Divide data into time periods for analysis

early_data <- cuprapca_clean %>%
  filter(year >= 1961 & year <= 1980)

late_data <- cuprapca_clean %>%
  filter(year >= 1981 & year <= 2000)

full <- cuprapca_clean %>%
  filter(year >= 1961 & year <= 2000)


#Calculate stats
early_stats <- early_data %>%
  group_by(month) %>%
  summarise(
    early_mean = mean(temp),
    early_median = median(temp)
  )

late_stats <- late_data %>%
  group_by(month) %>%
  summarise(
    late_mean = mean(temp),
    late_median = median(temp)
  )

full_stats <- full %>%
  group_by(month) %>%
  summarise(
    full_mean = mean(temp),
    full_median = median(temp),
    sd = sd(temp)
  )

#join all stats into a final table
final_table <- full_stats %>%
  left_join(early_stats, by = "month") %>%
  left_join(late_stats, by = "month")
  
  knitr::kable(final_table, caption = "Table 1. Monthly temperature statistics for the Cuprapca station.")
Table 1. Monthly temperature statistics for the Cuprapca station.
month full_mean full_median sd early_mean early_median late_mean late_median
1 -42.2325 -42.70 3.701863 -42.760 -43.45 -41.705 -42.00
2 -36.7500 -36.80 3.180066 -38.385 -38.40 -35.115 -35.30
3 -23.0975 -23.60 2.900176 -23.940 -24.10 -22.255 -23.05
4 -6.8150 -7.15 2.230965 -7.610 -7.75 -6.020 -6.00
5 6.2100 6.40 1.788109 5.705 5.25 6.715 7.10
6 14.9575 14.90 1.594524 14.625 14.60 15.290 15.10
7 17.9675 18.00 1.688025 18.055 18.00 17.880 17.90
8 13.8450 13.85 1.698106 14.045 14.00 13.645 13.80
9 5.1325 5.15 1.558827 5.065 5.20 5.200 5.15
10 -9.7650 -9.70 2.484367 -9.805 -9.60 -9.725 -9.80
11 -31.1500 -31.65 3.235381 -31.450 -32.20 -30.850 -30.50
12 -40.8100 -41.45 3.555696 -41.125 -41.05 -40.495 -41.60

Plot 1: Montly Mean Temperature

#Montly mean temperature 

bars <- full_stats

# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10

# Shift values so all bars go up
bars_shift <- bars$full_mean - ymin

# Plot
barplot(bars_shift,
        yaxt = "n",
        names.arg = bars$month,
        col = "steelblue",
        main = "Monthly Mean Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

Plot 2: Mean vs Median Temperature

# Create matrix with mean and median
bars <- cbind(full_stats$full_mean, full_stats$full_median)

# Define ymin
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = full_stats$month,
        col = c("steelblue", "orange"),
        main = "Mean vs Median Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("Mean", "Median"),
       fill = c("steelblue", "orange"))

Plot 3: Early vs. Late Temperature Comparison

Early “1961–1980” vs Late “1981–2000”

#Early vs Late plot

# Combine early and late means into a matrix
bars <- cbind(early_stats$early_mean, late_stats$late_mean)

# Define ymin 
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = early_stats$month,
        col = c("steelblue", "tomato"),
        main = "Early vs Late Temperature Comparison",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("1961–1980", "1981–2000"),
       fill = c("steelblue", "tomato"))

Plot 4: Monthly Mean Temperature with Standard Deviation

# Mean and sd plot 

bars <- full_stats$full_mean
sd_vals <- full_stats$sd

# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10

# Shift values
bars_shift <- bars - ymin

# Plot bars
bp <- barplot(bars_shift,
              yaxt = "n",
              names.arg = full_stats$month,
              col = "steelblue",
              main = "Monthly Mean Temperature with SD",
              xlab = "Month",
              ylab = "Temp (°C)")

# Add error bars (SD)
arrows(x0 = bp,
       y0 = (bars - sd_vals) - ymin,
       x1 = bp,
       y1 = (bars + sd_vals) - ymin,
       angle = 90,
       code = 3,
       length = 0.05)

# Create y-axis labels in original scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

Statistical Test Cuprapca Station

# t-test comparing monthly means
t_test <- t.test(
  early_stats$early_mean,
  late_stats$late_mean,
  paired = TRUE
)

 print(t_test)
## 
##  Paired t-test
## 
## data:  early_stats$early_mean and late_stats$late_mean
## t = -2.9245, df = 11, p-value = 0.01383
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.4816802 -0.2091531
## sample estimates:
## mean difference 
##      -0.8454167

Gyda Weather Station

#download data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.gyda.20963.dat"

download.file(url,destfile = "gyda.dat",method = "curl")

gyda <- read.table("gyda.dat",sep = "",na.strings = "999.99")

#Clean Data
gyda_clean <- gyda %>%
  select(
    year = V2,
    month = V3,
    latitude = V7,
    longitude = V8,
    temp = V9
  )

gyda_clean <- gyda_clean %>%
  filter(!is.na(temp))

Prepare data and calculate temperature stats

#Divide into time periods for analysis

early_data_gyda <- gyda_clean %>%
  filter(year >= 1961 & year <= 1980)

late_data_gyda <- gyda_clean %>%
  filter(year >= 1981 & year <= 2000)

full_gyda <- gyda_clean %>%
  filter(year >= 1961 & year <= 2000)


#data stats

early_stats_gyda <- early_data_gyda %>%
  group_by(month) %>%
  summarise(
    early_mean = mean(temp),
    early_median = median(temp)
  )

late_stats_gyda <- late_data_gyda %>%
  group_by(month) %>%
  summarise(
    late_mean = mean(temp),
    late_median = median(temp)
  )

full_stats_gyda <- full_gyda %>%
  group_by(month) %>%
  summarise(
    full_mean = mean(temp),
    full_median = median(temp),
    sd = sd(temp)
  )


#join all stats into a final table

final_table_gyda <- full_stats_gyda %>%
  left_join(early_stats_gyda, by = "month") %>%
  left_join(late_stats_gyda, by = "month")

knitr::kable(final_table_gyda, caption = "Table 2. Monthly temperature statistics for the Gyda station.")
Table 2. Monthly temperature statistics for the Gyda station.
month full_mean full_median sd early_mean early_median late_mean late_median
1 -28.271053 -28.45 4.338384 -29.735 -29.35 -26.644444 -26.75
2 -28.302632 -28.20 5.101271 -28.910 -28.50 -27.627778 -27.45
3 -23.173684 -22.15 4.276313 -24.360 -24.70 -21.855556 -20.80
4 -17.413158 -18.35 4.325548 -17.485 -17.60 -17.333333 -18.85
5 -7.765790 -7.85 1.864356 -8.455 -8.20 -7.000000 -6.90
6 2.044737 1.85 1.907694 1.500 1.65 2.650000 2.15
7 10.297368 10.40 2.174979 9.810 9.45 10.838889 10.65
8 8.476316 8.35 1.403169 8.315 7.95 8.655556 8.55
9 2.852632 2.50 1.472218 2.540 2.30 3.200000 3.10
10 -8.600000 -8.40 3.484948 -9.050 -8.80 -8.100000 -7.95
11 -19.615790 -20.10 5.605669 -19.915 -19.65 -19.283333 -20.15
12 -24.707895 -24.65 4.332491 -24.560 -25.30 -24.872222 -24.20

Plot 5: Montly Mean Temperature

# Use full_stats directly
bars <- full_stats_gyda

# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars$full_mean - ymin

# Plot
barplot(bars_shift,
        yaxt = "n",
        names.arg = bars$month,
        col = "mediumpurple2",
        main = "Gyda Monthly Mean Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

## Plot 6: Mean vs. Median Temperature

# Create matrix with mean and median
bars <- cbind(full_stats_gyda$full_mean,
              full_stats_gyda$full_median)

# Define ymin using BOTH columns
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = full_stats_gyda$month,
        col = c("mediumpurple2", "goldenrod2"),
        main = "Gyda Mean vs Median Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("Mean", "Median"),
       fill = c("mediumpurple2", "goldenrod2"))

Plot 7 : Early vs. Late Temperature Comparison

Early include the years (1961 - 1980) Late include the years (1981 - 2000)

# Combine early and late means into a matrix
bars <- cbind(early_stats_gyda$early_mean,
              late_stats_gyda$late_mean)

# Define ymin using BOTH datasets
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = early_stats_gyda$month,
        col = c("mediumpurple2", "darkturquoise"),
        main = "Gyda Early vs Late Temperature Comparison",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("1961–1980", "1981–2000"),
       fill = c("mediumpurple2", "darkturquoise"))

Plot 8: Monthly Mean Temperature with Standard Deviation

# Use mean and sd
bars <- full_stats_gyda$full_mean
sd_vals <- full_stats_gyda$sd

# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10

# Shift values
bars_shift <- bars - ymin

# Plot bars
bp <- barplot(bars_shift,
              yaxt = "n",
              names.arg = full_stats_gyda$month,
              col = "mediumpurple2",
              main = "Gyda Monthly Mean Temperature with SD",
              xlab = "Month",
              ylab = "Temp (°C)")

# Add error bars (SD)
arrows(x0 = bp,
       y0 = (bars - sd_vals) - ymin,
       x1 = bp,
       y1 = (bars + sd_vals) - ymin,
       angle = 90,
       code = 3,
       length = 0.05)

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

Statistical Test Gyda Station

# Paired t-test comparing monthly means

t_test_gyda <- t.test(
  early_stats_gyda$early_mean,
  late_stats_gyda$late_mean,
  paired = TRUE
)

print(t_test_gyda)
## 
##  Paired t-test
## 
## data:  early_stats_gyda$early_mean and late_stats_gyda$late_mean
## t = -3.9153, df = 11, p-value = 0.002412
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.6835785 -0.4718845
## sample estimates:
## mean difference 
##       -1.077731

Comparison between Cuprapca and Gyda stations

# Add station names
cuprapca_compare <- cuprapca_clean %>%
  mutate(station = "Cuprapca")

gyda_compare <- gyda_clean %>%
  mutate(station = "Gyda")

# Combine both stations
both_stations <- bind_rows(cuprapca_compare, gyda_compare)


boxplot(temp ~ station,
        data = both_stations,
        col = c("steelblue", "mediumpurple2"),
        main = "Temperature Comparison Between Stations",
        xlab = "Station",
        ylab = "Temperature (°C)",
        ylim = c(-60, 30))

Monthly Temperature Comparison

# Define colors for stations
station_colors <- c("steelblue", "mediumpurple2")

# Side-by-side monthly boxplots
boxplot(temp ~ interaction(month, station),
        data = both_stations,
        col = rep(station_colors, 12),
        xaxt = "n",
        main = "Monthly Temperature Comparison by Station",
        xlab = "Month",
        ylab = "Temperature (°C)")

# Add month labels
axis(1,
     at = seq(1.5, 24, by = 2),
     labels = 1:12)

# Add legend
legend("topright",
       legend = c("Cuprapca", "Gyda"),
       fill = station_colors)

station_monthly_means <- both_stations %>%
  group_by(station, month) %>%
  summarise(
    mean_temp = mean(temp, na.rm = TRUE),
    median_temp = median(temp, na.rm = TRUE),
    sd_temp = sd(temp, na.rm = TRUE),
    .groups = "drop"
  )
knitr::kable(station_monthly_means, caption = "Table 3. Monthly mean, median, and standard deviation temperatures for Cuprapca and Gyda.")
Table 3. Monthly mean, median, and standard deviation temperatures for Cuprapca and Gyda.
station month mean_temp median_temp sd_temp
Cuprapca 1 -42.232500 -42.70 3.701863
Cuprapca 2 -36.750000 -36.80 3.180066
Cuprapca 3 -23.097500 -23.60 2.900176
Cuprapca 4 -6.815000 -7.15 2.230965
Cuprapca 5 6.210000 6.40 1.788109
Cuprapca 6 14.957500 14.90 1.594524
Cuprapca 7 17.967500 18.00 1.688025
Cuprapca 8 13.845000 13.85 1.698106
Cuprapca 9 5.132500 5.15 1.558827
Cuprapca 10 -9.765000 -9.70 2.484367
Cuprapca 11 -31.150000 -31.65 3.235381
Cuprapca 12 -40.810000 -41.45 3.555696
Gyda 1 -28.271053 -28.45 4.338384
Gyda 2 -28.302632 -28.20 5.101271
Gyda 3 -23.173684 -22.15 4.276313
Gyda 4 -17.413158 -18.35 4.325548
Gyda 5 -7.765790 -7.85 1.864356
Gyda 6 2.044737 1.85 1.907694
Gyda 7 10.297368 10.40 2.174979
Gyda 8 8.476316 8.35 1.403169
Gyda 9 2.852632 2.50 1.472218
Gyda 10 -8.600000 -8.40 3.484948
Gyda 11 -19.615790 -20.10 5.605669
Gyda 12 -24.707895 -24.65 4.332491

Comparing montly t-tests

monthly_t_tests <- both_stations %>%
  group_by(month) %>%
  summarise(
    p_value = t.test(temp ~ station)$p.value,
    cuprapca_mean = mean(temp[station == "Cuprapca"], na.rm = TRUE),
    gyda_mean = mean(temp[station == "Gyda"], na.rm = TRUE)
  )

 knitr::kable (monthly_t_tests, caption = "Table 4. Montly t tests for Cuprapca and Gyda.")
Table 4. Montly t tests for Cuprapca and Gyda.
month p_value cuprapca_mean gyda_mean
1 0.0000000 -42.2325 -28.271053
2 0.0000000 -36.7500 -28.302632
3 0.9272871 -23.0975 -23.173684
4 0.0000000 -6.8150 -17.413158
5 0.0000000 6.2100 -7.765790
6 0.0000000 14.9575 2.044737
7 0.0000000 17.9675 10.297368
8 0.0000000 13.8450 8.476316
9 0.0000000 5.1325 2.852632
10 0.0952586 -9.7650 -8.600000
11 0.0000000 -31.1500 -19.615790
12 0.0000000 -40.8100 -24.707895