Final R Project: Meteorology Temperature Analysis

This project analyzes monthly air temperature data from the Cuprapca weather station and compares it with the Gyda weather station. The main goal is to examine seasonal temperature patterns from 1961–2000 and compare earlier years (1961–1980) with later years (1981–2000).

The data used in this project comes from the NSIDC meteorology dataset. The two stations analyzed are:

Both datasets include monthly temperature measurements, which were cleaned and organized in R for statistical analysis and visualization.

Cuprapca Weather Station

#Dowload data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.cuprapca.24768.dat"
download.file(url,destfile='data.dat',method='curl')

cuprapca <- read.table('data.dat',sep="",na.strings="999.99")

#Clean Data
cuprapca_clean <- cuprapca %>%
  select(
    year = V2,
    month = V3,
    latitude= V7,
    longitude =V8,
    temp = V9
  )

cuprapca_clean <- cuprapca_clean %>%
  filter(!is.na(temp))

Prepare data and calculate temperature statistics

The data was divided into three time periods:

  • Full period: 1961–2000
  • Early period: 1961–1980
  • Late period: 1981–2000

For each month, I calculated the mean temperature, median temperature, and standard deviation.

#Divide data into time periods for analysis

early_data <- cuprapca_clean %>%
  filter(year >= 1961 & year <= 1980)

late_data <- cuprapca_clean %>%
  filter(year >= 1981 & year <= 2000)

full <- cuprapca_clean %>%
  filter(year >= 1961 & year <= 2000)


#Calculate stats
early_stats <- early_data %>%
  group_by(month) %>%
  summarise(
    early_mean = mean(temp),
    early_median = median(temp)
  )

late_stats <- late_data %>%
  group_by(month) %>%
  summarise(
    late_mean = mean(temp),
    late_median = median(temp)
  )

full_stats <- full %>%
  group_by(month) %>%
  summarise(
    full_mean = mean(temp),
    full_median = median(temp),
    sd = sd(temp)
  )

#join all stats into a final table
final_table <- full_stats %>%
  left_join(early_stats, by = "month") %>%
  left_join(late_stats, by = "month")
  
  knitr::kable(final_table, caption = "Table 1. Monthly temperature statistics for the Cuprapca station.")
Table 1. Monthly temperature statistics for the Cuprapca station.
month full_mean full_median sd early_mean early_median late_mean late_median
1 -42.2325 -42.70 3.701863 -42.760 -43.45 -41.705 -42.00
2 -36.7500 -36.80 3.180066 -38.385 -38.40 -35.115 -35.30
3 -23.0975 -23.60 2.900176 -23.940 -24.10 -22.255 -23.05
4 -6.8150 -7.15 2.230965 -7.610 -7.75 -6.020 -6.00
5 6.2100 6.40 1.788109 5.705 5.25 6.715 7.10
6 14.9575 14.90 1.594524 14.625 14.60 15.290 15.10
7 17.9675 18.00 1.688025 18.055 18.00 17.880 17.90
8 13.8450 13.85 1.698106 14.045 14.00 13.645 13.80
9 5.1325 5.15 1.558827 5.065 5.20 5.200 5.15
10 -9.7650 -9.70 2.484367 -9.805 -9.60 -9.725 -9.80
11 -31.1500 -31.65 3.235381 -31.450 -32.20 -30.850 -30.50
12 -40.8100 -41.45 3.555696 -41.125 -41.05 -40.495 -41.60

Plot 1: Montly Mean Temperature

#Montly mean temperature 

bars <- full_stats

# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10

# Shift values so all bars go up
bars_shift <- bars$full_mean - ymin

# Plot
barplot(bars_shift,
        yaxt = "n",
        names.arg = bars$month,
        col = "steelblue",
        main = "Monthly Mean Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

Plot 1. Monthly Mean Temperature at Cuprapca Station.
This bar plot shows the average monthly temperature at the Cuprapca station from 1961–2000. The pattern shows strong seasonality, with the coldest temperatures occurring in winter months and the warmest temperatures occurring during summer.

Plot 2: Mean vs Median Temperature

# Create matrix with mean and median
bars <- cbind(full_stats$full_mean, full_stats$full_median)

# Define ymin
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = full_stats$month,
        col = c("steelblue", "orange"),
        main = "Mean vs Median Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("Mean", "Median"),
       fill = c("steelblue", "orange"))

Plot 2. Mean vs. Median Monthly Temperature at Cuprapca Station.
This plot compares the mean and median temperature for each month. Similar mean and median values suggest that the monthly temperature data is fairly balanced, while larger differences may indicate months with more extreme values.

Plot 3: Early vs. Late Temperature Comparison

Early “1961–1980” vs Late “1981–2000”

#Early vs Late plot

# Combine early and late means into a matrix
bars <- cbind(early_stats$early_mean, late_stats$late_mean)

# Define ymin 
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = early_stats$month,
        col = c("steelblue", "tomato"),
        main = "Early vs Late Temperature Comparison",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("1961–1980", "1981–2000"),
       fill = c("steelblue", "tomato"))

Plot 3. Early vs. Late Monthly Temperature Comparison at Cuprapca Station.
This figure compares monthly mean temperatures between 1961–1980 and 1981–2000. The side-by-side bars make it possible to see whether the later period was warmer or cooler for each month.

Plot 4: Monthly Mean Temperature with Standard Deviation

# Mean and sd plot 

bars <- full_stats$full_mean
sd_vals <- full_stats$sd

# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10

# Shift values
bars_shift <- bars - ymin

# Plot bars
bp <- barplot(bars_shift,
              yaxt = "n",
              names.arg = full_stats$month,
              col = "steelblue",
              main = "Monthly Mean Temperature with SD",
              xlab = "Month",
              ylab = "Temp (°C)")

# Add error bars (SD)
arrows(x0 = bp,
       y0 = (bars - sd_vals) - ymin,
       x1 = bp,
       y1 = (bars + sd_vals) - ymin,
       angle = 90,
       code = 3,
       length = 0.05)

# Create y-axis labels in original scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

Plot 4. Monthly Mean Temperature with Standard Deviation at Cuprapca Station.
This plot shows monthly mean temperatures with standard deviation bars. The standard deviation represents how much temperature varied within each month across the full study period.

Statistical Test Cuprapca Station

# t-test comparing monthly means
t_test <- t.test(
  early_stats$early_mean,
  late_stats$late_mean,
  paired = TRUE
)

 print(t_test)
## 
##  Paired t-test
## 
## data:  early_stats$early_mean and late_stats$late_mean
## t = -2.9245, df = 11, p-value = 0.01383
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.4816802 -0.2091531
## sample estimates:
## mean difference 
##      -0.8454167

Statistical Analysis

A paired t-test was used to compare the monthly mean temperatures from the early period (1961–1980) and the late period (1981–2000). This test was chosen because each month in the early period is compared directly with the same month in the later period.

The p-value from the test was used to determine whether the difference between the two periods was statistically significant. A p-value below 0.05 would suggest that the temperature difference between the two periods is statistically significant.

Gyda Weather Station

#download data
url <- "https://noaadata.apps.nsidc.org/NOAA/G02141/uni.gyda.20963.dat"

download.file(url,destfile = "gyda.dat",method = "curl")

gyda <- read.table("gyda.dat",sep = "",na.strings = "999.99")

#Clean Data
gyda_clean <- gyda %>%
  select(
    year = V2,
    month = V3,
    latitude = V7,
    longitude = V8,
    temp = V9
  )

gyda_clean <- gyda_clean %>%
  filter(!is.na(temp))

Prepare data and calculate temperature stats

#Divide into time periods for analysis

early_data_gyda <- gyda_clean %>%
  filter(year >= 1961 & year <= 1980)

late_data_gyda <- gyda_clean %>%
  filter(year >= 1981 & year <= 2000)

full_gyda <- gyda_clean %>%
  filter(year >= 1961 & year <= 2000)


#data stats

early_stats_gyda <- early_data_gyda %>%
  group_by(month) %>%
  summarise(
    early_mean = mean(temp),
    early_median = median(temp)
  )

late_stats_gyda <- late_data_gyda %>%
  group_by(month) %>%
  summarise(
    late_mean = mean(temp),
    late_median = median(temp)
  )

full_stats_gyda <- full_gyda %>%
  group_by(month) %>%
  summarise(
    full_mean = mean(temp),
    full_median = median(temp),
    sd = sd(temp)
  )


#join all stats into a final table

final_table_gyda <- full_stats_gyda %>%
  left_join(early_stats_gyda, by = "month") %>%
  left_join(late_stats_gyda, by = "month")

knitr::kable(final_table_gyda, caption = "Table 2. Monthly temperature statistics for the Gyda station.")
Table 2. Monthly temperature statistics for the Gyda station.
month full_mean full_median sd early_mean early_median late_mean late_median
1 -28.271053 -28.45 4.338384 -29.735 -29.35 -26.644444 -26.75
2 -28.302632 -28.20 5.101271 -28.910 -28.50 -27.627778 -27.45
3 -23.173684 -22.15 4.276313 -24.360 -24.70 -21.855556 -20.80
4 -17.413158 -18.35 4.325548 -17.485 -17.60 -17.333333 -18.85
5 -7.765790 -7.85 1.864356 -8.455 -8.20 -7.000000 -6.90
6 2.044737 1.85 1.907694 1.500 1.65 2.650000 2.15
7 10.297368 10.40 2.174979 9.810 9.45 10.838889 10.65
8 8.476316 8.35 1.403169 8.315 7.95 8.655556 8.55
9 2.852632 2.50 1.472218 2.540 2.30 3.200000 3.10
10 -8.600000 -8.40 3.484948 -9.050 -8.80 -8.100000 -7.95
11 -19.615790 -20.10 5.605669 -19.915 -19.65 -19.283333 -20.15
12 -24.707895 -24.65 4.332491 -24.560 -25.30 -24.872222 -24.20

Plot 5: Monthly Mean Temperature

# Use full_stats directly
bars <- full_stats_gyda

# Define ymin
ymin <- floor(min(bars$full_mean, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars$full_mean - ymin

# Plot
barplot(bars_shift,
        yaxt = "n",
        names.arg = bars$month,
        col = "mediumpurple2",
        main = "Gyda Monthly Mean Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars$full_mean, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

Plot 5. Monthly Mean Temperature at Gyda Station.
This bar plot shows the average monthly temperature at the Gyda station from 1961–2000. The graph demonstrates clear seasonal temperature variation, with the coldest temperatures occurring during winter months and warmer temperatures occurring during summer months.

Plot 6: Mean vs. Median Temperature

# Create matrix with mean and median
bars <- cbind(full_stats_gyda$full_mean,
              full_stats_gyda$full_median)

# Define ymin using BOTH columns
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = full_stats_gyda$month,
        col = c("mediumpurple2", "goldenrod2"),
        main = "Gyda Mean vs Median Temperature",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("Mean", "Median"),
       fill = c("mediumpurple2", "goldenrod2"))

Figure 6. Mean vs. Median Monthly Temperature at Gyda Station.
This figure compares the monthly mean and median temperatures at the Gyda station. The similarity between the mean and median values for most months suggests relatively consistent temperature distributions, while larger differences may indicate months with more variability or extreme temperature values.

Plot 7 : Early vs. Late Temperature Comparison

Early include the years (1961 - 1980) Late include the years (1981 - 2000)

# Combine early and late means into a matrix
bars <- cbind(early_stats_gyda$early_mean,
              late_stats_gyda$late_mean)

# Define ymin using BOTH datasets
ymin <- floor(min(bars, na.rm = TRUE) / 10) * 10

# Shift values so all bars go upward
bars_shift <- bars - ymin

# Plot side-by-side bars
barplot(t(bars_shift),
        beside = TRUE,
        yaxt = "n",
        names.arg = early_stats_gyda$month,
        col = c("mediumpurple2", "darkturquoise"),
        main = "Gyda Early vs Late Temperature Comparison",
        xlab = "Month",
        ylab = "Temp (°C)")

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

# Add legend
legend("topright",
       legend = c("1961–1980", "1981–2000"),
       fill = c("mediumpurple2", "darkturquoise"))

Plot 7. Early vs. Late Monthly Temperature Comparison at Gyda Station.
This plot compares monthly mean temperatures between the earlier period (1961–1980) and the later period (1981–2000) at the Gyda station. The side-by-side bars make it possible to identify changes in temperature patterns over time and evaluate whether the later period experienced warming.

Plot 8: Monthly Mean Temperature with Standard Deviation

# Use mean and sd
bars <- full_stats_gyda$full_mean
sd_vals <- full_stats_gyda$sd

# Define ymin
ymin <- floor(min(bars - sd_vals, na.rm = TRUE) / 10) * 10

# Shift values
bars_shift <- bars - ymin

# Plot bars
bp <- barplot(bars_shift,
              yaxt = "n",
              names.arg = full_stats_gyda$month,
              col = "mediumpurple2",
              main = "Gyda Monthly Mean Temperature with SD",
              xlab = "Month",
              ylab = "Temp (°C)")

# Add error bars (SD)
arrows(x0 = bp,
       y0 = (bars - sd_vals) - ymin,
       x1 = bp,
       y1 = (bars + sd_vals) - ymin,
       angle = 90,
       code = 3,
       length = 0.05)

# Create y-axis labels in original temperature scale
ymax <- ceiling(max(bars + sd_vals, na.rm = TRUE) / 10) * 10
yticks <- seq(0, ymax - ymin, by = 5)

axis(2, at = yticks, labels = yticks + ymin)

Plot 8. Monthly Mean Temperature with Standard Deviation at Gyda Station.
This visualization displays the monthly mean temperatures at the Gyda station along with standard deviation error bars. The error bars represent variability in temperature observations for each month throughout the study period.

Statistical Test Gyda Station

# Paired t-test comparing monthly means

t_test_gyda <- t.test(
  early_stats_gyda$early_mean,
  late_stats_gyda$late_mean,
  paired = TRUE
)

print(t_test_gyda)
## 
##  Paired t-test
## 
## data:  early_stats_gyda$early_mean and late_stats_gyda$late_mean
## t = -3.9153, df = 11, p-value = 0.002412
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
##  -1.6835785 -0.4718845
## sample estimates:
## mean difference 
##       -1.077731

Plot 9: Comparison between Cuprapca and Gyda stations

# Add station names
cuprapca_compare <- cuprapca_clean %>%
  mutate(station = "Cuprapca")

gyda_compare <- gyda_clean %>%
  mutate(station = "Gyda")

# Combine both stations
both_stations <- bind_rows(cuprapca_compare, gyda_compare)


boxplot(temp ~ station,
        data = both_stations,
        col = c("steelblue", "mediumpurple2"),
        main = "Temperature Comparison Between Stations",
        xlab = "Station",
        ylab = "Temperature (°C)",
        ylim = c(-60, 30))

Plot 9. Temperature Distribution Comparison Between Cuprapca and Gyda.
This boxplot compares the overall temperature distribution between the two stations. Differences in the center and spread of the boxes help show whether one station generally experienced warmer or colder conditions.

Monthly Temperature Comparison

# Define colors for stations
station_colors <- c("steelblue", "mediumpurple2")

# Side-by-side monthly boxplots
boxplot(temp ~ interaction(month, station),
        data = both_stations,
        col = rep(station_colors, 12),
        xaxt = "n",
        main = "Monthly Temperature Comparison by Station",
        xlab = "Month",
        ylab = "Temperature (°C)")

# Add month labels
axis(1,
     at = seq(1.5, 24, by = 2),
     labels = 1:12)

# Add legend
legend("topright",
       legend = c("Cuprapca", "Gyda"),
       fill = station_colors)

Plot 10. Monthly Temperature Comparison Between Cuprapca and Gyda.
This figure compares temperatures by month for both stations. Each month includes boxplots for Cuprapca and Gyda, allowing seasonal differences between the two stations to be compared more clearly.

station_monthly_means <- both_stations %>%
  group_by(station, month) %>%
  summarise(
    mean_temp = mean(temp, na.rm = TRUE),
    median_temp = median(temp, na.rm = TRUE),
    sd_temp = sd(temp, na.rm = TRUE),
    .groups = "drop"
  )
knitr::kable(station_monthly_means, caption = "Table 3. Monthly mean, median, and standard deviation temperatures for Cuprapca and Gyda.")
Table 3. Monthly mean, median, and standard deviation temperatures for Cuprapca and Gyda.
station month mean_temp median_temp sd_temp
Cuprapca 1 -42.232500 -42.70 3.701863
Cuprapca 2 -36.750000 -36.80 3.180066
Cuprapca 3 -23.097500 -23.60 2.900176
Cuprapca 4 -6.815000 -7.15 2.230965
Cuprapca 5 6.210000 6.40 1.788109
Cuprapca 6 14.957500 14.90 1.594524
Cuprapca 7 17.967500 18.00 1.688025
Cuprapca 8 13.845000 13.85 1.698106
Cuprapca 9 5.132500 5.15 1.558827
Cuprapca 10 -9.765000 -9.70 2.484367
Cuprapca 11 -31.150000 -31.65 3.235381
Cuprapca 12 -40.810000 -41.45 3.555696
Gyda 1 -28.271053 -28.45 4.338384
Gyda 2 -28.302632 -28.20 5.101271
Gyda 3 -23.173684 -22.15 4.276313
Gyda 4 -17.413158 -18.35 4.325548
Gyda 5 -7.765790 -7.85 1.864356
Gyda 6 2.044737 1.85 1.907694
Gyda 7 10.297368 10.40 2.174979
Gyda 8 8.476316 8.35 1.403169
Gyda 9 2.852632 2.50 1.472218
Gyda 10 -8.600000 -8.40 3.484948
Gyda 11 -19.615790 -20.10 5.605669
Gyda 12 -24.707895 -24.65 4.332491

Comparing montly t-tests

monthly_t_tests <- both_stations %>%
  group_by(month) %>%
  summarise(
    p_value = t.test(temp ~ station)$p.value,
    cuprapca_mean = mean(temp[station == "Cuprapca"], na.rm = TRUE),
    gyda_mean = mean(temp[station == "Gyda"], na.rm = TRUE)
  )

 knitr::kable (monthly_t_tests, caption = "Table 4. Montly t tests for Cuprapca and Gyda.")
Table 4. Montly t tests for Cuprapca and Gyda.
month p_value cuprapca_mean gyda_mean
1 0.0000000 -42.2325 -28.271053
2 0.0000000 -36.7500 -28.302632
3 0.9272871 -23.0975 -23.173684
4 0.0000000 -6.8150 -17.413158
5 0.0000000 6.2100 -7.765790
6 0.0000000 14.9575 2.044737
7 0.0000000 17.9675 10.297368
8 0.0000000 13.8450 8.476316
9 0.0000000 5.1325 2.852632
10 0.0952586 -9.7650 -8.600000
11 0.0000000 -31.1500 -19.615790
12 0.0000000 -40.8100 -24.707895