library(readr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)

kjusjur_61_00<- read_csv("/Users/kaitlynmaritan/Desktop/R-spatial/Final_Project/kjusjurstation1.csv")
## Rows: 480 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): Station Name
## dbl (21): Station, Year, Month, Day, Time, PIF, Latitude, Longitude, Tempera...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Remove unnecessary columns 
kjusjur_61_00<- kjusjur_61_00 %>% select(-"Station", -"Day",-"Time", -"PIF", 
                                         -"Sea Level Pressure", -"Wind Direction",
                                         -"Wind Speed", -"Total Cloud Amount", -"Low Cloud Amount",
                                         -"Relative Humidity", -"Dew Point", -"Wet Bulb",-"Sea Surface", 
                                         -"Vapor Pressure",-"Soil or Ice",-"Station Name")

#Remove missing temperature values
kjusjur_61_00[kjusjur_61_00 == 999.99]<- NA
kjusjur_61_00 <- kjusjur_61_00 %>% filter(!is.na(Temperature))

# Convert month numbers to month names 
kjusjur_61_00$Month <- month.abb[as.numeric(kjusjur_61_00$Month)]
kjusjur_61_00$Month <- factor(kjusjur_61_00$Month, levels = month.abb)

Monthly mean and monthly median air temperature; (1961-2000)

kjusjur_61_00.mean <- kjusjur_61_00
kjusjur_61_00.median <-kjusjur_61_00

#Calculate monthly mean 1961-2000
kjusjur_61_00.mean <- kjusjur_61_00.mean %>%
  group_by(Month) %>%
  summarise(MeanTemp = mean(Temperature, na.rm = TRUE)) %>%
  ungroup()

print(kjusjur_61_00.mean)
## # A tibble: 12 × 2
##    Month MeanTemp
##    <fct>    <dbl>
##  1 Jan     -38.1 
##  2 Feb     -34.1 
##  3 Mar     -26.4 
##  4 Apr     -15.4 
##  5 May      -3.41
##  6 Jun       7.67
##  7 Jul      12.5 
##  8 Aug       9.26
##  9 Sep       1.89
## 10 Oct     -12.3 
## 11 Nov     -30.1 
## 12 Dec     -35.1
#Calculate monthly median 1961-2000
kjusjur_61_00.median <- kjusjur_61_00.median %>%
  group_by(Month) %>%
  summarise(MedianTemp = median(Temperature, na.rm = TRUE)) %>%
  ungroup()

print(kjusjur_61_00.median)
## # A tibble: 12 × 2
##    Month MedianTemp
##    <fct>      <dbl>
##  1 Jan       -37.8 
##  2 Feb       -33.7 
##  3 Mar       -26.6 
##  4 Apr       -15.4 
##  5 May        -3.2 
##  6 Jun         7.8 
##  7 Jul        12.0 
##  8 Aug         9.15
##  9 Sep         1.7 
## 10 Oct       -12.1 
## 11 Nov       -30   
## 12 Dec       -35
#Plot monthly mean 1961-2000
ggplot(kjusjur_61_00.mean) +
  geom_rect(aes(xmin = as.numeric(Month) - 0.4,
                xmax = as.numeric(Month) + 0.4,
                ymin = -40,
                ymax = MeanTemp),
            fill = "skyblue", color = "black") +
  geom_text(aes(x = as.numeric(Month), y = MeanTemp, label = round(MeanTemp, 1)),
            vjust = -0.5, size = 3.5) + 
  scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_00.mean$Month)) +
  scale_y_continuous(breaks = seq(-40, 15, 10), limits = c(-40, 15)) +
  geom_hline(yintercept = -40, color = "black") +
  labs(
    title = "Monthly Mean Temperature: Kjusjur Station (1961–2000)",
    x = "Month",
    y = "Temperature (°C)"
  )

#Plot monthly median 1961-2000
ggplot(kjusjur_61_00.median) +
  geom_rect(aes(xmin = as.numeric(Month) - 0.4,
                xmax = as.numeric(Month) + 0.4,
                ymin = -40,
                ymax = MedianTemp),
            fill = "lightpink2", color = "black") +
  geom_text(aes(x = as.numeric(Month), y = MedianTemp, label = round(MedianTemp, 1)),
            vjust = -0.5, size = 3.5) + 
  scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_00.median$Month)) +
  scale_y_continuous(breaks = seq(-40, 15, 10), limits = c(-40, 15)) +
  geom_hline(yintercept = -40, color = "black") +
  labs(
    title = "Monthly Median Temperature: Kjusjur Station (1961–2000)",
    x = "Month",
    y = "Temperature (°C)"
  )

Monthly mean with standard deviation air temperature; (1961-2000)

#Calculate standard deviation for temperature mean
SD.kjusjur_61_00 <- kjusjur_61_00


SD.kjusjur_61_00 <- SD.kjusjur_61_00 %>%
  group_by(Month) %>%
  summarise(
    MeanTemp = mean(Temperature, na.rm = TRUE),
    SD = sd(Temperature, na.rm = TRUE)
  ) %>%
  ungroup()

#Plot temperature mean with standard deviation
ggplot(SD.kjusjur_61_00) +
  geom_rect(aes(xmin = as.numeric(Month) - 0.4,
                xmax = as.numeric(Month) + 0.4,
                ymin = -45,
                ymax = MeanTemp),
            fill = "lightblue", color = "black") +
  geom_errorbar(aes(x = as.numeric(Month), 
                    ymin = MeanTemp - SD, 
                    ymax = MeanTemp + SD),
                width = 0.2, color = "black") +
  scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_00.median$Month)) +
  scale_y_continuous(breaks = seq(-45, 15, 10), limits = c(-45, 15)) +  # Adjusted limit for error bars/text
  geom_hline(yintercept = -45, color = "black") +
  labs(
    title = "Monthly Median Temperature with Standard Deviation: Kjusjur Station (1961–2000)",
    x = "Month",
    y = "Temperature (°C)"
  )

Side-by-side monthly mean and median (1961-2000)

#Create data frame with mean and median
mean.median_kjusjur_61_00 <- left_join(kjusjur_61_00.mean, kjusjur_61_00.median, by = "Month") %>%
  pivot_longer(cols = c(MeanTemp, MedianTemp),
               names_to = "Statistic",
               values_to = "Temperature") %>%
  mutate(
    Month = factor(Month, levels = unique(Month)),         
    Statistic = recode(Statistic, MeanTemp = "Mean", MedianTemp = "Median")  
  )


#Plot side by side mean and median
mean.median_kjusjur_61_00 <- mean.median_kjusjur_61_00 %>%
  mutate(
    x = as.numeric(Month),
    offset = ifelse(Statistic == "Mean", -0.2, 0.2),
    xmin = x + offset - 0.2,
    xmax = x + offset + 0.2)


ggplot(mean.median_kjusjur_61_00) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = -40, ymax = Temperature, fill = Statistic),
            color = "black") +
  scale_x_continuous(breaks = 1:12, labels = levels(mean.median_kjusjur_61_00$Month)) +
  scale_y_continuous(breaks = seq(-40, 15, 10), limits = c(-40, 15)) +
  scale_fill_manual(values = c("Mean" = "skyblue", "Median" = "orange")) +
  geom_hline(yintercept = -40, color = "black") +
  labs(
    title = "Monthly Mean & Median Temperatures: Kjusjur Station 1961–2000",
    x = "Month",
    y = "Temperature (°C)",
    fill = "Statistic"
  )

Compare 1961-1980 to 1981-2000

1981-2000

#Calculate monthly mean 1981-2000
kjusjur_81_00 <- kjusjur_61_00 %>% filter (Year > 1980)

kjusjur_81_00.mean <- kjusjur_81_00
kjusjur_81_00.median <-kjusjur_81_00

kjusjur_81_00.mean <- kjusjur_81_00.mean %>%
  group_by(Month) %>%
  summarise(MeanTemp = mean(Temperature, na.rm = TRUE)) %>%
  ungroup()
 
print(kjusjur_81_00.mean)
## # A tibble: 12 × 2
##    Month MeanTemp
##    <fct>    <dbl>
##  1 Jan     -37.8 
##  2 Feb     -32.9 
##  3 Mar     -26.2 
##  4 Apr     -15.2 
##  5 May      -2.85
##  6 Jun       7.68
##  7 Jul      12.6 
##  8 Aug       9.35
##  9 Sep       1.91
## 10 Oct     -12.2 
## 11 Nov     -30.0 
## 12 Dec     -34.6
#Calculate monthly median 1981-2000
kjusjur_81_00.median <- kjusjur_81_00.median  %>%
  group_by(Month) %>%
  summarise(MedianTemp = median(Temperature, na.rm = TRUE)) %>%
  ungroup()

print(kjusjur_81_00.median)
## # A tibble: 12 × 2
##    Month MedianTemp
##    <fct>      <dbl>
##  1 Jan       -37.5 
##  2 Feb       -33.3 
##  3 Mar       -26.6 
##  4 Apr       -15.2 
##  5 May        -3   
##  6 Jun         7.9 
##  7 Jul        12.4 
##  8 Aug         9.25
##  9 Sep         1.7 
## 10 Oct       -11.5 
## 11 Nov       -30   
## 12 Dec       -34.5
# Plot mean and median 1981-2000

#Mean
ggplot(kjusjur_81_00.mean) +
  geom_rect(aes(xmin = as.numeric(Month) - 0.4,
                xmax = as.numeric(Month) + 0.4,
                ymin = -40,
                ymax = MeanTemp),
            fill = "aquamarine2", color = "black") +
  geom_text(aes(x = as.numeric(Month), y = MeanTemp, label = round(MeanTemp, 1)),
            vjust = -0.5, size = 3.5) + 
  scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_81_00.mean$Month)) +
  scale_y_continuous(breaks = seq(-40, 20, 10), limits = c(-40, 15)) +
  geom_hline(yintercept = -40, color = "black") +
  labs(
    title = "Monthly Mean Temperature: Kjusjur Station (1981–2000)",
    x = "Month",
    y = "Temperature (°C)"
  )

#Median
ggplot(kjusjur_81_00.median) +
  geom_rect(aes(xmin = as.numeric(Month) - 0.4,
                xmax = as.numeric(Month) + 0.4,
                ymin = -40,
                ymax = MedianTemp),
            fill = "deepskyblue3", color = "black") +
  geom_text(aes(x = as.numeric(Month), y = MedianTemp, label = round(MedianTemp, 1)),
            vjust = -0.5, size = 3.5) + 
  scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_81_00.median$Month)) +
  scale_y_continuous(breaks = seq(-40, 20, 10), limits = c(-40, 15)) +
  geom_hline(yintercept = -40, color = "black") +
  labs(
    title = "Monthly Median Temperature: Kjusjur Station (1981–2000)",
    x = "Month",
    y = "Temperature (°C)"
  )

1961-1980

kjusjur_61_80 <- kjusjur_61_00 %>% filter (Year < 1981)


#Calculate monthly mean 1961-1980
kjusjur_61_80.mean <- kjusjur_61_80

kjusjur_61_80.mean <- kjusjur_61_80.mean %>%
  group_by(Month) %>%
  summarise(MeanTemp = mean(Temperature, na.rm = TRUE)) %>%
  ungroup()

print(kjusjur_61_80.mean)
## # A tibble: 12 × 2
##    Month MeanTemp
##    <fct>    <dbl>
##  1 Jan     -38.4 
##  2 Feb     -35.2 
##  3 Mar     -26.7 
##  4 Apr     -15.6 
##  5 May      -3.97
##  6 Jun       7.66
##  7 Jul      12.4 
##  8 Aug       9.18
##  9 Sep       1.88
## 10 Oct     -12.4 
## 11 Nov     -30.2 
## 12 Dec     -35.6
#Calculate monthly median 1961-1980
kjusjur_61_80.median <- kjusjur_61_80

kjusjur_61_80.median <- kjusjur_61_80.median  %>%
  group_by(Month) %>%
  summarise(MedianTemp = median(Temperature, na.rm = TRUE)) %>%
  ungroup()

print(kjusjur_61_80.median)
## # A tibble: 12 × 2
##    Month MedianTemp
##    <fct>      <dbl>
##  1 Jan       -37.8 
##  2 Feb       -34.6 
##  3 Mar       -26.6 
##  4 Apr       -16.2 
##  5 May        -4   
##  6 Jun         7.8 
##  7 Jul        11.8 
##  8 Aug         9   
##  9 Sep         1.65
## 10 Oct       -12.1 
## 11 Nov       -30.4 
## 12 Dec       -36.1
# Plot mean and median 1961-1980

#Mean
ggplot(kjusjur_61_80.mean) +
  geom_rect(aes(xmin = as.numeric(Month) - 0.4,
                xmax = as.numeric(Month) + 0.4,
                ymin = -40,
                ymax = MeanTemp),
            fill = "plum3", color = "black") +
  geom_text(aes(x = as.numeric(Month), y = MeanTemp, label = round(MeanTemp, 1)),
            vjust = -0.5, size = 3.5) + 
  scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_80.mean$Month)) +
  scale_y_continuous(breaks = seq(-50, 20, 10), limits = c(-40, 15)) +
  geom_hline(yintercept = -40, color = "black") +
  labs(
    title = "Monthly Mean Temperature: Kjusjur Station (1961–1980)",
    x = "Month",
    y = "Temperature (°C)"
  )

#Median
ggplot(kjusjur_61_80.median) +
  geom_rect(aes(xmin = as.numeric(Month) - 0.4,
                xmax = as.numeric(Month) + 0.4,
                ymin = -45,
                ymax = MedianTemp),
            fill = "palevioletred2", color = "black") +
  geom_text(aes(x = as.numeric(Month), y = MedianTemp, label = round(MedianTemp, 1)),
            vjust = -0.5, size = 3.5) + 
  scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_80.median$Month)) +
  scale_y_continuous(breaks = seq(-45, 17, 10), limits = c(-45, 17)) +
  geom_hline(yintercept = -45, color = "black") +
  labs(
    title = "Monthly Median Temperature: Kjusjur Station (1961–1980)",
    x = "Month",
    y = "Temperature (°C)"
  )

Plot side by side mean of 1961-1980 and 1981-2000

#Create data frame with both time frames


kjusjur_61_80.mean$Period <- "1961–1980"
kjusjur_81_00.mean$Period <- "1981–2000"

combined.kjusjur <- rbind(kjusjur_61_80.mean, kjusjur_81_00.mean)

combined.kjusjur <- combined.kjusjur %>%
  mutate(
    x = as.numeric(Month),
    offset = ifelse(Period == "1961–1980", -0.2, 0.2),
    xmin = x + offset - 0.2,
    xmax = x + offset + 0.2
  )

ggplot(combined.kjusjur) +
  geom_rect(aes(xmin = xmin, xmax = xmax, ymin = -40, ymax = MeanTemp, fill = Period),
            color = "black") +
  scale_x_continuous(breaks = 1:12, labels = levels(combined.kjusjur$Month)) +
  scale_y_continuous(breaks = seq(-40, 13, 10), limits = c(-40, 13)) +
  scale_fill_manual(values = c("1961–1980" = "mediumpurple1", "1981–2000" = "sienna1")) +
  geom_hline(yintercept = -40, color = "black") +
  labs(
    title = "Monthly Average Temperatures: Kjusjur Station by Time Period",
    x = "Month",
    y = "Temperature (°C)",
    fill = "Time Period"
  )

Statistical test; T-test

#Combined month comparison
t.test_result <- t.test(
  kjusjur_61_80.mean$MeanTemp,
  kjusjur_81_00.mean$MeanTemp,
  alternative = "two.sided",  
  var.equal = TRUE            
)
print(t.test_result)  
## 
##  Two Sample t-test
## 
## data:  kjusjur_61_80.mean$MeanTemp and kjusjur_81_00.mean$MeanTemp
## t = -0.072335, df = 22, p-value = 0.943
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -16.55303  15.43724
## sample estimates:
## mean of x mean of y 
## -13.91958 -13.36169

T-test; by month comparison

kjusjur_61_80$Month <- factor(kjusjur_61_80$Month, levels = month.abb, ordered = TRUE)
kjusjur_81_00$Month <- factor(kjusjur_81_00$Month, levels = month.abb, ordered = TRUE)

kjusjur_61_80$Period <- "1961–1980"
kjusjur_81_00$Period <- "1981–2000"

combined.kjusjur2 <- rbind(kjusjur_61_80, kjusjur_81_00)

t.test_result_monthly <- combined.kjusjur2 %>%
  group_by(Month) %>%
  summarise(
    t_statistic = t.test(Temperature ~ Period, var.equal = TRUE)$statistic,
    p_value = t.test(Temperature ~ Period, var.equal = TRUE)$p.value,
    mean_61_80 = mean(Temperature[Period == "1961–1980"]),
    mean_81_00 = mean(Temperature[Period == "1981–2000"]),
    .groups = 'drop'
  )

print(t.test_result_monthly)
## # A tibble: 12 × 5
##    Month t_statistic p_value mean_61_80 mean_81_00
##    <ord>       <dbl>   <dbl>      <dbl>      <dbl>
##  1 Jan       -0.469   0.642      -38.4      -37.8 
##  2 Feb       -1.92    0.0627     -35.2      -32.9 
##  3 Mar       -0.515   0.610      -26.7      -26.2 
##  4 Apr       -0.550   0.586      -15.6      -15.2 
##  5 May       -1.71    0.0949      -3.97      -2.85
##  6 Jun       -0.0295  0.977        7.66       7.68
##  7 Jul       -0.448   0.657       12.4       12.6 
##  8 Aug       -0.280   0.781        9.18       9.35
##  9 Sep       -0.0768  0.939        1.88       1.91
## 10 Oct       -0.259   0.797      -12.4      -12.2 
## 11 Nov       -0.126   0.900      -30.2      -30.0 
## 12 Dec       -0.974   0.336      -35.6      -34.6

Linear regression

linear_regression <- lm(Temperature ~ Year, data = kjusjur_61_00)
summary(linear_regression)
## 
## Call:
## lm(formula = Temperature ~ Year, data = kjusjur_61_00)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -33.744 -17.746  -0.302  18.005  31.860 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) -89.19716  144.91211  -0.616    0.539
## Year          0.03819    0.07317   0.522    0.602
## 
## Residual standard error: 18.34 on 475 degrees of freedom
## Multiple R-squared:  0.0005732,  Adjusted R-squared:  -0.001531 
## F-statistic: 0.2724 on 1 and 475 DF,  p-value: 0.602
#Plot linear regression 
ggplot(kjusjur_61_00, aes(x = Year, y = Temperature)) +
  geom_point(alpha = 0.4) +  # raw data points
  geom_smooth(method = "lm", color = "dodgerblue", se = TRUE) +  # regression line with confidence interval
  labs(
    title = "Kjusjur Station Temperature (1961-2000)",
    x = "Year",
    y = "Temperature (°C)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

#Plot by month linear regression

month_colors <- c(
  "Jan" = "darkorchid4",  
  "Feb" = "slateblue2",  
  "Mar" = "turquoise3",  
  "Apr" = "mediumspringgreen",  
  "May" = "gold1",  
  "Jun" = "darkorange",  
  "Jul" = "firebrick4",  
  "Aug" = "firebrick1",  
  "Sep" = "yellow2",  
  "Oct" = "olivedrab2", 
  "Nov" = "cornflowerblue",  
  "Dec" = "darkslateblue"  
)

ggplot(kjusjur_61_00, aes(x = Year, y = Temperature, color = Month)) +
  geom_point(alpha = 0.4) +
  geom_smooth(method = "lm", se = FALSE) +
  scale_color_manual(values = month_colors) +
  labs(
    title = "Monthly Kjusjur Station Temperature (1961–2000)",
    x = "Year",
    y = "Temperature (°C)") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'