library(readr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
kjusjur_61_00<- read_csv("/Users/kaitlynmaritan/Desktop/R-spatial/Final_Project/kjusjurstation1.csv")
## Rows: 480 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Station Name
## dbl (21): Station, Year, Month, Day, Time, PIF, Latitude, Longitude, Tempera...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Remove unnecessary columns
kjusjur_61_00<- kjusjur_61_00 %>% select(-"Station", -"Day",-"Time", -"PIF",
-"Sea Level Pressure", -"Wind Direction",
-"Wind Speed", -"Total Cloud Amount", -"Low Cloud Amount",
-"Relative Humidity", -"Dew Point", -"Wet Bulb",-"Sea Surface",
-"Vapor Pressure",-"Soil or Ice",-"Station Name")
#Remove missing temperature values
kjusjur_61_00[kjusjur_61_00 == 999.99]<- NA
kjusjur_61_00 <- kjusjur_61_00 %>% filter(!is.na(Temperature))
# Convert month numbers to month names
kjusjur_61_00$Month <- month.abb[as.numeric(kjusjur_61_00$Month)]
kjusjur_61_00$Month <- factor(kjusjur_61_00$Month, levels = month.abb)
Monthly mean and monthly median air temperature; (1961-2000)
kjusjur_61_00.mean <- kjusjur_61_00
kjusjur_61_00.median <-kjusjur_61_00
#Calculate monthly mean 1961-2000
kjusjur_61_00.mean <- kjusjur_61_00.mean %>%
group_by(Month) %>%
summarise(MeanTemp = mean(Temperature, na.rm = TRUE)) %>%
ungroup()
print(kjusjur_61_00.mean)
## # A tibble: 12 × 2
## Month MeanTemp
## <fct> <dbl>
## 1 Jan -38.1
## 2 Feb -34.1
## 3 Mar -26.4
## 4 Apr -15.4
## 5 May -3.41
## 6 Jun 7.67
## 7 Jul 12.5
## 8 Aug 9.26
## 9 Sep 1.89
## 10 Oct -12.3
## 11 Nov -30.1
## 12 Dec -35.1
#Calculate monthly median 1961-2000
kjusjur_61_00.median <- kjusjur_61_00.median %>%
group_by(Month) %>%
summarise(MedianTemp = median(Temperature, na.rm = TRUE)) %>%
ungroup()
print(kjusjur_61_00.median)
## # A tibble: 12 × 2
## Month MedianTemp
## <fct> <dbl>
## 1 Jan -37.8
## 2 Feb -33.7
## 3 Mar -26.6
## 4 Apr -15.4
## 5 May -3.2
## 6 Jun 7.8
## 7 Jul 12.0
## 8 Aug 9.15
## 9 Sep 1.7
## 10 Oct -12.1
## 11 Nov -30
## 12 Dec -35
#Plot monthly mean 1961-2000
ggplot(kjusjur_61_00.mean) +
geom_rect(aes(xmin = as.numeric(Month) - 0.4,
xmax = as.numeric(Month) + 0.4,
ymin = -40,
ymax = MeanTemp),
fill = "skyblue", color = "black") +
geom_text(aes(x = as.numeric(Month), y = MeanTemp, label = round(MeanTemp, 1)),
vjust = -0.5, size = 3.5) +
scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_00.mean$Month)) +
scale_y_continuous(breaks = seq(-40, 15, 10), limits = c(-40, 15)) +
geom_hline(yintercept = -40, color = "black") +
labs(
title = "Monthly Mean Temperature: Kjusjur Station (1961–2000)",
x = "Month",
y = "Temperature (°C)"
)
#Plot monthly median 1961-2000
ggplot(kjusjur_61_00.median) +
geom_rect(aes(xmin = as.numeric(Month) - 0.4,
xmax = as.numeric(Month) + 0.4,
ymin = -40,
ymax = MedianTemp),
fill = "lightpink2", color = "black") +
geom_text(aes(x = as.numeric(Month), y = MedianTemp, label = round(MedianTemp, 1)),
vjust = -0.5, size = 3.5) +
scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_00.median$Month)) +
scale_y_continuous(breaks = seq(-40, 15, 10), limits = c(-40, 15)) +
geom_hline(yintercept = -40, color = "black") +
labs(
title = "Monthly Median Temperature: Kjusjur Station (1961–2000)",
x = "Month",
y = "Temperature (°C)"
)
Monthly mean with standard deviation air temperature; (1961-2000)
#Calculate standard deviation for temperature mean
SD.kjusjur_61_00 <- kjusjur_61_00
SD.kjusjur_61_00 <- SD.kjusjur_61_00 %>%
group_by(Month) %>%
summarise(
MeanTemp = mean(Temperature, na.rm = TRUE),
SD = sd(Temperature, na.rm = TRUE)
) %>%
ungroup()
#Plot temperature mean with standard deviation
ggplot(SD.kjusjur_61_00) +
geom_rect(aes(xmin = as.numeric(Month) - 0.4,
xmax = as.numeric(Month) + 0.4,
ymin = -45,
ymax = MeanTemp),
fill = "lightblue", color = "black") +
geom_errorbar(aes(x = as.numeric(Month),
ymin = MeanTemp - SD,
ymax = MeanTemp + SD),
width = 0.2, color = "black") +
scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_00.median$Month)) +
scale_y_continuous(breaks = seq(-45, 15, 10), limits = c(-45, 15)) + # Adjusted limit for error bars/text
geom_hline(yintercept = -45, color = "black") +
labs(
title = "Monthly Median Temperature with Standard Deviation: Kjusjur Station (1961–2000)",
x = "Month",
y = "Temperature (°C)"
)
Side-by-side monthly mean and median (1961-2000)
#Create data frame with mean and median
mean.median_kjusjur_61_00 <- left_join(kjusjur_61_00.mean, kjusjur_61_00.median, by = "Month") %>%
pivot_longer(cols = c(MeanTemp, MedianTemp),
names_to = "Statistic",
values_to = "Temperature") %>%
mutate(
Month = factor(Month, levels = unique(Month)),
Statistic = recode(Statistic, MeanTemp = "Mean", MedianTemp = "Median")
)
#Plot side by side mean and median
mean.median_kjusjur_61_00 <- mean.median_kjusjur_61_00 %>%
mutate(
x = as.numeric(Month),
offset = ifelse(Statistic == "Mean", -0.2, 0.2),
xmin = x + offset - 0.2,
xmax = x + offset + 0.2)
ggplot(mean.median_kjusjur_61_00) +
geom_rect(aes(xmin = xmin, xmax = xmax, ymin = -40, ymax = Temperature, fill = Statistic),
color = "black") +
scale_x_continuous(breaks = 1:12, labels = levels(mean.median_kjusjur_61_00$Month)) +
scale_y_continuous(breaks = seq(-40, 15, 10), limits = c(-40, 15)) +
scale_fill_manual(values = c("Mean" = "skyblue", "Median" = "orange")) +
geom_hline(yintercept = -40, color = "black") +
labs(
title = "Monthly Mean & Median Temperatures: Kjusjur Station 1961–2000",
x = "Month",
y = "Temperature (°C)",
fill = "Statistic"
)
Compare 1961-1980 to 1981-2000
1981-2000
#Calculate monthly mean 1981-2000
kjusjur_81_00 <- kjusjur_61_00 %>% filter (Year > 1980)
kjusjur_81_00.mean <- kjusjur_81_00
kjusjur_81_00.median <-kjusjur_81_00
kjusjur_81_00.mean <- kjusjur_81_00.mean %>%
group_by(Month) %>%
summarise(MeanTemp = mean(Temperature, na.rm = TRUE)) %>%
ungroup()
print(kjusjur_81_00.mean)
## # A tibble: 12 × 2
## Month MeanTemp
## <fct> <dbl>
## 1 Jan -37.8
## 2 Feb -32.9
## 3 Mar -26.2
## 4 Apr -15.2
## 5 May -2.85
## 6 Jun 7.68
## 7 Jul 12.6
## 8 Aug 9.35
## 9 Sep 1.91
## 10 Oct -12.2
## 11 Nov -30.0
## 12 Dec -34.6
#Calculate monthly median 1981-2000
kjusjur_81_00.median <- kjusjur_81_00.median %>%
group_by(Month) %>%
summarise(MedianTemp = median(Temperature, na.rm = TRUE)) %>%
ungroup()
print(kjusjur_81_00.median)
## # A tibble: 12 × 2
## Month MedianTemp
## <fct> <dbl>
## 1 Jan -37.5
## 2 Feb -33.3
## 3 Mar -26.6
## 4 Apr -15.2
## 5 May -3
## 6 Jun 7.9
## 7 Jul 12.4
## 8 Aug 9.25
## 9 Sep 1.7
## 10 Oct -11.5
## 11 Nov -30
## 12 Dec -34.5
# Plot mean and median 1981-2000
#Mean
ggplot(kjusjur_81_00.mean) +
geom_rect(aes(xmin = as.numeric(Month) - 0.4,
xmax = as.numeric(Month) + 0.4,
ymin = -40,
ymax = MeanTemp),
fill = "aquamarine2", color = "black") +
geom_text(aes(x = as.numeric(Month), y = MeanTemp, label = round(MeanTemp, 1)),
vjust = -0.5, size = 3.5) +
scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_81_00.mean$Month)) +
scale_y_continuous(breaks = seq(-40, 20, 10), limits = c(-40, 15)) +
geom_hline(yintercept = -40, color = "black") +
labs(
title = "Monthly Mean Temperature: Kjusjur Station (1981–2000)",
x = "Month",
y = "Temperature (°C)"
)
#Median
ggplot(kjusjur_81_00.median) +
geom_rect(aes(xmin = as.numeric(Month) - 0.4,
xmax = as.numeric(Month) + 0.4,
ymin = -40,
ymax = MedianTemp),
fill = "deepskyblue3", color = "black") +
geom_text(aes(x = as.numeric(Month), y = MedianTemp, label = round(MedianTemp, 1)),
vjust = -0.5, size = 3.5) +
scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_81_00.median$Month)) +
scale_y_continuous(breaks = seq(-40, 20, 10), limits = c(-40, 15)) +
geom_hline(yintercept = -40, color = "black") +
labs(
title = "Monthly Median Temperature: Kjusjur Station (1981–2000)",
x = "Month",
y = "Temperature (°C)"
)
1961-1980
kjusjur_61_80 <- kjusjur_61_00 %>% filter (Year < 1981)
#Calculate monthly mean 1961-1980
kjusjur_61_80.mean <- kjusjur_61_80
kjusjur_61_80.mean <- kjusjur_61_80.mean %>%
group_by(Month) %>%
summarise(MeanTemp = mean(Temperature, na.rm = TRUE)) %>%
ungroup()
print(kjusjur_61_80.mean)
## # A tibble: 12 × 2
## Month MeanTemp
## <fct> <dbl>
## 1 Jan -38.4
## 2 Feb -35.2
## 3 Mar -26.7
## 4 Apr -15.6
## 5 May -3.97
## 6 Jun 7.66
## 7 Jul 12.4
## 8 Aug 9.18
## 9 Sep 1.88
## 10 Oct -12.4
## 11 Nov -30.2
## 12 Dec -35.6
#Calculate monthly median 1961-1980
kjusjur_61_80.median <- kjusjur_61_80
kjusjur_61_80.median <- kjusjur_61_80.median %>%
group_by(Month) %>%
summarise(MedianTemp = median(Temperature, na.rm = TRUE)) %>%
ungroup()
print(kjusjur_61_80.median)
## # A tibble: 12 × 2
## Month MedianTemp
## <fct> <dbl>
## 1 Jan -37.8
## 2 Feb -34.6
## 3 Mar -26.6
## 4 Apr -16.2
## 5 May -4
## 6 Jun 7.8
## 7 Jul 11.8
## 8 Aug 9
## 9 Sep 1.65
## 10 Oct -12.1
## 11 Nov -30.4
## 12 Dec -36.1
# Plot mean and median 1961-1980
#Mean
ggplot(kjusjur_61_80.mean) +
geom_rect(aes(xmin = as.numeric(Month) - 0.4,
xmax = as.numeric(Month) + 0.4,
ymin = -40,
ymax = MeanTemp),
fill = "plum3", color = "black") +
geom_text(aes(x = as.numeric(Month), y = MeanTemp, label = round(MeanTemp, 1)),
vjust = -0.5, size = 3.5) +
scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_80.mean$Month)) +
scale_y_continuous(breaks = seq(-50, 20, 10), limits = c(-40, 15)) +
geom_hline(yintercept = -40, color = "black") +
labs(
title = "Monthly Mean Temperature: Kjusjur Station (1961–1980)",
x = "Month",
y = "Temperature (°C)"
)
#Median
ggplot(kjusjur_61_80.median) +
geom_rect(aes(xmin = as.numeric(Month) - 0.4,
xmax = as.numeric(Month) + 0.4,
ymin = -45,
ymax = MedianTemp),
fill = "palevioletred2", color = "black") +
geom_text(aes(x = as.numeric(Month), y = MedianTemp, label = round(MedianTemp, 1)),
vjust = -0.5, size = 3.5) +
scale_x_continuous(breaks = 1:12, labels = levels(kjusjur_61_80.median$Month)) +
scale_y_continuous(breaks = seq(-45, 17, 10), limits = c(-45, 17)) +
geom_hline(yintercept = -45, color = "black") +
labs(
title = "Monthly Median Temperature: Kjusjur Station (1961–1980)",
x = "Month",
y = "Temperature (°C)"
)
Plot side by side mean of 1961-1980 and 1981-2000
#Create data frame with both time frames
kjusjur_61_80.mean$Period <- "1961–1980"
kjusjur_81_00.mean$Period <- "1981–2000"
combined.kjusjur <- rbind(kjusjur_61_80.mean, kjusjur_81_00.mean)
combined.kjusjur <- combined.kjusjur %>%
mutate(
x = as.numeric(Month),
offset = ifelse(Period == "1961–1980", -0.2, 0.2),
xmin = x + offset - 0.2,
xmax = x + offset + 0.2
)
ggplot(combined.kjusjur) +
geom_rect(aes(xmin = xmin, xmax = xmax, ymin = -40, ymax = MeanTemp, fill = Period),
color = "black") +
scale_x_continuous(breaks = 1:12, labels = levels(combined.kjusjur$Month)) +
scale_y_continuous(breaks = seq(-40, 13, 10), limits = c(-40, 13)) +
scale_fill_manual(values = c("1961–1980" = "mediumpurple1", "1981–2000" = "sienna1")) +
geom_hline(yintercept = -40, color = "black") +
labs(
title = "Monthly Average Temperatures: Kjusjur Station by Time Period",
x = "Month",
y = "Temperature (°C)",
fill = "Time Period"
)
Statistical test; T-test
#Combined month comparison
t.test_result <- t.test(
kjusjur_61_80.mean$MeanTemp,
kjusjur_81_00.mean$MeanTemp,
alternative = "two.sided",
var.equal = TRUE
)
print(t.test_result)
##
## Two Sample t-test
##
## data: kjusjur_61_80.mean$MeanTemp and kjusjur_81_00.mean$MeanTemp
## t = -0.072335, df = 22, p-value = 0.943
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -16.55303 15.43724
## sample estimates:
## mean of x mean of y
## -13.91958 -13.36169
T-test; by month comparison
kjusjur_61_80$Month <- factor(kjusjur_61_80$Month, levels = month.abb, ordered = TRUE)
kjusjur_81_00$Month <- factor(kjusjur_81_00$Month, levels = month.abb, ordered = TRUE)
kjusjur_61_80$Period <- "1961–1980"
kjusjur_81_00$Period <- "1981–2000"
combined.kjusjur2 <- rbind(kjusjur_61_80, kjusjur_81_00)
t.test_result_monthly <- combined.kjusjur2 %>%
group_by(Month) %>%
summarise(
t_statistic = t.test(Temperature ~ Period, var.equal = TRUE)$statistic,
p_value = t.test(Temperature ~ Period, var.equal = TRUE)$p.value,
mean_61_80 = mean(Temperature[Period == "1961–1980"]),
mean_81_00 = mean(Temperature[Period == "1981–2000"]),
.groups = 'drop'
)
print(t.test_result_monthly)
## # A tibble: 12 × 5
## Month t_statistic p_value mean_61_80 mean_81_00
## <ord> <dbl> <dbl> <dbl> <dbl>
## 1 Jan -0.469 0.642 -38.4 -37.8
## 2 Feb -1.92 0.0627 -35.2 -32.9
## 3 Mar -0.515 0.610 -26.7 -26.2
## 4 Apr -0.550 0.586 -15.6 -15.2
## 5 May -1.71 0.0949 -3.97 -2.85
## 6 Jun -0.0295 0.977 7.66 7.68
## 7 Jul -0.448 0.657 12.4 12.6
## 8 Aug -0.280 0.781 9.18 9.35
## 9 Sep -0.0768 0.939 1.88 1.91
## 10 Oct -0.259 0.797 -12.4 -12.2
## 11 Nov -0.126 0.900 -30.2 -30.0
## 12 Dec -0.974 0.336 -35.6 -34.6
Linear regression
linear_regression <- lm(Temperature ~ Year, data = kjusjur_61_00)
summary(linear_regression)
##
## Call:
## lm(formula = Temperature ~ Year, data = kjusjur_61_00)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.744 -17.746 -0.302 18.005 31.860
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -89.19716 144.91211 -0.616 0.539
## Year 0.03819 0.07317 0.522 0.602
##
## Residual standard error: 18.34 on 475 degrees of freedom
## Multiple R-squared: 0.0005732, Adjusted R-squared: -0.001531
## F-statistic: 0.2724 on 1 and 475 DF, p-value: 0.602
#Plot linear regression
ggplot(kjusjur_61_00, aes(x = Year, y = Temperature)) +
geom_point(alpha = 0.4) + # raw data points
geom_smooth(method = "lm", color = "dodgerblue", se = TRUE) + # regression line with confidence interval
labs(
title = "Kjusjur Station Temperature (1961-2000)",
x = "Year",
y = "Temperature (°C)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
#Plot by month linear regression
month_colors <- c(
"Jan" = "darkorchid4",
"Feb" = "slateblue2",
"Mar" = "turquoise3",
"Apr" = "mediumspringgreen",
"May" = "gold1",
"Jun" = "darkorange",
"Jul" = "firebrick4",
"Aug" = "firebrick1",
"Sep" = "yellow2",
"Oct" = "olivedrab2",
"Nov" = "cornflowerblue",
"Dec" = "darkslateblue"
)
ggplot(kjusjur_61_00, aes(x = Year, y = Temperature, color = Month)) +
geom_point(alpha = 0.4) +
geom_smooth(method = "lm", se = FALSE) +
scale_color_manual(values = month_colors) +
labs(
title = "Monthly Kjusjur Station Temperature (1961–2000)",
x = "Year",
y = "Temperature (°C)") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'