# library
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# lubridate is attached as part of the core tidyverse (2.0.0 and later), so it needs no separate library() call
library(PNWColors)
# color palettes
## build each palette once; n is the number of colors to generate
### for a continuous color scale, n can be much higher than the number of observations
pal1 <- pnw_palette("Sunset2", n = 399)
pal2 <- pnw_palette("Starfish", n = 4)
# to preview a palette, print it:
#pal1
# list the palette names available for palette generation:
names(pnw_palettes)
## [1] "Starfish" "Shuksan" "Bay" "Winter" "Lake" "Sunset"
## [7] "Shuksan2" "Cascades" "Sailboat" "Moth" "Spring" "Mushroom"
## [13] "Sunset2" "Anemone"
The dataset contains monthly temperature (TEMP) and precipitation (PRCP) records from the Bellingham International Airport weather station, from January 2000 to present.
# Read the monthly weather table from its RDS file.
## Only one dataset is used in this analysis, so it gets a short name.
weather <- readRDS(file = "kbliMonthlyTempPrcp.rds")
# Preview the first rows to confirm the import.
head(weather)
## # A tibble: 6 × 3
## DATE TEMP PRCP
## <date> <dbl> <dbl>
## 1 2000-01-01 3.19 90.2
## 2 2000-02-01 4.87 44.4
## 3 2000-03-01 6.16 79.5
## 4 2000-04-01 9.43 66.5
## 5 2000-05-01 11.0 116.
## 6 2000-06-01 14.4 62.2
str(weather)
## tibble [299 × 3] (S3: tbl_df/tbl/data.frame)
## $ DATE: Date[1:299], format: "2000-01-01" "2000-02-01" ...
## $ TEMP: num [1:299] 3.19 4.87 6.16 9.43 10.96 ...
## $ PRCP: num [1:299] 90.2 44.4 79.5 66.5 115.6 ...
class(weather$DATE)
## [1] "Date"
# Derive numeric calendar year and month columns from DATE.
weather <- mutate(
  weather,
  YEAR = year(DATE),
  MONTH = month(DATE)
)
head(weather)
## # A tibble: 6 × 5
## DATE TEMP PRCP YEAR MONTH
## <date> <dbl> <dbl> <dbl> <dbl>
## 1 2000-01-01 3.19 90.2 2000 1
## 2 2000-02-01 4.87 44.4 2000 2
## 3 2000-03-01 6.16 79.5 2000 3
## 4 2000-04-01 9.43 66.5 2000 4
## 5 2000-05-01 11.0 116. 2000 5
## 6 2000-06-01 14.4 62.2 2000 6
# Add the month as a labelled value (label = TRUE gives Jan, Feb, ...).
weather <- mutate(weather, MONTHNAME = month(DATE, label = TRUE))
head(weather)
## # A tibble: 6 × 6
## DATE TEMP PRCP YEAR MONTH MONTHNAME
## <date> <dbl> <dbl> <dbl> <dbl> <ord>
## 1 2000-01-01 3.19 90.2 2000 1 Jan
## 2 2000-02-01 4.87 44.4 2000 2 Feb
## 3 2000-03-01 6.16 79.5 2000 3 Mar
## 4 2000-04-01 9.43 66.5 2000 4 Apr
## 5 2000-05-01 11.0 116. 2000 5 May
## 6 2000-06-01 14.4 62.2 2000 6 Jun
# Long-format copy of the data: one row per DATE and measurement.
## The pivot_longer() defaults are spelled out: the source column name goes
## to `name` and the measurement to `value`.
weather_long <- pivot_longer(
  select(weather, DATE, TEMP, PRCP),
  cols = c(TEMP, PRCP),
  names_to = "name",
  values_to = "value"
)
# Mean temperature per year, restricted to years before 2024.
## NOTE(review): presumably 2024 is excluded because it is incomplete in this
## dataset - confirm.
weather_AnnualTemp <- summarize(
  group_by(filter(weather, YEAR < 2024), YEAR),
  TEMP = mean(TEMP)
)
head(weather_AnnualTemp)
## # A tibble: 6 × 2
## YEAR TEMP
## <dbl> <dbl>
## 1 2000 9.33
## 2 2001 9.45
## 3 2002 9.46
## 4 2003 10.4
## 5 2004 10.6
## 6 2005 10.0
# Mean temperature over the summer months for each year.
AvgSummerTemp <- weather %>%
  # keep June, July, and August only
  filter(MONTH %in% c(6, 7, 8)) %>%
  # one group per year, collapsed to its mean temperature;
  # the result holds exactly the YEAR and TEMP columns
  group_by(YEAR) %>%
  summarise(TEMP = mean(TEMP))
# Line-and-point chart of mean summer temperature by year.
## x, y, and color are mapped once in ggplot() and inherited by both layers.
plot_AvgSummerTemp <- ggplot(
  data = AvgSummerTemp,
  mapping = aes(x = YEAR, y = TEMP, color = TEMP)
) +
  # connecting line first, then one point per year on top of it
  geom_line() +
  geom_point() +
  # color both layers along the pal1 gradient
  scale_color_gradientn(colors = pal1) +
  # axis labels and titles; \u00b0 is the degree symbol
  labs(
    x = "Year",
    y = "Temperature (\u00b0C)",
    title = "Average Summer Temperature (\u00b0C)",
    subtitle = "2000 - 2024 | Bellingham International Airport Weather Station"
  ) +
  # legend title; \n inserts a line break
  guides(color = guide_legend(title = "Temp \n (\u00b0C)")) +
  # center the plot title and subtitle
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
plot_AvgSummerTemp
Fig. 1: Average summer temperature recorded at Bellingham International Airport’s weather station from 2000 to 2024. Temperatures were averaged across June, July, and August for each year.
# Cumulative precipitation within each calendar year, one row per month.
## A grouped mutate() keeps MONTH and DATE on the same row as the running
## total, rather than re-attaching them afterwards by row position, so the
## result does not depend on the row order of `weather`.
CumPrcp <- weather %>%
  # make sure the running total accumulates in chronological order
  arrange(DATE) %>%
  group_by(YEAR) %>%
  # cumsum() gives the running total of PRCP within the year
  mutate(sumPRCP = cumsum(PRCP)) %>%
  ungroup() %>%
  # columns used downstream: year, running total, month, and date
  select(YEAR, sumPRCP, MONTH, DATE) %>%
  # calendar quarter (1-4) as a factor; serves as the season key
  mutate(SEASON = as.factor(quarter(DATE)))
# Lookup table mapping each quarter number (as a factor) to a season name.
seasons <- tibble(
  NAME = c("Winter", "Spring", "Summer", "Fall"),
  SEASON = as.factor(1:4)
)
# Attach the season name to every row through the shared SEASON key.
CumPrcp <- inner_join(CumPrcp, seasons, by = join_by(SEASON))
# Keep only the last available month of each season within each year.
## The row is chosen by DATE rather than by sumPRCP == max(sumPRCP): the
## running total is tied whenever a month adds no precipitation, which would
## keep more than one row per season, and it relies on comparing doubles
## for equality.
CumPrcpSeasons <- CumPrcp %>%
  group_by(YEAR, SEASON) %>%
  filter(DATE == max(DATE)) %>%
  # drop the grouping so later steps see a plain tibble
  ungroup()
# Cumulative precipitation by year, colored by season, with connecting lines
# through the end-of-season totals.
plot_CumPrcp <- ggplot() +
  # one path per year of cumulative precipitation
  ## geom_path() draws segments in row order, so the rows are sorted
  ## explicitly: by year, then from the latest date to the earliest.
  ## This ordering is what makes the color change line up with the end of
  ## each season. NOTE(review): this appears to be because each segment takes
  ## its color from the first of its two rows - confirm against ggplot2.
  geom_path(
    data = arrange(CumPrcp, YEAR, desc(DATE)),
    aes(x = DATE, y = sumPRCP, group = YEAR, color = NAME),
    linewidth = 1
  ) +
  # semi-transparent lines connecting the end-of-season totals across years
  geom_path(
    data = CumPrcpSeasons,
    aes(x = DATE, y = sumPRCP, group = SEASON, color = NAME),
    linewidth = 1, alpha = 0.5
  ) +
  # x-axis: a break every 2 years, year-only labels, 400 days of padding
  scale_x_date(
    date_breaks = "2 year", date_labels = "%Y",
    name = "Year", expand = c(0, 400)
  ) +
  # custom palette; breaks sets the order of season names in the legend
  scale_color_manual(
    values = pal2,
    breaks = c("Fall", "Summer", "Spring", "Winter")
  ) +
  # y-axis label and titles
  labs(
    y = "Precipitation (mm)",
    title = "Cumulative Annual Precipitation",
    subtitle = "2000 - 2024 | Bellingham International Airport Weather Station"
  ) +
  # center the title and subtitle
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
plot_CumPrcp
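Fig. 2: Cumulative annual precipitation at Bellingham International Airport’s weather station, 2000 to 2024. Colors indicate season, defined by calendar quarter (Winter = January–March, Spring = April–June, Summer = July–September, Fall = October–December). The semi-transparent lines connect the cumulative precipitation at the end of each season from one year to the next.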
The biggest trouble I had in this assignment was getting the colors for geom_path to align correctly with the end of the season, instead of the second-to-last datapoint for each season. I found a reorder method that I used in the data call for the second plot, but despite knowing what that reorder did, I still don’t quite fully grasp why it worked.