# library
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# lubridate 1.9.3 now included in tidyverse 2.0.0
library(PNWColors)
# Colour palettes (PNWColors) ----
# `n` is the number of colours to generate from the named palette.

# Four colours, one per season, for the discrete scale in Plot 2.
pal2 <- pnw_palette("Starfish", n = 4)

# Many colours for the continuous temperature gradient in Plot 1; for a
# continuous scale `n` may be much larger than the number of observations.
pal1 <- pnw_palette("Sunset2", n = 399)

# Print `pal1` (or `pal2`) at the console to preview a palette.
# List the palette names that pnw_palette() accepts:
names(pnw_palettes)
##  [1] "Starfish" "Shuksan"  "Bay"      "Winter"   "Lake"     "Sunset"  
##  [7] "Shuksan2" "Cascades" "Sailboat" "Moth"     "Spring"   "Mushroom"
## [13] "Sunset2"  "Anemone"

Weather Data

This dataset contains monthly temperature and precipitation records from the Bellingham airport weather station, from January 2000 to present.

# Read the monthly temperature/precipitation table from its RDS file.
# Only one dataset is used in this report, so it gets a short name.
weather <- readRDS(file = "kbliMonthlyTempPrcp.rds")

# First rows: one record per month with DATE, TEMP and PRCP.
head(weather)
## # A tibble: 6 × 3
##   DATE        TEMP  PRCP
##   <date>     <dbl> <dbl>
## 1 2000-01-01  3.19  90.2
## 2 2000-02-01  4.87  44.4
## 3 2000-03-01  6.16  79.5
## 4 2000-04-01  9.43  66.5
## 5 2000-05-01 11.0  116. 
## 6 2000-06-01 14.4   62.2

# Structure: column types and row count.
str(weather)
## tibble [299 × 3] (S3: tbl_df/tbl/data.frame)
##  $ DATE: Date[1:299], format: "2000-01-01" "2000-02-01" ...
##  $ TEMP: num [1:299] 3.19 4.87 6.16 9.43 10.96 ...
##  $ PRCP: num [1:299] 90.2 44.4 79.5 66.5 115.6 ...

# Confirm DATE is a true Date so the lubridate helpers below apply.
class(weather[["DATE"]])
## [1] "Date"

Pull Year and Month

# Derive the calendar year and the month number (1-12) from DATE using
# lubridate's year() and month().
weather <- mutate(
  weather,
  YEAR = year(DATE),
  MONTH = month(DATE)
)
head(weather)
## # A tibble: 6 × 5
##   DATE        TEMP  PRCP  YEAR MONTH
##   <date>     <dbl> <dbl> <dbl> <dbl>
## 1 2000-01-01  3.19  90.2  2000     1
## 2 2000-02-01  4.87  44.4  2000     2
## 3 2000-03-01  6.16  79.5  2000     3
## 4 2000-04-01  9.43  66.5  2000     4
## 5 2000-05-01 11.0  116.   2000     5
## 6 2000-06-01 14.4   62.2  2000     6

Pull Month Name

# month(..., label = TRUE) returns the abbreviated month name as an ordered
# factor (Jan < Feb < ...), appended here as the last column.
weather[["MONTHNAME"]] <- month(weather[["DATE"]], label = TRUE)
head(weather)
## # A tibble: 6 × 6
##   DATE        TEMP  PRCP  YEAR MONTH MONTHNAME
##   <date>     <dbl> <dbl> <dbl> <dbl> <ord>    
## 1 2000-01-01  3.19  90.2  2000     1 Jan      
## 2 2000-02-01  4.87  44.4  2000     2 Feb      
## 3 2000-03-01  6.16  79.5  2000     3 Mar      
## 4 2000-04-01  9.43  66.5  2000     4 Apr      
## 5 2000-05-01 11.0  116.   2000     5 May      
## 6 2000-06-01 14.4   62.2  2000     6 Jun

Pivoting

# Long format: one row per DATE and measurement. Every column except DATE
# (i.e. TEMP and PRCP) is stacked into the default `name` / `value` columns.
weather_long <- pivot_longer(
  select(weather, DATE, TEMP, PRCP),
  cols = !DATE
)

Aggregating Data

# Mean temperature per calendar year.
# Years from 2024 onward are excluded -- presumably because 2024 is
# incomplete in this dataset (299 monthly rows starting January 2000).
weather_AnnualTemp <- weather %>%
  filter(2024 > YEAR) %>%
  group_by(YEAR) %>%
  summarise(TEMP = mean(TEMP))
head(weather_AnnualTemp)
## # A tibble: 6 × 2
##    YEAR  TEMP
##   <dbl> <dbl>
## 1  2000  9.33
## 2  2001  9.45
## 3  2002  9.46
## 4  2003 10.4 
## 5  2004 10.6 
## 6  2005 10.0

Plot 1: Average Annual Summer Temperatures

Data Wrangling

# Mean summer temperature per year, where "summer" is June, July and August.
AvgSummerTemp <- weather %>%
  # keep only the three summer months
  filter(MONTH %in% c(6, 7, 8)) %>%
  # one group per year ...
  group_by(YEAR) %>%
  # ... collapsed to its mean; the result holds exactly YEAR and TEMP
  summarise(TEMP = mean(TEMP))

Plot

# Line-and-point chart of mean summer temperature by year. The aesthetics are
# declared once in ggplot() and shared by both layers; colour tracks TEMP.
plot_AvgSummerTemp <- ggplot(
  data = AvgSummerTemp,
  mapping = aes(x = YEAR, y = TEMP, color = TEMP)
) +
  # connecting line between consecutive years
  geom_line() +
  # one point per year
  geom_point() +
  # continuous colour gradient built from the custom palette
  scale_color_gradientn(colors = pal1) +
  # axis labels and titles; \u00b0 is the degree symbol
  labs(
    x = "Year",
    y = "Temperature (\u00b0C)",
    title = "Average Summer Temperature (\u00b0C)",
    subtitle = "2000 - 2024 | Bellingham International Airport Weather Station"
  ) +
  # legend title; \n inserts a line break.
  # NOTE: guide_legend() on a continuous scale draws discrete keys at the
  # scale breaks rather than a gradient bar; use guide_colourbar() instead if
  # a continuous colour bar is wanted.
  guides(color = guide_legend(title =  "Temp \n (\u00b0C)")) +
  # centre the title and subtitle
  theme(
    plot.title = element_text(hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
plot_AvgSummerTemp

Fig 1: Average summer temperature recorded at Bellingham International Airport’s weather station from 2000 to 2024. Temperatures were averaged across June, July, and August for each year.

Plot 2: Cumulative Precipitation by Year

Data Wrangling

# Running precipitation total within each calendar year.
CumPrcp <- weather %>%
  # make the accumulation order explicit: chronological within each year
  arrange(DATE) %>%
  group_by(YEAR) %>%
  # cumsum() returns one value per input row, which is why summarize() warned
  # (it expects one row per group). A grouped mutate() keeps every row, so
  # MONTH and DATE stay attached to their own running total instead of being
  # glued back on by position afterwards.
  mutate(sumPRCP = cumsum(PRCP)) %>%
  ungroup() %>%
  # keep only the columns the plot needs
  select(YEAR, sumPRCP, MONTH, DATE) %>%
  # calendar quarter (1-4) as a factor, used as the "season" key
  mutate(SEASON = as.factor(quarter(DATE)))

# Lookup table mapping quarter number to a season name.
# Seasons here are calendar quarters: Winter = Jan-Mar, Spring = Apr-Jun,
# Summer = Jul-Sep, Fall = Oct-Dec.
seasons <- tibble(
  NAME = c("Winter", "Spring", "Summer", "Fall"),
  SEASON = as.factor(1:4)
)

# Attach the season name to every monthly row.
CumPrcp <- CumPrcp %>%
  inner_join(seasons, by = join_by(SEASON))

# One row per year and season: the last month of that season.
# Selecting on DATE (rather than on the largest running total) still returns
# a single row when the final month had zero precipitation.
CumPrcpSeasons <- CumPrcp %>%
  group_by(YEAR, SEASON) %>%
  filter(DATE == max(DATE)) %>%
  ungroup()

Plot

# Cumulative precipitation through each year, coloured by season, plus
# semi-transparent lines linking the end-of-season totals across years.
plot_CumPrcp <- ggplot() +
  # One path per year. geom_path() draws segments in row order and colours
  # each segment with the aesthetics of the row it starts from. Sorting each
  # year in reverse chronological order (December -> January) therefore makes
  # the segment that arrives at a season's final month take that season's
  # colour, so the colour changes exactly at the end-of-season point.
  geom_path(data = arrange(CumPrcp, YEAR, desc(DATE)),
            aes(x = DATE, y = sumPRCP, group = YEAR, color = NAME),
            linewidth = 1) +
  # connect the end-of-season totals from year to year, one line per season
  geom_path(data = CumPrcpSeasons,
            aes(x = DATE, y = sumPRCP, group = SEASON, color = NAME),
            linewidth = 1, alpha = 0.5) +
  # x-axis: a labelled break every two years; expand = c(mult, add) pads the
  # axis by 400 days on each side
  scale_x_date(date_breaks = "2 year", date_labels = "%Y",
               name = "Year", expand = c(0, 400)) +
  # custom palette; `breaks` sets the order of the seasons in the legend
  scale_color_manual(values = pal2,
                     breaks = c("Fall", "Summer", "Spring", "Winter")) +
  # y-axis label and titles
  labs(y = "Precipitation (mm)",
       title = "Cumulative Annual Precipitation",
       subtitle = "2000 - 2024 | Bellingham International Airport Weather Station") +
  # centre the title and subtitle
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5))

plot_CumPrcp

Fig. 2: Cumulative annual precipitation at Bellingham International Airport’s weather station, 2000 to 2024. Colors indicate season. The semi-transparent lines connect the cumulative precipitation at the end of each season from one year to the next.

Reflection

The biggest trouble I had in this assignment was getting the colors for geom_path to align correctly with the end of the season, instead of the second-to-last datapoint for each season. I found a reorder method that I used in the data call for the second plot, but despite knowing what that reorder did, I still don’t quite fully grasp why it worked.