Import Libraries

library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5     v purrr   0.3.4
## v tibble  3.1.6     v dplyr   1.0.8
## v tidyr   1.2.0     v stringr 1.4.0
## v readr   2.1.2     v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(readr)

Data Summary for the San Diego Int Airport

Dataset Daily Summaries Order Start Date 1941-01-01 00:00 Order End Date 2022-03-14 23:59 Output Format Custom GHCN-Daily CSV Data Types PRCP, TMAX, TMIN Custom Flag(s) Station Name Units Standard Stations/Locations SAN DIEGO INTERNATIONAL AIRPORT, CA US (Station ID: GHCND:USW00023188)

Import data into R

San_Diego_Airport <- read_csv("SanDiego-2908058.csv", col_types=cols(DATE = col_character()))

Examine

glimpse(San_Diego_Airport)
## Rows: 29,655
## Columns: 6
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME    <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE    <chr> "1941-01-01", "1941-01-02", "1941-01-03", "1941-01-04", "1941-~
## $ PRCP    <dbl> 0.00, 0.00, 0.00, 0.03, 0.00, 0.00, 0.00, 0.00, 0.00, 0.73, 0.~
## $ TMAX    <dbl> 68, 67, 65, 64, 62, 65, 70, 71, 68, 67, 65, 67, 63, 63, 65, 65~
## $ TMIN    <dbl> 49, 47, 45, 46, 51, 52, 49, 48, 56, 56, 53, 48, 48, 50, 47, 49~
summary(San_Diego_Airport)
##    STATION              NAME               DATE                PRCP        
##  Length:29655       Length:29655       Length:29655       Min.   :0.00000  
##  Class :character   Class :character   Class :character   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Mode  :character   Median :0.00000  
##                                                           Mean   :0.02684  
##                                                           3rd Qu.:0.00000  
##                                                           Max.   :2.70000  
##                                                           NA's   :1        
##       TMAX             TMIN      
##  Min.   : 46.00   Min.   :29.00  
##  1st Qu.: 66.00   1st Qu.:52.00  
##  Median : 70.00   Median :58.00  
##  Mean   : 70.69   Mean   :57.25  
##  3rd Qu.: 75.00   3rd Qu.:63.00  
##  Max.   :111.00   Max.   :78.00  
##  NA's   :2        NA's   :2

The character column DATE contains dates in ISO-8601 format. Use as.date() to convert it and run glimpse again.

San_Diego_Airport$DATE = as.Date(San_Diego_Airport$DATE)
glimpse(San_Diego_Airport)
## Rows: 29,655
## Columns: 6
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME    <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE    <date> 1941-01-01, 1941-01-02, 1941-01-03, 1941-01-04, 1941-01-05, 1~
## $ PRCP    <dbl> 0.00, 0.00, 0.00, 0.03, 0.00, 0.00, 0.00, 0.00, 0.00, 0.73, 0.~
## $ TMAX    <dbl> 68, 67, 65, 64, 62, 65, 70, 71, 68, 67, 65, 67, 63, 63, 65, 65~
## $ TMIN    <dbl> 49, 47, 45, 46, 51, 52, 49, 48, 56, 56, 53, 48, 48, 50, 47, 49~

Summary

Do a summary() and check for annomalies

summary(San_Diego_Airport)
##    STATION              NAME                DATE                 PRCP        
##  Length:29655       Length:29655       Min.   :1941-01-01   Min.   :0.00000  
##  Class :character   Class :character   1st Qu.:1961-04-22   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Median :1981-08-09   Median :0.00000  
##                                        Mean   :1981-08-08   Mean   :0.02684  
##                                        3rd Qu.:2001-11-25   3rd Qu.:0.00000  
##                                        Max.   :2022-03-14   Max.   :2.70000  
##                                                             NA's   :1        
##       TMAX             TMIN      
##  Min.   : 46.00   Min.   :29.00  
##  1st Qu.: 66.00   1st Qu.:52.00  
##  Median : 70.00   Median :58.00  
##  Mean   : 70.69   Mean   :57.25  
##  3rd Qu.: 75.00   3rd Qu.:63.00  
##  Max.   :111.00   Max.   :78.00  
##  NA's   :2        NA's   :2

Inspect NA values and Drop these records.

San_Diego_Airport %>%
  filter(is.na(TMAX) | is.na(TMIN) | is.na(PRCP))
## # A tibble: 3 x 6
##   STATION     NAME                                  DATE        PRCP  TMAX  TMIN
##   <chr>       <chr>                                 <date>     <dbl> <dbl> <dbl>
## 1 USW00023188 SAN DIEGO INTERNATIONAL AIRPORT, CA ~ 1946-02-23     0    64    NA
## 2 USW00023188 SAN DIEGO INTERNATIONAL AIRPORT, CA ~ 1946-07-17     0    NA    65
## 3 USW00023188 SAN DIEGO INTERNATIONAL AIRPORT, CA ~ 2022-03-14    NA    NA    NA

drop the na

San_Diego_Airport = San_Diego_Airport%>%
  drop_na()
summary(San_Diego_Airport)
##    STATION              NAME                DATE                 PRCP        
##  Length:29652       Length:29652       Min.   :1941-01-01   Min.   :0.00000  
##  Class :character   Class :character   1st Qu.:1961-04-23   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Median :1981-08-09   Median :0.00000  
##                                        Mean   :1981-08-09   Mean   :0.02684  
##                                        3rd Qu.:2001-11-25   3rd Qu.:0.00000  
##                                        Max.   :2022-03-13   Max.   :2.70000  
##       TMAX             TMIN      
##  Min.   : 46.00   Min.   :29.00  
##  1st Qu.: 66.00   1st Qu.:52.00  
##  Median : 70.00   Median :58.00  
##  Mean   : 70.69   Mean   :57.25  
##  3rd Qu.: 75.00   3rd Qu.:63.00  
##  Max.   :111.00   Max.   :78.00

Graphics

Get density with rug plots for TMAZ, TMIN, and PRCP

# TMAX density plot
San_Diego_Airport %>%
  ggplot(aes(x = TMAX)) +
  geom_density() +
  geom_rug() +
  ggtitle("TMAX")

moderate temperature and a shoulder with a secondary peak to the right of the primary peak.

# TMIN density plot
San_Diego_Airport %>%
  ggplot(aes(x=TMIN)) +
  geom_density() +
  geom_rug() +
  ggtitle("TMIN")

Moderate TMIN

# PRCP density plot
San_Diego_Airport %>%
  ggplot(aes(x=PRCP)) +
  geom_density() +
  geom_rug() +
  ggtitle("PRCP density plot")

We see most of the PRCP are within 0 to 2.

Add some data

Load the package lubridate. Then use the functions year(), month() and day() to create the variables yr, mo, and dy. Make these variables factors. Use glimpse() and summary() to verify the results.

library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
#add dplyr library aslo to work with columns
library(dplyr)

mutate the year, month, date in San_Diego_airport

San_Diego_Airport = San_Diego_Airport %>%
  mutate(year = factor(year(DATE)), 
         month = factor(month(DATE)),
         day = factor(day(DATE)))

glimpse(San_Diego_Airport)
## Rows: 29,652
## Columns: 9
## $ STATION <chr> "USW00023188", "USW00023188", "USW00023188", "USW00023188", "U~
## $ NAME    <chr> "SAN DIEGO INTERNATIONAL AIRPORT, CA US", "SAN DIEGO INTERNATI~
## $ DATE    <date> 1941-01-01, 1941-01-02, 1941-01-03, 1941-01-04, 1941-01-05, 1~
## $ PRCP    <dbl> 0.00, 0.00, 0.00, 0.03, 0.00, 0.00, 0.00, 0.00, 0.00, 0.73, 0.~
## $ TMAX    <dbl> 68, 67, 65, 64, 62, 65, 70, 71, 68, 67, 65, 67, 63, 63, 65, 65~
## $ TMIN    <dbl> 49, 47, 45, 46, 51, 52, 49, 48, 56, 56, 53, 48, 48, 50, 47, 49~
## $ year    <fct> 1941, 1941, 1941, 1941, 1941, 1941, 1941, 1941, 1941, 1941, 19~
## $ month   <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,~
## $ day     <fct> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18,~
# check summary
summary(San_Diego_Airport)
##    STATION              NAME                DATE                 PRCP        
##  Length:29652       Length:29652       Min.   :1941-01-01   Min.   :0.00000  
##  Class :character   Class :character   1st Qu.:1961-04-23   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Median :1981-08-09   Median :0.00000  
##                                        Mean   :1981-08-09   Mean   :0.02684  
##                                        3rd Qu.:2001-11-25   3rd Qu.:0.00000  
##                                        Max.   :2022-03-13   Max.   :2.70000  
##                                                                              
##       TMAX             TMIN            year           month      
##  Min.   : 46.00   Min.   :29.00   1944   :  366   1      : 2542  
##  1st Qu.: 66.00   1st Qu.:52.00   1948   :  366   3      : 2524  
##  Median : 70.00   Median :58.00   1952   :  366   5      : 2511  
##  Mean   : 70.69   Mean   :57.25   1956   :  366   8      : 2511  
##  3rd Qu.: 75.00   3rd Qu.:63.00   1960   :  366   10     : 2511  
##  Max.   :111.00   Max.   :78.00   1964   :  366   12     : 2511  
##                                   (Other):27456   (Other):14542  
##       day       
##  1      :  975  
##  2      :  975  
##  3      :  975  
##  4      :  975  
##  5      :  975  
##  9      :  975  
##  (Other):23802
# TMAX by month
San_Diego_Airport %>%
  ggplot(aes(month, TMAX)) +
  geom_point() +
  ggtitle("TMAX by month")

# TMIN by Month. #size is used here
San_Diego_Airport %>%
  ggplot(aes(month, TMIN)) +
  geom_point(size=0.5) +     
  geom_smooth(color ="red") +
  ggtitle("TMIN by month")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# PRCP by month. #kept alpha for transperance here
San_Diego_Airport %>%
  ggplot(aes(month, PRCP)) +
  geom_point(alpha = 0.3) +   
  ggtitle("PRCP by month")

  1. September and October are two of the hottest months in San Diego.

  2. The minimum temperatures are on December and January. The lowest is on January around 30 degree which is still not that cold as compared to other Northern US locations.

  3. Seems like the precipitation is high in december and january reaching around the value of 2.7.

# Point graph of TMAX and TMIN. geom_jitter() is used here
San_Diego_Airport %>%
  ggplot(aes(TMAX, TMIN)) +
  geom_jitter(size =0.1) +
  geom_smooth(color = "red") +
  ggtitle("TMAX and TMIN")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Point graph of TMAX and PRCP
San_Diego_Airport %>%
  ggplot(aes(TMAX, PRCP)) +
  geom_point() +
  geom_smooth(color="red")+
  ggtitle("TMAX and PRCP")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

# Point graph of TMIN and PRCP
San_Diego_Airport %>%
  ggplot(aes(TMIN, PRCP)) +
  geom_point() +
  geom_smooth(color="red")+
  ggtitle("TMAX and PRCP")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

  1. TMAX and TMIN have linear relation.
  2. PRCP is high when TMAX is in range of 60 to 70.
  3. PRCP is high when TMIN is around 50 to 60.

Line graphs

#line graph for TMAX, TMIN, and PRCP
ggplot(San_Diego_Airport, aes(DATE, TMAX)) +
  geom_line(size=0.1) +
  ggtitle("DATE Vs TMAX line graph")

ggplot(San_Diego_Airport, aes(DATE, TMIN)) +
  geom_line(size=0.1, linetype = "dotted") +
  geom_point(size = 0.2) +
  ggtitle("DATE Vs TMIN line graph with dotted points")

ggplot(San_Diego_Airport, aes(DATE, PRCP)) +
  geom_line(size=0.1, linetype = "dotted") +
  geom_point(size = 0.2) +
  ggtitle("DATE Vs PRCP line graph with dotted points")

Histogram for TMAX, TMIN, and PRCP

#default histogram binwidth
ggplot(San_Diego_Airport, aes(x = TMAX)) +
  geom_histogram() +
  ggtitle("TMAX histogram")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#using bin width = 15 and see the difference.
ggplot(San_Diego_Airport, aes(x = TMAX)) +
  geom_histogram(binS = 15) +
  ggtitle("TAX max histogram with binwidth 15")
## Warning: Ignoring unknown parameters: binS
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Boxplot for TMIN

ggplot(San_Diego_Airport, aes(month, TMIN)) +
  geom_boxplot() +
  facet_wrap(~month) +
  ggtitle("TMIN box plot for each month")

## Barplot of PRCP

ggplot(San_Diego_Airport, aes(x = TMAX)) +
  geom_bar() +
  ggtitle("Bar plot for TMAX")

Cehckig to see if San Diego is experiencing drought during the months of May through August.

# it is done by looking at a time-series of z-scores instead of raw data. If the recent recent numbers are below -3, something was going on.

# histogram of the raw data is done.

rain_5_8 = San_Diego_Airport %>%
  select(DATE, PRCP) %>%
  mutate(mo = month(DATE),
         yr = year(DATE)) %>%
  filter(mo >= 5 & mo <= 8) %>% 
  group_by(yr) %>%
  summarize(rain = sum(PRCP)) %>%
  ungroup() %>%
  mutate(mrain = mean(rain),
         sdrain = sd(rain),
         z_score = (rain - mrain)/sdrain)

head(rain_5_8)  
## # A tibble: 6 x 5
##      yr  rain mrain sdrain z_score
##   <dbl> <dbl> <dbl>  <dbl>   <dbl>
## 1  1941  0.09 0.372  0.672 -0.419 
## 2  1942  0.12 0.372  0.672 -0.375 
## 3  1943  0.03 0.372  0.672 -0.509 
## 4  1944  0.32 0.372  0.672 -0.0770
## 5  1945  1.06 0.372  0.672  1.02  
## 6  1946  0.01 0.372  0.672 -0.538
rain_5_8 %>%
  ggplot(aes(x = yr, y = z_score)) +
  geom_point() +
  geom_line(size = .1)

The lowest z_socre is around -0.5 for PRCP. so there is no indication of drought.

The histogram is as follows

rain_5_8 %>%
  ggplot(aes(x = rain)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

checking how many perfect days are in SAn Diego in a Year.

perfect day is day with maximum temperature of 75 with no rain.

# code to get the count of perfect days in a year.
San_Diego_Airport %>%
  filter(TMAX == 75 & PRCP == 0) %>%
  group_by(year) %>%
  summarize(count = n()) %>%
  filter(count > 0) %>%
  ungroup() %>%
  ggplot(aes(x = factor(count))) +
  geom_bar()

The most common year count is 18. there have been years with only two.

check which month have perfect days in San Diego

San_Diego_Airport %>%
  filter(TMAX == 75 & PRCP == 0) %>%
  ggplot(aes(x = month)) +
  geom_bar() +
  ggtitle("bar graph to see Which month have perfect days")

July seems to have more perfect days. August comes the second.

Predicting the Weather in San Diego

To summarize the annual pattern, we need to put all of the data into a single year. we could think of this as making a prediction for an arbitrary future year. We need to do this so we can have a date variable that doesn’t contain different years. We’ll use 2024 since it’s a leap year. We’ll call this new dataframe cal24. we will use make_date() from the lubridate package and summary() on cal24 to verify that you succeeded.

cal24 = San_Diego_Airport %>%
  mutate(DATE = make_date(2024, month, day),
         year = year(DATE))

summary(cal24)
##    STATION              NAME                DATE                 PRCP        
##  Length:29652       Length:29652       Min.   :2024-01-01   Min.   :0.00000  
##  Class :character   Class :character   1st Qu.:2024-04-01   1st Qu.:0.00000  
##  Mode  :character   Mode  :character   Median :2024-07-01   Median :0.00000  
##                                        Mean   :2024-07-01   Mean   :0.02684  
##                                        3rd Qu.:2024-10-01   3rd Qu.:0.00000  
##                                        Max.   :2024-12-31   Max.   :2.70000  
##                                                                              
##       TMAX             TMIN            year          month      
##  Min.   : 46.00   Min.   :29.00   Min.   :2024   1      : 2542  
##  1st Qu.: 66.00   1st Qu.:52.00   1st Qu.:2024   3      : 2524  
##  Median : 70.00   Median :58.00   Median :2024   5      : 2511  
##  Mean   : 70.69   Mean   :57.25   Mean   :2024   8      : 2511  
##  3rd Qu.: 75.00   3rd Qu.:63.00   3rd Qu.:2024   10     : 2511  
##  Max.   :111.00   Max.   :78.00   Max.   :2024   12     : 2511  
##                                                  (Other):14542  
##       day       
##  1      :  975  
##  2      :  975  
##  3      :  975  
##  4      :  975  
##  5      :  975  
##  9      :  975  
##  (Other):23802

TMAX scatterplot for each value of DATE.

size and aplpha are 0.2 to deal with the overplotting in geom_point()

cal24 %>%
  ggplot(aes(x = DATE, y = TMAX)) +
  geom_point(aplpha = 0.2, size = 0.2) +
  geom_smooth(color = "red")
## Warning: Ignoring unknown parameters: aplpha
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

it seems to take longer going up in TMAX for San Diago.

plotting 2021 on top of 2024

Add a geom_point() with red points to show calendar 2021. Create a dataframe cal21 by filtering for yr = 2021. Use make_date() to change the yr to 2024. Add a second geom_point() with this dataframe as the data argument. Set the size parameter to .5 in the second geom_point().

cal21 = San_Diego_Airport %>%
  filter(year == 2021) %>%
  mutate(DATE =  make_date(2024, month, day))

cal24 %>%
  ggplot(aes(x=DATE, y = TMAX)) +
  geom_point(size = 0.2, alpha = 0.2) +
  geom_point(data = cal21, aes(x = DATE, y = TMAX),
             color = "red" , size = 0.5)

mean of TMAX and TMIN for each value of DATE and showin result in graph.

#scatter plot for mean of TMAX for each DATE for predicting in 2024
cal24 %>%
  group_by(DATE) %>%
  summarize(MTMAX = mean(TMAX)) %>%
  ungroup() %>%
  ggplot(aes(x =DATE, y = MTMAX))+
  geom_point(size = 0.5) +
  ggtitle("predicting mean of TMAX for each DATE value for predicting in 2024")

#scatter plot for mean of TMIN for each DATE for predicting in 2024
cal24 %>%
  group_by(DATE) %>%
  summarize(MTMIN = mean(TMIN)) %>%
  ungroup() %>%
  ggplot(aes(x =DATE, y = MTMIN))+
  geom_point(size = 0.5) +
  ggtitle("predicting mean of TMIN for each DATE value for predicting in 2024")

#density plot of mean of TMAX for predicting 2024
cal24 %>%
  group_by(DATE)%>%
  summarize(MTMAX = mean(TMAX)) %>%
  ungroup() %>%
  ggplot(aes(x = MTMAX))+ 
  geom_density(adjust = 0.2) +
  ggtitle("density plot of mean of TMAX for predicting in 2024")

#density plot of mean of TMAX for predicting in 2024
cal24 %>%
  group_by(DATE)%>%
  summarize(MTMIN = mean(TMIN)) %>%
  ungroup() %>%
  ggplot(aes(x = MTMIN))+ 
  geom_density(adjust = 0.2) +
  ggtitle("density plot of mean of TMIN for predicting in 2024")

variation in TMAX, TMIN, and PRCP for 2024

standard deviation of TMAX, TMIN, and PRCP based on cal24 for 2024

#standard deviation of TMAX for 2024
cal24 %>%
  group_by(DATE) %>%
  summarize(sd_TMAX = sd(TMAX)) %>%
  ungroup() %>%
  ggplot(aes(DATE, sd_TMAX)) +
  geom_point() +
  geom_smooth(color ="red") +
  ggtitle("standard deviation of TMAX in 2024")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

#standard deviation of TMIN for 2024
cal24 %>%
  group_by(DATE) %>%
  summarize(sd_TMIN = sd(TMIN)) %>%
  ungroup() %>%
  ggplot(aes(DATE, sd_TMIN)) +
  geom_point() +
  ggtitle("standard deviation of TMIN in 2024")

#standard deviation of PRCP for 2024
cal24 %>%
  group_by(DATE) %>%
  summarize(sd_PRCP = sd(PRCP)) %>%
  ungroup() %>%
  ggplot(aes(DATE, sd_PRCP)) +
  geom_point() +
  ggtitle("standard deviation of PRCP in 2024")

Predicting Difference between TMAX and TMIN in 2024.

cal24 %>% 
  mutate(diff = TMAX - TMIN) %>% 
  group_by(DATE) %>% 
  summarize(diff = mean(diff)) %>% 
  ungroup() %>% 
  ggplot(aes(x = DATE, y = diff)) +
  geom_point() +
  ggtitle("predicting Difference between TMAX and TMIN for 2024")

The difference seems to be higher on the december and january.

Predicting preciption on 2024

There are two possible ways to look at precipitation. We could use either the mean value of precipitation for a date, or the probability of precipitation on that date.

#we will use mean value of precipitaiton first. we'll also use plotly for interactive plot for Rpub and HTML

#import library plotly
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
#use plotly for the mean PRCP
g1 = cal24 %>% 
  group_by(DATE) %>% 
  summarize(mean_precip = mean(PRCP)) %>% 
  ungroup() %>% 
  ggplot(aes(x = DATE, y = mean_precip)) +
  geom_point() +
  ggtitle("predicting mean value of precipitation for 2024")

ggplotly(g1) # For Rpubs or other html
# Now do the probability of precipitation for 2024
g2 <- cal24 %>% 
  group_by(DATE) %>% 
  summarize(prob_precip = mean(PRCP > 0)) %>% 
  ungroup() %>% 
  ggplot(aes(x = DATE, y = prob_precip)) +
  geom_point() +
  ggtitle("predicting probability of PRCP in 2024")

ggplotly(g2)

Both of these graphs look similar with slight variations.

Loss curve for PRCP and TMAX

A graph showing loss curves for precipitation and TMAX. Since these two variables have such different values, ywe will have to create z-scores to make them visually compatible. Call the z-score variables n_TMAX and n_PRCP.

cal24 %>% 
  mutate(n_TMAX = (TMAX - mean(TMAX))/sd(TMAX),
         n_PRCP = (PRCP - mean(PRCP))/sd(PRCP)) %>% 
  ggplot(aes(x = DATE)) +
  geom_smooth(aes(y = n_TMAX), color = "red") +
  geom_smooth(aes(y = n_PRCP), color = "blue") +
  ggtitle("predicting loss curve for PRCP and TMAX for 2024")
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'
## `geom_smooth()` using method = 'gam' and formula 'y ~ s(x, bs = "cs")'

The turning points in the summer are essentially the same.