knitr::opts_chunk$set(echo = TRUE)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(stringr)
library(ggplot2)
# Location of the raw CSV on GitHub (monthly precipitation, San Joaquin,
# 1913-2014 according to the file name).
urlfile <- "https://raw.githubusercontent.com/Nhodgkinson/DATA-607-P2/main/cdec-monthly-precipitation-san-joaquin-1913-2014.csv"
# read_csv() downloads http(s) paths on its own, so the URL string is passed
# directly instead of being wrapped in a url() connection.
calidata <- read_csv(urlfile)
## Rows: 102 Columns: 15
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): Region
## dbl (14): WY, Oct, Nov, Dec, Jan, Feb, Mar, Apr, May, Jun, Jul, Aug, Sep, Total
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
I initially began gathering the data with the assumption that I would want a month-over-month view. After seeing how much data there was, I decided that a year-over-year view, together with the average rainfall by month, would be more insightful.
# Print the imported tibble to check its layout: Region, WY, the twelve
# month columns (Oct through Sep) and Total.
calidata
## # A tibble: 102 × 15
## Region WY Oct Nov Dec Jan Feb Mar Apr May Jun Jul
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 San Joaquin 1913 1.01 2.8 1.26 5.4 1.74 2.96 2.11 2.09 2.77 1.12
## 2 San Joaquin 1914 0.08 4.55 7.91 21.3 5.17 0.94 5.18 1.05 1.07 0.02
## 3 San Joaquin 1915 1.71 0.61 3.92 7.68 9.2 2.73 3.53 6.94 0 0
## 4 San Joaquin 1916 0 1.47 5.87 20 5.87 6.31 0.84 0.67 0 0
## 5 San Joaquin 1917 6.77 1.64 8.19 1.69 12.5 2.78 1.46 1.71 0 0.24
## 6 San Joaquin 1918 0 1.53 1.79 0.97 11.0 11.7 0.42 1.84 0.15 0.2
## 7 San Joaquin 1919 1.69 5.39 2.18 1.45 10.3 5.69 0.53 1.12 0 0
## 8 San Joaquin 1920 0.95 0.78 6.34 1.45 4.29 9.55 4.86 0.15 0.57 0.02
## 9 San Joaquin 1921 5.41 4.36 6.25 9.69 3.33 4.53 0.7 2.25 0.3 0
## 10 San Joaquin 1922 0.58 0.59 13.1 5.52 8.11 6.05 0.85 2.63 0.42 0.53
## # … with 92 more rows, and 3 more variables: Aug <dbl>, Sep <dbl>, Total <dbl>
# Year-over-year table: one row per year with its total rainfall and the
# overall mean. Columns are picked by name rather than by position so this
# step does not silently grab the wrong data if the CSV column order changes;
# WY is renamed to Year in the same select() call.
cyeardf <- calidata %>%
  select(Region, Year = WY, Total) %>%
  # No grouping here, so every row carries the same all-years mean of Total.
  mutate(`Avg Rain` = mean(Total))
# Monthly averages: reshape the twelve month columns (Oct through Sep) into
# long form, one row per Year/Month, and attach each month's all-years mean.
# gather() stacks the month columns one after another, which gives the
# Oct-first row order seen when cmdf is printed.
cmdf <- calidata %>%
  gather("Month", "n", Oct:Sep) %>%
  rename(Year = WY) %>%
  group_by(Month) %>%
  # Grouped by Month, so mean(n) is that month's average across all years.
  mutate(`Avg Rain` = mean(n)) %>%
  # Drop the yearly Total by name; it is not used in the month-level table.
  select(-Total)
# Note: cmdf is still grouped by Month at this point.
cmdf
## # A tibble: 1,224 × 5
## # Groups: Month [12]
## Region Year Month n `Avg Rain`
## <chr> <dbl> <chr> <dbl> <dbl>
## 1 San Joaquin 1913 Oct 1.01 2.06
## 2 San Joaquin 1914 Oct 0.08 2.06
## 3 San Joaquin 1915 Oct 1.71 2.06
## 4 San Joaquin 1916 Oct 0 2.06
## 5 San Joaquin 1917 Oct 6.77 2.06
## 6 San Joaquin 1918 Oct 0 2.06
## 7 San Joaquin 1919 Oct 1.69 2.06
## 8 San Joaquin 1920 Oct 0.95 2.06
## 9 San Joaquin 1921 Oct 5.41 2.06
## 10 San Joaquin 1922 Oct 0.58 2.06
## # … with 1,214 more rows
# Print the year-over-year table (Region, Year, Total, `Avg Rain`).
cyeardf
## # A tibble: 102 × 4
## Region Year Total `Avg Rain`
## <chr> <dbl> <dbl> <dbl>
## 1 San Joaquin 1913 24.6 38.6
## 2 San Joaquin 1914 48.0 38.6
## 3 San Joaquin 1915 36.4 38.6
## 4 San Joaquin 1916 43 38.6
## 5 San Joaquin 1917 37.1 38.6
## 6 San Joaquin 1918 32.4 38.6
## 7 San Joaquin 1919 30.2 38.6
## 8 San Joaquin 1920 30.7 38.6
## 9 San Joaquin 1921 37.4 38.6
## 10 San Joaquin 1922 38.5 38.6
## # … with 92 more rows
I now have two tables: one that includes the month and another that only has the year-over-year totals. I know the average rainfall by month and the average rainfall by year. I can now create two graphs to see how the monthly precipitation compares to its expected average and how the yearly total compares to its expected average.
# Month view: January only. Over 100 years is too much to read on a single
# plot, so keep only the most recent years, 1994 through 2014.
Jandf <- cmdf %>%
  filter(Month == "Jan", Year > 1993)
# `Avg Rain` repeats the same January mean on every row. Pass the single
# distinct value so one reference line is drawn instead of one overlapping
# line per row.
ggplot(Jandf, aes(Year, n)) +
  geom_point() +
  geom_hline(yintercept = unique(Jandf$`Avg Rain`), color = "red")
# Year view: repeat the comparison against the average using yearly totals,
# this time for 1984 through 2004.
Ydf <- cyeardf %>%
  filter(Year > 1983, Year < 2005)
# `Avg Rain` holds the same overall mean on every row. Pass the single
# distinct value so one reference line is drawn instead of one overlapping
# line per row.
ggplot(Ydf, aes(Year, Total)) +
  geom_point() +
  geom_hline(yintercept = unique(Ydf$`Avg Rain`), color = "red")
## Observation

The month graph for January 1994 to January 2014 shows 1995 and 1997 having much higher rainfall, at around 20 inches for the month, compared to the average January rainfall of 6.8 inches.
When looking at the year graph, we see that 1995 has very high rainfall at 70 inches for the year, against a yearly average of 38.55 inches. As seen in the month graph, 1995 had high rainfall in January, with around 20 inches. 1997 is the second-highest year for rainfall between 1984 and 2004, with 65 inches. It also showed very high monthly rainfall.