Load the required packages
library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.4
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
We then load the data we need from the files.
# Restore the R objects saved in each .rdata file into the current environment.
# The absolute /cloud/project paths tie this script to that directory layout.
# NOTE(review): OAP and OAP_Raw1, used further down, are not created anywhere
# in this script; presumably these files define them - confirm which file
# supplies which object.
load("/cloud/project/arlingtonData.rdata")
load("/cloud/project/olympiaData.rdata")
Here, we clean the data and add year, month, and day columns derived from the dates
# Label every OAP record with a short station code.
# NOTE(review): OAP is not created in this script; presumably it is restored by
# one of the load() calls - confirm.
OAP$NAME <- "OAP"

# Clean the station data: keep only the columns needed below.
# NOTE(review): the table that becomes ARL is read from an object named
# OAP_Raw1 - confirm this is the intended source for the ARL station.
ARL_Raw <- OAP_Raw1 %>%
  select(STATION, DATE, NAME, PRCP, TMAX, TMIN)

ARL <- ARL_Raw %>%
  na.omit() %>% # drop every row with a missing value in any kept column
  mutate(
    # Rescale the raw readings; presumably tenths of a degree Celsius to
    # degrees Fahrenheit and tenths of a millimetre to inches - TODO confirm.
    TMAX = TMAX * .1 * 1.8 + 32,
    TMIN = TMIN * .1 * 1.8 + 32,
    PRCP = PRCP / 254,
    # Calendar components of DATE, used later to group by day of the year.
    yr = year(DATE),
    mo = month(DATE),
    dy = day(DATE)
  )
ARL$NAME <- "ARL"

# Bind the rows; rbind() needs both tables to carry the same columns.
both <- rbind(OAP, ARL)

# Compare the combined data: number of rows contributed by each station.
table(both$NAME)
##
## ARL OAP
## 3887 28741
# Five-number summary plus mean of TMAX, computed separately per station.
tapply(
  X = both$TMAX,
  INDEX = both$NAME,
  FUN = summary
)
## $ARL
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.94 51.08 60.08 61.33 71.96 96.08
##
## $OAP
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.96 50.00 59.00 60.52 71.06 104.00
Next, we compute the mean TMAX for each station and calendar day (month and day) from the combined data
# Average TMAX per station for every calendar day (month + day) across years.
TMAX_seasonality <- both %>%
  group_by(NAME, mo, dy) %>%
  summarize(TMAXM = mean(TMAX)) %>%
  ungroup() %>%
  # Put every calendar day on one common placeholder year so the stations share
  # a single date axis; 2024 is a leap year, so a 29 February group still
  # produces a valid date.
  mutate(DATE = make_date(year = 2024, month = mo, day = dy))

# Preview the first rows of the per-day averages.
head(TMAX_seasonality)
## # A tibble: 6 x 5
## NAME mo dy TMAXM DATE
## <chr> <dbl> <int> <dbl> <date>
## 1 ARL 1 1 41.4 2024-01-01
## 2 ARL 1 2 42.8 2024-01-02
## 3 ARL 1 3 42.8 2024-01-03
## 4 ARL 1 4 44.6 2024-01-04
## 5 ARL 1 5 45.8 2024-01-05
## 6 ARL 1 6 44.9 2024-01-06
Finally, we plot the data in several kinds of graphs
# Scatter plot of mean TMAX by calendar day, one colour per station. The x axis
# shows abbreviated month names only ("%b").
one_plot <- TMAX_seasonality %>%
  ggplot(aes(x = DATE, y = TMAXM, color = NAME)) +
  geom_point(size = .3) +
  scale_x_date(date_labels = "%b")

# Render the ggplot object as an interactive plotly widget.
ggplotly(one_plot)
# Same scatter plot as a static figure, with one stacked panel per station.
facet_plot <- TMAX_seasonality %>%
  ggplot(aes(x = DATE, y = TMAXM, color = NAME)) +
  geom_point(size = .3) +
  # Month-only labels: the year stored in DATE is a placeholder (2024), not
  # real data, so it should not appear on the axis.
  scale_x_date(date_labels = "%b") +
  facet_wrap(~NAME, ncol = 1)
facet_plot
# Histogram of all TMAX values, one panel per station. Bars are outlined in the
# station colour and the default binning is kept.
ggplot(data = both, mapping = aes(x = TMAX, color = NAME)) +
  geom_histogram() +
  facet_wrap(vars(NAME))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Overlaid kernel density curves of TMAX, one coloured line per station.
ggplot(data = both, mapping = aes(x = TMAX, color = NAME)) +
  geom_density()