R Markdown

Load the required packages

library(tidyverse)
## ── Attaching packages ──────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1     ✓ purrr   0.3.3
## ✓ tibble  2.1.3     ✓ dplyr   0.8.4
## ✓ tidyr   1.0.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.4.0
## ── Conflicts ─────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Including Data

We then load the data we need from the files.

# Restore the saved R objects for the two weather stations into the workspace.
# NOTE(review): the objects each file defines are not visible here; the code
# below uses `OAP_Raw1` and `OAP`, so presumably they come from these files —
# confirm.
# NOTE(review): these absolute paths only resolve where /cloud/project exists.
load("/cloud/project/arlingtonData.rdata")
load("/cloud/project/olympiaData.rdata")

Cleaning the data

Here, we clean the data and add columns for the year, month, and day of each observation.

# Tag every row of the existing OAP table with its station code.
OAP$NAME <- "OAP"

# Keep only the columns used downstream.
# NOTE(review): the ARL subset is built from an object named `OAP_Raw1` —
# confirm this is the raw table loaded from arlingtonData.rdata.
ARL_Raw <- select(OAP_Raw1, STATION, DATE, NAME, PRCP, TMAX, TMIN)

# Drop rows with any missing value, rescale the measurements, and split DATE
# into year / month / day columns.
# NOTE(review): the factors look like tenths of degrees C -> degrees F for
# TMAX/TMIN and tenths of mm -> inches for PRCP — confirm against the source
# data's documentation.
ARL <- ARL_Raw %>%
  na.omit() %>%
  mutate(
    TMAX = TMAX * 0.1 * 1.8 + 32,
    TMIN = TMIN * 0.1 * 1.8 + 32,
    PRCP = PRCP / 254,
    yr = year(DATE),
    mo = month(DATE),
    dy = day(DATE)
  )

# Replace the station name with the short code used for grouping and legends.
ARL$NAME <- "ARL"

# Stack the two station tables into one; rbind() requires matching columns.
both <- rbind(OAP, ARL)

# Number of observations contributed by each station to the combined data
table(both[["NAME"]])
## 
##   ARL   OAP 
##  3887 28741
with(both, tapply(TMAX, NAME, summary))
## $ARL
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.94   51.08   60.08   61.33   71.96   96.08 
## 
## $OAP
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   17.96   50.00   59.00   60.52   71.06  104.00

Get the temperature from the combined data

Next, we compute the mean daily maximum temperature (TMAX) for each station and calendar day from the combined data.

# Mean TMAX per station and calendar day (month + day), pooled over all years.
# Each (month, day) pair is then mapped onto a single placeholder year so the
# values can be drawn on a date axis; 2024 is a leap year, so 29 February
# still yields a valid date.
TMAX_seasonality <- both %>%
  group_by(NAME, mo, dy) %>%
  summarise(TMAXM = mean(TMAX)) %>%
  ungroup() %>%
  mutate(DATE = make_date(year = 2024, month = mo, day = dy))

head(TMAX_seasonality, n = 6L)
## # A tibble: 6 x 5
##   NAME     mo    dy TMAXM DATE      
##   <chr> <dbl> <int> <dbl> <date>    
## 1 ARL       1     1  41.4 2024-01-01
## 2 ARL       1     2  42.8 2024-01-02
## 3 ARL       1     3  42.8 2024-01-03
## 4 ARL       1     4  44.6 2024-01-04
## 5 ARL       1     5  45.8 2024-01-05
## 6 ARL       1     6  44.9 2024-01-06

Plotting graphs

Finally, we plot the data in several different kinds of graphs.

# Both stations' mean daily highs on a shared axis, one small point per
# calendar day. Ticks are labelled with the abbreviated month only.
one_plot <- ggplot(
  TMAX_seasonality,
  aes(x = DATE, y = TMAXM, color = NAME)
) +
  geom_point(size = 0.3) +
  scale_x_date(date_labels = "%b")

# Convert the static ggplot into an interactive plotly widget.
ggplotly(one_plot)
# Same seasonal curve, but with one vertically stacked panel per station.
# The x axis is labelled with the abbreviated month only, matching one_plot:
# the year stored in DATE is a placeholder, so it should not appear on the
# axis.
facet_plot <- ggplot(
  TMAX_seasonality,
  aes(x = DATE, y = TMAXM, color = NAME)
) +
  geom_point(size = 0.3) +
  scale_x_date(date_labels = "%b") +
  facet_wrap(~NAME, ncol = 1)
facet_plot

# Distribution of daily TMAX, one histogram panel per station.
# NOTE(review): `color` only sets the bar outlines; `fill = NAME` would shade
# the bars — confirm which was intended. No bin width is given, so ggplot2
# falls back to 30 bins and prints a message suggesting `binwidth`.
ggplot(both, aes(x = TMAX, color = NAME)) +
  geom_histogram() +
  facet_wrap(~NAME)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Density curves of daily TMAX for both stations, overlaid and colored by
# station.
ggplot(both, aes(x = TMAX, color = NAME)) +
  geom_density()