library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.2.1 ✓ purrr 0.3.3
## ✓ tibble 2.1.3 ✓ dplyr 0.8.3
## ✓ tidyr 1.0.2 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.4.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
load("/cloud/project/OAP.rdata")
load("/cloud/project/SDL.rdata")
Put short names in the two dataframes before combining them.
OAP$NAME = "OAP"
SDL$NAME = "SDL"
both = rbind(OAP,SDL)
table(both$NAME)
##
## OAP SDL
## 28741 29434
tapply(both$TMAX,both$NAME,summary)
## $OAP
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 17.96 50.00 59.00 60.52 71.06 104.00
##
## $SDL
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.04 66.02 69.98 70.69 75.02 111.02
Create the average temperature for each calendar day for each weather station using data from all of the years.
TMAX_seasonality = both %>%
group_by(NAME,mo,dy) %>%
summarize(TMAXM = mean(TMAX)) %>%
ungroup() %>%
mutate(DATE = make_date(year = 2024,month=mo,day=dy))
summary(TMAX_seasonality)
## NAME mo dy TMAXM
## Length:732 Min. : 1.000 Min. : 1.00 Min. :43.13
## Class :character 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.:59.41
## Mode :character Median : 7.000 Median :16.00 Median :67.82
## Mean : 6.514 Mean :15.76 Mean :65.57
## 3rd Qu.:10.000 3rd Qu.:23.00 3rd Qu.:73.86
## Max. :12.000 Max. :31.00 Max. :80.08
## DATE
## Min. :2024-01-01
## 1st Qu.:2024-04-01
## Median :2024-07-01
## Mean :2024-07-01
## 3rd Qu.:2024-10-01
## Max. :2024-12-31
head(TMAX_seasonality)
## # A tibble: 6 x 5
## NAME mo dy TMAXM DATE
## <chr> <dbl> <int> <dbl> <date>
## 1 OAP 1 1 43.6 2024-01-01
## 2 OAP 1 2 43.1 2024-01-02
## 3 OAP 1 3 43.3 2024-01-03
## 4 OAP 1 4 43.4 2024-01-04
## 5 OAP 1 5 43.6 2024-01-05
## 6 OAP 1 6 43.2 2024-01-06
Let’s see the two seasonal patterns with one plot using color to identify the stations.
TMAX_seasonality %>% ggplot(aes(x=DATE,y=TMAXM,color = NAME)) +
geom_point(size=.3) +
scale_x_date(date_labels = "%b") -> one_plot
ggplotly(one_plot)
Fiddle around with size to get a graph that looks good to you.
Try facetting instead of one plot.
TMAX_seasonality %>% ggplot(aes(x=DATE,y=TMAXM,color=NAME)) +
geom_point(size=.3) +
facet_wrap(~NAME,ncol=1) -> facet_plot
facet_plot
Another way to look at the TMAX data is to compare the distribution of the variable without considering the time of year.
Here’s a comparison using histograms and facetting.
both %>% ggplot(aes(x=TMAX,color=NAME)) +
geom_histogram() +
facet_wrap(~NAME)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Use geom_density() and put the two curves in one graph.
both %>% ggplot(aes(x=TMAX,color=NAME)) +
geom_density()