library(tidyverse)
## ── Attaching packages ──────────────────
## ✔ ggplot2 2.2.1.9000 ✔ purrr 0.2.4
## ✔ tibble 1.4.2 ✔ dplyr 0.7.4
## ✔ tidyr 0.8.0 ✔ stringr 1.3.0
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
olywthr <- read_csv("~/Dropbox/RProjects/Oly Weather/olyw3.csv", col_types = cols(DATE = col_date(format = "%Y%m%d"),SNOW = col_double()))
str(olywthr)
## Classes 'tbl_df', 'tbl' and 'data.frame': 52599 obs. of 6 variables:
## $ STATION_NAME: chr "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" ...
## $ DATE : Date, format: "1877-07-01" "1877-07-02" ...
## $ PRCP : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SNOW : num -9999 -9999 -9999 -9999 -9999 ...
## $ TMAX : int 63 63 67 67 71 74 80 88 81 70 ...
## $ TMIN : int 48 53 45 45 43 49 49 50 57 57 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 6
## .. ..$ STATION_NAME: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ DATE :List of 1
## .. .. ..$ format: chr "%Y%m%d"
## .. .. ..- attr(*, "class")= chr "collector_date" "collector"
## .. ..$ PRCP : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ SNOW : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ TMAX : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ TMIN : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
olywthr %>% mutate(TMAX = ifelse(TMAX == -9999,NA,TMAX),
TMIN = ifelse(TMIN == -9999,NA,TMIN),
SNOW = ifelse(SNOW == -9999,0,SNOW),
PRCP = ifelse(PRCP == -9999,0,PRCP),
yr = year(DATE),
mo = month(DATE),
dy = day(DATE)) %>%
filter(STATION_NAME == "OLYMPIA AIRPORT WA US" | yr < 1948) -> olywthr
olywthr %>% filter(complete.cases(olywthr)) -> olywthr
save(olywthr,file="olywthr.rdata")
summary(olywthr)
## STATION_NAME DATE PRCP
## Length:49316 Min. :1877-07-01 Min. :0.0000
## Class :character 1st Qu.:1913-05-10 1st Qu.:0.0000
## Mode :character Median :1949-12-24 Median :0.0000
## Mean :1948-11-12 Mean :0.1409
## 3rd Qu.:1983-09-26 3rd Qu.:0.1400
## Max. :2017-07-11 Max. :4.8200
## SNOW TMAX TMIN yr
## Min. : 0.00000 Min. : 15.00 Min. :-8.00 Min. :1877
## 1st Qu.: 0.00000 1st Qu.: 50.00 1st Qu.:34.00 1st Qu.:1913
## Median : 0.00000 Median : 59.00 Median :41.00 Median :1949
## Mean : 0.02647 Mean : 60.64 Mean :40.42 Mean :1948
## 3rd Qu.: 0.00000 3rd Qu.: 71.00 3rd Qu.:47.00 3rd Qu.:1983
## Max. :14.20000 Max. :104.00 Max. :76.00 Max. :2017
## mo dy
## Min. : 1.000 Min. : 1.00
## 1st Qu.: 4.000 1st Qu.: 8.00
## Median : 7.000 Median :16.00
## Mean : 6.516 Mean :15.74
## 3rd Qu.:10.000 3rd Qu.:23.00
## Max. :12.000 Max. :31.00
olywthr %>% filter(mo==12) %>%
group_by(yr) %>%
summarize(mmax = mean(TMAX,na.rm=TRUE),
mmin = mean(TMIN,na.rm=TRUE)) %>%
ungroup() -> december
december
## # A tibble: 135 x 3
## yr mmax mmin
## <dbl> <dbl> <dbl>
## 1 1877. 46.5 37.9
## 2 1878. 44.2 35.8
## 3 1879. 42.8 32.9
## 4 1880. 44.5 34.6
## 5 1881. 46.9 37.1
## 6 1882. 46.6 38.3
## 7 1883. 45.9 35.4
## 8 1884. 39.5 28.1
## 9 1885. 48.7 37.8
## 10 1886. 49.4 40.1
## # ... with 125 more rows
december %>% ggplot(aes(yr,mmax)) +
geom_point(col="red") +
geom_smooth(col="red") +
geom_point(aes(x=yr,y=mmin),col="blue") +
geom_smooth(aes(x=yr,y=mmin),col="blue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
olywthr %>%
ggplot(aes(x=TMAX)) +
geom_histogram() +
facet_wrap(~mo)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
tapply(olywthr$TMAX,olywthr$mo,summary)
## $`1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.00 40.00 45.00 44.44 49.00 74.00
##
## $`2`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 44.00 49.00 48.36 53.00 73.00
##
## $`3`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31.00 49.00 53.00 53.49 57.00 79.00
##
## $`4`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 39.00 54.00 58.00 59.63 64.00 90.00
##
## $`5`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.00 60.00 65.00 66.54 72.00 96.00
##
## $`6`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.00 65.00 70.00 71.61 77.00 101.00
##
## $`7`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 56.00 71.00 77.00 77.54 83.00 104.00
##
## $`8`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 56.00 72.00 77.00 77.34 82.00 104.00
##
## $`9`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.00 65.00 70.00 70.86 76.00 98.00
##
## $`10`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 33.0 56.0 60.0 60.5 65.0 90.0
##
## $`11`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24.00 47.00 51.00 50.93 55.00 74.00
##
## $`12`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 42.00 46.00 45.46 50.00 70.00
olywthr %>%
group_by(mo,dy) %>%
summarize(prain = mean(PRCP > 0,na.rm=TRUE)) %>%
ungroup() -> rainy
str(rainy)
## Classes 'tbl_df', 'tbl' and 'data.frame': 366 obs. of 3 variables:
## $ mo : num 1 1 1 1 1 1 1 1 1 1 ...
## $ dy : int 1 2 3 4 5 6 7 8 9 10 ...
## $ prain: num 0.647 0.607 0.681 0.664 0.654 ...
summary(rainy)
## mo dy prain
## Min. : 1.000 Min. : 1.00 Min. :0.0438
## 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.:0.2844
## Median : 7.000 Median :16.00 Median :0.4630
## Mean : 6.514 Mean :15.76 Mean :0.4344
## 3rd Qu.: 9.750 3rd Qu.:23.00 3rd Qu.:0.5993
## Max. :12.000 Max. :31.00 Max. :0.7353
rainy %>% ggplot(aes(x=prain)) +
geom_histogram() +
facet_wrap(~mo)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
rainy %>% ggplot(aes(x=prain)) +
geom_density() +
facet_wrap(~mo)
Let’s create a weather prediction for 2018 based on average values for each day.
olywthr %>%
group_by(mo,dy) %>%
summarize(prain = mean(PRCP > 0),
dmax = mean(TMAX),
dmin = mean(TMIN)) %>%
ungroup() %>% mutate(date =make_date(2018,mo,dy))-> cal18
cal18 %>% ggplot(aes(x=date,y=dmax)) +
geom_point() +
ggtitle("Average Daily Maximum Temperature") +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 1 rows containing missing values (geom_point).
cal18 %>%
filter(mo>8 & mo < 11) %>%
ggplot(aes(x=date,y=dmax)) +
geom_point() +
ggtitle("Average Daily Maximum Temperature") +
theme(plot.title = element_text(hjust = 0.5))