library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag(): dplyr, stats
library(lubridate)
##
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
##
## date
olywthr <- read_csv("~/Dropbox/RProjects/Oly Weather/olyw3.csv", col_types = cols(DATE = col_date(format = "%Y%m%d"),SNOW = col_double()))
str(olywthr)
## Classes 'tbl_df', 'tbl' and 'data.frame': 52599 obs. of 6 variables:
## $ STATION_NAME: chr "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" ...
## $ DATE : Date, format: "1877-07-01" "1877-07-02" ...
## $ PRCP : num 0 0 0 0 0 0 0 0 0 0 ...
## $ SNOW : num -9999 -9999 -9999 -9999 -9999 ...
## $ TMAX : int 63 63 67 67 71 74 80 88 81 70 ...
## $ TMIN : int 48 53 45 45 43 49 49 50 57 57 ...
## - attr(*, "spec")=List of 2
## ..$ cols :List of 6
## .. ..$ STATION_NAME: list()
## .. .. ..- attr(*, "class")= chr "collector_character" "collector"
## .. ..$ DATE :List of 1
## .. .. ..$ format: chr "%Y%m%d"
## .. .. ..- attr(*, "class")= chr "collector_date" "collector"
## .. ..$ PRCP : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ SNOW : list()
## .. .. ..- attr(*, "class")= chr "collector_double" "collector"
## .. ..$ TMAX : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## .. ..$ TMIN : list()
## .. .. ..- attr(*, "class")= chr "collector_integer" "collector"
## ..$ default: list()
## .. ..- attr(*, "class")= chr "collector_guess" "collector"
## ..- attr(*, "class")= chr "col_spec"
olywthr %>% mutate(TMAX = ifelse(TMAX == -9999,NA,TMAX),
TMIN = ifelse(TMIN == -9999,NA,TMIN),
SNOW = ifelse(SNOW == -9999,0,SNOW),
PRCP = ifelse(PRCP == -9999,0,PRCP),
yr = year(DATE),
mo = month(DATE),
dy = day(DATE)) %>%
filter(STATION_NAME == "OLYMPIA AIRPORT WA US" | yr < 1948) -> olywthr
olywthr %>% filter(complete.cases(olywthr)) -> olywthr
save(olywthr,file="olywthr.rdata")
summary(olywthr)
## STATION_NAME DATE PRCP
## Length:49316 Min. :1877-07-01 Min. :0.0000
## Class :character 1st Qu.:1913-05-10 1st Qu.:0.0000
## Mode :character Median :1949-12-24 Median :0.0000
## Mean :1948-11-12 Mean :0.1409
## 3rd Qu.:1983-09-26 3rd Qu.:0.1400
## Max. :2017-07-11 Max. :4.8200
## SNOW TMAX TMIN yr
## Min. : 0.00000 Min. : 15.00 Min. :-8.00 Min. :1877
## 1st Qu.: 0.00000 1st Qu.: 50.00 1st Qu.:34.00 1st Qu.:1913
## Median : 0.00000 Median : 59.00 Median :41.00 Median :1949
## Mean : 0.02647 Mean : 60.64 Mean :40.42 Mean :1948
## 3rd Qu.: 0.00000 3rd Qu.: 71.00 3rd Qu.:47.00 3rd Qu.:1983
## Max. :14.20000 Max. :104.00 Max. :76.00 Max. :2017
## mo dy
## Min. : 1.000 Min. : 1.00
## 1st Qu.: 4.000 1st Qu.: 8.00
## Median : 7.000 Median :16.00
## Mean : 6.516 Mean :15.74
## 3rd Qu.:10.000 3rd Qu.:23.00
## Max. :12.000 Max. :31.00
olywthr %>% filter(mo==12) %>%
group_by(yr) %>%
summarize(mmax = mean(TMAX,na.rm=TRUE),
mmin = mean(TMIN,na.rm=TRUE)) %>%
ungroup() -> december
december
## # A tibble: 135 x 3
## yr mmax mmin
## <dbl> <dbl> <dbl>
## 1 1877 46.48387 37.93548
## 2 1878 44.22581 35.83871
## 3 1879 42.80645 32.93548
## 4 1880 44.54839 34.64516
## 5 1881 46.90323 37.06452
## 6 1882 46.58065 38.25806
## 7 1883 45.90323 35.38710
## 8 1884 39.51613 28.09677
## 9 1885 48.74194 37.80645
## 10 1886 49.38710 40.06452
## # ... with 125 more rows
Nov1 = filter(olywthr,mo==12,dy==1)
summary(Nov1)
## STATION_NAME DATE PRCP
## Length:134 Min. :1877-12-01 Min. :0.0000
## Class :character 1st Qu.:1913-03-02 1st Qu.:0.0000
## Mode :character Median :1950-06-01 Median :0.1450
## Mean :1948-12-17 Mean :0.3508
## 3rd Qu.:1983-08-31 3rd Qu.:0.5275
## Max. :2016-12-01 Max. :3.6000
## SNOW TMAX TMIN yr
## Min. :0.00000 Min. :28.00 Min. : 0.00 Min. :1877
## 1st Qu.:0.00000 1st Qu.:45.00 1st Qu.:32.00 1st Qu.:1912
## Median :0.00000 Median :48.00 Median :37.00 Median :1950
## Mean :0.02164 Mean :48.04 Mean :36.43 Mean :1948
## 3rd Qu.:0.00000 3rd Qu.:52.00 3rd Qu.:41.75 3rd Qu.:1983
## Max. :1.80000 Max. :64.00 Max. :49.00 Max. :2016
## mo dy
## Min. :12 Min. :1
## 1st Qu.:12 1st Qu.:1
## Median :12 Median :1
## Mean :12 Mean :1
## 3rd Qu.:12 3rd Qu.:1
## Max. :12 Max. :1
december %>% ggplot(aes(yr,mmax)) +
geom_point(col="red") +
geom_smooth(col="red") +
geom_point(aes(x=yr,y=mmin),col="blue") +
geom_smooth(aes(x=yr,y=mmin),col="blue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
december[!complete.cases(december),]
## # A tibble: 0 x 3
## # ... with 3 variables: yr <dbl>, mmax <dbl>, mmin <dbl>
table(december$yr)
##
## 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1892 1893 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1920 1921 1923 1925 1926
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
table(olywthr$yr)
##
## 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891
## 184 365 365 333 334 365 365 366 365 365 365 366 365 365 365
## 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906
## 366 365 123 26 346 361 361 362 364 363 357 354 366 362 336
## 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921
## 347 349 365 363 364 366 365 364 360 366 365 31 94 336 365
## 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936
## 254 300 244 307 360 357 366 359 363 365 360 361 364 365 365
## 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951
## 358 360 364 366 365 365 365 364 362 365 365 366 365 365 365
## 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966
## 366 365 365 365 366 365 365 365 366 365 365 365 366 365 365
## 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981
## 365 366 365 365 365 366 365 365 365 366 365 365 365 366 365
## 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996
## 365 365 366 365 365 365 366 365 365 365 366 365 365 365 363
## 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011
## 355 365 365 366 365 365 365 366 365 365 365 366 365 365 365
## 2012 2013 2014 2015 2016 2017
## 366 365 365 365 366 192
olywthr %>%
ggplot(aes(x=TMAX)) +
geom_histogram() +
facet_wrap(~mo)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
tapply(olywthr$TMAX,olywthr$mo,summary)
## $`1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 15.00 40.00 45.00 44.44 49.00 74.00
##
## $`2`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 44.00 49.00 48.36 53.00 73.00
##
## $`3`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31.00 49.00 53.00 53.49 57.00 79.00
##
## $`4`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 39.00 54.00 58.00 59.63 64.00 90.00
##
## $`5`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 46.00 60.00 65.00 66.54 72.00 96.00
##
## $`6`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.00 65.00 70.00 71.61 77.00 101.00
##
## $`7`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 56.00 71.00 77.00 77.54 83.00 104.00
##
## $`8`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 56.00 72.00 77.00 77.34 82.00 104.00
##
## $`9`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 50.00 65.00 70.00 70.86 76.00 98.00
##
## $`10`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 33.0 56.0 60.0 60.5 65.0 90.0
##
## $`11`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 24.00 47.00 51.00 50.93 55.00 74.00
##
## $`12`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 19.00 42.00 46.00 45.46 50.00 70.00
IQR2 = function(x) {IQR(x,na.rm=TRUE)}
tapply(olywthr$TMAX,olywthr$mo,IQR2)
## 1 2 3 4 5 6 7 8 9 10 11 12
## 9 9 8 10 12 12 12 10 11 9 8 8
sd2 = function(x) {sd(x,na.rm=TRUE)}
tapply(olywthr$TMAX,olywthr$mo,sd2)
## 1 2 3 4 5 6 7 8
## 6.663915 6.491218 6.688247 7.810782 8.615359 8.304573 8.118035 7.551948
## 9 10 11 12
## 7.481331 6.593582 5.949529 6.089234
olywthr %>%
group_by(mo,dy) %>%
summarize(prain = mean(PRCP > 0,na.rm=TRUE)) %>%
ungroup() -> rainy
str(rainy)
## Classes 'tbl_df', 'tbl' and 'data.frame': 366 obs. of 3 variables:
## $ mo : num 1 1 1 1 1 1 1 1 1 1 ...
## $ dy : int 1 2 3 4 5 6 7 8 9 10 ...
## $ prain: num 0.647 0.607 0.681 0.664 0.654 ...
summary(rainy)
## mo dy prain
## Min. : 1.000 Min. : 1.00 Min. :0.0438
## 1st Qu.: 4.000 1st Qu.: 8.00 1st Qu.:0.2844
## Median : 7.000 Median :16.00 Median :0.4630
## Mean : 6.514 Mean :15.76 Mean :0.4344
## 3rd Qu.: 9.750 3rd Qu.:23.00 3rd Qu.:0.5993
## Max. :12.000 Max. :31.00 Max. :0.7353
rainy %>% ggplot(aes(x=prain)) +
geom_histogram() +
facet_wrap(~mo)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
rainy %>% ggplot(aes(x=prain)) +
geom_density() +
facet_wrap(~mo)
Let’s create a weather prediction for 2018 based on average values for each day.
olywthr %>%
group_by(mo,dy) %>%
summarize(prain = mean(PRCP > 0),
dmax = mean(TMAX),
dmin = mean(TMIN)) %>%
ungroup() %>% mutate(date =make_date(2018,mo,dy))-> cal18
cal18 %>% ggplot(aes(x=date,y=dmax)) +
geom_point() +
ggtitle("Average Daily Maximum Temperature") +
theme(plot.title = element_text(hjust = 0.5))
## Warning: Removed 1 rows containing missing values (geom_point).
cal18 %>%
filter(mo>8 & mo < 11) %>%
ggplot(aes(x=date,y=dmax)) +
geom_point() +
ggtitle("Average Daily Maximum Temperature") +
theme(plot.title = element_text(hjust = 0.5))