library(tidyverse)
## Loading tidyverse: ggplot2
## Loading tidyverse: tibble
## Loading tidyverse: tidyr
## Loading tidyverse: readr
## Loading tidyverse: purrr
## Loading tidyverse: dplyr
## Conflicts with tidy packages ----------------------------------------------
## filter(): dplyr, stats
## lag():    dplyr, stats
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following object is masked from 'package:base':
## 
##     date
olywthr <- read_csv("~/Dropbox/RProjects/Oly Weather/olyw3.csv", col_types = cols(DATE = col_date(format = "%Y%m%d"),SNOW = col_double()))
str(olywthr)
## Classes 'tbl_df', 'tbl' and 'data.frame':    52599 obs. of  6 variables:
##  $ STATION_NAME: chr  "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" "OLYMPIA PRIEST PT PA WA US" ...
##  $ DATE        : Date, format: "1877-07-01" "1877-07-02" ...
##  $ PRCP        : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ SNOW        : num  -9999 -9999 -9999 -9999 -9999 ...
##  $ TMAX        : int  63 63 67 67 71 74 80 88 81 70 ...
##  $ TMIN        : int  48 53 45 45 43 49 49 50 57 57 ...
##  - attr(*, "spec")=List of 2
##   ..$ cols   :List of 6
##   .. ..$ STATION_NAME: list()
##   .. .. ..- attr(*, "class")= chr  "collector_character" "collector"
##   .. ..$ DATE        :List of 1
##   .. .. ..$ format: chr "%Y%m%d"
##   .. .. ..- attr(*, "class")= chr  "collector_date" "collector"
##   .. ..$ PRCP        : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ SNOW        : list()
##   .. .. ..- attr(*, "class")= chr  "collector_double" "collector"
##   .. ..$ TMAX        : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   .. ..$ TMIN        : list()
##   .. .. ..- attr(*, "class")= chr  "collector_integer" "collector"
##   ..$ default: list()
##   .. ..- attr(*, "class")= chr  "collector_guess" "collector"
##   ..- attr(*, "class")= chr "col_spec"
olywthr %>% mutate(TMAX = ifelse(TMAX == -9999,NA,TMAX),
       TMIN = ifelse(TMIN == -9999,NA,TMIN),
SNOW = ifelse(SNOW == -9999,0,SNOW), 
PRCP = ifelse(PRCP == -9999,0,PRCP), 
       yr = year(DATE),
       mo = month(DATE),
       dy = day(DATE)) %>% 
filter(STATION_NAME == "OLYMPIA AIRPORT WA US" | yr < 1948) -> olywthr 
olywthr %>% filter(complete.cases(olywthr)) -> olywthr
save(olywthr,file="olywthr.rdata")
summary(olywthr)
##  STATION_NAME            DATE                 PRCP       
##  Length:49316       Min.   :1877-07-01   Min.   :0.0000  
##  Class :character   1st Qu.:1913-05-10   1st Qu.:0.0000  
##  Mode  :character   Median :1949-12-24   Median :0.0000  
##                     Mean   :1948-11-12   Mean   :0.1409  
##                     3rd Qu.:1983-09-26   3rd Qu.:0.1400  
##                     Max.   :2017-07-11   Max.   :4.8200  
##       SNOW               TMAX             TMIN             yr      
##  Min.   : 0.00000   Min.   : 15.00   Min.   :-8.00   Min.   :1877  
##  1st Qu.: 0.00000   1st Qu.: 50.00   1st Qu.:34.00   1st Qu.:1913  
##  Median : 0.00000   Median : 59.00   Median :41.00   Median :1949  
##  Mean   : 0.02647   Mean   : 60.64   Mean   :40.42   Mean   :1948  
##  3rd Qu.: 0.00000   3rd Qu.: 71.00   3rd Qu.:47.00   3rd Qu.:1983  
##  Max.   :14.20000   Max.   :104.00   Max.   :76.00   Max.   :2017  
##        mo               dy       
##  Min.   : 1.000   Min.   : 1.00  
##  1st Qu.: 4.000   1st Qu.: 8.00  
##  Median : 7.000   Median :16.00  
##  Mean   : 6.516   Mean   :15.74  
##  3rd Qu.:10.000   3rd Qu.:23.00  
##  Max.   :12.000   Max.   :31.00
olywthr %>% filter(mo==12) %>% 
          group_by(yr) %>% 
          summarize(mmax = mean(TMAX,na.rm=TRUE),
                    mmin = mean(TMIN,na.rm=TRUE)) %>%  
  ungroup() -> december

december
## # A tibble: 135 x 3
##       yr     mmax     mmin
##    <dbl>    <dbl>    <dbl>
##  1  1877 46.48387 37.93548
##  2  1878 44.22581 35.83871
##  3  1879 42.80645 32.93548
##  4  1880 44.54839 34.64516
##  5  1881 46.90323 37.06452
##  6  1882 46.58065 38.25806
##  7  1883 45.90323 35.38710
##  8  1884 39.51613 28.09677
##  9  1885 48.74194 37.80645
## 10  1886 49.38710 40.06452
## # ... with 125 more rows
Nov1 = filter(olywthr,mo==12,dy==1)
summary(Nov1)
##  STATION_NAME            DATE                 PRCP       
##  Length:134         Min.   :1877-12-01   Min.   :0.0000  
##  Class :character   1st Qu.:1913-03-02   1st Qu.:0.0000  
##  Mode  :character   Median :1950-06-01   Median :0.1450  
##                     Mean   :1948-12-17   Mean   :0.3508  
##                     3rd Qu.:1983-08-31   3rd Qu.:0.5275  
##                     Max.   :2016-12-01   Max.   :3.6000  
##       SNOW              TMAX            TMIN             yr      
##  Min.   :0.00000   Min.   :28.00   Min.   : 0.00   Min.   :1877  
##  1st Qu.:0.00000   1st Qu.:45.00   1st Qu.:32.00   1st Qu.:1912  
##  Median :0.00000   Median :48.00   Median :37.00   Median :1950  
##  Mean   :0.02164   Mean   :48.04   Mean   :36.43   Mean   :1948  
##  3rd Qu.:0.00000   3rd Qu.:52.00   3rd Qu.:41.75   3rd Qu.:1983  
##  Max.   :1.80000   Max.   :64.00   Max.   :49.00   Max.   :2016  
##        mo           dy   
##  Min.   :12   Min.   :1  
##  1st Qu.:12   1st Qu.:1  
##  Median :12   Median :1  
##  Mean   :12   Mean   :1  
##  3rd Qu.:12   3rd Qu.:1  
##  Max.   :12   Max.   :1
december %>% ggplot(aes(yr,mmax)) +
  geom_point(col="red") + 
  geom_smooth(col="red") +
  geom_point(aes(x=yr,y=mmin),col="blue") +
  geom_smooth(aes(x=yr,y=mmin),col="blue")
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'

december[!complete.cases(december),]
## # A tibble: 0 x 3
## # ... with 3 variables: yr <dbl>, mmax <dbl>, mmin <dbl>
table(december$yr)
## 
## 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1892 1893 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1920 1921 1923 1925 1926 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 1937 1938 1939 1940 1941 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 1952 1953 1954 1955 1956 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 1967 1968 1969 1970 1971 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 1982 1983 1984 1985 1986 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1 
## 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 
##    1    1    1    1    1    1    1    1    1    1    1    1    1    1    1
table(olywthr$yr)
## 
## 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 
##  184  365  365  333  334  365  365  366  365  365  365  366  365  365  365 
## 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 
##  366  365  123   26  346  361  361  362  364  363  357  354  366  362  336 
## 1907 1908 1909 1910 1911 1912 1913 1914 1915 1916 1917 1918 1919 1920 1921 
##  347  349  365  363  364  366  365  364  360  366  365   31   94  336  365 
## 1922 1923 1924 1925 1926 1927 1928 1929 1930 1931 1932 1933 1934 1935 1936 
##  254  300  244  307  360  357  366  359  363  365  360  361  364  365  365 
## 1937 1938 1939 1940 1941 1942 1943 1944 1945 1946 1947 1948 1949 1950 1951 
##  358  360  364  366  365  365  365  364  362  365  365  366  365  365  365 
## 1952 1953 1954 1955 1956 1957 1958 1959 1960 1961 1962 1963 1964 1965 1966 
##  366  365  365  365  366  365  365  365  366  365  365  365  366  365  365 
## 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1978 1979 1980 1981 
##  365  366  365  365  365  366  365  365  365  366  365  365  365  366  365 
## 1982 1983 1984 1985 1986 1987 1988 1989 1990 1991 1992 1993 1994 1995 1996 
##  365  365  366  365  365  365  366  365  365  365  366  365  365  365  363 
## 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007 2008 2009 2010 2011 
##  355  365  365  366  365  365  365  366  365  365  365  366  365  365  365 
## 2012 2013 2014 2015 2016 2017 
##  366  365  365  365  366  192
olywthr %>% 
  ggplot(aes(x=TMAX)) +
  geom_histogram() +
  facet_wrap(~mo)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

tapply(olywthr$TMAX,olywthr$mo,summary)
## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   15.00   40.00   45.00   44.44   49.00   74.00 
## 
## $`2`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   44.00   49.00   48.36   53.00   73.00 
## 
## $`3`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   31.00   49.00   53.00   53.49   57.00   79.00 
## 
## $`4`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   39.00   54.00   58.00   59.63   64.00   90.00 
## 
## $`5`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   46.00   60.00   65.00   66.54   72.00   96.00 
## 
## $`6`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   50.00   65.00   70.00   71.61   77.00  101.00 
## 
## $`7`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   56.00   71.00   77.00   77.54   83.00  104.00 
## 
## $`8`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   56.00   72.00   77.00   77.34   82.00  104.00 
## 
## $`9`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   50.00   65.00   70.00   70.86   76.00   98.00 
## 
## $`10`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    33.0    56.0    60.0    60.5    65.0    90.0 
## 
## $`11`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   24.00   47.00   51.00   50.93   55.00   74.00 
## 
## $`12`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   19.00   42.00   46.00   45.46   50.00   70.00
IQR2 = function(x) {IQR(x,na.rm=TRUE)}
tapply(olywthr$TMAX,olywthr$mo,IQR2)
##  1  2  3  4  5  6  7  8  9 10 11 12 
##  9  9  8 10 12 12 12 10 11  9  8  8
sd2 = function(x) {sd(x,na.rm=TRUE)}
tapply(olywthr$TMAX,olywthr$mo,sd2)
##        1        2        3        4        5        6        7        8 
## 6.663915 6.491218 6.688247 7.810782 8.615359 8.304573 8.118035 7.551948 
##        9       10       11       12 
## 7.481331 6.593582 5.949529 6.089234
olywthr %>% 
  group_by(mo,dy) %>%
  summarize(prain = mean(PRCP > 0,na.rm=TRUE)) %>% 
  ungroup() -> rainy
str(rainy)
## Classes 'tbl_df', 'tbl' and 'data.frame':    366 obs. of  3 variables:
##  $ mo   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ dy   : int  1 2 3 4 5 6 7 8 9 10 ...
##  $ prain: num  0.647 0.607 0.681 0.664 0.654 ...
summary(rainy)
##        mo               dy            prain       
##  Min.   : 1.000   Min.   : 1.00   Min.   :0.0438  
##  1st Qu.: 4.000   1st Qu.: 8.00   1st Qu.:0.2844  
##  Median : 7.000   Median :16.00   Median :0.4630  
##  Mean   : 6.514   Mean   :15.76   Mean   :0.4344  
##  3rd Qu.: 9.750   3rd Qu.:23.00   3rd Qu.:0.5993  
##  Max.   :12.000   Max.   :31.00   Max.   :0.7353
rainy %>% ggplot(aes(x=prain)) +
  geom_histogram() +
  facet_wrap(~mo)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.