#Load all libraries that will/may be used

library(tidyverse)
## ── Attaching packages ──────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0     ✔ purrr   0.2.5
## ✔ tibble  1.4.2     ✔ dplyr   0.7.8
## ✔ tidyr   0.8.2     ✔ stringr 1.3.1
## ✔ readr   1.1.1     ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(rpart)
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(fpp2)
## Loading required package: forecast
## Loading required package: fma
## Loading required package: expsmooth
library(expsmooth)
library(fma)
library(readr)
# Restore the objects stored in olywthr.rdata into the workspace; the rest of
# the script expects this to provide the `olywthr` data frame.
load("olywthr.rdata")

# Quick inspection: the first rows, then per-column summaries.
head(olywthr)
summary(olywthr)
##  STATION_NAME            DATE                 PRCP       
##  Length:49316       Min.   :1877-07-01   Min.   :0.0000  
##  Class :character   1st Qu.:1913-05-10   1st Qu.:0.0000  
##  Mode  :character   Median :1949-12-24   Median :0.0000  
##                     Mean   :1948-11-12   Mean   :0.1409  
##                     3rd Qu.:1983-09-26   3rd Qu.:0.1400  
##                     Max.   :2017-07-11   Max.   :4.8200  
##       SNOW               TMAX             TMIN             yr      
##  Min.   : 0.00000   Min.   : 15.00   Min.   :-8.00   Min.   :1877  
##  1st Qu.: 0.00000   1st Qu.: 50.00   1st Qu.:34.00   1st Qu.:1913  
##  Median : 0.00000   Median : 59.00   Median :41.00   Median :1949  
##  Mean   : 0.02647   Mean   : 60.64   Mean   :40.42   Mean   :1948  
##  3rd Qu.: 0.00000   3rd Qu.: 71.00   3rd Qu.:47.00   3rd Qu.:1983  
##  Max.   :14.20000   Max.   :104.00   Max.   :76.00   Max.   :2017  
##        mo               dy       
##  Min.   : 1.000   Min.   : 1.00  
##  1st Qu.: 4.000   1st Qu.: 8.00  
##  Median : 7.000   Median :16.00  
##  Mean   : 6.516   Mean   :15.74  
##  3rd Qu.:10.000   3rd Qu.:23.00  
##  Max.   :12.000   Max.   :31.00
# Version 1: classification tree fitted on the full data set

# Predict the station label from the four weather measurements.
# method = "class" is stated explicitly (as in Version 2) instead of leaving
# rpart() to guess the method from the response type. minsplit = 2 lets rpart
# attempt a split on any node that holds at least two observations.
model <- rpart(
  STATION_NAME ~ PRCP + SNOW + TMAX + TMIN,
  data = olywthr,
  method = "class",
  control = rpart.control(minsplit = 2)
)

# Shrink the margins and switch clipping off so node labels may spill past the
# plot region. par() returns the previous settings; they are put back once the
# tree is drawn so later base-graphics plots do not inherit the tiny margins.
old_par <- par(xpd = NA, mar = rep(0.7, 4))
plot(model, compress = TRUE)
text(model, cex = 0.7, use.n = TRUE, fancy = FALSE, all = TRUE)
par(old_par)

# Version 2: train/test split and variable roles

# `olywthr` is already in the workspace (restored by load() earlier in the
# script). It is not a packaged data set, so the former data(olywthr) call only
# raised "data set 'olywthr' not found" and has been dropped.

# Fix the RNG seed so the 70/30 split is identical on every run.
set.seed(500)
nobs <- nrow(olywthr)
# floor() makes the truncation of the fractional sample size explicit.
train <- sample(nobs, floor(0.7 * nobs))
test <- setdiff(seq_len(nobs), train)
colnames(olywthr)
## [1] "STATION_NAME" "DATE"         "PRCP"         "SNOW"        
## [5] "TMAX"         "TMIN"         "yr"           "mo"          
## [9] "dy"
### The following variable selections have been noted.
# Column roles. NOTE(review): `numeric` shares its name with a base R
# function; the name is kept so any later code that reads it keeps working.
input <- c("PRCP", "SNOW", "TMAX", "TMIN")
numeric <- c("PRCP", "SNOW", "TMAX", "TMIN")
categoric <- NULL
target <- "STATION_NAME"
risk <- NULL
ident <- NULL
ignore <- NULL
weights <- NULL

# Build the Decision Tree model.
#
# The earlier formula STATION_NAME ~ . offered DATE, yr, mo and dy as
# predictors. The tree it produced used DATE alone: a single split
# (DATE >= -8036) separated the two stations with zero loss, so it said
# nothing about the weather measurements. The formula now lists only the
# weather columns, matching the `input` selection and the Version 1 model.
# Console output recorded from the old fit has been removed; re-run to
# regenerate it.
#
# model = TRUE keeps the model frame inside the fit, which is what prp() asks
# for to avoid its "Bad 'data' field in model 'call'" warning.
#
# rpart assigns its cross-validation folds at random, so the seed is set to
# keep the xerror/xstd columns of printcp() repeatable.
#
# NOTE(review): the fitted object is still named `rpart`, which shadows the
# function name for readers (calls still resolve to the function). The name is
# kept so any later code that refers to it keeps working.
set.seed(500)
rpart <- rpart(
  STATION_NAME ~ PRCP + SNOW + TMAX + TMIN,
  data = olywthr[train, ],
  method = "class",
  model = TRUE,
  parms = list(split = "information"),
  control = rpart.control(
    minsplit = 12,
    usesurrogate = 0,
    maxsurrogate = 0
  )
)

# Generate a textual view of the Decision Tree model.
print(rpart)

# Complexity-parameter table, including the cross-validated error.
printcp(rpart)

# Decision Tree Plot...
prp(rpart)

# Draw the rattle-styled version of the tree on a fresh graphics device.
dev.new()
fancyRpartPlot(rpart, main = "Decision Tree Graph")
# Build a monthly snowfall time series from the SNOW column.
#
# olywthr appears to hold daily-dated rows (DATE spans 1877-07-01 to
# 2017-07-11, with yr/mo/dy columns). Passing the raw column to ts() with
# frequency = 12 and start = c(1991, 1) labelled each row as a month beginning
# in January 1991. The rows are therefore summed per calendar month first, and
# the series starts at the first month actually present in the data.
# SNOW is selected by name rather than by column position.

# Running month counter (year * 12 + zero-based month) for every row.
month_id <- olywthr[["yr"]] * 12 + (olywthr[["mo"]] - 1)

# Total per month; tapply() returns the groups in increasing month_id order.
snow_total <- tapply(olywthr[["SNOW"]], month_id, sum)

# Place the totals on a gap-free month grid. Months with no rows stay NA so
# every later observation keeps its correct position on the time axis.
month_grid <- seq(min(month_id), max(month_id))
snow_monthly <- rep(NA_real_, length(month_grid))
snow_monthly[match(sort(unique(month_id)), month_grid)] <- as.vector(snow_total)

first_id <- month_grid[1]
Y <- ts(
  snow_monthly,
  start = c(first_id %/% 12, first_id %% 12 + 1),
  frequency = 12
)


#################################
# Preliminary Analysis
#################################

# Time plot

autoplot(Y) +
  ylab("Snow")