#Load all libraries that will/may be used
library(tidyverse)
## ── Attaching packages ──────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.2.5
## ✔ tibble 1.4.2 ✔ dplyr 0.7.8
## ✔ tidyr 0.8.2 ✔ stringr 1.3.1
## ✔ readr 1.1.1 ✔ forcats 0.3.0
## ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(rpart)
library(rattle)
## Rattle: A free graphical interface for data science with R.
## Version 5.2.0 Copyright (c) 2006-2018 Togaware Pty Ltd.
## Type 'rattle()' to shake, rattle, and roll your data.
library(rpart.plot)
library(fpp2)
## Loading required package: forecast
## Loading required package: fma
## Loading required package: expsmooth
library(expsmooth)
library(fma)
library(readr)
# Restore the objects saved in olywthr.rdata; the statements below use the
# `olywthr` data frame that this is expected to bring into the workspace.
load("olywthr.rdata")
# Peek at the first rows, then print per-column summary statistics.
head(x = olywthr)
summary(object = olywthr)
## STATION_NAME DATE PRCP
## Length:49316 Min. :1877-07-01 Min. :0.0000
## Class :character 1st Qu.:1913-05-10 1st Qu.:0.0000
## Mode :character Median :1949-12-24 Median :0.0000
## Mean :1948-11-12 Mean :0.1409
## 3rd Qu.:1983-09-26 3rd Qu.:0.1400
## Max. :2017-07-11 Max. :4.8200
## SNOW TMAX TMIN yr
## Min. : 0.00000 Min. : 15.00 Min. :-8.00 Min. :1877
## 1st Qu.: 0.00000 1st Qu.: 50.00 1st Qu.:34.00 1st Qu.:1913
## Median : 0.00000 Median : 59.00 Median :41.00 Median :1949
## Mean : 0.02647 Mean : 60.64 Mean :40.42 Mean :1948
## 3rd Qu.: 0.00000 3rd Qu.: 71.00 3rd Qu.:47.00 3rd Qu.:1983
## Max. :14.20000 Max. :104.00 Max. :76.00 Max. :2017
## mo dy
## Min. : 1.000 Min. : 1.00
## 1st Qu.: 4.000 1st Qu.: 8.00
## Median : 7.000 Median :16.00
## Mean : 6.516 Mean :15.74
## 3rd Qu.:10.000 3rd Qu.:23.00
## Max. :12.000 Max. :31.00
# Version 1
# Fit a tree predicting STATION_NAME from the four weather measurements.
# minsplit = 2 allows a split attempt on any node with at least two rows.
model <- rpart(
  formula = STATION_NAME ~ PRCP + SNOW + TMAX + TMIN,
  control = rpart.control(minsplit = 2),
  data = olywthr
)
# Use narrow margins and turn clipping off (xpd = NA) so that node labels may
# be drawn outside the plot region.
par(mar = c(0.7, 0.7, 0.7, 0.7), xpd = NA)
plot(x = model, compress = TRUE)
# Label every node (all = TRUE) and include observation counts (use.n = TRUE).
text(x = model, all = TRUE, use.n = TRUE, fancy = FALSE, cex = 0.7)

# Version 2
# `olywthr` is already in the workspace (restored with load() above). data()
# only looks up datasets shipped with packages, so calling it here did nothing
# except warn "data set 'olywthr' not found"; it has been dropped.

# Seed the RNG *before* sampling so the 70/30 train/test split is reproducible.
set.seed(500)
nobs <- nrow(olywthr)
train <- sample(nobs, floor(0.7 * nobs))
test <- setdiff(seq_len(nobs), train)
colnames(olywthr)
## [1] "STATION_NAME" "DATE" "PRCP" "SNOW"
## [5] "TMAX" "TMIN" "yr" "mo"
## [9] "dy"

### The following variable selections have been noted.
input <- c("PRCP", "SNOW", "TMAX", "TMIN")
numeric <- c("PRCP", "SNOW", "TMAX", "TMIN")
categoric <- NULL
target <- "STATION_NAME"
risk <- NULL
ident <- NULL
ignore <- NULL
weights <- NULL

# Build the Decision Tree model.
# Only the columns named in `target` and `input` are passed to rpart, so the
# `.` in the formula expands to the four weather measurements. An earlier fit
# on every column produced a single split on DATE with zero loss in both
# leaves: the tree was separating the stations by calendar date instead of by
# weather, so DATE / yr / mo / dy are deliberately kept out of the predictors.
# model = TRUE keeps the model frame in the fitted object; this is the remedy
# prp() itself suggests for its "Bad 'data' field in model 'call'" warning.
# NOTE: the fitted object keeps its original name `rpart`; calls such as
# rpart(...) still resolve to the function because R skips non-function
# objects when looking up a function name.
rpart <- rpart(STATION_NAME ~ .,
               data = olywthr[train, c(target, input)],
               method = "class",
               parms = list(split = "information"),
               control = rpart.control(minsplit = 12,
                                       usesurrogate = 0,
                                       maxsurrogate = 0),
               model = TRUE)

# Generate a textual view of the Decision Tree model.
print(rpart)
# Complexity-parameter table (cross-validated error per number of splits).
printcp(rpart)

# Decision Tree Plot...
prp(rpart)

# Open a fresh graphics device so the fancy plot does not overwrite the prp plot.
dev.new()
fancyRpartPlot(rpart, main = "Decision Tree Graph")
# Preliminary Analysis and Time Plot
# Build a monthly snowfall time series with ts().
# olywthr appears to hold one row per day (DATE spans 1877-2017 and there is a
# `dy` column), so SNOW is first summed within each calendar month. Only after
# that aggregation does frequency = 12 describe the data, and the series start
# is taken from the first observed month instead of a hard-coded year.
month_id <- olywthr$yr * 12 + (olywthr$mo - 1)
month_span <- seq(min(month_id, na.rm = TRUE), max(month_id, na.rm = TRUE))
# Every month in the span is a factor level, so a month with no rows yields NA
# rather than being dropped; later months therefore keep their true position.
monthly_snow <- tapply(
  olywthr$SNOW,
  factor(month_id, levels = month_span),
  sum,
  na.rm = TRUE
)
Y <- ts(
  as.numeric(monthly_snow),
  start = c(month_span[1] %/% 12, month_span[1] %% 12 + 1),
  frequency = 12
)
#################################
# Preliminary Analysis
#################################
# Time plot
autoplot(Y) +
  ylab("Snow")
