library(tidyr)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(fma)
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2   3.3.5     ✓ expsmooth 2.3
## 
#load data
hurricane<-read.csv("/Users/Luke/Documents/BC/Predictive Analytics/Discussion 2/hurricanes.csv")

#Decomposition: Hurricanes and Tropical Storms

##Convert to time series, plot

hurricane$Date<-as.Date(hurricane$Date)
hts <- ts(hurricane$Storms, frequency = 12, start=c(1950))
hts
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1950   0   0   0   0   0   0   0   4   3   3   2   6
## 1951   3   0   2   0   0   4   0   0   1   0   0   1
## 1952   1   0   4   1   1   1   0   0   2   0   0   0
## 1953   0   2   0   1   4   0   4   2   0   2   0   0
## 1954   1   0   0   1   0   1   1   0   0   5   0   0
## 1955   2   0   0   4   0   0   2   0   0   2   0   4
## 1956   0   2   2   0   5   2   0   1   0   0   4   0
## 1957   0   3   1   0   2   0   0   3   0   0   1   0
## 1958   1   2   0   0   4   0   0   3   0   1   0   3
## 1959   4   0   4   4   1   3   1   0   5   0   0   1
## 1960   0   0   1   0   0   4   0   0   2   0   0   7
## 1961   0   1   4   0   1   0   0   4   0   4   4   0
## 1962   4   1   0   3   1   0   3   0   0   3   0   0
## 1963   7   0   0   5   0   0   3   0   0   2   0   0
## 1964   1   1   0   5   2   0   0   0   0   0   0   1
## 1965   1   4   1   0   0   0   0   0   0   2   0   4
## 1966   1   0   0   0   0   0   0   1   0   4   1   0
## 1967   0   0   0   0   1   2   2   3   2   0   0   0
## 1968   0   0   0   1   2   2   0   0   0   0   0   0
## 1969   0   0   1   6   2   2   0   0   0   0   0   0
## 1970   0   2   1   0   0   0   0   0   0   0   1   5
## 1971   2   0   0   0   0   0   0   1   1   4   1   1
## 1972   0   0   0   0   0   1   0   2   1   0   0   0
## 1973   0   0   0   1   4   4   0   1   0   0   0   0
## 1974   0   0   0   4   3   0   0   0   0   0   0   3
## 1975   0   3   1   0   0   0   0   0   0   0   1   6
## 1976   5   1   0   0   0   0   1   0   2   3   2   0
## 1977   0   0   0   0   0   0   1   6   1   1   0   0
## 1978   0   0   1   1   0   2   0   1   0   0   0   0
## 1979   0   0   2   2   2   0   0   0   0   0   0   1
## 1980   1   4   1   0   0   0   0   0   0   1   1   3
## 1981   1   0   0   0   0   0   1   0   1   2   1   0
## 1982   0   0   0   0   0   0   0   3   2   0   1   0
## 1983   0   0   0   0   1   3   3   0   0   0   0   0
## 1984   0   1   2   2   1   0   0   0   0   0   0   0
## 1985   1   5   1   2   0   0   0   0   1   1   0   4
## 1986   1   2   0   0   0   0   0   2   0   3   0   0
## 1987   0   0   0   0   0   0   0   2   0   0   0   0
## 1988   0   0   0   0   0   6   1   1   0   0   0   0
## 1989   0   0   2   3   2   1   0   0   0   0   0   2
## 1990   0   2   0   1   0   0   0   0   0   0   0   3
## 1991   1   0   0   0   0   0   0   0   0   6   1   1
## 1992   0   0   0   0   0   1   3   2   1   1   0   0
## 1993   0   0   0   0   3   2   4   0   0   0   0   0
## 1994   0   1   0   3   3   0   0   0   0   1   0   0
## 1995   0   4   1   0   0   0   0   0   0   1   0   3
## 1996   0   0   0   0   0   0   0   1   0   2   0   2
## 1997   0   0   0   0   0   1   4   3   4   0   0   0
## 1998   0   0   0   1   2   2   3   1   0   0   0   0
## 1999   1   1   3   1   2   0   0   0   0   0   0   0
## 2000   1   6   2   1   0   0   0   0   0   1   0   3
## 2001   3   1   0   0   0   0   0   0   0   7   4   0
## 2002   0   0   0   0   0   1   0   4   5   2   0   0
## 2003   0   0   0   0   1   8   0   0   0   0   0   1
## 2004   0   1   2   5   2   0   0   0   0   0   0   0
## 2005   1   4   2   1   0   0   0   0   0   2   5   5
## 2006   7   3   0   0   0   0   0   1   2   4   0   0
plot(hts)

##Decompose and compare

dec1<-decompose(hts, type="additive")
dec2<-decompose(hts, type="multiplicative" )
plot(dec1)

plot(dec2)

##Setup data to measure error

For this data, the additive model performs better on all measurements.

error1<-na.omit(dec1$random)  #generate residuals without NA
error2<-na.omit(dec2$random)  #generate residuals without NA
error1<-as.vector(error1)       #set residuals as a vector, not a time series
error2<-as.vector(error2)
datavector<-length(dec2$random)-length(dec2$random[is.na(dec2$random)])
abserror1<-abs(error1)  
abserror2<-abs(error2)
sqerror1<-error1^2
sqerror2<-error2^2
pererror1<-(abserror1/datavector)*100
pererror2<-(abserror2/datavector)*100
me<-c(mean(error1),mean(error2))
mad<-c(mean(abserror1), mean(abserror2))
mse<-c(mean(sqerror1),mean(sqerror2))
rmse<-sqrt(mse)
mape<-c(mean(pererror1),mean(pererror2))
rbind(me,mad,mse,rmse,mape)
##             [,1]      [,2]
## me   0.003534226 1.1265691
## mad  1.104523743 1.1265691
## mse  2.126983285 4.6725106
## rmse 1.458418076 2.1615991
## mape 0.164363652 0.1676442