library(tidyr)
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
library(fma)
library(fpp2)
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2 3.3.5 ✓ expsmooth 2.3
##
#load data
hurricane<-read.csv("/Users/Luke/Documents/BC/Predictive Analytics/Discussion 2/hurricanes.csv")
#Decomposition: Hurricanes and Tropical Storms
##Convert to time series, plot
hurricane$Date<-as.Date(hurricane$Date)
hts <- ts(hurricane$Storms, frequency = 12, start=c(1950))
hts
## Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1950 0 0 0 0 0 0 0 4 3 3 2 6
## 1951 3 0 2 0 0 4 0 0 1 0 0 1
## 1952 1 0 4 1 1 1 0 0 2 0 0 0
## 1953 0 2 0 1 4 0 4 2 0 2 0 0
## 1954 1 0 0 1 0 1 1 0 0 5 0 0
## 1955 2 0 0 4 0 0 2 0 0 2 0 4
## 1956 0 2 2 0 5 2 0 1 0 0 4 0
## 1957 0 3 1 0 2 0 0 3 0 0 1 0
## 1958 1 2 0 0 4 0 0 3 0 1 0 3
## 1959 4 0 4 4 1 3 1 0 5 0 0 1
## 1960 0 0 1 0 0 4 0 0 2 0 0 7
## 1961 0 1 4 0 1 0 0 4 0 4 4 0
## 1962 4 1 0 3 1 0 3 0 0 3 0 0
## 1963 7 0 0 5 0 0 3 0 0 2 0 0
## 1964 1 1 0 5 2 0 0 0 0 0 0 1
## 1965 1 4 1 0 0 0 0 0 0 2 0 4
## 1966 1 0 0 0 0 0 0 1 0 4 1 0
## 1967 0 0 0 0 1 2 2 3 2 0 0 0
## 1968 0 0 0 1 2 2 0 0 0 0 0 0
## 1969 0 0 1 6 2 2 0 0 0 0 0 0
## 1970 0 2 1 0 0 0 0 0 0 0 1 5
## 1971 2 0 0 0 0 0 0 1 1 4 1 1
## 1972 0 0 0 0 0 1 0 2 1 0 0 0
## 1973 0 0 0 1 4 4 0 1 0 0 0 0
## 1974 0 0 0 4 3 0 0 0 0 0 0 3
## 1975 0 3 1 0 0 0 0 0 0 0 1 6
## 1976 5 1 0 0 0 0 1 0 2 3 2 0
## 1977 0 0 0 0 0 0 1 6 1 1 0 0
## 1978 0 0 1 1 0 2 0 1 0 0 0 0
## 1979 0 0 2 2 2 0 0 0 0 0 0 1
## 1980 1 4 1 0 0 0 0 0 0 1 1 3
## 1981 1 0 0 0 0 0 1 0 1 2 1 0
## 1982 0 0 0 0 0 0 0 3 2 0 1 0
## 1983 0 0 0 0 1 3 3 0 0 0 0 0
## 1984 0 1 2 2 1 0 0 0 0 0 0 0
## 1985 1 5 1 2 0 0 0 0 1 1 0 4
## 1986 1 2 0 0 0 0 0 2 0 3 0 0
## 1987 0 0 0 0 0 0 0 2 0 0 0 0
## 1988 0 0 0 0 0 6 1 1 0 0 0 0
## 1989 0 0 2 3 2 1 0 0 0 0 0 2
## 1990 0 2 0 1 0 0 0 0 0 0 0 3
## 1991 1 0 0 0 0 0 0 0 0 6 1 1
## 1992 0 0 0 0 0 1 3 2 1 1 0 0
## 1993 0 0 0 0 3 2 4 0 0 0 0 0
## 1994 0 1 0 3 3 0 0 0 0 1 0 0
## 1995 0 4 1 0 0 0 0 0 0 1 0 3
## 1996 0 0 0 0 0 0 0 1 0 2 0 2
## 1997 0 0 0 0 0 1 4 3 4 0 0 0
## 1998 0 0 0 1 2 2 3 1 0 0 0 0
## 1999 1 1 3 1 2 0 0 0 0 0 0 0
## 2000 1 6 2 1 0 0 0 0 0 1 0 3
## 2001 3 1 0 0 0 0 0 0 0 7 4 0
## 2002 0 0 0 0 0 1 0 4 5 2 0 0
## 2003 0 0 0 0 1 8 0 0 0 0 0 1
## 2004 0 1 2 5 2 0 0 0 0 0 0 0
## 2005 1 4 2 1 0 0 0 0 0 2 5 5
## 2006 7 3 0 0 0 0 0 1 2 4 0 0
plot(hts)
##Decompose and compare
dec1<-decompose(hts, type="additive")
dec2<-decompose(hts, type="multiplicative" )
plot(dec1)
plot(dec2)
##Setup data to measure error
For this data, the additive model performs better on all measurements.
error1<-na.omit(dec1$random) #generate residuals without NA
error2<-na.omit(dec2$random) #generate residuals without NA
error1<-as.vector(error1) #set residuals as a vector, not a time series
error2<-as.vector(error2)
datavector<-length(dec2$random)-length(dec2$random[is.na(dec2$random)])
abserror1<-abs(error1)
abserror2<-abs(error2)
sqerror1<-error1^2
sqerror2<-error2^2
pererror1<-(abserror1/datavector)*100
pererror2<-(abserror2/datavector)*100
me<-c(mean(error1),mean(error2))
mad<-c(mean(abserror1), mean(abserror2))
mse<-c(mean(sqerror1),mean(sqerror2))
rmse<-sqrt(mse)
mape<-c(mean(pererror1),mean(pererror2))
rbind(me,mad,mse,rmse,mape)
## [,1] [,2]
## me 0.003534226 1.1265691
## mad 1.104523743 1.1265691
## mse 2.126983285 4.6725106
## rmse 1.458418076 2.1615991
## mape 0.164363652 0.1676442