For this week’s discussion I chose to continue exploring the data set from Week 2’s discussion post: monthly river flow from 1923 to 1960 for the Madison River near West Yellowstone, MT, obtained from Rob Hyndman’s Time Series Data Library (the tsdl package).

#install.packages("devtools")
#devtools::install_github("FinYang/tsdl")
library(forecast)
library(xts)
library(tseries)
library(fpp)
library(fpp2)
library(seasonal)
library(magrittr)
library(dplyr)
library(tsdl)
library(kableExtra)
library(gridExtra)
ys_flow <- tsdl[[300]] #Madison River monthly riverflow series (Hipel and McLeod 1994)

str(ys_flow)
##  Time-Series [1:456] from 1923 to 1961: 10.3 11.1 11.6 11.3 10.8 ...
##  - attr(*, "source")= chr "Hipel and McLeod (1994)"
##  - attr(*, "description")= chr "Monthly riverflow in cms, Madison River near west yellowstone, MT., 1923 <U+FFFD> 1960"
##  - attr(*, "subject")= chr "Hydrology"
autoplot(ys_flow, main="Madison River: Monthly Riverflow", ylab="Flow in cms", xlab="Year")

An X11 decomposition of the series shows some year-to-year variability in the seasonal component.

ys_flow %>% seas(x11 = "") -> fit

autoplot(fit) + ggtitle("X11 Decomposition of Madison River Flow")
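A seasonal sub-series plot (not in the original post) is another way to see how each month's flow varies from year to year; a quick sketch using ggsubseriesplot() from the forecast package already loaded above:

#Sketch: seasonal sub-series plot, one panel per month with its mean flow
ggsubseriesplot(ys_flow) + ylab("Flow in cms") + ggtitle("Madison River Flow: Seasonal Sub-series")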

The training and test data are created using the 80/20 rule:

train=ys_flow[1:365]  #first 365 observations (about 80%) for training
test=ys_flow[366:456] #last 91 observations (about 20%) held out for testing

train.ts=ts(train,frequency=12, start = 1923)

test.ts=ts(test,frequency=12, start = c(1953,6)) #test period runs June 1953 - December 1960
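As an aside, the same split can be written with window(), which carries the original time index over automatically; a small sketch (the cut at May 1953 corresponds to observation 365):

#Sketch: equivalent 80/20 split using window(), keeping the monthly time index
train.ts <- window(ys_flow, end = c(1953, 5))   #Jan 1923 - May 1953, 365 observations
test.ts  <- window(ys_flow, start = c(1953, 6)) #Jun 1953 - Dec 1960, 91 observations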

Model 1: ETS Automatic Selection

With the first model we let ets() choose the error, trend, and seasonal components automatically:

Model_1 <- ets(ys_flow, model="ZZZ")
Model_1 
## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = ys_flow, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.5875 
##     beta  = 1e-04 
##     gamma = 1e-04 
##     phi   = 0.9781 
## 
##   Initial states:
##     l = 13.34 
##     b = 0.2173 
##     s = 0.8532 0.858 0.9801 1.582 1.698 1.007
##            0.8073 0.8083 0.8272 0.8482 0.8667 0.864
## 
##   sigma:  0.1207
## 
##      AIC     AICc      BIC 
## 3174.461 3176.026 3248.666

An ETS(M,Ad,M) model was selected: multiplicative errors, a damped additive trend, and multiplicative seasonality.
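For reference, the ETS(M,Ad,M) state space form (following Hyndman & Athanasopoulos, Forecasting: Principles and Practice) is

$$
\begin{aligned}
y_t &= (\ell_{t-1} + \phi b_{t-1})\,s_{t-12}\,(1+\varepsilon_t) \\
\ell_t &= (\ell_{t-1} + \phi b_{t-1})(1+\alpha\varepsilon_t) \\
b_t &= \phi b_{t-1} + \beta(\ell_{t-1} + \phi b_{t-1})\varepsilon_t \\
s_t &= s_{t-12}(1+\gamma\varepsilon_t)
\end{aligned}
$$

where the seasonal period is 12 for monthly data and the fitted values of $\alpha$, $\beta$, $\gamma$, and $\phi$ are the ones reported above.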

Model_1.pred=forecast(Model_1,h=12)

autoplot(ys_flow) +
  autolayer(Model_1.pred)+ ylab("River Flow") +ggtitle("Model_1 Auto-select: M,Ad,M")

kable(accuracy(Model_1))

|             |         ME|     RMSE|      MAE|       MPE|     MAPE|      MASE|      ACF1|
|:------------|----------:|--------:|--------:|---------:|--------:|---------:|---------:|
|Training set | -0.0795436| 2.176228| 1.188124| -1.353442| 8.027805| 0.5460296| 0.0273155|

kable(Model_1$par, col.names = "Estimate")

|      |   Estimate|
|:-----|----------:|
|alpha |  0.5874831|
|beta  |  0.0001047|
|gamma |  0.0001032|
|phi   |  0.9780619|
|l     | 13.3399989|
|b     |  0.2172909|
|s0    |  0.8531943|
|s1    |  0.8580440|
|s2    |  0.9801284|
|s3    |  1.5820155|
|s4    |  1.6979999|
|s5    |  1.0069808|
|s6    |  0.8072585|
|s7    |  0.8083224|
|s8    |  0.8271646|
|s9    |  0.8482076|
|s10   |  0.8667260|

Model 2: ETS (A,N,N)
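This model forces simple exponential smoothing: additive errors, no trend, and no seasonal component. In state space form (again following Hyndman & Athanasopoulos) it is just

$$
\begin{aligned}
y_t &= \ell_{t-1} + \varepsilon_t \\
\ell_t &= \ell_{t-1} + \alpha\varepsilon_t
\end{aligned}
$$

so we should expect it to track the level of the series but miss the strong seasonal swings.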

Model_2 <- ets(ys_flow, model="ANN")
Model_2 
## ETS(A,N,N) 
## 
## Call:
##  ets(y = ys_flow, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 10.2858 
## 
##   sigma:  4.2813
## 
##      AIC     AICc      BIC 
## 4122.141 4122.194 4134.508
Model_2.pred=forecast(Model_2,h=12)

autoplot(ys_flow) +
  autolayer(Model_2.pred) + ylab("River Flow") + ggtitle("Model_2: A,N,N")

kable(accuracy(Model_2))
|             |         ME|     RMSE|      MAE|       MPE|     MAPE|     MASE|      ACF1|
|:------------|----------:|--------:|--------:|---------:|--------:|--------:|---------:|
|Training set | -0.0004495| 4.271931| 2.378646| -2.876575| 15.34773| 1.093161| 0.0551872|
kable(Model_2$par, col.names = "Estimate")

|      |   Estimate|
|:-----|----------:|
|alpha |  0.9998998|
|l     | 10.2857705|

Model 3: Holt-Winters Seasonal Method

Model_3a <- hw(ys_flow, seasonal = "additive")
Model_3b <- hw(ys_flow, seasonal = "multiplicative")
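One note on these calls: hw() forecasts 2 × frequency = 24 steps ahead by default, so the horizon differs from the 12-month forecasts used for Models 1 and 2. If a matching horizon is wanted, it can be passed explicitly (a minor variation, not in the original post):

#Sketch: same Holt-Winters fits with an explicit 12-month forecast horizon
Model_3a <- hw(ys_flow, seasonal = "additive", h = 12)
Model_3b <- hw(ys_flow, seasonal = "multiplicative", h = 12)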



autoplot(ys_flow) +
  autolayer(Model_3a, series = "HW additive forecasts", PI = FALSE) + 
  autolayer(Model_3b, series = "HW multiplicative forecasts", PI = FALSE) + 
  ylab("River Flow") + ggtitle("Model_3: Holt-Winter") +
  guides(colour=guide_legend(title="Forecast"))

kable(accuracy(Model_3a))
|             |         ME|     RMSE|      MAE|        MPE|     MAPE|      MASE|      ACF1|
|:------------|----------:|--------:|--------:|----------:|--------:|---------:|---------:|
|Training set | -0.0022947| 2.318892| 1.370909| -0.7627078| 9.685716| 0.6300324| 0.1525013|
kable(accuracy(Model_3b))
|             |        ME|     RMSE|     MAE|        MPE|     MAPE|      MASE|       ACF1|
|:------------|---------:|--------:|-------:|----------:|--------:|---------:|----------:|
|Training set | 0.0478974| 2.198916| 1.24049| -0.5682699| 8.446475| 0.5700954| -0.0054562|
m1<- gghistogram(residuals(Model_1)) + ggtitle("Model 1: Histogram of residuals")
m2<- gghistogram(residuals(Model_2)) + ggtitle("Model 2: Histogram of residuals")
m3<- gghistogram(residuals(Model_3a)) + ggtitle("Model 3a: Histogram of residuals")
m4<- gghistogram(residuals(Model_3b)) + ggtitle("Model 3b: Histogram of residuals")

grid.arrange(m1,m2,m3,m4, nrow=2)
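The histograms only show the shape of the residuals; they say nothing about remaining autocorrelation. A fuller diagnostic for the winning model could use checkresiduals() from the forecast package, which adds a residual time plot, ACF, and a Ljung-Box test (a small sketch, not in the original post):

#Sketch: residual time plot, ACF, histogram, and Ljung-Box test for Model 1
checkresiduals(Model_1)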

Model 1, the ETS(M,Ad,M) model with multiplicative errors, a damped trend, and multiplicative seasonality, has the lowest training-set RMSE (2.18) of the four models, with the multiplicative Holt-Winters model close behind (2.20).
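Since the RMSE values above are training-set errors, the train/test split created earlier could also be used for an out-of-sample check. A rough sketch, assuming each model is refit on train.ts and its forecasts are scored against the held-out test.ts:

#Sketch: refit on the training series and measure accuracy on the 91-observation test set
h.test <- length(test.ts)
fit1  <- ets(train.ts, model = "ZZZ")
fit2  <- ets(train.ts, model = "ANN")
fit3a <- hw(train.ts, seasonal = "additive", h = h.test)
fit3b <- hw(train.ts, seasonal = "multiplicative", h = h.test)

accuracy(forecast(fit1, h = h.test), test.ts)
accuracy(forecast(fit2, h = h.test), test.ts)
accuracy(fit3a, test.ts)
accuracy(fit3b, test.ts)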