For this week’s discussion I chose to continue exploring the data set from Week 2’s discussion post: monthly river flow from 1923 to 1960 for the Madison River near West Yellowstone, MT, obtained from Rob Hyndman’s Time Series Data Library (the tsdl package).

#install.packages("devtools")
#devtools::install_github("FinYang/tsdl")
library(forecast)
library(xts)
library(tseries)
library(fpp)
library(fpp2)
library(seasonal)
library(magrittr)
library(dplyr)
library(tsdl)
library(kableExtra)
library(gridExtra)
ys_flow <- tsdl[[300]] #Madison River monthly riverflow series (Hipel and McLeod 1994)

str(ys_flow)
##  Time-Series [1:456] from 1923 to 1961: 10.3 11.1 11.6 11.3 10.8 ...
##  - attr(*, "source")= chr "Hipel and McLeod (1994)"
##  - attr(*, "description")= chr "Monthly riverflow in cms, Madison River near west yellowstone, MT., 1923 <U+FFFD> 1960"
##  - attr(*, "subject")= chr "Hydrology"
autoplot(ys_flow, main="Madison River: Monthly Riverflow", ylab="Flow in cms", xlab="Year")

An X11 decomposition of the series shows some year-to-year variability in the seasonal component.

ys_flow %>% seas(x11 = "") -> fit

autoplot(fit) + ggtitle("X11 Decomposition of Madison River Flow")
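A seasonal sub-series plot (not in the original post) is another way to see how each month's flow varies from year to year; a quick sketch using ggsubseriesplot() from the forecast package already loaded above:

#Sketch: seasonal sub-series plot, one panel per month with its mean flow
ggsubseriesplot(ys_flow) + ylab("Flow in cms") + ggtitle("Madison River Flow: Seasonal Sub-series")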

The training and test data are created using the 80/20 rule:

train=ys_flow[1:365]  #first 365 observations (about 80%) for training
test=ys_flow[366:456] #last 91 observations (about 20%) held out for testing

train.ts=ts(train,frequency=12, start = 1923)

test.ts=ts(test,frequency=12, start = c(1953,6)) #test period runs June 1953 - December 1960
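As an aside, the same split can be written with window(), which carries the original time index over automatically; a small sketch (the cut at May 1953 corresponds to observation 365):

#Sketch: equivalent 80/20 split using window(), keeping the monthly time index
train.ts <- window(ys_flow, end = c(1953, 5))   #Jan 1923 - May 1953, 365 observations
test.ts  <- window(ys_flow, start = c(1953, 6)) #Jun 1953 - Dec 1960, 91 observations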

Model 1: ETS Automatic Selection

With the first model we let ets() choose the error, trend, and seasonal components automatically:

Model_1 <- ets(ys_flow, model="ZZZ")
Model_1 
## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = ys_flow, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.5875 
##     beta  = 1e-04 
##     gamma = 1e-04 
##     phi   = 0.9781 
## 
##   Initial states:
##     l = 13.34 
##     b = 0.2173 
##     s = 0.8532 0.858 0.9801 1.582 1.698 1.007
##            0.8073 0.8083 0.8272 0.8482 0.8667 0.864
## 
##   sigma:  0.1207
## 
##      AIC     AICc      BIC 
## 3174.461 3176.026 3248.666

An ETS(M,Ad,M) model was selected: multiplicative errors, a damped additive trend, and multiplicative seasonality.
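For reference, the ETS(M,Ad,M) state space form (following Hyndman & Athanasopoulos, Forecasting: Principles and Practice) is

$$
\begin{aligned}
y_t &= (\ell_{t-1} + \phi b_{t-1})\,s_{t-12}\,(1+\varepsilon_t) \\
\ell_t &= (\ell_{t-1} + \phi b_{t-1})(1+\alpha\varepsilon_t) \\
b_t &= \phi b_{t-1} + \beta(\ell_{t-1} + \phi b_{t-1})\varepsilon_t \\
s_t &= s_{t-12}(1+\gamma\varepsilon_t)
\end{aligned}
$$

where the seasonal period is 12 for monthly data and the fitted values of $\alpha$, $\beta$, $\gamma$, and $\phi$ are the ones reported above.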

Model_1.pred=forecast(Model_1,h=12)

autoplot(ys_flow) +
  autolayer(Model_1.pred)+ ylab("River Flow") +ggtitle("Model_1 Auto-select: M,Ad,M")

kable(accuracy(Model_1))

|             |         ME|     RMSE|      MAE|       MPE|     MAPE|      MASE|      ACF1|
|:------------|----------:|--------:|--------:|---------:|--------:|---------:|---------:|
|Training set | -0.0795436| 2.176228| 1.188124| -1.353442| 8.027805| 0.5460296| 0.0273155|

kable(Model_1$par, col.names = "Estimate")

|      |   Estimate|
|:-----|----------:|
|alpha |  0.5874831|
|beta  |  0.0001047|
|gamma |  0.0001032|
|phi   |  0.9780619|
|l     | 13.3399989|
|b     |  0.2172909|
|s0    |  0.8531943|
|s1    |  0.8580440|
|s2    |  0.9801284|
|s3    |  1.5820155|
|s4    |  1.6979999|
|s5    |  1.0069808|
|s6    |  0.8072585|
|s7    |  0.8083224|
|s8    |  0.8271646|
|s9    |  0.8482076|
|s10   |  0.8667260|

Model 2: ETS (A,N,N)
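This model forces simple exponential smoothing: additive errors, no trend, and no seasonal component. In state space form (again following Hyndman & Athanasopoulos) it is just

$$
\begin{aligned}
y_t &= \ell_{t-1} + \varepsilon_t \\
\ell_t &= \ell_{t-1} + \alpha\varepsilon_t
\end{aligned}
$$

so we should expect it to track the level of the series but miss the strong seasonal swings.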

Model_2 <- ets(ys_flow, model="ANN")
Model_2 
## ETS(A,N,N) 
## 
## Call:
##  ets(y = ys_flow, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 10.2858 
## 
##   sigma:  4.2813
## 
##      AIC     AICc      BIC 
## 4122.141 4122.194 4134.508
Model_2.pred=forecast(Model_2,h=12)

autoplot(ys_flow) +
  autolayer(Model_2.pred) + ylab("River Flow") + ggtitle("Model_2: A,N,N")

kable(accuracy(Model_2))
|             |         ME|     RMSE|      MAE|       MPE|     MAPE|     MASE|      ACF1|
|:------------|----------:|--------:|--------:|---------:|--------:|--------:|---------:|
|Training set | -0.0004495| 4.271931| 2.378646| -2.876575| 15.34773| 1.093161| 0.0551872|
kable(Model_2$par, col.names = "Estimate")

|      |   Estimate|
|:-----|----------:|
|alpha |  0.9998998|
|l     | 10.2857705|

Model 3: Holt-Winters Seasonal Method

Model_3a <- hw(ys_flow, seasonal = "additive")
Model_3b <- hw(ys_flow, seasonal = "multiplicative")
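One note on these calls: hw() forecasts 2 × frequency = 24 steps ahead by default, so the horizon differs from the 12-month forecasts used for Models 1 and 2. If a matching horizon is wanted, it can be passed explicitly (a minor variation, not in the original post):

#Sketch: same Holt-Winters fits with an explicit 12-month forecast horizon
Model_3a <- hw(ys_flow, seasonal = "additive", h = 12)
Model_3b <- hw(ys_flow, seasonal = "multiplicative", h = 12)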



autoplot(ys_flow) +
  autolayer(Model_3a, series = "HW additive forecasts", PI = FALSE) + 
  autolayer(Model_3b, series = "HW multiplicative forecasts", PI = FALSE) + 
  ylab("River Flow") + ggtitle("Model_3: Holt-Winter") +
  guides(colour=guide_legend(title="Forecast"))

kable(accuracy(Model_3a))
|             |         ME|     RMSE|      MAE|        MPE|     MAPE|      MASE|      ACF1|
|:------------|----------:|--------:|--------:|----------:|--------:|---------:|---------:|
|Training set | -0.0022947| 2.318892| 1.370909| -0.7627078| 9.685716| 0.6300324| 0.1525013|
kable(accuracy(Model_3b))
|             |        ME|     RMSE|     MAE|        MPE|     MAPE|      MASE|       ACF1|
|:------------|---------:|--------:|-------:|----------:|--------:|---------:|----------:|
|Training set | 0.0478974| 2.198916| 1.24049| -0.5682699| 8.446475| 0.5700954| -0.0054562|
m1<- gghistogram(residuals(Model_1)) + ggtitle("Model 1: Histogram of residuals")
m2<- gghistogram(residuals(Model_2)) + ggtitle("Model 2: Histogram of residuals")
m3<- gghistogram(residuals(Model_3a)) + ggtitle("Model 3a: Histogram of residuals")
m4<- gghistogram(residuals(Model_3b)) + ggtitle("Model 3b: Histogram of residuals")

grid.arrange(m1,m2,m3,m4, nrow=2)
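The histograms only show the shape of the residuals; they say nothing about remaining autocorrelation. A fuller diagnostic for the winning model could use checkresiduals() from the forecast package, which adds a residual time plot, ACF, and a Ljung-Box test (a small sketch, not in the original post):

#Sketch: residual time plot, ACF, histogram, and Ljung-Box test for Model 1
checkresiduals(Model_1)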

Model 1, the ETS(M,Ad,M) model with multiplicative errors, a damped trend, and multiplicative seasonality, has the lowest training-set RMSE (2.18) of the four models, with the multiplicative Holt-Winters model close behind (2.20).
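Since the RMSE values above are training-set errors, the train/test split created earlier could also be used for an out-of-sample check. A rough sketch, assuming each model is refit on train.ts and its forecasts are scored against the held-out test.ts:

#Sketch: refit on the training series and measure accuracy on the 91-observation test set
h.test <- length(test.ts)
fit1  <- ets(train.ts, model = "ZZZ")
fit2  <- ets(train.ts, model = "ANN")
fit3a <- hw(train.ts, seasonal = "additive", h = h.test)
fit3b <- hw(train.ts, seasonal = "multiplicative", h = h.test)

accuracy(forecast(fit1, h = h.test), test.ts)
accuracy(forecast(fit2, h = h.test), test.ts)
accuracy(fit3a, test.ts)
accuracy(fit3b, test.ts)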