1. Read in data and convert to time series.

library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
library(zoo)
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
library(knitr)
#read in data
ridership <- read.csv("Amtrak.csv")
##convert data to a time series
ridership.ts <- ts(ridership$Ridership_in_thousands, start = c(1991,1), end = c(2004, 3), frequency = 12)
tail(ridership)
##         Month Ridership_in_thousands
## 154 10/1/2003                   2121
## 155 11/1/2003                   2076
## 156 12/1/2003                   2141
## 157  1/1/2004                   1832
## 158  2/1/2004                   1838
## 159  3/1/2004                   2132

2. Visualize for trend and seasonality

Trend

#trend line
ridership.lm <- tslm(ridership.ts ~ trend + I(trend^2)) 
par(mfrow = c(2, 1)) 
#simple plot
plot(ridership.ts, xlab = "Time", ylab = "Ridership", ylim = c(1300, 2300), bty = "l")
#plot fitted trend line
lines(ridership.lm$fitted, lwd = 2) 
#zoom into 4 years
#first create window
ridership.ts.zoom <- window(ridership.ts, start = c(1997, 1), end = c(2000, 12))
#plot zooned in data
plot(ridership.ts.zoom, xlab = "Time", ylab = "Ridership", ylim = c(1300, 2300), bty = "l") 

Using centered moving average to visualize trend, along with linear trend line

# moving average by one year
ma.centered <- ma(ridership.ts, order = 12) 
plot(ridership.ts, xlab = "Time", ylab = "Ridership", ylim = c(1300, 2300), bty = "l")
#plot fitted trend line
lines(ridership.lm$fitted, lwd = 2, col = 4) 
lines(ma.centered, lwd = 2, col = 3) 
legend(1994,2300, c("Ridership","Trend", "Centered Moving Average"), lty=c(1,1,1), lwd=c(2,2,2), col = c(1,4,3), bty = "o")

Seasonality

Look for seasonality

library(ggplot2)
ggseasonplot(ridership.ts, ylab = "Amtrak Ridership", main = "Seasonal Plot for Amtrak Ridership", lwd = 2) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank())

Ridership boken out by month

par(oma = c(0, 0, 0, 2))
xrange <- c(1992,2004)
yrange <- range(ridership.ts)
plot(xrange, yrange, type="n", xlab="Year", ylab="Amtrak Ridership", bty="l", las=1)
colors <- rainbow(12) 
linetype <- c(1:12) 
plotchar <- c(1:12)
axis(1, at=seq(1992,2004,1), labels=format(seq(1992,2004,1)))
for (i in 1:12) { 
  currentMonth <- subset(ridership.ts, cycle(ridership.ts)==i)
  lines(seq(1992, 1992 +length(currentMonth)-1,1), currentMonth, type="b", lwd=1,
      lty=linetype[i], col=colors[i], pch=plotchar[i]) 
} 
title("Ridership Broken Out by Month")
legend(2002.35, 80, 1:12, cex=0.8, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)

There appears to be some seasonality

3. Forecast with moving average after double differencing

Double differencing

diff.twice.ts <- diff(diff(ridership.ts, lag = 12), lag = 1)

Trailing moving average of double differenced Amtrak data

#create validation period length
nValid <- 36
#create training period length
nTrain <- length(diff.twice.ts) - nValid 
#create training period window
train.ts <- window(diff.twice.ts, start = c(1992, 2), end = c(1992, nTrain + 1)) 
#create validation period window
valid.ts <- window(diff.twice.ts, start = c(1992, nTrain + 2), end = c(1992, nTrain + 1 + nValid))
#trailing moving average of training period
ma.trailing <- rollmean(train.ts, k = 12, align = "right")
forecast.ma.trailing <- forecast(ma.trailing, h = nValid, level = 0)
accuracy(forecast.ma.trailing, valid.ts)
##                      ME     RMSE       MAE      MPE     MAPE      MASE
## Training set -0.1555953 10.29453  8.033678     -Inf      Inf 0.5263357
## Test set     -3.6568249 75.99122 52.994921 101.4047 104.3094 3.4720237
##                     ACF1 Theil's U
## Training set  0.07846082        NA
## Test set     -0.36676938 0.9758209

4. Simple Exponential Smoothing using only seasonal differencing

diff.ts <- diff(ridership.ts, lag = 12)
#create validation period length
nValid <- 36
#create training period length
nTrain <- length(diff.ts) - nValid 
#create training period window
train.ts <- window(diff.ts, start = c(1992, 2), end = c(1992, nTrain + 1)) 
#create validation period window
valid.ts <- window(diff.ts, start = c(1992, nTrain + 2), end = c(1992, nTrain + 1 + nValid))
## Warning in window.default(x, ...): 'end' value not changed

ANN, \(\alpha = 0.2\)

#simple exponential smoothing with no trend and no seasonality model, alpha = 0.2
ses1 <- ets(train.ts, model = "ANN", alpha = 0.2)
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in. 
ses.pred <- forecast(ses1, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = .2
accuracy(ses.pred, valid.ts)
##                      ME      RMSE      MAE      MPE     MAPE      MASE
## Training set   5.614821  79.14323 62.55559 82.41135 219.2102 0.5615708
## Test set     -58.906176 107.33000 87.24667 38.09999 254.0051 0.7832264
##                   ACF1 Theil's U
## Training set 0.3397771        NA
## Test set     0.6328273  2.402537
ses1
## ETS(A,N,N) 
## 
## Call:
##  ets(y = train.ts, model = "ANN", alpha = 0.2) 
## 
##   Smoothing parameters:
##     alpha = 0.2 
## 
##   Initial states:
##     l = -23.2 
## 
##   sigma:  79.1432
## 
##      AIC     AICc      BIC 
## 1497.177 1497.289 1502.596

ANN, \(\alpha = null\)

#simple exponential smoothing with no trend and no seasonality model, ANN, no predetermined alpha
ses2 <- ets(train.ts, model = "ANN")
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in. 
ses.pred2 <- forecast(ses2, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = null
accuracy(ses.pred2, valid.ts)
##                     ME     RMSE      MAE      MPE     MAPE      MASE
## Training set   2.24230 75.42840 59.03037 97.97484 216.4829 0.5299245
## Test set     -31.42115 95.06356 74.92949 54.87022 188.7018 0.6726532
##                    ACF1 Theil's U
## Training set 0.06668754        NA
## Test set     0.63282735  1.857062
ses2
## ETS(A,N,N) 
## 
## Call:
##  ets(y = train.ts, model = "ANN") 
## 
##   Smoothing parameters:
##     alpha = 0.4862 
## 
##   Initial states:
##     l = -47.0394 
## 
##   sigma:  75.4284
## 
##      AIC     AICc      BIC 
## 1488.505 1488.729 1496.633

When no model determined, the function chooses ANN

ZZZ, \(\alpha = 0.2\)

#simple exponential smoothing with no trend and no predetermined model, alpha = 0.2
ses3 <- ets(train.ts, model = "ZZZ", alpha = 0.2)
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in. 
ses.pred3 <- forecast(ses3, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = .2
accuracy(ses.pred3, valid.ts)
##                      ME      RMSE      MAE      MPE     MAPE      MASE
## Training set   5.614821  79.14323 62.55559 82.41135 219.2102 0.5615708
## Test set     -58.906176 107.33000 87.24667 38.09999 254.0051 0.7832264
##                   ACF1 Theil's U
## Training set 0.3397771        NA
## Test set     0.6328273  2.402537
ses3
## ETS(A,N,N) 
## 
## Call:
##  ets(y = train.ts, model = "ZZZ", alpha = 0.2) 
## 
##   Smoothing parameters:
##     alpha = 0.2 
## 
##   Initial states:
##     l = -23.2 
## 
##   sigma:  79.1432
## 
##      AIC     AICc      BIC 
## 1497.177 1497.289 1502.596

Again, model chooses ANN and alpha of 0.468 ZZZ, \(\alpha = null\)

#simple exponential smoothing with no trend and no seasonality model, alpha  is blank
ses4 <- ets(train.ts, model = "ZZZ")
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in. 
ses.pred4 <- forecast(ses4, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = blank
accuracy(ses.pred4, valid.ts)
##                     ME     RMSE      MAE      MPE     MAPE      MASE
## Training set   2.24230 75.42840 59.03037 97.97484 216.4829 0.5299245
## Test set     -31.42115 95.06356 74.92949 54.87022 188.7018 0.6726532
##                    ACF1 Theil's U
## Training set 0.06668754        NA
## Test set     0.63282735  1.857062
ses4
## ETS(A,N,N) 
## 
## Call:
##  ets(y = train.ts, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.4862 
## 
##   Initial states:
##     l = -47.0394 
## 
##   sigma:  75.4284
## 
##      AIC     AICc      BIC 
## 1488.505 1488.729 1496.633

Advanced exponential smoothing using Holt Winters.

Since this model is apppropriatefor series with trend and seasonality, no differencing is needed.

#create validation period length
nValid <- 36
#create training period length
nTrain <- length(ridership.ts) - nValid 
#create training period window
train.ts <- window(ridership.ts, start = c(1993, 1), end = c(1992, nTrain + 1)) 
#create validation period window
valid.ts <- window(ridership.ts, start = c(199, nTrain + 2), end = c(1993, nTrain + 1 + nValid))
## Warning in window.default(x, ...): 'start' value not changed
## Warning in window.default(x, ...): 'end' value not changed
hwRidership <- HoltWinters(train.ts)
forecasthwRIdership <- forecast(hwRidership, h = nValid, level = 0)
accuracy(forecasthwRIdership, valid.ts)
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set   7.849522 58.37717 46.30338  0.3386518 2.673978 0.5949298
## Test set     -25.806716 80.11433 58.69309 -1.3646450 2.981850 0.7541191
##                   ACF1 Theil's U
## Training set 0.2272424        NA
## Test set     0.6447138 0.4889906
hwRidership
## Holt-Winters exponential smoothing with trend and additive seasonal component.
## 
## Call:
## HoltWinters(x = train.ts)
## 
## Smoothing parameters:
##  alpha: 0.4473881
##  beta : 0.01735796
##  gamma: 0.613333
## 
## Coefficients:
##            [,1]
## a   1982.026347
## b      2.203838
## s1    95.714582
## s2   108.701878
## s3   179.595103
## s4   225.907694
## s5  -148.855119
## s6    54.208672
## s7    53.348729
## s8    51.294335
## s9  -224.162897
## s10 -240.616007
## s11   42.504412
## s12   68.406459