library(forecast)
## Warning: package 'forecast' was built under R version 3.3.3
library(zoo)
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(knitr)
#read in data
ridership <- read.csv("Amtrak.csv")
##convert data to a time series
ridership.ts <- ts(ridership$Ridership_in_thousands, start = c(1991,1), end = c(2004, 3), frequency = 12)
tail(ridership)
## Month Ridership_in_thousands
## 154 10/1/2003 2121
## 155 11/1/2003 2076
## 156 12/1/2003 2141
## 157 1/1/2004 1832
## 158 2/1/2004 1838
## 159 3/1/2004 2132
#trend line
ridership.lm <- tslm(ridership.ts ~ trend + I(trend^2))
par(mfrow = c(2, 1))
#simple plot
plot(ridership.ts, xlab = "Time", ylab = "Ridership", ylim = c(1300, 2300), bty = "l")
#plot fitted trend line
lines(ridership.lm$fitted, lwd = 2)
#zoom into 4 years
#first create window
ridership.ts.zoom <- window(ridership.ts, start = c(1997, 1), end = c(2000, 12))
#plot zooned in data
plot(ridership.ts.zoom, xlab = "Time", ylab = "Ridership", ylim = c(1300, 2300), bty = "l")
Using centered moving average to visualize trend, along with linear trend line
# moving average by one year
ma.centered <- ma(ridership.ts, order = 12)
plot(ridership.ts, xlab = "Time", ylab = "Ridership", ylim = c(1300, 2300), bty = "l")
#plot fitted trend line
lines(ridership.lm$fitted, lwd = 2, col = 4)
lines(ma.centered, lwd = 2, col = 3)
legend(1994,2300, c("Ridership","Trend", "Centered Moving Average"), lty=c(1,1,1), lwd=c(2,2,2), col = c(1,4,3), bty = "o")
Look for seasonality
library(ggplot2)
ggseasonplot(ridership.ts, ylab = "Amtrak Ridership", main = "Seasonal Plot for Amtrak Ridership", lwd = 2) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(),
panel.background = element_blank())
Ridership boken out by month
par(oma = c(0, 0, 0, 2))
xrange <- c(1992,2004)
yrange <- range(ridership.ts)
plot(xrange, yrange, type="n", xlab="Year", ylab="Amtrak Ridership", bty="l", las=1)
colors <- rainbow(12)
linetype <- c(1:12)
plotchar <- c(1:12)
axis(1, at=seq(1992,2004,1), labels=format(seq(1992,2004,1)))
for (i in 1:12) {
currentMonth <- subset(ridership.ts, cycle(ridership.ts)==i)
lines(seq(1992, 1992 +length(currentMonth)-1,1), currentMonth, type="b", lwd=1,
lty=linetype[i], col=colors[i], pch=plotchar[i])
}
title("Ridership Broken Out by Month")
legend(2002.35, 80, 1:12, cex=0.8, col=colors, pch=plotchar, lty=linetype, title="Month", xpd=NA)
There appears to be some seasonality
Double differencing
diff.twice.ts <- diff(diff(ridership.ts, lag = 12), lag = 1)
Trailing moving average of double differenced Amtrak data
#create validation period length
nValid <- 36
#create training period length
nTrain <- length(diff.twice.ts) - nValid
#create training period window
train.ts <- window(diff.twice.ts, start = c(1992, 2), end = c(1992, nTrain + 1))
#create validation period window
valid.ts <- window(diff.twice.ts, start = c(1992, nTrain + 2), end = c(1992, nTrain + 1 + nValid))
#trailing moving average of training period
ma.trailing <- rollmean(train.ts, k = 12, align = "right")
forecast.ma.trailing <- forecast(ma.trailing, h = nValid, level = 0)
accuracy(forecast.ma.trailing, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1555953 10.29453 8.033678 -Inf Inf 0.5263357
## Test set -3.6568249 75.99122 52.994921 101.4047 104.3094 3.4720237
## ACF1 Theil's U
## Training set 0.07846082 NA
## Test set -0.36676938 0.9758209
4. Simple Exponential Smoothing using only seasonal differencing
diff.ts <- diff(ridership.ts, lag = 12)
#create validation period length
nValid <- 36
#create training period length
nTrain <- length(diff.ts) - nValid
#create training period window
train.ts <- window(diff.ts, start = c(1992, 2), end = c(1992, nTrain + 1))
#create validation period window
valid.ts <- window(diff.ts, start = c(1992, nTrain + 2), end = c(1992, nTrain + 1 + nValid))
## Warning in window.default(x, ...): 'end' value not changed
ANN, \(\alpha = 0.2\)
#simple exponential smoothing with no trend and no seasonality model, alpha = 0.2
ses1 <- ets(train.ts, model = "ANN", alpha = 0.2)
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in.
ses.pred <- forecast(ses1, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = .2
accuracy(ses.pred, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 5.614821 79.14323 62.55559 82.41135 219.2102 0.5615708
## Test set -58.906176 107.33000 87.24667 38.09999 254.0051 0.7832264
## ACF1 Theil's U
## Training set 0.3397771 NA
## Test set 0.6328273 2.402537
ses1
## ETS(A,N,N)
##
## Call:
## ets(y = train.ts, model = "ANN", alpha = 0.2)
##
## Smoothing parameters:
## alpha = 0.2
##
## Initial states:
## l = -23.2
##
## sigma: 79.1432
##
## AIC AICc BIC
## 1497.177 1497.289 1502.596
ANN, \(\alpha = null\)
#simple exponential smoothing with no trend and no seasonality model, ANN, no predetermined alpha
ses2 <- ets(train.ts, model = "ANN")
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in.
ses.pred2 <- forecast(ses2, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = null
accuracy(ses.pred2, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.24230 75.42840 59.03037 97.97484 216.4829 0.5299245
## Test set -31.42115 95.06356 74.92949 54.87022 188.7018 0.6726532
## ACF1 Theil's U
## Training set 0.06668754 NA
## Test set 0.63282735 1.857062
ses2
## ETS(A,N,N)
##
## Call:
## ets(y = train.ts, model = "ANN")
##
## Smoothing parameters:
## alpha = 0.4862
##
## Initial states:
## l = -47.0394
##
## sigma: 75.4284
##
## AIC AICc BIC
## 1488.505 1488.729 1496.633
When no model determined, the function chooses ANN
ZZZ, \(\alpha = 0.2\)
#simple exponential smoothing with no trend and no predetermined model, alpha = 0.2
ses3 <- ets(train.ts, model = "ZZZ", alpha = 0.2)
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in.
ses.pred3 <- forecast(ses3, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = .2
accuracy(ses.pred3, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 5.614821 79.14323 62.55559 82.41135 219.2102 0.5615708
## Test set -58.906176 107.33000 87.24667 38.09999 254.0051 0.7832264
## ACF1 Theil's U
## Training set 0.3397771 NA
## Test set 0.6328273 2.402537
ses3
## ETS(A,N,N)
##
## Call:
## ets(y = train.ts, model = "ZZZ", alpha = 0.2)
##
## Smoothing parameters:
## alpha = 0.2
##
## Initial states:
## l = -23.2
##
## sigma: 79.1432
##
## AIC AICc BIC
## 1497.177 1497.289 1502.596
Again, model chooses ANN and alpha of 0.468 ZZZ, \(\alpha = null\)
#simple exponential smoothing with no trend and no seasonality model, alpha is blank
ses4 <- ets(train.ts, model = "ZZZ")
#forecast the above ses model. The forecast is flat because the trend and seasonality are not configured in.
ses.pred4 <- forecast(ses4, h = nValid, level = 0)
#accuracy of ses, ANN forecast with alpha = blank
accuracy(ses.pred4, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.24230 75.42840 59.03037 97.97484 216.4829 0.5299245
## Test set -31.42115 95.06356 74.92949 54.87022 188.7018 0.6726532
## ACF1 Theil's U
## Training set 0.06668754 NA
## Test set 0.63282735 1.857062
ses4
## ETS(A,N,N)
##
## Call:
## ets(y = train.ts, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.4862
##
## Initial states:
## l = -47.0394
##
## sigma: 75.4284
##
## AIC AICc BIC
## 1488.505 1488.729 1496.633
Since this model is apppropriatefor series with trend and seasonality, no differencing is needed.
#create validation period length
nValid <- 36
#create training period length
nTrain <- length(ridership.ts) - nValid
#create training period window
train.ts <- window(ridership.ts, start = c(1993, 1), end = c(1992, nTrain + 1))
#create validation period window
valid.ts <- window(ridership.ts, start = c(199, nTrain + 2), end = c(1993, nTrain + 1 + nValid))
## Warning in window.default(x, ...): 'start' value not changed
## Warning in window.default(x, ...): 'end' value not changed
hwRidership <- HoltWinters(train.ts)
forecasthwRIdership <- forecast(hwRidership, h = nValid, level = 0)
accuracy(forecasthwRIdership, valid.ts)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.849522 58.37717 46.30338 0.3386518 2.673978 0.5949298
## Test set -25.806716 80.11433 58.69309 -1.3646450 2.981850 0.7541191
## ACF1 Theil's U
## Training set 0.2272424 NA
## Test set 0.6447138 0.4889906
hwRidership
## Holt-Winters exponential smoothing with trend and additive seasonal component.
##
## Call:
## HoltWinters(x = train.ts)
##
## Smoothing parameters:
## alpha: 0.4473881
## beta : 0.01735796
## gamma: 0.613333
##
## Coefficients:
## [,1]
## a 1982.026347
## b 2.203838
## s1 95.714582
## s2 108.701878
## s3 179.595103
## s4 225.907694
## s5 -148.855119
## s6 54.208672
## s7 53.348729
## s8 51.294335
## s9 -224.162897
## s10 -240.616007
## s11 42.504412
## s12 68.406459