if (!require(mlba)) {
  library(devtools)
  install_github("gedeck/mlba/mlba", force=TRUE)
}

Time Series Components

Example: Ridership on Amtrak Trains

library(forecast)
library(ggplot2)
Amtrak.data <- mlba::Amtrak

# create time series object using ts()
# ts() takes three arguments: start, end, and freq.
# with monthly data, the frequency of periods per cycle is 12 (per year).
# arguments start and end are (cycle [=year] number, seasonal period [=month] number) pairs.
# here start is Jan 1991: start = c(1991, 1); end is Mar 2004: end = c(2004, 3).
ridership.ts <- ts(Amtrak.data$Ridership,
    start = c(1991, 1), end = c(2004, 3), freq = 12)

# plot the series using the autoplot function to make use of ggplot
autoplot(ridership.ts, xlab="Time", ylab="Ridership (in 000s)") +
  scale_y_continuous(limits=c(1300, 2300))

g <- last_plot() +
    scale_x_continuous(n.breaks=10) +
    theme_bw()
  ggsave(file=file.path(getwd(), "figures", "chapter_17", "AmtrakFirstPlot.pdf"),
         g, width=6, height=3, units="in")
library(gridExtra)
  library(lubridate)
  library(zoo)

  BareggTunnel <- mlba::BareggTunnel
  # convert Day information to a dates object
  dates <- as.POSIXct(BareggTunnel$Day, format='%d %b %Y')
  tunnel.ts <- ts(BareggTunnel$Number.of.vehicles,
                  start=c(2003, yday(dates[1])),
                  frequency=365)

  options(scipen=999)
  g1 <- autoplot(tunnel.ts, xlab="Time", ylab="Number of vehicles") +
    scale_x_yearmon() +
    scale_y_continuous(labels = scales::comma)
  g2 <- autoplot(window(tunnel.ts,
                 start=c(2004, yday(ISOdate(2004, 2, 1))),
                 end=c(2004, yday(ISOdate(2004, 6, 1)))),
                 xlab="Time", ylab="Number of vehicles") +
    scale_x_yearmon() +
    scale_y_continuous(labels = scales::comma)

  grid.arrange(g1, g2, nrow=2)

  g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw())
  ggsave(file=file.path(getwd(), "figures", "chapter_17", "TS-TunnelPlots.pdf"), g,
           width=6, height=4)
library(gridExtra)

# to zoom in to a certain period, use window() to create a new, shorter time series
# we create a new, 3-year time series of ridership.ts from Jan 1997 to Dec 1999
ridership.ts.3yrs <- window(ridership.ts, start = c(1997, 1), end = c(1999, 12))
g1 <- autoplot(ridership.ts.3yrs, xlab="Time", ylab="Ridership (in 000s)") +
  scale_y_continuous(limits=c(1300, 2300))

# fit a trend line to the time series
g2 <- autoplot(ridership.ts, xlab="Time", ylab="Ridership (in 000s)") +
  scale_y_continuous(limits=c(1300, 2300)) +
  geom_smooth(method="lm", formula=y~poly(x, 2))

grid.arrange(g1, g2, nrow=2)

g <- arrangeGrob(g1 + theme_bw(),
                   g2 + scale_x_continuous(n.breaks=10) + theme_bw())
  ggsave(file=file.path(getwd(), "figures", "chapter_17", "AmtrakZoomPlots.pdf"), g,
         width=6, height=6)

# we can also use tslm to create the quadratic fit
ridership.lm <- tslm(ridership.ts ~ trend + I(trend^2))
autoplot(ridership.ts, xlab="Time", ylab="Ridership (in 000s)") +
  scale_y_continuous(limits=c(1300, 2300)) +
  autolayer(ridership.lm$fitted.values)

Data Partitioning and Performance Evaluation

Benchmark Performance: Naive Forecasts

nTest <- 36
nTrain <- length(ridership.ts) - nTest

# partition the data
train.ts <- window(ridership.ts, start = c(1991, 1), end = c(1991, nTrain))
test.ts <- window(ridership.ts, start = c(1991, nTrain + 1),
                                   end = c(1991, nTrain + nTest))

# generate the naive and seasonal naive forecasts
naive.pred <- naive(train.ts, h=nTest)
snaive.pred <- snaive(train.ts, h=nTest)

# compare the actual values and forecasts for both methods
colData <- "steelblue"; colModel <- "tomato"
autoplot(train.ts, xlab="Time", ylab="Ridership (in 000s$)", color=colData) +
  autolayer(test.ts, linetype=2, color=colData) +
  autolayer(naive.pred, PI=FALSE, color=colModel, size=0.75) +
  autolayer(snaive.pred, PI=FALSE, color=colModel, size=0.75)

# for the book visualization add additional annotation
delta <- 1/12
date_t <- time(train.ts)[1]
date_th <- time(test.ts)[1] - delta
date_hf <- tail(time(test.ts), 1) + delta
g <- last_plot() +
  geom_vline(xintercept=date_th, color="darkgrey") + geom_vline(xintercept=date_hf, color="darkgrey") +
  geom_segment(aes(x=date_t, xend=date_th-delta, y=2300, yend=2300), color="darkgrey") +
  geom_segment(aes(x=date_th+delta, xend=date_hf-delta, y=2300, yend=2300), color="darkgrey")  +
  geom_segment(aes(x=date_hf+delta, xend=date_hf+2, y=2300, yend=2300), color="darkgrey") +
  geom_text(aes(x=(date_t+date_th)/2, y=2350, label='Training')) +
  geom_text(aes(x=(date_th+date_hf)/2, y=2350, label='Test')) +
  geom_text(aes(x=date_hf+1, y=2350, label='Future')) +
  scale_x_continuous(n.breaks=10) +
  theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_17", "AmtrakNaive.pdf"),
       g, width=7, height=4.5)
accuracy(naive.pred, test.ts)
#>                     ME     RMSE      MAE        MPE     MAPE     MASE
#> Training set   2.45091 168.1470 125.2975 -0.3460027 7.271393 1.518906
#> Test set     -14.71772 142.7551 115.9234 -1.2749992 6.021396 1.405269
#>                    ACF1 Theil's U
#> Training set -0.2472621        NA
#> Test set      0.2764480 0.8346967
accuracy(snaive.pred, test.ts)
#>                    ME     RMSE      MAE       MPE     MAPE     MASE      ACF1
#> Training set 13.93991 99.26557 82.49196 0.5850656 4.715251 1.000000 0.6400044
#> Test set     54.72961 95.62433 84.09406 2.6527928 4.247656 1.019421 0.6373346
#>              Theil's U
#> Training set        NA
#> Test set     0.5532435

Generating Future Forecasts

ts <- ts(mlba::CanadianWorkHours$Hours,
    start = c(mlba::CanadianWorkHours$Year[1], 1), freq = 1)

# plot the series using the autoplot function to make use of ggplot
autoplot(ts, xlab="Year", ylab="Hours per week")

ggsave(file=file.path(getwd(), "figures", "chapter_17", "Exercise-CanadianWorkers.pdf"),
         last_plot() + theme_bw(), width=5, height=4, units="in")