if (!require(mlba)) {
library(devtools)
install_github("gedeck/mlba/mlba", force=TRUE)
}
options(scipen=999)
library(forecast)
library(ggplot2)
Amtrak.data <- mlba::Amtrak
# create time series
ridership.ts <- ts(Amtrak.data$Ridership,
start=c(1991, 1), end=c(2004, 3), freq=12)
# produce linear trend model
ridership.lm <- tslm(ridership.ts ~ trend)
# plot the series using the autoplot function to make use of ggplot
autoplot(ridership.ts, xlab="Time", ylab="Ridership (in 000s)", color="steelblue") +
autolayer(ridership.lm$fitted.values, color="tomato", size=0.75) +
scale_y_continuous(limits=c(1300, 2300))
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakLinear.pdf"),
last_plot() + scale_x_continuous(n.breaks=10) + theme_bw(),
width=6, height=3, units="in")
nTest <- 36
nTrain <- length(ridership.ts) - nTest
# partition the data
train.ts <- window(ridership.ts, start=c(1991, 1), end=c(1991, nTrain))
test.ts <- window(ridership.ts, start=c(1991, nTrain + 1),
end=c(1991, nTrain + nTest))
# fit linear trend model to training set
train.lm <- tslm(train.ts ~ trend)
train.lm.pred <- forecast(train.lm, h=length(test.ts), level=0)
# define functions to create consistent plots of models
library(gridExtra)
colData <- "steelblue"; colModel <- "tomato"
plotForecast <- function(model, train.ts, test.ts) {
model.pred <- forecast(model, h=length(test.ts), level=0)
g <- autoplot(train.ts, xlab="Time", ylab="Ridership (in 000s)", color=colData) +
autolayer(test.ts, color=colData) +
autolayer(model$fitted.values, color=colModel, size=0.75) +
autolayer(model.pred$mean, color=colModel, size=0.75)
return (g)
}
plotResiduals <- function(model, test.ts) {
model.pred <- forecast(model, h=length(test.ts), level=0)
g <- autoplot(model$residuals, xlab="Time", ylab="Forecast errors", color=colModel, size=0.75) +
autolayer(test.ts - model.pred$mean, color=colModel, size=0.75) +
geom_hline(yintercept=0, color="darkgrey") +
coord_cartesian(ylim=c(-410, 410))
return (g)
}
g1 <- plotForecast(train.lm, train.ts, test.ts)
g2 <- plotResiduals(train.lm, test.ts)
grid.arrange(g1, g2, nrow=2)
addTrainValid <- function(g, train.ts, test.ts, yh) {
delta <- 1/12
date_t <- time(train.ts)[1]
date_th <- time(test.ts)[1] - delta
date_hf <- tail(time(test.ts), 1) + delta
g <- g + geom_vline(xintercept=date_th, color="darkgrey") +
geom_segment(aes(x=date_t, xend=date_th-delta, y=yh, yend=yh), color="darkgrey") +
geom_segment(aes(x=date_th+delta, xend=date_hf-delta, y=yh, yend=yh), color="darkgrey") +
geom_text(aes(x=(date_t+date_th)/2, y=yh+50, label='Training')) +
geom_text(aes(x=(date_th+date_hf)/2, y=yh+50, label='Test'))
g <- g + scale_x_continuous(n.breaks=10)
return (g)
}
g1 <- addTrainValid(g1, train.ts, test.ts, 2300)
g2 <- addTrainValid(g2, train.ts, test.ts, 500)# + coord_cartesian(ylim=c(-410, 550))
gridExtra::grid.arrange(g1, g2, nrow=2)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw())
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakLinearPred.pdf"),
g, width=6, height=6, units="in")
summary(train.lm)
#>
#> Call:
#> tslm(formula = train.ts ~ trend)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -411.29 -114.02 16.06 129.28 306.35
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1750.3595 29.0729 60.206 <0.0000000000000002 ***
#> trend 0.3514 0.4069 0.864 0.39
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 160.2 on 121 degrees of freedom
#> Multiple R-squared: 0.006125, Adjusted R-squared: -0.002089
#> F-statistic: 0.7456 on 1 and 121 DF, p-value: 0.3896
accuracy(train.lm.pred, test.ts)
#> ME RMSE MAE MPE MAPE
#> Training set 0.000000000000003720601 158.9269 129.6778 -0.853984 7.535999
#> Test set 193.131584752838051599610 239.4863 209.4371 9.209919 10.147732
#> MASE ACF1 Theil's U
#> Training set 1.572005 0.4372087 NA
#> Test set 2.538879 0.2734545 1.358369
# fit exponential trend using tslm() with argument lambda = 0
train.lm.expo.trend <- tslm(train.ts ~ trend, lambda=0)
train.lm.expo.trend.pred <- forecast(train.lm.expo.trend, h=nTest, level=0)
# fit linear trend using tslm() with argument lambda = 1 (no transform of y)
train.lm.linear.trend <- tslm(train.ts ~ trend, lambda=1)
train.lm.linear.trend.pred <- forecast(train.lm.linear.trend, h=nTest, level=0)
plotForecast(train.lm.expo.trend, train.ts, test.ts) +
autolayer(train.lm.linear.trend$fitted.values, color="forestgreen", size=0.75) +
autolayer(train.lm.linear.trend.pred, color="forestgreen", size=0.75)
g <- addTrainValid(last_plot(), train.ts, test.ts, 2300) + theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakExpoPred.pdf"),
g, width=6, height=3, units="in")
# fit quadratic trend using function I(), which treats an object "as is".
train.lm.poly <- tslm(train.ts ~ trend + I(trend^2))
summary(train.lm.poly)
#>
#> Call:
#> tslm(formula = train.ts ~ trend + I(trend^2))
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -344.79 -101.86 40.89 98.54 279.81
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1888.88401 40.91521 46.166 < 0.0000000000000002 ***
#> trend -6.29780 1.52327 -4.134 0.0000663 ***
#> I(trend^2) 0.05362 0.01190 4.506 0.0000155 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 148.8 on 120 degrees of freedom
#> Multiple R-squared: 0.1499, Adjusted R-squared: 0.1358
#> F-statistic: 10.58 on 2 and 120 DF, p-value: 0.00005844
g1 <- plotForecast(train.lm.poly, train.ts, test.ts)
g2 <- plotResiduals(train.lm.poly, test.ts)
grid.arrange(g1, g2, nrow=2)
g1 <- addTrainValid(g1, train.ts, test.ts, 2300)
g2 <- addTrainValid(g2, train.ts, test.ts, 500)# + coord_cartesian(ylim=c(-410, 550))
grid.arrange(g1, g2, nrow=2)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw())
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakPolyPred.pdf"),
g, width=6, height=6, units="in")
# fit season model
train.lm.season <- tslm(train.ts ~ season)
g1 <- plotForecast(train.lm.season, train.ts, test.ts)
g2 <- plotResiduals(train.lm.season, test.ts)
grid.arrange(g1, g2, nrow=2)
g1 <- addTrainValid(g1, train.ts, test.ts, 2300)
g2 <- addTrainValid(g2, train.ts, test.ts, 500)
grid.arrange(g1, g2, nrow=2)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw())
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakSeason.pdf"),
g, width=6, height=6, units="in")
train.lm.season <- tslm(train.ts ~ season)
summary(train.lm.season)
#>
#> Call:
#> tslm(formula = train.ts ~ season)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -276.165 -52.934 5.868 54.544 215.081
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1573.97 30.58 51.475 < 0.0000000000000002 ***
#> season2 -42.93 43.24 -0.993 0.3230
#> season3 260.77 43.24 6.030 0.00000002191915177 ***
#> season4 245.09 44.31 5.531 0.00000021404255781 ***
#> season5 278.22 44.31 6.279 0.00000000681101962 ***
#> season6 233.46 44.31 5.269 0.00000068154999666 ***
#> season7 345.33 44.31 7.793 0.00000000000378528 ***
#> season8 396.66 44.31 8.952 0.00000000000000919 ***
#> season9 75.76 44.31 1.710 0.0901 .
#> season10 200.61 44.31 4.527 0.00001509734148025 ***
#> season11 192.36 44.31 4.341 0.00003144621146447 ***
#> season12 230.42 44.31 5.200 0.00000091832087453 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 101.4 on 111 degrees of freedom
#> Multiple R-squared: 0.6348, Adjusted R-squared: 0.5986
#> F-statistic: 17.54 on 11 and 111 DF, p-value: < 0.00000000000000022
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
g1 <- plotForecast(train.lm.trend.season, train.ts, test.ts)
g2 <- plotResiduals(train.lm.trend.season, test.ts)
grid.arrange(g1, g2, nrow=2)
g1 <- addTrainValid(g1, train.ts, test.ts, 2300)
g2 <- addTrainValid(g2, train.ts, test.ts, 500)
grid.arrange(g1, g2, nrow=2)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw())
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakTrendSeason.pdf"),
g, width=6, height=6, units="in")
summary(train.lm.trend.season)
#>
#> Call:
#> tslm(formula = train.ts ~ trend + I(trend^2) + season)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -213.775 -39.363 9.711 42.422 152.187
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 1696.979360 27.675224 61.318 < 0.0000000000000002 ***
#> trend -7.155851 0.729283 -9.812 < 0.0000000000000002 ***
#> I(trend^2) 0.060744 0.005698 10.660 < 0.0000000000000002 ***
#> season2 -43.245842 30.240666 -1.430 0.15556
#> season3 260.014920 30.242281 8.598 0.000000000000066035 ***
#> season4 260.617456 31.021050 8.401 0.000000000000182636 ***
#> season5 293.796560 31.020188 9.471 0.000000000000000689 ***
#> season6 248.961476 31.019903 8.026 0.000000000001260334 ***
#> season7 360.634004 31.020165 11.626 < 0.0000000000000002 ***
#> season8 411.651344 31.020954 13.270 < 0.0000000000000002 ***
#> season9 90.316196 31.022265 2.911 0.00437 **
#> season10 214.603661 31.024102 6.917 0.000000000329207928 ***
#> season11 205.671137 31.026486 6.629 0.000000001339180089 ***
#> season12 242.929425 31.029448 7.829 0.000000000003442810 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 70.92 on 109 degrees of freedom
#> Multiple R-squared: 0.8246, Adjusted R-squared: 0.8037
#> F-statistic: 39.42 on 13 and 109 DF, p-value: < 0.00000000000000022
ridership.24.ts <- window(train.ts, start=c(1991, 1), end=c(1991, 24))
Acf(ridership.24.ts, lag.max=12, main="")
# ggplot version of graph
ggAcf(ridership.24.ts, lag.max=12, main="")
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakACF.pdf"),
last_plot() + theme_bw(), width=6, height=3, units="in")
# ggplot version of graph
ggAcf(train.lm.trend.season$residuals, lag.max=12, main="")
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakResACF.pdf"),
last_plot() + theme_bw(), width=6, height=3, units="in")
# fit linear regression with quadratic trend and seasonality to Ridership
train.lm.trend.season <- tslm(train.ts ~ trend + I(trend^2) + season)
# fit AR(1) model to training residuals
# use Arima() in the forecast package to fit an ARIMA model
# (that includes AR models); order = c(1,0,0) gives an AR(1).
train.res.arima <- Arima(train.lm.trend.season$residuals, order = c(1,0,0))
test.res.arima.pred <- forecast(train.res.arima, h = 1)
summary(train.res.arima)
#> Series: train.lm.trend.season$residuals
#> ARIMA(1,0,0) with non-zero mean
#>
#> Coefficients:
#> ar1 mean
#> 0.5998 0.3728
#> s.e. 0.0712 11.8408
#>
#> sigma^2 = 2876: log likelihood = -663.54
#> AIC=1333.08 AICc=1333.29 BIC=1341.52
#>
#> Training set error measures:
#> ME RMSE MAE MPE MAPE MASE
#> Training set -0.1223159 53.19141 39.8277 103.3885 175.2219 0.5721356
#> ACF1
#> Training set -0.07509305
test.res.arima.pred
#> Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
#> Apr 2001 7.41111 -61.31748 76.1397 -97.70019 112.5224
autoplot(train.lm.trend.season$residuals, xlab="Time", ylab="Residuals", color=colData) +
autolayer(test.res.arima.pred$fitted, color=colModel, size=0.75)
g <- addTrainValid(last_plot(), train.ts, test.ts, 200) + theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakResARIMA.pdf"),
g, width=6, height=3, units="in")
g <- ggAcf(train.res.arima$residuals, lag.max=12, main="")
ggsave(file=file.path(getwd(), "figures", "chapter_18", "AmtrakResARIMAResACF.pdf"),
g + theme_bw(), width=6, height=3, units="in")
sp500.df <- mlba::SP500
sp500.ts <- ts(sp500.df$Close, start = c(1995, 5), end = c(2003, 8), freq = 12)
g <- autoplot(sp500.ts, xlab="Time", ylab="Closing price") + theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_18", "SP500.pdf"),
g, width=6, height=3, units="in")
sp500.df <- mlba::SP500
sp500.ts <- ts(sp500.df$Close, start = c(1995, 5), end = c(2003, 8), freq = 12)
delta <- sp500.ts[2:length(sp500.ts)] - sp500.ts[1:length(sp500.ts)-1]
ggAcf(delta, lag.max=12, main="")
g <- last_plot() + theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_18", "SP500Diff1-ACF.pdf"),
g, width=6, height=3, units="in")
sp500.df <- mlba::SP500
sp500.ts <- ts(sp500.df$Close, start = c(1995, 5), end = c(2003, 8), freq = 12)
sp500.arima <- Arima(sp500.ts, order = c(1,0,0))
sp500.arima
#> Series: sp500.ts
#> ARIMA(1,0,0) with non-zero mean
#>
#> Coefficients:
#> ar1 mean
#> 0.9833 890.0707
#> s.e. 0.0145 221.0280
#>
#> sigma^2 = 2833: log likelihood = -540.05
#> AIC=1086.1 AICc=1086.35 BIC=1093.92
library(zoo)
airTravel.ts <- ts(mlba::Sept11Travel$Air.RPM..000s., start = c(1990, 1), freq = 12)
airTravel.ts <- window(airTravel.ts, start=c(1990, 1), end=c(2001, 8))
g <- autoplot((airTravel.ts - decompose(airTravel.ts)$seasonal) / 1000000,
xlab="Month",
ylab="Seasonally adjusted air revenue\n passenger miles ($ billions)") +
scale_x_yearmon() +
ylim(0, 63) +
theme_bw()
g
ggsave(file=file.path(getwd(), "figures", "chapter_18", "seasonalPreSept11air.pdf"),
g, width=6, height=3)
toys.ts <- ts(mlba::ToysRUsRevenues$Revenue.in.million..., start=c(1992, 1), freq=4)
autoplot(toys.ts, x="Time", y="Revenue ($ millions)")
g <- last_plot() + theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-ToysRUs.pdf"),
last_plot() + theme_bw(), width=5, height=4, units="in")
n <- length(toys.ts)
train.ts <- window(toys.ts, end=c(1992, n - 2))
test.ts <- window(toys.ts, start=c(1992, n - 1))
summary(tslm(train.ts ~ trend + season))
#>
#> Call:
#> tslm(formula = train.ts ~ trend + season)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -335.90 -54.29 18.50 63.80 319.24
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 906.75 115.35 7.861 0.0000254518 ***
#> trend 47.11 11.26 4.185 0.00236 **
#> season2 -15.11 119.66 -0.126 0.90231
#> season3 89.17 128.67 0.693 0.50582
#> season4 2101.73 129.17 16.272 0.0000000555 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 168.5 on 9 degrees of freedom
#> Multiple R-squared: 0.9774, Adjusted R-squared: 0.9673
#> F-statistic: 97.18 on 4 and 9 DF, p-value: 0.0000002129
walmart <- mlba::WalMartStock
# use zoo to handle the gaps in the dates
walmart.zoo <- zoo(walmart$Close, as.Date(walmart$Date, "%d-%b-%y"))
autoplot(walmart.zoo) +
labs(x="Time", y="Close price ($)")
g <- last_plot() + theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-Walmart.pdf"),
last_plot() + theme_bw(), width=5, height=4, units="in")
g1 <- ggAcf(walmart$Close, lag.max=12, ylab="Close ACF", main="")
delta <- walmart$Close[2:length(walmart$Close)] - walmart$Close[1:length(walmart$Close)-1]
g2 <- ggAcf(delta, lag.max=12, ylab="Close differences ACF", main="")
grid.arrange(g1, g2, nrow=2)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw())
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-Walmart-acf.pdf"),
g, width=5, height=5, units="in")
sales.ts <- ts(mlba::DepartmentStoreSales$Sales, freq=4)
autoplot(sales.ts, xlab="Year-quarter", ylab="Sales") +
geom_point() +
scale_x_yearqtr(format = "Y%Y-Q%q")
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-DeptStore.pdf"),
last_plot() + theme_bw(), width=6, height=3, units="in")
train.ts <- window(sales.ts, end=c(1, 20))
test.ts <- window(sales.ts, start=c(1, 21))
model <- tslm(train.ts ~ trend + season, lambda=0)
summary(model)
#>
#> Call:
#> tslm(formula = train.ts ~ trend + season, lambda = 0)
#>
#> Residuals:
#> Min 1Q Median 3Q Max
#> -0.053524 -0.013199 -0.004527 0.014387 0.062681
#>
#> Coefficients:
#> Estimate Std. Error t value Pr(>|t|)
#> (Intercept) 10.748945 0.018725 574.057 < 0.0000000000000002 ***
#> trend 0.011088 0.001295 8.561 0.0000003702488 ***
#> season2 0.024956 0.020764 1.202 0.248
#> season3 0.165343 0.020884 7.917 0.0000009788403 ***
#> season4 0.433746 0.021084 20.572 0.0000000000021 ***
#> ---
#> Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#>
#> Residual standard error: 0.03277 on 15 degrees of freedom
#> Multiple R-squared: 0.9791, Adjusted R-squared: 0.9736
#> F-statistic: 175.9 on 4 and 15 DF, p-value: 0.000000000002082
g1 <- autoplot(train.ts, xlab="Year", ylab="Sales") +
geom_point() +
autolayer(model$fitted.values, color=colModel) +
scale_y_continuous(labels = scales::comma)
g2 <- autoplot(model$residuals, xlab="Year", ylab="Residuals")
grid.arrange(g1, g2, nrow=2)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw())
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-DeptStore-model.pdf"),
g, width=5, height=5, units="in")
sales.df <- mlba::SouvenirSales
sales.ts <- ts(sales.df$Sales, start=c(1995, 1), end=c(2001, 12), freq=12)
g1 <- autoplot(sales.ts, xlab="Time", ylab="Sales (Australian $)") +
geom_point(size=0.5) +
scale_y_continuous(labels = scales::comma)
g2 <- g1 +
scale_y_log10(labels = scales::comma)
grid.arrange(g1, g2, ncol=2)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw(), ncol=2)
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-SouvenirShop.pdf"),
g, width=8, height=4, units="in")
shipments.df <- mlba::ApplianceShipments
shipments.ts <- ts(shipments.df$Shipments, start=c(1985, 1), freq=4)
autoplot(shipments.ts, xlab="Time", ylab="Shipments (in $000s)") +
geom_point(size=0.5)
g <- last_plot() + theme_bw()
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-Appliances.pdf"),
g, width=5, height=2.5, units="in")
wines <- mlba::AustralianWines
wines.ts <- ts(wines, start=c(1980, 1), freq=12)
g1 <- autoplot(wines.ts[, "sweet.white"], main="Sweet wine sales",
xlab="Year", ylab="Thousands of liters")
g2 <- autoplot(wines.ts[, "rose"], main="Rose wine sales",
xlab="Year", ylab="Thousands of liters")
g3 <- autoplot(wines.ts[, "sparkling"], main="Sparkling wine sales",
xlab="Year", ylab="Thousands of liters")
g4 <- autoplot(wines.ts[, "red"], main="Red wine sales",
xlab="Year", ylab="Thousands of liters")
g5 <- autoplot(wines.ts[, "dry.white"], main="Dry white wine sales",
xlab="Year", ylab="Thousands of liters")
g6 <- autoplot(wines.ts[, "fortified"], main="Fortified wine sales",
xlab="Year", ylab="Thousands of liters")
grid.arrange(g1, g2, g3, g4, g5, g6, ncol=2, nrow=3)
g <- arrangeGrob(g1 + theme_bw(), g2 + theme_bw(),
g3 + theme_bw(), g4 + theme_bw(),
g5 + theme_bw(), g6 + theme_bw(),
ncol=2, nrow=3)
ggsave(file=file.path(getwd(), "figures", "chapter_18", "Exercise-Wines.pdf"),
g, width=7, height=9, units="in")