library(fpp2)

## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo

## ── Attaching packages ─────────────────────────────────────────────────────────────── fpp2 2.4 ──

## ✓ ggplot2   3.3.2     ✓ fma       2.4  
## ✓ forecast  8.13      ✓ expsmooth 2.3

##

library(gridExtra)
library(ggplot2)

3.1 For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance.

usnetelec

series <- usnetelec 

lambda_series <- BoxCox.lambda(series)
plot1 <- autoplot(series)+
  autolayer(meanf(series, h=11),
  series="Mean", PI=FALSE) +
  autolayer(naive(series, h=11),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(series, h=11),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(series, drift=TRUE, h=11),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts Before Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


plot2 <- autoplot(BoxCox(series,lambda_series))+
  autolayer(meanf(BoxCox(series,lambda_series), h=11),
  series="Mean", PI=FALSE) +
  autolayer(naive(BoxCox(series,lambda_series), h=11),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(BoxCox(series,lambda_series), h=11),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(BoxCox(series,lambda_series), drift=TRUE, h=11),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts After Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


combined <- cbind(series,BoxCox(series,lambda_series))
grid.arrange(plot1, plot2, nrow=2)

autoplot(combined) +labs(x = "Time", y="Value")+scale_color_manual(labels = c("Before Transformation", "After Transformation"), values = c(1, 2))

lambda_series

## [1] 0.5167714

Answer To stabilize the usnetelec series a lamba of 0.516 has been chosen by the BoxCox.lambda() method

usgdp

series <- usgdp 

lambda_series <- BoxCox.lambda(series)
plot1 <- autoplot(series)+
  autolayer(meanf(series, h=100),
  series="Mean", PI=FALSE) +
  autolayer(naive(series, h=100),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(series, h=100),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(series, drift=TRUE, h=100),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts Before Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


plot2 <- autoplot(BoxCox(series,lambda_series))+
  autolayer(meanf(BoxCox(series,lambda_series), h=100),
  series="Mean", PI=FALSE) +
  autolayer(naive(BoxCox(series,lambda_series), h=100),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(BoxCox(series,lambda_series), h=100),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(BoxCox(series,lambda_series), drift=TRUE, h=100),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts After Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


combined <- cbind(series,BoxCox(series,lambda_series))
grid.arrange(plot1, plot2, nrow=2)

autoplot(combined) +labs(x = "Time", y="Value")+scale_color_manual(labels = c("Before Transformation", "After Transformation"), values = c(1, 2))

lambda_series

## [1] 0.366352

Answer To stabilize the usgdp series a lamba of 0.366352 has been chosen by the BoxCox.lambda() method

mcopper

series <- mcopper 

lambda_series <- BoxCox.lambda(series)
plot1 <- autoplot(series)+
  autolayer(meanf(series, h=11),
  series="Mean", PI=FALSE) +
  autolayer(naive(series, h=11),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(series, h=11),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(series, drift=TRUE, h=11),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts Before Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


plot2 <- autoplot(BoxCox(series,lambda_series))+
  autolayer(meanf(BoxCox(series,lambda_series), h=11),
  series="Mean", PI=FALSE) +
  autolayer(naive(BoxCox(series,lambda_series), h=11),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(BoxCox(series,lambda_series), h=11),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(BoxCox(series,lambda_series), drift=TRUE, h=11),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts After Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


combined <- cbind(series,BoxCox(series,lambda_series))
grid.arrange(plot1, plot2, nrow=2)

autoplot(combined) +labs(x = "Time", y="Value")+scale_color_manual(labels = c("Before Transformation", "After Transformation"), values = c(1, 2))

lambda_series

## [1] 0.1919047

Answer To stabilize the mcopper series a lamba of 0.1919047 has been chosen by the BoxCox.lambda() method

enplanements

series <- enplanements 

lambda_series <- BoxCox.lambda(series)
plot1 <- autoplot(series)+
  autolayer(meanf(series, h=11),
  series="Mean", PI=FALSE) +
  autolayer(naive(series, h=11),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(series, h=11),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(series, drift=TRUE, h=11),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts Before Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


plot2 <- autoplot(BoxCox(series,lambda_series))+
  autolayer(meanf(BoxCox(series,lambda_series), h=11),
  series="Mean", PI=FALSE) +
  autolayer(naive(BoxCox(series,lambda_series), h=11),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(BoxCox(series,lambda_series), h=11),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(BoxCox(series,lambda_series), drift=TRUE, h=11),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts After Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


combined <- cbind(series,BoxCox(series,lambda_series))
grid.arrange(plot1, plot2, nrow=2)

autoplot(combined) +labs(x = "Time", y="Value")+scale_color_manual(labels = c("Before Transformation", "After Transformation"), values = c(1, 2))

lambda_series

## [1] -0.2269461

Answer To stabilize the enplanements series a lamba of -0.2269461 has been chosen by the BoxCox.lambda() method

3.2 Why is a Box-Cox transformation unhelpful for the cangas data?

series <- cangas 

lambda_series <- BoxCox.lambda(series)
plot1 <- autoplot(series)+
  autolayer(meanf(series, h=100),
  series="Mean", PI=FALSE) +
  autolayer(naive(series, h=100),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(series, h=100),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(series, drift=TRUE, h=100),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts Before Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


plot2 <- autoplot(BoxCox(series,lambda_series))+
  autolayer(meanf(BoxCox(series,lambda_series), h=100),
  series="Mean", PI=FALSE) +
  autolayer(naive(BoxCox(series,lambda_series), h=100),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(BoxCox(series,lambda_series), h=100),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(BoxCox(series,lambda_series), drift=TRUE, h=100),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts After Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


combined <- cbind(series,BoxCox(series,lambda_series))
grid.arrange(plot1, plot2, nrow=2)

autoplot(combined) +labs(x = "Time", y="Value")+scale_color_manual(labels = c("Before Transformation", "After Transformation"), values = c(1, 2))

  lambda_series

## [1] 0.5767759

res1<-residuals(rwf(series,drift=TRUE))
res1plot<-autoplot(res1) + xlab("Time") + ylab("") +
  ggtitle("Residuals from rwf method before tranformation")

res2<-residuals(rwf(BoxCox(series,lambda_series)),drift=TRUE)
res2plot<-autoplot(res2) + xlab("Time") + ylab("") +
  ggtitle("Residuals from rwf method after tranformation")


grid.arrange(res1plot, res2plot, nrow=2)

## Answer The purpose of Box-Cox is stabilize the variance of residuals. Under the fitted random walk with a drift model the variance of residuals (unde BoxCox transformation) appears heteroscedastic in that it is smaller from 60 to end of 70ies and then it is more volatitle from 80ies onwards. So the transformation has not achieved fully the desired outcome.

3.3 What Box-Cox transformation would you select for your retail data (from Exercise 3 in Section 2.10)?

retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts <- ts(retaildata[,"A3349335T"],
  frequency=12, start=c(1982,4))

series <- myts 

lambda_series <- BoxCox.lambda(series)
plot1 <- autoplot(series)+
  autolayer(meanf(series, h=100),
  series="Mean", PI=FALSE) +
  autolayer(naive(series, h=100),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(series, h=100),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(series, drift=TRUE, h=100),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts Before Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


plot2 <- autoplot(BoxCox(series,lambda_series))+
  autolayer(meanf(BoxCox(series,lambda_series), h=100),
  series="Mean", PI=FALSE) +
  autolayer(naive(BoxCox(series,lambda_series), h=100),
  series="Naïve", PI=FALSE) +
  autolayer(snaive(BoxCox(series,lambda_series), h=100),
  series="Seasonal naïve", PI=FALSE) +
  autolayer(rwf(BoxCox(series,lambda_series), drift=TRUE, h=100),
  series="Drift", PI=FALSE) +
  ggtitle("Forecasts After Transformation") +
  xlab("time") + ylab("value") +
  guides(colour=guide_legend(title="Forecast"))


combined <- cbind(series,BoxCox(series,lambda_series))
grid.arrange(plot1, plot2, nrow=2)

autoplot(combined) +labs(x = "Time", y="Value")+scale_color_manual(labels = c("Before Transformation", "After Transformation"), values = c(1, 2))

  lambda_series

## [1] 0.193853

res1<-residuals(snaive(series,drift=TRUE))
res1plot<-autoplot(res1) + xlab("Time") + ylab("") +
  ggtitle("Residuals from snaïve method before tranformation")

res2<-residuals(snaive(BoxCox(series,lambda_series)),drift=TRUE)
res2plot<-autoplot(res2) + xlab("Time") + ylab("") +
  ggtitle("Residuals from snaïve method after tranformation")


grid.arrange(res1plot, res2plot, nrow=2)

## Answer To stabilize the myts series [A3349335T = Turnover ; New South Wales ; Supermarket and grocery stores ;] a lamba of 0.19385 has been chosen by the BoxCox.lambda() method. As can be seen from the plot of residuals under the BoxCox transformation the variance of residuals appears stabilized, more so that under the untransformed time series where errors exhibit a small variance in the beginning of the period and then a larger variance in the latter part of the period. The choice of lambda therefore appears reasonable.

3.8 For your retail time series (from Exercise 3 in Section 2.10):

a. Split the data into two parts using

myts.train <- window(myts, end=c(2010,12))
myts.test <- window(myts, start=2011)

b. Check that your data have been split appropriately by producing the following plot.

autoplot(myts) +
  autolayer(myts.train, series="Training") +
  autolayer(myts.test, series="Test")

c. Calculate forecasts using snaive applied to myts.train.

fc <- snaive(myts.train)

d. Compare the accuracy of your forecasts against the actual values stored in myts.test.

accuracy(fc,myts.test)

##                    ME      RMSE       MAE      MPE     MAPE     MASE      ACF1
## Training set 61.56787  72.20702  61.68438 6.388722 6.404105 1.000000 0.6018274
## Test set     97.44583 109.62545 100.02917 4.629852 4.751209 1.621629 0.2686595
##              Theil's U
## Training set        NA
## Test set     0.9036205

fit1 <- meanf(myts.train,h=40)
fit2 <- rwf(myts.train,h=40)
fit3 <- snaive(myts.train,h=40)
autoplot(window(myts, start=2005)) +
  autolayer(fit1, series="Mean", PI=FALSE) +
  autolayer(fit2, series="Naïve", PI=FALSE) +
  autolayer(fit3, series="Seasonal naïve", PI=FALSE) +
  xlab("Year") + ylab("Megalitres") +
  ggtitle("Forecasts for quarterly beer production") +
  guides(colour=guide_legend(title="Forecast"))

accuracy(fc,myts.test)

##                    ME      RMSE       MAE      MPE     MAPE     MASE      ACF1
## Training set 61.56787  72.20702  61.68438 6.388722 6.404105 1.000000 0.6018274
## Test set     97.44583 109.62545 100.02917 4.629852 4.751209 1.621629 0.2686595
##              Theil's U
## Training set        NA
## Test set     0.9036205

Answer The testing set out-of-sample errors are larger than the training ones. This is as expected. We note that the problem with our seasonally naive forecast is that it is missing a drift.

e. Check the residuals.

checkresiduals(fc)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 812.76, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

Do the residuals appear to be uncorrelated and normally distributed?

Answer Residuals are not normally distributed, there is a right skew present. There is also an autocorrelation of residuals at various lags.

f. How sensitive are the accuracy measures to the training/test split?

Answer In general the shorter the training horizon the easier it is to fit the data, hence the various error metrics should decrease as we shorten the training period. The reverse will typically happen to the testing set metrics (ou-of-sample) ; the shorter the training period the larger the testing set errors (it follows from the fact that there is more uncertainty in the testing set hence no surprise that on average we may not hit the actauls in the test set with our model which was trained on a relatively short period).

Homework 2 - 624