Data 624 HW 2

Maryluz Cruz

2021-02-21

require(fpp2)
require(kableExtra)

3.1. For the following series, find an appropriate Box-Cox transformation in order to stabilise the variance.

usnetelec

# Estimate the Box-Cox parameter once so the printed value and the plot agree.
lambda_usnetelec <- BoxCox.lambda(usnetelec)
lambda_usnetelec
## [1] 0.5167714
# Plot the transformed series. The second positional argument of autoplot()
# for a ts object is `series` (a legend label), not a Box-Cox lambda, so the
# transformation has to be applied explicitly with BoxCox() before plotting.
autoplot(BoxCox(usnetelec, lambda_usnetelec)) +
  theme_light() +
  ggtitle("usnetelec")

usgdp

# Estimate the Box-Cox parameter once so the printed value and the plot agree.
lambda_usgdp <- BoxCox.lambda(usgdp)
lambda_usgdp
## [1] 0.366352
# Plot the transformed series. The second positional argument of autoplot()
# for a ts object is `series` (a legend label), not a Box-Cox lambda, so the
# transformation has to be applied explicitly with BoxCox() before plotting.
autoplot(BoxCox(usgdp, lambda_usgdp)) +
  theme_light() +
  ggtitle("usgdp")

mcopper

# Estimate the Box-Cox parameter once so the printed value and the plot agree.
lambda_mcopper <- BoxCox.lambda(mcopper)
lambda_mcopper
## [1] 0.1919047
# Plot the transformed series. The second positional argument of autoplot()
# for a ts object is `series` (a legend label), not a Box-Cox lambda, so the
# transformation has to be applied explicitly with BoxCox() before plotting.
autoplot(BoxCox(mcopper, lambda_mcopper)) +
  theme_light() +
  ggtitle("mcopper")

enplanements

# Estimate the Box-Cox parameter once so the printed value and the plot agree.
lambda_enplanements <- BoxCox.lambda(enplanements)
lambda_enplanements
## [1] -0.2269461
# Plot the transformed series. The second positional argument of autoplot()
# for a ts object is `series` (a legend label), not a Box-Cox lambda, so the
# transformation has to be applied explicitly with BoxCox() before plotting.
autoplot(BoxCox(enplanements, lambda_enplanements)) +
  theme_light() +
  ggtitle("enplanements")

2. Why is a Box-Cox transformation unhelpful for the cangas data?

# Estimate the Box-Cox parameter once so the printed value and the plot agree.
lambda_cangas <- BoxCox.lambda(cangas)
lambda_cangas
## [1] 0.5767759
# Plot the transformed series. The second positional argument of autoplot()
# for a ts object is `series` (a legend label), not a Box-Cox lambda, so the
# transformation has to be applied explicitly with BoxCox() before plotting.
autoplot(BoxCox(cangas, lambda_cangas)) +
  theme_light() +
  ggtitle("cangas")

# Untransformed series, for comparison with the transformed plot above.
autoplot(cangas) +
  theme_light() +
  ggtitle("cangas")

The variance pattern seems too complicated for a single Box-Cox transformation to stabilise.

3. What Box-Cox transformation would you select for your retail data (from Exercise 3 in Section 2.10)?

Using the same series as Homework 1.

# Read the retail workbook, skipping the first row of the sheet.
retaildata <- readxl::read_excel("retail.xlsx", skip = 1)

# Monthly series (frequency 12) starting in April 1982, built from one column.
myts <- ts(
  retaildata[, "A3349352V"],
  start = c(1982, 4),
  frequency = 12
)

# Automatically selected Box-Cox parameter for the series.
BoxCox.lambda(myts)
## [1] 0.1738794

# Random-walk-with-drift forecasts on the log scale (lambda = 0), 50 steps
# ahead with an 80% interval: first with the simple back-transformation,
# then with the bias-adjusted back-transformation.
fc <- rwf(myts, h = 50, drift = TRUE, lambda = 0, level = 80)
fc2 <- rwf(myts, h = 50, drift = TRUE, lambda = 0, level = 80, biasadj = TRUE)

# Overlay both forecasts on the observed series; the bias-adjusted forecast
# is drawn without its prediction interval.
autoplot(myts) +
  autolayer(fc, series = "Simple back transformation") +
  autolayer(fc2, series = "Bias adjusted", PI = FALSE) +
  guides(colour = guide_legend(title = "Forecast")) +
  theme_light() +
  ggtitle("myts")

3.8. For your retail time series (from Exercise 3 in Section 2.10):

a. Split the data into two parts using

# Training set: every observation up to and including December 2010.
myts.train <- window(myts, end = c(2010, 12))
# Test set: every observation from the start of 2011 onwards.
myts.test <- window(myts, start = 2011)

b. Check that your data have been split appropriately by producing the following plot.

# Draw the full series, then colour the training and test windows on top of
# it so that any gap or overlap between the two parts would be visible.
autoplot(myts) +
  autolayer(myts.train, series = "Training") +
  autolayer(myts.test, series = "Test") +
  ggtitle("myts") +
  theme_light()

c. Calculate forecasts using snaive applied to myts.train.

# Seasonal naive forecasts from the training data (default horizon).
fc <- snaive(myts.train)
# Render the forecast table (point forecasts and interval bounds).
kable(x = fc)
Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
Jan 2011 4930.7 4691.376 5170.024 4564.685 5296.715
Feb 2011 4403.5 4164.176 4642.824 4037.485 4769.515
Mar 2011 4918.8 4679.476 5158.124 4552.785 5284.815
Apr 2011 4847.3 4607.976 5086.624 4481.285 5213.315
May 2011 4944.4 4705.076 5183.724 4578.385 5310.415
Jun 2011 4896.5 4657.176 5135.824 4530.485 5262.515
Jul 2011 5095.4 4856.076 5334.724 4729.385 5461.415
Aug 2011 5016.8 4777.476 5256.124 4650.785 5382.815
Sep 2011 5041.3 4801.976 5280.624 4675.285 5407.315
Oct 2011 5262.2 5022.876 5501.524 4896.185 5628.215
Nov 2011 5437.4 5198.076 5676.724 5071.385 5803.415
Dec 2011 6952.0 6712.676 7191.324 6585.985 7318.015
Jan 2012 4930.7 4592.245 5269.155 4413.077 5448.323
Feb 2012 4403.5 4065.045 4741.955 3885.877 4921.123
Mar 2012 4918.8 4580.345 5257.255 4401.177 5436.423
Apr 2012 4847.3 4508.845 5185.755 4329.677 5364.923
May 2012 4944.4 4605.945 5282.855 4426.777 5462.023
Jun 2012 4896.5 4558.045 5234.955 4378.877 5414.123
Jul 2012 5095.4 4756.945 5433.855 4577.777 5613.023
Aug 2012 5016.8 4678.345 5355.255 4499.177 5534.423
Sep 2012 5041.3 4702.845 5379.755 4523.677 5558.923
Oct 2012 5262.2 4923.745 5600.655 4744.577 5779.823
Nov 2012 5437.4 5098.945 5775.855 4919.777 5955.023
Dec 2012 6952.0 6613.545 7290.455 6434.377 7469.623

d. Compare the accuracy of your forecasts against the actual values stored in myts.test.

# Accuracy measures for the seasonal naive forecasts: training-set fit and
# test-set performance against the held-out observations.
accuracy(fc, myts.test)
##                    ME     RMSE      MAE      MPE     MAPE     MASE      ACF1
## Training set 149.9174 186.7455 156.1625 5.721577 6.008671 1.000000 0.6541086
## Test set     173.1417 198.5256 173.1417 3.324160 3.324160 1.108728 0.3146049
##              Theil's U
## Training set        NA
## Test set     0.3612413

e. Check the residuals.

# Residual diagnostics for the seasonal naive forecasts; the call also
# reports the Ljung-Box test reproduced below.
checkresiduals(object = fc)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 914.85, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

Do the residuals appear to be uncorrelated and normally distributed?

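The residuals are very close to being normally distributed, but they are not uncorrelated: the Ljung-Box test above (Q* = 914.85, p-value < 2.2e-16) rejects the hypothesis of no autocorrelation.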

f. How sensitive are the accuracy measures to the training/test split?

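Seems to be very sensitive. For example, the seasonal naïve test-set RMSE is 198.53 with the original training set, but 156.21 when the training set starts in 1990 and only 10 steps are forecast.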

# Alternative training window: January 1990 through December 2010.
myts2 <- window(myts, start = 1990, end = c(2010, 12))

# Three benchmark methods, each forecasting 10 steps ahead.
mytsfit1 <- meanf(myts2, h = 10)
mytsfit2 <- rwf(myts2, h = 10)
mytsfit3 <- snaive(myts2, h = 10)

# Plot the benchmark point forecasts over the series from 1990 onwards.
# The y-axis label was "Megalitres", a leftover from the textbook beer
# example; this is retail data, so the label now describes the series.
# NOTE(review): add the unit if known (presumably $ millions - confirm).
autoplot(window(myts, start = 1990)) +
  autolayer(mytsfit1, series = "Mean", PI = FALSE) +
  autolayer(mytsfit2, series = "Naïve", PI = FALSE) +
  autolayer(mytsfit3, series = "Seasonal naïve", PI = FALSE) +
  xlab("Year") + ylab("Retail turnover") +
  ggtitle("Forecasts of Retail") +
  guides(colour = guide_legend(title = "Forecast"))

# Held-out observations (2011 onwards) used to score the benchmark forecasts.
mytsac <- window(myts, start = 2011)

# Mean method.
accuracy(mytsfit1, mytsac)
##                        ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 1.599955e-14 1163.342 1000.377 -14.69126 36.53152  5.656387
## Test set     1.956317e+03 1967.506 1956.317  38.40195 38.40195 11.061517
##                   ACF1 Theil's U
## Training set 0.8991659        NA
## Test set     0.1613245  6.975627

# Naive (random walk) method.
accuracy(mytsfit2, mytsac)
##                       ME      RMSE       MAE         MPE      MAPE      MASE
## Training set    20.92191  455.5736  275.3578  -0.2679754  8.590276  1.556943
## Test set     -1872.62000 1884.3058 1872.6200 -37.1184761 37.118476 10.588274
##                    ACF1 Theil's U
## Training set -0.2966355        NA
## Test set      0.1613245   6.64803

# Seasonal naive method.
accuracy(mytsfit3, mytsac)
##                    ME     RMSE      MAE      MPE     MAPE      MASE      ACF1
## Training set 169.9137 208.1161 176.8579 5.191937 5.503981 1.0000000 0.6400111
## Test set     143.6900 156.2141 143.6900 2.826400 2.826400 0.8124601 0.3420138
##              Theil's U
## Training set        NA
## Test set     0.5754598