Homework Assignment 2

knitr::opts_chunk$set(echo = TRUE)
library(fpp2)
## Warning: package 'fpp2' was built under R version 3.4.4
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.4.4
## Loading required package: forecast
## Warning: package 'forecast' was built under R version 3.4.4
## Loading required package: fma
## Loading required package: expsmooth

3.1

?usnetelec
?usgdp
?mcopper
?enplanements
usnetelec_lambda <- BoxCox.lambda(usnetelec)
usnetelec_lambda
## [1] 0.5167714
autoplot(BoxCox(usnetelec, usnetelec_lambda))

usgdp_lambda <- BoxCox.lambda(usgdp)
usgdp_lambda
## [1] 0.366352
autoplot(BoxCox(usgdp, usgdp_lambda))

mcopper_lambda <- BoxCox.lambda(mcopper)
mcopper_lambda
## [1] 0.1919047
autoplot(BoxCox(mcopper, mcopper_lambda))

enplanements_lambda <- BoxCox.lambda(enplanements)
enplanements_lambda
## [1] -0.2269461
autoplot(BoxCox(enplanements, enplanements_lambda))

3.2

?cangas
cangas_lambda <- BoxCox.lambda(cangas)
cangas_lambda
## [1] 0.5767759
autoplot(BoxCox(cangas, cangas_lambda))

autoplot(cangas)

Using a Box-Cox transformation on the cangas data set does not yield a series with stable seasonal variation; the transformation has little visible effect on the original data. As the text notes, a Box-Cox transformation depends on the parameter lambda, and a good value of lambda is one that makes the size of the seasonal variation about the same across the whole series. For cangas the seasonal variation is largest in the middle of the series and smaller at either end, so no single value of lambda can even it out.
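One way to see this is to plot the transformed series for a handful of lambda values and note that none of them evens out the seasonal swings. A minimal sketch (the particular lambda values tried here are arbitrary):

# Sketch: try a few lambda values on cangas; none of them makes the size of the
# seasonal variation roughly constant across the whole series.
for (lam in c(-0.5, 0, cangas_lambda, 1)) {
  print(autoplot(BoxCox(cangas, lam)) +
    ggtitle(paste('Box-Cox transformed cangas, lambda =', round(lam, 2))))
}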

3.3

retaildata <- readxl::read_excel('retail.xlsx', skip = 1)
## readxl works best with a newer version of the tibble package.
## You currently have tibble v1.4.2.
## Falling back to column name repair from tibble <= v1.4.2.
## Message displays once per session.
myts <- ts(retaildata[,'A3349399C'], frequency = 12, start = c(1982,4))
myts_lambda <- BoxCox.lambda(myts)
myts_lambda
## [1] 0.02074707
autoplot(BoxCox(myts, myts_lambda))

autoplot(myts)

fc <- rwf(myts, drift = TRUE, lambda = 0, h = 50, level = 80)
fc2 <- rwf(myts, drift = TRUE, lambda = 0, h = 50, level = 80, biasadj = TRUE)
autoplot(myts) +
  autolayer(fc, series = 'simple back transformation') +
  autolayer(fc2, series = 'bias adjusted', PI = FALSE) +
  guides(colour = guide_legend(title = 'Forecast'))

In this instance, a simple back transformation seems adequate for this data set.
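To quantify that, the two sets of point forecasts can be compared directly; a minimal sketch reusing the fc and fc2 objects from the chunk above:

# Sketch: the simple back-transformed point forecasts are forecast medians,
# while the bias-adjusted ones are forecast means, so the adjusted values
# should sit somewhat higher.
head(cbind(simple = fc$mean, bias_adjusted = fc2$mean))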

3.8

a.

myts.train <- window(myts, end=c(2010, 12))
myts.test <- window(myts, start=2011)

b.

autoplot(myts) +
  autolayer(myts.train, series = 'training') +
  autolayer(myts.test, series = 'test')

c.

fc <- snaive(myts.train)
fc
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Jan 2011          317.2 290.1101 344.2899 275.7696 358.6304
## Feb 2011          244.7 217.6101 271.7899 203.2696 286.1304
## Mar 2011          301.4 274.3101 328.4899 259.9696 342.8304
## Apr 2011          323.9 296.8101 350.9899 282.4696 365.3304
## May 2011          368.1 341.0101 395.1899 326.6696 409.5304
## Jun 2011          342.6 315.5101 369.6899 301.1696 384.0304
## Jul 2011          331.5 304.4101 358.5899 290.0696 372.9304
## Aug 2011          324.5 297.4101 351.5899 283.0696 365.9304
## Sep 2011          348.0 320.9101 375.0899 306.5696 389.4304
## Oct 2011          332.0 304.9101 359.0899 290.5696 373.4304
## Nov 2011          358.2 331.1101 385.2899 316.7696 399.6304
## Dec 2011          539.8 512.7101 566.8899 498.3696 581.2304
## Jan 2012          317.2 278.8891 355.5109 258.6086 375.7914
## Feb 2012          244.7 206.3891 283.0109 186.1086 303.2914
## Mar 2012          301.4 263.0891 339.7109 242.8086 359.9914
## Apr 2012          323.9 285.5891 362.2109 265.3086 382.4914
## May 2012          368.1 329.7891 406.4109 309.5086 426.6914
## Jun 2012          342.6 304.2891 380.9109 284.0086 401.1914
## Jul 2012          331.5 293.1891 369.8109 272.9086 390.0914
## Aug 2012          324.5 286.1891 362.8109 265.9086 383.0914
## Sep 2012          348.0 309.6891 386.3109 289.4086 406.5914
## Oct 2012          332.0 293.6891 370.3109 273.4086 390.5914
## Nov 2012          358.2 319.8891 396.5109 299.6086 416.7914
## Dec 2012          539.8 501.4891 578.1109 481.2086 598.3914

d.

accuracy(fc, myts.test)
##                     ME     RMSE      MAE      MPE     MAPE     MASE
## Training set  9.007207 21.13832 16.58859 4.224080 7.494415 1.000000
## Test set     10.362500 21.50499 18.99583 2.771495 5.493632 1.145115
##                   ACF1 Theil's U
## Training set 0.5277855        NA
## Test set     0.7420700 0.3223094

e.

checkresiduals(fc)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 342, df = 24, p-value < 2.2e-16
## 
## Model df: 0.   Total lags used: 24

A large value of Q* (342), together with the very small p-value, suggests that the residual autocorrelations do not come from a white noise series.
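As a quick cross-check, the same statistic can be reproduced directly from the residuals; a minimal sketch reusing the seasonal naive forecast object fc from part c:

# Sketch: recompute the Ljung-Box test on the residuals of the seasonal naive
# forecast; this should reproduce the Q* value reported above.
res <- na.omit(residuals(fc))
mean(res)  # a clearly positive mean points to biased forecasts
Box.test(res, lag = 24, type = 'Ljung-Box')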

On the histogram, the left tail seems a little too long for the residuals to be normally distributed. On the ACF chart, a number of autocorrelations lie outside the significance bands, which agrees with the Ljung-Box result: the residuals are autocorrelated rather than white noise, so the seasonal naive method leaves some information in the data unused.

f.

myts2 <- window(myts, start = 2000, end = c(2010, 12))
mytsfc1 <- meanf(myts2, h = 40)
mytsfc2 <- rwf(myts2, h = 40)
mytsfc3 <- rwf(myts2, drift = TRUE, h = 40)
autoplot(window(myts, end = c(2010, 12))) +
  autolayer(mytsfc1, PI = FALSE, series = 'Mean') +
  autolayer(mytsfc2, PI = FALSE, series = 'Naïve') +
  autolayer(mytsfc3, PI = FALSE, series = 'Drift') +
  guides(colour = guide_legend(title = 'Forecasts'))

myts3 <- window(myts, start=2011)
accuracy(mytsfc1, myts3)
##                        ME      RMSE      MAE      MPE     MAPE     MASE
## Training set 3.917952e-15  71.26729 53.75619 -5.13179 18.37203 2.461834
## Test set     8.143611e+01 115.61242 83.98519 18.65010 19.58882 3.846209
##                   ACF1 Theil's U
## Training set 0.4839071        NA
## Test set     0.3362795  1.500614
accuracy(mytsfc2, myts3)
##                       ME      RMSE       MAE        MPE     MAPE     MASE
## Training set    2.496183  69.08934  46.53282  -1.412102 15.33853 2.131030
## Test set     -163.422222 182.86939 173.07778 -48.885973 50.39801 7.926319
##                    ACF1 Theil's U
## Training set -0.2140901        NA
## Test set      0.3362795  2.760429
accuracy(mytsfc3, myts3)
##                         ME      RMSE       MAE        MPE     MAPE
## Training set -1.391577e-12  69.04423  46.05035  -2.299703 15.23363
## Test set     -2.096016e+02 221.11969 211.64813 -60.821874 61.12893
##                  MASE       ACF1 Theil's U
## Training set 2.108935 -0.2140901        NA
## Test set     9.692698  0.1776701  3.297976

Looking at the test-set results, the mean method performs best here regardless of which accuracy measure we consider.
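To make that comparison easier to read at a glance, the test-set rows of the three accuracy tables can be stacked into one; a small sketch reusing the objects above:

# Sketch: collect the test-set accuracy rows for the three benchmark methods.
rbind(
  Mean  = accuracy(mytsfc1, myts3)['Test set', ],
  Naive = accuracy(mytsfc2, myts3)['Test set', ],
  Drift = accuracy(mytsfc3, myts3)['Test set', ]
)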