9.7 Exercise #2
# load the data
huron.series <- get("huron")
# plot data
plot.ts(huron.series)

# a. Fit a piecewise linear trend model to the Lake Huron data with a knot at 1920 and an ARMA error structure
T <- length(huron.series)
x1 <- seq(T)
# create a knot at 1920
x2 <- pmax(0, x1-52)
t <- seq(T+30)
# fit ARMA with errors
fit <- Arima(huron.series, xreg=cbind(x1,x2), order=c(1,0,1))
# b. Forecast the level for the next 30 years
fc <- forecast(fit,
xreg=cbind(max(x1)+seq(30), max(x2)+seq(30)))
b0 <- coef(fit)["intercept"]
b1 <- coef(fit)["x1"]
b2 <- coef(fit)["x2"]
trend <- ts(b0 + b1*t + b2*pmax(0,t-52),
start=start(huron.series))
plot(fc, main="Piecewise linear trend with AR(1) errors")
lines(trend, col='red')

9.7 Exercise #6
# a Develop an appropriate dynamic regression model with Fourier terms for the seasonality. Use the AIC to select the number of Fourier terms to include in the model. (You will probably need to use the same Box-Cox transformation you identified previously.)
retaildata <- readxl::read_excel("/Users/namsan/Desktop/Spring\ 2018/PredictiveClass/projects/RawData/retail.xlsx", skip=1)
retail.ts <- ts(retaildata[,"A3349873A"],
frequency=12, start=c(1982,4))
plot.ts(retail.ts)

for (i in seq(6)) {
fit <- auto.arima(retail.ts, xreg = fourier(retail.ts, K = i),
seasonal = FALSE, lambda = "auto")
print(fit)
}
## Series: retail.ts
## Regression with ARIMA(4,1,2) errors
## Box Cox transformation: lambda= 0.1276369
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 drift S1-12
## 0.2033 -0.6760 -0.2734 -0.4520 -1.1457 0.8557 0.0094 -0.23
## s.e. 0.0503 0.0491 0.0453 0.0542 0.0335 0.0372 0.0037 0.01
## C1-12
## -0.0703
## s.e. 0.0100
##
## sigma^2 estimated as 0.05198: log likelihood=25.18
## AIC=-30.37 AICc=-29.77 BIC=9.03
## Series: retail.ts
## Regression with ARIMA(3,1,2) errors
## Box Cox transformation: lambda= 0.1276369
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 drift S1-12 C1-12
## -0.5712 -0.7843 -0.390 -0.5708 0.4752 0.0098 -0.2295 -0.0704
## s.e. 0.0756 0.0734 0.081 0.0858 0.0865 0.0032 0.0087 0.0087
## S2-12 C2-12
## 0.0796 -0.1536
## s.e. 0.0053 0.0053
##
## sigma^2 estimated as 0.03723: log likelihood=89.92
## AIC=-157.83 AICc=-157.12 BIC=-114.49
## Series: retail.ts
## Regression with ARIMA(2,1,1) errors
## Box Cox transformation: lambda= 0.1276369
##
## Coefficients:
## ar1 ar2 ma1 drift S1-12 C1-12 S2-12 C2-12
## -1.0026 -0.5514 -0.1771 0.0098 -0.2298 -0.0705 0.0798 -0.1544
## s.e. 0.0660 0.0564 0.0778 0.0026 0.0080 0.0080 0.0058 0.0058
## S3-12 C3-12
## 0.1535 0.0375
## s.e. 0.0075 0.0075
##
## sigma^2 estimated as 0.02536: log likelihood=163.21
## AIC=-304.42 AICc=-303.7 BIC=-261.08
## Series: retail.ts
## Regression with ARIMA(4,1,3) errors
## Box Cox transformation: lambda= 0.1276369
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 ma2 ma3 drift
## -1.8335 -0.5662 0.9121 0.5870 1.0399 -0.5160 -0.7683 0.0097
## s.e. 0.1292 0.3301 0.3178 0.1143 0.1123 0.2022 0.1054 0.0023
## S1-12 C1-12 S2-12 C2-12 S3-12 C3-12 S4-12 C4-12
## -0.2301 -0.0704 0.0800 -0.1543 0.1539 0.0381 -0.0365 0.1225
## s.e. 0.0088 0.0088 0.0054 0.0054 0.0049 0.0049 0.0062 0.0062
##
## sigma^2 estimated as 0.01287: log likelihood=293.43
## AIC=-552.86 AICc=-551.17 BIC=-485.87
## Series: retail.ts
## Regression with ARIMA(2,1,1) errors
## Box Cox transformation: lambda= 0.1276369
##
## Coefficients:
## ar1 ar2 ma1 drift S1-12 C1-12 S2-12 C2-12
## -0.0362 0.2436 -0.6807 0.0095 -0.2298 -0.0711 0.0806 -0.1539
## s.e. 0.1340 0.1024 0.1172 0.0023 0.0088 0.0088 0.0061 0.0061
## S3-12 C3-12 S4-12 C4-12 S5-12 C5-12
## 0.1538 0.0388 -0.0374 0.1223 -0.1324 0.0104
## s.e. 0.0055 0.0055 0.0059 0.0059 0.0076 0.0076
##
## sigma^2 estimated as 0.01242: log likelihood=301.46
## AIC=-572.91 AICc=-571.59 BIC=-513.81
## Series: retail.ts
## Regression with ARIMA(0,1,1) errors
## Box Cox transformation: lambda= 0.1276369
##
## Coefficients:
## ma1 drift S1-12 C1-12 S2-12 C2-12 S3-12 C3-12
## -0.5428 0.0094 -0.2294 -0.0717 0.0811 -0.1537 0.1535 0.0394
## s.e. 0.0437 0.0024 0.0084 0.0084 0.0063 0.0063 0.0059 0.0059
## S4-12 C4-12 S5-12 C5-12 C6-12
## -0.0383 0.1223 -0.1332 0.0093 -0.040
## s.e. 0.0057 0.0057 0.0056 0.0056 0.004
##
## sigma^2 estimated as 0.01043: log likelihood=334.16
## AIC=-640.31 AICc=-639.16 BIC=-585.15
# pick K = 1 since AIC of K = 1 is the smallest
fit.final <- auto.arima(retail.ts, xreg = fourier(retail.ts, K = 1),
seasonal = FALSE, lambda = "auto")
autoplot(forecast(fit.final,xreg=fourier(retail.ts, K=1, h=24)))

# b Check the residuals of the fitted model. Does the residual series look like white noise. The residual does not look white noise. One possible reason could be that auto.arima does not handle seasonal data so well. The residuals also demonstrate that there is a lot of information that has not been captured with this model. An ARIMAX model might be a better choice here.o
checkresiduals(fit.final)

##
## Ljung-Box test
##
## data: Residuals from Regression with ARIMA(4,1,2) errors
## Q* = 693.26, df = 15, p-value < 2.2e-16
##
## Model df: 9. Total lags used: 24
10.8 Exercise #4
# Generate 8-step-ahead optimally reconciled coherent forecasts using ARIMA base forecasts for the visnights Australian domestic tourism data. Plot the coherent forecasts by level and comment on their nature. How and why are these different to the bottom-up forecasts generated in question 2 above.
library(hts)
## Warning: package 'hts' was built under R version 3.4.4
tourism.hts <- hts(visnights, characters = c(3, 5))
tourism.hts %>% aggts(levels=0:1) %>%
autoplot(facet=TRUE) +
xlab("Year") + ylab("millions") + ggtitle("Visitor nights")

# total (Level 0) -> States (Level 1) -> Zones (Level 2)
y <- hts(visnights, nodes=list(6,c(5,3,3,4,3,2)))
## Since argument characters are not specified, the default labelling system is used.
# bottom up
all.bu <- forecast(y, h = 8, method="bu", fmethod="arima")
plot(all.bu)

# optimal reconciliation approach
all.comb <- forecast(y, h = 8,method = 'comb', weights = 'ols')
plot(all.comb)

10.8 Exercise #5
# Using the last two years of the visnights Australian domestic tourism data as a test set, generate bottom-up, top-down and optimally reconciled forecasts for this period and compare their accuracy.
mydata <- get("visnights")
train <- window(mydata, start = 1998, end = 2014, frequency = 4)
test <- window(mydata, 2015, 2017)
## Warning in window.default(x, ...): 'end' value not changed
# total (Level 0) -> States (Level 1) -> Zones (Level 2)
train.ts <- hts(train, nodes=list(6,c(5,3,3,4,3,2)))
## Since argument characters are not specified, the default labelling system is used.
test.ts <- hts(test, nodes=list(6,c(5,3,3,4,3,2)))
## Since argument characters are not specified, the default labelling system is used.
# bottom up
train.bu <- forecast(train.ts, h = 8, method="bu", fmethod="arima")
plot(train.bu)

# top down
train.tp <- forecast(train.ts, h = 8, method="tdfp", fmethod="arima")
plot(train.tp)

# optimal reconciliation approach
train.comb <- forecast(train.ts, h = 8,method = 'comb', weights = 'ols')
plot(train.comb)

# bottom up
accuracy(train.bu,test.ts)
## Total A B C D E
## ME 10.575292 2.590049 1.506850 0.5622451 1.725147 2.906611
## RMSE 15.122749 4.975827 3.282014 1.2479498 4.668607 3.022910
## MAE 11.565150 3.841511 2.955868 0.9490020 3.582782 2.906611
## MAPE 13.558905 15.445679 14.284152 16.6142709 19.975566 27.272598
## MPE 12.317001 9.967119 6.238340 7.6468826 8.119082 27.272598
## MASE 3.767175 3.436821 2.220134 2.1158671 4.974806 3.613530
## F NSWMetro NSWNthCo NSWSthCo NSWSthIn NSWNthIn
## ME 1.284389 1.083067 0.6703997 0.1905102 0.2043711 0.4417014
## RMSE 1.437664 1.531740 2.0324227 1.6690271 0.4289942 0.6046244
## MAE 1.284389 1.174161 1.5801076 1.4420035 0.3270330 0.4833198
## MAPE 19.848874 15.458121 20.4989391 46.7438727 11.4831033 13.3610955
## MPE 19.848874 14.146557 6.4639607 -6.2189759 6.0868538 11.8601036
## MASE 2.145307 1.816548 2.9159309 4.1539647 1.0445535 1.3189228
## QLDMetro QLDCntrl QLDNthCo SAUMetro SAUCoast SAUInner
## ME 0.641197 0.4240317 0.4416215 0.1245140 0.1935885 0.2441426
## RMSE 1.782016 1.1771478 1.6023383 0.4436388 0.7199169 0.2920257
## MAE 1.421289 0.9634403 1.2808662 0.3359427 0.6077721 0.2468588
## MAPE 12.935800 17.0265848 28.7675454 14.1079405 31.1298554 19.4966031
## MPE 4.740272 4.8527710 4.7132468 3.1577033 1.5763151 19.2029763
## MASE 1.616719 1.6043382 2.2947421 1.1148292 3.1098524 1.6364299
## VICMetro VICWstCo VICEstCo VICInner WAUMetro WAUCoast
## ME 1.121307 0.01482662 0.01639215 0.5726215 0.5664610 1.876351
## RMSE 2.170442 1.03417904 1.03540202 0.7729115 0.6675319 1.994472
## MAE 1.624018 0.87047335 0.90372770 0.6296526 0.5664610 1.876351
## MAPE 18.700391 54.87056663 47.24972270 12.8076114 15.8532788 30.750397
## MPE 12.293580 -15.88427984 -13.43744407 11.5634775 15.8532788 30.750397
## MASE 3.154563 5.00423517 3.83204530 1.7813799 1.6855392 3.573545
## WAUInner OTHMetro OTHNoMet
## ME 0.4637996 0.6642852 0.6201040
## RMSE 0.4900579 0.7603927 0.7466206
## MAE 0.4637996 0.6883408 0.6258181
## MAPE 38.7187516 16.4032195 26.9987070
## MPE 38.7187516 15.6693748 26.7151657
## MASE 2.5071687 1.5197790 2.6654125
# optimal reconciliation approach
accuracy(train.comb,test.ts)
## Total A B C D E
## ME 9.701451 2.063945 1.603324 0.5929097 1.800260 2.386963
## RMSE 14.665349 4.602371 3.704319 1.3070200 4.528286 2.579094
## MAE 10.591610 3.629240 3.380935 1.0147740 3.428043 2.386963
## MAPE 12.278745 14.616859 16.506565 17.5851163 19.036605 22.194045
## MPE 11.161926 7.751091 6.613047 8.2024863 8.718648 22.194045
## MASE 3.450059 3.246912 2.539400 2.2625105 4.759946 2.967498
## F NSWMetro NSWNthCo NSWSthCo NSWSthIn NSWNthIn
## ME 1.254049 0.4909420 0.6816277 0.1793677 0.1929793 0.5190289
## RMSE 1.505904 1.1583261 2.0183887 1.6922862 0.5100798 0.6731167
## MAE 1.262854 0.8824261 1.5755094 1.4764172 0.4270545 0.5442529
## MAPE 19.248980 11.7048841 20.3773301 48.1378625 15.4498790 15.0805515
## MPE 19.079489 5.8232667 6.9002162 -6.2152208 5.3230317 14.1527954
## MASE 2.109337 1.3652039 2.9074453 4.2530999 1.3640251 1.4852020
## QLDMetro QLDCntrl QLDNthCo SAUMetro SAUCoast SAUInner
## ME 0.544459 0.7044089 0.3544561 0.07566693 0.2104071 0.3068356
## RMSE 1.805666 1.3699794 1.5434516 0.40774261 0.7982733 0.3423936
## MAE 1.524869 1.0305485 1.2833885 0.30107896 0.6711526 0.3068356
## MAPE 14.063243 17.2786250 29.8273035 12.62715791 34.1031918 24.7314978
## MPE 3.746562 10.7466741 2.5474532 1.07652649 1.8741343 24.7314978
## MASE 1.734541 1.7160879 2.2992609 0.99913352 3.4341581 2.0340166
## VICMetro VICWstCo VICEstCo VICInner WAUMetro WAUCoast
## ME 1.283625 -0.05665811 0.04853233 0.5247611 0.4746394 1.437546
## RMSE 2.133912 1.03657907 1.00930673 0.7436025 0.6751107 1.609180
## MAE 1.546383 0.84368438 0.88536838 0.6030051 0.5169433 1.437546
## MAPE 17.810168 52.32358175 46.46782216 12.2724379 14.3176146 23.311410
## MPE 14.452792 -20.59898815 -10.87238596 10.5655455 12.8377603 23.311410
## MASE 3.003761 4.85022898 3.75419691 1.7059902 1.5381962 2.737834
## WAUInner OTHMetro OTHNoMet
## ME 0.4747775 0.6882053 0.5658433
## RMSE 0.5073884 0.8908851 0.6878917
## MAE 0.4747775 0.7775388 0.5660742
## MAPE 39.3318442 18.5393303 24.1550625
## MPE 39.3318442 15.8141057 24.1436079
## MASE 2.5665119 1.7167182 2.4109579
# top down
accuracy(train.tp,test.ts)
## Total A B C D E
## ME 9.854425 1.903499 1.498164 0.7126613 1.686887 2.833662
## RMSE 15.051319 4.880564 2.924736 1.3929145 4.904064 2.937564
## MAE 11.757084 3.966255 2.597171 1.1061746 3.996712 2.833662
## MAPE 13.823649 16.075380 12.503248 19.3328585 22.836435 26.588250
## MPE 11.466362 6.912609 6.411087 10.5481197 7.852979 26.588250
## MASE 3.829695 3.548424 1.950720 2.4662945 5.549561 3.522838
## F NSWMetro NSWNthCo NSWSthCo NSWSthIn NSWNthIn
## ME 1.219552 0.8805781 0.4564707 0.09558996 0.1228477 0.3480123
## RMSE 1.418115 1.4478165 2.0489980 1.71028141 0.3697613 0.5424301
## MAE 1.219552 1.0572843 1.6884498 1.49651906 0.2787171 0.4575121
## MAPE 18.719752 13.8584460 22.3508212 49.53334709 9.9012836 12.8938310
## MPE 18.719752 11.2545468 3.2252979 -10.17028950 3.2064802 9.0051161
## MASE 2.037009 1.6357274 3.1158656 4.31100709 0.8902311 1.2484967
## QLDMetro QLDCntrl QLDNthCo SAUMetro SAUCoast SAUInner
## ME 0.612344 0.4219726 0.4638469 0.1947866 0.2414439 0.2764308
## RMSE 1.744208 1.1174653 1.5085861 0.4732272 0.7717573 0.3283500
## MAE 1.231498 0.9268238 1.1836217 0.3188545 0.6486514 0.2881358
## MAPE 11.065400 16.4803742 26.2615644 12.8662936 32.6469703 23.0545428
## MPE 4.710139 4.9671220 5.4673857 6.4975003 4.3372018 21.7892346
## MASE 1.400831 1.5433638 2.1205232 1.0581219 3.3190242 1.9100556
## VICMetro VICWstCo VICEstCo VICInner WAUMetro WAUCoast
## ME 1.106021 8.575432e-04 0.006561725 0.5734471 0.5377757 1.839399
## RMSE 2.267004 1.065124e+00 1.071161193 0.8203929 0.6415844 1.946618
## MAE 1.752287 9.112290e-01 0.935880465 0.6705123 0.5377757 1.839399
## MAPE 20.416219 5.842472e+01 49.178201009 13.7190832 14.9963233 30.148054
## MPE 12.102720 -1.705129e+01 -14.260164843 11.6016050 14.9963233 30.148054
## MASE 3.403719 5.238534e+00 3.968381544 1.8969780 1.6001845 3.503171
## WAUInner OTHMetro OTHNoMet
## ME 0.4564868 0.6224612 0.5970904
## RMSE 0.4843473 0.7458343 0.7424759
## MAE 0.4564868 0.6727917 0.6233279
## MAPE 38.0549549 16.0657022 26.8924084
## MPE 38.0549549 14.5303105 25.5904622
## MASE 2.4676375 1.4854484 2.6548064
# From those accuracy plots, we find that the only level for which the top-down approach is better than bottom up is the top level. As we move down the hierarchy, the optimal combination approach and the bottom-up approach clearly outperform the top-down method.The good performance of the bottom-up approach can be attributed to the fact that the data have strong seasonality and trends, even at the bottom level.