9.7 Exercise #2

# load the data
huron.series <- get("huron")

# plot data
plot.ts(huron.series)

# a. Fit a piecewise linear trend model to the Lake Huron data with a knot at 1920 and an ARMA error structure
T <- length(huron.series)
x1 <- seq(T)

# create a knot at 1920
x2 <- pmax(0, x1-52)
t <- seq(T+30)

# fit ARMA with errors
fit <- Arima(huron.series, xreg=cbind(x1,x2), order=c(1,0,1))

# b. Forecast the level for the next 30 years
fc <- forecast(fit,
               xreg=cbind(max(x1)+seq(30), max(x2)+seq(30)))
b0 <- coef(fit)["intercept"]
b1 <- coef(fit)["x1"]
b2 <- coef(fit)["x2"]
trend <- ts(b0 + b1*t + b2*pmax(0,t-52),
            start=start(huron.series))

plot(fc, main="Piecewise linear trend with AR(1) errors")
lines(trend, col='red')

9.7 Exercise #6

# a Develop an appropriate dynamic regression model with Fourier terms for the seasonality. Use the AIC to select the number of Fourier terms to include in the model. (You will probably need to use the same Box-Cox transformation you identified previously.)

retaildata <- readxl::read_excel("/Users/namsan/Desktop/Spring\ 2018/PredictiveClass/projects/RawData/retail.xlsx", skip=1)

retail.ts <- ts(retaildata[,"A3349873A"],
           frequency=12, start=c(1982,4))

plot.ts(retail.ts)

for (i in seq(6)) {
  fit <- auto.arima(retail.ts, xreg = fourier(retail.ts, K = i),
                    seasonal = FALSE, lambda = "auto")
  print(fit)
}
## Series: retail.ts 
## Regression with ARIMA(4,1,2) errors 
## Box Cox transformation: lambda= 0.1276369 
## 
## Coefficients:
##          ar1      ar2      ar3      ar4      ma1     ma2   drift  S1-12
##       0.2033  -0.6760  -0.2734  -0.4520  -1.1457  0.8557  0.0094  -0.23
## s.e.  0.0503   0.0491   0.0453   0.0542   0.0335  0.0372  0.0037   0.01
##         C1-12
##       -0.0703
## s.e.   0.0100
## 
## sigma^2 estimated as 0.05198:  log likelihood=25.18
## AIC=-30.37   AICc=-29.77   BIC=9.03
## Series: retail.ts 
## Regression with ARIMA(3,1,2) errors 
## Box Cox transformation: lambda= 0.1276369 
## 
## Coefficients:
##           ar1      ar2     ar3      ma1     ma2   drift    S1-12    C1-12
##       -0.5712  -0.7843  -0.390  -0.5708  0.4752  0.0098  -0.2295  -0.0704
## s.e.   0.0756   0.0734   0.081   0.0858  0.0865  0.0032   0.0087   0.0087
##        S2-12    C2-12
##       0.0796  -0.1536
## s.e.  0.0053   0.0053
## 
## sigma^2 estimated as 0.03723:  log likelihood=89.92
## AIC=-157.83   AICc=-157.12   BIC=-114.49
## Series: retail.ts 
## Regression with ARIMA(2,1,1) errors 
## Box Cox transformation: lambda= 0.1276369 
## 
## Coefficients:
##           ar1      ar2      ma1   drift    S1-12    C1-12   S2-12    C2-12
##       -1.0026  -0.5514  -0.1771  0.0098  -0.2298  -0.0705  0.0798  -0.1544
## s.e.   0.0660   0.0564   0.0778  0.0026   0.0080   0.0080  0.0058   0.0058
##        S3-12   C3-12
##       0.1535  0.0375
## s.e.  0.0075  0.0075
## 
## sigma^2 estimated as 0.02536:  log likelihood=163.21
## AIC=-304.42   AICc=-303.7   BIC=-261.08
## Series: retail.ts 
## Regression with ARIMA(4,1,3) errors 
## Box Cox transformation: lambda= 0.1276369 
## 
## Coefficients:
##           ar1      ar2     ar3     ar4     ma1      ma2      ma3   drift
##       -1.8335  -0.5662  0.9121  0.5870  1.0399  -0.5160  -0.7683  0.0097
## s.e.   0.1292   0.3301  0.3178  0.1143  0.1123   0.2022   0.1054  0.0023
##         S1-12    C1-12   S2-12    C2-12   S3-12   C3-12    S4-12   C4-12
##       -0.2301  -0.0704  0.0800  -0.1543  0.1539  0.0381  -0.0365  0.1225
## s.e.   0.0088   0.0088  0.0054   0.0054  0.0049  0.0049   0.0062  0.0062
## 
## sigma^2 estimated as 0.01287:  log likelihood=293.43
## AIC=-552.86   AICc=-551.17   BIC=-485.87
## Series: retail.ts 
## Regression with ARIMA(2,1,1) errors 
## Box Cox transformation: lambda= 0.1276369 
## 
## Coefficients:
##           ar1     ar2      ma1   drift    S1-12    C1-12   S2-12    C2-12
##       -0.0362  0.2436  -0.6807  0.0095  -0.2298  -0.0711  0.0806  -0.1539
## s.e.   0.1340  0.1024   0.1172  0.0023   0.0088   0.0088  0.0061   0.0061
##        S3-12   C3-12    S4-12   C4-12    S5-12   C5-12
##       0.1538  0.0388  -0.0374  0.1223  -0.1324  0.0104
## s.e.  0.0055  0.0055   0.0059  0.0059   0.0076  0.0076
## 
## sigma^2 estimated as 0.01242:  log likelihood=301.46
## AIC=-572.91   AICc=-571.59   BIC=-513.81
## Series: retail.ts 
## Regression with ARIMA(0,1,1) errors 
## Box Cox transformation: lambda= 0.1276369 
## 
## Coefficients:
##           ma1   drift    S1-12    C1-12   S2-12    C2-12   S3-12   C3-12
##       -0.5428  0.0094  -0.2294  -0.0717  0.0811  -0.1537  0.1535  0.0394
## s.e.   0.0437  0.0024   0.0084   0.0084  0.0063   0.0063  0.0059  0.0059
##         S4-12   C4-12    S5-12   C5-12   C6-12
##       -0.0383  0.1223  -0.1332  0.0093  -0.040
## s.e.   0.0057  0.0057   0.0056  0.0056   0.004
## 
## sigma^2 estimated as 0.01043:  log likelihood=334.16
## AIC=-640.31   AICc=-639.16   BIC=-585.15
# pick K = 1 since AIC of K = 1 is the smallest
fit.final <- auto.arima(retail.ts, xreg = fourier(retail.ts, K = 1),
                  seasonal = FALSE, lambda = "auto")

autoplot(forecast(fit.final,xreg=fourier(retail.ts, K=1, h=24))) 

# b Check the residuals of the fitted model. Does the residual series look like white noise. The residual does not look white noise. One possible reason could be that auto.arima does not handle seasonal data so well. The residuals also demonstrate that there is a lot of information that has not been captured with this model. An ARIMAX model might be a better choice here.o

checkresiduals(fit.final)

## 
##  Ljung-Box test
## 
## data:  Residuals from Regression with ARIMA(4,1,2) errors
## Q* = 693.26, df = 15, p-value < 2.2e-16
## 
## Model df: 9.   Total lags used: 24

10.8 Exercise #4

# Generate 8-step-ahead optimally reconciled coherent forecasts using ARIMA base forecasts for the visnights Australian domestic tourism data. Plot the coherent forecasts by level and comment on their nature. How and why are these different to the bottom-up forecasts generated in question 2 above. 

library(hts)
## Warning: package 'hts' was built under R version 3.4.4
tourism.hts <- hts(visnights, characters = c(3, 5))
tourism.hts %>% aggts(levels=0:1) %>%
  autoplot(facet=TRUE) +
  xlab("Year") + ylab("millions") + ggtitle("Visitor nights")

# total (Level 0) -> States (Level 1) -> Zones (Level 2)
y <- hts(visnights, nodes=list(6,c(5,3,3,4,3,2)))
## Since argument characters are not specified, the default labelling system is used.
# bottom up 
all.bu <- forecast(y, h = 8, method="bu", fmethod="arima")
plot(all.bu)

# optimal reconciliation approach
all.comb <- forecast(y, h = 8,method = 'comb', weights = 'ols')
plot(all.comb)

10.8 Exercise #5

# Using the last two years of the visnights Australian domestic tourism data as a test set, generate bottom-up, top-down and optimally reconciled forecasts for this period and compare their accuracy.

mydata <- get("visnights")
train <- window(mydata, start = 1998, end = 2014, frequency = 4)

test <- window(mydata, 2015, 2017)
## Warning in window.default(x, ...): 'end' value not changed
# total (Level 0) -> States (Level 1) -> Zones (Level 2)

train.ts <- hts(train, nodes=list(6,c(5,3,3,4,3,2)))
## Since argument characters are not specified, the default labelling system is used.
test.ts <- hts(test, nodes=list(6,c(5,3,3,4,3,2)))
## Since argument characters are not specified, the default labelling system is used.
# bottom up 
train.bu <- forecast(train.ts, h = 8, method="bu", fmethod="arima")
plot(train.bu)

# top down
train.tp <- forecast(train.ts, h = 8, method="tdfp", fmethod="arima")
plot(train.tp)

# optimal reconciliation approach
train.comb <- forecast(train.ts, h = 8,method = 'comb', weights = 'ols')
plot(train.comb)

# bottom up 
accuracy(train.bu,test.ts)
##          Total         A         B          C         D         E
## ME   10.575292  2.590049  1.506850  0.5622451  1.725147  2.906611
## RMSE 15.122749  4.975827  3.282014  1.2479498  4.668607  3.022910
## MAE  11.565150  3.841511  2.955868  0.9490020  3.582782  2.906611
## MAPE 13.558905 15.445679 14.284152 16.6142709 19.975566 27.272598
## MPE  12.317001  9.967119  6.238340  7.6468826  8.119082 27.272598
## MASE  3.767175  3.436821  2.220134  2.1158671  4.974806  3.613530
##              F  NSWMetro   NSWNthCo   NSWSthCo   NSWSthIn   NSWNthIn
## ME    1.284389  1.083067  0.6703997  0.1905102  0.2043711  0.4417014
## RMSE  1.437664  1.531740  2.0324227  1.6690271  0.4289942  0.6046244
## MAE   1.284389  1.174161  1.5801076  1.4420035  0.3270330  0.4833198
## MAPE 19.848874 15.458121 20.4989391 46.7438727 11.4831033 13.3610955
## MPE  19.848874 14.146557  6.4639607 -6.2189759  6.0868538 11.8601036
## MASE  2.145307  1.816548  2.9159309  4.1539647  1.0445535  1.3189228
##       QLDMetro   QLDCntrl   QLDNthCo   SAUMetro   SAUCoast   SAUInner
## ME    0.641197  0.4240317  0.4416215  0.1245140  0.1935885  0.2441426
## RMSE  1.782016  1.1771478  1.6023383  0.4436388  0.7199169  0.2920257
## MAE   1.421289  0.9634403  1.2808662  0.3359427  0.6077721  0.2468588
## MAPE 12.935800 17.0265848 28.7675454 14.1079405 31.1298554 19.4966031
## MPE   4.740272  4.8527710  4.7132468  3.1577033  1.5763151 19.2029763
## MASE  1.616719  1.6043382  2.2947421  1.1148292  3.1098524  1.6364299
##       VICMetro     VICWstCo     VICEstCo   VICInner   WAUMetro  WAUCoast
## ME    1.121307   0.01482662   0.01639215  0.5726215  0.5664610  1.876351
## RMSE  2.170442   1.03417904   1.03540202  0.7729115  0.6675319  1.994472
## MAE   1.624018   0.87047335   0.90372770  0.6296526  0.5664610  1.876351
## MAPE 18.700391  54.87056663  47.24972270 12.8076114 15.8532788 30.750397
## MPE  12.293580 -15.88427984 -13.43744407 11.5634775 15.8532788 30.750397
## MASE  3.154563   5.00423517   3.83204530  1.7813799  1.6855392  3.573545
##        WAUInner   OTHMetro   OTHNoMet
## ME    0.4637996  0.6642852  0.6201040
## RMSE  0.4900579  0.7603927  0.7466206
## MAE   0.4637996  0.6883408  0.6258181
## MAPE 38.7187516 16.4032195 26.9987070
## MPE  38.7187516 15.6693748 26.7151657
## MASE  2.5071687  1.5197790  2.6654125
# optimal reconciliation approach
accuracy(train.comb,test.ts)
##          Total         A         B          C         D         E
## ME    9.701451  2.063945  1.603324  0.5929097  1.800260  2.386963
## RMSE 14.665349  4.602371  3.704319  1.3070200  4.528286  2.579094
## MAE  10.591610  3.629240  3.380935  1.0147740  3.428043  2.386963
## MAPE 12.278745 14.616859 16.506565 17.5851163 19.036605 22.194045
## MPE  11.161926  7.751091  6.613047  8.2024863  8.718648 22.194045
## MASE  3.450059  3.246912  2.539400  2.2625105  4.759946  2.967498
##              F   NSWMetro   NSWNthCo   NSWSthCo   NSWSthIn   NSWNthIn
## ME    1.254049  0.4909420  0.6816277  0.1793677  0.1929793  0.5190289
## RMSE  1.505904  1.1583261  2.0183887  1.6922862  0.5100798  0.6731167
## MAE   1.262854  0.8824261  1.5755094  1.4764172  0.4270545  0.5442529
## MAPE 19.248980 11.7048841 20.3773301 48.1378625 15.4498790 15.0805515
## MPE  19.079489  5.8232667  6.9002162 -6.2152208  5.3230317 14.1527954
## MASE  2.109337  1.3652039  2.9074453  4.2530999  1.3640251  1.4852020
##       QLDMetro   QLDCntrl   QLDNthCo    SAUMetro   SAUCoast   SAUInner
## ME    0.544459  0.7044089  0.3544561  0.07566693  0.2104071  0.3068356
## RMSE  1.805666  1.3699794  1.5434516  0.40774261  0.7982733  0.3423936
## MAE   1.524869  1.0305485  1.2833885  0.30107896  0.6711526  0.3068356
## MAPE 14.063243 17.2786250 29.8273035 12.62715791 34.1031918 24.7314978
## MPE   3.746562 10.7466741  2.5474532  1.07652649  1.8741343 24.7314978
## MASE  1.734541  1.7160879  2.2992609  0.99913352  3.4341581  2.0340166
##       VICMetro     VICWstCo     VICEstCo   VICInner   WAUMetro  WAUCoast
## ME    1.283625  -0.05665811   0.04853233  0.5247611  0.4746394  1.437546
## RMSE  2.133912   1.03657907   1.00930673  0.7436025  0.6751107  1.609180
## MAE   1.546383   0.84368438   0.88536838  0.6030051  0.5169433  1.437546
## MAPE 17.810168  52.32358175  46.46782216 12.2724379 14.3176146 23.311410
## MPE  14.452792 -20.59898815 -10.87238596 10.5655455 12.8377603 23.311410
## MASE  3.003761   4.85022898   3.75419691  1.7059902  1.5381962  2.737834
##        WAUInner   OTHMetro   OTHNoMet
## ME    0.4747775  0.6882053  0.5658433
## RMSE  0.5073884  0.8908851  0.6878917
## MAE   0.4747775  0.7775388  0.5660742
## MAPE 39.3318442 18.5393303 24.1550625
## MPE  39.3318442 15.8141057 24.1436079
## MASE  2.5665119  1.7167182  2.4109579
# top down
accuracy(train.tp,test.ts)
##          Total         A         B          C         D         E
## ME    9.854425  1.903499  1.498164  0.7126613  1.686887  2.833662
## RMSE 15.051319  4.880564  2.924736  1.3929145  4.904064  2.937564
## MAE  11.757084  3.966255  2.597171  1.1061746  3.996712  2.833662
## MAPE 13.823649 16.075380 12.503248 19.3328585 22.836435 26.588250
## MPE  11.466362  6.912609  6.411087 10.5481197  7.852979 26.588250
## MASE  3.829695  3.548424  1.950720  2.4662945  5.549561  3.522838
##              F   NSWMetro   NSWNthCo     NSWSthCo  NSWSthIn   NSWNthIn
## ME    1.219552  0.8805781  0.4564707   0.09558996 0.1228477  0.3480123
## RMSE  1.418115  1.4478165  2.0489980   1.71028141 0.3697613  0.5424301
## MAE   1.219552  1.0572843  1.6884498   1.49651906 0.2787171  0.4575121
## MAPE 18.719752 13.8584460 22.3508212  49.53334709 9.9012836 12.8938310
## MPE  18.719752 11.2545468  3.2252979 -10.17028950 3.2064802  9.0051161
## MASE  2.037009  1.6357274  3.1158656   4.31100709 0.8902311  1.2484967
##       QLDMetro   QLDCntrl   QLDNthCo   SAUMetro   SAUCoast   SAUInner
## ME    0.612344  0.4219726  0.4638469  0.1947866  0.2414439  0.2764308
## RMSE  1.744208  1.1174653  1.5085861  0.4732272  0.7717573  0.3283500
## MAE   1.231498  0.9268238  1.1836217  0.3188545  0.6486514  0.2881358
## MAPE 11.065400 16.4803742 26.2615644 12.8662936 32.6469703 23.0545428
## MPE   4.710139  4.9671220  5.4673857  6.4975003  4.3372018 21.7892346
## MASE  1.400831  1.5433638  2.1205232  1.0581219  3.3190242  1.9100556
##       VICMetro      VICWstCo      VICEstCo   VICInner   WAUMetro  WAUCoast
## ME    1.106021  8.575432e-04   0.006561725  0.5734471  0.5377757  1.839399
## RMSE  2.267004  1.065124e+00   1.071161193  0.8203929  0.6415844  1.946618
## MAE   1.752287  9.112290e-01   0.935880465  0.6705123  0.5377757  1.839399
## MAPE 20.416219  5.842472e+01  49.178201009 13.7190832 14.9963233 30.148054
## MPE  12.102720 -1.705129e+01 -14.260164843 11.6016050 14.9963233 30.148054
## MASE  3.403719  5.238534e+00   3.968381544  1.8969780  1.6001845  3.503171
##        WAUInner   OTHMetro   OTHNoMet
## ME    0.4564868  0.6224612  0.5970904
## RMSE  0.4843473  0.7458343  0.7424759
## MAE   0.4564868  0.6727917  0.6233279
## MAPE 38.0549549 16.0657022 26.8924084
## MPE  38.0549549 14.5303105 25.5904622
## MASE  2.4676375  1.4854484  2.6548064
# From those accuracy plots, we find that the only level for which the top-down approach is better than bottom up is the top level. As we move down the hierarchy, the optimal combination approach and the bottom-up approach clearly outperform the top-down method.The good performance of the bottom-up approach can be attributed to the fact that the data have strong seasonality and trends, even at the bottom level.