#Chapter 7 - Exponential Smoothing

#Packages used

#library(ggplot2)
library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
#library(forecast)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(psych)
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
#library(GGally)
library(seasonal)
## 
## Attaching package: 'seasonal'
## The following object is masked from 'package:psych':
## 
##     outlier
library(apricom)
library(readr)

#Question 1.A

str(pigs)
##  Time-Series [1:188] from 1980 to 1996: 76378 71947 33873 96428 105084 ...
pigsSES = ses(pigs) # default forecast horizon is h = 10
pigsSES
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995       98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995       98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995       98816.41 83958.37 113674.4 76092.99 121539.8
## Jan 1996       98816.41 83448.52 114184.3 75313.25 122319.6
## Feb 1996       98816.41 82955.06 114677.8 74558.56 123074.2
## Mar 1996       98816.41 82476.49 115156.3 73826.66 123806.2
## Apr 1996       98816.41 82011.54 115621.3 73115.58 124517.2
## May 1996       98816.41 81559.12 116073.7 72423.66 125209.2
## Jun 1996       98816.41 81118.26 116514.6 71749.42 125883.4
pigsSES2 = ses(pigs, h=4) # h=4: forecast the next four months (pigs is a monthly series)
autoplot(pigsSES2)

#Question 1.B

pigsSES$lower[1,"95%"]
##      95% 
## 78611.97
pigsSES$upper[1,"95%"]
##      95% 
## 119020.8
res = sd(pigsSES$residuals)
res
## [1] 10273.69
pigsSES$mean[1] - 1.96*res
## [1] 78679.97
pigsSES$mean[1] + 1.96*res
## [1] 118952.8
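# The hand-computed interval (78680, 118953) is very close to R's (78612, 119021);
# the small difference comes from how the residual standard deviation is estimated.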

#Question 2

pigs.f1 <- ses(pigs, h=4, alpha=0.1, initial="simple")
pigs.f1
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98389.03 84025.49 112752.6 76421.90 120356.2
## Oct 1995       98389.03 83953.85 112824.2 76312.34 120465.7
## Nov 1995       98389.03 83882.57 112895.5 76203.31 120574.7
## Dec 1995       98389.03 83811.63 112966.4 76094.83 120683.2
forecast(pigs.f1, h=4)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Sep 1995       98389.03 84025.49 112752.6 76421.90 120356.2
## Oct 1995       98389.03 83953.85 112824.2 76312.34 120465.7
## Nov 1995       98389.03 83882.57 112895.5 76203.31 120574.7
## Dec 1995       98389.03 83811.63 112966.4 76094.83 120683.2
# The forecast() output is identical to printing pigs.f1 directly: ses() already returns a forecast object.

#Question 3

# Implement SES manually to verify the forecasted value
ses_manual = function(y, alpha, l0){
  y_hat = l0
  for(index in 1:length(y)){
    # SES recursion: the new level is a weighted average of the latest
    # observation and the previous level
    y_hat = alpha*y[index] + (1 - alpha)*y_hat
  }
  cat("Forecast result from ses_manual function: ",
      as.character(y_hat),
      sep = "\n")
}

# Compare the forecast from R's built-in ses() to the manual implementation
a = pigs.f1$model$par[1]
l0 = pigs.f1$model$par[2]

# They are the same:
ses_manual(pigs, alpha = a, l0 = l0)
## Forecast result from ses_manual function: 
## 98389.0265200346
#We do get the same results as the SES function

#Question 4

ses_manual(pigs, alpha = a, l0 = l0)
## Forecast result from ses_manual function: 
## 98389.0265200346
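# Question 4 in the text asks for a version that estimates alpha and l0 itself.
# A minimal sketch using optim() to minimise the sum of squared one-step errors
# (ses_sse and ses_optim are illustrative names, not forecast-package functions):
ses_sse <- function(par, y) {
  alpha <- par[1]
  level <- par[2]
  sse <- 0
  for (t in seq_along(y)) {
    error <- y[t] - level  # one-step-ahead forecast error
    sse <- sse + error^2
    level <- alpha * y[t] + (1 - alpha) * level  # SES level update
  }
  sse
}
ses_optim <- function(y) {
  fit <- optim(par = c(0.5, y[1]), fn = ses_sse, y = y,
               method = "L-BFGS-B",
               lower = c(1e-4, min(y)), upper = c(1 - 1e-4, max(y)))
  c(alpha = fit$par[1], l0 = fit$par[2])
}
ses_optim(pigs)  # should land close to the alpha and l reported by ses(pigs)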

#Question 5.A Data set books contains the daily sales of paperback and hardcover books at the same store. The task is to forecast the next four days’ sales for paperback and hardcover books

autoplot(books, ylab="Book sales", xlab="Number of Days", main="Book Sales over Time")

#5.A
# Both series trend upward with large day-to-day fluctuations, and they often move in opposite directions: when paperback sales increase, hardcover sales tend to decrease, and vice versa.

#Question 5.B

books = data.frame(books)
hard.ts = ts(books$Hardcover)
paper.ts = ts(books$Paperback)
fore.hard = ses(hard.ts, h=12)
fore.paper = ses(paper.ts, h=12) # h=12 forecasts 12 days ahead (the exercise only requires the first 4)
summary(fore.hard)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = hard.ts, h = 12) 
## 
##   Smoothing parameters:
##     alpha = 0.3283 
## 
##   Initial states:
##     l = 149.2861 
## 
##   sigma:  33.0517
## 
##      AIC     AICc      BIC 
## 315.8506 316.7737 320.0542 
## 
## Error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE       ACF1
## Training set 9.166735 31.93101 26.77319 2.636189 13.39487 0.7987887 -0.1417763
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       239.5601 197.2026 281.9176 174.7799 304.3403
## 32       239.5601 194.9788 284.1414 171.3788 307.7414
## 33       239.5601 192.8607 286.2595 168.1396 310.9806
## 34       239.5601 190.8347 288.2855 165.0410 314.0792
## 35       239.5601 188.8895 290.2306 162.0662 317.0540
## 36       239.5601 187.0164 292.1038 159.2014 319.9188
## 37       239.5601 185.2077 293.9124 156.4353 322.6848
## 38       239.5601 183.4574 295.6628 153.7584 325.3618
## 39       239.5601 181.7600 297.3602 151.1625 327.9577
## 40       239.5601 180.1111 299.0091 148.6406 330.4795
## 41       239.5601 178.5066 300.6135 146.1869 332.9333
## 42       239.5601 176.9433 302.1769 143.7960 335.3242
autoplot(fore.hard)

summary(fore.paper)
## 
## Forecast method: Simple exponential smoothing
## 
## Model Information:
## Simple exponential smoothing 
## 
## Call:
##  ses(y = paper.ts, h = 12) 
## 
##   Smoothing parameters:
##     alpha = 0.1685 
## 
##   Initial states:
##     l = 170.8271 
## 
##   sigma:  34.8183
## 
##      AIC     AICc      BIC 
## 318.9747 319.8978 323.1783 
## 
## Error measures:
##                    ME     RMSE     MAE       MPE     MAPE      MASE       ACF1
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303 -0.2117522
## 
## Forecasts:
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       207.1097 162.4882 251.7311 138.8670 275.3523
## 32       207.1097 161.8589 252.3604 137.9046 276.3147
## 33       207.1097 161.2382 252.9811 136.9554 277.2639
## 34       207.1097 160.6259 253.5935 136.0188 278.2005
## 35       207.1097 160.0215 254.1979 135.0945 279.1249
## 36       207.1097 159.4247 254.7946 134.1818 280.0375
## 37       207.1097 158.8353 255.3840 133.2804 280.9389
## 38       207.1097 158.2531 255.9663 132.3899 281.8294
## 39       207.1097 157.6777 256.5417 131.5099 282.7094
## 40       207.1097 157.1089 257.1105 130.6400 283.5793
## 41       207.1097 156.5465 257.6729 129.7799 284.4394
## 42       207.1097 155.9903 258.2291 128.9293 285.2900
autoplot(fore.paper)

#Question 5.C

round(forecast::accuracy(fore.hard),3)[3] # [3] extracts the third element of the accuracy matrix, the MAE
## [1] 26.773
round(forecast::accuracy(fore.paper),3)[3] # MAE for the paperback forecasts
## [1] 27.843

#Question 6.A

h1 = holt(hard.ts, h=12)
h1
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       250.1739 212.7390 287.6087 192.9222 307.4256
## 32       253.4765 216.0416 290.9113 196.2248 310.7282
## 33       256.7791 219.3442 294.2140 199.5274 314.0308
## 34       260.0817 222.6468 297.5166 202.8300 317.3334
## 35       263.3843 225.9494 300.8192 206.1326 320.6360
## 36       266.6869 229.2520 304.1218 209.4351 323.9386
## 37       269.9895 232.5546 307.4244 212.7377 327.2412
## 38       273.2921 235.8572 310.7270 216.0403 330.5438
## 39       276.5947 239.1597 314.0296 219.3429 333.8465
## 40       279.8973 242.4623 317.3322 222.6455 337.1491
## 41       283.1999 245.7649 320.6348 225.9480 340.4517
## 42       286.5025 249.0675 323.9375 229.2506 343.7544
h2 = holt(paper.ts, h=12)
h2
##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       209.4668 166.6035 252.3301 143.9130 275.0205
## 32       210.7177 167.8544 253.5811 145.1640 276.2715
## 33       211.9687 169.1054 254.8320 146.4149 277.5225
## 34       213.2197 170.3564 256.0830 147.6659 278.7735
## 35       214.4707 171.6073 257.3340 148.9169 280.0245
## 36       215.7216 172.8583 258.5850 150.1678 281.2754
## 37       216.9726 174.1093 259.8360 151.4188 282.5264
## 38       218.2236 175.3602 261.0869 152.6697 283.7774
## 39       219.4746 176.6112 262.3379 153.9207 285.0284
## 40       220.7255 177.8621 263.5889 155.1716 286.2794
## 41       221.9765 179.1131 264.8399 156.4226 287.5305
## 42       223.2275 180.3640 266.0909 157.6735 288.7815

#Question 6.B

# Hardcover
round(forecast::accuracy(h1),3) # 3 = round to 3 decimal places
##                  ME   RMSE    MAE    MPE   MAPE  MASE   ACF1
## Training set -0.136 27.194 23.156 -2.115 12.163 0.691 -0.032
# Paperback
round(forecast::accuracy(h2),3)
##                  ME   RMSE    MAE    MPE   MAPE MASE   ACF1
## Training set -3.717 31.137 26.181 -5.509 15.584 0.66 -0.175
#The RMSE of the Hardcover series (27.194) is a little lower than that of the Paperback series (31.137), so Holt's method fits the hardcover data slightly better.

#Question 6.C

# It appears that Holt's method performed best overall: both series have lower RMSE and MAE with Holt's method than with SES.

#Question 6.D

#Lower Hardcover
h1$mean[1] - 1.96*round(forecast::accuracy(h1),2)[1,"RMSE"]
## [1] 196.8815
#Upper Hardcover
h1$mean[1] + 1.96*round(forecast::accuracy(h1),2)[1,"RMSE"]
## [1] 303.4663
#Lower Paperback
h2$mean[1] - 1.96*round(forecast::accuracy(h2),2)[1,"RMSE"]
## [1] 148.4324
#Upper Paperback
h2$mean[1] + 1.96*round(forecast::accuracy(h2),2)[1,"RMSE"]
## [1] 270.5012
# These RMSE-based 95% intervals are slightly narrower than, but very close to,
# the 95% intervals produced by holt() above (hardcover: 192.92-307.43;
# paperback: 143.91-275.02).

#Question 7

# Holt Method
holt(eggs, h=100)
##      Point Forecast       Lo 80     Hi 80       Lo 95    Hi 95
## 1994    59.78553313   24.970286  94.60078    6.540207 113.0309
## 1995    57.06372643   12.206005 101.92145  -11.540238 125.6677
## 1996    54.34191973    1.308668 107.37517  -26.765440 135.4493
## 1997    51.62011302   -8.488401 111.72863  -40.307926 143.5482
## 1998    48.89830632  -17.537664 115.33428  -52.706742 150.5034
## 1999    46.17649962  -26.035964 118.38896  -64.262933 156.6159
## 2000    43.45469292  -34.106500 121.01589  -75.164916 162.0743
## 2001    40.73288622  -41.832449 123.29822  -85.539898 167.0057
## 2002    38.01107951  -49.273098 125.29526  -95.478551 171.5007
## 2003    35.28927281  -56.472472 127.05102 -105.048206 175.6268
## 2004    32.56746611  -63.464327 128.59926 -114.300487 179.4354
## 2005    29.84565941  -70.275216 129.96654 -123.276007 182.9673
## 2006    27.12385271  -76.926479 131.17418 -132.007398 186.2551
## 2007    24.40204600  -83.435566 132.23966 -140.521350 189.3254
## 2008    21.68023930  -89.816966 133.17744 -148.840022 192.2005
## 2009    18.95843260  -96.082866 133.99973 -156.982051 194.8989
## 2010    16.23662590 -102.243631 134.71688 -164.963291 197.4365
## 2011    13.51481920 -108.308165 135.33780 -172.797358 199.8270
## 2012    10.79301249 -114.284184 135.87021 -180.496053 202.0821
## 2013     8.07120579 -120.178426 136.32084 -188.069681 204.2121
## 2014     5.34939909 -125.996817 136.69562 -195.527304 206.2261
## 2015     2.62759239 -131.744601 136.99979 -202.876944 208.1321
## 2016    -0.09421431 -137.426446 137.23802 -210.125737 209.9373
## 2017    -2.81602102 -143.046526 137.41448 -217.280071 211.6480
## 2018    -5.53782772 -148.608596 137.53294 -224.345686 213.2700
## 2019    -8.25963442 -154.116046 137.59678 -231.327766 214.8085
## 2020   -10.98144112 -159.571946 137.60906 -238.231008 216.2681
## 2021   -13.70324782 -164.979092 137.57260 -245.059687 217.6532
## 2022   -16.42505453 -170.340036 137.48993 -251.817706 218.9676
## 2023   -19.14686123 -175.657116 137.36339 -258.508640 220.2149
## 2024   -21.86866793 -180.932478 137.19514 -265.135773 221.3984
## 2025   -24.59047463 -186.168101 136.98715 -271.702130 222.5212
## 2026   -27.31228133 -191.365811 136.74125 -278.210504 223.5859
## 2027   -30.03408804 -196.527300 136.45912 -284.663482 224.5953
## 2028   -32.75589474 -201.654137 136.14235 -291.063466 225.5517
## 2029   -35.47770144 -206.747783 135.79238 -297.412688 226.4573
## 2030   -38.19950814 -211.809598 135.41058 -303.713228 227.3142
## 2031   -40.92131484 -216.840851 134.99822 -309.967029 228.1244
## 2032   -43.64312155 -221.842733 134.55649 -316.175908 228.8897
## 2033   -46.36492825 -226.816355 134.08650 -322.341569 229.6117
## 2034   -49.08673495 -231.762762 133.58929 -328.465610 230.2921
## 2035   -51.80854165 -236.682939 133.06586 -334.549533 230.9324
## 2036   -54.53034835 -241.577809 132.51711 -340.594753 231.5341
## 2037   -57.25215506 -246.448244 131.94393 -346.602603 232.0983
## 2038   -59.97396176 -251.295068 131.34714 -352.574344 232.6264
## 2039   -62.69576846 -256.119059 130.72752 -358.511164 233.1196
## 2040   -65.41757516 -260.920954 130.08580 -364.414190 233.5790
## 2041   -68.13938186 -265.701450 129.42269 -370.284491 234.0057
## 2042   -70.86118857 -270.461210 128.73883 -376.123079 234.4007
## 2043   -73.58299527 -275.200863 128.03487 -381.930915 234.7649
## 2044   -76.30480197 -279.921006 127.31140 -387.708914 235.0993
## 2045   -79.02660867 -284.622209 126.56899 -393.457946 235.4047
## 2046   -81.74841537 -289.305013 125.80818 -399.178839 235.6820
## 2047   -84.47022208 -293.969936 125.02949 -404.872385 235.9319
## 2048   -87.19202878 -298.617469 124.23341 -410.539337 236.1553
## 2049   -89.91383548 -303.248085 123.42041 -416.180415 236.3527
## 2050   -92.63564218 -307.862233 122.59095 -421.796308 236.5250
## 2051   -95.35744888 -312.460345 121.74545 -427.387675 236.6728
## 2052   -98.07925559 -317.042831 120.88432 -432.955146 236.7966
## 2053  -100.80106229 -321.610088 120.00796 -438.499326 236.8972
## 2054  -103.52286899 -326.162494 119.11676 -444.020793 236.9751
## 2055  -106.24467569 -330.700413 118.21106 -449.520103 237.0308
## 2056  -108.96648239 -335.224193 117.29123 -454.997790 237.0648
## 2057  -111.68828910 -339.734170 116.35759 -460.454367 237.0778
## 2058  -114.41009580 -344.230666 115.41047 -465.890327 237.0701
## 2059  -117.13190250 -348.713991 114.45019 -471.306143 237.0423
## 2060  -119.85370920 -353.184443 113.47702 -476.702272 236.9949
## 2061  -122.57551590 -357.642310 112.49128 -482.079153 236.9281
## 2062  -125.29732261 -362.087868 111.49322 -487.437211 236.8426
## 2063  -128.01912931 -366.521384 110.48313 -492.776852 236.7386
## 2064  -130.74093601 -370.943117 109.46124 -498.098471 236.6166
## 2065  -133.46274271 -375.353314 108.42783 -503.402447 236.4770
## 2066  -136.18454941 -379.752215 107.38312 -508.689149 236.3200
## 2067  -138.90635612 -384.140052 106.32734 -513.958929 236.1462
## 2068  -141.62816282 -388.517050 105.26072 -519.212132 235.9558
## 2069  -144.34996952 -392.883424 104.18349 -524.449088 235.7491
## 2070  -147.07177622 -397.239385 103.09583 -529.670117 235.5266
## 2071  -149.79358292 -401.585134 101.99797 -534.875530 235.2884
## 2072  -152.51538963 -405.920870 100.89009 -540.065628 235.0348
## 2073  -155.23719633 -410.246781  99.77239 -545.240700 234.7663
## 2074  -157.95900303 -414.563051  98.64505 -550.401029 234.4830
## 2075  -160.68080973 -418.869861  97.50824 -555.546889 234.1853
## 2076  -163.40261643 -423.167382  96.36215 -560.678543 233.8733
## 2077  -166.12442314 -427.455784  95.20694 -565.796249 233.5474
## 2078  -168.84622984 -431.735228  94.04277 -570.900257 233.2078
## 2079  -171.56803654 -436.005874  92.86980 -575.990809 232.8547
## 2080  -174.28984324 -440.267874  91.68819 -581.068139 232.4885
## 2081  -177.01164994 -444.521379  90.49808 -586.132476 232.1092
## 2082  -179.73345665 -448.766534  89.29962 -591.184043 231.7171
## 2083  -182.45526335 -453.003480  88.09295 -596.223054 231.3125
## 2084  -185.17707005 -457.232353  86.87821 -601.249720 230.8956
## 2085  -187.89887675 -461.453288  85.65553 -606.264245 230.4665
## 2086  -190.62068345 -465.666414  84.42505 -611.266828 230.0255
## 2087  -193.34249016 -469.871857  83.18688 -616.257661 229.5727
## 2088  -196.06429686 -474.069741  81.94115 -621.236934 229.1083
## 2089  -198.78610356 -478.260186  80.68798 -626.204828 228.6326
## 2090  -201.50791026 -482.443308  79.42749 -631.161524 228.1457
## 2091  -204.22971696 -486.619220  78.15979 -636.107193 227.6478
## 2092  -206.95152367 -490.788035  76.88499 -641.042007 227.1390
## 2093  -209.67333037 -494.949859  75.60320 -645.966131 226.6195
lambda = BoxCox.lambda(eggs)

holt1 = holt(eggs, h=100)
holt2 = holt(eggs, h=100, lambda=lambda)
holt3 = holt(eggs, h=100, lambda=lambda, damped=TRUE)

autoplot(eggs) + autolayer(holt1, series="Holt's") + autolayer(holt2, series="Box-Cox") + autolayer(holt3, series="Box-Cox damped") + ylab("Price of eggs") + xlab("Year")

acc1 = accuracy(holt1)
acc1
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.04499087 26.58219 19.18491 -1.142201 9.653791 0.9463626
##                    ACF1
## Training set 0.01348202
acc2=accuracy(holt2)
acc2
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.7736844 26.39376 18.96387 -1.072416 9.620095 0.9354593
##                    ACF1
## Training set 0.03887152
acc3=accuracy(holt3)
acc3
##                      ME     RMSE      MAE       MPE     MAPE      MASE
## Training set -0.8200445 26.53321 19.45654 -2.019718 9.976131 0.9597618
##                     ACF1
## Training set 0.005852382
#The Box-Cox transformed Holt model has the lowest RMSE and MAE, so it gives the best in-sample accuracy.

#Question 8.A

#Download the monthly Australian retail data. These represent retail sales in various categories for different Australian states.

retaildata <- readxl::read_excel("C:\\Users\\student\\Desktop\\Predictive Analytics - Forecasting\\HW 3 - Hyndman Ch 7 & 8\\retail.xlsx",skip=1)

retailTS <- ts(retaildata[,20], frequency=12, start=c(1982,4))
autoplot(retailTS)

#Multiplicative seasonality is necessary for this series because the seasonal fluctuations grow with the level of the series; an additive seasonal term would assume their size stays constant.

#Question 8.B

# Damped = FALSE
holtFit1 <- hw(retailTS, seasonal="multiplicative", damped=FALSE, h=30)
autoplot(retailTS) + autolayer(holtFit1) + ylab("Retail Sales") + xlab("Year")

# Damped = TRUE
holtFit2 <- hw(retailTS, seasonal="multiplicative", damped=TRUE, h=30)
autoplot(retailTS) + autolayer(holtFit2) + ylab("Retail Sales") + xlab("Year")

#Question 8.C

holtFit1$mean[1]
## [1] 392.1741
holtFit2$mean[1]
## [1] 394.1059
round(forecast::accuracy(holtFit1),2)
##                 ME RMSE  MAE   MPE MAPE MASE ACF1
## Training set -0.47  9.4 6.91 -0.49 3.42 0.34 0.07
round(forecast::accuracy(holtFit2),2)
##                ME RMSE MAE  MPE MAPE MASE ACF1
## Training set 0.74 9.37 6.8 0.25 3.27 0.34 0.02
#Comparing the two results, the damped=TRUE forecast performs better than the damped=FALSE one: it has a lower RMSE and MAPE.

#Question 8.D

plot(holtFit2$residuals, ylab="Residuals", xlab="Year", main="Damped Model Residuals")

checkresiduals(holtFit2)

## 
##  Ljung-Box test
## 
## data:  Residuals from Damped Holt-Winters' multiplicative method
## Q* = 23.257, df = 7, p-value = 0.001537
## 
## Model df: 17.   Total lags used: 24

# The Ljung-Box p-value (0.0015) is well below 0.05, so the residuals are not white noise; some autocorrelation remains that the model has not captured.

#Question 8.E

testRetail <- window(retailTS, start=2011)

retailSN <- snaive(testRetail, h=36)
round(forecast::accuracy(retailSN),2)
##                ME  RMSE   MAE  MPE MAPE MASE ACF1
## Training set 8.03 24.88 20.28 1.98 5.52    1 0.57
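# The seasonal naive RMSE over this window (24.88) is well above the damped
# Holt-Winters training RMSE (9.37). A fuller check is a proper train/test split
# (a sketch; trainRetail and fitHW are illustrative names):
trainRetail <- window(retailTS, end=c(2010,12))
fitHW <- hw(trainRetail, seasonal="multiplicative", damped=TRUE, h=36)
forecast::accuracy(fitHW, window(retailTS, start=2011))
forecast::accuracy(snaive(trainRetail, h=36), window(retailTS, start=2011))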

#Question 9

lambdaTS <- BoxCox.lambda(retailTS)
bc1 <- BoxCox(retailTS, lambdaTS)
bc1
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 1982                            4.171794 4.165599 4.122013 4.140355 4.122013
## 1983 4.246614 4.168702 4.154121 4.111036 4.126364 4.111036 4.164562 4.169734
## 1984 4.288901 4.220778 4.261562 4.207068 4.244726 4.191123 4.205091 4.220778
## 1985 4.308405 4.188100 4.252250 4.258779 4.277161 4.239040 4.280791 4.311904
## 1986 4.402869 4.290692 4.355293 4.387039 4.415323 4.361903 4.363547 4.400513
## 1987 4.458994 4.394990 4.402869 4.410674 4.410674 4.396572 4.483584 4.471378
## 1988 4.553985 4.477144 4.535421 4.499822 4.515066 4.494209 4.508863 4.503310
## 1989 4.565707 4.461193 4.504702 4.474266 4.494209 4.562467 4.563116 4.584889
## 1990 4.767675 4.655198 4.704320 4.703223 4.704320 4.691054 4.679271 4.696053
## 1991 4.681528 4.577906 4.638757 4.643487 4.628016 4.617743 4.634596 4.659258
## 1992 4.754865 4.682092 4.757443 4.726509 4.729708 4.663875 4.636382 4.634000
## 1993 4.632209 4.528014 4.481443 4.546079 4.492800 4.538098 4.576630 4.605496
## 1994 4.805454 4.646430 4.774259 4.647018 4.655198 4.654616 4.689939 4.693835
## 1995 4.776275 4.669039 4.756928 4.756928 4.751762 4.717380 4.803507 4.817038
## 1996 5.036951 4.952566 4.998147 4.975867 4.957441 4.921857 4.917619 4.937354
## 1997 5.011158 4.877582 4.970701 4.939840 4.954601 4.903046 4.939840 4.948073
## 1998 4.972692 4.871767 4.901748 4.782293 4.755897 4.766657 4.793214 4.722226
## 1999 4.787274 4.660415 4.743952 4.707604 4.722762 4.703772 4.673033 4.716300
## 2000 4.697713 4.618958 4.692168 4.723835 4.703772 4.736068 4.875797 4.873560
## 2001 4.968306 4.883358 4.984930 4.844387 4.808849 4.835526 4.860012 4.855447
## 2002 5.027698 4.829881 4.941080 4.933194 4.901748 4.888214 4.949710 4.922703
## 2003 5.014946 4.828463 4.918469 5.020972 5.032153 4.989619 5.046102 5.058746
## 2004 5.153496 5.031041 5.075427 5.157005 5.147720 5.118223 5.180235 5.131791
## 2005 5.136374 5.043550 5.097615 5.109839 5.097615 5.104768 5.204934 5.142877
## 2006 5.234730 5.109839 5.190692 5.177444 5.170270 5.156687 5.243950 5.225992
## 2007 5.195872 5.085208 5.183941 5.174021 5.174332 5.185788 5.266558 5.266558
## 2008 5.237911 5.209428 5.256462 5.248807 5.264605 5.232118 5.273222 5.263486
## 2009 5.353505 5.210622 5.305751 5.316576 5.331136 5.285571 5.378206 5.383059
## 2010 5.524641 5.376501 5.427775 5.487176 5.476025 5.484616 5.561627 5.522196
## 2011 5.526064 5.381606 5.432571 5.410180 5.369153 5.364960 5.351497 5.344935
## 2012 5.426629 5.292083 5.366936 5.429148 5.402185 5.409946 5.465595 5.436433
## 2013 5.415778 5.306813 5.435072 5.430519 5.416242 5.411116 5.413683 5.455030
##           Sep      Oct      Nov      Dec
## 1982 4.137147 4.191123 4.192129 4.285306
## 1983 4.160401 4.115444 4.145676 4.213951
## 1984 4.210024 4.280791 4.290692 4.349467
## 1985 4.311904 4.366008 4.416095 4.480012
## 1986 4.379817 4.426072 4.457525 4.575353
## 1987 4.489267 4.498422 4.504702 4.574073
## 1988 4.481443 4.510246 4.513004 4.578543
## 1989 4.581087 4.624404 4.641717 4.728110
## 1990 4.570865 4.646430 4.647018 4.742381
## 1991 4.624404 4.674171 4.678706 4.760013
## 1992 4.647605 4.677008 4.667894 4.698266
## 1993 4.642898 4.694390 4.698818 4.737651
## 1994 4.701574 4.654616 4.650533 4.743429
## 1995 4.815599 4.921012 4.934444 5.006971
## 1996 4.900448 4.967906 4.944378 5.000074
## 1997 4.957036 4.975867 4.952566 5.012296
## 1998 4.715759 4.781793 4.751762 4.819909
## 1999 4.738704 4.750725 4.744475 4.741857
## 2000 4.927755 4.993506 4.894790 4.986887
## 2001 4.857733 4.929432 4.869069 4.987668
## 2002 4.912932 4.986104 4.927335 5.029186
## 2003 5.041722 5.116219 5.070847 5.174021
## 2004 5.152856 5.178686 5.115549 5.198904
## 2005 5.158913 5.255332 5.236756 5.319718
## 2006 5.211815 5.178065 5.179306 5.243377
## 2007 5.235310 5.268229 5.253635 5.309463
## 2008 5.212708 5.295588 5.249091 5.385715
## 2009 5.397920 5.458125 5.430291 5.552045
## 2010 5.542150 5.549287 5.504874 5.622297
## 2011 5.374793 5.373080 5.336529 5.502996
## 2012 5.442079 5.433254 5.444999 5.550273
## 2013 5.418098 5.465595 5.486324 5.562793
lambdaTS <- BoxCox.lambda(testRetail)
bc1 <- BoxCox(testRetail, lambdaTS)
bc1
##           Jan      Feb      Mar      Apr      May      Jun      Jul      Aug
## 2011 4285.012 3340.600 3646.792 3508.894 3269.857 3246.380 3172.157 3136.611
## 2012 3609.668 2864.726 3257.422 3625.360 3460.951 3507.481 3860.340 3671.129
## 2013 3542.860 2937.995 3662.534 3633.928 3545.695 3514.548 3530.111 3790.669
##           Sep      Oct      Nov      Dec
## 2011 3301.703 3292.000 3091.674 4117.653
## 2012 3707.010 3651.083 3725.711 4468.125
## 2013 3557.042 3860.340 4000.857 4565.935
retailSTL <- stl(bc1[,1], s.window="periodic")

forecast1 <- forecast(retailSTL, h=36)

round(forecast::accuracy(forecast1),2)
##                 ME   RMSE    MAE   MPE MAPE MASE  ACF1
## Training set -6.82 161.18 134.58 -0.31 3.76 0.47 -0.01
# Note: these accuracy measures are on the Box-Cox transformed scale, so they cannot be compared directly with the seasonal naive accuracy computed on the raw data above.

#Question 10.A For this exercise use data set ukcars, the quarterly UK passenger vehicle production data from 1977Q1–2005Q1.

autoplot(ukcars) + ylab("Vehicle Production") + xlab("Year")

#Question 10.B

ukcarsSTL <- stl(ukcars, s.window="periodic")
ukcarsSTL
##  Call:
##  stl(x = ukcars, s.window = "periodic")
## 
## Components
##          seasonal    trend     remainder
## 1977 Q1  25.66677 318.5295 -13.825249626
## 1977 Q2  21.01432 324.6948  25.341911916
## 1977 Q3 -45.12476 330.9708 -15.176049732
## 1977 Q4  -1.55632 334.7004  10.735966254
## 1978 Q1  25.66677 334.0139  -1.189641048
## 1978 Q2  21.01432 320.7959  21.011758342
## 1978 Q3 -45.12476 300.9207   5.485038711
## 1978 Q4  -1.55632 288.6423 -46.730931160
## 1979 Q1  25.66677 276.1832  23.532046833
## 1979 Q2  21.01432 267.2862  28.399513279
## 1979 Q3 -45.12476 261.5016 -45.223827211
## 1979 Q4  -1.55632 252.2466   6.526756134
## 1980 Q1  25.66677 247.9784  24.481824797
## 1980 Q2  21.01432 239.4564  -9.006725034
## 1980 Q3 -45.12476 223.2180   3.461793086
## 1980 Q4  -1.55632 215.7849 -21.630623541
## 1981 Q1  25.66677 220.2357  -0.250430089
## 1981 Q2  21.01432 233.6016  -9.089932014
## 1981 Q3 -45.12476 241.8626  28.523188447
## 1981 Q4  -1.55632 240.7936  -1.026285436
## 1982 Q1  25.66677 230.3050   1.413217582
## 1982 Q2  21.01432 222.3072 -14.860538991
## 1982 Q3 -45.12476 222.2015  -1.705761122
## 1982 Q4  -1.55632 231.0511  -3.032794821
## 1983 Q1  25.66677 244.9445  -4.461265566
## 1983 Q2  21.01432 257.0965   9.140190429
## 1983 Q3 -45.12476 263.2765   7.731214593
## 1983 Q4  -1.55632 257.4187   9.450666648
## 1984 Q1  25.66677 245.2947   1.797509364
## 1984 Q2  21.01432 233.9046 -20.784913580
## 1984 Q3 -45.12476 228.4033  13.183489842
## 1984 Q4  -1.55632 237.1828 -30.075524618
## 1985 Q1  25.66677 247.7529  17.863310026
## 1985 Q2  21.01432 258.0678   5.339865349
## 1985 Q3 -45.12476 258.1793   8.516496453
## 1985 Q4  -1.55632 250.2516   2.001681939
## 1986 Q1  25.66677 246.4844 -18.394218794
## 1986 Q2  21.01432 250.3747  -4.372980363
## 1986 Q3 -45.12476 259.5206   5.992168272
## 1986 Q4  -1.55632 267.0536  12.303689570
## 1987 Q1  25.66677 274.7015 -17.135282062
## 1987 Q2  21.01432 282.7800  -1.722305893
## 1987 Q3 -45.12476 290.1326  14.712201563
## 1987 Q4  -1.55632 294.2213   4.992997392
## 1988 Q1  25.66677 295.1384 -14.676141546
## 1988 Q2  21.01432 300.0450   1.046637055
## 1988 Q3 -45.12476 313.0266 -11.178866207
## 1988 Q4  -1.55632 324.8864  18.546947724
## 1989 Q1  25.66677 331.9778  -1.640598597
## 1989 Q2  21.01432 329.0151  11.510606843
## 1989 Q3 -45.12476 321.2409  -5.683114815
## 1989 Q4  -1.55632 312.5455   0.115820802
## 1990 Q1  25.66677 308.5850  -7.563781787
## 1990 Q2  21.01432 315.1534  -9.108686479
## 1990 Q3 -45.12476 326.9998  -7.618043743
## 1990 Q4  -1.55632 333.4080  35.754296494
## 1991 Q1  25.66677 332.0422 -11.545969032
## 1991 Q2  21.01432 318.1429   9.053818020
## 1991 Q3 -45.12476 307.8729 -12.740133598
## 1991 Q4  -1.55632 306.5401 -12.465738791
## 1992 Q1  25.66677 310.8608   6.790443375
## 1992 Q2  21.01432 318.7590   3.655635693
## 1992 Q3 -45.12476 325.3152  -4.804421022
## 1992 Q4  -1.55632 332.1973  -0.893989077
## 1993 Q1  25.66677 340.5537  -1.699468477
## 1993 Q2  21.01432 344.6912  12.742515376
## 1993 Q3 -45.12476 344.0280   1.894724342
## 1993 Q4  -1.55632 343.3536 -10.040311206
## 1994 Q1  25.66677 348.0348 -11.165536675
## 1994 Q2  21.01432 359.2368   8.881872441
## 1994 Q3 -45.12476 374.3526  -5.905812714
## 1994 Q4  -1.55632 385.3718   8.016551486
## 1995 Q1  25.66677 388.6002   7.378976268
## 1995 Q2  21.01432 384.3830  11.425715058
## 1995 Q3 -45.12476 381.6207 -24.782895640
## 1995 Q4  -1.55632 383.7478  -0.289469197
## 1996 Q1  25.66677 394.5880   2.727254896
## 1996 Q2  21.01432 411.2334  -4.525738213
## 1996 Q3 -45.12476 424.2350  -2.260256000
## 1996 Q4  -1.55632 428.3269  31.809443631
## 1997 Q1  25.66677 427.3670 -16.808791145
## 1997 Q2  21.01432 423.5993  -3.126575508
## 1997 Q3 -45.12476 427.6065 -12.915722425
## 1997 Q4  -1.55632 434.9783  17.301020647
## 1998 Q1  25.66677 442.4171  -5.641897664
## 1998 Q2  21.01432 442.2694   4.948265854
## 1998 Q3 -45.12476 437.2456  11.515205896
## 1998 Q4  -1.55632 433.4005 -17.896137200
## 1999 Q1  25.66677 431.3057   3.523496436
## 1999 Q2  21.01432 439.1628 -11.245141015
## 1999 Q3 -45.12476 450.8969   2.014845258
## 1999 Q4  -1.55632 455.5570  15.407293417
## 2000 Q1  25.66677 444.3540  24.290258982
## 2000 Q2  21.01432 421.1754  -8.949762594
## 2000 Q3 -45.12476 395.4301 -15.199289613
## 2000 Q4  -1.55632 375.7606   4.590727142
## 2001 Q1  25.66677 367.5362  -6.102990303
## 2001 Q2  21.01432 369.1569 -17.776203042
## 2001 Q3 -45.12476 380.2567   0.658094924
## 2001 Q4  -1.55632 394.1493   4.486990545
## 2002 Q1  25.66677 403.5459  20.542371093
## 2002 Q2  21.01432 408.8754 -27.637700761
## 2002 Q3 -45.12476 404.8623  32.109499840
## 2002 Q4  -1.55632 404.8806 -17.434230990
## 2003 Q1  25.66677 406.8758  -8.217613506
## 2003 Q2  21.01432 412.2684  -0.002686131
## 2003 Q3 -45.12476 418.0638  18.273925233
## 2003 Q4  -1.55632 419.2933  -8.996989753
## 2004 Q1  25.66677 416.2195   3.571765783
## 2004 Q2  21.01432 413.9253  -6.737593135
## 2004 Q3 -45.12476 410.1435  14.029258400
## 2004 Q4  -1.55632 407.2154 -11.617080187
## 2005 Q1  25.66677 403.8199   3.309325330
summary(ukcarsSTL)
##  Call:
##  stl(x = ukcars, s.window = "periodic")
## 
##  Time.series components:
##     seasonal             trend            remainder        
##  Min.   :-45.12476   Min.   :215.7849   Min.   :-46.73093  
##  1st Qu.: -1.55632   1st Qu.:263.2765   1st Qu.: -9.08993  
##  Median : 21.01432   Median :329.0151   Median : -0.00269  
##  Mean   :  0.22714   Mean   :333.1863   Mean   :  0.06435  
##  3rd Qu.: 25.66677   3rd Qu.:403.8199   3rd Qu.:  8.88187  
##  Max.   : 25.66677   Max.   :455.5570   Max.   : 35.75430  
##  IQR:
##      STL.seasonal STL.trend STL.remainder data  
##       27.22       140.54     17.97        124.83
##    %  21.8        112.6      14.4         100.0 
## 
##  Weights: all == 1
## 
##  Other components: List of 5
##  $ win  : Named num [1:3] 1131 7 5
##  $ deg  : Named int [1:3] 0 1 1
##  $ jump : Named num [1:3] 114 1 1
##  $ inner: int 2
##  $ outer: int 0
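# The decomposition can be used to extract the seasonally adjusted series
# (trend + remainder), which is what stlf() forecasts in 10.C-D below
# (a short sketch; ukcarsSA is an illustrative name):
ukcarsSA <- seasadj(ukcarsSTL)
autoplot(ukcarsSA) + ylab("Seasonally adjusted production") + xlab("Year")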

#Question 10.C

stlf(ukcars, etsmodel="AAN", damped=TRUE, h=8)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       415.0289 384.4576 445.6001 368.2742 461.7836
## 2005 Q3       364.2543 326.8700 401.6386 307.0799 421.4287
## 2005 Q4       400.8059 357.6702 443.9416 334.8355 466.7762
## 2006 Q1       432.5663 384.3596 480.7731 358.8404 506.2922
## 2006 Q2       415.0250 362.2312 467.8189 334.2838 495.7663
## 2006 Q3       364.2507 307.2369 421.2646 277.0556 451.4459
## 2006 Q4       400.8026 339.8596 461.7456 307.5983 494.0069
## 2007 Q1       432.5633 367.9289 497.1976 333.7136 531.4130

#Question 10.D

stlf(ukcars, etsmodel="AAN", damped=FALSE, h=8)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       416.1915 385.7950 446.5881 369.7040 462.6791
## 2005 Q3       366.3343 328.7907 403.8780 308.9163 423.7524
## 2005 Q4       403.8032 360.2690 447.3375 337.2233 470.3831
## 2006 Q1       436.4809 387.6847 485.2771 361.8535 511.1083
## 2006 Q2       419.8568 366.3120 473.4016 337.9671 501.7465
## 2006 Q3       369.9996 312.0932 427.9060 281.4394 458.5598
## 2006 Q4       407.4685 345.5056 469.4313 312.7045 502.2325
## 2007 Q1       440.1461 374.3755 505.9167 339.5587 540.7336

#Question 10.E

ets(ukcars)
## ETS(A,N,A) 
## 
## Call:
##  ets(y = ukcars) 
## 
##   Smoothing parameters:
##     alpha = 0.6199 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 314.2568 
##     s = -1.7579 -44.9601 21.1956 25.5223
## 
##   sigma:  25.9302
## 
##      AIC     AICc      BIC 
## 1277.752 1278.819 1296.844

#Question 10.F

# ETS (A,N,A) Model
m1 <- ets(ukcars)
round(forecast::accuracy(m1),2)
##                ME  RMSE   MAE   MPE MAPE MASE ACF1
## Training set 1.31 25.23 20.18 -0.16 6.63 0.66 0.03
# STL Decomposition Model
ukcarsSTL <- stl(ukcars, s.window="periodic")
ukcarsSTLTS <- ts(ukcarsSTL) # ts() of an stl fit is not a forecast object, so accuracy() fails below
round(forecast::accuracy(ukcarsSTLTS),2)
## Warning in trainingaccuracy(object, test, d, D): test elements must be within
## sample
##               ME RMSE MAE MPE MAPE
## Training set NaN  NaN NaN NaN  NaN
# accuracy() needs a forecast or fitted-model object (e.g. accuracy(stlf(ukcars))), which is why the comparison above returns NaN; of the measures that could be computed, the ETS model gives the best in-sample fit.
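# A like-for-like comparison needs a forecast object; stlf() supplies one
# directly (sketch):
round(forecast::accuracy(stlf(ukcars, etsmodel="AAN", damped=FALSE)), 2)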

#Question 10.G

# ETS Model Forecast
forecast(m1, h=8)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       427.4885 394.2576 460.7195 376.6662 478.3109
## 2005 Q3       361.3329 322.2353 400.4305 301.5383 421.1275
## 2005 Q4       404.5358 360.3437 448.7280 336.9497 472.1219
## 2006 Q1       431.8154 383.0568 480.5741 357.2455 506.3854
## 2006 Q2       427.4885 374.5571 480.4200 346.5369 508.4401
## 2006 Q3       361.3329 304.5345 418.1313 274.4672 448.1986
## 2006 Q4       404.5358 344.1174 464.9542 312.1338 496.9378
## 2007 Q1       431.8154 367.9809 495.6500 334.1890 529.4419
# ETS Model 2 Forecast
m2 = stlf(ukcars, etsmodel="AAN", damped=FALSE, h=8)
forecast(m2, h=8)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       416.1915 385.7950 446.5881 369.7040 462.6791
## 2005 Q3       366.3343 328.7907 403.8780 308.9163 423.7524
## 2005 Q4       403.8032 360.2690 447.3375 337.2233 470.3831
## 2006 Q1       436.4809 387.6847 485.2771 361.8535 511.1083
## 2006 Q2       419.8568 366.3120 473.4016 337.9671 501.7465
## 2006 Q3       369.9996 312.0932 427.9060 281.4394 458.5598
## 2006 Q4       407.4685 345.5056 469.4313 312.7045 502.2325
## 2007 Q1       440.1461 374.3755 505.9167 339.5587 540.7336
# STL Decomposition Model Forecast
forecast(ukcarsSTL, h=8)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       427.2046 394.5838 459.8254 377.3154 477.0938
## 2005 Q3       361.0655 322.7354 399.3955 302.4447 419.6862
## 2005 Q4       404.6339 361.3411 447.9267 338.4232 470.8446
## 2006 Q1       431.8570 384.1145 479.5995 358.8412 504.8729
## 2006 Q2       427.2046 375.3931 479.0160 347.9658 506.4433
## 2006 Q3       361.0655 305.4822 416.6487 276.0582 446.0727
## 2006 Q4       404.6339 345.5190 463.7489 314.2254 495.0424
## 2007 Q1       431.8570 369.4098 494.3042 336.3523 527.3618
# The results are all fairly similar, but I choose model 1 because ets() selects ETS(A,N,A) as the minimum-AICc model for this data set.

#Question 10.H

mForecast1 <- forecast(m2, h=8) # (note: this checks the residuals of the STL+ETS model m2, not of the m1 chosen above)
mForecast1$residuals
##               Qtr1          Qtr2          Qtr3          Qtr4
## 1977 -28.835239869  34.792950811 -17.843831364  31.143100613
## 1978 -18.175720851   1.789747863 -29.218141165 -65.825405579
## 1979  25.998433199   1.346804631 -74.484984962  29.002956755
## 1980   8.317191758 -40.520338332 -12.229254637 -28.393509990
## 1981   5.050525600   6.027685989  48.253212322  -9.425344894
## 1982 -22.321958202 -27.783679532  -0.012727342  16.652487401
## 1983   8.042045233  33.012673112   2.153599029   6.699556156
## 1984 -21.962669518 -38.468462137   6.355632624 -25.140545039
## 1985  53.635699541  10.703052762  -5.015190761 -10.836159374
## 1986 -21.242991619   7.760589156  13.877718528  17.882577013
## 1987  -7.407826721  14.778064951  23.680829001  -3.593278240
## 1988  -7.537614439  10.031461191   2.939348431  34.807075618
## 1989   9.840156810   2.642922463 -21.056725817 -19.263914257
## 1990  -3.993613576  -6.622635114  17.983390170  43.258441001
## 1991 -26.141859081 -11.319129411 -25.370095016 -20.330656797
## 1992  26.319778794   2.231717462   9.759212652   0.260887790
## 1993  14.823014726  13.807557287   4.426125704 -25.630712216
## 1994   3.574426241  24.304018286  17.680371091  16.389172378
## 1995  14.033379103  -3.362987339 -30.777150586   5.482016102
## 1996  21.166106881  11.769191549  24.587674772  31.871314218
## 1997 -35.179172026   0.422917653  -2.609546302  23.437070977
## 1998  -3.659459264  13.668629606   2.380732555 -41.199023082
## 1999  10.246771423   4.210664071  17.338356788  19.168463908
## 2000   3.052622471 -46.312369844 -55.885831158 -15.707943051
## 2001 -24.933899266  -6.688247260  14.202562977  24.358003624
## 2002  28.743000922 -24.122914045  34.619407687 -36.074702184
## 2003  -2.915573212  24.231270146  15.264612974 -16.733418686
## 2004   0.006014205  -0.769632650   0.489235653 -22.342510884
## 2005  -0.071449002
summary(mForecast1$residuals)
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## -74.4850 -17.8438   2.2317  -0.3413  14.8230  53.6357
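# A fuller residual diagnostic for the chosen model (sketch): checkresiduals()
# adds the Ljung-Box test plus ACF and histogram plots.
checkresiduals(mForecast1)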

#Question 11.A For this exercise use data set visitors, the monthly Australian short-term overseas visitors data, May 1985–April 2005.

autoplot(visitors) + ylab("Short-term Overseas Visitors") + xlab("Year")

#Question 11.B

# Full data set
# Forecasting one year (12 months)
hw(visitors, seasonal="multiplicative", damped=FALSE, h=12)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       363.6434 337.2901 389.9967 323.3395 403.9474
## Jun 2005       389.5974 356.8764 422.3184 339.5550 439.6399
## Jul 2005       482.7709 437.0247 528.5172 412.8081 552.7338
## Aug 2005       428.5001 383.4951 473.5050 359.6709 497.3293
## Sep 2005       420.4548 372.1236 468.7860 346.5386 494.3710
## Oct 2005       475.5963 416.3290 534.8636 384.9547 566.2379
## Nov 2005       503.4598 435.9471 570.9725 400.2080 606.7116
## Dec 2005       608.3779 521.1122 695.6436 474.9165 741.8393
## Jan 2006       455.1525 385.6597 524.6452 348.8725 561.4324
## Feb 2006       509.0590 426.6702 591.4478 383.0562 635.0618
## Mar 2006       496.8101 411.8771 581.7431 366.9162 626.7039
## Apr 2006       441.9586 362.3923 521.5249 320.2724 563.6448
# Testing set: observations from 2004 onward (16 months)
testVisitor = window(visitors, start=2004)
# Forecasting one year (12 months); with barely more than one seasonal cycle of
# data, hw() cannot estimate the seasonal component (hence the warning below)
hw(testVisitor, seasonal="multiplicative", damped=FALSE, h=12)
## Warning in ets(x, "MAM", alpha = alpha, beta = beta, gamma = gamma, phi = phi, :
## Seasonal component could not be estimated
##          Point Forecast     Lo 80    Hi 80       Lo 95     Hi 95
## May 2005       441.3111 354.24112 528.3810  308.149069  574.4731
## Jun 2005       439.6645 325.75496 553.5740  265.454900  613.8740
## Jul 2005       438.0179 294.86439 581.1714  219.083511  656.9522
## Aug 2005       436.3713 261.77064 610.9719  169.342651  703.3999
## Sep 2005       434.7247 226.62773 642.8216  116.467867  752.9815
## Oct 2005       433.0781 189.55935 676.5968   60.648333  805.5078
## Nov 2005       431.4315 150.66814 712.1948    2.041018  860.8219
## Dec 2005       429.7849 110.04129 749.5284  -59.220727  918.7904
## Jan 2006       428.1383  67.75421 788.5223 -123.021579  979.2981
## Feb 2006       426.4917  23.87303 829.1103 -189.260392 1042.2437
## Mar 2006       424.8450 -21.54358 871.2337 -257.847447 1107.5375
## Apr 2006       423.1984 -68.44309 914.8400 -328.702399 1175.0993
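# The split the exercise intends (a sketch; trainVis and fcHW are illustrative
# names): hold out the last two years, May 2003 - April 2005, and train on the rest.
trainVis <- window(visitors, end=c(2003,4))
fcHW <- hw(trainVis, seasonal="multiplicative", h=24)
forecast::accuracy(fcHW, window(visitors, start=c(2003,5)))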

#Question 11.C

# Multiplicative seasonality is necessary because the seasonal swings grow with the level of the series; an additive seasonal component assumes a constant seasonal amplitude and cannot capture that.

#Question 11.D

# i. an ETS model
vis1 <- ets(testVisitor) # with only 16 observations, ets() falls back to a non-seasonal model, hence the flat forecasts below
forecast(vis1, h=24)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       465.1445 385.2825 545.0065 343.0061 587.2829
## Jun 2005       465.1445 380.5627 549.7263 335.7878 594.5012
## Jul 2005       465.1445 376.0927 554.1963 328.9515 601.3375
## Aug 2005       465.1445 371.8366 558.4524 322.4424 607.8466
## Sep 2005       465.1445 367.7664 562.5226 316.2175 614.0715
## Oct 2005       465.1445 363.8596 566.4294 310.2426 620.0464
## Nov 2005       465.1445 360.0980 570.1910 304.4897 625.7993
## Dec 2005       465.1445 356.4665 573.8225 298.9358 631.3532
## Jan 2006       465.1445 352.9525 577.3365 293.5616 636.7274
## Feb 2006       465.1445 349.5452 580.7438 288.3507 641.9383
## Mar 2006       465.1445 346.2356 584.0534 283.2890 647.0000
## Apr 2006       465.1445 343.0156 587.2734 278.3645 651.9245
## May 2006       465.1445 339.8784 590.4106 273.5665 656.7225
## Jun 2006       465.1445 336.8178 593.4712 268.8857 661.4033
## Jul 2006       465.1445 333.8285 596.4605 264.3141 665.9749
## Aug 2006       465.1445 330.9058 599.3832 259.8442 670.4448
## Sep 2006       465.1445 328.0454 602.2436 255.4696 674.8195
## Oct 2006       465.1445 325.2435 605.0455 251.1844 679.1047
## Nov 2006       465.1445 322.4966 607.7924 246.9833 683.3057
## Dec 2006       465.1445 319.8016 610.4875 242.8616 687.4274
## Jan 2007       465.1445 317.1556 613.1334 238.8150 691.4740
## Feb 2007       465.1445 314.5562 615.7328 234.8395 695.4495
## Mar 2007       465.1445 312.0008 618.2882 230.9315 699.3576
## Apr 2007       465.1445 309.4874 620.8016 227.0875 703.2015
# ii. an additive ETS model applied to a Box-Cox transformed series
vis2 <- ets(testVisitor, model="AZZ", lambda="auto")
forecast(vis2)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       465.1511 385.2891 545.0131 343.0127 587.2895
## Jun 2005       465.1511 380.5737 549.7286 335.8011 594.5011
## Jul 2005       465.1511 376.1076 554.1946 328.9708 601.3314
## Aug 2005       465.1511 371.8551 558.4471 322.4672 607.8350
## Sep 2005       465.1511 367.7882 562.5141 316.2473 614.0549
## Oct 2005       465.1511 363.8844 566.4178 310.2771 620.0252
## Nov 2005       465.1511 360.1257 570.1765 304.5286 625.7737
## Dec 2005       465.1511 356.4969 573.8053 298.9788 631.3234
## Jan 2006       465.1511 352.9854 577.3168 293.6085 636.6937
## Feb 2006       465.1511 349.5806 580.7216 288.4013 641.9009
## Mar 2006       465.1511 346.2733 584.0290 283.3431 646.9591
## Apr 2006       465.1511 343.0555 587.2467 278.4220 651.8803
## May 2006       465.1511 339.9204 590.3819 273.6272 656.6750
## Jun 2006       465.1511 336.8618 593.4404 268.9496 661.3526
## Jul 2006       465.1511 333.8745 596.4277 264.3809 665.9213
## Aug 2006       465.1511 330.9537 599.3485 259.9139 670.3883
## Sep 2006       465.1511 328.0951 602.2071 255.5421 674.7601
## Oct 2006       465.1511 325.2950 605.0072 251.2596 679.0426
## Nov 2006       465.1511 322.5498 607.7524 247.0612 683.2410
## Dec 2006       465.1511 319.8564 610.4458 242.9421 687.3601
## Jan 2007       465.1511 317.2121 613.0901 238.8980 691.4042
## Feb 2007       465.1511 314.6143 615.6879 234.9249 695.3773
## Mar 2007       465.1511 312.0605 618.2417 231.0192 699.2830
## Apr 2007       465.1511 309.5486 620.7536 227.1776 703.1246
# iii. a seasonal naïve method
vis3 <- snaive(testVisitor, h=24)
forecast(vis3, h=24)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005          341.3 279.6464 402.9536 247.0090 435.5910
## Jun 2005          367.3 305.6464 428.9536 273.0090 461.5910
## Jul 2005          472.0 410.3464 533.6536 377.7090 566.2910
## Aug 2005          405.8 344.1464 467.4536 311.5090 500.0910
## Sep 2005          395.6 333.9464 457.2536 301.3090 489.8910
## Oct 2005          449.9 388.2464 511.5536 355.6090 544.1910
## Nov 2005          479.9 418.2464 541.5536 385.6090 574.1910
## Dec 2005          593.1 531.4464 654.7536 498.8090 687.3910
## Jan 2006          462.4 400.7464 524.0536 368.1090 556.6910
## Feb 2006          501.6 439.9464 563.2536 407.3090 595.8910
## Mar 2006          504.7 443.0464 566.3536 410.4090 598.9910
## Apr 2006          409.5 347.8464 471.1536 315.2090 503.7910
## May 2006          341.3 254.1087 428.4913 207.9524 474.6476
## Jun 2006          367.3 280.1087 454.4913 233.9524 500.6476
## Jul 2006          472.0 384.8087 559.1913 338.6524 605.3476
## Aug 2006          405.8 318.6087 492.9913 272.4524 539.1476
## Sep 2006          395.6 308.4087 482.7913 262.2524 528.9476
## Oct 2006          449.9 362.7087 537.0913 316.5524 583.2476
## Nov 2006          479.9 392.7087 567.0913 346.5524 613.2476
## Dec 2006          593.1 505.9087 680.2913 459.7524 726.4476
## Jan 2007          462.4 375.2087 549.5913 329.0524 595.7476
## Feb 2007          501.6 414.4087 588.7913 368.2524 634.9476
## Mar 2007          504.7 417.5087 591.8913 371.3524 638.0476
## Apr 2007          409.5 322.3087 496.6913 276.1524 542.8476
summary(vis3)
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = testVisitor, h = 24) 
## 
## Residual sd: 27.039 
## 
## Error measures:
##                  ME     RMSE    MAE      MPE     MAPE MASE       ACF1
## Training set 42.025 48.10855 42.025 8.609435 8.609435    1 -0.4227005
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005          341.3 279.6464 402.9536 247.0090 435.5910
## Jun 2005          367.3 305.6464 428.9536 273.0090 461.5910
## Jul 2005          472.0 410.3464 533.6536 377.7090 566.2910
## Aug 2005          405.8 344.1464 467.4536 311.5090 500.0910
## Sep 2005          395.6 333.9464 457.2536 301.3090 489.8910
## Oct 2005          449.9 388.2464 511.5536 355.6090 544.1910
## Nov 2005          479.9 418.2464 541.5536 385.6090 574.1910
## Dec 2005          593.1 531.4464 654.7536 498.8090 687.3910
## Jan 2006          462.4 400.7464 524.0536 368.1090 556.6910
## Feb 2006          501.6 439.9464 563.2536 407.3090 595.8910
## Mar 2006          504.7 443.0464 566.3536 410.4090 598.9910
## Apr 2006          409.5 347.8464 471.1536 315.2090 503.7910
## May 2006          341.3 254.1087 428.4913 207.9524 474.6476
## Jun 2006          367.3 280.1087 454.4913 233.9524 500.6476
## Jul 2006          472.0 384.8087 559.1913 338.6524 605.3476
## Aug 2006          405.8 318.6087 492.9913 272.4524 539.1476
## Sep 2006          395.6 308.4087 482.7913 262.2524 528.9476
## Oct 2006          449.9 362.7087 537.0913 316.5524 583.2476
## Nov 2006          479.9 392.7087 567.0913 346.5524 613.2476
## Dec 2006          593.1 505.9087 680.2913 459.7524 726.4476
## Jan 2007          462.4 375.2087 549.5913 329.0524 595.7476
## Feb 2007          501.6 414.4087 588.7913 368.2524 634.9476
## Mar 2007          504.7 417.5087 591.8913 371.3524 638.0476
## Apr 2007          409.5 322.3087 496.6913 276.1524 542.8476
# iv. an STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
# (Note: the Box-Cox step is skipped here; stl() is applied to the raw series, and forecast() then fits a model to the seasonally adjusted data.)
vis4 <- stl(visitors, s.window="periodic")
forecast(vis4, h=24)
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       401.2570 375.1438 427.3701 361.3204 441.1935
## Jun 2005       416.4592 387.3886 445.5298 371.9996 460.9189
## Jul 2005       471.8865 440.1316 503.6413 423.3216 520.4514
## Aug 2005       443.2371 409.0069 477.4674 390.8865 495.5878
## Sep 2005       435.2928 398.7535 471.8320 379.4108 491.1747
## Oct 2005       472.7590 434.0474 511.4706 413.5547 531.9633
## Nov 2005       490.5152 449.7460 531.2844 428.1641 552.8663
## Dec 2005       560.3010 517.5724 603.0296 494.9533 625.6488
## Jan 2006       464.0268 419.4240 508.6296 395.8128 532.2408
## Feb 2006       496.6757 450.2737 543.0777 425.7099 567.6414
## Mar 2006       488.9345 440.7998 537.0692 415.3188 562.5502
## Apr 2006       454.2768 404.4689 504.0847 378.1022 530.4514
## May 2006       418.2968 366.8695 469.7241 339.6456 496.9481
## Jun 2006       433.4991 380.5012 486.4969 352.4459 514.5523
## Jul 2006       488.9263 434.4025 543.4502 405.5394 572.3133
## Aug 2006       460.2770 404.2682 516.2858 374.6189 545.9351
## Sep 2006       452.3326 394.8766 509.7887 364.4612 540.2040
## Oct 2006       489.7989 430.9306 548.6671 399.7677 579.8301
## Nov 2006       507.5551 447.3072 567.8030 415.4139 599.6963
## Dec 2006       577.3409 515.7436 638.9381 483.1360 671.5457
## Jan 2007       481.0667 418.1485 543.9848 384.8416 577.2917
## Feb 2007       513.7155 449.5031 577.9280 415.5111 611.9200
## Mar 2007       505.9744 440.4927 571.4560 405.8288 606.1199
## Apr 2007       471.3167 404.5894 538.0439 369.2662 573.3672

#Question 11.E

# The Seasonal naive method appears to give the best results in this case. The sample size is small (the test window holds just over a year of data). The residuals pass the Ljung-Box test below (p = 0.68) but have a mean of about 42, so the forecasts are biased; a longer series would likely improve this.

checkresiduals(vis3)

## 
##  Ljung-Box test
## 
## data:  Residuals from Seasonal naive method
## Q* = 1.4962, df = 3, p-value = 0.6832
## 
## Model df: 0.   Total lags used: 3
summary(vis3$residuals)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##    6.70   33.33   44.50   42.02   53.20   72.40      12
# The call below never produced any errors: forecastfunction is not defined
# anywhere, so every individual forecast inside tsCV() fails and is recorded as
# NA (and wrapping tsCV() around its own output compounds the problem).
tsCV(tsCV(visitors, forecastfunction, h = 1, window = 0, xreg = NULL,initial = 1))
##      Jan Feb Mar Apr May Jun Jul Aug Sep Oct Nov Dec
## 1985                  NA  NA  NA  NA  NA  NA  NA  NA
## 1986  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1987  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1988  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1989  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1990  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1991  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1992  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1993  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1994  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1995  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1996  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1997  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1998  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 1999  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 2000  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 2001  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 2002  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 2003  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 2004  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA  NA
## 2005  NA  NA  NA  NA
#The all-NA matrix means the cross-validation never actually ran, so it shows nothing.
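# A working sketch of what the tsCV() call above presumably intended: pass an
# actual forecast function (here a seasonal naive wrapper, fsnaive) and
# summarise the errors.
fsnaive <- function(y, h) forecast(snaive(y), h = h)
e <- tsCV(visitors, fsnaive, h = 1)
sqrt(mean(e^2, na.rm = TRUE))  # one-step cross-validated RMSE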

#Question 12.A

# Wrapper so tsCV() can refit an ETS model at each forecast origin
fets = function(y, h) {
  forecast(ets(y), h = h)
}
f1 = fets(y=qcement, h=4)
f1
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2014 Q2       2.521670 2.368825 2.674515 2.287913 2.755427
## 2014 Q3       2.610968 2.412978 2.808958 2.308169 2.913767
## 2014 Q4       2.571906 2.344302 2.799510 2.223816 2.919996
## 2015 Q1       2.268522 2.042601 2.494443 1.923006 2.614039
f2 = snaive(qcement, h=4)
f2
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2014 Q2          2.528 2.356456 2.699544 2.265646 2.790354
## 2014 Q3          2.637 2.465456 2.808544 2.374646 2.899354
## 2014 Q4          2.565 2.393456 2.736544 2.302646 2.827354
## 2015 Q1          2.229 2.057456 2.400544 1.966646 2.491354
# ETS Model
# (pass qcement directly: ts(qcement) would rebuild the series with frequency 1
# and lose the quarterly seasonality, turning snaive() into a plain naive forecast)
tsCV1 = tsCV(qcement, fets)
# Seasonal Naive Model
fets2 = function(y, h) {
  forecast(snaive(y), h = h)
}

tsCV2 = tsCV(qcement, fets2)

#Question 12.B

# (The forecast objects f1 and f2 contain no missing values; the NAs this
# question asks about live in the tsCV matrices computed above.)
f1na = na.omit(f1)
f2na = na.omit(f2)

round(forecast::accuracy(f1na),2)
##              ME RMSE  MAE   MPE MAPE MASE  ACF1
## Training set  0 0.08 0.06 -0.09 3.61 0.55 -0.04
round(forecast::accuracy(f2na),2)
##                ME RMSE MAE  MPE MAPE MASE ACF1
## Training set 0.03 0.13 0.1 2.34 6.77    1 0.61
summary(f1na)
## 
## Forecast method: ETS(M,A,M)
## 
## Model Information:
## ETS(M,A,M) 
## 
## Call:
##  ets(y = y) 
## 
##   Smoothing parameters:
##     alpha = 0.7505 
##     beta  = 0.003 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 0.5043 
##     b = 0.0081 
##     s = 1.0297 1.0488 1.0164 0.9052
## 
##   sigma:  0.0473
## 
##       AIC      AICc       BIC 
##  6.783846  7.591021 37.843192 
## 
## Error measures:
##                        ME       RMSE        MAE         MPE     MAPE      MASE
## Training set 0.0006237224 0.07829922 0.05603263 -0.08782872 3.606505 0.5485176
##                     ACF1
## Training set -0.03632985
## 
## Forecasts:
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2014 Q2       2.521670 2.368825 2.674515 2.287913 2.755427
## 2014 Q3       2.610968 2.412978 2.808958 2.308169 2.913767
## 2014 Q4       2.571906 2.344302 2.799510 2.223816 2.919996
## 2015 Q1       2.268522 2.042601 2.494443 1.923006 2.614039
summary(f2na)
## 
## Forecast method: Seasonal naive method
## 
## Model Information:
## Call: snaive(y = qcement, h = 4) 
## 
## Residual sd: 0.1297 
## 
## Error measures:
##                      ME      RMSE       MAE      MPE     MAPE MASE     ACF1
## Training set 0.03419651 0.1338564 0.1021528 2.338509 6.768085    1 0.613902
## 
## Forecasts:
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2014 Q2          2.528 2.356456 2.699544 2.265646 2.790354
## 2014 Q3          2.637 2.465456 2.808544 2.374646 2.899354
## 2014 Q4          2.565 2.393456 2.736544 2.302646 2.827354
## 2015 Q1          2.229 2.057456 2.400544 1.966646 2.491354
# tsCV() leaves missing values at the end of the series, where no future observations remain to score an h-step forecast against, and wherever an individual forecast fails (snaive needs a full seasonal cycle of history first). These NAs must be dropped (na.rm=TRUE) before averaging, or the MSE itself becomes NA.
# The ETS model seems to perform better based on the error measures produced above.
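# A sketch of the cross-validated comparison the question is driving at, using
# the tsCV matrices from 12.A:
mean(tsCV1^2, na.rm = TRUE)  # ETS cross-validated MSE
mean(tsCV2^2, na.rm = TRUE)  # seasonal naive cross-validated MSE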

#Question 13.A

# ausbeer
testAus = window(ausbeer, start=2008)

ets1 = ets(testAus)
round(forecast::accuracy(ets1),2)
##                 ME RMSE   MAE   MPE MAPE MASE  ACF1
## Training set -0.01 35.9 26.57 -0.68 6.12 3.39 -0.07
sn1 = snaive(testAus)
round(forecast::accuracy(sn1),2)
##                 ME  RMSE  MAE   MPE MAPE MASE ACF1
## Training set -2.17 11.16 7.83 -0.62    2    1 0.12
st1 = stlf(testAus)
round(forecast::accuracy(st1),2)
##                 ME RMSE  MAE   MPE MAPE MASE ACF1
## Training set -1.86 5.88 4.36 -0.46 1.07 0.56 0.02
# The stlf() function provides the best forecasting model of the three for ausbeer.
# bricksq
testBri = window(bricksq, start=1992)

ets2 = ets(testBri)
round(forecast::accuracy(ets2),2)
##                 ME RMSE   MAE MPE MAPE MASE  ACF1
## Training set 12.93   33 28.85 2.5 6.45 1.27 -0.02
sn2 = snaive(testBri)
round(forecast::accuracy(sn2),2)
##                 ME  RMSE   MAE MPE MAPE MASE ACF1
## Training set 22.71 26.07 22.71   5    5    1 -0.3
st2 <- stlf(testBri)
round(forecast::accuracy(st2),2)
##               ME  RMSE  MAE  MPE MAPE MASE ACF1
## Training set 4.9 10.96 8.44 1.07 1.91 0.37 -0.3
# The stlf() function provides the best forecasting model of the three for bricksq.
# dole
testDol = window(dole, start=1990)

ets3 = ets(testDol)
round(forecast::accuracy(ets3),2)
##                   ME     RMSE    MAE   MPE MAPE MASE ACF1
## Training set -583.69 12272.63 9763.9 -0.22  1.7 0.04 0.47
sn3 = snaive(testDol)
round(forecast::accuracy(sn3),2)
##                    ME   RMSE      MAE   MPE  MAPE MASE ACF1
## Training set 221411.8 223754 221411.8 31.02 31.02    1 0.74
st3 = stlf(testDol)
round(forecast::accuracy(st3),2)
##                   ME    RMSE     MAE   MPE MAPE MASE ACF1
## Training set -533.24 7483.45 6008.59 -0.05 1.03 0.03  0.1
# For dole, both stlf() and ets() fit far better than the seasonal naive benchmark (MASE of 0.03 and 0.04 versus 1). ME only measures bias, so it should not be used alone to rank models; on RMSE, MAE and MAPE the stlf() model gives the best results.
# a10
testa10 = window(a10, start=2006)

ets4 = ets(testa10)
round(forecast::accuracy(ets4),2)
##              ME RMSE  MAE   MPE  MAPE MASE  ACF1
## Training set  0 3.74 2.86 -2.62 14.96 0.81 -0.08
sn4 = snaive(testa10)
round(forecast::accuracy(sn4),2)
##                ME RMSE  MAE   MPE  MAPE MASE  ACF1
## Training set 3.21  3.9 3.51 14.43 16.08    1 -0.14
st4 = stlf(testa10)
round(forecast::accuracy(st4),2)
##                 ME RMSE  MAE   MPE MAPE MASE  ACF1
## Training set -0.03 1.15 0.94 -0.55 4.81 0.27 -0.28
# The ets() model shows the best results for the a10 data set.
# h02
testh02 = window(h02, start=2006)

ets5 = ets(testh02)
round(forecast::accuracy(ets5),2)
##                ME RMSE  MAE  MPE MAPE MASE  ACF1
## Training set 0.01 0.05 0.04 0.49 5.04  0.6 -0.33
sn5 = snaive(testh02)
round(forecast::accuracy(sn5),2)
##                ME RMSE  MAE  MPE MAPE MASE  ACF1
## Training set 0.03 0.09 0.07 2.87 8.11    1 -0.19
st5 = stlf(testh02)
round(forecast::accuracy(st5),2)
##                ME RMSE  MAE  MPE MAPE MASE  ACF1
## Training set 0.01 0.05 0.04 0.62 4.97 0.59 -0.32
# The ets() model shows slightly better results for forecasting the h02 data set.
# usmelec
testUs = window(usmelec, start=2010)

ets6 = ets(testUs)
round(forecast::accuracy(ets6),2)
##                 ME RMSE  MAE   MPE MAPE MASE ACF1
## Training set -1.07 6.38 5.18 -0.35 1.54  0.7 0.15
sn6 = snaive(testUs)
round(forecast::accuracy(sn6),2)
##                 ME RMSE  MAE   MPE MAPE MASE ACF1
## Training set -2.18 9.73 7.39 -0.63  2.2    1  0.2
st6 = stlf(testUs)
round(forecast::accuracy(st6),2)
##                 ME RMSE  MAE   MPE MAPE MASE ACF1
## Training set -0.92 6.23 5.05 -0.31  1.5 0.68 0.14
# The stlf() model appears to show the best results for forecasting the usmelec data set.

#Question 14 Use ets() on the following series: bicoal, chicken, dole, usdeaths, lynx, ibmclose, eggs. Does it always give good forecasts?

#bicoal
ets(bicoal)
## ETS(M,N,N) 
## 
## Call:
##  ets(y = bicoal) 
## 
##   Smoothing parameters:
##     alpha = 0.8205 
## 
##   Initial states:
##     l = 542.665 
## 
##   sigma:  0.1262
## 
##      AIC     AICc      BIC 
## 595.2499 595.7832 600.9253
#chicken
ets(chicken)
## ETS(M,N,N) 
## 
## Call:
##  ets(y = chicken) 
## 
##   Smoothing parameters:
##     alpha = 0.98 
## 
##   Initial states:
##     l = 159.8322 
## 
##   sigma:  0.1691
## 
##      AIC     AICc      BIC 
## 635.2382 635.6018 641.9836
#dole
ets(dole)
## ETS(M,Ad,M) 
## 
## Call:
##  ets(y = dole) 
## 
##   Smoothing parameters:
##     alpha = 0.697 
##     beta  = 0.124 
##     gamma = 0.303 
##     phi   = 0.902 
## 
##   Initial states:
##     l = 2708.6621 
##     b = 836.017 
##     s = 1.0404 0.8893 0.9103 1.0301 1.0576 1.0584
##            0.9801 0.9632 1.021 0.9838 1.0145 1.0514
## 
##   sigma:  0.0935
## 
##      AIC     AICc      BIC 
## 10602.67 10604.30 10676.19
#usdeaths
ets(usdeaths)
## ETS(A,N,A) 
## 
## Call:
##  ets(y = usdeaths) 
## 
##   Smoothing parameters:
##     alpha = 0.5972 
##     gamma = 0.0019 
## 
##   Initial states:
##     l = 9195.6403 
##     s = -62.6129 -270.0351 263.3823 -89.4907 1005.529 1662.647
##            795.2585 333.326 -551.161 -737.5102 -1552.872 -796.4611
## 
##   sigma:  294.4663
## 
##      AIC     AICc      BIC 
## 1141.016 1149.587 1175.166
#lynx
ets(lynx)
## ETS(M,N,N) 
## 
## Call:
##  ets(y = lynx) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 2372.8047 
## 
##   sigma:  0.9594
## 
##      AIC     AICc      BIC 
## 2058.138 2058.356 2066.346
#ibmclose
ets(ibmclose)
## ETS(A,N,N) 
## 
## Call:
##  ets(y = ibmclose) 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 459.9339 
## 
##   sigma:  7.2637
## 
##      AIC     AICc      BIC 
## 3648.450 3648.515 3660.182
#eggs
ets(eggs)
## ETS(M,N,N) 
## 
## Call:
##  ets(y = eggs) 
## 
##   Smoothing parameters:
##     alpha = 0.8198 
## 
##   Initial states:
##     l = 278.8889 
## 
##   sigma:  0.1355
## 
##      AIC     AICc      BIC 
## 1043.286 1043.553 1050.916
#No, not always. The AIC, AICc, and BIC values are scale-dependent, so the very large values for some series (e.g., dole) mostly reflect the scale of the data rather than model quality. The clearer failures are series such as lynx, whose strong cycles a simple ETS(M,N,N) model cannot capture, so its forecasts are poor.

#Question 14.B

# The ets() function did not perform especially well on the usdeaths data set. The series is short (only six years), so there is little data from which to estimate the level and seasonal components. A quick plot of the forecasts is sketched after the data listing below.
usdeaths
##        Jan   Feb   Mar   Apr   May   Jun   Jul   Aug   Sep   Oct   Nov   Dec
## 1973  9007  8106  8928  9137 10017 10826 11317 10744  9713  9938  9161  8927
## 1974  7750  6981  8038  8422  8714  9512 10120  9823  8743  9129  8710  8680
## 1975  8162  7306  8124  7870  9387  9556 10093  9620  8285  8433  8160  8034
## 1976  7717  7461  7776  7925  8634  8945 10078  9179  8037  8488  7874  8647
## 1977  7792  6957  7726  8106  8890  9299 10625  9302  8314  8850  8265  8796
## 1978  7836  6892  7791  8129  9115  9434 10484  9827  9110  9070  8633  9240
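# A quick visual check (sketch): the ETS forecasts for usdeaths, two years ahead
autoplot(forecast(ets(usdeaths), h=24))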

#Question 15

#The three-letter code in an ETS model gives the ETS classification as (Error, Trend, Seasonal). The possible values are "N" for none, "A" for additive, "M" for multiplicative, or "Z" for automatic selection. ETS(M,A,M) therefore has multiplicative errors, an additive trend, and multiplicative seasonality, which corresponds to Holt-Winters' multiplicative method. A sketch of the correspondence follows.
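# Sketch of the equivalence, illustrated on austourists: fitting ETS(M,A,M) directly and
# via hw() with multiplicative seasonality should produce the same model family
ets(austourists, model="MAM")
hw(austourists, seasonal="multiplicative")$model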

#Question 16

# For the ETS(A,N,N) model, the traditional parameter region is 0 < α < 1, but the admissible region is 0 < α < 2.
# The h-step forecast variance of the ETS(A,N,N) model is σ^2 * [1 + α^2 * (h − 1)], so the prediction intervals widen as the horizon h grows.

#Question 17

# Assuming the forecast errors are normally distributed, a 95% prediction interval for the h-step forecast is ŷ(T+h|T) ± 1.96·σ̂(h), where σ̂(h) is an estimate of the standard deviation of the h-step forecast distribution. More generally, a prediction interval can be written as ŷ(T+h|T) ± c·σ̂(h).

##Chapter 8 - ARIMA Models

#Question 1.A Figure 8.31 shows the ACFs for 36 random numbers, 360 random numbers and 1,000 random numbers.

#Q. Explain the differences among these figures. Do they all indicate that the data are white noise?

# The figures show the autocorrelation of each series at a range of lags. These are correlation coefficients, hence the -1 to 1 scale on the y-axis. All of the correlations at every lag lie within the dashed blue bounds, which indicates that each figure is consistent with white noise.

#Question 1.B

#Q. Why are the critical values at different distances from the mean of zero? Why are the autocorrelations different in each figure when they each refer to white noise?

# The critical bounds depend on the sample size: they are drawn at ±1.96/√T, so as the series length grows from x1 to x3 the bounds narrow, and smaller autocorrelations become statistically significant. The autocorrelations themselves differ across the figures because each is computed from a different random sample; white noise only guarantees they are small, not identical. See the numeric check below.
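# Numeric check (sketch): the half-width of the dashed critical region for each sample size
round(1.96 / sqrt(c(36, 360, 1000)), 3) # 0.327, 0.103, 0.062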

#Question 2

autoplot(ibmclose) + ylab("Close Price") + xlab("Day")

plot(acf(ibmclose))

plot(pacf(ibmclose))

# The ACF plot shows strong autocorrelation throughout the series: the correlations decay only slowly as the lag increases, which is the signature of a trending, non-stationary series.

# The partial ACF plot shows a single large spike at lag 1 with a correlation of almost 1. The values past that point are mostly within the critical bounds because the lag-1 correlation accounts for nearly all of the dependence. A differencing check is sketched below.
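# Sketch: this slow ACF decay with a single PACF spike suggests differencing the series
# before modelling
forecast::ndiffs(ibmclose) # expected to suggest one difference
tsdisplay(diff(ibmclose)) # the differenced series should look much closer to white noise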

#Question 3.A

#usnetelec
plot(BoxCox(usnetelec, lambda=0), main="US Net Electricity Generation")

#Question 3.B

#usgdp
plot(BoxCox(usgdp, lambda=1/2), main="US GDP")

#Question 3.C

#mcopper
plot(BoxCox(mcopper, lambda=1/3), main="Monthly Copper Prices")

#Question 3.D

#enplanements
plot(BoxCox(enplanements, lambda=0), main="Monthly US Domestic Enplanements")

#Question 3.E

#visitors
plot(BoxCox(visitors, lambda=0), main="Monthly Australian Overseas Visitors")
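# Sketch: rather than fixing lambda by eye, BoxCox.lambda() can select a value for each series
sapply(list(usnetelec=usnetelec, usgdp=usgdp, mcopper=mcopper,
            enplanements=enplanements, visitors=visitors), BoxCox.lambda)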

#Question 4

# I chose a log transformation for enplanements
plot(BoxCox(enplanements, lambda=0), main="Monthly US Domestic Enplanements")

acf(enplanements)

acf(BoxCox(enplanements, lambda=0))

pacf(enplanements)

#There is no major difference between the ACF plots here. Adjusting the lambda value further gives similar results:
acf(enplanements)

acf(BoxCox(enplanements, lambda=1/3))

pacf(BoxCox(enplanements, lambda=1/3))

# Incorporating a lag-2 difference, (1 - B^2)y_t in backshift notation, gives the following:
pacf(diff(enplanements, lag=2))

#Question 5

# Importing the retail dataset
library(readxl)
retail = read_excel("C:\\Users\\student\\Desktop\\Predictive Analytics - Forecasting\\HW 3 - Hyndman Ch 7 & 8\\retail.xlsx", skip=1) # skip=1 because the sheet has two header rows (per the textbook)
## New names:
## * `` -> ...1
retailTS = ts(retail[[2]], frequency=12, start=c(1982,4)) # one monthly series; start date per the textbook

retailTransform = BoxCox(retailTS, lambda = 0) ## Log transformation
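# Sketch: instead of assuming a log transform, BoxCox.lambda() can pick lambda for this series
lamRetail = BoxCox.lambda(retailTS)
lamRetail
autoplot(BoxCox(retailTS, lambda=lamRetail))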

#Question 6.A

#Use the following R code to generate data from an AR(1) model with ϕ1=0.6 and σ^2=1. The process starts with y1=0.
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]

#Question 6.B

autoplot(y)

#As we decrease ϕ1, the series shows less persistence and looks more like plain white noise; with ϕ1 closer to 1 it wanders in longer stretches.

y2 = ts(numeric(100))
e = rnorm(100)
for(i in 2:100)
  y2[i] = 0.1*y2[i-1] + e[i]

plot(y2)

#Question 6.C

# Select θ1 for an MA(1) model: y[t] = θ1*e[t-1] + e[t]
input1 = 0.6

yO = ts(numeric(100))
e = rnorm(100)
for(i in 2:100)
  yO[i] = input1*e[i-1] + e[i]

plot(yO)

#Question 6.D

input1 = 0.6
input2 = 0.1
input3 = 1.5

## Test 1: θ1 = 0.6
y1 = ts(numeric(100))
e = rnorm(100)
for(i in 2:100)
  y1[i] = input1*e[i-1] + e[i]

plot(y1)

# Test 2: θ1 = 0.1
y2 = ts(numeric(100))
e = rnorm(100)
for(i in 2:100)
  y2[i] = input2*e[i-1] + e[i]

plot(y2)

# Test 3: θ1 = 1.5
y3 = ts(numeric(100))
e = rnorm(100)
for(i in 2:100)
  y3[i] = input3*e[i-1] + e[i]

plot(y3)

# The series becomes more variable as θ1 increases, since the variance of an MA(1) process is σ^2*(1 + θ1^2). At θ1 = 1.5 the process is still stationary but is no longer invertible.

#Question 6.E

y = ts(numeric(100))
e = rnorm(100)
for(i in 2:100)
  y[i] = 0.6*y[i-1] + e[i]

arima(y) # note: with no order specified, arima() fits only a mean, i.e. ARIMA(0,0,0)
## 
## Call:
## arima(x = y)
## 
## Coefficients:
##       intercept
##         -0.2892
## s.e.     0.1181
## 
## sigma^2 estimated as 1.395:  log likelihood = -158.55,  aic = 321.1
yAr = ts(numeric(100))
e = rnorm(100)
for(i in 2:100)
  yAr[i] = 0.6*yAr[i-1] + 0.6*e[i-1] + e[i]

plot(yAr)

arima(yAr)
## 
## Call:
## arima(x = yAr)
## 
## Coefficients:
##       intercept
##          0.3554
## s.e.     0.1862
## 
## sigma^2 estimated as 3.466:  log likelihood = -204.05,  aic = 412.1

#Question 6.F

yAr2 = ts(numeric(100))
e = rnorm(100)
for(i in 3:100)
  yAr2[i] = -0.8*yAr2[i-1] + 0.3*yAr2[i-2] + e[i] # AR(2) with ϕ1=−0.8, ϕ2=0.3

plot(yAr2)

#Question 6.G

plot(yAr)

plot(yAr2)

# The AR(2) series is non-stationary: with ϕ1=−0.8 and ϕ2=0.3 the stationarity conditions are violated, so it oscillates with ever-growing amplitude, whereas the ARMA(1,1) series remains stable around its mean.

#Question 7.A Consider wmurders, the number of women murdered each year (per 100,000 standard population) in the United States.

autoplot(wmurders) + ylab("Female Murder Rate") + xlab("Year")

describe(wmurders)
##    vars  n mean   sd median trimmed  mad  min  max range  skew kurtosis  se
## X1    1 55 3.39 0.75   3.46     3.4 0.97 2.23 4.49  2.26 -0.14    -1.57 0.1
summary(wmurders)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.233   2.609   3.463   3.393   4.055   4.492
wm1 = ets(wmurders, model="ZZZ", damped=NULL, alpha=NULL, beta=NULL,
    gamma=NULL, phi=NULL, lambda=NULL, biasadj=FALSE,
    additive.only=FALSE, restrict=TRUE,
    allow.multiplicative.trend=FALSE)
wm1
## ETS(M,N,N) 
## 
## Call:
##  ets(y = wmurders, model = "ZZZ", damped = NULL, alpha = NULL,
##      beta = NULL, gamma = NULL, phi = NULL, additive.only = FALSE,
##      lambda = NULL, biasadj = FALSE, restrict = TRUE, allow.multiplicative.trend = FALSE)
## 
##   Smoothing parameters:
##     alpha = 0.9599 
## 
##   Initial states:
##     l = 2.4177 
## 
##   sigma:  0.0617
## 
##      AIC     AICc      BIC 
## 49.36113 49.83172 55.38313
# wmurders is annual data, so a seasonal term is not appropriate; check the residuals of a non-seasonal candidate instead
wm2 = Arima(wmurders, order=c(0,1,1))
ggtsdisplay(residuals(wm2))

wm3 <- arima(wmurders, order=c(3,1,1))
wm3
## 
## Call:
## arima(x = wmurders, order = c(3, 1, 1))
## 
## Coefficients:
##          ar1     ar2      ar3      ma1
##       0.0683  0.3011  -0.0608  -0.1179
## s.e.  1.7595  0.1635   0.5328   1.7569
## 
## sigma^2 estimated as 0.04104:  log likelihood = 9.5,  aic = -8.99
auto.arima(wmurders)
## Series: wmurders 
## ARIMA(1,2,1) 
## 
## Coefficients:
##           ar1      ma1
##       -0.2434  -0.8261
## s.e.   0.1553   0.1143
## 
## sigma^2 estimated as 0.04632:  log likelihood=6.44
## AIC=-6.88   AICc=-6.39   BIC=-0.97
# Auto ARIMA suggests (1,2,1)

#Question 7.B

#No. The selected model has d=2, and including a constant when the data are twice-differenced implies a quadratic trend in the long-run forecasts, which is not plausible for this series.

#Question 7.C

wmAuto = auto.arima(wmurders)
wmAuto
## Series: wmurders 
## ARIMA(1,2,1) 
## 
## Coefficients:
##           ar1      ma1
##       -0.2434  -0.8261
## s.e.   0.1553   0.1143
## 
## sigma^2 estimated as 0.04632:  log likelihood=6.44
## AIC=-6.88   AICc=-6.39   BIC=-0.97
acf(wmurders)

pacf(wmurders)

#Question 7.D

#An error occurred while checking the model when knitting, but based on the results run interactively, the residuals look satisfactory.

#Question 7.E

forecast(wmurders, h=3) # note: forecast() on the raw series picks an ETS model automatically; forecast(wmAuto, h=3) would forecast from the ARIMA(1,2,1) fit
##      Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005       2.592549 2.387657 2.797442 2.279194 2.905905
## 2006       2.592549 2.308273 2.876826 2.157786 3.027313
## 2007       2.592549 2.246455 2.938644 2.063243 3.121855

#Question 7.F

plot(forecast(wmurders, h=3))

#Question 7.G

auto.arima(wmurders)
## Series: wmurders 
## ARIMA(1,2,1) 
## 
## Coefficients:
##           ar1      ma1
##       -0.2434  -0.8261
## s.e.   0.1553   0.1143
## 
## sigma^2 estimated as 0.04632:  log likelihood=6.44
## AIC=-6.88   AICc=-6.39   BIC=-0.97
autoplot(forecast(auto.arima(wmurders),h=3))

# The two sets of forecasts are similar, but the ETS-based and ARIMA-based plots differ in the trend of the point forecasts and in the width of the prediction intervals.

#Question 8.A Consider austa, the total international visitors to Australia (in millions) for the period 1980-2015.

aus = auto.arima(austa)
aus
## Series: austa 
## ARIMA(0,1,1) with drift 
## 
## Coefficients:
##          ma1   drift
##       0.3006  0.1735
## s.e.  0.1647  0.0390
## 
## sigma^2 estimated as 0.03376:  log likelihood=10.62
## AIC=-15.24   AICc=-14.46   BIC=-10.57
checkresiduals(aus)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1) with drift
## Q* = 2.297, df = 5, p-value = 0.8067
## 
## Model df: 2.   Total lags used: 7
# With a Ljung-Box p-value of 0.81, the residuals are indistinguishable from white noise.

#Question 8.B

aus2 = forecast(arima(austa, order=c(0,1,1)),h=10)
plot(aus2)

#Question 8.C

#aus3 = forecast(arima(austa, order=c(2,1,3)),h=10)
#Error in arima(austa, order = c(2, 1, 3)) : non-stationary AR part from CSS
# The default CSS start-up fails here even for a valid model; one possible workaround is
# full maximum-likelihood estimation, e.g. Arima(austa, order=c(2,1,3), include.drift=TRUE, method="ML").

#Question 8.D

aus4 = forecast(arima(austa, order=c(0,0,1)),h=10)
plot(aus4)

plot(forecast(arima(austa, order=c(0,0,0)), h=10)) # MA term removed: ARIMA(0,0,0) is just a mean

#Question 8.E

aus5 = forecast(arima(austa, order=c(0,2,1)),h=10)
plot(aus5)

#Question 9.A For the usgdp series:

plot(BoxCox(usgdp, lambda=0), main="US GDP") ## Log transformation

#Question 9.B

usgdpT = BoxCox(usgdp, lambda=0)
auto.arima(usgdpT)
## Series: usgdpT 
## ARIMA(2,1,2) with drift 
## 
## Coefficients:
##          ar1      ar2      ma1     ma2   drift
##       1.3594  -0.7732  -1.1002  0.6102  0.0084
## s.e.  0.1489   0.1774   0.2197  0.2285  0.0007
## 
## sigma^2 estimated as 8.503e-05:  log likelihood=773.76
## AIC=-1535.52   AICc=-1535.15   BIC=-1514.73

#Question 9.C

arima(usgdpT, order=c(0,0,1))
## 
## Call:
## arima(x = usgdpT, order = c(0, 0, 1))
## 
## Coefficients:
##          ma1  intercept
##       1.0000     8.3967
## s.e.  0.0186     0.0373
## 
## sigma^2 estimated as 0.08289:  log likelihood = -43.93,  aic = 93.87
arima(usgdpT, order=c(1,1,1))
## 
## Call:
## arima(x = usgdpT, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.8512  -0.4140
## s.e.  0.0579   0.1117
## 
## sigma^2 estimated as 9.845e-05:  log likelihood = 753.49,  aic = -1500.97

#Question 9.D

gdpA = auto.arima(usgdpT)
checkresiduals(gdpA)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(2,1,2) with drift
## Q* = 6.3724, df = 3, p-value = 0.09483
## 
## Model df: 5.   Total lags used: 8

#Question 9.E

forecast(gdpA, h=25)
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2006 Q2       9.350972 9.339154 9.362789 9.332898 9.369045
## 2006 Q3       9.360099 9.341097 9.379101 9.331038 9.389161
## 2006 Q4       9.368788 9.343212 9.394364 9.329673 9.407904
## 2007 Q1       9.377005 9.345848 9.408162 9.329355 9.424655
## 2007 Q2       9.384918 9.349435 9.420401 9.330651 9.439185
## 2007 Q3       9.392785 9.354123 9.431446 9.333656 9.451913
## 2007 Q4       9.400821 9.359781 9.441862 9.338055 9.463587
## 2008 Q1       9.409127 9.366114 9.452139 9.343345 9.474909
## 2008 Q2       9.417665 9.372764 9.462565 9.348995 9.486334
## 2008 Q3       9.426311 9.379400 9.473223 9.354567 9.498056
## 2008 Q4       9.434927 9.385815 9.484039 9.359816 9.510037
## 2009 Q1       9.443414 9.391979 9.494849 9.364751 9.522077
## 2009 Q2       9.451753 9.398015 9.505491 9.369568 9.533938
## 2009 Q3       9.459988 9.404096 9.515881 9.374508 9.545469
## 2009 Q4       9.468198 9.410357 9.526039 9.379738 9.556658
## 2010 Q1       9.476452 9.416851 9.536054 9.385300 9.567605
## 2010 Q2       9.484788 9.423549 9.546027 9.391131 9.578445
## 2010 Q3       9.493199 9.430373 9.556025 9.397115 9.589283
## 2010 Q4       9.501650 9.437234 9.566066 9.403134 9.600166
## 2011 Q1       9.510097 9.444065 9.576129 9.409110 9.611084
## 2011 Q2       9.518507 9.450845 9.586169 9.415027 9.621987
## 2011 Q3       9.526871 9.457594 9.596148 9.420921 9.632821
## 2011 Q4       9.535200 9.464354 9.606047 9.426850 9.643551
## 2012 Q1       9.543518 9.471165 9.615871 9.432863 9.654173
## 2012 Q2       9.551847 9.478049 9.625644 9.438983 9.664710
autoplot(forecast(gdpA, h=25))

#Question 9.F

ets(usgdpT)
## ETS(A,A,N) 
## 
## Call:
##  ets(y = usgdpT) 
## 
##   Smoothing parameters:
##     alpha = 0.9959 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 7.3524 
##     b = 0.0084 
## 
##   sigma:  0.0099
## 
##       AIC      AICc       BIC 
## -883.5955 -883.3357 -866.2552
etsGDP = forecast(ets(usgdpT), h=25)
autoplot(etsGDP)
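# Sketch: comparing the ARIMA and ETS fits on a hold-out, assuming the last 8 quarters
# of the transformed series as a test set
trainGDP = window(usgdpT, end=c(2004,1))
testGDP = window(usgdpT, start=c(2004,2))
forecast::accuracy(forecast(auto.arima(trainGDP), h=8), testGDP)["Test set","RMSE"]
forecast::accuracy(forecast(ets(trainGDP), h=8), testGDP)["Test set","RMSE"]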

#Question 10.A Consider austourists, the quarterly visitor nights (in millions) spent by international tourists to Australia for the period 1999–2015.

autoplot(austourists)

#Question 10.B

acf(austourists)

# The ACF plot shows the autocorrelation of the series at each lag. Here many of the values lie well outside the critical bounds, signalling that the series is almost certainly not white noise.

#Question 10.C

pacf(austourists)

# The PACF plot shows the partial autocorrelations: the correlation at each lag after removing the effects of the shorter lags. As with the ACF, several values pass the critical bounds, again indicating the series is not white noise.

#Question 10.D

plot(diff(austourists, lag=4, differences=1), xlab="Year", ylab="Total Night Visitors", main="International Tourists to Australia", col="dodgerblue")

# After taking a seasonal (lag-4) difference, the seasonality is largely removed and no strong remaining trend is visible, which suggests a model built on the seasonally differenced series.

#Question 10.E

auto.arima(austourists)
## Series: austourists 
## ARIMA(1,0,0)(1,1,0)[4] with drift 
## 
## Coefficients:
##          ar1     sar1   drift
##       0.4705  -0.5305  0.5489
## s.e.  0.1154   0.1122  0.0864
## 
## sigma^2 estimated as 5.15:  log likelihood=-142.48
## AIC=292.97   AICc=293.65   BIC=301.6
# auto.arima() also selects a seasonal model, applying the seasonal difference (1 − B^4)y_t

#Question 10.F

# In backshift notation, the fitted ARIMA(1,0,0)(1,1,0)[4] model with drift is
# (1 − 0.4705B)(1 + 0.5305B^4)(1 − B^4)y_t = c + ε_t

#Question 11.A Consider usmelec, the total net generation of electricity (in billion kilowatt hours) by the U.S. electric industry (monthly for the period January 1973 – June 2013). In general there are two peaks per year: in mid-summer and mid-winter.

mel1 = ma(usmelec, order=12) # 12-month moving average
plot(mel1, col="dodgerblue", main="Electricity Monthly Total Net Generation", xlab="Year", ylab="Electricity Generation (billion kWh)")

#Question 11.B

melLog = BoxCox(usmelec, lambda=0) ## Log transformation

plot(melLog, main="Electricity Monthly Total Net Generation (log scale)", xlab="Year", ylab="Log Electricity Generation")

#Question 11.C

# The data show a clear seasonal pattern throughout, so check what differencing is needed, starting with a first difference.
tsdisplay(diff(usmelec, 1))

library(aTSA)
## 
## Attaching package: 'aTSA'
## The following object is masked from 'package:forecast':
## 
##     forecast
## The following object is masked from 'package:graphics':
## 
##     identify
adf.test(diff(usmelec,12))
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -11.21    0.01
## [2,]   1  -9.01    0.01
## [3,]   2  -6.43    0.01
## [4,]   3  -6.26    0.01
## [5,]   4  -5.58    0.01
## [6,]   5  -4.91    0.01
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -12.53    0.01
## [2,]   1 -10.30    0.01
## [3,]   2  -7.43    0.01
## [4,]   3  -7.35    0.01
## [5,]   4  -6.61    0.01
## [6,]   5  -5.92    0.01
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -12.63    0.01
## [2,]   1 -10.41    0.01
## [3,]   2  -7.54    0.01
## [4,]   3  -7.46    0.01
## [5,]   4  -6.72    0.01
## [6,]   5  -6.04    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
tsdisplay(diff(melLog, 1))

adf.test(diff(melLog,12))
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -10.35    0.01
## [2,]   1  -8.08    0.01
## [3,]   2  -5.90    0.01
## [4,]   3  -5.64    0.01
## [5,]   4  -5.02    0.01
## [6,]   5  -4.40    0.01
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -11.88    0.01
## [2,]   1  -9.48    0.01
## [3,]   2  -7.00    0.01
## [4,]   3  -6.78    0.01
## [5,]   4  -6.08    0.01
## [6,]   5  -5.43    0.01
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -12.25    0.01
## [2,]   1  -9.86    0.01
## [3,]   2  -7.33    0.01
## [4,]   3  -7.13    0.01
## [5,]   4  -6.40    0.01
## [6,]   5  -5.80    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01

#Question 11.D

auto.arima(usmelec)
## Series: usmelec 
## ARIMA(1,0,2)(0,1,1)[12] with drift 
## 
## Coefficients:
##          ar1      ma1      ma2     sma1   drift
##       0.9717  -0.4374  -0.2774  -0.7061  0.3834
## s.e.  0.0163   0.0483   0.0493   0.0310  0.0868
## 
## sigma^2 estimated as 57.67:  log likelihood=-1635.13
## AIC=3282.26   AICc=3282.44   BIC=3307.22
# Alternate model
arima(usmelec, order=c(1,1,1))
## 
## Call:
## arima(x = usmelec, order = c(1, 1, 1))
## 
## Coefficients:
##           ar1     ma1
##       -0.1626  0.3242
## s.e.   0.2454  0.2344
## 
## sigma^2 estimated as 555.7:  log likelihood = -2220.85,  aic = 4447.69
# The auto-selected seasonal model clearly performs better; note that AIC values are only strictly comparable between models with the same orders of differencing.

#Question 11.E

autoMel = auto.arima(usmelec)
checkresiduals(autoMel)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,2)(0,1,1)[12] with drift
## Q* = 42.725, df = 19, p-value = 0.001413
## 
## Model df: 5.   Total lags used: 24
# The Ljung-Box p-value (0.0014) indicates some autocorrelation remains in the residuals.
tsdisplay(autoMel$residuals)

melARIMAauto = arima(usmelec) # note: with no order specified this fits only a mean
autoMelF = forecast(melARIMAauto) # forecast() here is aTSA's, which masks forecast::forecast()
## Forecast for univariate time series: 
##     Lead Forecast  S.E Lower Upper
## 487    1      260 68.8   125   394
## ------ 
## Note: confidence level = 95 %

autoMelF
##     Lead Forecast      S.E   Lower    Upper
## 487    1 259.5576 68.80362 124.705 394.4102
adf.test(autoMel$residuals)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag    ADF p.value
## [1,]   0 -21.81    0.01
## [2,]   1 -14.88    0.01
## [3,]   2 -11.92    0.01
## [4,]   3 -11.32    0.01
## [5,]   4 -10.83    0.01
## [6,]   5  -9.34    0.01
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -21.79    0.01
## [2,]   1 -14.86    0.01
## [3,]   2 -11.90    0.01
## [4,]   3 -11.31    0.01
## [5,]   4 -10.82    0.01
## [6,]   5  -9.33    0.01
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -21.83    0.01
## [2,]   1 -14.91    0.01
## [3,]   2 -11.96    0.01
## [4,]   3 -11.38    0.01
## [5,]   4 -10.91    0.01
## [6,]   5  -9.43    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
melA = arima(usmelec, order=c(1,1,1))
tsdisplay(melA$residuals)

adf.test(melA$residuals)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 -22.2    0.01
## [2,]   1 -17.8    0.01
## [3,]   2 -22.3    0.01
## [4,]   3 -26.0    0.01
## [5,]   4 -14.3    0.01
## [6,]   5 -14.2    0.01
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -22.2    0.01
## [2,]   1 -17.7    0.01
## [3,]   2 -22.3    0.01
## [4,]   3 -26.1    0.01
## [5,]   4 -14.3    0.01
## [6,]   5 -14.2    0.01
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -22.2    0.01
## [2,]   1 -17.7    0.01
## [3,]   2 -22.2    0.01
## [4,]   3 -26.0    0.01
## [5,]   4 -14.3    0.01
## [6,]   5 -14.2    0.01
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01

#Question 11.F

# Compared against the published EIA figures, the forecast is not perfectly accurate, but it gives a good overall idea of what to expect.

#Question 11.G

# This will vary based on the dataset and the current state of the economy. In this example, I believe that any forecasts past 3 years should be considered unpredictable because nobody knows for sure what the world will look like 3 years from now. Especially with this pandemic...

#Question 12.A For the mcopper data:

autoplot(mcopper, col="dodgerblue") + ylab("Monthly Copper Prices") + xlab("Year")

mcT = BoxCox(mcopper, lambda=0) ## Log transform
autoplot(mcT, col="dodgerblue") + ylab("Monthly Copper Prices") + xlab("Year")

#Question 12.B

auto.arima(mcT)
## Series: mcT 
## ARIMA(0,1,1) 
## 
## Coefficients:
##          ma1
##       0.3756
## s.e.  0.0385
## 
## sigma^2 estimated as 0.003634:  log likelihood=782.84
## AIC=-1561.69   AICc=-1561.66   BIC=-1553.02

#Question 12.C

# Model 2
arima(mcT, order=c(0,0,1))
## 
## Call:
## arima(x = mcT, order = c(0, 0, 1))
## 
## Coefficients:
##          ma1  intercept
##       0.9451     6.7305
## s.e.  0.0091     0.0267
## 
## sigma^2 estimated as 0.1062:  log likelihood = -169.03,  aic = 344.06
# Model 3
arima(mcT, order=c(1,1,1))
## 
## Call:
## arima(x = mcT, order = c(1, 1, 1))
## 
## Coefficients:
##           ar1     ma1
##       -0.0083  0.3825
## s.e.   0.1007  0.0912
## 
## sigma^2 estimated as 0.003628:  log likelihood = 782.85,  aic = -1559.69

#Question 12.D

# Based on the results from the models, I would choose model 1 because of its low AIC, AICc, and BIC metrics. In terms of AIC, 0 is an arbitrary number so the negative sign is not a concern.

#Question 12.E

mc = arima(mcT) # note: with no order this fits only a mean; the selected model was ARIMA(0,1,1)
mc
## 
## Call:
## arima(x = mcT)
## 
## Coefficients:
##       intercept
##          6.7305
## s.e.     0.0260
## 
## sigma^2 estimated as 0.3802:  log likelihood = -527.6,  aic = 1059.19
mcF = forecast(mc) # aTSA's forecast(), since it masks forecast::forecast()
## Forecast for univariate time series: 
##     Lead Forecast   S.E Lower Upper
## 565    1     6.73 0.617  5.52  7.94
## ------ 
## Note: confidence level = 95 %

mcF
##     Lead Forecast       S.E    Lower    Upper
## 565    1 6.730527 0.6166311 5.521952 7.939101
#autoplot(mcF) + ylab("Monthly Copper Prices") + xlab("Year")
#This fails when knitting, likely because mcF comes from aTSA's forecast() and is not a forecast-class object; see the sketch below
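# Sketch of what this step intended: forecast from the chosen ARIMA(0,1,1) model,
# calling forecast::forecast() explicitly because aTSA masks it at this point
mcFit = Arima(mcT, order=c(0,1,1))
autoplot(forecast::forecast(mcFit, h=24)) + ylab("Log Monthly Copper Price") + xlab("Year")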

#Question 12.F

#This code also fails when knitting, but here are the results from running it interactively:

#mc2 = ets(mcT)
#mcF2 = forecast(mc2)
#mcF2

#       Point Forecast Lo 80      Hi 80     Lo 95   Hi 95

#Jan 2007   8.133809    8.052148    8.215470    8.008919    8.258699
#Feb 2007   8.126612    8.004955    8.248269    7.940553    8.312670
#Mar 2007   8.120296    7.964292    8.276299    7.881709    8.358883
#Apr 2007   8.114754    7.927141    8.302367    7.827825    8.401683
#May 2007   8.109891    7.892420    8.327362    7.777298    8.442485
#Jun 2007   8.105624    7.859604    8.351644    7.729369    8.481879
#Jul 2007   8.101880    7.828388    8.375371    7.683611    8.520149
#Aug 2007   8.098594    7.798571    8.398618    7.639748    8.557441
#Sep 2007   8.095711    7.770005    8.421418    7.597586    8.593837
#Oct 2007   8.093182    7.742577    8.443787    7.556978    8.629386
#autoplot(mcF2) + ylab("Monthly Copper Prices") + xlab("Year")

#Because the autoplot code above fails when knitting, the plot is omitted here.
# A small difference can be observed in the trend of the two forecasts: the ETS model shows a more negative trend than the ARIMA model.

#Question 13.A Choose one of the following seasonal time series: hsales, auscafe, qauselec, qcement, qgas.

autoplot(qgas) + ylab("Australian Quarterly Gas Production") + xlab("Year")

gasT = BoxCox(qgas, lambda=0) ## Log transform
autoplot(gasT) + ylab("Log Australian Quarterly Gas Production") + xlab("Year")

#Question 13.B

adf.test(gasT) 
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 0.860   0.891
## [2,]   1 0.809   0.876
## [3,]   2 5.606   0.990
## [4,]   3 3.702   0.990
## [5,]   4 1.454   0.962
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -1.52   0.514
## [2,]   1 -1.40   0.556
## [3,]   2 -2.40   0.173
## [4,]   3 -1.99   0.332
## [5,]   4 -1.52   0.515
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -2.727   0.270
## [2,]   1 -2.769   0.253
## [3,]   2  0.556   0.990
## [4,]   3  0.184   0.990
## [5,]   4 -0.983   0.939
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# Testing different lambda values
gasT = BoxCox(qgas, lambda=3) ## Box-Cox, lambda = 3
autoplot(gasT) + ylab("Transformed Australian Quarterly Gas Production") + xlab("Year")

adf.test(gasT)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 -2.55  0.0113
## [2,]   1 -2.47  0.0150
## [3,]   2  1.88  0.9844
## [4,]   3  7.40  0.9900
## [5,]   4  3.98  0.9900
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -3.580    0.01
## [2,]   1 -3.539    0.01
## [3,]   2  0.836    0.99
## [4,]   3  5.055    0.99
## [5,]   4  2.744    0.99
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -7.805   0.010
## [2,]   1 -8.749   0.010
## [3,]   2 -1.596   0.745
## [4,]   3  0.746   0.990
## [5,]   4 -0.202   0.990
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
gasT = BoxCox(qgas, lambda=.5) ## Box-Cox, lambda = 0.5
autoplot(gasT) + ylab("Transformed Australian Quarterly Gas Production") + xlab("Year")

adf.test(gasT)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 0.258   0.718
## [2,]   1 0.264   0.719
## [3,]   2 5.147   0.990
## [4,]   3 5.537   0.990
## [5,]   4 2.541   0.990
## Type 2: with drift no trend 
##      lag    ADF p.value
## [1,]   0 -1.380   0.563
## [2,]   1 -1.348   0.574
## [3,]   2 -0.511   0.868
## [4,]   3 -0.541   0.858
## [5,]   4 -0.662   0.815
## Type 3: with drift and trend 
##      lag    ADF p.value
## [1,]   0 -5.826   0.010
## [2,]   1 -6.293   0.010
## [3,]   2 -0.751   0.965
## [4,]   3 -0.514   0.981
## [5,]   4 -1.168   0.910
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
gasT = BoxCox(qgas, lambda=-2) ## Box-Cox, lambda = -2
autoplot(gasT) + ylab("Transformed Australian Quarterly Gas Production") + xlab("Year")

adf.test(gasT)
## Augmented Dickey-Fuller Test 
## alternative: stationary 
##  
## Type 1: no drift no trend 
##      lag   ADF p.value
## [1,]   0 0.528   0.795
## [2,]   1 0.363   0.748
## [3,]   2 1.825   0.982
## [4,]   3 4.337   0.990
## [5,]   4 1.700   0.978
## Type 2: with drift no trend 
##      lag   ADF p.value
## [1,]   0 -4.33  0.0100
## [2,]   1 -3.74  0.0100
## [3,]   2 -2.63  0.0915
## [4,]   3 -4.76  0.0100
## [5,]   4 -2.54  0.1181
## Type 3: with drift and trend 
##      lag   ADF p.value
## [1,]   0 -5.83   0.010
## [2,]   1 -5.47   0.010
## [3,]   2 -1.69   0.707
## [4,]   3 -1.38   0.837
## [5,]   4 -1.43   0.816
## ---- 
## Note: in fact, p.value = 0.01 means p.value <= 0.01
# None of the lambda values tested yields a stationary series. That is expected: a Box-Cox transformation stabilises the variance but does not remove trend or seasonality; differencing is needed for that, as sketched below.
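# Sketch: nsdiffs() suggests the seasonal differencing required, and the ADF test can
# then be applied to the seasonally differenced log series
gasLog = BoxCox(qgas, lambda=0)
forecast::nsdiffs(gasLog) # expected to suggest one seasonal difference
adf.test(diff(gasLog, lag=4))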

#Question 13.C

# Autoselection
auto.arima(qgas)
## Series: qgas 
## ARIMA(0,0,3)(0,1,2)[4] with drift 
## 
## Coefficients:
##          ma1     ma2     ma3     sma1    sma2   drift
##       0.7986  0.5291  0.6714  -0.0728  0.0999  1.0039
## s.e.  0.0558  0.0938  0.0719   0.0832  0.0854  0.2315
## 
## sigma^2 estimated as 20.21:  log likelihood=-623.8
## AIC=1261.59   AICc=1262.14   BIC=1285.16
# (0,0,1)
arima(qgas, order=c(0,0,1))
## 
## Call:
## arima(x = qgas, order = c(0, 0, 1))
## 
## Coefficients:
##          ma1  intercept
##       0.9108    99.2164
## s.e.  0.0172     5.3722
## 
## sigma^2 estimated as 1731:  log likelihood = -1122.95,  aic = 2251.9
# (1,1,1)
arima(qgas, order=c(1,1,1))
## 
## Call:
## arima(x = qgas, order = c(1, 1, 1))
## 
## Coefficients:
##          ar1      ma1
##       0.1389  -0.7868
## s.e.  0.0760   0.0323
## 
## sigma^2 estimated as 278.5:  log likelihood = -919.08,  aic = 1844.17
# The auto-selected seasonal model performs far better than the two non-seasonal alternatives.

#Question 13.D

gasA = auto.arima(qgas)
checkresiduals(gasA)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,0,3)(0,1,2)[4] with drift
## Q* = 7.3741, df = 3, p-value = 0.06088
## 
## Model df: 6.   Total lags used: 9
acf(qgas)

pacf(qgas)

# The ACF and PACF of the raw series show strong autocorrelation, as expected for seasonal data. For the fitted model, however, the Ljung-Box test (p = 0.061) does not reject white noise, so the residuals are acceptable.

#Question 13.E

#gasF = forecast(gasA, h=24)
#autoplot(gasF)

#The code works when run interactively but fails when knitting.

#Question 13.F

#gasA2 = ets(qgas)
#gasF2 = forecast(gasA2, h=24)

#autoplot(gasF2)

#Same problem as above: the code runs fine interactively but fails when knitting.
# The prediction intervals of the ETS forecast are wider than those of the ARIMA forecast.

#Question 14 For the same time series you used in the previous exercise, try using a non-seasonal model applied to the seasonally adjusted data obtained from STL. The stlf() function will make the calculations easy (with method=“arima”). Compare the forecasts with those obtained in the previous exercise. Which do you think is the best approach?

gasSTLF <- stlf(qgas, method="arima")

plot(gasSTLF)

# The stlf() method seems to produce a more focused forecast with reasonable results. I believe this method is preferred in this situation.

#Question 15.A

library(readxl)
retail = read_excel("C:\\Users\\student\\Desktop\\Predictive Analytics - Forecasting\\HW 3 - Hyndman Ch 7 & 8\\retail.xlsx", skip=1) # skip=1 because the sheet has two header rows (per the textbook)
## New names:
## * `` -> ...1
retailTS = ts(retail[[2]], frequency=12, start=c(1982,4)) # one monthly series; start date per the textbook
autoplot(retailTS)

#Question 15.B

# The retail series is trending and strongly seasonal with increasing variation. A suitable ARIMA model therefore works on log-transformed data with first and seasonal differences, letting auto.arima() choose the ARMA terms; a sketch follows.
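# A sketch of such a model, assuming the single retail series constructed in 15.A:
retailFit = auto.arima(retailTS, lambda=0) # log transform plus automatic seasonal ARIMA selection
retailFit
checkresiduals(retailFit)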

#Question 15.C

#Judging by the accuracy metrics and the residual diagnostics, the forecasts look reasonably good.

#Question 16.A Consider sheep, the sheep population of England and Wales from 1867–1939.

autoplot(sheep) + ylab("Sheep Population") + xlab("Year")

#Question 16.B

auto.arima(sheep)
## Series: sheep 
## ARIMA(3,1,0) 
## 
## Coefficients:
##          ar1      ar2      ar3
##       0.4210  -0.2018  -0.3044
## s.e.  0.1193   0.1363   0.1243
## 
## sigma^2 estimated as 4991:  log likelihood=-407.56
## AIC=823.12   AICc=823.71   BIC=832.22
# The autoselection model produces ARIMA(3,1,0)

#Question 16.C

sheepA = auto.arima(sheep)
forecast(arima(sheep)) # aTSA's forecast() of a mean-only arima() fit; see the note in 16.E
## Forecast for univariate time series: 
##    Lead Forecast S.E Lower Upper
## 74    1     1857 221  1423  2290
## ------ 
## Note: confidence level = 95 %

acf(sheep)

pacf(sheep)

# The ARIMA(3,1,0) model is appropriate: sheep is annual data with no seasonality, the first difference removes the trend, and the autocorrelation patterns in the ACF/PACF support an AR(3) term on the differenced series.

#Question 16.D

# The last five values of the series are available via tail(sheep, 5).
# The estimated parameters are ϕ1=0.42, ϕ2=−0.20, and ϕ3=−0.30. For an ARIMA(3,1,0) model, the forecast equation on the original scale is
# y[t] = y[t-1] + 0.42*(y[t-1] - y[t-2]) - 0.20*(y[t-2] - y[t-3]) - 0.30*(y[t-3] - y[t-4])
# The 1940 forecast plugs in the last four observations (1936-1939); the 1941 and 1942 forecasts then reuse each earlier forecast in place of the unavailable observation. A sketch of the calculation follows.
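# A minimal sketch of the hand calculation, iterating the forecast equation above and
# feeding each forecast back in as the next "observation"
phi = c(0.42, -0.20, -0.30)
ySheep = as.numeric(tail(sheep, 4)) # observations for 1936-1939
for (h in 1:3) {
  n = length(ySheep)
  d = diff(ySheep) # first differences, most recent last
  ySheep = c(ySheep, ySheep[n] + sum(phi * rev(d[(n-3):(n-1)])))
}
tail(ySheep, 3) # forecasts for 1940-1942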

#Question 16.E

sheepTS = ts(sheep) # sheep is already a ts; this just strips the year labels
forecast(arima(sheepTS)) # again aTSA's forecast() of a mean-only fit
## Forecast for univariate time series: 
##    Lead Forecast S.E Lower Upper
## 74    1     1857 221  1423  2290
## ------ 
## Note: confidence level = 95 %

# Note: arima() without an order argument fits only a mean, and aTSA masks forecast::forecast(). Calling forecast::forecast(sheepA, h=3) gives forecasts from the fitted ARIMA(3,1,0) model, close to the hand calculation above.

#Question 17.A

autoplot(bicoal) + ylab("Bituminous Coal Production") + xlab("Year")

#Question 17.B

auto.arima(bicoal)
## Series: bicoal 
## ARIMA(4,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1      ar2     ar3      ar4      mean
##       0.8334  -0.3443  0.5525  -0.3780  481.5221
## s.e.  0.1366   0.1752  0.1733   0.1414   21.0591
## 
## sigma^2 estimated as 2795:  log likelihood=-262.05
## AIC=536.1   AICc=538.1   BIC=547.45
# The autoselection produces an ARIMA(4,0,0) model with non-zero mean

#Question 17.C

coal = auto.arima(bicoal)
coalF = forecast(arima(bicoal)) # aTSA's forecast() of a mean-only arima() fit; see the note in 17.E
## Forecast for univariate time series: 
##    Lead Forecast  S.E Lower Upper
## 50    1      478 77.5   326   630
## ------ 
## Note: confidence level = 95 %

acf(bicoal)

pacf(bicoal)

# The AR(4) model is reasonable: bicoal is annual data with no seasonality, and the oscillating pattern in the ACF together with the PACF spikes supports an AR structure of around order 4.

#Question 17.D

# The last five values of the series are available via tail(bicoal, 5).
# The estimated parameters are c=162.00, ϕ1=0.83, ϕ2=−0.34, ϕ3=0.55, and ϕ4=−0.38. For an AR(4) model with a constant, the forecast equation is
# y[t] = 162 + 0.83*y[t-1] - 0.34*y[t-2] + 0.55*y[t-3] - 0.38*y[t-4]
# The 1969 forecast uses the observations for 1965-1968; the 1970 and 1971 forecasts then feed each earlier forecast back in. A sketch of the calculation follows.
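# A minimal sketch of the hand calculation for the AR(4) model with a constant
phiC = c(0.83, -0.34, 0.55, -0.38)
yCoal = as.numeric(tail(bicoal, 4)) # observations for 1965-1968
for (h in 1:3) {
  n = length(yCoal)
  yCoal = c(yCoal, 162 + sum(phiC * yCoal[n:(n-3)])) # ϕ1 pairs with the most recent value
}
tail(yCoal, 3) # forecasts for 1969-1971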

#Question 17.E

coalTS = ts(bicoal) # bicoal is already a ts; this just strips the year labels
forecast(arima(coalTS)) # aTSA's forecast() of a mean-only fit
## Forecast for univariate time series: 
##    Lead Forecast  S.E Lower Upper
## 50    1      478 77.5   326   630
## ------ 
## Note: confidence level = 95 %

# As in 16.E, forecast::forecast(coal, h=3) would give forecasts from the fitted AR(4) model, close to the hand calculation above.

#Question 18.A

library(Quandl)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## Attaching package: 'xts'
## The following objects are masked from 'package:dplyr':
## 
##     first, last
#data <- Quandl("FRED/GDP", type="ts")

# Error: { "quandl_error": { "code": "QELx01", "message": "You have exceeded the anonymous user limit of 50 calls per day. To make more calls today, please register for a free Quandl account and then include your API key with your requests." } }

#Question 18.B

# I decided to use the ibmclose dataset
autoplot(ibmclose)

ibmA = auto.arima(ibmclose)
ibmA
## Series: ibmclose 
## ARIMA(0,1,0) 
## 
## sigma^2 estimated as 52.62:  log likelihood=-1251.37
## AIC=2504.74   AICc=2504.75   BIC=2508.64
# Autoselection chose ARIMA(0,1,0) as the appropriate model

#Question 18.C

checkresiduals(ibmA)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,0)
## Q* = 14.151, df = 10, p-value = 0.1662
## 
## Model df: 0.   Total lags used: 10
# The Ljung-Box p-value (0.17) and the ACF plot suggest the residuals resemble white noise, with only a few spikes near the critical bounds.

#Question 18.D

forecast(arima(ibmclose)) # aTSA's forecast() of a mean-only arima() fit
## Forecast for univariate time series: 
##     Lead Forecast  S.E Lower Upper
## 370    1      478 84.1   314   643
## ------ 
## Note: confidence level = 95 %

plot(forecast(arima(ibmclose))) # same mean-only forecast; a sketch from the ARIMA(0,1,0) fit follows below
## Forecast for univariate time series: 
##     Lead Forecast  S.E Lower Upper
## 370    1      478 84.1   314   643
## ------ 
## Note: confidence level = 95 %
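# Sketch: forecasts from the ARIMA(0,1,0) model chosen above, calling forecast::forecast
# explicitly since aTSA masks it
plot(forecast::forecast(ibmA, h=30))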

#Question 18.E

ibm2 = ets(ibmclose, model="ZZZ", lambda="auto")
ibm2
## ETS(A,N,N) 
## 
## Call:
##  ets(y = ibmclose, model = "ZZZ", lambda = "auto") 
## 
##   Box-Cox transformation: lambda= 1.9999 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
## 
##   Initial states:
##     l = 109274.6793 
## 
##   sigma:  3191.485
## 
##      AIC     AICc      BIC 
## 8139.441 8139.507 8151.173
autoplot(ibm2)

# The model selected is ETS(A,N,N)

#Question 18.F

checkresiduals(ibm2)

## 
##  Ljung-Box test
## 
## data:  Residuals from ETS(A,N,N)
## Q* = 16.112, df = 8, p-value = 0.04081
## 
## Model df: 2.   Total lags used: 10
# The Ljung-Box p-value (0.04) suggests some autocorrelation remains in the residuals, although the spikes in the ACF plot are small.

#Question 18.G

ibm2F = forecast.ets(ibm2)
ibm2F
##     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 370       356.9995 345.3475 368.2830 339.0173 374.1185
## 371       356.9995 340.4052 372.8561 331.2844 380.9830
## 372       356.9995 336.5634 376.3275 325.2259 386.1678
## 373       356.9995 333.2903 379.2294 320.0292 390.4853
## 374       356.9995 330.3797 381.7678 315.3799 394.2500
## 375       356.9995 327.7261 384.0482 311.1168 397.6229
## 376       356.9995 325.2667 386.1334 307.1442 400.6995
## 377       356.9995 322.9607 388.0642 303.3998 403.5421
## 378       356.9995 320.7798 389.8689 299.8405 406.1938
## 379       356.9995 318.7033 391.5682 296.4347 408.6860
autoplot(ibm2F) + ylab("Stock Close") + xlab("Year")

#Question 18.H

# I prefer the ETS model because its prediction intervals are narrower; note, though, that narrower intervals only indicate greater precision if their coverage holds, not automatically greater accuracy.