#Load Packages
##Chapter 7
#Problem 1
a: optimal alpha = 0.2971; optimal lambda = 77260.0561. See forecast plot/output. b: calculate 95% Pred Interval. My PI is close to the ses provided one, but off by a bit - assume R has more precision in terms of decimals as well as degrees of freedom (t critical versus z critical).
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249
## ACF1
## Training set 0.01282239
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
## Sep
## 1995 78679.97
## Sep
## 1995 118952.8
#Problem 2
Writing own function for SES model. My results are very similar to the automated ses model.
## [1] 98816.45
#Problem 3
Modifying function from P2. My results are very similar to the automated ses model, but not precisely the same alpha and lambda values.
## [1] 0.297119 77265.874781
#Problem 4
Combine two functions into one function.I get virtually the same point estimates as the ses model.
customses2 <- function(par, y)
{
alpha <- par[1]
level <- par[2]
n <- length(y)
y.hat <- numeric(n)
y.hat[1] <- level
for (i in 2:(n)) {
y.hat[i] <- alpha * y[i-1] + (1 - alpha) * y.hat[i-1]
}
return(sum((y-y.hat)^2))
}
customsesA <- function(y, h=4)
{
par <- optim(c(.1, y[1]), customses2, y=y)$par
alpha <- par[1]
level <- par[2]
n <- length(y)
y.hat <- numeric(n+1)
y.hat[1] <- level
for (i in 2:(n+1)) {
y.hat[i] <- alpha * y[i-1] + (1 - alpha) * y.hat[i-1]
}
return(rep(y.hat[n+1], h))
}
customsesA(pigs)
## [1] 98816.44 98816.44 98816.44 98816.44
#Problem 5
a: there are two groups represented, paperback and hardcover books. Both have a positive trend, with no noticeable seasonality.
b: forecast and plot the two models.
c: used accuracy() to find the RMSE for each forecast.
autoplot(books)
fcpaper <- ses(books[,"Paperback"], h=4)
summary(fcpaper)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = books[, "Paperback"], h = 4)
##
## Smoothing parameters:
## alpha = 0.1685
##
## Initial states:
## l = 170.8271
##
## sigma: 34.8183
##
## AIC AICc BIC
## 318.9747 319.8978 323.1783
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303
## ACF1
## Training set -0.2117522
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.1097 162.4882 251.7311 138.8670 275.3523
## 32 207.1097 161.8589 252.3604 137.9046 276.3147
## 33 207.1097 161.2382 252.9811 136.9554 277.2639
## 34 207.1097 160.6259 253.5935 136.0188 278.2005
fchard <- ses(books[,"Hardcover"], h=4)
summary(fchard)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = books[, "Hardcover"], h = 4)
##
## Smoothing parameters:
## alpha = 0.3283
##
## Initial states:
## l = 149.2861
##
## sigma: 33.0517
##
## AIC AICc BIC
## 315.8506 316.7737 320.0542
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 9.166735 31.93101 26.77319 2.636189 13.39487 0.7987887
## ACF1
## Training set -0.1417763
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 239.5601 197.2026 281.9176 174.7799 304.3403
## 32 239.5601 194.9788 284.1414 171.3788 307.7414
## 33 239.5601 192.8607 286.2595 168.1396 310.9806
## 34 239.5601 190.8347 288.2855 165.0410 314.0792
autoplot(books) + autolayer(fcpaper, series = "Paperback") + autolayer(fchard, series = "Hardcover")
accuracy(fcpaper)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303
## ACF1
## Training set -0.2117522
accuracy(fchard)
## ME RMSE MAE MPE MAPE MASE
## Training set 9.166735 31.93101 26.77319 2.636189 13.39487 0.7987887
## ACF1
## Training set -0.1417763
#Problem 6
a: apply Holt methodology to both models; forecast and plot the two models.
b: used accuracy() to find the RMSE for each forecast. Holt has lower RMSE for both models, expected given the clear positive trend in both paperback and hardcover books.
c: Holt incorporates the trend, predictions are higher which seems reasonable.
d: my 95% PI is a bit different, again using Z critical value instead of t critical, degrees of freedom.
autoplot(books)
fcholtpaper <- holt(books[,"Paperback"], h=4)
summary(fcholtpaper)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = books[, "Paperback"], h = 4)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
##
## Initial states:
## l = 170.699
## b = 1.2621
##
## sigma: 33.4464
##
## AIC AICc BIC
## 318.3396 320.8396 325.3456
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.717178 31.13692 26.18083 -5.508526 15.58354 0.6602122
## ACF1
## Training set -0.1750792
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.4668 166.6035 252.3301 143.9130 275.0205
## 32 210.7177 167.8544 253.5811 145.1640 276.2715
## 33 211.9687 169.1054 254.8320 146.4149 277.5225
## 34 213.2197 170.3564 256.0830 147.6659 278.7735
fcholthard <- holt(books[,"Hardcover"], h=4)
summary(fcholthard)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = books[, "Hardcover"], h = 4)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
##
## Initial states:
## l = 147.7935
## b = 3.303
##
## sigma: 29.2106
##
## AIC AICc BIC
## 310.2148 312.7148 317.2208
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1357882 27.19358 23.15557 -2.114792 12.1626 0.6908555
## ACF1
## Training set -0.03245186
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 250.1739 212.7390 287.6087 192.9222 307.4256
## 32 253.4765 216.0416 290.9113 196.2248 310.7282
## 33 256.7791 219.3442 294.2140 199.5274 314.0308
## 34 260.0817 222.6468 297.5166 202.8300 317.3334
autoplot(books) + autolayer(fcholtpaper, series = "Paperback") + autolayer(fcholthard, series = "Hardcover")
accuracy(fcholtpaper)
## ME RMSE MAE MPE MAPE MASE
## Training set -3.717178 31.13692 26.18083 -5.508526 15.58354 0.6602122
## ACF1
## Training set -0.1750792
accuracy(fcholthard)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1357882 27.19358 23.15557 -2.114792 12.1626 0.6908555
## ACF1
## Training set -0.03245186
sd.paper <- sqrt(fcholtpaper$model$mse)
lowerPIpaper <- fcholtpaper$mean[1] - (1.96* sd.paper)
upperPIpaper <- fcholtpaper$mean[1] + (1.96* sd.paper)
lowerPIpaper
## [1] 148.4384
upperPIpaper
## [1] 270.4951
sd.hard <- sqrt(fcholthard$model$mse)
lowerPIhard <- fcholthard$mean[1] - (1.96* sd.hard)
upperPIhard <- fcholthard$mean[1] + (1.96* sd.hard)
lowerPIhard
## [1] 196.8745
upperPIhard
## [1] 303.4733
I found that specifying a small lambda reduces RMSE, as does an undamped forecast.
autoplot(eggs)
fceggs <- holt(eggs, h=100)
accuracy(fceggs)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04499087 26.58219 19.18491 -1.142201 9.653791 0.9463626
## ACF1
## Training set 0.01348202
autoplot(eggs)+ autolayer(fceggs)
fceggs1 <- holt(eggs, h=100, damped = TRUE, lambda = .4)
accuracy(fceggs1)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.837214 26.53963 19.46805 -2.024508 9.98062 0.9603296
## ACF1
## Training set 0.004380399
fceggs2 <- holt(eggs, h=100, damped = TRUE, lambda = .1)
accuracy(fceggs2)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.59707 26.48436 19.27263 -1.97799 9.912668 0.9506899
## ACF1
## Training set 0.02833915
fceggs3 <- holt(eggs, h=100, damped = FALSE, lambda = .1)
accuracy(fceggs3)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.316453 26.3967 19.10048 -0.8728258 9.661653 0.942198
## ACF1
## Training set 0.03722244
autoplot(eggs)+ autolayer(fceggs3)
##Problem 8
a: there is increasing seasonality as well as increasing trend, so clearly multiplicative.
b: the undamped model seems a bit better.
c: the undamped model has a lower, and therefore better, RMSE.
d: neither set of residual plots looks completely random.
e: the test RMSE is 70.117.
retail <- read_excel("E:/WOODS/ADECXXXX/retail.xlsx", skip = 1)[, "A3349335T"]
retailts <- ts(retail, frequency = 12, start = c(1982, 4))
autoplot(retailts)
fcretail <- hw(retailts, seasonal = "multiplicative", damped = FALSE)
fcretail2 <- hw(retailts, seasonal = "multiplicative", damped = TRUE)
autoplot(fcretail)
autoplot(fcretail2)
autoplot(retailts) + autolayer(fcretail, PI = FALSE) + autolayer(fcretail2, color = 'red', PI=FALSE)
accuracy(fcretail)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.9212824 25.20381 18.77683 0.06856226 1.979315 0.3016982
## ACF1
## Training set -0.1217931
accuracy(fcretail2)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.9059 25.10059 18.74334 0.2366077 1.993423 0.3011601
## ACF1
## Training set -0.1394101
checkresiduals(fcretail)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 250.64, df = 8, p-value < 2.2e-16
##
## Model df: 16. Total lags used: 24
checkresiduals(fcretail2)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 285.62, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
retailts3 <- window(retailts, end=c(2010, 12))
fcretail3 <- hw(retailts3, seasonal = "multiplicative", damped = FALSE)
accuracy(fcretail3)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.658058 25.00649 18.25149 0.2228273 1.965971 0.2958851
## ACF1
## Training set -0.006737509
retailts %>% window(end=c(2010, 12)) %>% hw(seasonal = "multiplicative", damped = FALSE) %>% accuracy(x=retailts)
## ME RMSE MAE MPE MAPE MASE
## Training set 2.658058 25.00649 18.25149 0.2228273 1.965971 0.2958851
## Test set -49.514639 65.26190 52.87938 -2.2820578 2.442746 0.8572571
## ACF1 Theil's U
## Training set -0.006737509 NA
## Test set 0.438392290 0.5339184
##Problem 9 the STL RMSE is far worse than any other RMSE from these other models.
retailts %>% window(end=c(2010, 12)) %>% stlf(lambda = 0) %>% accuracy(x=retailts)
## ME RMSE MAE MPE MAPE MASE
## Training set -1.644558 21.70898 16.34821 -0.1753908 1.775947 0.2650299
## Test set -84.688605 101.36033 85.38992 -3.9194410 3.951950 1.3843036
## ACF1 Theil's U
## Training set -0.00379309 NA
## Test set 0.61147886 0.8342339
##Problem 10
a: this data has both seasonality and a cyclical form, decreasing trend thru about 1982, increasing through 200, and then dropping again after that.
b: create seasonally adjusted data
c:forecast next 2 years using additive damped on seasonally adjusted.
d: forecast 2 years without dampening on seasonally adjusted.
e: use ETS to choose a seasonal model.
f: the dampened holt model has the lowest RMSE.
g: the forecasts are virtually identical; given that, use lowest RMSE (holt, dampened).
h: in checking the residuals of the holt-dampened model (and holt model), we see some instances of statically significant (p < 0.001) autocorrelation at lags 4, 7, 11, 12, 18. The ets model has only two instances, but is still significant.
autoplot(ukcars)
stlcars <- stl(ukcars, s.window = "periodic")
autoplot(stlcars)
adjust <- seasadj(stlcars)
autoplot(adjust)
fchdcars <- stlf(ukcars, etsmodel = "AAN", damped = TRUE)
autoplot(ukcars) + autolayer(fchdcars, PI=FALSE)
fcholtcars <- stlf(ukcars, etsmodel = "AAN", damped = FALSE)
autoplot(ukcars) + autolayer(fcholtcars, PI=FALSE)
etscars <- ets(ukcars)
accuracy(fchdcars)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.551267 23.32113 18.48987 0.04121971 6.042764 0.602576
## ACF1
## Training set 0.02262668
accuracy(fcholtcars)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3412727 23.295 18.1605 -0.5970778 5.98018 0.5918418
## ACF1
## Training set 0.02103582
accuracy(etscars)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
autoplot(ukcars) + autolayer(fchdcars, PI=FALSE, series = "HOLT DAMPENED") + autolayer(fcholtcars, PI=FALSE, series = "HOLT") + autolayer(forecast(etscars), PI=FALSE, series = "ETS")
checkresiduals(fchdcars)
## Warning in checkresiduals(fchdcars): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,Ad,N)
## Q* = 24.138, df = 3, p-value = 2.337e-05
##
## Model df: 5. Total lags used: 8
checkresiduals(fcholtcars)
## Warning in checkresiduals(fcholtcars): The fitted degrees of freedom is
## based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,A,N)
## Q* = 22.061, df = 4, p-value = 0.0001949
##
## Model df: 4. Total lags used: 8
checkresiduals(forecast(etscars))
##
## Ljung-Box test
##
## data: Residuals from ETS(A,N,A)
## Q* = 18.324, df = 3, p-value = 0.0003772
##
## Model df: 6. Total lags used: 9
##Problem 11
a: the data has an upward trend, with a seasonal pattern that shows increasing variation as the trend is increasing. This indicates multiplicative seasonality.
b: split into test/train and and forecast using Holt-Winters multiplicative method.
c: The multiplicative method is needed here because of the increase in seasonality as the trend is increasing.
d: forecast the 2 years using ets, additive ets, and stl decomposition.
e: seasonal naive matches the forecast window the best, but doesn’t satisfy the residual test.
f:
autoplot(visitors)
trainv <- window(visitors, end=end(visitors)-c(2,0))
fcvisits <- hw(trainv, h=24, seasonal = "multiplicative")
autoplot(visitors) + autolayer(fcvisits)
fc1 <- forecast(ets(trainv))
fc2 <- forecast(ets(trainv, lambda = 0))
fc3 <- snaive(trainv)
fc4 <- stlf(trainv, lambda = 0)
autoplot(visitors) + autolayer(fc1, PI=FALSE, series = "ETS") + autolayer(fc2, PI=FALSE, series = "ETS Additive Box Cox") + autolayer(fc3, PI=FALSE, series = "Seasonal Naive") + autolayer(fc4, PI=FALSE, series = "STL plus ETS with Box Cox")
accuracy(fc1, visitors)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.7640074 14.53480 10.57657 0.1048224 3.994788 0.405423
## Test set 72.1992664 80.23124 74.55285 15.9202832 16.822384 2.857773
## ACF1 Theil's U
## Training set -0.05311217 NA
## Test set 0.58716982 1.127269
accuracy(fc2, visitors)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.9083287 14.40936 10.55577 0.2423138 3.999821 0.4046254
## Test set 72.7302643 80.66859 75.03497 16.0415154 16.924882 2.8762535
## ACF1 Theil's U
## Training set -0.04015055 NA
## Test set 0.58358862 1.134582
accuracy(fc3, visitors)
## ME RMSE MAE MPE MAPE MASE
## Training set 17.29363 31.15613 26.08775 7.192445 10.285961 1.000000
## Test set 32.87083 50.30097 42.24583 6.640781 9.962647 1.619375
## ACF1 Theil's U
## Training set 0.6327669 NA
## Test set 0.5725430 0.6594016
accuracy(fc4, visitors)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4927659 13.08152 9.27516 0.02142402 3.480288 0.3555371
## Test set 73.9821300 82.36201 75.94468 16.26695851 17.019182 2.9111248
## ACF1 Theil's U
## Training set -0.0769995 NA
## Test set 0.6553119 1.147498
checkresiduals(fc1)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,M)
## Q* = 20.677, df = 7, p-value = 0.004278
##
## Model df: 17. Total lags used: 24
checkresiduals(fc2)
##
## Ljung-Box test
##
## data: Residuals from ETS(A,Ad,A)
## Q* = 20.119, df = 7, p-value = 0.005318
##
## Model df: 17. Total lags used: 24
checkresiduals(fc3)
##
## Ljung-Box test
##
## data: Residuals from Seasonal naive method
## Q* = 295.02, df = 24, p-value < 2.2e-16
##
## Model df: 0. Total lags used: 24
checkresiduals(fc4)
## Warning in checkresiduals(fc4): The fitted degrees of freedom is based on
## the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,Ad,N)
## Q* = 20.371, df = 19, p-value = 0.3726
##
## Model df: 5. Total lags used: 24
##Problem 12
a: use provided fets code to run a fets and snaive model.
b: compute MSE of the 4-period forecast errors. The ets fits better than the seasonal naive.
fets <- function(y, h) {
forecast(ets(y), h = h)
}
fccement1 <- tsCV(qcement, fets, h=4)
fccement2 <- tsCV(qcement, snaive, h=4)
colMeans(fccement1^2, na.rm = TRUE)
## h=1 h=2 h=3 h=4
## 0.006959511 0.010592277 0.014117721 0.018451074
colMeans(fccement2^2, na.rm = TRUE)
## h=1 h=2 h=3 h=4
## 0.01779243 0.01782813 0.01796352 0.01810650
##Problem 13
ETS works best on ausbeer, a10, h02 and usmelec. Seasonal naive works best onbricksq and dole.
##ausbeer
train <- window(ausbeer, end = end(ausbeer)-c(3,0))
fc1 <- forecast(ets(train))
fc2 <- snaive(train)
fc3 <- stlf(train)
accuracy(fc1, ausbeer)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.06407344 2.864311 0.7567076
## Test set -0.5797650 8.839261 8.250906 -0.10616247 1.967744 0.5211642
## ACF1 Theil's U
## Training set -0.19422757 NA
## Test set 0.07426138 0.1569371
accuracy(fc2, ausbeer)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.336634 19.67005 15.83168 0.9044009 3.771370 1.0000000
## Test set -3.000000 10.87428 9.75000 -0.6233917 2.319199 0.6158537
## ACF1 Theil's U
## Training set 0.0009690785 NA
## Test set 0.0663615561 0.1975711
accuracy(fc3, ausbeer)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6449182 13.77953 10.641492 0.1643171 2.55259 0.6721643
## Test set -3.8294793 9.46171 7.970866 -0.8668174 1.90483 0.5034756
## ACF1 Theil's U
## Training set -0.14695084 NA
## Test set 0.03077298 0.1584669
##bricksq
train <- window(bricksq, end = end(bricksq)-c(3,0))
fc1 <- forecast(ets(train))
fc2 <- snaive(train)
fc3 <- stlf(train)
accuracy(fc1, bricksq)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.228708 21.96146 15.81586 0.2606171 3.912882 0.4299638
## Test set 28.224638 32.15445 28.22464 6.4555105 6.455511 0.7673039
## ACF1 Theil's U
## Training set 0.2038074 NA
## Test set 0.3913233 0.8218788
accuracy(fc2, bricksq)
## ME RMSE MAE MPE MAPE MASE
## Training set 6.438849 50.25482 36.78417 1.430237 8.999949 1.0000000
## Test set 5.875000 31.17491 29.12500 1.177091 6.694482 0.7917808
## ACF1 Theil's U
## Training set 0.8116983 NA
## Test set -0.2088834 0.7632143
accuracy(fc3, bricksq)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.28882 19.93458 14.30052 0.3272766 3.524822 0.3887682
## Test set 33.95021 37.79440 34.95725 7.8910905 8.134926 0.9503340
## ACF1 Theil's U
## Training set 0.2458414 NA
## Test set 0.1591310 0.9402375
##dole
train <- window(dole, end = end(dole)-c(3,0))
fc1 <- forecast(ets(train))
fc2 <- snaive(train)
fc3 <- stlf(train)
accuracy(fc1, dole)
## ME RMSE MAE MPE MAPE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867
## Test set 117566.73209 156071.68 117566.732 21.4480618 21.44806
## MASE ACF1 Theil's U
## Training set 0.2995409 0.5139536 NA
## Test set 3.7354602 0.8655436 6.933427
accuracy(fc2, dole)
## ME RMSE MAE MPE MAPE MASE
## Training set 12606.58 56384.06 31473.16 3.350334 27.73651 1.000000
## Test set 57600.75 131644.50 96346.75 7.639661 17.97554 3.061235
## ACF1 Theil's U
## Training set 0.9781058 NA
## Test set 0.8440205 5.635152
accuracy(fc3, dole)
## ME RMSE MAE MPE MAPE
## Training set 272.8467 6394.784 3898.066 0.5609317 4.972168
## Test set 99751.1171 145788.165 104059.836 17.3871499 18.490381
## MASE ACF1 Theil's U
## Training set 0.1238537 0.02000331 NA
## Test set 3.3063042 0.86554986 6.323866
##a10
train <- window(a10, end = end(a10)-c(3,0))
fc1 <- forecast(ets(train))
fc2 <- snaive(train)
fc3 <- stlf(train)
accuracy(fc1, a10)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04378049 0.488989 0.3542285 0.2011915 3.993267 0.3611299
## Test set 0.87398544 1.846934 1.4508299 3.8099899 7.423349 1.4790961
## ACF1 Theil's U
## Training set -0.05238173 NA
## Test set 0.07312731 0.5138717
accuracy(fc2, a10)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.9577207 1.177528 0.9808895 10.86283 11.15767 1.000000
## Test set 2.8642171 3.581892 2.8830255 14.17026 14.30240 2.939195
## ACF1 Theil's U
## Training set 0.3779746 NA
## Test set 0.3807602 0.9973288
accuracy(fc3, a10)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.05989697 0.496396 0.3733539 0.4373976 4.394199 0.3806279
## Test set 1.12694066 2.280538 1.8066741 4.4213855 9.021185 1.8418732
## ACF1 Theil's U
## Training set 0.0516973 NA
## Test set 0.1803601 0.59831
##h02
train <- window(h02, end = end(h02)-c(3,0))
fc1 <- forecast(ets(train))
fc2 <- snaive(train)
fc3 <- stlf(train)
accuracy(fc1, h02)
## ME RMSE MAE MPE MAPE
## Training set 0.001532209 0.04653434 0.03463451 0.0008075938 4.575471
## Test set 0.008429432 0.06929054 0.05518862 -0.1270560745 6.114535
## MASE ACF1 Theil's U
## Training set 0.5850192 0.07982687 NA
## Test set 0.9322032 -0.23466606 0.3794892
accuracy(fc2, h02)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.03880677 0.07122149 0.05920234 5.220203 8.156509 1.000000
## Test set -0.03122621 0.08266334 0.06851824 -3.266822 7.535036 1.157357
## ACF1 Theil's U
## Training set 0.4046529 NA
## Test set -0.1837015 0.4355114
accuracy(fc3, h02)
## ME RMSE MAE MPE MAPE
## Training set -0.000273156 0.04217150 0.03145411 -0.1803034 4.451769
## Test set -0.021313701 0.07711689 0.06565212 -4.1330407 8.095598
## MASE ACF1 Theil's U
## Training set 0.5312983 -0.04363844 NA
## Test set 1.1089446 0.02159730 0.4691498
##usmelec
train <- window(usmelec, end = end(usmelec)-c(3,0))
fc1 <- forecast(ets(train))
fc2 <- snaive(train)
fc3 <- stlf(train)
accuracy(fc1, usmelec)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -6.45998118 12.433755 10.366090 -2.1079889 3.096548 1.1520160
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.4480421 0.3900672
accuracy(fc2, usmelec)
## ME RMSE MAE MPE MAPE MASE
## Training set 4.921564 11.58553 8.998217 2.000667 3.511648 1.000000
## Test set 6.543500 17.23339 13.255167 1.680996 3.671730 1.473088
## ACF1 Theil's U
## Training set 0.4841860 NA
## Test set 0.3626942 0.4493194
accuracy(fc3, usmelec)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.06835845 6.32711 4.786752 -0.06755695 1.872096 0.5319668
## Test set -9.09659930 14.45598 12.425778 -2.93857671 3.776312 1.3809156
## ACF1 Theil's U
## Training set 0.1180312 NA
## Test set 0.5142596 0.4596839
##Problem 14
ets does not always predict well; it does quite poorly on the dole data set. When I change the lambda value to 0, it tracks better, but still has very wide prediction intervals.
etscoal <- ets(bicoal)
autoplot(forecast(etscoal))
etschicken <- ets(chicken)
autoplot(forecast(etschicken))
etsdole <- ets(dole)
autoplot(forecast(etsdole))
etsusdeaths <- ets(usdeaths)
autoplot(forecast(etsusdeaths))
etslynx <- ets(lynx)
autoplot(forecast(etslynx))
etsibmclose <- ets(ibmclose)
autoplot(forecast(etsibmclose))
etseggs <- ets(eggs)
autoplot(forecast(etseggs))
etsdole2 <- ets(dole, lambda = 0)
autoplot(forecast(etsdole2))
#Problem 1 a: each graph shows critical values that are ‘boundaries’ for indicating whether white noise is shown; all these graphs, as the lag measures are within the boundaries, show white noise.
b: the critical values change as the sample size changes; higher n, tighter critical values.
#Problem 2
The ACF plot shows very high values throughout; the PACF first measure is very high; the time series wanders a lot…all indicators of a non-stationary time series.
ggtsdisplay(ibmclose)
#Problem 3
Both enplanements and visitors benefit from the box-cox transform and a lag of 12 for seasonal differences.
autoplot(usnetelec)
autoplot(enplanements)
autoplot(diff(BoxCox(enplanements, lambda=0), diff=12))
autoplot(visitors)
autoplot(diff(BoxCox(visitors, lambda=0), diff=12))
#Problem 4
a 12 month lag for seasonal. (1-Beta)
#Problem 5 After examining the data, I tried a log with 12 month seasonal, the data now looks quite stationary.
retail <- read_excel("E:/WOODS/ADECXXXX/retail.xlsx", skip = 1)[, "A3349335T"]
retailts <- ts(retail, frequency = 12, start = c(1982, 4))
autoplot(retailts)
autoplot(window(retailts, 1990))
autoplot(window(retailts, 1990, 2005))
autoplot(window(retailts, 1990, 2000))
ggtsdisplay(diff(log(retailts), diff=12))
#Problem 6
a & b: create plots, changing lambda. As lambda increases, the plot wanders a lot more.
c & d: create plots, changing theta. as theta changes, the plots change peakedness.
e & f & g: create ARMA(1,1) model and AR(2) models; plot and compare.The AR2 model is more volatile.
ar1 <- function(phi, n=100)
{
y <- ts(numeric(n))
e <- rnorm(n)
for(i in 2:n)
y[i] <- phi*y[i-1] + e[i]
return(y)
}
autoplot(ar1(.1), main = "lambda = .1")
autoplot(ar1(.2), main = "lambda = .2")
autoplot(ar1(.6), main = "lambda = .6")
autoplot(ar1(.9), main = "lambda = .9")
#create MA model function
ma1 <- function(theta, n=100)
{
y <- ts(numeric(n))
e <- rnorm(n)
for(i in 2:n)
y[i] <- theta*e[i-1] + e[i]
return(y)
}
autoplot(ma1(.1), main = "theta = .1")
autoplot(ma1(.2), main = "theta = .2")
autoplot(ma1(.6), main = "theta = .6")
autoplot(ma1(.9), main = "theta = .9")
#ARMA and AR models
arma1 <- function(phi, theta, n=100)
{
y <- ts(numeric(n))
e <- rnorm(n)
for(i in 2:n)
y[i] <- phi*y[i-1] + theta*e[i-1] + e[i]
return(y)
}
autoplot(arma1(.6, .6), main = "ARMA .6 and .6")
ar2 <- function(phi1, phi2, n=100)
{
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 3:100)
y[i] <- phi1*y[i-1] + phi2*y[i-1] + e[i]
return(y)
}
autoplot(ar2(-.8, .3), main = "AR(2) -.8 and .3")
#Problem 7
a: The data shows an increasing trend from 1950 through 1973(ish); then some stabilization thru 19909ish), and a general decline thru end of time period with a peak in 2001.The ACF and PACF graphs show evidence of a lag, so we will difference the data. The differenced plots show improvement, with one lag identified at period 2. So an ARIMA with 0,1,2 might fit well.
b: no constant, as no evidence of drift.
c: backshift operator notation (1-Beta) = (1+ThetaBeta + ThetaBeta^2)
d: the residuals show a reasonably well fit model!
e: forecast three periods ahead.
f: plot of forecast shows the # of murders stabilizing with fairly wide PIs.
g: the autoarima plot shows a model with steadily decreasing murders,following the post-2001 trend, which is unlikely. I would use the other model.
autoplot(wmurders)
ggtsdisplay(wmurders)
ggtsdisplay(diff(wmurders))
modelwm <- Arima(wmurders, c(0,1,2))
summary(modelwm)
## Series: wmurders
## ARIMA(0,1,2)
##
## Coefficients:
## ma1 ma2
## -0.0660 0.3712
## s.e. 0.1263 0.1640
##
## sigma^2 estimated as 0.0422: log likelihood=9.71
## AIC=-13.43 AICc=-12.95 BIC=-7.46
##
## Training set error measures:
## ME RMSE MAE MPE MAPE
## Training set 0.0007242355 0.1997392 0.1543531 -0.08224024 4.434684
## MASE ACF1
## Training set 0.9491994 0.005880608
checkresiduals(modelwm)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,2)
## Q* = 9.7748, df = 8, p-value = 0.2812
##
## Model df: 2. Total lags used: 10
fcwm <- forecast(modelwm, h=3)
fcwm
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 2.458450 2.195194 2.721707 2.055834 2.861066
## 2006 2.477101 2.116875 2.837327 1.926183 3.028018
## 2007 2.477101 1.979272 2.974929 1.715738 3.238464
autoplot(fcwm)
modelwmauto <- auto.arima(wmurders)
fcwmauto <- forecast(modelwmauto, h=3)
autoplot(fcwmauto)
a: an ARIMA(0,1,1) model with drift is chosen. The residual plots show white noise which is good. The 10 period forecast shows a steady upward trend which seems reasonably given the plots.
b: removing the drift effect changes the model a great deal, the trend is gone. Removing the MA tern makes virtually no difference.
c: the ARIMA 2,1,3 fits nicely with a constant; it will not run without the constant. Drift is needed.
d: both ARIMA models with 0,0,1 with/without the constant fit terribly.
e: the ARIMA 0,2,1 without a constant also fits nicely.
modelaus <- auto.arima(austa)
summary(modelaus)
checkresiduals(modelaus)
fcaus <- forecast(modelaus, h=10)
autoplot(fcaus)
modelaus2 <- Arima(austa, c(0,1,1), include.constant=FALSE)
checkresiduals(modelaus2)
fcaus2 <- forecast(modelaus2, h=10)
autoplot(fcaus2)
modelaus3 <- Arima(austa, c(0,1,0), include.constant=FALSE)
checkresiduals(modelaus3)
fcaus3 <- forecast(modelaus3, h=10)
autoplot(fcaus3)
modelaus4 <- Arima(austa, c(2,1,3), include.constant=TRUE)
checkresiduals(modelaus4)
fcaus4 <- forecast(modelaus4, h=10)
autoplot(fcaus4)
modelaus5 <- Arima(austa, c(2,1,3), include.constant=FALSE)
modelaus6 <- Arima(austa, c(0,0,1), include.constant=TRUE)
checkresiduals(modelaus6)
fcaus6 <- forecast(modelaus6, h=10)
autoplot(fcaus6)
modelaus7 <- Arima(austa, c(0,0,0), include.constant=TRUE)
checkresiduals(modelaus7)
fcaus7 <- forecast(modelaus7, h=10)
autoplot(fcaus7)
modelaus8 <- Arima(austa, c(0,2,1), include.constant=FALSE)
checkresiduals(modelaus8)
fcaus8 <- forecast(modelaus8, h=10)
autoplot(fcaus8)
##Problem 9
a:based on the time series plot, a BoxCox transform does not seem necessary.
b: auto ARIMA selects a 2,2,2, model without drift.
c: I tried several models: (1,2,2), (1,2,1), (2,2,1) and selected (2,2,1) based on lowest RSME and AIC values.
d: the residuals show some lag issues at period 8 and 12, but has a high enough p-value that it is not a problem.
e: the forecast plot seems reasonable.
f: the ets forecast is steeper and has a wider PI associated.
autoplot(usgdp)
modgdp <- auto.arima(usgdp)
summary(modgdp)
## Series: usgdp
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.1228 0.3106 -0.5835 -0.3669
## s.e. 0.2869 0.0872 0.3004 0.2862
##
## sigma^2 estimated as 1604: log likelihood=-1199.57
## AIC=2409.13 AICc=2409.39 BIC=2426.43
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.40617 39.54446 29.69209 0.09143455 0.6916719 0.1678117
## ACF1
## Training set -0.01784487
checkresiduals(modgdp)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,2)
## Q* = 8.6247, df = 4, p-value = 0.0712
##
## Model df: 4. Total lags used: 8
modgdp2 <- Arima(usgdp,order=c(1,2,2))
summary(modgdp2)
## Series: usgdp
## ARIMA(1,2,2)
##
## Coefficients:
## ar1 ma1 ma2
## 0.6539 -1.3426 0.3636
## s.e. 0.1174 0.1347 0.1277
##
## sigma^2 estimated as 1631: log likelihood=-1201.96
## AIC=2411.92 AICc=2412.09 BIC=2425.76
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.3832 39.95684 29.79445 0.09037192 0.6953713 0.1683902
## ACF1
## Training set -0.04532914
modgdp3 <- Arima(usgdp,order=c(1,2,1))
summary(modgdp3)
## Series: usgdp
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## 0.2899 -0.9555
## s.e. 0.0668 0.0197
##
## sigma^2 estimated as 1665: log likelihood=-1204.96
## AIC=2415.92 AICc=2416.02 BIC=2426.3
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.488314 40.46031 30.09537 0.09247046 0.6936754 0.170091
## ACF1
## Training set -0.07106256
modgdp4 <- Arima(usgdp,order=c(2,2,1))
summary(modgdp4)
## Series: usgdp
## ARIMA(2,2,1)
##
## Coefficients:
## ar1 ar2 ma1
## 0.2421 0.2074 -0.9666
## s.e. 0.0664 0.0662 0.0153
##
## sigma^2 estimated as 1606: log likelihood=-1200.13
## AIC=2408.25 AICc=2408.43 BIC=2422.09
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 4.370134 39.64392 29.71532 0.09078461 0.6963903 0.167943
## ACF1
## Training set -0.002150983
checkresiduals(modgdp4)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,1)
## Q* = 9.362, df = 5, p-value = 0.09547
##
## Model df: 3. Total lags used: 8
fcgdp4 <- forecast(modgdp4, h=30)
etsgdp <- ets(usgdp)
fcetsgdp <- forecast(etsgdp, h=30)
autoplot(fcetsgdp)
a: the time series shows a positive trend, marked seasonality with increasing seasonal variation.
b & c: the ACF and ACF show marked lag periods which should be addressed through differencing in the modeling process.
d: the seasonally differenced data shows marked improvement in both ACF and PACF plots. The best model of the ones tried is c(0,0,1), c(1,1,0) with lowest AIC.
e & f: the auto ARIMA function produces a model with ARIMA(1,0,0)(1,1,0) with drift which is not the same. I like this one better has narrower PIs on the plot.Backshift notation is (1-PhiBeta) (1-Beta^4) = constant + (1+TheatBeta^4).
autoplot(austourists)
ggtsdisplay(austourists)
tsdisplay(diff(log(austourists), lag=4))
mod1 = Arima(austourists, c(0,0,1), c(0,1,1), include.drift = TRUE, lambda = 0)
mod2 = Arima(austourists, c(0,0,1), c(1,1,0), include.drift = TRUE, lambda = 0)
mod3 = Arima(austourists, c(1,0,0), c(0,1,1), include.drift = TRUE, lambda = 0)
mod4 = Arima(austourists, c(1,0,0), c(1,1,0), include.drift = TRUE, lambda = 0)
autoplot(forecast(mod1), h=30)
autoplot(forecast(mod2), h=30)
autoplot(forecast(mod3), h=30)
autoplot(forecast(mod4), h=30)
summary(mod1)
## Series: austourists
## ARIMA(0,0,1)(0,1,1)[4] with drift
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ma1 sma1 drift
## 0.2787 -1.0000 0.0131
## s.e. 0.1024 0.2219 0.0005
##
## sigma^2 estimated as 0.003259: log likelihood=88.26
## AIC=-168.51 AICc=-167.84 BIC=-159.88
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.04901929 1.866839 1.364479 -0.3647319 3.678859 0.4527952
## ACF1
## Training set -0.04749739
summary(mod2)
## Series: austourists
## ARIMA(0,0,1)(1,1,0)[4] with drift
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ma1 sar1 drift
## 0.3551 -0.5540 0.0130
## s.e. 0.1049 0.1067 0.0018
##
## sigma^2 estimated as 0.004545: log likelihood=82.52
## AIC=-157.04 AICc=-156.36 BIC=-148.4
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1067716 2.118759 1.659153 -0.01652045 4.559292 0.5505814
## ACF1
## Training set 0.02860478
summary(mod3)
## Series: austourists
## ARIMA(1,0,0)(0,1,1)[4] with drift
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 sma1 drift
## 0.3707 -1.0000 0.0131
## s.e. 0.1173 0.2372 0.0005
##
## sigma^2 estimated as 0.003131: log likelihood=89.54
## AIC=-171.08 AICc=-170.4 BIC=-162.44
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0590747 1.845908 1.332321 -0.3093518 3.569582 0.4421238
## ACF1
## Training set -0.1440657
summary(mod4)
## Series: austourists
## ARIMA(1,0,0)(1,1,0)[4] with drift
## Box Cox transformation: lambda= 0
##
## Coefficients:
## ar1 sar1 drift
## 0.4463 -0.5850 0.0132
## s.e. 0.1150 0.1036 0.0023
##
## sigma^2 estimated as 0.004303: log likelihood=84.15
## AIC=-160.31 AICc=-159.63 BIC=-151.67
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.07614023 2.084595 1.599164 -0.06338571 4.397435 0.5306742
## ACF1
## Training set -0.06686522
mod5 <- auto.arima(austourists)
summary(mod5)
## Series: austourists
## ARIMA(1,0,0)(1,1,0)[4] with drift
##
## Coefficients:
## ar1 sar1 drift
## 0.4705 -0.5305 0.5489
## s.e. 0.1154 0.1122 0.0864
##
## sigma^2 estimated as 5.15: log likelihood=-142.48
## AIC=292.97 AICc=293.65 BIC=301.6
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02200144 2.149384 1.620917 -0.7072593 4.388288 0.5378929
## ACF1
## Training set -0.06393238
autoplot(forecast(mod5), h=30)
#Problem 11
a: marked seasonality, with positive trend and increasing variation.
b: lambda function suggests a BoxCox transform with lambda = - 0.5738.
c: data is not stationary has a marked trend; ACF plot suggests differencing with BoxCox transform addresses the issue.
d: I tried 2 ARIMA models, ARIMA (2,0,0), (0,1,1) has lower AIC and RMSE.
e: the residual plots look acceptable, the ACF plot shows lag issues in several periods, with a low p-value they are concerning. I tried several others, ARIMA (3,0,1)(1,1,1) has lowest AIC.
f & g: the 15 period forecast fits rather well. The match deteriorates as it gets farther out from the end of the model.
autoplot(usmelec)
lambda <- BoxCox.lambda(usmelec)
lambda
## [1] -0.5738331
tsdisplay(diff(BoxCox(usmelec, lambda), lag = 12))
modelec1 = Arima(usmelec, c(1,0,0), c(0,1,1), lambda = lambda)
modelec2 = Arima(usmelec, c(2,0,0), c(0,1,1), lambda = lambda)
summary(modelec1)
## Series: usmelec
## ARIMA(1,0,0)(0,1,1)[12]
## Box Cox transformation: lambda= -0.5738331
##
## Coefficients:
## ar1 sma1
## 0.9832 -0.8616
## s.e. 0.0105 0.0280
##
## sigma^2 estimated as 1.529e-06: log likelihood=2504.34
## AIC=-5002.68 AICc=-5002.63 BIC=-4990.2
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5956814 8.014071 5.881827 0.2539314 2.225859 0.6531921
## ACF1
## Training set -0.2755812
summary(modelec2)
## Series: usmelec
## ARIMA(2,0,0)(0,1,1)[12]
## Box Cox transformation: lambda= -0.5738331
##
## Coefficients:
## ar1 ar2 sma1
## 0.7322 0.2561 -0.8611
## s.e. 0.0445 0.0444 0.0275
##
## sigma^2 estimated as 1.438e-06: log likelihood=2520.41
## AIC=-5032.81 AICc=-5032.73 BIC=-5016.17
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4006375 7.71333 5.725737 0.1816669 2.17392 0.6358579
## ACF1
## Training set -0.09153044
checkresiduals(modelec2)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,0,0)(0,1,1)[12]
## Q* = 58.339, df = 21, p-value = 2.271e-05
##
## Model df: 3. Total lags used: 24
modelec3 = Arima(usmelec, c(3,0,0), c(0,1,1), lambda = lambda)
summary(modelec3)
## Series: usmelec
## ARIMA(3,0,0)(0,1,1)[12]
## Box Cox transformation: lambda= -0.5738331
##
## Coefficients:
## ar1 ar2 ar3 sma1
## 0.6689 0.0828 0.2378 -0.8462
## s.e. 0.0449 0.0540 0.0447 0.0291
##
## sigma^2 estimated as 1.367e-06: log likelihood=2534.11
## AIC=-5058.22 AICc=-5058.09 BIC=-5037.41
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3111022 7.451189 5.552934 0.1474111 2.114059 0.6166677
## ACF1
## Training set -0.05321607
checkresiduals(modelec3)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,0)(0,1,1)[12]
## Q* = 42.824, df = 20, p-value = 0.002157
##
## Model df: 4. Total lags used: 24
modelec4 = Arima(usmelec, c(3,0,1), c(1,1,1), lambda = lambda)
summary(modelec4)
## Series: usmelec
## ARIMA(3,0,1)(1,1,1)[12]
## Box Cox transformation: lambda= -0.5738331
##
## Coefficients:
## ar1 ar2 ar3 ma1 sar1 sma1
## 1.3785 -0.4219 0.0428 -0.8073 0.0621 -0.8733
## s.e. 0.0659 0.0781 0.0544 0.0481 0.0536 0.0273
##
## sigma^2 estimated as 1.277e-06: log likelihood=2551.66
## AIC=-5089.31 AICc=-5089.07 BIC=-5060.18
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1475455 7.230029 5.373064 -0.01657644 2.042492 0.5966927
## ACF1
## Training set -0.02562349
checkresiduals(modelec4)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,1)(1,1,1)[12]
## Q* = 35.354, df = 18, p-value = 0.008528
##
## Model df: 6. Total lags used: 24
modelec5 = Arima(usmelec, c(5,0,1), c(1,1,1), lambda = lambda)
summary(modelec5)
## Series: usmelec
## ARIMA(5,0,1)(1,1,1)[12]
## Box Cox transformation: lambda= -0.5738331
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ma1 sar1 sma1
## 1.3235 -0.4023 0.1024 -0.1207 0.0962 -0.7495 0.0629 -0.8729
## s.e. 0.0875 0.0872 0.0784 0.0761 0.0532 0.0774 0.0541 0.0276
##
## sigma^2 estimated as 1.275e-06: log likelihood=2553.32
## AIC=-5088.64 AICc=-5088.26 BIC=-5051.19
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1075268 7.188245 5.338885 -0.00200698 2.030319 0.592897
## ACF1
## Training set -0.02446812
checkresiduals(modelec5)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(5,0,1)(1,1,1)[12]
## Q* = 29.41, df = 16, p-value = 0.02131
##
## Model df: 8. Total lags used: 24
autoplot(forecast(modelec4), h=15)
##Problem 12
a: boxcox lambda recommended is 0.1919.
b: auto arima suggests ARIMA (0,1,1).
c: experimented with other settings, best model identified by autoarima base don lowest AIC: ARIMA (0,1,1).
d: the residuals are acceptable, no issues visible in graph and high p-value.
e: the forecast is a flat line.
f: the ets model is virtually unchanged form the ARIMA model.
autoplot(mcopper)
lambda <- BoxCox.lambda(mcopper)
lambda
## [1] 0.1919047
tsdisplay(diff(BoxCox(mcopper, lambda)))
modcop <- auto.arima(mcopper, lambda = lambda)
summary(modcop)
## Series: mcopper
## ARIMA(0,1,1)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## ma1
## 0.3720
## s.e. 0.0388
##
## sigma^2 estimated as 0.04997: log likelihood=45.05
## AIC=-86.1 AICc=-86.08 BIC=-77.43
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 3.480533 77.27254 44.92858 0.166202 4.303677 0.2021433
## ACF1
## Training set -0.08442198
checkresiduals(modcop)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 22.913, df = 23, p-value = 0.4659
##
## Model df: 1. Total lags used: 24
modcop2 <- Arima(mcopper, c(4,1,1), lambda = lambda)
summary(modcop2)
## Series: mcopper
## ARIMA(4,1,1)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## Warning in sqrt(diag(x$var.coef)): NaNs produced
## ar1 ar2 ar3 ar4 ma1
## 0.0572 -0.0303 0.0122 0.0213 0.3134
## s.e. NaN NaN NaN NaN NaN
##
## sigma^2 estimated as 0.05029: log likelihood=45.27
## AIC=-78.54 AICc=-78.38 BIC=-52.54
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 3.351968 77.41223 45.00435 0.1606762 4.309764 0.2024842
## ACF1
## Training set -0.08790739
modcop3 <- Arima(mcopper, c(8,1,1), lambda = lambda)
summary(modcop3)
## Series: mcopper
## ARIMA(8,1,1)
## Box Cox transformation: lambda= 0.1919047
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ar7 ar8
## 0.4837 -0.1878 0.0755 0.0005 -0.0130 0.0674 -0.0897 -0.0215
## s.e. 0.6614 0.2507 0.1086 0.0616 0.0477 0.0488 0.0677 0.0779
## ma1
## -0.1135
## s.e. 0.6607
##
## sigma^2 estimated as 0.05007: log likelihood=48.44
## AIC=-76.89 AICc=-76.49 BIC=-33.56
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 3.921354 76.97429 44.65764 0.1878724 4.277649 0.2009243
## ACF1
## Training set -0.101763
autoplot(forecast(modcop, h=30))
etscop <-ets(mcopper)
fcetscop = forecast(etscop, h = 30)
plot(fcetscop)
##Problem 13
a: boxcox lambda suggested is .1454.
b: data appears relatively stationary so differencing not needed.
c: I opted for auto.arima and it produced a model ARIMA(1,0,0)(1,1,0)[12] with drift.
d: The p-value on the residuals is low, and there is a lag identified at period 24.A better model is found using ARIMA c(1,0,1), c(1,1,1), this has a high p-value for the residuals and plots are acceptable.
e: the forecast is reasonably, but has very high prediction intervals.
f: ets model does not perform any better, still very wide PIs.
autoplot(hsales)
lambda <- BoxCox.lambda(hsales)
lambda
## [1] 0.1454608
tsdisplay(diff(BoxCox(hsales, lambda)))
modsales <- auto.arima(hsales, lambda = lambda)
summary(modsales)
## Series: hsales
## ARIMA(1,0,0)(1,1,0)[12] with drift
## Box Cox transformation: lambda= 0.1454608
##
## Coefficients:
## ar1 sar1 drift
## 0.9038 -0.4603 -0.0012
## s.e. 0.0274 0.0558 0.0064
##
## sigma^2 estimated as 0.03176: log likelihood=79.8
## AIC=-151.6 AICc=-151.44 BIC=-137.31
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1002314 5.05951 3.991839 -0.3071834 7.825825 0.484697
## ACF1
## Training set -0.08509908
checkresiduals(modsales)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0)(1,1,0)[12] with drift
## Q* = 42.593, df = 21, p-value = 0.003542
##
## Model df: 3. Total lags used: 24
modsales1 <- Arima(hsales, c(1,0,1), c(1,1,1), lambda=lambda)
summary(modsales1)
## Series: hsales
## ARIMA(1,0,1)(1,1,1)[12]
## Box Cox transformation: lambda= 0.1454608
##
## Coefficients:
## ar1 ma1 sar1 sma1
## 0.9223 -0.0667 0.0737 -1.0000
## s.e. 0.0279 0.0706 0.0646 0.0558
##
## sigma^2 estimated as 0.02142: log likelihood=115.81
## AIC=-221.63 AICc=-221.4 BIC=-203.77
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.05971732 4.135546 3.229239 -0.4277639 6.395666 0.3921006
## ACF1
## Training set -0.03892361
checkresiduals(modsales1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1)(1,1,1)[12]
## Q* = 15.763, df = 20, p-value = 0.7312
##
## Model df: 4. Total lags used: 24
autoplot(forecast(modsales1), h=24)
etssales <-ets(hsales)
fcetssales = forecast(etssales, h = 24)
plot(fcetssales)
##Problem 14
the stl model has a much higher AIC, the forecast plot is not that different form the ARIMA model.
stlsales <- stlf(hsales, method = "arima")
summary(stlsales)
##
## Forecast method: STL + ARIMA(1,0,0) with non-zero mean
##
## Model Information:
## Series: x
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.9111 52.6254
## s.e. 0.0243 2.6104
##
## sigma^2 estimated as 16: log likelihood=-771.31
## AIC=1548.62 AICc=1548.71 BIC=1559.47
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.05747535 3.985245 3.144856 -0.7507073 6.261222 0.3818547
## ACF1
## Training set -0.03586744
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Dec 1995 41.15016 36.02419 46.27613 33.31066 48.98965
## Jan 1996 45.54806 38.61350 52.48261 34.94257 56.15355
## Feb 1996 51.88185 43.74578 60.01791 39.43881 64.32488
## Mar 1996 61.85493 52.84230 70.86755 48.07131 75.63854
## Apr 1996 60.03290 50.35271 69.71308 45.22833 74.83746
## May 1996 58.54780 48.34658 68.74903 42.94638 74.14923
## Jun 1996 56.94542 46.33107 67.55977 40.71217 73.17866
## Jul 1996 54.34584 43.40038 65.29131 37.60620 71.08549
## Aug 1996 56.19905 44.98614 67.41196 39.05038 73.34772
## Sep 1996 50.37969 38.94951 61.80987 32.89874 67.86064
## Oct 1996 50.69073 39.08328 62.29819 32.93866 68.44280
## Nov 1996 44.00001 32.24742 55.75259 26.02598 61.97404
## Dec 1996 41.15016 29.27844 53.02188 22.99393 59.30639
## Jan 1997 45.54806 33.57835 57.51778 27.24196 63.85416
## Feb 1997 51.88185 39.83139 63.93231 33.45226 70.31144
## Mar 1997 61.85493 49.73785 73.97201 43.32345 80.38641
## Apr 1997 60.03290 47.86079 72.20501 41.41726 78.64854
## May 1997 58.54781 46.33020 70.76541 39.86259 77.23302
## Jun 1997 56.94542 44.69018 69.20066 38.20264 75.68820
## Jul 1997 54.34584 42.05944 66.63224 35.55542 73.13627
## Aug 1997 56.19905 43.88685 68.51126 37.36916 75.02894
## Sep 1997 50.37969 38.04611 62.71328 31.51710 69.24229
## Oct 1997 50.69074 38.33943 63.04204 31.80104 69.58043
## Nov 1997 44.00001 31.63401 56.36601 25.08784 62.91217
autoplot(forecast(stlsales), h=24)
##Problem 15
a: auto ARIMA with ARIMA(2,1,4)(2,1,1)[12]
b: this model has an AIC of -756.99 and RMSE of 24.279; this is an improvement over the other models.
c: the model forecasts nicely, matching the values and with fairly narrow PI bands.
retail <- read_excel("E:/WOODS/ADECXXXX/retail.xlsx", skip = 1)[, "A3349335T"]
retailts <- ts(retail, frequency = 12, start = c(1982, 4))
lambda <- BoxCox.lambda(retailts)
lambda
## [1] 0.193853
tsdisplay(diff(BoxCox(retailts, lambda)))
modretail <- auto.arima(retailts, lambda = lambda)
summary(modretail)
## Series: retailts
## ARIMA(2,1,4)(2,1,1)[12]
## Box Cox transformation: lambda= 0.193853
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4 sar1 sar2
## -0.2303 0.5751 -0.5524 -0.5704 0.4741 -0.2339 0.3393 -0.2871
## s.e. 0.1640 0.1735 0.1600 0.1524 0.1263 0.0577 0.0614 0.0565
## sma1
## -0.8203
## s.e. 0.0399
##
## sigma^2 estimated as 0.006961: log likelihood=388.5
## AIC=-756.99 AICc=-756.38 BIC=-717.91
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -1.779973 24.27905 17.21045 -0.1183191 1.632751 0.2765302
## ACF1
## Training set 0.09326967
autoplot(forecast(modretail), h=30)
a: time plot shows a jagged downward trend, does not appear seasonal.
b: this is an ARIMA model with (3,1,0) p=3 autoregressive terms; d=1 non seasonal differences, q=0 lagged errors (so no lag here).
c: the residual plots are acceptable, and have a high p-value for the test indicating it is not significant. d & e: the forecast matches well.
autoplot(sheep)
modsheep <- Arima(sheep, c(3,1,0))
summary(modsheep)
## Series: sheep
## ARIMA(3,1,0)
##
## Coefficients:
## ar1 ar2 ar3
## 0.4210 -0.2018 -0.3044
## s.e. 0.1193 0.1363 0.1243
##
## sigma^2 estimated as 4991: log likelihood=-407.56
## AIC=823.12 AICc=823.71 BIC=832.22
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -6.566385 68.68715 53.43974 -0.4246247 2.977279 0.7962876
## ACF1
## Training set -0.06149874
checkresiduals(modsheep)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,1,0)
## Q* = 3.4035, df = 7, p-value = 0.8453
##
## Model df: 3. Total lags used: 10
forecast(modsheep, h=3)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1940 1777.996 1687.454 1868.537 1639.524 1916.467
## 1941 1718.869 1561.544 1876.193 1478.261 1959.476
## 1942 1695.985 1494.151 1897.819 1387.306 2004.664
autoplot(forecast(modsheep, h=3))
a: time plot shows a jagged pattern with no real trend, does not appear seasonal.
b: this is an ARIMA model with (4,0,0) p=4 autoregressive terms; d=0 non seasonal differences (no seasonality), q=0 lagged errors (so no lag here). c: the residual plots are acceptable, and there is a high p-value for the test indicating it is not significant autocorrelation. d & e: the forecast matches well.
autoplot(bicoal)
modcoal <- Arima(bicoal, c(4,0,0))
summary(modcoal)
## Series: bicoal
## ARIMA(4,0,0) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ar4 mean
## 0.8334 -0.3443 0.5525 -0.3780 481.5221
## s.e. 0.1366 0.1752 0.1733 0.1414 21.0591
##
## sigma^2 estimated as 2795: log likelihood=-262.05
## AIC=536.1 AICc=538.1 BIC=547.45
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9191614 50.09487 36.28915 -1.426663 8.003388 0.7797132
## ACF1
## Training set 0.02170264
checkresiduals(modcoal)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,0) with non-zero mean
## Q* = 4.852, df = 5, p-value = 0.4342
##
## Model df: 5. Total lags used: 10
forecast(modsheep, h=3)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 1940 1777.996 1687.454 1868.537 1639.524 1916.467
## 1941 1718.869 1561.544 1876.193 1478.261 1959.476
## 1942 1695.985 1494.151 1897.819 1387.306 2004.664
autoplot(forecast(modsheep, h=3))