library(forecast)
## Warning: package 'forecast' was built under R version 3.4.2
library(xts)
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
library(tseries)
library(readr)
library(fpp)
## Loading required package: fma
## Loading required package: expsmooth
## Loading required package: lmtest
library("ZIM")
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following objects are masked from 'package:fma':
##
## cement, housing, petrol
plot(books, main= "Daily Sales of Paperback and Hardcover Books")
The best fit to the data based on the lowest SSE is paperfit2, the model with alpha = 0.2. Beyond that point, the SSE increases as alpha increases.
paperbacks <-books[,1]
paperfit1 <- ses(paperbacks, alpha = 0.1, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit1))^2) #37785.2
## [1] 37785.2
paperfit2 <- ses(paperbacks, alpha = 0.2, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit2))^2) #36329.34
## [1] 36329.34
paperfit3 <- ses(paperbacks, alpha = 0.3, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit3))^2) #36930.75
## [1] 36930.75
paperfit4 <- ses(paperbacks, alpha = 0.4, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit4))^2)#38738.4
## [1] 38738.4
paperfit5 <- ses(paperbacks, alpha = 0.5, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit5))^2) #41383.7
## [1] 41383.7
paperfit6 <- ses(paperbacks, alpha = 0.6, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit6))^2) #44742.62
## [1] 44742.62
paperfit7 <- ses(paperbacks, alpha = 0.7, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit7))^2) #48773.56
## [1] 48773.56
paperfit8 <- ses(paperbacks, alpha = 0.8, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit8))^2) #53456.14
## [1] 53456.14
paperfit9 <- ses(paperbacks, alpha = 0.9, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit9))^2) #58769.45
## [1] 58769.45
paperfit10 <- ses(paperbacks, alpha = 1, initial = "simple", h = 4)
sum((paperbacks-fitted(paperfit10))^2) #64690
## [1] 64690
Alpha=c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
SSE=c(37785.2,36329.34,36930.75,38738.4,41383.7, 44742.62,48773.56,53456.14, 58769.45,64690)
plot(Alpha, SSE)
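The ten near-identical fits above can be condensed into a grid search. A minimal sketch, where sse_ses is a hypothetical helper and not part of the original answer:
sse_ses <- function(y, a, init = "simple") {
  # SSE of an SES fit for a single alpha value
  fit <- ses(y, alpha = a, initial = init, h = 4)
  sum((y - fitted(fit))^2)
}
alphas <- seq(0.1, 1, by = 0.1)
plot(alphas, sapply(alphas, sse_ses, y = paperbacks), xlab = "Alpha", ylab = "SSE")
# the same helper reproduces the hardcover grid below: sapply(alphas, sse_ses, y = hardcover)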
Looking at both models, the automatic fit has lower point forecasts for the next four days than the alpha = 0.2 model, and the alpha = 0.2 model has wider 95% and 80% prediction intervals than the automatic one.
paperautomatic<-ses(paperbacks, h=4)
paperautomatic
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.1098 164.0013 250.2182 141.1811 273.0384
## 32 207.1098 163.3934 250.8261 140.2513 273.9682
## 33 207.1098 162.7937 251.4258 139.3342 274.8853
## 34 207.1098 162.2021 252.0174 138.4294 275.7901
paperfit2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.3882 164.7914 253.9851 141.1832 277.5932
## 32 209.3882 163.9082 254.8683 139.8325 278.9440
## 33 209.3882 163.0418 255.7347 138.5075 280.2690
## 34 209.3882 162.1914 256.5851 137.2069 281.5696
plot(paperautomatic, ylab = "Paperback Sales", xlab = "Time", main="Auto Alpha")
plot(paperfit2, ylab = "Paperback Sales", xlab = "Time", main="Alpha= 0.2")
Overall the alpha = 0.2 model has a lower SSE with initial = "optimal" than with initial = "simple". It also has slightly lower point forecasts and narrower 80% and 95% prediction intervals, though these are still wider than those of the automatically selected model.
paperfit1 <- ses(paperbacks, alpha = 0.1, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit1))^2) #34759.28
## [1] 34759.28
paperfit2 <- ses(paperbacks, alpha = 0.2, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit2))^2) #34066.6
## [1] 34066.6
paperfit3 <- ses(paperbacks, alpha = 0.3, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit3))^2) #35511.3
## [1] 35511.3
paperfit4 <- ses(paperbacks, alpha = 0.4, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit4))^2)#37865.5
## [1] 37865.5
paperfit5 <- ses(paperbacks, alpha = 0.5, initial = "simple", h = 4) # note: initial = "simple" here, unlike the other fits in this block
sum((paperbacks-fitted(paperfit5))^2) #41383.7
## [1] 41383.7
paperfit6 <- ses(paperbacks, alpha = 0.6, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit6))^2) #44450.39
## [1] 44450.39
paperfit7 <- ses(paperbacks, alpha = 0.7, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit7))^2) #48631.17
## [1] 48631.17
paperfit8 <- ses(paperbacks, alpha = 0.8, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit8))^2) #53403.02
## [1] 53403.02
paperfit9 <- ses(paperbacks, alpha = 0.9, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit9))^2) #58758.99
## [1] 58758.99
alpha=c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
sse=c(34759.28,34066.6,35511.3,37865.5,41383.7,44450.39,48631.17,53403.02,58758.99)
plot(alpha, sse)
#Alpha=0.2 has lowest SSE.
paperautomatic<-ses(paperbacks, h=4)
paperautomatic
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.1098 164.0013 250.2182 141.1811 273.0384
## 32 207.1098 163.3934 250.8261 140.2513 273.9682
## 33 207.1098 162.7937 251.4258 139.3342 274.8853
## 34 207.1098 162.2021 252.0174 138.4294 275.7901
paperfit2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.3529 166.1672 252.5386 143.3061 275.3997
## 32 209.3529 165.3120 253.3938 141.9981 276.7077
## 33 209.3529 164.4730 254.2328 140.7150 277.9908
## 34 209.3529 163.6495 255.0563 139.4555 279.2503
plot(paperautomatic, ylab = "Paperback Sales", xlab = "Time", main="Auto Alpha")
plot(paperfit2, ylab = "Paperback Sales", xlab = "Time", main="Alpha= 0.2")
Overall the automatic model has lower point forecasts than both the simple alpha = 0.4 and the optimal alpha = 0.3 models. The simple alpha = 0.4 model has the highest point forecasts, with the widest 80% and 95% prediction intervals.
hardcover <-books[,2]
hardfit1 <- ses(hardcover, alpha = 0.1, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit1))^2) #45714.82
## [1] 45714.82
hardfit2 <- ses(hardcover, alpha = 0.2, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit2))^2) #33148.16
## [1] 33148.16
hardfit3 <- ses(hardcover, alpha = 0.3, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit3))^2) #30909.69
## [1] 30909.69
hardfit4 <- ses(hardcover, alpha = 0.4, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit4))^2)#30895.27
## [1] 30895.27
hardfit5 <- ses(hardcover, alpha = 0.5, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit5))^2) #31702.6
## [1] 31702.6
hardfit6 <- ses(hardcover, alpha = 0.6, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit6))^2) #33059.93
## [1] 33059.93
hardfit7 <- ses(hardcover, alpha = 0.7, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit7))^2) #34993.95
## [1] 34993.95
hardfit8 <- ses(hardcover, alpha = 0.8, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit8))^2) #37641.79
## [1] 37641.79
hardfit9 <- ses(hardcover, alpha = 0.9, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit9))^2) #41209.53
## [1] 41209.53
hardfit10 <- ses(hardcover, alpha = 1, initial = "simple", h = 4)
sum((hardcover-fitted(hardfit10))^2) #45982
## [1] 45982
alpha=c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1)
sse=c(45714.82,33148.16,30909.69,30895.27,31702.6,33059.93,34993.95,37641.79,41209.53,45982)
plot(alpha, sse)
#The best fit to the data based on the lowest SSE is hardfit4, the model with alpha = 0.4. The SSE traces a U-shaped curve, decreasing and then increasing.
#c. Now let ses() select the optimal value of alpha. Use this value to generate forecasts for the next four days. Compare your results with part b.
hardautomatic<-ses(hardcover, h=4)
hardautomatic
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 239.5602 198.6390 280.4815 176.9766 302.1439
## 32 239.5602 196.4905 282.6299 173.6908 305.4297
## 33 239.5602 194.4443 284.6762 170.5613 308.5591
## 34 239.5602 192.4869 286.6336 167.5677 311.5527
hardfit4
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 242.4404 201.3139 283.5668 179.5428 305.3379
## 32 242.4404 198.1458 286.7349 174.6977 310.1830
## 33 242.4404 195.1896 289.6911 170.1766 314.7041
## 34 242.4404 192.4078 292.4729 165.9222 318.9585
plot(hardautomatic, ylab = "Hardcover Sales", xlab = "Time", main="Auto Alpha")
plot(hardfit4, ylab = "Hardcover Sales", xlab = "Time", main="Alpha= 0.4")
#Looking at both models, the automatic fit has lower point forecasts for the next four days than the alpha = 0.4 model, and the alpha = 0.4 model has wider 95% and 80% prediction intervals than the automatic one.
#d. Repeat but with initial="optimal". How much difference does an optimal initial level make?
hardcover <-books[,2]
hardfit1 <- ses(hardcover, alpha = 0.1, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit1))^2) #38390.67
## [1] 38390.67
hardfit2 <- ses(hardcover, alpha = 0.2, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit2))^2) #32038
## [1] 32038
hardfit3 <- ses(hardcover, alpha = 0.3, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit3))^2) #30634.58
## [1] 30634.58
hardfit4 <- ses(hardcover, alpha = 0.4, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit4))^2)#30816.56
## [1] 30816.56
hardfit5 <- ses(hardcover, alpha = 0.5, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit5))^2) #31682.57
## [1] 31682.57
hardfit6 <- ses(hardcover, alpha = 0.6, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit6))^2) #33056.83
## [1] 33056.83
hardfit7 <- ses(hardcover, alpha = 0.7, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit7))^2) #34993.93
## [1] 34993.93
hardfit8 <- ses(hardcover, alpha = 0.8, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit8))^2) #37641.38
## [1] 37641.38
hardfit9 <- ses(hardcover, alpha = 0.9, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit9))^2) #41209.05
## [1] 41209.05
alpha=c(0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9)
sse=c(38390.67,32038,30634.58,30816.56,31682.57,33056.83,34993.93,37641.38,41209.05)
plot(alpha, sse)
#Alpha=0.3 has lowest SSE.
hardautomatic<-ses(hardcover, h=4)
hardautomatic
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 239.5602 198.6390 280.4815 176.9766 302.1439
## 32 239.5602 196.4905 282.6299 173.6908 305.4297
## 33 239.5602 194.4443 284.6762 170.5613 308.5591
## 34 239.5602 192.4869 286.6336 167.5677 311.5527
hardfit3
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 238.2486 197.2961 279.2012 175.6171 300.8802
## 32 238.2486 195.4929 281.0044 172.8593 303.6380
## 33 238.2486 193.7627 282.7346 170.2133 306.2840
## 34 238.2486 192.0974 284.3999 167.6664 308.8309
plot(paperautomatic, ylab = "Hardcover Sales", xlab = "Time", main="Auto Alpha")
plot(paperfit2, ylab = "Hardcover Sales", xlab = "Time", main="Alpha= 0.3")
Looking at the models in part a that had the lowest SSE, I compared the optimal alpha = 0.2 SES model for the paperback set and the optimal alpha = 0.3 SES model for the hardcover set against Holt's linear method. Based on the SSE of the respective models, the Holt linear model has a lower SSE than the SES model for both series.
paperholt <- holt(paperbacks, h=4)
sum((paperbacks-fitted(paperholt))^2) #30074.16
## [1] 30074.16
paperfit2 <- ses(paperbacks, alpha = 0.2, initial = "optimal", h = 4)
sum((paperbacks-fitted(paperfit2))^2) #34066.6
## [1] 34066.6
hardholt <- holt(hardcover, h=4)
sum((hardcover-fitted(hardholt))^2) #22581.83
## [1] 22581.83
hardfit3 <- ses(hardcover, alpha = 0.3, initial = "optimal", h = 4)
sum((hardcover-fitted(hardfit3))^2) #30634.58
## [1] 30634.58
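The training SSE can also be recovered from each model's reported RMSE, since SSE = n * RMSE^2 with n = 30 observations; a minimal sketch using accuracy() from forecast:
n <- length(paperbacks)
n * accuracy(paperholt)[, "RMSE"]^2  # Holt SSE, ~30074
n * accuracy(paperfit2)[, "RMSE"]^2  # SES SSE, ~34067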
#paperback
summary(paperholt)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = paperbacks, h = 4)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
##
## Initial states:
## l = 174.6003
## b = 1.0606
##
## sigma: 31.6618
##
## AIC AICc BIC
## 319.3427 321.8427 326.3486
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -4.458116 31.66184 26.4503 -6.029264 15.84609 0.6670076
## ACF1
## Training set -0.1444935
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.0380 166.4617 247.6143 144.9820 269.0941
## 32 208.0853 167.5090 248.6615 146.0292 270.1413
## 33 209.1325 168.5562 249.7088 147.0764 271.1886
## 34 210.1797 169.6034 250.7560 148.1236 272.2358
summary(paperfit2)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = paperbacks, h = 4, initial = "optimal", alpha = 0.2)
##
## Smoothing parameters:
## alpha = 0.2
##
## Initial states:
## l = 170.4443
##
## sigma: 33.698
##
## AIC AICc BIC
## 317.0822 317.5266 319.8846
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 6.48476 33.69797 28.23692 0.1229441 15.82888 0.7120616
## ACF1
## Training set -0.2353819
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.3529 166.1672 252.5386 143.3061 275.3997
## 32 209.3529 165.3120 253.3938 141.9981 276.7077
## 33 209.3529 164.4730 254.2328 140.7150 277.9908
## 34 209.3529 163.6495 255.0563 139.4555 279.2503
#For the paperback set, the Holt model has higher AIC and BIC values, although it has lower ME, RMSE, MAE, and MASE statistics on the training set. Based on the information criteria, I would choose the SES model for forecasting paperback book sales.
#hardcovers
summary(hardholt)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = hardcover, h = 4)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
##
## Initial states:
## l = 154.6543
## b = 2.9961
##
## sigma: 27.4359
##
## AIC AICc BIC
## 310.747 313.247 317.753
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -2.169026 27.43588 23.34589 -3.402396 12.47686 0.6965336
## ACF1
## Training set -0.02685637
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 247.3504 212.1899 282.5109 193.5770 301.1237
## 32 250.3400 215.1795 285.5005 196.5667 304.1133
## 33 253.3296 218.1691 288.4901 199.5563 307.1030
## 34 256.3192 221.1587 291.4797 202.5459 310.0926
summary(hardfit3)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = hardcover, h = 4, initial = "optimal", alpha = 0.3)
##
## Smoothing parameters:
## alpha = 0.3
##
## Initial states:
## l = 150.8405
##
## sigma: 31.9555
##
## AIC AICc BIC
## 313.8965 314.3410 316.6989
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 9.712011 31.95548 27.00503 2.873589 13.50611 0.8057058
## ACF1
## Training set -0.1222506
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 238.2486 197.2961 279.2012 175.6171 300.8802
## 32 238.2486 195.4929 281.0044 172.8593 303.6380
## 33 238.2486 193.7627 282.7346 170.2133 306.2840
## 34 238.2486 192.0974 284.3999 167.6664 308.8309
#For the hardcover set, the Holt model has a lower AIC and a slightly higher BIC. It also has lower ME, RMSE, MAE, MAPE, and MASE, and a smaller SSE. I would use the Holt method for forecasting future hardcover book sales.
#paperback books
predict(paperholt, paperbacks, interval="predict")
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.0380 166.4617 247.6143 144.9820 269.0941
## 32 208.0853 167.5090 248.6615 146.0292 270.1413
## 33 209.1325 168.5562 249.7088 147.0764 271.1886
## 34 210.1797 169.6034 250.7560 148.1236 272.2358
predict(paperfit2, paperbacks, interval="predict")
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.3529 166.1672 252.5386 143.3061 275.3997
## 32 209.3529 165.3120 253.3938 141.9981 276.7077
## 33 209.3529 164.4730 254.2328 140.7150 277.9908
## 34 209.3529 163.6495 255.0563 139.4555 279.2503
#The SES model has larger 95% prediction intervals than the Holt linear model.
#hardcover books
predict(hardholt, hardcover, interval="predict")
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 247.3504 212.1899 282.5109 193.5770 301.1237
## 32 250.3400 215.1795 285.5005 196.5667 304.1133
## 33 253.3296 218.1691 288.4901 199.5563 307.1030
## 34 256.3192 221.1587 291.4797 202.5459 310.0926
predict(hardfit3, hardcover, interval="predict")
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 238.2486 197.2961 279.2012 175.6171 300.8802
## 32 238.2486 195.4929 281.0044 172.8593 303.6380
## 33 238.2486 193.7627 282.7346 170.2133 306.2840
## 34 238.2486 192.0974 284.3999 167.6664 308.8309
# The SES model has larger 95% prediction intervals than the Holt model.
There seems to be an overall upward trend, though production decreased from the late 1970s to the early 1980s. Since 2000 production looks fairly constant.
plot(ukcars, ylab="Vehicle Production", main= "Quarterly UK Passenger Vehicle Production")
ukcars.stl <- stl(ukcars, s.window = "periodic")
plot(ukcars.stl)
f1<-seasadj(ukcars.stl)
I was unable to reseasonalize the forecasts (see the sketch after the model summary below). The fit summary includes alpha = 0.5737, beta = 1e-04, and phi = 0.9106; the AIC is 1274.92 and the training RMSE is 25.13965.
fit1 <- holt(f1, damped = TRUE, h=8)
summary(fit1)
##
## Forecast method: Damped Holt's method
##
## Model Information:
## Damped Holt's method
##
## Call:
## holt(y = f1, h = 8, damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.5737
## beta = 1e-04
## phi = 0.9106
##
## Initial states:
## l = 342.6908
## b = -9.9556
##
## sigma: 25.1396
##
## AIC AICc BIC
## 1274.920 1275.712 1291.284
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 2.542419 25.13965 20.50991 0.319648 6.568478 0.6684081
## ACF1
## Training set 0.03469485
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 Q2 406.4535 374.2357 438.6712 357.1807 455.7263
## 2005 Q3 406.4530 369.3085 443.5974 349.6454 463.2605
## 2005 Q4 406.4525 364.9611 447.9439 342.9969 469.9081
## 2006 Q1 406.4521 361.0268 451.8773 336.9802 475.9240
## 2006 Q2 406.4517 357.4063 455.4971 331.4432 481.4601
## 2006 Q3 406.4513 354.0345 458.8682 326.2867 486.6159
## 2006 Q4 406.4510 350.8663 462.0358 321.4415 491.4605
## 2007 Q1 406.4507 347.8686 465.0328 316.8571 496.0444
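The forecasts above are on the seasonally adjusted scale. A minimal sketch of reseasonalizing them, assuming the periodic STL fit above (so the seasonal pattern simply repeats):
seas <- ukcars.stl$time.series[, "seasonal"]
# the 8-quarter horizon (2005 Q2 - 2007 Q1) lines up with the last 8 seasonal values
reseasonalized <- fit1$mean + as.numeric(tail(seas, 8))
plot(reseasonalized, ylab = "Vehicle Production")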
Summary of this fit includes alpha = 0.5962 and beta = 1e-04. AIC is equal to 1274.482. The training set RMSE = 25.31401.
fit2 <- holt(f1, h=8)
summary(fit2)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = f1, h = 8)
##
## Smoothing parameters:
## alpha = 0.5962
## beta = 1e-04
##
## Initial states:
## l = 340.2744
## b = 1.0018
##
## sigma: 25.314
##
## AIC AICc BIC
## 1274.482 1275.043 1288.119
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.6725216 25.31401 20.10156 -0.7566151 6.492304 0.6551
## ACF1
## Training set 0.03864727
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 Q2 407.9770 375.5358 440.4182 358.3625 457.5916
## 2005 Q3 408.9712 371.2006 446.7419 351.2060 466.7364
## 2005 Q4 409.9654 367.5280 452.4029 345.0629 474.8679
## 2006 Q1 410.9596 364.3187 457.6006 339.6284 482.2908
## 2006 Q2 411.9538 361.4568 462.4508 334.7253 489.1823
## 2006 Q3 412.9480 358.8681 467.0279 330.2399 495.6561
## 2006 Q4 413.9422 356.5013 471.3831 326.0940 501.7905
## 2007 Q1 414.9364 354.3196 475.5533 322.2310 507.6419
ets() selected an ETS(A,N,N) model for this data. It has an AIC of 1269.711 and a training RMSE of 25.22788.
fit3<-ets(f1, model = "ZZZ")
summary(fit3)
## ETS(A,N,N)
##
## Call:
## ets(y = f1, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.6167
##
## Initial states:
## l = 318.3843
##
## sigma: 25.2279
##
## AIC AICc BIC
## 1269.711 1269.932 1277.894
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.259939 25.22788 20.21963 -0.1384294 6.508415 0.6589481
## ACF1
## Training set 0.02657539
f. Compare the RMSE of the fitted model with the RMSE of the model you obtained using an STL decomposition with Holt’s method. Which gives the better in-sample fits?
Based on RMSE, all the models are very close, at around 25. The model with the lowest RMSE is the damped Holt’s method.
fit1, the damped Holt’s method, also looks to be the best forecast; the ETS(A,N,N) and plain Holt’s method models produce essentially flat forecasts.
fcast3<-forecast(fit3, h=8)
plot(fit1)
plot(fit2)
plot(fcast3)
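A compact way to put the in-sample RMSEs side by side, using accuracy() from forecast:
rbind(damped_holt = accuracy(fit1)[, "RMSE"],
      holt = accuracy(fit2)[, "RMSE"],
      ets_ann = accuracy(fit3)[, "RMSE"])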
The time series of monthly Australian overseas visitors has a positive trend with strong seasonality. Although it increases throughout, there is a significant drop in the number of visitors during the 2003 off-season.
plot(visitors, main= "Monthly Australian Overseas Visitors", ylab="Number of Visitors")
hwm <- hw(visitors, seasonal = "multiplicative", h=24)
hwm
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2005 369.3175 343.3002 395.3348 329.5275 409.1076
## Jun 2005 395.5080 365.2767 425.7393 349.2733 441.7427
## Jul 2005 485.9444 446.0391 525.8497 424.9145 546.9743
## Aug 2005 436.7465 398.5070 474.9859 378.2643 495.2287
## Sep 2005 422.9069 383.6657 462.1481 362.8927 482.9211
## Oct 2005 478.2627 431.4628 525.0627 406.6885 549.8370
## Nov 2005 502.5833 450.9301 554.2365 423.5865 581.5800
## Dec 2005 615.6455 549.4181 681.8728 514.3595 716.9314
## Jan 2006 461.1564 409.3845 512.9284 381.9781 540.3348
## Feb 2006 511.8202 452.0068 571.6335 420.3436 603.2968
## Mar 2006 498.9206 438.3614 559.4798 406.3033 591.5378
## Apr 2006 443.9647 388.1032 499.8261 358.5320 529.3974
## May 2006 383.5190 333.5830 433.4550 307.1484 459.8896
## Jun 2006 410.6680 355.4225 465.9134 326.1774 495.1585
## Jul 2006 504.5116 434.4881 574.5350 397.4199 611.6032
## Aug 2006 453.3808 388.5399 518.2217 354.2152 552.5464
## Sep 2006 438.9632 374.3497 503.5767 340.1454 537.7811
## Oct 2006 496.3635 421.2456 571.4814 381.4806 611.2464
## Nov 2006 521.5446 440.4747 602.6146 397.5588 645.5305
## Dec 2006 638.7996 536.9011 740.6982 482.9592 794.6400
## Jan 2007 478.4461 400.1915 556.7008 358.7660 598.1263
## Feb 2007 530.9496 441.9744 619.9248 394.8738 667.0255
## Mar 2007 517.5100 428.7206 606.2994 381.7183 653.3017
## Apr 2007 460.4553 379.6266 541.2840 336.8384 584.0721
Why is multiplicative seasonality necessary here? Multiplicative seasonality is used because the seasonal variations in the original time series change in proportion to the level of the series itself; in this case they become larger.
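A quick sanity check of this reasoning (the additive fit below is an assumption, not part of the exercise): if the seasonal swings grow with the level, an additive seasonal fit should have a larger training RMSE than the multiplicative one.
hwa <- hw(visitors, seasonal = "additive", h = 24)
accuracy(hwa)["Training set", "RMSE"]  # compare with the multiplicative RMSE of 14.8295 below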
Experiment with making the trend exponential and/or damped.
hwmdamped <- hw(visitors, seasonal = "multiplicative", h=24, damped= TRUE)
hwmexpo <- hw(visitors, seasonal = "multiplicative", h=24, exponential = TRUE)
plot(hwm)
plot(hwmdamped)
plot(hwmexpo)
The Damped Holt-Winters’ multiplicative method is the best forecast, as it has the lowest RMSE of 14.44801.
summary(hwm)#RMSE=14.8295
##
## Forecast method: Holt-Winters' multiplicative method
##
## Model Information:
## Holt-Winters' multiplicative method
##
## Call:
## hw(y = visitors, h = 24, seasonal = "multiplicative")
##
## Smoothing parameters:
## alpha = 0.4154
## beta = 0.0063
## gamma = 1e-04
##
## Initial states:
## l = 90.826
## b = 3.0992
## s=0.932 1.0506 1.0811 0.9771 1.3085 1.0715
## 1.0229 0.9074 0.9401 1.0494 0.8568 0.8026
##
## sigma: 0.055
##
## AIC AICc BIC
## 2633.589 2636.346 2692.760
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9498442 14.8295 10.96716 -0.8150922 4.271167 0.4050069
## ACF1
## Training set 0.2223887
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2005 369.3175 343.3002 395.3348 329.5275 409.1076
## Jun 2005 395.5080 365.2767 425.7393 349.2733 441.7427
## Jul 2005 485.9444 446.0391 525.8497 424.9145 546.9743
## Aug 2005 436.7465 398.5070 474.9859 378.2643 495.2287
## Sep 2005 422.9069 383.6657 462.1481 362.8927 482.9211
## Oct 2005 478.2627 431.4628 525.0627 406.6885 549.8370
## Nov 2005 502.5833 450.9301 554.2365 423.5865 581.5800
## Dec 2005 615.6455 549.4181 681.8728 514.3595 716.9314
## Jan 2006 461.1564 409.3845 512.9284 381.9781 540.3348
## Feb 2006 511.8202 452.0068 571.6335 420.3436 603.2968
## Mar 2006 498.9206 438.3614 559.4798 406.3033 591.5378
## Apr 2006 443.9647 388.1032 499.8261 358.5320 529.3974
## May 2006 383.5190 333.5830 433.4550 307.1484 459.8896
## Jun 2006 410.6680 355.4225 465.9134 326.1774 495.1585
## Jul 2006 504.5116 434.4881 574.5350 397.4199 611.6032
## Aug 2006 453.3808 388.5399 518.2217 354.2152 552.5464
## Sep 2006 438.9632 374.3497 503.5767 340.1454 537.7811
## Oct 2006 496.3635 421.2456 571.4814 381.4806 611.2464
## Nov 2006 521.5446 440.4747 602.6146 397.5588 645.5305
## Dec 2006 638.7996 536.9011 740.6982 482.9592 794.6400
## Jan 2007 478.4461 400.1915 556.7008 358.7660 598.1263
## Feb 2007 530.9496 441.9744 619.9248 394.8738 667.0255
## Mar 2007 517.5100 428.7206 606.2994 381.7183 653.3017
## Apr 2007 460.4553 379.6266 541.2840 336.8384 584.0721
summary(hwmdamped)#RMSE=14.44801
##
## Forecast method: Damped Holt-Winters' multiplicative method
##
## Model Information:
## Damped Holt-Winters' multiplicative method
##
## Call:
## hw(y = visitors, h = 24, seasonal = "multiplicative", damped = TRUE)
##
## Smoothing parameters:
## alpha = 0.6306
## beta = 0.0071
## gamma = 1e-04
## phi = 0.9797
##
## Initial states:
## l = 85.7688
## b = 3.4912
## s=0.9328 1.0558 1.0829 0.9805 1.3187 1.0838
## 1.029 0.9097 0.9317 1.0447 0.8442 0.7861
##
## sigma: 0.0542
##
## AIC AICc BIC
## 2624.818 2627.913 2687.469
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.9123468 14.44801 10.64909 0.07071844 4.064322 0.3932608
## ACF1
## Training set 0.01740636
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2005 355.5226 330.8178 380.2275 317.7398 393.3054
## Jun 2005 382.1616 350.6620 413.6613 333.9871 430.3362
## Jul 2005 473.4585 429.0426 517.8745 405.5302 541.3868
## Aug 2005 422.7207 378.6914 466.7499 355.3837 490.0576
## Sep 2005 413.1132 366.1242 460.1022 341.2498 484.9767
## Oct 2005 467.7761 410.3573 525.1949 379.9617 555.5906
## Nov 2005 493.1745 428.4256 557.9233 394.1496 592.1993
## Dec 2005 600.6053 516.8496 684.3611 472.5120 728.6986
## Jan 2006 446.9945 381.1536 512.8354 346.2995 547.6895
## Feb 2006 494.1338 417.6065 570.6611 377.0955 611.1722
## Mar 2006 482.2060 403.9860 560.4260 362.5788 601.8331
## Apr 2006 426.4016 354.1904 498.6127 315.9642 536.8390
## May 2006 359.6485 296.2392 423.0578 262.6723 456.6247
## Jun 2006 386.5021 315.7314 457.2729 278.2676 494.7366
## Jul 2006 478.7214 387.8801 569.5626 339.7917 617.6511
## Aug 2006 427.3196 343.4452 511.1939 299.0448 555.5943
## Sep 2006 417.5121 332.8892 502.1350 288.0925 546.9316
## Oct 2006 472.6513 373.8771 571.4254 321.5892 623.7133
## Nov 2006 498.2053 391.0042 605.4063 334.2554 662.1551
## Dec 2006 606.6022 472.3745 740.8299 401.3187 811.8857
## Jan 2007 451.3631 348.7700 553.9561 294.4605 608.2656
## Feb 2007 498.8610 382.5092 615.2128 320.9163 676.8057
## Mar 2007 486.7215 370.3466 603.0964 308.7414 664.7016
## Apr 2007 430.3102 324.9295 535.6910 269.1443 591.4762
summary(hwmexpo)#RMSE=14.49416
##
## Forecast method: Holt-Winters' multiplicative method with exponential trend
##
## Model Information:
## Holt-Winters' multiplicative method with exponential trend
##
## Call:
## hw(y = visitors, h = 24, seasonal = "multiplicative", exponential = TRUE)
##
## Smoothing parameters:
## alpha = 0.5722
## beta = 0.0013
## gamma = 1e-04
##
## Initial states:
## l = 91.0884
## b = 1.0025
## s=0.9278 1.0475 1.0821 0.9815 1.3152 1.0813
## 1.0294 0.9145 0.9348 1.0438 0.8497 0.7923
##
## sigma: 0.0556
##
## AIC AICc BIC
## 2633.767 2636.524 2692.938
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6442177 14.49416 10.62951 0.2554469 4.0328 0.3925378
## ACF1
## Training set 0.07595792
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## May 2005 363.2966 336.9114 388.8585 324.1617 401.7521
## Jun 2005 391.2661 359.4170 423.3742 342.8854 441.3499
## Jul 2005 482.6967 437.6875 527.8038 414.8307 552.3173
## Aug 2005 434.1673 389.9053 477.4817 369.3372 501.4302
## Sep 2005 426.5256 380.6007 472.6896 359.1054 498.7171
## Oct 2005 482.2232 427.0115 538.9365 402.2969 574.0888
## Nov 2005 508.6900 447.2381 570.8211 418.4527 611.3099
## Dec 2005 621.3098 541.8445 704.7370 505.6883 749.9583
## Jan 2006 465.6513 403.1039 528.7879 372.8457 565.1243
## Feb 2006 515.5859 443.1405 588.1965 411.4766 634.1066
## Mar 2006 501.2247 429.7002 576.9526 392.1442 618.7863
## Apr 2006 445.8398 378.7162 518.2235 346.6915 556.3955
## May 2006 382.3303 321.8179 444.2748 295.3148 480.0461
## Jun 2006 411.7651 344.7608 481.7775 314.9837 520.8613
## Jul 2006 507.9859 422.3583 596.8062 387.2713 648.5360
## Aug 2006 456.9141 377.5975 538.3777 344.6734 585.8647
## Sep 2006 448.8719 369.3656 531.8847 333.0687 579.5673
## Oct 2006 507.4876 417.3049 603.8785 375.5773 662.8158
## Nov 2006 535.3411 436.5533 640.7133 392.8235 700.2755
## Dec 2006 653.8613 528.3921 784.4774 474.3540 860.0598
## Jan 2007 490.0476 394.4596 590.0619 353.2683 652.8425
## Feb 2007 542.5983 436.0491 655.0152 390.4664 725.9127
## Mar 2007 527.4847 419.4515 637.5087 375.5990 711.8138
## Apr 2007 469.1980 371.8091 570.6899 332.9506 640.9010
#i. a multiplicative Holt-Winters' method
fiti <- hw(visitors, seasonal = "multiplicative")
#ii. an ETS model
fitii <- ets(visitors, model = "ZZZ")
#iii. an additive ETS model applied to a Box-Cox transformed series
lambda <-BoxCox.lambda(visitors)
boxcox <- BoxCox(visitors, lambda)
fitiii <- ets(boxcox, model = "ZZZ", additive.only=TRUE)
#iv. a seasonal naive method applied to the Box-Cox transformed series
fitiv <- snaive(boxcox)
#v. an STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
stl1<-stl(boxcox,s.window="periodic")
sad <-stl1$time.series[,"seasonal"]  # seasonal component
fit<-boxcox-sad                      # seasonally adjusted series
fitv<-ets(fit, model = "ZZZ")
Based on the residuals and forecasts, I believe fiti and fitii are the best: the Holt-Winters' multiplicative method and the ETS(M,A,M) model.
#histograms of residuals
hist(fiti$residuals)
hist(fitii$residuals)
hist(fitiii$residuals)
hist(fitiv$residuals)
hist(fitv$residuals)
#Based on the residuals, the most normal-looking histograms are fiti, fitii, and fitv.
#forecasts
plot(forecast(fiti, h=24))
plot(forecast(fitii, h=24))
plot(forecast(fitv, h=24))
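Beyond eyeballing histograms, checkresiduals() (used later in this document) combines the residual time plot, ACF, and a Ljung-Box test in one call; a minimal sketch for the first model:
checkresiduals(fiti)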
All three ACF plots are consistent with white noise, indicating all three series are stationary.
Each figure has a different number of observations: the first has 36, the second 360, and the third 1,000. The 95% critical bounds shrink as the sample size grows, which is why they differ between figures, and no spikes exceed their respective bounds, so none of the series show significant autocorrelation.
The ACF and PACF show that the daily IBM stock closing-price series is not stationary. In the ACF plot the autocorrelations are large and positive, decaying toward zero only very slowly. The PACF also indicates autocorrelation, with a lag falling outside the 95% bounds. Differencing the series should make it stationary so that future forecasts can be accurate.
plot(ibmclose)
Acf(ibmclose)
Pacf(ibmclose)
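A first difference is the usual remedy; a minimal sketch (not part of the original answer):
dibm <- diff(ibmclose)
plot(dibm, main = "Differenced IBM Closing Prices")
Acf(dibm)  # the slow decay should be gone after differencing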
#a. usnetelec
lambda1 <-BoxCox.lambda(usnetelec)
lambda1 #0.5167714
## [1] 0.5167714
plot(BoxCox(usnetelec,lambda1))
#b. usgdp
lambda2 <-BoxCox.lambda(usgdp)
lambda2 #0.366352
## [1] 0.366352
plot(BoxCox(usgdp,lambda2))
#c. mcopper
lambda3 <-BoxCox.lambda(mcopper)
lambda3 # 0.1919047
## [1] 0.1919047
plot(BoxCox(mcopper,lambda3))
#d. enplanements
lambda4 <-BoxCox.lambda(enplanements)
lambda4 # -0.2269461
## [1] -0.2269461
plot(BoxCox(enplanements,lambda4))
#e. visitors
lambda5 <-BoxCox.lambda(visitors)
lambda5 # 0.2775249
## [1] 0.2775249
plot(BoxCox(visitors,lambda5))
In this problem I used bshift() from the ZIM package with the number of lags equal to 1. Because of the NAs this introduces into the data, I am unable to view further ACF or PACF plots.
plot(enplanements)
fit<-BoxCox(enplanements, lambda4)
acf(fit)
pacf(fit)
fit<-bshift(fit, k = 1)
plot.ts(fit)
I got stuck on this exercise.
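One way forward is diff(), which drops the lost observations instead of padding with NAs; a sketch applying a seasonal difference followed by a first difference to the Box-Cox series (an assumption about the intended differencing, not the original answer):
denpl <- diff(diff(BoxCox(enplanements, lambda4), lag = 12))
Acf(denpl)
Pacf(denpl)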
#a. Use the following R code to generate data from an AR(1) model with ϕ1=0.6 and σ2=1. The process starts with y0=0.
y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
y[i] <- 0.6*y[i-1] + e[i]
#b. Produce a time plot for the series. How does the plot change as you change ϕ1?
plot(y)
# The series fluctuates randomly around zero. As phi1 increases it becomes smoother and more persistent; as phi1 approaches 0 it looks more like white noise.
#c. Write your own code to generate data from an MA(1) model with θ1=0.6 and σ2=1
ma(y, order = 1)
## Time Series:
## Start = 1
## End = 100
## Frequency = 1
## [1] 0.00000000 0.84750721 1.14162467 1.09572155 1.97633848
## [6] 0.37429518 0.49894585 0.89745992 0.95457514 0.68198289
## [11] 0.36596227 0.37296666 -2.62966258 -0.35809898 1.73321204
## [16] 1.74970309 1.92122697 1.16375028 1.06460831 -0.29291555
## [21] -0.92190950 -0.10048729 0.48305042 -0.11306532 -0.16328902
## [26] -0.78675958 -0.28498660 1.01125276 1.92675469 -0.03164863
## [31] -0.63651764 0.55107755 -0.06122794 0.54642372 1.44793009
## [36] 0.80373547 2.70446386 0.87567351 0.79146171 -0.67041818
## [41] 0.84638936 3.28321850 0.30335531 1.26525791 -0.21009147
## [46] 2.12127008 -0.06841223 -1.41617376 -2.05516186 -2.80859474
## [51] -3.11284852 -2.19435489 -2.25513522 -1.87700050 -2.26158264
## [56] -1.47021347 -2.13587303 -2.07203918 -1.23332746 -0.63713299
## [61] 0.49707613 0.34383215 0.69856629 0.75511748 1.81409926
## [66] 0.83589176 1.22704317 -0.48407938 0.33647956 0.56996011
## [71] 2.31254523 2.11428659 1.35401732 -0.72050137 -0.39417484
## [76] -0.13265723 -0.31221824 -1.91795966 -1.61737930 -0.35334099
## [81] -1.11369686 -1.49054982 -0.55785434 -1.53538784 -0.58996182
## [86] 0.51440991 1.17490214 -0.07750039 -0.35049716 -3.49395909
## [91] -1.83893041 -2.14328958 0.30537503 -0.65076699 -0.94707846
## [96] -0.59669082 -0.54058410 -0.12737569 0.28660089 0.90617584
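Note that ma() computes a moving-average smoother of an existing series; it does not simulate an MA(1) process. A minimal sketch of the simulation itself, following the loop pattern from part a:
e <- rnorm(100)
yma <- ts(numeric(100))
for(i in 2:100)
  yma[i] <- 0.6*e[i-1] + e[i]  # MA(1) with theta1 = 0.6 and sigma^2 = 1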
#d. Produce a time plot for the series. How does the plot change as you change?
plot.ts(e)
#This time series appears to be random as well.
#e. Generate data from an ARMA(1,1) model with ϕ1= 0.6 and θ1=0.6 and σ2=1
#f. Generate data from an AR(2) model with ϕ1=−0.8 and ϕ2=0.3 and σ2=1 (Note that these parameters will give a non-stationary series.)
#g. Graph the latter two series and compare them.
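Parts e-g were left unanswered; a sketch following the same loop pattern as part a:
e <- rnorm(100)
# e. ARMA(1,1) with phi1 = 0.6 and theta1 = 0.6
yarma <- ts(numeric(100))
for(i in 2:100)
  yarma[i] <- 0.6*yarma[i-1] + 0.6*e[i-1] + e[i]
# f. AR(2) with phi1 = -0.8 and phi2 = 0.3 (non-stationary, so it oscillates with growing amplitude)
yar2 <- ts(numeric(100))
for(i in 3:100)
  yar2[i] <- -0.8*yar2[i-1] + 0.3*yar2[i-2] + e[i]
# g. graph the two series for comparison
par(mfrow = c(2, 1))
plot(yarma, main = "ARMA(1,1)")
plot(yar2, main = "AR(2)")
par(mfrow = c(1, 1))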
The ARIMA(1,0,1) model appears to be stationary, but the ARIMA(1,1,1) model has the smaller AIC (-13.41 vs. -13.33).
plot(wmurders)
adf.test(wmurders)
##
## Augmented Dickey-Fuller Test
##
## data: wmurders
## Dickey-Fuller = -0.29243, Lag order = 3, p-value = 0.9878
## alternative hypothesis: stationary
fita=ts(wmurders, frequency = 12)
dec = stl(fita, s.window="periodic")
deseasonal <- seasadj(dec)
plot(dec)
acf(fita)
pacf(fita)
fitb = diff(deseasonal, differences = 1)
plot(fitb)
Acf(fitb)
Pacf(fitb)
fit2 = arima(fitb, order=c(1,1,1))
fit3 = arima(fitb, order = c(1,0,1))
fit2
##
## Call:
## arima(x = fitb, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## -0.2379 -0.8036
## s.e. 0.1616 0.1247
##
## sigma^2 estimated as 0.03951: log likelihood = 9.7, aic = -13.41
fit3
##
## Call:
## arima(x = fitb, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## -0.7715 0.6513 0.0032
## s.e. 0.2189 0.2436 0.0252
##
## sigma^2 estimated as 0.0394: log likelihood = 10.66, aic = -13.33
tsdisplay(residuals(fit2), lag.max=15, main='Seasonal Model Residuals')
The model I constructed does not include a constant, because arima() omits the constant whenever d is greater than or equal to 1.
lag1<-bshift(wmurders, k = 1)
fitlag <- arima(lag1,order=c(1,1,1))
fitlag
##
## Call:
## arima(x = lag1, order = c(1, 1, 1))
##
## Coefficients:
## ar1 ma1
## -0.8252 0.6914
## s.e. 0.2002 0.2378
##
## sigma^2 estimated as 0.04398: log likelihood = 7.53, aic = -9.06
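If a trend term is wanted despite d = 1, forecast::Arima() can add a drift term that stats::arima() does not offer; a minimal sketch:
Arima(lag1, order = c(1, 1, 1), include.drift = TRUE)  # ARIMA(1,1,1) with drift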
The fitlag model on the backshifted series seems satisfactory when examining the residuals, as they appear random around zero, resembling white noise.
plot(fitlag$residuals)
fcast<-forecast(fitlag, h= 3)
fcast
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 56 2.589533 2.320769 2.858298 2.178494 3.000573
## 57 2.649522 2.293950 3.005093 2.105722 3.193322
## 58 2.600018 2.158061 3.041976 1.924102 3.275934
plot(fcast)
I would use the auto.arima() model, as it further reduces the AIC and AICc of the model.
fit.auto <- auto.arima(deseasonal)
fit.auto
## Series: deseasonal
## ARIMA(0,2,1)(1,0,0)[12]
##
## Coefficients:
## ma1 sar1
## -0.8540 -0.3922
## s.e. 0.0722 0.1402
##
## sigma^2 estimated as 0.03637: log likelihood=11.93
## AIC=-17.85 AICc=-17.36 BIC=-11.94
There is an overall increasing trend in the data, with clear seasonality in the quarterly visitor nights spent by international tourists travelling to Australia.
plot(austourists)
The data are not stationary. The ACF shows correlation at every fourth lag, suggesting that more people visit Australia in the same quarter of each year.
acf(austourists)
The PACF also points to a nonstationary series; lagged terms may be needed for a better model.
pacf(austourists)
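seasonplot() (used later in this document) makes the quarterly pattern explicit; a minimal sketch:
seasonplot(austourists)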
dec = stl(austourists, s.window="periodic")
deseasonal <- seasadj(dec)
fitb = diff(deseasonal, differences = 4)  # four successive first differences; diff(x, lag = 4) would give a seasonal difference instead
Acf(fitb)
Pacf(fitb)
fit2 = arima(deseasonal, order=c(1,1,4))
fit2
##
## Call:
## arima(x = deseasonal, order = c(1, 1, 4))
##
## Coefficients:
## ar1 ma1 ma2 ma3 ma4
## -0.9403 0.4317 -0.5207 0.0133 0.3151
## s.e. 0.0974 0.1801 0.1628 0.1533 0.1416
##
## sigma^2 estimated as 6.764: log likelihood = -112.32, aic = 236.65
tsdisplay(residuals(fit2), lag.max=15, main='Seasonal Model Residuals')
auto.arima() does not give the same model that I constructed. In this case I believe the auto.arima() model is better, because it has a lower AIC. Both models have similar residuals and are stationary.
fit.auto <- auto.arima(deseasonal)
fit.auto
## Series: deseasonal
## ARIMA(0,1,1)(1,0,0)[4] with drift
##
## Coefficients:
## ma1 sar1 drift
## -0.8006 0.3483 0.4752
## s.e. 0.1704 0.1501 0.1157
##
## sigma^2 estimated as 6.392: log likelihood=-109.35
## AIC=226.7 AICc=227.65 BIC=234.1
tsdisplay(residuals(fit.auto), lag.max=15, main='Seasonal Model Residuals')
With the backshift operator, auto.arima() chooses an ARIMA(2,1,2) with drift. The original auto.arima() fit without the backshift operator performs better on the data, with an AIC of 226.7 compared with 304.38 for this model.
fit<-bshift(austourists, k = 1)
plot.ts(fit)
auto.arima(fit)
## Series: fit
## ARIMA(2,1,2) with drift
##
## Coefficients:
## ar1 ar2 ma1 ma2 drift
## -0.0739 -0.5926 -1.1530 0.6086 0.5045
## s.e. 0.1430 0.1360 0.1306 0.1494 0.2298
##
## sigma^2 estimated as 34.56: log likelihood=-146.19
## AIC=304.38 AICc=306.48 BIC=315.48
Overall we can see a positive trend in net generation of electricity; the US has been using more and more electricity as time progresses.
plot(usmelec)
ma1<-ma(usmelec, order=12)
plot(ma1)
The data has been seasonally adjusted.
dec = stl(usmelec, s.window="periodic")
plot(dec)
deseasonal <- seasadj(dec)
fit1 = diff(deseasonal, differences = 1)
plot(fit1)
The data still do not appear stationary according to the ACF and PACF plots. I could not determine the right number of differences or lags to make the series stationary (the ndiffs() sketch below is one way to check).
Acf(fit1)
Pacf(fit1)
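The ndiffs() and nsdiffs() helpers in forecast estimate how much differencing a series needs; a minimal sketch:
ndiffs(deseasonal)  # suggested number of first differences for the adjusted series
nsdiffs(usmelec)    # suggested number of seasonal differences for the raw series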
auto.arima() selects an ARIMA(3,0,3) with non-zero mean (AIC = 3263.35), although the manually specified ARIMA(2,0,2) below achieves a slightly lower AIC of 3259.78.
f1<-auto.arima(fit1,seasonal=FALSE)
f1
## Series: fit1
## ARIMA(3,0,3) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 mean
## 0.7508 0.3173 -0.5258 -0.9984 -0.4656 0.6063 0.4290
## s.e. 0.2484 0.3592 0.1901 0.2402 0.4062 0.1974 0.1276
##
## sigma^2 estimated as 76.9: log likelihood=-1623.68
## AIC=3263.35 AICc=3263.68 BIC=3296.28
f2<-arima(fit1, c(1,0,1))
f2
##
## Call:
## arima(x = fit1, order = c(1, 0, 1))
##
## Coefficients:
## ar1 ma1 intercept
## 0.6132 -0.9471 0.4282
## s.e. 0.0438 0.0159 0.0615
##
## sigma^2 estimated as 84.55: log likelihood = -1648.34, aic = 3304.67
f3<-arima(fit1,c(2,0,2))
f3
##
## Call:
## arima(x = fit1, order = c(2, 0, 2))
##
## Coefficients:
## ar1 ar2 ma1 ma2 intercept
## 1.4736 -0.7421 -1.7352 0.8182 0.4287
## s.e. 0.0619 0.0558 0.0536 0.0662 0.1268
##
## sigma^2 estimated as 75.78: log likelihood = -1623.89, aic = 3259.78
I was unable to make this data properly stationary, so I take this as the best fit available. The residuals look roughly like white noise in the histogram and residual plot, but the Ljung-Box test (p < 2.2e-16) and the ACF plot indicate remaining autocorrelation, so the forecasts will not be accurate.
summary(f1)
## Series: fit1
## ARIMA(3,0,3) with non-zero mean
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3 mean
## 0.7508 0.3173 -0.5258 -0.9984 -0.4656 0.6063 0.4290
## s.e. 0.2484 0.3592 0.1901 0.2402 0.4062 0.1974 0.1276
##
## sigma^2 estimated as 76.9: log likelihood=-1623.68
## AIC=3263.35 AICc=3263.68 BIC=3296.28
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.011315 8.701121 6.739532 64.02277 191.2577 0.8415866
## ACF1
## Training set -0.01167873
checkresiduals(f1)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,0,3) with non-zero mean
## Q* = 190.75, df = 17, p-value < 2.2e-16
##
## Model df: 7. Total lags used: 24
library(readxl)
check <- read_excel("~/Desktop/workbook1.xlsx")
check.ts<-ts(check$Electricity, start = c(2011, 10), end = c(2013, 6), frequency = 12 )
fcast1<-forecast(f1, h=21)
plot(fcast1)
accuracy(fcast1, check.ts)
## ME RMSE MAE MPE MAPE
## Training set 0.011315 8.701121 6.739532 64.02277 191.25774
## Test set 331.236290 333.039903 331.236290 99.75373 99.75373
## MASE ACF1 Theil's U
## Training set 0.8415866 -0.01167873 NA
## Test set 41.3625172 0.36192224 11.83058
I don’t believe this forecasting method is accurate. The ACF plot showed the need for more lagged terms, and unfortunately I was unable to make the data stationary in this example.
plot(mcopper)
lambda <-BoxCox.lambda(mcopper)
lambda #0.1919047
## [1] 0.1919047
new<-BoxCox(mcopper,lambda)
ARIMA1 <- auto.arima(new)
ARIMA1
## Series: new
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.3720
## s.e. 0.0388
##
## sigma^2 estimated as 0.04997: log likelihood=45.05
## AIC=-86.1 AICc=-86.08 BIC=-77.43
ARIMA2 = arima(new, order=c(0,1,2))
ARIMA2
##
## Call:
## arima(x = new, order = c(0, 1, 2))
##
## Coefficients:
## ma1 ma2
## 0.3704 -0.0037
## s.e. 0.0424 0.0406
##
## sigma^2 estimated as 0.04988: log likelihood = 45.05, aic = -84.11
ARIMA3 = arima(new, order=c(0,1,3))
ARIMA3
##
## Call:
## arima(x = new, order = c(0, 1, 3))
##
## Coefficients:
## ma1 ma2 ma3
## 0.3713 -0.0091 -0.0131
## s.e. 0.0423 0.0438 0.0406
##
## sigma^2 estimated as 0.04987: log likelihood = 45.1, aic = -82.21
I believe the ARIMA1 model is the best of the three models constructed, because it minimizes the AIC and the extra MA terms in ARIMA2 and ARIMA3 are not statistically significant. The histogram of its residuals looks normally distributed.
hist(ARIMA1$residuals)
The forecasts presented here are naive: although the 80% and 95% bands do look reasonable, the model’s flat point forecasts do not.
fcast<-forecast(ARIMA3)
plot(fcast)
In this case the ETS forecasts look more realistic visually. However, the ARIMA model on the Box-Cox transformed data has the lower AIC, AICc, and BIC of the two, so by those criteria it is the better forecasting model.
ETS1<-ets(mcopper, model = "ZZZ")
ETS1
## ETS(M,Ad,N)
##
## Call:
## ets(y = mcopper, model = "ZZZ")
##
## Smoothing parameters:
## alpha = 0.9999
## beta = 0.2174
## phi = 0.8
##
## Initial states:
## l = 265.142
## b = -3.4724
##
## sigma: 0.0629
##
## AIC AICc BIC
## 8052.055 8052.206 8078.065
fcast2<-forecast(ETS1)
plot(fcast2)
Data must be seasonally adjusted as there is evidence of seasonality.
plot(condmilk)
seasonplot(condmilk)
dec = stl(condmilk, s.window="periodic")
deseasonal <- seasadj(dec)
plot(deseasonal)
The data are now stationary.
adf.test(condmilk, alternative = "stationary")
## Warning in adf.test(condmilk, alternative = "stationary"): p-value smaller
## than printed p-value
##
## Augmented Dickey-Fuller Test
##
## data: condmilk
## Dickey-Fuller = -6.9943, Lag order = 4, p-value = 0.01
## alternative hypothesis: stationary
Acf(condmilk)
Pacf(condmilk)
fit1 = diff(deseasonal, differences = 1)
Acf(fit1)
Pacf(fit1)
Identify a couple of ARIMA models that might be useful in describing the time series. Which of your models is the best according to their AIC values?
ARIMA(1,0,0) is best according to the AIC.
fit.a<-auto.arima(deseasonal, seasonal=FALSE)
fit.a #AIC=900.85
## Series: deseasonal
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.7770 95.7445
## s.e. 0.0575 3.9927
##
## sigma^2 estimated as 102.3: log likelihood=-447.42
## AIC=900.85 AICc=901.06 BIC=909.21
fit.b<-arima(deseasonal, c(2,0,0))
fit.b #aic = 902.61
##
## Call:
## arima(x = deseasonal, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## 0.7424 0.0446 95.7189
## s.e. 0.0912 0.0916 4.1631
##
## sigma^2 estimated as 100.4: log likelihood = -447.31, aic = 902.61
fit.c<-arima(deseasonal, c(3,0,0))
fit.c # aic = 904.42
##
## Call:
## arima(x = deseasonal, order = c(3, 0, 0))
##
## Coefficients:
## ar1 ar2 ar3 intercept
## 0.7438 0.0745 -0.0403 95.7447
## s.e. 0.0912 0.1139 0.0918 4.0030
##
## sigma^2 estimated as 100.3: log likelihood = -447.21, aic = 904.42
d. Estimate the parameters of your best model and do diagnostic testing on the residuals. Do the residuals resemble white noise? If not, try to find another ARIMA model which fits better.
The residuals resemble white noise, so the model is suitable for forecasting: the Ljung-Box test is not significant (p = 0.6312), the histogram is roughly normal apart from one outlier, the residuals look random, and the ACF shows no significant lag correlations.
summary(fit.a)
## Series: deseasonal
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.7770 95.7445
## s.e. 0.0575 3.9927
##
## sigma^2 estimated as 102.3: log likelihood=-447.42
## AIC=900.85 AICc=901.06 BIC=909.21
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.1710362 10.03162 6.891324 -2.288852 8.889688 0.3651767
## ACF1
## Training set -0.03738111
checkresiduals(fit.a)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 19.229, df = 22, p-value = 0.6312
##
## Model df: 2. Total lags used: 24
fcast1<-forecast(fit.a, h=24)
plot(fcast1)
fcast1
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1981 79.49377 66.52924 92.45830 59.66624 99.32131
## Feb 1981 83.11734 66.69910 99.53558 58.00781 108.22688
## Mar 1981 85.93293 67.74425 104.12162 58.11574 113.75013
## Apr 1981 88.12071 68.94206 107.29935 58.78950 117.45191
## May 1981 89.82065 70.06831 109.57299 59.61205 120.02925
## Jun 1981 91.14155 71.05076 111.23233 60.41534 121.86775
## Jul 1981 92.16791 71.87552 112.46030 61.13337 123.20245
## Aug 1981 92.96542 72.55226 113.37857 61.74619 124.18464
## Sep 1981 93.58509 73.09938 114.07081 62.25489 124.91530
## Oct 1981 94.06660 73.53719 114.59600 62.66958 125.46362
## Nov 1981 94.44074 73.88500 114.99648 63.00345 125.87803
## Dec 1981 94.73145 74.15983 115.30307 63.26987 126.19303
## Jan 1982 94.95734 74.37614 115.53854 63.48111 126.43358
## Feb 1982 95.13286 74.54588 115.71985 63.64778 126.61794
## Mar 1982 95.26925 74.67877 115.85973 63.77883 126.75967
## Apr 1982 95.37522 74.78263 115.96781 63.88158 126.86886
## May 1982 95.45756 74.86371 116.05142 63.96198 126.95315
## Jun 1982 95.52155 74.92692 116.11617 64.02478 127.01831
## Jul 1982 95.57126 74.97617 116.16635 64.07379 127.06874
## Aug 1982 95.60989 75.01452 116.20526 64.11199 127.10779
## Sep 1982 95.63991 75.04437 116.23545 64.14175 127.13807
## Oct 1982 95.66323 75.06759 116.25887 64.16492 127.16155
## Nov 1982 95.68136 75.08565 116.27706 64.18295 127.17977
## Dec 1982 95.69544 75.09970 116.29118 64.19697 127.19390
I believe the ARIMA model is better for forecasting than the ETS model, whose forecasts are flat (essentially naive). The ARIMA model also has the lower AIC.
fit.d<-ets(deseasonal,model = "ZZZ" )
fcast2<-forecast(fit.d, h=24)
plot(fcast2)
fcast2
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1981 75.57095 62.17549 88.96641 55.0843642 96.05754
## Feb 1981 75.57095 58.14666 92.99524 48.9228017 102.21910
## Mar 1981 75.57095 54.88827 96.25363 43.9395191 107.20238
## Apr 1981 75.57095 52.07753 99.06437 39.6408645 111.50104
## May 1981 75.57095 49.56887 101.57304 35.8041968 115.33771
## Jun 1981 75.57095 47.28180 103.86010 32.3064347 118.83547
## Jul 1981 75.57095 45.16629 105.97561 29.0710371 122.07087
## Aug 1981 75.57095 43.18869 107.95321 26.0465568 125.09535
## Sep 1981 75.57095 41.32510 109.81680 23.1964414 127.94546
## Oct 1981 75.57095 39.55782 111.58409 20.4936156 130.64829
## Nov 1981 75.57095 37.87329 113.26861 17.9173604 133.22454
## Dec 1981 75.57095 36.26089 114.88101 15.4514023 135.69050
## Jan 1982 75.57095 34.71207 116.42984 13.0826820 138.05922
## Feb 1982 75.57095 33.21985 117.92205 10.8005303 140.34137
## Mar 1982 75.57095 31.77845 119.36346 8.5960973 142.54581
## Apr 1982 75.57095 30.38300 120.75890 6.4619454 144.67996
## May 1982 75.57095 29.02938 122.11253 4.3917526 146.75015
## Jun 1982 75.57095 27.71402 123.42788 2.3800915 148.76181
## Jul 1982 75.57095 26.43386 124.70804 0.4222614 150.71964
## Aug 1982 75.57095 25.18622 125.95568 -1.4858409 152.62774
## Sep 1982 75.57095 23.96874 127.17316 -3.3478225 154.48973
## Oct 1982 75.57095 22.77932 128.36258 -5.1668743 156.30878
## Nov 1982 75.57095 21.61612 129.52578 -6.9458356 158.08774
## Dec 1982 75.57095 20.47748 130.66443 -8.6872456 159.82915
11. For the same time series you used in exercise Q10, try using a non-seasonal model applied to the seasonally adjusted data obtained from STL. The stlf() function will make the calculations easy (with method=“arima”). Compare the forecasts with those obtained in exercise Q10. Which do you think is the best approach?
The STL + ARIMA(1,0,0) with non-zero mean gives the better forecast. It has lower AIC, ME, RMSE, MAE, MPE, MAPE, and MASE on the training set, and its forecasts look more accurate because they follow the seasonal pattern.
fit.e<-stlf(condmilk, method="arima")
plot(fit.e)
summary(fit.e)
##
## Forecast method: STL + ARIMA(1,0,0) with non-zero mean
##
## Model Information:
## Series: x
## ARIMA(1,0,0) with non-zero mean
##
## Coefficients:
## ar1 mean
## 0.8041 95.9237
## s.e. 0.0542 4.1212
##
## sigma^2 estimated as 84.99: log likelihood=-436.34
## AIC=878.68 AICc=878.88 BIC=887.04
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.184544 9.142056 6.425992 -1.284859 7.592609 0.3405184
## ACF1
## Training set -0.03551006
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1981 49.93980 38.12491 61.75468 31.87048 68.00911
## Feb 1981 48.74745 33.58681 63.90809 25.56125 71.93365
## Mar 1981 50.58182 33.60540 67.55824 24.61862 76.54501
## Apr 1981 64.11609 46.06263 82.16956 36.50570 91.72649
## May 1981 89.44786 70.73098 108.16473 60.82286 118.07285
## Jun 1981 111.63287 92.49930 130.76643 82.37060 140.89513
## Jul 1981 127.14110 107.74289 146.53932 97.47409 156.80811
## Aug 1981 135.12830 115.56088 154.69572 105.20252 165.05409
## Sep 1981 130.70262 111.02658 150.37867 100.61071 160.79454
## Oct 1981 114.03870 94.29274 133.78466 83.83986 144.23754
## Nov 1981 87.38616 67.59513 107.17720 57.11839 117.65393
## Dec 1981 69.29629 49.47617 89.11640 38.98403 99.60854
## Jan 1982 64.01635 44.17745 83.85525 33.67537 94.35733
## Feb 1982 60.06621 40.21518 79.91725 29.70667 90.42575
## Mar 1982 59.68307 39.82419 79.54195 29.31154 90.05461
## Apr 1982 71.43429 51.57034 91.29823 41.05500 101.81357
## May 1982 95.33231 75.46509 115.19954 64.94802 125.71661
## Jun 1982 116.36447 96.49513 136.23382 85.97694 146.75201
## Jul 1982 130.94572 111.07501 150.81643 100.55609 161.33535
## Aug 1982 138.18754 118.31595 158.05914 107.79656 168.57853
## Sep 1982 133.16251 113.29034 153.03468 102.77065 163.55437
## Oct 1982 116.01667 96.14413 135.88920 85.62424 146.40909
## Nov 1982 88.97662 69.10384 108.84940 58.58383 119.36941
## Dec 1982 70.57515 50.70221 90.44808 40.18212 100.96817