HW 2

Chapter 7 Exercises 1. a)

library(fpp2)

## Warning: package 'fpp2' was built under R version 3.4.4

## Loading required package: ggplot2

## Loading required package: forecast

## Warning: package 'forecast' was built under R version 3.4.4

## Loading required package: fma

## Loading required package: expsmooth

library(forecast)
library(fpp)

## Loading required package: lmtest

## Loading required package: zoo

## Warning: package 'zoo' was built under R version 3.4.3

## 
## Attaching package: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

## Loading required package: tseries

## Warning: package 'tseries' was built under R version 3.4.3

## 
## Attaching package: 'fpp'

## The following objects are masked from 'package:fpp2':
## 
##     ausair, ausbeer, austa, austourists, debitcards, departures,
##     elecequip, euretail, guinearice, oil, sunspotarea, usmelec

plot(books)

Both hardcover and paperback increase gradually with very high fluctuations b)

myfit1<-ses(books[,1], alpha=0.1, initial="simple")
myfit2<-ses(books[,1], alpha=0.2, initial="simple")
myfit3<-ses(books[,1], alpha=0.3, initial="simple")
myfit4<-ses(books[,1], alpha=0.4, initial="simple")
myfit5<-ses(books[,1], alpha=0.5, initial="simple")
myfit6<-ses(books[,1], alpha=0.6, initial="simple")
myfit7<-ses(books[,1], alpha=0.7, initial="simple")
myfit8<-ses(books[,1], alpha=0.8, initial="simple")
myfit9<-ses(books[,1], alpha=0.9, initial="simple")
sum1<-sum(books[,1]-fitted(myfit1))
sum2<-sum(books[,1]-fitted(myfit2))
sum3<-sum(books[,1]-fitted(myfit3))
sum4<-sum(books[,1]-fitted(myfit4))
sum5<-sum(books[,1]-fitted(myfit5))
sum6<-sum(books[,1]-fitted(myfit6))
sum7<-sum(books[,1]-fitted(myfit7))
sum8<-sum(books[,1]-fitted(myfit8))
sum9<-sum(books[,1]-fitted(myfit9))
alphas<-c(0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9)
sums<-c(sum1,sum2,sum3,sum4,sum5,sum6,sum7,sum8,sum9)
plot(alphas,sums,type='l')

It looks like a value of alpha between 0.1 and 0.2 gives the lowest SSE value. Also as alpha goes from 0.3 to 0.8, the SSE value continues to go down. c)

myfit10<-ses(books[,1], initial='simple', h=4)
myfit11<-ses(books[,1], alpha=0.15, initial='simple', h=4)
plot(myfit10, main="Optimal")

plot(myfit11, main="Alpha = 0.15")

myfit12<-ses(books[,1], initial='optimal', h=4)
plot(myfit12)

myfit12

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       207.1097 162.4882 251.7311 138.8670 275.3523
## 32       207.1097 161.8589 252.3604 137.9046 276.3147
## 33       207.1097 161.2382 252.9811 136.9554 277.2639
## 34       207.1097 160.6259 253.5935 136.0188 278.2005

This gives us a higher SSE value, which means it is less accurate.

paperback<-books[,1]
hardcover<-books[,2]
holt.fit.p<-holt(paperback, initial='simple', h=4)
sum.p<-sum(paperback-fitted(holt.fit.p))
sum.p

## [1] 233.0964

plot(holt.fit.p)

holt.fit.h<-holt(hardcover, initial='simple', h=4)
sum.h<-sum(hardcover-fitted(holt.fit.h))
sum.h

## [1] 215.796

plot(holt.fit.h)

holt.fit2.p<-holt(paperback, initial='optimal', h=4)
sum2.p<-sum(paperback-fitted(holt.fit2.p))
sum2.p

## [1] -111.5153

plot(holt.fit.p)

holt.fit2.h<-holt(hardcover, initial='optimal', h=4)
sum2.h<-sum(hardcover-fitted(holt.fit2.h))
sum2.h

## [1] -4.073645

plot(holt.fit2.h)

The SSE on the Holt model is larger than the ses model for the paperback data. For hardcover data, the optimal Holt model seems to produce the result with the lower SSE.

The ses model is better for the paperback data but the Holt model is better for the hardcover data.

predict(myfit11, paperback, interval="predict")

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       205.7319 160.8846 250.5791 137.1439 274.3198
## 32       205.7319 160.3829 251.0808 136.3766 275.0871
## 33       205.7319 159.8866 251.5771 135.6177 275.8460
## 34       205.7319 159.3957 252.0680 134.8669 276.5968

predict(holt.fit2.h, hardcover, interval="predict")

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 31       250.1739 212.7390 287.6087 192.9222 307.4256
## 32       253.4765 216.0416 290.9113 196.2248 310.7282
## 33       256.7791 219.3442 294.2140 199.5274 314.0308
## 34       260.0817 222.6468 297.5166 202.8300 317.3334

Shown are the 95% prediction interval for the ses paperback model and the Holt hardcover model.

plot(ukcars)

Very seasonal trend that begins with a decline and then trends mostly upward after 1980 with a slight plateau around 2000. There is a big drop around 2000 before the plateau.

ukcars.de <- stl(ukcars, s.window = "periodic" , robust = T)
ukcars.s <- ukcars.de$time.series[,1]
ukcars.sa <- ukcars - ukcars.s

fit.ukcars1<-holt(ukcars.sa, damped = T, h = 8)
summary(fit.ukcars1)

## 
## Forecast method: Damped Holt's method
## 
## Model Information:
## Damped Holt's method 
## 
## Call:
##  holt(y = ukcars.sa, h = 8, damped = T) 
## 
##   Smoothing parameters:
##     alpha = 0.5877 
##     beta  = 1e-04 
##     phi   = 0.9339 
## 
##   Initial states:
##     l = 328.2013 
##     b = -4.8363 
## 
##   sigma:  25.7677
## 
##      AIC     AICc      BIC 
## 1275.382 1276.175 1291.747 
## 
## Error measures:
##                    ME     RMSE      MAE      MPE     MAPE      MASE
## Training set 2.215326 25.19118 20.41856 0.197677 6.547458 0.6654308
##                    ACF1
## Training set 0.03051902
## 
## Forecasts:
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       407.3185 374.2959 440.3412 356.8148 457.8223
## 2005 Q3       407.3170 369.0123 445.6218 348.7349 465.8991
## 2005 Q4       407.3156 364.3723 450.2589 341.6395 472.9917
## 2006 Q1       407.3143 360.1857 454.4428 335.2373 479.3912
## 2006 Q2       407.3130 356.3407 458.2853 329.3576 485.2684
## 2006 Q3       407.3118 352.7652 461.8585 323.8899 490.7338
## 2006 Q4       407.3108 349.4092 465.2123 318.7580 495.8635
## 2007 Q1       407.3097 346.2367 468.3828 313.9066 500.7129

year.before<-rep(ukcars.de$time.series[110:113], "seasonal", 2)
fit.ukcars1.re <- fit.ukcars1$mean + year.before

fit.ukcars2<-holt(ukcars.sa, h = 8)
summary(fit.ukcars2)

## 
## Forecast method: Holt's method
## 
## Model Information:
## Holt's method 
## 
## Call:
##  holt(y = ukcars.sa, h = 8) 
## 
##   Smoothing parameters:
##     alpha = 0.6046 
##     beta  = 1e-04 
## 
##   Initial states:
##     l = 336.1832 
##     b = 0.8047 
## 
##   sigma:  25.7893
## 
##      AIC     AICc      BIC 
## 1274.614 1275.174 1288.251 
## 
## Error measures:
##                      ME     RMSE      MAE        MPE     MAPE     MASE
## Training set -0.2697968 25.32878 20.05443 -0.6328861 6.474306 0.653564
##                   ACF1
## Training set 0.0276835
## 
## Forecasts:
##         Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 2005 Q2       408.5868 375.5364 441.6372 358.0406 459.1330
## 2005 Q3       409.3885 370.7653 448.0116 350.3195 468.4574
## 2005 Q4       410.1901 366.7011 453.6791 343.6794 476.7008
## 2006 Q1       410.9917 363.1276 458.8559 337.7899 484.1936
## 2006 Q2       411.7934 359.9206 463.6662 332.4607 491.1260
## 2006 Q3       412.5950 357.0006 468.1894 327.5707 497.6193
## 2006 Q4       413.3966 354.3135 472.4797 323.0368 503.7564
## 2007 Q1       414.1983 351.8202 476.5763 318.7993 509.5973

fit.ukcars2.re <- fit.ukcars2$mean + year.before

fit.ukcars3<-ets(ukcars, model="ZZZ")
summary(fit.ukcars3)

## ETS(A,N,A) 
## 
## Call:
##  ets(y = ukcars, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.6199 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 314.2568 
##     s=-1.7579 -44.9601 21.1956 25.5223
## 
##   sigma:  25.9302
## 
##      AIC     AICc      BIC 
## 1277.752 1278.819 1296.844 
## 
## Training set error measures:
##                    ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
##                    ACF1
## Training set 0.02573334

The RMSE are so close in value that the difference is insignificant. So they give similar in-sample fits.

plot(fit.ukcars1.re)

plot(fit.ukcars2.re)

plot(predict(fit.ukcars3))

I think the third model shows the best forecast because the seasonality looks like it fits for the next two years, unlike the other models.

plot(visitors)

A very seasonal graph with one very high point each year and a nice upward trend. Looks like there was a big dip in 2003.

vis.fit1<-hw(visitors, h=24, seasonal="multiplicative")
summary(vis.fit1)

## 
## Forecast method: Holt-Winters' multiplicative method
## 
## Model Information:
## Holt-Winters' multiplicative method 
## 
## Call:
##  hw(y = visitors, h = 24, seasonal = "multiplicative") 
## 
##   Smoothing parameters:
##     alpha = 0.5653 
##     beta  = 0.0215 
##     gamma = 5e-04 
## 
##   Initial states:
##     l = 91.7613 
##     b = 2.4333 
##     s=0.935 1.0545 1.0841 0.9724 1.3037 1.0824
##            1.0258 0.9102 0.9304 1.0521 0.8518 0.7976
## 
##   sigma:  0.0565
## 
##      AIC     AICc      BIC 
## 2628.219 2630.976 2687.390 
## 
## Error measures:
##                       ME    RMSE      MAE        MPE     MAPE      MASE
## Training set -0.09495709 14.6622 10.97229 -0.3070136 4.188878 0.4051965
##                    ACF1
## Training set 0.07998858
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       363.6434 337.2901 389.9967 323.3395 403.9474
## Jun 2005       389.5974 356.8764 422.3184 339.5550 439.6399
## Jul 2005       482.7709 437.0247 528.5172 412.8081 552.7338
## Aug 2005       428.5001 383.4951 473.5050 359.6709 497.3293
## Sep 2005       420.4548 372.1236 468.7860 346.5386 494.3710
## Oct 2005       475.5963 416.3290 534.8636 384.9547 566.2379
## Nov 2005       503.4598 435.9471 570.9725 400.2080 606.7116
## Dec 2005       608.3779 521.1122 695.6436 474.9165 741.8393
## Jan 2006       455.1525 385.6597 524.6452 348.8725 561.4324
## Feb 2006       509.0590 426.6702 591.4478 383.0562 635.0618
## Mar 2006       496.8101 411.8771 581.7431 366.9162 626.7039
## Apr 2006       441.9586 362.3923 521.5249 320.2724 563.6448
## May 2006       378.2099 306.6932 449.7266 268.8345 487.5852
## Jun 2006       405.1516 324.8788 485.4244 282.3849 527.9183
## Jul 2006       501.9810 397.9885 605.9736 342.9382 661.0239
## Aug 2006       445.4943 349.1776 541.8110 298.1906 592.7980
## Sep 2006       437.0751 338.6243 535.5258 286.5076 587.6425
## Oct 2006       494.3345 378.5064 610.1626 317.1908 671.4783
## Nov 2006       523.2309 395.8806 650.5813 328.4653 717.9965
## Dec 2006       632.1913 472.5654 791.8172 388.0646 876.3180
## Jan 2007       472.9103 349.1848 596.6358 283.6884 662.1322
## Feb 2007       528.8557 385.6501 672.0612 309.8416 747.8697
## Mar 2007       516.0680 371.5831 660.5529 295.0974 737.0386
## Apr 2007       459.0351 326.2850 591.7853 256.0113 662.0590

plot(vis.fit1)

Multiplicative seasonality is necessary because the seasonality effect is increasing year by year.

vis.fit2<-hw(visitors, h=24, seasonal="multiplicative", damped=T)
summary(vis.fit2)

## 
## Forecast method: Damped Holt-Winters' multiplicative method
## 
## Model Information:
## Damped Holt-Winters' multiplicative method 
## 
## Call:
##  hw(y = visitors, h = 24, seasonal = "multiplicative", damped = T) 
## 
##   Smoothing parameters:
##     alpha = 0.6668 
##     beta  = 0.0043 
##     gamma = 1e-04 
##     phi   = 0.98 
## 
##   Initial states:
##     l = 91.5731 
##     b = 2.1794 
##     s=0.9303 1.0531 1.086 0.9822 1.3144 1.0796
##            1.025 0.9094 0.9322 1.05 0.8485 0.7895
## 
##   sigma:  0.0568
## 
##      AIC     AICc      BIC 
## 2628.489 2631.584 2691.140 
## 
## Error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 1.286455 14.41189 10.67154 0.2674105 4.065573 0.3940899
##                     ACF1
## Training set -0.02073956
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       356.7701 330.8024 382.7378 317.0559 396.4842
## Jun 2005       383.6952 350.0518 417.3386 332.2420 435.1484
## Jul 2005       475.1096 427.3402 522.8790 402.0526 548.1666
## Aug 2005       422.1456 374.8352 469.4560 349.7906 494.5006
## Sep 2005       412.0505 361.5166 462.5844 334.7656 489.3354
## Oct 2005       464.8117 403.2339 526.3896 370.6366 558.9869
## Nov 2005       489.8499 420.4189 559.2808 383.6644 596.0354
## Dec 2005       596.7632 506.9344 686.5921 459.3819 734.1446
## Jan 2006       446.1817 375.2749 517.0884 337.7391 554.6242
## Feb 2006       493.6657 411.2359 576.0956 367.6002 619.7313
## Mar 2006       479.0113 395.3086 562.7139 350.9991 607.0234
## Apr 2006       423.3885 346.2252 500.5518 305.3774 541.3996
## May 2006       359.5283 291.3831 427.6736 255.3093 463.7474
## Jun 2006       386.6002 310.5842 462.6163 270.3437 502.8567
## Jul 2006       478.6323 381.2146 576.0499 329.6448 627.6197
## Aug 2006       425.2109 335.7988 514.6230 288.4668 561.9549
## Sep 2006       414.9807 324.9827 504.9786 277.3407 552.6207
## Oct 2006       468.0489 363.5183 572.5794 308.1831 627.9146
## Nov 2006       493.1910 379.9208 606.4611 319.9592 666.4228
## Dec 2006       600.7496 459.0398 742.4594 384.0232 817.4760
## Jan 2007       449.1007 340.4175 557.7839 282.8841 615.3173
## Feb 2007       496.8290 373.6073 620.0506 308.3778 685.2801
## Mar 2007       482.0174 359.6150 604.4199 294.8190 669.2158
## Apr 2007       425.9909 315.3301 536.6517 256.7497 595.2321

vis.fit3<-hw(visitors, h=4, seasonal="multiplicative", exponential=T)
summary(vis.fit3)

## 
## Forecast method: Holt-Winters' multiplicative method with exponential trend
## 
## Model Information:
## Holt-Winters' multiplicative method with exponential trend 
## 
## Call:
##  hw(y = visitors, h = 4, seasonal = "multiplicative", exponential = T) 
## 
##   Smoothing parameters:
##     alpha = 0.5679 
##     beta  = 0.0074 
##     gamma = 1e-04 
## 
##   Initial states:
##     l = 91.0406 
##     b = 0.9981 
##     s=0.9252 1.0484 1.0821 0.9821 1.3155 1.0848
##            1.0297 0.9118 0.9346 1.0432 0.8541 0.7886
## 
##   sigma:  0.058
## 
##      AIC     AICc      BIC 
## 2638.824 2641.580 2697.994 
## 
## Error measures:
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set 0.00768142 14.62367 10.77736 0.05314097 4.095109 0.3979977
##                   ACF1
## Training set 0.0867989
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       362.3304 335.4685 389.5820 321.3737 403.7532
## Jun 2005       394.1483 362.1565 428.1964 345.3488 445.1593
## Jul 2005       483.5354 439.7726 530.9166 415.2524 556.4905
## Aug 2005       435.1428 391.6228 481.6929 370.4200 508.0293

vis.fit4<-hw(visitors, h=24, seasonal="multiplicative", damped=T, exponential=T)
summary(vis.fit4)

## 
## Forecast method: Damped Holt-Winters' multiplicative method with exponential trend
## 
## Model Information:
## Damped Holt-Winters' multiplicative method with exponential trend 
## 
## Call:
##  hw(y = visitors, h = 24, seasonal = "multiplicative", damped = T,  
## 
##  Call:
##      exponential = T) 
## 
##   Smoothing parameters:
##     alpha = 0.6396 
##     beta  = 6e-04 
##     gamma = 1e-04 
##     phi   = 0.9781 
## 
##   Initial states:
##     l = 91.5874 
##     b = 1.0348 
##     s=0.9311 1.0591 1.0894 0.9832 1.3179 1.0827
##            1.0275 0.9078 0.9295 1.0436 0.843 0.7853
## 
##   sigma:  0.0563
## 
##      AIC     AICc      BIC 
## 2626.047 2629.142 2688.698 
## 
## Error measures:
##                     ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 0.5892423 14.42116 10.73912 -0.152332 4.120193 0.3965854
##                    ACF1
## Training set 0.01586642
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## May 2005       354.3868 328.8222 379.2684 314.4382 392.8808
## Jun 2005       380.4925 348.9278 413.8040 331.2063 431.8646
## Jul 2005       471.1811 426.3488 518.2794 402.7662 542.9011
## Aug 2005       419.7887 374.2968 465.8355 352.5677 490.7062
## Sep 2005       410.0901 362.0845 459.8395 338.3850 487.9066
## Oct 2005       464.3236 407.2457 524.5896 379.9949 558.8060
## Nov 2005       489.3765 425.8685 557.5298 395.7256 598.9819
## Dec 2005       595.8404 513.3521 682.9942 475.5438 739.3642
## Jan 2006       444.6023 381.8894 512.7687 351.8604 557.5390
## Feb 2006       492.7808 420.7462 573.4019 386.7308 621.2911
## Mar 2006       479.1808 404.0254 559.7282 371.4940 611.8810
## Apr 2006       421.3934 353.1525 494.2297 324.8656 542.4569
## May 2006       355.5110 297.0878 418.1241 271.4682 461.9013
## Jun 2006       381.6731 316.9773 452.9432 285.6398 501.1251
## Jul 2006       472.6109 389.2276 562.5669 351.5185 624.4460
## Aug 2006       421.0346 344.6026 505.9809 310.1551 564.8602
## Sep 2006       411.2806 333.8458 495.0727 299.9146 557.3168
## Oct 2006       465.6420 375.0998 563.6521 336.7860 635.3601
## Nov 2006       490.7356 392.5876 597.1102 354.0243 672.5830
## Dec 2006       597.4588 475.7920 731.7897 423.0137 829.5879
## Jan 2007       445.7834 352.3946 546.9354 314.7862 624.4395
## Feb 2007       494.0612 389.1890 609.8498 344.5872 693.3602
## Mar 2007       480.3986 376.0182 592.2960 332.7863 676.1898
## Apr 2007       422.4409 328.7590 524.5611 289.1456 595.6167

The damped H-W multiplicative model has RMSE of 14.41, which is the lowest of the models. So I would prefer this one.

visitors1<-HoltWinters(visitors, seasonal="multiplicative")
visitors2<-ets(visitors, model="ZZZ")
lambda<-BoxCox.lambda(visitors)
boxcox.visitors<-BoxCox(visitors,lambda)
visitors3<-ets(boxcox.visitors, model="AAZ")
visitors4<-snaive(boxcox.visitors)
visitors.de<-stl(boxcox.visitors, s.window="periodic", robust=T)
visitors.s<-visitors.de$time.series[,1]
visitors.sa<-boxcox.visitors-visitors.s
visitors5<-ets(visitors.sa, model="ZZZ")

plot(residuals(visitors1))

plot(residuals(visitors2))

plot(residuals(visitors3))

plot(residuals(visitors4))

plot(residuals(visitors5))

plot(forecast(visitors1, h=24), main="Multiplicative HoltWinter")

plot(forecast(visitors2, h=24), main="ETS")

plot(forecast(visitors3, h=24), main="Additive ETS")

plot(forecast(visitors4, h=24), main="Seasonal Naive")

plot(forecast(visitors5, h=24), main="STL Decomposition")

The ETS and additive ETS models look better just going off residual plots. Looking at forecast plots, I can see that those two look like they are the most accurate too.

Chapter 8 Exercises 1. a) Yes, these three graphs all indicate that the data are white noise. The reason they are different is because as the number of random numbers increases, the dispersion will be tighter. b) Because each graph has a different number of observations. There is no autocorrelation since nothing goes outside the defined 95% level.

plot(ibmclose)

Acf(ibmclose)

Pacf(ibmclose)

In the ACF, the data decreases towards zero starting with a large positive value. In the PACF, we can see that there is autocorrelation, since there is lag outside the dotted lines. We need differencing in the model to make sure future forecasts are accurate.

bc1<-BoxCox.lambda(usnetelec)
bc1

## [1] 0.5167714

plot(BoxCox(usnetelec,bc1))

bc2<-BoxCox.lambda(usgdp)
bc2

## [1] 0.366352

plot(BoxCox(usgdp,bc2))

bc3<-BoxCox.lambda(mcopper)
bc3

## [1] 0.1919047

plot(BoxCox(mcopper,bc3))

bc4<-BoxCox.lambda(enplanements)
bc4

## [1] -0.2269461

plot(BoxCox(enplanements,bc4))

bc5<-BoxCox.lambda(visitors)
bc5

## [1] 0.2775249

plot(BoxCox(visitors,bc5))

plot(enplanements)

bc.fit<-BoxCox(enplanements, bc5)
acf(bc.fit)

pacf(bc.fit)

library(ZIM)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following objects are masked from 'package:fma':
## 
##     cement, housing, petrol

bc.fit<-bshift(bc.fit, k=1)
plot.ts(bc.fit)

Used bshift function, with the number of lags being 1.

y <- ts(numeric(100))
e <- rnorm(100)
for(i in 2:100)
  y[i] <- 0.6*y[i-1] + e[i]

plot(y)

#Looks like there isn't a pattern

ma(y, order=1)

## Time Series:
## Start = 1 
## End = 100 
## Frequency = 1 
##   [1]  0.00000000  0.53000101  0.98618414 -0.40326669 -0.19344982
##   [6]  0.69566245 -0.45390543  1.41374215  0.47392384 -0.57330584
##  [11]  0.62937923  0.89538587  2.54572312  1.21099958 -0.53531707
##  [16]  0.52744470 -0.52370011  0.01894972 -1.27142913 -0.67866770
##  [21] -1.08168296  0.29303504 -0.25832103 -1.47173458 -0.80886457
##  [26] -2.70939841 -2.78922692 -0.91878086 -1.74765035 -2.07358890
##  [31] -2.16632618 -2.09621697 -0.35606104  1.51039786  0.04401447
##  [36] -0.32376609 -1.03856477 -2.16625677 -1.85542551 -1.95724751
##  [41] -1.74396254 -0.61134055 -0.29816195 -0.66914401  0.73772742
##  [46]  0.37507895 -0.95991099 -0.32648015  1.19969661  0.03288077
##  [51]  1.18800564  1.81169984  0.19856782  0.10102151  0.43803036
##  [56]  1.72870410  0.49359908  1.78954607  1.30910608  1.38009676
##  [61] -0.04108009  0.66471129  2.42696077  1.54131838  2.38407247
##  [66]  2.31266004  1.05206493  2.11501499  1.15760541 -1.38690184
##  [71] -0.69041233  1.31461031  0.19438180  0.74866332 -0.43219593
##  [76]  0.32390143 -1.76278508 -0.16738295 -1.18806156 -1.70075677
##  [81] -0.59894944 -0.88958330  1.77720657 -2.00537679 -1.76138894
##  [86]  0.92107042  0.99578044 -1.00956992 -0.10236877 -2.01937331
##  [91] -1.55603031 -1.02226842  0.47466182 -0.27100437 -0.67247506
##  [96]  0.64492916  1.90957900 -1.55397022  0.18773369 -0.95109197

plot.ts(e)

#A little more orderly than y but still not really a pattern

plot(wmurders)

fit.6<-ts(wmurders, frequency = 12)
dec = stl(fit.6, s.window="periodic")
deseasonal <- seasadj(dec)
plot(dec)

fit.6a = diff(deseasonal, differences = 1)
plot(fit.6a)

arima.fit<-arima(fit.6a, order=c(1,1,1))
arima.fit

## 
## Call:
## arima(x = fit.6a, order = c(1, 1, 1))
## 
## Coefficients:
##           ar1      ma1
##       -0.2379  -0.8036
## s.e.   0.1616   0.1247
## 
## sigma^2 estimated as 0.03951:  log likelihood = 9.7,  aic = -13.41

Since d is greater than 1, no need for a constant

lag1<-bshift(wmurders, k = 1)
fit.lag <- arima(lag1,order=c(1,1,1))
fit.lag

## 
## Call:
## arima(x = lag1, order = c(1, 1, 1))
## 
## Coefficients:
##           ar1     ma1
##       -0.8252  0.6914
## s.e.   0.2002  0.2378
## 
## sigma^2 estimated as 0.04398:  log likelihood = 7.53,  aic = -9.06

plot(fit.lag$residuals)

Although they seem to be somewhat biased, the residuals seem satisfactory as they are random.

forecast<-forecast(fit.lag, h=3)
forecast

##    Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## 56       2.589533 2.320769 2.858298 2.178494 3.000573
## 57       2.649522 2.293950 3.005093 2.105722 3.193322
## 58       2.600018 2.158061 3.041976 1.924102 3.275934

#f)
plot(forecast)

Auto arima gives a different model (0,2,1) and I would choose this one since auto.arima automatically minimizes the AIC and BIC in the model

autofit<-auto.arima(deseasonal)
autofit

## Series: deseasonal 
## ARIMA(0,2,1)(1,0,0)[12] 
## 
## Coefficients:
##           ma1     sar1
##       -0.8540  -0.3922
## s.e.   0.0722   0.1402
## 
## sigma^2 estimated as 0.03637:  log likelihood=11.93
## AIC=-17.85   AICc=-17.36   BIC=-11.94

plot(austourists)

It looks like there are two seasons per year, one with a peak and one with a low point. The data has been going up over the years and gotten more consistent with the seasonality.

acf(austourists)

The data is not stationary and there are correlation signs in the five periods.

pacf(austourists)

This graph shows unstationary time series.

de = stl(austourists, s.window="periodic")
deseason <- seasadj(de)
fit = diff(deseason, differences = 4)
Acf(fit)

fit2 = arima(deseason, order=c(1,1,4))
fit2

## 
## Call:
## arima(x = deseason, order = c(1, 1, 4))
## 
## Coefficients:
##           ar1     ma1      ma2     ma3     ma4
##       -0.9403  0.4317  -0.5207  0.0133  0.3151
## s.e.   0.0974  0.1801   0.1628  0.1533  0.1416
## 
## sigma^2 estimated as 6.764:  log likelihood = -112.32,  aic = 236.65

autofit<-auto.arima(deseason)
autofit

## Series: deseason 
## ARIMA(0,1,1)(1,0,0)[4] with drift 
## 
## Coefficients:
##           ma1    sar1   drift
##       -0.8006  0.3483  0.4752
## s.e.   0.1704  0.1501  0.1157
## 
## sigma^2 estimated as 6.392:  log likelihood=-109.35
## AIC=226.7   AICc=227.65   BIC=234.1

It gives a different model, I would again choose the auto.arima model since it is systematically better.

fit<-bshift(austourists, k = 1)
plot.ts(fit)

auto.arima(fit)

## Series: fit 
## ARIMA(2,1,2) with drift 
## 
## Coefficients:
##           ar1      ar2      ma1     ma2   drift
##       -0.0739  -0.5926  -1.1530  0.6086  0.5045
## s.e.   0.1430   0.1360   0.1306  0.1494  0.2298
## 
## sigma^2 estimated as 34.56:  log likelihood=-146.19
## AIC=304.38   AICc=306.48   BIC=315.48

plot(usmelec)

ma1<-ma(usmelec, order=12)
plot(ma1)

Overall a very high positive trend over the years.

de<-stl(usmelec, s.window="periodic")
plot(de)

deseasonal <- seasadj(dec)
fit1 = diff(deseasonal, differences = 1)
plot(fit1)

Yes, the data needed to be seasonally adjusted.

acf(fit1)

Looking at the ACF plot, the data does not appear to be stationary.

fit.arima<-auto.arima(fit1,seasonal=FALSE)
fit.arima

## Series: fit1 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.8850
## s.e.   0.0695
## 
## sigma^2 estimated as 0.04187:  log likelihood=8.63
## AIC=-13.26   AICc=-13.02   BIC=-9.32

The ARIMA (0,1,1) model is the best according to AIC value.

plot(residuals(fit.arima))

summary(fit.arima)

## Series: fit1 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.8850
## s.e.   0.0695
## 
## sigma^2 estimated as 0.04187:  log likelihood=8.63
## AIC=-13.26   AICc=-13.02   BIC=-9.32
## 
## Training set error measures:
##                       ME      RMSE       MAE      MPE     MAPE      MASE
## Training set -0.01652062 0.2007849 0.1622658 41.55541 172.8469 0.5843122
##                    ACF1
## Training set -0.1911967

checkresiduals(fit.arima)

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(0,1,1)
## Q* = 32.384, df = 23, p-value = 0.09242
## 
## Model df: 1.   Total lags used: 24

This ARIMA model resembles white noise after looking at the residual diagnostics.

library(readr)
electricity_overview <- read_csv("~/Desktop/electricity-overview.csv")

## Parsed with column specification:
## cols(
##   Month = col_character(),
##   `Electricity: overview` = col_double()
## )

## Warning in rbind(names(probs), probs_f): number of columns of result is not
## a multiple of vector length (arg 2)

## Warning: 1 parsing failure.
## row # A tibble: 1 x 5 col     row   col  expected    actual                                 file expected   <int> <chr>     <chr>     <chr>                                <chr> actual 1   487  <NA> 2 columns 1 columns '~/Desktop/electricity-overview.csv' file # A tibble: 1 x 5

myts<-ts(electricity_overview$`Electricity: overview`, start = c(2011, 10), end = c(2013, 6), frequency = 12 )
fcast<-forecast(fit.arima, h=21)
plot(fcast)

#accuracy(myts, fcast)

I don’t think these forecast are accurate and/or usable

summary(mcopper)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   216.6   566.0   949.2   997.8  1262.5  4306.0

plot(mcopper)

transform<-BoxCox.lambda(mcopper)
transform

## [1] 0.1919047

tran.mc<-BoxCox(mcopper,transform)

fit.arima1<-auto.arima(tran.mc)
fit.arima1

## Series: tran.mc 
## ARIMA(0,1,1) 
## 
## Coefficients:
##          ma1
##       0.3720
## s.e.  0.0388
## 
## sigma^2 estimated as 0.04997:  log likelihood=45.05
## AIC=-86.1   AICc=-86.08   BIC=-77.43

fit.arima2<-arima(tran.mc, order=c(0,1,2))
fit.arima2

## 
## Call:
## arima(x = tran.mc, order = c(0, 1, 2))
## 
## Coefficients:
##          ma1      ma2
##       0.3704  -0.0037
## s.e.  0.0424   0.0406
## 
## sigma^2 estimated as 0.04988:  log likelihood = 45.05,  aic = -84.11

fit.arima3<-arima(tran.mc, order=c(0,1,3))
fit.arima3

## 
## Call:
## arima(x = tran.mc, order = c(0, 1, 3))
## 
## Coefficients:
##          ma1      ma2      ma3
##       0.3713  -0.0091  -0.0131
## s.e.  0.0423   0.0438   0.0406
## 
## sigma^2 estimated as 0.04987:  log likelihood = 45.1,  aic = -82.21

I chose my first model since that is what the auto.arima function gave us.

plot(fit.arima1$residuals)

Looks good.

forecast<-forecast(fit.arima1)
plot(forecast)

The forecast looks reasonable but not great. It could be improved.

myets<-ets(mcopper, model="ZZZ")
summary(myets)

## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = mcopper, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.9999 
##     beta  = 0.2141 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 265.0541 
##     b = -3.9142 
## 
##   sigma:  0.0632
## 
##      AIC     AICc      BIC 
## 8052.038 8052.189 8078.049 
## 
## Training set error measures:
##                    ME     RMSE      MAE       MPE     MAPE      MASE
## Training set 2.691483 78.87699 46.76047 0.1883633 4.503026 0.2103854
##                   ACF1
## Training set 0.1052005

forecast2<-forecast(myets)
plot(forecast2)

This forecast looks a lot more reasonable than the previous arima model.

plot(hsales)

seasonplot(hsales)

Looks like the data needs to be transformed to account for seasonality

de<-stl(hsales, s.window="periodic")
deseasonal<-seasadj(de)
plot(deseasonal)

adf.test(hsales, alternative="stationary")

## 
##  Augmented Dickey-Fuller Test
## 
## data:  hsales
## Dickey-Fuller = -3.9397, Lag order = 6, p-value = 0.01247
## alternative hypothesis: stationary

Acf(hsales)

Pacf(hsales)

myfit<-diff(deseasonal, differences=1)
Acf(myfit)

Now the data is non-stationary

myfit1<-auto.arima(deseasonal, seasonal=FALSE)
myfit1 #1584

## Series: deseasonal 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     mean
##       0.9013  52.5875
## s.e.  0.0255   2.5165
## 
## sigma^2 estimated as 18.2:  log likelihood=-789.02
## AIC=1584.04   AICc=1584.13   BIC=1594.89

myfit2<-arima(deseasonal, c(2,0,0))
myfit2 #1585

## 
## Call:
## arima(x = deseasonal, order = c(2, 0, 0))
## 
## Coefficients:
##          ar1     ar2  intercept
##       0.8743  0.0297    52.5967
## s.e.  0.0601  0.0602     2.5830
## 
## sigma^2 estimated as 18.06:  log likelihood = -788.9,  aic = 1585.8

myfit3<-arima(deseasonal, c(3,0,0))
myfit3 #1587

## 
## Call:
## arima(x = deseasonal, order = c(3, 0, 0))
## 
## Coefficients:
##          ar1      ar2     ar3  intercept
##       0.8731  -0.0056  0.0403    52.6270
## s.e.  0.0601   0.0800  0.0601     2.6777
## 
## sigma^2 estimated as 18.03:  log likelihood = -788.67,  aic = 1587.35

The arima (1,0,0) model gives us the lowest AIC

summary(myfit1)

## Series: deseasonal 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     mean
##       0.9013  52.5875
## s.e.  0.0255   2.5165
## 
## sigma^2 estimated as 18.2:  log likelihood=-789.02
## AIC=1584.04   AICc=1584.13   BIC=1594.89
## 
## Training set error measures:
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.05203778 4.251127 3.325061 -0.8537709 6.641165 0.4037355
##                     ACF1
## Training set -0.02679891

plot(myfit1$residuals)

checkresiduals((myfit1))

## 
##  Ljung-Box test
## 
## data:  Residuals from ARIMA(1,0,0) with non-zero mean
## Q* = 24.744, df = 22, p-value = 0.3095
## 
## Model df: 2.   Total lags used: 24

Looks good, residuals resemble white noise.

forecast<-forecast(myfit1, h=24)
plot(forecast)

ets.fit<-ets(deseasonal, model="ZZZ")
forecast2<-forecast(ets.fit, h=24)
plot(forecast2)

It looks like the ARIMA model gives us a better forecast than the ets model.

myfit11<-stlf(hsales, method="arima")
plot(myfit11)

summary(myfit11)

## 
## Forecast method: STL +  ARIMA(1,0,0) with non-zero mean
## 
## Model Information:
## Series: x 
## ARIMA(1,0,0) with non-zero mean 
## 
## Coefficients:
##          ar1     mean
##       0.9111  52.6254
## s.e.  0.0243   2.6104
## 
## sigma^2 estimated as 16:  log likelihood=-771.31
## AIC=1548.62   AICc=1548.71   BIC=1559.47
## 
## Error measures:
##                       ME     RMSE      MAE        MPE     MAPE      MASE
## Training set -0.05747535 3.985245 3.144856 -0.7507073 6.261222 0.3818547
##                     ACF1
## Training set -0.03586744
## 
## Forecasts:
##          Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
## Dec 1995       41.15016 36.02419 46.27613 33.31066 48.98965
## Jan 1996       45.54806 38.61350 52.48261 34.94257 56.15355
## Feb 1996       51.88185 43.74578 60.01791 39.43881 64.32488
## Mar 1996       61.85493 52.84230 70.86755 48.07131 75.63854
## Apr 1996       60.03290 50.35271 69.71308 45.22833 74.83746
## May 1996       58.54780 48.34658 68.74903 42.94638 74.14923
## Jun 1996       56.94542 46.33107 67.55977 40.71217 73.17866
## Jul 1996       54.34584 43.40038 65.29131 37.60620 71.08549
## Aug 1996       56.19905 44.98614 67.41196 39.05038 73.34772
## Sep 1996       50.37969 38.94951 61.80987 32.89874 67.86064
## Oct 1996       50.69073 39.08328 62.29819 32.93866 68.44280
## Nov 1996       44.00001 32.24742 55.75259 26.02598 61.97404
## Dec 1996       41.15016 29.27844 53.02188 22.99393 59.30639
## Jan 1997       45.54806 33.57835 57.51778 27.24196 63.85416
## Feb 1997       51.88185 39.83139 63.93231 33.45226 70.31144
## Mar 1997       61.85493 49.73785 73.97201 43.32345 80.38641
## Apr 1997       60.03290 47.86079 72.20501 41.41726 78.64854
## May 1997       58.54781 46.33020 70.76541 39.86259 77.23302
## Jun 1997       56.94542 44.69018 69.20066 38.20264 75.68820
## Jul 1997       54.34584 42.05944 66.63224 35.55542 73.13627
## Aug 1997       56.19905 43.88685 68.51126 37.36916 75.02894
## Sep 1997       50.37969 38.04611 62.71328 31.51710 69.24229
## Oct 1997       50.69074 38.33943 63.04204 31.80104 69.58043
## Nov 1997       44.00001 31.63401 56.36601 25.08784 62.91217

This STL ARIMA model is the best forecast of the three models in terms of the look of the plot and the errors.

HW 2

Tyler

4/22/2018