Data 624 - Predictive Analytics
Chapter 7
library(fpp2)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## ── Attaching packages ────────────────────────────────────────────── fpp2 2.4 ──
## ✓ ggplot2 3.3.3 ✓ fma 2.4
## ✓ forecast 8.13 ✓ expsmooth 2.3
##
help(pigs)
fc <- ses(pigs, h=4)
summary(fc)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249 0.01282239
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
fcpt <- 98816.41
s <- sd(residuals(fc))
paste('Point forecst:', fcpt)
## [1] "Point forecst: 98816.41"
paste('Lo 95:', fcpt - 1.96 * s)
## [1] "Lo 95: 78679.9711418255"
paste('Hi 95', fcpt + 1.96 * s)
## [1] "Hi 95 118952.848858174"
help(books)
autoplot(books) +
ggtitle('Sales of Books at a Store') +
xlab('Day') +
ylab('Books Sold')
sesfitp <- ses(books[,1])
sesfith <- ses(books[,2])
summary(sesfitp)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = books[, 1])
##
## Smoothing parameters:
## alpha = 0.1685
##
## Initial states:
## l = 170.8271
##
## sigma: 34.8183
##
## AIC AICc BIC
## 318.9747 319.8978 323.1783
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303 -0.2117522
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.1097 162.4882 251.7311 138.8670 275.3523
## 32 207.1097 161.8589 252.3604 137.9046 276.3147
## 33 207.1097 161.2382 252.9811 136.9554 277.2639
## 34 207.1097 160.6259 253.5935 136.0188 278.2005
## 35 207.1097 160.0215 254.1979 135.0945 279.1249
## 36 207.1097 159.4247 254.7946 134.1818 280.0375
## 37 207.1097 158.8353 255.3840 133.2804 280.9389
## 38 207.1097 158.2531 255.9663 132.3899 281.8294
## 39 207.1097 157.6777 256.5417 131.5099 282.7094
## 40 207.1097 157.1089 257.1105 130.6400 283.5793
autoplot(sesfitp) +
autolayer(fitted(sesfitp), series='Fitted') +
ggtitle('SES Fit and Forecast of Paperback Sales') +
xlab('Day') +
ylab('Books Sale')
summary(sesfith)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = books[, 2])
##
## Smoothing parameters:
## alpha = 0.3283
##
## Initial states:
## l = 149.2861
##
## sigma: 33.0517
##
## AIC AICc BIC
## 315.8506 316.7737 320.0542
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 9.166735 31.93101 26.77319 2.636189 13.39487 0.7987887 -0.1417763
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 239.5601 197.2026 281.9176 174.7799 304.3403
## 32 239.5601 194.9788 284.1414 171.3788 307.7414
## 33 239.5601 192.8607 286.2595 168.1396 310.9806
## 34 239.5601 190.8347 288.2855 165.0410 314.0792
## 35 239.5601 188.8895 290.2306 162.0662 317.0540
## 36 239.5601 187.0164 292.1038 159.2014 319.9188
## 37 239.5601 185.2077 293.9124 156.4353 322.6848
## 38 239.5601 183.4574 295.6628 153.7584 325.3618
## 39 239.5601 181.7600 297.3602 151.1625 327.9577
## 40 239.5601 180.1111 299.0091 148.6406 330.4795
autoplot(sesfith) +
autolayer(fitted(sesfith), series='Fitted') +
ggtitle('SES Fit and Forecast of Hardcover Sales') +
xlab('Day') +
ylab('Books Sale')
round(accuracy(sesfitp), 2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.18 33.64 27.84 0.47 15.58 0.7 -0.21
round(accuracy(sesfith), 2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 9.17 31.93 26.77 2.64 13.39 0.8 -0.14
holtfitp <- holt(books[,1], h=4)
forecast(holtfitp)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.4668 166.6035 252.3301 143.9130 275.0205
## 32 210.7177 167.8544 253.5811 145.1640 276.2715
## 33 211.9687 169.1054 254.8320 146.4149 277.5225
## 34 213.2197 170.3564 256.0830 147.6659 278.7735
holtfith <- holt(books[,2], h=4)
forecast(holtfith)
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 250.1739 212.7390 287.6087 192.9222 307.4256
## 32 253.4765 216.0416 290.9113 196.2248 310.7282
## 33 256.7791 219.3442 294.2140 199.5274 314.0308
## 34 260.0817 222.6468 297.5166 202.8300 317.3334
round(accuracy(holtfitp), 2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -3.72 31.14 26.18 -5.51 15.58 0.66 -0.18
round(accuracy(holtfith), 2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set -0.14 27.19 23.16 -2.11 12.16 0.69 -0.03
sesfitp <- ses(books[,1], h=4)
sesfith <- ses(books[,1], h=4)
autoplot(books[,1]) +
autolayer(holtfitp, series='Holts Method', PI=F) +
autolayer(sesfitp, series='Simple ETS', PI=F) +
ggtitle('Paperback Sales') +
xlab('Day') +
ylab('Books Sales') +
guides(colour=guide_legend(title="Forecast"))
autoplot(books[,2]) +
autolayer(holtfith, series='Holts Method', PI=F) +
autolayer(sesfith, series='Simple ETS', PI=F) +
ggtitle('Hardcover Sales') +
xlab('Day') +
ylab('Books Sales') +
guides(colour=guide_legend(title="Forecast"))
rmsep <- 31.14
ptholtp <- 209.4668
ptsesp <- 207.1097
lowerp <- ptholtp - 1.96 * rmsep
upperp <- ptholtp + 1.96 * rmsep
holtlowerp <- 143.9130
holtupperp <- 275.0205
seslowerp <- 138.8670
sesupperp <- 275.3523
rmseh <- 27.19
ptholth <- 250.1739
ptsesh <- 239.5601
lowerh <- ptholth - 1.96 * rmseh
upperh <- ptholth + 1.96 * rmseh
holtlowerh <- 192.9222
holtupperh <- 307.4256
seslowerh <- 174.7799
sesupperh <- 304.3403
df <- data.frame(c(ptholtp, lowerp, upperp), c(ptholtp, holtlowerp, holtupperp), c(ptsesp, seslowerp, sesupperp), c(ptholth, lowerh, upperh), c(ptholth, holtlowerh, holtupperh), c(ptsesh, seslowerh, sesupperh))
df[4,] <- df[3,] - df[2,]
colnames(df) <- c('Calculated', 'R - holt', 'R - ses', 'Calculated', 'R - holt', 'R - ses')
row.names(df) <- c('Point Forecast', 'Lower 95%', 'Upper 95%', 'Interval Range')
library(kableExtra)
kable(df) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive")) %>%
add_header_above(c(' ', 'Paperback Forecast' = 3, 'Hardcover Forecost' = 3))
Calculated | R - holt | R - ses | Calculated | R - holt | R - ses | |
---|---|---|---|---|---|---|
Point Forecast | 209.4668 | 209.4668 | 207.1097 | 250.1739 | 250.1739 | 239.5601 |
Lower 95% | 148.4324 | 143.9130 | 138.8670 | 196.8815 | 192.9222 | 174.7799 |
Upper 95% | 270.5012 | 275.0205 | 275.3523 | 303.4663 | 307.4256 | 304.3403 |
Interval Range | 122.0688 | 131.1075 | 136.4853 | 106.5848 | 114.5034 | 129.5604 |
help(eggs)
default <- holt(eggs, h=100)
damped <- holt(eggs, h=100, damped = T)
exponential <- holt(eggs, h=100, exponential = T)
lambda <- holt(eggs, h=100, lambda = 'auto', biasadj = T)
da_ex <- holt(eggs, h=100, exponential = T, damped = T)
da_la <- holt(eggs, h=100, damped = T, lambda = 'auto', biasadj = T)
autoplot(eggs) +
autolayer(default, series='Default', PI=F) +
autolayer(damped, series='Damped', PI=F) +
autolayer(exponential, series='Exponential', PI=F) +
autolayer(lambda, series='Box-Cox Transformed', PI=F) +
autolayer(da_ex, series='Damped & Exponential', PI=F) +
autolayer(da_la, series='Damped & Box-Cox', PI=F) +
ggtitle('Forecast of US Eggs Prices') +
xlab('Year') +
ylab('Price of Dozen Eggs')
df <- rbind(accuracy(default), accuracy(damped), accuracy(exponential), accuracy(lambda), accuracy(da_ex), accuracy(da_la))
row.names(df) <- c('Default', 'Damped', 'Exponential', 'Box-Cox', 'Damped & Exponential', 'Damped & Box-Cox')
kable(df) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed", "responsive"))
ME | RMSE | MAE | MPE | MAPE | MASE | ACF1 | |
---|---|---|---|---|---|---|---|
Default | 0.0449909 | 26.58219 | 19.18491 | -1.142201 | 9.653791 | 0.9463626 | 0.0134820 |
Damped | -2.8914955 | 26.54019 | 19.27950 | -2.907633 | 10.018944 | 0.9510287 | -0.0031954 |
Exponential | 0.4918791 | 26.49795 | 19.29399 | -1.263235 | 9.766049 | 0.9517436 | 0.0103908 |
Box-Cox | -0.2015298 | 26.38689 | 18.99362 | -1.630430 | 9.713172 | 0.9369265 | 0.0383996 |
Damped & Exponential | -0.9089678 | 26.59113 | 19.54973 | -2.125756 | 10.023283 | 0.9643590 | 0.0137612 |
Damped & Box-Cox | -1.8062134 | 26.58589 | 19.55896 | -2.584250 | 10.117605 | 0.9648141 | 0.0053221 |
retaildata <- readxl::read_excel("retail.xlsx", skip=1)
myts <- ts(retaildata[,"A3349873A"],
frequency=12, start=c(1982,4))
autoplot(myts) +
ggtitle('Turnover; New South Wales; Other retailing') +
ylab('Turnover')
fit1 <- hw(myts, seasonal='multiplicative', damped=F)
summary(fit1)
##
## Forecast method: Holt-Winters' multiplicative method
##
## Model Information:
## Holt-Winters' multiplicative method
##
## Call:
## hw(y = myts, seasonal = "multiplicative", damped = F)
##
## Smoothing parameters:
## alpha = 0.504
## beta = 1e-04
## gamma = 0.4578
##
## Initial states:
## l = 62.8715
## b = 0.8152
## s = 0.9514 0.886 0.9114 1.5529 1.0184 0.9813
## 0.9589 0.9898 0.9593 0.8883 0.9094 0.9929
##
## sigma: 0.0513
##
## AIC AICc BIC
## 4040.084 4041.770 4107.112
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1170648 13.29378 8.991856 -0.1217735 3.918351 0.4748948
## ACF1
## Training set 0.08635577
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2014 390.3784 364.7154 416.0413 351.1303 429.6264
## Feb 2014 391.1995 362.4039 419.9951 347.1605 435.2386
## Mar 2014 427.9732 393.4376 462.5088 375.1555 480.7909
## Apr 2014 394.1500 359.7834 428.5167 341.5908 446.7093
## May 2014 403.4598 365.8492 441.0704 345.9394 460.9802
## Jun 2014 392.3988 353.6036 431.1940 333.0667 451.7309
## Jul 2014 410.9940 368.1710 453.8169 345.5019 476.4860
## Aug 2014 405.6186 361.3056 449.9315 337.8478 473.3893
## Sep 2014 416.5669 369.0509 464.0828 343.8975 489.2362
## Oct 2014 437.9753 385.9982 489.9524 358.4832 517.4674
## Nov 2014 585.8096 513.6953 657.9240 475.5203 696.0990
## Dec 2014 577.7851 504.1964 651.3737 465.2409 690.3292
## Jan 2015 399.6599 342.8992 456.4206 312.8519 486.4679
## Feb 2015 400.4831 342.1250 458.8412 311.2321 489.7341
## Mar 2015 438.1104 372.6939 503.5270 338.0644 538.1564
## Apr 2015 403.4687 341.8115 465.1258 309.1722 497.7652
## May 2015 412.9807 348.4595 477.5019 314.3041 511.6574
## Jun 2015 401.6414 337.5529 465.7300 303.6264 499.6565
## Jul 2015 420.6566 352.1637 489.1496 315.9057 525.4076
## Aug 2015 415.1371 346.2205 484.0538 309.7383 520.5360
## Sep 2015 426.3243 354.2214 498.4272 316.0524 536.5961
## Oct 2015 448.2152 371.0413 525.3891 330.1879 566.2425
## Nov 2015 599.4807 494.4676 704.4937 438.8771 760.0842
## Dec 2015 591.2440 485.9383 696.5497 430.1928 752.2952
fit2 <- hw(myts, seasonal='multiplicative', damped=T)
summary(fit2)
##
## Forecast method: Damped Holt-Winters' multiplicative method
##
## Model Information:
## Damped Holt-Winters' multiplicative method
##
## Call:
## hw(y = myts, seasonal = "multiplicative", damped = T)
##
## Smoothing parameters:
## alpha = 0.5524
## beta = 2e-04
## gamma = 0.4476
## phi = 0.9328
##
## Initial states:
## l = 62.9106
## b = 0.6659
## s = 0.8986 0.8635 0.8733 1.5546 1.1214 1.0392
## 1.0033 0.9655 0.9238 0.8886 0.9303 0.9378
##
## sigma: 0.0527
##
## AIC AICc BIC
## 4055.981 4057.871 4126.952
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.414869 13.30494 9.042151 0.6105987 3.959617 0.4775511 0.04077895
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 2014 391.3161 364.8883 417.7439 350.8983 431.7339
## Feb 2014 392.2638 361.9876 422.5401 345.9603 438.5673
## Mar 2014 427.6983 391.0174 464.3792 371.5997 483.7969
## Apr 2014 391.6405 354.9948 428.2863 335.5957 447.6853
## May 2014 399.2265 358.9916 439.4614 337.6925 460.7605
## Jun 2014 387.6109 345.9350 429.2868 323.8731 451.3487
## Jul 2014 405.5421 359.3641 451.7201 334.9189 476.1653
## Aug 2014 399.6910 351.7735 447.6085 326.4076 472.9745
## Sep 2014 410.5242 358.9526 462.0958 331.6522 489.3962
## Oct 2014 430.9373 374.4342 487.4405 344.5233 517.3514
## Nov 2014 574.0409 495.7448 652.3370 454.2974 693.7845
## Dec 2014 564.4915 484.6264 644.3565 442.3484 686.6345
## Jan 2015 391.6898 330.2053 453.1743 297.6574 485.7222
## Feb 2015 392.6313 329.2488 456.0139 295.6961 489.5666
## Mar 2015 428.0919 357.1258 499.0579 319.5587 536.6251
## Apr 2015 391.9948 325.3521 458.6376 290.0736 493.9161
## May 2015 399.5818 329.9960 469.1677 293.1595 506.0042
## Jun 2015 387.9507 318.8208 457.0805 282.2257 493.6756
## Jul 2015 405.8924 331.9582 479.8266 292.8198 518.9650
## Aug 2015 400.0316 325.6130 474.4502 286.2182 513.8450
## Sep 2015 410.8695 332.8718 488.8672 291.5822 530.1567
## Oct 2015 431.2954 347.8100 514.7807 303.6156 558.9752
## Nov 2015 574.5123 461.1985 687.8262 401.2138 747.8109
## Dec 2015 564.9500 451.4869 678.4131 391.4232 738.4768
autoplot(myts) +
autolayer(fit1, PI=F, series='Not damped') +
autolayer(fit2, PI=F, series='Damped Trend') +
guides(colour=guide_legend(title="Forecast")) +
ggtitle("Turnover Forecast - Holt-Winter's Multiplicative Method") +
ylab('Turnover')
accuracy(fit1)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1170648 13.29378 8.991856 -0.1217735 3.918351 0.4748948
## ACF1
## Training set 0.08635577
accuracy(fit2)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 1.414869 13.30494 9.042151 0.6105987 3.959617 0.4775511 0.04077895
checkresiduals(fit1)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 40.405, df = 8, p-value = 2.692e-06
##
## Model df: 16. Total lags used: 24
autoplot(residuals(fit1)) +
ggtitle('Residuals') +
ylab('')
train <- window(myts, end=c(2010, 12))
test <- window(myts, start=c(2011,1))
autoplot(myts) +
autolayer(train, series="Training") +
autolayer(test, series="Test") +
ggtitle('Train-Test Split') +
ylab('Turnover')
fit_snaive <- snaive(train, h=36)
fit1_hw <- hw(train, h=36, seasonal='multiplicative', damped=F)
fit2_hw <- hw(train, h=36, seasonal='additive', damped=F, lambda='auto')
autoplot(test, series='Ground Truth') +
autolayer(fit_snaive, series='Seasonal Naive Forecast', PI=F) +
autolayer(fit1_hw, series="Holt-Winter's Forecast 1", PI=F) +
autolayer(fit2_hw, series="Holt-Winter's Forecast 2", PI=F) +
guides(colour=guide_legend(title="Legend")) +
ggtitle('Test Set Forecast') +
ylab('Turnover')
library(Metrics)
##
## Attaching package: 'Metrics'
## The following object is masked from 'package:forecast':
##
## accuracy
df <- c(rmse(test, fit_snaive$mean), rmse(test, fit1_hw$mean), rmse(test, fit2_hw$mean))
names(df) <- c('Seasonal Naive Forecast', "Holt-Winter's Multiplicative Method",
"Holt-Winter's Additive Method with Box-Cox Transform")
df
## Seasonal Naive Forecast
## 100.00869
## Holt-Winter's Multiplicative Method
## 94.80662
## Holt-Winter's Additive Method with Box-Cox Transform
## 99.21057
train <- ts(as.vector(myts), start=c(1982,4), end=c(2010,12), frequency = 12)
lambda <- BoxCox.lambda(train)
paste('Best lambda for Box-Cox Transformation is found to be:', lambda)
## [1] "Best lambda for Box-Cox Transformation is found to be: 0.197968156308491"
train.bc <- BoxCox(train, lambda)
fit.stl <- stl(train.bc, s.window='periodic', robust=T)
autoplot(fit.stl) +
ggtitle('STL Decomposition')
train.bc.seadj <- train.bc - fit.stl$time.series[,'seasonal']
autoplot(train.bc, series='Unadjusted Data') +
autolayer(train.bc.seadj, series='Seasonally Adjusted') +
ylab('')
fit.ets <- ets(train.bc.seadj)
summary(fit.ets)
## ETS(M,A,N)
##
## Call:
## ets(y = train.bc.seadj)
##
## Smoothing parameters:
## alpha = 0.6333
## beta = 1e-04
##
## Initial states:
## l = 6.567
## b = 0.0134
##
## sigma: 0.0129
##
## AIC AICc BIC
## 543.5141 543.6911 562.7319
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -0.003878286 0.1172707 0.0899321 -0.03866332 0.9882063 0.3832231
## ACF1
## Training set 0.01864534
fc1 <- forecast(fit.ets, h=36)$mean
fc1 <- InvBoxCox(fc1, lambda=lambda)
fc1
## Jan Feb Mar Apr May Jun Jul Aug
## 2011 280.4265 281.6456 282.8690 284.0966 285.3286 286.5648 287.8053 289.0500
## 2012 295.3390 296.6098 297.8850 299.1647 300.4487 301.7371 303.0300 304.3273
## 2013 310.8809 312.2051 313.5338 314.8670 316.2048 317.5472 318.8941 320.2456
## Sep Oct Nov Dec
## 2011 290.2992 291.5526 292.8104 294.0725
## 2012 305.6291 306.9353 308.2460 309.5612
## 2013 321.6016 322.9623 324.3276 325.6975
autoplot(test, series='Ground Truth') +
autolayer(fc1, series='Forecast') +
ylab('')
rmse(test, fc1)
## [1] 96.15759