library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
## Loading required package: fma
## Loading required package: expsmooth
data(pigs)
## Warning in data(pigs): data set 'pigs' not found
m <- ses(pigs, h=4)
summary(m)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249 0.01282239
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
The output of the simple exponential smoothing tells that
The optimal value of α is 0.2971 and ℓ0 is 77260.056 The point forecast for the next 4 quaretrs is 98816.41
Lets plot the model and see how is the data and forecast looks like.
autoplot(m) +
autolayer(fitted(m), series='Modeled') +
xlab('Year') +
ylab(' Number of pigs') +
ggtitle('Number of Pigs slaughtered in Victoria each month and forecast')
s <- sd(resid(m))
s
## [1] 10273.69
mu <- m$mean[1]
mu + c(-1, 1) * 1.96 * s
## [1] 78679.97 118952.84
The sigma calculated by the ses model is 10308.58 and standard deviation of the residual is 10273.69 The 95 % interval for the first forecast is somewhat closer to the one calcualted by the SES()
data(books)
autoplot(books ,facet=TRUE) +
xlab('Year') +
ylab(' Sales ') +
ggtitle('Sales of books')
Observations
m_paper <- ses(books[,1], h=4)
m_hard <- ses(books[,2], h=4)
autoplot(m_paper) +
autolayer(fitted(m_paper), series='Modeled') +
labs(x='Day', y='Hardback books sold')
autoplot(m_hard) +
autolayer(fitted(m_hard), series='Modeled') +
labs(x='Day', y='Hardback books sold')
accuracy(m_paper)[1, 'RMSE']
## [1] 33.63769
accuracy(m_hard)[1, 'RMSE']
## [1] 31.93101
holt_paper <- holt(books[,1], h=4)
holt_hard <- holt(books[,2], h=4)
autoplot(holt_paper) +
autolayer(fitted(holt_paper), series='Modeled') +
labs(x='Day', y='Hardback books sold - Holt')
autoplot(holt_hard) +
autolayer(fitted(holt_hard), series='Modeled') +
labs(x='Day', y='Hardback books sold - Holt ')
### b. Compare the RMSE measures of Holt’s method for the two series to those of simple exponential smoothing in the previous question. (Remember that Holt’s method is using one more parameter than SES.) Discuss the merits of the two forecasting methods for these data sets.
accuracy(holt_paper)[1, 'RMSE']
## [1] 31.13692
accuracy(holt_hard)[1, 'RMSE']
## [1] 27.19358
SES can be used if the data is not trended, whereas holt linear picks the trend factor and used for forecasting.
The comparison in RMSE indicate that the holt linear method helped to become model better because of the data has an upward trend. ### d. Calculate a 95% prediction interval for the first forecast for each series, using the RMSE values and assuming normal errors. Compare your intervals with those produced using ses and holt.
summary(m_paper)
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = books[, 1], h = 4)
##
## Smoothing parameters:
## alpha = 0.1685
##
## Initial states:
## l = 170.8271
##
## sigma: 34.8183
##
## AIC AICc BIC
## 318.9747 319.8978 323.1783
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 7.175981 33.63769 27.8431 0.4736071 15.57784 0.7021303 -0.2117522
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 207.1097 162.4882 251.7311 138.8670 275.3523
## 32 207.1097 161.8589 252.3604 137.9046 276.3147
## 33 207.1097 161.2382 252.9811 136.9554 277.2639
## 34 207.1097 160.6259 253.5935 136.0188 278.2005
summary(holt_paper)
##
## Forecast method: Holt's method
##
## Model Information:
## Holt's method
##
## Call:
## holt(y = books[, 1], h = 4)
##
## Smoothing parameters:
## alpha = 1e-04
## beta = 1e-04
##
## Initial states:
## l = 170.699
## b = 1.2621
##
## sigma: 33.4464
##
## AIC AICc BIC
## 318.3396 320.8396 325.3456
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set -3.717178 31.13692 26.18083 -5.508526 15.58354 0.6602122
## ACF1
## Training set -0.1750792
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 31 209.4668 166.6035 252.3301 143.9130 275.0205
## 32 210.7177 167.8544 253.5811 145.1640 276.2715
## 33 211.9687 169.1054 254.8320 146.4149 277.5225
## 34 213.2197 170.3564 256.0830 147.6659 278.7735
m_paper_s <- sd(resid(m_paper))
m_paper_s
## [1] 33.42515
mu <- m_paper$mean[1]
mu + c(-1, 1) * 1.96 * m_paper_s
## [1] 141.5964 272.6230
mu
## [1] 207.1097
holt_paper_s <- sd(resid(holt_paper))
m_paper_s
## [1] 33.42515
mu <- holt_paper$mean[1]
mu + c(-1, 1) * 1.96 * holt_paper_s
## [1] 147.8390 271.0945
mu
## [1] 209.4668
[Hint: use h=100 when calling holt() so you can clearly see the differences between the various options when plotting the forecasts.]
Which model gives the best RMSE?
eggs <- force(eggs)
caret::BoxCoxTrans(eggs)
## Box-Cox Transformation
##
## 94 data points used to estimate Lambda
##
## Input data summary:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 62.27 148.87 209.15 206.15 276.71 358.78
##
## Largest/Smallest: 5.76
## Sample Skewness: -0.0758
##
## Estimated Lambda: 0.9
## With fudge factor, no transformation is applied
holt_1 <- holt(eggs, h=100)
holt_2 <- holt(eggs, damped = TRUE, h=100)
holt_3 <- holt(eggs, lambda = .9, h=100)
holt_4 <- holt(eggs, damped = TRUE, lambda = .9, h=100)
autoplot(eggs) +
autolayer(holt_1, series = "Holt - Normal", PI=FALSE) +
autolayer(holt_2, series = "Holt - Damped", PI=FALSE) +
autolayer(holt_3, series = "Holt - Boxcox", PI=FALSE) +
autolayer(holt_4, series = "Holt - Damped Boxcox", PI=FALSE)
accuracy(holt_1)[1, 'RMSE']
## [1] 26.58219
accuracy(holt_2)[1, 'RMSE']
## [1] 26.54019
accuracy(holt_3)[1, 'RMSE']
## [1] 26.66924
accuracy(holt_4)[1, 'RMSE']
## [1] 26.72889
From the RMSE comparison, we can see the Holt - Damped has lowest RMSE
retaildata <- readxl::read_excel('C:\\Users\\charls.joseph\\Documents\\Cuny\\Data624\\week1\\retail.xlsx', skip=1)
myts <- ts(retaildata[,"A3349397X"],
frequency=12, start=c(1982,4))
autoplot(myts) +
xlab('Year') +
ylab("Turn Over")
The seasonality increases over the time. If the seasonality over time, we should use the multiplicative model.
myts_holt_wind <- hw(myts, seasonal = "multiplicative")
myts_holt_wind_damp <- hw(myts, seasonal = "multiplicative", damped = TRUE)
autoplot(window(myts, start=2012)) +
autolayer(myts_holt_wind, PI=FALSE, series = "Holt-Winters - multiplicative ") +
autolayer(myts_holt_wind_damp, PI=FALSE, series = "Holt-Winters - multiplicative ( Damped) ")
accuracy(myts_holt_wind)[1, 'RMSE']
## [1] 27.04837
accuracy(myts_holt_wind_damp)[1, 'RMSE']
## [1] 26.81287
Based on RMSE comparison on both method,Holt-Winters - multiplicative ( Damped) yields good results.
checkresiduals(myts_holt_wind)
##
## Ljung-Box test
##
## data: Residuals from Holt-Winters' multiplicative method
## Q* = 202.84, df = 8, p-value < 2.2e-16
##
## Model df: 16. Total lags used: 24
checkresiduals(myts_holt_wind_damp)
##
## Ljung-Box test
##
## data: Residuals from Damped Holt-Winters' multiplicative method
## Q* = 143.18, df = 7, p-value < 2.2e-16
##
## Model df: 17. Total lags used: 24
train_myts <- window(myts, end = c(2010, 12))
test_myts <- window(myts, start = 2011)
model_hw <- hw(train_myts, h = 36, seasonal = "multiplicative", damped = TRUE)
model_sn <- snaive(train_myts, h = 36)
accuracy(model_hw, test_myts)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.118499 27.17563 19.28539 0.4383733 3.642098 0.4724238
## Test set -7.187395 28.11861 23.58354 -0.8213246 2.218750 0.5777134
## ACF1 Theil's U
## Training set -0.07586747 NA
## Test set 0.66881188 0.2315329
accuracy(model_sn, test_myts)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 31.38979 51.12549 40.82222 5.159977 7.098791 1.0000000 0.6439376
## Test set -13.52500 29.73431 23.76389 -1.375617 2.216215 0.5821312 0.4435017
## Theil's U
## Training set NA
## Test set 0.2411057
The RMSE for Holt-Winters - multiplicative(damped) for test set is 28.11 where as RMSE for seasonal naive is 29.74. Hence Holt-Winters - multiplicative(damped) is better.
lambda <- BoxCox.lambda(train_myts)
lambda
## [1] 0.4996717
stl <- stlf(train_myts, lambda =lambda)
accuracy(stl, test_myts)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.3529155 20.90509 14.61170 -0.135106 2.730995 0.357935
## Test set -69.7814339 77.00923 69.78143 -6.557786 6.557786 1.709398
## ACF1 Theil's U
## Training set -0.009143406 NA
## Test set 0.792592502 0.6643801