library(fpp2)
## Loading required package: ggplot2
## Loading required package: forecast
## Loading required package: fma
## Loading required package: expsmooth
library(seasonal)
# Plot the series and discuss the main features of the data.
autoplot(books) + xlab("days") + ylab("counts")
# The plot shows an upward trend for both paperback and hardcover sales. Since there is only one month of daily data, it is hard to tell whether there is any monthly, quarterly, or annual seasonality.
#Use the ses() function to forecast each series, and plot the forecasts.
books_ts <- ts(books, frequency = 1)
paper <- books_ts[, 1]
hard <- books_ts[, 2]
ses_paper <- ses(paper, h = 4)
ses_hard <- ses(hard, h = 4)
autoplot(paper, series = "Paperback") +
autolayer(ses_paper, series = "Paperback", PI = FALSE) +
autolayer(hard, series = "Hardcover") +
autolayer(ses_hard, series = "Hardcover", PI = FALSE) +
ylab("Sales amount") +
ggtitle("Sales of paperback and hardcover books")
# The plot shows flat forecasts from the SES method, as expected since SES point forecasts are constant.
#Compute the RMSE values for the training data in each case.
sqrt(mean(ses_paper$residuals^2))
## [1] 33.63769
sqrt(mean(ses_hard$residuals^2))
## [1] 31.93101
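# As an illustrative cross-check (not part of the original answer), accuracy()
# reports the same training RMSE without the manual calculation:
accuracy(ses_paper)["Training set", "RMSE"]
accuracy(ses_hard)["Training set", "RMSE"]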
#Apply Holt’s linear method to the paperback and hardback series and compute four-day forecasts in each case.
paper_Holt <- holt(paper, h = 4)
hard_Holt <- holt(hard, h = 4)
autoplot(paper)+autolayer(paper_Holt)
autoplot(hard)+autolayer(hard_Holt)
#Compare the RMSE measures of Holt’s method for the two series to those of simple exponential smoothing in the previous question.
s_paperback <- sqrt(mean(paper_Holt$residuals^2))
s_hardcover <- sqrt(mean(hard_Holt$residuals^2))
s_paperback
## [1] 31.13692
s_hardcover
## [1] 27.19358
# For both series, the RMSE values are lower when Holt's method is used.
# If the data have an approximately linear trend, Holt's linear method is worth the one extra parameter it needs compared with SES. If there is no clear trend, SES is preferable because it keeps the model simpler.
#Compare the forecasts for the two series using both methods. Which do you think is best?
# The Holt forecasts look best for the hardcover series: its RMSE is lower than the paperback RMSE, and the paperback forecasts do not reflect any clear trend or pattern.
#Calculate a 95% prediction interval for the first forecast for each series, using the RMSE values and assuming normal errors. Compare your intervals with those produced using ses and holt.
writeLines("95% PI of paperback sales calculated by holt function")
## 95% PI of paperback sales calculated by holt function
paper_Holt$upper[1,"95%"]
## 95%
## 275.0205
paper_Holt$lower[1,"95%"]
## 95%
## 143.913
writeLines("95% PI of paperback sales calculated by formula")
## 95% PI of paperback sales calculated by formula
paper_Holt$mean[1] + 1.96*s_paperback
## [1] 270.4951
paper_Holt$mean[1] - 1.96*s_paperback
## [1] 148.4384
writeLines("95% PI of paperback sales calculated by ses function")
## 95% PI of paperback sales calculated by ses function
ses_paper$upper[1,"95%"]
## 95%
## 275.3523
ses_paper$lower[1,"95%"]
## 95%
## 138.867
writeLines("95% PI of paperback sales calculated by formula")
## 95% PI of paperback sales calculated by formula
ses_paper$mean[1] + 1.96*s_paperback
## [1] 268.138
ses_paper$mean[1] - 1.96*s_paperback
## [1] 146.0813
writeLines("95% PI of hardcover sales calculated by holt function")
## 95% PI of hardcover sales calculated by holt function
hard_Holt$upper[1, "95%"]
## 95%
## 307.4256
hard_Holt$lower[1, "95%"]
## 95%
## 192.9222
writeLines("95% PI of hardcover sales calculated by formula")
## 95% PI of hardcover sales calculated by formula
hard_Holt$mean[1] + 1.96*s_hardcover
## [1] 303.4733
hard_Holt$mean[1] - 1.96*s_hardcover
## [1] 196.8745
writeLines("95% PI of paperback sales calculated by ses function")
## 95% PI of paperback sales calculated by ses function
ses_hard$upper[1,"95%"]
## 95%
## 304.3403
ses_hard$lower[1,"95%"]
## 95%
## 174.7799
writeLines("95% PI of paperback sales calculated by formula")
## 95% PI of paperback sales calculated by formula
ses_hard$mean[1] + 1.96*s_hardcover
## [1] 292.8595
ses_hard$mean[1] - 1.96*s_hardcover
## [1] 186.2607
# For the Holt forecasts, the intervals from the holt() function and from the RMSE formula are almost the same. The formula-based intervals for the ses() forecasts differ more noticeably, partly because the calculations above reuse the Holt RMSE values (s_paperback, s_hardcover) rather than the SES training RMSE.
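# A sketch of the same formula-based calculation with the SES training RMSE,
# which is presumably what the exercise intends:
s_ses_paper <- sqrt(mean(ses_paper$residuals^2))
ses_paper$mean[1] + 1.96 * s_ses_paper
ses_paper$mean[1] - 1.96 * s_ses_paper
s_ses_hard <- sqrt(mean(ses_hard$residuals^2))
ses_hard$mean[1] + 1.96 * s_ses_hard
ses_hard$mean[1] - 1.96 * s_ses_hard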
#Make a time plot of your data and describe the main features of the series.
autoplot(visitors)
# The series shows an upward trend with strong seasonality whose magnitude grows with the level of the series.
#Split your data into a training set and a test set comprising the last two years of available data. Forecast the test set using Holt-Winters’ multiplicative method.
visitors_train <- subset(visitors,
end = length(visitors) - 24)
visitors_test <- subset(visitors,
start = length(visitors) - 23)
hwm_visitors_train <- hw(visitors_train,
h = 24,
seasonal = "multiplicative")
#Why is multiplicative seasonality necessary here?
autoplot(hwm_visitors_train)
# The plot shows that the size of the seasonal pattern grows as the number of visitors increases. The additive method is preferred when the seasonal variations are roughly constant through the series, while the multiplicative method is preferred when the seasonal variations change in proportion to the level of the series, which is clearly the case here.
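# As an illustrative check of this choice (assumption: training accuracy is a
# reasonable proxy), an additive Holt-Winters model can be fitted for comparison:
hwa_visitors_train <- hw(visitors_train, h = 24, seasonal = "additive")
accuracy(hwa_visitors_train)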
#Forecast the two-year test set using each of the following methods:
#(an ETS model; an additive ETS model applied to a Box-Cox transformed series; a seasonal naïve method; an STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data. )
#ETS model
fc_ets_visitors_train <- forecast(ets(visitors_train), h = 24)
autoplot(fc_ets_visitors_train)
#additive ETS model applied to a Box-Cox transformed series
fc_ets_aETS_visitors_train <- forecast(
ets(visitors_train,
lambda = BoxCox.lambda(visitors_train),
additive.only = TRUE),
h = 24
)
autoplot(fc_ets_aETS_visitors_train)
#a seasonal naïve method
fc_snaive_visitors_train <- snaive(visitors_train, h = 24)
autoplot(fc_snaive_visitors_train)
#STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data
fc_BoxCox_stl_ets_visitors_train <- visitors_train %>%
stlm(
lambda = BoxCox.lambda(visitors_train),
s.window = 13,
robust = TRUE,
method = "ets"
) %>%
forecast(h = 24)
autoplot(fc_BoxCox_stl_ets_visitors_train)
#Which method gives the best forecasts? Does it pass the residual tests?
accuracy(hwm_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.9749466 14.06539 10.35763 -0.5792169 4.223204 0.3970304
## Test set 72.9189889 83.23541 75.89673 15.9157249 17.041927 2.9092868
## ACF1 Theil's U
## Training set 0.1356528 NA
## Test set 0.6901318 1.151065
accuracy(fc_ets_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.7640074 14.53480 10.57657 0.1048224 3.994788 0.405423
## Test set 72.1992664 80.23124 74.55285 15.9202832 16.822384 2.857773
## ACF1 Theil's U
## Training set -0.05311217 NA
## Test set 0.58716982 1.127269
accuracy(fc_ets_aETS_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.001363 14.97096 10.82396 0.1609336 3.974215 0.4149057
## Test set 69.458843 78.61032 72.41589 15.1662261 16.273089 2.7758586
## ACF1 Theil's U
## Training set -0.02535299 NA
## Test set 0.67684148 1.086953
accuracy(fc_snaive_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 17.29363 31.15613 26.08775 7.192445 10.285961 1.000000
## Test set 32.87083 50.30097 42.24583 6.640781 9.962647 1.619375
## ACF1 Theil's U
## Training set 0.6327669 NA
## Test set 0.5725430 0.6594016
accuracy(fc_BoxCox_stl_ets_visitors_train, visitors_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5803348 13.36431 9.551391 0.08767744 3.51950 0.3661256
## Test set 76.3637263 84.24658 78.028992 16.87750474 17.51578 2.9910209
## ACF1 Theil's U
## Training set -0.05924203 NA
## Test set 0.64749552 1.178154
# Ranking by test-set accuracy (RMSE and MAPE), from best to worst:
# snaive > additive ETS(A,A,A) with Box-Cox transformation > ETS(M,Ad,M) > Holt-Winters' multiplicative method > STL + ETS(M,A,N) with Box-Cox transformation
# The seasonal naive method gives the best forecasts on this test set.
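# The question also asks whether the best method passes the residual tests.
# A sketch of that check for the seasonal naive forecasts (output not shown);
# given the training-set ACF1 of 0.63 above, the residuals are unlikely to
# resemble white noise:
checkresiduals(fc_snaive_visitors_train)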
#Compare the same four methods using time series cross-validation with the tsCV()function instead of using a training and test set. Do you come to the same conclusions?
fets_add_BoxCox <- function(y, h) {
forecast(ets(
y,
lambda = BoxCox.lambda(y),
additive.only = TRUE
),
h = h)
}
fstlm <- function(y, h) {
forecast(stlm(
y,
lambda = BoxCox.lambda(y),
s.window = frequency(y) + 1,
robust = TRUE,
method = "ets"
),
h = h)
}
fets <- function(y, h) {
forecast(ets(y),
h = h)
}
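# Optional helper (illustrative, not in the original code) to avoid repeating
# the RMSE calculation; extra arguments are passed through tsCV() to the
# forecast function:
tscv_rmse <- function(f, h = 1, ...) {
  sqrt(mean(tsCV(visitors, f, h = h, ...)^2, na.rm = TRUE))
}
# e.g. tscv_rmse(snaive) or tscv_rmse(hw, seasonal = "multiplicative")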
# Compare the methods by one-step tsCV RMSE
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
## [1] 32.78675
sqrt(mean(tsCV(visitors, fets_add_BoxCox, h = 1)^2,
na.rm = TRUE))
## [1] 18.86439
sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
na.rm = TRUE))
## [1] 17.49642
sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
## [1] 18.52985
sqrt(mean(tsCV(visitors, hw, h = 1,
seasonal = "multiplicative")^2,
na.rm = TRUE))
## [1] 19.62107
# By one-step tsCV RMSE the ranking reverses: STL + ETS with Box-Cox transformation does best (17.5) and snaive does worst (32.8), so the conclusions are not the same as those from the single training/test split.
# Does it always give good forecasts?
autoplot(forecast(ets(bicoal)))
# I think the ETS(M,N,N) model does not forecast very well because there is no trend or pattern for it to exploit, so the point forecasts are flat.
autoplot(forecast(ets(chicken)))
# I think the ETS(M,N,N) model does not give a good forecast because the downward trend in the price dies out and there is no clear pattern afterwards, so the forecasts are flat.
autoplot(forecast(ets(dole)))
# I think the ETS(M,Ad,M) model gives a good forecast because it captures the increasing trend and the small seasonal component.
#Find an example where it does not work well. Can you figure out why?
## The first example, bicoal, works least well. Part of the confusion is that ets() by itself does not produce forecasts; it only estimates the model parameters and returns the fitted model, so forecast() must be applied to the result. Even then, the selected model gives an essentially flat forecast because the series has no stable trend or seasonality for ETS to use.
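# A minimal sketch of the two-step workflow described above: ets() estimates
# the model, and forecast() must then be applied to the fitted object:
fit_bicoal <- ets(bicoal)                 # estimates model parameters only
fc_bicoal <- forecast(fit_bicoal, h = 10) # produces the actual forecasts
autoplot(fc_bicoal)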
4. Compare ets(), snaive() and stlf() on the following three time series: bricksq, dole, a10. For stlf(), you might need to use a Box-Cox transformation. Use a test set of three years to decide what gives the best forecasts.
# bricksq data case
autoplot(bricksq)
# The plot indicates both a trend and a seasonal pattern.
#bricksq data
bricksq_train <- subset(bricksq, end = 143)
bricksq_test <- subset(bricksq, start = 144)
ets_bricksq_train <- forecast(ets(bricksq_train), h = 12)
snaive_bricksq_train <- snaive(bricksq_train, h = 12)
stlf_bricksq_train <- stlf(bricksq_train, h = 12, lambda = BoxCox.lambda(bricksq_train))
accuracy(ets_bricksq_train, bricksq_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.228708 21.96146 15.81586 0.2606171 3.912882 0.4299638
## Test set 37.682916 42.36696 37.68292 8.4157549 8.415755 1.0244329
## ACF1 Theil's U
## Training set 0.2038074 NA
## Test set 0.6190546 1.116608
accuracy(snaive_bricksq_train, bricksq_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 6.438849 50.25482 36.78417 1.430237 8.999949 1.0000000
## Test set 15.333333 37.15284 33.66667 3.231487 7.549326 0.9152487
## ACF1 Theil's U
## Training set 0.81169827 NA
## Test set -0.09314867 0.9538702
accuracy(stlf_bricksq_train, bricksq_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.307145 20.65231 14.88304 0.3317424 3.630985 0.4046045
## Test set 39.841978 44.31471 40.65759 8.9561467 9.153632 1.1053013
## ACF1 Theil's U
## Training set 0.2143369 NA
## Test set 0.5207064 1.152856
# For bricksq, snaive gives the lowest test-set RMSE, so it forecasts best here.
autoplot(dole)
# The plot shows an upward trend with relatively small seasonality.
#dole data
dole_train <- subset(dole, end = 427)
dole_test <- subset(dole, start = 428)
ets_dole_train <- forecast(ets(dole_train), h = 12)
snaive_dole_train <- snaive(dole_train, h = 12)
stlf_dole_train <- stlf(dole_train, h = 12, lambda = BoxCox.lambda(dole_train))
accuracy(ets_dole_train, dole_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 419.4624 18184.20 10856.63 0.5111136 6.268750 0.3014767
## Test set 5143.1303 46330.29 40948.53 1.0361996 5.319935 1.1370959
## ACF1 Theil's U
## Training set 0.5191139 NA
## Test set 0.8678915 2.792915
accuracy(snaive_dole_train, dole_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 16033.54 63643.2 36011.5 3.781301 27.34644 1.00000
## Test set 222180.50 224777.8 222180.5 28.712922 28.71292 6.16971
## ACF1 Theil's U
## Training set 0.9655291 NA
## Test set 0.6869117 13.15477
accuracy(stlf_dole_train, dole_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 428.6905 6534.903 3816.433 0.2205941 3.619427 0.1059782
## Test set 12861.3268 29442.441 22340.560 1.5036966 2.719281 0.6203730
## ACF1 Theil's U
## Training set -0.03900888 NA
## Test set 0.69924487 1.692465
# For dole, stlf with a Box-Cox transformation clearly gives the lowest test-set RMSE.
#a10
autoplot(a10)
#the plot indicates strong trend and seasonality.
a10_train <- subset(a10, end = 192)
a10_test <- subset(a10, start = 193)
ets_a10_train <- forecast(ets(a10_train), h = 12)
snaive_a10_train <- snaive(a10_train, h = 12)
stlf_a10_train <- stlf(a10_train, h = 12, lambda = BoxCox.lambda(a10_train))
accuracy(ets_a10_train, a10_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.08142015 0.6834087 0.4666522 0.3611560 4.366536 0.4031628
## Test set 0.23749664 1.9938413 1.6968091 0.8537386 7.596603 1.4659532
## ACF1 Theil's U
## Training set -0.1088647 NA
## Test set -0.3483479 0.6186824
accuracy(snaive_a10_train, a10_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.118474 1.475289 1.157478 10.85668 11.22859 1.000000
## Test set 2.899299 3.901300 3.362144 12.21499 14.68060 2.904715
## ACF1 Theil's U
## Training set 0.4420062 NA
## Test set -0.1958294 1.191566
accuracy(stlf_a10_train, a10_test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.02179683 0.5926686 0.406025 -0.1409417 3.874051 0.3507841
## Test set 0.37206467 1.8798559 1.469918 1.2852494 6.785370 1.2699313
## ACF1 Theil's U
## Training set -0.06320692 NA
## Test set -0.43197435 0.596385
# For a10, stlf with a Box-Cox transformation again gives the lowest test-set RMSE. Overall: snaive forecasts best for bricksq, while stlf is best for dole and a10.
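# Illustrative helper (assumption: indexing the "Test set" row of each
# accuracy matrix) to collect the winning test-set RMSE values in one place:
rmse_test <- function(fc, test) accuracy(fc, test)["Test set", "RMSE"]
c(bricksq_snaive = rmse_test(snaive_bricksq_train, bricksq_test),
  dole_stlf = rmse_test(stlf_dole_train, dole_test),
  a10_stlf = rmse_test(stlf_a10_train, a10_test))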
# Plot the data and describe the main features of the series.
autoplot(ukcars)
# the graph shows trend and quarterly seasonality.
# Decompose the series using STL and obtain the seasonally adjusted data.
seasadj_ukcars <- ukcars %>% stl(s.window = 4, robust = TRUE) %>% seasadj()  # note: stl() documentation recommends an odd s.window of at least 7
autoplot(seasadj_ukcars)
#The variations in seasonally adjusted data are smaller.
# Forecast the next two years of the series using an additive damped trend method applied to the seasonally adjusted data. (This can be done in one step using stlf()with arguments etsmodel="AAN", damped=TRUE.)
stlf_ets_AAdN_ukcars <- ukcars %>% stlf(h = 8, etsmodel = "AAN", damped = TRUE)
autoplot(stlf_ets_AAdN_ukcars)
# Forecast the next two years of the series using Holt’s linear method applied to the seasonally adjusted data (as before but with damped=FALSE).
stlf_ets_AAN_ukcars <- ukcars %>% stlf(h = 8, etsmodel = "AAN", damped = FALSE)
autoplot(stlf_ets_AAN_ukcars)
# Now use ets() to choose a seasonal model for the data.
ets_ukcars <- ets(ukcars)
# got ETS(A,N,A) model
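# To inspect the selected model and its smoothing parameters (illustrative):
summary(ets_ukcars)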
autoplot(forecast(ets_ukcars, h = 8))
# Compare the RMSE of the ETS model with the RMSE of the models you obtained using STL decompositions. Which gives the better in-sample fits?
writeLines("")
print("Accuracy of STL + ETS(A, Ad, N) model")
## [1] "Accuracy of STL + ETS(A, Ad, N) model"
accuracy(stlf_ets_AAdN_ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.551267 23.32113 18.48987 0.04121971 6.042764 0.602576
## ACF1
## Training set 0.02262668
print("Accuracy of STL + ETS(A, A, N) model")
## [1] "Accuracy of STL + ETS(A, A, N) model"
accuracy(stlf_ets_AAN_ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3412727 23.295 18.1605 -0.5970778 5.98018 0.5918418
## ACF1
## Training set 0.02103582
print("Accuracy of ETS(A, N, A) model")
## [1] "Accuracy of ETS(A, N, A) model"
accuracy(ets_ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
# The two STL-based models give almost identical in-sample RMSE (STL + ETS(A,A,N) is marginally lower), and both fit better than ETS(A,N,A).
#Compare the forecasts from the three approaches? Which seems most reasonable?
## The STL + ETS(A,Ad,N) forecasts seem most reasonable because the damped trend best reflects the flatter, smaller-variation behaviour of the series after the fall in 2001.
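# An illustrative side-by-side view of the three sets of forecasts (sketch):
autoplot(ukcars) +
  autolayer(stlf_ets_AAdN_ukcars, series = "STL + ETS(A,Ad,N)", PI = FALSE) +
  autolayer(stlf_ets_AAN_ukcars, series = "STL + ETS(A,A,N)", PI = FALSE) +
  autolayer(forecast(ets_ukcars, h = 8), series = "ETS(A,N,A)", PI = FALSE)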
# h. Check the residuals of your preferred model.
checkresiduals(stlf_ets_AAdN_ukcars)
## Warning in checkresiduals(stlf_ets_AAdN_ukcars): The fitted degrees of
## freedom is based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,Ad,N)
## Q* = 24.138, df = 3, p-value = 2.337e-05
##
## Model df: 5. Total lags used: 8
# The Ljung-Box test rejects the white-noise hypothesis (p = 2.3e-05), so the residuals are autocorrelated and do not look like white noise.
autoplot(eggs)
# 1. use holt function without using any options
holt_eggs <- holt(eggs, h = 100)
autoplot(holt_eggs) + autolayer(holt_eggs$fitted)
# This model is not good because the point forecasts eventually fall below zero, which is impossible for a price.
# 2. Use holt function with damped option.
holt_damped_eggs <- holt(eggs, damped = TRUE, h = 100)
autoplot(holt_damped_eggs) +autolayer(holt_damped_eggs$fitted)
# The price forecasts stay at plausible levels, but the damping flattens them so they do not reflect the persistent downward trend in the data.
# 3. Use holt function with Box-Cox transformation
holt_BoxCox_eggs <- holt(eggs, lambda = BoxCox.lambda(eggs), h = 100)
autoplot(holt_BoxCox_eggs) +autolayer(holt_BoxCox_eggs$fitted)
# The forecasts stay above zero and still follow the existing downward trend.
# 4. Use holt function with Box-Cox transformation and damped option
holt_BoxCox_damped_eggs <- holt(
eggs,
damped = TRUE,
lambda = BoxCox.lambda(eggs),
h = 100)
autoplot(holt_BoxCox_damped_eggs) +autolayer(holt_BoxCox_damped_eggs$fitted)
# The point forecasts stay above zero and are still decreasing, but they do not reflect the existing trend well.
#show RMSE values for each model
writeLines("RMSE when using holt function")
## RMSE when using holt function
sqrt(mean(holt_eggs$residuals^2))
## [1] 26.58219
writeLines("RMSE when using holt function with damped option")
## RMSE when using holt function with damped option
sqrt(mean(holt_damped_eggs$residuals^2))
## [1] 26.54019
writeLines("RMSE when using holt function with Box-Cox transformation")
## RMSE when using holt function with Box-Cox transformation
sqrt(mean(holt_BoxCox_eggs$residuals^2))
## [1] 1.032217
writeLines("RMSE when using holt function with damped option and Box-Cox transformation")
## RMSE when using holt function with damped option and Box-Cox transformation
sqrt(mean(holt_BoxCox_damped_eggs$residuals^2))
## [1] 1.039187
# Holt's linear method with a Box-Cox transformation gives the lowest reported RMSE, but note that for the Box-Cox models the residuals are on the transformed scale, so these RMSE values are not directly comparable with the untransformed models. Judged from the plots, the Box-Cox Holt model still gives the best forecast because it captures the downward trend while keeping prices positive.
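# For a fairer comparison on the original scale, response residuals can be
# computed from the back-transformed fitted values (sketch; assumes $fitted is
# back-transformed to the original scale, as in recent forecast versions):
sqrt(mean((eggs - holt_BoxCox_eggs$fitted)^2))
sqrt(mean((eggs - holt_BoxCox_damped_eggs$fitted)^2))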