#Chapter 7
library(fpp2) # assumed: the fpp2 package, which loads forecast and the textbook data sets used below
#1. Consider the pigs series — the number of pigs slaughtered in Victoria each month.
#a.
summary(ses(pigs, h=4))
##
## Forecast method: Simple exponential smoothing
##
## Model Information:
## Simple exponential smoothing
##
## Call:
## ses(y = pigs, h = 4)
##
## Smoothing parameters:
## alpha = 0.2971
##
## Initial states:
## l = 77260.0561
##
## sigma: 10308.58
##
## AIC AICc BIC
## 4462.955 4463.086 4472.665
##
## Error measures:
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 385.8721 10253.6 7961.383 -0.922652 9.274016 0.7966249 0.01282239
##
## Forecasts:
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Sep 1995 98816.41 85605.43 112027.4 78611.97 119020.8
## Oct 1995 98816.41 85034.52 112598.3 77738.83 119894.0
## Nov 1995 98816.41 84486.34 113146.5 76900.46 120732.4
## Dec 1995 98816.41 83958.37 113674.4 76092.99 121539.8
#alpha = 0.2971
# l0 = 77260.0561
#b.
fc_pigs = ses(pigs, h=4)
sd_pigs = sd(fc_pigs$residuals)
mean_pigs = fc_pigs$mean[1]
int_pigs_fc <- c(mean_pigs - (1.96*sd_pigs),mean_pigs + (1.96*sd_pigs))
int_pigs_fc
## [1] 78679.97 118952.84
#The manually computed interval (point forecast +/- 1.96*sd of the residuals) is very close to the 95% interval produced by ses()
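# A sketch pulling the 95% bounds straight from the ses() forecast object for a side-by-side check:
c(fc_pigs$lower[1, "95%"], fc_pigs$upper[1, "95%"])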
#2. Write your own function to implement simple exponential smoothing. The function should take arguments y (the time series), alpha (the smoothing parameter α ) and level (the initial level ℓ0 ). It should return the forecast of the next observation in the series. Does it give the same forecast as ses()?
pig_ses = function(y, alpha, l0){
y_hat <- l0
for(index in 1:length(y)){
y_hat <- alpha*y[index] + (1 - alpha)*y_hat
}
cat("SES forecast: ",
as.character(y_hat),
sep = "\n")
}
alpha <- fc_pigs$model$par[1]
l0 <- fc_pigs$model$par[2]
pig_ses(pigs,alpha = alpha, l0 = l0)
## SES forecast:
## 98816.4061115907
#The forecast matches the one produced by ses() above (98816.41), so the function appears to work
pig_ses_2 <- function(pars = c(alpha, l0), y){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(y)){
error <- y[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*y[index] + (1 - alpha)*y_hat
}
return(SSE)
}
pig_optim = optim(par = c(0.5, pigs[1]), y = pigs, fn = pig_ses_2)
pig_optim
## $par
## [1] 2.990081e-01 7.637927e+04
##
## $value
## [1] 19767049111
##
## $counts
## function gradient
## 91 NA
##
## $convergence
## [1] 0
##
## $message
## NULL
print(paste0("Created SES: ", pig_optim$par[1]))
## [1] "Created SES: 0.299008094014243"
print(paste0("Created SES: ", pig_optim$par[2]))
## [1] "Created SES: 76379.2653476235"
print(paste0("R's SES: ", fc_pigs$model$par[1]))
## [1] "R's SES: 0.297148833372095"
print(paste0("R's SES: ", fc_pigs$model$par[2]))
## [1] "R's SES: 77260.0561458528"
#The optimised values (alpha = 0.299, l0 = 76379) are close to those chosen by ses() (alpha = 0.297, l0 = 77260)
pig_ses_3 = function(init_pars, data){
fc_next <- 0
SSE <- function(pars, data){
error <- 0
SSE <- 0
alpha <- pars[1]
l0 <- pars[2]
y_hat <- l0
for(index in 1:length(data)){
error <- data[index] - y_hat
SSE <- SSE + error^2
y_hat <- alpha*data[index] + (1 - alpha)*y_hat
}
fc_next <<- y_hat
return(SSE)
}
# use optim function to get optimal values of alpha and l0.
optim_pars <- optim(par = init_pars, data = data, fn = SSE)
# return results
return(list(
Next_observation_forecast = fc_next,
alpha = optim_pars$par[1],
l0 = optim_pars$par[2]
))
}
pig_ses_3(c(0.5, pigs[1]), pigs)
## $Next_observation_forecast
## [1] 98814.63
##
## $alpha
## [1] 0.2990081
##
## $l0
## [1] 76379.27
print(paste0("R's SES: ", fc_pigs$model$par[1]))
## [1] "R's SES: 0.297148833372095"
print(paste0("R's SES: ", fc_pigs$model$par[2]))
## [1] "R's SES: 77260.0561458528"
#a.
autoplot(books)
#Both paperback and hardcover book sales trend upward over time; however, there is considerable day-to-day randomness in how much is sold
#b.
ses_paperback = ses(books[, "Paperback"], h = 4)
ses_hardcover = ses(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"], series = "Paperback") +
autolayer(ses_paperback, series = "Paperback") +
autolayer(books[, "Hardcover"], series = "Hardcover") +
autolayer(ses_hardcover, series = "Hardcover", PI = FALSE)
#c.
paperback_RMSE = sqrt(mean(ses_paperback$residuals^2))
hardcover_RMSE = sqrt(mean(ses_hardcover$residuals^2))
summary(paperback_RMSE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 33.64 33.64 33.64 33.64 33.64 33.64
summary(hardcover_RMSE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 31.93 31.93 31.93 31.93 31.93 31.93
#a.
paper_holt <- holt(books[, "Paperback"], h = 4)
hard_holt <- holt(books[, "Hardcover"], h = 4)
autoplot(books[, "Paperback"]) +
autolayer(paper_holt)
autoplot(books[, "Hardcover"]) +
autolayer(hard_holt)
#
#b.
rmse_paperback <- sqrt(mean(paper_holt$residuals^2))
rmse_hardcover <- sqrt(mean(hard_holt$residuals^2))
rmse_paperback
## [1] 31.13692
rmse_hardcover
## [1] 27.19358
#The RMSE for both paperback and hardcover is lower for Holt's method than for SES.
#SES tends to do better when a series has no clear trend, whereas Holt's method predicts more accurately when the data follow an approximately linear trend
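# A sketch backing this up with one-step cross-validation on the paperback series (output not shown):
sqrt(mean(tsCV(books[, "Paperback"], ses, h = 1)^2, na.rm = TRUE))
sqrt(mean(tsCV(books[, "Paperback"], holt, h = 1)^2, na.rm = TRUE))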
#c.
#I think Holt's method works better for these particular series: despite the noise, there is an upward trend that is more readily captured by a forecast using Holt's method, and the RMSE is also lower for both paperback and hardcover
#d.
print(paste0("Paperback 95% Interval, Upper: ",
paper_holt$upper[1, "95%"]))
## [1] "Paperback 95% Interval, Upper: 275.020545221845"
print(paste0("Paperback 95% Interval, Lower: ",
paper_holt$lower[1, "95%"]))
## [1] "Paperback 95% Interval, Lower: 143.912985820256"
print(paste0("Paperback 95% Interval, Upper: ",
paper_holt$mean[1] + 1.96*rmse_paperback))
## [1] "Paperback 95% Interval, Upper: 270.495134632871"
print(paste0("Paperback 95% Interval, Lower: ",
paper_holt$mean[1] - 1.96*rmse_paperback))
## [1] "Paperback 95% Interval, Lower: 148.438396409231"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$upper[1, "95%"]))
## [1] "Hardcover 95% Interval, Upper: 307.42557365277"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$lower[1, "95%"]))
## [1] "Hardcover 95% Interval, Upper: 192.922170771941"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$mean[1] + 1.96*rmse_hardcover))
## [1] "Hardcover 95% Interval, Upper: 303.473285056784"
print(paste0("Hardcover 95% Interval, Upper: ",
hard_holt$mean[1] - 1.96*rmse_hardcover))
## [1] "Hardcover 95% Interval, Upper: 196.874459367927"
autoplot(eggs)
#there is a very clear, downward trend in the data
holt_egg = holt(eggs, h = 100)
autoplot(holt_egg) +
autolayer(holt_egg$fitted)
holt_damp_egg = holt(eggs, damped = TRUE, h = 100)
autoplot(holt_damp_egg) +
autolayer(holt_damp_egg$fitted)
holt_Box_damp_egg = holt(
eggs,
damped = TRUE,
lambda = BoxCox.lambda(eggs),
h = 100)
autoplot(holt_Box_damp_egg) +
autolayer(holt_Box_damp_egg$fitted)
#Based on the created forecasts, each successive model does better at avoiding negative forecast values for something that cannot be negative, such as the price of eggs in this instance.
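# A quick numerical check (sketch; output not shown): training RMSE for each variant.
accuracy(holt_egg)["Training set", "RMSE"]
accuracy(holt_damp_egg)["Training set", "RMSE"]
accuracy(holt_Box_damp_egg)["Training set", "RMSE"]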
library(readxl)
retail <- read_excel("C:/Users/dmcsw/Downloads/retail.xlsx",
skip = 1)
ts_retail <- ts(retail[, "A3349873A"],
frequency = 12,
start = c(1982, 4))
autoplot(ts_retail)
#Multiplicative seasonality is necessary for this series because the seasonal swings grow in proportion to the level of sales; an additive model would understate them as sales increase
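# A season plot makes the growing seasonal swings easier to see (sketch):
ggseasonplot(ts_retail)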
#b.
hw_retail <- hw(ts_retail,
seasonal = "multiplicative")
hw_damp_retail <- hw(ts_retail,
seasonal = "multiplicative",
damped = TRUE)
autoplot(hw_retail)
autoplot(hw_damp_retail)
#It seems that there is a wider range of outcomes with the damped Holt-Winters method.
#c.
tscv_retail <- tsCV(
ts_retail,
hw, h = 1, seasonal = "multiplicative"
)
damp_tscv_retail <- tsCV(
ts_retail,
hw, h = 1, seasonal = "multiplicative", damped = TRUE
)
sqrt(mean(tscv_retail^2, na.rm = TRUE))
## [1] 14.72762
sqrt(mean(damp_tscv_retail^2, na.rm = TRUE))
## [1] 14.94306
#There is a minimal difference between the two RMSEs, but the damped model would be the better choice, as it is less susceptible to forecasting a negative value at any point
#d.
checkresiduals(damp_tscv_retail)
## Warning in modeldf.default(object): Could not find appropriate degrees of
## freedom for this model.
#These are one-step cross-validation errors rather than model residuals, so checkresiduals() cannot work out the degrees of freedom; from the plots alone it is hard to tell whether they are white noise, so it is safer to assume that they are not
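# To test model residuals directly (rather than the tsCV errors), a sketch using the fitted damped model:
checkresiduals(hw_damp_retail)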
#e.
ts_retail_train = window(ts_retail,
end = c(2011))
ts_retail_test = window(ts_retail,
start = 2010)
# Holt-Winters' damped method.
damp_hw_retail = hw(ts_retail_train,
h = 36,
seasonal = "multiplicative",
damped = TRUE)
autoplot(damp_hw_retail)
damp_check = accuracy(damp_hw_retail, ts_retail_test)
# try Holt-Winters' method.
hw_retail = hw(ts_retail_train,
h = 36,
seasonal = "multiplicative")
autoplot(hw_retail)
hw_check = accuracy(hw_retail, ts_retail_test)
damp_check
## ME RMSE MAE MPE MAPE MASE
## Training set 0.6850894 8.918343 6.398241 0.2648898 3.209221 0.399920
## Test set 65.1135689 86.678289 66.456844 15.9445237 16.448947 4.153864
## ACF1 Theil's U
## Training set -0.03930028 NA
## Test set 0.55630834 1.432892
hw_check
## ME RMSE MAE MPE MAPE MASE
## Training set 0.4341199 9.043952 6.468265 0.1680194 3.230092 0.4042968
## Test set 59.0723813 79.772765 60.469975 14.4405377 14.965546 3.7796564
## ACF1 Theil's U
## Training set 0.04843008 NA
## Test set 0.49513754 1.32284
#The test-set RMSE and ACF1 suggest that neither model I tried was as accurate as I would have hoped, but of the 2, I would choose to use the damped model
stl_ets_train <- ts_retail_train %>%
stlm(
s.window = 13,
robust = TRUE,
method = "ets",
lambda = BoxCox.lambda(ts_retail_train)
) %>%
forecast(
h = 36,
lambda = BoxCox.lambda(ts_retail_train)
)
## Warning in InvBoxCox(fcast$mean, lambda, biasadj, fcast): biasadj information
## not found, defaulting to FALSE.
autoplot(stl_ets_train)
accuracy(stl_ets_train, ts_retail_test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.6447266 8.848999 6.019668 -0.2989156 2.939973 0.3762574
## Test set 50.4143503 71.180518 52.207017 12.2159132 12.882611 3.2631828
## ACF1 Theil's U
## Training set -0.009661764 NA
## Test set 0.408707373 1.184903
#Comparing this approach to the previous models, it seems to be an improvement, even if only a slight one.
autoplot(ukcars)
#The data appears to have an overall upward trend, with some seasonality.
#b.
seas_ukcars = ukcars %>%
stl(s.window = 4, robust = TRUE) %>%
seasadj()
autoplot(seas_ukcars)
#c.
seas_fc_ukcars_damp = ukcars %>%
stlf(h = 8, etsmodel = "AAN", damped = TRUE)
autoplot(seas_fc_ukcars_damp)
#d.
seas_fc_ukcars = ukcars %>%
stlf(h = 8, etsmodel = "AAN", damped = FALSE)
autoplot(seas_fc_ukcars)
#e.
ets_ukcars = ets(ukcars)
summary(ets_ukcars)
## ETS(A,N,A)
##
## Call:
## ets(y = ukcars)
##
## Smoothing parameters:
## alpha = 0.6199
## gamma = 1e-04
##
## Initial states:
## l = 314.2568
## s = -1.7579 -44.9601 21.1956 25.5223
##
## sigma: 25.9302
##
## AIC AICc BIC
## 1277.752 1278.819 1296.844
##
## Training set error measures:
## ME RMSE MAE MPE MAPE MASE
## Training set 1.313884 25.23244 20.17907 -0.1570979 6.629003 0.6576259
## ACF1
## Training set 0.02573334
#ETS(A,N,A) came up as the ets model
#f.
accuracy(seas_fc_ukcars_damp)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.040088 22.81499 17.73846 -0.1296296 5.821059 0.5780878
## ACF1
## Training set 0.01590553
accuracy(seas_fc_ukcars)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.4024211 22.86408 17.732 -0.5948215 5.836029 0.5778774
## ACF1
## Training set 0.02416541
#The damped model is the best overall, as both its ACF1 and its RMSE are lower
#g.
#I think the most reasonable forecast is the damped model, as the RMSE is lower than the others
#h.
checkresiduals(seas_fc_ukcars_damp)
## Warning in checkresiduals(seas_fc_ukcars_damp): The fitted degrees of freedom is
## based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(A,Ad,N)
## Q* = 23.825, df = 3, p-value = 2.717e-05
##
## Model df: 5. Total lags used: 8
#The Ljung-Box p-value is very small (2.7e-05), so the residuals are significantly autocorrelated; they are not the white noise we would like to see from a well specified model
autoplot(visitors)
ggseasonplot(visitors)
#b.
train_visitors = subset(visitors,
end = length(visitors) - 24)
test_visitors = subset(visitors,
start = length(visitors) - 23)
hwm_visit_train = hw(train_visitors,
h = 24,
seasonal = "multiplicative")
#c.
autoplot(hwm_visit_train)
#Multiplicative seasonality lets the seasonal effect grow as the number of visitors increases, whereas an additive model would not be able to account for that growing variation in a meaningful or representative way
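# For comparison, a sketch of the additive alternative (hypothetical object name hwa_visit_train):
hwa_visit_train = hw(train_visitors, h = 24, seasonal = "additive")
autoplot(hwa_visit_train)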
#d.
# d-1. an ETS model;
fc_ets_visit = forecast(ets(train_visitors), h = 24)
autoplot(fc_ets_visit)
# d-2. an additive ETS model applied to a Box-Cox transformed series;
fc_Box_visit = forecast(
ets(train_visitors,
lambda = BoxCox.lambda(train_visitors),
additive.only = TRUE),
h = 24
)
autoplot(fc_Box_visit)
# d-3. a seasonal naive method;
fc_snaive_visit = snaive(train_visitors, h = 24)
autoplot(fc_snaive_visit)
# d-4. an STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data.
fc_Box_stl_visitors = train_visitors %>%
stlm(
lambda = BoxCox.lambda(train_visitors),
s.window = 13,
robust = TRUE,
method = "ets"
) %>%
forecast(h = 24)
autoplot(fc_Box_stl_visitors)
#e.
accuracy(fc_ets_visit)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.7640074 14.5348 10.57657 0.1048224 3.994788 0.405423 -0.05311217
accuracy(fc_Box_visit)
## ME RMSE MAE MPE MAPE MASE
## Training set 1.001363 14.97096 10.82396 0.1609336 3.974215 0.4149057
## ACF1
## Training set -0.02535299
accuracy(fc_snaive_visit)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 17.29363 31.15613 26.08775 7.192445 10.28596 1 0.6327669
accuracy(fc_Box_stl_visitors)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.5803348 13.36431 9.551391 0.08767744 3.5195 0.3661256
## ACF1
## Training set -0.05924203
#The STL decomposition applied to the Box-Cox transformed data followed by an ETS model applied to the seasonally adjusted (transformed) data ended up performing the best of the group overall.
checkresiduals(fc_Box_stl_visitors)
## Warning in checkresiduals(fc_Box_stl_visitors): The fitted degrees of freedom is
## based on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ETS(M,Ad,N)
## Q* = 15.032, df = 19, p-value = 0.7205
##
## Model df: 5. Total lags used: 24
#With a Ljung-Box p-value of 0.72, the residuals are consistent with white noise, so they pass the test
#f.
f_Box = function(y, h) {
forecast(ets(
y,
lambda = BoxCox.lambda(y),
additive.only = TRUE
),
h = h)
}
fstlm = function(y, h) {
forecast(stlm(
y,
lambda = BoxCox.lambda(y),
s.window = frequency(y) + 1,
robust = TRUE,
method = "ets"
),
h = h)
}
fets = function(y, h) {
forecast(ets(y),
h = h)
}
sqrt(mean(tsCV(visitors, snaive, h = 1)^2, na.rm = TRUE))
## [1] 32.78675
sqrt(mean(tsCV(visitors, f_Box, h = 1)^2,
na.rm = TRUE))
## [1] 18.86439
sqrt(mean(tsCV(visitors, fstlm, h = 1)^2,
na.rm = TRUE))
## [1] 17.49642
sqrt(mean(tsCV(visitors, fets, h = 1)^2, na.rm = TRUE))
## [1] 18.52985
sqrt(mean(tsCV(visitors, hw, h = 1,
seasonal = "multiplicative")^2,
na.rm = TRUE))
## [1] 19.62107
#The tsCV comparison reaches the same conclusion: the STL + Box-Cox + ETS approach has the lowest one-step RMSE.
#a.
fets = function(y, h) {
forecast(ets(y), h = h)
}
qcement_tscv = tsCV(qcement, fets, h=4)
qcement_naive = tsCV(qcement, snaive, h= 4)
#b.
sqrt(mean(qcement_tscv^2, na.rm = TRUE))
## [1] 0.111846
sqrt(mean(qcement_naive^2, na.rm = TRUE))
## [1] 0.1338711
#Based on the RMSE values above, the ets model is more accurate than the seasonal naive model, which is what I would expect.
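# A sketch breaking the comparison down by horizon (the tsCV error matrices have one column per h; output not shown):
sqrt(colMeans(qcement_tscv^2, na.rm = TRUE))
sqrt(colMeans(qcement_naive^2, na.rm = TRUE))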
bricksq.BC = BoxCox(bricksq, BoxCox.lambda(bricksq))
a10.BC = BoxCox(a10, BoxCox.lambda(a10))
#validation sets
ausbeer.train = window(ausbeer, end = c(2007,2))
ausbeer.test = window(ausbeer, start = c(2007,3))
bricksq.BC.train = window(bricksq.BC, end = c(1991,1))
bricksq.BC.test = window(bricksq.BC, start = c(1991,2))
dole.train = window(dole, end=c(1989,7))
dole.test = window(dole, start=c(1989,8))
a10.train = window(a10.BC, end = c(2005,6))
a10.test = window(a10.BC, start = c(2005,7))
h02.train = window(h02, end = c(2005,7))
h02.test = window(h02, start = c(2005,8))
usmelec.train = window(usmelec, end = c(2010,6))
usmelec.test = window(usmelec, start = c(2010,7))
#ETS
ausbeer.ets = ets(ausbeer.train)
bricksq.BC.ets = ets(bricksq.BC.train)
dole.ets = ets(dole.train)
a10.ets = ets(a10.train)
h02.ets = ets(h02.train)
usmelec.ets = ets(usmelec.train)
#ETS forecasts
ausbeer.etsF = forecast(ausbeer.ets, h=36)
bricksq.BC.etsF = forecast(bricksq.BC.ets, h=36)
dole.etsF = forecast(dole.ets, h=36)
a10.etsF = forecast(a10.ets, h=36)
h02.etsF = forecast(h02.ets, h=36)
usmelec.etsF = forecast(usmelec.ets, h = 36)
#snaive
ausbeer.sn = snaive(ausbeer.train, h = 36)
bricksq.BC.sn = snaive(bricksq.BC.train, h= 36)
dole.sn = snaive(dole.train, h= 36)
a10.sn = snaive(a10.train, h = 36)
h02.sn = snaive(h02.train, h = 36)
usmelec.sn = snaive(usmelec.train, h= 36)
#stlf
ausbeer.stlf = stlf(ausbeer.train, h = 36)
bricksq.BC.stlf = stlf(bricksq.BC.train, h= 36)
dole.stlf = stlf(dole.train, h= 36)
a10.stlf = stlf(a10.train, h = 36)
h02.stlf = stlf(h02.train, h = 36)
usmelec.stlf = stlf(usmelec.train, h= 36)
accuracy(ausbeer.etsF,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.3466095 15.781973 11.979955 -0.064073435 2.864311 0.7567076
## Test set 0.1272571 9.620828 8.919347 0.009981598 2.132836 0.5633859
## ACF1 Theil's U
## Training set -0.1942276 NA
## Test set 0.3763918 0.1783972
accuracy(ausbeer.sn,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 3.336634 19.67005 15.83168 0.9044009 3.771370 1.0000000
## Test set -2.916667 10.80509 9.75000 -0.6505927 2.338012 0.6158537
## ACF1 Theil's U
## Training set 0.0009690785 NA
## Test set 0.3254581810 0.1981463
accuracy(ausbeer.stlf,ausbeer.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.08449615 13.640828 10.495557 -0.02319029 2.523657 0.6629464
## Test set -0.74491615 9.596294 9.014064 -0.20867707 2.159142 0.5693686
## ACF1 Theil's U
## Training set -0.1623773 NA
## Test set 0.3154543 0.1712144
accuracy(bricksq.BC.etsF,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.01996121 0.2419629 0.1755865 0.1409763 1.247006 0.4329129
## Test set 0.25707641 0.3389809 0.2909121 1.7480916 1.983664 0.7172512
## ACF1 Theil's U
## Training set 0.1434416 NA
## Test set 0.5769625 0.919898
accuracy(bricksq.BC.sn,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.1051226 0.5448411 0.4055930 0.7368813 2.873281 1.000000
## Test set -0.3259032 0.5522919 0.4649268 -2.2284771 3.205211 1.146289
## ACF1 Theil's U
## Training set 0.7777698 NA
## Test set -0.1277042 1.395018
accuracy(bricksq.BC.stlf,bricksq.BC.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0208919 0.2258669 0.1671388 0.150274 1.185157 0.4120850
## Test set 0.2002913 0.3047304 0.2627101 1.361419 1.796522 0.6477185
## ACF1 Theil's U
## Training set 0.2125626 NA
## Test set 0.4857060 0.8319484
accuracy(dole.etsF,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 98.00253 16130.58 9427.497 0.5518769 6.20867 0.2995409
## Test set 221647.42595 279668.65 221647.426 32.5447637 32.54476 7.0424271
## ACF1 Theil's U
## Training set 0.5139536 NA
## Test set 0.9394267 11.29943
accuracy(dole.sn,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 12606.58 56384.06 31473.16 3.350334 27.73651 1.000000
## Test set 160370.33 240830.92 186201.00 20.496197 27.38678 5.916184
## ACF1 Theil's U
## Training set 0.9781058 NA
## Test set 0.9208325 9.479785
accuracy(dole.stlf,dole.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 256.3644 6248.781 3836.768 0.5425694 4.804379 0.121906
## Test set 205359.4652 268646.638 208074.403 29.3215555 30.017034 6.611170
## ACF1 Theil's U
## Training set 0.01337877 NA
## Test set 0.93746400 10.71493
accuracy(a10.etsF,a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 7.659502e-05 0.06455211 0.05014221 -0.002655551 2.224162 0.3212884
## Test set 5.129718e-02 0.13739413 0.11893283 1.217786471 3.234877 0.7620674
## ACF1 Theil's U
## Training set -0.04890245 NA
## Test set -0.01633399 0.4906605
accuracy(a10.sn,a10.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 0.1523955 0.1731097 0.1560660 6.682003 6.842931 1.000000 0.2212723
## Test set 0.3393671 0.4025493 0.3406072 8.979124 9.018149 2.182456 0.6073193
## Theil's U
## Training set NA
## Test set 1.423661
accuracy(a10.stlf,a10.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0001106891 0.05780298 0.04565029 -0.0493025 2.039699 0.2925062
## Test set 0.0557901445 0.13819615 0.11837298 1.3422799 3.212405 0.7584802
## ACF1 Theil's U
## Training set -0.09571412 NA
## Test set 0.04878539 0.4893801
accuracy(h02.etsF,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.0004513622 0.04579930 0.03426080 -0.2209876 4.560572 0.5745005
## Test set 0.0342751950 0.08019185 0.06763825 2.8172154 7.267771 1.1341883
## ACF1 Theil's U
## Training set 0.05057522 NA
## Test set -0.12581913 0.4581788
accuracy(h02.sn,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set 0.037749040 0.07171709 0.05963581 5.0942482 8.197262 1.000000
## Test set -0.004309813 0.08238325 0.06787933 -0.1376051 7.444182 1.138231
## ACF1 Theil's U
## Training set 0.40080565 NA
## Test set 0.09840611 0.4929215
accuracy(h02.stlf,h02.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.000830773 0.04053338 0.03033702 -0.2685748 4.274993 0.5087047
## Test set -0.006211506 0.07075988 0.05886616 -2.0093569 6.971515 0.9870941
## ACF1 Theil's U
## Training set -0.06958533 NA
## Test set -0.06144624 0.4425916
accuracy(usmelec.etsF,usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.07834851 7.447167 5.601963 -0.1168709 2.163495 0.6225637
## Test set -10.20080652 15.291359 13.123441 -3.1908161 3.926338 1.4584491
## ACF1 Theil's U
## Training set 0.2719249 NA
## Test set 0.4679496 0.4859495
accuracy(usmelec.sn,usmelec.test)
## ME RMSE MAE MPE MAPE MASE ACF1
## Training set 4.921564 11.58553 8.998217 2.000667 3.511648 1.000000 0.4841860
## Test set 5.475972 17.20037 13.494750 1.391437 3.767514 1.499714 0.2692544
## Theil's U
## Training set NA
## Test set 0.4765145
accuracy(usmelec.stlf,usmelec.test)
## ME RMSE MAE MPE MAPE MASE
## Training set -0.0657726 6.19216 4.682101 -0.06613967 1.826421 0.5203366
## Test set -13.0124627 17.48270 15.358336 -4.06887002 4.655355 1.7068199
## ACF1 Theil's U
## Training set 0.1106876 NA
## Test set 0.5244502 0.5593233
#The best conclusion that I can draw from all of these results is that the best model depends on the kind of data: whether or not it is seasonal, and how much variance there is year to year.
autoplot(forecast(ets(bicoal)))
#The ETS forecast does not appear all that useful
autoplot(forecast(ets(chicken)))
#The ETS forecast is not the best, but it is workable in this instance
autoplot(forecast(ets(dole)))
#The ETS forecast was good, as it reflected the increasing trend and seasonality.
autoplot(forecast(ets(usdeaths)))
#The ETS was good, as it reflected the seasonality of the data.
autoplot(forecast(ets(lynx)))
#The ETS forecast is solid, but not great; the wide range of outcomes is not reflective of the data so far
autoplot(forecast(ets(ibmclose)))
#The ETS appears okay at first glance, however, it does not seem like it will be helpful going forward
autoplot(forecast(ets(eggs)))
#The ETS is not as useful as it would initially appear to be, the decreasing trend is not well accounted for.
#b.
#The ibmclose dataset had issues with the ets forecast, as the large fluctuations in price that existed historically were not accounted for in the forecast.
usdeaths_ets = ets(usdeaths,model = "MAM")
usdeaths_ets2 = forecast(usdeaths_ets, h=4)
usdeaths_hwM = hw(usdeaths, seasonal = 'multiplicative', h=1)
usdeaths_ets2$mean
## Jan Feb Mar Apr
## 1979 8233.107 7475.624 8285.738 8509.419
usdeaths_hwM$mean
## Jan
## 1979 8217.64
#The results appear to be very similar
usdeaths_ets_ANN = ets(usdeaths, model = "ANN")
usdeaths_ets_F = forecast(usdeaths_ets_ANN,4)
usdeaths_ets_F
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## Jan 1979 9239.938 8296.855 10183.02 7797.618 10682.26
## Feb 1979 9239.938 7906.286 10573.59 7200.294 11279.58
## Mar 1979 9239.938 7606.583 10873.29 6741.938 11737.94
## Apr 1979 9239.938 7353.919 11125.96 6355.521 12124.35
sigma = 7.2637
alpha = .9999^2
h = 4
ci = sigma*(1+alpha*(h-1))
usdeaths_ets_F$mean[1]
## [1] 9239.938
usdeaths_ets_F$mean[1] - ci
## [1] 9210.887
usdeaths_ets_F$lower[1,'95%']
## 95%
## 7797.618
# The manually computed interval above is much narrower than R's because sigma was not taken from the fitted model; for ETS(A,N,N) the h-step forecast standard deviation is sigma*sqrt(1 + alpha^2*(h - 1))
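# A sketch of that relation using the fitted model (assumption: sigma is approximated from the
# model residuals); with h = 1 it should roughly reproduce the 95% bounds printed above.
alpha_hat = usdeaths_ets_ANN$par["alpha"]
sigma_hat = sqrt(mean(residuals(usdeaths_ets_ANN)^2))
h = 1
usdeaths_ets_F$mean[1] + c(-1.96, 1.96) * sigma_hat * sqrt(1 + alpha_hat^2 * (h - 1))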
#Chapter 8
#1.
#a.
#In all 3 figures the autocorrelations stay within the dashed critical bounds, indicating that all three series look like white noise.
#b.
#The critical bounds are +/- 1.96/sqrt(T), so they narrow as the series gets longer; with more data, spurious autocorrelations of a given size are less likely to appear by accident.
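# A sketch of those bounds for the three series lengths in the figure (assumed to be 36, 360 and 1000 observations):
1.96 / sqrt(c(36, 360, 1000))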
#2.
autoplot(ibmclose)
ggtsdisplay(ibmclose)
#This data is certainly not stationary: the ACF stays well above zero and decays very slowly, and the PACF starts with a single large spike at lag 1, both indicative of non-stationary data.
ggtsdisplay(usnetelec)
#The data is not seasonal; a single first difference is needed, as suggested by the ACF/PACF
diff_usnetelec = diff(usnetelec, 1)
ggtsdisplay(diff_usnetelec)
#b.
ggtsdisplay(usgdp)
bcl_usgdp = BoxCox.lambda(usgdp)
bcl_diff_usgdp = usgdp %>%
BoxCox(bcl_usgdp) %>%
diff(1)
ggtsdisplay(bcl_diff_usgdp)
#c.
ggtsdisplay(mcopper)
bcl_mcop = BoxCox.lambda(mcopper)
bcl_diff_mcop = mcopper %>%
BoxCox(bcl_mcop) %>%
diff(1)
ggtsdisplay(bcl_diff_mcop)
#d.
ggtsdisplay(enplanements)
bcl_empl = BoxCox.lambda(enplanements)
bcl_diff_empl = enplanements %>%
BoxCox(bcl_empl) %>%
diff(12) %>%
diff(1)
ggtsdisplay(bcl_diff_empl)
#e.
ggtsdisplay(visitors)
bcl_visit = BoxCox.lambda(visitors)
bcl_diff_visit = visitors %>%
BoxCox(bcl_visit) %>%
diff(12) %>%
diff(1)
ggtsdisplay(bcl_diff_visit)
#The differencing applied above, in backshift notation: w_t = (1 - B)(1 - B^12) * BoxCox(y_t, lambda)
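# A sketch confirming the suggested differencing orders for the Box-Cox transformed series (output not shown):
visitors %>% BoxCox(bcl_visit) %>% nsdiffs()
visitors %>% BoxCox(bcl_visit) %>% diff(12) %>% ndiffs()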
autoplot(ts_retail)
ggtsdisplay(ts_retail)
bcl_retail = BoxCox.lambda(ts_retail)
# seasonal difference followed by a first difference, as for the other monthly series above
bcl_diff_retail <- ts_retail %>%
BoxCox(bcl_retail) %>%
diff(12) %>%
diff(1)
ggtsdisplay(bcl_diff_retail)
#a.
phi1 = 0.6
y = ts(numeric(100))
e = rnorm(100)
for(i in 2:100){
y[i] = phi1*y[i-1] + e[i]
}
#b.
func_ar1 = function(phi1){
y = ts(numeric(100))
e = rnorm(100)
for(i in 2:100){
y[i] = phi1*y[i-1] + e[i]
}
return(y)
}
autoplot(func_ar1(0.3), series = "0.3") +
geom_line(size = 1, colour = "blue") +
autolayer(y, series = "0.6", size = 1) +
autolayer(func_ar1(0.9), size = 1, series = "0.9") +
ylab("AR1 model") +
guides(colour = guide_legend(title = "Phi1"))
#c.
theta1 = 0.6
func_ma1 = function(theta1){
y = ts(numeric(100))
e = rnorm(100)
for(i in 2:100){
y[i] = theta1*e[i-1] + e[i]
}
return(y)
}
#d.
autoplot(func_ma1(0.3), series = "0.3") +
geom_line(size = 1, colour = "green") +
autolayer(func_ma1(0.6), series = "0.6", size = 1) +
autolayer(func_ma1(0.9), size = 1, series = "0.9") +
ylab("MA(1) model") +
guides(colour = guide_legend(title = "Theta1"))
#e.
arima_101 = ts(numeric(50))
e = rnorm(50)
for(i in 2:50){
arima_101[i] = 0.6*arima_101[i-1] + 0.6*e[i-1] + e[i]
}
#f.
func_ar2 = ts(numeric(50))
e = rnorm(50)
for(i in 3:50){
func_ar2[i] = -0.8*func_ar2[i-1] + 0.3*func_ar2[i-2] + e[i]
}
#g.
autoplot(arima_101, series = "ARMA (1,1)") +
autolayer(func_ar2, series = "AR (2)") +
ylab("y") +
guides(colour = guide_legend(title = "Models"))
#The AR(2) series oscillates above and below 0 with rapidly growing amplitude (it is not stationary), whereas the ARMA(1,1) series looks more like data we would expect to see in the real world.
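# Sketch: a stationarity check for the simulated AR(2) with phi1 = -0.8, phi2 = 0.3.
# All roots of 1 - phi1*z - phi2*z^2 must lie outside the unit circle; here one root
# has modulus below 1, which is why the series oscillates with growing amplitude.
abs(polyroot(c(1, 0.8, -0.3)))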
#a.
autoplot(wmurders)
autoplot(diff(wmurders))
ndiffs(wmurders)
## [1] 2
autoplot(diff(wmurders, differences = 2))
ggtsdisplay((diff(wmurders, differences = 2)))
#The appropriate ARIMA model can be ARIMA(1,2,1).
#b.
#The data do not appear to be seasonal, and with the series twice-differenced, including a constant would introduce a quadratic trend into the forecasts, which could cause problems later on
#c.
#(1 - phi1*B)(1 - B)^2 * y_t = (1 + theta1*B) * e_t
#d.
wm_fit = Arima(wmurders, order=c(1,2,1))
checkresiduals(wm_fit)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,2,1)
## Q* = 12.419, df = 8, p-value = 0.1335
##
## Model df: 2. Total lags used: 10
#Based on the residuals being mostly normally distributed, and all the values on the ACF being within the critical values, this model appears to be a solid one.
#e.
wm_fit_fc = forecast(wm_fit, h=3)
wm_fit_fc
## Point Forecast Lo 80 Hi 80 Lo 95 Hi 95
## 2005 2.470660 2.194836 2.746484 2.048824 2.892496
## 2006 2.363106 1.986351 2.739862 1.786908 2.939304
## 2007 2.252833 1.765391 2.740276 1.507354 2.998313
#f.
autoplot(wm_fit_fc)
#g.
auto_wm_fit = auto.arima(wmurders)
auto_wm_fit
## Series: wmurders
## ARIMA(1,2,1)
##
## Coefficients:
## ar1 ma1
## -0.2434 -0.8261
## s.e. 0.1553 0.1143
##
## sigma^2 estimated as 0.04632: log likelihood=6.44
## AIC=-6.88 AICc=-6.39 BIC=-0.97
auto_wm_fit_fc = forecast(auto_wm_fit, h=3)
autoplot(auto_wm_fit_fc)
#auto.arima gives the same ARIMA(1,2,1) model as the one I chose
#a.
austa_arima = auto.arima(austa)
austa_arima
## Series: austa
## ARIMA(0,1,1) with drift
##
## Coefficients:
## ma1 drift
## 0.3006 0.1735
## s.e. 0.1647 0.0390
##
## sigma^2 estimated as 0.03376: log likelihood=10.62
## AIC=-15.24 AICc=-14.46 BIC=-10.57
#ARIMA(0,1,1) with drift was the model selected
fc_austa = forecast(austa_arima, h=10)
autoplot(fc_austa)
#b.
#ARIMA(0,1,1) is also the model from part a
#c.
arima_213 = arima(austa,order = c(2,1,3),method="ML")
arima_213
##
## Call:
## arima(x = austa, order = c(2, 1, 3), method = "ML")
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## 0.0004 0.9996 0.4633 -0.9893 -0.4625
## s.e. 0.0031 0.0031 0.2283 0.0329 0.2261
##
## sigma^2 estimated as 0.03059: log likelihood = 9.24, aic = -6.48
arima_213 %>%
forecast(h=10) %>%
autoplot()
austa3<-austa-mean(austa)
arima_213_nc<-arima(austa3,order = c(2,1,3),method="ML")
arima_213_nc
##
## Call:
## arima(x = austa3, order = c(2, 1, 3), method = "ML")
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3
## 0.0006 0.9994 0.4534 -0.9878 -0.4538
## s.e. 0.0053 0.0053 0.2302 0.0371 0.2254
##
## sigma^2 estimated as 0.03069: log likelihood = 9.23, aic = -6.47
arima_213_nc %>%
forecast(h=10) %>%
autoplot()
#d.
arima_001 = arima(austa,order = c(0,0,1),method="ML")
arima_001
##
## Call:
## arima(x = austa, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 1.0000 3.5515
## s.e. 0.0907 0.3090
##
## sigma^2 estimated as 0.8827: log likelihood = -50.64, aic = 107.28
arima_001 %>%
forecast(h=10) %>%
autoplot()
arima_000 = arima(austa,order = c(0,0,0),method="ML")
arima_000
##
## Call:
## arima(x = austa, order = c(0, 0, 0), method = "ML")
##
## Coefficients:
## intercept
## 3.5414
## s.e. 0.2972
##
## sigma^2 estimated as 3.179: log likelihood = -71.9, aic = 147.8
arima_000 %>%
forecast(h=10) %>%
autoplot()
#e.
arima_021 = arima(austa,order = c(0,2,1),method="ML")
arima_021
##
## Call:
## arima(x = austa, order = c(0, 2, 1), method = "ML")
##
## Coefficients:
## ma1
## -0.7262
## s.e. 0.2277
##
## sigma^2 estimated as 0.03853: log likelihood = 6.74, aic = -9.48
arima_021 %>%
forecast(h=10) %>%
autoplot()
#a
autoplot(usgdp)
bcl_usgdp = BoxCox.lambda(usgdp)
ndiffs(usgdp)
## [1] 2
bcl_diff_usgdp = usgdp %>%
BoxCox(bcl_usgdp) %>%
diff(differences = 2) # two first differences, as suggested by ndiffs()
ggtsdisplay(bcl_diff_usgdp)
#b.
auto_gdp = auto.arima(usgdp)
auto_gdp
## Series: usgdp
## ARIMA(2,2,2)
##
## Coefficients:
## ar1 ar2 ma1 ma2
## -0.1228 0.3106 -0.5835 -0.3669
## s.e. 0.2869 0.0872 0.3004 0.2862
##
## sigma^2 estimated as 1604: log likelihood=-1199.57
## AIC=2409.13 AICc=2409.39 BIC=2426.43
auto_gdp %>%
forecast(h=20) %>%
autoplot()
#c.
arima_001_gdp = arima(usgdp,order = c(0,0,1),method="ML")
arima_001_gdp
##
## Call:
## arima(x = usgdp, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 1.0000 5173.1228
## s.e. 0.0113 180.8189
##
## sigma^2 estimated as 1945425: log likelihood = -2055.02, aic = 4116.04
arima_001_gdp %>%
forecast(h=8) %>%
autoplot()
arima_121_gdp = arima(usgdp,order = c(1,2,1),method="ML")
arima_121_gdp
##
## Call:
## arima(x = usgdp, order = c(1, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ma1
## 0.2899 -0.9555
## s.e. 0.0668 0.0197
##
## sigma^2 estimated as 1651: log likelihood = -1204.96, aic = 2415.92
arima_121_gdp %>%
forecast(h=8) %>%
autoplot()
arima_000_gdp = arima(usgdp,order = c(0,0,0),method="ML")
arima_000_gdp
##
## Call:
## arima(x = usgdp, order = c(0, 0, 0), method = "ML")
##
## Coefficients:
## intercept
## 5168.4692
## s.e. 179.4869
##
## sigma^2 estimated as 7635091: log likelihood = -2214.31, aic = 4432.62
arima_000_gdp %>%
forecast(h=8) %>%
autoplot()
#d.
#I would be inclined to go with the auto.arima model, as it appears to follow the recent trend of the data more closely than the others
checkresiduals(auto_gdp)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(2,2,2)
## Q* = 8.6247, df = 4, p-value = 0.0712
##
## Model df: 4. Total lags used: 8
#e.
#The forecast was already created as a part of b, and I would say the forecast looks reasonable.
auto_gdp %>%
forecast(h=20) %>%
autoplot()
#f.
ets_usgdp = ets(usgdp)
ets_usgdp %>%
forecast(h=20) %>%
autoplot()
autoplot(austourists)
tsdisplay(austourists)
#There appears to be a slight upward trend to the data as well as a high level of seasonality
#b.
#The ACF and PACF show that the lagged observations have a substantial amount of autocorrelation, particularly at the seasonal lags
#c.
#The seasonality of the data needs to be accounted for when we are trying to forecast, if we want to ensure some level of accuracy
#d.
dec_tour = decompose(austourists, "multiplicative")
adj_tour = austourists / dec_tour$seasonal
diff_adj_tour = diff(adj_tour)
tsdisplay(diff_adj_tour)
#I believe this would suggest a (1,0,0) model since we are looking for a single lag period
#e.
auto_tour = auto.arima(austourists)
auto_tour %>%
forecast(h=4) %>%
autoplot()
#It looks like the auto arima gives the same model.
#f.
#(1 - B^4) * y_t = B * y_(t-1) + e_t
autoplot(usmelec)
movAvg <- ma(usmelec, order=12)
autoplot(movAvg)
#b.
#The data will likely need transforming, as the seasonal fluctuations grow along with the long, upward trend in electricity consumption over time
bcl_elec = BoxCox.lambda(movAvg)
ndiffs(movAvg)
## [1] 1
bcl_diff_elec = movAvg %>%
BoxCox(bcl_elec) %>%
diff(1)
ggtsdisplay(bcl_diff_elec)
## Warning: Removed 12 rows containing missing values (geom_point).
#c.
#The data appears to be seasonal and therefore not stationary
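# A sketch checking how much seasonal and first differencing the raw series needs (output not shown):
nsdiffs(usmelec)
usmelec %>% diff(12) %>% ndiffs()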
#d.
arima_001_elec = arima(usmelec,order = c(0,0,1),method="ML")
arima_001_elec
##
## Call:
## arima(x = usmelec, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 0.8483 259.5482
## s.e. 0.0156 3.4307
##
## sigma^2 estimated as 1678: log likelihood = -2494.55, aic = 4995.09
arima_001_elec %>%
forecast(h=12) %>%
autoplot()
arima_121_elec = arima(usmelec,order = c(1,2,1),method="ML")
arima_121_elec
##
## Call:
## arima(x = usmelec, order = c(1, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ma1
## 0.1334 -1.0000
## s.e. 0.0452 0.0052
##
## sigma^2 estimated as 561.8: log likelihood = -2221.87, aic = 4449.73
arima_121_elec %>%
forecast(h=12) %>%
autoplot()
arima_611_elec = arima(usmelec, order = c(6,1,1), method="ML")
arima_611_elec
##
## Call:
## arima(x = usmelec, order = c(6, 1, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1
## -0.4736 -0.2502 -0.5709 -0.5812 -0.0011 -0.1411 0.4707
## s.e. 0.0852 0.0500 0.0443 0.0520 0.0559 0.0500 0.0753
##
## sigma^2 estimated as 316.6: log likelihood = -2085.49, aic = 4186.98
arima_611_elec %>%
forecast(h=12) %>%
autoplot()
#e.
checkresiduals(arima_611_elec)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,1)
## Q* = 749.39, df = 17, p-value < 2.2e-16
##
## Model df: 7. Total lags used: 24
#The residuals look roughly normally distributed, but the Ljung-Box p-value is essentially zero and the ACF chart still shows large spikes, so there is still much room for improvement
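# A seasonal ARIMA would likely absorb the remaining seasonal autocorrelation; a sketch with a hypothetical, untuned specification:
sarima_elec = Arima(usmelec, order = c(1,1,1), seasonal = c(0,1,1))
checkresiduals(sarima_elec)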
#f.
arima_611_elec = arima(usmelec, order = c(6,1,1), method="ML")
arima_611_elec
##
## Call:
## arima(x = usmelec, order = c(6, 1, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1
## -0.4736 -0.2502 -0.5709 -0.5812 -0.0011 -0.1411 0.4707
## s.e. 0.0852 0.0500 0.0443 0.0520 0.0559 0.0500 0.0753
##
## sigma^2 estimated as 316.6: log likelihood = -2085.49, aic = 4186.98
arima_611_elec %>%
forecast(h=180) %>%
autoplot()
#Based on page 19 of the pdf showing electrical consumption, there was indeed a leveling off of electricity use, so the model appears to be somewhat accurate.
#a.
autoplot(mcopper)
bcl_mcop = BoxCox.lambda(mcopper)
ndiffs(mcopper)
## [1] 1
bcl_diff_mcop = mcopper %>%
BoxCox(bcl_mcop) %>%
diff(1)
ggtsdisplay(bcl_diff_mcop)
#b.
auto_mcop = auto.arima(mcopper)
auto_mcop
## Series: mcopper
## ARIMA(0,1,1)
##
## Coefficients:
## ma1
## 0.2900
## s.e. 0.0419
##
## sigma^2 estimated as 6026: log likelihood=-3248.53
## AIC=6501.07 AICc=6501.09 BIC=6509.73
auto_mcop %>%
forecast(h=12) %>%
autoplot()
#c.
arima_001_mcop = arima(mcopper,order = c(0,0,1),method="ML")
arima_001_mcop
##
## Call:
## arima(x = mcopper, order = c(0, 0, 1), method = "ML")
##
## Coefficients:
## ma1 intercept
## 0.9451 998.6773
## s.e. 0.0087 26.3482
##
## sigma^2 estimated as 103672: log likelihood = -4058.21, aic = 8122.43
arima_001_mcop %>%
forecast(h=12) %>%
autoplot()
arima_221_mcop = arima(mcopper,order = c(2,2,1),method="ML")
arima_221_mcop
##
## Call:
## arima(x = mcopper, order = c(2, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ma1
## 0.2803 -0.0505 -0.9980
## s.e. 0.0427 0.0435 0.0152
##
## sigma^2 estimated as 6030: log likelihood = -3245.88, aic = 6499.76
arima_221_mcop %>%
forecast(h=12) %>%
autoplot()
arima_121_mcop = arima(mcopper,order = c(1,2,1),method="ML")
arima_121_mcop
##
## Call:
## arima(x = mcopper, order = c(1, 2, 1), method = "ML")
##
## Coefficients:
## ar1 ma1
## 0.2684 -1.0000
## s.e. 0.0410 0.0139
##
## sigma^2 estimated as 6036: log likelihood = -3246.57, aic = 6499.14
arima_121_mcop %>%
forecast(h=12) %>%
autoplot()
#d.
#I would use the auto.arima model, as there is a higher likelihood of a better forecast.
checkresiduals(auto_mcop)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(0,1,1)
## Q* = 30.716, df = 23, p-value = 0.1299
##
## Model df: 1. Total lags used: 24
#e.
auto_mcop %>%
forecast(h=24) %>%
autoplot()
#The range of outcomes in the forecast appears reasonable, but I would not expect the forecast to level off
#f.
fc_mcop_ets = forecast(ets(mcopper), h=24)
autoplot(fc_mcop_ets)
#The ets model looks like it accounts for the recent, downward trend in the data better than the auto-arima model
autoplot(qauselec)
bcl_qauselec <- BoxCox.lambda(qauselec)
#A Box-Cox transformation is helpful because the seasonal variation grows with the level of the series
#b.
#The data is not stationary; as noted above, it has both an upward trend and strong seasonality
nsdiffs(qauselec)
## [1] 1
ndiffs(qauselec)
## [1] 1
#c.
arima_401_qaus = arima(qauselec,order = c(4,0,1),method="ML")
## Warning in arima(qauselec, order = c(4, 0, 1), method = "ML"): possible
## convergence problem: optim gave code = 1
arima_401_qaus
##
## Call:
## arima(x = qauselec, order = c(4, 0, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ma1 intercept
## 0.0530 0.1289 0.0105 0.8070 0.5517 30.8648
## s.e. 0.0451 0.0417 0.0395 0.0382 0.0670 26.4027
##
## sigma^2 estimated as 1.104: log likelihood = -325.01, aic = 664.02
arima_401_qaus %>%
forecast(h=12) %>%
autoplot()
arima_611_qaus = arima(qauselec,order = c(6,1,1),method="ML")
arima_611_qaus
##
## Call:
## arima(x = qauselec, order = c(6, 1, 1), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6 ma1
## -0.3429 -0.1216 -0.1308 0.7239 0.1843 -0.0621 -0.0666
## s.e. 0.3883 0.1757 0.0861 0.0720 0.2853 0.1176 0.3831
##
## sigma^2 estimated as 0.7542: log likelihood = -279.64, aic = 575.29
arima_611_qaus %>%
forecast(h=12) %>%
autoplot()
arima_610_qaus = arima(qauselec,order = c(6,1,0),method="ML")
arima_610_qaus
##
## Call:
## arima(x = qauselec, order = c(6, 1, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 ar5 ar6
## -0.4097 -0.1507 -0.1428 0.7151 0.2330 -0.0439
## s.e. 0.0679 0.0714 0.0522 0.0518 0.0712 0.0678
##
## sigma^2 estimated as 0.7543: log likelihood = -279.66, aic = 573.32
arima_610_qaus %>%
forecast(h=12) %>%
autoplot()
arima_410_qaus = arima(qauselec,order = c(4,1,0), seasonal = c(0,1,2) ,method="ML")
arima_410_qaus
##
## Call:
## arima(x = qauselec, order = c(4, 1, 0), seasonal = c(0, 1, 2), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 sma1 sma2
## -0.4774 -0.3657 -0.2715 0.3116 -1.3807 0.5440
## s.e. 0.0766 0.0747 0.0750 0.1627 0.1818 0.1386
##
## sigma^2 estimated as 0.5112: log likelihood = -233.26, aic = 480.51
arima_410_qaus %>%
forecast(h=12) %>%
autoplot()
auto_quas = auto.arima(qauselec)
auto_quas
## Series: qauselec
## ARIMA(1,0,1)(1,1,2)[4] with drift
##
## Coefficients:
## ar1 ma1 sar1 sma1 sma2 drift
## 0.9158 -0.4411 0.8628 -1.6921 0.7737 0.2299
## s.e. 0.0467 0.0942 0.1420 0.1425 0.0954 0.0456
##
## sigma^2 estimated as 0.5082: log likelihood=-230.58
## AIC=475.16 AICc=475.71 BIC=498.72
auto_quas %>%
forecast(h=12) %>%
autoplot()
#d.
checkresiduals(auto_quas)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(1,0,1)(1,1,2)[4] with drift
## Q* = 14.822, df = 3, p-value = 0.001975
##
## Model df: 6. Total lags used: 9
#The residuals do not appear to be white noise
checkresiduals(arima_401_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,1) with non-zero mean
## Q* = 47.525, df = 3, p-value = 2.688e-10
##
## Model df: 6. Total lags used: 9
checkresiduals(arima_611_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,1)
## Q* = 34.856, df = 3, p-value = 1.307e-07
##
## Model df: 7. Total lags used: 10
checkresiduals(arima_610_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(6,1,0)
## Q* = 29.737, df = 3, p-value = 1.567e-06
##
## Model df: 6. Total lags used: 9
checkresiduals(arima_410_qaus)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,1,0)(0,1,2)[4]
## Q* = 17.275, df = 3, p-value = 0.0006205
##
## Model df: 6. Total lags used: 9
#e.
arima_410_qaus %>%
forecast(h=24) %>%
autoplot()
#f.
fc_qaus_ets = forecast(
ets(qauselec), h = 24
)
autoplot(fc_qaus_ets)
fc_stlf_quas = stlf(qauselec, lambda = BoxCox.lambda(qauselec), s.window = 5, robust = TRUE,
method = "arima", h = 24)
autoplot(fc_stlf_quas)
checkresiduals(fc_stlf_quas)
## Warning in checkresiduals(fc_stlf_quas): The fitted degrees of freedom is based
## on the model used for the seasonally adjusted data.
##
## Ljung-Box test
##
## data: Residuals from STL + ARIMA(0,1,1) with drift
## Q* = 8.0648, df = 6, p-value = 0.2334
##
## Model df: 2. Total lags used: 8
#Based on the checkresiduals results, I would say that the stlf is the best way to forecast this particular dataset.
ts_arima_retail = auto.arima(ts_retail)
ts_arima_retail
## Series: ts_retail
## ARIMA(1,0,2)(0,1,1)[12] with drift
##
## Coefficients:
## ar1 ma1 ma2 sma1 drift
## 0.9328 -0.2745 -0.1676 -0.2989 0.9843
## s.e. 0.0267 0.0608 0.0553 0.0551 0.3479
##
## sigma^2 estimated as 198.4: log likelihood=-1498.1
## AIC=3008.21 AICc=3008.44 BIC=3031.67
#b.
ts_retail %>%
forecast(h=36) %>%
autoplot()
damp_hw_retail %>%
forecast(h=36) %>%
autoplot()
#c.
library(readxl)
X8501011 <- read_excel("C:/Users/dmcsw/Downloads/8501011.xlsx",
sheet = "Data1", skip = 9)
retail_new = X8501011
retail_new_ts = ts(retail_new[, "A3349873A"],
start = c(1982, 4),
frequency = 12)
retail_new_test = subset(
retail_new_ts,
start = length(ts_retail) + 1
)
autoplot(retail_new_test) +
autolayer(forecast(ts_arima_retail, h=36))
#The forecasts appear to capture the overall trend, but not the actual observed values
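# A numerical check against the new data (sketch; accuracy values not shown here):
accuracy(forecast(ts_arima_retail, h = 36), retail_new_test)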
#a.
ts_sheep = ts(sheep)
autoplot(ts_sheep)
#b.
# (yt - yt-1) - phi1(yt-1 - yt-2) - phi2(yt-2 - yt-3) - phi3(yt-3 - yt-4) = et
# (1 - B)yt - phi1*B(1- B)yt - phi2*B^2(1- B)yt - phi3*B^3(1- B)yt = et
# (1 - phi1*B - phi2*B^2 - phi3*B^3)(1 - B)yt = et
# It is ARIMA(3, 1, 0) model.
#c.
arima_310_sheep = arima(sheep,order = c(3,1,0),method="ML")
arima_310_sheep
##
## Call:
## arima(x = sheep, order = c(3, 1, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3
## 0.4210 -0.2018 -0.3043
## s.e. 0.1193 0.1363 0.1243
##
## sigma^2 estimated as 4783: log likelihood = -407.56, aic = 823.12
arima_310_sheep %>%
forecast(h=12) %>%
autoplot()
checkresiduals(arima_310_sheep)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,1,0)
## Q* = 3.4032, df = 7, p-value = 0.8454
##
## Model df: 3. Total lags used: 10
#d.
sheep.1940 = 1797 + 0.42*(1797 - 1791) -0.20*(1791 - 1627) - 0.30*(1627 - 1665)
sheep.1941 = sheep.1940 + 0.42*(sheep.1940 - 1797) -0.20*(1797 - 1791) - 0.30*(1791 - 1627)
sheep.1942 = sheep.1941 + 0.42*(sheep.1941 - sheep.1940) -0.20*(sheep.1940 - 1797) - 0.30*(1797 - 1791)
c(sheep.1940, sheep.1941, sheep.1942)
## [1] 1778.120 1719.790 1697.268
#e.
arima_310_sheep = arima(sheep,order = c(3,1,0),method="ML")
arima_310_sheep
##
## Call:
## arima(x = sheep, order = c(3, 1, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3
## 0.4210 -0.2018 -0.3043
## s.e. 0.1193 0.1363 0.1243
##
## sigma^2 estimated as 4783: log likelihood = -407.56, aic = 823.12
arima_310_sheep %>%
forecast(h=12) %>%
autoplot()
#The fitted coefficients differ slightly from the rounded values used in part d, so the forecasts from the fitted model differ slightly from the hand-calculated projections
ts_bicoal = ts(bicoal)
autoplot(ts_bicoal)
#b.
#(1 - phi1*B - phi2*B^2 - phi3*B^3 - phi4*B^4)*yt = c + et
# where c = mu*(1 - phi1 - phi2 - phi3 - phi4) and mu is the series mean
# This model is ARIMA(4, 0, 0).
#c.
arima_400_coal = arima(bicoal,order = c(4,0,0),method="ML")
arima_400_coal
##
## Call:
## arima(x = bicoal, order = c(4, 0, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 intercept
## 0.8334 -0.3444 0.5525 -0.3780 481.5106
## s.e. 0.1366 0.1752 0.1733 0.1414 21.0569
##
## sigma^2 estimated as 2509: log likelihood = -262.05, aic = 536.1
arima_400_coal %>%
forecast(h=12) %>%
autoplot()
ggAcf(bicoal, lag.max = 36)
ggPacf(bicoal, lag.max = 36)
checkresiduals(arima_400_coal)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(4,0,0) with non-zero mean
## Q* = 4.8533, df = 5, p-value = 0.434
##
## Model df: 5. Total lags used: 10
#In the ACF plot the values largely stay within the critical bounds after the first 3 lags, and the PACF cuts off after lag 4, which supports the AR(4) model.
#d.
c = 162.00
phi1 = 0.83
phi2 = -0.34
phi3 = 0.55
phi4 = -0.38
bicoal.1969 <- c + phi1*545 + phi2*552 + phi3*534 + phi4*512
bicoal.1970 <- c + phi1*bicoal.1969 + phi2*545 + phi3*552 + phi4*534
bicoal.1971 <- c + phi1*bicoal.1970 + phi2*bicoal.1969 + phi3*545 + phi4*552
c(bicoal.1969, bicoal.1970, bicoal.1971)
## [1] 525.8100 513.8023 499.6705
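# Sketch: the constant c used above can be recovered from the fitted intercept (the series mean)
# and the AR coefficients, since c = mu*(1 - phi1 - phi2 - phi3 - phi4); this gives roughly 162.
mu = 481.5106
mu * (1 - (phi1 + phi2 + phi3 + phi4))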
#e.
arima_400_coal = arima(bicoal,order = c(4,0,0),method="ML")
arima_400_coal
##
## Call:
## arima(x = bicoal, order = c(4, 0, 0), method = "ML")
##
## Coefficients:
## ar1 ar2 ar3 ar4 intercept
## 0.8334 -0.3444 0.5525 -0.3780 481.5106
## s.e. 0.1366 0.1752 0.1733 0.1414 21.0569
##
## sigma^2 estimated as 2509: log likelihood = -262.05, aic = 536.1
arima_400_coal %>%
forecast(h=12) %>%
autoplot()
#The forecast makes sense in the context of previous available data.
library(Quandl)
y <- Quandl("FRED/NROUST", api_key="duzxAd-Usc3TA4-vdQRx", type="ts")
#b.
autoplot(y)
unemp_arima = auto.arima(y)
unemp_arima
## Series: y
## ARIMA(3,2,3)
##
## Coefficients:
## ar1 ar2 ar3 ma1 ma2 ma3
## 0.0930 0.3693 -0.3294 -0.3478 -0.3050 0.8710
## s.e. 0.0695 0.0651 0.0612 0.0375 0.0368 0.0281
##
## sigma^2 estimated as 9.266e-05: log likelihood=1066.1
## AIC=-2118.19 AICc=-2117.84 BIC=-2091.6
#auto.arima came up with an ARIMA(3,2,3) model
#c.
checkresiduals(unemp_arima)
##
## Ljung-Box test
##
## data: Residuals from ARIMA(3,2,3)
## Q* = 34.006, df = 3, p-value = 1.976e-07
##
## Model df: 6. Total lags used: 9
#The residuals are not white noise.
#d.
unemp_arima_fc = forecast(unemp_arima, h=48)
unemp_arima_fc %>%
autoplot()
#e.
unemp_ets = ets(y)
unemp_ets
## ETS(M,Ad,N)
##
## Call:
## ets(y = y)
##
## Smoothing parameters:
## alpha = 0.9002
## beta = 0.9002
## phi = 0.9117
##
## Initial states:
## l = 5.2505
## b = 0.009
##
## sigma: 0.002
##
## AIC AICc BIC
## -1089.744 -1089.485 -1066.913
#f.
checkresiduals(unemp_ets)
##
## Ljung-Box test
##
## data: Residuals from ETS(M,Ad,N)
## Q* = 59.562, df = 3, p-value = 7.292e-13
##
## Model df: 5. Total lags used: 8
#The residuals are not white noise
#g.
unemp_ets %>%
forecast(h=48) %>%
autoplot()
#h.
#To me, the ARIMA model appears to be better in line with where the data is heading; its ACF and residuals also look more promising for a forecasting model