library(fpp3)
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## âś” tibble 3.2.1 âś” tsibble 1.1.4
## âś” dplyr 1.1.2 âś” tsibbledata 0.4.1
## âś” tidyr 1.3.0 âś” feasts 0.3.1
## âś” lubridate 1.9.2 âś” fable 0.3.3
## âś” ggplot2 3.4.4 âś” fabletools 0.3.4
## Warning: package 'tsibble' was built under R version 4.3.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## âś– lubridate::date() masks base::date()
## âś– dplyr::filter() masks stats::filter()
## âś– tsibble::intersect() masks base::intersect()
## âś– tsibble::interval() masks lubridate::interval()
## âś– dplyr::lag() masks stats::lag()
## âś– tsibble::setdiff() masks base::setdiff()
## âś– tsibble::union() masks base::union()
library(forecast)
## Registered S3 method overwritten by 'quantmod':
## method from
## as.zoo.data.frame zoo
Do exercises 8.1, 8.5, 8.6, 8.7, 8.8, 8.9 in Hyndman. Please submit both the link to your Rpubs and the .pdf file with your run code
pigs slaughtered in
Victoria, available in the aus_livestock dataset.pigs <- aus_livestock |>
filter(State == "Victoria" & Animal == "Pigs")
head(pigs)
autoplot(pigs)
## Plot variable not specified, automatically selected `.vars = Count`
a and
l_0, and generate forecasts for the next four months.# Estimate parameters
fit <- pigs |>
model(ETS(Count ~ error("A") + trend("N") + season("N")))
fit |>
report()
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
The optimal values for alpha is 0.3221247. This means that about 32% of the forecast is based on the most recent observationand initial states is 100646.6.
fc <- fit |>
forecast(h = 4)
fc |>
autoplot(pigs) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
labs(y="Count", title="Victorian Pigs Slaughtered") +
guides(colour = "none")
The forecast for the next four months is shown above.
fc
y_hat <- fc$.mean[1]
resid(fit)
fit_df <- augment(fit)
sd<- sd(fit_df$.resid)
upper <- y_hat + (1.96 * sd)
lower <- y_hat - (1.96 * sd)
c(lower,upper)
## [1] 76871.01 113502.10
fc_hilo <- fc %>% hilo()
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [76854.79, 113518.3]95
The hilo function and my computed values differ but do
not have a significant difference.
JA <- global_economy |>
filter(Country == "Jamaica")
head(JA)
JA |>
autoplot(Exports) +
labs(x= "Year", y="Exports", title="Jamaican Exports") +
guides(colour = "none")
fit <- JA |>
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
fit |>
report()
## Series: Exports
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.7074404
##
## Initial states:
## l[0]
## 33.26479
##
## sigma^2: 29.5677
##
## AIC AICc BIC
## 435.8980 436.3424 442.0793
fc <- fit |>
forecast(h = 4)
fc |>
autoplot(JA) +
geom_line(aes(y = .fitted), col="#D55E00",
data = augment(fit)) +
labs(y="Exports", title="Jamaican Exports") +
guides(colour = "none")
accuracy(fit)
The RMSE value for the model is 5.343044.
models <- JA |>
model(
AAN = ETS(Exports ~ error("A") + trend("A") + season("N")),
ANN = ETS(Exports ~ error("A") + trend("N") + season("N"))
)
accuracy(models)
The AAN model had a higher RMSE than the ANN model.
fc <- models |>
forecast(h= 8)
fc |>
autoplot(JA, level = NULL)
The AAN model shows an increase in the amount of Exports
in its forecast. This is more plausible than the ANN model
which shows no increase or decrease over the forecast.
fit1 <- JA |>
model(
AAN = ETS(Exports ~ error("A") + trend("A") + season("N"))
)
accuracy(fit1)
fc1 <- fit1 |>
forecast(h= 8)
fc1 |>
autoplot(JA, level = NULL)
fc1
y_hat_1 <- fc1$.mean[1]
resid(fit1)
fit_df <- augment(fit1)
sd<- sd(fit_df$.resid)
upper <- y_hat_1 + (1.96 * sd)
lower <- y_hat_1 - (1.96 * sd)
c(lower,upper)
## [1] 22.62591 43.76015
fc_hilo <- fc1 %>% hilo()
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [22.33654, 44.04952]95
fit2 <- JA |>
model(ANN = ETS(Exports ~ error("A") + trend("N") + season("N"))
)
accuracy(fit2)
fit2
fc2 <- fit2 |>
forecast(h= 8)
fc2 |>
autoplot(JA, level = NULL)
fc2
y_hat_2 <- fc2$.mean[1]
head(resid(fit2))
fit_df <- augment(fit2)
sd<- sd(fit_df$.resid)
upper <- y_hat_2 + (1.96 * sd)
lower <- y_hat_2 - (1.96 * sd)
upper
## [1] 43.72768
lower
## [1] 22.60003
The upper value is 43.7276848 and the lower value is 22.6000288.
Using the hilo function the results differ from my
computed values by less than 0.1.
fc_hilo <- fc2 %>% hilo()
fc_hilo$`95%`[1]
## <hilo[1]>
## [1] [22.50632, 43.82139]95
Fit1 had an R computed interval of [22.33654, 44.04952]. I calculated for the first model [22.62591, 43.76015]. My interval for the second fit was [22.60003, 43.72768]. Fit 2 had an R computed interval of [22.50632, 43.82139].
My computed intervals were close to the R computed value.
global_economy |>
filter(Country == "China") |>
autoplot(GDP)
Before using BoxCox, we can use the guerrero formula to
determine the optimal lambda value.
lambda <- global_economy |>
filter(Country == "China") |>
features(GDP, features = guerrero) |>
pull(lambda_guerrero)
fit <- global_economy |>
filter(Country == "China") |>
model(
# ETS
ETS = ETS(GDP),
`Damped` = ETS(GDP ~ trend("Ad")),
`Box-Cox` = ETS(box_cox(GDP, lambda))
)
CH <- global_economy |>
filter(Country == "China")
fit %>%
forecast(h=10) %>%
autoplot(CH)
aus_production |>
autoplot(Gas)
Multiplicative seasonality is needed because there are changes to the heights of seasonal periods over time.
fit <- aus_production |>
model(
Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
Damped = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
fc <- fit %>% forecast(h = "5 years")
fc %>%
autoplot(aus_production, level = NULL) +
labs(title="Australian Gas Production") +
guides(colour = guide_legend(title = "Forecast"))
accuracy(fit )
As shown in the table above, the Damped model shows a better result based on the RMSE than the Multiplicative model.
set.seed(811)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
It accounts for the variance over time.
fit <- myseries %>%
model(
`Holt-Winters’ Multiplicative` = ETS(Turnover ~ error("M") + trend("A") +
season("M")),
`Holt-Winters’ Damped Multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") +
season("M"))
)
fc <- fit %>% forecast(h = "5 years")
fc %>%
autoplot(myseries, level = NULL) +
labs(title="Australian Department Stores",
y="Turnover") +
guides(colour = guide_legend(title = "Forecast"))
accuracy(fit)
The RMSE of the model with least RMSE value is the Holt-Winters’ Damped Multiplicative method.
fit |>
select("Holt-Winters’ Damped Multiplicative") |>
gg_tsresiduals()
There is no clear pattern in the residuals plot. However, the ACF plot shows lag values which are outside of the dashed lines. Additionally the re
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
# seasonal naĂŻve
fit <- myseries_train %>%
model(
"Holt-Winters' Damped" = ETS(Turnover ~ error("M") + trend("Ad") +
season("M")),
"Holt-Winters' Multiplicative" = ETS(Turnover ~ error("M") + trend("A") +
season("M"))
)
accuracy(fit)
The multiplicative method had the lower RMSE value, so I would prefer to use the multiplicatice model.
fit |>
select("Holt-Winters' Multiplicative") |>
gg_tsresiduals()
The model shows residuls with two values outside of the dashed lines, but it is less than 5% so the residuals are white noise. Additionally, the residuals do appear to have a normal distribution.
Again, to get the optimal lambda value, I used the
guerrero feature.
lambda <- myseries_train |>
features(Turnover, features = guerrero) |>
pull(lambda_guerrero)
Box_cox_train <- myseries_train |>
mutate(
Turnover_Box_Cox = box_cox(Turnover, lambda)
)
fit <- Box_cox_train |>
model(
'STL' = STL(Turnover_Box_Cox ~ season(window = "periodic"),
robust = T),
'ETS' = ETS(Turnover_Box_Cox)
)
The accuracy of the above fit is shown below.
accuracy(fit)
The previous best fit (from the Holt-Winters’ Multiplicative model) had a RMSE value of 1.069499. These models both performed better than the Multiplicative model I produced in the previous exercise.