8.1 Consider the the number of pigs slaughtered in Victoria, available in the aus_livestock dataset.
knitr::opts_chunk$set(warning = FALSE, message = FALSE)
# package was not installed i have installed below package
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tsibble 1.1.6 ✔ feasts 0.4.1
## ✔ tsibbledata 0.4.1 ✔ fable 0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
df <- aus_livestock %>%
filter(Animal == 'Pigs' & State == 'Victoria')
df %>%
autoplot(Count) +
labs(
title = 'Pigs in Victoria'
)
Pigs<-aus_livestock %>%
filter(State=="Victoria" & Animal=="Pigs")
fit <- as_tsibble(Pigs) %>%
model(ETS(Count ~ error("A") + trend("N") + season("N")))
#fit1<-ses(Pigs,h=4)
fc<-fit %>%
forecast(h = 4)
print(tidy(fit))
## # A tibble: 2 × 5
## Animal State .model term estimate
## <fct> <fct> <chr> <chr> <dbl>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + sea… alpha 3.22e-1
## 2 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + sea… l[0] 1.01e+5
DISCUSSION:
Parameters
alpha=.322 l0=1.01e+5
fc %>%
autoplot(Pigs) +
geom_line(aes(y = .fitted), col="green",
data = augment(fit)) +
labs(y="Count", title="Victoria Pigs") +
guides(colour = "none")
DISCUSSION: Black line represents data, blue line is one step ahead.
b.Compute a 95% prediction interval for the first forecast using ^y±1.96s where s is the standard deviation of the residuals. Compare your interval with the interval produced by R.
h = unpack_hilo(hilo(fc, 95) , "95%" )
View(h)
h[1,]
## # A tsibble: 1 x 8 [1M]
## # Key: Animal, State, .model [1]
## Animal State .model Month
## <fct> <fct> <chr> <mth>
## 1 Pigs Victoria "ETS(Count ~ error(\"A\") + trend(\"N\") + season(\"… 2019 Jan
## # ℹ 4 more variables: Count <dist>, .mean <dbl>, `95%_lower` <dbl>,
## # `95%_upper` <dbl>
h$Count
## <distribution[4]>
## [1] N(95187, 8.7e+07) N(95187, 9.7e+07) N(95187, 1.1e+08) N(95187, 1.1e+08)
R_sd <- sqrt(distributional::variance(fc$Count[1]))
upper <- round(fc$.mean[1] + 1.96 * R_sd, 2)
lower <- round(fc$.mean[1] - 1.96 * R_sd, 2)
print_upper <- paste('Upper bound of first forecasted value =', upper)
print_lower <- paste('Lower bound of first forecasted value =', lower)
cat(print_upper, '\n', print_lower, sep='')
## Upper bound of first forecasted value = 113518.66
## Lower bound of first forecasted value = 76854.45
So the r generated interval is: [76855,113518]
Now, manually calculate.
# mean of first forecast is 95186.56
(m<-95186.56)
## [1] 95186.56
#get sigma
report(fit)
## Series: Count
## Model: ETS(A,N,N)
## Smoothing parameters:
## alpha = 0.3221247
##
## Initial states:
## l[0]
## 100646.6
##
## sigma^2: 87480760
##
## AIC AICc BIC
## 13737.10 13737.14 13750.07
#sigma^2: 87480760
(sigma<-84780760^.5)
## [1] 9207.647
#Lower bound
m-(1.96*sigma)
## [1] 77139.57
#upper bound
m+(1.96*sigma)
## [1] 113233.5
DISCUSSION: r generated 95% CI: [76855,113518] manual 95% CI: [77140,113234]
The two intervals are reasonably close. The manual CI bounds are closer together.
8.5 Data set global_economy contains the annual Exports from many countries. Select one country to analyse.
a. Plot the Exports series and discuss the main features of the data.
I have chosen USA for the purpose of this question. The relevant data is pulled
data("global_economy")
USA_Exports <- global_economy %>%
filter(Code == 'USA' & !is.na(Exports))
USA_Exports %>%
autoplot(Exports) +
labs(
title = 'Exports of USA as % of GDP ', y = '% of GDP' )
Plotting the data overall upwards trend:
b. Use an ETS(A,N,N) model to forecast the series, and plot the forecasts.
USA_Exports <- na.omit(USA_Exports)
usa_fit <- USA_Exports %>%
model(ETS(Exports ~ error("A") + trend("N") + season("N")))
usa_fc <- usa_fit %>%
forecast(h = 5)
usa_fc %>%
autoplot(USA_Exports) +
geom_line(aes(y = .fitted), col="green",
data = augment(usa_fit)) +
labs(y="% of GDP", title="Exports: United States") +
guides(colour = "none")
c. Compute the RMSE values for the training data.
accuracy(usa_fit)
## # A tibble: 1 × 11
## Country .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <fct> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 United States "ETS(Expo… Trai… 0.125 0.632 0.468 1.35 5.09 0.982 0.991 0.239
d. Compare the results to those from an ETS(A,A,N) model. (Remember that the trended model is using one more parameter than the simpler model.) Discuss the merits of the two forecasting methods for this data set.
usa_RMSE_ETS<-accuracy(USA_Exports %>%
model(
ANN = ETS (Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS (Exports ~ error("A") + trend("A") + season("N"))
))[["RMSE"]]
usa_RMSE_train<-accuracy(usa_fit)[["RMSE"]]
paste0("Training Model RMSE is ",usa_RMSE_train)
## [1] "Training Model RMSE is 0.631987696877015"
paste0("Compared to ")
## [1] "Compared to "
paste0(usa_RMSE_ETS)
## [1] "0.631987696877015" "0.620934771848593"
With the AAN model providing a smaller RMSE by about 0.011 than ANN, it suggests a better model lies with AAN.
USA_Exports %>%
model(
ANN = ETS (Exports ~ error("A") + trend("N") + season("N")),
AAN = ETS (Exports ~ error("A") + trend("A") + season("N"))
) %>%
forecast(h=4) %>%
autoplot(USA_Exports, level=NULL) +
labs(title="US Export Predictions")
usa_yhat <- usa_fc$.mean[1]
usa_aug <- augment(usa_fit)
usa_sd <- sd(usa_aug$.resid)
usa_upper95 <- usa_yhat + (usa_sd * 1.96)
usa_lower95 <- usa_yhat - (usa_sd * 1.96)
usa_hilo <- usa_fc %>% hilo()
paste0("Lower 95% ",usa_lower95," Mean ",usa_yhat, " Upper 95% ",usa_upper95)
## [1] "Lower 95% 10.6654088104703 Mean 11.8906832823187 Upper 95% 13.115957754167"
paste0("While our forecast had the values", usa_hilo$`95%`[1],
" with a mean of ", usa_hilo$.mean[1] )
## [1] "While our forecast had the values[10.6292803146377, 13.1520862499996]95 with a mean of 11.8906832823187"
Both are accurate up to the first decimal. The method using R vs the manual, accounts for degrees of freedom and has a more precise value for the critical values, but also does gave a greater range.
8.6 Forecast the Chinese GDP from the global_economy data set using an ETS model. Experiment with the various options in the ETS() function to see how much the forecasts change with damped trend, or with a Box-Cox transformation. Try to develop an intuition of what each is doing to the forecasts.
# package was not installed i have installed below package
#install.packages("cowplot")
library(cowplot)
china_df <- global_economy %>%
filter(Code == 'CHN')
china_plot1<-china_df %>% autoplot(GDP) +
labs(title="China GDP")
china_lambda <- china_df %>%
features(GDP, features = guerrero) %>%
pull(lambda_guerrero)
fit_china <- china_df %>%
model(
# Find ETS
ETS = ETS(GDP),
# Transformation Log
`Log` = ETS(log(GDP)),
# Damped Model
`Damped` = ETS(GDP ~ trend("Ad")),
# Box-Cox Transformation
`Box-Cox` = ETS(box_cox(GDP, china_lambda)),
# Damped Model with Box-Cox Transformation
`Box-Cox, Damped` = ETS(box_cox(GDP, china_lambda) ~ trend("Ad"))
)
china_plot2<-fit_china %>%
forecast(h="20 years") %>%
autoplot(china_df, level = NULL)+
labs(title="Chinese GDP Forecast") +
guides(colour = guide_legend(title = "Forecast"))
plot_grid(china_plot1,
china_plot2, nrow = 2)
Damped and ETS show similar growth, while Log and Box-Cox seem to
exaggerate its forecast. Box-Cox, Damped shows slightly more growth than
ETS and Damped.
8.7 Find an ETS model for the Gas data from aus_production and forecast the next few years. Why is multiplicative seasonality necessary here? Experiment with making the trend damped. Does it improve the forecasts?
Seasonal variation makes multiplicative seasonality necessary.
#install.packages("cowplot")
library(cowplot)
gas_plot1<-aus_production %>% autoplot(Gas)+
labs(title="Austrailian Gas Production")
gas_fit <- aus_production %>%
model(
# Multiplicative
Multiplicative = ETS(Gas ~ error("M") + trend("A") + season("M")),
# Damped multiplicative
`Multiplicative, Damped` = ETS(Gas ~ error("M") + trend("Ad") + season("M"))
)
gas_fc <- gas_fit %>% forecast(h = "5 years")
gas_plot2<-gas_fc %>%
autoplot(aus_production, level = NULL) +
labs(title="Australian Gas Production Forecast") +
guides(colour = guide_legend(title = "Forecast"))
plot_grid(gas_plot1,
gas_plot2, nrow = 2)
Very little difference between the two models. About 3.099 difference,
meaning either would be accurate or optimal
8.8 Recall your retail time series data (from Exercise 7 in Section 2.10).
Becuase there is clear seasonality and peaks on January
b. Apply Holt-Winters’ multiplicative method to the data. Experiment with making the trend damped.
set.seed(123)
myseries <- aus_retail %>%
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myfit <- myseries %>%
model(
`Holt-Winters’ Multiplicative` = ETS(Turnover ~ error("M") + trend("A") +
season("M")),
`Holt-Winters’ Damped Multiplicative` = ETS(Turnover ~ error("M") + trend("Ad") +
season("M"))
)
myfc <- myfit %>% forecast(h = "5 years")
myfc %>%
autoplot(myseries, level = NULL) +
labs(title="Australian Department Stores",
y="Turnover") +
guides(colour = guide_legend(title = "Forecast"))
c. Compare the RMSE of the one-step forecasts from the two methods. Which do you prefer?
accuracy(myfit)%>%
dplyr::select(".model","RMSE")
## # A tibble: 2 × 2
## .model RMSE
## <chr> <dbl>
## 1 Holt-Winters’ Multiplicative 24.3
## 2 Holt-Winters’ Damped Multiplicative 23.7
d. Check that the residuals from the best method look like white noise.
myfit%>%
dplyr::select("Holt-Winters’ Damped Multiplicative")%>%gg_tsresiduals()
Holt-Winters’ Damped Multiplicative is not white noise base on the plot,
which has over 5% of the spikes of the bounds made by dashed lines.
e. Now find the test set RMSE, while training the
model to the end of 2010. Can you beat the seasonal naïve
approach from Exercise 7 in Section 5.11?
mytrain<-myseries%>%
filter(Month >= yearmonth('2011 Jan'))
# seasonal naïve
myfit2 <- mytrain %>%
model(
"Holt-Winters' Damped" = ETS(Turnover ~ error("M") + trend("Ad") +
season("M")),
"Holt-Winters' Multiplicative" = ETS(Turnover ~ error("M") + trend("A") +
season("M")),
"Seasonal Naïve Forecast" = SNAIVE(Turnover)
)
comparison <- anti_join(myseries, mytrain,
by = c("State", "Industry", "Series ID", "Month", "Turnover"))
# forecasting according to comparison data
myfc2 <- myfit2 %>%
forecast(comparison)
# plot
autoplot(comparison, Turnover) +
autolayer(myfc2, level = NULL) +
guides(colour=guide_legend(title="Forecast")) +
ggtitle('Forecast Comparison ')
accuracy(myfit2)
## # A tibble: 3 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victoria Household … Holt-… Trai… 1.97 26.3 21.1 0.135 2.01 0.393 0.400
## 2 Victoria Household … Holt-… Trai… 0.740 27.0 21.7 -0.0276 2.08 0.405 0.411
## 3 Victoria Household … Seaso… Trai… 47.0 65.7 53.6 4.23 4.90 1 1
## # ℹ 1 more variable: ACF1 <dbl>
It appears the Damped model is the best performing, base on the RMSE values.
8.9 For the same retail data, try an STL decomposition applied to the Box-Cox transformed series, followed by ETS on the seasonally adjusted data. How does that compare with your best previous forecasts on the test set?
#find lambda
mylambda <- mytrain %>%
features(Turnover, features = guerrero) %>%
pull(lambda_guerrero)
#bc transformed data
ts_bc <- mytrain %>%
mutate(
bc_turnover = box_cox(Turnover, mylambda)
)
# bc transformed model
fit <- ts_bc %>%
model(
'Box-Cox STL' = STL(bc_turnover ~ season(window = "periodic"),
robust = T),
'Box-Cox ETS' = ETS(bc_turnover)
)
# best previous model
best_fit <-ts_bc %>%
model(
"Holt-Winters' Damped" = ETS(Turnover ~ error("M") + trend("Ad") +
season("M"))
)
rbind(accuracy(fit),accuracy(best_fit))
## # A tibble: 3 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Victor… Househo… Box-C… Trai… 0.0482 0.417 0.285 0.126 0.702 0.310 0.370
## 2 Victor… Househo… Box-C… Trai… -0.00279 0.433 0.352 -0.0152 0.862 0.383 0.384
## 3 Victor… Househo… Holt-… Trai… 1.97 26.3 21.1 0.135 2.01 0.393 0.400
## # ℹ 1 more variable: ACF1 <dbl>
Based on the Values the Box-Cox ETS is the best performing of the three.