library(tsibbledata)
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.1 ──
## ✔ tibble 3.2.1 ✔ ggplot2 3.5.1
## ✔ dplyr 1.1.4 ✔ tsibble 1.1.6
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.4 ✔ fable 0.4.1
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
Since this data shows a clear upward trend with no strong seasonality drift would be most ideal.
aus_pop <- global_economy %>%
filter(Country == "Australia") %>%
model(RW(Population ~ drift()))
aus_pop_fc <- forecast(aus_pop, h = 10)
autoplot(aus_pop_fc, global_economy %>% filter(Country == "Australia"))
### Bricks (aus_production)
Since there is strong seasonality (quarterly bricks production) SNAIVE(y) is best to use here.
bricks_fit <- aus_production %>%
filter(!is.na(Bricks)) %>%
model(SNAIVE(Bricks))
bricks_fc <- forecast(bricks_fit, h = 8) # 2 years
autoplot(bricks_fc, aus_production)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
Since there is seasonality (monthly lamb slaughterings) SNAIVE(y) is best to use here as well.
lambs_fit <- aus_livestock %>%
filter(State == "New South Wales", Animal == "Lambs") %>%
model(SNAIVE(Count))
lambs_fc <- forecast(lambs_fit, h = 12)
autoplot(lambs_fc, aus_livestock %>%
filter(State == "New South Wales", Animal == "Lambs"))
### Household wealth (hh_budget)
Since this data shows a slight upward trend with no strong seasonality drift would be most ideal.
wealth <- hh_budget %>%
model(RW(Wealth ~ drift()))
wealth_fc <- forecast(wealth, h = 10)
autoplot(wealth_fc, hh_budget)
Since there is strong seasonality (monthly retail turnover) SNAIVE(y) is best to use here as well.
aus_retail %>%
filter(State == "Australian Capital Territory",
Industry == 'Takeaway food services') %>%
model(SNAIVE(Turnover ~ lag("year"))) %>%
forecast(h = 12) %>%
autoplot(aus_retail)
# Filter Facebook stock
fb <- gafa_stock %>%
filter(Symbol == "FB") %>%
mutate(trading_day = row_number()) %>%
as_tsibble(index = trading_day, regular = TRUE)
# 5.2a Produce a time plot of the series.
fb %>%
autoplot(Close) +
labs(title = "Facebook Stock Price",
x = "Trading Day",
y = "Closing Price (USD)")
# 5.2b Produce forecasts using the drift method and plot them.
fb_drift <- fb %>%
model(RW(Close ~ drift()))
fb_drift_fc <- fb_drift %>%
forecast(h = 90)
autoplot(fb_drift_fc, fb) +
labs(title = "Facebook Stock Price Forecasts (Drift)",
x = "Trading Day", y = "Closing Price (USD)")
#5.2c Show that the forecasts are identical to extending the line drawn between the first and last observations.
fb %>%
autoplot(Close) +
geom_abline(
intercept = first(fb$Close),
slope = (last(fb$Close) - first(fb$Close)) / (nrow(fb) - 1),
colour = "blue", linetype = "solid"
) +
labs(title = "Facebook Stock Price with Drift Line",
x = "Trading Day", y = "Closing Price (USD)")
autoplot(fb_drift_fc, fb) +
geom_abline(
intercept = first(fb$Close),
slope = (last(fb$Close) - first(fb$Close)) / (nrow(fb) - 1),
colour = "blue"
) +
labs(title = "Proof that the forecasts are identical to extending the line",
x = "Trading Day", y = "Closing Price (USD)")
## 5.2d Try using some of the other benchmark functions to forecast the same data set. Which do you think is best? Why?
fb %>%
model(
Mean = MEAN(Close),
Naive = NAIVE(Close),
Drift = RW(Close ~ drift())
) %>%
forecast(h = 90) %>%
autoplot(fb)
I think that Naive might work best for short term forecasting (such as
day-to day). However, I do think that Drift will be better to long term
since it captures the long term persistent trend. Mean would not work
best here because it isn’t realistic for stock prices due to the fact it
ignores upward and downward movement.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: `gg_tsresiduals()` was deprecated in feasts 0.4.2.
## ℹ Please use `ggtime::gg_tsresiduals()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
Conclusion: The residuals are not just white noise. Although the residuals are roughly centered and fairly symmetric, the evidence of seasonality (lag 4 autocorrelation) means the SNAIVE method does not fully explain the data.
# Extract data of interest
aus_economy <- global_economy %>%
filter(Country == "Australia")
# Define and estimate a model
fit <- aus_economy %>% model(NAIVE(Exports))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
# Look a some forecasts
fit %>% forecast() %>% autoplot(aus_economy)
* Top residuals plot - Residuals fluctuate around zero with no obvious
strong pattern. - A few clusters of positives/negatives exist, but
overall it is fairly random.
Conclusion: The residuals can be considered to be white noise. The NAIVE model is appropriate for this annual series because the data is annual and does not exhibit strong seasonality, so SNAIVE is unnecessary.
fit <- aus_production %>%
filter(!is.na(Bricks)) %>%
model(SNAIVE(Bricks ~ lag("year")))
# Look at the residuals
fit %>% gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Look a some forecasts
fit %>% forecast() %>% autoplot(aus_production)
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
* Top residuals plot - Residuals fluctuate around zero but show clear
clustering and some long runs of positive and negative values. Therefore
this is not completely random.
Conclusion The residuals are not pure white noise — there is clearly some leftover autocorrelation and non-normality.
set.seed(2000)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
# Training set: before 2011
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train |>
model(SNAIVE(Turnover))
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)
### 5.7f Compare the accuracy of your forecasts against the actual
values.
fit |> accuracy()
fc |> accuracy(myseries)