library(fpp3)
## Warning: package 'fpp3' was built under R version 4.2.3
## ── Attaching packages ────────────────────────────────────────────── fpp3 0.5 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.4
## ✔ dplyr 1.1.3 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.0 ✔ feasts 0.3.1
## ✔ lubridate 1.9.3 ✔ fable 0.3.3
## ✔ ggplot2 3.4.4 ✔ fabletools 0.4.0
## Warning: package 'tibble' was built under R version 4.2.3
## Warning: package 'dplyr' was built under R version 4.2.3
## Warning: package 'tidyr' was built under R version 4.2.3
## Warning: package 'lubridate' was built under R version 4.2.3
## Warning: package 'ggplot2' was built under R version 4.2.3
## Warning: package 'tsibble' was built under R version 4.2.3
## Warning: package 'tsibbledata' was built under R version 4.2.3
## Warning: package 'feasts' was built under R version 4.2.3
## Warning: package 'fabletools' was built under R version 4.2.3
## Warning: package 'fable' was built under R version 4.2.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(dplyr)
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.2.3
## Warning: package 'readr' was built under R version 4.2.3
## Warning: package 'purrr' was built under R version 4.2.3
## Warning: package 'stringr' was built under R version 4.2.3
## Warning: package 'forcats' was built under R version 4.2.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
global_economy %>%
filter(Country == "Australia") %>%
autoplot(Population)
There is a steadily increasing trend, therefore use of the RW() drift
would be the best approach for this forecast.
aus <- global_economy %>%
filter(Country == "Australia")
aus %>%
model(RW(Population ~ drift())) %>%
forecast(h = 20) %>%
autoplot(aus)
### Bricks (aus_production)
aus_production %>%
autoplot(Bricks)
## Warning: Removed 20 rows containing missing values (`geom_line()`).
There is seasonality, so we can use the seasonal naive method.
bricks <- aus_production %>%
filter(!is.na(Bricks))
bricks %>%
model(SNAIVE(Bricks ~ lag("year"))) %>%
forecast(h = 20) %>%
autoplot(bricks)
### NSW Lambs (aus_livestock)
aus_livestock %>%
filter(State == "New South Wales",
Animal == "Lambs") %>%
autoplot(Count)
No trend or seasonal patterns, we can use Naive.
lambs <- aus_livestock %>%
filter(State == "New South Wales",
Animal == "Lambs")
lambs %>% model(NAIVE(Count)) %>%
forecast(h=20) %>%
autoplot(lambs)
### Household wealth (hh_budget)
hh_budget %>%
autoplot(Wealth)
There is an overall upwards yearly trend, no seasonality apparent. Can use drift.
hh_budget %>%
model(Drift = RW(Wealth ~ drift())) %>%
forecast(h = 20) %>%
autoplot(hh_budget)
aus_retail %>%
filter(Industry == "Takeaway food services" & State == "Australian Capital Territory") %>%
model(RW(Turnover ~ drift())) %>%
forecast(h = 20) %>%
autoplot(aus_retail)
Upwards trend, some seasonality. Would use drift.
gafa_stock %>%
filter(Symbol=="FB") %>%
autoplot(Close)
fb <- gafa_stock %>%
filter(Symbol=="FB") %>%
mutate(day = row_number()) %>%
update_tsibble(index = day, regular = TRUE)
fb %>%
model(RW(Close ~ drift())) %>%
forecast(h = 20) %>%
autoplot(fb)
### Show that the forecasts are identical to extending the line drawn
between the first and last observations.
fb %>%
model(RW(Close ~ drift())) %>%
forecast(h = 20) %>%
autoplot(fb)+
geom_segment(aes(x = 1, y = 54.83, xend = 1258, yend = 134.45))
Try using some of the other benchmark functions to forecast the same
data set. Which do you think is best? Why?
fb %>%
model(Mean = MEAN(Close),
`Naïve` = NAIVE(Close),
Drift = NAIVE(Close ~ drift())) %>%
forecast(h = 100) %>%
autoplot(fb)
I had to extend my forecast out a little more to see how each forecast
behaved. The data is not seasonal, so seasonal naive would not work.
Based on the results, Drift appears the most realistic.
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values (`geom_line()`).
## Warning: Removed 4 rows containing missing values (`geom_point()`).
## Warning: Removed 4 rows containing non-finite values (`stat_bin()`).
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
With the exception of an outlier, the residuals appear to be centered
around zero and pvalues are small, indicating the data is most likely
not white noise.
aus_exp <- global_economy %>%
filter(Country == "Australia")
fit <- aus_exp %>% model(NAIVE(Exports))
fit %>% gg_tsresiduals()
## Warning: Removed 1 row containing missing values (`geom_line()`).
## Warning: Removed 1 rows containing missing values (`geom_point()`).
## Warning: Removed 1 rows containing non-finite values (`stat_bin()`).
Box.test(aus_exp$Exports, lag = 24, type = "Ljung")
##
## Box-Ljung test
##
## data: aus_exp$Exports
## X-squared = 398.24, df = 24, p-value < 2.2e-16
fit %>% forecast(h = 5) %>% autoplot(aus_exp)
Mean of the residuals is centered around 0 meaning constant variations.
The box-ljung test shows a pvalue <= 0.05 therefore no significance
and not white noise.
set.seed(12345678)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`,1))
myseries_train <- myseries |>
filter(year(Month) < 2011)
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "red")
fit <- myseries_train |>
model(SNAIVE(Turnover))
fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values (`geom_line()`).
## Warning: Removed 12 rows containing missing values (`geom_point()`).
## Warning: Removed 12 rows containing non-finite values (`stat_bin()`).
fc <- fit |>
forecast(new_data = anti_join(myseries, myseries_train))
## Joining with `by = join_by(State, Industry, `Series ID`, Month, Turnover)`
fc |> autoplot(myseries)
fit |> accuracy()
## # A tibble: 1 × 12
## State Industry .model .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Norther… Clothin… SNAIV… Trai… 0.439 1.21 0.915 5.23 12.4 1 1 0.768
fc |> accuracy(myseries)
## # A tibble: 1 × 12
## .model State Industry .type ME RMSE MAE MPE MAPE MASE RMSSE ACF1
## <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(T… Nort… Clothin… Test 0.836 1.55 1.24 5.94 9.06 1.36 1.28 0.601
The model does not appear to be a good fit.