\[Libraries \]
library(fpp3)
## Warning: package 'fpp3' was built under R version 4.3.3
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.0 ──
## ✔ tibble 3.2.1 ✔ tsibble 1.1.5
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.3.2
## ✔ lubridate 1.9.3 ✔ fable 0.3.4
## ✔ ggplot2 3.5.1 ✔ fabletools 0.4.2
## Warning: package 'dplyr' was built under R version 4.3.2
## Warning: package 'tidyr' was built under R version 4.3.2
## Warning: package 'lubridate' was built under R version 4.3.2
## Warning: package 'ggplot2' was built under R version 4.3.3
## Warning: package 'tsibble' was built under R version 4.3.3
## Warning: package 'tsibbledata' was built under R version 4.3.3
## Warning: package 'feasts' was built under R version 4.3.3
## Warning: package 'fabletools' was built under R version 4.3.3
## Warning: package 'fable' was built under R version 4.3.3
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.3.3
## Warning: package 'readr' was built under R version 4.3.2
## Warning: package 'purrr' was built under R version 4.3.2
## Warning: package 'stringr' was built under R version 4.3.2
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.0.2 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
\[5.1:\]
# Australian Population (global_economy):
global_economy |>
filter(Country == 'Australia') |>
model(RW(Population ~ drift())) |>
forecast(h = 5) |>
autoplot(global_economy) +
labs(title = 'Australian Population',
subtitle = 'Drift Method: 5 year forecast')
#Bricks (aus_production):
aus_production |>
filter(!is.na(Bricks)) |>
model(SNAIVE(Bricks)) |>
forecast(h = '5 years') |>
autoplot(aus_production) +
labs(title = 'Austrialian Clay Brick Production',
subtitle = 'SNAIVE Method: 5 year forecast')
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
#NSW Lambs (aus_livestock):
aus_livestock |>
filter(Animal == 'Lambs',
State == 'New South Wales',
!is.na(Count)) |>
model(classical_decomposition(Count, type = 'additive')) |>
components() |>
autoplot()
## Warning: Removed 6 rows containing missing values or values outside the scale range
## (`geom_line()`).
#The seasonal component is insignificant compared to the trend and random components.
aus_livestock |>
filter(Animal == 'Lambs',
State == 'New South Wales',
!is.na(Count)) |>
model(NAIVE(Count)) |>
forecast(h = '5 years') |>
autoplot(aus_livestock) +
labs(title = 'New South Wales Lambs Slaughter',
subtitle = 'NAIVE Model: 5 year forecast')
# Household wealth (hh_budget).
hh_budget |>
filter(!is.na(Wealth)) |>
model(RW(Wealth ~ drift())) |>
forecast(h = '5 years') |>
autoplot(hh_budget) +
labs(title = 'Household Wealth',
subtitle = 'Drift Method: 5 year forecast')
# Australian takeaway food turnover (aus_retail).
aus_retail |>
filter(Industry == 'Takeaway food services') |>
model(stl = STL(Turnover)) |>
components() |>
autoplot()
aus_retail |>
filter(Industry == 'Takeaway food services') |>
model(SNAIVE(Turnover)) |>
forecast(h = '5 years') |>
autoplot(aus_retail) +
labs(title = 'Australian Takeaway food services Turnover',
subtitle = 'SNAIVE Model: 5 year forecast') +
facet_wrap(~State, scales = 'free')
\[5.2:\]
# a.
fb <- gafa_stock |>
filter(Symbol == 'FB') |>
mutate(day = row_number()) |>
update_tsibble(index = day, regular = TRUE)
fb |>
autoplot(Close) +
labs(title = 'Daily close price of Facebook')
# b.
fb |>
model(RW(Close ~ drift())) |>
forecast(h = 60) |>
autoplot(fb) +
labs(title = 'Daily Close price of Facebook',
subtitle = 'Drift Method: 60 traiding day forecast')
#c.
fb |>
model(RW(Close ~ drift())) |>
forecast(h = 60) |>
autoplot(fb) +
labs(title = 'Daily Close price of Facebook',
subtitle = 'Drift Method: 60 traiding day forecast') +
geom_segment(x = fb |>
filter(day == min(day)) |>
pull(day),
y = fb |>
filter(day == min(day)) |>
pull(Close),
xend = fb |>
filter(day == max(day)) |>
pull(day),
yend = fb |>
filter(day == max(day)) |>
pull(Close),
colour = 'blue',
linetype = 'dashed')
#d.
fb |>
model(mean = MEAN(Close),
Naive = NAIVE(Close),
Drift = RW(Close ~ drift())) |>
forecast(h = 60) |>
autoplot(fb)
# It makes no sense to use the SNAIVE method since the data has no
seasonality component. Using MEAN and NAIVE produce a constant forecast
which doesn’t take the overall trend into account.
\[5.3:\]
# Extract data of interest
recent_production <- aus_production |>
filter(year(Quarter) >= 1992)
# Define and estimate a model
fit <- recent_production |> model(SNAIVE(Beer))
# Look at the residuals
fit |> gg_tsresiduals()
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 4 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 4 rows containing non-finite outside the scale range
## (`stat_bin()`).
# Look a some forecasts
fit |> forecast() |> autoplot(recent_production)
\[Box-Pierce Test\]
fit |>
augment() |>
features(.innov, box_pierce, lag = 8)
## # A tibble: 1 × 3
## .model bp_stat bp_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 29.7 0.000234
\[Ljung-Box Test\]
fit |>
augment() |>
features(.innov, ljung_box, lag = 8)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Beer) 32.3 0.0000834
\[5.4:\]
# Australian Exports from global_economy:
global_economy |>
filter(Country == 'Australia') |>
autoplot(Exports) +
labs(title = 'Australian Exports')
model_ae <- global_economy |>
filter(Country == 'Australia') |>
model(NAIVE(Exports))
model_ae |> gg_tsresiduals()
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 1 row containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 1 row containing non-finite outside the scale range
## (`stat_bin()`).
model_ae |> forecast() |> autoplot(global_economy)
#The residuals are centered around zero and appear to be normally distributed..
model_ae |>
augment() |>
features(.innov, ljung_box, lag = 10)
## # A tibble: 1 × 4
## Country .model lb_stat lb_pvalue
## <fct> <chr> <dbl> <dbl>
## 1 Australia NAIVE(Exports) 16.4 0.0896
#The Ljung-Box test produces a p-value greater than 0.05 which allows us to conclude that the residuals resemble white noise.
#Bricks from aus_production:
aus_production |>
autoplot(Bricks) +
labs(title = 'Australian Clay Brick Production')
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
model_b <- aus_production |>
model(SNAIVE(Bricks))
model_b |> gg_tsresiduals()
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 24 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 24 rows containing non-finite outside the scale range
## (`stat_bin()`).
model_b |> forecast() |> autoplot(aus_production)
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning: Removed 8 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 20 rows containing missing values or values outside the scale range
## (`geom_line()`).
# The residuals do not seem to be centered at zero.
model_b |>
augment() |>
features(.innov, ljung_box, lag = 8)
## # A tibble: 1 × 3
## .model lb_stat lb_pvalue
## <chr> <dbl> <dbl>
## 1 SNAIVE(Bricks) 274. 0
#The Ljung-Box test produces a p-value less than 0.05 which allows us to conclude that the residuals do not resemble white noise.
\[5.7:\]
set.seed(1234)
myseries <- aus_retail |>
filter(`Series ID` == sample(aus_retail$`Series ID`, 1))
#a.
myseries_train <- myseries |> filter(year(Month) < 2011)
#b.
autoplot(myseries, Turnover) +
autolayer(myseries_train, Turnover, colour = "green")
#c.
myseries_fit <- myseries_train |>
model(SNAIVE(Turnover))
# d.
myseries_fit |> gg_tsresiduals()
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 12 rows containing missing values or values outside the scale range
## (`geom_point()`).
## Warning: Removed 12 rows containing non-finite outside the scale range
## (`stat_bin()`).
#The residuals do not appear to be uncorrelated nor normally distributed.
#e.
fc <- myseries_fit |>
forecast(new_data = anti_join(myseries, myseries_train,
by = join_by(State, Industry, `Series ID`, Month, Turnover)))
fc |> autoplot(myseries)
#f.
myseries_fit |>
accuracy() |>
select(.model, .type, RMSE, MAE, MAPE, MASE, RMSSE)
## # A tibble: 1 × 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Training 2.90 2.22 10.7 1 1
fc |>
accuracy(myseries) |>
select(.model, .type, RMSE, MAE, MAPE, MASE, RMSSE)
## # A tibble: 1 × 7
## .model .type RMSE MAE MAPE MASE RMSSE
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 SNAIVE(Turnover) Test 9.13 7.58 14.4 3.42 3.15
#g.Accuracy measures are very sensitive to the amount of training data used. The more training data, the better the accuracy. With time series it is also important to use training data as close to the time periods being forecast.