Week 2 Discussion
#to clean the whole environment
remove(list=ls())
#Set up all of the libraries needed
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
## method from
## as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble 3.3.0 ✔ tsibble 1.1.6
## ✔ dplyr 1.1.4 ✔ tsibbledata 0.4.1
## ✔ tidyr 1.3.1 ✔ feasts 0.4.2
## ✔ lubridate 1.9.4 ✔ fable 0.4.1
## ✔ ggplot2 3.5.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date() masks base::date()
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ✖ tsibble::setdiff() masks base::setdiff()
## ✖ tsibble::union() masks base::union()
library(fredr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.5
## ✔ purrr 1.1.0 ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(knitr)
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(writexl)
library(dplyr)
library(feasts)
#Set up my FRED Key
#430508d0ad1df25ecddc34d5581dafc8
fredr_set_key("430508d0ad1df25ecddc34d5581dafc8")
#Pulling out the data and setting is as time series data
wfh <- fredr("WFHCOVIDMATQUESTION",
observation_start = as.Date("2019-01-01"),
observation_end = as.Date("2025-09-05")
) |>
mutate(Month = yearmonth(date), value) |>
as_tsibble(index = Month) |>
fill_gaps() |>
tidyr::fill(value, .direction = "downup")
glimpse(wfh)
## Rows: 69
## Columns: 6
## $ date <date> 2019-12-01, 2020-01-01, 2020-02-01, 2020-03-01, 2020-0…
## $ series_id <chr> "WFHCOVIDMATQUESTION", "WFHCOVIDMATQUESTION", "WFHCOVID…
## $ value <dbl> 7.15, 7.15, 7.15, 7.15, 7.15, 61.55, 56.35, 51.15, 48.3…
## $ realtime_start <date> 2025-09-09, 2025-09-09, 2025-09-09, 2025-09-09, 2025-0…
## $ realtime_end <date> 2025-09-09, 2025-09-09, 2025-09-09, 2025-09-09, 2025-0…
## $ Month <mth> 2019 Dec, 2020 Jan, 2020 Feb, 2020 Mar, 2020 Apr, 2020 …
str(wfh)
## tbl_ts [69 × 6] (S3: tbl_ts/tbl_df/tbl/data.frame)
## $ date : Date[1:69], format: "2019-12-01" "2020-01-01" ...
## $ series_id : chr [1:69] "WFHCOVIDMATQUESTION" "WFHCOVIDMATQUESTION" "WFHCOVIDMATQUESTION" "WFHCOVIDMATQUESTION" ...
## $ value : num [1:69] 7.15 7.15 7.15 7.15 7.15 ...
## $ realtime_start: Date[1:69], format: "2025-09-09" "2025-09-09" ...
## $ realtime_end : Date[1:69], format: "2025-09-09" "2025-09-09" ...
## $ Month : mth [1:69] 2019 Dec, 2020 Jan, 2020 Feb, 2020 Mar, 2020 Apr, 2020 May,...
## - attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
## ..$ .rows: list<int> [1:1]
## .. ..$ : int [1:69] 1 2 3 4 5 6 7 8 9 10 ...
## .. ..@ ptype: int(0)
## - attr(*, "index")= chr "Month"
## ..- attr(*, "ordered")= logi TRUE
## - attr(*, "index2")= chr "Month"
## - attr(*, "interval")= interval [1:1] 1M
## ..@ .regular: logi TRUE
#Setting up the data to have a test and train sample with 80%/20%
split_index <- floor(0.8 * nrow(wfh))
train <- wfh[1:split_index, ]
test <- wfh[(split_index + 1):nrow(wfh), ]
#checking the data
interval(wfh)
## <interval[1]>
## [1] 1M
autoplot(wfh, value)

# Fit five benchmark forecasting models
library(fable)
models <- train %>% model( NAIVE = NAIVE(formula = value),
MEAN = MEAN(formula = value),
SNAIVE = SNAIVE(formula = value),
AVG = MEAN(formula = value),
WAVG = TSLM(formula = value ~ trend())
)
report(models%>% select(WAVG))
## Series: value
## Model: TSLM
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.3846 -1.6267 0.1183 2.1398 29.3272
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 32.59695 2.85146 11.432 6.37e-16 ***
## trend() -0.06235 0.08859 -0.704 0.485
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.43 on 53 degrees of freedom
## Multiple R-squared: 0.00926, Adjusted R-squared: -0.009433
## F-statistic: 0.4954 on 1 and 53 DF, p-value: 0.48462
library(ggplot2)
# forecasts on the test data
fc <- models %>%
forecast(h = nrow(test)
) # h = 12 if you want to forecast the next 12 months
# Plot all forecasts with the training data
fc %>% autoplot(train) +
labs( title = "Five Benchmark Forecasts: Naive, Mean, SNaive, Average, Weighted",
y = "Value",
x = "Month"
) + facet_wrap(~ .model,
ncol = 2
)

library(tsibble)
#prep data for decomposition making sure theres no gaps
train_clean <- train %>% filter(!is.na(value))
train_clean_filled <- train_clean %>%
fill_gaps() %>%
tidyr::fill(value, .direction = "downup")
#to view if there's any gaps in the data
train_clean_filled %>%
mutate(gap = is.na(value)) %>%
ggplot(aes(x = Month, y = value)) +
geom_line() +
geom_point(aes(color = gap)) +
scale_color_manual(values = c("black", "red")) +
labs(title = "Time series with missing values highlighted in red")

#additive and multiplicative decomposition models
mods <- train_clean_filled |>
model(
additive = decomposition_model(
STL(formula = value ~ season(window="periodic")),
ETS(formula = season_adjust ~ error("A") + trend("Ad") + season("N"))
),
multiplicative = decomposition_model(
dcmp = STL(formula = log(value) ~ season(window = "periodic")),
ETS(formula = season_adjust)
)
)
library(ggplot2)
mods
## # A mable: 1 x 2
## additive multiplicative
## <model> <model>
## 1 <STL decomposition model> <STL decomposition model>
#forecast with the additive and multiplicative models, brings the multiplicative back from log
preds <- forecast(object = mods,
new_data = test) |>
mutate(.mean = ifelse(test = .model=="multiplicative",
yes = exp(.mean),
no = .mean)
)
#plot the two models against eachother
p3 <- autoplot(wfh, value) +
autolayer(filter(preds, .model=="additive"), colour="blue") +
autolayer(filter(preds, .model=="multiplicative"), colour="red") +
ggtitle("Additive (blue) vs Multiplicative (red) forecasts")
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
print(p3)

# Calc the accuracy metrics and print
acc <- accuracy(object = fc,
data = test
) |>
select(.model, ME, MPE, RMSE, MAE, MAPE #, MASE, RMSSE
)
# Print with knitr::kable
kable(acc, caption = "Forecast Accuracy Metrics") |>
kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Forecast Accuracy Metrics
.model
|
ME
|
MPE
|
RMSE
|
MAE
|
MAPE
|
AVG
|
-3.282519
|
-12.038586
|
3.419679
|
3.282519
|
12.038586
|
MEAN
|
-3.282519
|
-12.038586
|
3.419679
|
3.282519
|
12.038586
|
NAIVE
|
-1.001429
|
-3.754593
|
1.386408
|
1.268571
|
4.663743
|
SNAIVE
|
-1.351429
|
-5.010241
|
2.100786
|
1.742857
|
6.393080
|
WAVG
|
-1.131372
|
-4.218731
|
1.462316
|
1.293834
|
4.772199
|