Week 2 Discussion

#to clean the whole environment
remove(list=ls())


#Set up all of the libraries needed
library(fpp3)
## Registered S3 method overwritten by 'tsibble':
##   method               from 
##   as_tibble.grouped_df dplyr
## ── Attaching packages ──────────────────────────────────────────── fpp3 1.0.2 ──
## ✔ tibble      3.3.0     ✔ tsibble     1.1.6
## ✔ dplyr       1.1.4     ✔ tsibbledata 0.4.1
## ✔ tidyr       1.3.1     ✔ feasts      0.4.2
## ✔ lubridate   1.9.4     ✔ fable       0.4.1
## ✔ ggplot2     3.5.2
## ── Conflicts ───────────────────────────────────────────────── fpp3_conflicts ──
## ✖ lubridate::date()    masks base::date()
## ✖ dplyr::filter()      masks stats::filter()
## ✖ tsibble::intersect() masks base::intersect()
## ✖ tsibble::interval()  masks lubridate::interval()
## ✖ dplyr::lag()         masks stats::lag()
## ✖ tsibble::setdiff()   masks base::setdiff()
## ✖ tsibble::union()     masks base::union()
library(fredr)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ readr   2.1.5
## ✔ purrr   1.1.0     ✔ stringr 1.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()     masks stats::filter()
## ✖ tsibble::interval() masks lubridate::interval()
## ✖ dplyr::lag()        masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(patchwork)
library(knitr)
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(writexl)
library(dplyr)
library(feasts)

#Set up my FRED Key
#430508d0ad1df25ecddc34d5581dafc8

fredr_set_key("430508d0ad1df25ecddc34d5581dafc8")

#Pulling out the data and setting is as time series data 
wfh <- fredr("WFHCOVIDMATQUESTION",
              observation_start = as.Date("2019-01-01"),
              observation_end   = as.Date("2025-09-05")
              ) |>
  mutate(Month = yearmonth(date), value) |>
  as_tsibble(index = Month) |>
   fill_gaps() |>
    tidyr::fill(value, .direction = "downup")

glimpse(wfh)
## Rows: 69
## Columns: 6
## $ date           <date> 2019-12-01, 2020-01-01, 2020-02-01, 2020-03-01, 2020-0…
## $ series_id      <chr> "WFHCOVIDMATQUESTION", "WFHCOVIDMATQUESTION", "WFHCOVID…
## $ value          <dbl> 7.15, 7.15, 7.15, 7.15, 7.15, 61.55, 56.35, 51.15, 48.3…
## $ realtime_start <date> 2025-09-09, 2025-09-09, 2025-09-09, 2025-09-09, 2025-0…
## $ realtime_end   <date> 2025-09-09, 2025-09-09, 2025-09-09, 2025-09-09, 2025-0…
## $ Month          <mth> 2019 Dec, 2020 Jan, 2020 Feb, 2020 Mar, 2020 Apr, 2020 …
str(wfh)
## tbl_ts [69 × 6] (S3: tbl_ts/tbl_df/tbl/data.frame)
##  $ date          : Date[1:69], format: "2019-12-01" "2020-01-01" ...
##  $ series_id     : chr [1:69] "WFHCOVIDMATQUESTION" "WFHCOVIDMATQUESTION" "WFHCOVIDMATQUESTION" "WFHCOVIDMATQUESTION" ...
##  $ value         : num [1:69] 7.15 7.15 7.15 7.15 7.15 ...
##  $ realtime_start: Date[1:69], format: "2025-09-09" "2025-09-09" ...
##  $ realtime_end  : Date[1:69], format: "2025-09-09" "2025-09-09" ...
##  $ Month         : mth [1:69] 2019 Dec, 2020 Jan, 2020 Feb, 2020 Mar, 2020 Apr, 2020 May,...
##  - attr(*, "key")= tibble [1 × 1] (S3: tbl_df/tbl/data.frame)
##   ..$ .rows: list<int> [1:1] 
##   .. ..$ : int [1:69] 1 2 3 4 5 6 7 8 9 10 ...
##   .. ..@ ptype: int(0) 
##  - attr(*, "index")= chr "Month"
##   ..- attr(*, "ordered")= logi TRUE
##  - attr(*, "index2")= chr "Month"
##  - attr(*, "interval")= interval [1:1] 1M
##   ..@ .regular: logi TRUE
#Setting up the data to have a test and train sample with 80%/20% 
split_index <- floor(0.8 * nrow(wfh))  

train <- wfh[1:split_index, ]
test  <- wfh[(split_index + 1):nrow(wfh), ]

#checking the data 
interval(wfh)
## <interval[1]>
## [1] 1M
autoplot(wfh, value)

# Fit five benchmark forecasting models 
library(fable)

models <- train %>% model( NAIVE  = NAIVE(formula = value), 
                           MEAN   = MEAN(formula = value), 
                           SNAIVE = SNAIVE(formula = value), 
                           AVG    = MEAN(formula = value), 
                           WAVG   = TSLM(formula = value ~ trend())
                           ) 

report(models%>% select(WAVG))
## Series: value 
## Model: TSLM 
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -25.3846  -1.6267   0.1183   2.1398  29.3272 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 32.59695    2.85146  11.432 6.37e-16 ***
## trend()     -0.06235    0.08859  -0.704    0.485    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 10.43 on 53 degrees of freedom
## Multiple R-squared: 0.00926, Adjusted R-squared: -0.009433
## F-statistic: 0.4954 on 1 and 53 DF, p-value: 0.48462
library(ggplot2)

# forecasts on the test data 
fc <- models %>%
  forecast(h = nrow(test)
  ) # h = 12 if you want to forecast the next 12 months

# Plot all forecasts with the training data
fc %>% autoplot(train) + 
  labs( title = "Five Benchmark Forecasts: Naive, Mean, SNaive, Average, Weighted",
        y = "Value",
        x = "Month" 
        ) + facet_wrap(~ .model,
                       ncol = 2
                       ) 

library(tsibble)

#prep data for decomposition making sure theres no gaps 
train_clean <- train %>% filter(!is.na(value))
train_clean_filled <- train_clean %>%
  fill_gaps() %>%
  tidyr::fill(value, .direction = "downup")

#to view if there's any gaps in the data
train_clean_filled %>%
  mutate(gap = is.na(value)) %>%
  ggplot(aes(x = Month, y = value)) +
  geom_line() +
  geom_point(aes(color = gap)) +
  scale_color_manual(values = c("black", "red")) +
  labs(title = "Time series with missing values highlighted in red")

#additive and multiplicative decomposition models
mods <- train_clean_filled |>
  model(
    additive = decomposition_model(
      STL(formula = value ~ season(window="periodic")),
      ETS(formula = season_adjust ~ error("A") + trend("Ad") + season("N"))
    ),
    multiplicative = decomposition_model(
      dcmp = STL(formula = log(value) ~ season(window = "periodic")),
      ETS(formula = season_adjust)
    )
  )

library(ggplot2)
mods
## # A mable: 1 x 2
##                    additive            multiplicative
##                     <model>                   <model>
## 1 <STL decomposition model> <STL decomposition model>
#forecast with the additive and multiplicative models, brings the multiplicative back from log 
preds <- forecast(object = mods, 
                  new_data = test) |>
  mutate(.mean = ifelse(test = .model=="multiplicative", 
                        yes = exp(.mean),
                        no = .mean)
         )
#plot the two models against eachother
p3 <- autoplot(wfh, value) +
  autolayer(filter(preds, .model=="additive"), colour="blue") +
  autolayer(filter(preds, .model=="multiplicative"), colour="red") +
  ggtitle("Additive (blue) vs Multiplicative (red) forecasts")
## Scale for fill_ramp is already present.
## Adding another scale for fill_ramp, which will replace the existing scale.
print(p3)

# Calc the accuracy metrics and print
acc <- accuracy(object = fc, 
                data = test
                ) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE #, MASE, RMSSE
         )



# Print with knitr::kable
kable(acc, caption = "Forecast Accuracy Metrics") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Forecast Accuracy Metrics
.model ME MPE RMSE MAE MAPE
AVG -3.282519 -12.038586 3.419679 3.282519 12.038586
MEAN -3.282519 -12.038586 3.419679 3.282519 12.038586
NAIVE -1.001429 -3.754593 1.386408 1.268571 4.663743
SNAIVE -1.351429 -5.010241 2.100786 1.742857 6.393080
WAVG -1.131372 -4.218731 1.462316 1.293834 4.772199