tidyverts tutorial

1. Intro: Tidyverts packages

The tidyverts (https://tidyverts.org/) is a collection of packages created specifically to handle time series data

The tidyverts libraries are more closely integrated with the tidyverse framework
https://otexts.com/fpp3/ (Rob Hyndman etc)

## load relevant libraries

## from tidyverse
library(dplyr)    ## data wrangling
library(stringr)  ## handling of strings
library(readr)    ## r objects' import/export
library(ggplot2)  ## r plotting

## from tidyverts
library(tsibble)
library(fable)
library(fabletools)
library(feasts)

## others
library(statswalesr) ## import data from StatsWales

options(scipen = 999) ## remove scientific notation

2. Data retrieval

Time series health data retrieved from StatsWales
Aggregated monthly attendances to Emergency Dept (ED), grouped by Local Health Boards (LHB):
Table: “Number of attendances in NHS Wales emergency departments by age band, sex and site”.
We can directly extract the whole dataset in R using the statswalesr package, using the Open Data table code (HLTH0037)

## download the raw table
HLTH0037_data_raw <- statswalesr::statswales_get_dataset("HLTH0037")

….Data cleaning (data_cleaning.R)
…Quick look at the cleaned dataset:

## import cleaned data
HLTH0037_data_cln <- readr::read_rds("data/HLTH0037_data_cln.rds")

head(HLTH0037_data_cln)

# A tibble: 6 × 3
  MonthYear LHB     Attendances
      <mth> <chr>         <int>
1  2016 Apr AB UHB        13145
2  2016 Apr ABM           15153
3  2016 Apr BC UHB        16284
4  2016 Apr CAV UHB       11080
5  2016 Apr Cwm Taf       10451
6  2016 Apr HD UHB        11566

3. Creation of a `tsibble` object

index = the time index variable
- must be in a date-type format
key = it allows multiple time series to be stored in a single object (one or more categorical columns to uniquely identify a time series)

Creation of tsibble object

HLTH0037_data_ts <- tsibble::as_tsibble(
  ## x = data
  x = HLTH0037_data_cln,
  ## index = time column in the table
  index = MonthYear, 
  ## key = groups of different time series
  key = LHB
  )

HLTH0037_data_ts

# A tsibble: 637 x 3 [1M]
# Key:       LHB [9]
   MonthYear LHB    Attendances
       <mth> <chr>        <int>
 1  2016 Apr AB UHB       13145
 2  2016 May AB UHB       14692
 3  2016 Jun AB UHB       14282
 4  2016 Jul AB UHB       14831
 5  2016 Aug AB UHB       13594
 6  2016 Sep AB UHB       13904
 7  2016 Oct AB UHB       13955
 8  2016 Nov AB UHB       12946
 9  2016 Dec AB UHB       12552
10  2017 Jan AB UHB       12739
# … with 627 more rows

4. Time series plotting

Use function fabletools::autoplot()

All Wales aggregated data

## summarise records at All Wales level
AllWales_ts <-
  HLTH0037_data_ts |>
  dplyr::summarise(Attendances = sum(Attendances))

AllWales_ts

# A tsibble: 91 x 2 [1M]
   MonthYear Attendances
       <mth>       <int>
 1  2016 Apr       79246
 2  2016 May       88058
 3  2016 Jun       84269
 4  2016 Jul       87252
 5  2016 Aug       85104
 6  2016 Sep       84134
 7  2016 Oct       85177
 8  2016 Nov       78154
 9  2016 Dec       76731
10  2017 Jan       77117
# … with 81 more rows

## plot the time series
fabletools::autoplot(
  object = AllWales_ts,
  .vars = Attendances) +
  ggplot2::labs(title = "All Wales ED Attendances")

Time plots by group/key

By Local Health Boards

HLTH0037_data_ts

# A tsibble: 637 x 3 [1M]
# Key:       LHB [9]
   MonthYear LHB    Attendances
       <mth> <chr>        <int>
 1  2016 Apr AB UHB       13145
 2  2016 May AB UHB       14692
 3  2016 Jun AB UHB       14282
 4  2016 Jul AB UHB       14831
 5  2016 Aug AB UHB       13594
 6  2016 Sep AB UHB       13904
 7  2016 Oct AB UHB       13955
 8  2016 Nov AB UHB       12946
 9  2016 Dec AB UHB       12552
10  2017 Jan AB UHB       12739
# … with 627 more rows

## plot the time series
fabletools::autoplot(
  object = HLTH0037_data_ts,
  .vars = Attendances)+
  ggplot2::labs(title = "ED Attendances by Local Health Board") + 
  theme(legend.position="bottom")

Standard seasonal plot: feasts::gg_season()

feasts::gg_season(data = AllWales_ts, y = Attendances, period = "year") +
  labs(title = "All Wales ED Attendances - Seasonal plot")

5. Forecasting

Create Training and Validation periods

Training set: from July 2021 to July 2023 (2 years)
Validation set: from August 2023 to October 2023 (3 months)

AllWales_ts <-
  AllWales_ts |>
  dplyr::mutate(
    Type = dplyr::case_when(
      
      ## Define Training Period
      MonthYear >= tsibble::yearmonth("2021-07-01") & 
        MonthYear <= tsibble::yearmonth("2023-07-01") ~ "Training",
      
      ## Define Validation Period
      MonthYear > tsibble::yearmonth("2023-07-01") ~ "Validation",
      
      ## Data points before Training
      TRUE ~ "Pre-training"
      
    )
  )

AllWales_ts

# A tsibble: 91 x 3 [1M]
   MonthYear Attendances Type        
       <mth>       <int> <chr>       
 1  2016 Apr       79246 Pre-training
 2  2016 May       88058 Pre-training
 3  2016 Jun       84269 Pre-training
 4  2016 Jul       87252 Pre-training
 5  2016 Aug       85104 Pre-training
 6  2016 Sep       84134 Pre-training
 7  2016 Oct       85177 Pre-training
 8  2016 Nov       78154 Pre-training
 9  2016 Dec       76731 Pre-training
10  2017 Jan       77117 Pre-training
# … with 81 more rows

Fit a model to the Training data

The fable framework offers a range of popular forecasting models, including:
- Benchmark models (MEAN, NAIVE, SNAIVE)
- ARIMA
- ETS (Exponential Smoothing)
- Regression Models (TSLM)
- Neural Networks
- …
Interface for external libraries: Facebook Prophet and tscount (for count time series)

Syntax: fabletools::model(MODEL_NAME(y ~ predictors))
This creates a list of mables (= model tables)

AllWales_benchmarks <-
  
  fabletools::model(
    
    ## select data only from the Training period to fit the models
    .data =  AllWales_ts |> filter(Type == "Training"),
    
    ## fit benchmark models
    mean_model = fable::MEAN(Attendances),
    naive_model = fable::NAIVE(Attendances),
    snaive_model = fable::SNAIVE(Attendances ~ lag("year"))
  )

AllWales_benchmarks

# A mable: 1 x 3
  mean_model naive_model snaive_model
     <model>     <model>      <model>
1     <MEAN>     <NAIVE>     <SNAIVE>

Generate forecasts
- Use fabletools::forecast(object = mable(s), h = horizon) to obtain a fable (= forecast table) object with point and distribution forecasts for each model in the mable

AllWales_benchmarks_fct <-
  fabletools::forecast(
    object = AllWales_benchmarks,
    h=3 ## h = horizon, 3 months
    ) 

AllWales_benchmarks_fct

# A fable: 9 x 4 [1M]
# Key:     .model [3]
  .model       MonthYear         Attendances  .mean
  <chr>            <mth>              <dist>  <dbl>
1 mean_model    2023 Aug  N(85624, 57321791) 85624.
2 mean_model    2023 Sep  N(85624, 57321791) 85624.
3 mean_model    2023 Oct  N(85624, 57321791) 85624.
4 naive_model   2023 Aug  N(94953, 49793420) 94953 
5 naive_model   2023 Sep  N(94953, 99586841) 94953 
6 naive_model   2023 Oct N(94953, 149380261) 94953 
7 snaive_model  2023 Aug  N(90165, 61909635) 90165 
8 snaive_model  2023 Sep  N(86320, 61909635) 86320 
9 snaive_model  2023 Oct  N(91435, 61909635) 91435

Plot all models’ point forecasts with prediction intervals

## plot forecasts
fabletools::autoplot(
  object = AllWales_benchmarks_fct, 
  level = c(80,95), # level = prediction intervals
  size = 1, lty = 2) + 
  
  ## overlap actual data
  autolayer(
    AllWales_ts |> filter(Type %in% c("Training","Validation")),
    .vars = Attendances) +
  theme(legend.position="bottom") +
  labs(title = "All Wales ED: forecasts benchmark models")+ 
  facet_wrap(vars(.model), nrow = 3, scales = "free_y")

The End

All code available on github https://github.com/AuraFrizzati/WMC-Dec-2023-R-Tidyverts.git
Thanks for the attention :) …Any questions??

tidyverts tutorial

1. Intro: Tidyverts packages

2. Data retrieval

3. Creation of a tsibble object

4. Time series plotting

5. Forecasting

Create Training and Validation periods

Fit a model to the Training data

The End

3. Creation of a `tsibble` object