Fable Modelling & Accuracy Metrics - VISASMIDSA

Author

Robert Jenkins

Setup

library(fpp3)
library(fredr)
library(dplyr)
library(tseries)
fredr_has_key()
[1] TRUE

FRED Data

#Total Construction Spending: Total Construction in the United States (TTLCONS)
Construction_Ts <- fredr(series_id = "TTLCONS")|>
  transmute(month = yearmonth(date),value) |>
  as_tsibble(index = month)

#Producer Price Index by Industry: Total Manufacturing Industries (PCUOMFGOMFG)
PPI_Ts <- fredr(series_id = "PCUOMFGOMFG",
  observation_start = as.Date("1986-01-01"),
  observation_end   = as.Date("2025-12-01")) |> #Used Start and End Dates Because of Missing Values
  transmute(Month = yearmonth(date), value) |>
  as_tsibble(index = Month)

#Retail Sales: Sporting Goods Stores (MRTSSM45111USN)
RSSG_Ts <- fredr(series_id = "MRTSSM45111USN") |>
  transmute(Month = yearmonth(date), value) |>
  as_tsibble(index = Month)

Construction_Ts
# A tsibble: 394 x 2 [1M]
      month  value
      <mth>  <dbl>
 1 1993 Jan 458080
 2 1993 Feb 462967
 3 1993 Mar 458399
 4 1993 Apr 469425
 5 1993 May 468998
 6 1993 Jun 480247
 7 1993 Jul 483571
 8 1993 Aug 491494
 9 1993 Sep 497297
10 1993 Oct 492823
# ℹ 384 more rows
PPI_Ts
# A tsibble: 480 x 2 [1M]
      Month value
      <mth> <dbl>
 1 1986 Jan 101. 
 2 1986 Feb  99.9
 3 1986 Mar  98.3
 4 1986 Apr  97.9
 5 1986 May  98.2
 6 1986 Jun  98.2
 7 1986 Jul  97.5
 8 1986 Aug  97.4
 9 1986 Sep  97.6
10 1986 Oct  98.2
# ℹ 470 more rows
RSSG_Ts
# A tsibble: 407 x 2 [1M]
      Month value
      <mth> <dbl>
 1 1992 Jan   901
 2 1992 Feb  1020
 3 1992 Mar  1127
 4 1992 Apr  1175
 5 1992 May  1200
 6 1992 Jun  1237
 7 1992 Jul  1243
 8 1992 Aug  1277
 9 1992 Sep  1120
10 1992 Oct  1040
# ℹ 397 more rows
autoplot(Construction_Ts, value) +
  labs(
    title = "Total Construction Spending (TTLCONS)",
    subtitle = "FRED, Monthly",
    x = "Month",
    y = "Value (Millions of $)")

autoplot(PPI_Ts, value) +
  labs(
    title = "Producer Price Index by Industry: Total Manufacturing Industries",
    subtitle = "FRED, Monthly",
    x = "Month",
    y = "Index")

autoplot(RSSG_Ts, value) +
  labs(
    title = "Retail Sales: Sporting Goods Stores (MRTSSM45111USN)",
    subtitle = "FRED, Monthly",
    x = "Month",
    y = "Value (Millions of $)")

Is the Data Stationary? Confirm with ADF Test

#Construction_TS ADF Test
x <- Construction_Ts$value
x <- na.omit(x)
adf_out1 <- adf.test(x)
adf_out1

    Augmented Dickey-Fuller Test

data:  x
Dickey-Fuller = -0.53361, Lag order = 7, p-value = 0.9801
alternative hypothesis: stationary
#PPI_Ts ADF Test
y <- PPI_Ts$value
y <- na.omit(y)
adf_out2 <- adf.test(y)
adf_out2

    Augmented Dickey-Fuller Test

data:  y
Dickey-Fuller = -2.231, Lag order = 7, p-value = 0.4804
alternative hypothesis: stationary
#RSSG_Ts
z <- RSSG_Ts$value
z <- na.omit(z)
adf_out3 <- adf.test(z)
adf_out3

    Augmented Dickey-Fuller Test

data:  z
Dickey-Fuller = -3.1353, Lag order = 7, p-value = 0.09909
alternative hypothesis: stationary

Make the Data Stationary

#Make the Construction Time Series Stationary
Construction_st <- Construction_Ts |>
  mutate(st = difference(value)) |>
  filter(!is.na(st))

#Plot Original and New Data Sets
autoplot(Construction_Ts, value) +
  labs(title = "Construction (Original)", x = "Month", y = "value")

autoplot(Construction_st, st) +
  labs(title = "Construction (Stationary: First Difference)", x = "Month", y = "Δ value")

#Construction_St ADF Test to Confirm Stationary
a <- Construction_st$st
a <- na.omit(a)
adf_out3 <- adf.test(a)
adf_out3

    Augmented Dickey-Fuller Test

data:  a
Dickey-Fuller = -5.2867, Lag order = 7, p-value = 0.01
alternative hypothesis: stationary
#p-value = 0.01 < 0.05 so this data is stationary

#-------------------------------------------------------------------------------------------

#Make the Producer Price Index Time Series Stationary
PPI_st <- PPI_Ts |>
  mutate(st2 = difference(value)) |>
  filter(!is.na(st2))

#Plot Original and New Data Sets
autoplot(PPI_Ts, value) +
  labs(title = "Producer Price Index (Original)", x = "Month", y = "value")

autoplot(PPI_st, st2) +
  labs(title = "Producer Price Index (Stationary: First Difference)", x = "Month", y = "Δ value")

#PPI_St ADF Test to Confirm Stationary
b <- PPI_st$st2
b <- na.omit(b)
adf_out4 <- adf.test(b)
adf_out4

    Augmented Dickey-Fuller Test

data:  b
Dickey-Fuller = -7.1803, Lag order = 7, p-value = 0.01
alternative hypothesis: stationary
#p-value = 0.01 < 0.05 so this data is stationary

#--------------------------------------------------------------------------------------------

#Make the Retail Sales Sporting Goods Time Series Stationary
RSSG_st <- RSSG_Ts |>
  mutate(st3 = difference(difference(value, lag=12))) |>
  filter(!is.na(st3))

#Plot Original and New Data Sets
autoplot(RSSG_Ts, value) +
  labs(title = "Retail Sales Sportring Goods (Original)", x = "Month", y = "value")

autoplot(RSSG_st, st3) +
  labs(title = "Retail Sales Sporting Goods (Stationary: Seasonal Difference)", x = "Month", y = "Δ value")

#RSSG_St ADF Test to Confirm Stationary
c <- RSSG_st$st3
c <- na.omit(c)
adf_out5 <- adf.test(c)
adf_out5

    Augmented Dickey-Fuller Test

data:  c
Dickey-Fuller = -9.8667, Lag order = 7, p-value = 0.01
alternative hypothesis: stationary
#p-value = 0.01 < 0.05 so this data is stationary

ACF and PACF Functions and Graphs

Construction_st |> ACF(st) |> autoplot()

Construction_st |> PACF(st)|> autoplot()

#-------------------------------------------

PPI_st |> ACF(st2) |> autoplot()

PPI_st |> PACF(st2) |> autoplot()

#-------------------------------------------

RSSG_st |> ACF(st3) |> autoplot()

RSSG_st |> PACF(st3) |> autoplot()

Time Series Decomposition

# 1) Construction (TTLCONS)
Construction_Ts |>
  model(STL(value)) |>
  components() |>
  autoplot() +
  labs(title = "STL Decomposition: Construction Spending (TTLCONS)",
       x = "Month", y = "")

# 2) PPI (Total Manufacturing Industries)
PPI_Ts |>
  model(STL(value)) |>
  components() |>
  autoplot() +
  labs(title = "STL Decomposition: PPI (Total Manufacturing)",
       x = "Month", y = "")

# 3) Retail Sales: Sporting Goods Stores (RSSG)
# Use log(value) to stabilize variance before decomposing
RSSG_Ts |>
  mutate(log_value = log(value)) |>
  model(STL(log_value)) |>
  components() |>
  autoplot() +
  labs(title = "STL Decomposition: Retail Sales Sporting Goods (log scale)",
       x = "Month", y = "")