Unemployment Data Accuracy Metrics

Author

Kevin Rusu

Load Libraries

library(fpp3)           #tidy time series tools: tsibble, fable, feasts
library(fredr)          #pull data from the FRED API
library(tidyverse)      #data wrangling and ggplot2
library(patchwork)      #combine ggplots (p1 / p2 layouts)
library(knitr)          #kable() tables
library(kableExtra)     #styling for kable tables
library(writexl)        #export tables to Excel

Import Data from FRED

remove(list=ls()) #Clear the workspace

fredr_set_key(Sys.getenv("FRED_API_KEY"))  #store your FRED API key in .Renviron rather than hard-coding it in the script

#unemployment rate
myts <- fredr("UNRATE",
              observation_start = as.Date("1980-01-01"),
              observation_end   = as.Date("2025-12-01")
              ) |>
  transmute(Month = yearmonth(date), value) |>  #keep only Month (as yearmonth) and value
  as_tsibble(index = Month)                     #convert to a tsibble indexed by Month

#quick look at it
glimpse(myts)
Rows: 552
Columns: 2
$ Month <mth> 1980 Jan, 1980 Feb, 1980 Mar, 1980 Apr, 1980 May, 1980 Jun, 1980…
$ value <dbl> 6.3, 6.3, 6.3, 6.9, 7.5, 7.6, 7.8, 7.7, 7.5, 7.5, 7.5, 7.2, 7.5,…
head(myts)
# A tsibble: 6 x 2 [1M]
     Month value
     <mth> <dbl>
1 1980 Jan   6.3
2 1980 Feb   6.3
3 1980 Mar   6.3
4 1980 Apr   6.9
5 1980 May   7.5
6 1980 Jun   7.6
tail(myts)
# A tsibble: 6 x 2 [1M]
     Month value
     <mth> <dbl>
1 2025 Jul   4.3
2 2025 Aug   4.3
3 2025 Sep   4.4
4 2025 Oct  NA  
5 2025 Nov   4.5
6 2025 Dec   4.4

About the data: UNRATE is the US unemployment rate, reported monthly by the Bureau of Labor Statistics and measured as a percentage of the labor force. Lower values indicate a stronger labor market, with a larger share of the labor force employed.
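The tail of the series above shows a missing value (2025 Oct). Before modelling, it can be worth confirming how much is missing; a minimal check, not part of the original workflow:

#check for explicit NA values and implicit gaps in the monthly index
myts |> filter(is.na(value))
myts |> has_gaps()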

Quick look at full time series

myts |>
  autoplot(value) +
  labs(title = "US Unemployment Rate (1980-2025)",
       subtitle = "Monthly data from FRED",
       y = "Unemployment Rate (%)",
       x = "Year") +
  theme_minimal()
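Since one of the benchmarks below (SNAIVE) leans on seasonality, a seasonal view of the series can also be useful. A sketch using the feasts helpers loaded via fpp3:

#seasonal and subseries plots of the unemployment rate
myts |> gg_season(value) + labs(y = "Unemployment Rate (%)")
myts |> gg_subseries(value) + labs(y = "Unemployment Rate (%)")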

Split

#calc split index: floor(80% of 552 observations) = 441
split_index <- floor(0.8 * nrow(myts))  # = 441

#create training and test sets
train <- myts[1:split_index, ]
test  <- myts[(split_index + 1):nrow(myts), ]

#Describe Split
head(train, 1)
# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 1980 Jan   6.3
tail(x = train, n = 10) #show last 10 months of training set
# A tsibble: 10 x 2 [1M]
      Month value
      <mth> <dbl>
 1 2015 Dec   5  
 2 2016 Jan   4.8
 3 2016 Feb   4.9
 4 2016 Mar   5  
 5 2016 Apr   5.1
 6 2016 May   4.8
 7 2016 Jun   4.9
 8 2016 Jul   4.8
 9 2016 Aug   4.9
10 2016 Sep   5  
head(x = test, n = 1) #show first month of test set
# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2016 Oct   4.9
tail(test, 1)
# A tsibble: 1 x 2 [1M]
     Month value
     <mth> <dbl>
1 2025 Dec   4.4

Split summary: the training data runs from Jan 1980 through Sep 2016 (441 observations, 80%); the test data runs from Oct 2016 through Dec 2025 (111 observations, 20%).
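The row-index split above works because the tsibble is already ordered by Month. An equivalent date-based split with filter_index(), shown only as an alternative sketch:

#same split expressed by date rather than row number
train <- myts |> filter_index(. ~ "2016 Sep")
test  <- myts |> filter_index("2016 Oct" ~ .)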

Benchmark Forecasting Models

# Fit multiple forecasting models to the training data
models <- train |>
  model(
    NAIVE  = NAIVE(value),          #last observed value is the forecast
    MEAN   = MEAN(value),           #average of all historical values
    SNAIVE = SNAIVE(value),         #seasonal naive (same month of the previous year)
    AVG    = MEAN(value),           #duplicate of MEAN (simple average)
    WAVG   = TSLM(value ~ trend())  #linear time-trend regression (labelled WAVG, but not a true weighted average)
  )

#summaries
report(models)
Warning in report.mdl_df(models): Model reporting is only supported for
individual models, so a glance will be shown. To see the report for a specific
model, use `select()` and `filter()` to identify a single model.
# A tibble: 5 × 15
  .model sigma2 r_squared adj_r_squared statistic   p_value    df log_lik   AIC
  <chr>   <dbl>     <dbl>         <dbl>     <dbl>     <dbl> <int>   <dbl> <dbl>
1 NAIVE  0.0298   NA            NA           NA   NA           NA     NA    NA 
2 MEAN   2.63     NA            NA           NA   NA           NA     NA    NA 
3 SNAIVE 1.12     NA            NA           NA   NA           NA     NA    NA 
4 AVG    2.63     NA            NA           NA   NA           NA     NA    NA 
5 WAVG   2.55      0.0332        0.0310      15.1  0.000119     2   -831.  416.
# ℹ 6 more variables: AICc <dbl>, BIC <dbl>, CV <dbl>, deviance <dbl>,
#   df.residual <int>, rank <int>
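As the warning suggests, select() isolates a single column of the mable so report() can print the full model summary. For example, to see the trend regression (a sketch):

#full report for a single model
models |> select(WAVG) |> report()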

Generate Forecasts

#generate forecasts for the test period (111 months, the length of the test set)
fc <- models |>
  forecast(h = nrow(test))

#view forecast structure
head(fc)
# A fable: 6 x 4 [1M]
# Key:     .model [1]
  .model    Month
  <chr>     <mth>
1 NAIVE  2016 Oct
2 NAIVE  2016 Nov
3 NAIVE  2016 Dec
4 NAIVE  2017 Jan
5 NAIVE  2017 Feb
6 NAIVE  2017 Mar
# ℹ 2 more variables: value <dist>, .mean <dbl>
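The value column holds full forecast distributions, so prediction intervals can be pulled out with hilo(); a quick sketch:

#80% and 95% prediction intervals for each forecast
fc |> hilo(level = c(80, 95)) |> head()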

Visualize All Forecasts

fc |>
  autoplot(train) +
  labs(title = "Five Benchmark Forecasts: NAIVE, MEAN, SNAIVE, AVG, WAVG",
       y = "Unemployment Rate (%)",
       x = "Month") +
  facet_wrap(~ .model, ncol = 2)
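To see how the forecasts line up with what actually happened, the same fable can also be plotted over the full series instead of only the training data; a sketch:

#forecasts against the full series, intervals suppressed for readability
fc |>
  autoplot(myts, level = NULL) +
  facet_wrap(~ .model, ncol = 2) +
  labs(y = "Unemployment Rate (%)", x = "Month")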

Accuracy Metrics

Calculate Accuracy Metrics

# Compute accuracy metrics comparing forecasts to test data
acc <- accuracy(
  object = fc,    # Our forecasts
  data = test     # Actual test values
) |>
  select(.model, ME, MPE, RMSE, MAE, MAPE) |>
  arrange(RMSE)  # Sort by RMSE (lower is better)

# Display the accuracy table
acc
# A tibble: 5 × 6
  .model     ME   MPE  RMSE   MAE  MAPE
  <chr>   <dbl> <dbl> <dbl> <dbl> <dbl>
1 SNAIVE -0.386 -17.0  1.84  1.28  26.5
2 NAIVE  -0.443 -18.3  1.84  1.31  27.5
3 WAVG   -1.19  -36.1  2.15  1.84  42.3
4 AVG    -1.83  -51.3  2.56  2.35  55.7
5 MEAN   -1.83  -51.3  2.56  2.35  55.7
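MASE and RMSSE (described in the next section) are scaled by the naive-forecast errors on the training data, so fabletools needs the full series rather than just the test set; a sketch of adding them:

#scaled accuracy measures, computed against the complete series
accuracy(fc, myts) |>
  select(.model, RMSE, MAE, MASE, RMSSE) |>
  arrange(MASE)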

Understanding the Metrics

  • ME (Mean Error): \(\displaystyle \frac{1}{n} \sum (y_t - \hat{y}_t)\). The average forecast error (actual minus predicted) in the original units. Indicates bias (positive means underprediction, negative means overprediction) but does not penalize outliers.

  • MPE (Mean Percentage Error): \(\displaystyle \frac{1}{n} \sum \frac{y_t - \hat{y}_t}{y_t} \times 100\%\). The average percentage error. Unit-free, so models for series on different scales can be compared, but it can be misleading when \(y_t\) is near zero. Indicates bias; does not penalize outliers.

  • RMSE (Root Mean Squared Error): \(\displaystyle \sqrt{\frac{1}{n} \sum (y_t - \hat{y}_t)^2}\). Penalizes large errors more than MAE and is sensitive to outliers. Reflects \(\text{bias}^2\) + variance, so it detects bias only indirectly and cannot say whether forecasts are over or under.

  • MAE (Mean Absolute Error): \(\displaystyle \frac{1}{n} \sum |y_t - \hat{y}_t|\). The average absolute forecast error, easy to interpret in the original units. Does not indicate the direction of bias; penalizes outliers only mildly.

  • MAPE (Mean Absolute Percentage Error): \(\displaystyle \frac{1}{n} \sum \left| \frac{y_t - \hat{y}_t}{y_t} \right| \times 100\%\). Scale-independent, but undefined if any actual \(y_t = 0\).

  • MASE (Mean Absolute Scaled Error): \(\displaystyle \frac{\text{MAE}}{\text{MAE of naive model}}\). Values below 1 are better than the naive benchmark, above 1 are worse; allows comparison across series.

  • RMSSE (Root Mean Squared Scaled Error): \(\displaystyle \frac{\text{RMSE}}{\text{RMSE of naive model}}\). Like MASE, but penalizes larger errors more heavily.
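Because the formulas above are simple averages of the forecast errors, the accuracy table can be sanity-checked by hand. A sketch for the NAIVE row (the names naive_fc and errors are illustrative only):

#recompute ME, RMSE, MAE and MAPE for the NAIVE forecasts by hand
naive_fc <- fc |>
  filter(.model == "NAIVE") |>
  as_tibble() |>
  select(Month, .mean)

errors <- test |>
  as_tibble() |>
  left_join(naive_fc, by = "Month") |>
  mutate(e = value - .mean)   #error = actual - forecast

errors |>
  summarise(ME   = mean(e, na.rm = TRUE),
            RMSE = sqrt(mean(e^2, na.rm = TRUE)),
            MAE  = mean(abs(e), na.rm = TRUE),
            MAPE = mean(abs(e / value), na.rm = TRUE) * 100)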

Formatted Accuracy Table

#Print with knitr::kable
kable(acc, caption = "Forecast Accuracy Metrics") |>
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = FALSE)
Forecast Accuracy Metrics
.model ME MPE RMSE MAE MAPE
SNAIVE -0.3863636 -17.01517 1.835781 1.275454 26.49778
NAIVE -0.4427273 -18.34653 1.843933 1.310000 27.51435
WAVG -1.1949090 -36.10188 2.146629 1.844463 42.31226
AVG -1.8334302 -51.26350 2.562333 2.346030 55.74352
MEAN -1.8334302 -51.26350 2.562333 2.346030 55.74352
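writexl is loaded at the top but not otherwise used; a one-line sketch of exporting the accuracy table (the file name is illustrative):

#save the accuracy metrics to an Excel file
write_xlsx(acc, "unemployment_accuracy_metrics.xlsx")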

Interpretation: The SNAIVE model performed best, with the lowest RMSE (1.84), MAE (1.28), and MAPE (26.5%), although the plain NAIVE model is nearly identical. SNAIVE edges out the other methods because it reuses the observation from the same month of the previous year, carrying forward both the recent level and any seasonal pattern. The methods based on the full history (WAVG, AVG, MEAN) perform much worse, with MAE roughly 1.4 to 1.8 times that of SNAIVE, showing that recent observations are far more informative than long-run averages for this series.

Decomposition

mods <- train |>
  fabletools::model(
    #additive: STL on the raw series, damped-trend ETS on the seasonally adjusted component
    additive = decomposition_model(
      STL(formula = value ~ season(window = "periodic")),
      ETS(formula = season_adjust ~ error("A") + trend("Ad") + season("N"))
    ),
    #multiplicative: STL on the log of the series, automatically selected ETS on season_adjust
    multiplicative = decomposition_model(
      dcmp = STL(formula = log(value) ~ season(window = "periodic")),
      ETS(formula = season_adjust)
    )
  )

mods
# A mable: 1 x 2
                   additive            multiplicative
                    <model>                   <model>
1 <STL decomposition model> <STL decomposition model>
typeof(mods)
[1] "list"
glimpse(mods)
Rows: 1
Columns: 2
$ additive       <model> [STL decomposition model]
$ multiplicative <model> [STL decomposition model]
# ----- Plots -----
p1 <- train |>
  model(STL(value ~ season(window="periodic"))) |>
  components() |> autoplot() + ggtitle("Additive STL decomposition")

p2 <- train |>
  model(STL(log(value) ~ season(window="periodic"))) |>
  components() |> autoplot() + ggtitle("Multiplicative STL decomposition (log)")
 
p1/p2

#forecast over the test period; exponentiate the multiplicative point forecasts (fitted on the log scale)
preds <- forecast(object = mods, 
                  new_data = test) |>
  mutate(.mean = ifelse(test = .model=="multiplicative", 
                        yes = exp(.mean),
                        no = .mean)
         )

p3 <- autoplot(myts) +
  autolayer(filter(preds, .model=="additive"), colour="blue") +
  autolayer(filter(preds, .model=="multiplicative"), colour="red") +
  ggtitle("Additive (blue) vs Multiplicative (red) forecasts")
Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.
p4 <- autoplot(myts |> filter_index("2015 Jan"~.)) +
  autolayer(filter(preds, .model=="additive"), colour="blue") +
  autolayer(filter(preds, .model=="multiplicative"), colour="red") +
  ggtitle("Forecasts vs Actuals (2015+)")
Plot variable not specified, automatically selected `.vars = value`
Scale for fill_ramp is already present.
Adding another scale for fill_ramp, which will replace the existing scale.
(p3 / p4)

Which one worked better?

  • The multiplicative decomposition worked better than the additive one. This is clear from the forecast plots, where the additive method (blue) produces negative forecasts, which is impossible for an unemployment rate. The multiplicative method (red) keeps all forecasts positive and shows prediction intervals that widen appropriately over time. A quick numeric check is sketched below.
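The visual judgement can be backed up with the same kind of point-forecast accuracy used earlier, computed directly from the .mean column of preds (which already back-transforms the multiplicative forecasts); a sketch:

#RMSE and MAE of the two decomposition forecasts on the test set
preds |>
  as_tibble() |>
  select(.model, Month, .mean) |>
  left_join(as_tibble(test), by = "Month") |>
  group_by(.model) |>
  summarise(RMSE = sqrt(mean((value - .mean)^2, na.rm = TRUE)),
            MAE  = mean(abs(value - .mean), na.rm = TRUE))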

How could you use it for Forecasting?

  • I would use the results from the multiplicative decomposition because it better captures how the seasonal pattern scales with the level of the series. Its seasonally adjusted series also gives a clearer view of the underlying trend, which makes it useful for understanding where the series is heading; a sketch of extracting that series follows.
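A sketch of extracting the seasonally adjusted series referred to above, using the same log-scale STL decomposition:

#seasonally adjusted series from the multiplicative (log) decomposition
train |>
  model(STL(log(value) ~ season(window = "periodic"))) |>
  components() |>
  as_tsibble() |>
  autoplot(season_adjust) +
  labs(title = "Seasonally adjusted unemployment rate (log scale)",
       y = "log(Unemployment Rate)")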