library(fpp3)
library(feasts)
library(fredr)
library(dplyr)
library(tseries)
library(fabletools)
fredr_has_key()[1] TRUE
library(fpp3)
library(feasts)
library(fredr)
library(dplyr)
library(tseries)
library(fabletools)
fredr_has_key()[1] TRUE
# Producer Price Index by Industry: Total Manufacturing Industries (PCUOMFGOMFG)
PPI <- fredr(series_id = "PCUOMFGOMFG") |> #Data comes in as a tibble object, date in YYYY-MM-DD, Value, realtime_start, & realtime_end -> convert the date to month-year
transmute(month = yearmonth(date),value) |>
as_tsibble(index = month) |>
filter(month >= yearmonth("1986 Jan")) #There was one observation in DEC 1984 but all of 1985 is missing.
PPI #480x2 Tsibble() which makes sense because observations are 1986 to 2025 (12 Per Year)# A tsibble: 480 x 2 [1M]
month value
<mth> <dbl>
1 1986 Jan 101.
2 1986 Feb 99.9
3 1986 Mar 98.3
4 1986 Apr 97.9
5 1986 May 98.2
6 1986 Jun 98.2
7 1986 Jul 97.5
8 1986 Aug 97.4
9 1986 Sep 97.6
10 1986 Oct 98.2
# ℹ 470 more rows
autoplot(PPI) +
labs(
title = "Producer Price Index: Total Manufacturing Industries",
x = "Month",
y = "Index 1986 = 101.3"
) +
theme_bw()# Data Goes From 1986(Monthly) - 480 Observations - 80% = 384 - Will Leave out the Last 96 Months - 20% - as Test
split <- yearmonth(max(PPI$month)) - 96
train <- PPI |> filter(month <= split)
test <- PPI |> filter(month > split)
nrow(PPI) #check total observations - confirmed 480[1] 480
nrow(train) #check observations on train - confirmed 384[1] 384
nrow(test) #check observations on test - confirmed 96[1] 96
bind_rows(train |> mutate(Set = "Train"), test |> mutate(Set = "Test")) |>
ggplot(aes(x = month, y = value, color = Set)) +
geom_line() +
labs(
title = "Producer Price Index: Train vs Test Split (80/20)",
x = "Month",
y = "PPI (Index)"
) +
theme_bw()# Plot the Training Data
autoplot(train, value) +
labs(title = "PPI (Training Set)", x = "Month", y = "PPI (Index)") +
theme_bw()# Classical Decomposition
train |>
model(classical = classical_decomposition(value, type = "additive")) |>
components() |>
autoplot() +
labs(title = "Classical Decomposition (Additive) - Training Set") +
theme_bw()# STL Decomposition
train |>
model(stl = STL(value)) |>
components() |>
autoplot() +
labs(title = "STL Decomposition - Training Set") +
theme_bw()p1 <- autoplot(train, value) +
labs(title = "Training Set (Level)", x = "Month", y = "PPI (Index)") +
theme_bw()
p2 <- train %>%
mutate(log_value = log(value)) %>%
autoplot(log_value) +
labs(title = "Training Set (Log Scale)", x = "Month", y = "log(PPI)") +
theme_bw()
library(patchwork)
p1 / p2x <- train$value
# ADF: null = unit root (non-stationary)
adf_res <- adf.test(x)
# KPSS: null = stationary
kpss_res <- kpss.test(x)
adf_res
Augmented Dickey-Fuller Test
data: x
Dickey-Fuller = -1.8654, Lag order = 7, p-value = 0.634
alternative hypothesis: stationary
kpss_res
KPSS Test for Level Stationarity
data: x
KPSS Level = 6.2297, Truncation lag parameter = 5, p-value = 0.01
# Build the models using the train data set.
fits <- train |>
model(
baseline_drift = RW(value ~ drift()),
reg_trend_season = TSLM(value ~ trend() + season())
)
# Regression Model Report & Coefficient
fits |> select(reg_trend_season) |> report()Series: value
Model: TSLM
Residuals:
Min 1Q Median 3Q Max
-14.0865 -6.6162 0.1536 4.2487 19.3855
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 92.661134 1.521336 60.908 <2e-16 ***
trend() 0.267654 0.003563 75.114 <2e-16 ***
season()year2 0.079221 1.934146 0.041 0.967
season()year3 0.489691 1.934156 0.253 0.800
season()year4 0.915787 1.934172 0.473 0.636
season()year5 1.285633 1.934195 0.665 0.507
season()year6 1.183603 1.934225 0.612 0.541
season()year7 1.019074 1.934261 0.527 0.599
season()year8 0.863919 1.934303 0.447 0.655
season()year9 0.796265 1.934353 0.412 0.681
season()year10 0.756736 1.934408 0.391 0.696
season()year11 0.107831 1.934471 0.056 0.956
season()year12 -0.687948 1.934540 -0.356 0.722
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 7.737 on 371 degrees of freedom
Multiple R-squared: 0.9384, Adjusted R-squared: 0.9364
F-statistic: 470.7 on 12 and 371 DF, p-value: < 2.22e-16
fits |> tidy() |> filter(.model == "reg_trend_season")# A tibble: 13 × 6
.model term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 reg_trend_season (Intercept) 92.7 1.52 60.9 2.91e-195
2 reg_trend_season trend() 0.268 0.00356 75.1 1.69e-226
3 reg_trend_season season()year2 0.0792 1.93 0.0410 9.67e- 1
4 reg_trend_season season()year3 0.490 1.93 0.253 8.00e- 1
5 reg_trend_season season()year4 0.916 1.93 0.473 6.36e- 1
6 reg_trend_season season()year5 1.29 1.93 0.665 5.07e- 1
7 reg_trend_season season()year6 1.18 1.93 0.612 5.41e- 1
8 reg_trend_season season()year7 1.02 1.93 0.527 5.99e- 1
9 reg_trend_season season()year8 0.864 1.93 0.447 6.55e- 1
10 reg_trend_season season()year9 0.796 1.93 0.412 6.81e- 1
11 reg_trend_season season()year10 0.757 1.93 0.391 6.96e- 1
12 reg_trend_season season()year11 0.108 1.93 0.0557 9.56e- 1
13 reg_trend_season season()year12 -0.688 1.93 -0.356 7.22e- 1
# Drift Model Parameter
fits |> tidy() |> filter(.model == "baseline_drift")# A tibble: 1 × 6
.model term estimate std.error statistic p.value
<chr> <chr> <dbl> <dbl> <dbl> <dbl>
1 baseline_drift b 0.235 0.0622 3.78 0.000185
#create the forecast fable
test_forecast <- fits |> forecast(new_data = test)
test_forecast# A fable: 192 x 4 [1M]
# Key: .model [2]
.model month
<chr> <mth>
1 baseline_drift 2018 Jan
2 baseline_drift 2018 Feb
3 baseline_drift 2018 Mar
4 baseline_drift 2018 Apr
5 baseline_drift 2018 May
6 baseline_drift 2018 Jun
7 baseline_drift 2018 Jul
8 baseline_drift 2018 Aug
9 baseline_drift 2018 Sep
10 baseline_drift 2018 Oct
# ℹ 182 more rows
# ℹ 2 more variables: value <dist>, .mean <dbl>
#test for accuracy on RMSE, MAE, and MAPE
acc <- test_forecast |>
accuracy(test) |>
select(.model, RMSE, MAE, MAPE)
acc# A tibble: 2 × 4
.model RMSE MAE MAPE
<chr> <dbl> <dbl> <dbl>
1 baseline_drift 30.0 23.8 9.77
2 reg_trend_season 25.1 20.4 8.47
#Remove Distribution Column that Creates Ribbons and Messy Charts
test_fc_mean <- test_forecast |>
as_tibble() |>
select(month, .model, .mean) |>
as_tsibble(index = month, key = .model)
#Plot the Train, Test Actual (Dashed) and the Drift and Regression Forecasts
autoplot(train, value) +
autolayer(test, value, linetype = "dashed") +
autolayer(test_fc_mean, .mean) +
labs(
title = "Test-Set Forecast Comparison: Drift vs Trend+Season Regression",
x = "Month",
y = "PPI (Index)"
) +
theme_bw()#Plot Only Test Setr and the Drift and Regression Forecasts to see easier
autoplot(test, value) +
autolayer(test_fc_mean, .mean) +
labs(
title = "Forecasts vs Actuals (Test Period Only)",
x = "Month",
y = "PPI (Index)"
) +
theme_bw()