Discussion 4

Author

Robert Jenkins

Setup

library(fpp3)
library(fredr)
library(dplyr)
library(tseries)
library(fabletools)
library(ggplot2)
library(readr)
library(tidyverse)
library(tsibble)
library(feasts)
library(scales)
library(patchwork)
library(ggtime)
library(tseries)
fredr_has_key()
[1] TRUE

FRED Data Part 1

#Housing Inventory: Active Listing Count in the United States (ACTLISCOUUS)
Housing_Inventory <- fredr(series_id = "ACTLISCOUUS") |>
  transmute(month = yearmonth(date),value) |>
  as_tsibble(index = month)

autoplot(Housing_Inventory, value) +
  labs(title = "Housing Inventory: Active Listing Count in the U.S.",x = "Month",y = "Active Listings") #Not Stationary, Downward Trend and Then Upward Trend

adf.test(Housing_Inventory$value) #The Augmented Dickey-Fuller test returned a p-value of 0.9698, which is greater than 0.05. Therefore, we fail to reject the null hypothesis of a unit root, indicating that the time series is non-stationary. This is consistent with the observed trend and seasonality in the data.

    Augmented Dickey-Fuller Test

data:  Housing_Inventory$value
Dickey-Fuller = -0.68087, Lag order = 4, p-value = 0.9698
alternative hypothesis: stationary

First Difference

# First difference
housing_diff1 <- Housing_Inventory |>
  mutate(diff_1 = difference(value))

autoplot(housing_diff1, diff_1) +
  labs(title = "First-Differenced Housing Inventory",x = "Month",y = "First Difference")

adf.test(na.omit(housing_diff1$diff_1)) #P value 0.01 -> Series is Stationary

    Augmented Dickey-Fuller Test

data:  na.omit(housing_diff1$diff_1)
Dickey-Fuller = -6.8908, Lag order = 4, p-value = 0.01
alternative hypothesis: stationary

ACF/PCF

housing_diff1 |>
  gg_tsdisplay(diff_1, plot_type = "partial")

# ACF and PACF Interpretation:
# The ACF plot of the first-differenced series shows a strong spike at lag 1,
# followed by a gradual decay, which is indicative of a moving average process.
# The PACF plot also shows an initial spike at lag 1 but does not exhibit a clear
# cutoff, instead tapering off across subsequent lags. This pattern suggests that
# autoregressive terms are less dominant, while a moving average component is more appropriate.
# 
# Based on these observations, I selected an ARIMA(0,1,1) model, where:
# p = 0 reflects the lack of a clear autoregressive cutoff in the PACF,
# d = 1 accounts for the first differencing required to achieve stationarity,
# and q = 1 captures the strong lag 1 correlation observed in the ACF.
# 
# While a seasonal spike is visible at lag 12 in the ACF, the model is intentionally
# kept simple for this analysis, focusing on the dominant short-term dynamics.

Model Fitting

manual_fit <- Housing_Inventory |>
  model(ARIMA(value ~ pdq(0,1,1)))

auto_fit <- Housing_Inventory |>
  model(ARIMA(value))

Forecasting

manual_fc <- forecast(manual_fit, h = 12)
auto_fc   <- forecast(auto_fit, h = 12)

Plot

autoplot(Housing_Inventory, value) +
  autolayer(manual_fc, level = 95, series = "Manual ARIMA") +
  autolayer(auto_fc, level = 95, series = "Auto ARIMA") +
  labs(title = "Housing Inventory Forecast",x = "Month",y = "Active Listings")

report(auto_fit)
Series: value 
Model: ARIMA(0,1,3)(2,1,0)[12] 

Coefficients:
         ma1     ma2     ma3     sar1     sar2
      0.9035  0.6612  0.3519  -0.4369  -0.2545
s.e.  0.1025  0.0986  0.0926   0.0976   0.0909

sigma^2 estimated as 359379322:  log likelihood=-1171.49
AIC=2354.97   AICc=2355.84   BIC=2370.84
glance(manual_fit)
# A tibble: 1 × 8
  .model                      sigma2 log_lik   AIC  AICc   BIC ar_roots ma_roots
  <chr>                        <dbl>   <dbl> <dbl> <dbl> <dbl> <list>   <list>  
1 ARIMA(value ~ pdq(0, 1, 1)) 4.33e8  -1186. 2381. 2382. 2394. <cpl>    <cpl>   
glance(auto_fit)
# A tibble: 1 × 8
  .model           sigma2 log_lik   AIC  AICc   BIC ar_roots   ma_roots 
  <chr>             <dbl>   <dbl> <dbl> <dbl> <dbl> <list>     <list>   
1 ARIMA(value) 359379322.  -1171. 2355. 2356. 2371. <cpl [24]> <cpl [3]>

FRED Data Part 2

#Retail Sales: Retail Trade (MRTSSM44000USN)
Retail_Sales <- fredr(series_id = "MRTSSM44000USN") |>
  transmute(month = yearmonth(date),value) |>
  as_tsibble(index = month)

autoplot(Retail_Sales, value) +
  labs(title = "Retail Sales: Retail Trade Millions of DOllars, U.S.",x = "Month",y = "Millions of Dollars") #Not Stationary, Upward Trend With Extreme Seasonality

adf.test(Retail_Sales$value) #The Augmented Dickey-Fuller test returned a p-value of 0.9789, which is greater than 0.05. Therefore, we fail to reject the null hypothesis of a unit root, indicating that the time series is non-stationary. This is consistent with the observed trend and seasonality in the data.

    Augmented Dickey-Fuller Test

data:  Retail_Sales$value
Dickey-Fuller = -0.56141, Lag order = 7, p-value = 0.9789
alternative hypothesis: stationary

SARIMA Model Fit

seasonal_fit <- Retail_Sales |>
  model(auto_arima = ARIMA(value))

report(seasonal_fit)
Series: value 
Model: ARIMA(1,1,2)(0,1,1)[12] 

Coefficients:
          ar1     ma1      ma2     sma1
      -0.5720  0.2437  -0.4553  -0.7042
s.e.   0.0909  0.0840   0.0440   0.0352

sigma^2 estimated as 92665162:  log likelihood=-4196.47
AIC=8402.93   AICc=8403.09   BIC=8422.84