Forecasting Unemployment Rate in New Hampshire

Objective

This analysis aims to forecast the quarterly home prices in Vermont next five years.

Data

Dependent variable

  • FHFA All-Transactions HPI from Federal Housing Finance Agency (via FRED - St. Louis Fed).

Independent variable

  • Mortgage Interest Rate from FRED (30-Year Fixed Rate Mortgage Average)
  • Inventory/Supply from VAR, Redfin/Zillow (Active Listing Count, Months of Supply)
  • Population/Migration from U.S. Census Bureau, BEA (Net Migration Data)
  • Employment/Income from FRED (Vermont Unemployment Rate, Real Median Household Income). * National Home Price Index from S&P CoreLogic Case-Shiller U.S. National Home Price Index
  • Second home/Tourism from state rooms and meals tax revenue
  • Policy impact from state-level housing legislation
# Time Series Machine Learning
library(tidymodels)
library(modeltime)
library(vip)

# EDA
library(DataExplorer)

# Core 
library(tidyverse)
library(timetk)
library(lubridate)
library(readxl)

moodys_tbl <- read_excel("../../../Vermont/docs from Tom/Moodys 0625.xlsx", sheet = "Vermont (Quarterly)", skip = 0, n_max = 98) %>%
    filter(!is.na(`VERMONT - QUARTERLY`))

moodys_us_tbl <- read_excel("../../../Vermont/docs from Tom/Moodys 0625.xlsx", sheet = "US (Quarterly)", skip = 0) %>%
    filter(!is.na(`UNITED STATES - QUARTERLY`))

home_inventory <- read_csv("../00_data/Housing_Inventory_VT_FRED.csv")

# Zillow
# ZORI (Smoothed): All Homes Plus Multifamily Time Series ($): County
# Downloaded on 10/16/2025
zillow <- read_csv("../00_data/County_zori_uc_sfrcondomfr_sm_month.csv")

zillow_clean_tbl <- zillow %>%
    filter(State == "VT") %>%
    select(-c(RegionID,SizeRank,RegionType,StateName,State,Metro,StateCodeFIPS,MunicipalCodeFIPS)) %>%
    pivot_longer(-RegionName, names_to = "date", values_to = "rent") %>%
    mutate(date = lubridate::ymd(date)) %>%
    group_by(RegionName) %>%
    summarise_by_time(date, .by = "quarter", rent = mean(rent)) %>%
    ungroup() %>% 
    pivot_wider(names_from = RegionName, values_from = rent, names_prefix = "rent") %>% 
    janitor::clean_names() %>%
    
    # Only chittenden_county has enough data
    select(date, rent_chittenden_county) %>%
    drop_na()

1.0 Clean data

# Extract var description
var_desc_tbl <- moodys_tbl %>% select(1:2) %>% set_names(c("Var_abb","Var_desc"))
var_desc_us_tbl <- moodys_us_tbl %>% 
    select(1:2) %>% 
    set_names(c("Var_abb","Var_desc")) %>%
    filter(Var_desc != "0")

# Reshape Moodys data
moodys_clean_tbl <- moodys_tbl %>%
    mutate(across(where(is.numeric), as.character)) %>%
    
    # Add national data
    bind_rows(
        moodys_us_tbl %>% 
            filter(`UNITED STATES - QUARTERLY` == "FHOFHOPIQ")  %>% 
            rename(`VERMONT - QUARTERLY`=`UNITED STATES - QUARTERLY`) %>% 
            mutate(across(everything(), as.character)) %>% 
            mutate(`VERMONT - QUARTERLY` = `VERMONT - QUARTERLY` %>% paste0("_us")) %>%
            distinct() 
    ) %>%

    select(-2) %>%
    pivot_longer(-`VERMONT - QUARTERLY`, names_to = "date") %>%
    pivot_wider(names_from = `VERMONT - QUARTERLY`, values_from = value) %>%
    # Replace "ND" with ""
    mutate(across(everything(), ~str_replace(.,"ND",""))) %>%
    # Convert them to numeric
    mutate(across(-date, as.numeric)) %>%
    mutate(date = date %>% yq()) %>%
    
    # Remove forecast
    filter(date <= as.Date("2025-04-01")) %>%
    
    # Add rent from Zillow
    left_join(
        zillow_clean_tbl
    ) %>%
    
    # Add FRED data
    left_join(
        home_inventory %>% 
            setNames(c("date", "home_inv")) %>%
            summarise_by_time(date, .by = "quarter", home_inv = sum(home_inv))
    )

moodys_clean_tbl %>% skimr::skim()
Data summary
Name Piped data
Number of rows 202
Number of columns 101
_______________________
Column type frequency:
Date 1
numeric 100
________________________
Group variables None

Variable type: Date

skim_variable n_missing complete_rate min max median n_unique
date 0 1 1975-01-01 2025-04-01 2000-02-15 202

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
RETQ 0 1.00 269.74 45.11 160.49 247.84 289.72 306.26 317.30 ▁▂▂▃▇
RERMQ 0 1.00 0.93 0.12 0.73 0.83 0.90 1.00 1.31 ▇▇▆▃▁
RE23Q 0 1.00 13.79 2.48 6.47 11.98 14.54 15.37 18.67 ▁▂▅▇▂
REMFQ 0 1.00 37.68 6.23 26.47 30.94 38.65 43.87 46.93 ▅▃▃▅▇
RETUQ 0 1.00 7.67 1.13 5.19 6.72 8.05 8.57 8.97 ▃▂▁▃▇
RE51Q 0 1.00 5.27 0.81 3.80 4.61 4.98 5.90 6.97 ▃▇▃▃▃
RETRQ 0 1.00 3.90 0.56 2.72 3.52 3.92 4.45 4.68 ▃▂▆▆▇
RETTQ 0 1.00 42.75 7.16 25.60 40.74 44.90 47.36 50.93 ▂▂▁▅▇
RE42Q 0 1.00 8.83 1.35 5.27 8.61 9.27 9.70 10.47 ▂▂▁▇▇
RERTQ 0 1.00 33.92 5.86 20.24 31.68 35.77 37.99 40.57 ▂▂▂▅▇
REFIQ 0 1.00 11.80 1.57 7.53 11.68 12.17 12.87 13.77 ▂▂▁▇▇
REPSQ 0 1.00 19.27 7.85 5.92 13.63 20.32 26.18 33.75 ▇▆▇▆▅
REEHQ 0 1.00 46.51 14.11 23.48 33.48 45.65 60.37 66.83 ▆▃▃▃▇
RELHQ 0 1.00 29.27 6.17 14.86 25.76 31.97 33.23 37.70 ▃▂▂▇▆
RE81Q 0 1.00 8.36 2.10 3.90 6.70 9.38 10.03 10.50 ▂▁▁▂▇
RE62Q 0 1.00 34.83 12.96 13.38 22.70 33.75 48.19 52.73 ▅▃▃▃▇
RE5411Q 0 1.00 1.79 0.39 0.82 1.69 1.93 2.04 2.19 ▁▂▁▅▇
RE5416Q 0 1.00 1.40 0.74 0.39 0.80 1.22 2.11 2.82 ▇▇▂▆▃
REGVQ 0 1.00 46.44 9.06 26.69 39.95 49.18 54.54 56.87 ▂▂▂▃▇
RE311Q 0 1.00 3.76 1.13 2.00 2.83 3.77 5.00 5.51 ▆▃▇▁▇
RE312Q 0 1.00 0.42 0.35 0.09 0.19 0.26 0.50 1.31 ▇▂▁▁▂
RE313Q 0 1.00 0.16 0.07 0.05 0.10 0.17 0.22 0.25 ▃▇▂▆▇
RE314Q 0 1.00 0.17 0.08 0.04 0.08 0.19 0.21 0.34 ▇▂▇▅▂
RE315Q 0 1.00 0.71 0.33 0.25 0.38 0.72 1.06 1.15 ▇▃▂▃▇
RE316Q 0 1.00 0.05 0.01 0.02 0.04 0.05 0.05 0.08 ▂▇▇▃▁
RE321Q 0 1.00 2.40 0.58 1.30 1.80 2.54 2.93 3.29 ▅▃▃▆▇
RE322Q 0 1.00 1.17 0.40 0.55 0.74 1.30 1.55 1.69 ▆▃▁▃▇
RE323Q 0 1.00 2.13 0.92 0.78 1.08 2.39 3.01 3.26 ▆▁▂▂▇
RE324Q 0 1.00 0.10 0.03 0.05 0.07 0.11 0.14 0.22 ▆▅▇▁▁
RE325Q 0 1.00 1.19 0.23 0.73 0.97 1.22 1.40 1.59 ▂▇▃▇▃
RE326Q 0 1.00 1.34 0.20 1.00 1.20 1.31 1.37 1.83 ▂▇▃▁▂
RE327Q 0 1.00 1.82 0.22 1.17 1.59 1.85 2.01 2.22 ▁▆▆▇▇
RE331Q 0 1.00 0.52 0.30 0.13 0.17 0.62 0.81 0.93 ▇▂▁▆▆
RE332Q 0 1.00 2.10 0.41 1.34 1.77 1.91 2.48 2.94 ▁▇▁▃▂
RE333Q 0 1.00 3.92 1.13 2.37 2.86 3.98 4.90 5.74 ▇▂▃▃▅
RE334Q 0 1.00 8.25 2.62 3.85 6.25 8.79 10.82 11.79 ▆▃▃▆▇
RE335Q 0 1.00 1.35 0.19 0.95 1.21 1.34 1.51 1.64 ▃▅▇▆▇
RE336Q 0 1.00 2.45 0.49 1.58 1.99 2.50 2.96 3.23 ▅▃▆▃▇
RE337Q 0 1.00 2.27 0.68 1.21 1.46 2.48 2.83 3.44 ▆▁▃▇▂
RE339Q 0 1.00 1.43 0.49 0.63 0.84 1.52 1.81 2.37 ▇▁▇▆▂
REMFDQ 0 1.00 26.49 5.76 16.17 21.46 27.68 32.20 34.58 ▅▃▃▅▇
REMFNQ 0 1.00 11.20 1.03 8.95 10.65 11.31 11.65 13.34 ▂▂▇▃▂
REOFFQ 0 1.00 43.84 10.66 21.97 37.30 47.68 52.52 58.42 ▅▂▆▇▇
FLBF 4 0.98 319.43 40.17 216.41 295.82 335.48 352.66 362.01 ▁▂▂▃▇
FLBE 4 0.98 305.83 40.94 197.94 286.99 325.09 338.49 348.11 ▁▂▁▃▇
FLBU 4 0.98 13.60 4.07 6.15 10.94 13.21 15.48 36.05 ▆▇▂▁▁
FLBR 4 0.98 4.37 1.51 1.76 3.27 4.18 5.33 10.48 ▆▇▅▁▁
FYPQ 0 1.00 19040.73 12251.11 2480.56 8312.88 17426.49 28328.92 47824.65 ▇▅▅▃▂
FYP\(Q | 0| 1.00| 22906.27| 8425.01| 9447.61| 15611.76| 23728.81| 29984.86| 38880.39|▇▇▆▇▅ | |FYPCONQ | 0| 1.00| 1520.52| 974.61| 158.40| 666.89| 1418.56| 2082.69| 3699.97|▇▇▅▅▂ | |FYPADJQ | 0| 1.00| 215.69| 185.45| -11.40| 42.76| 219.43| 337.47| 643.53|▇▃▅▂▂ | |FYPDIRQ | 0| 1.00| 3907.21| 2471.00| 471.37| 1800.54| 3646.09| 5385.76| 9848.49|▇▇▃▃▂ | |FYPTRPQ | 0| 1.00| 3488.90| 2880.63| 364.86| 973.78| 2388.90| 5590.63| 12808.74|▇▂▃▁▁ | |FYPEWSQ | 0| 1.00| 9055.01| 5343.68| 1375.32| 4467.31| 8798.67| 12886.42| 21201.92|▇▆▇▃▂ | |FYPESSQ | 0| 1.00| 2187.63| 1380.17| 200.23| 908.12| 2010.88| 3422.96| 4872.93|▇▆▃▇▃ | |FYPEPPQ | 0| 1.00| 1706.81| 1088.69| 234.40| 770.59| 1713.90| 2486.15| 4446.49|▇▂▆▂▁ | |FYPEPPFRQ | 0| 1.00| 99.54| 55.58| 16.34| 61.46| 85.60| 125.48| 301.64|▇▇▂▁▁ | |FYPEPPNFQ | 0| 1.00| 1607.28| 1059.14| 202.62| 676.73| 1629.98| 2348.08| 4179.26|▇▂▆▂▂ | |FYPNWQ | 0| 1.00| 9985.72| 6934.26| 1105.25| 3833.95| 8600.34| 15465.58| 26622.74|▇▅▃▂▂ | |FYPDPIQ | 0| 1.00| 16999.34| 10933.09| 2204.35| 7314.80| 15205.04| 25685.07| 42443.52|▇▅▅▃▂ | |FAHEMF | 104| 0.49| 18.80| 3.60| 14.11| 16.35| 18.34| 21.07| 26.56|▇▅▅▁▂ | |FPOPQ | 0| 1.00| 588.98| 51.51| 478.10| 543.10| 609.09| 631.61| 650.77|▂▂▂▃▇ | |FPOP0004Q | 0| 1.00| 34.49| 4.13| 26.83| 31.09| 33.93| 38.09| 41.81|▃▇▇▅▇ | |FPOP0519Q | 0| 1.00| 122.05| 8.94| 106.50| 116.18| 120.94| 130.76| 137.72|▆▅▇▅▇ | |FPOP2024Q | 0| 1.00| 44.47| 3.82| 36.19| 43.10| 44.37| 46.56| 52.91|▂▂▇▃▂ | |FPOP2544Q | 0| 1.00| 160.43| 17.67| 118.81| 147.38| 157.41| 177.23| 188.74|▁▃▇▃▆ | |FPOP4564Q | 0| 1.00| 142.18| 39.77| 89.43| 95.72| 150.96| 180.06| 194.33|▇▂▂▃▇ | |FPOP65GQ | 0| 1.00| 85.36| 26.94| 52.86| 64.04| 77.81| 100.73| 151.62|▇▆▂▂▂ | |FHHOLDQ | 0| 1.00| 231.26| 36.18| 154.88| 199.46| 240.40| 263.74| 276.33|▂▃▂▃▇ | |FNMQ | 0| 1.00| 0.49| 0.41| -0.23| 0.16| 0.47| 0.79| 1.49|▆▇▇▆▂ | |FBRQ | 0| 1.00| 11.62| 2.63| 7.77| 9.60| 10.72| 14.59| 15.93|▅▇▂▂▆ | |FDRQ | 0| 1.00| 8.79| 0.69| 8.01| 8.33| 8.64| 8.92| 11.28|▇▅▁▁▁ | |FRTFSQ | 0| 1.00| 7.70| 4.33| 1.29| 4.29| 7.52| 10.86| 17.63|▇▇▇▅▂ | |FCCALLQ | 12| 0.94| 4012.25| 2586.46| 615.47| 1714.02| 3862.89| 5815.80| 9167.27|▇▃▅▂▃ | |FCCREVQ | 12| 0.94| 1109.50| 734.13| 2.44| 353.24| 1382.10| 1719.32| 2443.47|▆▂▂▇▂ | |FBKPY | 23| 0.89| 819.83| 599.16| 99.00| 244.00| 671.00| 1397.50| 2544.00|▇▅▂▃▁ | |FHPNR | 20| 0.90| 2479.17| 949.88| 914.30| 1911.96| 2238.31| 2904.93| 6100.63|▅▇▂▂▁ | |FHPN1 | 20| 0.90| 1807.34| 815.28| 509.22| 1151.45| 1711.76| 2260.61| 4252.64|▇▇▅▂▁ | |FHPNM | 20| 0.90| 671.83| 386.30| 27.75| 351.52| 590.26| 893.02| 2137.42|▇▇▅▂▁ | |FHVACRQ | 44| 0.78| 4.56| 1.55| 1.97| 3.51| 4.36| 5.22| 11.25|▆▇▂▁▁ | |FHX1 | 24| 0.88| 9.61| 2.58| 3.19| 7.89| 9.70| 11.39| 15.59|▂▅▇▇▂ | |FHX1MED | 0| 1.00| 137.19| 99.80| 20.63| 61.35| 93.26| 201.97| 433.89|▇▃▃▁▁ | |FMBADC | 17| 0.92| 3.42| 2.33| 0.00| 2.29| 2.99| 4.15| 23.47|▇▂▁▁▁ | |FMBAFSC | 27| 0.87| 0.23| 0.19| 0.00| 0.11| 0.17| 0.31| 0.95|▇▃▁▁▁ | |FHXAFF | 0| 1.00| 164.68| 29.31| 101.29| 144.69| 168.77| 187.43| 219.47|▃▅▇▇▅ | |FDEBTC | 12| 0.94| 21.44| 2.26| 16.92| 19.95| 21.37| 23.38| 24.96|▅▃▇▆▇ | |FCCDAF | 122| 0.40| 3.30| 0.47| 2.14| 3.00| 3.32| 3.54| 4.34|▁▅▇▅▂ | |FCCDAB | 122| 0.40| 1.53| 0.30| 0.88| 1.35| 1.49| 1.67| 2.29|▂▇▇▂▂ | |FCCDBC | 122| 0.40| 2.85| 0.98| 1.40| 2.07| 2.50| 3.60| 5.21|▆▇▂▃▂ | |FCCDTO | 122| 0.40| 2.47| 0.99| 0.85| 1.82| 2.32| 3.26| 4.27|▇▆▇▃▆ | |FMOSDQ | 60| 0.70| 2.76| 1.46| 0.59| 1.65| 2.54| 3.64| 7.54|▇▇▅▂▁ | |FMOPDQ | 60| 0.70| 1.31| 0.54| 0.49| 0.87| 1.30| 1.64| 2.93|▇▇▆▂▁ | |FMORDQ | 60| 0.70| 1.45| 1.18| 0.03| 0.48| 1.15| 2.07| 5.82|▇▅▂▁▁ | |FYHHMEDQ | 0| 1.00| 40942.35| 19334.26| 10558.78| 25502.96| 41611.15| 52835.58| 89594.79|▆▆▇▂▂ | |FHOFHOPIQ | 0| 1.00| 320.18| 191.25| 56.84| 175.72| 252.84| 446.18| 876.63|▇▃▆▁▁ | |FGDPQ | 8| 0.96| 20.45| 11.75| 3.15| 11.02| 19.33| 29.46| 47.90|▇▆▆▅▂ | |FGDP\)Q 8 0.96 24.09 8.17 9.42 18.03 25.55 31.60 36.54 ▅▅▃▅▇
FHOFHOPIQ_us 0 1.00 268.23 155.96 59.87 146.38 231.84 357.56 704.49 ▇▅▅▁▂
rent_chittenden_county 167 0.17 2078.29 311.02 1693.07 1814.40 1930.64 2375.34 2588.68 ▇▂▂▂▅
home_inv 166 0.18 9553.69 6435.66 2342.00 3986.75 6318.50 15311.75 23434.00 ▇▁▂▃▁

Calculate year-over-year percent change of home price to remove stationarity and seasonality.

moodys_clean_tbl %>%
    select(date, FHOFHOPIQ) %>%
    tk_augment_lags(
      .value = FHOFHOPIQ,
      .lags  = 4
    ) %>%
    mutate(
      yoy_pct_change = (FHOFHOPIQ - FHOFHOPIQ_lag4) / FHOFHOPIQ_lag4 * 100
    ) %>%
    
    filter(date >= as.Date("1984-01-01")) %>%
    plot_time_series(date, yoy_pct_change, .smooth = FALSE)

Choose independent variables mortgage delinquency, foreclosure in VT Consider mortgage rate in US

moodys_select_tbl <- moodys_clean_tbl %>%
    select(date, FHOFHOPIQ, FHOFHOPIQ_us, FYHHMEDQ, FYPEWSQ, FMBAFSC, FMBADC, home_inv, rent_chittenden_county, FLBR, FNMQ) %>%
    setNames(c("date", "home_price", "home_price_us", "income_median", "income_wage", "mort_forecl", "mort_delinq","home_inv", "rent", "unemp_rate", "net_migr")) 

moodys_trans_tbl <- moodys_select_tbl %>%
    
    # Transform home_price to YOY to remove stationarity
    tk_augment_lags(c(matches("(home|income|rent|migr)")), .lags  = 4) %>%
    mutate(
      home_price_yoy = (home_price - home_price_lag4) / home_price_lag4 * 100,
      home_price_us_yoy = (home_price_us - home_price_us_lag4) / home_price_us_lag4 * 100,
      income_median_yoy = (income_median - income_median_lag4) / income_median_lag4 * 100,
      income_wage_yoy = (income_wage - income_wage_lag4) / income_wage_lag4 * 100,
      home_inv_yoy = (home_inv - home_inv_lag4) / home_inv_lag4 * 100,
      rent_yoy = (rent - rent_lag4) / rent_lag4 * 100,
      net_migr_yoy = (net_migr - net_migr_lag4) / net_migr_lag4 * 100
    ) %>%
    
    # Difference mortgage variables to remove stationarity
    # mutate(mort_f_d = mort_forecl + mort_delinq) %>%
    tk_augment_differences(mort_forecl, .lags = 4) %>%
    tk_augment_differences(mort_delinq, .lags = 4) %>%
    tk_augment_differences(unemp_rate, .lags = 4) %>%
    
    select(date, contains("yoy"), contains("diff")) %>%
    
    # Remove volatile early periods
    filter(date >= as.Date("1984-01-01"))
    
moodys_trans_tbl
# A tibble: 166 × 11
   date       home_price_yoy home_price_us_yoy income_median_yoy income_wage_yoy
   <date>              <dbl>             <dbl>             <dbl>           <dbl>
 1 1984-01-01           4.36              4.32             12.5            10.0 
 2 1984-04-01           9.17              4.40             16.1             8.63
 3 1984-07-01           6.65              4.51             18.4             9.05
 4 1984-10-01           5.56              4.64             19.4             9.76
 5 1985-01-01           9.12              4.39             20.0             8.87
 6 1985-04-01          10.0               4.54             19.3             9.21
 7 1985-07-01           5.23              5.55             14.5            10.8 
 8 1985-10-01           7.93              5.90              6.09            9.59
 9 1986-01-01          11.3               6.28             -1.31           10.6 
10 1986-04-01           9.23              6.89             -7.40            9.44
# ℹ 156 more rows
# ℹ 6 more variables: home_inv_yoy <dbl>, rent_yoy <dbl>, net_migr_yoy <dbl>,
#   mort_forecl_lag4_diff1 <dbl>, mort_delinq_lag4_diff1 <dbl>,
#   unemp_rate_lag4_diff1 <dbl>
# moodys_trans_tbl %>%
#     select(date, contains("home")) %>%
#     pivot_longer(-date) %>% 
#     plot_time_series(date, value, name, .smooth = TRUE, .smooth_span = 0.2, .smooth_alpha = 1, .smooth_size = 0.5)

2.0 Explore data

Explore data to recommend data transformation.

2.1 Regularity of times series

moodys_trans_tbl %>%
    tk_summary_diagnostics(.date_var = date) %>%
    glimpse()
Rows: 1
Columns: 12
$ n.obs        <int> 166
$ start        <date> 1984-01-01
$ end          <date> 2025-04-01
$ units        <chr> "days"
$ scale        <chr> "quarter"
$ tzone        <chr> "UTC"
$ diff.minimum <dbl> 7776000
$ diff.q1      <dbl> 7862400
$ diff.median  <dbl> 7862400
$ diff.mean    <dbl> 7889105
$ diff.q3      <dbl> 7948800
$ diff.maximum <dbl> 7948800

2.2 Variance reduction

Consider an alternative to log transformation as it produces NaNs due to negative values.

moodys_trans_tbl %>% 
    select(date, home_price_yoy) %>%
    mutate(home_price_yoy_log = log1p(home_price_yoy)) %>%
    pivot_longer(-date) %>%
    plot_time_series(date, value, .color_var = name, .smooth = FALSE)
# unemployment_tbl %>% plot_time_series(date, log(rate), .smooth_span = 0.2)

2.3 Seasonality

In theory, there shouldn’t be seasonality in yoy %.

moodys_trans_tbl %>% 
    select(date, home_price_yoy) %>% plot_seasonal_diagnostics(date, home_price_yoy)

2.4 Anomalies

When analyzing and forecasting unemployment, you should remove the extreme data points from the COVID-19 pandemic. These were one-time anomalies that are not likely to repeat and can distort your model. After removing them, use a method like linear interpolation to impute or replace the missing values, creating a smoother and more reliable dataset. This will allow your forecasting model to be based on normal economic trends, leading to a more accurate and robust long-term forecast.

moodys_trans_tbl %>% 
    select(date, home_price_yoy) %>%
    plot_anomaly_diagnostics(
        .date_var = date, 
        .value    = home_price_yoy,
        .alpha    = 0.02,
        .max_anomalies = 0.01
    )
moodys_trans_tbl %>% 
    select(date, home_price_yoy) %>%
    tk_anomaly_diagnostics(
        .date_var = date, 
        .value    = home_price_yoy,
        .alpha    = 0.02,
        .max_anomalies = 0.01
    )
# A tibble: 166 × 11
   date       observed  season trend remainder seasadj remainder_l1 remainder_l2
   <date>        <dbl>   <dbl> <dbl>     <dbl>   <dbl>        <dbl>        <dbl>
 1 1984-01-01     4.36  0.333   9.77    -5.74     4.03        -22.8         23.3
 2 1984-04-01     9.17 -0.0893  9.59    -0.328    9.26        -22.8         23.3
 3 1984-07-01     6.65 -0.354   9.40    -2.40     7.00        -22.8         23.3
 4 1984-10-01     5.56  0.110   9.22    -3.77     5.45        -22.8         23.3
 5 1985-01-01     9.12  0.333   9.04    -0.249    8.79        -22.8         23.3
 6 1985-04-01    10.0  -0.0893  8.83     1.29    10.1         -22.8         23.3
 7 1985-07-01     5.23 -0.354   8.62    -3.04     5.58        -22.8         23.3
 8 1985-10-01     7.93  0.110   8.41    -0.588    7.82        -22.8         23.3
 9 1986-01-01    11.3   0.333   8.20     2.74    10.9         -22.8         23.3
10 1986-04-01     9.23 -0.0893  7.98     1.35     9.32        -22.8         23.3
# ℹ 156 more rows
# ℹ 3 more variables: anomaly <chr>, recomposed_l1 <dbl>, recomposed_l2 <dbl>

2.5 ACF Diagnostics

moodys_trans_tbl %>%
    
    plot_acf_diagnostics(date, home_price_yoy, 
                         .ccf_vars = c(income_median_yoy, contains("diff")), 
                         # .show_ccf_vars_only = TRUE,
                         .facet_ncol = 1, 
                         .lags = 36, 
                         .title = "HP corr with its own lags at 4,8,12, m_income at 11,12,13 and foreclosure at 7,8,9 after differencing")

2.6 Recommendation

  1. seasonality seems minimal
  2. corr with its own lags of 4,8,12; sign of remaining seasonality?
  3. corr with foreclosure at 7,8,9 after differencing
  4. corr with median income at 11 through 17

3.0 Identify features

Identify possible features for modeling.

data preparation

# Prep data
moodys_trans_tbl <- unemployment_tbl %>%
    
    # log transform for variance reduction 
    mutate(rate_trans = log(rate)) %>%
    # standardize for easy comparison and some ML algorithms
    mutate(rate_trans = standardize_vec(rate_trans)) %>%

    # Cleaning for the Pandemic
    mutate(home_price_yoy = ts_clean_vec(rate_trans, period = 12)) %>%
    select(-rate, -rate_trans)

moodys_trans_tbl %>%
    pivot_longer(contains("trans")) %>%
    plot_time_series(date, value, name, .smooth_span = 0.2)

# Save Key Params
std_mean_rate    <- 1.34702775921281
std_sd_rate      <- 0.352769766524941

3.1 Time-based features

* Time Series Signature

moodys_trans_signature_tbl <- moodys_trans_tbl %>% 
    tk_augment_timeseries_signature() %>%
    select(-diff, -ends_with("iso"), -ends_with(".xts"),
           -contains("month"), -contains("week"), -contains("day"),
           -contains("hour"), -contains("minute"), -contains("second"), -contains("am.pm"))

moodys_trans_signature_tbl %>% glimpse()
Rows: 166
Columns: 15
$ date                   <date> 1984-01-01, 1984-04-01, 1984-07-01, 1984-10-01…
$ home_price_yoy         <dbl> 4.363144, 9.169452, 6.646419, 5.563444, 9.11999…
$ home_price_us_yoy      <dbl> 4.319844, 4.399065, 4.510958, 4.643825, 4.39238…
$ income_median_yoy      <dbl> 12.502231, 16.102115, 18.401050, 19.449061, 19.…
$ income_wage_yoy        <dbl> 10.026114, 8.632716, 9.053278, 9.763479, 8.8677…
$ home_inv_yoy           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ rent_yoy               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ net_migr_yoy           <dbl> -1.142027e+02, -1.286330e+02, -1.501123e+02, -3…
$ mort_forecl_lag4_diff1 <dbl> 0.64437, 0.00000, 0.00000, 0.00000, -0.64437, 0…
$ mort_delinq_lag4_diff1 <dbl> 4.31668, 3.89201, -2.11274, -2.46383, -6.79729,…
$ unemp_rate_lag4_diff1  <dbl> -1.88298, -1.80066, -1.53282, -1.18839, -0.7479…
$ index.num              <dbl> 441763200, 449625600, 457488000, 465436800, 473…
$ year                   <int> 1984, 1984, 1984, 1984, 1985, 1985, 1985, 1985,…
$ half                   <int> 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2,…
$ quarter                <int> 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,…

* Trend-Based Features

nonlinear natural spline model seems the best.

# ** Linear Trend

moodys_trans_signature_tbl %>%
    plot_time_series_regression(
        date,
        .formula = home_price_yoy ~ index.num, 
        .show_summary = TRUE
    )

Call:
stats::lm(formula = .formula, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-8.083 -4.432 -1.661  3.609 16.251 

Coefficients:
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5.372e+00  1.306e+00   4.115 6.12e-05 ***
index.num   -2.830e-10  1.129e-09  -0.251    0.802    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.5 on 164 degrees of freedom
Multiple R-squared:  0.0003828, Adjusted R-squared:  -0.005712 
F-statistic: 0.0628 on 1 and 164 DF,  p-value: 0.8024
# ** Nonlinear Trend - Basis Splines

moodys_trans_signature_tbl %>%
    plot_time_series_regression(
        date,
        .formula = home_price_yoy ~ splines::bs(index.num, df = 4),
        .show_summary = TRUE
    )

Call:
stats::lm(formula = .formula, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-10.613  -3.009  -1.081   2.344  13.004 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       13.657      1.653   8.260 5.04e-14 ***
splines::bs(index.num, df = 4)1  -17.600      3.408  -5.165 7.03e-07 ***
splines::bs(index.num, df = 4)2    2.694      2.703   0.997    0.320    
splines::bs(index.num, df = 4)3  -21.121      3.035  -6.959 8.22e-11 ***
splines::bs(index.num, df = 4)4    2.809      2.191   1.282    0.202    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 4.587 on 161 degrees of freedom
Multiple R-squared:  0.3176,    Adjusted R-squared:  0.3007 
F-statistic: 18.74 on 4 and 161 DF,  p-value: 1.155e-12
moodys_trans_signature_tbl %>%
    plot_time_series_regression(
        date,
        .formula = home_price_yoy ~ splines::ns(index.num, 
                                              knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9))),
        .show_summary = TRUE
    )

Call:
stats::lm(formula = .formula, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-10.224  -2.486  -0.454   1.557  10.774 

Coefficients:
                                                                                     Estimate
(Intercept)                                                                            14.587
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1    3.092
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  -25.667
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3    1.602
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4  -21.892
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5    8.985
                                                                                     Std. Error
(Intercept)                                                                               1.142
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1      1.499
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2      2.046
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3      1.727
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4      3.018
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5      1.588
                                                                                     t value
(Intercept)                                                                           12.771
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1   2.063
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2 -12.544
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3   0.928
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4  -7.254
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5   5.658
                                                                                     Pr(>|t|)
(Intercept)                                                                           < 2e-16
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1   0.0407
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  < 2e-16
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3   0.3550
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4 1.64e-11
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5 6.90e-08
                                                                                        
(Intercept)                                                                          ***
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1 *  
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2 ***
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4 ***
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.671 on 160 degrees of freedom
Multiple R-squared:  0.5655,    Adjusted R-squared:  0.5519 
F-statistic: 41.65 on 5 and 160 DF,  p-value: < 2.2e-16

* Seasonal Features

No seasonality detected

# ** Monthly Seasonality

moodys_trans_signature_tbl %>%
    plot_time_series_regression(
        date,
        .formula = home_price_yoy ~ quarter,
        .show_summary = TRUE
    )

Call:
stats::lm(formula = .formula, data = df)

Residuals:
   Min     1Q Median     3Q    Max 
-8.061 -4.518 -1.794  3.409 16.073 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  5.17011    1.04172   4.963 1.72e-06 ***
quarter     -0.04305    0.38192  -0.113     0.91    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 5.501 on 164 degrees of freedom
Multiple R-squared:  7.747e-05, Adjusted R-squared:  -0.00602 
F-statistic: 0.01271 on 1 and 164 DF,  p-value: 0.9104

3.2 INTERACTIONS

How do I know possible interactions?

model_formula_interactions <- as.formula(
    home_price_yoy ~ splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))
    + .
    + (as.factor(quarter) * month.lbl)
)

unemployment_trans_signature_tbl %>% glimpse()

unemployment_trans_signature_tbl %>%
    plot_time_series_regression(
        date, 
        .formula = model_formula_interactions,
        .show_summary = TRUE
    )

3.3 FOURIER SERIES

How do I pick periods?

  • For daily data with over a year of observations, include the following fourier series: 7 (weekly), 30 (monthly), 90 (quarterly), and 365 (yearly).
  • Look for the duration of up and down wavy cycles from the ACF chart.

Fourier series may not be a good fit for the unemployment rate, which doesn’t display a wavy pattern, a frequent and regular patterns of ups and downs.

# - tk_augment_fourier

# Data Prep

unemployment_trans_signature_tbl %>%
    plot_acf_diagnostics(date, home_price_yoy)

unemployment_trans_fourier_tbl <- unemployment_trans_signature_tbl %>%
    tk_augment_fourier(date, .periods = c(6, 13, 25), .K = 2)

unemployment_trans_fourier_tbl %>% glimpse()

# Model

model_formula_fourier <- as.formula(
    home_price_yoy ~ splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))
    + .
    + (as.factor(quarter) * month.lbl)
)

# Visualize

unemployment_trans_fourier_tbl %>%
    # filter_by_time(.start_date = "2018-09-13") %>%
    plot_time_series_regression(
        date,
        .formula = model_formula_fourier,
        .show_summary = TRUE
    )

3.4 LAGS

How do I get

  • .lags = (5*12):600: 5 year forecast for monthly data (12 months)
  • .lags = c(87,135): significant spikes in ACF and/or PACF

Look for spikes in ACF/PACF beyond the forecasting time horizon to avoid forecasting external regressors.

# - tk_augment_lags()

# Data Prep
2*4 # 2 year forecast for quarterly data
[1] 8
moodys_trans_signature_tbl %>%
    plot_acf_diagnostics(
        date, .value = home_price_yoy, .lags = (2*4):100
    )
moodys_trans_lags_tbl <- moodys_trans_signature_tbl %>%
    tk_augment_lags(home_price_yoy, .lags = c(4,8,9)) 


moodys_trans_lags_tbl %>% glimpse()
Rows: 166
Columns: 18
$ date                   <date> 1984-01-01, 1984-04-01, 1984-07-01, 1984-10-01…
$ home_price_yoy         <dbl> 4.363144, 9.169452, 6.646419, 5.563444, 9.11999…
$ home_price_us_yoy      <dbl> 4.319844, 4.399065, 4.510958, 4.643825, 4.39238…
$ income_median_yoy      <dbl> 12.502231, 16.102115, 18.401050, 19.449061, 19.…
$ income_wage_yoy        <dbl> 10.026114, 8.632716, 9.053278, 9.763479, 8.8677…
$ home_inv_yoy           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ rent_yoy               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ net_migr_yoy           <dbl> -1.142027e+02, -1.286330e+02, -1.501123e+02, -3…
$ mort_forecl_lag4_diff1 <dbl> 0.64437, 0.00000, 0.00000, 0.00000, -0.64437, 0…
$ mort_delinq_lag4_diff1 <dbl> 4.31668, 3.89201, -2.11274, -2.46383, -6.79729,…
$ unemp_rate_lag4_diff1  <dbl> -1.88298, -1.80066, -1.53282, -1.18839, -0.7479…
$ index.num              <dbl> 441763200, 449625600, 457488000, 465436800, 473…
$ year                   <int> 1984, 1984, 1984, 1984, 1985, 1985, 1985, 1985,…
$ half                   <int> 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2,…
$ quarter                <int> 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,…
$ home_price_yoy_lag4    <dbl> NA, NA, NA, NA, 4.363144, 9.169452, 6.646419, 5…
$ home_price_yoy_lag8    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 4.363144, 9.169…
$ home_price_yoy_lag9    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.363144, 9…
# Model

model_formula <- as.formula(
    home_price_yoy ~ splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))
    + .
    - home_price_us_yoy
    - income_median_yoy
    - income_wage_yoy
    - mort_forecl_lag4_diff1 
    - home_inv_yoy 
    - rent_yoy
)


# Visualize

moodys_trans_lags_tbl %>%
    plot_time_series_regression(
        date, 
        .formula = model_formula,
        .show_summary = TRUE 
    )

Call:
stats::lm(formula = .formula, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-2.0896 -0.6676  0.0221  0.7188  1.8849 

Coefficients: (4 not defined because of singularities)
                                                                                       Estimate
(Intercept)                                                                          -3.853e+05
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  6.261e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  2.094e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  9.818e+02
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4         NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5         NA
date                                                                                         NA
net_migr_yoy                                                                         -2.461e-02
mort_delinq_lag4_diff1                                                                8.756e-01
unemp_rate_lag4_diff1                                                                -3.786e-01
index.num                                                                                    NA
year                                                                                  1.903e+02
half                                                                                  9.797e-01
quarter                                                                               4.726e+01
home_price_yoy_lag4                                                                  -2.428e-01
home_price_yoy_lag8                                                                  -8.642e-01
home_price_yoy_lag9                                                                   2.474e-01
                                                                                     Std. Error
(Intercept)                                                                           3.844e+05
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  4.062e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  2.233e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  9.696e+02
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4         NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5         NA
date                                                                                         NA
net_migr_yoy                                                                          5.177e-03
mort_delinq_lag4_diff1                                                                7.436e-01
unemp_rate_lag4_diff1                                                                 2.230e-01
index.num                                                                                    NA
year                                                                                  1.898e+02
half                                                                                  1.099e+00
quarter                                                                               4.733e+01
home_price_yoy_lag4                                                                   1.016e-01
home_price_yoy_lag8                                                                   1.383e-01
home_price_yoy_lag9                                                                   1.564e-01
                                                                                     t value
(Intercept)                                                                           -1.002
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1   1.541
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2   0.938
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3   1.013
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4      NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5      NA
date                                                                                      NA
net_migr_yoy                                                                          -4.753
mort_delinq_lag4_diff1                                                                 1.178
unemp_rate_lag4_diff1                                                                 -1.698
index.num                                                                                 NA
year                                                                                   1.002
half                                                                                   0.891
quarter                                                                                0.999
home_price_yoy_lag4                                                                   -2.390
home_price_yoy_lag8                                                                   -6.249
home_price_yoy_lag9                                                                    1.582
                                                                                     Pr(>|t|)
(Intercept)                                                                          0.329441
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1 0.140636
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2 0.360840
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3 0.324715
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4       NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5       NA
date                                                                                       NA
net_migr_yoy                                                                         0.000159
mort_delinq_lag4_diff1                                                               0.254286
unemp_rate_lag4_diff1                                                                0.106801
index.num                                                                                  NA
year                                                                                 0.329415
half                                                                                 0.384668
quarter                                                                              0.331254
home_price_yoy_lag4                                                                  0.028019
home_price_yoy_lag8                                                                  6.79e-06
home_price_yoy_lag9                                                                  0.130983
                                                                                        
(Intercept)                                                                             
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5    
date                                                                                    
net_migr_yoy                                                                         ***
mort_delinq_lag4_diff1                                                                  
unemp_rate_lag4_diff1                                                                   
index.num                                                                               
year                                                                                    
half                                                                                    
quarter                                                                                 
home_price_yoy_lag4                                                                  *  
home_price_yoy_lag8                                                                  ***
home_price_yoy_lag9                                                                     
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.297 on 18 degrees of freedom
  (135 observations deleted due to missingness)
Multiple R-squared:  0.967, Adjusted R-squared:  0.945 
F-statistic: 43.95 on 12 and 18 DF,  p-value: 7.993e-11

3.5 SPECIAL EVENTS

Created the dummy for the Pandemic starting from the month when the rate shot up to the month that gave us the largest R-squared.

moodys_trans_lags_pandemic_tbl <- moodys_trans_lags_tbl %>%
    mutate(event = ifelse(between(date, as.Date("2020-04-01"),as.Date("2023-09-01")), 1, 0))

moodys_trans_lags_pandemic_tbl %>% glimpse()
Rows: 166
Columns: 19
$ date                   <date> 1984-01-01, 1984-04-01, 1984-07-01, 1984-10-01…
$ home_price_yoy         <dbl> 4.363144, 9.169452, 6.646419, 5.563444, 9.11999…
$ home_price_us_yoy      <dbl> 4.319844, 4.399065, 4.510958, 4.643825, 4.39238…
$ income_median_yoy      <dbl> 12.502231, 16.102115, 18.401050, 19.449061, 19.…
$ income_wage_yoy        <dbl> 10.026114, 8.632716, 9.053278, 9.763479, 8.8677…
$ home_inv_yoy           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ rent_yoy               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ net_migr_yoy           <dbl> -1.142027e+02, -1.286330e+02, -1.501123e+02, -3…
$ mort_forecl_lag4_diff1 <dbl> 0.64437, 0.00000, 0.00000, 0.00000, -0.64437, 0…
$ mort_delinq_lag4_diff1 <dbl> 4.31668, 3.89201, -2.11274, -2.46383, -6.79729,…
$ unemp_rate_lag4_diff1  <dbl> -1.88298, -1.80066, -1.53282, -1.18839, -0.7479…
$ index.num              <dbl> 441763200, 449625600, 457488000, 465436800, 473…
$ year                   <int> 1984, 1984, 1984, 1984, 1985, 1985, 1985, 1985,…
$ half                   <int> 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2,…
$ quarter                <int> 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4,…
$ home_price_yoy_lag4    <dbl> NA, NA, NA, NA, 4.363144, 9.169452, 6.646419, 5…
$ home_price_yoy_lag8    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 4.363144, 9.169…
$ home_price_yoy_lag9    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.363144, 9…
$ event                  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
g <- moodys_trans_lags_pandemic_tbl %>%
    plot_time_series(date, home_price_yoy, .interactive = FALSE) +
    geom_point(color = "red", data = . %>% filter(event == 1))

plotly::ggplotly(g)
# Model

model_formula <- as.formula(
    home_price_yoy ~ splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))
    + .
    - home_price_us_yoy
    - income_median_yoy
    - mort_forecl_lag4_diff1 
)

# Visualize

moodys_trans_lags_pandemic_tbl %>%
    plot_time_series_regression(
        date,
        .formula = model_formula,
        .show_summary = TRUE
    )

Call:
stats::lm(formula = .formula, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.2219 -0.5268  0.1283  0.4601  1.5931 

Coefficients: (4 not defined because of singularities)
                                                                                       Estimate
(Intercept)                                                                          -1.463e+05
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  2.594e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  7.689e+02
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  3.671e+02
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4         NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5         NA
date                                                                                         NA
income_wage_yoy                                                                       7.353e-01
home_inv_yoy                                                                         -5.036e-02
rent_yoy                                                                              3.925e-02
net_migr_yoy                                                                         -2.217e-02
mort_delinq_lag4_diff1                                                                9.268e-01
unemp_rate_lag4_diff1                                                                 3.366e-01
index.num                                                                                    NA
year                                                                                  7.227e+01
half                                                                                  1.308e+00
quarter                                                                               1.779e+01
home_price_yoy_lag4                                                                  -1.448e-01
home_price_yoy_lag8                                                                  -7.829e-01
home_price_yoy_lag9                                                                   3.954e-01
event                                                                                 5.655e-01
                                                                                     Std. Error
(Intercept)                                                                           3.204e+05
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  3.472e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  1.857e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  8.094e+02
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4         NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5         NA
date                                                                                         NA
income_wage_yoy                                                                       3.010e-01
home_inv_yoy                                                                          1.980e-02
rent_yoy                                                                              2.358e-01
net_migr_yoy                                                                          5.456e-03
mort_delinq_lag4_diff1                                                                6.367e-01
unemp_rate_lag4_diff1                                                                 3.714e-01
index.num                                                                                    NA
year                                                                                  1.582e+02
half                                                                                  8.673e-01
quarter                                                                               3.946e+01
home_price_yoy_lag4                                                                   1.171e-01
home_price_yoy_lag8                                                                   1.239e-01
home_price_yoy_lag9                                                                   1.321e-01
event                                                                                 1.465e+00
                                                                                     t value
(Intercept)                                                                           -0.457
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1   0.747
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2   0.414
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3   0.454
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4      NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5      NA
date                                                                                      NA
income_wage_yoy                                                                        2.443
home_inv_yoy                                                                          -2.543
rent_yoy                                                                               0.166
net_migr_yoy                                                                          -4.062
mort_delinq_lag4_diff1                                                                 1.456
unemp_rate_lag4_diff1                                                                  0.906
index.num                                                                                 NA
year                                                                                   0.457
half                                                                                   1.508
quarter                                                                                0.451
home_price_yoy_lag4                                                                   -1.237
home_price_yoy_lag8                                                                   -6.318
home_price_yoy_lag9                                                                    2.993
event                                                                                  0.386
                                                                                     Pr(>|t|)
(Intercept)                                                                           0.65488
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  0.46737
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  0.68511
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  0.65710
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4       NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5       NA
date                                                                                       NA
income_wage_yoy                                                                       0.02843
home_inv_yoy                                                                          0.02342
rent_yoy                                                                              0.87017
net_migr_yoy                                                                          0.00117
mort_delinq_lag4_diff1                                                                0.16753
unemp_rate_lag4_diff1                                                                 0.38020
index.num                                                                                  NA
year                                                                                  0.65485
half                                                                                  0.15372
quarter                                                                               0.65893
home_price_yoy_lag4                                                                   0.23645
home_price_yoy_lag8                                                                   1.9e-05
home_price_yoy_lag9                                                                   0.00968
event                                                                                 0.70529
                                                                                        
(Intercept)                                                                             
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4    
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5    
date                                                                                    
income_wage_yoy                                                                      *  
home_inv_yoy                                                                         *  
rent_yoy                                                                                
net_migr_yoy                                                                         ** 
mort_delinq_lag4_diff1                                                                  
unemp_rate_lag4_diff1                                                                   
index.num                                                                               
year                                                                                    
half                                                                                    
quarter                                                                                 
home_price_yoy_lag4                                                                     
home_price_yoy_lag8                                                                  ***
home_price_yoy_lag9                                                                  ** 
event                                                                                   
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 1.012 on 14 degrees of freedom
  (135 observations deleted due to missingness)
Multiple R-squared:  0.9844,    Adjusted R-squared:  0.9666 
F-statistic:  55.2 on 16 and 14 DF,  p-value: 7.022e-10

3.6 EXTERNAL LAGGED REGRESSORS

  • Examine cross correlation (CC) between home_price_yoy and diff mort variables.
  • Look for spikes in CC beyond the forecasting time horizon to avoid forecasting external regressors.
  • Beyond 2 (years) * 4 (quarters) forecasting horizon, * delinquency: no spikes beyond 8 quarters * foreclosure: spikes at 7,8 * us home price: spikes at 8-18 * median income: 28 * wage: 1-9
# Identify corr predictors with 4-5 quarter lag
# income_median_yoy(5), home_inv_yoy(5), mort_forecl_lag4_diff1
moodys_trans_lags_tbl %>%
    plot_acf_diagnostics(
        date, home_price_yoy,
        .ccf_vars = -c(date, home_price_yoy, home_price_yoy_lag4, home_price_yoy_lag8, 
                       home_price_yoy_lag9,
                       index.num, year, half, quarter),
        .show_ccf_vars_only = TRUE,
        .facet_ncol = 3, 
        .lags = 10 
    )
moodys_trans_ext_lags_tbl <- moodys_trans_lags_tbl %>%
    tk_augment_lags(home_price_us_yoy, .lags = c(4)) %>%
    tk_augment_lags(income_wage_yoy, .lags = c(4)) %>%
    tk_augment_lags(rent_yoy, .lags = c(4)) %>%
    tk_augment_lags(income_median_yoy, .lags = c(5)) %>%
    tk_augment_lags(home_inv_yoy, .lags = c(5)) %>%
    tk_augment_lags(mort_forecl_lag4_diff1, .lags = c(7)) %>%
    select(-home_price_us_yoy, -income_median_yoy,
           -income_wage_yoy, -mort_forecl_lag4_diff1,
           -home_inv_yoy, rent_yoy)

moodys_trans_ext_lags_tbl %>% glimpse()
Rows: 166
Columns: 19
$ date                        <date> 1984-01-01, 1984-04-01, 1984-07-01, 1984-…
$ home_price_yoy              <dbl> 4.363144, 9.169452, 6.646419, 5.563444, 9.…
$ rent_yoy                    <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ net_migr_yoy                <dbl> -1.142027e+02, -1.286330e+02, -1.501123e+0…
$ mort_delinq_lag4_diff1      <dbl> 4.31668, 3.89201, -2.11274, -2.46383, -6.7…
$ unemp_rate_lag4_diff1       <dbl> -1.88298, -1.80066, -1.53282, -1.18839, -0…
$ index.num                   <dbl> 441763200, 449625600, 457488000, 465436800…
$ year                        <int> 1984, 1984, 1984, 1984, 1985, 1985, 1985, …
$ half                        <int> 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, 2, 2, 1, 1, …
$ quarter                     <int> 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, 3, 4, 1, 2, …
$ home_price_yoy_lag4         <dbl> NA, NA, NA, NA, 4.363144, 9.169452, 6.6464…
$ home_price_yoy_lag8         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, 4.363144, …
$ home_price_yoy_lag9         <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4.3631…
$ home_price_us_yoy_lag4      <dbl> NA, NA, NA, NA, 4.319844, 4.399065, 4.5109…
$ income_wage_yoy_lag4        <dbl> NA, NA, NA, NA, 10.026114, 8.632716, 9.053…
$ rent_yoy_lag4               <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ income_median_yoy_lag5      <dbl> NA, NA, NA, NA, NA, 12.502231, 16.102115, …
$ home_inv_yoy_lag5           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ mort_forecl_lag4_diff1_lag7 <dbl> NA, NA, NA, NA, NA, NA, NA, 0.64437, 0.000…
# Model

model_formula <- as.formula(
    home_price_yoy ~ splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.50, 0.7, 0.9)))
    + .
)

# Visualize

moodys_trans_ext_lags_tbl %>%
    plot_time_series_regression(
        .date_var = date,
        .formula = model_formula,
        .show_summary = TRUE
    )

Call:
stats::lm(formula = .formula, data = df)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.57919 -0.15150 -0.01948  0.21653  0.66630 

Coefficients: (4 not defined because of singularities)
                                                                                       Estimate
(Intercept)                                                                          -2.938e+05
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  9.665e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  1.477e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  7.810e+02
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4         NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5         NA
date                                                                                         NA
rent_yoy                                                                             -1.020e+00
net_migr_yoy                                                                         -5.920e-03
mort_delinq_lag4_diff1                                                               -1.764e+00
unemp_rate_lag4_diff1                                                                 2.527e-01
index.num                                                                                    NA
year                                                                                  1.451e+02
half                                                                                  4.331e-01
quarter                                                                               3.606e+01
home_price_yoy_lag4                                                                   2.689e-01
home_price_yoy_lag8                                                                  -6.048e-01
home_price_yoy_lag9                                                                   9.665e-02
home_price_us_yoy_lag4                                                               -5.038e-01
income_wage_yoy_lag4                                                                  3.326e-01
rent_yoy_lag4                                                                         6.260e-01
income_median_yoy_lag5                                                                4.449e-01
home_inv_yoy_lag5                                                                     1.658e-02
mort_forecl_lag4_diff1_lag7                                                          -1.274e+01
                                                                                     Std. Error
(Intercept)                                                                           2.268e+05
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  3.148e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  1.305e+03
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  5.715e+02
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4         NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5         NA
date                                                                                         NA
rent_yoy                                                                              2.774e-01
net_migr_yoy                                                                          6.135e-03
mort_delinq_lag4_diff1                                                                1.016e+00
unemp_rate_lag4_diff1                                                                 2.155e-01
index.num                                                                                    NA
year                                                                                  1.120e+02
half                                                                                  6.629e-01
quarter                                                                               2.791e+01
home_price_yoy_lag4                                                                   4.106e-01
home_price_yoy_lag8                                                                   1.364e-01
home_price_yoy_lag9                                                                   1.549e-01
home_price_us_yoy_lag4                                                                3.508e-01
income_wage_yoy_lag4                                                                  1.065e-01
rent_yoy_lag4                                                                         2.719e-01
income_median_yoy_lag5                                                                1.383e-01
home_inv_yoy_lag5                                                                     3.825e-02
mort_forecl_lag4_diff1_lag7                                                           2.786e+00
                                                                                     t value
(Intercept)                                                                           -1.295
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1   3.070
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2   1.131
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3   1.367
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4      NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5      NA
date                                                                                      NA
rent_yoy                                                                              -3.676
net_migr_yoy                                                                          -0.965
mort_delinq_lag4_diff1                                                                -1.736
unemp_rate_lag4_diff1                                                                  1.172
index.num                                                                                 NA
year                                                                                   1.295
half                                                                                   0.653
quarter                                                                                1.292
home_price_yoy_lag4                                                                    0.655
home_price_yoy_lag8                                                                   -4.433
home_price_yoy_lag9                                                                    0.624
home_price_us_yoy_lag4                                                                -1.436
income_wage_yoy_lag4                                                                   3.123
rent_yoy_lag4                                                                          2.302
income_median_yoy_lag5                                                                 3.217
home_inv_yoy_lag5                                                                      0.433
mort_forecl_lag4_diff1_lag7                                                           -4.572
                                                                                     Pr(>|t|)
(Intercept)                                                                           0.23627
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1  0.01807
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2  0.29512
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3  0.21401
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4       NA
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5       NA
date                                                                                       NA
rent_yoy                                                                              0.00791
net_migr_yoy                                                                          0.36668
mort_delinq_lag4_diff1                                                                0.12609
unemp_rate_lag4_diff1                                                                 0.27945
index.num                                                                                  NA
year                                                                                  0.23625
half                                                                                  0.53438
quarter                                                                               0.23733
home_price_yoy_lag4                                                                   0.53341
home_price_yoy_lag8                                                                   0.00303
home_price_yoy_lag9                                                                   0.55243
home_price_us_yoy_lag4                                                                0.19412
income_wage_yoy_lag4                                                                  0.01678
rent_yoy_lag4                                                                         0.05479
income_median_yoy_lag5                                                                0.01472
home_inv_yoy_lag5                                                                     0.67776
mort_forecl_lag4_diff1_lag7                                                           0.00257
                                                                                       
(Intercept)                                                                            
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))1 * 
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))2   
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))3   
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))4   
splines::ns(index.num, knots = quantile(index.num, probs = c(0.25, 0.5, 0.7, 0.9)))5   
date                                                                                   
rent_yoy                                                                             **
net_migr_yoy                                                                           
mort_delinq_lag4_diff1                                                                 
unemp_rate_lag4_diff1                                                                  
index.num                                                                              
year                                                                                   
half                                                                                   
quarter                                                                                
home_price_yoy_lag4                                                                    
home_price_yoy_lag8                                                                  **
home_price_yoy_lag9                                                                    
home_price_us_yoy_lag4                                                                 
income_wage_yoy_lag4                                                                 * 
rent_yoy_lag4                                                                        . 
income_median_yoy_lag5                                                               * 
home_inv_yoy_lag5                                                                      
mort_forecl_lag4_diff1_lag7                                                          **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.625 on 7 degrees of freedom
  (139 observations deleted due to missingness)
Multiple R-squared:  0.9965,    Adjusted R-squared:  0.9871 
F-statistic: 105.3 on 19 and 7 DF,  p-value: 8.434e-07
# home price vs inventory (~50% explained)
model_formula <- as.formula(
    home_price_yoy ~ mort_forecl_lag4_diff1_lag7 + 
        income_median_yoy_lag5 + home_inv_yoy_lag5
)
moodys_trans_ext_lags_tbl %>% 
    plot_time_series_regression(date, 
                                model_formula, 
                                .show_summary = T)

Call:
stats::lm(formula = .formula, data = df)

Residuals:
    Min      1Q  Median      3Q     Max 
-4.6000 -2.8468 -0.0186  2.1743  5.5048 

Coefficients:
                            Estimate Std. Error t value Pr(>|t|)    
(Intercept)                   2.2347     1.5875   1.408 0.172612    
mort_forecl_lag4_diff1_lag7  -2.2416     7.8551  -0.285 0.777914    
income_median_yoy_lag5        0.6245     0.2033   3.072 0.005391 ** 
home_inv_yoy_lag5            -0.1781     0.0384  -4.637 0.000115 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 3.304 on 23 degrees of freedom
  (139 observations deleted due to missingness)
Multiple R-squared:   0.68, Adjusted R-squared:  0.6382 
F-statistic: 16.29 on 3 and 23 DF,  p-value: 6.771e-06
moodys_trans_ext_lags_tbl %>%
    select(date, home_price_yoy, mort_forecl_lag4_diff1_lag7, income_median_yoy_lag5, home_inv_yoy_lag5, rent_yoy) %>%
    # standardize for easy comparison and some ML algorithms
    mutate(across(where(is.numeric), standardize_vec)) %>%
    drop_na() %>%
    pivot_longer(-date) %>%
    plot_time_series(date, value, name, .smooth = FALSE)

3.7 RECOMMENDATION

  • Time_based feastures
    • trend-based feature (0.55): yes (splines::ns)
    • seasonal feature (): no
  • Lags (0.69): yes
  • Special Event (0.69): no
  • External Lags (0.73): yes

4.0 Prep Data

4.1 Create full dataset

  • Extend to Future Window
  • Add any lags to full dataset
  • Add any external regressors to full dataset

How do I determine? rolling_periods <- c(30, 60, 90)

horizon    <- 4
lag_period <- c(4) # first spike in ACF/PACF beyond the forecasting time horizon
rolling_periods <- c(4, 8, 12)

data_prepared_full_tbl <- moodys_trans_tbl %>%
    
    # Add future window
    bind_rows(
        future_frame(.data = ., .date_var = date, .length_out = horizon)
    ) %>%
    
    # Add external lags
    tk_augment_lags(mort_forecl_lag4_diff1, .lags = c(7)) %>%
    # tk_augment_lags(home_price_us_yoy, .lags = c(4)) %>%
    # tk_augment_lags(income_wage_yoy, .lags = c(4)) %>%
    # tk_augment_lags(rent_yoy, .lags = c(4)) %>%
    tk_augment_lags(income_median_yoy, .lags = c(5)) %>%
    tk_augment_lags(home_inv_yoy, .lags = c(5))  %>%
    select(-home_price_us_yoy, -income_wage_yoy, -rent_yoy, -income_median_yoy,
           -mort_forecl_lag4_diff1, -home_inv_yoy, -unemp_rate_lag4_diff1,
           -mort_delinq_lag4_diff1, -net_migr_yoy) %>%
    
    # Add Autocorrelated Lags
    # tk_augment_lags(home_price_yoy, .lags = lag_period) %>%

    # Add rolling features
    # tk_augment_slidify(
    #     .value   = home_price_yoy_lag4, # time horizon
    #     .f       = mean,
    #     .period  = rolling_periods,
    #     .align   = "center",
    #     .partial = TRUE
    # ) %>%
    
    # Format Columns
    rename_with(.cols = contains("lag"), .fn = ~ str_c("lag_", .))


data_prepared_full_tbl %>%
    pivot_longer(-date) %>%
    plot_time_series(date, value, name, .smooth = FALSE)
data_prepared_full_tbl %>% tail(5*12 + 1)
# A tibble: 61 × 5
   date       home_price_yoy lag_mort_forecl_lag4_diff1…¹ lag_income_median_yo…²
   <date>              <dbl>                        <dbl>                  <dbl>
 1 2011-04-01        -1.35                         0.121                  -5.39 
 2 2011-07-01        -1.36                        -0.0105                 -6.25 
 3 2011-10-01        -0.432                        0.0899                 -4.72 
 4 2012-01-01        -0.178                       -0.0103                 -0.668
 5 2012-04-01         0.657                        0.205                   3.72 
 6 2012-07-01         0.716                        0.0944                  7.81 
 7 2012-10-01        -0.505                       -0.0984                  9.12 
 8 2013-01-01         0.319                        0.0302                  6.68 
 9 2013-04-01        -0.225                       -0.205                   3.71 
10 2013-07-01        -0.0640                      -0.0644                  0.811
# ℹ 51 more rows
# ℹ abbreviated names: ¹​lag_mort_forecl_lag4_diff1_lag7,
#   ²​lag_income_median_yoy_lag5
# ℹ 1 more variable: lag_home_inv_yoy_lag5 <dbl>

4.2 Separate into model and forecast data

data_prepared_full_tbl %>% tail(61)
# A tibble: 61 × 5
   date       home_price_yoy lag_mort_forecl_lag4_diff1…¹ lag_income_median_yo…²
   <date>              <dbl>                        <dbl>                  <dbl>
 1 2011-04-01        -1.35                         0.121                  -5.39 
 2 2011-07-01        -1.36                        -0.0105                 -6.25 
 3 2011-10-01        -0.432                        0.0899                 -4.72 
 4 2012-01-01        -0.178                       -0.0103                 -0.668
 5 2012-04-01         0.657                        0.205                   3.72 
 6 2012-07-01         0.716                        0.0944                  7.81 
 7 2012-10-01        -0.505                       -0.0984                  9.12 
 8 2013-01-01         0.319                        0.0302                  6.68 
 9 2013-04-01        -0.225                       -0.205                   3.71 
10 2013-07-01        -0.0640                      -0.0644                  0.811
# ℹ 51 more rows
# ℹ abbreviated names: ¹​lag_mort_forecl_lag4_diff1_lag7,
#   ²​lag_income_median_yoy_lag5
# ℹ 1 more variable: lag_home_inv_yoy_lag5 <dbl>
data_prepared_tbl <- data_prepared_full_tbl %>%
    filter(!is.na(home_price_yoy))
data_prepared_tbl
# A tibble: 166 × 5
   date       home_price_yoy lag_mort_forecl_lag4_diff1…¹ lag_income_median_yo…²
   <date>              <dbl>                        <dbl>                  <dbl>
 1 1984-01-01           4.36                       NA                       NA  
 2 1984-04-01           9.17                       NA                       NA  
 3 1984-07-01           6.65                       NA                       NA  
 4 1984-10-01           5.56                       NA                       NA  
 5 1985-01-01           9.12                       NA                       NA  
 6 1985-04-01          10.0                        NA                       12.5
 7 1985-07-01           5.23                       NA                       16.1
 8 1985-10-01           7.93                        0.644                   18.4
 9 1986-01-01          11.3                         0                       19.4
10 1986-04-01           9.23                        0                       20.0
# ℹ 156 more rows
# ℹ abbreviated names: ¹​lag_mort_forecl_lag4_diff1_lag7,
#   ²​lag_income_median_yoy_lag5
# ℹ 1 more variable: lag_home_inv_yoy_lag5 <dbl>
forecast_tbl <- data_prepared_full_tbl %>%
    filter(is.na(home_price_yoy))
forecast_tbl
# A tibble: 4 × 5
  date       home_price_yoy lag_mort_forecl_lag4_diff1_…¹ lag_income_median_yo…²
  <date>              <dbl>                         <dbl>                  <dbl>
1 2025-07-01             NA                       0.0151                    5.83
2 2025-10-01             NA                       0.00864                   4.69
3 2026-01-01             NA                      -0.00741                   3.88
4 2026-04-01             NA                       0.0253                    4.03
# ℹ abbreviated names: ¹​lag_mort_forecl_lag4_diff1_lag7,
#   ²​lag_income_median_yoy_lag5
# ℹ 1 more variable: lag_home_inv_yoy_lag5 <dbl>

4.3 Split model data into training/test

data_prepared_tbl
# A tibble: 166 × 5
   date       home_price_yoy lag_mort_forecl_lag4_diff1…¹ lag_income_median_yo…²
   <date>              <dbl>                        <dbl>                  <dbl>
 1 1984-01-01           4.36                       NA                       NA  
 2 1984-04-01           9.17                       NA                       NA  
 3 1984-07-01           6.65                       NA                       NA  
 4 1984-10-01           5.56                       NA                       NA  
 5 1985-01-01           9.12                       NA                       NA  
 6 1985-04-01          10.0                        NA                       12.5
 7 1985-07-01           5.23                       NA                       16.1
 8 1985-10-01           7.93                        0.644                   18.4
 9 1986-01-01          11.3                         0                       19.4
10 1986-04-01           9.23                        0                       20.0
# ℹ 156 more rows
# ℹ abbreviated names: ¹​lag_mort_forecl_lag4_diff1_lag7,
#   ²​lag_income_median_yoy_lag5
# ℹ 1 more variable: lag_home_inv_yoy_lag5 <dbl>
splits <- time_series_split(data_prepared_tbl, assess = horizon, cumulative = TRUE)

splits %>%
    tk_time_series_cv_plan() %>%
    plot_time_series_cv_plan(date, home_price_yoy)

5.0 Specify recipes

  • Time Series Signature - Adds bulk time-based features
  • Spline Transformation to index.num
  • External lags
# recipe_spec_base <- recipe(home_price_yoy ~ ., data = training(splits)) %>%
#     
#     # Time Series Signature
#     step_timeseries_signature(date) %>%
#     step_rm(matches("(iso)|(xts)|(month)|(week)|(day)|(hour)|(minute)|(second)|(am.pm)")) %>%
#     
#     # Standardization
#     step_normalize(matches("(index.num)|(year)")) %>%
#     
#     # Dummy Encoding (One Hot Encoding)
#     step_dummy(all_nominal(), one_hot = TRUE) 

recipe_spec_base <- recipe(home_price_yoy ~ ., data = training(splits)) %>%
    step_normalize(-date)
recipe_spec_base %>% prep() %>% juice() %>% glimpse()
Rows: 162
Columns: 5
$ date                            <date> 1984-01-01, 1984-04-01, 1984-07-01, 1…
$ lag_mort_forecl_lag4_diff1_lag7 <dbl> NA, NA, NA, NA, NA, NA, NA, 4.97304322…
$ lag_income_median_yoy_lag5      <dbl> NA, NA, NA, NA, NA, 1.3252670, 1.87522…
$ lag_home_inv_yoy_lag5           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ home_price_yoy                  <dbl> -0.11763205, 0.74996427, 0.29452655, 0…

6.0 Specify models

6.1 GLMNet

# - Strengths: Very good for trend
# - Weaknesses: Not as good for complex patterns (i.e. seasonality)

model_spec_glmnet <- linear_reg(
    penalty = 0.3,
    mixture = 1
) %>%
    set_engine("glmnet")

recipe_spec_2_lag <- recipe_spec_base %>%
    step_rm(date) %>% # Remove linear features
    step_naomit(contains("lag"))

recipe_spec_2_lag %>% prep() %>% juice() %>% glimpse()
Rows: 23
Columns: 4
$ lag_mort_forecl_lag4_diff1_lag7 <dbl> -0.02515893, -0.21705115, 0.33362982, …
$ lag_income_median_yoy_lag5      <dbl> -0.65323697, -0.26081530, 0.06713234, …
$ lag_home_inv_yoy_lag5           <dbl> 0.5522146, 0.5640290, 0.3671761, 0.442…
$ home_price_yoy                  <dbl> -0.54514262, -0.51076880, -0.29986892,…
workflow_fit_glmnet <- workflow() %>%
    add_recipe(recipe_spec_2_lag) %>%
    add_model(model_spec_glmnet) %>%
    fit(training(splits))

workflow_fit_glmnet %>%
    # Pull the fitted model object from the workflow
    extract_fit_parsnip() %>%
    # Get the coefficient estimates from the glmnet fit (using s=0.1, the penalty value)
    broom::tidy() %>%
    # Filter to show only predictors where the estimate is not zero
    # The 'term' column contains the predictor names.
    filter(estimate != 0) %>%
    # Arrange by the absolute value of the estimate to see the most important predictors
    arrange(desc(abs(estimate)))
# A tibble: 3 × 3
  term                       estimate penalty
  <chr>                         <dbl>   <dbl>
1 (Intercept)                   0.801     0.3
2 lag_home_inv_yoy_lag5        -0.429     0.3
3 lag_income_median_yoy_lag5    0.371     0.3

6.2 XGBOOST

# - Strengths: Best for seasonality & complex patterns
# - Weaknesses: 
#   - Cannot predict beyond the maximum/minimum target (e.g. increasing trend)
# - Solution: Model trend separately (if needed). 
#   - Can combine with ARIMA, Linear Regression, Mars, or Prophet
#   - prophet_boost & arima_boost: Do this

# Implementation

model_spec_boost <- boost_tree(
    mode = "regression",
    mtry = 3, 
    trees = 500, 
    min_n = 2, 
    tree_depth = 12, 
    learn_rate = 0.3, 
    loss_reduction = 0
) %>%
    set_engine("xgboost")

# spline
# recipe_spec_1 <- recipe_spec_base %>%
#     step_rm(date) %>%
#     step_ns(ends_with("index.num"), deg_free = 4) %>%
#     step_rm(starts_with("lag_"))
# 
# recipe_spec_1 %>% prep() %>% juice() %>% glimpse()
# 
# set.seed(123)
# wflw_fit_xgboost_spline <- workflow() %>%
#     add_recipe(recipe_spec_1) %>%
#     add_model(model_spec_boost) %>%
#     fit(training(splits))
# 
# # Extract the fitted model and display Variable Importance
# wflw_fit_xgboost_spline %>%
#     # Pull the fitted model object
#     pull_workflow_fit() %>%
#     # Use the vip package to calculate and plot variable importance
#     vip(num_features = 10,  # Show the top 10 most important features
#         geom = "point")     # Use a point plot for a clean display

# Lag
recipe_spec_2 <- recipe_spec_base %>%
    step_rm(date) %>%
    step_naomit(starts_with("lag_"))

set.seed(123)
wflw_fit_xgboost_lag <- workflow() %>%
    add_recipe(recipe_spec_2_lag) %>%
    add_model(model_spec_boost) %>%
    fit(training(splits))

# Extract the fitted model and display Variable Importance
wflw_fit_xgboost_lag %>%
    # Pull the fitted model object
    pull_workflow_fit() %>%
    # Use the vip package to calculate and plot variable importance
    vip(num_features = 10,  # Show the top 10 most important features
        geom = "point")     # Use a point plot for a clean display

6.3 Spline

model_spec_lm <- linear_reg() %>%
    set_engine("lm")

recipe_spec_1 <- recipe_spec_base %>%
    step_rm(date) %>%
    step_ns(ends_with("index.num"), deg_free = 4) %>%
    step_rm(contains("roll"))

recipe_spec_1 %>% prep() %>% juice() %>% glimpse()
Rows: 162
Columns: 4
$ lag_mort_forecl_lag4_diff1_lag7 <dbl> NA, NA, NA, NA, NA, NA, NA, 4.97304322…
$ lag_income_median_yoy_lag5      <dbl> NA, NA, NA, NA, NA, 1.3252670, 1.87522…
$ lag_home_inv_yoy_lag5           <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ home_price_yoy                  <dbl> -0.11763205, 0.74996427, 0.29452655, 0…
workflow_fit_lm_1_spline <- workflow() %>%
    add_model(model_spec_lm) %>%
    add_recipe(recipe_spec_1) %>%
    fit(training(splits))

workflow_fit_lm_1_spline %>% 
    pull_workflow_fit() %>%
    pluck("fit") %>%
    summary()

Call:
stats::lm(formula = ..y ~ ., data = data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.88175 -0.43174  0.01341  0.35097  1.03442 

Coefficients:
                                Estimate Std. Error t value Pr(>|t|)    
(Intercept)                       0.7310     0.1333   5.482 2.74e-05 ***
lag_mort_forecl_lag4_diff1_lag7   0.1787     0.2196   0.814 0.425815    
lag_income_median_yoy_lag5        0.9475     0.2749   3.446 0.002705 ** 
lag_home_inv_yoy_lag5            -0.7847     0.1828  -4.293 0.000393 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.594 on 19 degrees of freedom
  (139 observations deleted due to missingness)
Multiple R-squared:  0.7259,    Adjusted R-squared:  0.6826 
F-statistic: 16.77 on 3 and 19 DF,  p-value: 1.433e-05

7.0 Compare model performance

7.1 Modeltime table

Organize

model_tbl <- modeltime_table(
    workflow_fit_glmnet,
    # wflw_fit_xgboost_spline,
    wflw_fit_xgboost_lag,
    workflow_fit_lm_1_spline
) %>%
    # update_model_description(2, "XGBOOST - Spline Recipe") %>%
    update_model_description(2, "XGBOOST - Lag Recipe")

model_tbl
# Modeltime Table
# A tibble: 3 × 3
  .model_id .model     .model_desc         
      <int> <list>     <chr>               
1         1 <workflow> GLMNET              
2         2 <workflow> XGBOOST - Lag Recipe
3         3 <workflow> LM                  

7.2 Calibration

# - Calculates residual model errors on test set
# - Gives us a true prediction error estimate when we model with confidence intervals

calibration_tbl <- model_tbl %>%
    modeltime_calibrate(new_data = testing(splits))

calibration_tbl %>%
    slice(1) %>%
    unnest(.calibration_data)
# A tibble: 4 × 8
  .model_id .model     .model_desc .type date       .actual .prediction
      <int> <list>     <chr>       <chr> <date>       <dbl>       <dbl>
1         1 <workflow> GLMNET      Test  2024-07-01   0.235      0.890 
2         1 <workflow> GLMNET      Test  2024-10-01   0.761      0.817 
3         1 <workflow> GLMNET      Test  2025-01-01   0.297      0.359 
4         1 <workflow> GLMNET      Test  2025-04-01   0.151     -0.0954
# ℹ 1 more variable: .residuals <dbl>

7.3 Test accuracy

# - Calculates common accuracy measures
# - MAE, MAPE, MASE, SMAPE, RMSE, R-SQUARED

calibration_tbl %>%
    modeltime_accuracy()
# A tibble: 3 × 9
  .model_id .model_desc          .type   mae  mape  mase smape  rmse   rsq
      <int> <chr>                <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1         1 GLMNET               Test  0.255  118. 0.674  85.6 0.353 0.319
2         2 XGBOOST - Lag Recipe Test  0.326  127. 0.862  78.9 0.360 0.851
3         3 LM                   Test  0.623  305. 1.65  124.  0.746 0.247
# Table Modeltime Accuracy
calibration_tbl %>%
    modeltime_accuracy(
        metric_set = default_forecast_accuracy_metric_set()
    ) %>%
    table_modeltime_accuracy(
        .interactive = TRUE,
        bordered = TRUE, 
        resizable = TRUE
    )
# Metric Sets

#?default_forecast_accuracy_metric_set

metric_set(mae, rmse, iic)
A metric set, consisting of:
- `mae()`, a numeric metric  | direction: minimize
- `rmse()`, a numeric metric | direction: minimize
- `iic()`, a numeric metric  | direction: maximize
calibration_tbl %>%
    modeltime_accuracy(
        metric_set = metric_set(mae, rmse, iic)
    )
# A tibble: 3 × 6
  .model_id .model_desc          .type   mae  rmse    iic
      <int> <chr>                <chr> <dbl> <dbl>  <dbl>
1         1 GLMNET               Test  0.255 0.353  0.540
2         2 XGBOOST - Lag Recipe Test  0.326 0.360 -0.519
3         3 LM                   Test  0.623 0.746  0.491

7.4 Test forecast

# - Visualize the out-of-sample forecast

calibration_tbl %>%
    modeltime_forecast(
        new_data      = testing(splits),
        actual_data   = data_prepared_tbl,
        conf_interval = 0.80
    ) %>%
    plot_modeltime_forecast(
        .legend_max_width = 60,
        .legend_show = TRUE,
        .conf_interval_show = TRUE,
        .conf_interval_alpha = 0.20,
        .conf_interval_fill = "lightblue",
        .title = "NH Unemployment Rate Forecast"
    )

8.0 Refit

refit_tbl <- calibration_tbl %>%
    modeltime_refit(data = data_prepared_tbl)

9.0 Forecast

# * Final Forecast ----
# - 'new_data' vs 'h'
# - 'actual_data'
# - Preprocessing

refit_tbl %>%
    modeltime_forecast(
        # h = "8 weeks",
        new_data = forecast_tbl,
        actual_data = data_prepared_tbl,
        conf_interval = 0.80
    ) %>%
    plot_modeltime_forecast(
        .legend_max_width = 25,
        .conf_interval_fill = "lightblue",
        .interactive = TRUE,
        .legend_show = TRUE 
    )