Modeling 30Y fixed mortgage rates

Summary

Modeling 30-year fixed mortgage rates from St. Louis FRED database.

Using ADL(1,1) and ECM frameworks.

Many OLS assumptions are violated, but there is evidence of co-integration (i.e., Pesaran-Shin-Smith Cointegration Test and Phillips-Ouliaris Cointegration Test).

A second lag of the dependent variable would resolve violations of OLS assumptions. However, this causes erratic behavior in the ex-ante forecasts when the CMT forecasts are shifted up or down 100 bps. The zig-zag behavior is not shown, but reader may change the ar_order parameter to 2 or 3.

Extract Data from FRED

library(tidyverse)
library(ggthemes)
library(fredr)
library(dLagM)
library(ecm)

## Help for dLagM --------
## https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7034805/

## Login to FRED API --------
## Get API key from https://fred.stlouisfed.org/docs/api/api_key.html

myKey <- rstudioapi::askForSecret(name="FRED", message="Enter API Key for FRED: ")
fredr_set_key(myKey)

mySeries <- c("MORTGAGE30US", # 30-year fixed mortgage rate
              "DGS10",  # 10-year CMT yield
              "T10Y2Y",  # Yield Curve 10Y CMT minus 2Y CMT
              "TOTLL",   # Total Loans
              "DPSACBW027SBOG",  # Total Deposits
              "BAMLC0A4CBBB" # BBB OAS
              )

## Get Data from FRED --------

pullOneSeries <- function(x, startDate = as.Date("2000-01-01"),
                          endDate = as.Date("2021-11-01"),
                          frequency_ind = "m",
                          agg_type = "avg") {
  df <- fredr(series_id = x,
  observation_start = startDate,
  observation_end = endDate,
  frequency = frequency_ind,
  aggregation_method = agg_type
  )
}

myData <- purrr::map_dfr(mySeries, pullOneSeries)  ## pullMultipleSeries

## Reshape data --------

myData_reshaped <- myData %>% 
  select(-realtime_start, -realtime_end) %>% 
  pivot_wider(names_from=series_id, values_from=value) %>% 
  mutate(Loan_to_Deposits_10X = (TOTLL / DPSACBW027SBOG) * 10, ## for plotting
         Loan_to_Deposits = (TOTLL / DPSACBW027SBOG)) %>% 
  select(-TOTLL, -DPSACBW027SBOG) %>% 
  mutate(date=as.Date(date))

myData_plotting <- myData_reshaped %>% 
  pivot_longer(-date)

## Plot the data --------

ggplot(myData_plotting, aes(x=date, y=value, color=name, linetype=name)) +
  geom_line() + theme_bw() +
  scale_y_continuous(breaks=seq(-0.5,10,0.5)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("FRED: Actual Time Series")

Split train and test

## Split train and test --------

full_data <- myData_reshaped %>% 
  select(-Loan_to_Deposits_10X) %>% 
  filter(complete.cases(.)) %>% 
  mutate(DGS10_Loan_to_Dep = DGS10 * Loan_to_Deposits) %>% ## interaction term
  read.zoo(.)

train <- as.data.frame(window(full_data, end = as.Date("2021-05-01")))
test <- as.data.frame(window(full_data, start= as.Date("2021-06-01")))

Train ADL(1,1)

## Train ADL Model --------

fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + Loan_to_Deposits")

#fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + T10Y2Y + BAMLC0A4CBBB + Loan_to_Deposits")

#fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + Loan_to_Deposits + DGS10_Loan_to_Dep")

#fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + Loan_to_Deposits")


rhs_vars <- labels(terms(fmla1)) ## this comes in handy!!!
lhs_var <- all.vars(fmla1)[1]

ar_order = 1
x_lags = 1

adl_mod <- ardlDlm(fmla1,data=train, p=x_lags, q=ar_order)

summary(adl_mod)

## 
## Time series regression with "ts" data:
## Start = 2, End = 257
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24738 -0.04614 -0.01067  0.03502  0.41531 
## 
## Coefficients:
##                    Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.09443    0.07600   1.242  0.21523    
## DGS10.t             0.65070    0.02636  24.686  < 2e-16 ***
## DGS10.1            -0.54898    0.03492 -15.721  < 2e-16 ***
## Loan_to_Deposits.t  2.43687    0.91167   2.673  0.00801 ** 
## Loan_to_Deposits.1 -2.30390    0.91020  -2.531  0.01198 *  
## MORTGAGE30US.1      0.89248    0.02495  35.765  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08589 on 250 degrees of freedom
## Multiple R-squared:  0.9959, Adjusted R-squared:  0.9959 
## F-statistic: 1.228e+04 on 5 and 250 DF,  p-value: < 2.2e-16

Check ADL(1,1) assumptions

## Test for cointegration and other assumptions --------

# orders2 <-ardlBoundOrders(data = train, formula = fmla1,
#                             ic = "BIC", max.p = 5, max.q = 5)

p <-as.data.frame(matrix(c(ar_order, rep(x_lags, length(rhs_vars))), nrow=1))

names(p) <- c("q", rhs_vars)

test1 <-ardlBound(data = train, formula = fmla1, case = 1, p = p, ECM = TRUE)

## 
##  Breusch-Godfrey Test for the autocorrelation in residuals:
## 
##  Breusch-Godfrey test for serial correlation of order up to 1
## 
## data:  modelFull$model
## LM test = 8.0377, df1 = 1, df2 = 249, p-value = 0.004957
## 
## The p-value of Breusch-Godfrey test for the autocorrelation in residuals:  0.004957386 < 0.05!
## ------------------------------------------------------ 
## 
##  Ljung-Box Test for the autocorrelation in residuals:
## 
##  Box-Ljung test
## 
## data:  res
## X-squared = 6.7057, df = 1, p-value = 0.00961
## 
## The p-value of Ljung-Box test for the autocorrelation in residuals:  0.009610371 < 0.05!
## ------------------------------------------------------ 
## 
##  Breusch-Pagan Test for the homoskedasticity of residuals:
## 
##  studentized Breusch-Pagan test
## 
## data:  modelFull$model
## BP = 2.6865, df = 4, p-value = 0.6116
## 
## ------------------------------------------------------ 
## 
##  Shapiro-Wilk test of normality of residuals:
## 
##  Shapiro-Wilk normality test
## 
## data:  modelFull$model$residual
## W = 0.94091, p-value = 1.318e-08
## 
## The p-value of Shapiro-Wilk test normality of residuals:  1.317903e-08 < 0.05!
## ------------------------------------------------------ 
## 
##  PESARAN, SHIN AND SMITH (2001) COINTEGRATION TEST 
## 
##  Observations: 256 
##  Number of Regressors (k): 2 
##  Case: 1 
## 
##  ------------------------------------------------------ 
##  -                       F-test                       - 
##  ------------------------------------------------------ 
##                  <------- I(0) ------------ I(1) -----> 
##  10% critical value       2.17            3.19 
##  5% critical value        2.72            3.83 
##  1% critical value        3.88            5.3 
##  
## 
##  F-statistic = 7.33332949701599 
##   
##  ------------------------------------------------------ 
##  F-statistic note: Asymptotic critical values used. 
##  
## ------------------------------------------------------ 
## 
##  Ramsey's RESET Test for model specification:
## 
##  RESET test
## 
## data:  modelECM$model
## RESET = 13.042, df1 = 2, df2 = 250, p-value = 4.095e-06
## 
## the p-value of RESET test:  4.095159e-06 < 0.05!
## ------------------------------------------------------

## ------------------------------------------------------ 
## Error Correction Model Output: 
## 
## Time series regression with "ts" data:
## Start = 2, End = 256
## 
## Call:
## dynlm(formula = as.formula(model.text), data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.24675 -0.04594 -0.01156  0.03520  0.41008 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## ec.1                -0.11196    0.02378  -4.709 4.11e-06 ***
## dDGS10.t             0.64912    0.02551  25.444  < 2e-16 ***
## dLoan_to_Deposits.t  2.54484    0.85920   2.962  0.00335 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08492 on 252 degrees of freedom
## Multiple R-squared:  0.7492, Adjusted R-squared:  0.7462 
## F-statistic: 250.9 on 3 and 252 DF,  p-value: < 2.2e-16
## 
## ------------------------------------------------------ 
## Long-run coefficients: 
##     MORTGAGE30US.1            DGS10.1 Loan_to_Deposits.1 
##        -0.11195993         0.09448671         0.29929558 
##

# summary(test1$model$modelFull)
# summary(test1$ARDL.model)
# summary(test1$ECM$EC.model)

ADL(1,1) Ex-Post Forecasts

## ADL In-sample Forecasts (Ex-post) --------

fitted <- adl_mod$model$fitted.values
actuals <- train$MORTGAGE30US

padding <- length(actuals) - length(fitted)

fitted <- c(rep(NA, padding), fitted)

df <- data.frame(date = as.Date(rownames(train)),
                 Forecasts = fitted,
                 Actuals = actuals) %>% 
  filter(complete.cases(.)) %>% 
  pivot_longer(-date)

ggplot(df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
  scale_y_continuous(breaks=seq(0,10,0.5), limits=c(0,10)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("ADL: In-Sample Ex-Post Forecasts")

ADL(1,1) Ex-Ante Forecasts

## Out-of-sample Forecasts --------

## The forecast() function works on test data that 
## is **immediately** after the train data
## Does not work on in-sample data

create_adl_forecasts <- function(modelADL, newdata=test) {
  
  name_dep_var <- all.vars(modelADL$formula)[1]
  name_indep_var <- labels(terms(modelADL$formula))
  
  test_df <- newdata %>% 
    select(all_of(name_indep_var))# !!!! *** column order matters *** !!!!!!!!
  
  hh <- nrow(test_df)
  
  mat_x <- test_df %>% 
    as.matrix(.) %>% 
    t(.)
  
  mat_x_up100 <- test_df %>% 
    mutate(DGS10 = DGS10 + 1) %>% 
    select(all_of(name_indep_var)) %>% 
    as.matrix(.) %>% 
    t(.)
  
  mat_x_down100 <- test_df %>% 
    mutate(DGS10 = DGS10 - 1) %>% 
    select(all_of(name_indep_var)) %>% 
    as.matrix(.) %>% 
    t(.)
  
  out_sample_fitted <- forecast(modelADL, mat_x, h=hh)$forecasts
  out_sample_actuals <- newdata[,name_dep_var,drop=TRUE]
  
  out_sample_fitted_up100 <- forecast(modelADL, mat_x_up100, h=hh)$forecasts
  out_sample_fitted_down100 <- forecast(modelADL, mat_x_down100, h=hh)$forecasts
  
  
  results_df <- data.frame(date = as.Date(rownames(test_df)),
                           Forecasts = out_sample_fitted,
                           Actuals = out_sample_actuals,
                           Fcst_Up100 = out_sample_fitted_up100,
                           Fcst_Down100 = out_sample_fitted_down100
  ) %>% 
    filter(complete.cases(.)) %>% 
    cbind(test_df) %>% 
    pivot_longer(-date)
  
  return(list(results_df=results_df,out_sample_actuals=out_sample_actuals, out_sample_fitted=out_sample_fitted))
}

ex_ante_list <- create_adl_forecasts(adl_mod, newdata=test)
ex_ante_df <- ex_ante_list$results_df

  
ggplot(ex_ante_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
  scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("ADL: Out-sample Ex-Ante Forecasts")

Train OLS Equilibrium Model

## Fit Equilibrium Model --------

equil_mod <- lm(fmla1, data=train)

summary(equil_mod)

## 
## Call:
## lm(formula = fmla1, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.50414 -0.16485  0.00365  0.10396  0.61230 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## (Intercept)       -0.3526     0.1887  -1.868   0.0629 .  
## DGS10              0.7617     0.0218  34.933   <2e-16 ***
## Loan_to_Deposits   3.4706     0.2959  11.730   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2205 on 254 degrees of freedom
## Multiple R-squared:  0.9735, Adjusted R-squared:  0.9732 
## F-statistic:  4657 on 2 and 254 DF,  p-value: < 2.2e-16

po_df <- train %>% 
  select(all_of(c("MORTGAGE30US", rhs_vars)))

tseries::po.test(po_df)

## 
##  Phillips-Ouliaris Cointegration Test
## 
## data:  po_df
## Phillips-Ouliaris demeaned = -24.511, Truncation lag parameter = 2,
## p-value = 0.06778

## Plot equilibrium vs. actuals ----

equil_fitted <- equil_mod$fitted.values
actuals <- train$MORTGAGE30US

equil_df <- data.frame(date = as.Date(rownames(train)),
                       Fcst_Equil = equil_fitted,
                       Actuals = actuals) %>% 
  filter(complete.cases(.)) %>% 
  pivot_longer(-date)

ggplot(equil_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
  scale_y_continuous(breaks=seq(0,9,0.5), limits=c(0,9)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("Equil: In-sample Forecasts")

ggplot(tail(equil_df, 24), aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
  scale_y_continuous(breaks=seq(0,9,0.5), limits=c(0,9)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("Eqil: In-sample Forecasts (Last 24 mos)")

Train ECM

## Fit ECM ----

ecm_mod <- ecm(y=train$MORTGAGE30US, xeq=train[rhs_vars], xtr=train[rhs_vars],
               includeIntercept = FALSE)

summary(ecm_mod)

## 
## Call:
## lm(formula = dy ~ . - 1, data = x, weights = weights)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25058 -0.04630 -0.01055  0.03545  0.41185 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## deltaDGS10             0.64854    0.02633  24.631  < 2e-16 ***
## deltaLoan_to_Deposits  2.58333    0.90500   2.855 0.004670 ** 
## DGS10Lag1              0.09549    0.02057   4.643 5.54e-06 ***
## Loan_to_DepositsLag1   0.28848    0.07591   3.800 0.000181 ***
## yLag1                 -0.11067    0.02485  -4.453 1.27e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.08599 on 251 degrees of freedom
## Multiple R-squared:  0.7444, Adjusted R-squared:  0.7393 
## F-statistic: 146.2 on 5 and 251 DF,  p-value: < 2.2e-16

ECM Ex-Ante Forecasts

## Out-sample predictions ecm ------

ecm_out_fcsts <- ecmpredict(ecm_mod, newdata=test[rhs_vars], init=tail(train$MORTGAGE30US,1))

ecm_ex_ante_df <- data.frame(date = as.Date(rownames(test)),
                         ECM_Forecasts = ecm_out_fcsts,
                         Actuals = test$MORTGAGE30US,
                         ADL_Forecats = ex_ante_list$out_sample_fitted
) %>% 
  filter(complete.cases(.)) %>% 
  cbind(test[rhs_vars]) %>% 
  pivot_longer(-date)

ggplot(ecm_ex_ante_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
  scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("Out-sample ECM vs. ADL Ex-Ante Forecasts")

ECM - CMT Sensitivity Analysis

## CMT Sensitivity Analysis -----

create_df_flat_up_down <- function(formula = fmla1, horizon=24,
                                   hist_df=myData_reshaped){
  
  scenarioHorizon <- horizon
  d <- tail(hist_df, 1) #last obs. of test set or train set
  
  x <- do.call("rbind", replicate(scenarioHorizon, d, simplify = FALSE)) ## repeat rows
  
  x$date <- seq.Date(from=d$date, by="month", length.out = scenarioHorizon)
  
  sens_df <- x
  
  sens_up100_df <- sens_df %>% 
    mutate(DGS10 = DGS10 + 1)
  
  sens_down100_df <- sens_df %>% 
    mutate(DGS10 = DGS10 - 1)
  
  return(list(sens_df=sens_df, sens_up100_df=sens_up100_df,
              sens_down100_df=sens_down100_df))
}

df_cmt_list <- create_df_flat_up_down()

initY <- tail(test$MORTGAGE30US, 1)

ecm_flat_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list$sens_df, init=initY)
ecm_up_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list$sens_up100_df, init=initY)
ecm_down_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list$sens_down100_df, init=initY)

ecm_sens_normal_df <- data.frame(date = df_cmt_list$sens_df$date,
                                 ECM_flat = ecm_flat_fcsts,
                                 ECM_up100 = ecm_up_fcsts,
                                 ECM_down100 = ecm_down_fcsts
                                  ) 

other_vars <- df_cmt_list$sens_df %>% 
  select(all_of(rhs_vars))

ecm_sens_df <- ecm_sens_normal_df %>% 
  filter(complete.cases(.)) %>% 
  cbind(other_vars) %>% 
  pivot_longer(-date)

ggplot(ecm_sens_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
  scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("ECM Sensitivity: Flat CMT, CMT Up 100, CMT Down 100")

tail(ecm_sens_normal_df$ECM_flat, 1) - initY

## [1] -0.1640493

tail(ecm_sens_normal_df$ECM_up100, 1) - initY

## [1] 0.6406836

tail(ecm_sens_normal_df$ECM_down100, 1) - initY

## [1] -0.9687823

ECM - Loan-to-Deposit Ratio Sensitivity

## Loan-to-Deposit Sensitivity Analysis -----

df_cmt_list_LDRatio_Up <- lapply(df_cmt_list, function(df, scenarioHorizon){
  df <- df %>% 
    mutate(Loan_to_Deposits = cumsum(c(tail(test$Loan_to_Deposits,1),
                                       rep(0.005,scenarioHorizon-1)))) ## upward drift on LD ratio
  
  return(df)
}, scenarioHorizon=24)

ld_ecm_flat_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list_LDRatio_Up$sens_df, init=initY)
ld_ecm_up_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list_LDRatio_Up$sens_up100_df, init=initY)
ld_ecm_down_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list_LDRatio_Up$sens_down100_df, init=initY)

ld_ecm_sens_normal_df <- data.frame(date = df_cmt_list_LDRatio_Up$sens_df$date,
                                 ECM_flat = ld_ecm_flat_fcsts,
                                 ECM_up100 = ld_ecm_up_fcsts,
                                 ECM_down100 = ld_ecm_down_fcsts
) 


other_vars_2 <- df_cmt_list_LDRatio_Up$sens_df %>% 
  select(all_of(rhs_vars))

ld_ecm_sens_df <- ld_ecm_sens_normal_df %>% 
  filter(complete.cases(.)) %>% 
  cbind(other_vars_2) %>% 
  pivot_longer(-date)

ggplot(ld_ecm_sens_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
  scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
  scale_colour_tableau() +
  ggtitle("ECM Sensitivity: Upward Loan-to-Deposit Ratio Forecasts")

tail(ld_ecm_sens_normal_df$ECM_flat, 1) - initY

## [1] 0.1347306

tail(ld_ecm_sens_normal_df$ECM_up100, 1) - initY

## [1] 0.9394636

tail(ld_ecm_sens_normal_df$ECM_down100, 1) - initY

## [1] -0.6700023