Modeling 30-year fixed mortgage rates from St. Louis FRED database.
Using ADL(1,1) and ECM frameworks.
Many OLS assumptions are violated, but there is evidence of co-integration (i.e., Pesaran-Shin-Smith Cointegration Test and Phillips-Ouliaris Cointegration Test).
A second lag of the dependent variable would resolve violations of OLS assumptions. However, this causes erratic behavior in the ex-ante forecasts when the CMT forecasts are shifted up or down 100 bps. The zig-zag behavior is not shown, but reader may change the ar_order parameter to 2 or 3.
library(tidyverse)
library(ggthemes)
library(fredr)
library(dLagM)
library(ecm)
## Help for dLagM --------
## https://www.ncbi.nlm.nih.gov/pmc/articles/PMC7034805/
## Login to FRED API --------
## Get API key from https://fred.stlouisfed.org/docs/api/api_key.html
myKey <- rstudioapi::askForSecret(name="FRED", message="Enter API Key for FRED: ")
fredr_set_key(myKey)
mySeries <- c("MORTGAGE30US", # 30-year fixed mortgage rate
"DGS10", # 10-year CMT yield
"T10Y2Y", # Yield Curve 10Y CMT minus 2Y CMT
"TOTLL", # Total Loans
"DPSACBW027SBOG", # Total Deposits
"BAMLC0A4CBBB" # BBB OAS
)
## Get Data from FRED --------
pullOneSeries <- function(x, startDate = as.Date("2000-01-01"),
endDate = as.Date("2021-11-01"),
frequency_ind = "m",
agg_type = "avg") {
df <- fredr(series_id = x,
observation_start = startDate,
observation_end = endDate,
frequency = frequency_ind,
aggregation_method = agg_type
)
}
myData <- purrr::map_dfr(mySeries, pullOneSeries) ## pullMultipleSeries
## Reshape data --------
myData_reshaped <- myData %>%
select(-realtime_start, -realtime_end) %>%
pivot_wider(names_from=series_id, values_from=value) %>%
mutate(Loan_to_Deposits_10X = (TOTLL / DPSACBW027SBOG) * 10, ## for plotting
Loan_to_Deposits = (TOTLL / DPSACBW027SBOG)) %>%
select(-TOTLL, -DPSACBW027SBOG) %>%
mutate(date=as.Date(date))
myData_plotting <- myData_reshaped %>%
pivot_longer(-date)
## Plot the data --------
ggplot(myData_plotting, aes(x=date, y=value, color=name, linetype=name)) +
geom_line() + theme_bw() +
scale_y_continuous(breaks=seq(-0.5,10,0.5)) + theme_igray() +
scale_colour_tableau() +
ggtitle("FRED: Actual Time Series")
## Split train and test --------
full_data <- myData_reshaped %>%
select(-Loan_to_Deposits_10X) %>%
filter(complete.cases(.)) %>%
mutate(DGS10_Loan_to_Dep = DGS10 * Loan_to_Deposits) %>% ## interaction term
read.zoo(.)
train <- as.data.frame(window(full_data, end = as.Date("2021-05-01")))
test <- as.data.frame(window(full_data, start= as.Date("2021-06-01")))
## Train ADL Model --------
fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + Loan_to_Deposits")
#fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + T10Y2Y + BAMLC0A4CBBB + Loan_to_Deposits")
#fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + Loan_to_Deposits + DGS10_Loan_to_Dep")
#fmla1 <- as.formula("MORTGAGE30US ~ DGS10 + Loan_to_Deposits")
rhs_vars <- labels(terms(fmla1)) ## this comes in handy!!!
lhs_var <- all.vars(fmla1)[1]
ar_order = 1
x_lags = 1
adl_mod <- ardlDlm(fmla1,data=train, p=x_lags, q=ar_order)
summary(adl_mod)
##
## Time series regression with "ts" data:
## Start = 2, End = 257
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24738 -0.04614 -0.01067 0.03502 0.41531
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.09443 0.07600 1.242 0.21523
## DGS10.t 0.65070 0.02636 24.686 < 2e-16 ***
## DGS10.1 -0.54898 0.03492 -15.721 < 2e-16 ***
## Loan_to_Deposits.t 2.43687 0.91167 2.673 0.00801 **
## Loan_to_Deposits.1 -2.30390 0.91020 -2.531 0.01198 *
## MORTGAGE30US.1 0.89248 0.02495 35.765 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08589 on 250 degrees of freedom
## Multiple R-squared: 0.9959, Adjusted R-squared: 0.9959
## F-statistic: 1.228e+04 on 5 and 250 DF, p-value: < 2.2e-16
## Test for cointegration and other assumptions --------
# orders2 <-ardlBoundOrders(data = train, formula = fmla1,
# ic = "BIC", max.p = 5, max.q = 5)
p <-as.data.frame(matrix(c(ar_order, rep(x_lags, length(rhs_vars))), nrow=1))
names(p) <- c("q", rhs_vars)
test1 <-ardlBound(data = train, formula = fmla1, case = 1, p = p, ECM = TRUE)
##
## Breusch-Godfrey Test for the autocorrelation in residuals:
##
## Breusch-Godfrey test for serial correlation of order up to 1
##
## data: modelFull$model
## LM test = 8.0377, df1 = 1, df2 = 249, p-value = 0.004957
##
## The p-value of Breusch-Godfrey test for the autocorrelation in residuals: 0.004957386 < 0.05!
## ------------------------------------------------------
##
## Ljung-Box Test for the autocorrelation in residuals:
##
## Box-Ljung test
##
## data: res
## X-squared = 6.7057, df = 1, p-value = 0.00961
##
## The p-value of Ljung-Box test for the autocorrelation in residuals: 0.009610371 < 0.05!
## ------------------------------------------------------
##
## Breusch-Pagan Test for the homoskedasticity of residuals:
##
## studentized Breusch-Pagan test
##
## data: modelFull$model
## BP = 2.6865, df = 4, p-value = 0.6116
##
## ------------------------------------------------------
##
## Shapiro-Wilk test of normality of residuals:
##
## Shapiro-Wilk normality test
##
## data: modelFull$model$residual
## W = 0.94091, p-value = 1.318e-08
##
## The p-value of Shapiro-Wilk test normality of residuals: 1.317903e-08 < 0.05!
## ------------------------------------------------------
##
## PESARAN, SHIN AND SMITH (2001) COINTEGRATION TEST
##
## Observations: 256
## Number of Regressors (k): 2
## Case: 1
##
## ------------------------------------------------------
## - F-test -
## ------------------------------------------------------
## <------- I(0) ------------ I(1) ----->
## 10% critical value 2.17 3.19
## 5% critical value 2.72 3.83
## 1% critical value 3.88 5.3
##
##
## F-statistic = 7.33332949701599
##
## ------------------------------------------------------
## F-statistic note: Asymptotic critical values used.
##
## ------------------------------------------------------
##
## Ramsey's RESET Test for model specification:
##
## RESET test
##
## data: modelECM$model
## RESET = 13.042, df1 = 2, df2 = 250, p-value = 4.095e-06
##
## the p-value of RESET test: 4.095159e-06 < 0.05!
## ------------------------------------------------------
## ------------------------------------------------------
## Error Correction Model Output:
##
## Time series regression with "ts" data:
## Start = 2, End = 256
##
## Call:
## dynlm(formula = as.formula(model.text), data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.24675 -0.04594 -0.01156 0.03520 0.41008
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## ec.1 -0.11196 0.02378 -4.709 4.11e-06 ***
## dDGS10.t 0.64912 0.02551 25.444 < 2e-16 ***
## dLoan_to_Deposits.t 2.54484 0.85920 2.962 0.00335 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08492 on 252 degrees of freedom
## Multiple R-squared: 0.7492, Adjusted R-squared: 0.7462
## F-statistic: 250.9 on 3 and 252 DF, p-value: < 2.2e-16
##
## ------------------------------------------------------
## Long-run coefficients:
## MORTGAGE30US.1 DGS10.1 Loan_to_Deposits.1
## -0.11195993 0.09448671 0.29929558
##
# summary(test1$model$modelFull)
# summary(test1$ARDL.model)
# summary(test1$ECM$EC.model)
## ADL In-sample Forecasts (Ex-post) --------
fitted <- adl_mod$model$fitted.values
actuals <- train$MORTGAGE30US
padding <- length(actuals) - length(fitted)
fitted <- c(rep(NA, padding), fitted)
df <- data.frame(date = as.Date(rownames(train)),
Forecasts = fitted,
Actuals = actuals) %>%
filter(complete.cases(.)) %>%
pivot_longer(-date)
ggplot(df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
scale_y_continuous(breaks=seq(0,10,0.5), limits=c(0,10)) + theme_igray() +
scale_colour_tableau() +
ggtitle("ADL: In-Sample Ex-Post Forecasts")
## Out-of-sample Forecasts --------
## The forecast() function works on test data that
## is **immediately** after the train data
## Does not work on in-sample data
create_adl_forecasts <- function(modelADL, newdata=test) {
name_dep_var <- all.vars(modelADL$formula)[1]
name_indep_var <- labels(terms(modelADL$formula))
test_df <- newdata %>%
select(all_of(name_indep_var))# !!!! *** column order matters *** !!!!!!!!
hh <- nrow(test_df)
mat_x <- test_df %>%
as.matrix(.) %>%
t(.)
mat_x_up100 <- test_df %>%
mutate(DGS10 = DGS10 + 1) %>%
select(all_of(name_indep_var)) %>%
as.matrix(.) %>%
t(.)
mat_x_down100 <- test_df %>%
mutate(DGS10 = DGS10 - 1) %>%
select(all_of(name_indep_var)) %>%
as.matrix(.) %>%
t(.)
out_sample_fitted <- forecast(modelADL, mat_x, h=hh)$forecasts
out_sample_actuals <- newdata[,name_dep_var,drop=TRUE]
out_sample_fitted_up100 <- forecast(modelADL, mat_x_up100, h=hh)$forecasts
out_sample_fitted_down100 <- forecast(modelADL, mat_x_down100, h=hh)$forecasts
results_df <- data.frame(date = as.Date(rownames(test_df)),
Forecasts = out_sample_fitted,
Actuals = out_sample_actuals,
Fcst_Up100 = out_sample_fitted_up100,
Fcst_Down100 = out_sample_fitted_down100
) %>%
filter(complete.cases(.)) %>%
cbind(test_df) %>%
pivot_longer(-date)
return(list(results_df=results_df,out_sample_actuals=out_sample_actuals, out_sample_fitted=out_sample_fitted))
}
ex_ante_list <- create_adl_forecasts(adl_mod, newdata=test)
ex_ante_df <- ex_ante_list$results_df
ggplot(ex_ante_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
scale_colour_tableau() +
ggtitle("ADL: Out-sample Ex-Ante Forecasts")
## Fit Equilibrium Model --------
equil_mod <- lm(fmla1, data=train)
summary(equil_mod)
##
## Call:
## lm(formula = fmla1, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.50414 -0.16485 0.00365 0.10396 0.61230
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.3526 0.1887 -1.868 0.0629 .
## DGS10 0.7617 0.0218 34.933 <2e-16 ***
## Loan_to_Deposits 3.4706 0.2959 11.730 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2205 on 254 degrees of freedom
## Multiple R-squared: 0.9735, Adjusted R-squared: 0.9732
## F-statistic: 4657 on 2 and 254 DF, p-value: < 2.2e-16
po_df <- train %>%
select(all_of(c("MORTGAGE30US", rhs_vars)))
tseries::po.test(po_df)
##
## Phillips-Ouliaris Cointegration Test
##
## data: po_df
## Phillips-Ouliaris demeaned = -24.511, Truncation lag parameter = 2,
## p-value = 0.06778
## Plot equilibrium vs. actuals ----
equil_fitted <- equil_mod$fitted.values
actuals <- train$MORTGAGE30US
equil_df <- data.frame(date = as.Date(rownames(train)),
Fcst_Equil = equil_fitted,
Actuals = actuals) %>%
filter(complete.cases(.)) %>%
pivot_longer(-date)
ggplot(equil_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
scale_y_continuous(breaks=seq(0,9,0.5), limits=c(0,9)) + theme_igray() +
scale_colour_tableau() +
ggtitle("Equil: In-sample Forecasts")
ggplot(tail(equil_df, 24), aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
scale_y_continuous(breaks=seq(0,9,0.5), limits=c(0,9)) + theme_igray() +
scale_colour_tableau() +
ggtitle("Eqil: In-sample Forecasts (Last 24 mos)")
## Fit ECM ----
ecm_mod <- ecm(y=train$MORTGAGE30US, xeq=train[rhs_vars], xtr=train[rhs_vars],
includeIntercept = FALSE)
summary(ecm_mod)
##
## Call:
## lm(formula = dy ~ . - 1, data = x, weights = weights)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25058 -0.04630 -0.01055 0.03545 0.41185
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## deltaDGS10 0.64854 0.02633 24.631 < 2e-16 ***
## deltaLoan_to_Deposits 2.58333 0.90500 2.855 0.004670 **
## DGS10Lag1 0.09549 0.02057 4.643 5.54e-06 ***
## Loan_to_DepositsLag1 0.28848 0.07591 3.800 0.000181 ***
## yLag1 -0.11067 0.02485 -4.453 1.27e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08599 on 251 degrees of freedom
## Multiple R-squared: 0.7444, Adjusted R-squared: 0.7393
## F-statistic: 146.2 on 5 and 251 DF, p-value: < 2.2e-16
## Out-sample predictions ecm ------
ecm_out_fcsts <- ecmpredict(ecm_mod, newdata=test[rhs_vars], init=tail(train$MORTGAGE30US,1))
ecm_ex_ante_df <- data.frame(date = as.Date(rownames(test)),
ECM_Forecasts = ecm_out_fcsts,
Actuals = test$MORTGAGE30US,
ADL_Forecats = ex_ante_list$out_sample_fitted
) %>%
filter(complete.cases(.)) %>%
cbind(test[rhs_vars]) %>%
pivot_longer(-date)
ggplot(ecm_ex_ante_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
scale_colour_tableau() +
ggtitle("Out-sample ECM vs. ADL Ex-Ante Forecasts")
## CMT Sensitivity Analysis -----
create_df_flat_up_down <- function(formula = fmla1, horizon=24,
hist_df=myData_reshaped){
scenarioHorizon <- horizon
d <- tail(hist_df, 1) #last obs. of test set or train set
x <- do.call("rbind", replicate(scenarioHorizon, d, simplify = FALSE)) ## repeat rows
x$date <- seq.Date(from=d$date, by="month", length.out = scenarioHorizon)
sens_df <- x
sens_up100_df <- sens_df %>%
mutate(DGS10 = DGS10 + 1)
sens_down100_df <- sens_df %>%
mutate(DGS10 = DGS10 - 1)
return(list(sens_df=sens_df, sens_up100_df=sens_up100_df,
sens_down100_df=sens_down100_df))
}
df_cmt_list <- create_df_flat_up_down()
initY <- tail(test$MORTGAGE30US, 1)
ecm_flat_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list$sens_df, init=initY)
ecm_up_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list$sens_up100_df, init=initY)
ecm_down_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list$sens_down100_df, init=initY)
ecm_sens_normal_df <- data.frame(date = df_cmt_list$sens_df$date,
ECM_flat = ecm_flat_fcsts,
ECM_up100 = ecm_up_fcsts,
ECM_down100 = ecm_down_fcsts
)
other_vars <- df_cmt_list$sens_df %>%
select(all_of(rhs_vars))
ecm_sens_df <- ecm_sens_normal_df %>%
filter(complete.cases(.)) %>%
cbind(other_vars) %>%
pivot_longer(-date)
ggplot(ecm_sens_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
scale_colour_tableau() +
ggtitle("ECM Sensitivity: Flat CMT, CMT Up 100, CMT Down 100")
tail(ecm_sens_normal_df$ECM_flat, 1) - initY
## [1] -0.1640493
tail(ecm_sens_normal_df$ECM_up100, 1) - initY
## [1] 0.6406836
tail(ecm_sens_normal_df$ECM_down100, 1) - initY
## [1] -0.9687823
## Loan-to-Deposit Sensitivity Analysis -----
df_cmt_list_LDRatio_Up <- lapply(df_cmt_list, function(df, scenarioHorizon){
df <- df %>%
mutate(Loan_to_Deposits = cumsum(c(tail(test$Loan_to_Deposits,1),
rep(0.005,scenarioHorizon-1)))) ## upward drift on LD ratio
return(df)
}, scenarioHorizon=24)
ld_ecm_flat_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list_LDRatio_Up$sens_df, init=initY)
ld_ecm_up_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list_LDRatio_Up$sens_up100_df, init=initY)
ld_ecm_down_fcsts <- ecmpredict(ecm_mod, newdata=df_cmt_list_LDRatio_Up$sens_down100_df, init=initY)
ld_ecm_sens_normal_df <- data.frame(date = df_cmt_list_LDRatio_Up$sens_df$date,
ECM_flat = ld_ecm_flat_fcsts,
ECM_up100 = ld_ecm_up_fcsts,
ECM_down100 = ld_ecm_down_fcsts
)
other_vars_2 <- df_cmt_list_LDRatio_Up$sens_df %>%
select(all_of(rhs_vars))
ld_ecm_sens_df <- ld_ecm_sens_normal_df %>%
filter(complete.cases(.)) %>%
cbind(other_vars_2) %>%
pivot_longer(-date)
ggplot(ld_ecm_sens_df, aes(x=date, y=value, color=name, linetype=name)) + geom_line() +
scale_y_continuous(breaks=seq(0,5,0.5), limits=c(0,5)) + theme_igray() +
scale_colour_tableau() +
ggtitle("ECM Sensitivity: Upward Loan-to-Deposit Ratio Forecasts")
tail(ld_ecm_sens_normal_df$ECM_flat, 1) - initY
## [1] 0.1347306
tail(ld_ecm_sens_normal_df$ECM_up100, 1) - initY
## [1] 0.9394636
tail(ld_ecm_sens_normal_df$ECM_down100, 1) - initY
## [1] -0.6700023