Overview
This document estimates the deposit provision cost
\(c\) following the DSSW (2026,
forthcoming) hedonic approach. The key idea: net non-interest expense
includes both deposit-servicing costs and non-deposit overhead. A
hedonic regression isolates the marginal cost of servicing each dollar
of deposits.
Methodology:
\[
\frac{\text{Net Non-Interest Expense}_{i,t}}{\text{Total Assets}_{i,t}}
= \alpha_t + \sum_k \beta_k \frac{\text{Deposit}_{k,i,t}}{\text{Total Assets}_{i,t}}
+ \gamma' \mathbf{X}_{i,t} + \varepsilon_{i,t}
\]
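where \(k\) indexes deposit
categories (insured and uninsured in the regressions below; time deposits
are not entered as a separate category), \(\alpha_t\) is a period fixed
effect, and \(\mathbf{X}\) includes balance-sheet
controls (loans, equity, FHLB advances, other borrowings, log total assets).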
The estimated \(\hat{\beta}_k\)
coefficients represent the marginal non-interest cost per dollar of each
deposit type. The bank-level deposit cost is then:
\[
c_i = \sum_k \hat{\beta}_k \frac{\text{Deposit}_{k,i}}{\text{Total
Deposits}_i}
\]
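Output:
01_data/processed/dssw_deposit_costs.csv (bank-level deposit costs by period)
01_data/processed/dssw_deposit_cost_regression.csv (regression coefficient summary)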
Setup
rm(list = ls())
library(data.table); library(dplyr); library(tidyr); library(stringr)
library(lubridate); library(purrr); library(tibble)
library(fixest); library(modelsummary)
library(knitr); library(kableExtra)
library(ggplot2); library(scales)
library(readr)
cat("All packages loaded.\n")
## All packages loaded.
Paths
# Project root. Set the LIQUIDITY_PROJECT_PATH environment variable to run on
# another machine; when it is unset or empty, the original author's path is used.
BASE_PATH <- Sys.getenv("LIQUIDITY_PROJECT_PATH", unset = "")
if (!nzchar(BASE_PATH)) {
  BASE_PATH <- "C:/Users/mferdo2/OneDrive - Louisiana State University/Finance_PhD/DW_Stigma_paper/Liquidity_project_2025"
}

# Input locations
DATA_RAW <- file.path(BASE_PATH, "01_data/raw")
DATA_PROC <- file.path(BASE_PATH, "01_data/processed")

# Outputs are written alongside the processed data
OUTPUT_PATH <- file.path(BASE_PATH, "01_data/processed")
TABLE_PATH <- file.path(OUTPUT_PATH, "tables")
FIG_PATH <- file.path(OUTPUT_PATH, "figures")

# Create the table/figure folders on first run
for (path in c(TABLE_PATH, FIG_PATH)) {
  if (!dir.exists(path)) {
    dir.create(path, recursive = TRUE)
  }
}
Data Loading
We load the full clean call report (which contains
income statement items from Schedule RI), not the final processed panel,
which drops these columns.
# ------------------------------------------------------------------------------
# Read the clean call report, which keeps the RIAD (income statement) items:
# noninterest_income, noninterest_expense, total_interest_on_deposits_calc,
# and the component-level deposit interest columns.
# NOTE(review): the knitted run reports parsing issues for this file; inspect
# problems(call_full) to confirm none of the columns used below are affected.
# ------------------------------------------------------------------------------
call_full <- file.path(DATA_PROC, "clean_call_report_with_mtm_2021Q4_2024Q4.csv") %>%
  read_csv(show_col_types = FALSE) %>%
  mutate(
    # Quarter label such as "2022Q4", used as the fixed-effect / filter key
    period = paste0(year, "Q", quarter),
    # Bank identifier kept as text for joins
    idrssd = as.character(idrssd)
  )
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# De-duplicated GSIB / failed-bank / size-bin flags keyed by bank and period.
# NOTE(review): nothing in the visible code joins gsib_panel onto the
# estimation sample, so the intended exclusion does not appear to be applied.
# Confirm whether GSIBs / failed banks should be dropped before estimation.
gsib_panel <- file.path(DATA_PROC, "final_call_gsib.csv") %>%
  read_csv(show_col_types = FALSE) %>%
  transmute(idrssd = as.character(idrssd), period, gsib, failed_bank, size_bin) %>%
  distinct()

# Quick sanity printout of the call-report panel dimensions
cat("Call report rows:", nrow(call_full), "\n")
## Call report rows: 75989
cat("Available periods:", toString(sort(unique(call_full$period))), "\n")
## Available periods: 2021Q1, 2021Q2, 2021Q3, 2021Q4, 2022Q1, 2022Q2, 2022Q3, 2022Q4, 2023Q1, 2023Q2, 2023Q3, 2023Q4, 2024Q1, 2024Q2, 2024Q3, 2024Q4
Check Available
Variables
# Verify that every column the later chunks rely on is present.
# `required_vars` is the income/expense and balance-sheet core that gets
# reported; `control_vars` are the additional funding controls that the
# variable-construction step also reads.
required_vars <- c("noninterest_income", "noninterest_expense",
                   "total_interest_on_deposits_calc",
                   "insured_deposit", "uninsured_deposit",
                   "total_deposit", "total_asset", "total_loan",
                   "total_bank_equity", "fhlb_adv")
control_vars <- c("other_borrowed_money", "fed_fund_purchase", "repo")

available <- intersect(required_vars, names(call_full))
missing <- setdiff(c(required_vars, control_vars), names(call_full))
cat("Available:", paste(available, collapse = ", "), "\n")
## Available: noninterest_income, noninterest_expense, total_interest_on_deposits_calc, insured_deposit, uninsured_deposit, total_deposit, total_asset, total_loan, total_bank_equity, fhlb_adv
# Fail fast with a clear message: a missing column would otherwise surface
# later as an "object not found" error inside mutate().
if (length(missing) > 0) {
  cat("MISSING:", paste(missing, collapse = ", "), "\n")
  stop("Call report is missing required columns: ",
       paste(missing, collapse = ", "), call. = FALSE)
}
Variable
Construction
# ==============================================================================
# Construct hedonic regression variables
# Balance-sheet items are scaled by total assets for cross-sectional
# comparability; the deposit interest cost is scaled by total deposits.
# ==============================================================================
df <- call_full %>%
  mutate(
    # ── LHS: Net non-interest expense / TA ──
    net_nie_to_ta = (noninterest_expense - noninterest_income) / total_asset,

    # ── Deposit subcategories / TA ──
    # Insured deposits (predominantly zero-maturity: transaction + savings)
    insured_dep_to_ta = insured_deposit / total_asset,
    # Uninsured deposits (predominantly zero-maturity demand deposits)
    uninsured_dep_to_ta = uninsured_deposit / total_asset,

    # ── Controls / TA ──
    # A missing numerator already propagates NA, so no explicit NA branch
    loans_to_ta = total_loan / total_asset,
    equity_to_ta = total_bank_equity / total_asset,
    # Missing wholesale-funding items are treated as a zero balance
    fhlb_to_ta = ifelse(!is.na(fhlb_adv), fhlb_adv / total_asset, 0),
    other_borr_to_ta = ifelse(!is.na(other_borrowed_money),
                              other_borrowed_money / total_asset, 0),
    fed_funds_to_ta = ifelse(!is.na(fed_fund_purchase),
                             fed_fund_purchase / total_asset, 0),
    repo_to_ta = ifelse(!is.na(repo), repo / total_asset, 0),

    # ── Bank size (log total assets, regression control) ──
    ln_ta = log(total_asset),

    # ── Interest cost of deposits (for comparison) ──
    # Rows with zero deposits would give Inf/NaN and poison means and SDs, so
    # they are set to NA (same guard as in compute_bank_costs()).
    # NOTE(review): the per-period mean interest cost reported further down
    # climbs through each calendar year and resets in Q1, which suggests the
    # income-statement items are year-to-date. Confirm whether they should be
    # annualized before quarters are pooled.
    int_cost_dep = ifelse(total_deposit > 0,
                          total_interest_on_deposits_calc / total_deposit,
                          NA_real_),

    # ── Total deposits / TA ──
    total_dep_to_ta = total_deposit / total_asset
  )

# Size quartiles within each period (used for the size interactions)
df <- df %>%
  group_by(period) %>%
  mutate(size_quartile = ntile(total_asset, 4)) %>%
  ungroup()

cat("Analysis sample:", nrow(df), "bank-quarters\n")
## Analysis sample: 75989 bank-quarters
cat("Periods:", n_distinct(df$period), "| Banks:", n_distinct(df$idrssd), "\n")
## Periods: 16 | Banks: 5074
Summary
Statistics
# Variables reported in the descriptive table
summ_vars <- c("net_nie_to_ta", "insured_dep_to_ta", "uninsured_dep_to_ta",
               "loans_to_ta", "equity_to_ta", "fhlb_to_ta", "int_cost_dep")

# One row per variable: N, mean, SD and quartiles over finite observations.
df_summ <- df %>%
  select(all_of(summ_vars)) %>%
  pivot_longer(everything(), names_to = "Variable", values_to = "Value") %>%
  # A ratio with a zero denominator is +/-Inf; na.rm = TRUE does not remove it,
  # so the mean would be Inf and the SD NaN. Mask non-finite values first.
  mutate(Value = ifelse(is.finite(Value), Value, NA_real_)) %>%
  group_by(Variable) %>%
  summarise(
    N = sum(!is.na(Value)),
    Mean = round(mean(Value, na.rm = TRUE), 5),
    SD = round(sd(Value, na.rm = TRUE), 5),
    P25 = round(quantile(Value, 0.25, na.rm = TRUE, names = FALSE), 5),
    Median = round(median(Value, na.rm = TRUE), 5),
    P75 = round(quantile(Value, 0.75, na.rm = TRUE, names = FALSE), 5),
    .groups = "drop"
  )

kbl(df_summ, format = "html", caption = "Summary: Hedonic Regression Variables") %>%
  kable_styling(bootstrap_options = c("striped","hover","condensed"), full_width = FALSE)
Summary: Hedonic Regression Variables
|
Variable
|
N
|
Mean
|
SD
|
P25
|
Median
|
P75
|
|
equity_to_ta
|
75987
|
0.11673
|
0.10744
|
0.08097
|
0.09682
|
0.11834
|
|
fhlb_to_ta
|
75987
|
0.02480
|
0.04182
|
0.00000
|
0.00107
|
0.03538
|
|
insured_dep_to_ta
|
75987
|
0.61897
|
0.15124
|
0.55856
|
0.64593
|
0.71643
|
|
int_cost_dep
|
75164
|
Inf
|
NaN
|
0.00134
|
0.00322
|
0.00832
|
|
loans_to_ta
|
75987
|
0.59714
|
0.18289
|
0.49741
|
0.62437
|
0.73226
|
|
net_nie_to_ta
|
75987
|
0.00904
|
0.06216
|
0.00571
|
0.01053
|
0.01598
|
|
uninsured_dep_to_ta
|
75987
|
0.21927
|
0.12190
|
0.13664
|
0.20362
|
0.28242
|
Hedonic Deposit Cost
Regression
Main Specification
(DSSW-style)
# ==============================================================================
# DSSW Hedonic Regression (Eq. 67)
#
# LHS: Net Non-Interest Expense / Total Assets
# RHS: Deposit subcategories / TA + Controls + Period FE
#
# The βs on deposit types = marginal non-interest cost per dollar of deposits
# ==============================================================================
# Clamp a numeric vector to its [probs[1], probs[2]] quantile range
# (default 2.5% / 97.5%) to limit the influence of outliers.
# NAs are ignored when computing the cut-offs and stay NA in the result;
# an all-NA (or empty) vector is returned untouched.
winsorize <- function(x, probs = c(0.025, 0.975)) {
  if (!any(!is.na(x))) {
    return(x)
  }
  bounds <- quantile(x, probs = probs, na.rm = TRUE, names = FALSE)
  lower <- bounds[1]
  upper <- bounds[2]
  capped_above <- pmin(x, upper)
  pmax(capped_above, lower)
}
# Estimation sample: keep rows where the LHS and the core regressors are
# observed, then winsorize every ratio (pooled across all periods).
df_reg <- df %>%
  filter(if_all(
    c(net_nie_to_ta, insured_dep_to_ta, uninsured_dep_to_ta,
      loans_to_ta, equity_to_ta),
    ~ !is.na(.x)
  )) %>%
  mutate(across(
    c(
      net_nie_to_ta,
      insured_dep_to_ta, uninsured_dep_to_ta,
      loans_to_ta, equity_to_ta,
      fhlb_to_ta, other_borr_to_ta, fed_funds_to_ta, repo_to_ta
    ),
    winsorize
  ))
# All four specifications regress winsorized net non-interest expense / TA on
# the insured and uninsured deposit ratios plus balance-sheet controls.
# Specs 1-3 pool every period with period fixed effects and cluster standard
# errors by bank (idrssd); spec 4 is a single cross-section.

# ── Specification 1: Basic (deposit types + controls) ──
# Controls: loans, equity, FHLB advances, log total assets.
reg1 <- feols(net_nie_to_ta ~ insured_dep_to_ta + uninsured_dep_to_ta +
loans_to_ta + equity_to_ta + fhlb_to_ta + ln_ta | period,
data = df_reg, cluster = ~idrssd)
# ── Specification 2: Full controls ──
# Adds other borrowed money, fed funds purchased and repo to spec 1.
# This is the specification later used as `main_reg`.
reg2 <- feols(net_nie_to_ta ~ insured_dep_to_ta + uninsured_dep_to_ta +
loans_to_ta + equity_to_ta + fhlb_to_ta +
other_borr_to_ta + fed_funds_to_ta + repo_to_ta + ln_ta | period,
data = df_reg, cluster = ~idrssd)
# ── Specification 3: Size-quartile interactions ──
# Lets the insured-deposit slope differ by within-period size quartile.
# Note that fed_funds_to_ta and repo_to_ta are not included here.
reg3 <- feols(net_nie_to_ta ~ insured_dep_to_ta * factor(size_quartile) +
uninsured_dep_to_ta +
loans_to_ta + equity_to_ta + fhlb_to_ta +
other_borr_to_ta + ln_ta | period,
data = df_reg, cluster = ~idrssd)
# ── Specification 4: Cross-sectional (2022Q4 only) ──
# Single period, so there is no period fixed effect (an intercept is estimated
# instead) and standard errors are heteroskedasticity-robust, not clustered.
df_2022q4 <- df_reg %>% filter(period == "2022Q4")
reg4 <- feols(net_nie_to_ta ~ insured_dep_to_ta + uninsured_dep_to_ta +
loans_to_ta + equity_to_ta + fhlb_to_ta +
other_borr_to_ta + ln_ta,
data = df_2022q4, vcov = "hetero")
# Collect the four fits under their display names (table column headers)
models <- setNames(
  list(reg1, reg2, reg3, reg4),
  c("Basic + FE", "Full Controls + FE", "Size × Insured", "2022Q4 Only")
)

# Side-by-side coefficient table with readable row labels;
# information criteria, log-likelihood and RMSE rows are suppressed.
modelsummary(
  models,
  title = "Hedonic Deposit Cost Regression (DSSW Approach)",
  stars = c("*" = .10, "**" = .05, "***" = .01),
  coef_rename = c(
    "insured_dep_to_ta" = "Insured Deposits / TA",
    "uninsured_dep_to_ta" = "Uninsured Deposits / TA",
    "loans_to_ta" = "Loans / TA",
    "equity_to_ta" = "Equity / TA",
    "fhlb_to_ta" = "FHLB / TA",
    "other_borr_to_ta" = "Other Borrowing / TA",
    "fed_funds_to_ta" = "Fed Funds Purchased / TA",
    "repo_to_ta" = "Repo / TA",
    "ln_ta" = "Log(Total Assets)"
  ),
  gof_omit = "AIC|BIC|Log|RMSE"
)
Hedonic Deposit Cost Regression (DSSW Approach)
| |
Basic + FE |
Full Controls + FE |
Size × Insured |
2022Q4 Only |
| * p < 0.1, ** p < 0.05, *** p < 0.01 |
| Insured Deposits / TA |
0.013*** |
0.014*** |
0.014*** |
0.021*** |
|
(0.001) |
(0.001) |
(0.001) |
(0.002) |
| Uninsured Deposits / TA |
0.010*** |
0.011*** |
0.011*** |
0.017*** |
|
(0.001) |
(0.001) |
(0.001) |
(0.002) |
| Loans / TA |
0.008*** |
0.008*** |
0.008*** |
0.013*** |
|
(0.000) |
(0.000) |
(0.000) |
(0.001) |
| Equity / TA |
0.006*** |
0.008*** |
0.007*** |
0.010*** |
|
(0.002) |
(0.002) |
(0.002) |
(0.004) |
| FHLB / TA |
0.004** |
-0.009** |
-0.008** |
-0.015 |
|
(0.002) |
(0.004) |
(0.004) |
(0.013) |
| Log(Total Assets) |
-0.001*** |
-0.001*** |
-0.001*** |
-0.002*** |
|
(0.000) |
(0.000) |
(0.000) |
(0.000) |
| Other Borrowing / TA |
|
0.013*** |
0.012*** |
0.025** |
|
|
(0.004) |
(0.004) |
(0.013) |
| Fed Funds Purchased / TA |
|
-0.013 |
|
|
|
|
(0.010) |
|
|
| Repo / TA |
|
-0.000 |
|
|
|
|
(0.004) |
|
|
| factor(size_quartile)2 |
|
|
-0.000 |
|
|
|
|
(0.001) |
|
| factor(size_quartile)3 |
|
|
-0.001 |
|
|
|
|
(0.001) |
|
| factor(size_quartile)4 |
|
|
-0.001 |
|
|
|
|
(0.001) |
|
| Insured Deposits / TA:factor(size_quartile)2 |
|
|
-0.001 |
|
|
|
|
(0.001) |
|
| Insured Deposits / TA:factor(size_quartile)3 |
|
|
0.000 |
|
|
|
|
(0.001) |
|
| Insured Deposits / TA:factor(size_quartile)4 |
|
|
-0.000 |
|
|
|
|
(0.001) |
|
| (Intercept) |
|
|
|
0.012*** |
|
|
|
|
(0.002) |
| Num.Obs. |
75987 |
75987 |
75987 |
4737 |
| R2 |
0.648 |
0.649 |
0.651 |
0.357 |
| R2 Adj. |
0.648 |
0.649 |
0.651 |
0.356 |
| R2 Within |
0.243 |
0.245 |
0.249 |
|
| R2 Within Adj. |
0.243 |
0.245 |
0.249 |
|
| Std.Errors |
by: idrssd |
by: idrssd |
by: idrssd |
Heteroskedasticity-robust |
| FE: period |
X |
X |
X |
|
Interpreting the
Coefficients
cat("================================================================\n")
## ================================================================
cat(" HEDONIC DEPOSIT COST COEFFICIENTS\n")
## HEDONIC DEPOSIT COST COEFFICIENTS
cat("================================================================\n\n")
## ================================================================
# Main specification = Specification 2 (full controls + period FE)
main_reg <- reg2

# Point estimates on the two deposit ratios
b_ins <- coef(main_reg)["insured_dep_to_ta"]
b_unin <- coef(main_reg)["uninsured_dep_to_ta"]

# Standard errors = square roots of the matching variance-matrix diagonal entries
se_ins <- sqrt(diag(vcov(main_reg))[["insured_dep_to_ta"]])
se_unin <- sqrt(diag(vcov(main_reg))[["uninsured_dep_to_ta"]])

cat(sprintf(" β_insured = %.5f (SE = %.5f) → %.3f%% cost per dollar of insured deposits\n",
            b_ins, se_ins, b_ins * 100))
## β_insured = 0.01394 (SE = 0.00104) → 1.394% cost per dollar of insured deposits
cat(sprintf(" β_uninsured = %.5f (SE = %.5f) → %.3f%% cost per dollar of uninsured deposits\n",
            b_unin, se_unin, b_unin * 100))
## β_uninsured = 0.01082 (SE = 0.00116) → 1.082% cost per dollar of uninsured deposits
# Average deposit cost: coefficients weighted by the sample-mean insured share.
# The share is built from the winsorized ratios held in df_reg.
avg_ins_share <- with(
  df_reg,
  mean(insured_dep_to_ta / (insured_dep_to_ta + uninsured_dep_to_ta), na.rm = TRUE)
)
avg_unin_share <- 1 - avg_ins_share
c_avg <- b_ins * avg_ins_share + b_unin * avg_unin_share
cat(sprintf("\n Average deposit cost c = %.5f (%.3f%%)\n", c_avg, c_avg * 100))
##
## Average deposit cost c = 0.01314 (1.314%)
cat(sprintf(" (Weighted by: %.1f%% insured, %.1f%% uninsured)\n",
            avg_ins_share * 100, avg_unin_share * 100))
## (Weighted by: 74.2% insured, 25.8% uninsured)
cat(sprintf("\n DSSW reference: ~1.317%% average cost per dollar of deposits\n"))
##
## DSSW reference: ~1.317% average cost per dollar of deposits
Bank-Level Deposit
Costs
# ==============================================================================
# Compute the bank-level deposit cost c_i for every row (bank-period) passed in
# c_i = β_insured × s_ins,i + β_uninsured × s_unin,i
# where the shares s are taken over insured + uninsured deposits.
#
# This is the per-dollar non-interest cost of maintaining deposits
# ==============================================================================
#' Attach hedonic deposit-cost measures to a bank panel
#'
#' @param df_base Data frame with columns `insured_deposit`,
#'   `uninsured_deposit`, `total_deposit` and
#'   `total_interest_on_deposits_calc`.
#' @param reg_model Fitted model whose `coef()` contains
#'   `insured_dep_to_ta` and `uninsured_dep_to_ta`.
#' @return `df_base` with added columns `ins_share_dep`, `unin_share_dep`,
#'   `deposit_cost_insured`, `deposit_cost_uninsured`,
#'   `deposit_cost_weighted`, `interest_cost_per_dep`, `total_deposit_cost`.
#'   Rows with no positive deposit base get `NA` shares and costs.
compute_bank_costs <- function(df_base, reg_model) {
  needed <- c("insured_dep_to_ta", "uninsured_dep_to_ta")
  betas <- coef(reg_model)[needed]
  # A coefficient that is absent (or was dropped from the fit) would otherwise
  # turn every bank's cost into NA without any warning.
  if (anyNA(betas)) {
    stop("reg_model must provide non-missing coefficients for: ",
         paste(needed, collapse = ", "), call. = FALSE)
  }
  # [[ ]] drops the coefficient names so the new columns are plain numerics
  b_ins <- betas[[1]]
  b_unin <- betas[[2]]

  df_base %>%
    mutate(
      # Deposit composition; NA (rather than NaN) when the base is not positive
      ins_share_dep = ifelse(insured_deposit + uninsured_deposit > 0,
                             insured_deposit / (insured_deposit + uninsured_deposit),
                             NA_real_),
      unin_share_dep = ifelse(insured_deposit + uninsured_deposit > 0,
                              uninsured_deposit / (insured_deposit + uninsured_deposit),
                              NA_real_),
      # Per-dollar deposit cost (weighted by bank's deposit composition)
      deposit_cost_insured = b_ins,
      deposit_cost_uninsured = b_unin,
      deposit_cost_weighted = b_ins * ins_share_dep + b_unin * unin_share_dep,
      # Total cost of deposits = interest cost + non-interest cost
      interest_cost_per_dep = ifelse(total_deposit > 0,
                                     total_interest_on_deposits_calc / total_deposit,
                                     NA_real_),
      total_deposit_cost = interest_cost_per_dep + deposit_cost_weighted
    )
}
# Apply the main-specification coefficients to every bank-quarter
df_costs <- compute_bank_costs(df, main_reg)

# Per-period moments of the non-interest, interest and total deposit cost,
# rounded to five decimals for display
cost_summary <- df_costs %>%
  group_by(period) %>%
  summarise(
    N = n(),
    mean_c = mean(deposit_cost_weighted, na.rm = TRUE),
    med_c = median(deposit_cost_weighted, na.rm = TRUE),
    sd_c = sd(deposit_cost_weighted, na.rm = TRUE),
    mean_int = mean(interest_cost_per_dep, na.rm = TRUE),
    mean_tot = mean(total_deposit_cost, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(across(-c(period, N), ~ round(.x, 5)))

kable_styling(
  kbl(
    cost_summary,
    format = "html",
    col.names = c("Period","N","Mean c","Median c","SD c",
                  "Mean Int. Cost","Mean Total Cost"),
    caption = "Bank-Level Deposit Costs by Period"
  ),
  bootstrap_options = c("striped","condensed"),
  full_width = FALSE
)
Bank-Level Deposit Costs by Period
|
Period
|
N
|
Mean c
|
Median c
|
SD c
|
Mean Int. Cost
|
Mean Total Cost
|
|
2021Q1
|
5027
|
0.01316
|
0.01323
|
0.00045
|
0.00097
|
0.01413
|
|
2021Q2
|
4998
|
0.01313
|
0.01321
|
0.00046
|
0.00182
|
0.01496
|
|
2021Q3
|
4962
|
0.01311
|
0.01319
|
0.00046
|
0.00258
|
0.01569
|
|
2021Q4
|
4887
|
0.01308
|
0.01317
|
0.00047
|
0.00322
|
0.01631
|
|
2022Q1
|
4844
|
0.01308
|
0.01315
|
0.00046
|
0.00065
|
0.01373
|
|
2022Q2
|
4812
|
0.01307
|
0.01315
|
0.00046
|
0.00134
|
0.01441
|
|
2022Q3
|
4765
|
0.01307
|
0.01314
|
0.00046
|
0.00233
|
0.01540
|
|
2022Q4
|
4737
|
0.01307
|
0.01314
|
0.00046
|
0.00394
|
0.01701
|
|
2023Q1
|
4712
|
0.01312
|
0.01318
|
0.00044
|
0.00244
|
0.01556
|
|
2023Q2
|
4681
|
0.01315
|
0.01320
|
0.00043
|
0.00569
|
0.01884
|
|
2023Q3
|
4641
|
0.01315
|
0.01321
|
0.00043
|
0.00955
|
0.02270
|
|
2023Q4
|
4630
|
0.01314
|
0.01320
|
0.00043
|
1.73535
|
1.74849
|
|
2024Q1
|
4610
|
0.01315
|
0.01322
|
0.00043
|
0.00479
|
0.01795
|
|
2024Q2
|
4584
|
0.01315
|
0.01322
|
0.00043
|
0.00985
|
0.02301
|
|
2024Q3
|
4566
|
0.01314
|
0.01321
|
0.00043
|
0.01497
|
0.02812
|
|
2024Q4
|
4533
|
0.01313
|
0.01319
|
0.00043
|
0.01985
|
0.03298
|
Distribution of
Deposit Costs
# Cross-section used for both histograms
df_plot <- df_costs %>% filter(period == "2022Q4")

# Left panel: non-interest deposit cost, with the sample mean as a dashed line
p1 <- ggplot(df_plot) +
  geom_histogram(aes(x = deposit_cost_weighted * 100),
                 bins = 50, fill = "steelblue", alpha = 0.7) +
  geom_vline(xintercept = with(df_plot, mean(deposit_cost_weighted, na.rm = TRUE)) * 100,
             color = "red", linetype = "dashed", linewidth = 1) +
  theme_minimal() +
  labs(title = "Distribution of Deposit Costs (c), 2022Q4",
       x = "Non-Interest Deposit Cost (% of deposits)", y = "Count")

# Right panel: interest + non-interest cost, same layout
p2 <- ggplot(df_plot) +
  geom_histogram(aes(x = total_deposit_cost * 100),
                 bins = 50, fill = "darkgreen", alpha = 0.7) +
  geom_vline(xintercept = with(df_plot, mean(total_deposit_cost, na.rm = TRUE)) * 100,
             color = "red", linetype = "dashed", linewidth = 1) +
  theme_minimal() +
  labs(title = "Total Deposit Cost (Interest + Non-Interest), 2022Q4",
       x = "Total Cost (% of deposits)", y = "Count")

# patchwork supplies `+` for composing the two ggplots into one figure
library(patchwork)
p1 + p2
## Warning: Removed 52 rows containing non-finite outside the scale range (`stat_bin()`).
## Removed 52 rows containing non-finite outside the scale range (`stat_bin()`).

Save Output
# ------------------------------------------------------------------------------
# Persist the bank-level deposit costs consumed by the main analysis.
# Rows without a weighted cost are dropped before writing.
# ------------------------------------------------------------------------------
output_costs <- df_costs %>%
  filter(!is.na(deposit_cost_weighted)) %>%
  select(
    idrssd, period,
    deposit_cost_insured, deposit_cost_uninsured, deposit_cost_weighted,
    interest_cost_per_dep, total_deposit_cost
  )

write_csv(output_costs, file.path(DATA_PROC, "dssw_deposit_costs.csv"))

cat(sprintf(
  "Saved: dssw_deposit_costs.csv (%d rows, %d banks, %d periods)\n",
  nrow(output_costs),
  n_distinct(output_costs$idrssd),
  n_distinct(output_costs$period)
))
## Saved: dssw_deposit_costs.csv (75149 rows, 5012 banks, 16 periods)
# Also save regression summary: one row per specification with the two deposit
# coefficients, their standard errors, R-squared and sample size.
# vapply() pins every column to one number per model, so a model lacking a
# coefficient fails loudly instead of silently producing a list column.
# R-squared is taken from fixest::r2(), the documented accessor, rather than
# from a field of the summary object.
reg_summ <- tibble(
  specification = c("Basic+FE", "FullControls+FE", "SizeInteraction", "2022Q4Only"),
  beta_insured = vapply(models, function(m) coef(m)[["insured_dep_to_ta"]],
                        numeric(1), USE.NAMES = FALSE),
  beta_uninsured = vapply(models, function(m) coef(m)[["uninsured_dep_to_ta"]],
                          numeric(1), USE.NAMES = FALSE),
  se_insured = vapply(models,
                      function(m) sqrt(vcov(m)["insured_dep_to_ta", "insured_dep_to_ta"]),
                      numeric(1), USE.NAMES = FALSE),
  se_uninsured = vapply(models,
                        function(m) sqrt(vcov(m)["uninsured_dep_to_ta", "uninsured_dep_to_ta"]),
                        numeric(1), USE.NAMES = FALSE),
  r_squared = vapply(models, function(m) fixest::r2(m, type = "r2")[[1]],
                     numeric(1), USE.NAMES = FALSE),
  n_obs = vapply(models, function(m) as.numeric(m$nobs),
                 numeric(1), USE.NAMES = FALSE)
)
write_csv(reg_summ, file.path(DATA_PROC, "dssw_deposit_cost_regression.csv"))
cat("Saved: dssw_deposit_cost_regression.csv\n")
## Saved: dssw_deposit_cost_regression.csv
Diagnostic: Deposit
Cost vs. Deposit Beta
# Deposit betas estimated as of 2022Q4, one uninsured beta per bank id
betas <- file.path(DATA_PROC, "dssw_deposit_betas.csv") %>%
  read_csv(show_col_types = FALSE) %>%
  mutate(idrssd = as.character(idrssd)) %>%
  filter(estimation_date == "2022Q4") %>%
  select(idrssd, beta_uninsured)

# 2022Q4 costs matched to betas; banks without a usable beta or cost drop out
df_diag <- output_costs %>%
  filter(period == "2022Q4") %>%
  inner_join(betas, by = "idrssd") %>%
  filter(!is.na(beta_uninsured), !is.na(deposit_cost_weighted))

# Pearson correlation between the non-interest cost and the uninsured beta
cor_val <- with(
  df_diag,
  cor(deposit_cost_weighted, beta_uninsured, use = "complete.obs")
)

# Scatter with a linear fit; the correlation is shown in the title
ggplot(df_diag, aes(x = beta_uninsured, y = deposit_cost_weighted * 100)) +
  geom_point(alpha = 0.3, size = 1) +
  geom_smooth(method = "lm", color = "red", se = TRUE) +
  theme_minimal() +
  labs(title = sprintf("Deposit Cost (c) vs. Deposit Beta (β^U) — ρ = %.3f", cor_val),
       x = "Deposit Beta (β^U)", y = "Non-Interest Deposit Cost (%)")
## `geom_smooth()` using formula = 'y ~ x'

cat(sprintf("Correlation between c and β^U: %.3f\n", cor_val))
## Correlation between c and β^U: 0.020
cat(sprintf("Banks in diagnostic: %d\n", nrow(df_diag)))
## Banks in diagnostic: 4635
Summary
# Console recap of the run: estimation sample size, the main-specification
# coefficients (b_ins, b_unin, c_avg computed earlier), the two CSV files
# written, and a reminder of how the main analysis consumes them.
# Lines starting with "##" are the echoed output of the statement above them.
cat("================================================================\n")
## ================================================================
cat(" DEPOSIT COST ESTIMATION — FINAL SUMMARY\n")
## DEPOSIT COST ESTIMATION — FINAL SUMMARY
cat("================================================================\n\n")
## ================================================================
cat("Methodology: DSSW hedonic regression (net non-interest expense approach)\n")
## Methodology: DSSW hedonic regression (net non-interest expense approach)
# Sample size refers to df_reg, i.e. the filtered and winsorized panel
cat(sprintf("Estimation sample: %d bank-quarters, %d unique banks\n",
nrow(df_reg), n_distinct(df_reg$idrssd)))
## Estimation sample: 75987 bank-quarters, 5073 unique banks
cat(sprintf("Main specification: Full Controls + Period FE, clustered SE at bank level\n\n"))
## Main specification: Full Controls + Period FE, clustered SE at bank level
cat("Key Results:\n")
## Key Results:
# Coefficients are printed both as raw values and as percent per dollar
cat(sprintf(" β_insured = %.5f (%.3f%% per dollar)\n", b_ins, b_ins * 100))
## β_insured = 0.01394 (1.394% per dollar)
cat(sprintf(" β_uninsured = %.5f (%.3f%% per dollar)\n", b_unin, b_unin * 100))
## β_uninsured = 0.01082 (1.082% per dollar)
cat(sprintf(" Average c = %.5f (%.3f%%)\n\n", c_avg, c_avg * 100))
## Average c = 0.01314 (1.314%)
cat("Output files:\n")
## Output files:
cat(" 1. dssw_deposit_costs.csv — Bank-level deposit costs by period\n")
## 1. dssw_deposit_costs.csv — Bank-level deposit costs by period
cat(" 2. dssw_deposit_cost_regression.csv — Regression coefficient summary\n\n")
## 2. dssw_deposit_cost_regression.csv — Regression coefficient summary
# Usage hints for the downstream notebook (printed text only, nothing is run)
cat("Usage in main analysis (revised_franchise_bulletproof_1.Rmd):\n")
## Usage in main analysis (revised_franchise_bulletproof_1.Rmd):
cat(" Load: deposit_costs <- read_csv('dssw_deposit_costs.csv')\n")
## Load: deposit_costs <- read_csv('dssw_deposit_costs.csv')
cat(" Merge: left_join(deposit_costs %>% filter(period=='2022Q4'), by='idrssd')\n")
## Merge: left_join(deposit_costs %>% filter(period=='2022Q4'), by='idrssd')
cat(" Franchise: f = (1-β^U)(r-c) × [1-(1+δ)^{-T}]/δ × D^U/A\n")
## Franchise: f = (1-β^U)(r-c) × [1-(1+δ)^{-T}]/δ × D^U/A