library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(lubridate)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(scales)
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(sf)
## Linking to GEOS 3.13.0, GDAL 3.8.5, PROJ 9.5.1; sf_use_s2() is TRUE
library(tigris)
## To enable caching of data, set `options(tigris_use_cache = TRUE)`
## in your R script or .Rprofile.
library(tidycensus)
library(censusxy)
library(progressr)
library(fixest)
##
## Attaching package: 'fixest'
##
## The following object is masked from 'package:scales':
##
## pvalue
library(lubridate)
options(tigris_use_cache = TRUE)
he <- read_csv("data/hcd_housing/housing_element/housing_element_table.csv")
## Rows: 4069 Columns: 7
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (5): county, jurisdiction, type, link_text, view_link
## dbl (1): planning_period
## date (1): received_date
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
permits_juris <- read_csv("data/hcd_housing/raw/tablea2.csv") %>%
group_by(
juris = JURIS_NAME,
county = CNTY_NAME,
year = YEAR,
cat = UNIT_CAT,
tenure = TENURE
) %>%
summarise(
permits = sum(NO_BUILDING_PERMITS, na.rm = TRUE),
permits_vli = sum(BP_VLOW_INCOME_DR + BP_VLOW_INCOME_NDR, na.rm = TRUE),
permits_li = sum(BP_LOW_INCOME_DR + BP_LOW_INCOME_NDR, na.rm = TRUE),
permits_mod = sum(BP_MOD_INCOME_DR + BP_MOD_INCOME_NDR, na.rm = TRUE),
permits_above_mod = sum(BP_ABOVE_MOD_INCOME, na.rm = TRUE)
) %>%
filter(!is.na(year))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 786023 Columns: 52
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): JURIS_NAME, CNTY_NAME, PRIOR_APN, APN, STREET_ADDRESS, PROJECT_NA...
## dbl (32): YEAR, VLOW_INCOME_DR, VLOW_INCOME_NDR, LOW_INCOME_DR, LOW_INCOME_...
## date (3): ENT_APPROVE_DT1, BP_ISSUE_DT1, CO_ISSUE_DT1
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'juris', 'county', 'year', 'cat'. You can override using the `.groups` argument.
apps_juris <- read_csv("data/hcd_housing/raw/tablea.csv") %>%
filter(as.Date(APP_SUBMIT_DT, format="%m/%d/%Y") > as.Date("2016-01-01")) %>%
mutate(month_year = format(as.Date(APP_SUBMIT_DT, format="%m/%d/%Y"), "%Y-%m")) %>%
filter(UNIT_CAT == "5+", !(ABOVE_MOD_INCOME == TOT_PROPOSED_UNITS)) %>%
group_by(
juris = JURIS_NAME,
county = CNTY_NAME,
month_year
) %>%
summarise(
projects = n(),
units = sum(TOT_PROPOSED_UNITS, na.rm = TRUE)
) %>%
filter(!is.na(month_year))
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 302094 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): JURIS_NAME, CNTY_NAME, PRIOR_APN, APN, STREET_ADDRESS, PROJECT_NA...
## dbl (11): YEAR, VLOW_INCOME_DR, VLOW_INCOME_NDR, LOW_INCOME_DR, LOW_INCOME_...
## date (1): APP_SUBMIT_DT
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## `summarise()` has grouped output by 'juris', 'county'. You can override using the `.groups` argument.
apps_juris %>%
mutate(month_year = as.Date(paste0(month_year, "-01"))) %>%
arrange(month_year) %>%
filter(month_year > as.Date("2017-12-01"), month_year < as.Date("2025-01-01")) %>%
group_by(month_year) %>%
summarise(units = sum(units, na.rm = TRUE)) %>%
ggplot(aes(x = month_year, y = units)) +
geom_line(size = 2) +
scale_x_date(date_labels = "%b %Y", date_breaks = "3 months") +
theme_minimal() +
labs(
x = "Month-Year",
y = "Number of Proposed Units",
title = "Proposed Units by Month"
) +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

# ----------------------------
# 1) helpers: cleaning + region mapping + deadlines
# ----------------------------
clean_name <- function(x) {
x %>%
str_squish() %>%
str_to_upper() %>%
str_replace_all("[^A-Z0-9 ]", "") %>%
str_squish()
}
county_to_region <- function(county_clean) {
case_when(
county_clean %in% c("ALAMEDA","CONTRA COSTA","MARIN","NAPA","SAN FRANCISCO","SAN MATEO","SANTA CLARA","SOLANO","SONOMA") ~ "ABAG",
county_clean %in% c("MONTEREY","SANTA CRUZ") ~ "AMBAG",
county_clean %in% c("EL DORADO","PLACER","SACRAMENTO","SUTTER","YOLO","YUBA") ~ "SACOG",
county_clean %in% c("SAN DIEGO") ~ "SANDAG",
county_clean %in% c("IMPERIAL","LOS ANGELES","ORANGE","RIVERSIDE","SAN BERNARDINO","VENTURA") ~ "SCAG",
county_clean %in% c("SAN LUIS OBISPO") ~ "SLOCOG",
TRUE ~ county_clean
)
}
region_deadlines <- tribble(
~region, ~due_date,
"ABAG", as.Date("2023-01-31"),
"AMBAG", as.Date("2023-12-15"),
"SACOG", as.Date("2021-05-15"),
"SANDAG", as.Date("2021-04-30"),
"SCAG", as.Date("2021-10-15")
)
# month helpers
month_start_from_ym <- function(ym_str) {
ymd(paste0(ym_str, "-01"))
}
month_end_from_start <- function(d) {
ceiling_date(d, "month") - days(1)
}
month_id <- function(d) year(d) * 12L + month(d)
# ----------------------------
# 2) build BR windows (start = region due date; end = latest Adopted in planning_period 6)
# ----------------------------
he_clean <- he %>%
mutate(
juris_clean = clean_name(jurisdiction),
county_clean = clean_name(county),
region = county_to_region(county_clean),
type_clean = str_to_upper(str_squish(type)),
planning_period = as.integer(planning_period),
received_date = as.Date(received_date)
) %>%
filter(region %in% region_deadlines$region)
adoption_6 <- he_clean %>%
filter(planning_period == 6, str_detect(type_clean, "ADOPT")) %>%
group_by(juris_clean, county_clean, region) %>%
summarise(adopted_6 = max(received_date, na.rm = TRUE), .groups = "drop")
br_windows <- he_clean %>%
distinct(juris_clean, county_clean, region) %>%
left_join(region_deadlines, by = "region") %>%
left_join(adoption_6, by = c("juris_clean", "county_clean", "region")) %>%
mutate(
# if adopted_6 exists but is before due_date, BR never turns on
br_end_eff = case_when(
is.na(adopted_6) ~ as.Date(NA),
adopted_6 < due_date ~ due_date - days(1),
TRUE ~ adopted_6
),
br_start = due_date
)
# ----------------------------
# 3) attach BR on/off to monthly applications
# ----------------------------
apps_clean <- apps_juris %>%
mutate(
juris_clean = clean_name(juris),
county_clean = clean_name(county),
region = county_to_region(county_clean),
month_start = month_start_from_ym(month_year),
month_end = month_end_from_start(month_start),
month_id = month_id(month_start)
) %>%
filter(region %in% region_deadlines$region) %>%
left_join(br_windows %>% select(juris_clean, county_clean, region, br_start, br_end_eff),
by = c("juris_clean", "county_clean", "region"))
# If br_end_eff is NA (no adopted_6 yet), treat BR as on through the end of your apps panel
panel_max_end <- max(apps_clean$month_end, na.rm = TRUE)
apps_br <- apps_clean %>%
mutate(
br_end_open = coalesce(br_end_eff, panel_max_end),
br_on = (month_end >= br_start) & (month_start <= br_end_open) & (br_end_open >= br_start),
br_pre = month_end < br_start,
br_post = month_start > br_end_open
) %>%
filter(month_start < as.Date("2025-01-01"))
apps_br_summ <- apps_br %>%
filter(!is.na(br_end_eff)) %>%
mutate(
br_status = case_when(
br_pre ~ "br_pre",
br_on ~ "br_on",
br_post ~ "br_post",
TRUE ~ NA_character_
),
br_status = factor(br_status, levels = c("br_pre", "br_on", "br_post"))
) %>%
filter(!is.na(br_status)) %>%
group_by(br_status) %>%
summarise(
n_juris_month = n(),
n_juris = n_distinct(juris_clean),
avg_units = mean(units, na.rm = TRUE),
med_units = median(units, na.rm = TRUE),
avg_projects = mean(projects, na.rm = TRUE),
total_units = sum(units, na.rm = TRUE),
.groups = "drop"
)
apps_br_summ
## # A tibble: 3 × 7
## br_status n_juris_month n_juris avg_units med_units avg_projects total_units
## <fct> <int> <int> <dbl> <dbl> <dbl> <dbl>
## 1 br_pre 1215 226 216. 84 2.50 262989
## 2 br_on 368 143 230. 80 2.07 84603
## 3 br_post 667 165 262. 106 2.35 175021
apps_br %>%
count(is.na(br_end_eff))
## # A tibble: 290 × 4
## # Groups: juris, county [290]
## juris county `is.na(br_end_eff)` n
## <chr> <chr> <lgl> <int>
## 1 AGOURA HILLS Los Angeles FALSE 4
## 2 ALAMEDA Alameda FALSE 7
## 3 ALAMEDA COUNTY Alameda FALSE 7
## 4 ALBANY Alameda FALSE 5
## 5 ALHAMBRA Los Angeles FALSE 7
## 6 ALISO VIEJO Orange FALSE 4
## 7 AMERICAN CANYON Napa TRUE 5
## 8 ANAHEIM Orange FALSE 10
## 9 ANTIOCH Contra Costa FALSE 2
## 10 APPLE VALLEY San Bernardino FALSE 11
## # ℹ 280 more rows
# avg duration of BR window among treated juris
br_durations <- br_windows %>%
filter(!is.na(br_end_eff)) %>%
mutate(
duration_days = as.integer(br_end_eff - br_start) + 1,
duration_months = interval(br_start, br_end_eff) %/% months(1) + 1
)
summary(br_durations$duration_months)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 7.00 13.00 16.87 24.00 51.00
br_durations %>% arrange(desc(duration_months))
## # A tibble: 361 × 9
## juris_clean county_clean region due_date adopted_6 br_end_eff br_start
## <chr> <chr> <chr> <date> <date> <date> <date>
## 1 CANYON LAKE RIVERSIDE SCAG 2021-10-15 2025-12-15 2025-12-15 2021-10-15
## 2 HIDDEN HILLS LOS ANGELES SCAG 2021-10-15 2025-12-15 2025-12-15 2021-10-15
## 3 ROCKLIN PLACER SACOG 2021-05-15 2025-08-05 2025-08-05 2021-05-15
## 4 SAN DIMAS LOS ANGELES SCAG 2021-10-15 2025-11-20 2025-11-20 2021-10-15
## 5 COMMERCE LOS ANGELES SCAG 2021-10-15 2025-10-30 2025-10-30 2021-10-15
## 6 DESERT HOT S… RIVERSIDE SCAG 2021-10-15 2025-11-14 2025-11-14 2021-10-15
## 7 HESPERIA SAN BERNARD… SCAG 2021-10-15 2025-11-06 2025-11-06 2021-10-15
## 8 PLACER COUNTY PLACER SACOG 2021-05-15 2025-06-12 2025-06-12 2021-05-15
## 9 SEAL BEACH ORANGE SCAG 2021-10-15 2025-10-29 2025-10-29 2021-10-15
## 10 VILLA PARK ORANGE SCAG 2021-10-15 2025-10-30 2025-10-30 2021-10-15
## # ℹ 351 more rows
## # ℹ 2 more variables: duration_days <dbl>, duration_months <dbl>
OFF: Quarterly Balanced Panel
quarter_id <- function(d) year(d) * 4L + quarter(d)
apps_q <- apps_br %>%
mutate(
q_start = floor_date(month_start, "quarter"),
units = replace_na(units, 0),
projects = replace_na(projects, 0),
br_start_q = floor_date(br_start, "quarter")
) %>%
group_by(juris_clean, region, q_start, br_start_q, br_end_eff) %>%
summarise(
units_q = sum(units, na.rm = TRUE),
projects_q = sum(projects, na.rm = TRUE),
.groups = "drop"
)
# balanced quarter panel (fill missing quarters with 0s)
apps_q_panel <- apps_q %>%
group_by(juris_clean, region) %>%
complete(
q_start = seq(min(q_start, na.rm = TRUE),
max(q_start, na.rm = TRUE),
by = "quarter"),
fill = list(units_q = 0, projects_q = 0)
) %>%
ungroup() %>%
left_join(
apps_q %>% distinct(juris_clean, region, br_start_q, br_end_eff),
by = c("juris_clean", "region", "br_start_q", "br_end_eff")
) %>%
mutate(
year = year(q_start),
t = quarter_id(q_start),
# cohort g_i = first quarter when BR is OFF
# define "post" as the quarter AFTER the adoption quarter
br_end_q = floor_date(br_end_eff, "quarter"),
g = if_else(
is.na(br_end_eff),
NA_integer_,
quarter_id(br_end_q %m+% months(3))
),
in_br_era = q_start >= br_start_q,
y_units = log1p(units_q),
y_projects = log1p(projects_q),
region_t = interaction(region, t, drop = TRUE),
region_y = interaction(region, year, drop = TRUE)
)
apps_est_q <- apps_q_panel %>% filter(in_br_era)
Log Units + Log Projects
apps_est_q_win <- apps_est_q %>%
mutate(rel_q = if_else(!is.na(g), t - g, NA_integer_)) %>%
filter(is.na(g) | dplyr::between(rel_q, -8L, 8L))
# --- re-run SA event studies on the restricted sample ---
m_sa_units_q_win <- feols(
y_units ~ sunab(g, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = apps_est_q_win
)
## NOTE: 21 observations removed because of NA values (RHS: 21).
## The variables 't::5:cohort::8087', 't::8:cohort::8089' and 't::8:cohort::8092' have been removed because of collinearity (see $collin.var).
m_sa_projects_q_win <- feols(
y_projects ~ sunab(g, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = apps_est_q_win
)
## NOTE: 21 observations removed because of NA values (RHS: 21).
## The variables 't::5:cohort::8087', 't::8:cohort::8089' and 't::8:cohort::8092' have been removed because of collinearity (see $collin.var).
# --- plots, zoomed to ±8 quarters ---
iplot(
m_sa_units_q_win,
ref.line = 0,
xlim = c(-8, 8),
main = "BR window ending → log(1+units), quarterly (±8Q)",
xlab = "Quarters relative to BR end (0 = first BR-off quarter)",
ylab = "ATT relative to quarter -1"
)

iplot(
m_sa_projects_q_win,
ref.line = 0,
xlim = c(-8, 8),
main = "BR window ending → log(1+projects), quarterly (±8Q)",
xlab = "Quarters relative to BR end (0 = first BR-off quarter)",
ylab = "ATT relative to quarter -1"
)

summary(apps_est_q_win$y_units)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.6931 3.7553 4.7318 4.7663 5.8763 9.6172
# apps_est_q <- apps_est_q %>%
# mutate(post_end = !is.na(g) & t >= g)
#
# m_post_q <- feols(
# y_units ~ post_end | juris_clean + region_y,
# cluster = ~juris_clean,
# data = apps_est_q
# )
#etable(m_post_q)
# do agg estimates from m_sa_units_q_win
summary(m_sa_units_q_win, agg = "att")
## OLS estimation, Dep. Var.: y_units
## Observations: 643
## Fixed-effects: juris_clean: 212, region_y: 16
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT -0.360989 0.244807 -1.47459 0.14181
## ... 3 variables were removed because of collinearity (t::5:cohort::8087, t::8:cohort::8089 and t::8:cohort::8092)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.787401 Adj. R2: 0.335731
## Within R2: 0.313117
summary(m_sa_projects_q_win, agg = "att")
## OLS estimation, Dep. Var.: y_projects
## Observations: 643
## Fixed-effects: juris_clean: 212, region_y: 16
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT -0.192409 0.072928 -2.63835 0.008952 **
## ... 3 variables were removed because of collinearity (t::5:cohort::8087, t::8:cohort::8089 and t::8:cohort::8092)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.248495 Adj. R2: 0.633486
## Within R2: 0.407031
Avg Units / Project
# --- average units per project (quarter) ---
apps_est_q_win_upp <- apps_est_q_win %>%
mutate(
avg_units_per_project = if_else(projects_q > 0, units_q / projects_q, NA_real_),
y_upp = log(avg_units_per_project), # intensive margin
# optional: keep finite logs only
y_upp = if_else(is.finite(y_upp), y_upp, NA_real_)
) %>%
filter(!is.na(y_upp)) # drops quarters with 0 projects (and any weird cases)
m_sa_upp_q_win <- feols(
y_upp ~ sunab(g, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = apps_est_q_win_upp
)
## NOTE: 21 observations removed because of NA values (RHS: 21).
## The variables 't::5:cohort::8087', 't::8:cohort::8089' and 't::8:cohort::8092' have been removed because of collinearity (see $collin.var).
etable(m_sa_upp_q_win)
## m_sa_upp_q_win
## Dependent Var.: y_upp
##
## t = -8 -0.5702 (0.6767)
## t = -7 -0.8268 (0.7369)
## t = -6 0.0625 (0.4536)
## t = -5 0.8726* (0.4187)
## t = -4 0.5016 (0.3293)
## t = -3 -0.1323 (0.2209)
## t = -2 0.2213 (0.2328)
## t = 0 -0.1530 (0.2192)
## t = 1 0.0058 (0.2018)
## t = 2 0.3656 (0.2502)
## t = 3 -0.1047 (0.3041)
## t = 4 -0.0552 (0.2963)
## t = 5 -0.6100. (0.3376)
## t = 6 -0.5313 (0.5047)
## t = 7 0.5960 (0.5648)
## t = 8 -0.0147 (0.5528)
## Fixed-Effects: -----------------
## juris_clean Yes
## region_y Yes
## _______________ _________________
## S.E.: Clustered by: juris_clean
## Observations 643
## R2 0.68223
## Within R2 0.32713
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
iplot(
m_sa_upp_q_win,
ref.line = 0,
xlim = c(-8, 8),
main = "BR window ending → log(avg units per project), quarterly (±8Q)",
xlab = "Quarters relative to BR end (0 = first BR-off quarter)",
ylab = "ATT relative to quarter -1"
)

% Projects bigger than pre-period 90th percentile
quarter_id <- function(d) year(d) * 4L + quarter(d)
# ----------------------------
# 1) project-level apps with robust date parsing
# ----------------------------
apps_raw <- read_csv("data/hcd_housing/raw/tablea.csv") %>%
filter(UNIT_CAT == "5+", !(ABOVE_MOD_INCOME == TOT_PROPOSED_UNITS)) %>%
clean_names()
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
## Rows: 302094 Columns: 27
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (15): JURIS_NAME, CNTY_NAME, PRIOR_APN, APN, STREET_ADDRESS, PROJECT_NA...
## dbl (11): YEAR, VLOW_INCOME_DR, VLOW_INCOME_NDR, LOW_INCOME_DR, LOW_INCOME_...
## date (1): APP_SUBMIT_DT
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
pick1 <- function(cands, nm) {
out <- intersect(cands, nm)[1]
if (is.na(out)) NA_character_ else out
}
date_col <- pick1(c("app_submit_dt","app_submit_date","submitted_dt","submitted_date"), names(apps_raw))
units_col <- pick1(c("tot_proposed_units","total_proposed_units","proposed_units","tot_units","units"), names(apps_raw))
juris_col <- pick1(c("juris_name","jurisdiction","jurisdiction_name","juris"), names(apps_raw))
county_col<- pick1(c("cnty_name","county","county_name"), names(apps_raw))
if (any(is.na(c(date_col, units_col, juris_col, county_col)))) {
stop(paste(
"Missing needed column.",
"\n date_col =", date_col,
"\n units_col =", units_col,
"\n juris_col =", juris_col,
"\n county_col=", county_col
))
}
id_col <- pick1(c("app_id","application_id","project_id","appl_id","project_number","application_number"), names(apps_raw))
date_orders <- c(
"mdy", "mdy HMS", "mdy HM",
"ymd", "ymd HMS", "ymd HM",
"m/d/Y", "m/d/y",
"Y-m-d", "Y-m-d H:M:S",
"Y/m/d", "Y/m/d H:M:S"
)
apps_proj <- apps_raw %>%
mutate(
project_id = if (is.na(id_col)) row_number() else as.character(.data[[id_col]]),
app_date = suppressWarnings(parse_date_time(.data[[date_col]], orders = date_orders, tz = "UTC")) %>% as.Date(),
proj_units = readr::parse_number(as.character(.data[[units_col]])),
juris_clean = clean_name(.data[[juris_col]]),
county_clean = clean_name(.data[[county_col]]),
region = county_to_region(county_clean),
q_start = floor_date(app_date, "quarter"),
year = year(q_start),
region_y = interaction(region, year, drop = TRUE)
) %>%
filter(!is.na(q_start), app_date > as.Date("2016-01-01")) %>%
group_by(juris_clean, county_clean, region,region_y, q_start, project_id) %>%
summarise(
proj_units = if (all(is.na(proj_units))) NA_real_ else max(proj_units, na.rm = TRUE),
.groups = "drop"
) %>%
filter(!is.na(proj_units))
# ----------------------------
# 2) attach BR timing; define event time (g = first BR-off quarter)
# ----------------------------
apps_proj <- apps_proj %>%
left_join(
br_windows %>% select(juris_clean, county_clean, region, br_start, br_end_eff),
by = c("juris_clean","county_clean","region")
) %>%
mutate(
br_start_q = floor_date(br_start, "quarter"),
t = quarter_id(q_start),
br_end_q = floor_date(br_end_eff, "quarter"),
g = if_else(is.na(br_end_eff), NA_integer_, quarter_id(br_end_q %m+% months(3))),
rel_q = if_else(!is.na(g), t - g, NA_integer_),
in_br_era = q_start >= br_start_q,
# keeps cities that actually have (or could have) a BR window
ever_br = is.na(br_end_eff) | (br_end_eff >= br_start)
) %>%
filter(ever_br)
# ----------------------------
# 3) define juris-specific pre-period p90 size, using ALL BR-ERA pre quarters
# ----------------------------
pre_cut <- apps_proj %>%
filter(in_br_era) %>%
mutate(pre_period = if_else(is.na(g), TRUE, t < g))
pre_p90_by_juris <- pre_cut %>%
group_by(juris_clean) %>%
summarise(
n_pre_projects = sum(pre_period),
p90_pre_size = if (sum(pre_period) == 0) NA_real_
else as.numeric(quantile(proj_units[pre_period], 0.75, na.rm = TRUE, type = 7)),
.groups = "drop"
) %>%
# optional: require some minimum pre support for a stable p90
filter(n_pre_projects >= 10, !is.na(p90_pre_size))
apps_proj2 <- apps_proj %>%
left_join(pre_p90_by_juris, by = "juris_clean") %>%
filter(!is.na(p90_pre_size)) %>%
mutate(
large = proj_units > p90_pre_size
)
# ----------------------------
# 4) collapse to juris × quarter: share large
# ----------------------------
apps_share_q <- apps_proj2 %>%
group_by(juris_clean, region, q_start) %>%
summarise(
projects_q = n(),
share_large_q = mean(large, na.rm = TRUE),
.groups = "drop"
) %>%
left_join(
apps_proj2 %>% distinct(juris_clean, region, region_y, br_start_q, g),
by = c("juris_clean","region")
) %>%
mutate(
t = quarter_id(q_start),
rel_q = if_else(!is.na(g), t - g, NA_integer_),
region_t = interaction(region, t, drop = TRUE),
in_br_era = q_start >= br_start_q
) %>%
filter(in_br_era) %>%
filter(projects_q > 0)
## Warning in left_join(., apps_proj2 %>% distinct(juris_clean, region, region_y, : Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
## "many-to-many"` to silence this warning.
# ----------------------------
# 5) staggered DiD on share_large (±8 quarters)
# ----------------------------
apps_share_q_win <- apps_share_q %>%
filter(is.na(g) | dplyr::between(rel_q, -8L, 8L))
m_share_large <- feols(
share_large_q ~ sunab(g, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = apps_share_q_win
)
## NOTE: 32 observations removed because of NA values (RHS: 32).
# average ATT (single number)
summary(m_share_large, agg = "att")
## OLS estimation, Dep. Var.: share_large_q
## Observations: 761
## Fixed-effects: juris_clean: 16, region_y: 32
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT 0.078278 0.041602 1.88162 0.079439 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.134423 Adj. R2: 0.776109
## Within R2: 0.758532
# event study plot
iplot(
m_share_large,
ref.line = 0,
xlim = c(-8, 8),
main = "% projects > pre-period 90th percentile size, quarterly (±8Q)",
xlab = "Quarters relative to BR end (0 = first BR-off quarter)",
ylab = "ATT relative to quarter -1"
)

summary(apps_share_q$share_large_q)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 0.0000 0.2500 0.3074 0.5000 1.0000
ON did
quarter_id <- function(d) year(d) * 4L + quarter(d)
# quarter collapse
apps_q_on <- apps_br %>%
mutate(
q_start = floor_date(month_start, "quarter"),
units = replace_na(units, 0),
projects = replace_na(projects, 0)
) %>%
group_by(juris_clean, region, q_start) %>%
summarise(
units_q = sum(units, na.rm = TRUE),
projects_q = sum(projects, na.rm = TRUE),
.groups = "drop"
)
# balanced quarter panel over the observed quarter range
q_min <- min(apps_q_on$q_start, na.rm = TRUE)
q_max <- max(apps_q_on$q_start, na.rm = TRUE)
apps_q_panel_on <- apps_q_on %>%
group_by(juris_clean, region) %>%
complete(
q_start = seq(q_min, q_max, by = "quarter"),
fill = list(units_q = 0, projects_q = 0)
) %>%
ungroup() %>%
left_join(
br_windows %>%
select(juris_clean, region, br_start, br_end_eff),
by = c("juris_clean", "region")
) %>%
mutate(
t = quarter_id(q_start),
br_start_q = floor_date(br_start, "quarter"),
# did BR ever actually turn on?
# (your construction makes br_end_eff < br_start when adopted before due_date)
br_ever_on = is.na(br_end_eff) | (br_end_eff >= br_start),
# cohort g_on = first FULL quarter when BR is on (quarter after due-date quarter)
g_on = if_else(br_ever_on, quarter_id(br_start_q %m+% months(3)), NA_integer_),
rel_q_on = if_else(!is.na(g_on), t - g_on, NA_integer_),
y_units = log1p(units_q),
y_projects = log1p(projects_q),
avg_units_per_project = if_else(projects_q > 0, units_q / projects_q, NA_real_),
y_upp = if_else(is.finite(log(avg_units_per_project)), log(avg_units_per_project), NA_real_),
year = year(q_start),
region_t = interaction(region, t, drop = TRUE),
region_y = interaction(region, year, drop = TRUE)
)
apps_on_treated <- apps_q_panel_on %>%
filter(!is.na(g_on), dplyr::between(rel_q_on, -8L, 8L))
t_keep <- apps_on_treated %>% distinct(t) %>% pull(t)
apps_est_q_on_win <- apps_q_panel_on %>%
filter(t %in% t_keep) %>%
filter(is.na(g_on) | dplyr::between(rel_q_on, -8L, 8L))
m_sa_units_q_on <- feols(
y_units ~ sunab(g_on, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = apps_est_q_on_win
)
## NOTE: 220 observations removed because of NA values (RHS: 220).
## The variables 't::-7:cohort::8087', 't::-6:cohort::8094', 't::-5:cohort::8089', 't::-5:cohort::8097', 't::-3:cohort::8087', 't::-2:cohort::8094' and 7 others have been removed because of collinearity (see $collin.var).
m_sa_projects_q_on <- feols(
y_projects ~ sunab(g_on, t, ref.p = -1) | juris_clean + region_y ,
cluster = ~juris_clean,
data = apps_est_q_on_win
)
## NOTE: 220 observations removed because of NA values (RHS: 220).
## The variables 't::-7:cohort::8087', 't::-6:cohort::8094', 't::-5:cohort::8089', 't::-5:cohort::8097', 't::-3:cohort::8087', 't::-2:cohort::8094' and 7 others have been removed because of collinearity (see $collin.var).
# average ATT (single number)
summary(m_sa_units_q_on, agg = "att")
## OLS estimation, Dep. Var.: y_units
## Observations: 4,521
## Fixed-effects: juris_clean: 280, region_y: 22
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT -0.147911 0.086855 -1.70298 0.089686 .
## ... 13 variables were removed because of collinearity (t::-7:cohort::8087, t::-6:cohort::8094 and 11 others [full set in $collin.var])
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.62962 Adj. R2: 0.315511
## Within R2: 0.008916
summary(m_sa_projects_q_on, agg = "att")
## OLS estimation, Dep. Var.: y_projects
## Observations: 4,521
## Fixed-effects: juris_clean: 280, region_y: 22
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT -0.037627 0.018677 -2.01466 0.044899 *
## ... 13 variables were removed because of collinearity (t::-7:cohort::8087, t::-6:cohort::8094 and 11 others [full set in $collin.var])
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.353042 Adj. R2: 0.516858
## Within R2: 0.009583
apps_upp_on <- apps_est_q_on_win %>% filter(!is.na(y_upp))
m_sa_upp_q_on <- feols(
y_upp ~ sunab(g_on, t, ref.p = -1) | juris_clean ,
cluster = ~juris_clean,
data = apps_upp_on
)
## NOTE: 76 observations removed because of NA values (RHS: 76).
summary(m_sa_upp_q_on, agg = "att")
## OLS estimation, Dep. Var.: y_upp
## Observations: 961
## Fixed-effects: juris_clean: 251
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT 0.216092 0.157609 1.37106 0.17159
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.852797 Adj. R2: 0.234749
## Within R2: 0.062168
quarter_id <- function(d) year(d) * 4L + quarter(d)
# project-level tablea parsing (use your existing robust block)
# assumes you already created: apps_proj (juris_clean, county_clean, region, q_start, proj_units)
apps_proj_on <- apps_proj %>%
left_join(
br_windows %>% select(juris_clean, county_clean, region, br_start, br_end_eff),
by = c("juris_clean","county_clean","region", "br_start", "br_end_eff")
) %>%
mutate(
br_start_q = floor_date(br_start, "quarter"),
t = quarter_id(q_start),
br_ever_on = is.na(br_end_eff) | (br_end_eff >= br_start),
g_on = if_else(br_ever_on, quarter_id(br_start_q %m+% months(3)), NA_integer_),
pre_period = !is.na(g_on) & (t < g_on)
)
# juris-specific pre p90 (require some support)
pre_p90_by_juris_on <- apps_proj_on %>%
group_by(juris_clean) %>%
summarise(
n_pre_projects = sum(pre_period, na.rm = TRUE),
p90_pre_size = if (sum(pre_period, na.rm = TRUE) == 0) NA_real_
else as.numeric(quantile(proj_units[pre_period], 0.90, na.rm = TRUE, type = 7)),
.groups = "drop"
) %>%
filter(n_pre_projects >= 5, !is.na(p90_pre_size))
apps_proj_on2 <- apps_proj_on %>%
left_join(pre_p90_by_juris_on, by = "juris_clean") %>%
filter(!is.na(p90_pre_size)) %>%
mutate(large = proj_units > p90_pre_size)
# collapse to share large per quarter
apps_share_q_on <- apps_proj_on2 %>%
group_by(juris_clean, region, q_start) %>%
summarise(
projects_q = n(),
share_large_q = mean(large, na.rm = TRUE),
.groups = "drop"
) %>%
left_join(
apps_proj_on2 %>% distinct(juris_clean, region, g_on, br_start_q),
by = c("juris_clean","region")
) %>%
mutate(
t = quarter_id(q_start),
rel_q_on = if_else(!is.na(g_on), t - g_on, NA_integer_),
region_t = interaction(region, t, drop = TRUE),
year = year(q_start),
region_y = interaction(region, year, drop = TRUE)
) %>%
filter(projects_q > 0)
# ±8 window and align controls to treated calendar quarters
treated_share <- apps_share_q_on %>%
filter(!is.na(g_on), dplyr::between(rel_q_on, -8L, 8L))
t_keep_share <- treated_share %>% distinct(t) %>% pull(t)
apps_share_q_on_win <- apps_share_q_on %>%
filter(t %in% t_keep_share) %>%
filter(is.na(g_on) | dplyr::between(rel_q_on, -8L, 8L))
m_share_large_on <- feols(
share_large_q ~ sunab(g_on, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = apps_share_q_on_win
)
## The variables 't::-7:cohort::8087', 't::-6:cohort::8094', 't::-5:cohort::8089', 't::-5:cohort::8097', 't::-3:cohort::8087', 't::-2:cohort::8094' and 7 others have been removed because of collinearity (see $collin.var).
summary(m_share_large_on, agg = "att")
## OLS estimation, Dep. Var.: share_large_q
## Observations: 547
## Fixed-effects: juris_clean: 85, region_y: 22
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT -0.021215 0.043258 -0.490444 0.6251
## ... 13 variables were removed because of collinearity (t::-7:cohort::8087, t::-6:cohort::8094 and 11 others [full set in $collin.var])
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 0.269199 Adj. R2: 0.159011
## Within R2: 0.10186
OFF but with lagged building permits
# ============================================================
# BR OFF (lag +4 quarters) → QUARTERLY PERMITS
# Outcome: permits issued (Table A2) in 5+ projects where
# the project has ≥1 unit that is ≤ moderate income.
# Filter: BP_ISSUE_DT1 is not NA (so we can do quarters)
# ============================================================
a2_q <- read_csv("data/hcd_housing/raw/tablea2.csv", show_col_types = FALSE) %>%
clean_names() %>%
mutate(
juris_clean = clean_name(juris_name),
county_clean = clean_name(cnty_name),
region = county_to_region(county_clean)
) %>%
filter(region %in% region_deadlines$region) %>%
filter(unit_cat == "5+") %>%
filter(!is.na(bp_issue_dt1)) %>%
mutate(
q_start = floor_date(bp_issue_dt1, "quarter"),
t = quarter_id(q_start),
year = year(q_start),
# project has at least one ≤moderate unit (using BP income counts)
bp_le_mod_units =
coalesce(bp_vlow_income_dr, 0) + coalesce(bp_vlow_income_ndr, 0) +
coalesce(bp_low_income_dr, 0) + coalesce(bp_low_income_ndr, 0) +
coalesce(bp_mod_income_dr, 0) + coalesce(bp_mod_income_ndr, 0),
has_le_mod = bp_le_mod_units > 0
) %>%
filter(has_le_mod)
## Warning: One or more parsing issues, call `problems()` on your data frame for details,
## e.g.:
## dat <- vroom(...)
## problems(dat)
# collapse to juris × quarter (units issued in qualifying projects)
permits_lemod_5p_q <- a2_q %>%
group_by(juris_clean, county_clean, region, q_start) %>%
summarise(
permits_units_in_qualproj = sum(no_building_permits, na.rm = TRUE),
le_mod_units_in_qualproj = sum(bp_le_mod_units, na.rm = TRUE),
.groups = "drop"
) %>%
# balanced quarterly panel (fill missing quarters with 0)
group_by(juris_clean, county_clean, region) %>%
complete(
q_start = seq(min(q_start, na.rm = TRUE), max(q_start, na.rm = TRUE), by = "quarter"),
fill = list(permits_units_in_qualproj = 0, le_mod_units_in_qualproj = 0)
) %>%
ungroup() %>%
mutate(
t = quarter_id(q_start),
year = year(q_start),
region_y = interaction(region, year, drop = TRUE)
) %>%
left_join(
br_windows %>% select(juris_clean, county_clean, region, br_start, br_end_eff),
by = c("juris_clean", "county_clean", "region")
) %>%
mutate(
br_start_q = floor_date(br_start, "quarter"),
br_end_q = floor_date(br_end_eff, "quarter"),
# g_base = first BR-off quarter (quarter after adoption quarter)
g_base = if_else(is.na(br_end_eff), NA_integer_, quarter_id(br_end_q %m+% months(3))),
# lag the “event” by 4 quarters (≈ 1 year)
g_perm = if_else(is.na(g_base), NA_integer_, g_base + 4L),
in_br_era = q_start >= br_start_q,
rel_q = if_else(!is.na(g_perm), t - g_perm, NA_integer_),
y_perm_units = log1p(permits_units_in_qualproj),
y_lemod_units = log1p(le_mod_units_in_qualproj)
) %>%
filter(in_br_era) %>%
filter(is.na(g_perm) | dplyr::between(rel_q, -8L, 8L))
# --- event study: total permitted units in qualifying projects ---
m_br_off_perm_units_lag4 <- feols(
y_perm_units ~ sunab(g_perm, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = permits_lemod_5p_q
)
## NOTE: 23 observations removed because of NA values (RHS: 23).
## The variables 't::-3:cohort::8091', 't::2:cohort::8097' and 't::8:cohort::8092' have been removed because of collinearity (see $collin.var).
# --- event study: ≤moderate units (within qualifying projects) ---
m_br_off_lemod_units_lag4 <- feols(
y_lemod_units ~ sunab(g_perm, t, ref.p = -1) | juris_clean + region_y,
cluster = ~juris_clean,
data = permits_lemod_5p_q
)
## NOTE: 23 observations removed because of NA values (RHS: 23).
## The variables 't::-3:cohort::8091', 't::2:cohort::8097' and 't::8:cohort::8092' have been removed because of collinearity (see $collin.var).
summary(m_br_off_lemod_units_lag4, agg = "att")
## OLS estimation, Dep. Var.: y_lemod_units
## Observations: 874
## Fixed-effects: juris_clean: 139, region_y: 16
## Standard-errors: Clustered (juris_clean)
## Estimate Std. Error t value Pr(>|t|)
## ATT -0.64341 0.375362 -1.7141 0.088755 .
## ... 3 variables were removed because of collinearity (t::-3:cohort::8091, t::2:cohort::8097 and t::8:cohort::8092)
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## RMSE: 1.48706 Adj. R2: 0.33773
## Within R2: 0.200113
iplot(
m_br_off_lemod_units_lag4,
ref.line = 0,
xlim = c(-8, 8),
main = "BR ending → log(1+ ≤moderate units) in qualifying 5+ projects (lag +4Q)",
xlab = "Quarters relative to (BR-off +4Q), 0 = first lagged post quarter",
ylab = "ATT relative to quarter -1"
)

permits_lemod_5p_q %>%
group_by(juris_clean) %>%
summarise(
min_q = min(q_start),
max_q = max(q_start),
n_q = n(),
n_q_expected = as.integer((max_q - min_q) / ddays(91)) + 1, # rough; see exact below
n_distinct_q = n_distinct(q_start),
any_dupes = n_q != n_distinct_q
) %>%
summarise(
share_with_dupes = mean(any_dupes),
min_coverage = min(n_distinct_q),
max_coverage = max(n_distinct_q)
)
## # A tibble: 1 × 3
## share_with_dupes min_coverage max_coverage
## <dbl> <int> <int>
## 1 0 1 15
permits_lemod_5p_q %>%
group_by(juris_clean) %>%
summarise(
min_q = min(q_start),
max_q = max(q_start),
n_q = n_distinct(q_start),
n_expected = length(seq(min_q, max_q, by = "quarter")),
balanced_within = (n_q == n_expected)
) %>%
count(balanced_within)
## # A tibble: 1 × 2
## balanced_within n
## <lgl> <int>
## 1 TRUE 144
q_global <- seq(min(permits_lemod_5p_q$q_start),
max(permits_lemod_5p_q$q_start),
by = "quarter")
permits_lemod_5p_q %>%
group_by(juris_clean) %>%
summarise(n_q = n_distinct(q_start)) %>%
summarise(
min_nq = min(n_q),
max_nq = max(n_q),
all_same = (min_nq == max_nq),
global_expected = length(q_global)
)
## # A tibble: 1 × 4
## min_nq max_nq all_same global_expected
## <int> <int> <lgl> <int>
## 1 1 15 FALSE 15
q_global <- seq(min(permits_lemod_5p_q$q_start),
max(permits_lemod_5p_q$q_start),
by = "quarter")
permits_lemod_5p_q_rect <- permits_lemod_5p_q %>%
group_by(juris_clean, county_clean, region) %>%
complete(q_start = q_global,
fill = list(permits_units_in_qualproj = 0,
le_mod_units_in_qualproj = 0)) %>%
ungroup()