library(tidyverse)
library(lfe)
library(stargazer)
library(texreg)
agency_lvl <-
read_csv("clean_data/pruned_agency_lvl_transcal_panel_data.csv") %>%
mutate(
total_crime = violent_crime + property_crime,
log_tot_crime = log10(total_crime),
lpop = log10(total_population),
linc = log10(median_hh_income),
lemp = log10(num_employees),
city_lpop_2012 = log10(total_population_2012_5_yr),
city_linc_2012 = log10(median_hh_income_2012_5_yr),
total_pay = base_plus_overtime,
base_pay = Base_Pay,
overtime_pay = Overtime_Pay
) %>%
filter(num_pruned_employees > 4)
#View(agency_lvl %>% filter(Year > 2014) %>% select(Agency, total_population, median_hh_income, everything()))
filtered_data <-
agency_lvl %>%
filter(dept == "Police" | is.na(dept), position == "Officer-Firefighters" | is.na(position)) %>%
select(Year, county, city = Agency, city_hpi, county_hpi, county_loo_hpi, city_lpop_2012, city_linc_2012, avg_wage_2020, everything())
city_2016 <- filtered_data %>%
filter(Year == 2016) %>%
select(city, city_hpi_2016 = city_hpi, county_loo_hpi_2016 = county_loo_hpi)
# Then, join these back to the original data and calculate the normalized indices
normalized_data <-
filtered_data %>%
left_join(city_2016, by = "city") %>%
mutate(
city_hpi_norm = city_hpi / city_hpi_2016,
county_loo_hpi_norm = county_loo_hpi / county_loo_hpi_2016
) %>%
select(-city_hpi_2016, -county_loo_hpi_2016) %>%
filter(Year > 2009, city != "[Not In A Place]", Year < 2022, is.finite(lpop), is.finite(linc)) %>%
select(Year, city, city_hpi_norm, county_loo_hpi_norm, lpop, linc, everything())
normalized_data %>% select(city, county, Year, city_hpi_norm, county_loo_hpi, city_lpop_2012, city_linc_2012, avg_wage_2020) %>% View()
1. Re-do old regressions with HPI normalized to 1
reg1 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 | county + Year | 0 | city, data = normalized_data)
reg2 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 | county + Year | 0 | city, data = normalized_data)
reg3 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 | Year | 0 | city, data = normalized_data)
reg4 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 | Year | 0 | city, data = normalized_data)
reg5 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 | city + Year | 0 | city, data = normalized_data)
reg6 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 | city + Year | 0 | city, data = normalized_data)
# reg1_m <- extract(reg1, include.fstatistic = T)
# reg2_m <- extract(reg2, include.fstatistic = T)
# reg3_m <- extract(reg3, include.fstatistic = T)
# reg4_m <- extract(reg4, include.fstatistic = T)
screenreg(list(reg1, reg2, reg3, reg4, reg5, reg6))
##
## =======================================================================================================
## Model 1 Model 2 Model 3 Model 4 Model 5 Model 6
## -------------------------------------------------------------------------------------------------------
## county_loo_hpi_norm -2.32 ** 11.93 *** -2.38 ** 12.12 *** -2.74 ** 13.53 ***
## (0.81) (1.68) (0.87) (1.92) (1.03) (2.19)
## city_lpop_2012 -0.62 ** -0.64 **
## (0.20) (0.21)
## county_loo_hpi_norm:city_lpop_2012 0.59 ** 0.60 ** 0.69 **
## (0.18) (0.20) (0.24)
## city_linc_2012 2.15 *** 2.35 ***
## (0.36) (0.42)
## county_loo_hpi_norm:city_linc_2012 -2.38 *** -2.42 *** -2.70 ***
## (0.35) (0.39) (0.45)
## -------------------------------------------------------------------------------------------------------
## Num. obs. 874 874 874 874 874 874
## R^2 (full model) 0.62 0.69 0.57 0.63 0.75 0.76
## R^2 (proj model) 0.28 0.41 0.27 0.37 0.37 0.39
## Adj. R^2 (full model) 0.60 0.67 0.56 0.62 0.69 0.70
## Adj. R^2 (proj model) 0.25 0.38 0.26 0.37 0.23 0.26
## Num. groups: county 33 33
## Num. groups: Year 9 9 9 9 9 9
## Num. groups: city 152 152
## =======================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
reg1 <- felm(city_hpi_norm ~ county_loo_hpi_norm | Year | 0 | city, data = normalized_data)
reg1_m <- extract(reg1, include.fstatistic = T)
screenreg(list(reg1_m))
##
## ==================================
## Model 1
## ----------------------------------
## county_loo_hpi_norm 0.34 *
## (0.16)
## ----------------------------------
## Num. obs. 874
## R^2 (full model) 0.52
## R^2 (proj model) 0.18
## Adj. R^2 (full model) 0.51
## Adj. R^2 (proj model) 0.17
## F statistic (full model) 102.07
## F (full model): p-value 0.00
## F statistic (proj model) 4.37
## F (proj model): p-value 0.04
## Num. groups: Year 9
## ==================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
2. Manual 2 Stage Regression
pre_period_data <- normalized_data %>% filter(Year >= 2010 & Year <= 2016)
post_period_data <- normalized_data %>% filter(Year >= 2017 & Year <= 2021)
model <- lm(city_hpi_norm ~ lpop + linc + county_loo_hpi_norm + Year, data = pre_period_data)
post_period_data$predicted_city_hpi <- predict(model, newdata = post_period_data)
post_period_data$error <- post_period_data$city_hpi_norm - post_period_data$predicted_city_hpi
ggplot(post_period_data, aes(x = city_hpi_norm, y = error)) +
geom_point() +
geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
labs(title = "Prediction Errors vs. Post-Period Normalized City HPI",
x = "Post-Period Normalized City HPI (2016 = 1)",
y = "Prediction Error")

3. Replacing County FE with County Wages
normalized_data
normalized_data <-
normalized_data %>%
mutate(avg_wage_2020_1000 = avg_wage_2020 / 1000)
reg1 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 + avg_wage_2020_1000 | Year | 0 | city, data = normalized_data)
reg2 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 + avg_wage_2020_1000 | Year | 0 | city, data = normalized_data)
# reg1_m <- extract(reg1, include.fstatistic = T)
# reg2_m <- extract(reg2, include.fstatistic = T)
screenreg(list(reg1, reg2))
##
## =========================================================
## Model 1 Model 2
## ---------------------------------------------------------
## county_loo_hpi_norm -2.41 ** 12.03 ***
## (0.93) (1.84)
## city_lpop_2012 -0.65 **
## (0.23)
## avg_wage_2020_1000 0.01 0.05
## (0.03) (0.03)
## county_loo_hpi_norm:city_lpop_2012 0.61 **
## (0.21)
## city_linc_2012 2.22 ***
## (0.37)
## county_loo_hpi_norm:city_linc_2012 -2.40 ***
## (0.38)
## ---------------------------------------------------------
## Num. obs. 874 874
## R^2 (full model) 0.57 0.65
## R^2 (proj model) 0.28 0.41
## Adj. R^2 (full model) 0.56 0.64
## Adj. R^2 (proj model) 0.27 0.40
## Num. groups: Year 9 9
## =========================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05