library(tidyverse) 
library(lfe)
library(stargazer)
library(texreg)
agency_lvl <- 
  read_csv("clean_data/pruned_agency_lvl_transcal_panel_data.csv") %>% 
  mutate(
    total_crime = violent_crime + property_crime,
    log_tot_crime = log10(total_crime),
    lpop = log10(total_population),
    linc = log10(median_hh_income),
    lemp = log10(num_employees), 
    city_lpop_2012 = log10(total_population_2012_5_yr),
    city_linc_2012 = log10(median_hh_income_2012_5_yr),  
    total_pay = base_plus_overtime,
    base_pay = Base_Pay,
    overtime_pay = Overtime_Pay
  ) %>% 
  filter(num_pruned_employees > 4)

#View(agency_lvl %>% filter(Year > 2014) %>% select(Agency, total_population, median_hh_income, everything()))

filtered_data <-
  agency_lvl %>%
  filter(dept == "Police" | is.na(dept), position == "Officer-Firefighters" | is.na(position)) %>% 
  select(Year, county, city = Agency, city_hpi, county_hpi, county_loo_hpi, city_lpop_2012, city_linc_2012, avg_wage_2020, everything()) 

city_2016 <- filtered_data %>%
    filter(Year == 2016) %>%
    select(city, city_hpi_2016 = city_hpi, county_loo_hpi_2016 = county_loo_hpi)

# Then, join these back to the original data and calculate the normalized indices
normalized_data <- 
    filtered_data %>%
    left_join(city_2016, by = "city") %>%
    mutate(
        city_hpi_norm = city_hpi / city_hpi_2016,
        county_loo_hpi_norm = county_loo_hpi / county_loo_hpi_2016
    ) %>%
    select(-city_hpi_2016, -county_loo_hpi_2016) %>% 
    filter(Year > 2009, city != "[Not In A Place]", Year < 2022, is.finite(lpop), is.finite(linc)) %>% 
    select(Year, city, city_hpi_norm, county_loo_hpi_norm, lpop, linc, everything())
normalized_data %>% select(city, county, Year, city_hpi_norm, county_loo_hpi, city_lpop_2012, city_linc_2012, avg_wage_2020) %>% View()

1. Re-do old regressions with HPI normalized to 1

reg1 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 | county + Year | 0 | city, data = normalized_data)
reg2 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 | county + Year | 0 | city, data = normalized_data)
reg3 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 | Year | 0 | city, data = normalized_data)
reg4 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 | Year | 0 | city, data = normalized_data)
reg5 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 | city + Year | 0 | city, data = normalized_data)
reg6 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 | city + Year | 0 | city, data = normalized_data)


# reg1_m <- extract(reg1, include.fstatistic = T)
# reg2_m <- extract(reg2, include.fstatistic = T)
# reg3_m <- extract(reg3, include.fstatistic = T)
# reg4_m <- extract(reg4, include.fstatistic = T)

screenreg(list(reg1, reg2, reg3, reg4, reg5, reg6))
## 
## =======================================================================================================
##                                     Model 1    Model 2     Model 3    Model 4     Model 5    Model 6   
## -------------------------------------------------------------------------------------------------------
## county_loo_hpi_norm                  -2.32 **   11.93 ***   -2.38 **   12.12 ***   -2.74 **   13.53 ***
##                                      (0.81)     (1.68)      (0.87)     (1.92)      (1.03)     (2.19)   
## city_lpop_2012                       -0.62 **               -0.64 **                                   
##                                      (0.20)                 (0.21)                                     
## county_loo_hpi_norm:city_lpop_2012    0.59 **                0.60 **                0.69 **            
##                                      (0.18)                 (0.20)                 (0.24)              
## city_linc_2012                                   2.15 ***               2.35 ***                       
##                                                 (0.36)                 (0.42)                          
## county_loo_hpi_norm:city_linc_2012              -2.38 ***              -2.42 ***              -2.70 ***
##                                                 (0.35)                 (0.39)                 (0.45)   
## -------------------------------------------------------------------------------------------------------
## Num. obs.                           874        874         874        874         874        874       
## R^2 (full model)                      0.62       0.69        0.57       0.63        0.75       0.76    
## R^2 (proj model)                      0.28       0.41        0.27       0.37        0.37       0.39    
## Adj. R^2 (full model)                 0.60       0.67        0.56       0.62        0.69       0.70    
## Adj. R^2 (proj model)                 0.25       0.38        0.26       0.37        0.23       0.26    
## Num. groups: county                  33         33                                                     
## Num. groups: Year                     9          9           9          9           9          9       
## Num. groups: city                                                                 152        152       
## =======================================================================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05
reg1 <- felm(city_hpi_norm ~ county_loo_hpi_norm | Year | 0 | city, data = normalized_data)

reg1_m <- extract(reg1, include.fstatistic = T)

screenreg(list(reg1_m))
## 
## ==================================
##                           Model 1 
## ----------------------------------
## county_loo_hpi_norm         0.34 *
##                            (0.16) 
## ----------------------------------
## Num. obs.                 874     
## R^2 (full model)            0.52  
## R^2 (proj model)            0.18  
## Adj. R^2 (full model)       0.51  
## Adj. R^2 (proj model)       0.17  
## F statistic (full model)  102.07  
## F (full model): p-value     0.00  
## F statistic (proj model)    4.37  
## F (proj model): p-value     0.04  
## Num. groups: Year           9     
## ==================================
## *** p < 0.001; ** p < 0.01; * p < 0.05

2. Manual 2 Stage Regression

pre_period_data <- normalized_data %>% filter(Year >= 2010 & Year <= 2016)
post_period_data <- normalized_data %>% filter(Year >= 2017 & Year <= 2021)
model <- lm(city_hpi_norm ~ lpop + linc + county_loo_hpi_norm + Year, data = pre_period_data)

post_period_data$predicted_city_hpi <- predict(model, newdata = post_period_data)

post_period_data$error <- post_period_data$city_hpi_norm - post_period_data$predicted_city_hpi

ggplot(post_period_data, aes(x = city_hpi_norm, y = error)) +
  geom_point() +
  geom_hline(yintercept = 0, linetype = "dashed", color = "red") +
  labs(title = "Prediction Errors vs. Post-Period Normalized City HPI",
       x = "Post-Period Normalized City HPI (2016 = 1)",
       y = "Prediction Error")

3. Replacing County FE with County Wages

normalized_data
normalized_data <- 
  normalized_data %>% 
  mutate(avg_wage_2020_1000 = avg_wage_2020 / 1000)

reg1 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_lpop_2012 + avg_wage_2020_1000 | Year | 0 | city, data = normalized_data)
reg2 <- felm(city_hpi_norm ~ county_loo_hpi_norm*city_linc_2012 + avg_wage_2020_1000 | Year | 0 | city, data = normalized_data)

# reg1_m <- extract(reg1, include.fstatistic = T)
# reg2_m <- extract(reg2, include.fstatistic = T)

screenreg(list(reg1, reg2))
## 
## =========================================================
##                                     Model 1    Model 2   
## ---------------------------------------------------------
## county_loo_hpi_norm                  -2.41 **   12.03 ***
##                                      (0.93)     (1.84)   
## city_lpop_2012                       -0.65 **            
##                                      (0.23)              
## avg_wage_2020_1000                    0.01       0.05    
##                                      (0.03)     (0.03)   
## county_loo_hpi_norm:city_lpop_2012    0.61 **            
##                                      (0.21)              
## city_linc_2012                                   2.22 ***
##                                                 (0.37)   
## county_loo_hpi_norm:city_linc_2012              -2.40 ***
##                                                 (0.38)   
## ---------------------------------------------------------
## Num. obs.                           874        874       
## R^2 (full model)                      0.57       0.65    
## R^2 (proj model)                      0.28       0.41    
## Adj. R^2 (full model)                 0.56       0.64    
## Adj. R^2 (proj model)                 0.27       0.40    
## Num. groups: Year                     9          9       
## =========================================================
## *** p < 0.001; ** p < 0.01; * p < 0.05