library(tidyverse)
library(sandwich)
library(lmtest)
library(plm)
I first define a quit as an employee (identified by name only) appearing in the data one year in the police department and then not appearing in the data the next year at the same department. I then calculate the average wage of all employees in the same agency in the same year, excluding the employee in question. I then run a linear probability model and a probit model with and without MSA fixed effects. I cluster the standard errors at the agency level. I then repeat the analysis, but this time I define a quit as an employee appearing in the data one year in the police department and then just dropping out of the dataset completely.
I restrict to individuals at police departments. I allow for position titles indicating both officers and managers because otherwise we would count it as a quit if an officer got promoted from officer to manager in the second set of analyses
# Create quits
df <-
read_csv("clean_data/pruned_transcal_panel_data.csv") %>%
filter(
dept == "Police",
position == "Officer-Firefighters",
!(str_detect(Employee_Name, ".*[0-9].*")),
!(str_detect(Employee_Name, "Redacted")),
!(Employee_Name %in% c("A N"))
) %>%
mutate(crime = violent_crime + property_crime)
df_grouped <- df %>%
group_by(Employee_Name) %>%
arrange(Employee_Name, Year) %>%
mutate(
quit = ifelse(dplyr::lead(Agency) != Agency, 1, 0)
) %>%
ungroup() %>%
select(Employee_Name, Year, Agency, quit, base_plus_overtime, everything())
df_grouped %>%
count(quit)
# Note that 1 is definitely quit, 0 is definitely not quit. NA means this is the last year we observed this person in the data, which if it's not 2022, means they could have quit working as a policeman.
df_grouped %>%
group_by(Agency) %>%
summarise(n = n(), n_quits = sum(quit, na.rm = TRUE), prop_quits = (n_quits / n)) %>%
ggplot(aes(prop_quits)) +
geom_histogram() +
scale_x_continuous(labels = scales::percent_format()) +
labs(x = "Proportion of Employees that Quit for Another Agency", y = "Number of Agencies")
df_avg_wage <- df_grouped %>%
group_by(Agency, Year) %>%
mutate(avg_totalwage_leave_oneout = (sum(base_plus_overtime) - base_plus_overtime) / (n() - 1)) %>%
ungroup()
lpm_model <- plm(quit ~ crime + avg_totalwage_leave_oneout,
data = df_avg_wage,
index = c("Agency", "Employee_Name"),
model = "pooling")
# Clustered standard errors at the agency level
lpm_se <- coeftest(lpm_model, vcov = vcovHC(lpm_model, type = "HC1", cluster = "group"))
print(lpm_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.27e-03 2.21e-02 0.37 0.71
## crime -4.50e-08 4.84e-08 -0.93 0.35
## avg_totalwage_leave_oneout 1.10e-07 1.88e-07 0.59 0.56
probit_model <- glm(quit ~ crime + avg_totalwage_leave_oneout,
family = binomial(link = "probit"),
data = df_avg_wage)
# Robust standard errors clustered at the agency level
probit_se <- coeftest(probit_model, vcov = vcovCL(probit_model, cluster = ~ Agency))
print(probit_se)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.31e+00 4.68e-01 -4.93 8.2e-07 ***
## crime -1.06e-06 1.20e-06 -0.89 0.38
## avg_totalwage_leave_oneout 2.36e-06 3.86e-06 0.61 0.54
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
lpm_model_msa_fe <- plm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa),
data = df_avg_wage,
index = c("Employee_Name", "Year", "msa"),
model = "within")
# Clustered standard errors at the agency level
lpm_msa_fe_se <- coeftest(lpm_model_msa_fe, vcov = vcovHC(lpm_model_msa_fe, type = "HC1", cluster = "group"))
lpm_se_matrix <- as.matrix(lpm_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)
print(lpm_se_filtered_df)
## Estimate Std. Error t value Pr(>|t|)
## crime -3.43e-07 4.56e-07 -0.752 0.4519
## avg_totalwage_leave_oneout -2.05e-07 1.22e-07 -1.676 0.0937
probit_model_msa_fe <- glm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa),
family = binomial(link = "probit"),
data = df_avg_wage)
# Robust standard errors clustered at the agency level
probit_msa_fe_se <- coeftest(probit_model_msa_fe, vcov = vcovCL(probit_model_msa_fe, cluster = ~ Agency))
lpm_se_matrix <- as.matrix(probit_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)
print(lpm_se_filtered_df)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.20e+00 6.70e-01 -3.2897 0.001
## crime -1.58e-06 1.41e-06 -1.1180 0.264
## avg_totalwage_leave_oneout 5.42e-07 6.32e-06 0.0858 0.932
df_modified_quit <-
df_avg_wage %>%
mutate(quit = ifelse(is.na(quit) & Year != 2022, 1, quit))
lpm_model <- plm(quit ~ crime + avg_totalwage_leave_oneout,
data = df_modified_quit,
index = c("Agency", "Employee_Name"),
model = "pooling")
# Clustered standard errors at the agency level
lpm_se <- coeftest(lpm_model, vcov = vcovHC(lpm_model, type = "HC1", cluster = "group"))
print(lpm_se)
##
## t test of coefficients:
##
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.54e-01 2.09e-01 -0.74 0.462
## crime 2.61e-07 2.82e-07 0.93 0.355
## avg_totalwage_leave_oneout 4.32e-06 1.83e-06 2.36 0.018 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
probit_model <- glm(quit ~ crime + avg_totalwage_leave_oneout,
family = binomial(link = "probit"),
data = df_modified_quit)
# Robust standard errors clustered at the agency level
probit_se <- coeftest(probit_model, vcov = vcovCL(probit_model, cluster = ~ Agency))
print(probit_se)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.77e+00 6.10e-01 -2.91 0.0036 **
## crime 7.28e-07 8.53e-07 0.85 0.3929
## avg_totalwage_leave_oneout 1.18e-05 5.23e-06 2.25 0.0242 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Just quit ~ crime + avg_totalwage_leave_oneout + factor(msa)
lpm_model_msa_fe <- plm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa),
data = df_modified_quit,
index = c("Employee_Name", "Year", "Agency"),
model = "within")
# Clustered standard errors at the agency level
lpm_msa_fe_se <- coeftest(lpm_model_msa_fe, vcov = vcovHC(lpm_model_msa_fe, type = "HC1", cluster = "group"))
lpm_se_matrix <- as.matrix(lpm_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)
print(lpm_se_filtered_df)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.54e-01 2.09e-01 -0.736 0.4619
## crime 2.61e-07 2.82e-07 0.926 0.3546
## avg_totalwage_leave_oneout 4.32e-06 1.83e-06 2.361 0.0182
Add Year FE
lpm_model_msa_fe <- plm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa) + factor(Year),
data = df_modified_quit,
index = c("Employee_Name", "Year", "Agency"),
model = "within")
# Clustered standard errors at the agency level
lpm_msa_fe_se <- coeftest(lpm_model_msa_fe, vcov = vcovHC(lpm_model_msa_fe, type = "HC1", cluster = "group"))
lpm_se_matrix <- as.matrix(lpm_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)
print(lpm_se_filtered_df)
## Estimate Std. Error t value Pr(>|t|)
## crime 9.82e-07 4.91e-07 2.00 4.53e-02
## avg_totalwage_leave_oneout 2.88e-06 7.84e-07 3.67 2.39e-04
## factor(Year)2014 -1.37e-01 1.27e-02 -10.79 4.11e-27
## factor(Year)2015 -2.79e-01 1.03e-02 -27.22 5.75e-162
## factor(Year)2016 -1.82e-01 1.02e-02 -17.87 3.23e-71
## factor(Year)2017 1.90e-01 9.66e-03 19.63 1.84e-85
## factor(Year)2018 6.76e-02 9.41e-03 7.19 6.54e-13
## factor(Year)2019 2.59e-01 1.04e-02 24.78 7.80e-135
## factor(Year)2020 4.13e-01 1.14e-02 36.13 2.30e-282
## factor(Year)2021 1.12e+00 1.22e-02 91.81 0.00e+00
## factor(Year)2022 2.20e-01 1.57e-01 1.40 1.61e-01
probit_model_msa_fe <- glm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa),
family = binomial(link = "probit"),
data = df_modified_quit)
# Robust standard errors clustered at the agency level
probit_msa_fe_se <- coeftest(probit_model_msa_fe, vcov = vcovCL(probit_model_msa_fe, cluster = ~ Agency))
lpm_se_matrix <- as.matrix(probit_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)
print(lpm_se_filtered_df)
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.14e+00 7.33e-01 -2.91 0.00356
## crime 3.05e-06 1.58e-06 1.93 0.05303
## avg_totalwage_leave_oneout 2.03e-05 8.46e-06 2.40 0.01626