library(tidyverse)
library(sandwich)
library(lmtest)
library(plm)

I first define a quit as an employee (identified by name only) appearing in the data one year in the police department and then not appearing in the data the next year at the same department. I then calculate the average wage of all employees in the same agency in the same year, excluding the employee in question. I then run a linear probability model and a probit model with and without MSA fixed effects. I cluster the standard errors at the agency level. I then repeat the analysis, but this time I define a quit as an employee appearing in the data one year in the police department and then just dropping out of the dataset completely.

I restrict to individuals at police departments. I allow for position titles indicating both officers and managers because otherwise we would count it as a quit if an officer got promoted from officer to manager in the second set of analyses

# Create quits
df <-
  read_csv("clean_data/pruned_transcal_panel_data.csv") %>% 
  filter(
    dept == "Police", 
    position == "Officer-Firefighters",
    !(str_detect(Employee_Name, ".*[0-9].*")), 
    !(str_detect(Employee_Name, "Redacted")),
    !(Employee_Name %in% c("A N"))
  ) %>% 
  mutate(crime = violent_crime + property_crime)

df_grouped <- df %>% 
  group_by(Employee_Name) %>%
  arrange(Employee_Name, Year) %>% 
  mutate(
    quit = ifelse(dplyr::lead(Agency) != Agency, 1, 0)
  ) %>%
  ungroup() %>% 
  select(Employee_Name, Year, Agency, quit, base_plus_overtime, everything())

df_grouped %>% 
  count(quit)

# Note that 1 is definitely quit, 0 is definitely not quit. NA means this is the last year we observed this person in the data, which if it's not 2022, means they could have quit working as a policeman.

df_grouped %>% 
  group_by(Agency) %>% 
  summarise(n = n(), n_quits = sum(quit, na.rm = TRUE), prop_quits = (n_quits / n)) %>% 
  ggplot(aes(prop_quits)) +
  geom_histogram() +
  scale_x_continuous(labels = scales::percent_format()) +
  labs(x = "Proportion of Employees that Quit for Another Agency", y = "Number of Agencies")

df_avg_wage <- df_grouped %>%
  group_by(Agency, Year) %>%
  mutate(avg_totalwage_leave_oneout = (sum(base_plus_overtime) - base_plus_overtime) / (n() - 1)) %>%
  ungroup()

Just Counting Quits as Those Who Left for Another Agency

No MSA FE

LPM

lpm_model <- plm(quit ~ crime + avg_totalwage_leave_oneout, 
                 data = df_avg_wage, 
                 index = c("Agency", "Employee_Name"), 
                 model = "pooling")

# Clustered standard errors at the agency level
lpm_se <- coeftest(lpm_model, vcov = vcovHC(lpm_model, type = "HC1", cluster = "group"))

print(lpm_se)

## 
## t test of coefficients:
## 
##                             Estimate Std. Error t value Pr(>|t|)
## (Intercept)                 8.27e-03   2.21e-02    0.37     0.71
## crime                      -4.50e-08   4.84e-08   -0.93     0.35
## avg_totalwage_leave_oneout  1.10e-07   1.88e-07    0.59     0.56

Probit

probit_model <- glm(quit ~ crime + avg_totalwage_leave_oneout, 
                    family = binomial(link = "probit"), 
                    data = df_avg_wage)

# Robust standard errors clustered at the agency level
probit_se <- coeftest(probit_model, vcov = vcovCL(probit_model, cluster = ~ Agency))

print(probit_se)

## 
## z test of coefficients:
## 
##                             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                -2.31e+00   4.68e-01   -4.93  8.2e-07 ***
## crime                      -1.06e-06   1.20e-06   -0.89     0.38    
## avg_totalwage_leave_oneout  2.36e-06   3.86e-06    0.61     0.54    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

MSA FE

LPM

lpm_model_msa_fe <- plm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa), 
                        data = df_avg_wage, 
                        index = c("Employee_Name", "Year", "msa"), 
                        model = "within")

# Clustered standard errors at the agency level
lpm_msa_fe_se <- coeftest(lpm_model_msa_fe, vcov = vcovHC(lpm_model_msa_fe, type = "HC1", cluster = "group"))

lpm_se_matrix <- as.matrix(lpm_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)

print(lpm_se_filtered_df)

##                             Estimate Std. Error t value Pr(>|t|)
## crime                      -3.43e-07   4.56e-07  -0.752   0.4519
## avg_totalwage_leave_oneout -2.05e-07   1.22e-07  -1.676   0.0937

Probit

probit_model_msa_fe <- glm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa), 
                           family = binomial(link = "probit"), 
                           data = df_avg_wage)

# Robust standard errors clustered at the agency level
probit_msa_fe_se <- coeftest(probit_model_msa_fe, vcov = vcovCL(probit_model_msa_fe, cluster = ~ Agency))

lpm_se_matrix <- as.matrix(probit_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)

print(lpm_se_filtered_df)

##                             Estimate Std. Error z value Pr(>|z|)
## (Intercept)                -2.20e+00   6.70e-01 -3.2897    0.001
## crime                      -1.58e-06   1.41e-06 -1.1180    0.264
## avg_totalwage_leave_oneout  5.42e-07   6.32e-06  0.0858    0.932

Counting as a Quit if Exit from Data

df_modified_quit <-
  df_avg_wage %>% 
  mutate(quit = ifelse(is.na(quit) & Year != 2022, 1, quit))

No MSA FE

LPM

lpm_model <- plm(quit ~ crime + avg_totalwage_leave_oneout, 
                 data = df_modified_quit, 
                 index = c("Agency", "Employee_Name"), 
                 model = "pooling")

# Clustered standard errors at the agency level
lpm_se <- coeftest(lpm_model, vcov = vcovHC(lpm_model, type = "HC1", cluster = "group"))

print(lpm_se)

## 
## t test of coefficients:
## 
##                             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                -1.54e-01   2.09e-01   -0.74    0.462  
## crime                       2.61e-07   2.82e-07    0.93    0.355  
## avg_totalwage_leave_oneout  4.32e-06   1.83e-06    2.36    0.018 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Probit

probit_model <- glm(quit ~ crime + avg_totalwage_leave_oneout, 
                    family = binomial(link = "probit"), 
                    data = df_modified_quit)

# Robust standard errors clustered at the agency level
probit_se <- coeftest(probit_model, vcov = vcovCL(probit_model, cluster = ~ Agency))

print(probit_se)

## 
## z test of coefficients:
## 
##                             Estimate Std. Error z value Pr(>|z|)   
## (Intercept)                -1.77e+00   6.10e-01   -2.91   0.0036 **
## crime                       7.28e-07   8.53e-07    0.85   0.3929   
## avg_totalwage_leave_oneout  1.18e-05   5.23e-06    2.25   0.0242 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

MSA FE

LPM

Just quit ~ crime + avg_totalwage_leave_oneout + factor(msa)

lpm_model_msa_fe <- plm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa), 
                        data = df_modified_quit, 
                        index = c("Employee_Name", "Year", "Agency"), 
                        model = "within")

# Clustered standard errors at the agency level
lpm_msa_fe_se <- coeftest(lpm_model_msa_fe, vcov = vcovHC(lpm_model_msa_fe, type = "HC1", cluster = "group"))

lpm_se_matrix <- as.matrix(lpm_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)

print(lpm_se_filtered_df)

##                             Estimate Std. Error t value Pr(>|t|)
## (Intercept)                -1.54e-01   2.09e-01  -0.736   0.4619
## crime                       2.61e-07   2.82e-07   0.926   0.3546
## avg_totalwage_leave_oneout  4.32e-06   1.83e-06   2.361   0.0182

Add Year FE

lpm_model_msa_fe <- plm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa) + factor(Year), 
                        data = df_modified_quit, 
                        index = c("Employee_Name", "Year", "Agency"), 
                        model = "within")

# Clustered standard errors at the agency level
lpm_msa_fe_se <- coeftest(lpm_model_msa_fe, vcov = vcovHC(lpm_model_msa_fe, type = "HC1", cluster = "group"))

lpm_se_matrix <- as.matrix(lpm_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)

print(lpm_se_filtered_df)

##                             Estimate Std. Error t value  Pr(>|t|)
## crime                       9.82e-07   4.91e-07    2.00  4.53e-02
## avg_totalwage_leave_oneout  2.88e-06   7.84e-07    3.67  2.39e-04
## factor(Year)2014           -1.37e-01   1.27e-02  -10.79  4.11e-27
## factor(Year)2015           -2.79e-01   1.03e-02  -27.22 5.75e-162
## factor(Year)2016           -1.82e-01   1.02e-02  -17.87  3.23e-71
## factor(Year)2017            1.90e-01   9.66e-03   19.63  1.84e-85
## factor(Year)2018            6.76e-02   9.41e-03    7.19  6.54e-13
## factor(Year)2019            2.59e-01   1.04e-02   24.78 7.80e-135
## factor(Year)2020            4.13e-01   1.14e-02   36.13 2.30e-282
## factor(Year)2021            1.12e+00   1.22e-02   91.81  0.00e+00
## factor(Year)2022            2.20e-01   1.57e-01    1.40  1.61e-01

Probit

probit_model_msa_fe <- glm(quit ~ crime + avg_totalwage_leave_oneout + factor(msa), 
                           family = binomial(link = "probit"), 
                           data = df_modified_quit)

# Robust standard errors clustered at the agency level
probit_msa_fe_se <- coeftest(probit_model_msa_fe, vcov = vcovCL(probit_model_msa_fe, cluster = ~ Agency))

lpm_se_matrix <- as.matrix(probit_msa_fe_se)
lpm_se_filtered <- lpm_se_matrix[!grepl("factor\\(msa\\)", rownames(lpm_se_matrix)), ]
lpm_se_filtered_df <- as.data.frame(lpm_se_filtered)

print(lpm_se_filtered_df)

##                             Estimate Std. Error z value Pr(>|z|)
## (Intercept)                -2.14e+00   7.33e-01   -2.91  0.00356
## crime                       3.05e-06   1.58e-06    1.93  0.05303
## avg_totalwage_leave_oneout  2.03e-05   8.46e-06    2.40  0.01626

exercise_2_quits

Olivia Martin

2024-09-30

Just Counting Quits as Those Who Left for Another Agency

No MSA FE

LPM

Probit

MSA FE

LPM

Probit

Counting as a Quit if Exit from Data

No MSA FE

LPM

Probit

MSA FE

LPM

Probit