library(pacman)
p_load(tidyverse, ggthemes, usmap, gganimate, gifski, png, transformr, lfe, plotly, broom, gsynth, scales, Synth)
shr <- as.data.frame(read_csv("/Users/jenniputz/Downloads/shr.csv"))
statepop <- as.data.frame(read_csv("/Users/jenniputz/Downloads/statepop.csv"))
shr_2 <- shr %>% filter(additional_victim_count <= 2) %>%
filter(year >= "2000") %>%
group_by(state_abb, year) %>% summarize(murders = n()) %>%
replace_na(list(state_abb = "DC"))
names(shr_2) <- c("state_abbrev", "year", "murders")
statepop <- statepop %>% filter(state_abbrev != "FL")
state_data <- inner_join(shr_2, statepop) %>% mutate(murder_rate = murders/(pop/100000))
# need to drop washington dc, alabama, and florida (but florida is already filtered out since i filtered on >= 2000)
state_data <- state_data %>% filter(state_abbrev != "DC" & state_abbrev != "AL")
plot <- plot_usmap(data = state_data, regions = "states", values = "murder_rate")
plot <- plot + transition_states(year, transition_length = 2, state_length = 1) +
ease_aes('linear') +
labs(title = "Murder Rate per 100,000 by State 2000-2017",
subtitle = "Year: {closest_state}") +
scale_fill_continuous(low = "white", high = "blue", name = "Murder Rate") + theme(legend.position = "right")
plot
state_data <- state_data %>%
mutate(legalized = ifelse((state_abbrev == "CO" & year >= "2014") |
(state_abbrev == "WA" & year >= "2014") |
(state_abbrev == "OR" & year >= "2015") |
(state_abbrev == "AK" & year >= "2015") |
(state_abbrev == "NV" & year >= "2017"), 1, 0))
reg1 <- felm(log(murder_rate) ~ legalized | state_abbrev + year, data = state_data)
tidy(reg1, conf.int = TRUE, conf.level = 0.95)
## # A tibble: 1 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 legalized 0.00537 0.0615 0.0873 0.930 -0.115 0.126
Based on the results of the regression above, we find no evidence that marijuana legalization has a statistically significant relationships with murders per 100,000 population.
`%ni%` <- Negate(`%in%`)
legalizer_data <- state_data %>% filter(state_abbrev == "CO" | state_abbrev == "WA") %>%
group_by(year) %>% summarize(legal_rate = mean(murder_rate))
other_data <- state_data %>% filter(state_abbrev != "CO" & state_abbrev != "WA") %>%
group_by(year) %>% summarize(other_rate = mean(murder_rate))
plot_trends <- ggplot() +
geom_line(data = legalizer_data, aes(x = year, y = legal_rate), color = "blue") +
geom_line(data = other_data, aes(x = year, y = other_rate), color = "black") +
geom_vline(xintercept = 2014, linetype="dashed", color="black", size = .2) +
theme_minimal() +
labs(title = "Murder Rate per 100,000 for Colorado and Washington versus Other States",
x = "Year", y = "Murder Rate") +
annotate("text", x = 2015.7, y = 3, label = "Legalized States") +
annotate("text", x = 2015.7, y = 5, label = "Non-Legalized States")
plot_trends<- plotly::ggplotly(plot_trends, width = NULL, height = NULL,
tooltip = "all", dynamicTicks = FALSE, layerData = 1,
originalData = TRUE, source = "A")
plot_trends
In the above graph we can see that legalized states and non-legalized states both had increases in murders per 100,000 after 2014. The trends for Colorado and Washington follow closely with trends in other states.
state_data_dropped <- state_data %>% filter(state_abbrev != "CA" & state_abbrev != "OR"
& state_abbrev != "NV" & state_abbrev != "AK") %>% as.data.frame()
other_data_2 <- state_data_dropped %>% filter(state_abbrev != "CO" & state_abbrev != "WA") %>% pull(state_fips) %>% unique()
system.time(
out_co <- gsynth(murder_rate ~ legalized, data = state_data_dropped %>% filter(state_abbrev != "WA"), index = c("state_fips","year"), force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, inference = "parametric", nboots = 1000, parallel = FALSE)
)
co_est <- data.frame(
avgatt <- out_co$est.avg[1],
SE <- out_co$est.avg[2],
p <- out_co$est.avg[5],
state <- "COLORADO")
names(co_est) <- c("avgatt", "SE", "p", "state")
co_synth <- plot(out_co, type = "counterfactual", raw = "all", main="", theme.bw=TRUE) +
labs(title = "Trends in Murder Rate per 100,000 for Colorado and Synthetic Colorado",
x = "Year", y = "Murder Rate per 100,000") + theme(plot.title = element_text(size=12))
system.time(
out_wa <- gsynth(murder_rate ~ legalized, data = state_data_dropped %>% filter(state_abbrev != "CO"), index = c("state_fips","year"), force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, inference = "parametric", nboots = 1000, parallel = FALSE)
)
wa_est <- data.frame(
avgatt <- out_wa$est.avg[1],
SE <- out_wa$est.avg[2],
p <- out_wa$est.avg[5],
state <- "WASHINGTON")
names(wa_est) <- c("avgatt", "SE", "p", "state")
wa_synth <- plot(out_wa, type = "counterfactual", raw = "all", main="", theme.bw=TRUE) +
labs(title = "Trends in Murder Rate per 100,000 for Washington and Synthetic Washington",
x = "Year", y = "Murder Rate per 100,000") + theme(plot.title = element_text(size=12))
co_synth
wa_synth
The synthetic control method does not seem to produce much a difference between the synthetic states and legalized states. For Colorado, there does not seem to be any difference post-legalization for Colorado and synthetic Colorado. For Washington, the synthetic Washington estimate is slightly above Washington after 2015 but the estimates are following similar trends.
state_data_dropped2 <- state_data_dropped %>% filter(state_abbrev != "CO" &
state_abbrev != "WA")
other_data_3 <- state_data_dropped2 %>% filter(state_abbrev != "AR" &
state_abbrev != "ID" &
state_abbrev != "IN" &
state_abbrev != "MN" &
state_abbrev != "MS" &
state_abbrev != "NM" &
state_abbrev != "WI" &
state_abbrev != "WV" &
state_abbrev != "WY") %>% pull(state) %>% unique()
rows <- length(other_data_3)
stored <- data.frame(
avgatt = numeric(rows),
SE = numeric(rows),
p = numeric(rows),
state = character(rows)
)
for (i in 1:length(other_data_3)){
statename = other_data_3[i]
print(statename)
state_data_dropped2 <- state_data_dropped2 %>% mutate(legalized = ifelse((state == as.character(statename) & year >= 2014), 1, 0))
out <- gsynth(murder_rate ~ legalized, data = as.tibble(state_data_dropped2), index = c("state","year"), force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, inference = "parametric", nboots = 1000, parallel = TRUE)
stored[i,] = c(out$est.avg[1],
out$est.avg[2],
out$est.avg[5],
statename)
}
stored[,4] <- other_data_3
stored_merged <- rbind(stored, wa_est)
stored_merged <- rbind(stored_merged, co_est)
mse_plot <- ggplot() +
geom_histogram(data = stored_merged, aes(x = p, fill = ifelse(state == "WASHINGTON" | state == "COLORADO", "Treated", "Control")), stat = "count") +
scale_fill_manual(name = "State", values=c("grey40","purple2"))+
theme_minimal() + theme(axis.text.x = element_text(angle = 90)) +
labs(title = "Synthetic Control P-Values",
x = "p-value")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
mse_plot
Note: The gsynth package wanted to kick a few states out of the loop of control states due to the inability to complete the bootstrapping. Thus the following states do not have p-values computed: Arkansas, Idaho, Indiana, Minnesota, Mississippi, New Mexico, Wisconsin, West Virginia, and Wyoming.
Looking at the above graph, the p-values for CO and WA are not at the tail end of the distribution so we cannot reject our null hypothesis that legalization did not increase crime.
bc_data <- read.table("/Users/jenniputz/Downloads/bc.txt", sep=",", header = TRUE)
bc_data_agg <- bc_data %>% group_by(month) %>%
summarise(total_bc = sum(totals, na.rm=TRUE), total_lr = sum(long_gun, na.rm=TRUE), total_hg = sum(handgun, na.rm=TRUE))
bc_data_agg$year <- format(as.Date(bc_data_agg$month, format = "%Y-%M"), "%Y") %>% as.numeric()
bc_data_agg$month_2 <- substr(bc_data_agg$month,6,7)
bc_plot <- ggplot(data = bc_data_agg, aes(x = month)) +
geom_line(aes(y = total_bc, group = 1, color = "Total")) +
geom_line(aes(y = total_lr, group = 2, color = "Long Rifle")) +
geom_line(aes(y = total_hg, group = 3, color = "Handguns")) +
theme_minimal() +
scale_x_discrete(
breaks = unique(bc_data_agg$month),
labels = unique(bc_data_agg$month),
limits=c("1998-11","1999-11","2000-11","2001-11","2002-11","2003-11","2004-11",
"2005-11","2006-11","2007-11","2008-11","2009-11","2010-11","2011-11","2012-11",
"2013-11","2014-11","2015-11","2016-11","2017-11","2018-11")) +
theme(axis.text.x = element_text(angle = 45)) +
labs(x = "Month",
y = "Total Background Checks",
title = "Total Background Checks from Nov. 1998 through Nov. 2018",
color = "Type") +
scale_color_manual(values = c("#09324C", "#127FC4", "#9BCCEB"))
bc_plot
bc_data_agg <- bc_data_agg %>% mutate(time = c(1:length(month))) %>%
mutate(time0 = ifelse(time == 170, 1, 0)) %>%
mutate(m1_prior = ifelse(time == 169, 1, 0)) %>%
mutate(m2_prior = ifelse(time == 168, 1, 0)) %>%
mutate(m3_prior = ifelse(time == 167, 1, 0)) %>%
mutate(m4_prior = ifelse(time == 166, 1, 0)) %>%
mutate(m5_prior = ifelse(time == 165, 1, 0)) %>%
mutate(m6_prior = ifelse(time == 164, 1, 0)) %>%
mutate(m1_post = ifelse(time == 171, 1, 0)) %>%
mutate(m2_post = ifelse(time == 172, 1, 0)) %>%
mutate(m3_post = ifelse(time == 173, 1, 0)) %>%
mutate(m4_post = ifelse(time == 174, 1, 0)) %>%
mutate(m5_post = ifelse(time == 175, 1, 0)) %>%
mutate(m6_post = ifelse(time == 176, 1, 0)) %>%
mutate(time2 = time^2) %>%
mutate(time3 = time^3) %>%
mutate(time4 = time^4) %>%
mutate(time5 = time^5)
event_reg1 <- felm(data = bc_data_agg, total_bc ~ time + time^2 + time^3 + time^4 + time^5 +
m1_prior + m2_prior + m3_prior + m4_prior + m5_prior + m6_prior +
m1_post + m2_post + m3_post + m4_post + m5_post + m6_post | factor(month_2))
tidy(event_reg1, conf.int = T)
## # A tibble: 13 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 time 7443. 214. 34.8 1.84e-90 7024. 7863.
## 2 m1_prior 261643. 233780. 1.12 2.64e- 1 -196556. 719843.
## 3 m2_prior -35167. 234014. -0.150 8.81e- 1 -493827. 423493.
## 4 m3_prior -39804. 234014. -0.170 8.65e- 1 -498464. 418856.
## 5 m4_prior 60465. 234014. 0.258 7.96e- 1 -398195. 519125.
## 6 m5_prior -52195. 234014. -0.223 8.24e- 1 -510854. 406465.
## 7 m6_prior -57622. 234367. -0.246 8.06e- 1 -516972. 401728.
## 8 m1_post 896573. 234139. 3.83 1.68e- 4 437668. 1355477.
## 9 m2_post 592250. 234139. 2.53 1.21e- 2 133345. 1051154.
## 10 m3_post 410098. 234139. 1.75 8.13e- 2 -48806. 869003.
## 11 m4_post 131672. 234139. 0.562 5.74e- 1 -327233. 590576.
## 12 m5_post -23747. 234139. -0.101 9.19e- 1 -482651. 435158.
## 13 m6_post -166333. 234492. -0.709 4.79e- 1 -625928. 293262.
reg_results1 <- as.data.frame(tidy(event_reg1, conf.int = T)) %>%
filter(term != "time" & term != "time2" & term != "time3" & term != "time4" & term != "time5" ) %>%
mutate(time = c(-6:-1,1:6))
ggplot(data = reg_results1) +
geom_point(aes(x = time, y = estimate)) +
geom_errorbar(aes(x = time, ymin=conf.low, ymax=conf.high), width=.2) +
labs(x = "Months", y = "Total Background Checks Coefficient",
title = "Total Background Checks Before and After Sandy Hook") +
theme_minimal() +
geom_vline(xintercept = 0, linetype="dashed", color="black", size = .2)
event_reg2 <- felm(data = bc_data_agg, total_lr ~ time + time^2 + time^3 + time^4 + time^5 +
m1_prior + m2_prior + m3_prior + m4_prior + m5_prior + m6_prior +
m1_post + m2_post + m3_post + m4_post + m5_post + m6_post | factor(month_2))
tidy(event_reg2, conf.int = T)
## # A tibble: 13 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 time 456. 64.7 7.05 2.34e-11 329. 583.
## 2 m1_prior 264117. 70660. 3.74 2.38e- 4 125626. 402608.
## 3 m2_prior 63599. 70731. 0.899 3.70e- 1 -75031. 202229.
## 4 m3_prior 58231. 70731. 0.823 4.11e- 1 -80398. 196861.
## 5 m4_prior 102039. 70731. 1.44 1.51e- 1 -36591. 240669.
## 6 m5_prior 81669. 70731. 1.15 2.50e- 1 -56961. 220299.
## 7 m6_prior 86588. 70837. 1.22 2.23e- 1 -52250. 225427.
## 8 m1_post 405118. 70769. 5.72 3.44e- 8 266414. 543821.
## 9 m2_post 282477. 70769. 3.99 8.99e- 5 143773. 421181.
## 10 m3_post 255574. 70769. 3.61 3.79e- 4 116870. 394278.
## 11 m4_post 184220. 70769. 2.60 9.88e- 3 45516. 322923.
## 12 m5_post 122731. 70769. 1.73 8.43e- 2 -15972. 261435.
## 13 m6_post 84566. 70875. 1.19 2.34e- 1 -54347. 223479.
reg_results2 <- as.data.frame(tidy(event_reg2, conf.int = T)) %>%
filter(term != "time" & term != "time2" & term != "time3" & term != "time4" & term != "time5" ) %>%
mutate(time = c(-6:-1,1:6))
ggplot(data = reg_results2) +
geom_point(aes(x = time, y = estimate)) +
geom_errorbar(aes(x = time, ymin=conf.low, ymax=conf.high), width=.2) +
labs(x = "Months", y = "Total Background Checks Coefficient",
title = "Total Background Checks for Long Rifles Before and After Sandy Hook") +
theme_minimal() +
geom_vline(xintercept = 0, linetype="dashed", color="black", size = .2)
event_reg3 <- felm(data = bc_data_agg, total_hg ~ time + time2 + time3 + time4 + time5 +
m1_prior + m2_prior + m3_prior + m4_prior + m5_prior + m6_prior +
m1_post + m2_post + m3_post + m4_post + m5_post + m6_post | factor(month_2))
tidy(event_reg3, conf.int = T)
## Warning in chol.default(mat, pivot = TRUE, tol = tol): the matrix is either
## rank-deficient or indefinite
## # A tibble: 17 x 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 time -2.53e+3 2.14e+3 -1.19 0.237 -6.73e+3 1.66e+3
## 2 time2 6.95e+1 5.48e+1 1.27 0.206 -3.79e+1 1.77e+2
## 3 time3 -8.55e-1 5.75e-1 -1.49 0.138 -1.98e+0 2.71e-1
## 4 time4 5.45e-3 2.62e-3 2.08 0.0387 3.16e-4 1.06e-2
## 5 time5 -1.17e-5 4.30e-6 -2.72 0.00703 -2.01e-5 -3.27e-6
## 6 m1_prior 7.43e+4 6.54e+4 1.14 0.257 -5.39e+4 2.02e+5
## 7 m2_prior -1.85e+4 6.54e+4 -0.282 0.778 -1.47e+5 1.10e+5
## 8 m3_prior -6.74e+3 6.54e+4 -0.103 0.918 -1.35e+5 1.21e+5
## 9 m4_prior 2.38e+4 6.54e+4 0.364 0.716 -1.04e+5 1.52e+5
## 10 m5_prior -4.70e+3 6.54e+4 -0.0719 0.943 -1.33e+5 1.24e+5
## 11 m6_prior -1.51e+4 6.56e+4 -0.231 0.818 -1.44e+5 1.13e+5
## 12 m1_post 4.15e+5 6.54e+4 6.34 0.00000000135 2.87e+5 5.43e+5
## 13 m2_post 2.31e+5 6.54e+4 3.53 0.000516 1.02e+5 3.59e+5
## 14 m3_post 1.38e+5 6.54e+4 2.10 0.0367 9.35e+3 2.66e+5
## 15 m4_post 5.45e+4 6.54e+4 0.834 0.405 -7.37e+4 1.83e+5
## 16 m5_post -2.29e+3 6.54e+4 -0.0351 0.972 -1.30e+5 1.26e+5
## 17 m6_post -5.22e+4 6.55e+4 -0.797 0.426 -1.81e+5 7.62e+4
reg_results3 <- as.data.frame(tidy(event_reg3, conf.int = T)) %>%
filter(term != "time" & term != "time2" & term != "time3" & term != "time4" & term != "time5" ) %>% mutate(time = c(-6:-1,1:6))
## Warning in chol.default(mat, pivot = TRUE, tol = tol): the matrix is either
## rank-deficient or indefinite
ggplot(data = reg_results3) +
geom_point(aes(x = time, y = estimate)) +
geom_errorbar(aes(x = time, ymin=conf.low, ymax=conf.high), width=.2) +
labs(x = "Months", y = "Total Background Checks Coefficient",
title = "Total Background Checks for Handguns Before and After Sandy Hook") +
theme_minimal() +
geom_vline(xintercept = 0, linetype="dashed", color="black", size = .2)
We find that background checks for guns, in aggregate and for both handguns and long rifles, increased following the Sandy Hook shooting. The event lead to people believing that stricter gun control measures would be enacted and thus created a surge in background checks immediately following the shooting. This has implications for the impact of gun control on violence because the announcement of gun control regulation could lead to increases in background checks and gun purchases prior to the regulation being implemented.