Question 1

library(pacman)
p_load(tidyverse, ggthemes, usmap, gganimate, gifski, png, transformr, lfe, plotly, broom, gsynth, scales, Synth)
shr <- as.data.frame(read_csv("/Users/jenniputz/Downloads/shr.csv"))
statepop <- as.data.frame(read_csv("/Users/jenniputz/Downloads/statepop.csv"))

shr_2 <- shr %>% filter(additional_victim_count <= 2) %>% 
  filter(year >= "2000") %>%
  group_by(state_abb, year) %>% summarize(murders = n()) %>%
  replace_na(list(state_abb = "DC"))

names(shr_2) <- c("state_abbrev", "year", "murders")

statepop <- statepop %>% filter(state_abbrev != "FL")
state_data <- inner_join(shr_2, statepop) %>% mutate(murder_rate = murders/(pop/100000))

# need to drop washington dc, alabama, and florida (but florida is already filtered out since i filtered on >= 2000)
state_data <- state_data %>% filter(state_abbrev != "DC" & state_abbrev != "AL") 

plot <- plot_usmap(data = state_data, regions = "states", values = "murder_rate") 
plot <- plot + transition_states(year, transition_length = 2, state_length = 1) +
  ease_aes('linear') +
  labs(title = "Murder Rate per 100,000 by State 2000-2017", 
       subtitle = "Year: {closest_state}") +
  scale_fill_continuous(low = "white", high = "blue", name = "Murder Rate") + theme(legend.position = "right")
plot

A)

state_data <- state_data %>% 
  mutate(legalized = ifelse((state_abbrev == "CO"  & year >= "2014") |
                             (state_abbrev == "WA" & year >= "2014") |
                             (state_abbrev == "OR" & year >= "2015") | 
                             (state_abbrev == "AK" & year >= "2015") |
                             (state_abbrev == "NV" & year >= "2017"), 1, 0))

reg1 <- felm(log(murder_rate) ~ legalized | state_abbrev + year, data = state_data)
tidy(reg1, conf.int = TRUE, conf.level = 0.95)
## # A tibble: 1 x 7
##   term      estimate std.error statistic p.value conf.low conf.high
##   <chr>        <dbl>     <dbl>     <dbl>   <dbl>    <dbl>     <dbl>
## 1 legalized  0.00537    0.0615    0.0873   0.930   -0.115     0.126

Based on the results of the regression above, we find no evidence that marijuana legalization has a statistically significant relationships with murders per 100,000 population.

B)

`%ni%` <- Negate(`%in%`)
legalizer_data <- state_data %>% filter(state_abbrev == "CO" | state_abbrev == "WA") %>%
  group_by(year) %>% summarize(legal_rate = mean(murder_rate))
other_data <- state_data %>% filter(state_abbrev != "CO" & state_abbrev != "WA") %>% 
  group_by(year) %>% summarize(other_rate = mean(murder_rate))


plot_trends <- ggplot() +
  geom_line(data = legalizer_data, aes(x = year, y = legal_rate), color = "blue") + 
  geom_line(data = other_data, aes(x = year, y = other_rate), color = "black") +
  geom_vline(xintercept = 2014, linetype="dashed", color="black", size = .2) + 
  theme_minimal() +
  labs(title = "Murder Rate per 100,000 for Colorado and Washington versus Other States",
       x = "Year", y = "Murder Rate") +
  annotate("text", x = 2015.7, y = 3, label = "Legalized States") +
  annotate("text", x = 2015.7, y = 5, label = "Non-Legalized States")

plot_trends<- plotly::ggplotly(plot_trends, width = NULL, height = NULL,
  tooltip = "all", dynamicTicks = FALSE, layerData = 1,
  originalData = TRUE, source = "A")

plot_trends

In the above graph we can see that legalized states and non-legalized states both had increases in murders per 100,000 after 2014. The trends for Colorado and Washington follow closely with trends in other states.

C)

state_data_dropped <- state_data %>% filter(state_abbrev != "CA" & state_abbrev != "OR"
                                            & state_abbrev != "NV" & state_abbrev != "AK") %>% as.data.frame()

other_data_2 <- state_data_dropped %>% filter(state_abbrev != "CO" & state_abbrev != "WA") %>% pull(state_fips) %>% unique() 

system.time(
    out_co <- gsynth(murder_rate ~  legalized, data = state_data_dropped %>% filter(state_abbrev != "WA"), index = c("state_fips","year"), force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, inference = "parametric", nboots = 1000, parallel = FALSE)
)

co_est <-  data.frame(
            avgatt <- out_co$est.avg[1],
            SE <- out_co$est.avg[2],
            p <- out_co$est.avg[5],
            state <- "COLORADO")
names(co_est) <- c("avgatt", "SE", "p", "state")

co_synth <- plot(out_co, type = "counterfactual", raw = "all", main="", theme.bw=TRUE) +
  labs(title = "Trends in Murder Rate per 100,000 for Colorado and Synthetic Colorado",
       x = "Year", y = "Murder Rate per 100,000") + theme(plot.title = element_text(size=12))
system.time(
    out_wa <- gsynth(murder_rate ~  legalized, data = state_data_dropped %>% filter(state_abbrev != "CO"), index = c("state_fips","year"), force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, inference = "parametric", nboots = 1000, parallel = FALSE)
)

wa_est <-  data.frame(
            avgatt <- out_wa$est.avg[1],
            SE <- out_wa$est.avg[2],
            p <- out_wa$est.avg[5],
            state <- "WASHINGTON")
names(wa_est) <- c("avgatt", "SE", "p", "state")

wa_synth <- plot(out_wa, type = "counterfactual", raw = "all", main="", theme.bw=TRUE) +
  labs(title = "Trends in Murder Rate per 100,000 for Washington and Synthetic Washington",
       x = "Year", y = "Murder Rate per 100,000") + theme(plot.title = element_text(size=12))
co_synth

wa_synth

The synthetic control method does not seem to produce much a difference between the synthetic states and legalized states. For Colorado, there does not seem to be any difference post-legalization for Colorado and synthetic Colorado. For Washington, the synthetic Washington estimate is slightly above Washington after 2015 but the estimates are following similar trends.

D)

state_data_dropped2 <- state_data_dropped %>% filter(state_abbrev != "CO" &
                                                       state_abbrev != "WA")


other_data_3 <- state_data_dropped2 %>% filter(state_abbrev != "AR" &
                                                 state_abbrev != "ID" &
                                                 state_abbrev != "IN" &
                                                 state_abbrev != "MN" &
                                                 state_abbrev != "MS" &
                                                 state_abbrev != "NM" &
                                                 state_abbrev != "WI" &
                                                 state_abbrev != "WV" &
                                                 state_abbrev != "WY") %>% pull(state) %>% unique()

rows <- length(other_data_3)
stored <- data.frame(
  avgatt = numeric(rows),
  SE = numeric(rows),
  p = numeric(rows),
  state = character(rows)
)

for (i in 1:length(other_data_3)){
  statename = other_data_3[i]
    print(statename)
  state_data_dropped2 <- state_data_dropped2 %>% mutate(legalized = ifelse((state == as.character(statename) & year >= 2014), 1, 0))

   out <- gsynth(murder_rate ~ legalized, data = as.tibble(state_data_dropped2), index = c("state","year"), force = "two-way", CV = TRUE, r = c(0, 5), se = TRUE, inference = "parametric", nboots = 1000, parallel = TRUE)
    stored[i,] = c(out$est.avg[1],
            out$est.avg[2],
            out$est.avg[5],
            statename)
}

stored[,4] <- other_data_3

stored_merged <- rbind(stored, wa_est)
stored_merged <- rbind(stored_merged, co_est)
mse_plot <- ggplot() +
  geom_histogram(data = stored_merged, aes(x = p, fill = ifelse(state == "WASHINGTON" | state == "COLORADO", "Treated", "Control")), stat = "count") +
  scale_fill_manual(name = "State", values=c("grey40","purple2"))+
  theme_minimal() + theme(axis.text.x = element_text(angle = 90)) +
  labs(title = "Synthetic Control P-Values",
       x = "p-value")
## Warning: Ignoring unknown parameters: binwidth, bins, pad
mse_plot

Note: The gsynth package wanted to kick a few states out of the loop of control states due to the inability to complete the bootstrapping. Thus the following states do not have p-values computed: Arkansas, Idaho, Indiana, Minnesota, Mississippi, New Mexico, Wisconsin, West Virginia, and Wyoming.

Looking at the above graph, the p-values for CO and WA are not at the tail end of the distribution so we cannot reject our null hypothesis that legalization did not increase crime.

Question 2

bc_data <- read.table("/Users/jenniputz/Downloads/bc.txt", sep=",", header = TRUE)

bc_data_agg <- bc_data %>% group_by(month) %>% 
  summarise(total_bc = sum(totals, na.rm=TRUE), total_lr = sum(long_gun, na.rm=TRUE), total_hg = sum(handgun, na.rm=TRUE)) 

bc_data_agg$year <- format(as.Date(bc_data_agg$month, format = "%Y-%M"), "%Y") %>% as.numeric()
bc_data_agg$month_2 <- substr(bc_data_agg$month,6,7)

bc_plot <- ggplot(data = bc_data_agg, aes(x = month)) +
  geom_line(aes(y = total_bc, group = 1, color = "Total")) + 
  geom_line(aes(y = total_lr, group = 2, color = "Long Rifle")) + 
  geom_line(aes(y = total_hg, group = 3, color = "Handguns")) +
  theme_minimal() +
  scale_x_discrete(
    breaks = unique(bc_data_agg$month),
    labels = unique(bc_data_agg$month),
    limits=c("1998-11","1999-11","2000-11","2001-11","2002-11","2003-11","2004-11",  
             "2005-11","2006-11","2007-11","2008-11","2009-11","2010-11","2011-11","2012-11",
             "2013-11","2014-11","2015-11","2016-11","2017-11","2018-11")) +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(x = "Month",
       y = "Total Background Checks",
       title = "Total Background Checks from Nov. 1998 through Nov. 2018",
       color = "Type") +
  scale_color_manual(values = c("#09324C", "#127FC4", "#9BCCEB"))
  
bc_plot

bc_data_agg <- bc_data_agg %>% mutate(time = c(1:length(month))) %>%
  mutate(time0 = ifelse(time == 170, 1, 0)) %>% 
  mutate(m1_prior = ifelse(time == 169, 1, 0)) %>%
  mutate(m2_prior = ifelse(time == 168, 1, 0)) %>%
  mutate(m3_prior = ifelse(time == 167, 1, 0)) %>%
  mutate(m4_prior = ifelse(time == 166, 1, 0)) %>%
  mutate(m5_prior = ifelse(time == 165, 1, 0)) %>%
  mutate(m6_prior = ifelse(time == 164, 1, 0)) %>%
  mutate(m1_post = ifelse(time == 171, 1, 0)) %>%
  mutate(m2_post = ifelse(time == 172, 1, 0)) %>%
  mutate(m3_post = ifelse(time == 173, 1, 0)) %>%
  mutate(m4_post = ifelse(time == 174, 1, 0)) %>%
  mutate(m5_post = ifelse(time == 175, 1, 0)) %>%
  mutate(m6_post = ifelse(time == 176, 1, 0)) %>%
  mutate(time2 = time^2) %>%
  mutate(time3 = time^3) %>%
  mutate(time4 = time^4) %>%
  mutate(time5 = time^5)
  



event_reg1 <- felm(data = bc_data_agg, total_bc ~  time + time^2 + time^3 + time^4 + time^5 +
                     m1_prior + m2_prior + m3_prior + m4_prior + m5_prior + m6_prior + 
                     m1_post + m2_post + m3_post + m4_post + m5_post + m6_post | factor(month_2))
tidy(event_reg1, conf.int = T)
## # A tibble: 13 x 7
##    term     estimate std.error statistic  p.value conf.low conf.high
##    <chr>       <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
##  1 time        7443.      214.    34.8   1.84e-90    7024.     7863.
##  2 m1_prior  261643.   233780.     1.12  2.64e- 1 -196556.   719843.
##  3 m2_prior  -35167.   234014.    -0.150 8.81e- 1 -493827.   423493.
##  4 m3_prior  -39804.   234014.    -0.170 8.65e- 1 -498464.   418856.
##  5 m4_prior   60465.   234014.     0.258 7.96e- 1 -398195.   519125.
##  6 m5_prior  -52195.   234014.    -0.223 8.24e- 1 -510854.   406465.
##  7 m6_prior  -57622.   234367.    -0.246 8.06e- 1 -516972.   401728.
##  8 m1_post   896573.   234139.     3.83  1.68e- 4  437668.  1355477.
##  9 m2_post   592250.   234139.     2.53  1.21e- 2  133345.  1051154.
## 10 m3_post   410098.   234139.     1.75  8.13e- 2  -48806.   869003.
## 11 m4_post   131672.   234139.     0.562 5.74e- 1 -327233.   590576.
## 12 m5_post   -23747.   234139.    -0.101 9.19e- 1 -482651.   435158.
## 13 m6_post  -166333.   234492.    -0.709 4.79e- 1 -625928.   293262.
reg_results1 <- as.data.frame(tidy(event_reg1, conf.int = T)) %>% 
   filter(term != "time" & term != "time2" & term != "time3" & term != "time4" & term != "time5" ) %>% 
  mutate(time = c(-6:-1,1:6))


ggplot(data = reg_results1) +
  geom_point(aes(x = time, y = estimate)) +
  geom_errorbar(aes(x = time, ymin=conf.low, ymax=conf.high), width=.2) + 
  labs(x = "Months", y = "Total Background Checks Coefficient", 
       title = "Total Background Checks Before and After Sandy Hook") +
  theme_minimal() +
  geom_vline(xintercept = 0, linetype="dashed", color="black", size = .2)

event_reg2 <- felm(data = bc_data_agg, total_lr ~  time + time^2 + time^3 + time^4 + time^5 +
                     m1_prior + m2_prior + m3_prior + m4_prior + m5_prior + m6_prior + 
                     m1_post + m2_post + m3_post + m4_post + m5_post + m6_post | factor(month_2))
tidy(event_reg2, conf.int = T)
## # A tibble: 13 x 7
##    term     estimate std.error statistic  p.value conf.low conf.high
##    <chr>       <dbl>     <dbl>     <dbl>    <dbl>    <dbl>     <dbl>
##  1 time         456.      64.7     7.05  2.34e-11     329.      583.
##  2 m1_prior  264117.   70660.      3.74  2.38e- 4  125626.   402608.
##  3 m2_prior   63599.   70731.      0.899 3.70e- 1  -75031.   202229.
##  4 m3_prior   58231.   70731.      0.823 4.11e- 1  -80398.   196861.
##  5 m4_prior  102039.   70731.      1.44  1.51e- 1  -36591.   240669.
##  6 m5_prior   81669.   70731.      1.15  2.50e- 1  -56961.   220299.
##  7 m6_prior   86588.   70837.      1.22  2.23e- 1  -52250.   225427.
##  8 m1_post   405118.   70769.      5.72  3.44e- 8  266414.   543821.
##  9 m2_post   282477.   70769.      3.99  8.99e- 5  143773.   421181.
## 10 m3_post   255574.   70769.      3.61  3.79e- 4  116870.   394278.
## 11 m4_post   184220.   70769.      2.60  9.88e- 3   45516.   322923.
## 12 m5_post   122731.   70769.      1.73  8.43e- 2  -15972.   261435.
## 13 m6_post    84566.   70875.      1.19  2.34e- 1  -54347.   223479.
reg_results2 <- as.data.frame(tidy(event_reg2, conf.int = T)) %>% 
   filter(term != "time" & term != "time2" & term != "time3" & term != "time4" & term != "time5" ) %>% 
  mutate(time = c(-6:-1,1:6))

ggplot(data = reg_results2) +
  geom_point(aes(x = time, y = estimate)) +
  geom_errorbar(aes(x = time, ymin=conf.low, ymax=conf.high), width=.2) + 
  labs(x = "Months", y = "Total Background Checks Coefficient", 
       title = "Total Background Checks for Long Rifles Before and After Sandy Hook") +
  theme_minimal() +
  geom_vline(xintercept = 0, linetype="dashed", color="black", size = .2)

event_reg3 <- felm(data = bc_data_agg, total_hg ~  time + time2 + time3 + time4 + time5 +
                     m1_prior + m2_prior + m3_prior + m4_prior + m5_prior + m6_prior + 
                     m1_post + m2_post + m3_post + m4_post + m5_post + m6_post | factor(month_2))
tidy(event_reg3, conf.int = T)
## Warning in chol.default(mat, pivot = TRUE, tol = tol): the matrix is either
## rank-deficient or indefinite
## # A tibble: 17 x 7
##    term     estimate std.error statistic       p.value conf.low conf.high
##    <chr>       <dbl>     <dbl>     <dbl>         <dbl>    <dbl>     <dbl>
##  1 time     -2.53e+3   2.14e+3   -1.19   0.237         -6.73e+3   1.66e+3
##  2 time2     6.95e+1   5.48e+1    1.27   0.206         -3.79e+1   1.77e+2
##  3 time3    -8.55e-1   5.75e-1   -1.49   0.138         -1.98e+0   2.71e-1
##  4 time4     5.45e-3   2.62e-3    2.08   0.0387         3.16e-4   1.06e-2
##  5 time5    -1.17e-5   4.30e-6   -2.72   0.00703       -2.01e-5  -3.27e-6
##  6 m1_prior  7.43e+4   6.54e+4    1.14   0.257         -5.39e+4   2.02e+5
##  7 m2_prior -1.85e+4   6.54e+4   -0.282  0.778         -1.47e+5   1.10e+5
##  8 m3_prior -6.74e+3   6.54e+4   -0.103  0.918         -1.35e+5   1.21e+5
##  9 m4_prior  2.38e+4   6.54e+4    0.364  0.716         -1.04e+5   1.52e+5
## 10 m5_prior -4.70e+3   6.54e+4   -0.0719 0.943         -1.33e+5   1.24e+5
## 11 m6_prior -1.51e+4   6.56e+4   -0.231  0.818         -1.44e+5   1.13e+5
## 12 m1_post   4.15e+5   6.54e+4    6.34   0.00000000135  2.87e+5   5.43e+5
## 13 m2_post   2.31e+5   6.54e+4    3.53   0.000516       1.02e+5   3.59e+5
## 14 m3_post   1.38e+5   6.54e+4    2.10   0.0367         9.35e+3   2.66e+5
## 15 m4_post   5.45e+4   6.54e+4    0.834  0.405         -7.37e+4   1.83e+5
## 16 m5_post  -2.29e+3   6.54e+4   -0.0351 0.972         -1.30e+5   1.26e+5
## 17 m6_post  -5.22e+4   6.55e+4   -0.797  0.426         -1.81e+5   7.62e+4
reg_results3 <- as.data.frame(tidy(event_reg3, conf.int = T)) %>% 
  filter(term != "time" & term != "time2" & term != "time3" & term != "time4" & term != "time5" ) %>% mutate(time = c(-6:-1,1:6))
## Warning in chol.default(mat, pivot = TRUE, tol = tol): the matrix is either
## rank-deficient or indefinite
ggplot(data = reg_results3) +
  geom_point(aes(x = time, y = estimate)) +
  geom_errorbar(aes(x = time, ymin=conf.low, ymax=conf.high), width=.2) + 
  labs(x = "Months", y = "Total Background Checks Coefficient", 
       title = "Total Background Checks for Handguns Before and After Sandy Hook") +
  theme_minimal() + 
  geom_vline(xintercept = 0, linetype="dashed", color="black", size = .2)

We find that background checks for guns, in aggregate and for both handguns and long rifles, increased following the Sandy Hook shooting. The event lead to people believing that stricter gun control measures would be enacted and thus created a surge in background checks immediately following the shooting. This has implications for the impact of gun control on violence because the announcement of gun control regulation could lead to increases in background checks and gun purchases prior to the regulation being implemented.