raw <- read.csv(file="shr.csv", header=TRUE, sep=",")
shr <- raw %>% subset(additional_victim_count < 3) %>% mutate(total_victims = additional_victim_count + 1)
shr_clean <- shr %>% select(year, state, state_abb, fips_state_code, incident_number, total_victims) %>% group_by(year, state, state_abb, fips_state_code) %>% summarize(n(), total_victims=sum(total_victims)) %>% rename("murders" = "n()") %>% rename("state_fips" = "fips_state_code") %>% arrange(year, state_fips) %>% filter(year %in% 2000:2017)
pop <- read.csv(file = "statepop.csv", header = TRUE, sep = ",") %>% select(year, state_fips, pop) %>% arrange(year, state_fips)
shr_pop <- shr_clean %>% left_join(pop, by = c("year", "state_fips")) %>% mutate(murder_pop = (murders*100000)/(pop)) %>% drop_na(state_fips) %>% filter(state !="guam") %>% filter(state_fips != "1") %>% filter(state_fips != "11") %>% filter(state_fips != "12")
head(shr_pop)
## # A tibble: 6 x 8
## # Groups: year, state, state_abb [6]
## year state state_abb state_fips murders total_victims pop murder_pop
## <int> <fct> <fct> <int> <int> <dbl> <int> <dbl>
## 1 2000 alaska AK 2 34 35 6.28e5 5.41
## 2 2000 arizo… AZ 4 363 385 5.16e6 7.03
## 3 2000 arkan… AR 5 160 163 2.68e6 5.97
## 4 2000 calif… CA 6 2121 2207 3.40e7 6.24
## 5 2000 color… CO 8 123 130 4.33e6 2.84
## 6 2000 conne… CT 9 93 100 3.41e6 2.73
I have dropped observations for Guam, US Virgin Islands, Florida, Alabama and DC due to recurrent missing data. The data is now ready for us to be analyzed
shr_pop$legal_mj <- ifelse(shr_pop$state_fips == 8 & shr_pop$year > 2013 | shr_pop$state_fips == 53 & shr_pop$year > 2013 | shr_pop$state_fips == 41 & shr_pop$year > 2014 | shr_pop$state_fips == 2 & shr_pop$year > 2014 | shr_pop$state_fips == 32 & shr_pop$year > 2016, 1, 0 )
murder_fe_lm <- lm(murders ~ legal_mj + state + year, data = shr_pop)
summary(murder_fe_lm)
##
## Call:
## lm(formula = murders ~ legal_mj + state + year, data = shr_pop)
##
## Residuals:
## Min 1Q Median 3Q Max
## -337.79 -14.78 -0.68 11.62 395.24
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 779.2378 779.4764 1.000 0.317756
## legal_mj 21.8860 16.9773 1.289 0.197717
## statearizona 361.3143 19.5227 18.507 < 2e-16 ***
## statearkansas 127.1477 19.5227 6.513 1.29e-10 ***
## statecalifornia 2096.5366 19.5227 107.390 < 2e-16 ***
## statecolorado 126.3952 19.3396 6.536 1.12e-10 ***
## stateconnecticut 67.1477 19.5227 3.439 0.000613 ***
## statedelaware 7.9254 19.5227 0.406 0.684878
## stategeorgia 535.8699 19.5227 27.449 < 2e-16 ***
## statehawaii -6.9634 19.5227 -0.357 0.721420
## stateidaho -5.6857 19.5227 -0.291 0.770947
## stateillinois 514.6477 19.5227 26.361 < 2e-16 ***
## stateindiana 273.9254 19.5227 14.031 < 2e-16 ***
## stateiowa 17.5366 19.5227 0.898 0.369310
## statekansas 60.2588 19.5227 3.087 0.002093 **
## statekentucky 130.5366 19.5227 6.686 4.25e-11 ***
## statelouisiana 460.0921 19.5227 23.567 < 2e-16 ***
## statemaine -11.1301 19.5227 -0.570 0.568760
## statemaryland 433.4254 19.5227 22.201 < 2e-16 ***
## statemassachusetts 117.4810 19.5227 6.018 2.67e-09 ***
## statemichigan 570.1477 19.5227 29.204 < 2e-16 ***
## stateminnesota 76.0921 19.5227 3.898 0.000105 ***
## statemississippi 132.0366 19.5227 6.763 2.58e-11 ***
## statemissouri 370.1477 19.5227 18.960 < 2e-16 ***
## statemontana -13.1301 19.5227 -0.673 0.501421
## statenebraska 2.3143 19.5227 0.119 0.905665
## statenevada 146.3762 19.4085 7.542 1.24e-13 ***
## statenew hampshire -20.7412 19.5227 -1.062 0.288362
## statenew jersey 333.8699 19.5227 17.102 < 2e-16 ***
## statenew mexico 86.5921 19.5227 4.435 1.04e-05 ***
## statenew york 735.1477 19.5227 37.656 < 2e-16 ***
## statenorth carolina 460.0366 19.5227 23.564 < 2e-16 ***
## statenorth dakota -24.4079 19.5227 -1.250 0.211575
## stateohio 436.5366 19.5227 22.360 < 2e-16 ***
## stateoklahoma 179.0921 19.5227 9.174 < 2e-16 ***
## stateoregon 47.7778 19.3166 2.473 0.013586 *
## statepennsylvania 637.0366 19.5227 32.631 < 2e-16 ***
## staterhode island -5.0190 19.5227 -0.257 0.797178
## statesouth carolina 276.5366 19.5227 14.165 < 2e-16 ***
## statesouth dakota -18.7412 19.5227 -0.960 0.337355
## statetennessee 385.0366 19.5227 19.722 < 2e-16 ***
## statetexas 1295.8143 19.5227 66.375 < 2e-16 ***
## stateutah 19.3143 19.5227 0.989 0.322797
## statevermont -24.7412 19.5227 -1.267 0.205409
## statevirginia 331.0921 19.5227 16.959 < 2e-16 ***
## statewashington 150.8952 19.3396 7.802 1.86e-14 ***
## statewest virginia 28.3143 19.5227 1.450 0.147352
## statewisconsin 143.7032 19.5227 7.361 4.48e-13 ***
## statewyoming -20.6301 19.5227 -1.057 0.290951
## year -0.3709 0.3883 -0.955 0.339794
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 57.95 on 814 degrees of freedom
## Multiple R-squared: 0.9778, Adjusted R-squared: 0.9764
## F-statistic: 730.4 on 49 and 814 DF, p-value: < 2.2e-16
Marijauana legalization does not appear to have a significant effect on murder rates when states fixed effects are used.
shr_pop$colwash <- ifelse(shr_pop$state_fips == 8 | shr_pop$state_fips == 53, 1, 0)
shr_pop$colwash[shr_pop$state_fips==53] <- 2
shr_agg <- shr_pop %>% group_by(colwash, year) %>% summarise(pop=sum(pop, na.rm = TRUE), murders=sum(murders, na.rm = TRUE), state_av_murder_rate = mean(murder_pop, na.rm = TRUE)) %>% mutate(av_murder_rate = murders*100000/pop) %>% drop_na(colwash)
head(shr_agg)
## # A tibble: 6 x 6
## # Groups: colwash [1]
## colwash year pop murders state_av_murder_rate av_murder_rate
## <dbl> <int> <int> <int> <dbl> <dbl>
## 1 0 2000 250853244 12648 4.12 5.04
## 2 0 2001 253158442 13283 4.28 5.25
## 3 0 2002 255339821 13573 4.37 5.32
## 4 0 2003 257399008 13720 4.29 5.33
## 5 0 2004 259537839 13596 4.35 5.24
## 6 0 2005 261648427 14150 4.48 5.41
shr_agg$colwash[shr_agg$colwash==0] <- "Rest of US"
shr_agg$colwash[shr_agg$colwash==1] <- "Colorado"
shr_agg$colwash[shr_agg$colwash==2] <- "Washington"
shr_agg <- shr_agg %>% arrange(year, colwash)
ggplot(data=shr_agg, aes(x=year, y=av_murder_rate, group = colwash)) + geom_line(aes(color=colwash)) + geom_point(aes(color=colwash)) + geom_vline(xintercept = 2014) + theme(legend.title=element_blank()) + ylab("Average murder rate") + xlab("Year") + ggtitle (label = "Murder rates before and after legalization", subtitle = "WA and CO vs rest of US (2000-2017)") + theme(plot.title = element_text(size=15), plot.subtitle = element_text(size = 10))
When plotted against the US murder rate, the murder rates in Colorado and Washington do not appear to respond to marijuana legalization.
shr_pop_filt <- shr_pop %>% subset(state_fips != 41 & state_fips != 2 & state_fips != 32 & state_fips != 6) %>% arrange(state_fips)
shr_pop_filt$state_fips <-as.numeric(shr_pop_filt$state_fips)
data <- as.data.frame(shr_pop_filt)
data <- data %>% drop_na(murder_pop)
dumdat <- data.frame(data$year, data$state_fips)
data<-data[!(data$state_fips==11 | data$state_fips==1),]
dataprep.out <-
dataprep(foo = data,
special.predictors = list(
list("murder_pop", 2001, "mean"),
list("murder_pop", 2002, "mean"),
list("murder_pop", 2003, "mean"),
list("murder_pop", 2004, "mean"),
list("murder_pop", 2005, "mean"),
list("murder_pop", 2006, "mean"),
list("murder_pop", 2007, "mean"),
list("murder_pop", 2008, "mean"),
list("murder_pop", 2009, "mean"),
list("murder_pop", 2010, "mean"),
list("murder_pop", 2011, "mean"),
list("murder_pop", 2012, "mean"),
list("murder_pop", 2013, "mean")
),
time.predictors.prior = 2002:2013,
dependent = "murder_pop",
unit.variable = "state_fips",
time.variable = "year",
treatment.identifier = 8,
controls.identifier = c(4,5,9,10,13,15:31,33:40,42,44:51,54:56),
time.optimize.ssr = 2002:2013,
time.plot = 2002:2016
)
synth.out <- synth(data.prep.obj = dataprep.out, method = BFGS)
##
## X1, X0, Z1, Z0 all come directly from dataprep object.
##
##
## ****************
## searching for synthetic control unit
##
##
## ****************
## ****************
## ****************
##
## MSPE (LOSS V): 0.08048141
##
## solution.v:
## 8.9451e-06 0.1040174 0.1046719 0.087407 0.07843877 0.1252359 0.1078967 0.07520487 0.07444028 0.06442087 0.06063223 0.06457235 0.05305274
##
## solution.w:
## 0.2262158 4.11e-08 1.02e-08 5.3e-09 7.3e-09 2e-10 7e-09 2.66e-08 5.02e-08 4e-09 1.03e-08 2e-08 0.05618882 7e-09 4.01e-08 1.38e-08 8.64e-08 0.03653619 9.5e-08 9.9e-09 0.3603769 6.29e-08 0.2161422 1.718e-07 2.45e-08 1.547e-07 2.38e-08 2.69e-08 8.52e-08 1.03e-08 1.46e-08 2.18e-08 1.86e-08 7.8e-09 1.7e-08 3.25e-08 0.04896767 2.9e-09 6.47e-08 6.55e-08 5.33e-08 0.05557117
synth.tables <- synth.tab(dataprep.res = dataprep.out, synth.res = synth.out)
paths_col <- path.plot(synth.res = synth.out, dataprep.res = dataprep.out,
Ylab = "Murder rate", Xlab = "year",
Ylim = c(0,10), Legend = c("Colorado", "synthetic Colorado"),
Legend.position = "bottomright")
gaps_col <- gaps.plot(synth.res = synth.out, dataprep.res = dataprep.out,
Ylab = "Gap in murder rate between Colorado and synthetic Colorado", Xlab = "year",
Ylim = c(-2,2), Main = NA)
paths_col
## $rect
## $rect$w
## [1] 4.282023
##
## $rect$h
## [1] 1.627486
##
## $rect$left
## [1] 2011.718
##
## $rect$top
## [1] 1.627486
##
##
## $text
## $text$x
## [1] 2012.968 2012.968
##
## $text$y
## [1] 1.0849910 0.5424955
gaps_col
## NULL
The graphs above show that there is no abnormal increase in murder rate in Colorado gollowing mrijuana legalization. In the following regressions, I will refrain from showing the code and focus on the results
##
## X1, X0, Z1, Z0 all come directly from dataprep object.
##
##
## ****************
## searching for synthetic control unit
##
##
## ****************
## ****************
## ****************
##
## MSPE (LOSS V): 0.003368255
##
## solution.v:
## 0.001161367 0.1120915 0.1072085 0.09003934 0.08029324 0.09492744 0.1113905 0.08262669 0.07385606 0.06573302 0.06616088 0.06522053 0.04929087
##
## solution.w:
## 4.43409e-05 2.23362e-05 2.752e-07 1.959e-07 2.593e-07 1.318e-07 1.719e-07 1.463e-07 5.6279e-06 6.6e-09 4.763e-07 8.052e-07 5.079e-07 2.875e-07 4.4809e-06 2.012e-07 5.7029e-06 0.2097009 0.04495899 3.799e-07 0.170185 6.321e-07 0.08656654 0.003935989 2.438e-07 1.43e-08 0.08931143 3.193e-07 8.9987e-06 4.945e-07 1.1117e-06 0.1368621 2.4139e-06 9.4e-08 1.3615e-06 1.2467e-06 1.081e-07 2.377e-07 0.1113758 1.12515e-05 1.41094e-05 0.1469743
The same is also true of Washington
Now we run a randomization inference to test if the results above are driven by experimental design bias
Placebo test confirms our initial assessment, and removes doubt about experimental design bias. There is no statistically significant effect of legalization on murder rates for legalizing states
bc <- read.csv(file="bc.csv", header=TRUE, sep=",")
bc_clean <- bc %>% group_by(month) %>% summarize(total = sum(totals, na.rm = TRUE), handgun =sum(handgun, na.rm = TRUE), long_gun=sum(long_gun, na.rm = TRUE)) %>% arrange(month)
p2 <- melt(bc_clean, id = "month") %>% ggplot(aes(x=month, y=value, colour = variable, group = variable))
p2 + geom_line()+ ylab("Number of background checks")+ xlab("Month-Year") + ggtitle (label = "Background checks", subtitle = "Total and by type") + theme(plot.title = element_text(size=15), plot.subtitle = element_text(size = 10)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text.x = element_blank()) + scale_y_continuous(labels = scales::comma)
model1 <- lm(bc_clean$total ~ poly(seq_along(bc_clean$total),5), data = bc_clean)
model2 <- lm(bc_clean$total ~ poly(seq_along(bc_clean$total),5) + month, data = bc_clean)
model3 <- lm(bc_clean$total ~ poly(seq_along(bc_clean$total),5) + minus6 + minus5 + minus4 + minus3 + minus2 + minus1 + plus1 + plus2 + plus3 + plus4 + plus5 + plus6, data = bc_clean)
summary(model3)
##
## Call:
## lm(formula = bc_clean$total ~ poly(seq_along(bc_clean$total),
## 5) + minus6 + minus5 + minus4 + minus3 + minus2 + minus1 +
## plus1 + plus2 + plus3 + plus4 + plus5 + plus6, data = bc_clean)
##
## Residuals:
## Min 1Q Median 3Q Max
## -634579 -152540 -20097 102086 1278826
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1239278 16529 74.976 < 2e-16
## poly(seq_along(bc_clean$total), 5)1 8005430 252800 31.667 < 2e-16
## poly(seq_along(bc_clean$total), 5)2 2011384 251435 8.000 6.76e-14
## poly(seq_along(bc_clean$total), 5)3 -894633 259199 -3.452 0.000667
## poly(seq_along(bc_clean$total), 5)4 -736878 250252 -2.945 0.003577
## poly(seq_along(bc_clean$total), 5)5 -38420 254634 -0.151 0.880205
## minus6 -244472 251659 -0.971 0.332380
## minus5 -258062 251650 -1.025 0.306248
## minus4 -45928 251639 -0.183 0.855343
## minus3 -122874 251627 -0.488 0.625803
## minus2 18140 251614 0.072 0.942591
## minus1 398108 251600 1.582 0.114996
## plus1 857672 251570 3.409 0.000773
## plus2 660040 251556 2.624 0.009295
## plus3 545653 251541 2.169 0.031122
## plus4 38074 251527 0.151 0.879819
## plus5 -252819 251513 -1.005 0.315894
## plus6 -419304 251501 -1.667 0.096877
##
## (Intercept) ***
## poly(seq_along(bc_clean$total), 5)1 ***
## poly(seq_along(bc_clean$total), 5)2 ***
## poly(seq_along(bc_clean$total), 5)3 ***
## poly(seq_along(bc_clean$total), 5)4 **
## poly(seq_along(bc_clean$total), 5)5
## minus6
## minus5
## minus4
## minus3
## minus2
## minus1
## plus1 ***
## plus2 **
## plus3 *
## plus4
## plus5
## plus6 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 249000 on 223 degrees of freedom
## Multiple R-squared: 0.8398, Adjusted R-squared: 0.8275
## F-statistic: 68.75 on 17 and 223 DF, p-value: < 2.2e-16
confint <- confint(model3, level = 0.95)
confint <- confint[-c(1:6),]
estimate <- as.vector(summary(model3)$coefficients[7:18])
ci <- cbind(estimate,confint) %>% as.data.frame %>% rename("upper" = "97.5 %") %>% rename("lower" = "2.5 %")
pacman::p_load(plotrix)
x <- c(-6:-1, 1:6)
p3 <- ggplot(ci, aes(x = x, y = estimate)) + geom_point(size = 4) + geom_errorbar(aes(ymax = upper, ymin = lower))
p4 <- p3 + ylab("Values")+ xlab("Months") + ggtitle (label = "Variation in total background checks", subtitle = "Pre and post Sandy Hook events") + theme(plot.title = element_text(size=15), plot.subtitle = element_text(size = 10)) + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), axis.line = element_line(colour = "black")) + theme(axis.text.x = element_blank()) + scale_y_continuous(labels = scales::comma) + scale_x_discrete(breaks = c(1, 3, 5, 7, 10, 12))+ geom_hline(yintercept = 0, linetype = "dotdash") + geom_vline(xintercept = 0, linetype ="3313")
p4
Sandy Hook caused background checks to increase, with statistical significance, in the 3 months following event. Possible channel: people feeling less safe and purchasing guns. But also maybe : more systematic reporting ?
Below I disaggregate the total into handguns and rifles
The increase seems to be mostly driven by handguns. Following an episod of gun violence, the public’s response appears to be to purchase more guns. This implies that part of the demand for guns is in fact to protect oneself against possible shootings.
THE END