library(haven)#read in Angirst (2009) paper dataMDVE <-read_dta("C:/Users/rlutt/Downloads/Angrist (2009)MDVE.dta")#a.) determine compliance rates in the experimentlibrary(janitor)
Attaching package: 'janitor'
The following objects are masked from 'package:stats':
chisq.test, fisher.test
#I run this analysis as as a linear model to generate an r-squared value and follow the method used by Angrist.#a.)mo<-lm(reoffend1~coddle_assigned, data=MDVE)summary(mo)
Call:
lm(formula = reoffend1 ~ coddle_assigned, data = MDVE)
Residuals:
Min 1Q Median 3Q Max
-0.2110 -0.2110 -0.2110 -0.1075 0.8925
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.10753 0.03982 2.700 0.00729 **
coddle_assigned 0.10344 0.04699 2.201 0.02841 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.384 on 328 degrees of freedom
Multiple R-squared: 0.01456, Adjusted R-squared: 0.01155
F-statistic: 4.846 on 1 and 328 DF, p-value: 0.02841
#This is not the true treatment effect since not all the policemen complied with the treatment. A true treatment effect would take into account actual compliance. Rather, this can be interpreted as the intent to treat.
#instrumental variable approach: coddle_assigned is the instrumental variablelibrary(ivreg)
Warning: package 'ivreg' was built under R version 4.3.3
Warning: package 'modelsummary' was built under R version 4.3.3
`modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
backend. Learn more at: https://vincentarelbundock.github.io/tinytable/
Revert to `kableExtra` for one session:
options(modelsummary_factory_default = 'kableExtra')
Change the default backend persistently:
config_modelsummary(factory_default = 'gt')
Silence this message forever:
config_modelsummary(startup_message = FALSE)
library(sandwich)
Warning: package 'sandwich' was built under R version 4.3.3
library(lmtest)
Warning: package 'lmtest' was built under R version 4.3.3
Loading required package: zoo
Attaching package: 'zoo'
The following objects are masked from 'package:base':
as.Date, as.Date.numeric
# robust standard errors with lm()ivmultrse <-coeftest(ivmult,vcov = vcovCL,type ="HC1")modelsummary(ivmultrse)
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
#import data GW <-read_dta("C:/Users/rlutt/Downloads/GreenWinik_Criminology_2010.dta")# filter to include judge in analysisGWfiltered<-filter(GW, incjudge==1)
#replicate Table 5#First, I run the models, then I generate the robust clustered standard errors.library(ivreg)library(modelsummary)library(sandwich)library(lmtest)ivincar<-ivreg(laterarr~ toserve|calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8, data= GWfiltered)# Clustered robust standard errors with lm()model1_robust_clustered <-coeftest(ivincar,vcov = vcovCL,type ="HC1",cluster =~clusterid)summary(ivincar)
Call:
ivreg(formula = laterarr ~ toserve | calendar1 + calendar2 +
calendar3 + calendar4 + calendar5 + calendar6 + calendar7 +
calendar8, data = GWfiltered)
Residuals:
Min 1Q Median 3Q Max
-1.9078 -0.4713 0.3372 0.5287 0.5287
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.471286 0.057247 8.233 5.68e-16 ***
toserve 0.007981 0.007933 1.006 0.315
Diagnostic tests:
df1 df2 statistic p-value
Weak instruments 8 994 3.188 0.00141 **
Wu-Hausman 1 1000 3.013 0.08292 .
Sargan 7 NA 14.367 0.04502 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.5239 on 1001 degrees of freedom
Multiple R-Squared: -0.0986, Adjusted R-squared: -0.0997
Wald test: 1.012 on 1 and 1001 DF, p-value: 0.3147
# Clustered robust standard errors with lm()model5_robust_clustered <-coeftest(fulliv,vcov = vcovCL,type ="HC1",cluster =~clusterid)both<-ivreg(laterarr~ toserve + probat | calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8, data=GWfiltered)# Clustered robust standard errors with lm()model6_robust_clustered <-coeftest(both,vcov = vcovCL,type ="HC1",cluster =~clusterid)
library(modelsummary)m_list <-list("1"=model1_robust_clustered, "2"= model2_robust_clustered, "3"=model3_robust_clustered, "4"=model4_robust_clustered, "5"=model5_robust_clustered, "6"= model6_robust_clustered)#msummary(m_list, title= "Table 5. 2SLS Estimates of the Effects of Length of Incarceration and Probation on Recidivism")#Note, here the code runs to produce the table, but when I try to render it I get an error message saying I cannot render it because the arguments have different lengths. Because the models are different they will obviously have different lengths. I screenshotted the table and pasted it in the excel document and I place # next to the codes so the code would render.
Rows: 372 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (8): NJ.PA, EmploymentPost, EmploymentPre, WagePre, BurgerKing, KFC, Roy...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#write a naive regression model model to estimate the effect of raising minimum wage on employment in the fast-food industrynaivemodel<-lm(EmploymentPost~EmploymentPre + WagePre + BurgerKing + Roys + KFC + Wendys + NJ.PA, data=min)summary(naivemodel)
Call:
lm(formula = EmploymentPost ~ EmploymentPre + WagePre + BurgerKing +
Roys + KFC + Wendys + NJ.PA, data = min)
Residuals:
Min 1Q Median 3Q Max
-18.1190 -4.0558 -0.7142 3.0178 30.7417
Coefficients: (1 not defined because of singularities)
Estimate Std. Error t value Pr(>|t|)
(Intercept) 11.44019 5.12414 2.233 0.02618 *
EmploymentPre 0.41576 0.04152 10.013 < 2e-16 ***
WagePre -0.32061 1.05777 -0.303 0.76198
BurgerKing 0.89322 1.13730 0.785 0.43274
Roys -1.52545 1.20632 -1.265 0.20684
KFC -4.36154 1.30780 -3.335 0.00094 ***
Wendys NA NA NA NA
NJ.PA 1.16862 0.89810 1.301 0.19400
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 6.806 on 365 degrees of freedom
Multiple R-squared: 0.3603, Adjusted R-squared: 0.3498
F-statistic: 34.26 on 6 and 365 DF, p-value: < 2.2e-16
#In this model, the outcome, a minimum wage increase in NJ is not the result of an exogenous force, so we cannot claim unconfoundedness. The political atmosphere in the state and the demographic makeup are just some examples of endogenous factors affecting employment in NJ.
#estimate propensity scoreslibrary(cobalt)
Warning: package 'cobalt' was built under R version 4.3.3
Call:
MatchIt::matchit(formula = NJ.PA ~ EmploymentPre + WagePre +
BurgerKing + Roys + KFC + Wendys, data = min, method = "cem",
estimand = "ATT", s.weights = NULL, verbose = FALSE)
Summary of Balance for All Data:
Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
EmploymentPre 17.0485 20.0993 -0.3444 0.5714 0.0637
WagePre 4.6061 4.6286 -0.0658 0.9295 0.0291
BurgerKing 0.4047 0.4521 -0.0965 . 0.0474
Roys 0.2508 0.2329 0.0414 . 0.0180
KFC 0.2241 0.1370 0.2089 . 0.0871
Wendys 0.1204 0.1781 -0.1772 . 0.0577
eCDF Max
EmploymentPre 0.1142
WagePre 0.0876
BurgerKing 0.0474
Roys 0.0180
KFC 0.0871
Wendys 0.0577
Summary of Balance for Matched Data:
Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
EmploymentPre 15.9284 15.3844 0.0614 0.9049 0.0231
WagePre 4.5240 4.5219 0.0063 1.0053 0.0030
BurgerKing 0.4451 0.4451 0.0000 . 0.0000
Roys 0.1768 0.1768 0.0000 . 0.0000
KFC 0.2622 0.2622 0.0000 . 0.0000
Wendys 0.1159 0.1159 0.0000 . 0.0000
eCDF Max Std. Pair Dist.
EmploymentPre 0.1128 0.2719
WagePre 0.0305 0.0284
BurgerKing 0.0000 0.0000
Roys 0.0000 0.0000
KFC 0.0000 0.0000
Wendys 0.0000 0.0000
Sample Sizes:
Control Treated
All 73. 299
Matched (ESS) 40.47 164
Matched 64. 164
Unmatched 9. 135
Discarded 0. 0
#CEM is the superior method given that the mean differences are smaller than in the nearest neighbor matching method#plot covariate balance between the treatment and control groups: shown in plot for before and after matchingplot(m.outcem, type="density")
#create dataset with matches library(MatchIt)
Warning: package 'MatchIt' was built under R version 4.3.3
Attaching package: 'MatchIt'
The following object is masked from 'package:cobalt':
lalonde
m.data<-match.data(m.out)#estimate the causal effect using matched data: since the data is matched based on covariates there is no need to include them in the new modelmatchedmodel<-lm(EmploymentPost~NJ.PA , data=m.data)summary(matchedmodel)
Call:
lm(formula = EmploymentPost ~ NJ.PA, data = m.data)
Residuals:
Min 1Q Median 3Q Max
-17.222 -6.030 -0.222 5.278 38.278
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 17.20385 1.04330 16.490 <2e-16 ***
NJ.PA 0.01773 1.15113 0.015 0.988
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 8.411 on 362 degrees of freedom
Multiple R-squared: 6.55e-07, Adjusted R-squared: -0.002762
F-statistic: 0.0002371 on 1 and 362 DF, p-value: 0.9877
#the results show no significant causal effect of raising minimum wage