library(haven)
df <- read_dta("Angrist (2009)MDVE.dta")
# Create compliance indicator: 1 if the treatment received matches the treatment assigned, 0 otherwise
df$compliance_rate <- ifelse(df$coddle_assigned == 1 & df$coddle_received == 1, 1,
                      ifelse(df$coddle_assigned == 0 & df$coddle_received == 0, 1, 0))
The compliance rate is 85.76%.
summary(df$compliance_rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 1.0000 1.0000 0.8576 1.0000 1.0000
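As a cross-check on that figure, a simple cross-tabulation of assignment against treatment received shows where the noncompliance occurs; a minimal sketch using the same variables:
# Cross-tabulate assigned vs. received treatment to see which arm drives noncompliance
table(assigned = df$coddle_assigned, received = df$coddle_received)
# Share of cases where the delivered treatment matches the assignment (the compliance rate)
mean(df$coddle_assigned == df$coddle_received)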
model1_q2a <- glm(formula = reoffend1 ~ coddle_assigned, family = "binomial", data = df)
summary(model1_q2a)
##
## Call:
## glm(formula = reoffend1 ~ coddle_assigned, family = "binomial",
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6884 -0.6884 -0.6884 -0.4770 2.1119
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1163 0.3347 -6.322 2.58e-10 ***
## coddle_assigned 0.7972 0.3707 2.151 0.0315 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 312.93 on 329 degrees of freedom
## Residual deviance: 307.71 on 328 degrees of freedom
## AIC: 311.71
##
## Number of Fisher Scoring iterations: 4
coef_estimate <- 0.7972
# Calculate odds ratio
odds_ratio <- exp(coef_estimate)
(odds_ratio - 1) *100
## [1] 121.9318
# The odds ratio is exp(0.7972), about 2.22, so the odds of reoffending for individuals assigned to
# mediation or separation ("coddling") are roughly 122% higher than for those assigned to be arrested.
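The point estimate can be hedged with a confidence interval; a minimal sketch that converts the logit coefficient and its Wald interval to the odds-ratio scale:
# Odds ratio for assignment with a Wald-type 95% confidence interval
exp(cbind(OR = coef(model1_q2a), confint.default(model1_q2a)))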
model2_q2c <- glm(formula = reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + mixed + anyweapon + s_influence, family = "binomial", data = df)
summary(model2_q2c)
##
## Call:
## glm(formula = reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 +
## nonwhite + mixed + anyweapon + s_influence, family = "binomial",
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2132 -0.6831 -0.5361 -0.3754 2.5045
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5671 0.5334 -2.938 0.0033 **
## coddle_assigned 0.7774 0.3799 2.046 0.0407 *
## y82 0.1705 0.3811 0.447 0.6546
## q1 -1.0440 0.5678 -1.839 0.0660 .
## q2 -0.8478 0.4331 -1.958 0.0503 .
## q3 -0.1603 0.4355 -0.368 0.7128
## nonwhite 0.3527 0.3016 1.170 0.2422
## mixed 0.5106 0.3305 1.545 0.1224
## anyweapon -0.1201 0.3451 -0.348 0.7278
## s_influence -0.5568 0.2976 -1.871 0.0613 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 312.93 on 329 degrees of freedom
## Residual deviance: 293.43 on 320 degrees of freedom
## AIC: 313.43
##
## Number of Fisher Scoring iterations: 4
# Fit linear regression model
model1 <- lm(reoffend1 ~ coddle_assigned, data = df)
# Obtain summary of the model
summary(model1)
##
## Call:
## lm(formula = reoffend1 ~ coddle_assigned, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2110 -0.2110 -0.2110 -0.1075 0.8925
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10753 0.03982 2.700 0.00729 **
## coddle_assigned 0.10344 0.04699 2.201 0.02841 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.384 on 328 degrees of freedom
## Multiple R-squared: 0.01456, Adjusted R-squared: 0.01155
## F-statistic: 4.846 on 1 and 328 DF, p-value: 0.02841
# Fit linear regression model
model2 <- lm(reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + mixed + anyweapon + s_influence, data = df)
# Obtain summary of the model
summary(model2)
##
## Call:
## lm(formula = reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 +
## nonwhite + mixed + anyweapon + s_influence, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42949 -0.22103 -0.14599 -0.04852 1.02123
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20433 0.07597 2.690 0.00752 **
## coddle_assigned 0.09857 0.04698 2.098 0.03668 *
## y82 0.02355 0.05234 0.450 0.65308
## q1 -0.15250 0.07964 -1.915 0.05639 .
## q2 -0.12565 0.06439 -1.952 0.05187 .
## q3 -0.02436 0.06905 -0.353 0.72448
## nonwhite 0.05061 0.04313 1.173 0.24152
## mixed 0.07598 0.04905 1.549 0.12236
## anyweapon -0.01804 0.04842 -0.373 0.70963
## s_influence -0.08187 0.04345 -1.884 0.06042 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3802 on 320 degrees of freedom
## Multiple R-squared: 0.05793, Adjusted R-squared: 0.03143
## F-statistic: 2.186 on 9 and 320 DF, p-value: 0.02274
model1_q2e <- glm(formula = reoffend2 ~ coddle_received, family = "binomial", data = df)
summary(model1_q2e)
##
## Call:
## glm(formula = reoffend2 ~ coddle_received, family = "binomial",
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6407 -0.6407 -0.5329 -0.5329 2.0111
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.8803 0.2530 -7.431 1.08e-13 ***
## coddle_received 0.4012 0.3133 1.281 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 294.13 on 329 degrees of freedom
## Residual deviance: 292.44 on 328 degrees of freedom
## AIC: 296.44
##
## Number of Fisher Scoring iterations: 4
coef_estimate <- 0.4012
# Calculate odds ratio
odds_ratio <- exp(coef_estimate)
(odds_ratio - 1) *100
## [1] 49.3616
# The odds ratio is exp(0.4012), about 1.49, so the odds of reoffending for individuals who actually
# received mediation or separation are roughly 49% higher than for those who were arrested.
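As a descriptive check on this as-treated comparison, the raw reoffense rates by delivered treatment can be computed directly; a minimal sketch using the same reoffend2 and coddle_received variables:
# Mean of the second reoffense outcome within each delivered-treatment group
tapply(df$reoffend2, df$coddle_received, mean, na.rm = TRUE)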
# Load the ivreg package (install it first if needed)
library(ivreg)
# Note: in ivreg(), the regressor before "|" is treated as endogenous and the variable after "|" as the instrument
formula_iv <- reoffend1 ~ coddle_assigned | coddle_received
iv_model <- ivreg(formula_iv, data = df)
summary(iv_model)
##
## Call:
## ivreg(formula = formula_iv, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.22316 -0.22316 -0.22316 -0.07646 0.92354
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07646 0.05144 1.486 0.1381
## coddle_assigned 0.14670 0.06528 2.247 0.0253 *
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 1 328 354.694 <2e-16 ***
## Wu-Hausman 1 327 0.916 0.339
## Sargan 0 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3845 on 328 degrees of freedom
## Multiple R-Squared: 0.01201, Adjusted R-squared: 0.009
## Wald test: 5.051 on 1 and 328 DF, p-value: 0.02528
coef_estimate <- 0.14670
# The 2SLS model is linear, so this coefficient is already on the probability scale:
# the coddling treatment is associated with roughly a 14.7 percentage point higher
# probability of reoffending. Exponentiating a linear-model coefficient as an odds
# ratio (as was done for the logit models above) is not meaningful here.
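A common cross-check on an IV estimate is the Wald ratio, which divides the intention-to-treat effect by the first-stage effect of assignment on treatment received. The sketch below assumes the standard setup in which random assignment instruments for the treatment actually delivered:
# Wald / 2SLS estimate by hand: ITT effect scaled by the first stage
itt <- coef(lm(reoffend1 ~ coddle_assigned, data = df))["coddle_assigned"]
first_stage <- coef(lm(coddle_received ~ coddle_assigned, data = df))["coddle_assigned"]
itt / first_stage  # local average treatment effect of receiving mediation/separation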
QUESTION 2
library(haven)
df_green <- read_dta("GreenWinik_Criminology_2010.dta")
#DV = LATER ARREST
table(df_green$laterarr)
##
## 0 1
## 475 528
table(df_green$incjudge)
##
## 1
## 1003
#table(df_green$calendar)
df_green$calendar_new<- as.factor(df_green$calendar)
library(ivreg)
# Fit the 2SLS model and save it as model1
model1 <- ivreg(laterarr ~ toserve | calendar_new, data = df_green)
model2 <- ivreg(laterarr ~ toserve + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
model3 <- ivreg(laterarr ~ probat | calendar_new, data = df_green)
model4 <- ivreg(laterarr ~ probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
model5 <- ivreg(laterarr ~ toserve + probat | calendar_new, data = df_green)
model6 <- ivreg(laterarr ~ toserve + probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
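The "more regressors than instruments" warnings arise because ivreg() treats every right-hand-side variable before the "|" as endogenous unless it also appears after the "|"; the exogenous controls therefore need to be repeated on the instrument side (or the three-part formula exogenous | endogenous | instruments can be used). A minimal sketch for the second specification, assuming only toserve is endogenous (model2_alt is an illustrative name):
# Exogenous controls repeated as instruments; only toserve is instrumented by calendar_new
model2_alt <- ivreg(
  laterarr ~ toserve + age + agesq + female + nonblack + priorarr + priordrugarr +
    priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon +
    priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp +
    otherdrug + nondrug |
    calendar_new + age + agesq + female + nonblack + priorarr + priordrugarr +
    priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon +
    priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp +
    otherdrug + nondrug,
  data = df_green)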
library(ivreg)
# Re-fit the 2SLS models, requesting clustered robust standard errors;
# as the warnings below show, ivreg() disregards these extra arguments
model1 <- ivreg(laterarr ~ toserve | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model2 <- ivreg(laterarr ~ toserve + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model3 <- ivreg(laterarr ~ probat | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model4 <- ivreg(laterarr ~ probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model5 <- ivreg(laterarr ~ toserve + probat | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model6 <- ivreg(laterarr ~ toserve + probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
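Because ivreg() disregards the cluster and robust arguments, cluster-robust standard errors would instead be computed after fitting, for example with the sandwich and lmtest packages; a minimal sketch, assuming df_green contains the clusterid column referenced in the calls above:
# Cluster-robust inference for the first model, clustering on clusterid
library(lmtest)
library(sandwich)
coeftest(model1, vcov = vcovCL(model1, cluster = df_green$clusterid))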
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Create a table containing summaries of all models
stargazer(model1, model2, model3, model4, model5, model6, type = "text")
##
## ============================================================================================================================
## Dependent variable:
## --------------------------------------------------------------------------------------------------------
## laterarr
## (1) (2) (3) (4) (5) (6)
## ----------------------------------------------------------------------------------------------------------------------------
## toserve 0.008 -0.017 0.008 -0.028
## (0.008) (0.073) (0.008) (0.163)
##
## age -0.412 -0.404 -0.416
## (2.049) (1.158) (2.736)
##
## agesq 0.004 0.004 0.003
## (0.027) (0.016) (0.032)
##
## female -5.482 -1.894 -7.706
## (12.858) (5.433) (29.264)
##
## nonblack 2.571 0.805 3.666
## (10.489) (4.734) (18.936)
##
## priorarr 0.786 0.576 0.916
## (3.077) (1.770) (3.899)
##
## priordrugarr 1.602 0.255 2.437
## (4.637) (2.371) (10.630)
##
## priorfelarr 0.640 1.672
## (5.418) (4.207)
##
## priorfeldrugarr
##
##
## priorcon
##
##
## priordrugcon
##
##
## priorfelcon
##
##
## priorfeldrugcon
##
##
## pwid
##
##
## dist
##
##
## marijuana
##
##
## cocaine
##
##
## crack
##
##
## heroin
##
##
## pcp
##
##
## otherdrug
##
##
## nondrug
##
##
## probat 0.002 0.011 0.003 -0.007
## (0.005) (0.026) (0.006) (0.076)
##
## Constant 0.471*** 8.065 0.504*** 6.836 0.443*** 8.827
## (0.057) (31.395) (0.057) (16.744) (0.084) (48.144)
##
## ----------------------------------------------------------------------------------------------------------------------------
## Observations 1,003 1,003 1,003 1,003 1,003 1,003
## R2 -0.099 -26.158 0.002 -7.782 -0.094 -45.889
## Adjusted R2 -0.100 -26.376 0.001 -7.853 -0.097 -46.267
## Residual Std. Error 0.524 (df = 1001) 2.614 (df = 994) 0.499 (df = 1001) 1.486 (df = 994) 0.523 (df = 1000) 3.434 (df = 994)
## ============================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
library(stargazer)
# Create a table containing summaries of all models and save it as a LaTeX document
#stargazer(model1, model2, model3, model4, model5, model6, type = "latex", out = "model_summary.tex")
Part 3
library(readxl)
df_wage<- read_excel("Minimum Wage.xlsx")
library(MatchIt)
# Nearest-neighbor 1:1 matching with replacement
m.out <- MatchIt::matchit(NJ.PA ~ EmploymentPost + EmploymentPre + WagePre + BurgerKing + KFC + Roys + Wendys,
                          data = df_wage, method = "nearest",
                          ratio = 1, replace = TRUE)
summary(m.out)
##
## Call:
## MatchIt::matchit(formula = NJ.PA ~ EmploymentPost + EmploymentPre +
## WagePre + BurgerKing + KFC + Roys + Wendys, data = df_wage,
## method = "nearest", replace = TRUE, ratio = 1)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.8092 0.7815 0.4794 0.4892 0.1027
## EmploymentPost 17.2216 17.7637 -0.0632 1.1723 0.0254
## EmploymentPre 17.0485 20.0993 -0.3444 0.5714 0.0637
## WagePre 4.6061 4.6286 -0.0658 0.9295 0.0291
## BurgerKing 0.4047 0.4521 -0.0965 . 0.0474
## KFC 0.2241 0.1370 0.2089 . 0.0871
## Roys 0.2508 0.2329 0.0414 . 0.0180
## Wendys 0.1204 0.1781 -0.1772 . 0.0577
## eCDF Max
## distance 0.2329
## EmploymentPost 0.0804
## EmploymentPre 0.1142
## WagePre 0.0876
## BurgerKing 0.0474
## KFC 0.0871
## Roys 0.0180
## Wendys 0.0577
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.8092 0.8090 0.0030 0.9815 0.0088
## EmploymentPost 17.2216 18.7416 -0.1773 0.9434 0.0477
## EmploymentPre 17.0485 16.9398 0.0123 1.4910 0.0488
## WagePre 4.6061 4.6323 -0.0766 0.8893 0.0474
## BurgerKing 0.4047 0.3980 0.0136 . 0.0067
## KFC 0.2241 0.1237 0.2406 . 0.1003
## Roys 0.2508 0.3211 -0.1620 . 0.0702
## Wendys 0.1204 0.1572 -0.1130 . 0.0368
## eCDF Max Std. Pair Dist.
## distance 0.0502 0.0372
## EmploymentPost 0.1338 1.0397
## EmploymentPre 0.1438 0.5895
## WagePre 0.1371 1.1510
## BurgerKing 0.0067 0.9539
## KFC 0.1003 0.2887
## Roys 0.0702 0.7638
## Wendys 0.0368 0.4419
##
## Sample Sizes:
## Control Treated
## All 73. 299
## Matched (ESS) 35.83 299
## Matched 63. 299
## Unmatched 10. 0
## Discarded 0. 0
# Balance diagnostic plots for the matched sample (density, QQ, and eCDF plots; figures not shown)
plot(m.out, type = "density")
plot(m.out, type = "qq")
plot(m.out, type = "ecdf")
m.data <- match.data(m.out)
# Load the cobalt package (install it first if needed)
library(cobalt)
## cobalt (Version 4.5.5, Build Date: 2024-04-02)
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
##
## lalonde
covs <- subset(df_wage, select = -c(NJ.PA, EmploymentPost))
df_wage$p.score <- glm(NJ.PA ~ EmploymentPre + WagePre + BurgerKing + KFC + Roys + Wendys,
data = df_wage,
family = "binomial")$fitted.values
df_wage$att.weights <- with(df_wage, NJ.PA + (1-NJ.PA)*p.score/(1-p.score))
bal.tab(covs, treat = df_wage$NJ.PA, weights = df_wage$att.weights)
## Balance Measures
## Type Diff.Adj
## EmploymentPre Contin. 0.0506
## WagePre Contin. -0.0845
## BurgerKing Binary 0.0264
## KFC Binary -0.0081
## Roys Binary -0.0195
## Wendys Binary 0.0012
##
## Effective sample sizes
## Control Treated
## Unadjusted 73. 299
## Adjusted 64.11 299
bal.tab(NJ.PA ~ covs, data = df_wage,
weights = "att.weights",
binary = "std", continuous = "std")
## Balance Measures
## Type Diff.Adj
## EmploymentPre Contin. 0.0506
## WagePre Contin. -0.0845
## BurgerKing Binary 0.0537
## KFC Binary -0.0194
## Roys Binary -0.0450
## Wendys Binary 0.0037
##
## Effective sample sizes
## Control Treated
## Unadjusted 73. 299
## Adjusted 64.11 299
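The same balance comparison can be shown graphically with cobalt's love.plot(), which accepts the same arguments as bal.tab(); a minimal sketch:
# Standardized mean differences before and after applying the ATT weights
love.plot(NJ.PA ~ covs, data = df_wage, weights = "att.weights",
          binary = "std", continuous = "std")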
df_wage$NJ.PA<- as.factor(df_wage$NJ.PA)
propensity_model <- glm(NJ.PA ~ EmploymentPre + WagePre + BurgerKing + KFC + Roys + Wendys,
data = df_wage,
family = binomial)
df_wage$propensity_score <- predict(propensity_model, type = "response")
# Plot propensity score distributions within each treatment group
plot(propensity_score ~ NJ.PA, data = df_wage, col = c("blue", "red"),
xlab = "Treatment (NJ.PA)", ylab = "Propensity Score",
main = "Propensity Score Distributions by Treatment Group",
pch = 19)
legend("topright", legend = c("Pennsylvania (0)", "New Jersey (1)"),
col = c("blue", "red"), pch = 19, cex = 0.8)

matched_data <- MatchIt::match.data(m.out)
# Difference in mean post-period employment between matched NJ and PA stores
# (with nearest-neighbor matching on the treated, this estimates the ATT rather than the ATE)
ATE <- with(matched_data, mean(EmploymentPost[NJ.PA == 1]) - mean(EmploymentPost[NJ.PA == 0]))
print(ATE)
## [1] 0.07077826
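Because the matching was done with replacement, match.data() also returns matching weights that account for controls being reused; a minimal sketch of the weighted comparison using that weights column:
# Weighted difference in post-period employment using the matching weights
att_weighted <- with(matched_data,
                     weighted.mean(EmploymentPost[NJ.PA == 1], weights[NJ.PA == 1]) -
                     weighted.mean(EmploymentPost[NJ.PA == 0], weights[NJ.PA == 0]))
print(att_weighted)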