library(haven)
df <- read_dta("Angrist (2009)MDVE.dta")
# Create compliance indicator: 1 if the treatment received matches the treatment assigned, 0 otherwise
df$compliance_rate <- ifelse(df$coddle_assigned == 1 & df$coddle_received == 1, 1,
                      ifelse(df$coddle_assigned == 0 & df$coddle_received == 0, 1, 0))
The compliance rate is 85.76%.
summary(df$compliance_rate)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.0000 1.0000 1.0000 0.8576 1.0000 1.0000
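As a cross-check on that figure, a simple cross-tabulation of assignment against treatment received shows where the noncompliance occurs; a minimal sketch using the same variables:
# Cross-tabulate assigned vs. received treatment to see which arm drives noncompliance
table(assigned = df$coddle_assigned, received = df$coddle_received)
# Share of cases where the delivered treatment matches the assignment (the compliance rate)
mean(df$coddle_assigned == df$coddle_received)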
model1_q2a <- glm(formula = reoffend1 ~ coddle_assigned, family = "binomial", data = df)
summary(model1_q2a)
##
## Call:
## glm(formula = reoffend1 ~ coddle_assigned, family = "binomial",
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6884 -0.6884 -0.6884 -0.4770 2.1119
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.1163 0.3347 -6.322 2.58e-10 ***
## coddle_assigned 0.7972 0.3707 2.151 0.0315 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 312.93 on 329 degrees of freedom
## Residual deviance: 307.71 on 328 degrees of freedom
## AIC: 311.71
##
## Number of Fisher Scoring iterations: 4
coef_estimate <- 0.7972
# Calculate odds ratio
odds_ratio <- exp(coef_estimate)
(odds_ratio - 1) *100
## [1] 121.9318
# The odds ratio is exp(0.7972), about 2.22, so the odds of reoffending for individuals assigned to
# mediation or separation ("coddling") are roughly 122% higher than for those assigned to be arrested.
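The point estimate can be hedged with a confidence interval; a minimal sketch that converts the logit coefficient and its Wald interval to the odds-ratio scale:
# Odds ratio for assignment with a Wald-type 95% confidence interval
exp(cbind(OR = coef(model1_q2a), confint.default(model1_q2a)))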
model2_q2c <- glm(formula = reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + mixed + anyweapon + s_influence, family = "binomial", data = df)
summary(model2_q2c)
##
## Call:
## glm(formula = reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 +
## nonwhite + mixed + anyweapon + s_influence, family = "binomial",
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.2132 -0.6831 -0.5361 -0.3754 2.5045
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.5671 0.5334 -2.938 0.0033 **
## coddle_assigned 0.7774 0.3799 2.046 0.0407 *
## y82 0.1705 0.3811 0.447 0.6546
## q1 -1.0440 0.5678 -1.839 0.0660 .
## q2 -0.8478 0.4331 -1.958 0.0503 .
## q3 -0.1603 0.4355 -0.368 0.7128
## nonwhite 0.3527 0.3016 1.170 0.2422
## mixed 0.5106 0.3305 1.545 0.1224
## anyweapon -0.1201 0.3451 -0.348 0.7278
## s_influence -0.5568 0.2976 -1.871 0.0613 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 312.93 on 329 degrees of freedom
## Residual deviance: 293.43 on 320 degrees of freedom
## AIC: 313.43
##
## Number of Fisher Scoring iterations: 4
# Fit linear regression model
model1 <- lm(reoffend1 ~ coddle_assigned, data = df)
# Obtain summary of the model
summary(model1)
##
## Call:
## lm(formula = reoffend1 ~ coddle_assigned, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.2110 -0.2110 -0.2110 -0.1075 0.8925
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.10753 0.03982 2.700 0.00729 **
## coddle_assigned 0.10344 0.04699 2.201 0.02841 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.384 on 328 degrees of freedom
## Multiple R-squared: 0.01456, Adjusted R-squared: 0.01155
## F-statistic: 4.846 on 1 and 328 DF, p-value: 0.02841
# Fit linear regression model
model2 <- lm(reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + mixed + anyweapon + s_influence, data = df)
# Obtain summary of the model
summary(model2)
##
## Call:
## lm(formula = reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 +
## nonwhite + mixed + anyweapon + s_influence, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.42949 -0.22103 -0.14599 -0.04852 1.02123
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.20433 0.07597 2.690 0.00752 **
## coddle_assigned 0.09857 0.04698 2.098 0.03668 *
## y82 0.02355 0.05234 0.450 0.65308
## q1 -0.15250 0.07964 -1.915 0.05639 .
## q2 -0.12565 0.06439 -1.952 0.05187 .
## q3 -0.02436 0.06905 -0.353 0.72448
## nonwhite 0.05061 0.04313 1.173 0.24152
## mixed 0.07598 0.04905 1.549 0.12236
## anyweapon -0.01804 0.04842 -0.373 0.70963
## s_influence -0.08187 0.04345 -1.884 0.06042 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3802 on 320 degrees of freedom
## Multiple R-squared: 0.05793, Adjusted R-squared: 0.03143
## F-statistic: 2.186 on 9 and 320 DF, p-value: 0.02274
model1_q2e <- glm(formula = reoffend2 ~ coddle_received, family = "binomial", data = df)
summary(model1_q2e)
##
## Call:
## glm(formula = reoffend2 ~ coddle_received, family = "binomial",
## data = df)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.6407 -0.6407 -0.5329 -0.5329 2.0111
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.8803 0.2530 -7.431 1.08e-13 ***
## coddle_received 0.4012 0.3133 1.281 0.2
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 294.13 on 329 degrees of freedom
## Residual deviance: 292.44 on 328 degrees of freedom
## AIC: 296.44
##
## Number of Fisher Scoring iterations: 4
coef_estimate <- 0.4012
# Calculate odds ratio
odds_ratio <- exp(coef_estimate)
(odds_ratio - 1) *100
## [1] 49.3616
# The odds ratio is exp(0.4012), about 1.49, so the odds of reoffending for individuals who actually
# received mediation or separation are roughly 49% higher than for those who were arrested.
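As a descriptive check on this as-treated comparison, the raw reoffense rates by delivered treatment can be computed directly; a minimal sketch using the same reoffend2 and coddle_received variables:
# Mean of the second reoffense outcome within each delivered-treatment group
tapply(df$reoffend2, df$coddle_received, mean, na.rm = TRUE)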
# Load the ivreg package (install it first if needed)
library(ivreg)
# Note: in ivreg(), the regressor before "|" is treated as endogenous and the variable after "|" as the instrument
formula_iv <- reoffend1 ~ coddle_assigned | coddle_received
iv_model <- ivreg(formula_iv, data = df)
summary(iv_model)
##
## Call:
## ivreg(formula = formula_iv, data = df)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.22316 -0.22316 -0.22316 -0.07646 0.92354
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.07646 0.05144 1.486 0.1381
## coddle_assigned 0.14670 0.06528 2.247 0.0253 *
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 1 328 354.694 <2e-16 ***
## Wu-Hausman 1 327 0.916 0.339
## Sargan 0 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3845 on 328 degrees of freedom
## Multiple R-Squared: 0.01201, Adjusted R-squared: 0.009
## Wald test: 5.051 on 1 and 328 DF, p-value: 0.02528
coef_estimate <- 0.14670
# The 2SLS model is linear, so this coefficient is already on the probability scale:
# the coddling treatment is associated with roughly a 14.7 percentage point higher
# probability of reoffending. Exponentiating a linear-model coefficient as an odds
# ratio (as was done for the logit models above) is not meaningful here.
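A common cross-check on an IV estimate is the Wald ratio, which divides the intention-to-treat effect by the first-stage effect of assignment on treatment received. The sketch below assumes the standard setup in which random assignment instruments for the treatment actually delivered:
# Wald / 2SLS estimate by hand: ITT effect scaled by the first stage
itt <- coef(lm(reoffend1 ~ coddle_assigned, data = df))["coddle_assigned"]
first_stage <- coef(lm(coddle_received ~ coddle_assigned, data = df))["coddle_assigned"]
itt / first_stage  # local average treatment effect of receiving mediation/separation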
QUESTION 2
library(haven)
df_green <- read_dta("GreenWinik_Criminology_2010.dta")
#DV = LATER ARREST
table(df_green$laterarr)
##
## 0 1
## 475 528
table(df_green$incjudge)
##
## 1
## 1003
#table(df_green$calendar)
df_green$calendar_new<- as.factor(df_green$calendar)
library(ivreg)
# Fit the 2SLS model and save it as model1
model1 <- ivreg(laterarr ~ toserve | calendar_new, data = df_green)
model2 <- ivreg(laterarr ~ toserve + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
model3 <- ivreg(laterarr ~ probat | calendar_new, data = df_green)
model4 <- ivreg(laterarr ~ probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
model5 <- ivreg(laterarr ~ toserve + probat | calendar_new, data = df_green)
model6 <- ivreg(laterarr ~ toserve + probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
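The "more regressors than instruments" warnings arise because ivreg() treats every right-hand-side variable before the "|" as endogenous unless it also appears after the "|"; the exogenous controls therefore need to be repeated on the instrument side (or the three-part formula exogenous | endogenous | instruments can be used). A minimal sketch for the second specification, assuming only toserve is endogenous (model2_alt is an illustrative name):
# Exogenous controls repeated as instruments; only toserve is instrumented by calendar_new
model2_alt <- ivreg(
  laterarr ~ toserve + age + agesq + female + nonblack + priorarr + priordrugarr +
    priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon +
    priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp +
    otherdrug + nondrug |
    calendar_new + age + agesq + female + nonblack + priorarr + priordrugarr +
    priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon +
    priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp +
    otherdrug + nondrug,
  data = df_green)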
library(ivreg)
# Re-fit the 2SLS models, requesting clustered robust standard errors;
# as the warnings below show, ivreg() disregards these extra arguments
model1 <- ivreg(laterarr ~ toserve | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model2 <- ivreg(laterarr ~ toserve + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model3 <- ivreg(laterarr ~ probat | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model4 <- ivreg(laterarr ~ probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model5 <- ivreg(laterarr ~ toserve + probat | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
model6 <- ivreg(laterarr ~ toserve + probat + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid + dist + marijuana + cocaine + crack + heroin + pcp + otherdrug + nondrug | calendar_new, data = df_green, cluster = "clusterid", robust = TRUE)
## Warning in ivreg.fit(X, Y, Z, weights, offset, method, ...): more regressors
## than instruments
## Warning: In lm.fit(z, x, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
## Warning: In lm.fit(xz, y, offset = offset, ...) :
## extra arguments 'cluster', 'robust' will be disregarded
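Because ivreg() disregards the cluster and robust arguments, cluster-robust standard errors would instead be computed after fitting, for example with the sandwich and lmtest packages; a minimal sketch, assuming df_green contains the clusterid column referenced in the calls above:
# Cluster-robust inference for the first model, clustering on clusterid
library(lmtest)
library(sandwich)
coeftest(model1, vcov = vcovCL(model1, cluster = df_green$clusterid))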
library(stargazer)
##
## Please cite as:
## Hlavac, Marek (2022). stargazer: Well-Formatted Regression and Summary Statistics Tables.
## R package version 5.2.3. https://CRAN.R-project.org/package=stargazer
# Create a table containing summaries of all models
stargazer(model1, model2, model3, model4, model5, model6, type = "text")
##
## ============================================================================================================================
## Dependent variable:
## --------------------------------------------------------------------------------------------------------
## laterarr
## (1) (2) (3) (4) (5) (6)
## ----------------------------------------------------------------------------------------------------------------------------
## toserve 0.008 -0.017 0.008 -0.028
## (0.008) (0.073) (0.008) (0.163)
##
## age -0.412 -0.404 -0.416
## (2.049) (1.158) (2.736)
##
## agesq 0.004 0.004 0.003
## (0.027) (0.016) (0.032)
##
## female -5.482 -1.894 -7.706
## (12.858) (5.433) (29.264)
##
## nonblack 2.571 0.805 3.666
## (10.489) (4.734) (18.936)
##
## priorarr 0.786 0.576 0.916
## (3.077) (1.770) (3.899)
##
## priordrugarr 1.602 0.255 2.437
## (4.637) (2.371) (10.630)
##
## priorfelarr 0.640 1.672
## (5.418) (4.207)
##
## priorfeldrugarr
##
##
## priorcon
##
##
## priordrugcon
##
##
## priorfelcon
##
##
## priorfeldrugcon
##
##
## pwid
##
##
## dist
##
##
## marijuana
##
##
## cocaine
##
##
## crack
##
##
## heroin
##
##
## pcp
##
##
## otherdrug
##
##
## nondrug
##
##
## probat 0.002 0.011 0.003 -0.007
## (0.005) (0.026) (0.006) (0.076)
##
## Constant 0.471*** 8.065 0.504*** 6.836 0.443*** 8.827
## (0.057) (31.395) (0.057) (16.744) (0.084) (48.144)
##
## ----------------------------------------------------------------------------------------------------------------------------
## Observations 1,003 1,003 1,003 1,003 1,003 1,003
## R2 -0.099 -26.158 0.002 -7.782 -0.094 -45.889
## Adjusted R2 -0.100 -26.376 0.001 -7.853 -0.097 -46.267
## Residual Std. Error 0.524 (df = 1001) 2.614 (df = 994) 0.499 (df = 1001) 1.486 (df = 994) 0.523 (df = 1000) 3.434 (df = 994)
## ============================================================================================================================
## Note: *p<0.1; **p<0.05; ***p<0.01
library(stargazer)
# Create a table containing summaries of all models and save it as a LaTeX document
#stargazer(model1, model2, model3, model4, model5, model6, type = "latex", out = "model_summary.tex")
Part 3
library(readxl)
df_wage<- read_excel("Minimum Wage.xlsx")
library(MatchIt)
# Nearest-neighbor 1:1 matching with replacement
m.out <- MatchIt::matchit(NJ.PA ~ EmploymentPost + EmploymentPre + WagePre + BurgerKing + KFC + Roys + Wendys,
                          data = df_wage, method = "nearest",
                          ratio = 1, replace = TRUE)
summary(m.out)
##
## Call:
## MatchIt::matchit(formula = NJ.PA ~ EmploymentPost + EmploymentPre +
## WagePre + BurgerKing + KFC + Roys + Wendys, data = df_wage,
## method = "nearest", replace = TRUE, ratio = 1)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.8092 0.7815 0.4794 0.4892 0.1027
## EmploymentPost 17.2216 17.7637 -0.0632 1.1723 0.0254
## EmploymentPre 17.0485 20.0993 -0.3444 0.5714 0.0637
## WagePre 4.6061 4.6286 -0.0658 0.9295 0.0291
## BurgerKing 0.4047 0.4521 -0.0965 . 0.0474
## KFC 0.2241 0.1370 0.2089 . 0.0871
## Roys 0.2508 0.2329 0.0414 . 0.0180
## Wendys 0.1204 0.1781 -0.1772 . 0.0577
## eCDF Max
## distance 0.2329
## EmploymentPost 0.0804
## EmploymentPre 0.1142
## WagePre 0.0876
## BurgerKing 0.0474
## KFC 0.0871
## Roys 0.0180
## Wendys 0.0577
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
## distance 0.8092 0.8090 0.0030 0.9815 0.0088
## EmploymentPost 17.2216 18.7416 -0.1773 0.9434 0.0477
## EmploymentPre 17.0485 16.9398 0.0123 1.4910 0.0488
## WagePre 4.6061 4.6323 -0.0766 0.8893 0.0474
## BurgerKing 0.4047 0.3980 0.0136 . 0.0067
## KFC 0.2241 0.1237 0.2406 . 0.1003
## Roys 0.2508 0.3211 -0.1620 . 0.0702
## Wendys 0.1204 0.1572 -0.1130 . 0.0368
## eCDF Max Std. Pair Dist.
## distance 0.0502 0.0372
## EmploymentPost 0.1338 1.0397
## EmploymentPre 0.1438 0.5895
## WagePre 0.1371 1.1510
## BurgerKing 0.0067 0.9539
## KFC 0.1003 0.2887
## Roys 0.0702 0.7638
## Wendys 0.0368 0.4419
##
## Sample Sizes:
## Control Treated
## All 73. 299
## Matched (ESS) 35.83 299
## Matched 63. 299
## Unmatched 10. 0
## Discarded 0. 0
# Balance diagnostic plots for the matched sample (density, QQ, and eCDF plots; figures not shown)
plot(m.out, type = "density")
plot(m.out, type = "qq")
plot(m.out, type = "ecdf")
m.data <- match.data(m.out)
# Load the cobalt package (install it first if needed)
library(cobalt)
## cobalt (Version 4.5.5, Build Date: 2024-04-02)
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
##
## lalonde
covs <- subset(df_wage, select = -c(NJ.PA, EmploymentPost))
df_wage$p.score <- glm(NJ.PA ~ EmploymentPre + WagePre + BurgerKing + KFC + Roys + Wendys,
data = df_wage,
family = "binomial")$fitted.values
df_wage$att.weights <- with(df_wage, NJ.PA + (1-NJ.PA)*p.score/(1-p.score))
bal.tab(covs, treat = df_wage$NJ.PA, weights = df_wage$att.weights)
## Balance Measures
## Type Diff.Adj
## EmploymentPre Contin. 0.0506
## WagePre Contin. -0.0845
## BurgerKing Binary 0.0264
## KFC Binary -0.0081
## Roys Binary -0.0195
## Wendys Binary 0.0012
##
## Effective sample sizes
## Control Treated
## Unadjusted 73. 299
## Adjusted 64.11 299
bal.tab(NJ.PA ~ covs, data = df_wage,
weights = "att.weights",
binary = "std", continuous = "std")
## Balance Measures
## Type Diff.Adj
## EmploymentPre Contin. 0.0506
## WagePre Contin. -0.0845
## BurgerKing Binary 0.0537
## KFC Binary -0.0194
## Roys Binary -0.0450
## Wendys Binary 0.0037
##
## Effective sample sizes
## Control Treated
## Unadjusted 73. 299
## Adjusted 64.11 299
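The same balance comparison can be shown graphically with cobalt's love.plot(), which accepts the same arguments as bal.tab(); a minimal sketch:
# Standardized mean differences before and after applying the ATT weights
love.plot(NJ.PA ~ covs, data = df_wage, weights = "att.weights",
          binary = "std", continuous = "std")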
df_wage$NJ.PA<- as.factor(df_wage$NJ.PA)
propensity_model <- glm(NJ.PA ~ EmploymentPre + WagePre + BurgerKing + KFC + Roys + Wendys,
data = df_wage,
family = binomial)
df_wage$propensity_score <- predict(propensity_model, type = "response")
# Plot propensity score distributions within each treatment group
plot(propensity_score ~ NJ.PA, data = df_wage, col = c("blue", "red"),
xlab = "Treatment (NJ.PA)", ylab = "Propensity Score",
main = "Propensity Score Distributions by Treatment Group",
pch = 19)
legend("topright", legend = c("Pennsylvania (0)", "New Jersey (1)"),
col = c("blue", "red"), pch = 19, cex = 0.8)

matched_data <- MatchIt::match.data(m.out)
# Difference in mean post-period employment between matched NJ and PA stores
# (with nearest-neighbor matching on the treated, this estimates the ATT rather than the ATE)
ATE <- with(matched_data, mean(EmploymentPost[NJ.PA == 1]) - mean(EmploymentPost[NJ.PA == 0]))
print(ATE)
## [1] 0.07077826
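Because the matching was done with replacement, match.data() also returns matching weights that account for controls being reused; a minimal sketch of the weighted comparison using that weights column:
# Weighted difference in post-period employment using the matching weights
att_weighted <- with(matched_data,
                     weighted.mean(EmploymentPost[NJ.PA == 1], weights[NJ.PA == 1]) -
                     weighted.mean(EmploymentPost[NJ.PA == 0], weights[NJ.PA == 0]))
print(att_weighted)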