Causal Inference HW 4

Author

R Lutinen

Part 1, Q1

library(haven)

#read in Angirst (2009) paper data
MDVE <- read_dta("C:/Users/rlutt/Downloads/Angrist (2009)MDVE.dta")

#a.) determine compliance rates in the experiment

library(janitor)

Attaching package: 'janitor'
The following objects are masked from 'package:stats':

    chisq.test, fisher.test
t1<-tabyl(MDVE,t_random, t_final)

t1 %>%
  adorn_percentages("row") %>%
  adorn_pct_formatting(digits = 2) %>%
  adorn_ns()
 t_random           1           2           3           4
        1 97.85% (91)  0.00%  (0)  1.08%  (1)  1.08%  (1)
        2 17.27% (19) 76.36% (84)  4.55%  (5)  1.82%  (2)
        3 20.47% (26)  3.94%  (5) 65.35% (83) 10.24% (13)

Q2

#I run this analysis as  as a linear model to generate an r-squared value and follow the method used by Angrist.

#a.)

mo<-lm(reoffend1~coddle_assigned, data=MDVE)

summary(mo)

Call:
lm(formula = reoffend1 ~ coddle_assigned, data = MDVE)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.2110 -0.2110 -0.2110 -0.1075  0.8925 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)   
(Intercept)      0.10753    0.03982   2.700  0.00729 **
coddle_assigned  0.10344    0.04699   2.201  0.02841 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.384 on 328 degrees of freedom
Multiple R-squared:  0.01456,   Adjusted R-squared:  0.01155 
F-statistic: 4.846 on 1 and 328 DF,  p-value: 0.02841
mo2<-lm(reoffend1~coddle_assigned+ y82 + q1+ q2 + q3 + nonwhite + anyweapon + s_influence, data=MDVE)

summary(mo2)

Call:
lm(formula = reoffend1 ~ coddle_assigned + y82 + q1 + q2 + q3 + 
    nonwhite + anyweapon + s_influence, data = MDVE)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.3706 -0.2161 -0.1409 -0.0415  0.9928 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)   
(Intercept)      0.22887    0.07446   3.074  0.00229 **
coddle_assigned  0.10085    0.04706   2.143  0.03286 * 
y82              0.01879    0.05236   0.359  0.71994   
q1              -0.14379    0.07961  -1.806  0.07185 . 
q2              -0.12237    0.06449  -1.897  0.05867 . 
q3              -0.02966    0.06911  -0.429  0.66807   
nonwhite         0.04092    0.04277   0.957  0.33939   
anyweapon       -0.01008    0.04825  -0.209  0.83462   
s_influence     -0.08920    0.04328  -2.061  0.04012 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.381 on 321 degrees of freedom
Multiple R-squared:  0.05086,   Adjusted R-squared:  0.02721 
F-statistic:  2.15 on 8 and 321 DF,  p-value: 0.03104
#d.)If you run the models as OLS, the R-squared goes up as you add more covariates, meaning they do add value to the model.
#e.)

#estimate the OLS treatment effect with reoffend2 as the DV 

new<-lm(reoffend2~coddle_assigned, data=MDVE)

summary(new)

Call:
lm(formula = reoffend2 ~ coddle_assigned, data = MDVE)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.1772 -0.1772 -0.1772 -0.1290  0.8710 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)    
(Intercept)      0.12903    0.03841   3.359 0.000874 ***
coddle_assigned  0.04818    0.04533   1.063 0.288555    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3704 on 328 degrees of freedom
Multiple R-squared:  0.003433,  Adjusted R-squared:  0.000395 
F-statistic:  1.13 on 1 and 328 DF,  p-value: 0.2886
new2<-lm(reoffend2~coddle_assigned + y82 + q1+ q2 + q3 + nonwhite + anyweapon + s_influence, data=MDVE)

summary(new2)

Call:
lm(formula = reoffend2 ~ coddle_assigned + y82 + q1 + q2 + q3 + 
    nonwhite + anyweapon + s_influence, data = MDVE)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.30652 -0.20646 -0.13625 -0.07997  0.94193 

Coefficients:
                Estimate Std. Error t value Pr(>|t|)  
(Intercept)      0.11764    0.07227   1.628    0.105  
coddle_assigned  0.04397    0.04567   0.963    0.336  
y82             -0.04994    0.05082  -0.983    0.326  
q1               0.10728    0.07727   1.388    0.166  
q2               0.07819    0.06259   1.249    0.213  
q3               0.06222    0.06708   0.928    0.354  
nonwhite        -0.09106    0.04151  -2.194    0.029 *
anyweapon        0.03764    0.04683   0.804    0.422  
s_influence     -0.01248    0.04201  -0.297    0.767  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3698 on 321 degrees of freedom
Multiple R-squared:  0.02811,   Adjusted R-squared:  0.003888 
F-statistic: 1.161 on 8 and 321 DF,  p-value: 0.3227
#This is not the true treatment effect since not all the policemen complied with the treatment. A true treatment effect would take into account actual compliance. Rather, this can be interpreted as the intent to treat. 
#instrumental variable approach: coddle_assigned is the instrumental variable


library(ivreg)
Warning: package 'ivreg' was built under R version 4.3.3
ivbiv<-lm(reoffend2~coddle_received |coddle_assigned, data=MDVE)

summary(ivbiv)

Call:
lm(formula = reoffend2 ~ coddle_received | coddle_assigned, data = MDVE)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.1757 -0.1757 -0.1757 -0.1319  0.8681 

Coefficients:
                                      Estimate Std. Error t value Pr(>|t|)    
(Intercept)                            0.13187    0.03884   3.395 0.000771 ***
coddle_received | coddle_assignedTRUE  0.04386    0.04564   0.961 0.337259    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3706 on 328 degrees of freedom
Multiple R-squared:  0.002808,  Adjusted R-squared:  -0.0002325 
F-statistic: 0.9235 on 1 and 328 DF,  p-value: 0.3373
ivmult<-lm(reoffend2~coddle_received + y82 + q1+ q2 + q3 + nonwhite + anyweapon + s_influence|coddle_assigned + y82 + q1+ q2 + q3 + nonwhite + anyweapon + s_influence , data=MDVE)

summary(ivmult)

Call:
lm(formula = reoffend2 ~ coddle_received + y82 + q1 + q2 + q3 + 
    nonwhite + anyweapon + s_influence | coddle_assigned + y82 + 
    q1 + q2 + q3 + nonwhite + anyweapon + s_influence, data = MDVE)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.1646 -0.1646 -0.1646 -0.1646  0.8354 

Coefficients:
                                                                                                                                                           Estimate
(Intercept)                                                                                                                                               5.109e-15
coddle_received + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influence | coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influenceTRUE 1.646e-01
                                                                                                                                                          Std. Error
(Intercept)                                                                                                                                                2.622e-01
coddle_received + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influence | coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influenceTRUE  2.630e-01
                                                                                                                                                          t value
(Intercept)                                                                                                                                                 0.000
coddle_received + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influence | coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influenceTRUE   0.626
                                                                                                                                                          Pr(>|t|)
(Intercept)                                                                                                                                                  1.000
coddle_received + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influence | coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influenceTRUE    0.532

Residual standard error: 0.3709 on 328 degrees of freedom
Multiple R-squared:  0.001193,  Adjusted R-squared:  -0.001852 
F-statistic: 0.3918 on 1 and 328 DF,  p-value: 0.5318
library(modelsummary)
Warning: package 'modelsummary' was built under R version 4.3.3
`modelsummary` 2.0.0 now uses `tinytable` as its default table-drawing
  backend. Learn more at: https://vincentarelbundock.github.io/tinytable/

Revert to `kableExtra` for one session:

  options(modelsummary_factory_default = 'kableExtra')

Change the default backend persistently:

  config_modelsummary(factory_default = 'gt')

Silence this message forever:

  config_modelsummary(startup_message = FALSE)
library(sandwich)
Warning: package 'sandwich' was built under R version 4.3.3
library(lmtest)
Warning: package 'lmtest' was built under R version 4.3.3
Loading required package: zoo

Attaching package: 'zoo'
The following objects are masked from 'package:base':

    as.Date, as.Date.numeric
#  robust standard errors with lm()
ivmultrse <- coeftest(ivmult,
                                    vcov = vcovCL,
                                    type = "HC1")
modelsummary(ivmultrse)
tinytable_n5j8dpzbuywbyd3lciqk
(1)
(Intercept) 0.000
(0.000)
coddle_received + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influence | coddle_assigned + y82 + q1 + q2 + q3 + nonwhite + anyweapon + s_influenceTRUE 0.165
(0.021)
Num.Obs. 330
AIC 935.8
BIC 2181.9

Part 2

library(haven)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
#import data 
GW <- read_dta("C:/Users/rlutt/Downloads/GreenWinik_Criminology_2010.dta")

# filter to include judge in analysis
GWfiltered<-filter(GW, incjudge== 1)
#replicate Table 5

#First, I run the models, then I generate the robust clustered standard errors.

library(ivreg)

library(modelsummary)

library(sandwich)

library(lmtest)

ivincar<- ivreg(laterarr~ toserve|calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8, data= GWfiltered)

# Clustered robust standard errors with lm()
model1_robust_clustered <- coeftest(ivincar,
                                    vcov = vcovCL,
                                    type = "HC1",
                                    cluster = ~clusterid)

summary(ivincar)

Call:
ivreg(formula = laterarr ~ toserve | calendar1 + calendar2 + 
    calendar3 + calendar4 + calendar5 + calendar6 + calendar7 + 
    calendar8, data = GWfiltered)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9078 -0.4713  0.3372  0.5287  0.5287 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.471286   0.057247   8.233 5.68e-16 ***
toserve     0.007981   0.007933   1.006    0.315    

Diagnostic tests:
                  df1  df2 statistic p-value   
Weak instruments    8  994     3.188 0.00141 **
Wu-Hausman          1 1000     3.013 0.08292 . 
Sargan              7   NA    14.367 0.04502 * 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5239 on 1001 degrees of freedom
Multiple R-Squared: -0.0986,    Adjusted R-squared: -0.0997 
Wald test: 1.012 on 1 and 1001 DF,  p-value: 0.3147 
fullinc<-ivreg(laterarr~ toserve + age + agesq + female+ nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid  +dist + marijuana + cocaine + crack + heroin + pcp  + otherdrug +nondrug | calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8 + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid  + dist + marijuana + cocaine + crack + heroin + pcp  + otherdrug + nondrug, data=GWfiltered,)

summary(fullinc)

Call:
ivreg(formula = laterarr ~ toserve + age + agesq + female + nonblack + 
    priorarr + priordrugarr + priorfelarr + priorfeldrugarr + 
    priorcon + priordrugcon + priorfelcon + priorfeldrugcon + 
    pwid + dist + marijuana + cocaine + crack + heroin + pcp + 
    otherdrug + nondrug | calendar1 + calendar2 + calendar3 + 
    calendar4 + calendar5 + calendar6 + calendar7 + calendar8 + 
    age + agesq + female + nonblack + priorarr + priordrugarr + 
    priorfelarr + priorfeldrugarr + priorcon + priordrugcon + 
    priorfelcon + priorfeldrugcon + pwid + dist + marijuana + 
    cocaine + crack + heroin + pcp + otherdrug + nondrug, data = GWfiltered)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.8860 -0.4825  0.2041  0.4608  0.9049 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      1.0123798  0.1904306   5.316 1.31e-07 ***
toserve          0.0086508  0.0082367   1.050   0.2938    
age             -0.0254644  0.0100055  -2.545   0.0111 *  
agesq            0.0002126  0.0001324   1.605   0.1088    
female          -0.0012192  0.0636057  -0.019   0.9847    
nonblack        -0.2190621  0.1093432  -2.003   0.0454 *  
priorarr        -0.0600934  0.0771971  -0.778   0.4365    
priordrugarr     0.0073863  0.0692132   0.107   0.9150    
priorfelarr      0.1044293  0.0711928   1.467   0.1427    
priorfeldrugarr -0.1004298  0.0728534  -1.379   0.1684    
priorcon         0.0204244  0.0757842   0.270   0.7876    
priordrugcon     0.0405106  0.0775742   0.522   0.6016    
priorfelcon     -0.1001847  0.0769295  -1.302   0.1931    
priorfeldrugcon  0.0555271  0.0849087   0.654   0.5133    
pwid             0.0113328  0.0618777   0.183   0.8547    
dist             0.0109205  0.0653119   0.167   0.8672    
marijuana        0.1000156  0.0576449   1.735   0.0830 .  
cocaine         -0.0001741  0.0584119  -0.003   0.9976    
crack            0.0399155  0.0655350   0.609   0.5426    
heroin           0.0837925  0.0624773   1.341   0.1802    
pcp              0.0817318  0.0968362   0.844   0.3989    
otherdrug       -0.0402090  0.1071726  -0.375   0.7076    
nondrug          0.0014532  0.0501801   0.029   0.9769    

Diagnostic tests:
                 df1 df2 statistic p-value   
Weak instruments   8 973     3.090  0.0019 **
Wu-Hausman         1 979     3.486  0.0622 . 
Sargan             7  NA    12.704  0.0797 . 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.514 on 980 degrees of freedom
Multiple R-Squared: -0.03562,   Adjusted R-squared: -0.05887 
Wald test: 3.142 on 22 and 980 DF,  p-value: 1.645e-06 
# Clustered robust standard errors with lm()
model2_robust_clustered <- coeftest(fullinc,
                                    vcov = vcovCL,
                                    type = "HC1",
                                    cluster = ~clusterid)


ivprobation<- ivreg(laterarr~ probat|calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8, data= GWfiltered )

summary(ivprobation)

Call:
ivreg(formula = laterarr ~ probat | calendar1 + calendar2 + calendar3 + 
    calendar4 + calendar5 + calendar6 + calendar7 + calendar8, 
    data = GWfiltered)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6350 -0.5039  0.4174  0.4961  0.4961 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.503897   0.056997   8.841   <2e-16 ***
probat      0.002186   0.005315   0.411    0.681    

Diagnostic tests:
                  df1  df2 statistic  p-value    
Weak instruments    8  994     6.627 1.84e-08 ***
Wu-Hausman          1 1000     0.003   0.9578    
Sargan              7   NA    16.771   0.0189 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4992 on 1001 degrees of freedom
Multiple R-Squared: 0.002492,   Adjusted R-squared: 0.001496 
Wald test: 0.1691 on 1 and 1001 DF,  p-value: 0.681 
# Clustered robust standard errors with lm()
model3_robust_clustered <- coeftest(ivprobation,
                                    vcov = vcovCL,
                                    type = "HC1",
                                    cluster = ~clusterid)


fullprob<-ivreg(laterarr~ probat + age + agesq + female+ nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid  +dist + marijuana + cocaine + crack + heroin + pcp  + otherdrug +nondrug | calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8 + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid  + dist + marijuana + cocaine + crack + heroin + pcp  + otherdrug + nondrug, data=GWfiltered,)

summary(fullprob)

Call:
ivreg(formula = laterarr ~ probat + age + agesq + female + nonblack + 
    priorarr + priordrugarr + priorfelarr + priorfeldrugarr + 
    priorcon + priordrugcon + priorfelcon + priorfeldrugcon + 
    pwid + dist + marijuana + cocaine + crack + heroin + pcp + 
    otherdrug + nondrug | calendar1 + calendar2 + calendar3 + 
    calendar4 + calendar5 + calendar6 + calendar7 + calendar8 + 
    age + agesq + female + nonblack + priorarr + priordrugarr + 
    priorfelarr + priorfeldrugarr + priorcon + priordrugcon + 
    priorfelcon + priorfeldrugcon + pwid + dist + marijuana + 
    cocaine + crack + heroin + pcp + otherdrug + nondrug, data = GWfiltered)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.7976 -0.4752  0.2322  0.4431  0.8624 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      1.0147071  0.1961545   5.173 2.79e-07 ***
probat           0.0012358  0.0051850   0.238   0.8117    
age             -0.0252338  0.0101412  -2.488   0.0130 *  
agesq            0.0002094  0.0001334   1.570   0.1168    
female          -0.0318686  0.0555817  -0.573   0.5665    
nonblack        -0.1910977  0.1050883  -1.818   0.0693 .  
priorarr        -0.0697442  0.0726339  -0.960   0.3372    
priordrugarr     0.0029892  0.0653524   0.046   0.9635    
priorfelarr      0.1368515  0.0673954   2.031   0.0426 *  
priorfeldrugarr -0.1191658  0.0680028  -1.752   0.0800 .  
priorcon         0.0309100  0.0711503   0.434   0.6641    
priordrugcon     0.0562407  0.0719406   0.782   0.4345    
priorfelcon     -0.0825914  0.0716554  -1.153   0.2493    
priorfeldrugcon  0.0875194  0.0804564   1.088   0.2770    
pwid             0.0145487  0.0590992   0.246   0.8056    
dist             0.0225452  0.0682875   0.330   0.7414    
marijuana        0.0866665  0.0588066   1.474   0.1409    
cocaine         -0.0036400  0.0599389  -0.061   0.9516    
crack            0.0311841  0.0681925   0.457   0.6476    
heroin           0.0753797  0.0628826   1.199   0.2309    
pcp              0.1109518  0.0877993   1.264   0.2066    
otherdrug       -0.0577620  0.1027145  -0.562   0.5740    
nondrug          0.0113237  0.0470993   0.240   0.8101    

Diagnostic tests:
                 df1 df2 statistic  p-value    
Weak instruments   8 973     7.098 3.76e-09 ***
Wu-Hausman         1 979     0.001   0.9714    
Sargan             7  NA    15.398   0.0312 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4863 on 980 degrees of freedom
Multiple R-Squared: 0.07314,    Adjusted R-squared: 0.05233 
Wald test: 3.457 on 22 and 980 DF,  p-value: 1.536e-07 
# Clustered robust standard errors with lm()
model4_robust_clustered <- coeftest(fullprob,
                                    vcov = vcovCL,
                                    type = "HC1",
                                    cluster = ~clusterid)


fulliv<-ivreg(laterarr~ toserve + probat + age + agesq + female+ nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid  +dist + marijuana + cocaine + crack + heroin + pcp  + otherdrug +nondrug | calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8 + age + agesq + female + nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + priorcon + priordrugcon + priorfelcon + priorfeldrugcon + pwid  + dist + marijuana + cocaine + crack + heroin + pcp  + otherdrug + nondrug, data=GWfiltered,)

summary(fulliv)

Call:
ivreg(formula = laterarr ~ toserve + probat + age + agesq + female + 
    nonblack + priorarr + priordrugarr + priorfelarr + priorfeldrugarr + 
    priorcon + priordrugcon + priorfelcon + priorfeldrugcon + 
    pwid + dist + marijuana + cocaine + crack + heroin + pcp + 
    otherdrug + nondrug | calendar1 + calendar2 + calendar3 + 
    calendar4 + calendar5 + calendar6 + calendar7 + calendar8 + 
    age + agesq + female + nonblack + priorarr + priordrugarr + 
    priorfelarr + priorfeldrugarr + priorcon + priordrugcon + 
    priorfelcon + priorfeldrugcon + pwid + dist + marijuana + 
    cocaine + crack + heroin + pcp + otherdrug + nondrug, data = GWfiltered)

Residuals:
    Min      1Q  Median      3Q     Max 
-1.9026 -0.4783  0.2097  0.4626  0.9182 

Coefficients:
                  Estimate Std. Error t value Pr(>|t|)    
(Intercept)      0.9860869  0.2090100   4.718 2.73e-06 ***
toserve          0.0088388  0.0082577   1.070   0.2847    
probat           0.0016750  0.0054947   0.305   0.7606    
age             -0.0242595  0.0107556  -2.256   0.0243 *  
agesq            0.0001974  0.0001414   1.396   0.1629    
female          -0.0040103  0.0642458  -0.062   0.9502    
nonblack        -0.2108590  0.1125796  -1.873   0.0614 .  
priorarr        -0.0607938  0.0772123  -0.787   0.4313    
priordrugarr     0.0077784  0.0692081   0.112   0.9105    
priorfelarr      0.1116499  0.0750131   1.488   0.1370    
priorfeldrugarr -0.1031926  0.0733972  -1.406   0.1601    
priorcon         0.0208340  0.0757774   0.275   0.7834    
priordrugcon     0.0390094  0.0777113   0.502   0.6158    
priorfelcon     -0.1051171  0.0785941  -1.337   0.1814    
priorfeldrugcon  0.0627369  0.0881210   0.712   0.4767    
pwid             0.0081431  0.0627411   0.130   0.8968    
dist            -0.0005208  0.0753138  -0.007   0.9945    
marijuana        0.0927199  0.0624025   1.486   0.1376    
cocaine         -0.0077443  0.0634583  -0.122   0.9029    
crack            0.0307665  0.0720657   0.427   0.6695    
heroin           0.0768647  0.0664678   1.156   0.2478    
pcp              0.0769635  0.0980678   0.785   0.4328    
otherdrug       -0.0464154  0.1090633  -0.426   0.6705    
nondrug          0.0029006  0.0503920   0.058   0.9541    

Diagnostic tests:
                           df1 df2 statistic  p-value    
Weak instruments (toserve)   8 973     3.090   0.0019 ** 
Weak instruments (probat)    8 973     7.098 3.76e-09 ***
Wu-Hausman                   2 977     1.771   0.1708    
Sargan                       6  NA    12.628   0.0493 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.5139 on 979 degrees of freedom
Multiple R-Squared: -0.03405,   Adjusted R-squared: -0.05835 
Wald test: 3.011 on 23 and 979 DF,  p-value: 2.819e-06 
# Clustered robust standard errors with lm()
model5_robust_clustered <- coeftest(fulliv,
                                    vcov = vcovCL,
                                    type = "HC1",
                                    cluster = ~clusterid)

both<-ivreg(laterarr~ toserve + probat | calendar1+ calendar2 + calendar3+ calendar4 + calendar5+ calendar6 + calendar7 + calendar8, data=GWfiltered)


# Clustered robust standard errors with lm()
model6_robust_clustered <- coeftest(both,
                                    vcov = vcovCL,
                                    type = "HC1",
                                    cluster = ~clusterid)
library(modelsummary)

m_list <- list("1" =model1_robust_clustered, "2"= model2_robust_clustered, "3"=model3_robust_clustered, "4"=model4_robust_clustered, "5"=model5_robust_clustered,  "6" = model6_robust_clustered)


#msummary(m_list, title= "Table 5. 2SLS Estimates of the Effects of Length of Incarceration and Probation on Recidivism")

#Note, here the code runs to produce the table, but when I try to render it I get an error message saying I cannot render it because the arguments have different lengths. Because the models are different they will obviously have different lengths. I screenshotted the table and pasted it in the excel document and I place # next to the codes so the code would render.

Part 3

#install data

library(readr)
min <- read_csv("C:/Users/rlutt/Downloads/Minimum Wage.csv")
Rows: 372 Columns: 8
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
dbl (8): NJ.PA, EmploymentPost, EmploymentPre, WagePre, BurgerKing, KFC, Roy...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#write a naive regression model model to estimate the effect of raising minimum wage on employment in the fast-food industry

naivemodel<-lm(EmploymentPost~EmploymentPre + WagePre + BurgerKing + Roys + KFC + Wendys + NJ.PA, data=min)

summary(naivemodel)

Call:
lm(formula = EmploymentPost ~ EmploymentPre + WagePre + BurgerKing + 
    Roys + KFC + Wendys + NJ.PA, data = min)

Residuals:
     Min       1Q   Median       3Q      Max 
-18.1190  -4.0558  -0.7142   3.0178  30.7417 

Coefficients: (1 not defined because of singularities)
              Estimate Std. Error t value Pr(>|t|)    
(Intercept)   11.44019    5.12414   2.233  0.02618 *  
EmploymentPre  0.41576    0.04152  10.013  < 2e-16 ***
WagePre       -0.32061    1.05777  -0.303  0.76198    
BurgerKing     0.89322    1.13730   0.785  0.43274    
Roys          -1.52545    1.20632  -1.265  0.20684    
KFC           -4.36154    1.30780  -3.335  0.00094 ***
Wendys              NA         NA      NA       NA    
NJ.PA          1.16862    0.89810   1.301  0.19400    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 6.806 on 365 degrees of freedom
Multiple R-squared:  0.3603,    Adjusted R-squared:  0.3498 
F-statistic: 34.26 on 6 and 365 DF,  p-value: < 2.2e-16
#In this model, the outcome, a minimum wage increase in NJ is not the result of an exogenous force, so we cannot claim unconfoundedness. The political atmosphere in the state and the demographic makeup are just some examples of endogenous factors affecting employment in NJ. 
#estimate propensity scores

library(cobalt)
Warning: package 'cobalt' was built under R version 4.3.3
 cobalt (Version 4.5.5, Build Date: 2024-04-02)
# Nearest neighbor 2:1 matching with replacement
m.out <- MatchIt::matchit(NJ.PA ~ EmploymentPre + WagePre + BurgerKing + Roys + KFC + Wendys,
                      data = min, method = "nearest", 
                          ratio = 1,  replace = TRUE)

summary(m.out)

Call:
MatchIt::matchit(formula = NJ.PA ~ EmploymentPre + WagePre + 
    BurgerKing + Roys + KFC + Wendys, data = min, method = "nearest", 
    replace = TRUE, ratio = 1)

Summary of Balance for All Data:
              Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
distance             0.8081        0.7859          0.4121     0.5982    0.0886
EmploymentPre       17.0485       20.0993         -0.3444     0.5714    0.0637
WagePre              4.6061        4.6286         -0.0658     0.9295    0.0291
BurgerKing           0.4047        0.4521         -0.0965          .    0.0474
Roys                 0.2508        0.2329          0.0414          .    0.0180
KFC                  0.2241        0.1370          0.2089          .    0.0871
Wendys               0.1204        0.1781         -0.1772          .    0.0577
              eCDF Max
distance        0.1609
EmploymentPre   0.1142
WagePre         0.0876
BurgerKing      0.0474
Roys            0.0180
KFC             0.0871
Wendys          0.0577

Summary of Balance for Matched Data:
              Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
distance             0.8081        0.8085         -0.0059     1.0106    0.0089
EmploymentPre       17.0485       17.3386         -0.0327     0.9657    0.0141
WagePre              4.6061        4.6018          0.0126     1.0325    0.0202
BurgerKing           0.4047        0.4381         -0.0681          .    0.0334
Roys                 0.2508        0.2742         -0.0540          .    0.0234
KFC                  0.2241        0.2040          0.0481          .    0.0201
Wendys               0.1204        0.0836          0.1130          .    0.0368
              eCDF Max Std. Pair Dist.
distance        0.0502          0.0310
EmploymentPre   0.0635          0.4627
WagePre         0.0468          1.0279
BurgerKing      0.0334          0.6269
Roys            0.0234          0.6404
KFC             0.0201          0.1604
Wendys          0.0368          0.4214

Sample Sizes:
              Control Treated
All             73.       299
Matched (ESS)   38.62     299
Matched         65.       299
Unmatched        8.         0
Discarded        0.         0
m.outcem<-MatchIt::matchit(NJ.PA ~ EmploymentPre + WagePre + BurgerKing + Roys + KFC + Wendys,
                      data = min, method = "cem", estimand = "ATT",
        s.weights = NULL,
        verbose = FALSE)

summary(m.outcem)

Call:
MatchIt::matchit(formula = NJ.PA ~ EmploymentPre + WagePre + 
    BurgerKing + Roys + KFC + Wendys, data = min, method = "cem", 
    estimand = "ATT", s.weights = NULL, verbose = FALSE)

Summary of Balance for All Data:
              Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
EmploymentPre       17.0485       20.0993         -0.3444     0.5714    0.0637
WagePre              4.6061        4.6286         -0.0658     0.9295    0.0291
BurgerKing           0.4047        0.4521         -0.0965          .    0.0474
Roys                 0.2508        0.2329          0.0414          .    0.0180
KFC                  0.2241        0.1370          0.2089          .    0.0871
Wendys               0.1204        0.1781         -0.1772          .    0.0577
              eCDF Max
EmploymentPre   0.1142
WagePre         0.0876
BurgerKing      0.0474
Roys            0.0180
KFC             0.0871
Wendys          0.0577

Summary of Balance for Matched Data:
              Means Treated Means Control Std. Mean Diff. Var. Ratio eCDF Mean
EmploymentPre       15.9284       15.3844          0.0614     0.9049    0.0231
WagePre              4.5240        4.5219          0.0063     1.0053    0.0030
BurgerKing           0.4451        0.4451          0.0000          .    0.0000
Roys                 0.1768        0.1768          0.0000          .    0.0000
KFC                  0.2622        0.2622          0.0000          .    0.0000
Wendys               0.1159        0.1159          0.0000          .    0.0000
              eCDF Max Std. Pair Dist.
EmploymentPre   0.1128          0.2719
WagePre         0.0305          0.0284
BurgerKing      0.0000          0.0000
Roys            0.0000          0.0000
KFC             0.0000          0.0000
Wendys          0.0000          0.0000

Sample Sizes:
              Control Treated
All             73.       299
Matched (ESS)   40.47     164
Matched         64.       164
Unmatched        9.       135
Discarded        0.         0
#CEM is the superior method given that the mean differences are smaller than in the nearest neighbor matching method

#plot covariate balance between the treatment and control groups: shown in plot for before and after matching

plot(m.outcem, type="density")

#create dataset with matches 

library(MatchIt)
Warning: package 'MatchIt' was built under R version 4.3.3

Attaching package: 'MatchIt'
The following object is masked from 'package:cobalt':

    lalonde
m.data<-match.data(m.out)


#estimate the causal effect using matched data: since the data is matched based on covariates there is no need to include them in the new model

matchedmodel<-lm(EmploymentPost~NJ.PA , data=m.data)

summary(matchedmodel)

Call:
lm(formula = EmploymentPost ~ NJ.PA, data = m.data)

Residuals:
    Min      1Q  Median      3Q     Max 
-17.222  -6.030  -0.222   5.278  38.278 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept) 17.20385    1.04330  16.490   <2e-16 ***
NJ.PA        0.01773    1.15113   0.015    0.988    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 8.411 on 362 degrees of freedom
Multiple R-squared:  6.55e-07,  Adjusted R-squared:  -0.002762 
F-statistic: 0.0002371 on 1 and 362 DF,  p-value: 0.9877
#the results show no significant causal effect of raising minimum wage