Preparation (clean the environment and load the data).
(i) Balance table.
(ii) Average Treatment Effect (ATE) using regression.
(iii) Average Treatment Effect for the Treated (ATT) using covariate matching.
(iv) Average Treatment Effect for the Treated (ATT) using propensity score matching.

Preparation (clean the environment and load the data).

rm(list = ls())
data(lalonde, package = "Matching")

(i) Balance table.

y0 <- subset(lalonde, treat == 0)
y1 <- subset(lalonde, treat == 1)
mean_of_untreated <- NULL
mean_of_treated <- NULL
difference <- NULL
t_statistics <- NULL
probability <- NULL
for (i in 1:(ncol(lalonde) - 1)){
 mean_of_untreated[i] <- t.test(y0[, i], y1[, i])$estimate[[1]]
 mean_of_treated[i] <- t.test(y0[, i], y1[, i])$estimate[[2]]
 difference[i] <- t.test(y0[, i], y1[, i])$estimate[[2]] - t.test(y0[,i], y1[,i])$estimate[[1]]
 t_statistics[i] <- t.test(y0[, i], y1[, i])$statistic[[1]]
 probability[i] <- t.test(y0[, i], y1[, i])$p.value[[1]]
}
t_table <- rbind(mean_of_treated, mean_of_untreated, difference, t_statistics, probability)
colnames(t_table) <- colnames(lalonde[, 1:(ncol(lalonde) - 1)])
t_table <- t(t_table)
colnames(t_table)[colnames(t_table) == "mean_of_untreated"] <- "Mean of untreated"
colnames(t_table)[colnames(t_table) == "mean_of_treated"] <- "Mean of treated"
colnames(t_table)[colnames(t_table) == "difference"] <- "Difference"
colnames(t_table)[colnames(t_table) == "t_statistics"] <- "t-Statistics"
colnames(t_table)[colnames(t_table) == "probability"] <- "Probability"
t_table

##         Mean of treated Mean of untreated    Difference t-Statistics
## age        2.581622e+01        25.0538462    0.76237006   -1.1140361
## educ       1.034595e+01        10.0884615    0.25748441   -1.4421840
## black      8.432432e-01         0.8269231    0.01632017   -0.4577777
## hisp       5.945946e-02         0.1076923   -0.04823285    1.8565425
## married    1.891892e-01         0.1538462    0.03534304   -0.9668363
## nodegr     7.081081e-01         0.8346154   -0.12650728    3.1084981
## re74       2.095574e+03      2107.0268154  -11.45281538    0.0227466
## re75       1.532056e+03      1266.9092408  265.14638896   -0.8692061
## re78       6.349145e+03      4554.8022827 1794.34308488   -2.6741458
## u74        7.081081e-01         0.7500000   -0.04189189    0.9746890
## u75        6.000000e-01         0.6846154   -0.08461538    1.8299743
##         Probability
## age     0.265944347
## educ    0.150169353
## black   0.647357377
## hisp    0.064043273
## married 0.334247781
## nodegr  0.002036780
## re74    0.981863018
## re75    0.385272603
## re78    0.007892971
## u74     0.330327781
## u75     0.068030520

# COMPARISON: The probabilities to be born as a Hispanic, to have a college degree, and to be in the unemployed status in 1975 for the people in the treated sample (the participants of the program) are statistically difference from the untreated sample.

(ii) Average Treatment Effect (ATE) using regression.

lalonde$age2 <- lalonde$age ^ 2
lalonde$educ2 <- lalonde$educ ^ 2

lalonde$x1 <- (lalonde$age - mean(lalonde$age)) * lalonde$treat
lalonde$x2 <- (lalonde$age2 - mean(lalonde$age2)) * lalonde$treat
lalonde$x3 <- (lalonde$educ - mean(lalonde$educ)) * lalonde$treat
lalonde$x4 <- (lalonde$educ2 - mean(lalonde$educ2)) * lalonde$treat
lalonde$x5 <- (lalonde$black - mean(lalonde$black)) * lalonde$treat
lalonde$x6 <- (lalonde$hisp - mean(lalonde$hisp)) * lalonde$treat
lalonde$x7 <- (lalonde$married - mean(lalonde$married)) * lalonde$treat
lalonde$x8 <- (lalonde$nodegr - mean(lalonde$nodegr)) * lalonde$treat
lalonde$x9 <- (lalonde$re74 - mean(lalonde$re74)) * lalonde$treat
lalonde$x10 <- (lalonde$re75 - mean(lalonde$re75)) * lalonde$treat
lalonde$x11 <- (lalonde$u74 - mean(lalonde$u74)) * lalonde$treat
lalonde$x12 <- (lalonde$u75 - mean(lalonde$u75)) * lalonde$treat

m1 <- lm(re78 ~ treat + 
         age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75 + 
         x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12, 
         data = lalonde)
summary(m1)

## 
## Call:
## lm(formula = re78 ~ treat + age + age2 + educ + educ2 + black + 
##     hisp + married + nodegr + re74 + re75 + u74 + u75 + x1 + 
##     x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12, 
##     data = lalonde)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -12713  -3969  -1631   2931  49566 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.018e+03  9.569e+03   0.106 0.915360    
## treat        1.544e+03  6.371e+02   2.423 0.015799 *  
## age         -9.652e+01  3.362e+02  -0.287 0.774199    
## age2         2.510e+00  5.499e+00   0.457 0.648244    
## educ         2.276e+03  2.064e+03   1.103 0.270629    
## educ2       -1.256e+02  1.173e+02  -1.071 0.284961    
## black       -3.105e+03  1.658e+03  -1.872 0.061877 .  
## hisp        -8.912e+02  2.046e+03  -0.436 0.663319    
## married     -6.238e+02  1.215e+03  -0.514 0.607804    
## nodegr      -1.441e+03  1.789e+03  -0.805 0.421018    
## re74        -1.250e-02  1.084e-01  -0.115 0.908197    
## re75        -7.639e-03  2.035e-01  -0.038 0.970079    
## u74         -2.895e+03  1.566e+03  -1.848 0.065318 .  
## u75          8.345e+02  1.353e+03   0.617 0.537744    
## x1           5.971e+02  5.791e+02   1.031 0.303075    
## x2          -1.039e+01  9.526e+00  -1.091 0.275885    
## x3          -4.569e+03  2.590e+03  -1.764 0.078481 .  
## x4           2.830e+02  1.459e+02   1.939 0.053115 .  
## x5           2.380e+03  2.328e+03   1.022 0.307227    
## x6           2.284e+03  3.244e+03   0.704 0.481815    
## x7           1.395e+03  1.784e+03   0.782 0.434757    
## x8           2.286e+03  2.497e+03   0.915 0.360515    
## x9           2.644e-01  1.868e-01   1.416 0.157627    
## x10          2.453e-02  2.989e-01   0.082 0.934624    
## x11          9.426e+03  2.412e+03   3.908 0.000108 ***
## x12         -4.970e+03  2.060e+03  -2.413 0.016237 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6420 on 419 degrees of freedom
## Multiple R-squared:  0.1155, Adjusted R-squared:  0.06275 
## F-statistic: 2.189 on 25 and 419 DF,  p-value: 0.0009413

# INTERPRETATION: Yes, the participation of the training program did significantly increase the real earnings of the participants by about $1,544 at the statistically significance level of 0.05.

(iii) Average Treatment Effect for the Treated (ATT) using covariate matching.

One-to-one matching.

library(MatchIt)

## Warning: package 'MatchIt' was built under R version 4.4.2

## 
## Attaching package: 'MatchIt'

## The following object is masked _by_ '.GlobalEnv':
## 
##     lalonde

library(cobalt)

## Warning: package 'cobalt' was built under R version 4.4.3

##  cobalt (Version 4.5.5, Build Date: 2024-04-02)

## 
## Attaching package: 'cobalt'

## The following object is masked _by_ '.GlobalEnv':
## 
##     lalonde

## The following object is masked from 'package:MatchIt':
## 
##     lalonde

nnm1 <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                data = lalonde, 
                method = "nearest")
matched_nnm1 <- match.data(nnm1)
m_nnm1 <- lm(re78 ~ treat, data = matched_nnm1)
summary(m_nnm1)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm1)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6349  -4391  -1742   2915  53959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4390.6      499.0   8.799  < 2e-16 ***
## treat         1958.6      705.7   2.775  0.00579 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6787 on 368 degrees of freedom
## Multiple R-squared:  0.0205, Adjusted R-squared:  0.01784 
## F-statistic: 7.703 on 1 and 368 DF,  p-value: 0.005794

Exact matching.

exact_match <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                       exact = ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                       data = lalonde)

## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.

matched_exact <- match.data(exact_match)
m_exact <- lm(re78 ~ treat, data = matched_exact)
summary(m_exact)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_exact)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -5161  -3818  -1781   3282  19749 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   3818.2      763.8   4.999 2.74e-06 ***
## treat         1342.8     1080.2   1.243    0.217    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5236 on 92 degrees of freedom
## Multiple R-squared:  0.01652,    Adjusted R-squared:  0.00583 
## F-statistic: 1.545 on 1 and 92 DF,  p-value: 0.217

# EXPLANATION: The one-to-one matching allows repetitive use of the control units as a matched unit of a treated unit, while the exact matching does not. When applying the exact matching method, the total number of matched units are less. Qualitatively, the coefficient to describe the treatment effect changes from significant (at 0.01 level of significance) to insignificant after applying the method.

Nearest neighbor matching (1:2).

nnm2 <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                data = lalonde, 
                method = "nearest", 
                distance = "glm", 
                replace = TRUE, 
                ratio = 2)
matched_nnm2 <- match.data(nnm2)
m_nnm2 <- lm(re78 ~ treat, data = matched_nnm2)
summary(m_nnm2)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm2)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6349  -4764  -1520   2911  53959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4763.5      545.2   8.738   <2e-16 ***
## treat         1585.6      745.6   2.127   0.0342 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6918 on 344 degrees of freedom
## Multiple R-squared:  0.01298,    Adjusted R-squared:  0.01011 
## F-statistic: 4.523 on 1 and 344 DF,  p-value: 0.03416

Nearest neighbor matching (1:3).

nnm3 <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                    data = lalonde, 
                    method = "nearest", 
                    distance = "glm", 
                    replace = TRUE, 
                    ratio = 3)
matched_nnm3 <- match.data(nnm3)
m_nnm3 <- lm(re78 ~ treat, data = matched_nnm3)
summary(m_nnm3)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6349  -4544  -1579   2893  53959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4543.5      480.0   9.465  < 2e-16 ***
## treat         1805.6      689.8   2.618  0.00921 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6738 on 380 degrees of freedom
## Multiple R-squared:  0.01771,    Adjusted R-squared:  0.01513 
## F-statistic: 6.851 on 1 and 380 DF,  p-value: 0.009211

Bias corrections.

nnm2bc <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                data = lalonde, 
                method = "nearest", 
                distance = "glm", 
                replace = TRUE, 
                BiasAdjust = T, 
                ratio = 2)
matched_nnm2bc <- match.data(nnm2bc)
m_nnm2bc <- lm(re78 ~ treat, data = matched_nnm2bc)
summary(m_nnm2bc)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm2bc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6349  -4764  -1520   2911  53959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4763.5      545.2   8.738   <2e-16 ***
## treat         1585.6      745.6   2.127   0.0342 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6918 on 344 degrees of freedom
## Multiple R-squared:  0.01298,    Adjusted R-squared:  0.01011 
## F-statistic: 4.523 on 1 and 344 DF,  p-value: 0.03416

nnm3bc <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                    data = lalonde, 
                    method = "nearest", 
                    distance = "glm", 
                    replace = TRUE, 
                    BiasAdjust = T, 
                    ratio = 3)
matched_nnm3bc <- match.data(nnm3bc)
m_nnm3bc <- lm(re78 ~ treat, data = matched_nnm3bc)
summary(m_nnm3bc)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm3bc)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6349  -4544  -1579   2893  53959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4543.5      480.0   9.465  < 2e-16 ***
## treat         1805.6      689.8   2.618  0.00921 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6738 on 380 degrees of freedom
## Multiple R-squared:  0.01771,    Adjusted R-squared:  0.01513 
## F-statistic: 6.851 on 1 and 380 DF,  p-value: 0.009211

# RESULTS: The significance levels of the treatment effects do not change much after the biases are corrected.

Coarsened exact matching.

cem <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                   data = lalonde, 
                   method = "cem")
matched_cem <- match.data(cem)
m_cem <- lm(re78 ~ treat, data = matched_cem)
summary(m_cem)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_cem)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -5284  -4007  -2104   3174  28815 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4006.7      522.1   7.674 6.57e-13 ***
## treat         1277.4      802.7   1.591    0.113    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 5720 on 206 degrees of freedom
## Multiple R-squared:  0.01214,    Adjusted R-squared:  0.007348 
## F-statistic: 2.532 on 1 and 206 DF,  p-value: 0.1131

# EXPLANATION: Compared to the exact matching, the coefficient of the treatment effect becomes insignificant.

(iv) Average Treatment Effect for the Treated (ATT) using propensity score matching.

Directed Acyclic Graph (DAG).

library(ggplot2)
library(ggdag)

## Warning: package 'ggdag' was built under R version 4.4.2

## 
## Attaching package: 'ggdag'

## The following object is masked from 'package:stats':
## 
##     filter

dagify(
  Y78 ~ D,
  D ~ Y74 + Y75,
  Y78 ~ Y74 + Y75) %>%
  ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
  geom_dag_point() +
  geom_dag_edges_arc() +
  geom_dag_text() +
  theme_dag()

# DESCRIPTION: People with less earnings before (or with a earnings of 0 in 1974 or 1975) are more likely to participate in a job training program. We have the problem of intent to be treated. Unfortunately, the earnings in 1974 and 1975 are also correlated with the earnings in 1978. The endogeneity arose, as the earnings in 1974 and 1975 are the confounders.

Descriptive graphs.

x1 <- asinh(lalonde$re74)
x2 <- asinh(lalonde$re75)
x3 <- asinh(lalonde$re78)

hist(x1, main = "Histogram", col = "lightyellow", xlim = c(min(x1, x2, x3), max(x1, x2, x3)), xlab = "Log of real earnings ($)")
hist(x2, col = "lightgreen", add = TRUE)
hist(x3, col = "lightblue", add = TRUE)
legend("topright", legend = c("RE74", "RE75", "RE78"), fill = c("lightyellow", "lightgreen", "lightblue"))

hist(y0$educ, main = "Histogram", col = "lightgreen", xlab = "Years of education")
hist(y1$educ, col = "lightblue", add = TRUE)
legend("topright", legend = c("Untreated", "Treated"), fill = c("lightgreen", "lightblue"))

# DISCUSSIONS: (I applied the inverse hyperbolic sine transformation to the earnings.) The numbers of people zero earnings generally decreased from 1974 to 1975, and from 1975 to 1978. The unemployment rate decreased over time. The years of education attainment in the untreated group is generally lower than the treated group.

Propensity score.

psfit <- glm(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, family = binomial, data = lalonde)
lalonde$psfit <- predict(psfit, data = lalonde)

Matching on the propensity score.

psm3 <- matchit(treat ~ psfit, 
                    data = lalonde, 
                    method = "nearest", 
                    distance = "glm", 
                    replace = TRUE, 
                    BiasAdjust = T, 
                    ratio = 3)
matched_psm3 <- match.data(psm3)
m_psm3 <- lm(re78 ~ treat, data = matched_psm3)
summary(m_psm3)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_psm3)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6349  -4544  -1579   2893  53959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4543.5      480.0   9.465  < 2e-16 ***
## treat         1805.6      689.8   2.618  0.00921 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6738 on 380 degrees of freedom
## Multiple R-squared:  0.01771,    Adjusted R-squared:  0.01513 
## F-statistic: 6.851 on 1 and 380 DF,  p-value: 0.009211

Bootstrapping.

library(PSAboot)

## Warning: package 'PSAboot' was built under R version 4.4.3

## Loading required package: PSAgraphics

## Warning: package 'PSAgraphics' was built under R version 4.4.3

## Loading required package: rpart

lm <- treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75
boot <- PSAboot(Tr = lalonde$treat, 
                        Y = lalonde$re78,
                        X = lalonde,
                        formu = lm,
                        M = 100, 
                        seed = 16802)

## 100 bootstrap samples using 6 methods.
## Bootstrap sample sizes:
##    Treated=185 (100%) with replacement.
##    Control=260 (100%) with replacement.

##   |                                                                              |                                                                      |   0%

## Warning in value[[3L]](cond): Error occurred during iteration 2 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |=                                                                     |   1%  |                                                                              |=                                                                     |   2%  |                                                                              |==                                                                    |   3%  |                                                                              |===                                                                   |   4%  |                                                                              |====                                                                  |   5%

## Warning in value[[3L]](cond): Error occurred during iteration 7 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |====                                                                  |   6%

## Warning in value[[3L]](cond): Error occurred during iteration 8 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |=====                                                                 |   7%  |                                                                              |======                                                                |   8%  |                                                                              |======                                                                |   9%  |                                                                              |=======                                                               |  10%  |                                                                              |========                                                              |  11%

## Warning in value[[3L]](cond): Error occurred during iteration 13 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |========                                                              |  12%  |                                                                              |=========                                                             |  13%

## Warning in value[[3L]](cond): Error occurred during iteration 15 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |==========                                                            |  14%  |                                                                              |===========                                                           |  15%  |                                                                              |===========                                                           |  16%  |                                                                              |============                                                          |  17%

## Warning in value[[3L]](cond): Error occurred during iteration 19 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |=============                                                         |  18%  |                                                                              |=============                                                         |  19%  |                                                                              |==============                                                        |  20%  |                                                                              |===============                                                       |  21%  |                                                                              |================                                                      |  22%  |                                                                              |================                                                      |  23%  |                                                                              |=================                                                     |  24%  |                                                                              |==================                                                    |  25%

## Warning in value[[3L]](cond): Error occurred during iteration 27 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |==================                                                    |  26%  |                                                                              |===================                                                   |  27%  |                                                                              |====================                                                  |  28%  |                                                                              |=====================                                                 |  29%  |                                                                              |=====================                                                 |  30%

## Warning in value[[3L]](cond): Error occurred during iteration 32 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |======================                                                |  31%  |                                                                              |=======================                                               |  32%  |                                                                              |=======================                                               |  33%  |                                                                              |========================                                              |  34%  |                                                                              |=========================                                             |  35%  |                                                                              |=========================                                             |  36%  |                                                                              |==========================                                            |  37%  |                                                                              |===========================                                           |  38%  |                                                                              |============================                                          |  39%  |                                                                              |============================                                          |  40%  |                                                                              |=============================                                         |  41%

## Warning in value[[3L]](cond): Error occurred during iteration 43 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |==============================                                        |  42%  |                                                                              |==============================                                        |  43%  |                                                                              |===============================                                       |  44%  |                                                                              |================================                                      |  45%  |                                                                              |=================================                                     |  46%  |                                                                              |=================================                                     |  47%  |                                                                              |==================================                                    |  48%  |                                                                              |===================================                                   |  49%  |                                                                              |===================================                                   |  51%  |                                                                              |====================================                                  |  52%  |                                                                              |=====================================                                 |  53%

## Warning in value[[3L]](cond): Error occurred during iteration 54 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |=====================================                                 |  54%  |                                                                              |======================================                                |  55%  |                                                                              |=======================================                               |  56%  |                                                                              |========================================                              |  57%  |                                                                              |========================================                              |  58%  |                                                                              |=========================================                             |  59%  |                                                                              |==========================================                            |  60%  |                                                                              |==========================================                            |  61%

## Warning in value[[3L]](cond): Error occurred during iteration 62 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |===========================================                           |  62%  |                                                                              |============================================                          |  63%

## Warning in value[[3L]](cond): Error occurred during iteration 64 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |=============================================                         |  64%  |                                                                              |=============================================                         |  65%  |                                                                              |==============================================                        |  66%  |                                                                              |===============================================                       |  67%  |                                                                              |===============================================                       |  68%

## Warning in value[[3L]](cond): Error occurred during iteration 69 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |================================================                      |  69%

## Warning in value[[3L]](cond): Error occurred during iteration 70 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |=================================================                     |  70%

## Warning in value[[3L]](cond): Error occurred during iteration 71 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |=================================================                     |  71%

## Warning in value[[3L]](cond): Error occurred during iteration 72 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |==================================================                    |  72%  |                                                                              |===================================================                   |  73%  |                                                                              |====================================================                  |  74%  |                                                                              |====================================================                  |  75%  |                                                                              |=====================================================                 |  76%  |                                                                              |======================================================                |  77%  |                                                                              |======================================================                |  78%  |                                                                              |=======================================================               |  79%  |                                                                              |========================================================              |  80%  |                                                                              |=========================================================             |  81%  |                                                                              |=========================================================             |  82%  |                                                                              |==========================================================            |  83%  |                                                                              |===========================================================           |  84%

## Warning in value[[3L]](cond): Error occurred during iteration 85 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |===========================================================           |  85%

## Warning in value[[3L]](cond): Error occurred during iteration 86 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |============================================================          |  86%  |                                                                              |=============================================================         |  87%  |                                                                              |==============================================================        |  88%  |                                                                              |==============================================================        |  89%  |                                                                              |===============================================================       |  90%

## Warning in value[[3L]](cond): Error occurred during iteration 91 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |================================================================      |  91%

## Warning in value[[3L]](cond): Error occurred during iteration 92 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |================================================================      |  92%  |                                                                              |=================================================================     |  93%  |                                                                              |==================================================================    |  94%  |                                                                              |==================================================================    |  95%

## Warning in value[[3L]](cond): Error occurred during iteration 96 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.

##   |                                                                              |===================================================================   |  96%  |                                                                              |====================================================================  |  97%  |                                                                              |===================================================================== |  98%  |                                                                              |===================================================================== |  99%  |                                                                              |======================================================================| 100%

summary(boot)

## Stratification Results:
##    Complete estimate = 1453
##    Complete CI = [186, 2720]
##    Bootstrap pooled estimate = 1418
##    Bootstrap weighted pooled estimate = 1370
##    Bootstrap pooled CI = [71.5, 2764]
##    58% of bootstrap samples have confidence intervals that do not span zero.
##       58% positive.
##       0% negative.
## ctree Results:
##    Complete estimate = 1598
##    Complete CI = [-6.62, 3203]
##    Bootstrap pooled estimate = 1530
##    Bootstrap weighted pooled estimate = 1551
##    Bootstrap pooled CI = [170, 2890]
##    49.4% of bootstrap samples have confidence intervals that do not span zero.
##       49.4% positive.
##       0% negative.
## rpart Results:
##    Complete estimate = 1332
##    Complete CI = [-295, 2959]
##    Bootstrap pooled estimate = 1534
##    Bootstrap weighted pooled estimate = 1522
##    Bootstrap pooled CI = [-275, 3343]
##    34% of bootstrap samples have confidence intervals that do not span zero.
##       34% positive.
##       0% negative.
## Matching Results:
##    Complete estimate = 1382
##    Complete CI = [679, 2085]
##    Bootstrap pooled estimate = 1303
##    Bootstrap weighted pooled estimate = 1319
##    Bootstrap pooled CI = [-581, 3187]
##    78% of bootstrap samples have confidence intervals that do not span zero.
##       77% positive.
##       1% negative.
## MatchIt Results:
##    Complete estimate = 1959
##    Complete CI = [550, 3367]
##    Bootstrap pooled estimate = 1833
##    Bootstrap weighted pooled estimate = 1810
##    Bootstrap pooled CI = [373, 3293]
##    74% of bootstrap samples have confidence intervals that do not span zero.
##       74% positive.
##       0% negative.
## Weighting Results:
##    Complete estimate = 1534
##    Complete CI = [291, 2777]
##    Bootstrap pooled estimate = 1452
##    Bootstrap weighted pooled estimate = 1403
##    Bootstrap pooled CI = [109, 2795]
##    62% of bootstrap samples have confidence intervals that do not span zero.
##       62% positive.
##       0% negative.

# COMPARISON: The bootstrapping results always have less than 90% of the coefficients estimated to be positive and different from zeros, which matches the bias-corrected estimations.

Quality of the propensity score matching.

library(kdensity)

## Warning: package 'kdensity' was built under R version 4.4.3

psfitdens1 <- kdensity(lalonde$psfit[lalonde$treat == 1])
plot(psfitdens1)

psfitdens0 <- kdensity(lalonde$psfit[lalonde$treat == 0])
plot(psfitdens0)

# COMPARISON: The propensity scores of the participants are significantly higher than the non-participants. The overlap condition may not hold.

Another matching method: genetic matching.

library(rgenoud)

## Warning: package 'rgenoud' was built under R version 4.4.3

## ##  rgenoud (Version 5.9-0.11, Build Date: 2024-10-03)
## ##  See http://sekhon.berkeley.edu/rgenoud for additional documentation.
## ##  Please cite software as:
## ##   Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011.
## ##   ``Genetic Optimization Using Derivatives: The rgenoud package for R.''
## ##   Journal of Statistical Software, 42(11): 1-26. 
## ##

gm <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, 
                    data = lalonde, 
                    method = "genetic", 
                    distance = "mahalanobis", 
                    replace = TRUE)

## Warning: (from Matching) The key tuning parameters for optimization were are all
## left at their default values.  The 'pop.size' option in particular
## should probably be increased for optimal results.  For details please
## see the help page and https://www.jsekhon.com

matched_gm <- match.data(gm)
m_gm <- lm(re78 ~ treat, data = matched_gm)
summary(m_gm)

## 
## Call:
## lm(formula = re78 ~ treat, data = matched_gm)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
##  -6349  -4913  -1866   3263  53959 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   4913.1      641.7   7.657 2.46e-13 ***
## treat         1436.1      832.0   1.726   0.0853 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7203 on 309 degrees of freedom
## Multiple R-squared:  0.00955,    Adjusted R-squared:  0.006345 
## F-statistic: 2.979 on 1 and 309 DF,  p-value: 0.08533

# RESULT: The treatment effect becomes insignificant.

EEFE 530, Spring 2025

Problem Set 3

Timothy (Yuan) Zhuang

16 March 2025

Preparation (clean the environment and load the data).

(i) Balance table.

(ii) Average Treatment Effect (ATE) using regression.

(iii) Average Treatment Effect for the Treated (ATT) using covariate matching.

One-to-one matching.

Exact matching.

Nearest neighbor matching (1:2).

Nearest neighbor matching (1:3).

Bias corrections.

Coarsened exact matching.

(iv) Average Treatment Effect for the Treated (ATT) using propensity score matching.

Directed Acyclic Graph (DAG).

Descriptive graphs.

Propensity score.

Matching on the propensity score.

Bootstrapping.

Quality of the propensity score matching.

Another matching method: genetic matching.