rm(list = ls())
data(lalonde, package = "Matching")
y0 <- subset(lalonde, treat == 0)
y1 <- subset(lalonde, treat == 1)
mean_of_untreated <- NULL
mean_of_treated <- NULL
difference <- NULL
t_statistics <- NULL
probability <- NULL
for (i in 1:(ncol(lalonde) - 1)){
mean_of_untreated[i] <- t.test(y0[, i], y1[, i])$estimate[[1]]
mean_of_treated[i] <- t.test(y0[, i], y1[, i])$estimate[[2]]
difference[i] <- t.test(y0[, i], y1[, i])$estimate[[2]] - t.test(y0[,i], y1[,i])$estimate[[1]]
t_statistics[i] <- t.test(y0[, i], y1[, i])$statistic[[1]]
probability[i] <- t.test(y0[, i], y1[, i])$p.value[[1]]
}
t_table <- rbind(mean_of_treated, mean_of_untreated, difference, t_statistics, probability)
colnames(t_table) <- colnames(lalonde[, 1:(ncol(lalonde) - 1)])
t_table <- t(t_table)
colnames(t_table)[colnames(t_table) == "mean_of_untreated"] <- "Mean of untreated"
colnames(t_table)[colnames(t_table) == "mean_of_treated"] <- "Mean of treated"
colnames(t_table)[colnames(t_table) == "difference"] <- "Difference"
colnames(t_table)[colnames(t_table) == "t_statistics"] <- "t-Statistics"
colnames(t_table)[colnames(t_table) == "probability"] <- "Probability"
t_table
## Mean of treated Mean of untreated Difference t-Statistics
## age 2.581622e+01 25.0538462 0.76237006 -1.1140361
## educ 1.034595e+01 10.0884615 0.25748441 -1.4421840
## black 8.432432e-01 0.8269231 0.01632017 -0.4577777
## hisp 5.945946e-02 0.1076923 -0.04823285 1.8565425
## married 1.891892e-01 0.1538462 0.03534304 -0.9668363
## nodegr 7.081081e-01 0.8346154 -0.12650728 3.1084981
## re74 2.095574e+03 2107.0268154 -11.45281538 0.0227466
## re75 1.532056e+03 1266.9092408 265.14638896 -0.8692061
## re78 6.349145e+03 4554.8022827 1794.34308488 -2.6741458
## u74 7.081081e-01 0.7500000 -0.04189189 0.9746890
## u75 6.000000e-01 0.6846154 -0.08461538 1.8299743
## Probability
## age 0.265944347
## educ 0.150169353
## black 0.647357377
## hisp 0.064043273
## married 0.334247781
## nodegr 0.002036780
## re74 0.981863018
## re75 0.385272603
## re78 0.007892971
## u74 0.330327781
## u75 0.068030520
# COMPARISON: The probabilities to be born as a Hispanic, to have a college degree, and to be in the unemployed status in 1975 for the people in the treated sample (the participants of the program) are statistically difference from the untreated sample.
lalonde$age2 <- lalonde$age ^ 2
lalonde$educ2 <- lalonde$educ ^ 2
lalonde$x1 <- (lalonde$age - mean(lalonde$age)) * lalonde$treat
lalonde$x2 <- (lalonde$age2 - mean(lalonde$age2)) * lalonde$treat
lalonde$x3 <- (lalonde$educ - mean(lalonde$educ)) * lalonde$treat
lalonde$x4 <- (lalonde$educ2 - mean(lalonde$educ2)) * lalonde$treat
lalonde$x5 <- (lalonde$black - mean(lalonde$black)) * lalonde$treat
lalonde$x6 <- (lalonde$hisp - mean(lalonde$hisp)) * lalonde$treat
lalonde$x7 <- (lalonde$married - mean(lalonde$married)) * lalonde$treat
lalonde$x8 <- (lalonde$nodegr - mean(lalonde$nodegr)) * lalonde$treat
lalonde$x9 <- (lalonde$re74 - mean(lalonde$re74)) * lalonde$treat
lalonde$x10 <- (lalonde$re75 - mean(lalonde$re75)) * lalonde$treat
lalonde$x11 <- (lalonde$u74 - mean(lalonde$u74)) * lalonde$treat
lalonde$x12 <- (lalonde$u75 - mean(lalonde$u75)) * lalonde$treat
m1 <- lm(re78 ~ treat +
age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75 +
x1 + x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12,
data = lalonde)
summary(m1)
##
## Call:
## lm(formula = re78 ~ treat + age + age2 + educ + educ2 + black +
## hisp + married + nodegr + re74 + re75 + u74 + u75 + x1 +
## x2 + x3 + x4 + x5 + x6 + x7 + x8 + x9 + x10 + x11 + x12,
## data = lalonde)
##
## Residuals:
## Min 1Q Median 3Q Max
## -12713 -3969 -1631 2931 49566
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.018e+03 9.569e+03 0.106 0.915360
## treat 1.544e+03 6.371e+02 2.423 0.015799 *
## age -9.652e+01 3.362e+02 -0.287 0.774199
## age2 2.510e+00 5.499e+00 0.457 0.648244
## educ 2.276e+03 2.064e+03 1.103 0.270629
## educ2 -1.256e+02 1.173e+02 -1.071 0.284961
## black -3.105e+03 1.658e+03 -1.872 0.061877 .
## hisp -8.912e+02 2.046e+03 -0.436 0.663319
## married -6.238e+02 1.215e+03 -0.514 0.607804
## nodegr -1.441e+03 1.789e+03 -0.805 0.421018
## re74 -1.250e-02 1.084e-01 -0.115 0.908197
## re75 -7.639e-03 2.035e-01 -0.038 0.970079
## u74 -2.895e+03 1.566e+03 -1.848 0.065318 .
## u75 8.345e+02 1.353e+03 0.617 0.537744
## x1 5.971e+02 5.791e+02 1.031 0.303075
## x2 -1.039e+01 9.526e+00 -1.091 0.275885
## x3 -4.569e+03 2.590e+03 -1.764 0.078481 .
## x4 2.830e+02 1.459e+02 1.939 0.053115 .
## x5 2.380e+03 2.328e+03 1.022 0.307227
## x6 2.284e+03 3.244e+03 0.704 0.481815
## x7 1.395e+03 1.784e+03 0.782 0.434757
## x8 2.286e+03 2.497e+03 0.915 0.360515
## x9 2.644e-01 1.868e-01 1.416 0.157627
## x10 2.453e-02 2.989e-01 0.082 0.934624
## x11 9.426e+03 2.412e+03 3.908 0.000108 ***
## x12 -4.970e+03 2.060e+03 -2.413 0.016237 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6420 on 419 degrees of freedom
## Multiple R-squared: 0.1155, Adjusted R-squared: 0.06275
## F-statistic: 2.189 on 25 and 419 DF, p-value: 0.0009413
# INTERPRETATION: Yes, the participation of the training program did significantly increase the real earnings of the participants by about $1,544 at the statistically significance level of 0.05.
library(MatchIt)
## Warning: package 'MatchIt' was built under R version 4.4.2
##
## Attaching package: 'MatchIt'
## The following object is masked _by_ '.GlobalEnv':
##
## lalonde
library(cobalt)
## Warning: package 'cobalt' was built under R version 4.4.3
## cobalt (Version 4.5.5, Build Date: 2024-04-02)
##
## Attaching package: 'cobalt'
## The following object is masked _by_ '.GlobalEnv':
##
## lalonde
## The following object is masked from 'package:MatchIt':
##
## lalonde
nnm1 <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde,
method = "nearest")
matched_nnm1 <- match.data(nnm1)
m_nnm1 <- lm(re78 ~ treat, data = matched_nnm1)
summary(m_nnm1)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6349 -4391 -1742 2915 53959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4390.6 499.0 8.799 < 2e-16 ***
## treat 1958.6 705.7 2.775 0.00579 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6787 on 368 degrees of freedom
## Multiple R-squared: 0.0205, Adjusted R-squared: 0.01784
## F-statistic: 7.703 on 1 and 368 DF, p-value: 0.005794
exact_match <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
exact = ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde)
## Warning: Fewer control units than treated units in some `exact` strata; not all
## treated units will get a match.
matched_exact <- match.data(exact_match)
m_exact <- lm(re78 ~ treat, data = matched_exact)
summary(m_exact)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_exact)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5161 -3818 -1781 3282 19749
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 3818.2 763.8 4.999 2.74e-06 ***
## treat 1342.8 1080.2 1.243 0.217
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5236 on 92 degrees of freedom
## Multiple R-squared: 0.01652, Adjusted R-squared: 0.00583
## F-statistic: 1.545 on 1 and 92 DF, p-value: 0.217
# EXPLANATION: The one-to-one matching allows repetitive use of the control units as a matched unit of a treated unit, while the exact matching does not. When applying the exact matching method, the total number of matched units are less. Qualitatively, the coefficient to describe the treatment effect changes from significant (at 0.01 level of significance) to insignificant after applying the method.
nnm2 <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde,
method = "nearest",
distance = "glm",
replace = TRUE,
ratio = 2)
matched_nnm2 <- match.data(nnm2)
m_nnm2 <- lm(re78 ~ treat, data = matched_nnm2)
summary(m_nnm2)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm2)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6349 -4764 -1520 2911 53959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4763.5 545.2 8.738 <2e-16 ***
## treat 1585.6 745.6 2.127 0.0342 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6918 on 344 degrees of freedom
## Multiple R-squared: 0.01298, Adjusted R-squared: 0.01011
## F-statistic: 4.523 on 1 and 344 DF, p-value: 0.03416
nnm3 <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde,
method = "nearest",
distance = "glm",
replace = TRUE,
ratio = 3)
matched_nnm3 <- match.data(nnm3)
m_nnm3 <- lm(re78 ~ treat, data = matched_nnm3)
summary(m_nnm3)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6349 -4544 -1579 2893 53959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4543.5 480.0 9.465 < 2e-16 ***
## treat 1805.6 689.8 2.618 0.00921 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6738 on 380 degrees of freedom
## Multiple R-squared: 0.01771, Adjusted R-squared: 0.01513
## F-statistic: 6.851 on 1 and 380 DF, p-value: 0.009211
nnm2bc <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde,
method = "nearest",
distance = "glm",
replace = TRUE,
BiasAdjust = T,
ratio = 2)
matched_nnm2bc <- match.data(nnm2bc)
m_nnm2bc <- lm(re78 ~ treat, data = matched_nnm2bc)
summary(m_nnm2bc)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm2bc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6349 -4764 -1520 2911 53959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4763.5 545.2 8.738 <2e-16 ***
## treat 1585.6 745.6 2.127 0.0342 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6918 on 344 degrees of freedom
## Multiple R-squared: 0.01298, Adjusted R-squared: 0.01011
## F-statistic: 4.523 on 1 and 344 DF, p-value: 0.03416
nnm3bc <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde,
method = "nearest",
distance = "glm",
replace = TRUE,
BiasAdjust = T,
ratio = 3)
matched_nnm3bc <- match.data(nnm3bc)
m_nnm3bc <- lm(re78 ~ treat, data = matched_nnm3bc)
summary(m_nnm3bc)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_nnm3bc)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6349 -4544 -1579 2893 53959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4543.5 480.0 9.465 < 2e-16 ***
## treat 1805.6 689.8 2.618 0.00921 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6738 on 380 degrees of freedom
## Multiple R-squared: 0.01771, Adjusted R-squared: 0.01513
## F-statistic: 6.851 on 1 and 380 DF, p-value: 0.009211
# RESULTS: The significance levels of the treatment effects do not change much after the biases are corrected.
cem <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde,
method = "cem")
matched_cem <- match.data(cem)
m_cem <- lm(re78 ~ treat, data = matched_cem)
summary(m_cem)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_cem)
##
## Residuals:
## Min 1Q Median 3Q Max
## -5284 -4007 -2104 3174 28815
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4006.7 522.1 7.674 6.57e-13 ***
## treat 1277.4 802.7 1.591 0.113
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5720 on 206 degrees of freedom
## Multiple R-squared: 0.01214, Adjusted R-squared: 0.007348
## F-statistic: 2.532 on 1 and 206 DF, p-value: 0.1131
# EXPLANATION: Compared to the exact matching, the coefficient of the treatment effect becomes insignificant.
library(ggplot2)
library(ggdag)
## Warning: package 'ggdag' was built under R version 4.4.2
##
## Attaching package: 'ggdag'
## The following object is masked from 'package:stats':
##
## filter
dagify(
Y78 ~ D,
D ~ Y74 + Y75,
Y78 ~ Y74 + Y75) %>%
ggplot(aes(x = x, y = y, xend = xend, yend = yend)) +
geom_dag_point() +
geom_dag_edges_arc() +
geom_dag_text() +
theme_dag()
# DESCRIPTION: People with less earnings before (or with a earnings of 0 in 1974 or 1975) are more likely to participate in a job training program. We have the problem of intent to be treated. Unfortunately, the earnings in 1974 and 1975 are also correlated with the earnings in 1978. The endogeneity arose, as the earnings in 1974 and 1975 are the confounders.
x1 <- asinh(lalonde$re74)
x2 <- asinh(lalonde$re75)
x3 <- asinh(lalonde$re78)
hist(x1, main = "Histogram", col = "lightyellow", xlim = c(min(x1, x2, x3), max(x1, x2, x3)), xlab = "Log of real earnings ($)")
hist(x2, col = "lightgreen", add = TRUE)
hist(x3, col = "lightblue", add = TRUE)
legend("topright", legend = c("RE74", "RE75", "RE78"), fill = c("lightyellow", "lightgreen", "lightblue"))
hist(y0$educ, main = "Histogram", col = "lightgreen", xlab = "Years of education")
hist(y1$educ, col = "lightblue", add = TRUE)
legend("topright", legend = c("Untreated", "Treated"), fill = c("lightgreen", "lightblue"))
# DISCUSSIONS: (I applied the inverse hyperbolic sine transformation to the earnings.) The numbers of people zero earnings generally decreased from 1974 to 1975, and from 1975 to 1978. The unemployment rate decreased over time. The years of education attainment in the untreated group is generally lower than the treated group.
psfit <- glm(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75, family = binomial, data = lalonde)
lalonde$psfit <- predict(psfit, data = lalonde)
psm3 <- matchit(treat ~ psfit,
data = lalonde,
method = "nearest",
distance = "glm",
replace = TRUE,
BiasAdjust = T,
ratio = 3)
matched_psm3 <- match.data(psm3)
m_psm3 <- lm(re78 ~ treat, data = matched_psm3)
summary(m_psm3)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_psm3)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6349 -4544 -1579 2893 53959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4543.5 480.0 9.465 < 2e-16 ***
## treat 1805.6 689.8 2.618 0.00921 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6738 on 380 degrees of freedom
## Multiple R-squared: 0.01771, Adjusted R-squared: 0.01513
## F-statistic: 6.851 on 1 and 380 DF, p-value: 0.009211
library(PSAboot)
## Warning: package 'PSAboot' was built under R version 4.4.3
## Loading required package: PSAgraphics
## Warning: package 'PSAgraphics' was built under R version 4.4.3
## Loading required package: rpart
lm <- treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75
boot <- PSAboot(Tr = lalonde$treat,
Y = lalonde$re78,
X = lalonde,
formu = lm,
M = 100,
seed = 16802)
## 100 bootstrap samples using 6 methods.
## Bootstrap sample sizes:
## Treated=185 (100%) with replacement.
## Control=260 (100%) with replacement.
## | | | 0%
## Warning in value[[3L]](cond): Error occurred during iteration 2 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |= | 1% | |= | 2% | |== | 3% | |=== | 4% | |==== | 5%
## Warning in value[[3L]](cond): Error occurred during iteration 7 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |==== | 6%
## Warning in value[[3L]](cond): Error occurred during iteration 8 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |===== | 7% | |====== | 8% | |====== | 9% | |======= | 10% | |======== | 11%
## Warning in value[[3L]](cond): Error occurred during iteration 13 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |======== | 12% | |========= | 13%
## Warning in value[[3L]](cond): Error occurred during iteration 15 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |========== | 14% | |=========== | 15% | |=========== | 16% | |============ | 17%
## Warning in value[[3L]](cond): Error occurred during iteration 19 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |============= | 18% | |============= | 19% | |============== | 20% | |=============== | 21% | |================ | 22% | |================ | 23% | |================= | 24% | |================== | 25%
## Warning in value[[3L]](cond): Error occurred during iteration 27 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |================== | 26% | |=================== | 27% | |==================== | 28% | |===================== | 29% | |===================== | 30%
## Warning in value[[3L]](cond): Error occurred during iteration 32 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |====================== | 31% | |======================= | 32% | |======================= | 33% | |======================== | 34% | |========================= | 35% | |========================= | 36% | |========================== | 37% | |=========================== | 38% | |============================ | 39% | |============================ | 40% | |============================= | 41%
## Warning in value[[3L]](cond): Error occurred during iteration 43 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |============================== | 42% | |============================== | 43% | |=============================== | 44% | |================================ | 45% | |================================= | 46% | |================================= | 47% | |================================== | 48% | |=================================== | 49% | |=================================== | 51% | |==================================== | 52% | |===================================== | 53%
## Warning in value[[3L]](cond): Error occurred during iteration 54 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |===================================== | 54% | |====================================== | 55% | |======================================= | 56% | |======================================== | 57% | |======================================== | 58% | |========================================= | 59% | |========================================== | 60% | |========================================== | 61%
## Warning in value[[3L]](cond): Error occurred during iteration 62 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |=========================================== | 62% | |============================================ | 63%
## Warning in value[[3L]](cond): Error occurred during iteration 64 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |============================================= | 64% | |============================================= | 65% | |============================================== | 66% | |=============================================== | 67% | |=============================================== | 68%
## Warning in value[[3L]](cond): Error occurred during iteration 69 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |================================================ | 69%
## Warning in value[[3L]](cond): Error occurred during iteration 70 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |================================================= | 70%
## Warning in value[[3L]](cond): Error occurred during iteration 71 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |================================================= | 71%
## Warning in value[[3L]](cond): Error occurred during iteration 72 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |================================================== | 72% | |=================================================== | 73% | |==================================================== | 74% | |==================================================== | 75% | |===================================================== | 76% | |====================================================== | 77% | |====================================================== | 78% | |======================================================= | 79% | |======================================================== | 80% | |========================================================= | 81% | |========================================================= | 82% | |========================================================== | 83% | |=========================================================== | 84%
## Warning in value[[3L]](cond): Error occurred during iteration 85 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |=========================================================== | 85%
## Warning in value[[3L]](cond): Error occurred during iteration 86 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |============================================================ | 86% | |============================================================= | 87% | |============================================================== | 88% | |============================================================== | 89% | |=============================================================== | 90%
## Warning in value[[3L]](cond): Error occurred during iteration 91 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |================================================================ | 91%
## Warning in value[[3L]](cond): Error occurred during iteration 92 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |================================================================ | 92% | |================================================================= | 93% | |================================================================== | 94% | |================================================================== | 95%
## Warning in value[[3L]](cond): Error occurred during iteration 96 for ctree method: Error in f(Tr = Tr[rows], Y = Y[rows], X = X[rows, ], X.trans = X.trans[rows, : Classification tree (ctree) with no splits occurred.
## | |=================================================================== | 96% | |==================================================================== | 97% | |===================================================================== | 98% | |===================================================================== | 99% | |======================================================================| 100%
summary(boot)
## Stratification Results:
## Complete estimate = 1453
## Complete CI = [186, 2720]
## Bootstrap pooled estimate = 1418
## Bootstrap weighted pooled estimate = 1370
## Bootstrap pooled CI = [71.5, 2764]
## 58% of bootstrap samples have confidence intervals that do not span zero.
## 58% positive.
## 0% negative.
## ctree Results:
## Complete estimate = 1598
## Complete CI = [-6.62, 3203]
## Bootstrap pooled estimate = 1530
## Bootstrap weighted pooled estimate = 1551
## Bootstrap pooled CI = [170, 2890]
## 49.4% of bootstrap samples have confidence intervals that do not span zero.
## 49.4% positive.
## 0% negative.
## rpart Results:
## Complete estimate = 1332
## Complete CI = [-295, 2959]
## Bootstrap pooled estimate = 1534
## Bootstrap weighted pooled estimate = 1522
## Bootstrap pooled CI = [-275, 3343]
## 34% of bootstrap samples have confidence intervals that do not span zero.
## 34% positive.
## 0% negative.
## Matching Results:
## Complete estimate = 1382
## Complete CI = [679, 2085]
## Bootstrap pooled estimate = 1303
## Bootstrap weighted pooled estimate = 1319
## Bootstrap pooled CI = [-581, 3187]
## 78% of bootstrap samples have confidence intervals that do not span zero.
## 77% positive.
## 1% negative.
## MatchIt Results:
## Complete estimate = 1959
## Complete CI = [550, 3367]
## Bootstrap pooled estimate = 1833
## Bootstrap weighted pooled estimate = 1810
## Bootstrap pooled CI = [373, 3293]
## 74% of bootstrap samples have confidence intervals that do not span zero.
## 74% positive.
## 0% negative.
## Weighting Results:
## Complete estimate = 1534
## Complete CI = [291, 2777]
## Bootstrap pooled estimate = 1452
## Bootstrap weighted pooled estimate = 1403
## Bootstrap pooled CI = [109, 2795]
## 62% of bootstrap samples have confidence intervals that do not span zero.
## 62% positive.
## 0% negative.
# COMPARISON: The bootstrapping results always have less than 90% of the coefficients estimated to be positive and different from zeros, which matches the bias-corrected estimations.
library(kdensity)
## Warning: package 'kdensity' was built under R version 4.4.3
psfitdens1 <- kdensity(lalonde$psfit[lalonde$treat == 1])
plot(psfitdens1)
psfitdens0 <- kdensity(lalonde$psfit[lalonde$treat == 0])
plot(psfitdens0)
# COMPARISON: The propensity scores of the participants are significantly higher than the non-participants. The overlap condition may not hold.
library(rgenoud)
## Warning: package 'rgenoud' was built under R version 4.4.3
## ## rgenoud (Version 5.9-0.11, Build Date: 2024-10-03)
## ## See http://sekhon.berkeley.edu/rgenoud for additional documentation.
## ## Please cite software as:
## ## Walter Mebane, Jr. and Jasjeet S. Sekhon. 2011.
## ## ``Genetic Optimization Using Derivatives: The rgenoud package for R.''
## ## Journal of Statistical Software, 42(11): 1-26.
## ##
gm <- matchit(treat ~ age + age2 + educ + educ2 + black + hisp + married + nodegr + re74 + re75 + u74 + u75,
data = lalonde,
method = "genetic",
distance = "mahalanobis",
replace = TRUE)
## Warning: (from Matching) The key tuning parameters for optimization were are all
## left at their default values. The 'pop.size' option in particular
## should probably be increased for optimal results. For details please
## see the help page and https://www.jsekhon.com
matched_gm <- match.data(gm)
m_gm <- lm(re78 ~ treat, data = matched_gm)
summary(m_gm)
##
## Call:
## lm(formula = re78 ~ treat, data = matched_gm)
##
## Residuals:
## Min 1Q Median 3Q Max
## -6349 -4913 -1866 3263 53959
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4913.1 641.7 7.657 2.46e-13 ***
## treat 1436.1 832.0 1.726 0.0853 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 7203 on 309 degrees of freedom
## Multiple R-squared: 0.00955, Adjusted R-squared: 0.006345
## F-statistic: 2.979 on 1 and 309 DF, p-value: 0.08533
# RESULT: The treatment effect becomes insignificant.