Check out the data set “data_health_insurance_experiment.csv”, which comes from a randomized controlled trial run to study the effect of expanding public health insurance on the health care use, health outcomes, financial strain, and well-being of low-income adults.
# Variable names:
# Y: Any Emergency Department visit in the study period
# D: Ever enrolled in Medicaid in the study period
# Z: Selected in the lottery
# numhh_1: household consists of the head plus one additional member
# numhh_2: household consists of the head plus two additional members
# english: individual requested English-language materials
# female: female dummy
# birthyear: year of birth
## You can find more information about the experiment and the data here:
## https://www.nber.org/programs-projects/projects-and-centers/oregon-health-insurance-experiment?page=1&perPage=50
data <- read.csv("data_health_insurance_experiment.csv")
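Before modeling, it can help to glance at the sample; a minimal sanity-check sketch using the variables defined above:
# Sample size and means of outcome, treatment, and instrument
nrow(data)
colMeans(data[, c("Y", "D", "Z")])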
## OLS
ols <- lm(data = data, formula = Y ~ D)
ols_controls <- lm(data = data, formula = Y ~ D + numhh_1 + numhh_2 + birthyear + english + female)
library(jtools)
summ(ols, digits = 4, robust = TRUE)
## MODEL INFO:
## Observations: 24646
## Dependent Variable: Y
## Type: OLS linear regression
##
## MODEL FIT:
## F(1,24644) = 577.0421, p = 0.0000
## R² = 0.0229
## Adj. R² = 0.0228
##
## Standard errors: Robust, type = HC3
## ------------------------------------------------------
## Est. S.E. t val. p
## ----------------- -------- -------- --------- --------
## (Intercept) 0.3026 0.0034 89.8342 0.0000
## D 0.1670 0.0073 23.0272 0.0000
## ------------------------------------------------------
summ(ols_controls, digits = 4, robust = TRUE)
## MODEL INFO:
## Observations: 24646
## Dependent Variable: Y
## Type: OLS linear regression
##
## MODEL FIT:
## F(6,24639) = 240.4794, p = 0.0000
## R² = 0.0553
## Adj. R² = 0.0551
##
## Standard errors: Robust, type = HC3
## --------------------------------------------------------
## Est. S.E. t val. p
## ----------------- --------- -------- ---------- --------
## (Intercept) 1.0735 0.4779 2.2464 0.0247
## D 0.1701 0.0071 23.8549 0.0000
## numhh_1 -0.0967 0.0071 -13.5271 0.0000
## numhh_2 -0.1031 0.0543 -1.8998 0.0575
## birthyear -0.0005 0.0002 -1.9240 0.0544
## english 0.1934 0.0074 26.1743 0.0000
## female -0.0002 0.0059 -0.0277 0.9779
## --------------------------------------------------------
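Because D is binary, the slope in the bivariate regression above is simply the difference in mean ED use between enrollees and non-enrollees; a quick check:
# The bivariate OLS coefficient on D equals the raw difference in group means
group_means <- with(data, tapply(Y, D, mean))
group_means["1"] - group_means["0"]  # should reproduce the D estimate of about 0.167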
## Double ML: partially linear regression (PLR)
library(DoubleML)
library(mlr3)
library(mlr3learners)
library(ggplot2)
library(data.table)
library(ivreg)
library(ranger)
# Specify the data and variables for the causal model
dml_data = DoubleMLData$new(data,
y_col = "Y",
d_cols = "D",
x_cols = c("numhh_1", "numhh_2", "birthyear",
"english", "female"))
print(dml_data)
## ================= DoubleMLData Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s):
## No. Observations: 24646
# suppress messages from the mlr3 package during fitting
lgr::get_logger("mlr3")$set_threshold("warn")
# Learners to estimate the nuisance models:
# mtry is often set to about the square root of the number of controls (mtry = floor(sqrt(n_vars))); with only 5 covariates, each split considers all of them here
forest = lrn("regr.ranger", num.trees=500, mtry=5, max.depth=5, min.node.size=2)
forest
## <LearnerRegrRanger:regr.ranger>: Random Forest
## * Model: -
## * Parameters: max.depth=5, min.node.size=2, mtry=5, num.threads=1,
## num.trees=500
## * Packages: mlr3, mlr3learners, ranger
## * Predict Types: [response], se
## * Feature Types: logical, integer, numeric, character, factor, ordered
## * Properties: hotstart_backward, importance, oob_error, weights
#learner = lrn("regr.glmnet", lambda = sqrt(log(n_vars)/(n_obs)))
lasso = lrn("regr.cv_glmnet")
lasso
## <LearnerRegrCVGlmnet:regr.cv_glmnet>: GLM with Elastic Net Regularization
## * Model: -
## * Parameters: family=gaussian
## * Packages: mlr3, mlr3learners, glmnet
## * Predict Types: [response]
## * Feature Types: logical, integer, numeric
## * Properties: selected_features, weights
trees = lrn("regr.rpart")
set.seed(10101)
dml_plr_forest = DoubleMLPLR$new(dml_data, ml_l=forest, ml_m=forest)
dml_plr_forest$fit()
print(dml_plr_forest)
## ================= DoubleMLPLR Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s):
## No. Observations: 24646
##
## ------------------ Score & algorithm ------------------
## Score function: partialling out
## DML algorithm: dml2
##
## ------------------ Machine learner ------------------
## ml_l: regr.ranger
## ml_m: regr.ranger
##
## ------------------ Resampling ------------------
## No. folds: 5
## No. repeated sample splits: 1
## Apply cross-fitting: TRUE
##
## ------------------ Fit summary ------------------
## Estimates and significance testing of the effect of target variables
## Estimate. Std. Error t value Pr(>|t|)
## D 0.171169 0.007145 23.96 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dml_plr_lasso = DoubleMLPLR$new(dml_data, ml_l=lasso, ml_m=lasso)
dml_plr_lasso$fit()
print(dml_plr_lasso)
## ================= DoubleMLPLR Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s):
## No. Observations: 24646
##
## ------------------ Score & algorithm ------------------
## Score function: partialling out
## DML algorithm: dml2
##
## ------------------ Machine learner ------------------
## ml_l: regr.cv_glmnet
## ml_m: regr.cv_glmnet
##
## ------------------ Resampling ------------------
## No. folds: 5
## No. repeated sample splits: 1
## Apply cross-fitting: TRUE
##
## ------------------ Fit summary ------------------
## Estimates and significance testing of the effect of target variables
## Estimate. Std. Error t value Pr(>|t|)
## D 0.169039 0.007133 23.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dml_plr_trees = DoubleMLPLR$new(dml_data, ml_l=trees, ml_m=trees)
dml_plr_trees$fit()
print(dml_plr_trees)
## ================= DoubleMLPLR Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s):
## No. Observations: 24646
##
## ------------------ Score & algorithm ------------------
## Score function: partialling out
## DML algorithm: dml2
##
## ------------------ Machine learner ------------------
## ml_l: regr.rpart
## ml_m: regr.rpart
##
## ------------------ Resampling ------------------
## No. folds: 5
## No. repeated sample splits: 1
## Apply cross-fitting: TRUE
##
## ------------------ Fit summary ------------------
## Estimates and significance testing of the effect of target variables
## Estimate. Std. Error t value Pr(>|t|)
## D 0.167527 0.007137 23.47 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
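For comparison, the PLR point estimates and 95% confidence intervals can be collected next to the OLS benchmark, mirroring the PLIV comparison assembled further below (a sketch using the objects fitted above):
# Collect PLR estimates and 95% confidence intervals alongside OLS with controls
confints_plr = rbind(confint(ols_controls)["D", ],
                     dml_plr_forest$confint(), dml_plr_lasso$confint(),
                     dml_plr_trees$confint())
estimates_plr = c(coef(ols_controls)["D"],
                  dml_plr_forest$coef, dml_plr_lasso$coef, dml_plr_trees$coef)
result_plr = data.table("model" = "PLR",
                        "ML" = c("OLS", "ranger", "glmnet", "rpart"),
                        "Estimate" = estimates_plr,
                        "lower" = confints_plr[, 1],
                        "upper" = confints_plr[, 2])
result_plr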
## IV (2SLS)
# ivreg three-part formula: exogenous controls | endogenous treatment | instrument
iv <- ivreg(data = data, Y ~ numhh_1 + numhh_2 + birthyear + english + female | D | Z)
summary(iv)
##
## Call:
## ivreg(formula = Y ~ numhh_1 + numhh_2 + birthyear + english +
## female | D | Z, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.4544 -0.3720 -0.2773 0.6152 0.9357
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.9462085 0.4839622 1.955 0.05058 .
## D 0.0696273 0.0249371 2.792 0.00524 **
## numhh_1 -0.0941450 0.0076821 -12.255 < 2e-16 ***
## numhh_2 -0.1118885 0.0644765 -1.735 0.08269 .
## birthyear -0.0003925 0.0002460 -1.596 0.11060
## english 0.1934575 0.0089655 21.578 < 2e-16 ***
## female 0.0085460 0.0063229 1.352 0.17652
##
## Diagnostic tests:
## df1 df2 statistic p-value
## Weak instruments 1 24639 2044.86 < 2e-16 ***
## Wu-Hausman 1 24638 17.75 2.52e-05 ***
## Sargan 0 NA NA NA
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4636 on 24639 degrees of freedom
## Multiple R-Squared: 0.04712, Adjusted R-squared: 0.04689
## Wald test: 138.5 on 6 and 24639 DF, p-value: < 2.2e-16
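The "Weak instruments" diagnostic above is a first-stage F test; the first stage can also be inspected directly (a sketch with the same controls):
# First stage: does lottery selection (Z) predict Medicaid enrollment (D)?
first_stage <- lm(D ~ Z + numhh_1 + numhh_2 + birthyear + english + female, data = data)
summ(first_stage, digits = 4, robust = TRUE)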
## Double ML: partially linear IV regression (PLIV)
# Specify the data and variables for the causal model
dml_data_iv = DoubleMLData$new(data,
y_col = "Y",
d_cols = "D",
z_cols = "Z",
x_cols = c("numhh_1", "numhh_2", "birthyear",
"english", "female"))
print(dml_data_iv)
## ================= DoubleMLData Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s): Z
## No. Observations: 24646
# suppress messages from the mlr3 package during fitting
lgr::get_logger("mlr3")$set_threshold("warn")
# Learners to estimate the nuisance models:
# mtry is often set to about the square root of the number of controls (mtry = floor(sqrt(n_vars))); with only 5 covariates, each split considers all of them here
forest = lrn("regr.ranger", num.trees=500, mtry=5, max.depth=5, min.node.size=2)
forest
## <LearnerRegrRanger:regr.ranger>: Random Forest
## * Model: -
## * Parameters: max.depth=5, min.node.size=2, mtry=5, num.threads=1,
## num.trees=500
## * Packages: mlr3, mlr3learners, ranger
## * Predict Types: [response], se
## * Feature Types: logical, integer, numeric, character, factor, ordered
## * Properties: hotstart_backward, importance, oob_error, weights
#learner = lrn("regr.glmnet", lambda = sqrt(log(n_vars)/(n_obs)))
lasso = lrn("regr.cv_glmnet")
lasso
## <LearnerRegrCVGlmnet:regr.cv_glmnet>: GLM with Elastic Net Regularization
## * Model: -
## * Parameters: family=gaussian
## * Packages: mlr3, mlr3learners, glmnet
## * Predict Types: [response]
## * Feature Types: logical, integer, numeric
## * Properties: selected_features, weights
trees = lrn("regr.rpart")
trees
## <LearnerRegrRpart:regr.rpart>: Regression Tree
## * Model: -
## * Parameters: xval=0
## * Packages: mlr3, rpart
## * Predict Types: [response]
## * Feature Types: logical, integer, numeric, factor, ordered
## * Properties: importance, missings, selected_features, weights
boost = lrn("regr.xgboost", objective="reg:squarederror", eta=0.1, nrounds=35)
boost
## <LearnerRegrXgboost:regr.xgboost>: Extreme Gradient Boosting
## * Model: -
## * Parameters: eta=0.1, nrounds=35, nthread=1,
## objective=reg:squarederror, verbose=0
## * Validate: NULL
## * Packages: mlr3, mlr3learners, xgboost
## * Predict Types: [response]
## * Feature Types: logical, integer, numeric
## * Properties: hotstart_forward, importance, internal_tuning, missings,
## validation, weights
library(xgboost)
set.seed(10103)
dml_pliv_forest = DoubleMLPLIV$new(dml_data_iv, ml_l=forest, ml_m=forest, ml_r=forest)
dml_pliv_forest$fit()
print(dml_pliv_forest)
## ================= DoubleMLPLIV Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s): Z
## No. Observations: 24646
##
## ------------------ Score & algorithm ------------------
## Score function: partialling out
## DML algorithm: dml2
##
## ------------------ Machine learner ------------------
## ml_l: regr.ranger
## ml_m: regr.ranger
## ml_r: regr.ranger
##
## ------------------ Resampling ------------------
## No. folds: 5
## No. repeated sample splits: 1
## Apply cross-fitting: TRUE
##
## ------------------ Fit summary ------------------
## Estimates and significance testing of the effect of target variables
## Estimate. Std. Error t value Pr(>|t|)
## D 0.07300 0.02479 2.944 0.00324 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dml_pliv_lasso = DoubleMLPLIV$new(dml_data_iv, ml_l=lasso, ml_m=lasso, ml_r=lasso)
dml_pliv_lasso$fit()
print(dml_pliv_lasso)
## ================= DoubleMLPLIV Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s): Z
## No. Observations: 24646
##
## ------------------ Score & algorithm ------------------
## Score function: partialling out
## DML algorithm: dml2
##
## ------------------ Machine learner ------------------
## ml_l: regr.cv_glmnet
## ml_m: regr.cv_glmnet
## ml_r: regr.cv_glmnet
##
## ------------------ Resampling ------------------
## No. folds: 5
## No. repeated sample splits: 1
## Apply cross-fitting: TRUE
##
## ------------------ Fit summary ------------------
## Estimates and significance testing of the effect of target variables
## Estimate. Std. Error t value Pr(>|t|)
## D 0.05984 0.02505 2.389 0.0169 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dml_pliv_trees = DoubleMLPLIV$new(dml_data_iv, ml_l=trees, ml_m=trees, ml_r=trees)
dml_pliv_trees$fit()
print(dml_pliv_trees)
## ================= DoubleMLPLIV Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s): Z
## No. Observations: 24646
##
## ------------------ Score & algorithm ------------------
## Score function: partialling out
## DML algorithm: dml2
##
## ------------------ Machine learner ------------------
## ml_l: regr.rpart
## ml_m: regr.rpart
## ml_r: regr.rpart
##
## ------------------ Resampling ------------------
## No. folds: 5
## No. repeated sample splits: 1
## Apply cross-fitting: TRUE
##
## ------------------ Fit summary ------------------
## Estimates and significance testing of the effect of target variables
## Estimate. Std. Error t value Pr(>|t|)
## D 0.06589 0.02507 2.628 0.00859 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
dml_pliv_xgboost = DoubleMLPLIV$new(dml_data_iv, ml_l=boost, ml_m=boost, ml_r=boost)
dml_pliv_xgboost$fit()
print(dml_pliv_xgboost)
## ================= DoubleMLPLIV Object ==================
##
##
## ------------------ Data summary ------------------
## Outcome variable: Y
## Treatment variable(s): D
## Covariates: numhh_1, numhh_2, birthyear, english, female
## Instrument(s): Z
## No. Observations: 24646
##
## ------------------ Score & algorithm ------------------
## Score function: partialling out
## DML algorithm: dml2
##
## ------------------ Machine learner ------------------
## ml_l: regr.xgboost
## ml_m: regr.xgboost
## ml_r: regr.xgboost
##
## ------------------ Resampling ------------------
## No. folds: 5
## No. repeated sample splits: 1
## Apply cross-fitting: TRUE
##
## ------------------ Fit summary ------------------
## Estimates and significance testing of the effect of target variables
## Estimate. Std. Error t value Pr(>|t|)
## D 0.06885 0.02486 2.77 0.00561 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## Plot results
confints = rbind(confint(iv)["D", ],
dml_pliv_forest$confint(), dml_pliv_lasso$confint(),
dml_pliv_trees$confint(), dml_pliv_xgboost$confint())
estimates = c(coef(iv)["D"],
dml_pliv_forest$coef, dml_pliv_lasso$coef,
dml_pliv_trees$coef, dml_pliv_xgboost$coef)
result_pliv = data.table("model" = "PLIV",
"ML" = c("2SLS", "ranger", "glmnet", "rpart", "xgboost"),
"Estimate" = estimates,
"lower" = confints[,1],
"upper" = confints[,2])
result_pliv$ML = factor(result_pliv$ML, c("2SLS", "ranger", "glmnet", "rpart", "xgboost"))
result_pliv
## model ML Estimate lower upper
## <char> <fctr> <num> <num> <num>
## 1: PLIV 2SLS 0.06962734 0.02074912 0.1185056
## 2: PLIV ranger 0.07300334 0.02440888 0.1215978
## 3: PLIV glmnet 0.05983663 0.01074140 0.1089319
## 4: PLIV rpart 0.06589218 0.01674871 0.1150357
## 5: PLIV xgboost 0.06884929 0.02012969 0.1175689
g_ci = ggplot(result_pliv, aes(x = ML, y = Estimate, color = ML)) +
geom_point() +
geom_errorbar(aes(ymin = lower, ymax = upper, color = ML)) +
geom_hline(yintercept = 0, color = "grey") +
theme_minimal() + ylab("Coefficient and 95% confidence interval") +
xlab("") +
ggtitle("PLIV") +
theme(axis.text.x = element_text(angle = 90), legend.position = "none",
text = element_text(size = 20))
g_ci
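Across learners, the PLIV estimates cluster around 0.06-0.07, far below the PLR and OLS estimates of about 0.17. Because enrollment D is partly self-selected while lottery selection Z is random, this gap is consistent with selection into Medicaid: the IV-based estimates recover the effect for lottery compliers rather than the confounded association.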
We now want to explore the identification of Local Average Treatment Effects (LATEs) and test for violations of the instrumental variable assumptions using the LATEtest package in R. The package uses causal forests to search for and test local violations in a data-driven way. Your task is to conduct a LATE analysis and investigate potential violations of instrument validity.
library(devtools)
## Loading required package: usethis
# install_github("farbmacher/LATEtest")
library(LATEtest)
library(rpart.plot)
## Loading required package: rpart
df <- data
Xvars <- c('numhh_1', 'numhh_2', 'birthyear', 'english', 'female') # covariates
df$Y <- as.vector(df[['Y']]) # outcome
df$D <- as.vector(df[['D']]) # treatment
df$Z <- as.vector(df[['Z']]) # instrument
test <- LATEtest(data=df, covars=Xvars, alpha=0.05, minsize=100)
# Retrieve and plot the tree whose leaf attains the largest test statistic
maxtree <- eval(parse(text=paste("test$treelist$tree_", test$maxTtree$label, test$maxTtree$J, sep="")))
rpart.plot(maxtree, extra = 101, box.palette = "GyRd", shadow.col = "gray", nn = TRUE, roundint = FALSE)
test
## $treelist
## $treelist$tree_Q1A1
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 7299.4720 -0.13721280
## 2) birthyear< 1951.5 1186 971.9999 -0.25391750 *
## 3) birthyear>=1951.5 11137 6309.5980 -0.12478460
## 6) birthyear< 1973.5 6330 3527.4810 -0.14633180 *
## 7) birthyear>=1973.5 4807 2775.3080 -0.09641076 *
##
## $treelist$tree_Q1B1
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 7876.577 -0.1515664 *
##
## $treelist$tree_Q1A2
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 6690.1160 -0.11671090
## 2) english>=0.5 10666 6207.5490 -0.12753170 *
## 3) english< 0.5 1657 473.2793 -0.04705769 *
##
## $treelist$tree_Q1B2
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 6665.0900 -0.09880377
## 2) english>=0.5 10645 6266.6670 -0.10964220
## 4) birthyear>=1952.5 9408 5649.9570 -0.11849910
## 8) birthyear< 1968.5 4045 2707.5450 -0.15135740 *
## 9) birthyear>=1968.5 5363 2934.7510 -0.09371596 *
## 5) birthyear< 1952.5 1237 610.3591 -0.04228154 *
## 3) english< 0.5 1678 389.2395 -0.03004614 *
##
## $treelist$tree_Q0A1
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 13007.150 -0.1605121
## 2) birthyear< 1973.5 7516 7856.622 -0.1905979 *
## 3) birthyear>=1973.5 4807 5133.088 -0.1134713 *
##
## $treelist$tree_Q0B1
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 13107.190 -0.1537689
## 2) birthyear< 1975.5 8096 8517.112 -0.1723714 *
## 3) birthyear>=1975.5 4227 4581.911 -0.1181395 *
##
## $treelist$tree_Q0A2
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 8706.669 -0.08909228
## 2) numhh_1< 0.5 9831 7304.932 -0.10250490 *
## 3) numhh_1>=0.5 2492 1392.991 -0.03617909 *
##
## $treelist$tree_Q0B2
## n= 12323
##
## node), split, n, deviance, yval
## * denotes terminal node
##
## 1) root 12323 8669.2250 -0.08842468
## 2) english>=0.5 10645 8072.6930 -0.09835373 *
## 3) english< 0.5 1678 588.8252 -0.02543615 *
##
##
## $maxTtree
## $maxTtree$label
## [1] "Q0B"
##
## $maxTtree$J
## [1] 2
##
##
## $descriptives
## $descriptives$meanY_D_Z
## Mean Mean Mean
## 0.3435040 0.2451513 0.3905705
##
## $descriptives$min_mean_maxZhatA
## Min. Mean Max.
## 0.2012062 0.3900155 0.7467511
##
## $descriptives$min_mean_maxZhatB
## Min. Mean Max.
## 0.2099685 0.3906398 0.7218264
##
##
## $parameters
## $parameters$covars
## [1] "numhh_1" "numhh_2" "birthyear" "english" "female"
##
## $parameters$slice
## [1] "equidistant"
##
## $parameters$subsets
## [1] 4
##
## $parameters$minsize
## [1] 100
##
## $parameters$alpha
## [1] 0.05
##
## $parameters$huge
## [1] FALSE
##
##
## $leafinfo
## label J knot n_train Tstat_train is_relevant is_largest sep n_est Tstat_est
## 1 Q1A 1 2 1186 -9.651124 FALSE TRUE | 1187 -7.174851
## 2 Q1A 1 6 6330 -15.593419 FALSE FALSE | 6343 -16.495115
## 3 Q1A 1 7 4807 -8.795361 FALSE TRUE | 4793 -11.267660
## 4 Q1B 1 1 12323 -21.043369 FALSE TRUE | 12323 -19.789279
## 5 Q1A 2 2 10666 -17.263112 FALSE FALSE | 10645 -14.742271
## 6 Q1A 2 3 1657 -3.582055 FALSE TRUE | 1678 -2.553955
## 7 Q1B 2 3 1678 -2.553955 FALSE TRUE | 1657 -3.582055
## 8 Q1B 2 5 1237 -2.115322 TRUE TRUE | 1253 -5.254173
## 9 Q1B 2 8 4045 -11.763237 FALSE FALSE | 4083 -12.312726
## 10 Q1B 2 9 5363 -9.275872 FALSE TRUE | 5330 -11.032464
## 11 Q0A 1 2 7516 -16.159545 FALSE FALSE | 7530 -14.163542
## 12 Q0A 1 3 4807 -7.611681 FALSE TRUE | 4793 -8.802597
## 13 Q0B 1 2 8096 -15.119444 FALSE FALSE | 8137 -16.121321
## 14 Q0B 1 3 4227 -7.375668 FALSE TRUE | 4186 -7.322763
## 15 Q0A 2 2 9831 -11.789358 FALSE FALSE | 9864 -11.218187
## 16 Q0A 2 3 2492 -2.414666 FALSE TRUE | 2459 -3.527070
## 17 Q0B 2 2 10645 -11.651624 FALSE FALSE | 10666 -11.520276
## 18 Q0B 2 3 1678 -1.757888 TRUE TRUE | 1657 -2.482540
## Tmax
## 1 0
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 0
## 14 0
## 15 0
## 16 0
## 17 0
## 18 1
##
## $results
## $results$nu_ineq
## p0 p1 p2
## [1,] 18 2 2
##
## $results$Tmax
## T0 T1 T2
## [1,] -2.48254 -2.48254 -2.48254
##
## $results$cv
## cv0 cv1 cv2
## [1,] 2.772921 1.959964 1.959964
##
## $results$reject_Bonf
## [,1] [,2] [,3]
## [1,] FALSE FALSE FALSE
##
## $results$pvalue_Bonf
## [,1] [,2] [,3]
## [1,] 1 1 1
##
## $results$pvalue_Holm
## [,1] [,2] [,3]
## [1,] 1 1 1
##
## $results$pvalue_BenjHochberg
## [,1] [,2] [,3]
## [1,] 1 0.9999999 0.9999999
##
## $results$pvalue_BenjYekutieli
## [,1] [,2] [,3]
## [1,] 1 1 1
None of the multiple-testing procedures rejects, so there is no evidence against instrument validity in the original data. To check the test's power, we now inject an artificial (local) violation of the LATE assumptions: for the 1979 birth cohort, the outcome is shifted by one standard deviation of Y whenever Z = 1, which breaks the exclusion restriction.
subset <- df$birthyear == 1979
effect_size <- 1*sd(df$Y)
df$Y <- df$Y + effect_size*subset*df$Z
# huge=TRUE with tree_fraction grows the search trees on a subsample to speed up computation on large samples
test <- LATEtest(data=df, covars=Xvars, huge=TRUE, tree_fraction=0.05, alpha=0.05, minsize=800)
maxtree <- eval(parse(text=paste("test$treelist$tree_", test$maxTtree$label, test$maxTtree$J, sep="")))
rpart.plot(maxtree, extra = 101, box.palette = "GyRd", shadow.col = "gray", nn = TRUE, roundint = FALSE)
test$results
## $nu_ineq
## p0 p1 p2
## [1,] 51 12 11
##
## $Tmax
## T0 T1 T2
## [1,] 8.047937 8.047937 8.047937
##
## $cv
## cv0 cv1 cv2
## [1,] 3.096109 2.638257 2.608616
##
## $reject_Bonf
## [,1] [,2] [,3]
## [1,] TRUE TRUE TRUE
##
## $pvalue_Bonf
## [,1] [,2] [,3]
## [1,] 2.264855e-14 5.329071e-15 4.884981e-15
##
## $pvalue_Holm
## [,1] [,2] [,3]
## [1,] 2.264855e-14 5.329071e-15 4.884981e-15
##
## $pvalue_BenjHochberg
## [,1] [,2] [,3]
## [1,] 2.264855e-14 5.329071e-15 4.884981e-15
##
## $pvalue_BenjYekutieli
## [,1] [,2] [,3]
## [1,] 1.023446e-13 1.653723e-14 1.475204e-14
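With the injected violation, the maximal test statistic of about 8.05 exceeds every critical value and all multiple-testing corrections now reject, so the procedure detects the local violation built into the 1979 birth cohort.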