jointVIP
# load jointVIP package
library(jointVIP)
# data to use for example
library(causaldata)
# matching method shown in example
library(MatchIt)
library(optmatch)
# weighting method shown in example
library(WeightIt)
library(optweight)
# load data for estimating earnings from 1978
# treatment is the NSW program
pilot_df = cps_mixtape
analysis_df = nsw_mixtape
head(pilot_df)
## # A tibble: 6 × 11
## data_id treat age educ black hisp marr nodegree re74 re75 re78
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 CPS1 0 45 11 0 0 1 1 21517. 25244. 25565.
## 2 CPS1 0 21 14 0 0 0 0 3176. 5853. 13496.
## 3 CPS1 0 38 12 0 0 1 0 23039. 25131. 25565.
## 4 CPS1 0 48 6 0 0 1 1 24994. 25244. 25565.
## 5 CPS1 0 18 8 0 0 1 1 1669. 10728. 9861.
## 6 CPS1 0 22 11 0 0 1 1 16366. 18449. 25565.
## # A tibble: 6 × 11
## data_id treat age educ black hisp marr nodegree re74 re75 re78
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Dehejia-Wahba… 1 37 11 1 0 1 1 0 0 9930.
## 2 Dehejia-Wahba… 1 22 9 0 1 0 1 0 0 3596.
## 3 Dehejia-Wahba… 1 30 12 1 0 0 0 0 0 24909.
## 4 Dehejia-Wahba… 1 27 11 1 0 0 1 0 0 7506.
## 5 Dehejia-Wahba… 1 33 8 1 0 0 1 0 0 290.
## 6 Dehejia-Wahba… 1 22 9 1 0 0 1 0 0 4056.
## [1] 15992 11
## [1] 445 11
transform_earn <- function(data, variables){
data = data.frame(data)
log_variables = sapply(variables,
function(s){paste0('log_',s)})
data[,log_variables] =
apply(data[,variables], 2,
function(x){ifelse(x == 0,
log(x + 1),
log(x))})
return(data)
}
pilot_df <- cps_mixtape
pilot_df <- transform_earn(pilot_df, c('re74', 're75', 're78'))
head(pilot_df)
## data_id treat age educ black hisp marr nodegree re74 re75 re78
## 1 CPS1 0 45 11 0 0 1 1 21516.670 25243.551 25564.670
## 2 CPS1 0 21 14 0 0 0 0 3175.971 5852.565 13496.080
## 3 CPS1 0 38 12 0 0 1 0 23039.020 25130.760 25564.670
## 4 CPS1 0 48 6 0 0 1 1 24994.369 25243.551 25564.670
## 5 CPS1 0 18 8 0 0 1 1 1669.295 10727.610 9860.869
## 6 CPS1 0 22 11 0 0 1 1 16365.760 18449.270 25564.670
## log_re74 log_re75 log_re78
## 1 9.976583 10.136326 10.148967
## 2 8.063369 8.674635 9.510155
## 3 10.044945 10.131848 10.148967
## 4 10.126406 10.136326 10.148967
## 5 7.420157 9.280576 9.196330
## 6 9.702947 9.822780 10.148967
analysis_df <- nsw_mixtape
analysis_df <- transform_earn(analysis_df, c('re74', 're75', 're78'))
head(analysis_df)
## data_id treat age educ black hisp marr nodegree re74 re75
## 1 Dehejia-Wahba Sample 1 37 11 1 0 1 1 0 0
## 2 Dehejia-Wahba Sample 1 22 9 0 1 0 1 0 0
## 3 Dehejia-Wahba Sample 1 30 12 1 0 0 0 0 0
## 4 Dehejia-Wahba Sample 1 27 11 1 0 0 1 0 0
## 5 Dehejia-Wahba Sample 1 33 8 1 0 0 1 0 0
## 6 Dehejia-Wahba Sample 1 22 9 1 0 0 1 0 0
## re78 log_re74 log_re75 log_re78
## 1 9930.0459 0 0 9.203320
## 2 3595.8940 0 0 8.187548
## 3 24909.4492 0 0 10.123002
## 4 7506.1460 0 0 8.923477
## 5 289.7899 0 0 5.669156
## 6 4056.4939 0 0 8.308074
treatment = 'treat'
outcome = 'log_re78'
covariates = c(names(analysis_df)[!names(analysis_df) %in% c(treatment,
outcome, "data_id",
"re74", "re75",
"re78")])
new_jointVIP = create_jointVIP(treatment = treatment,
outcome = outcome,
covariates = covariates,
pilot_df = pilot_df,
analysis_df = analysis_df)
summary(new_jointVIP,
smd = "cross-sample",
use_abs = TRUE,
bias_tol = 0.01)
## Max absolute bias is 0.113
## 2 variables are above the desired 0.01 absolute bias tolerance
## 8 variables can be plotted
#> Max absolute bias is 0.113
#> 2 variables are above the desired 0.01 absolute bias tolerance
#> 8 variables can be plotted
print(new_jointVIP,
smd = "cross-sample",
use_abs = TRUE,
bias_tol = 0.01)
## bias
## log_re75 0.113
## log_re74 0.045

m.out <- matchit(
treat ~ log_re75 + log_re74,
data = analysis_df,
method = "optimal",
distance = "mahalanobis"
)
optmatch_df <- match.data(m.out)[, c(treatment, outcome, covariates)]
# ordering for the weightit
ordered_analysis_df = analysis_df[order(analysis_df$treat, decreasing = T),]
optwt <- weightit(treat ~ log_re74 + log_re75,
data = ordered_analysis_df,
method = "optweight", estimand = "ATE",
tols = 0.005, include.obj = TRUE)
# summary(optwt)
optwt_df = ordered_analysis_df[, c(covariates, treatment, outcome)]
post_optmatch_jointVIP = create_post_jointVIP(new_jointVIP,
post_analysis_df = optmatch_df)
post_optmatch_jointVIP = create_post_jointVIP(new_jointVIP,
post_analysis_df = optmatch_df)