1 jointVIP

# load jointVIP package
library(jointVIP)

# data to use for example
library(causaldata)

# matching method shown in example
library(MatchIt)
library(optmatch)

# weighting method shown in example
library(WeightIt)
library(optweight)

# load data for estimating earnings from 1978
# treatment is the NSW program
pilot_df = cps_mixtape
analysis_df = nsw_mixtape
head(pilot_df)
## # A tibble: 6 × 11
##   data_id treat   age  educ black  hisp  marr nodegree   re74   re75   re78
##   <chr>   <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl>  <dbl>  <dbl>  <dbl>
## 1 CPS1        0    45    11     0     0     1        1 21517. 25244. 25565.
## 2 CPS1        0    21    14     0     0     0        0  3176.  5853. 13496.
## 3 CPS1        0    38    12     0     0     1        0 23039. 25131. 25565.
## 4 CPS1        0    48     6     0     0     1        1 24994. 25244. 25565.
## 5 CPS1        0    18     8     0     0     1        1  1669. 10728.  9861.
## 6 CPS1        0    22    11     0     0     1        1 16366. 18449. 25565.
head(analysis_df)
## # A tibble: 6 × 11
##   data_id        treat   age  educ black  hisp  marr nodegree  re74  re75   re78
##   <chr>          <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>    <dbl> <dbl> <dbl>  <dbl>
## 1 Dehejia-Wahba…     1    37    11     1     0     1        1     0     0  9930.
## 2 Dehejia-Wahba…     1    22     9     0     1     0        1     0     0  3596.
## 3 Dehejia-Wahba…     1    30    12     1     0     0        0     0     0 24909.
## 4 Dehejia-Wahba…     1    27    11     1     0     0        1     0     0  7506.
## 5 Dehejia-Wahba…     1    33     8     1     0     0        1     0     0   290.
## 6 Dehejia-Wahba…     1    22     9     1     0     0        1     0     0  4056.
dim(pilot_df)
## [1] 15992    11
dim(analysis_df)
## [1] 445  11
transform_earn <- function(data, variables){
  data = data.frame(data)
  log_variables = sapply(variables, 
                         function(s){paste0('log_',s)})
  data[,log_variables] =
    apply(data[,variables], 2, 
          function(x){ifelse(x == 0, 
                             log(x + 1), 
                             log(x))})
  return(data)
}

pilot_df <- cps_mixtape
pilot_df <- transform_earn(pilot_df, c('re74', 're75', 're78'))
head(pilot_df)
##   data_id treat age educ black hisp marr nodegree      re74      re75      re78
## 1    CPS1     0  45   11     0    0    1        1 21516.670 25243.551 25564.670
## 2    CPS1     0  21   14     0    0    0        0  3175.971  5852.565 13496.080
## 3    CPS1     0  38   12     0    0    1        0 23039.020 25130.760 25564.670
## 4    CPS1     0  48    6     0    0    1        1 24994.369 25243.551 25564.670
## 5    CPS1     0  18    8     0    0    1        1  1669.295 10727.610  9860.869
## 6    CPS1     0  22   11     0    0    1        1 16365.760 18449.270 25564.670
##    log_re74  log_re75  log_re78
## 1  9.976583 10.136326 10.148967
## 2  8.063369  8.674635  9.510155
## 3 10.044945 10.131848 10.148967
## 4 10.126406 10.136326 10.148967
## 5  7.420157  9.280576  9.196330
## 6  9.702947  9.822780 10.148967
analysis_df <- nsw_mixtape
analysis_df <- transform_earn(analysis_df, c('re74', 're75', 're78'))
head(analysis_df)
##                data_id treat age educ black hisp marr nodegree re74 re75
## 1 Dehejia-Wahba Sample     1  37   11     1    0    1        1    0    0
## 2 Dehejia-Wahba Sample     1  22    9     0    1    0        1    0    0
## 3 Dehejia-Wahba Sample     1  30   12     1    0    0        0    0    0
## 4 Dehejia-Wahba Sample     1  27   11     1    0    0        1    0    0
## 5 Dehejia-Wahba Sample     1  33    8     1    0    0        1    0    0
## 6 Dehejia-Wahba Sample     1  22    9     1    0    0        1    0    0
##         re78 log_re74 log_re75  log_re78
## 1  9930.0459        0        0  9.203320
## 2  3595.8940        0        0  8.187548
## 3 24909.4492        0        0 10.123002
## 4  7506.1460        0        0  8.923477
## 5   289.7899        0        0  5.669156
## 6  4056.4939        0        0  8.308074
treatment = 'treat'
outcome = 'log_re78'
covariates = c(names(analysis_df)[!names(analysis_df) %in% c(treatment,
                                                             outcome, "data_id",
                                                             "re74", "re75",
                                                             "re78")])

new_jointVIP = create_jointVIP(treatment = treatment,
                               outcome = outcome,
                               covariates = covariates,
                               pilot_df = pilot_df,
                               analysis_df = analysis_df)


summary(new_jointVIP, 
        smd = "cross-sample",
        use_abs = TRUE,
        bias_tol = 0.01)
## Max absolute bias is 0.113
## 2 variables are above the desired 0.01 absolute bias tolerance
## 8 variables can be plotted
#> Max absolute bias is 0.113
#> 2 variables are above the desired 0.01 absolute bias tolerance
#> 8 variables can be plotted
print(new_jointVIP,
      smd = "cross-sample",
      use_abs = TRUE,
      bias_tol = 0.01)
##           bias
## log_re75 0.113
## log_re74 0.045
plot(new_jointVIP)

m.out <- matchit(
  treat ~ log_re75 + log_re74,
  data = analysis_df,
  method = "optimal",
  distance = "mahalanobis"
)
optmatch_df <- match.data(m.out)[, c(treatment, outcome, covariates)]

# ordering for the weightit
ordered_analysis_df = analysis_df[order(analysis_df$treat, decreasing = T),]

optwt <- weightit(treat ~ log_re74 + log_re75,
                  data = ordered_analysis_df,
                  method = "optweight", estimand = "ATE",
                  tols = 0.005, include.obj = TRUE)
# summary(optwt)

optwt_df = ordered_analysis_df[, c(covariates, treatment, outcome)]

post_optmatch_jointVIP = create_post_jointVIP(new_jointVIP,
                                              post_analysis_df = optmatch_df)




post_optmatch_jointVIP = create_post_jointVIP(new_jointVIP,
                                              post_analysis_df = optmatch_df)