1 PSM

set.seed(111)
baselinevars <- names(dplyr::select(ObsData, 
                         !c(A,Y)))
ps.formula <- as.formula(paste("A~", paste(baselinevars, collapse = "+")))
ps.fit <- glm(ps.formula, family = "binomial", data = ObsData)
ObsData$PS <- predict(ps.fit, newdata = ObsData, type = "response")

p <- ObsData$PS
logitPS <- log(p/(1-p))
match.obj <- matchit(ps.formula, data = ObsData, distance = ObsData$PS, method = "nearest", replace = FALSE, ratio = 1, caliper = 0.2*sd(logitPS))

1.1 PSM Diagnostics

require(cobalt)

## Loading required package: cobalt

##  cobalt (Version 4.3.2, Build Date: 2022-01-19)

## 
## Attaching package: 'cobalt'

## The following object is masked from 'package:MatchIt':
## 
##     lalonde

bal.plot(match.obj, var.name = "distance", which = "both", type = "histogram", mirror = TRUE)

bal.tab(match.obj, un = TRUE, thresholds = c(m=0.1))

## Call
##  matchit(formula = ps.formula, data = ObsData, method = "nearest", 
##     distance = ObsData$PS, replace = FALSE, caliper = 0.2 * sd(logitPS), 
##     ratio = 1)
## 
## Balance Measures
##                                           Type Diff.Un Diff.Adj    M.Threshold
## distance                              Distance  1.1558   0.1820               
## Disease.category_ARF                    Binary -0.0290  -0.0178 Balanced, <0.1
## Disease.category_CHF                    Binary  0.0261  -0.0006 Balanced, <0.1
## Disease.category_Other                  Binary -0.1737  -0.0092 Balanced, <0.1
## Disease.category_MOSF                   Binary  0.1766   0.0276 Balanced, <0.1
## Cancer_None                             Binary  0.0439   0.0075 Balanced, <0.1
## Cancer_Localized (Yes)                  Binary -0.0267  -0.0109 Balanced, <0.1
## Cancer_Metastatic                       Binary -0.0172   0.0035 Balanced, <0.1
## Cardiovascular                          Binary  0.0445  -0.0104 Balanced, <0.1
## Congestive.HF                           Binary  0.0268   0.0012 Balanced, <0.1
## Dementia                                Binary -0.0472  -0.0023 Balanced, <0.1
## Psychiatric                             Binary -0.0348  -0.0081 Balanced, <0.1
## Pulmonary                               Binary -0.0737  -0.0138 Balanced, <0.1
## Renal                                   Binary  0.0066  -0.0058 Balanced, <0.1
## Hepatic                                 Binary -0.0124  -0.0023 Balanced, <0.1
## GI.Bleed                                Binary -0.0122  -0.0006 Balanced, <0.1
## Tumor                                   Binary -0.0423  -0.0052 Balanced, <0.1
## Immunosupperssion                       Binary  0.0358  -0.0046 Balanced, <0.1
## Transfer.hx                             Binary  0.0554   0.0115 Balanced, <0.1
## MI                                      Binary  0.0139  -0.0012 Balanced, <0.1
## age_[-Inf,50)                           Binary -0.0017   0.0063 Balanced, <0.1
## age_[50,60)                             Binary  0.0161   0.0104 Balanced, <0.1
## age_[60,70)                             Binary  0.0355   0.0006 Balanced, <0.1
## age_[70,80)                             Binary  0.0144  -0.0132 Balanced, <0.1
## age_[80, Inf)                           Binary -0.0643  -0.0040 Balanced, <0.1
## sex_Female                              Binary -0.0462  -0.0092 Balanced, <0.1
## edu                                    Contin.  0.0910   0.0293 Balanced, <0.1
## DASIndex                               Contin.  0.0654   0.0263 Balanced, <0.1
## APACHE.score                           Contin.  0.4837   0.0813 Balanced, <0.1
## Glasgow.Coma.Score                     Contin. -0.1160  -0.0147 Balanced, <0.1
## blood.pressure                         Contin. -0.4869  -0.0680 Balanced, <0.1
## WBC                                    Contin.  0.0799  -0.0096 Balanced, <0.1
## Heart.rate                             Contin.  0.1460  -0.0005 Balanced, <0.1
## Respiratory.rate                       Contin. -0.1641  -0.0361 Balanced, <0.1
## Temperature                            Contin. -0.0209  -0.0219 Balanced, <0.1
## PaO2vs.FIO2                            Contin. -0.4566  -0.0560 Balanced, <0.1
## Albumin                                Contin. -0.2010  -0.0281 Balanced, <0.1
## Hematocrit                             Contin. -0.2954  -0.0293 Balanced, <0.1
## Bilirubin                              Contin.  0.1329   0.0319 Balanced, <0.1
## Creatinine                             Contin.  0.2678   0.0339 Balanced, <0.1
## Sodium                                 Contin. -0.0927  -0.0218 Balanced, <0.1
## Potassium                              Contin. -0.0274   0.0064 Balanced, <0.1
## PaCo2                                  Contin. -0.2880  -0.0456 Balanced, <0.1
## PH                                     Contin. -0.1163  -0.0228 Balanced, <0.1
## Weight                                 Contin.  0.2640   0.0241 Balanced, <0.1
## DNR.status_Yes                          Binary -0.0696   0.0006 Balanced, <0.1
## Medical.insurance_Medicaid              Binary -0.0395  -0.0035 Balanced, <0.1
## Medical.insurance_Medicare              Binary -0.0327  -0.0075 Balanced, <0.1
## Medical.insurance_Medicare & Medicaid   Binary -0.0144  -0.0058 Balanced, <0.1
## Medical.insurance_No insurance          Binary  0.0099   0.0046 Balanced, <0.1
## Medical.insurance_Private               Binary  0.0624   0.0259 Balanced, <0.1
## Medical.insurance_Private & Medicare    Binary  0.0143  -0.0138 Balanced, <0.1
## Respiratory.Diag_Yes                    Binary -0.1277  -0.0299 Balanced, <0.1
## Cardiovascular.Diag_Yes                 Binary  0.1395   0.0236 Balanced, <0.1
## Neurological.Diag_Yes                   Binary -0.1079  -0.0098 Balanced, <0.1
## Gastrointestinal.Diag_Yes               Binary  0.0453   0.0052 Balanced, <0.1
## Renal.Diag_Yes                          Binary  0.0264   0.0040 Balanced, <0.1
## Metabolic.Diag_Yes                      Binary -0.0059   0.0017 Balanced, <0.1
## Hematologic.Diag_Yes                    Binary -0.0146  -0.0035 Balanced, <0.1
## Sepsis.Diag_Yes                         Binary  0.0912   0.0138 Balanced, <0.1
## Trauma.Diag_Yes                         Binary  0.0105   0.0017 Balanced, <0.1
## Orthopedic.Diag_Yes                     Binary  0.0010   0.0012 Balanced, <0.1
## race_white                              Binary  0.0063   0.0069 Balanced, <0.1
## race_black                              Binary -0.0114  -0.0081 Balanced, <0.1
## race_other                              Binary  0.0050   0.0012 Balanced, <0.1
## income_> $50k                           Binary  0.0165   0.0086 Balanced, <0.1
## income_$11-$25k                         Binary  0.0062  -0.0104 Balanced, <0.1
## income_$25-$50k                         Binary  0.0391   0.0173 Balanced, <0.1
## income_Under $11k                       Binary -0.0618  -0.0155 Balanced, <0.1
## 
## Balance tally for mean differences
##                    count
## Balanced, <0.1        68
## Not Balanced, >0.1     0
## 
## Variable with the greatest mean difference
##      Variable Diff.Adj    M.Threshold
##  APACHE.score   0.0813 Balanced, <0.1
## 
## Sample sizes
##           Control Treated
## All          3551    2184
## Matched      1739    1739
## Unmatched    1812     445

love.plot(match.obj, binary = "std", thresholds = c(m=0.1))

1.2 PSM Result

require(tableone)

## Loading required package: tableone

matched <- match.data(match.obj)
CreateTableOne(vars = c("Y"), data = matched, strata = "A", test = TRUE)

##                Stratified by A
##                 0             1             p      test
##   n              1739          1739                    
##   Y (mean (SD)) 21.22 (25.36) 24.47 (28.79) <0.001

After matching, the significant different between A=0 and A=1 exist for Y (p-value < 0.001).

2 G Computation

2.1 Data Details

# Read the data saved at the last chapter
ObsData <- readRDS(file = "rhcAnalytic.RDS")
dim(ObsData)

## [1] 5735   51

Y: length of stay, outcome variable
A: RHC Status, exposure variable
49 covariates and 5735 obs

Now, we change our outcome variable as follows:

Y(A=1): outcome under exposed; length of stay under RHC
Y(A=0): outcome under unexposed; length of stay under no RHC.

Then the data changed to the following format (the first 6 rows):

require(kableExtra)

## Loading required package: kableExtra

small.data <- ObsData[1:6,c("sex","A","Y")]
small.data$id <- c("John","Emma","Isabella","Sophia","Luke", "Mia")
small.data$Y1 <- ifelse(small.data$A==1, small.data$Y, NA)
small.data$Y0 <- ifelse(small.data$A==0, small.data$Y, NA)
small.data$TE <- small.data$Y1 - small.data$Y0
small.data <- small.data[c("id", "sex","A","Y1","Y0", "TE")]
small.data$Y <- NULL
small.data$sex <- as.character(small.data$sex)
m.Y1 <- mean(small.data$Y1, na.rm = TRUE)
m.Y0 <- mean(small.data$Y0, na.rm = TRUE)
mean.values <- round(c(NA,NA, NA, m.Y1, m.Y0,
                 m.Y1 - m.Y0),0)
small.data2 <- rbind(small.data, mean.values)
kable(small.data2, booktabs = TRUE, digits=1,
             col.names = c("Subject ID","Sex",
                           "RHC status (A)", 
                           "Y when A=1 (RHC)", 
                           "Y when A=0 (no RHC)", 
                           "Treatment Effect"))%>%
  row_spec(7, bold = TRUE, color = "white", 
           background = "#D7261E")

Subject ID	Sex	RHC status (A)	Y when A=1 (RHC)	Y when A=0 (no RHC)	Treatment Effect
John	Male	0	NA	9	NA
Emma	Female	1	45	NA	NA
Isabella	Female	1	60	NA	NA
Sophia	Female	0	NA	37	NA
Luke	Male	1	2	NA	NA
Mia	Female	0	NA	7	NA
NA	NA	NA	36	18	18

Now, treat the problem as a missing value problem. The imputation was done by linear regression.

vars <- names(dplyr::select(ObsData, !c(A,Y)))
formula <- as.formula(paste("Y ~ A+", paste(vars, collapse = "+")))
lm <- lm(formula, data = ObsData)

2.2 G-computation Step

2.2.1 Treatment Effect Estimation

First, we assume all the obs are being treated and we computed the predicted outcome; then we compute the predicted outcome for untreated. Finally the treatment effect was computed (treated - untreated).

ObsData$treated <- round(predict(lm, newdata = data.frame(A=1, dplyr::select(ObsData, !A)), type = "response"),3)
ObsData$untreated <- round(predict(lm, newdata = data.frame(A=0, dplyr::select(ObsData, !A)), type = "response"),3)
ObsData$Treatment_Effect <- ObsData$treated - ObsData$untreated
ObsData[1:6, c("treated", "untreated", "Treatment_Effect")]

##   treated untreated Treatment_Effect
## 1  17.519    14.616            2.903
## 2  28.663    25.761            2.902
## 3  24.578    21.676            2.902
## 4  21.606    18.704            2.902
## 5  13.650    10.748            2.902
## 6  25.471    22.569            2.902

2.2.2 CI

Bootstrapping method should be used. R=250 used as follows:

require(boot)

## Loading required package: boot

gcomp <- function(formula = formula, data = ObsData, indices) {
  boot_sample <- data[indices, ]
  lmfit <- lm(formula, data = boot_sample)
  treated <- predict(lmfit, newdata = data.frame(A=1, dplyr::select(boot_sample, !A)), type = "response")
  untreated <- predict(lmfit, newdata = data.frame(A=0, dplyr::select(boot_sample, !A)), type="response")
  predTE <- mean(treated) - mean(untreated)
  return(predTE)
}

set.seed(123)
res <- boot(statistic = gcomp, R=250,data = ObsData, formula=formula)
plot(res)

CI1 <- boot.ci(res, type = "norm")
CI2 <- boot.ci(res, type="perc")
CI1

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 250 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = res, type = "norm")
## 
## Intervals : 
## Level      Normal        
## 95%   ( 1.315,  4.444 )  
## Calculations and Intervals on Original Scale

CI2

## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 250 bootstrap replicates
## 
## CALL : 
## boot.ci(boot.out = res, type = "perc")
## 
## Intervals : 
## Level     Percentile     
## 95%   ( 1.515,  4.589 )  
## Calculations and Intervals on Original Scale
## Some percentile intervals may be unstable

TE <- mean(ObsData$treated) - mean(ObsData$untreated)
saveRDS(TE, file = "gcomp.RDS")
saveRDS(CI2, file = "gcompci.RDS")

G computation is highly sensitive to model misspecification and when model is not correctly specified, result is subject to bias.

3 Machine Learning methods

3.1 SuperLearner

Superlearner is an ensemble ML technique, that uses CV to find a weighted combination of estimated provided by different candidate learners.

linear regression;
Regularized regression (lasso);
gradient boosting (tree)

XGBoost is a fast version of gradient boosting algorithm.

# Read the data saved at the last chapter
ObsData <- readRDS(file = "rhcAnalytic.RDS")
baselinevars <- names(dplyr::select(ObsData, !A))
out.formula <- as.formula(paste("Y~ A +",
                               paste(baselinevars,
                                     collapse = "+")))

The following listed all the methods could be used. Here we chose “SL.glm”, “SL.glmnet”, “SL.xgboost”.

require(SuperLearner)

## Loading required package: SuperLearner

## Loading required package: nnls

## Loading required package: gam

## Loading required package: splines

## Loading required package: foreach

## Loaded gam 1.20.1

## Super Learner

## Version: 2.0-28

## Package created on 2021-05-04

listWrappers()

## All prediction algorithm wrappers in SuperLearner:

##  [1] "SL.bartMachine"      "SL.bayesglm"         "SL.biglasso"        
##  [4] "SL.caret"            "SL.caret.rpart"      "SL.cforest"         
##  [7] "SL.earth"            "SL.extraTrees"       "SL.gam"             
## [10] "SL.gbm"              "SL.glm"              "SL.glm.interaction" 
## [13] "SL.glmnet"           "SL.ipredbagg"        "SL.kernelKnn"       
## [16] "SL.knn"              "SL.ksvm"             "SL.lda"             
## [19] "SL.leekasso"         "SL.lm"               "SL.loess"           
## [22] "SL.logreg"           "SL.mean"             "SL.nnet"            
## [25] "SL.nnls"             "SL.polymars"         "SL.qda"             
## [28] "SL.randomForest"     "SL.ranger"           "SL.ridge"           
## [31] "SL.rpart"            "SL.rpartPrune"       "SL.speedglm"        
## [34] "SL.speedlm"          "SL.step"             "SL.step.forward"    
## [37] "SL.step.interaction" "SL.stepAIC"          "SL.svm"             
## [40] "SL.template"         "SL.xgboost"

## 
## All screening algorithm wrappers in SuperLearner:

## [1] "All"
## [1] "screen.corP"           "screen.corRank"        "screen.glmnet"        
## [4] "screen.randomForest"   "screen.SIS"            "screen.template"      
## [7] "screen.ttest"          "write.screen.template"

SL.library.chosen=c("SL.glm", "SL.glmnet", "SL.xgboost")

3.1.1 CV

Splits the data by choosing K fold CV:

cvControl.chosen = list(V=3)

3.1.2 Select loss function and estimate risk

loss.chosen = "method.NNLS"

3.1.3 Find SL Prediction

require(SuperLearner)
x <- dplyr::select(ObsData, !Y)
fitsl <- SuperLearner(Y=ObsData$Y, 
                      X=x,
                      cvControl = cvControl.chosen, 
                      SL.library = SL.library.chosen,
                      method = loss.chosen, family = "gaussian")

## Loading required namespace: glmnet

## Loading required namespace: xgboost

all.pred <- predict(fitsl, type = "response")
yhat <- all.pred$library.predict
head(yhat)

##   SL.glm_All SL.glmnet_All SL.xgboost_All
## 1   14.61647      14.56257       8.815431
## 2   28.66305      28.62920      33.660389
## 3   24.57800      24.61116      51.599380
## 4   18.70422      18.95597      21.558762
## 5   13.64956      13.64539       7.508749
## 6   22.56895      22.25339      14.596238

# K fold CV risk estimates
fitsl$cvRisk

##     SL.glm_All  SL.glmnet_All SL.xgboost_All 
##       635.2421       631.5006       713.4396

Here we have two routes: - 1. choose the best method (glmnet above) OR - 2. assign weight for each method (ensamble SL) to fit a meta leanrner

Here we talk about the option 2.

First, we use linear regression (without intercept); then we get the estimated coefficient for each \(\hat{Y}\) below and scale them to 1; the scaled coefficients represents the importance of the corresponding candidate learner.

\(Y_{obs}\) ~ \(\hat{Y}_{SL.glm} + \hat{Y}_{SL.glmnet} + \hat{Y}_{SL.xgboost}\)

fitsl$coef

##     SL.glm_All  SL.glmnet_All SL.xgboost_All 
##     0.09953576     0.74589362     0.15457062

We have the new predicted values as follows:

slen <- t(t(yhat)*fitsl$coef)
head(slen)

##   SL.glm_All SL.glmnet_All SL.xgboost_All
## 1   1.454862      10.86213       1.362607
## 2   2.852998      21.35434       5.202907
## 3   2.446390      18.35731       7.975748
## 4   1.861739      14.13914       3.332351
## 5   1.358619      10.17801       1.160632
## 6   2.246418      16.59866       2.256150

as.matrix(head(rowSums(slen)), ncol = 1)

##       [,1]
## 1 13.67960
## 2 29.41025
## 3 28.77944
## 4 19.33323
## 5 12.69726
## 6 21.10123

3.2 Treatment Effect

x <- dplyr::select(ObsData, !Y)
x$A <- 1
treated <- predict(fitsl, newdata = x, type = "response")$pred
x <- dplyr::select(ObsData, !Y)
x$A <- 0
untreated <- predict(fitsl, newdata = x, type = "response")$pred
mean(treated) - mean(untreated)

## [1] 2.740845

4 IPTW

IPTW uses the propensity score to balance baseline patient characteristics in the treatment group and control groups by weighting each individual by the inverse probability of receiving his/her actual treatment. Weights are calculated for each individual as 1/PS for the treatment group and 1/(1-PS) for the control group.

As such, the treated subjects with a lower probability of exposure (or control subjects with a higher probability of unexposure) receive larger weights and therefore their relative influence on the comparison is increased.

SMD should be applied before and after the IPTW to check the balance. P-values should be avoided when assessing the balance, as they are highly influenced by sample size. If the SMD is still large after the IPTW, other terms such as interaction/transformation/splines should be added. Besides having similar means, continuous variables should also be examined the distribution and variance homogeneious.

For extreme IPTW values, we should check the reasons and may consider them as outliers. After all, patients who have a 100% probability of receiving treatment would not eligible to be randomized to both treatments. Extreme weights could be dealt with through weight stabilization, which can be achieved by replacing the numerator with the crude probability of exposure (i.e. given by the PS model without covariates).

Strength:

IPTW retain most individuals in the analysis, increasing the effective sample size;
IPTW can be applied in categorical or continuous exposures;
Compared with PS stratification, IPTW has been shown to estimate hazard ratios with less bias.

Limitations:

Some simulations showed that IPTW perform no better than multivariable regression;
Caution use of IPTW when N<150 due to underestimation of the variance of effect estimate;
IPTW is sensitive to misspecifications of the PS model, as omission of interaction or function forms of included covariates may induce imbalance groups, biasing the effect estimate. ## Modeling - PS

ps.formula <- as.formula(paste("A~", paste(vars, collapse = "+")))
ps.fit <- glm(ps.formula, family = "binomial", data = ObsData)
require(Publish)

## Loading required package: Publish

## Loading required package: prodlim

publish(ps.fit, format = "[u;l]")

##               Variable               Units OddsRatio        CI.95     p-value 
##       Disease.category                 ARF       Ref                          
##                                        CHF      1.79  [1.32;2.43]   0.0002047 
##                                      Other      0.54  [0.43;0.68]     < 1e-04 
##                                       MOSF      1.58  [1.34;1.87]     < 1e-04 
##                 Cancer                None       Ref                          
##                            Localized (Yes)      0.46  [0.22;0.96]   0.0389310 
##                                 Metastatic      0.37  [0.17;0.81]   0.0131229 
##         Cardiovascular                   0       Ref                          
##                                          1      1.04  [0.86;1.25]   0.7036980 
##          Congestive.HF                   0       Ref                          
##                                          1      1.10  [0.90;1.35]   0.3461245 
##               Dementia                   0       Ref                          
##                                          1      0.67  [0.53;0.85]   0.0011382 
##            Psychiatric                   0       Ref                          
##                                          1      0.65  [0.50;0.85]   0.0018616 
##              Pulmonary                   0       Ref                          
##                                          1      0.97  [0.81;1.18]   0.7814947 
##                  Renal                   0       Ref                          
##                                          1      0.70  [0.49;1.00]   0.0523978 
##                Hepatic                   0       Ref                          
##                                          1      0.79  [0.57;1.11]   0.1817681 
##               GI.Bleed                   0       Ref                          
##                                          1      0.76  [0.49;1.17]   0.2148665 
##                  Tumor                   0       Ref                          
##                                          1      1.48  [0.71;3.12]   0.2987694 
##      Immunosupperssion                   0       Ref                          
##                                          1      1.00  [0.86;1.15]   0.9778387 
##            Transfer.hx                   0       Ref                          
##                                          1      1.46  [1.20;1.77]   0.0001356 
##                     MI                   0       Ref                          
##                                          1      1.12  [0.80;1.59]   0.5031463 
##                    age           [-Inf,50)       Ref                          
##                                    [50,60)      1.04  [0.85;1.27]   0.7327200 
##                                    [60,70)      1.22  [1.00;1.49]   0.0559205 
##                                    [70,80)      1.15  [0.91;1.45]   0.2414163 
##                                  [80, Inf)      0.66  [0.49;0.89]   0.0054612 
##                    sex                Male       Ref                          
##                                     Female      0.98  [0.86;1.12]   0.7661356 
##                    edu                          1.03  [1.01;1.05]   0.0101741 
##               DASIndex                          1.00  [0.98;1.01]   0.6635060 
##           APACHE.score                          1.01  [1.01;1.02]     < 1e-04 
##     Glasgow.Coma.Score                          1.00  [1.00;1.00]   0.2853053 
##         blood.pressure                          0.99  [0.99;0.99]     < 1e-04 
##                    WBC                          1.00  [0.99;1.00]   0.7368619 
##             Heart.rate                          1.00  [1.00;1.01]     < 1e-04 
##       Respiratory.rate                          0.98  [0.97;0.98]     < 1e-04 
##            Temperature                          0.97  [0.93;1.01]   0.1255016 
##            PaO2vs.FIO2                          0.99  [0.99;1.00]     < 1e-04 
##                Albumin                          0.93  [0.85;1.02]   0.1032557 
##             Hematocrit                          0.99  [0.98;1.00]   0.0103236 
##              Bilirubin                          1.01  [1.00;1.02]   0.1719966 
##             Creatinine                          1.04  [1.00;1.09]   0.0576310 
##                 Sodium                          0.99  [0.98;1.00]   0.0049192 
##              Potassium                          0.85  [0.79;0.91]     < 1e-04 
##                  PaCo2                          0.98  [0.97;0.98]     < 1e-04 
##                     PH                          0.22  [0.11;0.47]     < 1e-04 
##                 Weight                          1.01  [1.00;1.01]     < 1e-04 
##             DNR.status                  No       Ref                          
##                                        Yes      0.58  [0.46;0.73]     < 1e-04 
##      Medical.insurance            Medicaid       Ref                          
##                                   Medicare      1.34  [1.03;1.74]   0.0315228 
##                        Medicare & Medicaid      1.49  [1.07;2.07]   0.0184712 
##                               No insurance      1.66  [1.20;2.30]   0.0023684 
##                                    Private      1.55  [1.21;1.97]   0.0004402 
##                         Private & Medicare      1.46  [1.11;1.92]   0.0071028 
##       Respiratory.Diag                  No       Ref                          
##                                        Yes      0.76  [0.65;0.90]   0.0010144 
##    Cardiovascular.Diag                  No       Ref                          
##                                        Yes      1.80  [1.52;2.13]     < 1e-04 
##      Neurological.Diag                  No       Ref                          
##                                        Yes      0.62  [0.47;0.80]   0.0002731 
##  Gastrointestinal.Diag                  No       Ref                          
##                                        Yes      1.41  [1.15;1.73]   0.0010690 
##             Renal.Diag                  No       Ref                          
##                                        Yes      1.35  [1.01;1.80]   0.0461405 
##         Metabolic.Diag                  No       Ref                          
##                                        Yes      0.85  [0.63;1.15]   0.2953120 
##       Hematologic.Diag                  No       Ref                          
##                                        Yes      0.59  [0.45;0.78]   0.0002225 
##            Sepsis.Diag                  No       Ref                          
##                                        Yes      1.31  [1.10;1.57]   0.0029968 
##            Trauma.Diag                  No       Ref                          
##                                        Yes      3.45  [1.80;6.64]   0.0002014 
##        Orthopedic.Diag                  No       Ref                          
##                                        Yes      3.74 [0.53;26.25]   0.1843027 
##                   race               white       Ref                          
##                                      black      1.05  [0.87;1.26]   0.6250189 
##                                      other      1.09  [0.84;1.41]   0.5272282 
##                 income              > $50k       Ref                          
##                                   $11-$25k      0.98  [0.75;1.28]   0.8810245 
##                                   $25-$50k      1.06  [0.81;1.39]   0.6676461 
##                                 Under $11k      1.04  [0.80;1.36]   0.7673002

Notice: check the multicollinearity - SE too high?

Getting the PS:

ObsData$PS <- predict(ps.fit, type = "response")

Check if the PS values very close to 0 and 1?

summary(ObsData$PS)

##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.002478 0.161446 0.358300 0.380819 0.574319 0.968425

tapply(ObsData$PS, ObsData$A, summary)

## $`0`
##     Min.  1st Qu.   Median     Mean  3rd Qu.     Max. 
## 0.002478 0.106718 0.241301 0.283816 0.427138 0.951927 
## 
## $`1`
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## 0.01575 0.37205 0.55243 0.53854 0.70999 0.96842

plot(density(ObsData$PS[ObsData$A == 0]), col = "red", main = "")
lines(density(ObsData$PS[ObsData$A == 1]), col = "blue", lty = 2)
legend("topright", c("No RHC, RHC"), col = c("red", "blue"), lty=1:2)

4.1 Convert to IPTW

IPTW = A/PS + (1-A)/(1-PS)

ObsData$IPW <- ObsData$A/ObsData$PS + (1-ObsData$A)/ (1- ObsData$PS)
summary(ObsData$IPW)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.002   1.183   1.472   1.986   2.064  63.509

require(WeightIt)

## Loading required package: WeightIt

W.out <- weightit(ps.formula, 
                    data = ObsData, 
                    estimand = "ATE",
                    method = "ps")
summary(W.out$weights)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.002   1.183   1.472   1.986   2.064  63.509

4.2 Balance Checking

SMD=0.1 as the threshold for balance and SMD>0.1 means no balance.

require(cobalt)
bal.tab(W.out, un=TRUE, threshold =c(m=0.1))

## Call
##  weightit(formula = ps.formula, data = ObsData, method = "ps", 
##     estimand = "ATE")
## 
## Balance Measures
##                                           Type Diff.Un Diff.Adj    M.Threshold
## prop.score                            Distance  1.1926   0.0224 Balanced, <0.1
## Disease.category_ARF                    Binary -0.0290   0.0025 Balanced, <0.1
## Disease.category_CHF                    Binary  0.0261   0.0008 Balanced, <0.1
## Disease.category_Other                  Binary -0.1737  -0.0080 Balanced, <0.1
## Disease.category_MOSF                   Binary  0.1766   0.0047 Balanced, <0.1
## Cancer_None                             Binary  0.0439   0.0017 Balanced, <0.1
## Cancer_Localized (Yes)                  Binary -0.0267   0.0017 Balanced, <0.1
## Cancer_Metastatic                       Binary -0.0172  -0.0034 Balanced, <0.1
## Cardiovascular                          Binary  0.0445   0.0051 Balanced, <0.1
## Congestive.HF                           Binary  0.0268   0.0013 Balanced, <0.1
## Dementia                                Binary -0.0472  -0.0138 Balanced, <0.1
## Psychiatric                             Binary -0.0348  -0.0050 Balanced, <0.1
## Pulmonary                               Binary -0.0737  -0.0058 Balanced, <0.1
## Renal                                   Binary  0.0066   0.0027 Balanced, <0.1
## Hepatic                                 Binary -0.0124  -0.0012 Balanced, <0.1
## GI.Bleed                                Binary -0.0122  -0.0026 Balanced, <0.1
## Tumor                                   Binary -0.0423  -0.0016 Balanced, <0.1
## Immunosupperssion                       Binary  0.0358  -0.0027 Balanced, <0.1
## Transfer.hx                             Binary  0.0554   0.0047 Balanced, <0.1
## MI                                      Binary  0.0139   0.0005 Balanced, <0.1
## age_[-Inf,50)                           Binary -0.0017  -0.0098 Balanced, <0.1
## age_[50,60)                             Binary  0.0161   0.0149 Balanced, <0.1
## age_[60,70)                             Binary  0.0355  -0.0108 Balanced, <0.1
## age_[70,80)                             Binary  0.0144   0.0047 Balanced, <0.1
## age_[80, Inf)                           Binary -0.0643   0.0010 Balanced, <0.1
## sex_Female                              Binary -0.0462  -0.0143 Balanced, <0.1
## edu                                    Contin.  0.0914  -0.0000 Balanced, <0.1
## DASIndex                               Contin.  0.0626   0.0435 Balanced, <0.1
## APACHE.score                           Contin.  0.5014   0.0109 Balanced, <0.1
## Glasgow.Coma.Score                     Contin. -0.1098   0.0034 Balanced, <0.1
## blood.pressure                         Contin. -0.4551   0.0057 Balanced, <0.1
## WBC                                    Contin.  0.0836   0.0470 Balanced, <0.1
## Heart.rate                             Contin.  0.1469   0.0210 Balanced, <0.1
## Respiratory.rate                       Contin. -0.1655   0.0037 Balanced, <0.1
## Temperature                            Contin. -0.0214   0.0090 Balanced, <0.1
## PaO2vs.FIO2                            Contin. -0.4332  -0.0016 Balanced, <0.1
## Albumin                                Contin. -0.2299  -0.0279 Balanced, <0.1
## Hematocrit                             Contin. -0.2693  -0.0247 Balanced, <0.1
## Bilirubin                              Contin.  0.1446  -0.0069 Balanced, <0.1
## Creatinine                             Contin.  0.2696   0.0148 Balanced, <0.1
## Sodium                                 Contin. -0.0922  -0.0059 Balanced, <0.1
## Potassium                              Contin. -0.0271  -0.0264 Balanced, <0.1
## PaCo2                                  Contin. -0.2486  -0.0201 Balanced, <0.1
## PH                                     Contin. -0.1198   0.0095 Balanced, <0.1
## Weight                                 Contin.  0.2557   0.0209 Balanced, <0.1
## DNR.status_Yes                          Binary -0.0696  -0.0112 Balanced, <0.1
## Medical.insurance_Medicaid              Binary -0.0395   0.0058 Balanced, <0.1
## Medical.insurance_Medicare              Binary -0.0327  -0.0119 Balanced, <0.1
## Medical.insurance_Medicare & Medicaid   Binary -0.0144  -0.0001 Balanced, <0.1
## Medical.insurance_No insurance          Binary  0.0099  -0.0002 Balanced, <0.1
## Medical.insurance_Private               Binary  0.0624   0.0013 Balanced, <0.1
## Medical.insurance_Private & Medicare    Binary  0.0143   0.0052 Balanced, <0.1
## Respiratory.Diag_Yes                    Binary -0.1277  -0.0056 Balanced, <0.1
## Cardiovascular.Diag_Yes                 Binary  0.1395   0.0034 Balanced, <0.1
## Neurological.Diag_Yes                   Binary -0.1079  -0.0038 Balanced, <0.1
## Gastrointestinal.Diag_Yes               Binary  0.0453  -0.0028 Balanced, <0.1
## Renal.Diag_Yes                          Binary  0.0264   0.0021 Balanced, <0.1
## Metabolic.Diag_Yes                      Binary -0.0059   0.0002 Balanced, <0.1
## Hematologic.Diag_Yes                    Binary -0.0146  -0.0000 Balanced, <0.1
## Sepsis.Diag_Yes                         Binary  0.0912   0.0035 Balanced, <0.1
## Trauma.Diag_Yes                         Binary  0.0105   0.0011 Balanced, <0.1
## Orthopedic.Diag_Yes                     Binary  0.0010   0.0002 Balanced, <0.1
## race_white                              Binary  0.0063  -0.0030 Balanced, <0.1
## race_black                              Binary -0.0114   0.0067 Balanced, <0.1
## race_other                              Binary  0.0050  -0.0036 Balanced, <0.1
## income_> $50k                           Binary  0.0165  -0.0001 Balanced, <0.1
## income_$11-$25k                         Binary  0.0062  -0.0096 Balanced, <0.1
## income_$25-$50k                         Binary  0.0391   0.0032 Balanced, <0.1
## income_Under $11k                       Binary -0.0618   0.0065 Balanced, <0.1
## 
## Balance tally for mean differences
##                    count
## Balanced, <0.1        69
## Not Balanced, >0.1     0
## 
## Variable with the greatest mean difference
##  Variable Diff.Adj    M.Threshold
##       WBC    0.047 Balanced, <0.1
## 
## Effective sample sizes
##            Control Treated
## Unadjusted 3551.   2184.  
## Adjusted   2532.46 1039.44

love.plot(W.out, binart = "std", thresholds = c(m=0.1), abs = TRUE, var.order = "adjusted",
          line = FALSE)

4.3 Outcome modeling

outfit <- glm(Y ~A, data = ObsData, weights = IPW)
Publish::publish(outfit)

##     Variable Units Coefficient         CI.95 p-value 
##  (Intercept)             20.68 [19.73;21.64] < 1e-04 
##            A              3.24   [1.88;4.60] < 1e-04

saveRDS(outfit, file = "ipw.RDS")

5 References

https://ehsanx.github.io/TMLEworkshop/iptw.html#ref-austin2011tutorial

chrome-extension://efaidnbmnnnibpcajpcglclefindmkaj/viewer.html?pdfurl=https%3A%2F%2Fwww.ncbi.nlm.nih.gov%2Fpmc%2Farticles%2FPMC3266945%2Fpdf%2Fhmbr46_119.pdf&clen=1588375&chunk=true&pdffilename=hmbr46_119.pdf

IPTW Real Data Study

Yi Huang

23 March, 2022