set.seed(111)
baselinevars <- names(dplyr::select(ObsData,
!c(A,Y)))
ps.formula <- as.formula(paste("A~", paste(baselinevars, collapse = "+")))
ps.fit <- glm(ps.formula, family = "binomial", data = ObsData)
ObsData$PS <- predict(ps.fit, newdata = ObsData, type = "response")
p <- ObsData$PS
logitPS <- log(p/(1-p))
match.obj <- matchit(ps.formula, data = ObsData, distance = ObsData$PS, method = "nearest", replace = FALSE, ratio = 1, caliper = 0.2*sd(logitPS))require(cobalt)## Loading required package: cobalt
## cobalt (Version 4.3.2, Build Date: 2022-01-19)
##
## Attaching package: 'cobalt'
## The following object is masked from 'package:MatchIt':
##
## lalonde
bal.plot(match.obj, var.name = "distance", which = "both", type = "histogram", mirror = TRUE)bal.tab(match.obj, un = TRUE, thresholds = c(m=0.1))## Call
## matchit(formula = ps.formula, data = ObsData, method = "nearest",
## distance = ObsData$PS, replace = FALSE, caliper = 0.2 * sd(logitPS),
## ratio = 1)
##
## Balance Measures
## Type Diff.Un Diff.Adj M.Threshold
## distance Distance 1.1558 0.1820
## Disease.category_ARF Binary -0.0290 -0.0178 Balanced, <0.1
## Disease.category_CHF Binary 0.0261 -0.0006 Balanced, <0.1
## Disease.category_Other Binary -0.1737 -0.0092 Balanced, <0.1
## Disease.category_MOSF Binary 0.1766 0.0276 Balanced, <0.1
## Cancer_None Binary 0.0439 0.0075 Balanced, <0.1
## Cancer_Localized (Yes) Binary -0.0267 -0.0109 Balanced, <0.1
## Cancer_Metastatic Binary -0.0172 0.0035 Balanced, <0.1
## Cardiovascular Binary 0.0445 -0.0104 Balanced, <0.1
## Congestive.HF Binary 0.0268 0.0012 Balanced, <0.1
## Dementia Binary -0.0472 -0.0023 Balanced, <0.1
## Psychiatric Binary -0.0348 -0.0081 Balanced, <0.1
## Pulmonary Binary -0.0737 -0.0138 Balanced, <0.1
## Renal Binary 0.0066 -0.0058 Balanced, <0.1
## Hepatic Binary -0.0124 -0.0023 Balanced, <0.1
## GI.Bleed Binary -0.0122 -0.0006 Balanced, <0.1
## Tumor Binary -0.0423 -0.0052 Balanced, <0.1
## Immunosupperssion Binary 0.0358 -0.0046 Balanced, <0.1
## Transfer.hx Binary 0.0554 0.0115 Balanced, <0.1
## MI Binary 0.0139 -0.0012 Balanced, <0.1
## age_[-Inf,50) Binary -0.0017 0.0063 Balanced, <0.1
## age_[50,60) Binary 0.0161 0.0104 Balanced, <0.1
## age_[60,70) Binary 0.0355 0.0006 Balanced, <0.1
## age_[70,80) Binary 0.0144 -0.0132 Balanced, <0.1
## age_[80, Inf) Binary -0.0643 -0.0040 Balanced, <0.1
## sex_Female Binary -0.0462 -0.0092 Balanced, <0.1
## edu Contin. 0.0910 0.0293 Balanced, <0.1
## DASIndex Contin. 0.0654 0.0263 Balanced, <0.1
## APACHE.score Contin. 0.4837 0.0813 Balanced, <0.1
## Glasgow.Coma.Score Contin. -0.1160 -0.0147 Balanced, <0.1
## blood.pressure Contin. -0.4869 -0.0680 Balanced, <0.1
## WBC Contin. 0.0799 -0.0096 Balanced, <0.1
## Heart.rate Contin. 0.1460 -0.0005 Balanced, <0.1
## Respiratory.rate Contin. -0.1641 -0.0361 Balanced, <0.1
## Temperature Contin. -0.0209 -0.0219 Balanced, <0.1
## PaO2vs.FIO2 Contin. -0.4566 -0.0560 Balanced, <0.1
## Albumin Contin. -0.2010 -0.0281 Balanced, <0.1
## Hematocrit Contin. -0.2954 -0.0293 Balanced, <0.1
## Bilirubin Contin. 0.1329 0.0319 Balanced, <0.1
## Creatinine Contin. 0.2678 0.0339 Balanced, <0.1
## Sodium Contin. -0.0927 -0.0218 Balanced, <0.1
## Potassium Contin. -0.0274 0.0064 Balanced, <0.1
## PaCo2 Contin. -0.2880 -0.0456 Balanced, <0.1
## PH Contin. -0.1163 -0.0228 Balanced, <0.1
## Weight Contin. 0.2640 0.0241 Balanced, <0.1
## DNR.status_Yes Binary -0.0696 0.0006 Balanced, <0.1
## Medical.insurance_Medicaid Binary -0.0395 -0.0035 Balanced, <0.1
## Medical.insurance_Medicare Binary -0.0327 -0.0075 Balanced, <0.1
## Medical.insurance_Medicare & Medicaid Binary -0.0144 -0.0058 Balanced, <0.1
## Medical.insurance_No insurance Binary 0.0099 0.0046 Balanced, <0.1
## Medical.insurance_Private Binary 0.0624 0.0259 Balanced, <0.1
## Medical.insurance_Private & Medicare Binary 0.0143 -0.0138 Balanced, <0.1
## Respiratory.Diag_Yes Binary -0.1277 -0.0299 Balanced, <0.1
## Cardiovascular.Diag_Yes Binary 0.1395 0.0236 Balanced, <0.1
## Neurological.Diag_Yes Binary -0.1079 -0.0098 Balanced, <0.1
## Gastrointestinal.Diag_Yes Binary 0.0453 0.0052 Balanced, <0.1
## Renal.Diag_Yes Binary 0.0264 0.0040 Balanced, <0.1
## Metabolic.Diag_Yes Binary -0.0059 0.0017 Balanced, <0.1
## Hematologic.Diag_Yes Binary -0.0146 -0.0035 Balanced, <0.1
## Sepsis.Diag_Yes Binary 0.0912 0.0138 Balanced, <0.1
## Trauma.Diag_Yes Binary 0.0105 0.0017 Balanced, <0.1
## Orthopedic.Diag_Yes Binary 0.0010 0.0012 Balanced, <0.1
## race_white Binary 0.0063 0.0069 Balanced, <0.1
## race_black Binary -0.0114 -0.0081 Balanced, <0.1
## race_other Binary 0.0050 0.0012 Balanced, <0.1
## income_> $50k Binary 0.0165 0.0086 Balanced, <0.1
## income_$11-$25k Binary 0.0062 -0.0104 Balanced, <0.1
## income_$25-$50k Binary 0.0391 0.0173 Balanced, <0.1
## income_Under $11k Binary -0.0618 -0.0155 Balanced, <0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 68
## Not Balanced, >0.1 0
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## APACHE.score 0.0813 Balanced, <0.1
##
## Sample sizes
## Control Treated
## All 3551 2184
## Matched 1739 1739
## Unmatched 1812 445
love.plot(match.obj, binary = "std", thresholds = c(m=0.1))require(tableone)## Loading required package: tableone
matched <- match.data(match.obj)
CreateTableOne(vars = c("Y"), data = matched, strata = "A", test = TRUE)## Stratified by A
## 0 1 p test
## n 1739 1739
## Y (mean (SD)) 21.22 (25.36) 24.47 (28.79) <0.001
After matching, the significant different between A=0 and A=1 exist for Y (p-value < 0.001).
# Read the data saved at the last chapter
ObsData <- readRDS(file = "rhcAnalytic.RDS")
dim(ObsData)## [1] 5735 51
Now, we change our outcome variable as follows:
Then the data changed to the following format (the first 6 rows):
require(kableExtra)## Loading required package: kableExtra
small.data <- ObsData[1:6,c("sex","A","Y")]
small.data$id <- c("John","Emma","Isabella","Sophia","Luke", "Mia")
small.data$Y1 <- ifelse(small.data$A==1, small.data$Y, NA)
small.data$Y0 <- ifelse(small.data$A==0, small.data$Y, NA)
small.data$TE <- small.data$Y1 - small.data$Y0
small.data <- small.data[c("id", "sex","A","Y1","Y0", "TE")]
small.data$Y <- NULL
small.data$sex <- as.character(small.data$sex)
m.Y1 <- mean(small.data$Y1, na.rm = TRUE)
m.Y0 <- mean(small.data$Y0, na.rm = TRUE)
mean.values <- round(c(NA,NA, NA, m.Y1, m.Y0,
m.Y1 - m.Y0),0)
small.data2 <- rbind(small.data, mean.values)
kable(small.data2, booktabs = TRUE, digits=1,
col.names = c("Subject ID","Sex",
"RHC status (A)",
"Y when A=1 (RHC)",
"Y when A=0 (no RHC)",
"Treatment Effect"))%>%
row_spec(7, bold = TRUE, color = "white",
background = "#D7261E")| Subject ID | Sex | RHC status (A) | Y when A=1 (RHC) | Y when A=0 (no RHC) | Treatment Effect |
|---|---|---|---|---|---|
| John | Male | 0 | NA | 9 | NA |
| Emma | Female | 1 | 45 | NA | NA |
| Isabella | Female | 1 | 60 | NA | NA |
| Sophia | Female | 0 | NA | 37 | NA |
| Luke | Male | 1 | 2 | NA | NA |
| Mia | Female | 0 | NA | 7 | NA |
| NA | NA | NA | 36 | 18 | 18 |
Now, treat the problem as a missing value problem. The imputation was done by linear regression.
vars <- names(dplyr::select(ObsData, !c(A,Y)))
formula <- as.formula(paste("Y ~ A+", paste(vars, collapse = "+")))
lm <- lm(formula, data = ObsData)First, we assume all the obs are being treated and we computed the predicted outcome; then we compute the predicted outcome for untreated. Finally the treatment effect was computed (treated - untreated).
ObsData$treated <- round(predict(lm, newdata = data.frame(A=1, dplyr::select(ObsData, !A)), type = "response"),3)
ObsData$untreated <- round(predict(lm, newdata = data.frame(A=0, dplyr::select(ObsData, !A)), type = "response"),3)
ObsData$Treatment_Effect <- ObsData$treated - ObsData$untreated
ObsData[1:6, c("treated", "untreated", "Treatment_Effect")]## treated untreated Treatment_Effect
## 1 17.519 14.616 2.903
## 2 28.663 25.761 2.902
## 3 24.578 21.676 2.902
## 4 21.606 18.704 2.902
## 5 13.650 10.748 2.902
## 6 25.471 22.569 2.902
Bootstrapping method should be used. R=250 used as follows:
require(boot)## Loading required package: boot
gcomp <- function(formula = formula, data = ObsData, indices) {
boot_sample <- data[indices, ]
lmfit <- lm(formula, data = boot_sample)
treated <- predict(lmfit, newdata = data.frame(A=1, dplyr::select(boot_sample, !A)), type = "response")
untreated <- predict(lmfit, newdata = data.frame(A=0, dplyr::select(boot_sample, !A)), type="response")
predTE <- mean(treated) - mean(untreated)
return(predTE)
}
set.seed(123)
res <- boot(statistic = gcomp, R=250,data = ObsData, formula=formula)
plot(res)CI1 <- boot.ci(res, type = "norm")
CI2 <- boot.ci(res, type="perc")
CI1## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 250 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = res, type = "norm")
##
## Intervals :
## Level Normal
## 95% ( 1.315, 4.444 )
## Calculations and Intervals on Original Scale
CI2## BOOTSTRAP CONFIDENCE INTERVAL CALCULATIONS
## Based on 250 bootstrap replicates
##
## CALL :
## boot.ci(boot.out = res, type = "perc")
##
## Intervals :
## Level Percentile
## 95% ( 1.515, 4.589 )
## Calculations and Intervals on Original Scale
## Some percentile intervals may be unstable
TE <- mean(ObsData$treated) - mean(ObsData$untreated)
saveRDS(TE, file = "gcomp.RDS")
saveRDS(CI2, file = "gcompci.RDS")G computation is highly sensitive to model misspecification and when model is not correctly specified, result is subject to bias.
Superlearner is an ensemble ML technique, that uses CV to find a weighted combination of estimated provided by different candidate learners.
XGBoost is a fast version of gradient boosting algorithm.
# Read the data saved at the last chapter
ObsData <- readRDS(file = "rhcAnalytic.RDS")
baselinevars <- names(dplyr::select(ObsData, !A))
out.formula <- as.formula(paste("Y~ A +",
paste(baselinevars,
collapse = "+")))The following listed all the methods could be used. Here we chose “SL.glm”, “SL.glmnet”, “SL.xgboost”.
require(SuperLearner)## Loading required package: SuperLearner
## Loading required package: nnls
## Loading required package: gam
## Loading required package: splines
## Loading required package: foreach
## Loaded gam 1.20.1
## Super Learner
## Version: 2.0-28
## Package created on 2021-05-04
listWrappers()## All prediction algorithm wrappers in SuperLearner:
## [1] "SL.bartMachine" "SL.bayesglm" "SL.biglasso"
## [4] "SL.caret" "SL.caret.rpart" "SL.cforest"
## [7] "SL.earth" "SL.extraTrees" "SL.gam"
## [10] "SL.gbm" "SL.glm" "SL.glm.interaction"
## [13] "SL.glmnet" "SL.ipredbagg" "SL.kernelKnn"
## [16] "SL.knn" "SL.ksvm" "SL.lda"
## [19] "SL.leekasso" "SL.lm" "SL.loess"
## [22] "SL.logreg" "SL.mean" "SL.nnet"
## [25] "SL.nnls" "SL.polymars" "SL.qda"
## [28] "SL.randomForest" "SL.ranger" "SL.ridge"
## [31] "SL.rpart" "SL.rpartPrune" "SL.speedglm"
## [34] "SL.speedlm" "SL.step" "SL.step.forward"
## [37] "SL.step.interaction" "SL.stepAIC" "SL.svm"
## [40] "SL.template" "SL.xgboost"
##
## All screening algorithm wrappers in SuperLearner:
## [1] "All"
## [1] "screen.corP" "screen.corRank" "screen.glmnet"
## [4] "screen.randomForest" "screen.SIS" "screen.template"
## [7] "screen.ttest" "write.screen.template"
SL.library.chosen=c("SL.glm", "SL.glmnet", "SL.xgboost")Splits the data by choosing K fold CV:
cvControl.chosen = list(V=3)loss.chosen = "method.NNLS"require(SuperLearner)
x <- dplyr::select(ObsData, !Y)
fitsl <- SuperLearner(Y=ObsData$Y,
X=x,
cvControl = cvControl.chosen,
SL.library = SL.library.chosen,
method = loss.chosen, family = "gaussian")## Loading required namespace: glmnet
## Loading required namespace: xgboost
all.pred <- predict(fitsl, type = "response")
yhat <- all.pred$library.predict
head(yhat)## SL.glm_All SL.glmnet_All SL.xgboost_All
## 1 14.61647 14.56257 8.815431
## 2 28.66305 28.62920 33.660389
## 3 24.57800 24.61116 51.599380
## 4 18.70422 18.95597 21.558762
## 5 13.64956 13.64539 7.508749
## 6 22.56895 22.25339 14.596238
# K fold CV risk estimates
fitsl$cvRisk## SL.glm_All SL.glmnet_All SL.xgboost_All
## 635.2421 631.5006 713.4396
Here we have two routes: - 1. choose the best method (glmnet above) OR - 2. assign weight for each method (ensamble SL) to fit a meta leanrner
Here we talk about the option 2.
First, we use linear regression (without intercept); then we get the estimated coefficient for each \(\hat{Y}\) below and scale them to 1; the scaled coefficients represents the importance of the corresponding candidate learner.
\(Y_{obs}\) ~ \(\hat{Y}_{SL.glm} + \hat{Y}_{SL.glmnet} + \hat{Y}_{SL.xgboost}\)
fitsl$coef## SL.glm_All SL.glmnet_All SL.xgboost_All
## 0.09953576 0.74589362 0.15457062
We have the new predicted values as follows:
slen <- t(t(yhat)*fitsl$coef)
head(slen)## SL.glm_All SL.glmnet_All SL.xgboost_All
## 1 1.454862 10.86213 1.362607
## 2 2.852998 21.35434 5.202907
## 3 2.446390 18.35731 7.975748
## 4 1.861739 14.13914 3.332351
## 5 1.358619 10.17801 1.160632
## 6 2.246418 16.59866 2.256150
as.matrix(head(rowSums(slen)), ncol = 1)## [,1]
## 1 13.67960
## 2 29.41025
## 3 28.77944
## 4 19.33323
## 5 12.69726
## 6 21.10123
x <- dplyr::select(ObsData, !Y)
x$A <- 1
treated <- predict(fitsl, newdata = x, type = "response")$pred
x <- dplyr::select(ObsData, !Y)
x$A <- 0
untreated <- predict(fitsl, newdata = x, type = "response")$pred
mean(treated) - mean(untreated)## [1] 2.740845
IPTW uses the propensity score to balance baseline patient characteristics in the treatment group and control groups by weighting each individual by the inverse probability of receiving his/her actual treatment. Weights are calculated for each individual as 1/PS for the treatment group and 1/(1-PS) for the control group.
As such, the treated subjects with a lower probability of exposure (or control subjects with a higher probability of unexposure) receive larger weights and therefore their relative influence on the comparison is increased.
SMD should be applied before and after the IPTW to check the balance. P-values should be avoided when assessing the balance, as they are highly influenced by sample size. If the SMD is still large after the IPTW, other terms such as interaction/transformation/splines should be added. Besides having similar means, continuous variables should also be examined the distribution and variance homogeneious.
For extreme IPTW values, we should check the reasons and may consider them as outliers. After all, patients who have a 100% probability of receiving treatment would not eligible to be randomized to both treatments. Extreme weights could be dealt with through weight stabilization, which can be achieved by replacing the numerator with the crude probability of exposure (i.e. given by the PS model without covariates).
Strength:
Limitations:
ps.formula <- as.formula(paste("A~", paste(vars, collapse = "+")))
ps.fit <- glm(ps.formula, family = "binomial", data = ObsData)
require(Publish)## Loading required package: Publish
## Loading required package: prodlim
publish(ps.fit, format = "[u;l]")## Variable Units OddsRatio CI.95 p-value
## Disease.category ARF Ref
## CHF 1.79 [1.32;2.43] 0.0002047
## Other 0.54 [0.43;0.68] < 1e-04
## MOSF 1.58 [1.34;1.87] < 1e-04
## Cancer None Ref
## Localized (Yes) 0.46 [0.22;0.96] 0.0389310
## Metastatic 0.37 [0.17;0.81] 0.0131229
## Cardiovascular 0 Ref
## 1 1.04 [0.86;1.25] 0.7036980
## Congestive.HF 0 Ref
## 1 1.10 [0.90;1.35] 0.3461245
## Dementia 0 Ref
## 1 0.67 [0.53;0.85] 0.0011382
## Psychiatric 0 Ref
## 1 0.65 [0.50;0.85] 0.0018616
## Pulmonary 0 Ref
## 1 0.97 [0.81;1.18] 0.7814947
## Renal 0 Ref
## 1 0.70 [0.49;1.00] 0.0523978
## Hepatic 0 Ref
## 1 0.79 [0.57;1.11] 0.1817681
## GI.Bleed 0 Ref
## 1 0.76 [0.49;1.17] 0.2148665
## Tumor 0 Ref
## 1 1.48 [0.71;3.12] 0.2987694
## Immunosupperssion 0 Ref
## 1 1.00 [0.86;1.15] 0.9778387
## Transfer.hx 0 Ref
## 1 1.46 [1.20;1.77] 0.0001356
## MI 0 Ref
## 1 1.12 [0.80;1.59] 0.5031463
## age [-Inf,50) Ref
## [50,60) 1.04 [0.85;1.27] 0.7327200
## [60,70) 1.22 [1.00;1.49] 0.0559205
## [70,80) 1.15 [0.91;1.45] 0.2414163
## [80, Inf) 0.66 [0.49;0.89] 0.0054612
## sex Male Ref
## Female 0.98 [0.86;1.12] 0.7661356
## edu 1.03 [1.01;1.05] 0.0101741
## DASIndex 1.00 [0.98;1.01] 0.6635060
## APACHE.score 1.01 [1.01;1.02] < 1e-04
## Glasgow.Coma.Score 1.00 [1.00;1.00] 0.2853053
## blood.pressure 0.99 [0.99;0.99] < 1e-04
## WBC 1.00 [0.99;1.00] 0.7368619
## Heart.rate 1.00 [1.00;1.01] < 1e-04
## Respiratory.rate 0.98 [0.97;0.98] < 1e-04
## Temperature 0.97 [0.93;1.01] 0.1255016
## PaO2vs.FIO2 0.99 [0.99;1.00] < 1e-04
## Albumin 0.93 [0.85;1.02] 0.1032557
## Hematocrit 0.99 [0.98;1.00] 0.0103236
## Bilirubin 1.01 [1.00;1.02] 0.1719966
## Creatinine 1.04 [1.00;1.09] 0.0576310
## Sodium 0.99 [0.98;1.00] 0.0049192
## Potassium 0.85 [0.79;0.91] < 1e-04
## PaCo2 0.98 [0.97;0.98] < 1e-04
## PH 0.22 [0.11;0.47] < 1e-04
## Weight 1.01 [1.00;1.01] < 1e-04
## DNR.status No Ref
## Yes 0.58 [0.46;0.73] < 1e-04
## Medical.insurance Medicaid Ref
## Medicare 1.34 [1.03;1.74] 0.0315228
## Medicare & Medicaid 1.49 [1.07;2.07] 0.0184712
## No insurance 1.66 [1.20;2.30] 0.0023684
## Private 1.55 [1.21;1.97] 0.0004402
## Private & Medicare 1.46 [1.11;1.92] 0.0071028
## Respiratory.Diag No Ref
## Yes 0.76 [0.65;0.90] 0.0010144
## Cardiovascular.Diag No Ref
## Yes 1.80 [1.52;2.13] < 1e-04
## Neurological.Diag No Ref
## Yes 0.62 [0.47;0.80] 0.0002731
## Gastrointestinal.Diag No Ref
## Yes 1.41 [1.15;1.73] 0.0010690
## Renal.Diag No Ref
## Yes 1.35 [1.01;1.80] 0.0461405
## Metabolic.Diag No Ref
## Yes 0.85 [0.63;1.15] 0.2953120
## Hematologic.Diag No Ref
## Yes 0.59 [0.45;0.78] 0.0002225
## Sepsis.Diag No Ref
## Yes 1.31 [1.10;1.57] 0.0029968
## Trauma.Diag No Ref
## Yes 3.45 [1.80;6.64] 0.0002014
## Orthopedic.Diag No Ref
## Yes 3.74 [0.53;26.25] 0.1843027
## race white Ref
## black 1.05 [0.87;1.26] 0.6250189
## other 1.09 [0.84;1.41] 0.5272282
## income > $50k Ref
## $11-$25k 0.98 [0.75;1.28] 0.8810245
## $25-$50k 1.06 [0.81;1.39] 0.6676461
## Under $11k 1.04 [0.80;1.36] 0.7673002
Notice: check the multicollinearity - SE too high?
Getting the PS:
ObsData$PS <- predict(ps.fit, type = "response")Check if the PS values very close to 0 and 1?
summary(ObsData$PS)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002478 0.161446 0.358300 0.380819 0.574319 0.968425
tapply(ObsData$PS, ObsData$A, summary)## $`0`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.002478 0.106718 0.241301 0.283816 0.427138 0.951927
##
## $`1`
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01575 0.37205 0.55243 0.53854 0.70999 0.96842
plot(density(ObsData$PS[ObsData$A == 0]), col = "red", main = "")
lines(density(ObsData$PS[ObsData$A == 1]), col = "blue", lty = 2)
legend("topright", c("No RHC, RHC"), col = c("red", "blue"), lty=1:2)IPTW = A/PS + (1-A)/(1-PS)
ObsData$IPW <- ObsData$A/ObsData$PS + (1-ObsData$A)/ (1- ObsData$PS)
summary(ObsData$IPW)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.002 1.183 1.472 1.986 2.064 63.509
require(WeightIt)## Loading required package: WeightIt
W.out <- weightit(ps.formula,
data = ObsData,
estimand = "ATE",
method = "ps")
summary(W.out$weights)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.002 1.183 1.472 1.986 2.064 63.509
SMD=0.1 as the threshold for balance and SMD>0.1 means no balance.
require(cobalt)
bal.tab(W.out, un=TRUE, threshold =c(m=0.1))## Call
## weightit(formula = ps.formula, data = ObsData, method = "ps",
## estimand = "ATE")
##
## Balance Measures
## Type Diff.Un Diff.Adj M.Threshold
## prop.score Distance 1.1926 0.0224 Balanced, <0.1
## Disease.category_ARF Binary -0.0290 0.0025 Balanced, <0.1
## Disease.category_CHF Binary 0.0261 0.0008 Balanced, <0.1
## Disease.category_Other Binary -0.1737 -0.0080 Balanced, <0.1
## Disease.category_MOSF Binary 0.1766 0.0047 Balanced, <0.1
## Cancer_None Binary 0.0439 0.0017 Balanced, <0.1
## Cancer_Localized (Yes) Binary -0.0267 0.0017 Balanced, <0.1
## Cancer_Metastatic Binary -0.0172 -0.0034 Balanced, <0.1
## Cardiovascular Binary 0.0445 0.0051 Balanced, <0.1
## Congestive.HF Binary 0.0268 0.0013 Balanced, <0.1
## Dementia Binary -0.0472 -0.0138 Balanced, <0.1
## Psychiatric Binary -0.0348 -0.0050 Balanced, <0.1
## Pulmonary Binary -0.0737 -0.0058 Balanced, <0.1
## Renal Binary 0.0066 0.0027 Balanced, <0.1
## Hepatic Binary -0.0124 -0.0012 Balanced, <0.1
## GI.Bleed Binary -0.0122 -0.0026 Balanced, <0.1
## Tumor Binary -0.0423 -0.0016 Balanced, <0.1
## Immunosupperssion Binary 0.0358 -0.0027 Balanced, <0.1
## Transfer.hx Binary 0.0554 0.0047 Balanced, <0.1
## MI Binary 0.0139 0.0005 Balanced, <0.1
## age_[-Inf,50) Binary -0.0017 -0.0098 Balanced, <0.1
## age_[50,60) Binary 0.0161 0.0149 Balanced, <0.1
## age_[60,70) Binary 0.0355 -0.0108 Balanced, <0.1
## age_[70,80) Binary 0.0144 0.0047 Balanced, <0.1
## age_[80, Inf) Binary -0.0643 0.0010 Balanced, <0.1
## sex_Female Binary -0.0462 -0.0143 Balanced, <0.1
## edu Contin. 0.0914 -0.0000 Balanced, <0.1
## DASIndex Contin. 0.0626 0.0435 Balanced, <0.1
## APACHE.score Contin. 0.5014 0.0109 Balanced, <0.1
## Glasgow.Coma.Score Contin. -0.1098 0.0034 Balanced, <0.1
## blood.pressure Contin. -0.4551 0.0057 Balanced, <0.1
## WBC Contin. 0.0836 0.0470 Balanced, <0.1
## Heart.rate Contin. 0.1469 0.0210 Balanced, <0.1
## Respiratory.rate Contin. -0.1655 0.0037 Balanced, <0.1
## Temperature Contin. -0.0214 0.0090 Balanced, <0.1
## PaO2vs.FIO2 Contin. -0.4332 -0.0016 Balanced, <0.1
## Albumin Contin. -0.2299 -0.0279 Balanced, <0.1
## Hematocrit Contin. -0.2693 -0.0247 Balanced, <0.1
## Bilirubin Contin. 0.1446 -0.0069 Balanced, <0.1
## Creatinine Contin. 0.2696 0.0148 Balanced, <0.1
## Sodium Contin. -0.0922 -0.0059 Balanced, <0.1
## Potassium Contin. -0.0271 -0.0264 Balanced, <0.1
## PaCo2 Contin. -0.2486 -0.0201 Balanced, <0.1
## PH Contin. -0.1198 0.0095 Balanced, <0.1
## Weight Contin. 0.2557 0.0209 Balanced, <0.1
## DNR.status_Yes Binary -0.0696 -0.0112 Balanced, <0.1
## Medical.insurance_Medicaid Binary -0.0395 0.0058 Balanced, <0.1
## Medical.insurance_Medicare Binary -0.0327 -0.0119 Balanced, <0.1
## Medical.insurance_Medicare & Medicaid Binary -0.0144 -0.0001 Balanced, <0.1
## Medical.insurance_No insurance Binary 0.0099 -0.0002 Balanced, <0.1
## Medical.insurance_Private Binary 0.0624 0.0013 Balanced, <0.1
## Medical.insurance_Private & Medicare Binary 0.0143 0.0052 Balanced, <0.1
## Respiratory.Diag_Yes Binary -0.1277 -0.0056 Balanced, <0.1
## Cardiovascular.Diag_Yes Binary 0.1395 0.0034 Balanced, <0.1
## Neurological.Diag_Yes Binary -0.1079 -0.0038 Balanced, <0.1
## Gastrointestinal.Diag_Yes Binary 0.0453 -0.0028 Balanced, <0.1
## Renal.Diag_Yes Binary 0.0264 0.0021 Balanced, <0.1
## Metabolic.Diag_Yes Binary -0.0059 0.0002 Balanced, <0.1
## Hematologic.Diag_Yes Binary -0.0146 -0.0000 Balanced, <0.1
## Sepsis.Diag_Yes Binary 0.0912 0.0035 Balanced, <0.1
## Trauma.Diag_Yes Binary 0.0105 0.0011 Balanced, <0.1
## Orthopedic.Diag_Yes Binary 0.0010 0.0002 Balanced, <0.1
## race_white Binary 0.0063 -0.0030 Balanced, <0.1
## race_black Binary -0.0114 0.0067 Balanced, <0.1
## race_other Binary 0.0050 -0.0036 Balanced, <0.1
## income_> $50k Binary 0.0165 -0.0001 Balanced, <0.1
## income_$11-$25k Binary 0.0062 -0.0096 Balanced, <0.1
## income_$25-$50k Binary 0.0391 0.0032 Balanced, <0.1
## income_Under $11k Binary -0.0618 0.0065 Balanced, <0.1
##
## Balance tally for mean differences
## count
## Balanced, <0.1 69
## Not Balanced, >0.1 0
##
## Variable with the greatest mean difference
## Variable Diff.Adj M.Threshold
## WBC 0.047 Balanced, <0.1
##
## Effective sample sizes
## Control Treated
## Unadjusted 3551. 2184.
## Adjusted 2532.46 1039.44
love.plot(W.out, binart = "std", thresholds = c(m=0.1), abs = TRUE, var.order = "adjusted",
line = FALSE)outfit <- glm(Y ~A, data = ObsData, weights = IPW)
Publish::publish(outfit)## Variable Units Coefficient CI.95 p-value
## (Intercept) 20.68 [19.73;21.64] < 1e-04
## A 3.24 [1.88;4.60] < 1e-04
saveRDS(outfit, file = "ipw.RDS")