# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 Module 4 Days 3 & 4 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# RHC data
# Data Set: rhc
# Data dictionary:
# (1) sadmdte study admission date
# (2) dschdte hospital discharge date
# (3) lstctdte date of last contact
# (4) cardiohx if co1-9 in(1,3,4,5) then cardiohx = 1
# (5) chfhx if co1-9 = 2 then chfhx = 1
# (6) dementhx if co1-9 in(6,7,8) then dementhx = 1
# (7) psychhx if co1-9 in(9,10) then psychhx = 1
# (8) chrpulhx if co1-9 in(11,12,13) then chrpulhx = 1
# (9) renalhx if co1-9 in(14,15) then renalhx = 1
# (10) liverhx if co1-9 in(16,18) then liverhx = 1
# (11) gibledhx if co1-9 = 17 then gibledhx = 1
# (12) malighx if 19<= co1-9 <=23 then malighx = 1
# (13) immunhx if 24<= co1-9 <=29 then immunhx = 1
# (14) transhx if co1-9 in(39,40,41) then transhx = 1
# (15) amihx if co1-9 = 42 then amihx = 1
# (16) age Age
# (17) sex
# (18) edu Years of Education
# (19) aps1 acute physiology component of APACHE III score
# (20) scoma1 SUPPORT Coma Score based on Glasgow day 1
# (21) meanbp1 Mean Blood Pressure Day 1
# (22) wblc1 white blood cell count Day 1
# (23) hrt1 heart rate Day 1
# (24) resp1 respiration rate Day 1
# (25) temp1 temperature (celcius) Day 1
# (26) pafi1 PaO2/(.01*FiO2) ratio Day 1 (ratio of partial pressure of
# arterial oxygen to fraction of
# inpsired oxygen)
# (27) alb1 albumin Day 1
# (28) hema1 hematocrit Day 1
# (29) bili1 bilirubin Day 1
# (30) crea1 serum creatinine Day 1
# (31) sod1 serum sodium Day 1
# (32) pot1 serum potassium Day 1
# (33) paco21 PaCO2 (partial pressure of arterial oxygen) Day 1
# (34) ph1 serum ph (arterial) Day 1
# (35) swang1 whether RHC was received - updated to "treated" in code
# (36) wtkilo1 weight in kilograms
# (37) dnr1 existence of do not resuscitate order day 1
# (38) ninsclas type of medical insurance (private, Medicare,
# Medicaid, private and Medicare, Medicare and Medicaid, or none)
# (39) cat1 Disease categories on admission to ICU (acute respiratory
# failure (ARF), COPD, congestive heart failure (CHF), cirrhosis,
# nontraumatic coma, colon cancer metatstatic to the liver,
# non-small cell cancer of the lung, and multiorgan system
# failure (MOSF) with malignancy or sepsis)
# (40) race White, Black, Asian, other
# (41) income Under $11k, $11-$25k, $25-$50k, > $50k
# (42) ca Cancer (none, localized, metastatic)
# (43) ptid Patient ID
# packages to bring in for the program to run
library("MatchIt")
## Warning: package 'MatchIt' was built under R version 4.0.5
library("dplyr")
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library("ggplot2")
library("tableone")
## Warning: package 'tableone' was built under R version 4.0.5
library("reshape2")
# Following steps prepare data for analysis
rhc.readin <- read.csv("https://hbiostat.org/data/repo/rhc.csv")
rhc <- data.frame(rhc.readin)
drops <- c("cat2","dthdte", "adld3p", "urin1", "dschdte", "surv2md1",
"t3d30", "das2d3pc", "dth30", "resp","card", "neuro",
"gastr", "renal", "meta", "hema", "seps", "trauma", "ortho")
rhc<-rhc[ , !(names(rhc) %in% drops)]
rhc$treat <- ifelse(rhc$swang1=="RHC", 1, 0) # MatchIt need 0/1 treat indicator
# aggregate over cat1 categories as in paper to get ARF, MOSF, CHF, other
rhc$ARF <- ifelse(rhc$cat1=="ARF", "Yes", "No")
rhc$MOSF <- ifelse(rhc$cat=="MOSF w/Malignancy" | rhc$cat1=="MOSF w/Sepsis",
"Yes", "No")
rhc$otherdiseasecat <- ifelse(rhc$cat1=="CHF"|rhc$cat1=="Cirrhosis"|
rhc$cat1=="Colon Cancer"|rhc$cat1=="Coma"
|rhc$cat1=="COPD"|rhc$cat1=="Lung Cancer","Yes", "No")
vars.full <- c("age","sex","race","edu","income","ninsclas","ARF", "MOSF",
"otherdiseasecat","dnr1","ca", "aps1","scoma1","wtkilo1",
"temp1","meanbp1","resp1","hrt1","pafi1","paco21","ph1",
"wblc1","hema1","sod1","pot1","crea1","bili1","alb1",
"cardiohx","chfhx","dementhx","psychhx","chrpulhx","renalhx",
"liverhx","gibledhx","malighx","immunhx",
"transhx","amihx")
vars.limited <- c("age","sex","race","edu","income","ninsclas","ARF", "MOSF",
"otherdiseasecat","dnr1","ca")
## Construct a table
tabUnmatched <- CreateTableOne(vars = vars.limited, strata = "treat",
data = rhc, test = FALSE)
## Show table with SMD
print(tabUnmatched, smd = TRUE)
## Stratified by treat
## 0 1 SMD
## n 3551 2184
## age (mean (SD)) 61.76 (17.29) 60.75 (15.63) 0.061
## sex = Male (%) 1914 (53.9) 1278 (58.5) 0.093
## race (%) 0.036
## black 585 (16.5) 335 (15.3)
## other 213 ( 6.0) 142 ( 6.5)
## white 2753 (77.5) 1707 (78.2)
## edu (mean (SD)) 11.57 (3.13) 11.86 (3.16) 0.091
## income (%) 0.142
## $11-$25k 713 (20.1) 452 (20.7)
## $25-$50k 500 (14.1) 393 (18.0)
## > $50k 257 ( 7.2) 194 ( 8.9)
## Under $11k 2081 (58.6) 1145 (52.4)
## ninsclas (%) 0.194
## Medicaid 454 (12.8) 193 ( 8.8)
## Medicare 947 (26.7) 511 (23.4)
## Medicare & Medicaid 251 ( 7.1) 123 ( 5.6)
## No insurance 186 ( 5.2) 136 ( 6.2)
## Private 967 (27.2) 731 (33.5)
## Private & Medicare 746 (21.0) 490 (22.4)
## ARF = Yes (%) 1581 (44.5) 909 (41.6) 0.059
## MOSF = Yes (%) 768 (21.6) 858 (39.3) 0.391
## otherdiseasecat = Yes (%) 1202 (33.8) 417 (19.1) 0.339
## dnr1 = Yes (%) 499 (14.1) 155 ( 7.1) 0.228
## ca (%) 0.107
## Metastatic 261 ( 7.4) 123 ( 5.6)
## No 2652 (74.7) 1727 (79.1)
## Yes 638 (18.0) 334 (15.3)
## Count covariates with important imbalance
addmargins(table(ExtractSmd(tabUnmatched) > 0.1))
##
## FALSE TRUE Sum
## 5 6 11
psModel <- glm(formula = treat ~ age + sex + race + edu + income + ninsclas +
ARF + MOSF +
dnr1 + ca, family = binomial(link = "logit"), data = rhc)
summary(psModel)
##
## Call:
## glm(formula = treat ~ age + sex + race + edu + income + ninsclas +
## ARF + MOSF + dnr1 + ca, family = binomial(link = "logit"),
## data = rhc)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -1.5061 -0.9695 -0.7420 1.2178 2.1949
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -2.282018 0.250429 -9.112 < 2e-16 ***
## age 0.002991 0.002260 1.323 0.185822
## sexMale 0.203890 0.057692 3.534 0.000409 ***
## raceother 0.064945 0.134373 0.483 0.628866
## racewhite 0.041429 0.081652 0.507 0.611881
## edu 0.014487 0.010331 1.402 0.160852
## income$25-$50k 0.119667 0.097119 1.232 0.217885
## income> $50k 0.069558 0.122353 0.568 0.569696
## incomeUnder $11k -0.002933 0.076971 -0.038 0.969601
## ninsclasMedicare 0.215567 0.119126 1.810 0.070364 .
## ninsclasMedicare & Medicaid 0.207803 0.149392 1.391 0.164227
## ninsclasNo insurance 0.453603 0.146539 3.095 0.001965 **
## ninsclasPrivate 0.451435 0.109796 4.112 3.93e-05 ***
## ninsclasPrivate & Medicare 0.395203 0.122462 3.227 0.001250 **
## ARFYes 0.520215 0.071837 7.242 4.43e-13 ***
## MOSFYes 1.265800 0.078858 16.052 < 2e-16 ***
## dnr1Yes -0.691611 0.101430 -6.819 9.19e-12 ***
## caNo 0.525722 0.120607 4.359 1.31e-05 ***
## caYes 0.064958 0.134464 0.483 0.629033
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 7621.4 on 5734 degrees of freedom
## Residual deviance: 7199.5 on 5716 degrees of freedom
## AIC: 7237.5
##
## Number of Fisher Scoring iterations: 4
## Predicted probability of being assigned to RHC
rhc$pRhc <- predict(psModel, type = "response")
## Predicted probability of being assigned to no RHC
rhc$pNoRhc <- 1 - rhc$pRhc
min(rhc$pRhc[rhc$treat==0])
## [1] 0.06650796
min(rhc$pRhc[rhc$treat==1])
## [1] 0.08992939
max(rhc$pRhc[rhc$treat==0])
## [1] 0.6782902
max(rhc$pRhc[rhc$treat==1])
## [1] 0.6869823
# get predicted propensity scores for each person in dataset
pred_pscore <- data.frame(rhc$pRhc,psModel$model$treat)
head(pred_pscore)
## rhc.pRhc psModel.model.treat
## 1 0.2017841 0
## 2 0.5868207 1
## 3 0.5007341 1
## 4 0.3908386 0
## 5 0.4068518 1
## 6 0.2443598 0
# box plot to assess overlap of propensity score distributions between groups
plot.new()
boxplot(rhc$pRhc~psModel$model$treat, data=pred_pscore, ylim=c(0,1),
names=c("No RHC", "RHC"), ylab="Estimated propensity score",
axis(side=1, at=c(.3, .8), labels=c("No RHC", "RHC")))


# histogram of p score distributions
labs <- paste("RHC status:", c("Treated", "Not Treated"))
pred_pscore %>%
mutate(treated = ifelse(rhc$treat == 1, labs[1], labs[2])) %>%
ggplot(aes(x = rhc$pRhc)) +
geom_histogram(color = "white", binwidth=.05) +
facet_wrap(~treated) +
xlab("Probability of RHC") +
theme_bw()

# match with caliper of 0.1
psMatch <- matchit(treat ~ age + sex + race + edu + income + ninsclas + ARF +
MOSF + otherdiseasecat +dnr1 + ca, distance="logit",
method="nearest", caliper=.1, data=rhc)
summary(psMatch)
##
## Call:
## matchit(formula = treat ~ age + sex + race + edu + income + ninsclas +
## ARF + MOSF + otherdiseasecat + dnr1 + ca, data = rhc, method = "nearest",
## distance = "logit", caliper = 0.1)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff.
## distance 0.4251 0.3536 0.5597
## age 60.7498 61.7609 -0.0647
## sexFemale 0.4148 0.4610 -0.0937
## sexMale 0.5852 0.5390 0.0937
## raceblack 0.1534 0.1647 -0.0315
## raceother 0.0650 0.0600 0.0204
## racewhite 0.7816 0.7753 0.0153
## edu 11.8564 11.5690 0.0910
## income$11-$25k 0.2070 0.2008 0.0152
## income$25-$50k 0.1799 0.1408 0.1019
## income> $50k 0.0888 0.0724 0.0578
## incomeUnder $11k 0.5243 0.5860 -0.1237
## ninsclasMedicaid 0.0884 0.1279 -0.1391
## ninsclasMedicare 0.2340 0.2667 -0.0773
## ninsclasMedicare & Medicaid 0.0563 0.0707 -0.0623
## ninsclasNo insurance 0.0623 0.0524 0.0409
## ninsclasPrivate 0.3347 0.2723 0.1322
## ninsclasPrivate & Medicare 0.2244 0.2101 0.0342
## ARFNo 0.5838 0.5548 0.0589
## ARFYes 0.4162 0.4452 -0.0589
## MOSFNo 0.6071 0.7837 -0.3616
## MOSFYes 0.3929 0.2163 0.3616
## otherdiseasecatNo 0.8091 0.6615 0.3754
## otherdiseasecatYes 0.1909 0.3385 -0.3754
## dnr1No 0.9290 0.8595 0.2709
## dnr1Yes 0.0710 0.1405 -0.2709
## caMetastatic 0.0563 0.0735 -0.0745
## caNo 0.7908 0.7468 0.1080
## caYes 0.1529 0.1797 -0.0743
## Var. Ratio eCDF Mean eCDF Max
## distance 1.0656 0.1563 0.2259
## age 0.8175 0.0285 0.0703
## sexFemale . 0.0462 0.0462
## sexMale . 0.0462 0.0462
## raceblack . 0.0114 0.0114
## raceother . 0.0050 0.0050
## racewhite . 0.0063 0.0063
## edu 1.0147 0.0181 0.0511
## income$11-$25k . 0.0062 0.0062
## income$25-$50k . 0.0391 0.0391
## income> $50k . 0.0165 0.0165
## incomeUnder $11k . 0.0618 0.0618
## ninsclasMedicaid . 0.0395 0.0395
## ninsclasMedicare . 0.0327 0.0327
## ninsclasMedicare & Medicaid . 0.0144 0.0144
## ninsclasNo insurance . 0.0099 0.0099
## ninsclasPrivate . 0.0624 0.0624
## ninsclasPrivate & Medicare . 0.0143 0.0143
## ARFNo . 0.0290 0.0290
## ARFYes . 0.0290 0.0290
## MOSFNo . 0.1766 0.1766
## MOSFYes . 0.1766 0.1766
## otherdiseasecatNo . 0.1476 0.1476
## otherdiseasecatYes . 0.1476 0.1476
## dnr1No . 0.0696 0.0696
## dnr1Yes . 0.0696 0.0696
## caMetastatic . 0.0172 0.0172
## caNo . 0.0439 0.0439
## caYes . 0.0267 0.0267
##
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff.
## distance 0.4124 0.4091 0.0254
## age 60.7513 60.7563 -0.0003
## sexFemale 0.4215 0.4269 -0.0110
## sexMale 0.5785 0.5731 0.0110
## raceblack 0.1531 0.1624 -0.0259
## raceother 0.0662 0.0623 0.0159
## racewhite 0.7807 0.7753 0.0131
## edu 11.8504 11.8327 0.0056
## income$11-$25k 0.2041 0.2026 0.0036
## income$25-$50k 0.1771 0.1688 0.0217
## income> $50k 0.0868 0.0829 0.0138
## incomeUnder $11k 0.5319 0.5456 -0.0275
## ninsclasMedicaid 0.0947 0.1016 -0.0242
## ninsclasMedicare 0.2360 0.2439 -0.0185
## ninsclasMedicare & Medicaid 0.0584 0.0613 -0.0128
## ninsclasNo insurance 0.0618 0.0559 0.0244
## ninsclasPrivate 0.3283 0.3194 0.0187
## ninsclasPrivate & Medicare 0.2208 0.2179 0.0071
## ARFNo 0.5540 0.5388 0.0309
## ARFYes 0.4460 0.4612 -0.0309
## MOSFNo 0.6506 0.6575 -0.0141
## MOSFYes 0.3494 0.3425 0.0141
## otherdiseasecatNo 0.7954 0.8037 -0.0212
## otherdiseasecatYes 0.2046 0.1963 0.0212
## dnr1No 0.9239 0.9235 0.0019
## dnr1Yes 0.0761 0.0765 -0.0019
## caMetastatic 0.0599 0.0628 -0.0128
## caNo 0.7812 0.7738 0.0181
## caYes 0.1590 0.1634 -0.0123
## Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance 1.0663 0.0044 0.0236 0.0256
## age 0.8228 0.0219 0.0456 1.1824
## sexFemale . 0.0054 0.0054 0.8973
## sexMale . 0.0054 0.0054 0.8973
## raceblack . 0.0093 0.0093 0.7394
## raceother . 0.0039 0.0039 0.4776
## racewhite . 0.0054 0.0054 0.8563
## edu 1.0217 0.0063 0.0191 0.9990
## income$11-$25k . 0.0015 0.0015 0.8079
## income$25-$50k . 0.0083 0.0083 0.6451
## income> $50k . 0.0039 0.0039 0.5140
## incomeUnder $11k . 0.0137 0.0137 0.8921
## ninsclasMedicaid . 0.0069 0.0069 0.5359
## ninsclasMedicare . 0.0079 0.0079 0.7696
## ninsclasMedicare & Medicaid . 0.0029 0.0029 0.4938
## ninsclasNo insurance . 0.0059 0.0059 0.4345
## ninsclasPrivate . 0.0088 0.0088 0.8630
## ninsclasPrivate & Medicare . 0.0029 0.0029 0.7998
## ARFNo . 0.0152 0.0152 0.6102
## ARFYes . 0.0152 0.0152 0.6102
## MOSFNo . 0.0069 0.0069 0.3315
## MOSFYes . 0.0069 0.0069 0.3315
## otherdiseasecatNo . 0.0083 0.0083 0.4557
## otherdiseasecatYes . 0.0083 0.0083 0.4557
## dnr1No . 0.0005 0.0005 0.4261
## dnr1Yes . 0.0005 0.0005 0.4261
## caMetastatic . 0.0029 0.0029 0.4768
## caNo . 0.0074 0.0074 0.7322
## caYes . 0.0044 0.0044 0.6612
##
## Percent Balance Improvement:
## Std. Mean Diff. Var. Ratio eCDF Mean eCDF Max
## distance 95.5 -1.1 97.2 89.6
## age 99.5 3.2 23.0 35.1
## sexFemale 88.3 . 88.3 88.3
## sexMale 88.3 . 88.3 88.3
## raceblack 17.9 . 17.9 17.9
## raceother 22.0 . 22.0 22.0
## racewhite 14.6 . 14.6 14.6
## edu 93.9 -47.1 65.1 62.5
## income$11-$25k 76.1 . 76.1 76.1
## income$25-$50k 78.7 . 78.7 78.7
## income> $50k 76.1 . 76.1 76.1
## incomeUnder $11k 77.8 . 77.8 77.8
## ninsclasMedicaid 82.6 . 82.6 82.6
## ninsclasMedicare 76.0 . 76.0 76.0
## ninsclasMedicare & Medicaid 79.5 . 79.5 79.5
## ninsclasNo insurance 40.5 . 40.5 40.5
## ninsclasPrivate 85.8 . 85.8 85.8
## ninsclasPrivate & Medicare 79.4 . 79.4 79.4
## ARFNo 47.6 . 47.6 47.6
## ARFYes 47.6 . 47.6 47.6
## MOSFNo 96.1 . 96.1 96.1
## MOSFYes 96.1 . 96.1 96.1
## otherdiseasecatNo 94.3 . 94.3 94.3
## otherdiseasecatYes 94.3 . 94.3 94.3
## dnr1No 99.3 . 99.3 99.3
## dnr1Yes 99.3 . 99.3 99.3
## caMetastatic 82.9 . 82.9 82.9
## caNo 83.2 . 83.2 83.2
## caYes 83.5 . 83.5 83.5
##
## Sample Sizes:
## Control Treated
## All 3551 2184
## Matched 2038 2038
## Unmatched 1513 146
## Discarded 0 0
matchedrhc <- data.frame(match.data(psMatch))
## Construct a table
tabMatched <- CreateTableOne(vars = vars.limited, strata = "treat",
data = matchedrhc, test = FALSE)
## Show table with SMD
print(tabMatched, smd = TRUE)
## Stratified by treat
## 0 1 SMD
## n 2038 2038
## age (mean (SD)) 60.76 (17.27) 60.75 (15.67) <0.001
## sex = Male (%) 1168 (57.3) 1179 (57.9) 0.011
## race (%) 0.029
## black 331 (16.2) 312 (15.3)
## other 127 ( 6.2) 135 ( 6.6)
## white 1580 (77.5) 1591 (78.1)
## edu (mean (SD)) 11.83 (3.11) 11.85 (3.14) 0.006
## income (%) 0.031
## $11-$25k 413 (20.3) 416 (20.4)
## $25-$50k 344 (16.9) 361 (17.7)
## > $50k 169 ( 8.3) 177 ( 8.7)
## Under $11k 1112 (54.6) 1084 (53.2)
## ninsclas (%) 0.042
## Medicaid 207 (10.2) 193 ( 9.5)
## Medicare 497 (24.4) 481 (23.6)
## Medicare & Medicaid 125 ( 6.1) 119 ( 5.8)
## No insurance 114 ( 5.6) 126 ( 6.2)
## Private 651 (31.9) 669 (32.8)
## Private & Medicare 444 (21.8) 450 (22.1)
## ARF = Yes (%) 940 (46.1) 909 (44.6) 0.031
## MOSF = Yes (%) 698 (34.2) 712 (34.9) 0.014
## otherdiseasecat = Yes (%) 400 (19.6) 417 (20.5) 0.021
## dnr1 = Yes (%) 156 ( 7.7) 155 ( 7.6) 0.002
## ca (%) 0.018
## Metastatic 128 ( 6.3) 122 ( 6.0)
## No 1577 (77.4) 1592 (78.1)
## Yes 333 (16.3) 324 (15.9)
dataPlot <- data.frame(variable = rownames(ExtractSmd(tabUnmatched)),
unMatched = ExtractSmd(tabUnmatched),
Matched = ExtractSmd(tabMatched))
# changing variable names -- need to figure out better
# way to code this
names(dataPlot)[which(names(dataPlot) == "X1.vs.2")] <- "Unmatched"
names(dataPlot)[which(names(dataPlot) == "X1.vs.2.1")] <- "Matched"
## Create long-format data for ggplot2
dataPlotMelt <- melt(data = dataPlot,
id.vars = c("variable"),
variable.name = "Method",
value.name = "SMD")
# change factor levels
## Order variable names by magnitude of SMD
varNames <- as.character(dataPlot$variable)[order(dataPlot$Unmatched)]
## Order factor levels in the same order
dataPlotMelt$variable <- factor(dataPlotMelt$variable,
levels = varNames)
## Plot using ggplot2
ggplot(data = dataPlotMelt, mapping = aes(x = variable, y = SMD,
group = Method, color = Method)) +
geom_line(size=1) +
geom_point() +
geom_hline(yintercept = 0.1, color = "black", size = 0.1) +
coord_flip() +
theme_bw() + theme(legend.key = element_blank())

# match with caliper of 0.4
psMatch <- matchit(treat ~ age + sex + race + edu + income + ninsclas + ARF +
MOSF + otherdiseasecat +dnr1 + ca, distance="logit",
method="nearest", caliper=.4, data=rhc)
summary(psMatch)
##
## Call:
## matchit(formula = treat ~ age + sex + race + edu + income + ninsclas +
## ARF + MOSF + otherdiseasecat + dnr1 + ca, data = rhc, method = "nearest",
## distance = "logit", caliper = 0.4)
##
## Summary of Balance for All Data:
## Means Treated Means Control Std. Mean Diff.
## distance 0.4251 0.3536 0.5597
## age 60.7498 61.7609 -0.0647
## sexFemale 0.4148 0.4610 -0.0937
## sexMale 0.5852 0.5390 0.0937
## raceblack 0.1534 0.1647 -0.0315
## raceother 0.0650 0.0600 0.0204
## racewhite 0.7816 0.7753 0.0153
## edu 11.8564 11.5690 0.0910
## income$11-$25k 0.2070 0.2008 0.0152
## income$25-$50k 0.1799 0.1408 0.1019
## income> $50k 0.0888 0.0724 0.0578
## incomeUnder $11k 0.5243 0.5860 -0.1237
## ninsclasMedicaid 0.0884 0.1279 -0.1391
## ninsclasMedicare 0.2340 0.2667 -0.0773
## ninsclasMedicare & Medicaid 0.0563 0.0707 -0.0623
## ninsclasNo insurance 0.0623 0.0524 0.0409
## ninsclasPrivate 0.3347 0.2723 0.1322
## ninsclasPrivate & Medicare 0.2244 0.2101 0.0342
## ARFNo 0.5838 0.5548 0.0589
## ARFYes 0.4162 0.4452 -0.0589
## MOSFNo 0.6071 0.7837 -0.3616
## MOSFYes 0.3929 0.2163 0.3616
## otherdiseasecatNo 0.8091 0.6615 0.3754
## otherdiseasecatYes 0.1909 0.3385 -0.3754
## dnr1No 0.9290 0.8595 0.2709
## dnr1Yes 0.0710 0.1405 -0.2709
## caMetastatic 0.0563 0.0735 -0.0745
## caNo 0.7908 0.7468 0.1080
## caYes 0.1529 0.1797 -0.0743
## Var. Ratio eCDF Mean eCDF Max
## distance 1.0656 0.1563 0.2259
## age 0.8175 0.0285 0.0703
## sexFemale . 0.0462 0.0462
## sexMale . 0.0462 0.0462
## raceblack . 0.0114 0.0114
## raceother . 0.0050 0.0050
## racewhite . 0.0063 0.0063
## edu 1.0147 0.0181 0.0511
## income$11-$25k . 0.0062 0.0062
## income$25-$50k . 0.0391 0.0391
## income> $50k . 0.0165 0.0165
## incomeUnder $11k . 0.0618 0.0618
## ninsclasMedicaid . 0.0395 0.0395
## ninsclasMedicare . 0.0327 0.0327
## ninsclasMedicare & Medicaid . 0.0144 0.0144
## ninsclasNo insurance . 0.0099 0.0099
## ninsclasPrivate . 0.0624 0.0624
## ninsclasPrivate & Medicare . 0.0143 0.0143
## ARFNo . 0.0290 0.0290
## ARFYes . 0.0290 0.0290
## MOSFNo . 0.1766 0.1766
## MOSFYes . 0.1766 0.1766
## otherdiseasecatNo . 0.1476 0.1476
## otherdiseasecatYes . 0.1476 0.1476
## dnr1No . 0.0696 0.0696
## dnr1Yes . 0.0696 0.0696
## caMetastatic . 0.0172 0.0172
## caNo . 0.0439 0.0439
## caYes . 0.0267 0.0267
##
##
## Summary of Balance for Matched Data:
## Means Treated Means Control Std. Mean Diff.
## distance 0.4242 0.4106 0.1064
## age 60.7831 60.5225 0.0167
## sexFemale 0.4132 0.4224 -0.0187
## sexMale 0.5868 0.5776 0.0187
## raceblack 0.1538 0.1649 -0.0307
## raceother 0.0649 0.0626 0.0093
## racewhite 0.7812 0.7725 0.0212
## edu 11.8615 11.8260 0.0113
## income$11-$25k 0.2064 0.2050 0.0034
## income$25-$50k 0.1806 0.1709 0.0252
## income> $50k 0.0894 0.0843 0.0178
## incomeUnder $11k 0.5237 0.5398 -0.0323
## ninsclasMedicaid 0.0889 0.0972 -0.0292
## ninsclasMedicare 0.2340 0.2400 -0.0141
## ninsclasMedicare & Medicaid 0.0567 0.0590 -0.0100
## ninsclasNo insurance 0.0626 0.0608 0.0076
## ninsclasPrivate 0.3330 0.3238 0.0195
## ninsclasPrivate & Medicare 0.2248 0.2193 0.0133
## ARFNo 0.5813 0.5154 0.1336
## ARFYes 0.4187 0.4846 -0.1336
## MOSFNo 0.6108 0.6688 -0.1188
## MOSFYes 0.3892 0.3312 0.1188
## otherdiseasecatNo 0.8079 0.8158 -0.0199
## otherdiseasecatYes 0.1921 0.1842 0.0199
## dnr1No 0.9286 0.9272 0.0054
## dnr1Yes 0.0714 0.0728 -0.0054
## caMetastatic 0.0567 0.0617 -0.0220
## caNo 0.7900 0.7807 0.0226
## caYes 0.1534 0.1575 -0.0115
## Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance 1.2300 0.0207 0.0829 0.1066
## age 0.8048 0.0237 0.0461 1.1818
## sexFemale . 0.0092 0.0092 0.8919
## sexMale . 0.0092 0.0092 0.8919
## raceblack . 0.0111 0.0111 0.7158
## raceother . 0.0023 0.0023 0.4689
## racewhite . 0.0088 0.0088 0.8261
## edu 1.0232 0.0073 0.0235 0.9961
## income$11-$25k . 0.0014 0.0014 0.8129
## income$25-$50k . 0.0097 0.0097 0.7206
## income> $50k . 0.0051 0.0051 0.5456
## incomeUnder $11k . 0.0161 0.0161 0.9583
## ninsclasMedicaid . 0.0083 0.0083 0.5226
## ninsclasMedicare . 0.0060 0.0060 0.7975
## ninsclasMedicare & Medicaid . 0.0023 0.0023 0.4815
## ninsclasNo insurance . 0.0018 0.0018 0.4613
## ninsclasPrivate . 0.0092 0.0092 0.8785
## ninsclasPrivate & Medicare . 0.0055 0.0055 0.8171
## ARFNo . 0.0659 0.0659 0.6251
## ARFYes . 0.0659 0.0659 0.6251
## MOSFNo . 0.0580 0.0580 0.3641
## MOSFYes . 0.0580 0.0580 0.3641
## otherdiseasecatNo . 0.0078 0.0078 0.4278
## otherdiseasecatYes . 0.0078 0.0078 0.4278
## dnr1No . 0.0014 0.0014 0.4036
## dnr1Yes . 0.0014 0.0014 0.4036
## caMetastatic . 0.0051 0.0051 0.4695
## caNo . 0.0092 0.0092 0.7677
## caYes . 0.0041 0.0041 0.6668
##
## Percent Balance Improvement:
## Std. Mean Diff. Var. Ratio eCDF Mean eCDF Max
## distance 81.0 -226.0 86.7 63.3
## age 74.2 -7.7 16.6 34.5
## sexFemale 80.0 . 80.0 80.0
## sexMale 80.0 . 80.0 80.0
## raceblack 2.6 . 2.6 2.6
## raceother 54.3 . 54.3 54.3
## racewhite -38.5 . -38.5 -38.5
## edu 87.6 -56.7 59.4 54.0
## income$11-$25k 77.6 . 77.6 77.6
## income$25-$50k 75.3 . 75.3 75.3
## income> $50k 69.2 . 69.2 69.2
## incomeUnder $11k 73.9 . 73.9 73.9
## ninsclasMedicaid 79.0 . 79.0 79.0
## ninsclasMedicare 81.7 . 81.7 81.7
## ninsclasMedicare & Medicaid 84.0 . 84.0 84.0
## ninsclasNo insurance 81.4 . 81.4 81.4
## ninsclasPrivate 85.2 . 85.2 85.2
## ninsclasPrivate & Medicare 61.3 . 61.3 61.3
## ARFNo -127.0 . -127.0 -127.0
## ARFYes -127.0 . -127.0 -127.0
## MOSFNo 67.1 . 67.1 67.1
## MOSFYes 67.1 . 67.1 67.1
## otherdiseasecatNo 94.7 . 94.7 94.7
## otherdiseasecatYes 94.7 . 94.7 94.7
## dnr1No 98.0 . 98.0 98.0
## dnr1Yes 98.0 . 98.0 98.0
## caMetastatic 70.5 . 70.5 70.5
## caNo 79.0 . 79.0 79.0
## caYes 84.5 . 84.5 84.5
##
## Sample Sizes:
## Control Treated
## All 3551 2184
## Matched 2171 2171
## Unmatched 1380 13
## Discarded 0 0
matchedrhc <- data.frame(match.data(psMatch))
## Construct a table
tabMatched <- CreateTableOne(vars = vars.limited, strata = "treat",
data = matchedrhc, test = FALSE)
## Show table with SMD
print(tabMatched, smd = TRUE)
## Stratified by treat
## 0 1 SMD
## n 2171 2171
## age (mean (SD)) 60.52 (17.42) 60.78 (15.63) 0.016
## sex = Male (%) 1254 (57.8) 1274 (58.7) 0.019
## race (%) 0.031
## black 358 (16.5) 334 (15.4)
## other 136 ( 6.3) 141 ( 6.5)
## white 1677 (77.2) 1696 (78.1)
## edu (mean (SD)) 11.83 (3.12) 11.86 (3.16) 0.011
## income (%) 0.036
## $11-$25k 445 (20.5) 448 (20.6)
## $25-$50k 371 (17.1) 392 (18.1)
## > $50k 183 ( 8.4) 194 ( 8.9)
## Under $11k 1172 (54.0) 1137 (52.4)
## ninsclas (%) 0.038
## Medicaid 211 ( 9.7) 193 ( 8.9)
## Medicare 521 (24.0) 508 (23.4)
## Medicare & Medicaid 128 ( 5.9) 123 ( 5.7)
## No insurance 132 ( 6.1) 136 ( 6.3)
## Private 703 (32.4) 723 (33.3)
## Private & Medicare 476 (21.9) 488 (22.5)
## ARF = Yes (%) 1052 (48.5) 909 (41.9) 0.133
## MOSF = Yes (%) 719 (33.1) 845 (38.9) 0.121
## otherdiseasecat = Yes (%) 400 (18.4) 417 (19.2) 0.020
## dnr1 = Yes (%) 158 ( 7.3) 155 ( 7.1) 0.005
## ca (%) 0.026
## Metastatic 134 ( 6.2) 123 ( 5.7)
## No 1695 (78.1) 1715 (79.0)
## Yes 342 (15.8) 333 (15.3)
## Count covariates with important imbalance
addmargins(table(ExtractSmd(tabMatched) > 0.1))
##
## FALSE TRUE Sum
## 9 2 11
dataPlot <- data.frame(variable = rownames(ExtractSmd(tabUnmatched)),
Unmatched = ExtractSmd(tabUnmatched),
Matched = ExtractSmd(tabMatched))
# changing variable names -- need to figure out better
# way to code this
names(dataPlot)[which(names(dataPlot) == "X1.vs.2")] <- "Unmatched"
names(dataPlot)[which(names(dataPlot) == "X1.vs.2.1")] <- "Matched"
## Create long-format data for ggplot2
dataPlotMelt <- melt(data = dataPlot,
id.vars = c("variable"),
variable.name = "Method",
value.name = "SMD")
## Order variable names by magnitude of SMD
varNames <- as.character(dataPlot$variable)[order(dataPlot$Unmatched)]
## Order factor levels in the same order
dataPlotMelt$variable <- factor(dataPlotMelt$variable,
levels = varNames)
## Plot using ggplot2
ggplot(data = dataPlotMelt, mapping = aes(x = variable, y = SMD,
group = Method, color = Method)) +
geom_line(size=1) +
geom_point() +
geom_hline(yintercept = 0.1, color = "black", size = 0.1) +
coord_flip() +
theme_bw() + theme(legend.key = element_blank())
