# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# ~ CRP 245 Module 4 Days 3 & 4 ~
# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

# RHC data

# Data Set: rhc


# Data dictionary:
# (1)  sadmdte    study admission date  
# (2)  dschdte    hospital discharge date   
# (3)  lstctdte   date of last contact  
# (4)  cardiohx   if co1-9 in(1,3,4,5) then cardiohx = 1    
# (5)  chfhx        if co1-9 = 2 then chfhx = 1 
# (6)  dementhx   if co1-9 in(6,7,8) then dementhx = 1  
# (7)  psychhx    if co1-9 in(9,10) then psychhx = 1    
# (8)  chrpulhx   if co1-9 in(11,12,13) then chrpulhx = 1   
# (9) renalhx     if co1-9 in(14,15) then renalhx = 1   
# (10) liverhx    if co1-9 in(16,18) then liverhx = 1   
# (11) gibledhx   if co1-9 = 17 then gibledhx = 1   
# (12) malighx    if 19<= co1-9 <=23 then malighx = 1   
# (13) immunhx      if 24<= co1-9 <=29 then immunhx = 1 
# (14) transhx    if co1-9 in(39,40,41) then transhx = 1    
# (15) amihx        if co1-9 = 42 then amihx = 1    
# (16) age        Age   
# (17) sex      
# (18) edu          Years of Education  
# (19) aps1       acute physiology component of APACHE III score    
# (20) scoma1       SUPPORT Coma Score based on Glasgow day 1   
# (21) meanbp1    Mean Blood Pressure Day 1 
# (22) wblc1      white blood cell count Day 1  
# (23) hrt1         heart rate Day 1    
# (24) resp1        respiration rate Day 1  
# (25) temp1        temperature (celcius) Day 1 
# (26) pafi1        PaO2/(.01*FiO2) ratio Day 1 (ratio of partial pressure of 
#                 arterial oxygen to fraction of 
#                 inpsired oxygen)
# (27) alb1         albumin Day 1   
# (28) hema1        hematocrit Day 1    
# (29) bili1        bilirubin Day 1 
# (30) crea1        serum creatinine Day 1  
# (31) sod1       serum sodium Day 1    
# (32) pot1         serum potassium Day 1   
# (33) paco21       PaCO2 (partial pressure of arterial oxygen) Day 1   
# (34) ph1          serum ph (arterial) Day 1   
# (35) swang1         whether RHC was received - updated to "treated" in code
# (36) wtkilo1      weight in kilograms 
# (37) dnr1       existence of do not resuscitate order day 1
# (38) ninsclas     type of medical insurance (private, Medicare, 
#                 Medicaid, private and Medicare, Medicare and Medicaid, or none)
# (39) cat1       Disease categories on admission to ICU (acute respiratory 
#                 failure (ARF), COPD, congestive heart failure (CHF), cirrhosis,
#                 nontraumatic coma, colon cancer metatstatic to the liver, 
#                 non-small cell cancer of the lung, and multiorgan system 
#                 failure (MOSF) with malignancy or sepsis)
# (40) race       White, Black, Asian, other
# (41) income         Under $11k, $11-$25k, $25-$50k, > $50k
# (42) ca         Cancer (none, localized, metastatic)
# (43) ptid       Patient ID    

# packages to bring in for the program to run
library("MatchIt")
## Warning: package 'MatchIt' was built under R version 4.0.5
library("dplyr")
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library("ggplot2")
library("tableone")
## Warning: package 'tableone' was built under R version 4.0.5
library("reshape2")


# Following steps prepare data for analysis
rhc.readin <- read.csv("https://hbiostat.org/data/repo/rhc.csv")
rhc <- data.frame(rhc.readin)
drops <- c("cat2","dthdte", "adld3p", "urin1", "dschdte", "surv2md1", 
           "t3d30", "das2d3pc", "dth30", "resp","card", "neuro", 
           "gastr", "renal", "meta", "hema", "seps", "trauma", "ortho")
rhc<-rhc[ , !(names(rhc) %in% drops)]
rhc$treat <- ifelse(rhc$swang1=="RHC", 1, 0) # MatchIt need 0/1 treat indicator

# aggregate over cat1 categories as in paper to get ARF, MOSF, CHF, other
rhc$ARF <- ifelse(rhc$cat1=="ARF", "Yes", "No")
rhc$MOSF <- ifelse(rhc$cat=="MOSF w/Malignancy" | rhc$cat1=="MOSF w/Sepsis",
                   "Yes", "No")
rhc$otherdiseasecat <- ifelse(rhc$cat1=="CHF"|rhc$cat1=="Cirrhosis"|
                                rhc$cat1=="Colon Cancer"|rhc$cat1=="Coma"
                              |rhc$cat1=="COPD"|rhc$cat1=="Lung Cancer","Yes", "No")


vars.full <- c("age","sex","race","edu","income","ninsclas","ARF", "MOSF", 
               "otherdiseasecat","dnr1","ca", "aps1","scoma1","wtkilo1",
               "temp1","meanbp1","resp1","hrt1","pafi1","paco21","ph1",
               "wblc1","hema1","sod1","pot1","crea1","bili1","alb1",
               "cardiohx","chfhx","dementhx","psychhx","chrpulhx","renalhx",
               "liverhx","gibledhx","malighx","immunhx",
               "transhx","amihx")

vars.limited <- c("age","sex","race","edu","income","ninsclas","ARF", "MOSF", 
                  "otherdiseasecat","dnr1","ca")

## Construct a table
tabUnmatched <- CreateTableOne(vars = vars.limited, strata = "treat",
                               data = rhc, test = FALSE)
## Show table with SMD
print(tabUnmatched, smd = TRUE)
##                            Stratified by treat
##                             0             1             SMD   
##   n                          3551          2184               
##   age (mean (SD))           61.76 (17.29) 60.75 (15.63)  0.061
##   sex = Male (%)             1914 (53.9)   1278 (58.5)   0.093
##   race (%)                                               0.036
##      black                    585 (16.5)    335 (15.3)        
##      other                    213 ( 6.0)    142 ( 6.5)        
##      white                   2753 (77.5)   1707 (78.2)        
##   edu (mean (SD))           11.57 (3.13)  11.86 (3.16)   0.091
##   income (%)                                             0.142
##      $11-$25k                 713 (20.1)    452 (20.7)        
##      $25-$50k                 500 (14.1)    393 (18.0)        
##      > $50k                   257 ( 7.2)    194 ( 8.9)        
##      Under $11k              2081 (58.6)   1145 (52.4)        
##   ninsclas (%)                                           0.194
##      Medicaid                 454 (12.8)    193 ( 8.8)        
##      Medicare                 947 (26.7)    511 (23.4)        
##      Medicare & Medicaid      251 ( 7.1)    123 ( 5.6)        
##      No insurance             186 ( 5.2)    136 ( 6.2)        
##      Private                  967 (27.2)    731 (33.5)        
##      Private & Medicare       746 (21.0)    490 (22.4)        
##   ARF = Yes (%)              1581 (44.5)    909 (41.6)   0.059
##   MOSF = Yes (%)              768 (21.6)    858 (39.3)   0.391
##   otherdiseasecat = Yes (%)  1202 (33.8)    417 (19.1)   0.339
##   dnr1 = Yes (%)              499 (14.1)    155 ( 7.1)   0.228
##   ca (%)                                                 0.107
##      Metastatic               261 ( 7.4)    123 ( 5.6)        
##      No                      2652 (74.7)   1727 (79.1)        
##      Yes                      638 (18.0)    334 (15.3)
## Count covariates with important imbalance
addmargins(table(ExtractSmd(tabUnmatched) > 0.1))
## 
## FALSE  TRUE   Sum 
##     5     6    11
psModel <- glm(formula = treat ~ age + sex + race + edu + income + ninsclas +
                 ARF + MOSF +
                 dnr1 + ca, family  = binomial(link = "logit"), data    = rhc)

summary(psModel)
## 
## Call:
## glm(formula = treat ~ age + sex + race + edu + income + ninsclas + 
##     ARF + MOSF + dnr1 + ca, family = binomial(link = "logit"), 
##     data = rhc)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.5061  -0.9695  -0.7420   1.2178   2.1949  
## 
## Coefficients:
##                              Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                 -2.282018   0.250429  -9.112  < 2e-16 ***
## age                          0.002991   0.002260   1.323 0.185822    
## sexMale                      0.203890   0.057692   3.534 0.000409 ***
## raceother                    0.064945   0.134373   0.483 0.628866    
## racewhite                    0.041429   0.081652   0.507 0.611881    
## edu                          0.014487   0.010331   1.402 0.160852    
## income$25-$50k               0.119667   0.097119   1.232 0.217885    
## income> $50k                 0.069558   0.122353   0.568 0.569696    
## incomeUnder $11k            -0.002933   0.076971  -0.038 0.969601    
## ninsclasMedicare             0.215567   0.119126   1.810 0.070364 .  
## ninsclasMedicare & Medicaid  0.207803   0.149392   1.391 0.164227    
## ninsclasNo insurance         0.453603   0.146539   3.095 0.001965 ** 
## ninsclasPrivate              0.451435   0.109796   4.112 3.93e-05 ***
## ninsclasPrivate & Medicare   0.395203   0.122462   3.227 0.001250 ** 
## ARFYes                       0.520215   0.071837   7.242 4.43e-13 ***
## MOSFYes                      1.265800   0.078858  16.052  < 2e-16 ***
## dnr1Yes                     -0.691611   0.101430  -6.819 9.19e-12 ***
## caNo                         0.525722   0.120607   4.359 1.31e-05 ***
## caYes                        0.064958   0.134464   0.483 0.629033    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 7621.4  on 5734  degrees of freedom
## Residual deviance: 7199.5  on 5716  degrees of freedom
## AIC: 7237.5
## 
## Number of Fisher Scoring iterations: 4
## Predicted probability of being assigned to RHC
rhc$pRhc <- predict(psModel, type = "response")
## Predicted probability of being assigned to no RHC
rhc$pNoRhc <- 1 - rhc$pRhc
min(rhc$pRhc[rhc$treat==0])
## [1] 0.06650796
min(rhc$pRhc[rhc$treat==1])
## [1] 0.08992939
max(rhc$pRhc[rhc$treat==0])
## [1] 0.6782902
max(rhc$pRhc[rhc$treat==1])
## [1] 0.6869823
# get predicted propensity scores for each person in dataset
pred_pscore <- data.frame(rhc$pRhc,psModel$model$treat)
head(pred_pscore)
##    rhc.pRhc psModel.model.treat
## 1 0.2017841                   0
## 2 0.5868207                   1
## 3 0.5007341                   1
## 4 0.3908386                   0
## 5 0.4068518                   1
## 6 0.2443598                   0
# box plot to assess overlap of propensity score distributions between groups
plot.new()
boxplot(rhc$pRhc~psModel$model$treat, data=pred_pscore, ylim=c(0,1),
        names=c("No RHC", "RHC"), ylab="Estimated propensity score",
        axis(side=1, at=c(.3, .8), labels=c("No RHC", "RHC")))

# histogram of p score distributions
labs <- paste("RHC status:", c("Treated", "Not Treated"))
pred_pscore %>%
  mutate(treated = ifelse(rhc$treat == 1, labs[1], labs[2])) %>%
  ggplot(aes(x = rhc$pRhc)) +
  geom_histogram(color = "white", binwidth=.05) +
  facet_wrap(~treated) +
  xlab("Probability of RHC") +
  theme_bw()

# match with caliper of 0.1
psMatch <- matchit(treat ~ age + sex + race + edu + income + ninsclas + ARF +
                     MOSF + otherdiseasecat +dnr1 + ca, distance="logit", 
                   method="nearest", caliper=.1, data=rhc)
summary(psMatch)
## 
## Call:
## matchit(formula = treat ~ age + sex + race + edu + income + ninsclas + 
##     ARF + MOSF + otherdiseasecat + dnr1 + ca, data = rhc, method = "nearest", 
##     distance = "logit", caliper = 0.1)
## 
## Summary of Balance for All Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.4251        0.3536          0.5597
## age                               60.7498       61.7609         -0.0647
## sexFemale                          0.4148        0.4610         -0.0937
## sexMale                            0.5852        0.5390          0.0937
## raceblack                          0.1534        0.1647         -0.0315
## raceother                          0.0650        0.0600          0.0204
## racewhite                          0.7816        0.7753          0.0153
## edu                               11.8564       11.5690          0.0910
## income$11-$25k                     0.2070        0.2008          0.0152
## income$25-$50k                     0.1799        0.1408          0.1019
## income> $50k                       0.0888        0.0724          0.0578
## incomeUnder $11k                   0.5243        0.5860         -0.1237
## ninsclasMedicaid                   0.0884        0.1279         -0.1391
## ninsclasMedicare                   0.2340        0.2667         -0.0773
## ninsclasMedicare & Medicaid        0.0563        0.0707         -0.0623
## ninsclasNo insurance               0.0623        0.0524          0.0409
## ninsclasPrivate                    0.3347        0.2723          0.1322
## ninsclasPrivate & Medicare         0.2244        0.2101          0.0342
## ARFNo                              0.5838        0.5548          0.0589
## ARFYes                             0.4162        0.4452         -0.0589
## MOSFNo                             0.6071        0.7837         -0.3616
## MOSFYes                            0.3929        0.2163          0.3616
## otherdiseasecatNo                  0.8091        0.6615          0.3754
## otherdiseasecatYes                 0.1909        0.3385         -0.3754
## dnr1No                             0.9290        0.8595          0.2709
## dnr1Yes                            0.0710        0.1405         -0.2709
## caMetastatic                       0.0563        0.0735         -0.0745
## caNo                               0.7908        0.7468          0.1080
## caYes                              0.1529        0.1797         -0.0743
##                             Var. Ratio eCDF Mean eCDF Max
## distance                        1.0656    0.1563   0.2259
## age                             0.8175    0.0285   0.0703
## sexFemale                            .    0.0462   0.0462
## sexMale                              .    0.0462   0.0462
## raceblack                            .    0.0114   0.0114
## raceother                            .    0.0050   0.0050
## racewhite                            .    0.0063   0.0063
## edu                             1.0147    0.0181   0.0511
## income$11-$25k                       .    0.0062   0.0062
## income$25-$50k                       .    0.0391   0.0391
## income> $50k                         .    0.0165   0.0165
## incomeUnder $11k                     .    0.0618   0.0618
## ninsclasMedicaid                     .    0.0395   0.0395
## ninsclasMedicare                     .    0.0327   0.0327
## ninsclasMedicare & Medicaid          .    0.0144   0.0144
## ninsclasNo insurance                 .    0.0099   0.0099
## ninsclasPrivate                      .    0.0624   0.0624
## ninsclasPrivate & Medicare           .    0.0143   0.0143
## ARFNo                                .    0.0290   0.0290
## ARFYes                               .    0.0290   0.0290
## MOSFNo                               .    0.1766   0.1766
## MOSFYes                              .    0.1766   0.1766
## otherdiseasecatNo                    .    0.1476   0.1476
## otherdiseasecatYes                   .    0.1476   0.1476
## dnr1No                               .    0.0696   0.0696
## dnr1Yes                              .    0.0696   0.0696
## caMetastatic                         .    0.0172   0.0172
## caNo                                 .    0.0439   0.0439
## caYes                                .    0.0267   0.0267
## 
## 
## Summary of Balance for Matched Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.4124        0.4091          0.0254
## age                               60.7513       60.7563         -0.0003
## sexFemale                          0.4215        0.4269         -0.0110
## sexMale                            0.5785        0.5731          0.0110
## raceblack                          0.1531        0.1624         -0.0259
## raceother                          0.0662        0.0623          0.0159
## racewhite                          0.7807        0.7753          0.0131
## edu                               11.8504       11.8327          0.0056
## income$11-$25k                     0.2041        0.2026          0.0036
## income$25-$50k                     0.1771        0.1688          0.0217
## income> $50k                       0.0868        0.0829          0.0138
## incomeUnder $11k                   0.5319        0.5456         -0.0275
## ninsclasMedicaid                   0.0947        0.1016         -0.0242
## ninsclasMedicare                   0.2360        0.2439         -0.0185
## ninsclasMedicare & Medicaid        0.0584        0.0613         -0.0128
## ninsclasNo insurance               0.0618        0.0559          0.0244
## ninsclasPrivate                    0.3283        0.3194          0.0187
## ninsclasPrivate & Medicare         0.2208        0.2179          0.0071
## ARFNo                              0.5540        0.5388          0.0309
## ARFYes                             0.4460        0.4612         -0.0309
## MOSFNo                             0.6506        0.6575         -0.0141
## MOSFYes                            0.3494        0.3425          0.0141
## otherdiseasecatNo                  0.7954        0.8037         -0.0212
## otherdiseasecatYes                 0.2046        0.1963          0.0212
## dnr1No                             0.9239        0.9235          0.0019
## dnr1Yes                            0.0761        0.0765         -0.0019
## caMetastatic                       0.0599        0.0628         -0.0128
## caNo                               0.7812        0.7738          0.0181
## caYes                              0.1590        0.1634         -0.0123
##                             Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance                        1.0663    0.0044   0.0236          0.0256
## age                             0.8228    0.0219   0.0456          1.1824
## sexFemale                            .    0.0054   0.0054          0.8973
## sexMale                              .    0.0054   0.0054          0.8973
## raceblack                            .    0.0093   0.0093          0.7394
## raceother                            .    0.0039   0.0039          0.4776
## racewhite                            .    0.0054   0.0054          0.8563
## edu                             1.0217    0.0063   0.0191          0.9990
## income$11-$25k                       .    0.0015   0.0015          0.8079
## income$25-$50k                       .    0.0083   0.0083          0.6451
## income> $50k                         .    0.0039   0.0039          0.5140
## incomeUnder $11k                     .    0.0137   0.0137          0.8921
## ninsclasMedicaid                     .    0.0069   0.0069          0.5359
## ninsclasMedicare                     .    0.0079   0.0079          0.7696
## ninsclasMedicare & Medicaid          .    0.0029   0.0029          0.4938
## ninsclasNo insurance                 .    0.0059   0.0059          0.4345
## ninsclasPrivate                      .    0.0088   0.0088          0.8630
## ninsclasPrivate & Medicare           .    0.0029   0.0029          0.7998
## ARFNo                                .    0.0152   0.0152          0.6102
## ARFYes                               .    0.0152   0.0152          0.6102
## MOSFNo                               .    0.0069   0.0069          0.3315
## MOSFYes                              .    0.0069   0.0069          0.3315
## otherdiseasecatNo                    .    0.0083   0.0083          0.4557
## otherdiseasecatYes                   .    0.0083   0.0083          0.4557
## dnr1No                               .    0.0005   0.0005          0.4261
## dnr1Yes                              .    0.0005   0.0005          0.4261
## caMetastatic                         .    0.0029   0.0029          0.4768
## caNo                                 .    0.0074   0.0074          0.7322
## caYes                                .    0.0044   0.0044          0.6612
## 
## Percent Balance Improvement:
##                             Std. Mean Diff. Var. Ratio eCDF Mean eCDF Max
## distance                               95.5       -1.1      97.2     89.6
## age                                    99.5        3.2      23.0     35.1
## sexFemale                              88.3          .      88.3     88.3
## sexMale                                88.3          .      88.3     88.3
## raceblack                              17.9          .      17.9     17.9
## raceother                              22.0          .      22.0     22.0
## racewhite                              14.6          .      14.6     14.6
## edu                                    93.9      -47.1      65.1     62.5
## income$11-$25k                         76.1          .      76.1     76.1
## income$25-$50k                         78.7          .      78.7     78.7
## income> $50k                           76.1          .      76.1     76.1
## incomeUnder $11k                       77.8          .      77.8     77.8
## ninsclasMedicaid                       82.6          .      82.6     82.6
## ninsclasMedicare                       76.0          .      76.0     76.0
## ninsclasMedicare & Medicaid            79.5          .      79.5     79.5
## ninsclasNo insurance                   40.5          .      40.5     40.5
## ninsclasPrivate                        85.8          .      85.8     85.8
## ninsclasPrivate & Medicare             79.4          .      79.4     79.4
## ARFNo                                  47.6          .      47.6     47.6
## ARFYes                                 47.6          .      47.6     47.6
## MOSFNo                                 96.1          .      96.1     96.1
## MOSFYes                                96.1          .      96.1     96.1
## otherdiseasecatNo                      94.3          .      94.3     94.3
## otherdiseasecatYes                     94.3          .      94.3     94.3
## dnr1No                                 99.3          .      99.3     99.3
## dnr1Yes                                99.3          .      99.3     99.3
## caMetastatic                           82.9          .      82.9     82.9
## caNo                                   83.2          .      83.2     83.2
## caYes                                  83.5          .      83.5     83.5
## 
## Sample Sizes:
##           Control Treated
## All          3551    2184
## Matched      2038    2038
## Unmatched    1513     146
## Discarded       0       0
matchedrhc <- data.frame(match.data(psMatch))

## Construct a table
tabMatched <- CreateTableOne(vars = vars.limited, strata = "treat", 
                             data = matchedrhc, test = FALSE)
## Show table with SMD
print(tabMatched, smd = TRUE)
##                            Stratified by treat
##                             0             1             SMD   
##   n                          2038          2038               
##   age (mean (SD))           60.76 (17.27) 60.75 (15.67) <0.001
##   sex = Male (%)             1168 (57.3)   1179 (57.9)   0.011
##   race (%)                                               0.029
##      black                    331 (16.2)    312 (15.3)        
##      other                    127 ( 6.2)    135 ( 6.6)        
##      white                   1580 (77.5)   1591 (78.1)        
##   edu (mean (SD))           11.83 (3.11)  11.85 (3.14)   0.006
##   income (%)                                             0.031
##      $11-$25k                 413 (20.3)    416 (20.4)        
##      $25-$50k                 344 (16.9)    361 (17.7)        
##      > $50k                   169 ( 8.3)    177 ( 8.7)        
##      Under $11k              1112 (54.6)   1084 (53.2)        
##   ninsclas (%)                                           0.042
##      Medicaid                 207 (10.2)    193 ( 9.5)        
##      Medicare                 497 (24.4)    481 (23.6)        
##      Medicare & Medicaid      125 ( 6.1)    119 ( 5.8)        
##      No insurance             114 ( 5.6)    126 ( 6.2)        
##      Private                  651 (31.9)    669 (32.8)        
##      Private & Medicare       444 (21.8)    450 (22.1)        
##   ARF = Yes (%)               940 (46.1)    909 (44.6)   0.031
##   MOSF = Yes (%)              698 (34.2)    712 (34.9)   0.014
##   otherdiseasecat = Yes (%)   400 (19.6)    417 (20.5)   0.021
##   dnr1 = Yes (%)              156 ( 7.7)    155 ( 7.6)   0.002
##   ca (%)                                                 0.018
##      Metastatic               128 ( 6.3)    122 ( 6.0)        
##      No                      1577 (77.4)   1592 (78.1)        
##      Yes                      333 (16.3)    324 (15.9)
dataPlot <- data.frame(variable  = rownames(ExtractSmd(tabUnmatched)),
                       unMatched = ExtractSmd(tabUnmatched),
                       Matched   = ExtractSmd(tabMatched))
# changing variable names -- need to figure out better
# way to code this
names(dataPlot)[which(names(dataPlot) == "X1.vs.2")] <- "Unmatched"
names(dataPlot)[which(names(dataPlot) == "X1.vs.2.1")] <- "Matched"


## Create long-format data for ggplot2
dataPlotMelt <- melt(data          = dataPlot,
                     id.vars       = c("variable"),
                     variable.name = "Method",
                     value.name    = "SMD")
# change factor levels

## Order variable names by magnitude of SMD
varNames <- as.character(dataPlot$variable)[order(dataPlot$Unmatched)]

## Order factor levels in the same order
dataPlotMelt$variable <- factor(dataPlotMelt$variable,
                                levels = varNames)
## Plot using ggplot2
ggplot(data = dataPlotMelt, mapping = aes(x = variable, y = SMD,
                                          group = Method, color = Method)) +
  geom_line(size=1) +
  geom_point() +
  geom_hline(yintercept = 0.1, color = "black", size = 0.1) +
  coord_flip() +
  theme_bw() + theme(legend.key = element_blank())

# match with caliper of 0.4
psMatch <- matchit(treat ~ age + sex + race + edu + income + ninsclas + ARF + 
                     MOSF + otherdiseasecat +dnr1 + ca, distance="logit", 
                   method="nearest", caliper=.4, data=rhc)
summary(psMatch)
## 
## Call:
## matchit(formula = treat ~ age + sex + race + edu + income + ninsclas + 
##     ARF + MOSF + otherdiseasecat + dnr1 + ca, data = rhc, method = "nearest", 
##     distance = "logit", caliper = 0.4)
## 
## Summary of Balance for All Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.4251        0.3536          0.5597
## age                               60.7498       61.7609         -0.0647
## sexFemale                          0.4148        0.4610         -0.0937
## sexMale                            0.5852        0.5390          0.0937
## raceblack                          0.1534        0.1647         -0.0315
## raceother                          0.0650        0.0600          0.0204
## racewhite                          0.7816        0.7753          0.0153
## edu                               11.8564       11.5690          0.0910
## income$11-$25k                     0.2070        0.2008          0.0152
## income$25-$50k                     0.1799        0.1408          0.1019
## income> $50k                       0.0888        0.0724          0.0578
## incomeUnder $11k                   0.5243        0.5860         -0.1237
## ninsclasMedicaid                   0.0884        0.1279         -0.1391
## ninsclasMedicare                   0.2340        0.2667         -0.0773
## ninsclasMedicare & Medicaid        0.0563        0.0707         -0.0623
## ninsclasNo insurance               0.0623        0.0524          0.0409
## ninsclasPrivate                    0.3347        0.2723          0.1322
## ninsclasPrivate & Medicare         0.2244        0.2101          0.0342
## ARFNo                              0.5838        0.5548          0.0589
## ARFYes                             0.4162        0.4452         -0.0589
## MOSFNo                             0.6071        0.7837         -0.3616
## MOSFYes                            0.3929        0.2163          0.3616
## otherdiseasecatNo                  0.8091        0.6615          0.3754
## otherdiseasecatYes                 0.1909        0.3385         -0.3754
## dnr1No                             0.9290        0.8595          0.2709
## dnr1Yes                            0.0710        0.1405         -0.2709
## caMetastatic                       0.0563        0.0735         -0.0745
## caNo                               0.7908        0.7468          0.1080
## caYes                              0.1529        0.1797         -0.0743
##                             Var. Ratio eCDF Mean eCDF Max
## distance                        1.0656    0.1563   0.2259
## age                             0.8175    0.0285   0.0703
## sexFemale                            .    0.0462   0.0462
## sexMale                              .    0.0462   0.0462
## raceblack                            .    0.0114   0.0114
## raceother                            .    0.0050   0.0050
## racewhite                            .    0.0063   0.0063
## edu                             1.0147    0.0181   0.0511
## income$11-$25k                       .    0.0062   0.0062
## income$25-$50k                       .    0.0391   0.0391
## income> $50k                         .    0.0165   0.0165
## incomeUnder $11k                     .    0.0618   0.0618
## ninsclasMedicaid                     .    0.0395   0.0395
## ninsclasMedicare                     .    0.0327   0.0327
## ninsclasMedicare & Medicaid          .    0.0144   0.0144
## ninsclasNo insurance                 .    0.0099   0.0099
## ninsclasPrivate                      .    0.0624   0.0624
## ninsclasPrivate & Medicare           .    0.0143   0.0143
## ARFNo                                .    0.0290   0.0290
## ARFYes                               .    0.0290   0.0290
## MOSFNo                               .    0.1766   0.1766
## MOSFYes                              .    0.1766   0.1766
## otherdiseasecatNo                    .    0.1476   0.1476
## otherdiseasecatYes                   .    0.1476   0.1476
## dnr1No                               .    0.0696   0.0696
## dnr1Yes                              .    0.0696   0.0696
## caMetastatic                         .    0.0172   0.0172
## caNo                                 .    0.0439   0.0439
## caYes                                .    0.0267   0.0267
## 
## 
## Summary of Balance for Matched Data:
##                             Means Treated Means Control Std. Mean Diff.
## distance                           0.4242        0.4106          0.1064
## age                               60.7831       60.5225          0.0167
## sexFemale                          0.4132        0.4224         -0.0187
## sexMale                            0.5868        0.5776          0.0187
## raceblack                          0.1538        0.1649         -0.0307
## raceother                          0.0649        0.0626          0.0093
## racewhite                          0.7812        0.7725          0.0212
## edu                               11.8615       11.8260          0.0113
## income$11-$25k                     0.2064        0.2050          0.0034
## income$25-$50k                     0.1806        0.1709          0.0252
## income> $50k                       0.0894        0.0843          0.0178
## incomeUnder $11k                   0.5237        0.5398         -0.0323
## ninsclasMedicaid                   0.0889        0.0972         -0.0292
## ninsclasMedicare                   0.2340        0.2400         -0.0141
## ninsclasMedicare & Medicaid        0.0567        0.0590         -0.0100
## ninsclasNo insurance               0.0626        0.0608          0.0076
## ninsclasPrivate                    0.3330        0.3238          0.0195
## ninsclasPrivate & Medicare         0.2248        0.2193          0.0133
## ARFNo                              0.5813        0.5154          0.1336
## ARFYes                             0.4187        0.4846         -0.1336
## MOSFNo                             0.6108        0.6688         -0.1188
## MOSFYes                            0.3892        0.3312          0.1188
## otherdiseasecatNo                  0.8079        0.8158         -0.0199
## otherdiseasecatYes                 0.1921        0.1842          0.0199
## dnr1No                             0.9286        0.9272          0.0054
## dnr1Yes                            0.0714        0.0728         -0.0054
## caMetastatic                       0.0567        0.0617         -0.0220
## caNo                               0.7900        0.7807          0.0226
## caYes                              0.1534        0.1575         -0.0115
##                             Var. Ratio eCDF Mean eCDF Max Std. Pair Dist.
## distance                        1.2300    0.0207   0.0829          0.1066
## age                             0.8048    0.0237   0.0461          1.1818
## sexFemale                            .    0.0092   0.0092          0.8919
## sexMale                              .    0.0092   0.0092          0.8919
## raceblack                            .    0.0111   0.0111          0.7158
## raceother                            .    0.0023   0.0023          0.4689
## racewhite                            .    0.0088   0.0088          0.8261
## edu                             1.0232    0.0073   0.0235          0.9961
## income$11-$25k                       .    0.0014   0.0014          0.8129
## income$25-$50k                       .    0.0097   0.0097          0.7206
## income> $50k                         .    0.0051   0.0051          0.5456
## incomeUnder $11k                     .    0.0161   0.0161          0.9583
## ninsclasMedicaid                     .    0.0083   0.0083          0.5226
## ninsclasMedicare                     .    0.0060   0.0060          0.7975
## ninsclasMedicare & Medicaid          .    0.0023   0.0023          0.4815
## ninsclasNo insurance                 .    0.0018   0.0018          0.4613
## ninsclasPrivate                      .    0.0092   0.0092          0.8785
## ninsclasPrivate & Medicare           .    0.0055   0.0055          0.8171
## ARFNo                                .    0.0659   0.0659          0.6251
## ARFYes                               .    0.0659   0.0659          0.6251
## MOSFNo                               .    0.0580   0.0580          0.3641
## MOSFYes                              .    0.0580   0.0580          0.3641
## otherdiseasecatNo                    .    0.0078   0.0078          0.4278
## otherdiseasecatYes                   .    0.0078   0.0078          0.4278
## dnr1No                               .    0.0014   0.0014          0.4036
## dnr1Yes                              .    0.0014   0.0014          0.4036
## caMetastatic                         .    0.0051   0.0051          0.4695
## caNo                                 .    0.0092   0.0092          0.7677
## caYes                                .    0.0041   0.0041          0.6668
## 
## Percent Balance Improvement:
##                             Std. Mean Diff. Var. Ratio eCDF Mean eCDF Max
## distance                               81.0     -226.0      86.7     63.3
## age                                    74.2       -7.7      16.6     34.5
## sexFemale                              80.0          .      80.0     80.0
## sexMale                                80.0          .      80.0     80.0
## raceblack                               2.6          .       2.6      2.6
## raceother                              54.3          .      54.3     54.3
## racewhite                             -38.5          .     -38.5    -38.5
## edu                                    87.6      -56.7      59.4     54.0
## income$11-$25k                         77.6          .      77.6     77.6
## income$25-$50k                         75.3          .      75.3     75.3
## income> $50k                           69.2          .      69.2     69.2
## incomeUnder $11k                       73.9          .      73.9     73.9
## ninsclasMedicaid                       79.0          .      79.0     79.0
## ninsclasMedicare                       81.7          .      81.7     81.7
## ninsclasMedicare & Medicaid            84.0          .      84.0     84.0
## ninsclasNo insurance                   81.4          .      81.4     81.4
## ninsclasPrivate                        85.2          .      85.2     85.2
## ninsclasPrivate & Medicare             61.3          .      61.3     61.3
## ARFNo                                -127.0          .    -127.0   -127.0
## ARFYes                               -127.0          .    -127.0   -127.0
## MOSFNo                                 67.1          .      67.1     67.1
## MOSFYes                                67.1          .      67.1     67.1
## otherdiseasecatNo                      94.7          .      94.7     94.7
## otherdiseasecatYes                     94.7          .      94.7     94.7
## dnr1No                                 98.0          .      98.0     98.0
## dnr1Yes                                98.0          .      98.0     98.0
## caMetastatic                           70.5          .      70.5     70.5
## caNo                                   79.0          .      79.0     79.0
## caYes                                  84.5          .      84.5     84.5
## 
## Sample Sizes:
##           Control Treated
## All          3551    2184
## Matched      2171    2171
## Unmatched    1380      13
## Discarded       0       0
matchedrhc <- data.frame(match.data(psMatch))

## Construct a table
tabMatched <- CreateTableOne(vars = vars.limited, strata = "treat", 
                             data = matchedrhc, test = FALSE)
## Show table with SMD
print(tabMatched, smd = TRUE)
##                            Stratified by treat
##                             0             1             SMD   
##   n                          2171          2171               
##   age (mean (SD))           60.52 (17.42) 60.78 (15.63)  0.016
##   sex = Male (%)             1254 (57.8)   1274 (58.7)   0.019
##   race (%)                                               0.031
##      black                    358 (16.5)    334 (15.4)        
##      other                    136 ( 6.3)    141 ( 6.5)        
##      white                   1677 (77.2)   1696 (78.1)        
##   edu (mean (SD))           11.83 (3.12)  11.86 (3.16)   0.011
##   income (%)                                             0.036
##      $11-$25k                 445 (20.5)    448 (20.6)        
##      $25-$50k                 371 (17.1)    392 (18.1)        
##      > $50k                   183 ( 8.4)    194 ( 8.9)        
##      Under $11k              1172 (54.0)   1137 (52.4)        
##   ninsclas (%)                                           0.038
##      Medicaid                 211 ( 9.7)    193 ( 8.9)        
##      Medicare                 521 (24.0)    508 (23.4)        
##      Medicare & Medicaid      128 ( 5.9)    123 ( 5.7)        
##      No insurance             132 ( 6.1)    136 ( 6.3)        
##      Private                  703 (32.4)    723 (33.3)        
##      Private & Medicare       476 (21.9)    488 (22.5)        
##   ARF = Yes (%)              1052 (48.5)    909 (41.9)   0.133
##   MOSF = Yes (%)              719 (33.1)    845 (38.9)   0.121
##   otherdiseasecat = Yes (%)   400 (18.4)    417 (19.2)   0.020
##   dnr1 = Yes (%)              158 ( 7.3)    155 ( 7.1)   0.005
##   ca (%)                                                 0.026
##      Metastatic               134 ( 6.2)    123 ( 5.7)        
##      No                      1695 (78.1)   1715 (79.0)        
##      Yes                      342 (15.8)    333 (15.3)
## Count covariates with important imbalance
addmargins(table(ExtractSmd(tabMatched) > 0.1))
## 
## FALSE  TRUE   Sum 
##     9     2    11
dataPlot <- data.frame(variable  = rownames(ExtractSmd(tabUnmatched)),
                       Unmatched = ExtractSmd(tabUnmatched),
                       Matched   = ExtractSmd(tabMatched))

# changing variable names -- need to figure out better
# way to code this
names(dataPlot)[which(names(dataPlot) == "X1.vs.2")] <- "Unmatched"
names(dataPlot)[which(names(dataPlot) == "X1.vs.2.1")] <- "Matched"




## Create long-format data for ggplot2
dataPlotMelt <- melt(data          = dataPlot,
                     id.vars       = c("variable"),
                     variable.name = "Method",
                     value.name    = "SMD")

## Order variable names by magnitude of SMD
varNames <- as.character(dataPlot$variable)[order(dataPlot$Unmatched)]

## Order factor levels in the same order
dataPlotMelt$variable <- factor(dataPlotMelt$variable,
                                levels = varNames)
## Plot using ggplot2
ggplot(data = dataPlotMelt, mapping = aes(x = variable, y = SMD,
                                          group = Method, color = Method)) +
  geom_line(size=1) +
  geom_point() +
  geom_hline(yintercept = 0.1, color = "black", size = 0.1) +
  coord_flip() +
  theme_bw() + theme(legend.key = element_blank())