Propensity score using right heart cath dataset

References

Load packages

## plotting
library(ggplot2)
## ROC
library(pROC)
## Load nonrandom for PS matching
library(nonrandom)
## Load Matching for PS matching
library(Matching)
## Load sandwich package for robust sandwich covariance matrix estimators
library(sandwich)
## Load lmtest package for coeftest
library(lmtest)
## Load geepack for robust sandwich covariance matrix estimators
library(geepack)

Prepare data

## Right heart cath dataset
rhc <- read.csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.csv")
## first 6 rows
head(rhc)
  X              cat1          cat2  ca sadmdte dschdte dthdte lstctdte death cardiohx chfhx dementhx psychhx
1 1              COPD          <NA> Yes   11142   11151     NA    11382    No        0     0        0       0
2 2     MOSF w/Sepsis          <NA>  No   11799   11844  11844    11844   Yes        1     1        0       0
3 3 MOSF w/Malignancy MOSF w/Sepsis Yes   12083   12143     NA    12400    No        0     0        0       0
4 4               ARF          <NA>  No   11146   11183  11183    11182   Yes        0     0        0       0
5 5     MOSF w/Sepsis          <NA>  No   12035   12037  12037    12036   Yes        0     0        0       0
6 6              COPD          <NA>  No   12389   12396     NA    12590    No        0     1        0       0
  chrpulhx renalhx liverhx gibledhx malighx immunhx transhx amihx   age    sex    edu surv2md1 das2d3pc t3d30 dth30
1        1       0       0        0       1       0       0     0 70.25   Male 12.000    0.641    23.50    30    No
2        0       0       0        0       0       1       1     0 78.18 Female 12.000    0.755    14.75    30    No
3        0       0       0        0       1       1       0     0 46.09 Female 14.070    0.317    18.14    30    No
4        0       0       0        0       0       1       0     0 75.33 Female  9.000    0.441    22.93    30    No
5        0       0       0        0       0       0       0     0 67.91   Male  9.945    0.437    21.05     2   Yes
6        1       0       0        0       0       0       0     0 86.08 Female  8.000    0.665    17.50    30    No
  aps1 scoma1 meanbp1 wblc1 hrt1 resp1 temp1 pafi1 alb1 hema1 bili1 crea1 sod1  pot1 paco21   ph1 swang1 wtkilo1
1   46      0      41 22.10  124    10  38.7  68.0  3.5  58.0  1.01   1.2  145 4.000     40 7.359 No RHC    64.7
2   50      0      63 28.90  137    38  38.9 218.3  2.6  32.5  0.70   0.6  137 3.300     34 7.329    RHC    45.7
3   82      0      57  0.05  130    40  36.4 275.5  3.5  21.1  1.01   2.6  146 2.900     16 7.359    RHC     0.0
4   48      0      55 23.30   58    26  35.8 156.7  3.5  26.3  0.40   1.7  117 5.800     30 7.460 No RHC    54.6
5   72     41      65 29.70  125    27  34.8 478.0  3.5  24.0  1.01   3.6  126 5.800     17 7.229    RHC    78.4
6   38      0     115 18.00  134    36  39.2 184.2  3.1  30.5  1.01   1.4  138 5.399     68 7.300 No RHC    54.9
  dnr1           ninsclas resp card neuro gastr renal meta hema seps trauma ortho adld3p urin1  race     income
1   No           Medicare  Yes  Yes    No    No    No   No   No   No     No    No      0    NA white Under $11k
2   No Private & Medicare   No   No    No    No    No   No   No  Yes     No    No     NA  1437 white Under $11k
3   No            Private   No  Yes    No    No    No   No   No   No     No    No     NA   599 white   $25-$50k
4   No Private & Medicare  Yes   No    No    No    No   No   No   No     No    No     NA    NA white   $11-$25k
5  Yes           Medicare   No  Yes    No    No    No   No   No   No     No    No     NA    64 white Under $11k
6   No           Medicare  Yes   No    No    No    No   No   No   No     No    No      0   242 white Under $11k
  ptid
1    5
2    7
3    9
4   10
5   11
6   12
## Show outcome (death) and exposure (swang1)
addmargins(table(rhc[,c("swang1", "death")]))
        death
swang1     No  Yes  Sum
  No RHC 1315 2236 3551
  RHC     698 1486 2184
  Sum    2013 3722 5735
## Create weights for plotting
rhc$one <- 1

Define a function to obtain ORs

## Define function to obtain ORs with 95% confidence intervals from a result matrix
GetConfInt <- function(obj) {

    logitsticModel <- FALSE

    if (identical(class(obj), c("glm", "lm")) == TRUE) {

        mat <- coef(summary(obj))
        logitsticModel <- TRUE

    } else if (identical(class(obj), c("geeglm", "gee", "glm")) == TRUE) {

        mat <- coef(summary(obj))

    } else if (identical(class(obj), c("coeftest")) == TRUE) {

        mat <- obj

    } else if (identical(class(obj), c("matrix")) == TRUE) {

        mat <- obj

    } else {

        stop("Not a supported object")
    }

    ## Add point estimates
    matRes <- mat[, 1, drop = F]

    ## 1.96 * SE
    matSe <-  mat[, 2, drop = F] * qnorm(0.975)

    ## Estimate, lower 95% limit, upper 95% limit
    matRes <- cbind(matRes, (matRes - matSe), (matRes + matSe))

    ## Name
    colnames(matRes) <- c("OR","lower","upper")

    ## Exponentiate
    matRes <- exp(matRes)

    ## Add p-value
    matRes <- cbind(matRes, mat[, 3:4, drop = F])

    if (logitsticModel == TRUE) {

        matRes[, c("lower","upper")] <- exp(suppressMessages(confint(obj)))
    }

    ## Show
    matRes
}

Perform a “regular” logistic regression

Estimate the effect within each combination of confounder values.

## Crude analysis (confounded!)
glmCrude <- glm(formula = death ~ swang1,
                family  = binomial(link = "logit"),
                data    = rhc)
GetConfInt(glmCrude)
               OR lower upper z value  Pr(>|z|)
(Intercept) 1.700 1.589 1.821  15.276 1.113e-52
swang1RHC   1.252 1.119 1.402   3.905 9.425e-05

## Fit a "regular" fully adjusted model
glmFull <- glm(formula =  death ~ swang1 + age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
               family  = binomial(link = "logit"),
               data    = rhc)
## Show result
GetConfInt(coef(summary(glmFull)))
                                   OR   lower       upper  z value  Pr(>|z|)
(Intercept)                 167.87104 0.29673 94971.25049  1.58426 1.131e-01
swang1RHC                     1.38273 1.19345     1.60204  4.31442 1.600e-05
age                           1.01402 1.00815     1.01993  4.69781 2.630e-06
sexMale                       1.33138 1.16651     1.51956  4.24326 2.203e-05
raceother                     1.05862 0.78456     1.42840  0.37265 7.094e-01
racewhite                     0.98723 0.82196     1.18573 -0.13749 8.906e-01
edu                           1.00956 0.98686     1.03277  0.81987 4.123e-01
income$11-$25k                1.19981 0.91699     1.56985  1.32811 1.841e-01
income$25-$50k                1.02190 0.78072     1.33756  0.15770 8.747e-01
incomeUnder $11k              1.42425 1.09033     1.86043  2.59438 9.476e-03
ninsclasMedicare              1.42434 1.11051     1.82687  2.78539 5.346e-03
ninsclasMedicare & Medicaid   1.31466 0.95872     1.80274  1.69828 8.945e-02
ninsclasNo insurance          1.48120 1.08369     2.02451  2.46410 1.374e-02
ninsclasPrivate               1.22142 0.96864     1.54017  1.69065 9.090e-02
ninsclasPrivate & Medicare    1.09231 0.84216     1.41675  0.66538 5.058e-01
cat1CHF                       1.26240 0.91746     1.73702  1.43095 1.524e-01
cat1Cirrhosis                 1.13208 0.71111     1.80228  0.52292 6.010e-01
cat1Colon Cancer              0.28777 0.03871     2.13912 -1.21701 2.236e-01
cat1Coma                      1.80364 1.26471     2.57223  3.25668 1.127e-03
cat1COPD                      1.29154 0.94190     1.77096  1.58836 1.122e-01
cat1Lung Cancer               4.01275 1.11068    14.49757  2.12013 3.399e-02
cat1MOSF w/Malignancy         1.73908 1.12698     2.68362  2.50006 1.242e-02
cat1MOSF w/Sepsis             0.91042 0.75809     1.09336 -1.00454 3.151e-01
das2d3pc                      0.95889 0.94689     0.97104 -6.53305 6.444e-11
dnr1Yes                       2.31916 1.78286     3.01679  6.26929 3.627e-10
caNo                          0.18449 0.05407     0.62950 -2.69906 6.954e-03
caYes                         0.42786 0.27535     0.66485 -3.77509 1.600e-04
surv2md1                      0.04019 0.01883     0.08578 -8.30912 9.642e-17
aps1                          1.00538 0.99943     1.01136  1.77058 7.663e-02
scoma1                        1.00044 0.99722     1.00366  0.26510 7.909e-01
wtkilo1                       0.99736 0.99511     0.99961 -2.29640 2.165e-02
temp1                         0.95026 0.91226     0.98986 -2.44955 1.430e-02
meanbp1                       1.00038 0.99846     1.00231  0.39015 6.964e-01
resp1                         1.00338 0.99826     1.00852  1.29310 1.960e-01
hrt1                          1.00153 0.99971     1.00335  1.64830 9.929e-02
pafi1                         1.00085 1.00020     1.00151  2.56012 1.046e-02
paco21                        0.99823 0.99156     1.00494 -0.51798 6.045e-01
ph1                           0.85616 0.39295     1.86541 -0.39084 6.959e-01
wblc1                         1.00632 1.00004     1.01263  1.97373 4.841e-02
hema1                         0.99130 0.98244     1.00023 -1.90896 5.627e-02
sod1                          1.00111 0.99255     1.00975  0.25283 8.004e-01
pot1                          1.00723 0.94178     1.07723  0.21019 8.335e-01
crea1                         1.02221 0.97765     1.06880  0.96596 3.341e-01
bili1                         1.03104 1.01123     1.05125  3.08768 2.017e-03
alb1                          1.00139 0.91701     1.09353  0.03092 9.753e-01
respYes                       1.24568 1.05427     1.47184  2.58087 9.855e-03
cardYes                       1.15927 0.97681     1.37580  1.69146 9.075e-02
neuroYes                      1.38910 1.08475     1.77882  2.60473 9.195e-03
gastrYes                      1.34301 1.07557     1.67696  2.60294 9.243e-03
renalYes                      0.84960 0.62081     1.16272 -1.01815 3.086e-01
metaYes                       1.11002 0.81446     1.51283  0.66076 5.088e-01
hemaYes                       2.21181 1.57649     3.10316  4.59481 4.331e-06
sepsYes                       1.22869 1.01105     1.49318  2.07046 3.841e-02
traumaYes                     0.87248 0.46277     1.64492 -0.42165 6.733e-01
orthoYes                      1.27121 0.21965     7.35710  0.26789 7.888e-01
cardiohx                      1.22679 1.01373     1.48464  2.10006 3.572e-02
chfhx                         1.62536 1.31689     2.00608  4.52354 6.081e-06
dementhx                      1.30321 1.03392     1.64264  2.24241 2.493e-02
psychhx                       0.98048 0.76760     1.25241 -0.15783 8.746e-01
chrpulhx                      1.21488 0.99444     1.48419  1.90543 5.672e-02
renalhx                       1.33725 0.92444     1.93440  1.54284 1.229e-01
liverhx                       1.35474 0.92463     1.98490  1.55789 1.193e-01
gibledhx                      1.27017 0.80962     1.99271  1.04084 2.980e-01
malighx                       0.50745 0.16687     1.54313 -1.19547 2.319e-01
immunhx                       1.27160 1.09645     1.47473  3.17773 1.484e-03
transhx                       0.91900 0.75992     1.11139 -0.87098 3.838e-01
amihx                         0.87264 0.62159     1.22509 -0.78708 4.312e-01

Construct propensity score model

## PS model
psModel <- glm(formula = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
               family  = binomial(link = "logit"),
               data    = rhc)

## PS (predicted probability of treatment)
rhc$ps <- predict(psModel, type = "response")

## PS model diagnostic by ROC
rocPsModel <- roc(swang1 ~ ps, data = rhc)
plot(rocPsModel, legacy.axes = TRUE)

plot of chunk unnamed-chunk-6


Call:
roc.formula(formula = swang1 ~ ps, data = rhc)

Data: ps in 3551 controls (swang1 No RHC) < 2184 cases (swang1 RHC).
Area under the curve: 0.798

Plot PS distribution by treatment groups

## Define a function to create density data (x = ps, y = density) frame given weights
CreateDataset <- function(data = rhc, ps = "ps", weights = "one") {
    ## Density for overall cohort
    dfDataAll <- suppressWarnings(density(x = data[,ps], weights = data[,weights]))
    dfDataAll <- data.frame(dfDataAll[c("x","y")])
    dfDataAll$group <- "All"

    ## Density for RHC
    dfDataRhc <- suppressWarnings(density(x = subset(data, swang1 == "RHC")[,ps], weights = subset(data, swang1 == "RHC")[,weights]))
    dfDataRhc <- data.frame(dfDataRhc[c("x","y")])
    dfDataRhc$group <- "RHC"

    ## Density for No RHC
    dfDataNoRhc <- suppressWarnings(density(x = subset(data, swang1 == "No RHC")[,ps], weights = subset(data, swang1 != "RHC")[,weights]))
    dfDataNoRhc <- data.frame(dfDataNoRhc[c("x","y")])
    dfDataNoRhc$group <- "No RHC"

    ## combine datasets
    dfData <- rbind(dfDataAll, dfDataRhc, dfDataNoRhc)

    ## Return
    dfData
}

## Create dataset with weight = 1 for every subject
dfData <- CreateDataset(data = rhc, ps = "ps", weights = "one")

## Framewrok only (no layer)
plotBase <- ggplot(data = dfData,
                   mapping = aes(x = x, y = y, color = group)) +
    scale_x_continuous(limit = c(-0.1,1.1), breaks = c(0,0.5,1.0)) +
    scale_y_continuous(breaks = NULL) +
    ## scale_color_manual(values = c("All" = "black", "RHC" = "red", "No RHC" = "blue")) +
    scale_color_manual(values = c("All" = "#999999", "RHC" = "#D55E00", "No RHC" = "#0072B2")) +
    labs(title = "PS distribution by treatment", x = "Propensity score", y = "Distribution") +
    theme_bw() +
    theme(legend.key = element_blank())

## Plot with total
plotOrig <- layer(geom = "line")
plotBase + plotOrig

plot of chunk unnamed-chunk-7


## Plot like mirror image
dfDataMirror <- dfData
dfDataMirror <- subset(dfDataMirror, group != "All")
dfDataMirror <- within(dfDataMirror, {
    y[group != "RHC"] <- (y * -1)[group != "RHC"]
 })
plotMirror <- layer(geom = "line", data = dfDataMirror)
plotBase + plotMirror + geom_hline(yintercept = 0)

plot of chunk unnamed-chunk-7


## Add total distribution both above and below (y = -y)
plotTotalPos <- layer(geom = "line", data = subset(dfData, group == "All"), size = 0.1)
plotTotalNeg <- layer(geom = "line", data = subset(dfData, group == "All"), mapping = aes(y = -y), size = 0.1)
plotMirrorTotal <- plotBase + plotMirror + geom_hline(yintercept = 0) + plotTotalPos + plotTotalNeg
plotMirrorTotal

plot of chunk unnamed-chunk-7


## Flip
plotMirrorTotal + coord_flip()

plot of chunk unnamed-chunk-7

Visual illustration of PS uses

Stratification

Estimate the effect within each propensity score quintile. The exposed and the unexposed within each quintile are considered exchangeable.

## Check quintiles
quintilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.2, to = 0.8, by = 0.2))
quintilePoints <- c(0,quintilePoints,1)

## Plot the idea
plotBase + plotOrig + geom_vline(xintercept = quintilePoints) +
    labs(title = "Stratification into quintiles")

plot of chunk unnamed-chunk-8


## Plot the idea (Mirror image)
plotMirrorTotal + coord_flip()+ geom_vline(xintercept = quintilePoints) +
    labs(title = "Stratification into quintiles")

plot of chunk unnamed-chunk-8


## Add quintile variable
rhc$psQunintile <- cut(x = rhc$ps, breaks = quintilePoints, labels = paste0("q",1:5))

## Perform unadjusted analysis in each quintile
glmQuint <- dlply(.data = rhc,
                  .variables = c("psQunintile"),
                  .drop = TRUE, .parallel = TRUE,
                  .fun = function(DF) {

                      ## Perform
                      glmWithinQuint <- glm(formula = death ~ swang1,
                                            family  = binomial(link = "logit"),
                                            data    = DF)
                      ## Return
                      glmWithinQuint
                  })
## Create GetConfIn objects in each quintile
resGlmQuint <- lapply(glmQuint,
                      FUN = function(model) {

                          GetConfInt(model)[2, , drop = F]
                      })
## Combine
resGlmQuint <- do.call(rbind, resGlmQuint)
## Name
rownames(resGlmQuint) <- paste0("q", 1:5, ":  ", rownames(resGlmQuint))
## Show
print(resGlmQuint, quote = F)
                  OR  lower upper z value Pr(>|z|)
q1:  swang1RHC 1.366 0.8282 2.326  1.1900 0.234031
q2:  swang1RHC 1.498 1.1017 2.056  2.5436 0.010971
q3:  swang1RHC 1.282 0.9945 1.658  1.9081 0.056372
q4:  swang1RHC 1.393 1.0878 1.785  2.6235 0.008703
q5:  swang1RHC 1.100 0.8309 1.452  0.6716 0.501831

Adjustment

The propensity score is included in the outcome model. It is simple, but the relationship between the PS and the outcome must be in the correct functional form. It is basically a stratification method with additional assumptions and fine strata. If propensity score is modeled into quintiles and the PS quintile * exposure interaction term is included, it is equivalent to stratification in the previous sections.

## Plot
plotBase + plotOrig + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
    labs(title = "Adjustment (stratification with a model)")

plot of chunk unnamed-chunk-9


## Plot (Mirror image)
plotMirrorTotal + coord_flip() + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
    labs(title = "Adjustment (stratification with a model)")

plot of chunk unnamed-chunk-9


## Perform PS-adjusted logistic regression (linear PS)
glmPsAdjLinear <- glm(formula = death ~ swang1 + ps,
                      family  = binomial(link = "logit"),
                      data    = rhc)
GetConfInt(glmPsAdjLinear)
                OR  lower upper z value  Pr(>|z|)
(Intercept) 1.7688 1.6014 1.955  11.215 3.424e-29
swang1RHC   1.2975 1.1389 1.479   3.909 9.256e-05
ps          0.8704 0.6742 1.124  -1.065 2.867e-01

## Perform PS-adjusted logistic regression (quadratic PS)
glmPsAdjQuad <- glm(formula = death ~ swang1 + ps + I(ps^2),
                    family  = binomial(link = "logit"),
                    data    = rhc)
GetConfInt(glmPsAdjQuad)
                OR  lower upper z value  Pr(>|z|)
(Intercept) 1.7380 1.5014 2.014  7.3754 1.638e-13
swang1RHC   1.2977 1.1390 1.479  3.9094 9.251e-05
ps          0.9887 0.4329 2.251 -0.0271 9.784e-01
I(ps^2)     0.8601 0.3411 2.179 -0.3188 7.499e-01

## Perform PS-adjusted logistic regression (PS quintiles)
glmPsAdjQuintile <- glm(formula = death ~ swang1 + psQunintile,
                        family  = binomial(link = "logit"),
                        data    = rhc)
GetConfInt(glmPsAdjQuintile)
                  OR  lower upper  z value  Pr(>|z|)
(Intercept)   1.7346 1.5383 1.959  8.94078 3.864e-19
swang1RHC     1.3096 1.1503 1.492  4.06847 4.732e-05
psQunintileq2 0.9927 0.8361 1.179 -0.08306 9.338e-01
psQunintileq3 0.9506 0.7982 1.132 -0.56847 5.697e-01
psQunintileq4 1.0349 0.8630 1.241  0.36971 7.116e-01
psQunintileq5 0.8536 0.7041 1.035 -1.61258 1.068e-01

## Perform PS-adjusted logistic regression (PS quintiles with interaction)
glmPsAdjQuintileInt <- glm(formula = death ~ swang1 + psQunintile + swang1:psQunintile,
                           family  = binomial(link = "logit"),
                           data    = rhc)
GetConfInt(glmPsAdjQuintileInt)
                            OR  lower upper  z value  Pr(>|z|)
(Intercept)             1.7303 1.5292 1.961  8.65278 5.026e-18
swang1RHC               1.3660 0.8282 2.326  1.19004 2.340e-01
psQunintileq2           0.9701 0.8078 1.165 -0.32553 7.448e-01
psQunintileq3           0.9598 0.7907 1.166 -0.41493 6.782e-01
psQunintileq4           1.0064 0.8135 1.247  0.05829 9.535e-01
psQunintileq5           0.9742 0.7438 1.281 -0.18837 8.506e-01
swang1RHC:psQunintileq2 1.0968 0.5940 1.983  0.30130 7.632e-01
swang1RHC:psQunintileq3 0.9388 0.5213 1.649 -0.21559 8.293e-01
swang1RHC:psQunintileq4 1.0196 0.5679 1.784  0.06669 9.468e-01
swang1RHC:psQunintileq5 0.8055 0.4424 1.430 -0.72536 4.682e-01

## Perform PS-adjusted logistic regression (PS decile)
## Create decile category variable
decilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.1, to = 0.9, by = 0.1))
decilePoints <- c(0,decilePoints,1)
rhc$psDecile <- cut(x = rhc$ps, breaks = decilePoints, labels = paste0("q",1:10))
## Perform PS-adjusted logistic regression (PS deciles)
glmPsAdjDecile <- glm(formula = death ~ swang1 + psDecile,
                      family  = binomial(link = "logit"),
                      data    = rhc)
GetConfInt(glmPsAdjDecile)
                OR  lower upper  z value  Pr(>|z|)
(Intercept) 1.7863 1.5077 2.122  6.65688 2.797e-11
swang1RHC   1.3095 1.1494 1.493  4.04516 5.229e-05
psDecileq2  0.9430 0.7408 1.200 -0.47710 6.333e-01
psDecileq3  0.9826 0.7708 1.253 -0.14181 8.872e-01
psDecileq4  0.9458 0.7417 1.206 -0.44915 6.533e-01
psDecileq5  0.8891 0.6965 1.135 -0.94416 3.451e-01
psDecileq6  0.9591 0.7494 1.228 -0.33218 7.398e-01
psDecileq7  0.9918 0.7719 1.275 -0.06467 9.484e-01
psDecileq8  1.0185 0.7907 1.312  0.14168 8.873e-01
psDecileq9  0.8268 0.6404 1.067 -1.46034 1.442e-01
psDecileq10 0.8311 0.6385 1.082 -1.37574 1.689e-01

## Perform PS-adjusted logistic regression (PS deciles with interaction)
glmPsAdjDecileInt <- glm(formula = death ~ swang1 + psDecile + swang1:psDecile,
                         family  = binomial(link = "logit"),
                         data    = rhc)
GetConfInt(coef(summary(glmPsAdjDecileInt)))
                          OR  lower upper z value  Pr(>|z|)
(Intercept)           1.8274 1.5360 2.174  6.8030 1.024e-11
swang1RHC             0.6156 0.2338 1.621 -0.9822 3.260e-01
psDecileq2            0.8934 0.6968 1.146 -0.8887 3.742e-01
psDecileq3            0.9417 0.7291 1.216 -0.4598 6.456e-01
psDecileq4            0.8946 0.6902 1.159 -0.8422 3.997e-01
psDecileq5            0.9082 0.6933 1.190 -0.6986 4.848e-01
psDecileq6            0.9093 0.6897 1.199 -0.6741 5.003e-01
psDecileq7            0.8795 0.6561 1.179 -0.8594 3.901e-01
psDecileq8            1.0453 0.7669 1.425  0.2801 7.794e-01
psDecileq9            0.9279 0.6575 1.310 -0.4258 6.703e-01
psDecileq10           0.9120 0.5826 1.428 -0.4026 6.872e-01
swang1RHC:psDecileq2  3.0558 0.9634 9.693  1.8966 5.787e-02
swang1RHC:psDecileq3  2.3912 0.8178 6.991  1.5926 1.113e-01
swang1RHC:psDecileq4  2.4841 0.8638 7.144  1.6884 9.134e-02
swang1RHC:psDecileq5  1.8536 0.6592 5.212  1.1699 2.420e-01
swang1RHC:psDecileq6  2.3161 0.8242 6.508  1.5932 1.111e-01
swang1RHC:psDecileq7  2.6305 0.9390 7.368  1.8403 6.572e-02
swang1RHC:psDecileq8  1.9400 0.6927 5.433  1.2612 2.072e-01
swang1RHC:psDecileq9  1.7285 0.6147 4.861  1.0374 2.995e-01
swang1RHC:psDecileq10 1.8488 0.6346 5.386  1.1264 2.600e-01

Matching

The exposed and the unexposed are matched based on their propensity scores, creating an RCT like cohort of exchangeable exposed and unexposed.

nonrandom package

## Propensity score estimation (pscore object returned)
psObj <- pscore(formula     = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
                data        = rhc,
                family      = "binomial",
                name.pscore = "pscore")

## Define a function to match and show
MatchAndPlot <- function(x = 0.05) {

    ## Matching 1:1 (caliper x * sd(logit(PS)))
    psMatchObj <- ps.match(object             = psObj,
                           who.treated        = "RHC",
                           ratio              = 1,              # default 1:1 match
                           caliper            = "logit",        # caliper = x * SD(PS)
                           x                  = x
                           )
    ## Show results
    print(psMatchObj)

    ## Extract density
    dfDataMatched <- CreateDataset(data = psMatchObj$data.matched, ps = "ps", weights = "one")

    ## Plot matched
    plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
    plotObj <- plotBase + plotOrig + plotMatched +
        labs(title = paste("nonrandom::ps.match() Caliper", x,"* SD(logit(PS))"))

    ## Show plot
    print(plotObj)

    ## Show results
    glmMatched <- glm(formula = death ~ swang1,
                      family  = binomial(link = "logit"),
                      data    = psMatchObj$data.matched)

    ## Print result
    print(GetConfInt(glmMatched))
}

## caliper 0.2 * sd(logit(PS))
MatchAndPlot(x = 0.2)

 Matched by:  pscore 

 Matching parameter:

Caliper size:    "0.279"
Ratio:           "1"    
Who is treated?: "RHC"  

 Matching information:

Untreated to treated?: TRUE
Best match?:           TRUE

 Matching data:

Number of treated obs.:             2184
Number of matched treated obs.:     2184
Number of untreated obs.:           3551
Number of matched untreated obs.:   2184
Number of total matched obs.:       4368
Number of not matched obs.:         1367
Number of matching sets:            2184
Number of incomplete matching sets:    0

plot of chunk unnamed-chunk-10

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.693 1.553 1.847  11.888 1.368e-32
swang1RHC   1.258 1.110 1.425   3.593 3.268e-04
## caliper 0.1 * sd(logit(PS))
MatchAndPlot(x = 0.1)

 Matched by:  pscore 

 Matching parameter:

Caliper size:    "0.139"
Ratio:           "1"    
Who is treated?: "RHC"  

 Matching information:

Untreated to treated?: TRUE
Best match?:           TRUE

 Matching data:

Number of treated obs.:             2184
Number of matched treated obs.:     1969
Number of untreated obs.:           3551
Number of matched untreated obs.:   1969
Number of total matched obs.:       3938
Number of not matched obs.:         1797
Number of matching sets:            1969
Number of incomplete matching sets:    0

plot of chunk unnamed-chunk-10

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.694 1.546 1.856  11.295 1.394e-29
swang1RHC   1.315 1.153 1.502   4.065 4.810e-05
## caliper 0.05 * sd(logit(PS))
MatchAndPlot(x = 0.05)

 Matched by:  pscore 

 Matching parameter:

Caliper size:    "0.07"
Ratio:           "1"   
Who is treated?: "RHC" 

 Matching information:

Untreated to treated?: TRUE
Best match?:           TRUE

 Matching data:

Number of treated obs.:             2184
Number of matched treated obs.:     1729
Number of untreated obs.:           3551
Number of matched untreated obs.:   1729
Number of total matched obs.:       3458
Number of not matched obs.:         2277
Number of matching sets:            1729
Number of incomplete matching sets:    0

plot of chunk unnamed-chunk-10

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.723 1.563 1.901  10.904 1.107e-27
swang1RHC   1.278 1.110 1.472   3.408 6.543e-04

Matching package

## Define similarly to Matching::Match()
MatchAndPlot2 <- function(x = 0.05, returnData = FALSE) {

    ## Match by Matching::Match()
    listMatch <- Match(Tr       = (rhc$swang1 == "RHC"),      # Need to be in 0,1
                       X        = log(rhc$ps / (1 - rhc$ps)), # logit of PS,i.e., log(PS/(1-PS)) as matching variable
                       M        = 1,                          # 1:1 match
                       caliper  = x,                          # caliper = x * SD(each matching variable)
                       replace  = FALSE,
                       ties     = TRUE,
                       version  = "fast")

    ## Show number matched
    print(str(listMatch[c("index.treated","index.control")]))

    ## Extract matched data
    psMatchData <- rhc[unlist(listMatch[c("index.treated","index.control")]), ]

    ## Extract density
    dfDataMatched <- CreateDataset(data = psMatchData, ps = "ps", weights = "one")

    ## Plot matched
    plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
    plotObj <- plotBase + plotOrig + plotMatched +
        labs(title = paste("Matching::Match() Caliper", x,"* SD(logit(PS))"))

    ## Show plot
    print(plotObj)

    ## Show results
    glmMatched <- glm(formula = death ~ swang1,
                      family  = binomial(link = "logit"),
                      data    = psMatchData)
    print(GetConfInt(glmMatched))

    ## Return dataset for plotting
    if(returnData == TRUE) {
        return(dfDataMatched)
    }
}

## caliper 0.2 * sd(logit(PS))
## MatchAndPlot2(x = 0.2)
dfDataMatched <- MatchAndPlot2(x = 0.2, returnData = TRUE)
List of 2
 $ index.treated: num [1:1563] 2 3 5 10 12 13 17 18 21 22 ...
 $ index.control: num [1:1563] 3826 1386 450 3319 5644 ...
NULL

plot of chunk unnamed-chunk-11

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.631 1.474 1.808   9.391 5.925e-21
swang1RHC   1.379 1.189 1.600   4.249 2.148e-05
## caliper 0.1 * sd(logit(PS))
MatchAndPlot2(x = 0.1)
List of 2
 $ index.treated: num [1:1546] 2 3 5 10 12 13 17 18 21 22 ...
 $ index.control: num [1:1546] 3826 1855 3772 4214 5644 ...
NULL

plot of chunk unnamed-chunk-11

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.703 1.536 1.889  10.104 5.287e-24
swang1RHC   1.336 1.151 1.552   3.798 1.456e-04
## caliper 0.05 * sd(logit(PS))
MatchAndPlot2(x = 0.05)
List of 2
 $ index.treated: num [1:1539] 2 3 5 10 12 13 17 18 21 22 ...
 $ index.control: num [1:1539] 3826 4125 3772 3243 5644 ...
NULL

plot of chunk unnamed-chunk-11

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.773 1.599 1.968   10.79 3.945e-27
swang1RHC   1.275 1.097 1.482    3.17 1.523e-03

## Change data for mirror plot
dfDataMatched <- subset(dfDataMatched, group != "All")
dfDataMatched$y[dfDataMatched$group != "RHC"] <- -1 * dfDataMatched$y[dfDataMatched$group != "RHC"]
## Create 
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
## Plot
plotMirrorTotal + plotMatched + coord_flip() +
    labs(title = "PS Matching")

plot of chunk unnamed-chunk-11

Weighting

There is confounding when the exposed and the unexposed have different distributions of propensity score. These distributions can be re-weighted to have the identical distribution. Inverse probability of treatment weight (IPTW) and standardized mortality ratio weight (SMRW) are the most common ones.

Create weights

rhc <- within(rhc, {

    ## Create inverse probability of treatment assignment weights (IPTW)
    iptw <- NA
    ## 1/PS for treated
    iptw[swang1 == "RHC"] <- 1/ps[swang1 == "RHC"]
    ## 1/(1-PS) for untreated (1/probability of not treated)
    iptw[swang1 != "RHC"] <- (1/(1 - ps))[swang1 != "RHC"]

    ## Create SMR (Average Treatment effect in Treated, ATT) weights
    smrw <- NA
    ## 1 for treated
    smrw[swang1 == "RHC"] <- 1
    ## PS/(1-PS), i.e., odds of treatment for untreated
    smrw[swang1 != "RHC"] <- (ps/(1 - ps))[swang1 != "RHC"]

    ## Numerator weight for stabilization
    probRhc <- NA
    ## P(RHC) for treated
    probRhc[swang1 == "RHC"] <- sum(swang1 == "RHC")/length(swang1)
    ## P(No RHC) for untreated
    probRhc[swang1 != "RHC"] <- sum(swang1 != "RHC")/length(swang1)

    ## Create IPTW stabilized with marginal probabilities of assigned treatment
    iptwStab <- iptw * probRhc

    ## Create matching weights
    matchWeightNumerator <- pmin(ps, 1 - ps)
    matchWeight <- matchWeightNumerator * iptw
 })

## Check weights
summary(rhc[,c("iptw","iptwStab","smrw","matchWeight")])
      iptw          iptwStab           smrw         matchWeight    
 Min.   : 1.00   Min.   : 0.389   Min.   : 0.003   Min.   :0.0027  
 1st Qu.: 1.18   1st Qu.: 0.657   1st Qu.: 0.226   1st Qu.:0.1833  
 Median : 1.47   Median : 0.780   Median : 0.915   Median :0.4728  
 Mean   : 1.98   Mean   : 0.993   Mean   : 0.771   Mean   :0.5306  
 3rd Qu.: 2.08   3rd Qu.: 1.055   3rd Qu.: 1.000   3rd Qu.:1.0000  
 Max.   :52.52   Max.   :20.001   Max.   :24.488   Max.   :1.0000  

IPTW

## Plot IPTW
dfDataIptw <- CreateDataset(data = rhc, ps = "ps", weights = "iptw")
plotIptw <- layer(geom = "line", data = subset(dfDataIptw, group != "All"), size = 2)
plotBase + plotOrig + plotIptw + labs(title = "IPTW")

plot of chunk unnamed-chunk-13


## Plot IPTW (Mirror image)
dfDataIptwMirror <- subset(dfDataIptw, group != "All")
dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"] <- -1 * dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"]
plotIptwMirror <- layer(geom = "line", data = dfDataIptwMirror, size = 2)
plotMirrorTotal + coord_flip() + plotIptwMirror + labs(title = "IPTW")

plot of chunk unnamed-chunk-13



## IPTW analysis
glmIptw <- glm(formula = death ~ swang1,
               family  = binomial(link = "logit"),
               data    = rhc,
               weights = iptw)
## The regular SE and confidence intervals are not valid
GetConfInt(glmIptw)
               OR lower upper z value  Pr(>|z|)
(Intercept) 1.685 1.598 1.777  19.190 4.484e-82
swang1RHC   1.272 1.177 1.375   6.075 1.240e-09
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmIptw, vcov = sandwich))
               OR lower upper z value  Pr(>|z|)
(Intercept) 1.685 1.548 1.833  12.105 9.890e-34
swang1RHC   1.272 1.089 1.487   3.029 2.454e-03
## Robust variance estimator using geepack::geeglm()
geeglmIptw <- geeglm(formula   = as.numeric(death == "Yes") ~ swang1,
                     family    = binomial(link = "logit"),
                     id        = X,
                     data      = rhc,
                     weights   = iptw,
                     corstr    = "exchangeable"
                     )
GetConfInt(geeglmIptw)
               OR lower upper    Wald Pr(>|W|)
(Intercept) 1.685 1.548 1.833 146.540 0.000000
swang1RHC   1.272 1.089 1.487   9.174 0.002454

Stabilized IPTW

## Plot IPTW stabilized with marginal probabilities of assigned treatment
dfDataIptwStab <- CreateDataset(data = rhc, ps = "ps", weights = "iptwStab")
plotIptwStab <- layer(geom = "line", data = dfDataIptwStab, size = 2)
plotBase + plotOrig + plotIptwStab + labs(title = "Stabilized IPTW")