Propensity score using right heart cath dataset

References

How To Use Propensity Score Analysis: http://www.mc.vanderbilt.edu/crc/workshop_files/2008-04-11.pdf
Vanderbilt Biostatistics Wiki>Main Web>DataSets: http://biostat.mc.vanderbilt.edu/twiki/bin/view/Main/DataSets
Illustration of PS uses: http://rpubs.com/kaz_yos/use-ps
Propensity score demos: http://rpubs.com/kaz_yos/epi271-lab3
AHRQ Summary Variables in Observational Research: Propensity Scores and Disease Risk Scores: http://effectivehealthcare.ahrq.gov/index.cfm/search-for-guides-reviews-and-reports/?productid=1084&pageaction=displayproduct (quotes are from this article).
Colors in ggplot2: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/

Load packages

## plotting
library(ggplot2)
## dlply
library(plyr)
## ROC
library(pROC)
## Load nonrandom for PS matching
library(nonrandom)
## Load Matching for PS matching
library(Matching)
## Load sandwich package for robust sandwich covariance matrix estimators
library(sandwich)
## Load lmtest package for coeftest
library(lmtest)
## Load geepack for robust sandwich covariance matrix estimators
library(geepack)
## Load tableone for summarizing regression results
library(tableone)

Prepare data

## Right heart cath dataset
rhc <- read.csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.csv")
## first 6 rows
head(rhc)

  X              cat1          cat2  ca sadmdte dschdte dthdte lstctdte death cardiohx chfhx dementhx psychhx
1 1              COPD          <NA> Yes   11142   11151     NA    11382    No        0     0        0       0
2 2     MOSF w/Sepsis          <NA>  No   11799   11844  11844    11844   Yes        1     1        0       0
3 3 MOSF w/Malignancy MOSF w/Sepsis Yes   12083   12143     NA    12400    No        0     0        0       0
4 4               ARF          <NA>  No   11146   11183  11183    11182   Yes        0     0        0       0
5 5     MOSF w/Sepsis          <NA>  No   12035   12037  12037    12036   Yes        0     0        0       0
6 6              COPD          <NA>  No   12389   12396     NA    12590    No        0     1        0       0
  chrpulhx renalhx liverhx gibledhx malighx immunhx transhx amihx   age    sex    edu surv2md1 das2d3pc t3d30 dth30
1        1       0       0        0       1       0       0     0 70.25   Male 12.000    0.641    23.50    30    No
2        0       0       0        0       0       1       1     0 78.18 Female 12.000    0.755    14.75    30    No
3        0       0       0        0       1       1       0     0 46.09 Female 14.070    0.317    18.14    30    No
4        0       0       0        0       0       1       0     0 75.33 Female  9.000    0.441    22.93    30    No
5        0       0       0        0       0       0       0     0 67.91   Male  9.945    0.437    21.05     2   Yes
6        1       0       0        0       0       0       0     0 86.08 Female  8.000    0.665    17.50    30    No
  aps1 scoma1 meanbp1 wblc1 hrt1 resp1 temp1 pafi1 alb1 hema1 bili1 crea1 sod1  pot1 paco21   ph1 swang1 wtkilo1
1   46      0      41 22.10  124    10  38.7  68.0  3.5  58.0  1.01   1.2  145 4.000     40 7.359 No RHC    64.7
2   50      0      63 28.90  137    38  38.9 218.3  2.6  32.5  0.70   0.6  137 3.300     34 7.329    RHC    45.7
3   82      0      57  0.05  130    40  36.4 275.5  3.5  21.1  1.01   2.6  146 2.900     16 7.359    RHC     0.0
4   48      0      55 23.30   58    26  35.8 156.7  3.5  26.3  0.40   1.7  117 5.800     30 7.460 No RHC    54.6
5   72     41      65 29.70  125    27  34.8 478.0  3.5  24.0  1.01   3.6  126 5.800     17 7.229    RHC    78.4
6   38      0     115 18.00  134    36  39.2 184.2  3.1  30.5  1.01   1.4  138 5.399     68 7.300 No RHC    54.9
  dnr1           ninsclas resp card neuro gastr renal meta hema seps trauma ortho adld3p urin1  race     income
1   No           Medicare  Yes  Yes    No    No    No   No   No   No     No    No      0    NA white Under $11k
2   No Private & Medicare   No   No    No    No    No   No   No  Yes     No    No     NA  1437 white Under $11k
3   No            Private   No  Yes    No    No    No   No   No   No     No    No     NA   599 white   $25-$50k
4   No Private & Medicare  Yes   No    No    No    No   No   No   No     No    No     NA    NA white   $11-$25k
5  Yes           Medicare   No  Yes    No    No    No   No   No   No     No    No     NA    64 white Under $11k
6   No           Medicare  Yes   No    No    No    No   No   No   No     No    No      0   242 white Under $11k
  ptid
1    5
2    7
3    9
4   10
5   11
6   12

## Show outcome (death) and exposure (swang1)
addmargins(table(rhc[,c("swang1", "death")]))

        death
swang1     No  Yes  Sum
  No RHC 1315 2236 3551
  RHC     698 1486 2184
  Sum    2013 3722 5735

## Create weights for plotting
rhc$one <- 1

Define a function to obtain ORs

## Define function to obtain ORs with 95% confidence intervals from a result matrix
GetConfInt <- function(obj) {

    logitsticModel <- FALSE

    if (identical(class(obj), c("glm", "lm")) == TRUE) {

        mat <- coef(summary(obj))
        logitsticModel <- TRUE

    } else if (identical(class(obj), c("geeglm", "gee", "glm")) == TRUE) {

        mat <- coef(summary(obj))

    } else if (identical(class(obj), c("coeftest")) == TRUE) {

        mat <- obj

    } else if (identical(class(obj), c("matrix")) == TRUE) {

        mat <- obj

    } else {

        stop("Not a supported object")
    }

    ## Add point estimates
    matRes <- mat[, 1, drop = F]

    ## 1.96 * SE
    matSe <-  mat[, 2, drop = F] * qnorm(0.975)

    ## Estimate, lower 95% limit, upper 95% limit
    matRes <- cbind(matRes, (matRes - matSe), (matRes + matSe))

    ## Name
    colnames(matRes) <- c("OR","lower","upper")

    ## Exponentiate
    matRes <- exp(matRes)

    ## Add p-value
    matRes <- cbind(matRes, mat[, 3:4, drop = F])

    if (logitsticModel == TRUE) {

        matRes[, c("lower","upper")] <- exp(suppressMessages(confint(obj)))
    }

    ## Show
    matRes
}

Perform a “regular” logistic regression

Estimate the effect within each combination of confounder values.

Pros:
This is what people all know.
Cons:
Hard to fit if there are few outcomes.
Effect measure is limited by the model, i.e., mostly odds ratio.
Only conditional effect is obtained. Conditional odds ratio is not comparable to marginal odds ratio in RCTs. Non-collapsibility issue.
Effect modification by the confounders needs consideration.

## Crude analysis (confounded!)
glmCrude <- glm(formula = death ~ swang1,
                family  = binomial(link = "logit"),
                data    = rhc)
GetConfInt(glmCrude)

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.700 1.589 1.821  15.276 1.113e-52
swang1RHC   1.252 1.119 1.402   3.905 9.425e-05


## Fit a "regular" fully adjusted model
glmFull <- glm(formula =  death ~ swang1 + age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
               family  = binomial(link = "logit"),
               data    = rhc)
## Show result
ShowRegTable(glmFull)

                            exp(beta) [confint]     p     
(Intercept)                 167.87 [0.30, 96818.05]  0.113
swang1RHC                     1.38 [1.19, 1.60]     <0.001
age                           1.01 [1.01, 1.02]     <0.001
sexMale                       1.33 [1.17, 1.52]     <0.001
raceother                     1.06 [0.79, 1.43]      0.709
racewhite                     0.99 [0.82, 1.19]      0.891
edu                           1.01 [0.99, 1.03]      0.412
income$11-$25k                1.20 [0.92, 1.57]      0.184
income$25-$50k                1.02 [0.78, 1.34]      0.875
incomeUnder $11k              1.42 [1.09, 1.86]      0.009
ninsclasMedicare              1.42 [1.11, 1.83]      0.005
ninsclasMedicare & Medicaid   1.31 [0.96, 1.81]      0.089
ninsclasNo insurance          1.48 [1.08, 2.03]      0.014
ninsclasPrivate               1.22 [0.97, 1.54]      0.091
ninsclasPrivate & Medicare    1.09 [0.84, 1.42]      0.506
cat1CHF                       1.26 [0.92, 1.74]      0.152
cat1Cirrhosis                 1.13 [0.71, 1.81]      0.601
cat1Colon Cancer              0.29 [0.04, 2.26]      0.224
cat1Coma                      1.80 [1.27, 2.58]      0.001
cat1COPD                      1.29 [0.94, 1.77]      0.112
cat1Lung Cancer               4.01 [1.26, 17.93]     0.034
cat1MOSF w/Malignancy         1.74 [1.14, 2.71]      0.012
cat1MOSF w/Sepsis             0.91 [0.76, 1.09]      0.315
das2d3pc                      0.96 [0.95, 0.97]     <0.001
dnr1Yes                       2.32 [1.79, 3.03]     <0.001
caNo                          0.18 [0.05, 0.58]      0.007
caYes                         0.43 [0.27, 0.66]     <0.001
surv2md1                      0.04 [0.02, 0.09]     <0.001
aps1                          1.01 [1.00, 1.01]      0.077
scoma1                        1.00 [1.00, 1.00]      0.791
wtkilo1                       1.00 [1.00, 1.00]      0.022
temp1                         0.95 [0.91, 0.99]      0.014
meanbp1                       1.00 [1.00, 1.00]      0.696
resp1                         1.00 [1.00, 1.01]      0.196
hrt1                          1.00 [1.00, 1.00]      0.099
pafi1                         1.00 [1.00, 1.00]      0.010
paco21                        1.00 [0.99, 1.00]      0.604
ph1                           0.86 [0.39, 1.86]      0.696
wblc1                         1.01 [1.00, 1.01]      0.048
hema1                         0.99 [0.98, 1.00]      0.056
sod1                          1.00 [0.99, 1.01]      0.800
pot1                          1.01 [0.94, 1.08]      0.834
crea1                         1.02 [0.98, 1.07]      0.334
bili1                         1.03 [1.01, 1.05]      0.002
alb1                          1.00 [0.92, 1.10]      0.975
respYes                       1.25 [1.05, 1.47]      0.010
cardYes                       1.16 [0.98, 1.38]      0.091
neuroYes                      1.39 [1.09, 1.78]      0.009
gastrYes                      1.34 [1.08, 1.68]      0.009
renalYes                      0.85 [0.62, 1.17]      0.309
metaYes                       1.11 [0.82, 1.52]      0.509
hemaYes                       2.21 [1.59, 3.12]     <0.001
sepsYes                       1.23 [1.01, 1.49]      0.038
traumaYes                     0.87 [0.46, 1.63]      0.673
orthoYes                      1.27 [0.24, 9.57]      0.789
cardiohx                      1.23 [1.01, 1.49]      0.036
chfhx                         1.63 [1.32, 2.01]     <0.001
dementhx                      1.30 [1.04, 1.65]      0.025
psychhx                       0.98 [0.77, 1.25]      0.875
chrpulhx                      1.21 [1.00, 1.49]      0.057
renalhx                       1.34 [0.93, 1.94]      0.123
liverhx                       1.35 [0.93, 1.99]      0.119
gibledhx                      1.27 [0.81, 2.00]      0.298
malighx                       0.51 [0.14, 1.40]      0.232
immunhx                       1.27 [1.10, 1.48]      0.001
transhx                       0.92 [0.76, 1.11]      0.384
amihx                         0.87 [0.62, 1.23]      0.431

Construct propensity score model

## PS model
psModel <- glm(formula = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
               family  = binomial(link = "logit"),
               data    = rhc)

## PS (predicted probability of treatment)
rhc$ps <- predict(psModel, type = "response")

## PS model diagnostic by ROC
rocPsModel <- roc(swang1 ~ ps, data = rhc)
plot(rocPsModel, legacy.axes = TRUE)

plot of chunk unnamed-chunk-6


Call:
roc.formula(formula = swang1 ~ ps, data = rhc)

Data: ps in 3551 controls (swang1 No RHC) < 2184 cases (swang1 RHC).
Area under the curve: 0.798

Plot PS distribution by treatment groups

## Define a function to create density data (x = ps, y = density) frame given weights
CreateDataset <- function(data = rhc, ps = "ps", weights = "one") {
    ## Density for overall cohort
    dfDataAll <- suppressWarnings(density(x = data[,ps], weights = data[,weights]))
    dfDataAll <- data.frame(dfDataAll[c("x","y")])
    dfDataAll$group <- "All"

    ## Density for RHC
    dfDataRhc <- suppressWarnings(density(x = subset(data, swang1 == "RHC")[,ps], weights = subset(data, swang1 == "RHC")[,weights]))
    dfDataRhc <- data.frame(dfDataRhc[c("x","y")])
    dfDataRhc$group <- "RHC"

    ## Density for No RHC
    dfDataNoRhc <- suppressWarnings(density(x = subset(data, swang1 == "No RHC")[,ps], weights = subset(data, swang1 != "RHC")[,weights]))
    dfDataNoRhc <- data.frame(dfDataNoRhc[c("x","y")])
    dfDataNoRhc$group <- "No RHC"

    ## combine datasets
    dfData <- rbind(dfDataAll, dfDataRhc, dfDataNoRhc)

    ## Return
    dfData
}

## Create dataset with weight = 1 for every subject
dfData <- CreateDataset(data = rhc, ps = "ps", weights = "one")

## Framewrok only (no layer)
plotBase <- ggplot(data = dfData,
                   mapping = aes(x = x, y = y, color = group)) +
    scale_x_continuous(limit = c(-0.1,1.1), breaks = c(0,0.5,1.0)) +
    scale_y_continuous(breaks = NULL) +
    ## scale_color_manual(values = c("All" = "black", "RHC" = "red", "No RHC" = "blue")) +
    scale_color_manual(values = c("All" = "#999999", "RHC" = "#D55E00", "No RHC" = "#0072B2")) +
    labs(title = "PS distribution by treatment", x = "Propensity score", y = "Distribution") +
    theme_bw() +
    theme(legend.key = element_blank())

## Plot with total
plotOrig <- layer(geom = "line")
plotBase + plotOrig

plot of chunk unnamed-chunk-7


## Plot like mirror image
dfDataMirror <- dfData
dfDataMirror <- subset(dfDataMirror, group != "All")
dfDataMirror <- within(dfDataMirror, {
    y[group != "RHC"] <- (y * -1)[group != "RHC"]
 })
plotMirror <- layer(geom = "line", data = dfDataMirror)
plotBase + plotMirror + geom_hline(yintercept = 0)

plot of chunk unnamed-chunk-7


## Add total distribution both above and below (y = -y)
plotTotalPos <- layer(geom = "line", data = subset(dfData, group == "All"), size = 0.1)
plotTotalNeg <- layer(geom = "line", data = subset(dfData, group == "All"), mapping = aes(y = -y), size = 0.1)
plotMirrorTotal <- plotBase + plotMirror + geom_hline(yintercept = 0) + plotTotalPos + plotTotalNeg
plotMirrorTotal

plot of chunk unnamed-chunk-7


## Flip
plotMirrorTotal + coord_flip()

plot of chunk unnamed-chunk-7

Visual illustration of PS uses

Stratification

Estimate the effect within each propensity score quintile. The exposed and the unexposed within each quintile are considered exchangeable.

Pros:
Relatively simple in terms of what is done
Any effect measure can be used (risk difference, risk ratio, odds ratio)
Effect modification can be studied
Cons:
Are the strata narrow enough?
The first stratum has too few exposed, the last stratum has too few unexposed (positivity violation).

## Check quintiles
quintilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.2, to = 0.8, by = 0.2))
quintilePoints <- c(0,quintilePoints,1)

## Plot the idea
plotBase + plotOrig + geom_vline(xintercept = quintilePoints) +
    labs(title = "Stratification into quintiles")

plot of chunk unnamed-chunk-8


## Plot the idea (Mirror image)
plotMirrorTotal + coord_flip()+ geom_vline(xintercept = quintilePoints) +
    labs(title = "Stratification into quintiles")

plot of chunk unnamed-chunk-8


## Add quintile variable
rhc$psQunintile <- cut(x = rhc$ps, breaks = quintilePoints, labels = paste0("q",1:5))

## Perform unadjusted analysis in each quintile
glmQuint <- dlply(.data = rhc,
                  .variables = c("psQunintile"),
                  .drop = TRUE, .parallel = TRUE,
                  .fun = function(DF) {

                      ## Perform
                      glmWithinQuint <- glm(formula = death ~ swang1,
                                            family  = binomial(link = "logit"),
                                            data    = DF)
                      ## Return
                      glmWithinQuint
                  })
## Create GetConfIn objects in each quintile
resGlmQuint <- lapply(glmQuint,
                      FUN = function(model) {

                          ShowRegTable(model, printToggle = FALSE)[2, , drop = F]
                      })
## Combine
resGlmQuint <- do.call(rbind, resGlmQuint)
## Name
rownames(resGlmQuint) <- paste0("q", 1:5, ":  ", rownames(resGlmQuint))
## Show
print(resGlmQuint, quote = F)

               exp(beta) [confint] p     
q1:  swang1RHC 1.37 [0.83, 2.33]    0.234
q2:  swang1RHC 1.50 [1.10, 2.06]    0.011
q3:  swang1RHC 1.28 [0.99, 1.66]    0.056
q4:  swang1RHC 1.39 [1.09, 1.78]    0.009
q5:  swang1RHC 1.10 [0.83, 1.45]    0.502

Adjustment

The propensity score is included in the outcome model. It is simple, but the relationship between the PS and the outcome must be in the correct functional form. It is basically a stratification method with additional assumptions and fine strata. If propensity score is modeled into quintiles and the PS quintile * exposure interaction term is included, it is equivalent to stratification in the previous sections.

Pros:
Relatively simple in terms of software use
Cons:
Effect measure is limited by the model, i.e., mostly odds ratio.
Only conditional effect is obtained. Conditional odds ratio is not comparable to marginal odds ratio in RCTs. Non-collapsibility issue.
Need to model the functional form between the outcome. Is it linear, quadratic, etc?
Effect modification needs explicit modeling if suspected (PS quantile * Exposure interaction term). Otherwise, no effect modification is assumed.

## Plot
plotBase + plotOrig + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
    labs(title = "Adjustment (stratification with a model)")

plot of chunk unnamed-chunk-9


## Plot (Mirror image)
plotMirrorTotal + coord_flip() + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
    labs(title = "Adjustment (stratification with a model)")

plot of chunk unnamed-chunk-9


## Perform PS-adjusted logistic regression (linear PS)
glmPsAdjLinear <- glm(formula = death ~ swang1 + ps,
                      family  = binomial(link = "logit"),
                      data    = rhc)
ShowRegTable(glmPsAdjLinear)

            exp(beta) [confint] p     
(Intercept) 1.77 [1.60, 1.95]   <0.001
swang1RHC   1.30 [1.14, 1.48]   <0.001
ps          0.87 [0.67, 1.12]    0.287


## Perform PS-adjusted logistic regression (quadratic PS)
glmPsAdjQuad <- glm(formula = death ~ swang1 + ps + I(ps^2),
                    family  = binomial(link = "logit"),
                    data    = rhc)
ShowRegTable(glmPsAdjQuad)

            exp(beta) [confint] p     
(Intercept) 1.74 [1.50, 2.01]   <0.001
swang1RHC   1.30 [1.14, 1.48]   <0.001
ps          0.99 [0.43, 2.25]    0.978
I(ps^2)     0.86 [0.34, 2.18]    0.750


## Perform PS-adjusted logistic regression (PS quintiles)
glmPsAdjQuintile <- glm(formula = death ~ swang1 + psQunintile,
                        family  = binomial(link = "logit"),
                        data    = rhc)
ShowRegTable(glmPsAdjQuintile)

              exp(beta) [confint] p     
(Intercept)   1.73 [1.54, 1.96]   <0.001
swang1RHC     1.31 [1.15, 1.49]   <0.001
psQunintileq2 0.99 [0.84, 1.18]    0.934
psQunintileq3 0.95 [0.80, 1.13]    0.570
psQunintileq4 1.03 [0.86, 1.24]    0.712
psQunintileq5 0.85 [0.70, 1.03]    0.107


## Perform PS-adjusted logistic regression (PS quintiles with interaction)
glmPsAdjQuintileInt <- glm(formula = death ~ swang1 + psQunintile + swang1:psQunintile,
                           family  = binomial(link = "logit"),
                           data    = rhc)
ShowRegTable(glmPsAdjQuintileInt)

                        exp(beta) [confint] p     
(Intercept)             1.73 [1.53, 1.96]   <0.001
swang1RHC               1.37 [0.83, 2.33]    0.234
psQunintileq2           0.97 [0.81, 1.17]    0.745
psQunintileq3           0.96 [0.79, 1.17]    0.678
psQunintileq4           1.01 [0.81, 1.25]    0.954
psQunintileq5           0.97 [0.74, 1.28]    0.851
swang1RHC:psQunintileq2 1.10 [0.59, 1.98]    0.763
swang1RHC:psQunintileq3 0.94 [0.52, 1.65]    0.829
swang1RHC:psQunintileq4 1.02 [0.57, 1.78]    0.947
swang1RHC:psQunintileq5 0.81 [0.44, 1.43]    0.468


## Perform PS-adjusted logistic regression (PS decile)
## Create decile category variable
decilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.1, to = 0.9, by = 0.1))
decilePoints <- c(0,decilePoints,1)
rhc$psDecile <- cut(x = rhc$ps, breaks = decilePoints, labels = paste0("q",1:10))
## Perform PS-adjusted logistic regression (PS deciles)
glmPsAdjDecile <- glm(formula = death ~ swang1 + psDecile,
                      family  = binomial(link = "logit"),
                      data    = rhc)
ShowRegTable(glmPsAdjDecile)

            exp(beta) [confint] p     
(Intercept) 1.79 [1.51, 2.12]   <0.001
swang1RHC   1.31 [1.15, 1.49]   <0.001
psDecileq2  0.94 [0.74, 1.20]    0.633
psDecileq3  0.98 [0.77, 1.25]    0.887
psDecileq4  0.95 [0.74, 1.21]    0.653
psDecileq5  0.89 [0.70, 1.13]    0.345
psDecileq6  0.96 [0.75, 1.23]    0.740
psDecileq7  0.99 [0.77, 1.27]    0.948
psDecileq8  1.02 [0.79, 1.31]    0.887
psDecileq9  0.83 [0.64, 1.07]    0.144
psDecileq10 0.83 [0.64, 1.08]    0.169


## Perform PS-adjusted logistic regression (PS deciles with interaction)
glmPsAdjDecileInt <- glm(formula = death ~ swang1 + psDecile + swang1:psDecile,
                         family  = binomial(link = "logit"),
                         data    = rhc)
ShowRegTable(glmPsAdjDecileInt)

                      exp(beta) [confint] p     
(Intercept)           1.83 [1.54, 2.18]   <0.001
swang1RHC             0.62 [0.23, 1.66]    0.326
psDecileq2            0.89 [0.70, 1.15]    0.374
psDecileq3            0.94 [0.73, 1.22]    0.646
psDecileq4            0.89 [0.69, 1.16]    0.400
psDecileq5            0.91 [0.69, 1.19]    0.485
psDecileq6            0.91 [0.69, 1.20]    0.500
psDecileq7            0.88 [0.66, 1.18]    0.390
psDecileq8            1.05 [0.77, 1.43]    0.779
psDecileq9            0.93 [0.66, 1.31]    0.670
psDecileq10           0.91 [0.59, 1.44]    0.687
swang1RHC:psDecileq2  3.06 [0.95, 9.82]    0.058
swang1RHC:psDecileq3  2.39 [0.80, 7.05]    0.111
swang1RHC:psDecileq4  2.48 [0.85, 7.20]    0.091
swang1RHC:psDecileq5  1.85 [0.64, 5.25]    0.242
swang1RHC:psDecileq6  2.32 [0.81, 6.55]    0.111
swang1RHC:psDecileq7  2.63 [0.92, 7.42]    0.066
swang1RHC:psDecileq8  1.94 [0.68, 5.47]    0.207
swang1RHC:psDecileq9  1.73 [0.60, 4.89]    0.300
swang1RHC:psDecileq10 1.85 [0.62, 5.41]    0.260

Matching

The exposed and the unexposed are matched based on their propensity scores, creating an RCT like cohort of exchangeable exposed and unexposed.

Pros:
RCT-like cohort of exchangeable exposed and unexposed.
Those who are likely to receive either treatment are compared, i.e., equipois, another RCT-like feature.
Any effect measure can be used (risk difference, risk ratio, odds ratio)
The result is marginal, yet another RCT-like feature.
Effect modification can be studied if you want to.
Cons:
Those who do not match drop out of the study (This is also a good thing as stated above).
No consensus on the matching procedure.
R-specific: nonrandom::ps.match() is simple, but slow. Matching::Match() is fast, but just return match IDs?

nonrandom package

## Propensity score estimation (pscore object returned)
psObj <- pscore(formula     = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
                data        = rhc,
                family      = "binomial",
                name.pscore = "pscore")

## Define a function to match and show
MatchAndPlot <- function(x = 0.05) {

    ## Matching 1:1 (caliper x * sd(logit(PS)))
    psMatchObj <- ps.match(object             = psObj,
                           who.treated        = "RHC",
                           ratio              = 1,              # default 1:1 match
                           caliper            = "logit",        # caliper = x * SD(logit(PS))
                           x                  = x               # x for x * SD(PS)
                           )
    ## Show results
    print(psMatchObj)

    ## Extract density
    dfDataMatched <- CreateDataset(data = psMatchObj$data.matched, ps = "ps", weights = "one")

    ## Plot matched
    plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
    plotObj <- plotBase + plotOrig + plotMatched +
        labs(title = paste("nonrandom::ps.match() Caliper", x,"* SD(logit(PS))"))

    ## Show plot
    print(plotObj)

    ## Show results
    glmMatched <- glm(formula = death ~ swang1,
                      family  = binomial(link = "logit"),
                      data    = psMatchObj$data.matched)

    ## Print result
    ShowRegTable(glmMatched)
}

## caliper 0.2 * sd(logit(PS)): Most common setting.
MatchAndPlot(x = 0.2)


 Matched by:  pscore 

 Matching parameter:

Caliper size:    "0.279"
Ratio:           "1"    
Who is treated?: "RHC"  

 Matching information:

Untreated to treated?: TRUE
Best match?:           TRUE

 Matching data:

Number of treated obs.:             2184
Number of matched treated obs.:     2184
Number of untreated obs.:           3551
Number of matched untreated obs.:   2184
Number of total matched obs.:       4368
Number of not matched obs.:         1367
Number of matching sets:            2184
Number of incomplete matching sets:    0

plot of chunk unnamed-chunk-10

            exp(beta) [confint] p     
(Intercept) 1.69 [1.55, 1.85]   <0.001
swang1RHC   1.26 [1.11, 1.43]   <0.001

## caliper 0.1 * sd(logit(PS))
MatchAndPlot(x = 0.1)


 Matched by:  pscore 

 Matching parameter:

Caliper size:    "0.139"
Ratio:           "1"    
Who is treated?: "RHC"  

 Matching information:

Untreated to treated?: TRUE
Best match?:           TRUE

 Matching data:

Number of treated obs.:             2184
Number of matched treated obs.:     1969
Number of untreated obs.:           3551
Number of matched untreated obs.:   1969
Number of total matched obs.:       3938
Number of not matched obs.:         1797
Number of matching sets:            1969
Number of incomplete matching sets:    0

plot of chunk unnamed-chunk-10

            exp(beta) [confint] p     
(Intercept) 1.69 [1.55, 1.86]   <0.001
swang1RHC   1.32 [1.15, 1.50]   <0.001

## caliper 0.05 * sd(logit(PS))
MatchAndPlot(x = 0.05)


 Matched by:  pscore 

 Matching parameter:

Caliper size:    "0.07"
Ratio:           "1"   
Who is treated?: "RHC" 

 Matching information:

Untreated to treated?: TRUE
Best match?:           TRUE

 Matching data:

Number of treated obs.:             2184
Number of matched treated obs.:     1729
Number of untreated obs.:           3551
Number of matched untreated obs.:   1729
Number of total matched obs.:       3458
Number of not matched obs.:         2277
Number of matching sets:            1729
Number of incomplete matching sets:    0

plot of chunk unnamed-chunk-10

            exp(beta) [confint] p     
(Intercept) 1.72 [1.56, 1.90]   <0.001
swang1RHC   1.28 [1.11, 1.47]    0.001

Matching package

## Define similarly to Matching::Match()
MatchAndPlot2 <- function(x = 0.05, returnData = FALSE) {

    ## Match by Matching::Match()
    listMatch <- Match(Tr       = (rhc$swang1 == "RHC"),      # Need to be in 0,1
                       X        = log(rhc$ps / (1 - rhc$ps)), # logit of PS,i.e., log(PS/(1-PS)) as matching variable
                       M        = 1,                          # 1:1 match
                       caliper  = x,                          # caliper = x * SD(each matching variable)
                       replace  = FALSE,
                       ties     = TRUE,
                       version  = "fast")

    ## Show number matched
    ## print(str(listMatch[c("index.treated","index.control")]))
    print(summary(listMatch))

    ## Extract matched data
    psMatchData <- rhc[unlist(listMatch[c("index.treated","index.control")]), ]

    ## Extract density
    dfDataMatched <- CreateDataset(data = psMatchData, ps = "ps", weights = "one")

    ## Plot matched
    plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
    plotObj <- plotBase + plotOrig + plotMatched +
        labs(title = paste("Matching::Match() Caliper", x,"* SD(logit(PS))"))

    ## Show plot
    print(plotObj)

    ## Show results
    glmMatched <- glm(formula = death ~ swang1,
                      family  = binomial(link = "logit"),
                      data    = psMatchData)
    ShowRegTable(glmMatched)

    ## Return dataset for plotting
    if(returnData == TRUE) {
        return(dfDataMatched)
    }
}

## caliper 0.2 * sd(logit(PS)): Most common setting.
## MatchAndPlot2(x = 0.2)
dfDataMatched <- MatchAndPlot2(x = 0.2, returnData = TRUE)


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  5735 
Original number of treated obs...............  2184 
Matched number of observations...............  1563 
Matched number of observations  (unweighted).  1563 

Caliper (SDs)........................................   0.2 
Number of obs dropped by 'exact' or 'caliper'  621

plot of chunk unnamed-chunk-11

            exp(beta) [confint] p     
(Intercept) 1.71 [1.54, 1.89]   <0.001
swang1RHC   1.32 [1.13, 1.53]   <0.001

## caliper 0.1 * sd(logit(PS))
MatchAndPlot2(x = 0.1)


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  5735 
Original number of treated obs...............  2184 
Matched number of observations...............  1546 
Matched number of observations  (unweighted).  1546 

Caliper (SDs)........................................   0.1 
Number of obs dropped by 'exact' or 'caliper'  638

plot of chunk unnamed-chunk-11

            exp(beta) [confint] p     
(Intercept) 1.77 [1.60, 1.97]   <0.001
swang1RHC   1.28 [1.10, 1.49]    0.001

## caliper 0.05 * sd(logit(PS))
MatchAndPlot2(x = 0.05)


Estimate...  0 
SE.........  0 
T-stat.....  NaN 
p.val......  NA 

Original number of observations..............  5735 
Original number of treated obs...............  2184 
Matched number of observations...............  1538 
Matched number of observations  (unweighted).  1538 

Caliper (SDs)........................................   0.05 
Number of obs dropped by 'exact' or 'caliper'  646

plot of chunk unnamed-chunk-11

            exp(beta) [confint] p     
(Intercept) 1.72 [1.55, 1.91]   <0.001
swang1RHC   1.32 [1.13, 1.53]   <0.001


## Change data for mirror plot
dfDataMatched <- subset(dfDataMatched, group != "All")
dfDataMatched$y[dfDataMatched$group != "RHC"] <- -1 * dfDataMatched$y[dfDataMatched$group != "RHC"]
## Create 
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
## Plot
plotMirrorTotal + plotMatched + coord_flip() +
    labs(title = "PS Matching")

plot of chunk unnamed-chunk-11

Weighting

There is confounding when the exposed and the unexposed have different distributions of propensity score. These distributions can be re-weighted to have the identical distribution. Inverse probability of treatment weight (IPTW) and standardized mortality ratio weight (SMRW) are the most common ones.

Pros:
Mathematically simple, and no arbitariness of matching algorithms.
Do not explicitly drop people unlike matching.
IPTW gives the average treatment effect (ATE) in the population, i.e., the causal effect estimate if the whole cohort underwent treatment.
SMRW gives the average treatment effect in the treated (ATT), i.e., the causal effect estimate in the treated subcohort. Also it is possible to do the opposite, the average treatment effect in the control (ATC).
The result is marginal.
Effect modification can be studied if you want to.
Cons:
People with unusual treatment assignment will get unusually high weights. For example, unexposed with high PS and exposed with low PS are up-weighted to meet the distribution of the whole cohort.

Create weights

rhc <- within(rhc, {

    ## Create inverse probability of treatment assignment weights (IPTW)
    iptw <- NA
    ## 1/PS for treated
    iptw[swang1 == "RHC"] <- 1/ps[swang1 == "RHC"]
    ## 1/(1-PS) for untreated (1/probability of not treated)
    iptw[swang1 != "RHC"] <- (1/(1 - ps))[swang1 != "RHC"]

    ## Create SMR (Average Treatment effect in Treated, ATT) weights
    smrw <- NA
    ## 1 for treated
    smrw[swang1 == "RHC"] <- 1
    ## PS/(1-PS), i.e., odds of treatment for untreated
    smrw[swang1 != "RHC"] <- (ps/(1 - ps))[swang1 != "RHC"]

    ## Numerator weight for stabilization
    probRhc <- NA
    ## P(RHC) for treated
    probRhc[swang1 == "RHC"] <- sum(swang1 == "RHC")/length(swang1)
    ## P(No RHC) for untreated
    probRhc[swang1 != "RHC"] <- sum(swang1 != "RHC")/length(swang1)

    ## Create IPTW stabilized with marginal probabilities of assigned treatment
    iptwStab <- iptw * probRhc

    ## Create matching weights
    matchWeightNumerator <- pmin(ps, 1 - ps)
    matchWeight <- matchWeightNumerator * iptw
 })

## Check weights
## iptw has a mean of 2
## stabilized iptw has a mean of 1
summary(rhc[,c("iptw","iptwStab","smrw","matchWeight")])

      iptw          iptwStab           smrw         matchWeight    
 Min.   : 1.00   Min.   : 0.389   Min.   : 0.003   Min.   :0.0027  
 1st Qu.: 1.18   1st Qu.: 0.657   1st Qu.: 0.226   1st Qu.:0.1833  
 Median : 1.47   Median : 0.780   Median : 0.915   Median :0.4728  
 Mean   : 1.98   Mean   : 0.993   Mean   : 0.771   Mean   :0.5306  
 3rd Qu.: 2.08   3rd Qu.: 1.055   3rd Qu.: 1.000   3rd Qu.:1.0000  
 Max.   :52.52   Max.   :20.001   Max.   :24.488   Max.   :1.0000

IPTW

\( W = 1/PS \) for the treated.
\( W = 1/(1-PS) \) for the untreated.
A pseudo-population twice as large as the original cohort is created with well-balanced treated and untreated.
The causal effect in the whole cohort. Average treatment effect (ATE).

## Plot IPTW
dfDataIptw <- CreateDataset(data = rhc, ps = "ps", weights = "iptw")
plotIptw <- layer(geom = "line", data = subset(dfDataIptw, group != "All"), size = 2)
plotBase + plotOrig + plotIptw + labs(title = "IPTW")

plot of chunk unnamed-chunk-13


## Plot IPTW (Mirror image)
dfDataIptwMirror <- subset(dfDataIptw, group != "All")
dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"] <- -1 * dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"]
plotIptwMirror <- layer(geom = "line", data = dfDataIptwMirror, size = 2)
plotMirrorTotal + coord_flip() + plotIptwMirror + labs(title = "IPTW")

plot of chunk unnamed-chunk-13



## IPTW analysis
glmIptw <- glm(formula = death ~ swang1,
               family  = binomial(link = "logit"),
               data    = rhc,
               weights = iptw)
## The regular SE and confidence intervals are not valid
ShowRegTable(glmIptw)

            exp(beta) [confint] p     
(Intercept) 1.68 [1.60, 1.78]   <0.001
swang1RHC   1.27 [1.18, 1.37]   <0.001

## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmIptw, vcov = sandwich))

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.685 1.548 1.833  12.105 9.890e-34
swang1RHC   1.272 1.089 1.487   3.029 2.454e-03

## Robust variance estimator using geepack::geeglm()
geeglmIptw <- geeglm(formula   = as.numeric(death == "Yes") ~ swang1,
                     family    = binomial(link = "logit"),
                     id        = X,
                     data      = rhc,
                     weights   = iptw,
                     corstr    = "exchangeable"
                     )
GetConfInt(geeglmIptw)

               OR lower upper    Wald Pr(>|W|)
(Intercept) 1.685 1.548 1.833 146.540 0.000000
swang1RHC   1.272 1.089 1.487   9.174 0.002454

Stabilized IPTW

\( W = P \) (treated in original cohort) \( * 1/PS \) for the treated.
\( W = P \) (untreated in original cohort) \( * 1/(1-PS) \) for the untreated.
A pseudo-population as large as the original cohort is created with the original treated/untreated ratio.

## Plot IPTW stabilized with marginal probabilities of assigned treatment
dfDataIptwStab <- CreateDataset(data = rhc, ps = "ps", weights = "iptwStab")
plotIptwStab <- layer(geom = "line", data = dfDataIptwStab, size = 2)
plotBase + plotOrig + plotIptwStab + labs(title = "Stabilized IPTW")

plot of chunk unnamed-chunk-14


## Plot stabilized IPTW (Mirror image)
dfDataIptwStabMirror <- subset(dfDataIptwStab, group != "All")
dfDataIptwStabMirror$y[dfDataIptwStabMirror$group != "RHC"] <- -1 * dfDataIptwStabMirror$y[dfDataIptwStabMirror$group != "RHC"]
plotIptwStabMirror <- layer(geom = "line", data = dfDataIptwStabMirror, size = 2)
plotMirrorTotal + coord_flip() + plotIptwStabMirror + labs(title = "Stabilized IPTW")

plot of chunk unnamed-chunk-14



## Stabilized IPTW analysis
glmIptwStab <- glm(formula = death ~ swang1,
                   family  = binomial(link = "logit"),
                   data    = rhc,
                   weights = iptwStab)
## The regular SE and confidence intervals are not valid
ShowRegTable(glmIptwStab)

            exp(beta) [confint] p     
(Intercept) 1.68 [1.57, 1.80]   <0.001
swang1RHC   1.27 [1.14, 1.43]   <0.001

ShowRegTable(glmIptw)

            exp(beta) [confint] p     
(Intercept) 1.68 [1.60, 1.78]   <0.001
swang1RHC   1.27 [1.18, 1.37]   <0.001

## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmIptwStab, vcov = sandwich))

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.685 1.548 1.833  12.105 9.890e-34
swang1RHC   1.272 1.089 1.487   3.029 2.454e-03

## Robust variance estimator using geepack::geeglm()
geeglmIptwStab <- geeglm(formula   = as.numeric(death == "Yes") ~ swang1,
                         family    = binomial(link = "logit"),
                         id        = X,
                         data      = rhc,
                         weights   = iptwStab,
                         corstr    = "exchangeable"
                         )
GetConfInt(geeglmIptwStab)

               OR lower upper    Wald Pr(>|W|)
(Intercept) 1.685 1.548 1.833 146.540 0.000000
swang1RHC   1.272 1.089 1.487   9.174 0.002454

SMRW

\( W = 1 \) for the treated. (Size of treated maintained.)
\( W = PS * 1/(1-PS) \) for the untreated. (Up-weighted to the whole cohort, then down-weighted to the treated size)
A pseudo-population twice as large as the original treated is created with well-balanced treated and untreated.
The causal effect in the treated subset. Average treatment effect in the treated (ATT).

## Plot SMRW
dfDataSmrw <- CreateDataset(data = rhc, ps = "ps", weights = "smrw")
plotSmrw <- layer(geom = "line", data = dfDataSmrw, size = 2)
plotBase + plotOrig + plotSmrw + labs(title = "SMRW")


## Plot SMRW (Mirror image)
dfDataSmrwMirror <- subset(dfDataSmrw, group != "All")
dfDataSmrwMirror$y[dfDataSmrwMirror$group != "RHC"] <- -1 * dfDataSmrwMirror$y[dfDataSmrwMirror$group != "RHC"]
plotSmrwMirror <- layer(geom = "line", data = dfDataSmrwMirror, size = 2)
plotMirrorTotal + coord_flip() + plotSmrwMirror + labs(title = "SMRW")

plot of chunk unnamed-chunk-15


## SMRW analysis
glmSmrw <- glm(formula = death ~ swang1,
               family  = binomial(link = "logit"),
               data    = rhc,
               weights = smrw)
## The regular SE and confidence intervals are not valid
ShowRegTable(glmSmrw)

            exp(beta) [confint] p     
(Intercept) 1.66 [1.53, 1.81]   <0.001
swang1RHC   1.28 [1.13, 1.45]   <0.001

## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmSmrw, vcov = sandwich))

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.661 1.436 1.921   6.829 8.527e-12
swang1RHC   1.282 1.080 1.521   2.844 4.456e-03

## Robust variance estimator using geepack::geeglm()
geeglmSmrw <- geeglm(formula   = as.numeric(death == "Yes") ~ swang1,
                     family    = binomial(link = "logit"),
                     id        = X,
                     data      = rhc,
                     weights   = smrw,
                     corstr    = "exchangeable"
                     )
GetConfInt(geeglmSmrw)

               OR lower upper   Wald  Pr(>|W|)
(Intercept) 1.661 1.436 1.921 46.641 8.527e-12
swang1RHC   1.282 1.080 1.521  8.088 4.456e-03

Matching weights

\( W = \) min \( (PS, 1-PS) * 1/PS \) for the treated.
\( W = \) min \( (PS, 1-PS) * 1/(1-PS) \) for the untreated.
The same in effect as the matching method.

##  Plot matching weights
dfDataMatchWeight <- CreateDataset(data = rhc, ps = "ps", weights = "matchWeight")
plotMatchWeight <- layer(geom = "line", data = dfDataMatchWeight, size = 2)
plotBase + plotOrig + plotMatchWeight + labs(title = "Matching weights")

plot of chunk unnamed-chunk-16


## Plot matching weights (Mirror image)
dfDataMatchWeightMirror <- subset(dfDataMatchWeight, group != "All")
dfDataMatchWeightMirror$y[dfDataMatchWeightMirror$group != "RHC"] <- -1 * dfDataMatchWeightMirror$y[dfDataMatchWeightMirror$group != "RHC"]
plotMatchWeightMirror <- layer(geom = "line", data = dfDataMatchWeightMirror, size = 2)
plotMirrorTotal + coord_flip() + plotMatchWeightMirror + labs(title = "Matching weights")

plot of chunk unnamed-chunk-16


## Matching weight analysis
glmMatchWeight <- glm(formula = death ~ swang1,
                      family  = binomial(link = "logit"),
                      data    = rhc,
                      weights = matchWeight)
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmMatchWeight, vcov = sandwich))

               OR lower upper z value  Pr(>|z|)
(Intercept) 1.698 1.555 1.853  11.820 3.077e-32
swang1RHC   1.307 1.145 1.493   3.961 7.466e-05

## Robust variance estimator using geepack::geeglm()
geeglmMatchWeight <- geeglm(formula   = as.numeric(death == "Yes") ~ swang1,
                            family    = binomial(link = "logit"),
                            id        = X,
                            data      = rhc,
                            weights   = matchWeight,
                            corstr    = "exchangeable"
                            )
GetConfInt(geeglmMatchWeight)

               OR lower upper   Wald   Pr(>|W|)
(Intercept) 1.698 1.555 1.853 139.71 0.00000000
swang1RHC   1.307 1.145 1.493  15.69 0.00007466

Summarize results

Table of Contents: http://rpubs.com/kaz_yos/toc
Table of Contents (Japanese): http://rpubs.com/kaz_yos/toc-j
Top Page: http://rpubs.com/kaz_yos/
SlideShare: http://www.slideshare.net/kaz_yos