How To Use Propensity Score Analysis: http://www.mc.vanderbilt.edu/crc/workshop_files/2008-04-11.pdf
Vanderbilt Biostatistics Wiki>Main Web>DataSets: http://biostat.mc.vanderbilt.edu/twiki/bin/view/Main/DataSets
Illustration of PS uses: http://rpubs.com/kaz_yos/use-ps
Propensity score demos: http://rpubs.com/kaz_yos/epi271-lab3
AHRQ Summary Variables in Observational Research: Propensity Scores and Disease Risk Scores: http://effectivehealthcare.ahrq.gov/index.cfm/search-for-guides-reviews-and-reports/?productid=1084&pageaction=displayproduct (quotes are from this article).
Colors in ggplot2: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
## plotting
library(ggplot2)
## ROC
library(pROC)
## Load nonrandom for PS matching
library(nonrandom)
## Load Matching for PS matching
library(Matching)
## Load sandwich package for robust sandwich covariance matrix estimators
library(sandwich)
## Load lmtest package for coeftest
library(lmtest)
## Load geepack for robust sandwich covariance matrix estimators
library(geepack)
## Right heart cath dataset
rhc <- read.csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.csv")
## first 6 rows
head(rhc)
X cat1 cat2 ca sadmdte dschdte dthdte lstctdte death cardiohx chfhx dementhx psychhx
1 1 COPD <NA> Yes 11142 11151 NA 11382 No 0 0 0 0
2 2 MOSF w/Sepsis <NA> No 11799 11844 11844 11844 Yes 1 1 0 0
3 3 MOSF w/Malignancy MOSF w/Sepsis Yes 12083 12143 NA 12400 No 0 0 0 0
4 4 ARF <NA> No 11146 11183 11183 11182 Yes 0 0 0 0
5 5 MOSF w/Sepsis <NA> No 12035 12037 12037 12036 Yes 0 0 0 0
6 6 COPD <NA> No 12389 12396 NA 12590 No 0 1 0 0
chrpulhx renalhx liverhx gibledhx malighx immunhx transhx amihx age sex edu surv2md1 das2d3pc t3d30 dth30
1 1 0 0 0 1 0 0 0 70.25 Male 12.000 0.641 23.50 30 No
2 0 0 0 0 0 1 1 0 78.18 Female 12.000 0.755 14.75 30 No
3 0 0 0 0 1 1 0 0 46.09 Female 14.070 0.317 18.14 30 No
4 0 0 0 0 0 1 0 0 75.33 Female 9.000 0.441 22.93 30 No
5 0 0 0 0 0 0 0 0 67.91 Male 9.945 0.437 21.05 2 Yes
6 1 0 0 0 0 0 0 0 86.08 Female 8.000 0.665 17.50 30 No
aps1 scoma1 meanbp1 wblc1 hrt1 resp1 temp1 pafi1 alb1 hema1 bili1 crea1 sod1 pot1 paco21 ph1 swang1 wtkilo1
1 46 0 41 22.10 124 10 38.7 68.0 3.5 58.0 1.01 1.2 145 4.000 40 7.359 No RHC 64.7
2 50 0 63 28.90 137 38 38.9 218.3 2.6 32.5 0.70 0.6 137 3.300 34 7.329 RHC 45.7
3 82 0 57 0.05 130 40 36.4 275.5 3.5 21.1 1.01 2.6 146 2.900 16 7.359 RHC 0.0
4 48 0 55 23.30 58 26 35.8 156.7 3.5 26.3 0.40 1.7 117 5.800 30 7.460 No RHC 54.6
5 72 41 65 29.70 125 27 34.8 478.0 3.5 24.0 1.01 3.6 126 5.800 17 7.229 RHC 78.4
6 38 0 115 18.00 134 36 39.2 184.2 3.1 30.5 1.01 1.4 138 5.399 68 7.300 No RHC 54.9
dnr1 ninsclas resp card neuro gastr renal meta hema seps trauma ortho adld3p urin1 race income
1 No Medicare Yes Yes No No No No No No No No 0 NA white Under $11k
2 No Private & Medicare No No No No No No No Yes No No NA 1437 white Under $11k
3 No Private No Yes No No No No No No No No NA 599 white $25-$50k
4 No Private & Medicare Yes No No No No No No No No No NA NA white $11-$25k
5 Yes Medicare No Yes No No No No No No No No NA 64 white Under $11k
6 No Medicare Yes No No No No No No No No No 0 242 white Under $11k
ptid
1 5
2 7
3 9
4 10
5 11
6 12
## Show outcome (death) and exposure (swang1)
addmargins(table(rhc[,c("swang1", "death")]))
death
swang1 No Yes Sum
No RHC 1315 2236 3551
RHC 698 1486 2184
Sum 2013 3722 5735
## Create weights for plotting
rhc$one <- 1
## Define function to obtain ORs with 95% confidence intervals from a result matrix
GetConfInt <- function(obj) {
logitsticModel <- FALSE
if (identical(class(obj), c("glm", "lm")) == TRUE) {
mat <- coef(summary(obj))
logitsticModel <- TRUE
} else if (identical(class(obj), c("geeglm", "gee", "glm")) == TRUE) {
mat <- coef(summary(obj))
} else if (identical(class(obj), c("coeftest")) == TRUE) {
mat <- obj
} else if (identical(class(obj), c("matrix")) == TRUE) {
mat <- obj
} else {
stop("Not a supported object")
}
## Add point estimates
matRes <- mat[, 1, drop = F]
## 1.96 * SE
matSe <- mat[, 2, drop = F] * qnorm(0.975)
## Estimate, lower 95% limit, upper 95% limit
matRes <- cbind(matRes, (matRes - matSe), (matRes + matSe))
## Name
colnames(matRes) <- c("OR","lower","upper")
## Exponentiate
matRes <- exp(matRes)
## Add p-value
matRes <- cbind(matRes, mat[, 3:4, drop = F])
if (logitsticModel == TRUE) {
matRes[, c("lower","upper")] <- exp(suppressMessages(confint(obj)))
}
## Show
matRes
}
Estimate the effect within each combination of confounder values.
## Crude analysis (confounded!)
glmCrude <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc)
GetConfInt(glmCrude)
OR lower upper z value Pr(>|z|)
(Intercept) 1.700 1.589 1.821 15.276 1.113e-52
swang1RHC 1.252 1.119 1.402 3.905 9.425e-05
## Fit a "regular" fully adjusted model
glmFull <- glm(formula = death ~ swang1 + age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
family = binomial(link = "logit"),
data = rhc)
## Show result
GetConfInt(coef(summary(glmFull)))
OR lower upper z value Pr(>|z|)
(Intercept) 167.87104 0.29673 94971.25049 1.58426 1.131e-01
swang1RHC 1.38273 1.19345 1.60204 4.31442 1.600e-05
age 1.01402 1.00815 1.01993 4.69781 2.630e-06
sexMale 1.33138 1.16651 1.51956 4.24326 2.203e-05
raceother 1.05862 0.78456 1.42840 0.37265 7.094e-01
racewhite 0.98723 0.82196 1.18573 -0.13749 8.906e-01
edu 1.00956 0.98686 1.03277 0.81987 4.123e-01
income$11-$25k 1.19981 0.91699 1.56985 1.32811 1.841e-01
income$25-$50k 1.02190 0.78072 1.33756 0.15770 8.747e-01
incomeUnder $11k 1.42425 1.09033 1.86043 2.59438 9.476e-03
ninsclasMedicare 1.42434 1.11051 1.82687 2.78539 5.346e-03
ninsclasMedicare & Medicaid 1.31466 0.95872 1.80274 1.69828 8.945e-02
ninsclasNo insurance 1.48120 1.08369 2.02451 2.46410 1.374e-02
ninsclasPrivate 1.22142 0.96864 1.54017 1.69065 9.090e-02
ninsclasPrivate & Medicare 1.09231 0.84216 1.41675 0.66538 5.058e-01
cat1CHF 1.26240 0.91746 1.73702 1.43095 1.524e-01
cat1Cirrhosis 1.13208 0.71111 1.80228 0.52292 6.010e-01
cat1Colon Cancer 0.28777 0.03871 2.13912 -1.21701 2.236e-01
cat1Coma 1.80364 1.26471 2.57223 3.25668 1.127e-03
cat1COPD 1.29154 0.94190 1.77096 1.58836 1.122e-01
cat1Lung Cancer 4.01275 1.11068 14.49757 2.12013 3.399e-02
cat1MOSF w/Malignancy 1.73908 1.12698 2.68362 2.50006 1.242e-02
cat1MOSF w/Sepsis 0.91042 0.75809 1.09336 -1.00454 3.151e-01
das2d3pc 0.95889 0.94689 0.97104 -6.53305 6.444e-11
dnr1Yes 2.31916 1.78286 3.01679 6.26929 3.627e-10
caNo 0.18449 0.05407 0.62950 -2.69906 6.954e-03
caYes 0.42786 0.27535 0.66485 -3.77509 1.600e-04
surv2md1 0.04019 0.01883 0.08578 -8.30912 9.642e-17
aps1 1.00538 0.99943 1.01136 1.77058 7.663e-02
scoma1 1.00044 0.99722 1.00366 0.26510 7.909e-01
wtkilo1 0.99736 0.99511 0.99961 -2.29640 2.165e-02
temp1 0.95026 0.91226 0.98986 -2.44955 1.430e-02
meanbp1 1.00038 0.99846 1.00231 0.39015 6.964e-01
resp1 1.00338 0.99826 1.00852 1.29310 1.960e-01
hrt1 1.00153 0.99971 1.00335 1.64830 9.929e-02
pafi1 1.00085 1.00020 1.00151 2.56012 1.046e-02
paco21 0.99823 0.99156 1.00494 -0.51798 6.045e-01
ph1 0.85616 0.39295 1.86541 -0.39084 6.959e-01
wblc1 1.00632 1.00004 1.01263 1.97373 4.841e-02
hema1 0.99130 0.98244 1.00023 -1.90896 5.627e-02
sod1 1.00111 0.99255 1.00975 0.25283 8.004e-01
pot1 1.00723 0.94178 1.07723 0.21019 8.335e-01
crea1 1.02221 0.97765 1.06880 0.96596 3.341e-01
bili1 1.03104 1.01123 1.05125 3.08768 2.017e-03
alb1 1.00139 0.91701 1.09353 0.03092 9.753e-01
respYes 1.24568 1.05427 1.47184 2.58087 9.855e-03
cardYes 1.15927 0.97681 1.37580 1.69146 9.075e-02
neuroYes 1.38910 1.08475 1.77882 2.60473 9.195e-03
gastrYes 1.34301 1.07557 1.67696 2.60294 9.243e-03
renalYes 0.84960 0.62081 1.16272 -1.01815 3.086e-01
metaYes 1.11002 0.81446 1.51283 0.66076 5.088e-01
hemaYes 2.21181 1.57649 3.10316 4.59481 4.331e-06
sepsYes 1.22869 1.01105 1.49318 2.07046 3.841e-02
traumaYes 0.87248 0.46277 1.64492 -0.42165 6.733e-01
orthoYes 1.27121 0.21965 7.35710 0.26789 7.888e-01
cardiohx 1.22679 1.01373 1.48464 2.10006 3.572e-02
chfhx 1.62536 1.31689 2.00608 4.52354 6.081e-06
dementhx 1.30321 1.03392 1.64264 2.24241 2.493e-02
psychhx 0.98048 0.76760 1.25241 -0.15783 8.746e-01
chrpulhx 1.21488 0.99444 1.48419 1.90543 5.672e-02
renalhx 1.33725 0.92444 1.93440 1.54284 1.229e-01
liverhx 1.35474 0.92463 1.98490 1.55789 1.193e-01
gibledhx 1.27017 0.80962 1.99271 1.04084 2.980e-01
malighx 0.50745 0.16687 1.54313 -1.19547 2.319e-01
immunhx 1.27160 1.09645 1.47473 3.17773 1.484e-03
transhx 0.91900 0.75992 1.11139 -0.87098 3.838e-01
amihx 0.87264 0.62159 1.22509 -0.78708 4.312e-01
## PS model
psModel <- glm(formula = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
family = binomial(link = "logit"),
data = rhc)
## PS (predicted probability of treatment)
rhc$ps <- predict(psModel, type = "response")
## PS model diagnostic by ROC
rocPsModel <- roc(swang1 ~ ps, data = rhc)
plot(rocPsModel, legacy.axes = TRUE)
Call:
roc.formula(formula = swang1 ~ ps, data = rhc)
Data: ps in 3551 controls (swang1 No RHC) < 2184 cases (swang1 RHC).
Area under the curve: 0.798
## Define a function to create density data (x = ps, y = density) frame given weights
CreateDataset <- function(data = rhc, ps = "ps", weights = "one") {
## Density for overall cohort
dfDataAll <- suppressWarnings(density(x = data[,ps], weights = data[,weights]))
dfDataAll <- data.frame(dfDataAll[c("x","y")])
dfDataAll$group <- "All"
## Density for RHC
dfDataRhc <- suppressWarnings(density(x = subset(data, swang1 == "RHC")[,ps], weights = subset(data, swang1 == "RHC")[,weights]))
dfDataRhc <- data.frame(dfDataRhc[c("x","y")])
dfDataRhc$group <- "RHC"
## Density for No RHC
dfDataNoRhc <- suppressWarnings(density(x = subset(data, swang1 == "No RHC")[,ps], weights = subset(data, swang1 != "RHC")[,weights]))
dfDataNoRhc <- data.frame(dfDataNoRhc[c("x","y")])
dfDataNoRhc$group <- "No RHC"
## combine datasets
dfData <- rbind(dfDataAll, dfDataRhc, dfDataNoRhc)
## Return
dfData
}
## Create dataset with weight = 1 for every subject
dfData <- CreateDataset(data = rhc, ps = "ps", weights = "one")
## Framewrok only (no layer)
plotBase <- ggplot(data = dfData,
mapping = aes(x = x, y = y, color = group)) +
scale_x_continuous(limit = c(-0.1,1.1), breaks = c(0,0.5,1.0)) +
scale_y_continuous(breaks = NULL) +
## scale_color_manual(values = c("All" = "black", "RHC" = "red", "No RHC" = "blue")) +
scale_color_manual(values = c("All" = "#999999", "RHC" = "#D55E00", "No RHC" = "#0072B2")) +
labs(title = "PS distribution by treatment", x = "Propensity score", y = "Distribution") +
theme_bw() +
theme(legend.key = element_blank())
## Plot with total
plotOrig <- layer(geom = "line")
plotBase + plotOrig
## Plot like mirror image
dfDataMirror <- dfData
dfDataMirror <- subset(dfDataMirror, group != "All")
dfDataMirror <- within(dfDataMirror, {
y[group != "RHC"] <- (y * -1)[group != "RHC"]
})
plotMirror <- layer(geom = "line", data = dfDataMirror)
plotBase + plotMirror + geom_hline(yintercept = 0)
## Add total distribution both above and below (y = -y)
plotTotalPos <- layer(geom = "line", data = subset(dfData, group == "All"), size = 0.1)
plotTotalNeg <- layer(geom = "line", data = subset(dfData, group == "All"), mapping = aes(y = -y), size = 0.1)
plotMirrorTotal <- plotBase + plotMirror + geom_hline(yintercept = 0) + plotTotalPos + plotTotalNeg
plotMirrorTotal
## Flip
plotMirrorTotal + coord_flip()
Estimate the effect within each propensity score quintile. The exposed and the unexposed within each quintile are considered exchangeable.
## Check quintiles
quintilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.2, to = 0.8, by = 0.2))
quintilePoints <- c(0,quintilePoints,1)
## Plot the idea
plotBase + plotOrig + geom_vline(xintercept = quintilePoints) +
labs(title = "Stratification into quintiles")
## Plot the idea (Mirror image)
plotMirrorTotal + coord_flip()+ geom_vline(xintercept = quintilePoints) +
labs(title = "Stratification into quintiles")
## Add quintile variable
rhc$psQunintile <- cut(x = rhc$ps, breaks = quintilePoints, labels = paste0("q",1:5))
## Perform unadjusted analysis in each quintile
glmQuint <- dlply(.data = rhc,
.variables = c("psQunintile"),
.drop = TRUE, .parallel = TRUE,
.fun = function(DF) {
## Perform
glmWithinQuint <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = DF)
## Return
glmWithinQuint
})
## Create GetConfIn objects in each quintile
resGlmQuint <- lapply(glmQuint,
FUN = function(model) {
GetConfInt(model)[2, , drop = F]
})
## Combine
resGlmQuint <- do.call(rbind, resGlmQuint)
## Name
rownames(resGlmQuint) <- paste0("q", 1:5, ": ", rownames(resGlmQuint))
## Show
print(resGlmQuint, quote = F)
OR lower upper z value Pr(>|z|)
q1: swang1RHC 1.366 0.8282 2.326 1.1900 0.234031
q2: swang1RHC 1.498 1.1017 2.056 2.5436 0.010971
q3: swang1RHC 1.282 0.9945 1.658 1.9081 0.056372
q4: swang1RHC 1.393 1.0878 1.785 2.6235 0.008703
q5: swang1RHC 1.100 0.8309 1.452 0.6716 0.501831
The propensity score is included in the outcome model. It is simple, but the relationship between the PS and the outcome must be in the correct functional form. It is basically a stratification method with additional assumptions and fine strata. If propensity score is modeled into quintiles and the PS quintile * exposure interaction term is included, it is equivalent to stratification in the previous sections.
## Plot
plotBase + plotOrig + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
labs(title = "Adjustment (stratification with a model)")
## Plot (Mirror image)
plotMirrorTotal + coord_flip() + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
labs(title = "Adjustment (stratification with a model)")
## Perform PS-adjusted logistic regression (linear PS)
glmPsAdjLinear <- glm(formula = death ~ swang1 + ps,
family = binomial(link = "logit"),
data = rhc)
GetConfInt(glmPsAdjLinear)
OR lower upper z value Pr(>|z|)
(Intercept) 1.7688 1.6014 1.955 11.215 3.424e-29
swang1RHC 1.2975 1.1389 1.479 3.909 9.256e-05
ps 0.8704 0.6742 1.124 -1.065 2.867e-01
## Perform PS-adjusted logistic regression (quadratic PS)
glmPsAdjQuad <- glm(formula = death ~ swang1 + ps + I(ps^2),
family = binomial(link = "logit"),
data = rhc)
GetConfInt(glmPsAdjQuad)
OR lower upper z value Pr(>|z|)
(Intercept) 1.7380 1.5014 2.014 7.3754 1.638e-13
swang1RHC 1.2977 1.1390 1.479 3.9094 9.251e-05
ps 0.9887 0.4329 2.251 -0.0271 9.784e-01
I(ps^2) 0.8601 0.3411 2.179 -0.3188 7.499e-01
## Perform PS-adjusted logistic regression (PS quintiles)
glmPsAdjQuintile <- glm(formula = death ~ swang1 + psQunintile,
family = binomial(link = "logit"),
data = rhc)
GetConfInt(glmPsAdjQuintile)
OR lower upper z value Pr(>|z|)
(Intercept) 1.7346 1.5383 1.959 8.94078 3.864e-19
swang1RHC 1.3096 1.1503 1.492 4.06847 4.732e-05
psQunintileq2 0.9927 0.8361 1.179 -0.08306 9.338e-01
psQunintileq3 0.9506 0.7982 1.132 -0.56847 5.697e-01
psQunintileq4 1.0349 0.8630 1.241 0.36971 7.116e-01
psQunintileq5 0.8536 0.7041 1.035 -1.61258 1.068e-01
## Perform PS-adjusted logistic regression (PS quintiles with interaction)
glmPsAdjQuintileInt <- glm(formula = death ~ swang1 + psQunintile + swang1:psQunintile,
family = binomial(link = "logit"),
data = rhc)
GetConfInt(glmPsAdjQuintileInt)
OR lower upper z value Pr(>|z|)
(Intercept) 1.7303 1.5292 1.961 8.65278 5.026e-18
swang1RHC 1.3660 0.8282 2.326 1.19004 2.340e-01
psQunintileq2 0.9701 0.8078 1.165 -0.32553 7.448e-01
psQunintileq3 0.9598 0.7907 1.166 -0.41493 6.782e-01
psQunintileq4 1.0064 0.8135 1.247 0.05829 9.535e-01
psQunintileq5 0.9742 0.7438 1.281 -0.18837 8.506e-01
swang1RHC:psQunintileq2 1.0968 0.5940 1.983 0.30130 7.632e-01
swang1RHC:psQunintileq3 0.9388 0.5213 1.649 -0.21559 8.293e-01
swang1RHC:psQunintileq4 1.0196 0.5679 1.784 0.06669 9.468e-01
swang1RHC:psQunintileq5 0.8055 0.4424 1.430 -0.72536 4.682e-01
## Perform PS-adjusted logistic regression (PS decile)
## Create decile category variable
decilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.1, to = 0.9, by = 0.1))
decilePoints <- c(0,decilePoints,1)
rhc$psDecile <- cut(x = rhc$ps, breaks = decilePoints, labels = paste0("q",1:10))
## Perform PS-adjusted logistic regression (PS deciles)
glmPsAdjDecile <- glm(formula = death ~ swang1 + psDecile,
family = binomial(link = "logit"),
data = rhc)
GetConfInt(glmPsAdjDecile)
OR lower upper z value Pr(>|z|)
(Intercept) 1.7863 1.5077 2.122 6.65688 2.797e-11
swang1RHC 1.3095 1.1494 1.493 4.04516 5.229e-05
psDecileq2 0.9430 0.7408 1.200 -0.47710 6.333e-01
psDecileq3 0.9826 0.7708 1.253 -0.14181 8.872e-01
psDecileq4 0.9458 0.7417 1.206 -0.44915 6.533e-01
psDecileq5 0.8891 0.6965 1.135 -0.94416 3.451e-01
psDecileq6 0.9591 0.7494 1.228 -0.33218 7.398e-01
psDecileq7 0.9918 0.7719 1.275 -0.06467 9.484e-01
psDecileq8 1.0185 0.7907 1.312 0.14168 8.873e-01
psDecileq9 0.8268 0.6404 1.067 -1.46034 1.442e-01
psDecileq10 0.8311 0.6385 1.082 -1.37574 1.689e-01
## Perform PS-adjusted logistic regression (PS deciles with interaction)
glmPsAdjDecileInt <- glm(formula = death ~ swang1 + psDecile + swang1:psDecile,
family = binomial(link = "logit"),
data = rhc)
GetConfInt(coef(summary(glmPsAdjDecileInt)))
OR lower upper z value Pr(>|z|)
(Intercept) 1.8274 1.5360 2.174 6.8030 1.024e-11
swang1RHC 0.6156 0.2338 1.621 -0.9822 3.260e-01
psDecileq2 0.8934 0.6968 1.146 -0.8887 3.742e-01
psDecileq3 0.9417 0.7291 1.216 -0.4598 6.456e-01
psDecileq4 0.8946 0.6902 1.159 -0.8422 3.997e-01
psDecileq5 0.9082 0.6933 1.190 -0.6986 4.848e-01
psDecileq6 0.9093 0.6897 1.199 -0.6741 5.003e-01
psDecileq7 0.8795 0.6561 1.179 -0.8594 3.901e-01
psDecileq8 1.0453 0.7669 1.425 0.2801 7.794e-01
psDecileq9 0.9279 0.6575 1.310 -0.4258 6.703e-01
psDecileq10 0.9120 0.5826 1.428 -0.4026 6.872e-01
swang1RHC:psDecileq2 3.0558 0.9634 9.693 1.8966 5.787e-02
swang1RHC:psDecileq3 2.3912 0.8178 6.991 1.5926 1.113e-01
swang1RHC:psDecileq4 2.4841 0.8638 7.144 1.6884 9.134e-02
swang1RHC:psDecileq5 1.8536 0.6592 5.212 1.1699 2.420e-01
swang1RHC:psDecileq6 2.3161 0.8242 6.508 1.5932 1.111e-01
swang1RHC:psDecileq7 2.6305 0.9390 7.368 1.8403 6.572e-02
swang1RHC:psDecileq8 1.9400 0.6927 5.433 1.2612 2.072e-01
swang1RHC:psDecileq9 1.7285 0.6147 4.861 1.0374 2.995e-01
swang1RHC:psDecileq10 1.8488 0.6346 5.386 1.1264 2.600e-01
The exposed and the unexposed are matched based on their propensity scores, creating an RCT like cohort of exchangeable exposed and unexposed.
nonrandom package
## Propensity score estimation (pscore object returned)
psObj <- pscore(formula = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
data = rhc,
family = "binomial",
name.pscore = "pscore")
## Define a function to match and show
MatchAndPlot <- function(x = 0.05) {
## Matching 1:1 (caliper x * sd(logit(PS)))
psMatchObj <- ps.match(object = psObj,
who.treated = "RHC",
ratio = 1, # default 1:1 match
caliper = "logit", # caliper = x * SD(PS)
x = x
)
## Show results
print(psMatchObj)
## Extract density
dfDataMatched <- CreateDataset(data = psMatchObj$data.matched, ps = "ps", weights = "one")
## Plot matched
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
plotObj <- plotBase + plotOrig + plotMatched +
labs(title = paste("nonrandom::ps.match() Caliper", x,"* SD(logit(PS))"))
## Show plot
print(plotObj)
## Show results
glmMatched <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = psMatchObj$data.matched)
## Print result
print(GetConfInt(glmMatched))
}
## caliper 0.2 * sd(logit(PS))
MatchAndPlot(x = 0.2)
Matched by: pscore
Matching parameter:
Caliper size: "0.279"
Ratio: "1"
Who is treated?: "RHC"
Matching information:
Untreated to treated?: TRUE
Best match?: TRUE
Matching data:
Number of treated obs.: 2184
Number of matched treated obs.: 2184
Number of untreated obs.: 3551
Number of matched untreated obs.: 2184
Number of total matched obs.: 4368
Number of not matched obs.: 1367
Number of matching sets: 2184
Number of incomplete matching sets: 0
OR lower upper z value Pr(>|z|)
(Intercept) 1.693 1.553 1.847 11.888 1.368e-32
swang1RHC 1.258 1.110 1.425 3.593 3.268e-04
## caliper 0.1 * sd(logit(PS))
MatchAndPlot(x = 0.1)
Matched by: pscore
Matching parameter:
Caliper size: "0.139"
Ratio: "1"
Who is treated?: "RHC"
Matching information:
Untreated to treated?: TRUE
Best match?: TRUE
Matching data:
Number of treated obs.: 2184
Number of matched treated obs.: 1969
Number of untreated obs.: 3551
Number of matched untreated obs.: 1969
Number of total matched obs.: 3938
Number of not matched obs.: 1797
Number of matching sets: 1969
Number of incomplete matching sets: 0
OR lower upper z value Pr(>|z|)
(Intercept) 1.694 1.546 1.856 11.295 1.394e-29
swang1RHC 1.315 1.153 1.502 4.065 4.810e-05
## caliper 0.05 * sd(logit(PS))
MatchAndPlot(x = 0.05)
Matched by: pscore
Matching parameter:
Caliper size: "0.07"
Ratio: "1"
Who is treated?: "RHC"
Matching information:
Untreated to treated?: TRUE
Best match?: TRUE
Matching data:
Number of treated obs.: 2184
Number of matched treated obs.: 1729
Number of untreated obs.: 3551
Number of matched untreated obs.: 1729
Number of total matched obs.: 3458
Number of not matched obs.: 2277
Number of matching sets: 1729
Number of incomplete matching sets: 0
OR lower upper z value Pr(>|z|)
(Intercept) 1.723 1.563 1.901 10.904 1.107e-27
swang1RHC 1.278 1.110 1.472 3.408 6.543e-04
Matching package
## Define similarly to Matching::Match()
MatchAndPlot2 <- function(x = 0.05, returnData = FALSE) {
## Match by Matching::Match()
listMatch <- Match(Tr = (rhc$swang1 == "RHC"), # Need to be in 0,1
X = log(rhc$ps / (1 - rhc$ps)), # logit of PS,i.e., log(PS/(1-PS)) as matching variable
M = 1, # 1:1 match
caliper = x, # caliper = x * SD(each matching variable)
replace = FALSE,
ties = TRUE,
version = "fast")
## Show number matched
print(str(listMatch[c("index.treated","index.control")]))
## Extract matched data
psMatchData <- rhc[unlist(listMatch[c("index.treated","index.control")]), ]
## Extract density
dfDataMatched <- CreateDataset(data = psMatchData, ps = "ps", weights = "one")
## Plot matched
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
plotObj <- plotBase + plotOrig + plotMatched +
labs(title = paste("Matching::Match() Caliper", x,"* SD(logit(PS))"))
## Show plot
print(plotObj)
## Show results
glmMatched <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = psMatchData)
print(GetConfInt(glmMatched))
## Return dataset for plotting
if(returnData == TRUE) {
return(dfDataMatched)
}
}
## caliper 0.2 * sd(logit(PS))
## MatchAndPlot2(x = 0.2)
dfDataMatched <- MatchAndPlot2(x = 0.2, returnData = TRUE)
List of 2
$ index.treated: num [1:1563] 2 3 5 10 12 13 17 18 21 22 ...
$ index.control: num [1:1563] 3826 1386 450 3319 5644 ...
NULL
OR lower upper z value Pr(>|z|)
(Intercept) 1.631 1.474 1.808 9.391 5.925e-21
swang1RHC 1.379 1.189 1.600 4.249 2.148e-05
## caliper 0.1 * sd(logit(PS))
MatchAndPlot2(x = 0.1)
List of 2
$ index.treated: num [1:1546] 2 3 5 10 12 13 17 18 21 22 ...
$ index.control: num [1:1546] 3826 1855 3772 4214 5644 ...
NULL
OR lower upper z value Pr(>|z|)
(Intercept) 1.703 1.536 1.889 10.104 5.287e-24
swang1RHC 1.336 1.151 1.552 3.798 1.456e-04
## caliper 0.05 * sd(logit(PS))
MatchAndPlot2(x = 0.05)
List of 2
$ index.treated: num [1:1539] 2 3 5 10 12 13 17 18 21 22 ...
$ index.control: num [1:1539] 3826 4125 3772 3243 5644 ...
NULL
OR lower upper z value Pr(>|z|)
(Intercept) 1.773 1.599 1.968 10.79 3.945e-27
swang1RHC 1.275 1.097 1.482 3.17 1.523e-03
## Change data for mirror plot
dfDataMatched <- subset(dfDataMatched, group != "All")
dfDataMatched$y[dfDataMatched$group != "RHC"] <- -1 * dfDataMatched$y[dfDataMatched$group != "RHC"]
## Create
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
## Plot
plotMirrorTotal + plotMatched + coord_flip() +
labs(title = "PS Matching")
There is confounding when the exposed and the unexposed have different distributions of propensity score. These distributions can be re-weighted to have the identical distribution. Inverse probability of treatment weight (IPTW) and standardized mortality ratio weight (SMRW) are the most common ones.
Create weights
rhc <- within(rhc, {
## Create inverse probability of treatment assignment weights (IPTW)
iptw <- NA
## 1/PS for treated
iptw[swang1 == "RHC"] <- 1/ps[swang1 == "RHC"]
## 1/(1-PS) for untreated (1/probability of not treated)
iptw[swang1 != "RHC"] <- (1/(1 - ps))[swang1 != "RHC"]
## Create SMR (Average Treatment effect in Treated, ATT) weights
smrw <- NA
## 1 for treated
smrw[swang1 == "RHC"] <- 1
## PS/(1-PS), i.e., odds of treatment for untreated
smrw[swang1 != "RHC"] <- (ps/(1 - ps))[swang1 != "RHC"]
## Numerator weight for stabilization
probRhc <- NA
## P(RHC) for treated
probRhc[swang1 == "RHC"] <- sum(swang1 == "RHC")/length(swang1)
## P(No RHC) for untreated
probRhc[swang1 != "RHC"] <- sum(swang1 != "RHC")/length(swang1)
## Create IPTW stabilized with marginal probabilities of assigned treatment
iptwStab <- iptw * probRhc
## Create matching weights
matchWeightNumerator <- pmin(ps, 1 - ps)
matchWeight <- matchWeightNumerator * iptw
})
## Check weights
summary(rhc[,c("iptw","iptwStab","smrw","matchWeight")])
iptw iptwStab smrw matchWeight
Min. : 1.00 Min. : 0.389 Min. : 0.003 Min. :0.0027
1st Qu.: 1.18 1st Qu.: 0.657 1st Qu.: 0.226 1st Qu.:0.1833
Median : 1.47 Median : 0.780 Median : 0.915 Median :0.4728
Mean : 1.98 Mean : 0.993 Mean : 0.771 Mean :0.5306
3rd Qu.: 2.08 3rd Qu.: 1.055 3rd Qu.: 1.000 3rd Qu.:1.0000
Max. :52.52 Max. :20.001 Max. :24.488 Max. :1.0000
IPTW
## Plot IPTW
dfDataIptw <- CreateDataset(data = rhc, ps = "ps", weights = "iptw")
plotIptw <- layer(geom = "line", data = subset(dfDataIptw, group != "All"), size = 2)
plotBase + plotOrig + plotIptw + labs(title = "IPTW")
## Plot IPTW (Mirror image)
dfDataIptwMirror <- subset(dfDataIptw, group != "All")
dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"] <- -1 * dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"]
plotIptwMirror <- layer(geom = "line", data = dfDataIptwMirror, size = 2)
plotMirrorTotal + coord_flip() + plotIptwMirror + labs(title = "IPTW")
## IPTW analysis
glmIptw <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = iptw)
## The regular SE and confidence intervals are not valid
GetConfInt(glmIptw)
OR lower upper z value Pr(>|z|)
(Intercept) 1.685 1.598 1.777 19.190 4.484e-82
swang1RHC 1.272 1.177 1.375 6.075 1.240e-09
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmIptw, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.685 1.548 1.833 12.105 9.890e-34
swang1RHC 1.272 1.089 1.487 3.029 2.454e-03
## Robust variance estimator using geepack::geeglm()
geeglmIptw <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = iptw,
corstr = "exchangeable"
)
GetConfInt(geeglmIptw)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.685 1.548 1.833 146.540 0.000000
swang1RHC 1.272 1.089 1.487 9.174 0.002454
Stabilized IPTW
## Plot IPTW stabilized with marginal probabilities of assigned treatment
dfDataIptwStab <- CreateDataset(data = rhc, ps = "ps", weights = "iptwStab")
plotIptwStab <- layer(geom = "line", data = dfDataIptwStab, size = 2)
plotBase + plotOrig + plotIptwStab + labs(title = "Stabilized IPTW")
## Plot stabilized IPTW (Mirror image)
dfDataIptwStabMirror <- subset(dfDataIptwStab, group != "All")
dfDataIptwStabMirror$y[dfDataIptwStabMirror$group != "RHC"] <- -1 * dfDataIptwStabMirror$y[dfDataIptwStabMirror$group != "RHC"]
plotIptwStabMirror <- layer(geom = "line", data = dfDataIptwStabMirror, size = 2)
plotMirrorTotal + coord_flip() + plotIptwStabMirror + labs(title = "Stabilized IPTW")
## Stabilized IPTW analysis
glmIptwStab <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = iptwStab)
## The regular SE and confidence intervals are not valid
GetConfInt(glmIptwStab)
OR lower upper z value Pr(>|z|)
(Intercept) 1.685 1.575 1.803 15.100 1.613e-51
swang1RHC 1.272 1.136 1.426 4.143 3.425e-05
GetConfInt(glmIptw)
OR lower upper z value Pr(>|z|)
(Intercept) 1.685 1.598 1.777 19.190 4.484e-82
swang1RHC 1.272 1.177 1.375 6.075 1.240e-09
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmIptwStab, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.685 1.548 1.833 12.105 9.890e-34
swang1RHC 1.272 1.089 1.487 3.029 2.454e-03
## Robust variance estimator using geepack::geeglm()
geeglmIptwStab <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = iptwStab,
corstr = "exchangeable"
)
GetConfInt(geeglmIptwStab)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.685 1.548 1.833 146.540 0.000000
swang1RHC 1.272 1.089 1.487 9.174 0.002454
SMRW
## Plot SMRW
dfDataSmrw <- CreateDataset(data = rhc, ps = "ps", weights = "smrw")
plotSmrw <- layer(geom = "line", data = dfDataSmrw, size = 2)
plotBase + plotOrig + plotSmrw + labs(title = "SMRW")
## Plot SMRW (Mirror image)
dfDataSmrwMirror <- subset(dfDataSmrw, group != "All")
dfDataSmrwMirror$y[dfDataSmrwMirror$group != "RHC"] <- -1 * dfDataSmrwMirror$y[dfDataSmrwMirror$group != "RHC"]
plotSmrwMirror <- layer(geom = "line", data = dfDataSmrwMirror, size = 2)
plotMirrorTotal + coord_flip() + plotSmrwMirror + labs(title = "SMRW")
## SMRW analysis
glmSmrw <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = smrw)
## The regular SE and confidence intervals are not valid
GetConfInt(glmSmrw)
OR lower upper z value Pr(>|z|)
(Intercept) 1.661 1.525 1.810 11.621 3.207e-31
swang1RHC 1.282 1.132 1.451 3.921 8.830e-05
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmSmrw, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.661 1.436 1.921 6.829 8.527e-12
swang1RHC 1.282 1.080 1.521 2.844 4.456e-03
## Robust variance estimator using geepack::geeglm()
geeglmSmrw <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = smrw,
corstr = "exchangeable"
)
GetConfInt(geeglmSmrw)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.661 1.436 1.921 46.641 8.527e-12
swang1RHC 1.282 1.080 1.521 8.088 4.456e-03
Matching weights
## Plot matching weights
dfDataMatchWeight <- CreateDataset(data = rhc, ps = "ps", weights = "matchWeight")
plotMatchWeight <- layer(geom = "line", data = dfDataMatchWeight, size = 2)
plotBase + plotOrig + plotMatchWeight + labs(title = "Matching weights")
## Plot matching weights (Mirror image)
dfDataMatchWeightMirror <- subset(dfDataMatchWeight, group != "All")
dfDataMatchWeightMirror$y[dfDataMatchWeightMirror$group != "RHC"] <- -1 * dfDataMatchWeightMirror$y[dfDataMatchWeightMirror$group != "RHC"]
plotMatchWeightMirror <- layer(geom = "line", data = dfDataMatchWeightMirror, size = 2)
plotMirrorTotal + coord_flip() + plotMatchWeightMirror + labs(title = "Matching weights")
## Matching weight analysis
glmMatchWeight <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = matchWeight)
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmMatchWeight, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.698 1.555 1.853 11.820 3.077e-32
swang1RHC 1.307 1.145 1.493 3.961 7.466e-05
## Robust variance estimator using geepack::geeglm()
geeglmMatchWeight <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = matchWeight,
corstr = "exchangeable"
)
GetConfInt(geeglmMatchWeight)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.698 1.555 1.853 139.71 0.00000000
swang1RHC 1.307 1.145 1.493 15.69 0.00007466