How To Use Propensity Score Analysis: http://www.mc.vanderbilt.edu/crc/workshop_files/2008-04-11.pdf
Vanderbilt Biostatistics Wiki>Main Web>DataSets: http://biostat.mc.vanderbilt.edu/twiki/bin/view/Main/DataSets
Illustration of PS uses: http://rpubs.com/kaz_yos/use-ps
Propensity score demos: http://rpubs.com/kaz_yos/epi271-lab3
AHRQ Summary Variables in Observational Research: Propensity Scores and Disease Risk Scores: http://effectivehealthcare.ahrq.gov/index.cfm/search-for-guides-reviews-and-reports/?productid=1084&pageaction=displayproduct (quotes are from this article).
Colors in ggplot2: http://www.cookbook-r.com/Graphs/Colors_(ggplot2)/
## plotting
library(ggplot2)
## dlply
library(plyr)
## ROC
library(pROC)
## Load nonrandom for PS matching
library(nonrandom)
## Load Matching for PS matching
library(Matching)
## Load sandwich package for robust sandwich covariance matrix estimators
library(sandwich)
## Load lmtest package for coeftest
library(lmtest)
## Load geepack for robust sandwich covariance matrix estimators
library(geepack)
## Load tableone for summarizing regression results
library(tableone)
## Right heart cath dataset
rhc <- read.csv("http://biostat.mc.vanderbilt.edu/wiki/pub/Main/DataSets/rhc.csv")
## first 6 rows
head(rhc)
X cat1 cat2 ca sadmdte dschdte dthdte lstctdte death cardiohx chfhx dementhx psychhx
1 1 COPD <NA> Yes 11142 11151 NA 11382 No 0 0 0 0
2 2 MOSF w/Sepsis <NA> No 11799 11844 11844 11844 Yes 1 1 0 0
3 3 MOSF w/Malignancy MOSF w/Sepsis Yes 12083 12143 NA 12400 No 0 0 0 0
4 4 ARF <NA> No 11146 11183 11183 11182 Yes 0 0 0 0
5 5 MOSF w/Sepsis <NA> No 12035 12037 12037 12036 Yes 0 0 0 0
6 6 COPD <NA> No 12389 12396 NA 12590 No 0 1 0 0
chrpulhx renalhx liverhx gibledhx malighx immunhx transhx amihx age sex edu surv2md1 das2d3pc t3d30 dth30
1 1 0 0 0 1 0 0 0 70.25 Male 12.000 0.641 23.50 30 No
2 0 0 0 0 0 1 1 0 78.18 Female 12.000 0.755 14.75 30 No
3 0 0 0 0 1 1 0 0 46.09 Female 14.070 0.317 18.14 30 No
4 0 0 0 0 0 1 0 0 75.33 Female 9.000 0.441 22.93 30 No
5 0 0 0 0 0 0 0 0 67.91 Male 9.945 0.437 21.05 2 Yes
6 1 0 0 0 0 0 0 0 86.08 Female 8.000 0.665 17.50 30 No
aps1 scoma1 meanbp1 wblc1 hrt1 resp1 temp1 pafi1 alb1 hema1 bili1 crea1 sod1 pot1 paco21 ph1 swang1 wtkilo1
1 46 0 41 22.10 124 10 38.7 68.0 3.5 58.0 1.01 1.2 145 4.000 40 7.359 No RHC 64.7
2 50 0 63 28.90 137 38 38.9 218.3 2.6 32.5 0.70 0.6 137 3.300 34 7.329 RHC 45.7
3 82 0 57 0.05 130 40 36.4 275.5 3.5 21.1 1.01 2.6 146 2.900 16 7.359 RHC 0.0
4 48 0 55 23.30 58 26 35.8 156.7 3.5 26.3 0.40 1.7 117 5.800 30 7.460 No RHC 54.6
5 72 41 65 29.70 125 27 34.8 478.0 3.5 24.0 1.01 3.6 126 5.800 17 7.229 RHC 78.4
6 38 0 115 18.00 134 36 39.2 184.2 3.1 30.5 1.01 1.4 138 5.399 68 7.300 No RHC 54.9
dnr1 ninsclas resp card neuro gastr renal meta hema seps trauma ortho adld3p urin1 race income
1 No Medicare Yes Yes No No No No No No No No 0 NA white Under $11k
2 No Private & Medicare No No No No No No No Yes No No NA 1437 white Under $11k
3 No Private No Yes No No No No No No No No NA 599 white $25-$50k
4 No Private & Medicare Yes No No No No No No No No No NA NA white $11-$25k
5 Yes Medicare No Yes No No No No No No No No NA 64 white Under $11k
6 No Medicare Yes No No No No No No No No No 0 242 white Under $11k
ptid
1 5
2 7
3 9
4 10
5 11
6 12
## Show outcome (death) and exposure (swang1)
addmargins(table(rhc[,c("swang1", "death")]))
death
swang1 No Yes Sum
No RHC 1315 2236 3551
RHC 698 1486 2184
Sum 2013 3722 5735
## Create weights for plotting
rhc$one <- 1
## Define function to obtain ORs with 95% confidence intervals from a result matrix
GetConfInt <- function(obj) {
logitsticModel <- FALSE
if (identical(class(obj), c("glm", "lm")) == TRUE) {
mat <- coef(summary(obj))
logitsticModel <- TRUE
} else if (identical(class(obj), c("geeglm", "gee", "glm")) == TRUE) {
mat <- coef(summary(obj))
} else if (identical(class(obj), c("coeftest")) == TRUE) {
mat <- obj
} else if (identical(class(obj), c("matrix")) == TRUE) {
mat <- obj
} else {
stop("Not a supported object")
}
## Add point estimates
matRes <- mat[, 1, drop = F]
## 1.96 * SE
matSe <- mat[, 2, drop = F] * qnorm(0.975)
## Estimate, lower 95% limit, upper 95% limit
matRes <- cbind(matRes, (matRes - matSe), (matRes + matSe))
## Name
colnames(matRes) <- c("OR","lower","upper")
## Exponentiate
matRes <- exp(matRes)
## Add p-value
matRes <- cbind(matRes, mat[, 3:4, drop = F])
if (logitsticModel == TRUE) {
matRes[, c("lower","upper")] <- exp(suppressMessages(confint(obj)))
}
## Show
matRes
}
Estimate the effect within each combination of confounder values.
## Crude analysis (confounded!)
glmCrude <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc)
GetConfInt(glmCrude)
OR lower upper z value Pr(>|z|)
(Intercept) 1.700 1.589 1.821 15.276 1.113e-52
swang1RHC 1.252 1.119 1.402 3.905 9.425e-05
## Fit a "regular" fully adjusted model
glmFull <- glm(formula = death ~ swang1 + age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
family = binomial(link = "logit"),
data = rhc)
## Show result
ShowRegTable(glmFull)
exp(beta) [confint] p
(Intercept) 167.87 [0.30, 96818.05] 0.113
swang1RHC 1.38 [1.19, 1.60] <0.001
age 1.01 [1.01, 1.02] <0.001
sexMale 1.33 [1.17, 1.52] <0.001
raceother 1.06 [0.79, 1.43] 0.709
racewhite 0.99 [0.82, 1.19] 0.891
edu 1.01 [0.99, 1.03] 0.412
income$11-$25k 1.20 [0.92, 1.57] 0.184
income$25-$50k 1.02 [0.78, 1.34] 0.875
incomeUnder $11k 1.42 [1.09, 1.86] 0.009
ninsclasMedicare 1.42 [1.11, 1.83] 0.005
ninsclasMedicare & Medicaid 1.31 [0.96, 1.81] 0.089
ninsclasNo insurance 1.48 [1.08, 2.03] 0.014
ninsclasPrivate 1.22 [0.97, 1.54] 0.091
ninsclasPrivate & Medicare 1.09 [0.84, 1.42] 0.506
cat1CHF 1.26 [0.92, 1.74] 0.152
cat1Cirrhosis 1.13 [0.71, 1.81] 0.601
cat1Colon Cancer 0.29 [0.04, 2.26] 0.224
cat1Coma 1.80 [1.27, 2.58] 0.001
cat1COPD 1.29 [0.94, 1.77] 0.112
cat1Lung Cancer 4.01 [1.26, 17.93] 0.034
cat1MOSF w/Malignancy 1.74 [1.14, 2.71] 0.012
cat1MOSF w/Sepsis 0.91 [0.76, 1.09] 0.315
das2d3pc 0.96 [0.95, 0.97] <0.001
dnr1Yes 2.32 [1.79, 3.03] <0.001
caNo 0.18 [0.05, 0.58] 0.007
caYes 0.43 [0.27, 0.66] <0.001
surv2md1 0.04 [0.02, 0.09] <0.001
aps1 1.01 [1.00, 1.01] 0.077
scoma1 1.00 [1.00, 1.00] 0.791
wtkilo1 1.00 [1.00, 1.00] 0.022
temp1 0.95 [0.91, 0.99] 0.014
meanbp1 1.00 [1.00, 1.00] 0.696
resp1 1.00 [1.00, 1.01] 0.196
hrt1 1.00 [1.00, 1.00] 0.099
pafi1 1.00 [1.00, 1.00] 0.010
paco21 1.00 [0.99, 1.00] 0.604
ph1 0.86 [0.39, 1.86] 0.696
wblc1 1.01 [1.00, 1.01] 0.048
hema1 0.99 [0.98, 1.00] 0.056
sod1 1.00 [0.99, 1.01] 0.800
pot1 1.01 [0.94, 1.08] 0.834
crea1 1.02 [0.98, 1.07] 0.334
bili1 1.03 [1.01, 1.05] 0.002
alb1 1.00 [0.92, 1.10] 0.975
respYes 1.25 [1.05, 1.47] 0.010
cardYes 1.16 [0.98, 1.38] 0.091
neuroYes 1.39 [1.09, 1.78] 0.009
gastrYes 1.34 [1.08, 1.68] 0.009
renalYes 0.85 [0.62, 1.17] 0.309
metaYes 1.11 [0.82, 1.52] 0.509
hemaYes 2.21 [1.59, 3.12] <0.001
sepsYes 1.23 [1.01, 1.49] 0.038
traumaYes 0.87 [0.46, 1.63] 0.673
orthoYes 1.27 [0.24, 9.57] 0.789
cardiohx 1.23 [1.01, 1.49] 0.036
chfhx 1.63 [1.32, 2.01] <0.001
dementhx 1.30 [1.04, 1.65] 0.025
psychhx 0.98 [0.77, 1.25] 0.875
chrpulhx 1.21 [1.00, 1.49] 0.057
renalhx 1.34 [0.93, 1.94] 0.123
liverhx 1.35 [0.93, 1.99] 0.119
gibledhx 1.27 [0.81, 2.00] 0.298
malighx 0.51 [0.14, 1.40] 0.232
immunhx 1.27 [1.10, 1.48] 0.001
transhx 0.92 [0.76, 1.11] 0.384
amihx 0.87 [0.62, 1.23] 0.431
## PS model
psModel <- glm(formula = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
family = binomial(link = "logit"),
data = rhc)
## PS (predicted probability of treatment)
rhc$ps <- predict(psModel, type = "response")
## PS model diagnostic by ROC
rocPsModel <- roc(swang1 ~ ps, data = rhc)
plot(rocPsModel, legacy.axes = TRUE)
Call:
roc.formula(formula = swang1 ~ ps, data = rhc)
Data: ps in 3551 controls (swang1 No RHC) < 2184 cases (swang1 RHC).
Area under the curve: 0.798
## Define a function to create density data (x = ps, y = density) frame given weights
CreateDataset <- function(data = rhc, ps = "ps", weights = "one") {
## Density for overall cohort
dfDataAll <- suppressWarnings(density(x = data[,ps], weights = data[,weights]))
dfDataAll <- data.frame(dfDataAll[c("x","y")])
dfDataAll$group <- "All"
## Density for RHC
dfDataRhc <- suppressWarnings(density(x = subset(data, swang1 == "RHC")[,ps], weights = subset(data, swang1 == "RHC")[,weights]))
dfDataRhc <- data.frame(dfDataRhc[c("x","y")])
dfDataRhc$group <- "RHC"
## Density for No RHC
dfDataNoRhc <- suppressWarnings(density(x = subset(data, swang1 == "No RHC")[,ps], weights = subset(data, swang1 != "RHC")[,weights]))
dfDataNoRhc <- data.frame(dfDataNoRhc[c("x","y")])
dfDataNoRhc$group <- "No RHC"
## combine datasets
dfData <- rbind(dfDataAll, dfDataRhc, dfDataNoRhc)
## Return
dfData
}
## Create dataset with weight = 1 for every subject
dfData <- CreateDataset(data = rhc, ps = "ps", weights = "one")
## Framewrok only (no layer)
plotBase <- ggplot(data = dfData,
mapping = aes(x = x, y = y, color = group)) +
scale_x_continuous(limit = c(-0.1,1.1), breaks = c(0,0.5,1.0)) +
scale_y_continuous(breaks = NULL) +
## scale_color_manual(values = c("All" = "black", "RHC" = "red", "No RHC" = "blue")) +
scale_color_manual(values = c("All" = "#999999", "RHC" = "#D55E00", "No RHC" = "#0072B2")) +
labs(title = "PS distribution by treatment", x = "Propensity score", y = "Distribution") +
theme_bw() +
theme(legend.key = element_blank())
## Plot with total
plotOrig <- layer(geom = "line")
plotBase + plotOrig
## Plot like mirror image
dfDataMirror <- dfData
dfDataMirror <- subset(dfDataMirror, group != "All")
dfDataMirror <- within(dfDataMirror, {
y[group != "RHC"] <- (y * -1)[group != "RHC"]
})
plotMirror <- layer(geom = "line", data = dfDataMirror)
plotBase + plotMirror + geom_hline(yintercept = 0)
## Add total distribution both above and below (y = -y)
plotTotalPos <- layer(geom = "line", data = subset(dfData, group == "All"), size = 0.1)
plotTotalNeg <- layer(geom = "line", data = subset(dfData, group == "All"), mapping = aes(y = -y), size = 0.1)
plotMirrorTotal <- plotBase + plotMirror + geom_hline(yintercept = 0) + plotTotalPos + plotTotalNeg
plotMirrorTotal
## Flip
plotMirrorTotal + coord_flip()
Estimate the effect within each propensity score quintile. The exposed and the unexposed within each quintile are considered exchangeable.
## Check quintiles
quintilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.2, to = 0.8, by = 0.2))
quintilePoints <- c(0,quintilePoints,1)
## Plot the idea
plotBase + plotOrig + geom_vline(xintercept = quintilePoints) +
labs(title = "Stratification into quintiles")
## Plot the idea (Mirror image)
plotMirrorTotal + coord_flip()+ geom_vline(xintercept = quintilePoints) +
labs(title = "Stratification into quintiles")
## Add quintile variable
rhc$psQunintile <- cut(x = rhc$ps, breaks = quintilePoints, labels = paste0("q",1:5))
## Perform unadjusted analysis in each quintile
glmQuint <- dlply(.data = rhc,
.variables = c("psQunintile"),
.drop = TRUE, .parallel = TRUE,
.fun = function(DF) {
## Perform
glmWithinQuint <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = DF)
## Return
glmWithinQuint
})
## Create GetConfIn objects in each quintile
resGlmQuint <- lapply(glmQuint,
FUN = function(model) {
ShowRegTable(model, printToggle = FALSE)[2, , drop = F]
})
## Combine
resGlmQuint <- do.call(rbind, resGlmQuint)
## Name
rownames(resGlmQuint) <- paste0("q", 1:5, ": ", rownames(resGlmQuint))
## Show
print(resGlmQuint, quote = F)
exp(beta) [confint] p
q1: swang1RHC 1.37 [0.83, 2.33] 0.234
q2: swang1RHC 1.50 [1.10, 2.06] 0.011
q3: swang1RHC 1.28 [0.99, 1.66] 0.056
q4: swang1RHC 1.39 [1.09, 1.78] 0.009
q5: swang1RHC 1.10 [0.83, 1.45] 0.502
The propensity score is included in the outcome model. It is simple, but the relationship between the PS and the outcome must be in the correct functional form. It is basically a stratification method with additional assumptions and fine strata. If propensity score is modeled into quintiles and the PS quintile * exposure interaction term is included, it is equivalent to stratification in the previous sections.
## Plot
plotBase + plotOrig + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
labs(title = "Adjustment (stratification with a model)")
## Plot (Mirror image)
plotMirrorTotal + coord_flip() + geom_vline(xintercept = seq(from = 0, to = 1, by = 0.05)) +
labs(title = "Adjustment (stratification with a model)")
## Perform PS-adjusted logistic regression (linear PS)
glmPsAdjLinear <- glm(formula = death ~ swang1 + ps,
family = binomial(link = "logit"),
data = rhc)
ShowRegTable(glmPsAdjLinear)
exp(beta) [confint] p
(Intercept) 1.77 [1.60, 1.95] <0.001
swang1RHC 1.30 [1.14, 1.48] <0.001
ps 0.87 [0.67, 1.12] 0.287
## Perform PS-adjusted logistic regression (quadratic PS)
glmPsAdjQuad <- glm(formula = death ~ swang1 + ps + I(ps^2),
family = binomial(link = "logit"),
data = rhc)
ShowRegTable(glmPsAdjQuad)
exp(beta) [confint] p
(Intercept) 1.74 [1.50, 2.01] <0.001
swang1RHC 1.30 [1.14, 1.48] <0.001
ps 0.99 [0.43, 2.25] 0.978
I(ps^2) 0.86 [0.34, 2.18] 0.750
## Perform PS-adjusted logistic regression (PS quintiles)
glmPsAdjQuintile <- glm(formula = death ~ swang1 + psQunintile,
family = binomial(link = "logit"),
data = rhc)
ShowRegTable(glmPsAdjQuintile)
exp(beta) [confint] p
(Intercept) 1.73 [1.54, 1.96] <0.001
swang1RHC 1.31 [1.15, 1.49] <0.001
psQunintileq2 0.99 [0.84, 1.18] 0.934
psQunintileq3 0.95 [0.80, 1.13] 0.570
psQunintileq4 1.03 [0.86, 1.24] 0.712
psQunintileq5 0.85 [0.70, 1.03] 0.107
## Perform PS-adjusted logistic regression (PS quintiles with interaction)
glmPsAdjQuintileInt <- glm(formula = death ~ swang1 + psQunintile + swang1:psQunintile,
family = binomial(link = "logit"),
data = rhc)
ShowRegTable(glmPsAdjQuintileInt)
exp(beta) [confint] p
(Intercept) 1.73 [1.53, 1.96] <0.001
swang1RHC 1.37 [0.83, 2.33] 0.234
psQunintileq2 0.97 [0.81, 1.17] 0.745
psQunintileq3 0.96 [0.79, 1.17] 0.678
psQunintileq4 1.01 [0.81, 1.25] 0.954
psQunintileq5 0.97 [0.74, 1.28] 0.851
swang1RHC:psQunintileq2 1.10 [0.59, 1.98] 0.763
swang1RHC:psQunintileq3 0.94 [0.52, 1.65] 0.829
swang1RHC:psQunintileq4 1.02 [0.57, 1.78] 0.947
swang1RHC:psQunintileq5 0.81 [0.44, 1.43] 0.468
## Perform PS-adjusted logistic regression (PS decile)
## Create decile category variable
decilePoints <- quantile(x = rhc$ps, probs = seq(from = 0.1, to = 0.9, by = 0.1))
decilePoints <- c(0,decilePoints,1)
rhc$psDecile <- cut(x = rhc$ps, breaks = decilePoints, labels = paste0("q",1:10))
## Perform PS-adjusted logistic regression (PS deciles)
glmPsAdjDecile <- glm(formula = death ~ swang1 + psDecile,
family = binomial(link = "logit"),
data = rhc)
ShowRegTable(glmPsAdjDecile)
exp(beta) [confint] p
(Intercept) 1.79 [1.51, 2.12] <0.001
swang1RHC 1.31 [1.15, 1.49] <0.001
psDecileq2 0.94 [0.74, 1.20] 0.633
psDecileq3 0.98 [0.77, 1.25] 0.887
psDecileq4 0.95 [0.74, 1.21] 0.653
psDecileq5 0.89 [0.70, 1.13] 0.345
psDecileq6 0.96 [0.75, 1.23] 0.740
psDecileq7 0.99 [0.77, 1.27] 0.948
psDecileq8 1.02 [0.79, 1.31] 0.887
psDecileq9 0.83 [0.64, 1.07] 0.144
psDecileq10 0.83 [0.64, 1.08] 0.169
## Perform PS-adjusted logistic regression (PS deciles with interaction)
glmPsAdjDecileInt <- glm(formula = death ~ swang1 + psDecile + swang1:psDecile,
family = binomial(link = "logit"),
data = rhc)
ShowRegTable(glmPsAdjDecileInt)
exp(beta) [confint] p
(Intercept) 1.83 [1.54, 2.18] <0.001
swang1RHC 0.62 [0.23, 1.66] 0.326
psDecileq2 0.89 [0.70, 1.15] 0.374
psDecileq3 0.94 [0.73, 1.22] 0.646
psDecileq4 0.89 [0.69, 1.16] 0.400
psDecileq5 0.91 [0.69, 1.19] 0.485
psDecileq6 0.91 [0.69, 1.20] 0.500
psDecileq7 0.88 [0.66, 1.18] 0.390
psDecileq8 1.05 [0.77, 1.43] 0.779
psDecileq9 0.93 [0.66, 1.31] 0.670
psDecileq10 0.91 [0.59, 1.44] 0.687
swang1RHC:psDecileq2 3.06 [0.95, 9.82] 0.058
swang1RHC:psDecileq3 2.39 [0.80, 7.05] 0.111
swang1RHC:psDecileq4 2.48 [0.85, 7.20] 0.091
swang1RHC:psDecileq5 1.85 [0.64, 5.25] 0.242
swang1RHC:psDecileq6 2.32 [0.81, 6.55] 0.111
swang1RHC:psDecileq7 2.63 [0.92, 7.42] 0.066
swang1RHC:psDecileq8 1.94 [0.68, 5.47] 0.207
swang1RHC:psDecileq9 1.73 [0.60, 4.89] 0.300
swang1RHC:psDecileq10 1.85 [0.62, 5.41] 0.260
The exposed and the unexposed are matched based on their propensity scores, creating an RCT like cohort of exchangeable exposed and unexposed.
nonrandom package
## Propensity score estimation (pscore object returned)
psObj <- pscore(formula = swang1 ~ age + sex + race + edu + income + ninsclas + cat1 + das2d3pc + dnr1 + ca + surv2md1 + aps1 + scoma1 + wtkilo1 + temp1 + meanbp1 + resp1 + hrt1 + pafi1 + paco21 + ph1 + wblc1 + hema1 + sod1 + pot1 + crea1 + bili1 + alb1 + resp + card + neuro + gastr + renal + meta + hema + seps + trauma + ortho + cardiohx + chfhx + dementhx + psychhx + chrpulhx + renalhx + liverhx + gibledhx + malighx + immunhx + transhx + amihx,
data = rhc,
family = "binomial",
name.pscore = "pscore")
## Define a function to match and show
MatchAndPlot <- function(x = 0.05) {
## Matching 1:1 (caliper x * sd(logit(PS)))
psMatchObj <- ps.match(object = psObj,
who.treated = "RHC",
ratio = 1, # default 1:1 match
caliper = "logit", # caliper = x * SD(logit(PS))
x = x # x for x * SD(PS)
)
## Show results
print(psMatchObj)
## Extract density
dfDataMatched <- CreateDataset(data = psMatchObj$data.matched, ps = "ps", weights = "one")
## Plot matched
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
plotObj <- plotBase + plotOrig + plotMatched +
labs(title = paste("nonrandom::ps.match() Caliper", x,"* SD(logit(PS))"))
## Show plot
print(plotObj)
## Show results
glmMatched <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = psMatchObj$data.matched)
## Print result
ShowRegTable(glmMatched)
}
## caliper 0.2 * sd(logit(PS)): Most common setting.
MatchAndPlot(x = 0.2)
Matched by: pscore
Matching parameter:
Caliper size: "0.279"
Ratio: "1"
Who is treated?: "RHC"
Matching information:
Untreated to treated?: TRUE
Best match?: TRUE
Matching data:
Number of treated obs.: 2184
Number of matched treated obs.: 2184
Number of untreated obs.: 3551
Number of matched untreated obs.: 2184
Number of total matched obs.: 4368
Number of not matched obs.: 1367
Number of matching sets: 2184
Number of incomplete matching sets: 0
exp(beta) [confint] p
(Intercept) 1.69 [1.55, 1.85] <0.001
swang1RHC 1.26 [1.11, 1.43] <0.001
## caliper 0.1 * sd(logit(PS))
MatchAndPlot(x = 0.1)
Matched by: pscore
Matching parameter:
Caliper size: "0.139"
Ratio: "1"
Who is treated?: "RHC"
Matching information:
Untreated to treated?: TRUE
Best match?: TRUE
Matching data:
Number of treated obs.: 2184
Number of matched treated obs.: 1969
Number of untreated obs.: 3551
Number of matched untreated obs.: 1969
Number of total matched obs.: 3938
Number of not matched obs.: 1797
Number of matching sets: 1969
Number of incomplete matching sets: 0
exp(beta) [confint] p
(Intercept) 1.69 [1.55, 1.86] <0.001
swang1RHC 1.32 [1.15, 1.50] <0.001
## caliper 0.05 * sd(logit(PS))
MatchAndPlot(x = 0.05)
Matched by: pscore
Matching parameter:
Caliper size: "0.07"
Ratio: "1"
Who is treated?: "RHC"
Matching information:
Untreated to treated?: TRUE
Best match?: TRUE
Matching data:
Number of treated obs.: 2184
Number of matched treated obs.: 1729
Number of untreated obs.: 3551
Number of matched untreated obs.: 1729
Number of total matched obs.: 3458
Number of not matched obs.: 2277
Number of matching sets: 1729
Number of incomplete matching sets: 0
exp(beta) [confint] p
(Intercept) 1.72 [1.56, 1.90] <0.001
swang1RHC 1.28 [1.11, 1.47] 0.001
Matching package
## Define similarly to Matching::Match()
MatchAndPlot2 <- function(x = 0.05, returnData = FALSE) {
## Match by Matching::Match()
listMatch <- Match(Tr = (rhc$swang1 == "RHC"), # Need to be in 0,1
X = log(rhc$ps / (1 - rhc$ps)), # logit of PS,i.e., log(PS/(1-PS)) as matching variable
M = 1, # 1:1 match
caliper = x, # caliper = x * SD(each matching variable)
replace = FALSE,
ties = TRUE,
version = "fast")
## Show number matched
## print(str(listMatch[c("index.treated","index.control")]))
print(summary(listMatch))
## Extract matched data
psMatchData <- rhc[unlist(listMatch[c("index.treated","index.control")]), ]
## Extract density
dfDataMatched <- CreateDataset(data = psMatchData, ps = "ps", weights = "one")
## Plot matched
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
plotObj <- plotBase + plotOrig + plotMatched +
labs(title = paste("Matching::Match() Caliper", x,"* SD(logit(PS))"))
## Show plot
print(plotObj)
## Show results
glmMatched <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = psMatchData)
ShowRegTable(glmMatched)
## Return dataset for plotting
if(returnData == TRUE) {
return(dfDataMatched)
}
}
## caliper 0.2 * sd(logit(PS)): Most common setting.
## MatchAndPlot2(x = 0.2)
dfDataMatched <- MatchAndPlot2(x = 0.2, returnData = TRUE)
Estimate... 0
SE......... 0
T-stat..... NaN
p.val...... NA
Original number of observations.............. 5735
Original number of treated obs............... 2184
Matched number of observations............... 1563
Matched number of observations (unweighted). 1563
Caliper (SDs)........................................ 0.2
Number of obs dropped by 'exact' or 'caliper' 621
exp(beta) [confint] p
(Intercept) 1.71 [1.54, 1.89] <0.001
swang1RHC 1.32 [1.13, 1.53] <0.001
## caliper 0.1 * sd(logit(PS))
MatchAndPlot2(x = 0.1)
Estimate... 0
SE......... 0
T-stat..... NaN
p.val...... NA
Original number of observations.............. 5735
Original number of treated obs............... 2184
Matched number of observations............... 1546
Matched number of observations (unweighted). 1546
Caliper (SDs)........................................ 0.1
Number of obs dropped by 'exact' or 'caliper' 638
exp(beta) [confint] p
(Intercept) 1.77 [1.60, 1.97] <0.001
swang1RHC 1.28 [1.10, 1.49] 0.001
## caliper 0.05 * sd(logit(PS))
MatchAndPlot2(x = 0.05)
Estimate... 0
SE......... 0
T-stat..... NaN
p.val...... NA
Original number of observations.............. 5735
Original number of treated obs............... 2184
Matched number of observations............... 1538
Matched number of observations (unweighted). 1538
Caliper (SDs)........................................ 0.05
Number of obs dropped by 'exact' or 'caliper' 646
exp(beta) [confint] p
(Intercept) 1.72 [1.55, 1.91] <0.001
swang1RHC 1.32 [1.13, 1.53] <0.001
## Change data for mirror plot
dfDataMatched <- subset(dfDataMatched, group != "All")
dfDataMatched$y[dfDataMatched$group != "RHC"] <- -1 * dfDataMatched$y[dfDataMatched$group != "RHC"]
## Create
plotMatched <- layer(geom = "line", data = dfDataMatched, size = 2)
## Plot
plotMirrorTotal + plotMatched + coord_flip() +
labs(title = "PS Matching")
There is confounding when the exposed and the unexposed have different distributions of propensity score. These distributions can be re-weighted to have the identical distribution. Inverse probability of treatment weight (IPTW) and standardized mortality ratio weight (SMRW) are the most common ones.
Create weights
rhc <- within(rhc, {
## Create inverse probability of treatment assignment weights (IPTW)
iptw <- NA
## 1/PS for treated
iptw[swang1 == "RHC"] <- 1/ps[swang1 == "RHC"]
## 1/(1-PS) for untreated (1/probability of not treated)
iptw[swang1 != "RHC"] <- (1/(1 - ps))[swang1 != "RHC"]
## Create SMR (Average Treatment effect in Treated, ATT) weights
smrw <- NA
## 1 for treated
smrw[swang1 == "RHC"] <- 1
## PS/(1-PS), i.e., odds of treatment for untreated
smrw[swang1 != "RHC"] <- (ps/(1 - ps))[swang1 != "RHC"]
## Numerator weight for stabilization
probRhc <- NA
## P(RHC) for treated
probRhc[swang1 == "RHC"] <- sum(swang1 == "RHC")/length(swang1)
## P(No RHC) for untreated
probRhc[swang1 != "RHC"] <- sum(swang1 != "RHC")/length(swang1)
## Create IPTW stabilized with marginal probabilities of assigned treatment
iptwStab <- iptw * probRhc
## Create matching weights
matchWeightNumerator <- pmin(ps, 1 - ps)
matchWeight <- matchWeightNumerator * iptw
})
## Check weights
## iptw has a mean of 2
## stabilized iptw has a mean of 1
summary(rhc[,c("iptw","iptwStab","smrw","matchWeight")])
iptw iptwStab smrw matchWeight
Min. : 1.00 Min. : 0.389 Min. : 0.003 Min. :0.0027
1st Qu.: 1.18 1st Qu.: 0.657 1st Qu.: 0.226 1st Qu.:0.1833
Median : 1.47 Median : 0.780 Median : 0.915 Median :0.4728
Mean : 1.98 Mean : 0.993 Mean : 0.771 Mean :0.5306
3rd Qu.: 2.08 3rd Qu.: 1.055 3rd Qu.: 1.000 3rd Qu.:1.0000
Max. :52.52 Max. :20.001 Max. :24.488 Max. :1.0000
IPTW
## Plot IPTW
dfDataIptw <- CreateDataset(data = rhc, ps = "ps", weights = "iptw")
plotIptw <- layer(geom = "line", data = subset(dfDataIptw, group != "All"), size = 2)
plotBase + plotOrig + plotIptw + labs(title = "IPTW")
## Plot IPTW (Mirror image)
dfDataIptwMirror <- subset(dfDataIptw, group != "All")
dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"] <- -1 * dfDataIptwMirror$y[dfDataIptwMirror$group != "RHC"]
plotIptwMirror <- layer(geom = "line", data = dfDataIptwMirror, size = 2)
plotMirrorTotal + coord_flip() + plotIptwMirror + labs(title = "IPTW")
## IPTW analysis
glmIptw <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = iptw)
## The regular SE and confidence intervals are not valid
ShowRegTable(glmIptw)
exp(beta) [confint] p
(Intercept) 1.68 [1.60, 1.78] <0.001
swang1RHC 1.27 [1.18, 1.37] <0.001
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmIptw, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.685 1.548 1.833 12.105 9.890e-34
swang1RHC 1.272 1.089 1.487 3.029 2.454e-03
## Robust variance estimator using geepack::geeglm()
geeglmIptw <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = iptw,
corstr = "exchangeable"
)
GetConfInt(geeglmIptw)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.685 1.548 1.833 146.540 0.000000
swang1RHC 1.272 1.089 1.487 9.174 0.002454
Stabilized IPTW
## Plot IPTW stabilized with marginal probabilities of assigned treatment
dfDataIptwStab <- CreateDataset(data = rhc, ps = "ps", weights = "iptwStab")
plotIptwStab <- layer(geom = "line", data = dfDataIptwStab, size = 2)
plotBase + plotOrig + plotIptwStab + labs(title = "Stabilized IPTW")
## Plot stabilized IPTW (Mirror image)
dfDataIptwStabMirror <- subset(dfDataIptwStab, group != "All")
dfDataIptwStabMirror$y[dfDataIptwStabMirror$group != "RHC"] <- -1 * dfDataIptwStabMirror$y[dfDataIptwStabMirror$group != "RHC"]
plotIptwStabMirror <- layer(geom = "line", data = dfDataIptwStabMirror, size = 2)
plotMirrorTotal + coord_flip() + plotIptwStabMirror + labs(title = "Stabilized IPTW")
## Stabilized IPTW analysis
glmIptwStab <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = iptwStab)
## The regular SE and confidence intervals are not valid
ShowRegTable(glmIptwStab)
exp(beta) [confint] p
(Intercept) 1.68 [1.57, 1.80] <0.001
swang1RHC 1.27 [1.14, 1.43] <0.001
ShowRegTable(glmIptw)
exp(beta) [confint] p
(Intercept) 1.68 [1.60, 1.78] <0.001
swang1RHC 1.27 [1.18, 1.37] <0.001
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmIptwStab, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.685 1.548 1.833 12.105 9.890e-34
swang1RHC 1.272 1.089 1.487 3.029 2.454e-03
## Robust variance estimator using geepack::geeglm()
geeglmIptwStab <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = iptwStab,
corstr = "exchangeable"
)
GetConfInt(geeglmIptwStab)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.685 1.548 1.833 146.540 0.000000
swang1RHC 1.272 1.089 1.487 9.174 0.002454
SMRW
## Plot SMRW
dfDataSmrw <- CreateDataset(data = rhc, ps = "ps", weights = "smrw")
plotSmrw <- layer(geom = "line", data = dfDataSmrw, size = 2)
plotBase + plotOrig + plotSmrw + labs(title = "SMRW")
## Plot SMRW (Mirror image)
dfDataSmrwMirror <- subset(dfDataSmrw, group != "All")
dfDataSmrwMirror$y[dfDataSmrwMirror$group != "RHC"] <- -1 * dfDataSmrwMirror$y[dfDataSmrwMirror$group != "RHC"]
plotSmrwMirror <- layer(geom = "line", data = dfDataSmrwMirror, size = 2)
plotMirrorTotal + coord_flip() + plotSmrwMirror + labs(title = "SMRW")
## SMRW analysis
glmSmrw <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = smrw)
## The regular SE and confidence intervals are not valid
ShowRegTable(glmSmrw)
exp(beta) [confint] p
(Intercept) 1.66 [1.53, 1.81] <0.001
swang1RHC 1.28 [1.13, 1.45] <0.001
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmSmrw, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.661 1.436 1.921 6.829 8.527e-12
swang1RHC 1.282 1.080 1.521 2.844 4.456e-03
## Robust variance estimator using geepack::geeglm()
geeglmSmrw <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = smrw,
corstr = "exchangeable"
)
GetConfInt(geeglmSmrw)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.661 1.436 1.921 46.641 8.527e-12
swang1RHC 1.282 1.080 1.521 8.088 4.456e-03
Matching weights
## Plot matching weights
dfDataMatchWeight <- CreateDataset(data = rhc, ps = "ps", weights = "matchWeight")
plotMatchWeight <- layer(geom = "line", data = dfDataMatchWeight, size = 2)
plotBase + plotOrig + plotMatchWeight + labs(title = "Matching weights")
## Plot matching weights (Mirror image)
dfDataMatchWeightMirror <- subset(dfDataMatchWeight, group != "All")
dfDataMatchWeightMirror$y[dfDataMatchWeightMirror$group != "RHC"] <- -1 * dfDataMatchWeightMirror$y[dfDataMatchWeightMirror$group != "RHC"]
plotMatchWeightMirror <- layer(geom = "line", data = dfDataMatchWeightMirror, size = 2)
plotMirrorTotal + coord_flip() + plotMatchWeightMirror + labs(title = "Matching weights")
## Matching weight analysis
glmMatchWeight <- glm(formula = death ~ swang1,
family = binomial(link = "logit"),
data = rhc,
weights = matchWeight)
## Robust variance estimator using lmtest::coeftest() using sandwich::sandwich()
GetConfInt(coeftest(glmMatchWeight, vcov = sandwich))
OR lower upper z value Pr(>|z|)
(Intercept) 1.698 1.555 1.853 11.820 3.077e-32
swang1RHC 1.307 1.145 1.493 3.961 7.466e-05
## Robust variance estimator using geepack::geeglm()
geeglmMatchWeight <- geeglm(formula = as.numeric(death == "Yes") ~ swang1,
family = binomial(link = "logit"),
id = X,
data = rhc,
weights = matchWeight,
corstr = "exchangeable"
)
GetConfInt(geeglmMatchWeight)
OR lower upper Wald Pr(>|W|)
(Intercept) 1.698 1.555 1.853 139.71 0.00000000
swang1RHC 1.307 1.145 1.493 15.69 0.00007466