Create dataset
## Load data
library(sas7bdat)
dat <- read.sas7bdat("./epi271data.sas7bdat")
names(dat) <- tolower(names(dat))
## Create new variables
dat <- within(dat, {
## age ≥ 70
age70 <- as.numeric(age >= 70)
## submart ≥ 90
sumbart90 <- as.numeric(sumbartel >= 90)
## rankpre 4,5,6 to 5
rankpre[rankpre %in% c(4,5,6)] <- 5
## Categorical time
timeintcat <- cut(timeint, breaks = c(-Inf,1,3,5,Inf), labels = 1:4,
right = FALSE, include.lowest = T)
## Quantile of resident
residentq <- cut(residents, breaks = quantile(residents), labels = 1:4, include.lowest = T)
})
## Convert categoricals to factors
categoricals <- c("age5","age70","afib","aphasia","living","gender","rankpre","time","residentq","referral","paresis","sumbart90","timeintcat","transport","vigilanz","ward")
dat[categoricals] <- lapply(dat[categoricals],
function(var) {
var <- factor(var)
})
1. Identify your set of confounding variables
age5 afib aphasia cardiac gender htn hyperchol icu living rankpre residentq referral paresis prevstroke sumbart90 transport timeintcat vigilanz ward timeintcat*year age70*year
2. Calculate the association between t-PA treatment and death controlling for potential confounders using a traditional logistic regression model
## Fit a traditional logistic regression model
logit.traditional <- glm(death ~ tpa + age5 + afib + aphasia + cardiac + gender + htn + hyperchol + icu + living + rankpre + residentq + referral + paresis + prevstroke + sumbart90 + transport + timeintcat + vigilanz + ward + timeintcat:year + age70:year,
data = dat,
family = binomial(link = "logit"))
## Wald test
summary(logit.traditional)
Call:
glm(formula = death ~ tpa + age5 + afib + aphasia + cardiac +
gender + htn + hyperchol + icu + living + rankpre + residentq +
referral + paresis + prevstroke + sumbart90 + transport +
timeintcat + vigilanz + ward + timeintcat:year + age70:year,
family = binomial(link = "logit"), data = dat)
Deviance Residuals:
Min 1Q Median 3Q Max
-1.868 -0.316 -0.195 -0.106 3.328
Coefficients:
Estimate Std. Error z value Pr(>|z|)
(Intercept) -3.7701 0.5617 -6.71 1.9e-11 ***
tpa 0.6493 0.2093 3.10 0.00192 **
age52 1.1180 0.3490 3.20 0.00136 **
age53 1.0534 0.3487 3.02 0.00252 **
age54 1.2830 0.3352 3.83 0.00013 ***
age55 1.4219 0.3288 4.33 1.5e-05 ***
age56 1.2855 0.3277 3.92 8.7e-05 ***
age57 1.5415 0.3365 4.58 4.6e-06 ***
age58 1.7201 0.3344 5.14 2.7e-07 ***
afib1 0.3240 0.1087 2.98 0.00288 **
aphasia1 0.0918 0.1150 0.80 0.42473
cardiac 0.1852 0.1089 1.70 0.08895 .
gender1 -0.5661 0.1932 -2.93 0.00339 **
gender2 -0.4122 0.1872 -2.20 0.02765 *
htn -0.0124 0.1157 -0.11 0.91444
hyperchol -0.4905 0.1380 -3.55 0.00038 ***
icu 0.1338 0.2637 0.51 0.61205
living1 -0.1849 0.1774 -1.04 0.29750
living2 -0.1434 0.1551 -0.92 0.35508
living3 0.0309 0.2392 0.13 0.89734
rankpre1 -0.5071 0.2492 -2.03 0.04186 *
rankpre2 -0.6026 0.2612 -2.31 0.02105 *
rankpre3 -0.4020 0.2785 -1.44 0.14895
rankpre5 -0.1540 0.2727 -0.56 0.57211
residentq2 -0.4823 0.1488 -3.24 0.00119 **
residentq3 -0.3720 0.1472 -2.53 0.01149 *
residentq4 -0.5688 0.1440 -3.95 7.8e-05 ***
referral1 -0.0456 0.2301 -0.20 0.84306
referral2 0.0542 0.2206 0.25 0.80602
referral3 0.2877 0.2069 1.39 0.16436
referral4 0.0979 0.2440 0.40 0.68820
paresis1 -0.2921 0.3158 -0.93 0.35493
paresis2 0.4372 0.1582 2.76 0.00572 **
paresis3 0.6542 0.3116 2.10 0.03579 *
prevstroke -0.2971 0.1448 -2.05 0.04019 *
sumbart901 -0.3011 0.2091 -1.44 0.14978
transport1 -1.1560 0.3563 -3.24 0.00118 **
transport2 0.5993 0.2199 2.73 0.00643 **
transport3 0.1434 0.2194 0.65 0.51347
transport4 -0.2055 0.3706 -0.55 0.57910
timeintcat2 0.4682 0.3272 1.43 0.15251
timeintcat3 -0.2626 0.4559 -0.58 0.56463
timeintcat4 -0.5497 0.3732 -1.47 0.14076
vigilanz1 2.0840 0.2498 8.34 < 2e-16 ***
vigilanz2 0.8826 0.1895 4.66 3.2e-06 ***
vigilanz3 -0.4024 0.1779 -2.26 0.02370 *
ward1 0.4670 0.2376 1.97 0.04933 *
ward2 0.6719 0.2051 3.28 0.00105 **
ward3 1.2874 0.2498 5.15 2.6e-07 ***
timeintcat1:year -0.0850 0.0768 -1.11 0.26828
timeintcat2:year -0.2690 0.0842 -3.19 0.00141 **
timeintcat3:year -0.0733 0.1283 -0.57 0.56810
timeintcat4:year -0.0198 0.0975 -0.20 0.83943
year:age701 -0.0441 0.0353 -1.25 0.21143
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
(Dispersion parameter for binomial family taken to be 1)
Null deviance: 4086.7 on 9145 degrees of freedom
Residual deviance: 3057.7 on 9092 degrees of freedom
(854 observations deleted due to missingness)
AIC: 3166
Number of Fisher Scoring iterations: 8
## To display OR, epicalc::logistic.display() is easy.
library(epicalc)
res.epicalc <- logistic.display(logit.traditional)
res.epicalc
OR lower95ci upper95ci Pr(>|Z|)
tpa 1.9141 1.2700 2.8850 1.924e-03
age52 3.0587 1.5434 6.0620 1.358e-03
age53 2.8673 1.4475 5.6798 2.524e-03
age54 3.6074 1.8700 6.9591 1.297e-04
age55 4.1449 2.1761 7.8950 1.525e-05
age56 3.6166 1.9027 6.8744 8.748e-05
age57 4.6716 2.4158 9.0340 4.622e-06
age58 5.5853 2.9001 10.7567 2.687e-07
afib1 1.3827 1.1174 1.7110 2.875e-03
aphasia1 1.0961 0.8750 1.3732 4.247e-01
cardiac 1.2034 0.9722 1.4896 8.895e-02
gender1 0.5678 0.3888 0.8291 3.388e-03
gender2 0.6622 0.4588 0.9557 2.765e-02
htn 0.9876 0.7872 1.2391 9.144e-01
hyperchol 0.6123 0.4672 0.8025 3.783e-04
icu 1.1431 0.6817 1.9168 6.121e-01
living1 0.8312 0.5870 1.1769 2.975e-01
living2 0.8664 0.6393 1.1742 3.551e-01
living3 1.0313 0.6453 1.6483 8.973e-01
rankpre1 0.6022 0.3695 0.9815 4.186e-02
rankpre2 0.5474 0.3280 0.9133 2.105e-02
rankpre3 0.6690 0.3876 1.1548 1.490e-01
rankpre5 0.8572 0.5024 1.4628 5.721e-01
residentq2 0.6173 0.4612 0.8263 1.185e-03
residentq3 0.6894 0.5166 0.9199 1.149e-02
residentq4 0.5662 0.4270 0.7508 7.784e-05
referral1 0.9555 0.6087 1.4999 8.431e-01
referral2 1.0557 0.6851 1.6265 8.060e-01
referral3 1.3333 0.8889 1.9999 1.644e-01
referral4 1.1029 0.6837 1.7790 6.882e-01
paresis1 0.7467 0.4021 1.3865 3.549e-01
paresis2 1.5484 1.1355 2.1113 5.725e-03
paresis3 1.9236 1.0444 3.5431 3.579e-02
prevstroke 0.7430 0.5594 0.9868 4.019e-02
sumbart901 0.7400 0.4912 1.1148 1.498e-01
transport1 0.3147 0.1566 0.6327 1.176e-03
transport2 1.8208 1.1832 2.8019 6.430e-03
transport3 1.1542 0.7507 1.7744 5.135e-01
transport4 0.8142 0.3938 1.6833 5.791e-01
timeintcat2 1.5971 0.8410 3.0332 1.525e-01
timeintcat3 0.7691 0.3147 1.8794 5.646e-01
timeintcat4 0.5771 0.2777 1.1993 1.408e-01
vigilanz1 8.0362 4.9248 13.1131 7.339e-17
vigilanz2 2.4172 1.6673 3.5043 3.197e-06
vigilanz3 0.6687 0.4719 0.9477 2.370e-02
ward1 1.5952 1.0014 2.5412 4.933e-02
ward2 1.9580 1.3100 2.9266 1.050e-03
ward3 3.6235 2.2206 5.9128 2.562e-07
timeintcat1:year 0.9185 0.7902 1.0677 2.683e-01
timeintcat2:year 0.7641 0.6478 0.9013 1.406e-03
timeintcat3:year 0.9294 0.7227 1.1951 5.681e-01
timeintcat4:year 0.9804 0.8099 1.1869 8.394e-01
year:age701 0.9569 0.8929 1.0254 2.114e-01
## Single term likelihood ratio test
drop1(logit.traditional, test = "LRT")
Single term deletions
Model:
death ~ tpa + age5 + afib + aphasia + cardiac + gender + htn +
hyperchol + icu + living + rankpre + residentq + referral +
paresis + prevstroke + sumbart90 + transport + timeintcat +
vigilanz + ward + timeintcat:year + age70:year
Df Deviance AIC LRT Pr(>Chi)
<none> 3058 3166
tpa 1 3067 3173 9.0 0.00272 **
age5 7 3095 3189 37.1 4.6e-06 ***
afib 1 3066 3172 8.8 0.00305 **
aphasia 1 3058 3164 0.6 0.42697
cardiac 1 3061 3167 2.9 0.09104 .
gender 2 3066 3170 8.3 0.01609 *
htn 1 3058 3164 0.0 0.91449
hyperchol 1 3071 3177 13.5 0.00024 ***
icu 1 3058 3164 0.3 0.60815
living 3 3059 3161 1.8 0.62353
rankpre 4 3068 3168 10.2 0.03735 *
residentq 3 3077 3179 19.6 0.00021 ***
referral 4 3064 3164 6.2 0.18541
paresis 3 3072 3174 14.3 0.00253 **
prevstroke 1 3062 3168 4.4 0.03681 *
sumbart90 1 3060 3166 2.2 0.14103
transport 4 3107 3207 49.6 4.5e-10 ***
vigilanz 3 3260 3362 201.8 < 2e-16 ***
ward 3 3087 3189 29.8 1.5e-06 ***
timeintcat:year 4 3069 3169 11.5 0.02162 *
year:age70 1 3059 3165 1.6 0.20892
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
a. What is the multivariable adjusted odds ratio?
OR 1.91 [1.27, 2.88] for TPA.
res.epicalc$table["tpa",,drop = F]
OR lower95ci upper95ci Pr(>|Z|)
tpa 1.914 1.27 2.885 0.001924
3. Did the odds ratio change compared to the crude odds ratio? By how much?
Down from 3.17.
Crude analysis
logistic.display(glm(death ~ tpa, data = dat, family = binomial(link = "logit")))
Logistic regression predicting death
OR(95%CI) P(Wald's test) P(LR-test)
tpa: 1 vs 0 3.17 (2.33,4.31) < 0.001 < 0.001
Log-likelihood = -2234.6526
No. of observations = 10000
AIC value = 4473.3053
4. Use your “confounder set” and calculate the propensity score for t-PA treatment for the study population
logit.ps <- glm(tpa ~ age5 + afib + aphasia + cardiac + gender + htn + hyperchol + icu + living + rankpre + residentq + referral + paresis + prevstroke + sumbart90 + transport + timeintcat + vigilanz + ward + timeintcat:year + age70:year,
data = dat,
family = binomial(link = "logit"))
5. What is the range of the propensity score? Does the range differ for treated and untreated?
There are very few patients who received TPA, and most of the non-TPA patients have very low propensity scores.
## Extract the fitted probabilities (propensity scores)
pscores <- fitted(logit.ps)
## Put them back into the dataset using case numbers (those with missing values)
dat$pscore <- NA
dat$pscore[as.numeric(names(pscores))] <- pscores
## Summaries
summary(dat$pscore)
Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
0.0 0.0 0.0 0.0 0.0 0.8 854
## Summaries by group
library(doBy)
res.group.summ <- summaryBy(pscore ~ tpa, dat, FUN = summary)
round(res.group.summ, 5)
tpa pscore.Min. pscore.1st Qu. pscore.Median pscore.Mean pscore.3rd Qu. pscore.Max. pscore.NA's
1 0 0.00000 0.00034 0.00184 0.0239 0.0107 0.740 825
2 1 0.00022 0.09270 0.28500 0.2910 0.4370 0.819 29
## Histograms
library(ggplot2)
ggplot(data = dat[,c("pscore","tpa")], mapping = aes(x = pscore, fill = factor(tpa))) +
layer(geom = "histogram", stat = "bin", binwidth = 0.02, color = "white") +
facet_grid(tpa ~ ., scales = "free") +
theme_bw() +
theme(legend.key = element_blank())
## Density plots
ggplot(data = dat[,c("pscore","tpa")], mapping = aes(x = pscore, fill = factor(tpa))) +
layer(geom = "density", stat = "density") +
facet_grid(tpa ~ ., scales = "fixed") +
theme_bw() +
theme(legend.key = element_blank())
## Box plots
ggplot(data = dat[,c("pscore","tpa")], mapping = aes(x = factor(tpa), y = pscore, fill = factor(tpa))) +
layer(geom = "boxplot") +
theme_bw() +
theme(legend.key = element_blank())
6. What is the c-statistic of your propensity score model?
## Load pROC
library(pROC)
## Construct an ROC
roc <- roc(logit.ps$model$tpa ~ logit.ps$fitted.values)
## Plot
plot(roc)
Call:
roc.formula(formula = logit.ps$model$tpa ~ logit.ps$fitted.values)
Data: logit.ps$fitted.values in 8847 controls (logit.ps$model$tpa 0) < 299 cases (logit.ps$model$tpa 1).
Area under the curve: 0.939