1. Read in the data set on “g:\shared\epi271” (epi271data.sasbdat)
## Load data
library(sas7bdat)
dat <- read.sas7bdat("./epi271data.sas7bdat")
names(dat) <- tolower(names(dat))
## Create new variables
dat <- within(dat, {
## age ≥ 70
age70 <- as.numeric(age >= 70)
## submart ≥ 90
sumbart90 <- as.numeric(sumbartel >= 90)
## rankpre 4,5,6 to 5
rankpre[rankpre %in% c(4,5,6)] <- 5
## Categorical time
timeintcat <- cut(timeint, breaks = c(-Inf,1,3,5,Inf), labels = 1:4,
right = FALSE, include.lowest = T)
## Quantile of resident
residentq <- cut(residents, breaks = quantile(residents), labels = 1:4, include.lowest = T)
})
## Convert categoricals to factors
categoricals <- c("age5","age70","afib","aphasia","living","gender","rankpre","time","residentq","referral","paresis","sumbart90","timeintcat","transport","vigilanz","ward")
dat[categoricals] <- lapply(dat[categoricals],
function(var) {
var <- factor(var)
})
2. How many observations are in data set?
nrow(dat)
[1] 10000
3. Identify the main exposure and the main outcome variable
4. What is the prevalence of the exposure? 3.28%
tab.tpa <- table(dat$tpa)
tab.tpa
0 1
9672 328
prop.table(tab.tpa)
0 1
0.9672 0.0328
5. What is the prevalence of the outcome? 5.95%
tab.death <- table(dat$death)
tab.death
0 1
9405 595
prop.table(tab.death)
0 1
0.9405 0.0595
6. What is the crude association between exposure and outcome? Significant by both chi-squared test and Fisher’s exact test
xtabs.tpa.death <- xtabs(~ tpa + death, data = dat)
chisq.test(xtabs.tpa.death, correct = F)
Pearson's Chi-squared test
data: xtabs.tpa.death
X-squared = 59.44, df = 1, p-value = 1.261e-14
fisher.test(xtabs.tpa.death)
Fisher's Exact Test for Count Data
data: xtabs.tpa.death
p-value = 5.397e-11
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
2.279 4.332
sample estimates:
odds ratio
3.167
7. Identify potential confounding factors A common cause of the exposure and outcomes is a potential confounder. So factors relating to both may be confounders, but can also can be a source of selection bias (common effect).
## List covariates (other than the exposure and outcome)
covariates <- names(dat)[grep("tpa|death", names(dat), invert = T)]
covariates
[1] "vigilanz" "dysart" "doppler" "duplex" "tcd" "gerinn" "hypercho" "hyperton"
[9] "stunit" "rentner" "time" "randnum" "id" "gender" "age" "working"
[17] "cardiac" "comorbid" "residents" "ward" "bed_res" "beds" "diabetes" "referral"
[25] "occupat" "department" "prevstroke" "year" "coagulo" "hyperchol" "htn" "icu"
[33] "speech" "ems" "age5" "afib" "paresis" "rankpre" "rankat" "retired"
[41] "sumbartel" "transport" "living" "carotsteno" "stdward" "aphasia" "timeint" "residentq"
[49] "timeintcat" "sumbart90" "age70"
## Perform univariate logistic regression against outcome for each covariate
logit.outcome.var <- lapply(dat[, covariates],
function(Var) {
## Perform single predictor glm
glm(death ~ Var, data = dat, family = binomial(link = "logit"))
})
## Extract p-values for likelihood ratio test
factors.assoc.with.outcome <- sapply(logit.outcome.var,
function(X) {
## LRT for each glm object
drop1(X, test = "LRT")["Var", "Pr(>Chi)"]
})
## Perform univariate logistic regression against exposure for each covariate
logit.exposure.var <- lapply(dat[, covariates],
function(Var) {
## Perform single predictor glm
glm(tpa ~ Var, data = dat, family = binomial)
})
## Extract p-values for likelihood ratio test
factors.assoc.with.exposure <- sapply(logit.exposure.var,
function(X) {
drop1(X, test = "LRT")["Var", "Pr(>Chi)"]
})
## Mark factors significant at p < 0.2 threshold
data.frame(outcome = factors.assoc.with.outcome,
outcome.sig = factor(factors.assoc.with.outcome < 0.2, c(FALSE,TRUE), c("","*")),
exposure = factors.assoc.with.exposure,
exposure.sig = factor(factors.assoc.with.exposure < 0.2, c(FALSE,TRUE), c("","*"))
)
outcome outcome.sig exposure exposure.sig
vigilanz 4.762e-144 * 5.227e-07 *
dysart 1.925e-03 * 2.780e-03 *
doppler 5.280e-30 * 5.713e-04 *
duplex 2.823e-23 * 1.970e-01 *
tcd 5.823e-19 * 2.072e-01
gerinn 4.083e-01 1.005e-01 *
hypercho 2.903e-21 * 3.037e-01
hyperton 5.063e-01 2.109e-01
stunit 2.835e-03 * 6.508e-02 *
rentner 1.415e-05 * 7.840e-01
time 1.727e-03 * 3.442e-01
randnum 7.355e-01 6.329e-01
id 5.363e-02 * 6.764e-01
gender 2.954e-08 * 1.760e-09 *
age 4.594e-01 8.890e-01
working 1.477e-09 * 2.012e-05 *
cardiac 4.494e-04 * 5.207e-01
comorbid 5.253e-02 * 2.013e-01
residents 2.458e-03 * 2.906e-04 *
ward 8.496e-33 * 4.314e-44 *
bed_res 8.452e-01 4.968e-01
beds 8.164e-01 5.702e-02 *
diabetes 5.290e-01 9.598e-01
referral 3.638e-28 * 8.523e-36 *
occupat 4.630e-01 3.645e-01
department 2.320e-06 * 7.888e-03 *
prevstroke 8.387e-01 3.425e-04 *
year 5.290e-02 * 7.759e-01
coagulo 4.083e-01 1.005e-01 *
hyperchol 2.903e-21 * 3.037e-01
htn 5.063e-01 2.109e-01
icu 1.710e-01 * 2.696e-05 *
speech 1.696e-03 * 7.813e-02 *
ems 1.012e-01 * 1.947e-01 *
age5 2.412e-32 * 3.012e-13 *
afib 5.128e-38 * 5.174e-09 *
paresis 1.072e-48 * 1.030e-29 *
rankpre 6.565e-20 * 4.987e-17 *
rankat 6.054e-01 9.408e-01
retired 1.415e-05 * 7.840e-01
sumbartel 6.184e-195 * 9.148e-56 *
transport 1.653e-81 * 2.484e-58 *
living 1.603e-10 * 6.679e-07 *
carotsteno 1.934e-01 * 1.322e-01 *
stdward 4.927e-06 * 1.293e-22 *
aphasia 8.543e-01 4.357e-02 *
timeint 3.675e-05 * 1.156e-49 *
residentq 3.314e-07 * 1.221e-03 *
timeintcat 1.674e-31 * 2.498e-144 *
sumbart90 8.666e-47 * 2.609e-22 *
age70 4.222e-01 9.191e-01