# EPI 271 Propensity Score Lab 1

1. Read in the data set on “g:\shared\epi271” (epi271data.sasbdat)

``````## Load data
library(sas7bdat)
names(dat) <- tolower(names(dat))

## Create new variables
dat <- within(dat, {
## age ≥ 70
age70 <- as.numeric(age >= 70)

## submart ≥ 90
sumbart90 <- as.numeric(sumbartel >= 90)

## rankpre 4,5,6 to 5
rankpre[rankpre %in% c(4,5,6)] <- 5

## Categorical time
timeintcat <- cut(timeint, breaks = c(-Inf,1,3,5,Inf), labels = 1:4,
right = FALSE, include.lowest = T)

## Quantile of resident
residentq <- cut(residents, breaks = quantile(residents), labels = 1:4, include.lowest = T)
})

## Convert categoricals to factors
categoricals <- c("age5","age70","afib","aphasia","living","gender","rankpre","time","residentq","referral","paresis","sumbart90","timeintcat","transport","vigilanz","ward")

dat[categoricals] <- lapply(dat[categoricals],
function(var) {
var <- factor(var)
})
``````

2. How many observations are in data set?

``````nrow(dat)
``````
`````` 10000
``````

3. Identify the main exposure and the main outcome variable

• Main outcome: death
• Main exposure: tpa

4. What is the prevalence of the exposure? 3.28%

``````tab.tpa <- table(dat\$tpa)
tab.tpa
``````
``````
0    1
9672  328
``````
``````prop.table(tab.tpa)
``````
``````
0      1
0.9672 0.0328
``````

5. What is the prevalence of the outcome? 5.95%

``````tab.death <- table(dat\$death)
tab.death
``````
``````
0    1
9405  595
``````
``````prop.table(tab.death)
``````
``````
0      1
0.9405 0.0595
``````

6. What is the crude association between exposure and outcome? Significant by both chi-squared test and Fisher’s exact test

``````xtabs.tpa.death <- xtabs(~ tpa + death, data = dat)
chisq.test(xtabs.tpa.death, correct = F)
``````
``````
Pearson's Chi-squared test

data:  xtabs.tpa.death
X-squared = 59.44, df = 1, p-value = 1.261e-14
``````
``````fisher.test(xtabs.tpa.death)
``````
``````
Fisher's Exact Test for Count Data

data:  xtabs.tpa.death
p-value = 5.397e-11
alternative hypothesis: true odds ratio is not equal to 1
95 percent confidence interval:
2.279 4.332
sample estimates:
odds ratio
3.167
``````

7. Identify potential confounding factors A common cause of the exposure and outcomes is a potential confounder. So factors relating to both may be confounders, but can also can be a source of selection bias (common effect).

``````## List covariates (other than the exposure and outcome)
covariates <- names(dat)[grep("tpa|death", names(dat), invert = T)]
covariates
``````
``````  "vigilanz"   "dysart"     "doppler"    "duplex"     "tcd"        "gerinn"     "hypercho"   "hyperton"
 "stunit"     "rentner"    "time"       "randnum"    "id"         "gender"     "age"        "working"
 "cardiac"    "comorbid"   "residents"  "ward"       "bed_res"    "beds"       "diabetes"   "referral"
 "occupat"    "department" "prevstroke" "year"       "coagulo"    "hyperchol"  "htn"        "icu"
 "speech"     "ems"        "age5"       "afib"       "paresis"    "rankpre"    "rankat"     "retired"
 "sumbartel"  "transport"  "living"     "carotsteno" "stdward"    "aphasia"    "timeint"    "residentq"
 "timeintcat" "sumbart90"  "age70"
``````
``````
## Perform univariate logistic regression against outcome for each covariate
logit.outcome.var <- lapply(dat[, covariates],
function(Var) {
## Perform single predictor glm
glm(death ~ Var, data = dat, family = binomial(link = "logit"))
})

## Extract p-values for likelihood ratio test
factors.assoc.with.outcome <- sapply(logit.outcome.var,
function(X) {
## LRT for each glm object
drop1(X, test = "LRT")["Var", "Pr(>Chi)"]
})

## Perform univariate logistic regression against exposure for each covariate
logit.exposure.var <- lapply(dat[, covariates],
function(Var) {
## Perform single predictor glm
glm(tpa ~ Var, data = dat, family = binomial)
})

## Extract p-values for likelihood ratio test
factors.assoc.with.exposure <- sapply(logit.exposure.var,
function(X) {
drop1(X, test = "LRT")["Var", "Pr(>Chi)"]
})

## Mark factors significant at p < 0.2 threshold
data.frame(outcome      = factors.assoc.with.outcome,
outcome.sig  = factor(factors.assoc.with.outcome < 0.2, c(FALSE,TRUE), c("","*")),
exposure     = factors.assoc.with.exposure,
exposure.sig = factor(factors.assoc.with.exposure < 0.2, c(FALSE,TRUE), c("","*"))
)
``````
``````              outcome outcome.sig   exposure exposure.sig
vigilanz   4.762e-144           *  5.227e-07            *
dysart      1.925e-03           *  2.780e-03            *
doppler     5.280e-30           *  5.713e-04            *
duplex      2.823e-23           *  1.970e-01            *
tcd         5.823e-19           *  2.072e-01
gerinn      4.083e-01              1.005e-01            *
hypercho    2.903e-21           *  3.037e-01
hyperton    5.063e-01              2.109e-01
stunit      2.835e-03           *  6.508e-02            *
rentner     1.415e-05           *  7.840e-01
time        1.727e-03           *  3.442e-01
randnum     7.355e-01              6.329e-01
id          5.363e-02           *  6.764e-01
gender      2.954e-08           *  1.760e-09            *
age         4.594e-01              8.890e-01
working     1.477e-09           *  2.012e-05            *
cardiac     4.494e-04           *  5.207e-01
comorbid    5.253e-02           *  2.013e-01
residents   2.458e-03           *  2.906e-04            *
ward        8.496e-33           *  4.314e-44            *
bed_res     8.452e-01              4.968e-01
beds        8.164e-01              5.702e-02            *
diabetes    5.290e-01              9.598e-01
referral    3.638e-28           *  8.523e-36            *
occupat     4.630e-01              3.645e-01
department  2.320e-06           *  7.888e-03            *
prevstroke  8.387e-01              3.425e-04            *
year        5.290e-02           *  7.759e-01
coagulo     4.083e-01              1.005e-01            *
hyperchol   2.903e-21           *  3.037e-01
htn         5.063e-01              2.109e-01
icu         1.710e-01           *  2.696e-05            *
speech      1.696e-03           *  7.813e-02            *
ems         1.012e-01           *  1.947e-01            *
age5        2.412e-32           *  3.012e-13            *
afib        5.128e-38           *  5.174e-09            *
paresis     1.072e-48           *  1.030e-29            *
rankpre     6.565e-20           *  4.987e-17            *
rankat      6.054e-01              9.408e-01
retired     1.415e-05           *  7.840e-01
sumbartel  6.184e-195           *  9.148e-56            *
transport   1.653e-81           *  2.484e-58            *
living      1.603e-10           *  6.679e-07            *
carotsteno  1.934e-01           *  1.322e-01            *
stdward     4.927e-06           *  1.293e-22            *
aphasia     8.543e-01              4.357e-02            *
timeint     3.675e-05           *  1.156e-49            *
residentq   3.314e-07           *  1.221e-03            *
timeintcat  1.674e-31           * 2.498e-144            *
sumbart90   8.666e-47           *  2.609e-22            *
age70       4.222e-01              9.191e-01
``````