Unconditional logistic regression is biased (overestimation of OR) in matched study. Use conditional logistic regression.
Comparison is within each stratum. Interpret this way.
The intercepts are not estimated.
Thus, no predicted probability, so no ROC or Hosmer-Lemeshow test.
library(survival)
## Not run: clogit(case ~ spontaneous + induced + strata(stratum), data=infert)
## A multinomial response recoded to use clogit
## The revised data set has one copy per possible outcome level, with new
## variable tocc = target occupation for this copy, and case = whether
## that is the actual outcome for each subject.
## See the catspec package for more details on the Logan approach.
resp <- levels(logan$occupation)
n <- nrow(logan)
indx <- rep(1:n, length(resp))
logan2 <- data.frame(logan[indx,],
id = indx,
tocc = factor(rep(resp, each=n)))
logan2$case <- (logan2$occupation == logan2$tocc)
logan2 <- logan2[order(logan2$id),]
## Show dataset for first three strata
logan2[logan2$id %in% c(1,2,3), ]
occupation focc education race id tocc case
1 sales professional 14 non-black 1 farm FALSE
1.1 sales professional 14 non-black 1 operatives FALSE
1.2 sales professional 14 non-black 1 craftsmen FALSE
1.3 sales professional 14 non-black 1 sales TRUE
1.4 sales professional 14 non-black 1 professional FALSE
2 craftsmen sales 13 non-black 2 farm FALSE
2.1 craftsmen sales 13 non-black 2 operatives FALSE
2.2 craftsmen sales 13 non-black 2 craftsmen TRUE
2.3 craftsmen sales 13 non-black 2 sales FALSE
2.4 craftsmen sales 13 non-black 2 professional FALSE
3 sales professional 16 non-black 3 farm FALSE
3.1 sales professional 16 non-black 3 operatives FALSE
3.2 sales professional 16 non-black 3 craftsmen FALSE
3.3 sales professional 16 non-black 3 sales TRUE
3.4 sales professional 16 non-black 3 professional FALSE
## Analysis clogit (this particular analysis did not work in clogistic)
res.clogit <- clogit(case ~ tocc + tocc:education + strata(id), logan2)
summ.clogit <- summary(res.clogit)
summ.clogit
Call:
coxph(formula = Surv(rep(1, 4190L), case) ~ tocc + tocc:education +
strata(id), data = logan2, method = "exact")
n= 4190, number of events= 838
coef exp(coef) se(coef) z Pr(>|z|)
toccfarm -1.896463 0.150099 1.380782 -1.37 0.1696
toccoperatives 1.166750 3.211539 0.565646 2.06 0.0391 *
toccprofessional -8.100549 0.000303 0.698724 -11.59 < 2e-16 ***
toccsales -5.029230 0.006544 0.770086 -6.53 6.5e-11 ***
tocccraftsmen:education -0.332284 0.717283 0.056868 -5.84 5.1e-09 ***
toccfarm:education -0.370286 0.690537 0.116410 -3.18 0.0015 **
toccoperatives:education -0.422219 0.655591 0.058433 -7.23 5.0e-13 ***
toccprofessional:education 0.278247 1.320812 0.051021 5.45 4.9e-08 ***
toccsales:education NA NA 0.000000 NA NA
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
toccfarm 0.150099 6.662 0.0100243 2.24750
toccoperatives 3.211539 0.311 1.0598246 9.73178
toccprofessional 0.000303 3296.278 0.0000771 0.00119
toccsales 0.006544 152.815 0.0014466 0.02960
tocccraftsmen:education 0.717283 1.394 0.6416297 0.80186
toccfarm:education 0.690537 1.448 0.5496656 0.86751
toccoperatives:education 0.655591 1.525 0.5846481 0.73514
toccprofessional:education 1.320812 0.757 1.1951206 1.45972
toccsales:education NA NA NA NA
Rsquare= 0.147 (max possible= 0.475 )
Likelihood ratio test= 666 on 8 df, p=0
Wald test = 414 on 8 df, p=0
Score (logrank) test = 682 on 8 df, p=0
## Uncoditional logistic regression (not appropriate)
res.logit1 <- glm(case ~ tocc + tocc:education + factor(id), logan2, family = binomial(link = "logit"))
summary(res.logit1)$coef[c(1:5,843:846),]
Estimate Std. Error z value Pr(>|z|)
(Intercept) 4.1110 1.43414 2.867 4.150e-03
toccfarm -2.7176 1.43439 -1.895 5.815e-02
toccoperatives 1.7751 0.68699 2.584 9.768e-03
toccprofessional -11.5476 0.82091 -14.067 6.076e-45
toccsales -5.8349 0.82293 -7.090 1.337e-12
tocccraftsmen:education -0.3800 0.06062 -6.270 3.619e-10
toccfarm:education -0.3779 0.12012 -3.146 1.657e-03
toccoperatives:education -0.5139 0.06384 -8.050 8.298e-16
toccprofessional:education 0.4981 0.05956 8.363 6.108e-17
exp(coef(res.logit1)[c(1:5,843:846)])
(Intercept) toccfarm toccoperatives toccprofessional
61.007195773 0.066033211 5.901084569 0.000009659
toccsales tocccraftsmen:education toccfarm:education toccoperatives:education
0.002923648 0.683836589 0.685320145 0.598166153
toccprofessional:education
1.645564209
## Uncoditional logistic regression, ignoring strata (not appropriate)
res.logit1b <- glm(case ~ tocc + tocc:education, logan2, family = binomial(link = "logit"))
summary(res.logit1b)$coef
Estimate Std. Error z value Pr(>|z|)
(Intercept) 1.85771 0.43803 4.241 2.225e-05
toccfarm -3.22405 1.13783 -2.834 4.604e-03
toccoperatives 1.66071 0.65561 2.533 1.131e-02
toccprofessional -10.00008 0.72279 -13.835 1.559e-43
toccsales -4.67442 0.69818 -6.695 2.155e-11
tocccraftsmen:education -0.22820 0.03357 -6.798 1.057e-11
toccfarm:education -0.18559 0.08352 -2.222 2.627e-02
toccoperatives:education -0.35063 0.03810 -9.204 3.448e-20
toccprofessional:education 0.53842 0.03999 13.464 2.535e-41
toccsales:education 0.06351 0.03830 1.658 9.728e-02
exp(coef(res.logit1b))
(Intercept) toccfarm toccoperatives toccprofessional
6.4090214 0.0397935 5.2630345 0.0000454
toccsales tocccraftsmen:education toccfarm:education toccoperatives:education
0.0093309 0.7959686 0.8306101 0.7042440
toccprofessional:education toccsales:education
1.7133004 1.0655733
OR obtained from different methods
clogit logit1 logit2
toccfarm 0.1501 0.0660 0.0398
toccoperatives 3.2115 5.9011 5.2630
toccprofessional 0.0003 0.0000 0.0000
toccsales 0.0065 0.0029 0.0093
tocccraftsmen:education 0.7173 0.6838 0.7960
toccfarm:education 0.6905 0.6853 0.8306
toccoperatives:education 0.6556 0.5982 0.7042
toccprofessional:education 1.3208 1.6456 1.7133
toccsales:education NA ? 1.0656
library(Epi)
data(bdendo)
## Show dataset for first three strata
bdendo[bdendo$set %in% c(1,2,3),]
set d gall hyp ob est dur non duration age cest agegrp age3
1 1 1 No No Yes Yes 4 Yes 96 74 3 70-74 65-74
2 1 0 No No <NA> No 0 No 0 75 0 70-74 65-74
3 1 0 No No <NA> No 0 No 0 74 0 70-74 65-74
4 1 0 No No <NA> No 0 No 0 74 0 70-74 65-74
5 1 0 No No Yes Yes 3 Yes 48 75 1 70-74 65-74
6 2 1 No No No Yes 4 Yes 96 67 3 65-69 65-74
7 2 0 No No No Yes 1 No 5 67 3 65-69 65-74
8 2 0 No Yes Yes No 0 Yes 0 67 0 65-69 65-74
9 2 0 No No No Yes 3 No 53 67 2 65-69 65-74
10 2 0 No No No Yes 2 Yes 45 68 2 65-69 65-74
11 3 1 No Yes Yes Yes 1 Yes 9 76 1 75-79 75+
12 3 0 No Yes Yes Yes 4 Yes 96 76 2 75-79 75+
13 3 0 No Yes No Yes 1 Yes 3 76 1 75-79 75+
14 3 0 No Yes Yes Yes 2 Yes 15 76 2 75-79 75+
15 3 0 No No No Yes 2 Yes 36 77 1 75-79 75+
## Analysis
res.clogistic <- clogistic(d ~ cest + dur, strata = set, data = bdendo)
res.clogistic
Call:
clogistic(formula = d ~ cest + dur, strata = set, data = bdendo)
coef exp(coef) se(coef) z p
cest.L 0.240 1.271 2.276 0.105 0.92
cest.Q 0.890 2.435 1.812 0.491 0.62
cest.C 0.113 1.120 0.891 0.127 0.90
dur.L 1.965 7.134 2.222 0.884 0.38
dur.Q -0.716 0.489 1.858 -0.385 0.70
dur.C 0.136 1.146 1.168 0.117 0.91
dur^4 NA NA 0.000 NA NA
Likelihood ratio test=35.3 on 6 df, p=0.0000038, n=254
## clogit
res.clogit2 <- clogit(d ~ cest + dur + strata(set), bdendo)
summary(res.clogit2)
Call:
coxph(formula = Surv(rep(1, 315L), d) ~ cest + dur + strata(set),
data = bdendo, method = "exact")
n= 295, number of events= 54
(20 observations deleted due to missingness)
coef exp(coef) se(coef) z Pr(>|z|)
cest.L 0.240 1.271 2.276 0.11 0.92
cest.Q 0.890 2.435 1.812 0.49 0.62
cest.C 0.113 1.120 0.891 0.13 0.90
dur.L 1.965 7.134 2.222 0.88 0.38
dur.Q -0.716 0.489 1.858 -0.39 0.70
dur.C 0.136 1.146 1.168 0.12 0.91
dur^4 NA NA 0.000 NA NA
exp(coef) exp(-coef) lower .95 upper .95
cest.L 1.271 0.787 0.0147 109.94
cest.Q 2.435 0.411 0.0698 84.93
cest.C 1.120 0.893 0.1953 6.42
dur.L 7.134 0.140 0.0916 555.94
dur.Q 0.489 2.046 0.0128 18.67
dur.C 1.146 0.873 0.1161 11.31
dur^4 NA NA NA NA
Rsquare= 0.113 (max possible= 0.439 )
Likelihood ratio test= 35.3 on 6 df, p=0.0000038
Wald test = 27.7 on 6 df, p=0.000105
Score (logrank) test = 36.8 on 6 df, p=0.0000019
## Uncoditional logistic regression (not appropriate)
res.logit2 <- glm(d ~ cest + dur + factor(set), bdendo, family = binomial(link = "logit"))
..glm.ratio(res.logit2)[1:10,]
OR 2.5 % 97.5 % P
(Intercept) 0.21 0.01 3.04 0.296
cest.L 1.29 0.01 260.47 0.925
cest.Q 4.17 0.06 317.18 0.507
cest.C 1.15 0.14 9.34 0.892
dur.L 17.44 0.10 3614.14 0.280
dur.Q 0.36 0.00 27.69 0.643
dur.C 1.22 0.08 19.62 0.887
dur^4 NA NA NA 0.809
factor(set)2 0.63 0.01 33.31 0.815
factor(set)3 1.56 0.03 81.06 0.996
## Uncoditional logistic regression, ignoring strata (not appropriate)
res.logit2b <- glm(d ~ cest + dur, bdendo, family = binomial(link = "logit"))
..glm.ratio(res.logit2b)
OR 2.5 % 97.5 % P
(Intercept) 0.33 0.21 0.49 0.000
cest.L 1.07 0.01 81.75 0.976
cest.Q 2.85 0.10 96.14 0.549
cest.C 1.02 0.19 5.41 0.982
dur.L 7.20 0.11 569.14 0.364
dur.Q 0.38 0.01 13.12 0.593
dur.C 1.28 0.14 11.96 0.827
dur^4 NA NA NA 0.000
OR obtained from different methods
clogit logit1 logit2
cest.L 1.27 1.29 1.07
cest.Q 2.43 4.17 2.85
cest.C 1.12 1.15 1.02
dur.L 7.13 17.44 7.20
dur.Q 0.49 0.36 0.38
dur.C 1.15 1.22 1.28
1:2 match of MI patients to non-MI patients.
The exposure of interest is smoking status.
Matched on age, race, sex, and hospital.
Not matched on SBP and ECG
## Load data
library(foreign)
midat <- read.dta("http://www.sph.emory.edu/~dkleinb/datasets/mi.dta")
## show first three strata
midat[midat$match %in% c(1,2,3),]
match person mi smk sbp ecg survtime
1 1 1 1 0 160 1 1
2 1 2 0 0 140 0 2
3 1 3 0 0 120 0 2
4 2 4 1 0 160 1 1
5 2 5 0 0 140 0 2
6 2 6 0 0 120 0 2
7 3 7 1 0 160 0 1
8 3 8 0 0 140 0 2
9 3 9 0 0 120 0 2
## Conditional logistic regression
res.clogit3.int <- clogit(mi ~ smk + sbp + ecg + smk:sbp + smk:ecg + strata(match), midat)
res.clogit3 <- clogit(mi ~ smk + sbp + ecg + strata(match), midat)
## No interaction (smaller) model is adequate
anova(res.clogit3.int, res.clogit3)
Analysis of Deviance Table
Cox model: response is Surv(rep(1, 117L), mi)
Model 1: ~ smk + sbp + ecg + smk:sbp + smk:ecg + strata(match)
Model 2: ~ smk + sbp + ecg + strata(match)
loglik Chisq Df P(>|Chi|)
1 -31.5
2 -31.7 0.55 2 0.76
## Show result
summary(res.clogit3)
Call:
coxph(formula = Surv(rep(1, 117L), mi) ~ smk + sbp + ecg + strata(match),
data = midat, method = "exact")
n= 117, number of events= 39
coef exp(coef) se(coef) z Pr(>|z|)
smk 0.7291 2.0731 0.5613 1.30 0.1940
sbp 0.0456 1.0467 0.0152 2.99 0.0028 **
ecg 1.5993 4.9494 0.8534 1.87 0.0609 .
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
exp(coef) exp(-coef) lower .95 upper .95
smk 2.07 0.482 0.690 6.23
sbp 1.05 0.955 1.016 1.08
ecg 4.95 0.202 0.929 26.36
Rsquare= 0.173 (max possible= 0.519 )
Likelihood ratio test= 22.2 on 3 df, p=0.0000592
Wald test = 13.7 on 3 df, p=0.00338
Score (logrank) test = 19.7 on 3 df, p=0.000198
## Uncoditional logistic regression, using match as categorical variable (not appropriate)
res.logit3 <- glm(mi ~ smk + sbp + ecg + factor(match), midat, family = binomial(link = "logit"))
..glm.ratio(res.logit3)
OR 2.5 % 97.5 % P
(Intercept) 0.00 0.00 0.00 0.001
smk 3.38 0.85 14.55 0.090
sbp 1.08 1.04 1.12 0.000
ecg 16.18 2.06 203.43 0.015
factor(match)2 1.00 0.00 346.51 1.000
factor(match)3 3.76 0.03 745.27 0.615
factor(match)4 3.76 0.03 745.27 0.615
factor(match)5 3.76 0.03 745.27 0.615
factor(match)6 3.76 0.03 745.27 0.615
factor(match)7 3.76 0.03 745.27 0.615
factor(match)8 3.76 0.03 745.27 0.615
factor(match)9 3.76 0.03 745.27 0.615
factor(match)10 3.76 0.03 745.27 0.615
factor(match)11 6.61 0.04 1391.13 0.483
factor(match)12 20.54 0.19 4213.57 0.246
factor(match)13 20.54 0.19 4213.57 0.246
factor(match)14 4.74 0.05 804.49 0.542
factor(match)15 0.70 0.01 82.52 0.886
factor(match)16 0.23 0.00 30.38 0.580
factor(match)17 0.55 0.00 311.53 0.866
factor(match)18 0.55 0.00 311.53 0.866
factor(match)19 0.60 0.00 94.07 0.843
factor(match)20 0.15 0.00 30.80 0.509
factor(match)21 2.13 0.01 523.61 0.785
factor(match)22 13.05 0.11 2769.44 0.330
factor(match)23 3.01 0.03 533.36 0.670
factor(match)24 2.92 0.03 532.64 0.680
factor(match)25 2.92 0.03 532.64 0.680
factor(match)26 2.23 0.02 435.65 0.759
factor(match)27 13.05 0.11 2769.44 0.330
factor(match)28 0.82 0.00 238.63 0.945
factor(match)29 2.92 0.03 532.64 0.680
factor(match)30 2.13 0.01 523.61 0.785
factor(match)31 3.01 0.03 533.36 0.670
factor(match)32 0.32 0.00 137.94 0.722
factor(match)33 0.08 0.00 16.05 0.374
factor(match)34 0.53 0.00 78.14 0.813
factor(match)35 1.70 0.01 384.23 0.845
factor(match)36 0.12 0.00 14.70 0.412
factor(match)37 1.26 0.01 298.55 0.933
factor(match)38 0.32 0.00 61.02 0.670
factor(match)39 6.08 0.05 1387.95 0.501
## Uncoditional logistic regression, ignoring strata (not appropriate)
res.logit3b <- glm(mi ~ smk + sbp + ecg, midat, family = binomial(link = "logit"))
..glm.ratio(res.logit3b)
OR 2.5 % 97.5 % P
(Intercept) 0.00 0.00 0.01 0.000
smk 1.79 0.70 4.59 0.222
sbp 1.05 1.02 1.08 0.000
ecg 2.12 0.76 5.96 0.149
OR obtained from different methods
clogit logit1 logit2
smk 2.07 3.38 1.79
sbp 1.05 1.08 1.05
ecg 4.95 16.18 2.12