Conditional logistic regression in matched case-control studies

References

Conditional Logistic Regression

Using survival:clogit()

library(survival)
## Not run: clogit(case ~ spontaneous + induced + strata(stratum), data=infert)
## A multinomial response recoded to use clogit
##  The revised data set has one copy per possible outcome level, with new
##  variable tocc = target occupation for this copy, and case = whether
##  that is the actual outcome for each subject.
## See the catspec package for more details on the Logan approach.
resp <- levels(logan$occupation)
n <- nrow(logan)
indx <- rep(1:n, length(resp))
logan2 <- data.frame(logan[indx,],
                     id = indx,
                     tocc = factor(rep(resp, each=n)))
logan2$case <- (logan2$occupation == logan2$tocc)
logan2 <- logan2[order(logan2$id),]

## Show dataset for first three strata
logan2[logan2$id %in% c(1,2,3), ]
    occupation         focc education      race id         tocc  case
1        sales professional        14 non-black  1         farm FALSE
1.1      sales professional        14 non-black  1   operatives FALSE
1.2      sales professional        14 non-black  1    craftsmen FALSE
1.3      sales professional        14 non-black  1        sales  TRUE
1.4      sales professional        14 non-black  1 professional FALSE
2    craftsmen        sales        13 non-black  2         farm FALSE
2.1  craftsmen        sales        13 non-black  2   operatives FALSE
2.2  craftsmen        sales        13 non-black  2    craftsmen  TRUE
2.3  craftsmen        sales        13 non-black  2        sales FALSE
2.4  craftsmen        sales        13 non-black  2 professional FALSE
3        sales professional        16 non-black  3         farm FALSE
3.1      sales professional        16 non-black  3   operatives FALSE
3.2      sales professional        16 non-black  3    craftsmen FALSE
3.3      sales professional        16 non-black  3        sales  TRUE
3.4      sales professional        16 non-black  3 professional FALSE

## Analysis clogit (this particular analysis did not work in clogistic)
res.clogit <- clogit(case ~ tocc + tocc:education + strata(id), logan2)
summ.clogit <- summary(res.clogit)
summ.clogit
Call:
coxph(formula = Surv(rep(1, 4190L), case) ~ tocc + tocc:education + 
    strata(id), data = logan2, method = "exact")

  n= 4190, number of events= 838 

                                coef exp(coef)  se(coef)      z Pr(>|z|)    
toccfarm                   -1.896463  0.150099  1.380782  -1.37   0.1696    
toccoperatives              1.166750  3.211539  0.565646   2.06   0.0391 *  
toccprofessional           -8.100549  0.000303  0.698724 -11.59  < 2e-16 ***
toccsales                  -5.029230  0.006544  0.770086  -6.53  6.5e-11 ***
tocccraftsmen:education    -0.332284  0.717283  0.056868  -5.84  5.1e-09 ***
toccfarm:education         -0.370286  0.690537  0.116410  -3.18   0.0015 ** 
toccoperatives:education   -0.422219  0.655591  0.058433  -7.23  5.0e-13 ***
toccprofessional:education  0.278247  1.320812  0.051021   5.45  4.9e-08 ***
toccsales:education               NA        NA  0.000000     NA       NA    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

                           exp(coef) exp(-coef) lower .95 upper .95
toccfarm                    0.150099      6.662 0.0100243   2.24750
toccoperatives              3.211539      0.311 1.0598246   9.73178
toccprofessional            0.000303   3296.278 0.0000771   0.00119
toccsales                   0.006544    152.815 0.0014466   0.02960
tocccraftsmen:education     0.717283      1.394 0.6416297   0.80186
toccfarm:education          0.690537      1.448 0.5496656   0.86751
toccoperatives:education    0.655591      1.525 0.5846481   0.73514
toccprofessional:education  1.320812      0.757 1.1951206   1.45972
toccsales:education               NA         NA        NA        NA

Rsquare= 0.147   (max possible= 0.475 )
Likelihood ratio test= 666  on 8 df,   p=0
Wald test            = 414  on 8 df,   p=0
Score (logrank) test = 682  on 8 df,   p=0

## Uncoditional logistic regression (not appropriate)
res.logit1 <- glm(case ~ tocc + tocc:education + factor(id), logan2, family = binomial(link = "logit"))
summary(res.logit1)$coef[c(1:5,843:846),]
                           Estimate Std. Error z value  Pr(>|z|)
(Intercept)                  4.1110    1.43414   2.867 4.150e-03
toccfarm                    -2.7176    1.43439  -1.895 5.815e-02
toccoperatives               1.7751    0.68699   2.584 9.768e-03
toccprofessional           -11.5476    0.82091 -14.067 6.076e-45
toccsales                   -5.8349    0.82293  -7.090 1.337e-12
tocccraftsmen:education     -0.3800    0.06062  -6.270 3.619e-10
toccfarm:education          -0.3779    0.12012  -3.146 1.657e-03
toccoperatives:education    -0.5139    0.06384  -8.050 8.298e-16
toccprofessional:education   0.4981    0.05956   8.363 6.108e-17
exp(coef(res.logit1)[c(1:5,843:846)])
               (Intercept)                   toccfarm             toccoperatives           toccprofessional 
              61.007195773                0.066033211                5.901084569                0.000009659 
                 toccsales    tocccraftsmen:education         toccfarm:education   toccoperatives:education 
               0.002923648                0.683836589                0.685320145                0.598166153 
toccprofessional:education 
               1.645564209 

## Uncoditional logistic regression, ignoring strata (not appropriate)
res.logit1b <- glm(case ~ tocc + tocc:education, logan2, family = binomial(link = "logit"))
summary(res.logit1b)$coef
                            Estimate Std. Error z value  Pr(>|z|)
(Intercept)                  1.85771    0.43803   4.241 2.225e-05
toccfarm                    -3.22405    1.13783  -2.834 4.604e-03
toccoperatives               1.66071    0.65561   2.533 1.131e-02
toccprofessional           -10.00008    0.72279 -13.835 1.559e-43
toccsales                   -4.67442    0.69818  -6.695 2.155e-11
tocccraftsmen:education     -0.22820    0.03357  -6.798 1.057e-11
toccfarm:education          -0.18559    0.08352  -2.222 2.627e-02
toccoperatives:education    -0.35063    0.03810  -9.204 3.448e-20
toccprofessional:education   0.53842    0.03999  13.464 2.535e-41
toccsales:education          0.06351    0.03830   1.658 9.728e-02
exp(coef(res.logit1b))
               (Intercept)                   toccfarm             toccoperatives           toccprofessional 
                 6.4090214                  0.0397935                  5.2630345                  0.0000454 
                 toccsales    tocccraftsmen:education         toccfarm:education   toccoperatives:education 
                 0.0093309                  0.7959686                  0.8306101                  0.7042440 
toccprofessional:education        toccsales:education 
                 1.7133004                  1.0655733 

OR obtained from different methods

                              clogit  logit1  logit2
toccfarm                      0.1501  0.0660  0.0398
toccoperatives                3.2115  5.9011  5.2630
toccprofessional              0.0003  0.0000  0.0000
toccsales                     0.0065  0.0029  0.0093
tocccraftsmen:education       0.7173  0.6838  0.7960
toccfarm:education            0.6905  0.6853  0.8306
toccoperatives:education      0.6556  0.5982  0.7042
toccprofessional:education    1.3208  1.6456  1.7133
toccsales:education               NA       ?  1.0656

Using Epi::clogistic()

library(Epi)
data(bdendo)

## Show dataset for first three strata
bdendo[bdendo$set %in% c(1,2,3),]
   set d gall hyp   ob est dur non duration age cest agegrp  age3
1    1 1   No  No  Yes Yes   4 Yes       96  74    3  70-74 65-74
2    1 0   No  No <NA>  No   0  No        0  75    0  70-74 65-74
3    1 0   No  No <NA>  No   0  No        0  74    0  70-74 65-74
4    1 0   No  No <NA>  No   0  No        0  74    0  70-74 65-74
5    1 0   No  No  Yes Yes   3 Yes       48  75    1  70-74 65-74
6    2 1   No  No   No Yes   4 Yes       96  67    3  65-69 65-74
7    2 0   No  No   No Yes   1  No        5  67    3  65-69 65-74
8    2 0   No Yes  Yes  No   0 Yes        0  67    0  65-69 65-74
9    2 0   No  No   No Yes   3  No       53  67    2  65-69 65-74
10   2 0   No  No   No Yes   2 Yes       45  68    2  65-69 65-74
11   3 1   No Yes  Yes Yes   1 Yes        9  76    1  75-79   75+
12   3 0   No Yes  Yes Yes   4 Yes       96  76    2  75-79   75+
13   3 0   No Yes   No Yes   1 Yes        3  76    1  75-79   75+
14   3 0   No Yes  Yes Yes   2 Yes       15  76    2  75-79   75+
15   3 0   No  No   No Yes   2 Yes       36  77    1  75-79   75+

## Analysis
res.clogistic <- clogistic(d ~ cest + dur, strata = set, data = bdendo)
res.clogistic

Call: 
clogistic(formula = d ~ cest + dur, strata = set, data = bdendo)

         coef exp(coef) se(coef)      z    p
cest.L  0.240     1.271    2.276  0.105 0.92
cest.Q  0.890     2.435    1.812  0.491 0.62
cest.C  0.113     1.120    0.891  0.127 0.90
dur.L   1.965     7.134    2.222  0.884 0.38
dur.Q  -0.716     0.489    1.858 -0.385 0.70
dur.C   0.136     1.146    1.168  0.117 0.91
dur^4      NA        NA    0.000     NA   NA

Likelihood ratio test=35.3  on 6 df, p=0.0000038, n=254

## clogit
res.clogit2 <- clogit(d ~ cest + dur + strata(set), bdendo)
summary(res.clogit2)
Call:
coxph(formula = Surv(rep(1, 315L), d) ~ cest + dur + strata(set), 
    data = bdendo, method = "exact")

  n= 295, number of events= 54 
   (20 observations deleted due to missingness)

         coef exp(coef) se(coef)     z Pr(>|z|)
cest.L  0.240     1.271    2.276  0.11     0.92
cest.Q  0.890     2.435    1.812  0.49     0.62
cest.C  0.113     1.120    0.891  0.13     0.90
dur.L   1.965     7.134    2.222  0.88     0.38
dur.Q  -0.716     0.489    1.858 -0.39     0.70
dur.C   0.136     1.146    1.168  0.12     0.91
dur^4      NA        NA    0.000    NA       NA

       exp(coef) exp(-coef) lower .95 upper .95
cest.L     1.271      0.787    0.0147    109.94
cest.Q     2.435      0.411    0.0698     84.93
cest.C     1.120      0.893    0.1953      6.42
dur.L      7.134      0.140    0.0916    555.94
dur.Q      0.489      2.046    0.0128     18.67
dur.C      1.146      0.873    0.1161     11.31
dur^4         NA         NA        NA        NA

Rsquare= 0.113   (max possible= 0.439 )
Likelihood ratio test= 35.3  on 6 df,   p=0.0000038
Wald test            = 27.7  on 6 df,   p=0.000105
Score (logrank) test = 36.8  on 6 df,   p=0.0000019

## Uncoditional logistic regression  (not appropriate)
res.logit2 <- glm(d ~ cest + dur + factor(set), bdendo, family = binomial(link = "logit"))
..glm.ratio(res.logit2)[1:10,]
                OR 2.5 %  97.5 %     P
(Intercept)   0.21  0.01    3.04 0.296
cest.L        1.29  0.01  260.47 0.925
cest.Q        4.17  0.06  317.18 0.507
cest.C        1.15  0.14    9.34 0.892
dur.L        17.44  0.10 3614.14 0.280
dur.Q         0.36  0.00   27.69 0.643
dur.C         1.22  0.08   19.62 0.887
dur^4           NA    NA      NA 0.809
factor(set)2  0.63  0.01   33.31 0.815
factor(set)3  1.56  0.03   81.06 0.996

## Uncoditional logistic regression, ignoring strata  (not appropriate)
res.logit2b <- glm(d ~ cest + dur, bdendo, family = binomial(link = "logit"))
..glm.ratio(res.logit2b)
              OR 2.5 % 97.5 %     P
(Intercept) 0.33  0.21   0.49 0.000
cest.L      1.07  0.01  81.75 0.976
cest.Q      2.85  0.10  96.14 0.549
cest.C      1.02  0.19   5.41 0.982
dur.L       7.20  0.11 569.14 0.364
dur.Q       0.38  0.01  13.12 0.593
dur.C       1.28  0.14  11.96 0.827
dur^4         NA    NA     NA 0.000

OR obtained from different methods

        clogit logit1 logit2
cest.L    1.27  1.29    1.07
cest.Q    2.43  4.17    2.85
cest.C    1.12  1.15    1.02
dur.L     7.13 17.44    7.20
dur.Q     0.49  0.36    0.38
dur.C     1.15  1.22    1.28

Kleinbaum MI example

## Load data
library(foreign)
midat <- read.dta("http://www.sph.emory.edu/~dkleinb/datasets/mi.dta")

## show first three strata
midat[midat$match %in% c(1,2,3),]
  match person mi smk sbp ecg survtime
1     1      1  1   0 160   1        1
2     1      2  0   0 140   0        2
3     1      3  0   0 120   0        2
4     2      4  1   0 160   1        1
5     2      5  0   0 140   0        2
6     2      6  0   0 120   0        2
7     3      7  1   0 160   0        1
8     3      8  0   0 140   0        2
9     3      9  0   0 120   0        2

## Conditional logistic regression
res.clogit3.int <- clogit(mi ~ smk + sbp + ecg + smk:sbp + smk:ecg + strata(match), midat)
res.clogit3 <- clogit(mi ~ smk + sbp + ecg + strata(match), midat)

## No interaction (smaller) model is adequate
anova(res.clogit3.int, res.clogit3)
Analysis of Deviance Table
 Cox model: response is  Surv(rep(1, 117L), mi)
 Model 1: ~ smk + sbp + ecg + smk:sbp + smk:ecg + strata(match)
 Model 2: ~ smk + sbp + ecg + strata(match)
  loglik Chisq Df P(>|Chi|)
1  -31.5                   
2  -31.7  0.55  2      0.76

## Show result
summary(res.clogit3)
Call:
coxph(formula = Surv(rep(1, 117L), mi) ~ smk + sbp + ecg + strata(match), 
    data = midat, method = "exact")

  n= 117, number of events= 39 

      coef exp(coef) se(coef)    z Pr(>|z|)   
smk 0.7291    2.0731   0.5613 1.30   0.1940   
sbp 0.0456    1.0467   0.0152 2.99   0.0028 **
ecg 1.5993    4.9494   0.8534 1.87   0.0609 . 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

    exp(coef) exp(-coef) lower .95 upper .95
smk      2.07      0.482     0.690      6.23
sbp      1.05      0.955     1.016      1.08
ecg      4.95      0.202     0.929     26.36

Rsquare= 0.173   (max possible= 0.519 )
Likelihood ratio test= 22.2  on 3 df,   p=0.0000592
Wald test            = 13.7  on 3 df,   p=0.00338
Score (logrank) test = 19.7  on 3 df,   p=0.000198

## Uncoditional logistic regression, using match as categorical variable (not appropriate)
res.logit3 <- glm(mi ~ smk + sbp + ecg + factor(match), midat, family = binomial(link = "logit"))
..glm.ratio(res.logit3)
                   OR 2.5 %  97.5 %     P
(Intercept)      0.00  0.00    0.00 0.001
smk              3.38  0.85   14.55 0.090
sbp              1.08  1.04    1.12 0.000
ecg             16.18  2.06  203.43 0.015
factor(match)2   1.00  0.00  346.51 1.000
factor(match)3   3.76  0.03  745.27 0.615
factor(match)4   3.76  0.03  745.27 0.615
factor(match)5   3.76  0.03  745.27 0.615
factor(match)6   3.76  0.03  745.27 0.615
factor(match)7   3.76  0.03  745.27 0.615
factor(match)8   3.76  0.03  745.27 0.615
factor(match)9   3.76  0.03  745.27 0.615
factor(match)10  3.76  0.03  745.27 0.615
factor(match)11  6.61  0.04 1391.13 0.483
factor(match)12 20.54  0.19 4213.57 0.246
factor(match)13 20.54  0.19 4213.57 0.246
factor(match)14  4.74  0.05  804.49 0.542
factor(match)15  0.70  0.01   82.52 0.886
factor(match)16  0.23  0.00   30.38 0.580
factor(match)17  0.55  0.00  311.53 0.866
factor(match)18  0.55  0.00  311.53 0.866
factor(match)19  0.60  0.00   94.07 0.843
factor(match)20  0.15  0.00   30.80 0.509
factor(match)21  2.13  0.01  523.61 0.785
factor(match)22 13.05  0.11 2769.44 0.330
factor(match)23  3.01  0.03  533.36 0.670
factor(match)24  2.92  0.03  532.64 0.680
factor(match)25  2.92  0.03  532.64 0.680
factor(match)26  2.23  0.02  435.65 0.759
factor(match)27 13.05  0.11 2769.44 0.330
factor(match)28  0.82  0.00  238.63 0.945
factor(match)29  2.92  0.03  532.64 0.680
factor(match)30  2.13  0.01  523.61 0.785
factor(match)31  3.01  0.03  533.36 0.670
factor(match)32  0.32  0.00  137.94 0.722
factor(match)33  0.08  0.00   16.05 0.374
factor(match)34  0.53  0.00   78.14 0.813
factor(match)35  1.70  0.01  384.23 0.845
factor(match)36  0.12  0.00   14.70 0.412
factor(match)37  1.26  0.01  298.55 0.933
factor(match)38  0.32  0.00   61.02 0.670
factor(match)39  6.08  0.05 1387.95 0.501

## Uncoditional logistic regression, ignoring strata (not appropriate)
res.logit3b <- glm(mi ~ smk + sbp + ecg, midat, family = binomial(link = "logit"))
..glm.ratio(res.logit3b)
              OR 2.5 % 97.5 %     P
(Intercept) 0.00  0.00   0.01 0.000
smk         1.79  0.70   4.59 0.222
sbp         1.05  1.02   1.08 0.000
ecg         2.12  0.76   5.96 0.149

OR obtained from different methods

        clogit logit1  logit2
smk     2.07    3.38    1.79
sbp     1.05    1.08    1.05
ecg     4.95   16.18    2.12