BIO226 Applied Longitudinal Analysis: Marginal Model using Generalized Estimating Equations

References

Prepare data

## Load foreign package
library(foreign)

## Leprosy dataset
leprosy <- read.dta("~/statistics/bio226AppliedLongitudinalAnalysis/leprosy.dta")

## To long format
leprosy.long <- reshape(data      = leprosy,
                 varying   = paste0("y",1:2),
                 ## v.names   = "",
                 timevar   = "time",
                 idvar     = "id",
                 direction = "long",
                 sep       = ""
                 )
## Change time 1, 2 to 0, 1
leprosy.long$time <- leprosy.long$time - 1
## Placebo as control
leprosy.long$drug <- relevel(leprosy.long$drug, ref = "Drug C")
## Order by id and then time
leprosy.long <- leprosy.long[with(leprosy.long, order(id, time)),]

## Onychomycosis dataset
toenail <- read.dta("~/statistics/bio226AppliedLongitudinalAnalysis/toenail.dta")

Count outcome: Leprosy example

## Summary
library(Hmisc)
library(plyr)
ddply(leprosy.long,
      .(drug, time),
      function(X) {
          smean.sd(X$y)
      })
    drug time Mean    SD
1 Drug C    0 12.9 3.957
2 Drug C    1 12.3 7.150
3 Drug A    0  9.3 4.762
4 Drug A    1  5.3 4.644
5 Drug B    0 10.0 5.249
6 Drug B    1  6.1 6.154

## Using gee (This matches SAS PROC GENMOD results)
library(gee)
res.gee.lepro <- gee(formula   = y ~ time + drug:time,
                     id        = id,
                     data      = leprosy.long,
                     family    = poisson(link = "log"),
                     corstr    = "exchangeable",
                     scale.fix = FALSE
                     )
    (Intercept)            time time:drugDrug A time:drugDrug B 
         2.3734          0.1362         -0.8419         -0.7013 
summary(res.gee.lepro)

 GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
 gee S-function, version 4.13 modified 98/01/27 (1998) 

Model:
 Link:                      Logarithm 
 Variance to Mean Relation: Poisson 
 Correlation Structure:     Exchangeable 

Call:
gee(formula = y ~ time + drug:time, id = id, data = leprosy.long, 
    family = poisson(link = "log"), corstr = "exchangeable", 
    scale.fix = FALSE)

Summary of Residuals:
    Min      1Q  Median      3Q     Max 
-9.5860 -4.7333 -0.3757  3.5167 12.4140 


Coefficients:
                Estimate Naive S.E. Naive z Robust S.E. Robust z
(Intercept)      2.37335     0.1035 22.9238     0.08014 29.61589
time            -0.01382     0.1111 -0.1245     0.15734 -0.08785
time:drugDrug A -0.54059     0.1818 -2.9740     0.21858 -2.47323
time:drugDrug B -0.47907     0.1779 -2.6924     0.22786 -2.10243

Estimated Scale Parameter:  3.451
Number of Iterations:  5

Working Correlation
       [,1]   [,2]
[1,] 1.0000 0.7966
[2,] 0.7966 1.0000

res.gee.lepro2 <- gee(formula   = y ~ time + (drug != "Drug C"):time,
                      id        = id,
                      data      = leprosy.long,
                      family    = poisson(link = "log"),
                      corstr    = "exchangeable",
                      scale.fix = FALSE
                      )
              (Intercept)                      time time:drug != "Drug C"TRUE 
                   2.3734                    0.1362                   -0.7691 
summary(res.gee.lepro2)

 GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
 gee S-function, version 4.13 modified 98/01/27 (1998) 

Model:
 Link:                      Logarithm 
 Variance to Mean Relation: Poisson 
 Correlation Structure:     Exchangeable 

Call:
gee(formula = y ~ time + (drug != "Drug C"):time, id = id, data = leprosy.long, 
    family = poisson(link = "log"), corstr = "exchangeable", 
    scale.fix = FALSE)

Summary of Residuals:
    Min      1Q  Median      3Q     Max 
-9.6185 -4.7333 -0.4843  3.5167 12.3815 


Coefficients:
                          Estimate Naive S.E.  Naive z Robust S.E. Robust z
(Intercept)                2.37335     0.1028 23.07801     0.08014 29.61589
time                      -0.01076     0.1142 -0.09421     0.15722 -0.06842
time:drug != "Drug C"TRUE -0.51412     0.1536 -3.34698     0.19656 -2.61559

Estimated Scale Parameter:  3.405
Number of Iterations:  5

Working Correlation
       [,1]   [,2]
[1,] 1.0000 0.7803
[2,] 0.7803 1.0000


## Using geepack (This does not match SAS result.)
## http://courses.washington.edu/b571/homeworks/old/2005/ex2key.pdf
library(geepack)
res.geepack.lepro <- geeglm(formula     = y ~ time + drug:time,
                            family      = poisson(link = "log"),
                            id          = id,
                            waves       = NULL,
                            data        = leprosy.long,
                            corstr      = "exchangeable",
                            scale.fix   = FALSE
                            )
summary(res.geepack.lepro)

Call:
geeglm(formula = y ~ time + drug:time, family = poisson(link = "log"), 
    data = leprosy.long, id = id, waves = NULL, corstr = "exchangeable", 
    scale.fix = FALSE)

 Coefficients:
                Estimate  Std.err   Wald Pr(>|W|)    
(Intercept)      2.37335  0.08014 877.10   <2e-16 ***
time            -0.00288  0.15701   0.00    0.985    
time:drugDrug A -0.56257  0.22198   6.42    0.011 *  
time:drugDrug B -0.49528  0.23420   4.47    0.034 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimated Scale Parameters:
            Estimate Std.err
(Intercept)     3.21     0.5

Correlation: Structure = exchangeable  Link = identity 

Estimated Correlation Parameters:
      Estimate Std.err
alpha    0.738  0.0815
Number of clusters:   30   Maximum cluster size: 2 

res.geepack.lepro2 <- geeglm(formula     = y ~ time + (drug != "Drug C"):time,
                             family      = poisson(link = "log"),
                             id          = id,
                             waves       = NULL,
                             data        = leprosy.long,
                             corstr      = "exchangeable",
                             scale.fix   = FALSE
                             )
summary(res.geepack.lepro2)

Call:
geeglm(formula = y ~ time + (drug != "Drug C"):time, family = poisson(link = "log"), 
    data = leprosy.long, id = id, waves = NULL, corstr = "exchangeable", 
    scale.fix = FALSE)

 Coefficients:
                          Estimate  Std.err   Wald Pr(>|W|)    
(Intercept)                2.37335  0.08014 877.10   <2e-16 ***
time                      -0.00286  0.15700   0.00   0.9855    
time:drug != "Drug C"TRUE -0.52783  0.19883   7.05   0.0079 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimated Scale Parameters:
            Estimate Std.err
(Intercept)     3.23    0.52

Correlation: Structure = exchangeable  Link = identity 

Estimated Correlation Parameters:
      Estimate Std.err
alpha    0.738   0.081
Number of clusters:   30   Maximum cluster size: 2 

SAS code and result

libname bio226 '//psf/Home/Documents/statistics/bio226/';

data leprosy;
    set bio226.leprosy;
    placebo = (drug = 'C');
run;

/* Wide to long using array*/
data leprosy_long;
    set leprosy;

    array ay(1:2) y1 - y2;

    do i = 1 to 2;
        bacilli = ay(i);
        time = i -1;
        t = time;
        output;
        end;

    drop y1 - y2;
run;

/* three groups */
proc genmod data = leprosy_long;
    class t id drug (ref = 'C') / param = ref;
    model bacilli = time drug*time / dist = poisson link = log;
    repeated subject = id / withinsubject = t type = CS;
run;

                             Analysis Of GEE Parameter Estimates
                              Empirical Standard Error Estimates

                                    Standard   95% Confidence
               Parameter   Estimate    Error       Limits            Z Pr > |Z|
               Intercept     2.3734   0.0801   2.2163   2.5304   29.62   <.0001
               time         -0.0138   0.1573  -0.3222   0.2946   -0.09   0.9300
               time*drug A  -0.5406   0.2186  -0.9690  -0.1122   -2.47   0.0134
               time*drug B  -0.4791   0.2279  -0.9257  -0.0325   -2.10   0.0355


/* two groups*/
proc genmod data = leprosy_long;
    class t id placebo;
    model bacilli = time placebo*time / dist = poisson link = log;
    repeated subject = id / withinsubject = t type = CS;
run;

                              Analysis Of GEE Parameter Estimates
                              Empirical Standard Error Estimates
                                      Standard   95% Confidence
              Parameter      Estimate    Error       Limits            Z Pr > |Z|
              Intercept        2.3734   0.0801   2.2163   2.5304   29.62   <.0001
              time            -0.0108   0.1572  -0.3189   0.2974   -0.07   0.9455
              time*placebo 0  -0.5141   0.1966  -0.8994  -0.1289   -2.62   0.0089
              time*placebo 1   0.0000   0.0000   0.0000   0.0000     .      .

Binary outcome: Onychomyocosis example

## Using gee (This is closer to the SAS result.)
res.gee.toe <- gee(formula = y ~ month + trt:month,
                   family  = binomial(link = "logit"),
                   id      = id,
                   data    = toenail,
                   corstr  = "unstructured"
                   )
(Intercept)       month   month:trt 
    -0.5569     -0.1703     -0.0673 
summary(res.gee.toe)

 GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
 gee S-function, version 4.13 modified 98/01/27 (1998) 

Model:
 Link:                      Logit 
 Variance to Mean Relation: Binomial 
 Correlation Structure:     Unstructured 

Call:
gee(formula = y ~ month + trt:month, id = id, data = toenail, 
    family = binomial(link = "logit"), corstr = "unstructured")

Summary of Residuals:
    Min      1Q  Median      3Q     Max 
-0.3360 -0.2524 -0.1227 -0.0334  0.9743 


Coefficients:
            Estimate Naive S.E. Naive z Robust S.E. Robust z
(Intercept)  -0.6810     0.1205   -5.65      0.1219    -5.59
month        -0.1429     0.0247   -5.78      0.0257    -5.56
month:trt    -0.0795     0.0381   -2.09      0.0424    -1.88

Estimated Scale Parameter:  1.04
Number of Iterations:  5

Working Correlation
      [,1]  [,2]  [,3]  [,4]  [,5]  [,6]  [,7]
[1,] 1.000 0.885 0.690 0.486 0.235 0.144 0.103
[2,] 0.885 1.000 0.799 0.577 0.257 0.218 0.123
[3,] 0.690 0.799 1.000 0.750 0.268 0.196 0.149
[4,] 0.486 0.577 0.750 1.000 0.348 0.257 0.194
[5,] 0.235 0.257 0.268 0.348 1.000 0.453 0.368
[6,] 0.144 0.218 0.196 0.257 0.453 1.000 0.548
[7,] 0.103 0.123 0.149 0.194 0.368 0.548 1.000

## Using geepack (This does not match SAS result.)
res.geepack.toe <- geeglm(formula = y ~ month + trt:month,
                          family  = binomial(link = "logit"),
                          id      = id,
                          ## waves   = visit, # This crashes R
                          data    = toenail,
                          corstr  = "unstructured"
                          )
summary(res.geepack.toe)

Call:
geeglm(formula = y ~ month + trt:month, family = binomial(link = "logit"), 
    data = toenail, id = id, corstr = "unstructured")

 Coefficients:
            Estimate Std.err  Wald     Pr(>|W|)    
(Intercept)  -0.7217  0.1234 34.22 0.0000000049 ***
month        -0.1334  0.0251 28.30 0.0000001037 ***
month:trt    -0.0863  0.0424  4.15        0.042 *  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Estimated Scale Parameters:
            Estimate Std.err
(Intercept)     1.04   0.337

Correlation: Structure = unstructured  Link = identity 

Estimated Correlation Parameters:
          Estimate Std.err
alpha.1:2    0.904  0.3008
alpha.1:3    0.713  0.2431
alpha.1:4    0.513  0.1896
alpha.1:5    0.246  0.1191
alpha.1:6    0.157  0.0930
alpha.1:7    0.131  0.0947
alpha.2:3    0.825  0.2754
alpha.2:4    0.609  0.2190
alpha.2:5    0.271  0.1257
alpha.2:6    0.238  0.1155
alpha.2:7    0.158  0.1029
alpha.3:4    0.790  0.2734
alpha.3:5    0.283  0.1288
alpha.3:6    0.215  0.1110
alpha.3:7    0.192  0.1137
alpha.4:5    0.367  0.1536
alpha.4:6    0.282  0.1300
alpha.4:7    0.251  0.1304
alpha.5:6    0.498  0.1994
alpha.5:7    0.475  0.2038
alpha.6:7    0.705  0.2611
Number of clusters:   294   Maximum cluster size: 7 

SAS code and result

/* This matches the slides in Lecture 18*/
proc genmod descending data = toe;
    class id trt (ref = '0') visit / param = ref;
    model y = month trt*month / dist = binomial link = logit;
    repeated subject = id / withinsubject = visit type = un;
run;

                             Analysis Of GEE Parameter Estimates
                              Empirical Standard Error Estimates

                                    Standard   95% Confidence
               Parameter   Estimate    Error       Limits            Z Pr > |Z|
               Intercept    -0.6981   0.1217  -0.9365  -0.4596   -5.74   <.0001
               MONTH        -0.1401   0.0261  -0.1913  -0.0889   -5.36   <.0001
               MONTH*TRT 1  -0.0806   0.0415  -0.1620   0.0008   -1.94   0.0523