# Location of the depression data set (CSV).
URL <- "http://static.lib.virginia.edu/statlab/materials/data/depression.csv"

# Read the file, keeping character columns as factors so that `drug` can be
# releveled below.
dat <- read.csv(file = URL, stringsAsFactors = TRUE)

# Store the subject identifier as a factor.
dat[["id"]] <- factor(dat[["id"]])

# Make "standard" the reference level of `drug`, so model coefficients for
# `drug` are expressed relative to it.
dat[["drug"]] <- relevel(dat[["drug"]], ref = "standard")

# Show the first three rows.
head(dat, 3)
##   diagnose     drug id time depression
## 1     mild standard  1    0          1
## 2     mild standard  1    1          1
## 3     mild standard  1    2          1
# Number of distinct subjects: count the ids that do not repeat an earlier row.
sum(!duplicated(dat$id))
## [1] 340
library(magrittr)

# Mean of `depression` in every diagnose x drug x time cell, laid out as a
# flat contingency table and rounded to two decimals.
# NOTE(review): `depression` looks like a 0/1 indicator, which would make these
# cell means proportions -- confirm against the data dictionary.
tapply(
  dat$depression,
  list(dat$diagnose, dat$drug, dat$time),
  mean
) %>%
  ftable() %>%
  round(digits = 2)
##                     0    1    2
##                                
## mild   standard  0.51 0.59 0.68
##        new       0.53 0.79 0.97
## severe standard  0.21 0.28 0.46
##        new       0.18 0.50 0.83
library(gee)

# GEE fit with a binomial family, clustering on `id`, using an
# "independence" working correlation. `scale.fix = TRUE` keeps the scale
# parameter fixed rather than estimating it.
# NOTE(review): gee() expects each id's rows to be contiguous -- confirm the
# data are sorted by id.
dep_gee <- gee(
  formula = depression ~ diagnose + drug * time,
  id = id,
  data = dat,
  family = binomial,
  corstr = "independence",
  scale.fix = TRUE
)
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##    (Intercept) diagnosesevere        drugnew           time   drugnew:time 
##    -0.02798843    -1.31391092    -0.05960381     0.48241209     1.01744498
# Print the summary of the independence fit: coefficient table with naive and
# robust standard errors, the scale parameter, the iteration count, and the
# working correlation matrix.
summary(dep_gee)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     Independent 
## 
## Call:
## gee(formula = depression ~ diagnose + drug * time, id = id, data = dat, 
##     family = binomial, corstr = "independence", scale.fix = TRUE)
## 
## Summary of Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.94844242 -0.40683252  0.05155758  0.38830952  0.80242231 
## 
## 
## Coefficients:
##                   Estimate Naive S.E.    Naive z Robust S.E.   Robust z
## (Intercept)    -0.02798843  0.1639083 -0.1707566   0.1741865 -0.1606808
## diagnosesevere -1.31391092  0.1464151 -8.9738733   0.1459845 -9.0003423
## drugnew        -0.05960381  0.2222080 -0.2682343   0.2285385 -0.2608042
## time            0.48241209  0.1147626  4.2035644   0.1199350  4.0222784
## drugnew:time    1.01744498  0.1887954  5.3891398   0.1876938  5.4207709
## 
## Estimated Scale Parameter:  1
## Number of Iterations:  1
## 
## Working Correlation
##      [,1] [,2] [,3]
## [1,]    1    0    0
## [2,]    0    1    0
## [3,]    0    0    1
# Same mean model as before, now with an "exchangeable" working correlation
# (one common correlation between any two observations within an id).
# `scale.fix = TRUE` keeps the scale parameter fixed rather than estimating it.
dep_gee2 <- gee(
  formula = depression ~ diagnose + drug * time,
  id = id,
  data = dat,
  family = binomial,
  corstr = "exchangeable",
  scale.fix = TRUE
)
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##    (Intercept) diagnosesevere        drugnew           time   drugnew:time 
##    -0.02798843    -1.31391092    -0.05960381     0.48241209     1.01744498
# Print the summary of the exchangeable fit: coefficient table with naive and
# robust standard errors, plus the estimated working correlation matrix.
summary(dep_gee2)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = depression ~ diagnose + drug * time, id = id, data = dat, 
##     family = binomial, corstr = "exchangeable", scale.fix = TRUE)
## 
## Summary of Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.94843397 -0.40683122  0.05156603  0.38832332  0.80238627 
## 
## 
## Coefficients:
##                   Estimate Naive S.E.    Naive z Robust S.E.   Robust z
## (Intercept)    -0.02809866  0.1637503 -0.1715945   0.1741791 -0.1613205
## diagnosesevere -1.31391033  0.1459325 -9.0035505   0.1459630 -9.0016667
## drugnew        -0.05926689  0.2221626 -0.2667725   0.2285569 -0.2593091
## time            0.48246420  0.1149581  4.1968686   0.1199383  4.0226037
## drugnew:time    1.01719312  0.1890913  5.3793750   0.1877014  5.4192084
## 
## Estimated Scale Parameter:  1
## Number of Iterations:  2
## 
## Working Correlation
##              [,1]         [,2]         [,3]
## [1,]  1.000000000 -0.003432732 -0.003432732
## [2,] -0.003432732  1.000000000 -0.003432732
## [3,] -0.003432732 -0.003432732  1.000000000
# Same mean model, now with an autoregressive working correlation:
# corstr = "AR-M" with Mv = 1 requests order M = 1.
# `scale.fix = TRUE` keeps the scale parameter fixed rather than estimating it.
dep_gee3 <- gee(
  formula = depression ~ diagnose + drug * time,
  id = id,
  data = dat,
  family = binomial,
  corstr = "AR-M",
  Mv = 1,
  scale.fix = TRUE
)
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##    (Intercept) diagnosesevere        drugnew           time   drugnew:time 
##    -0.02798843    -1.31391092    -0.05960381     0.48241209     1.01744498
# Print the summary of the AR-M (M = 1) fit: coefficient table with naive and
# robust standard errors, plus the estimated working correlation matrix.
summary(dep_gee3)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     AR-M , M = 1 
## 
## Call:
## gee(formula = depression ~ diagnose + drug * time, id = id, data = dat, 
##     family = binomial, corstr = "AR-M", Mv = 1, scale.fix = TRUE)
## 
## Summary of Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.94844464 -0.40691023  0.05155536  0.38824284  0.80236892 
## 
## 
## Coefficients:
##                   Estimate Naive S.E.    Naive z Robust S.E.   Robust z
## (Intercept)    -0.02770314  0.1643892 -0.1685216   0.1741163 -0.1591071
## diagnosesevere -1.31386512  0.1472418 -8.9231818   0.1459894 -8.9997314
## drugnew        -0.05959816  0.2226449 -0.2676826   0.2284592 -0.2608700
## time            0.48240753  0.1147578  4.2036998   0.1199244  4.0225960
## drugnew:time    1.01732678  0.1887941  5.3885527   0.1876727  5.4207510
## 
## Estimated Scale Parameter:  1
## Number of Iterations:  2
## 
## Working Correlation
##              [,1]        [,2]         [,3]
## [1,] 1.000000e+00 0.008477443 7.186704e-05
## [2,] 8.477443e-03 1.000000000 8.477443e-03
## [3,] 7.186704e-05 0.008477443 1.000000e+00
# Same mean model, now with an "unstructured" working correlation, where each
# pairwise within-id correlation is estimated separately.
# `scale.fix = TRUE` keeps the scale parameter fixed rather than estimating it.
dep_gee4 <- gee(
  formula = depression ~ diagnose + drug * time,
  id = id,
  data = dat,
  family = binomial,
  corstr = "unstructured",
  scale.fix = TRUE
)
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##    (Intercept) diagnosesevere        drugnew           time   drugnew:time 
##    -0.02798843    -1.31391092    -0.05960381     0.48241209     1.01744498
# Print the summary of the unstructured fit: coefficient table with naive and
# robust standard errors, plus the estimated working correlation matrix.
summary(dep_gee4)
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     Unstructured 
## 
## Call:
## gee(formula = depression ~ diagnose + drug * time, id = id, data = dat, 
##     family = binomial, corstr = "unstructured", scale.fix = TRUE)
## 
## Summary of Residuals:
##         Min          1Q      Median          3Q         Max 
## -0.94773674 -0.40645713  0.05226326  0.38927858  0.79975454 
## 
## 
## Coefficients:
##                   Estimate Naive S.E.    Naive z Robust S.E.   Robust z
## (Intercept)    -0.02552611  0.1679741 -0.1519645   0.1726392 -0.1478581
## diagnosesevere -1.30484850  0.1461691 -8.9269772   0.1450136 -8.9981088
## drugnew        -0.05438636  0.2282121 -0.2383149   0.2271321 -0.2394481
## time            0.47587182  0.1160832  4.0994035   0.1190418  3.9975178
## drugnew:time    1.01297603  0.1887379  5.3671034   0.1865407  5.4303205
## 
## Estimated Scale Parameter:  1
## Number of Iterations:  3
## 
## Working Correlation
##             [,1]        [,2]        [,3]
## [1,]  1.00000000  0.07393977 -0.02741128
## [2,]  0.07393977  1.00000000 -0.05669559
## [3,] -0.02741128 -0.05669559  1.00000000