## Load foreign package
library(foreign)
## Leprosy dataset
leprosy <- read.dta("~/statistics/bio226AppliedLongitudinalAnalysis/leprosy.dta")
## To long format
leprosy.long <- reshape(data = leprosy,
varying = paste0("y",1:2),
## v.names = "",
timevar = "time",
idvar = "id",
direction = "long",
sep = ""
)
## Change time 1, 2 to 0, 1
leprosy.long$time <- leprosy.long$time - 1
## Placebo as control
leprosy.long$drug <- relevel(leprosy.long$drug, ref = "Drug C")
## Order by id and then time
leprosy.long <- leprosy.long[with(leprosy.long, order(id, time)),]
## Onychomycosis dataset
toenail <- read.dta("~/statistics/bio226AppliedLongitudinalAnalysis/toenail.dta")
## Summary
library(Hmisc)
library(plyr)
ddply(leprosy.long,
.(drug, time),
function(X) {
smean.sd(X$y)
})
drug time Mean SD
1 Drug C 0 12.9 3.957
2 Drug C 1 12.3 7.150
3 Drug A 0 9.3 4.762
4 Drug A 1 5.3 4.644
5 Drug B 0 10.0 5.249
6 Drug B 1 6.1 6.154
## Using gee (This matches SAS PROC GENMOD results)
library(gee)
res.gee.lepro <- gee(formula = y ~ time + drug:time,
id = id,
data = leprosy.long,
family = poisson(link = "log"),
corstr = "exchangeable",
scale.fix = FALSE
)
(Intercept) time time:drugDrug A time:drugDrug B
2.3734 0.1362 -0.8419 -0.7013
summary(res.gee.lepro)
GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
gee S-function, version 4.13 modified 98/01/27 (1998)
Model:
Link: Logarithm
Variance to Mean Relation: Poisson
Correlation Structure: Exchangeable
Call:
gee(formula = y ~ time + drug:time, id = id, data = leprosy.long,
family = poisson(link = "log"), corstr = "exchangeable",
scale.fix = FALSE)
Summary of Residuals:
Min 1Q Median 3Q Max
-9.5860 -4.7333 -0.3757 3.5167 12.4140
Coefficients:
Estimate Naive S.E. Naive z Robust S.E. Robust z
(Intercept) 2.37335 0.1035 22.9238 0.08014 29.61589
time -0.01382 0.1111 -0.1245 0.15734 -0.08785
time:drugDrug A -0.54059 0.1818 -2.9740 0.21858 -2.47323
time:drugDrug B -0.47907 0.1779 -2.6924 0.22786 -2.10243
Estimated Scale Parameter: 3.451
Number of Iterations: 5
Working Correlation
[,1] [,2]
[1,] 1.0000 0.7966
[2,] 0.7966 1.0000
res.gee.lepro2 <- gee(formula = y ~ time + (drug != "Drug C"):time,
id = id,
data = leprosy.long,
family = poisson(link = "log"),
corstr = "exchangeable",
scale.fix = FALSE
)
(Intercept) time time:drug != "Drug C"TRUE
2.3734 0.1362 -0.7691
summary(res.gee.lepro2)
GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
gee S-function, version 4.13 modified 98/01/27 (1998)
Model:
Link: Logarithm
Variance to Mean Relation: Poisson
Correlation Structure: Exchangeable
Call:
gee(formula = y ~ time + (drug != "Drug C"):time, id = id, data = leprosy.long,
family = poisson(link = "log"), corstr = "exchangeable",
scale.fix = FALSE)
Summary of Residuals:
Min 1Q Median 3Q Max
-9.6185 -4.7333 -0.4843 3.5167 12.3815
Coefficients:
Estimate Naive S.E. Naive z Robust S.E. Robust z
(Intercept) 2.37335 0.1028 23.07801 0.08014 29.61589
time -0.01076 0.1142 -0.09421 0.15722 -0.06842
time:drug != "Drug C"TRUE -0.51412 0.1536 -3.34698 0.19656 -2.61559
Estimated Scale Parameter: 3.405
Number of Iterations: 5
Working Correlation
[,1] [,2]
[1,] 1.0000 0.7803
[2,] 0.7803 1.0000
## Using geepack (This does not match SAS result.)
## http://courses.washington.edu/b571/homeworks/old/2005/ex2key.pdf
library(geepack)
res.geepack.lepro <- geeglm(formula = y ~ time + drug:time,
family = poisson(link = "log"),
id = id,
waves = NULL,
data = leprosy.long,
corstr = "exchangeable",
scale.fix = FALSE
)
summary(res.geepack.lepro)
Call:
geeglm(formula = y ~ time + drug:time, family = poisson(link = "log"),
data = leprosy.long, id = id, waves = NULL, corstr = "exchangeable",
scale.fix = FALSE)
Coefficients:
Estimate Std.err Wald Pr(>|W|)
(Intercept) 2.37335 0.08014 877.10 <2e-16 ***
time -0.00288 0.15701 0.00 0.985
time:drugDrug A -0.56257 0.22198 6.42 0.011 *
time:drugDrug B -0.49528 0.23420 4.47 0.034 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Estimated Scale Parameters:
Estimate Std.err
(Intercept) 3.21 0.5
Correlation: Structure = exchangeable Link = identity
Estimated Correlation Parameters:
Estimate Std.err
alpha 0.738 0.0815
Number of clusters: 30 Maximum cluster size: 2
res.geepack.lepro2 <- geeglm(formula = y ~ time + (drug != "Drug C"):time,
family = poisson(link = "log"),
id = id,
waves = NULL,
data = leprosy.long,
corstr = "exchangeable",
scale.fix = FALSE
)
summary(res.geepack.lepro2)
Call:
geeglm(formula = y ~ time + (drug != "Drug C"):time, family = poisson(link = "log"),
data = leprosy.long, id = id, waves = NULL, corstr = "exchangeable",
scale.fix = FALSE)
Coefficients:
Estimate Std.err Wald Pr(>|W|)
(Intercept) 2.37335 0.08014 877.10 <2e-16 ***
time -0.00286 0.15700 0.00 0.9855
time:drug != "Drug C"TRUE -0.52783 0.19883 7.05 0.0079 **
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Estimated Scale Parameters:
Estimate Std.err
(Intercept) 3.23 0.52
Correlation: Structure = exchangeable Link = identity
Estimated Correlation Parameters:
Estimate Std.err
alpha 0.738 0.081
Number of clusters: 30 Maximum cluster size: 2
SAS code and result
libname bio226 '//psf/Home/Documents/statistics/bio226/';
data leprosy;
set bio226.leprosy;
placebo = (drug = 'C');
run;
/* Wide to long using array*/
data leprosy_long;
set leprosy;
array ay(1:2) y1 - y2;
do i = 1 to 2;
bacilli = ay(i);
time = i -1;
t = time;
output;
end;
drop y1 - y2;
run;
/* three groups */
proc genmod data = leprosy_long;
class t id drug (ref = 'C') / param = ref;
model bacilli = time drug*time / dist = poisson link = log;
repeated subject = id / withinsubject = t type = CS;
run;
Analysis Of GEE Parameter Estimates
Empirical Standard Error Estimates
Standard 95% Confidence
Parameter Estimate Error Limits Z Pr > |Z|
Intercept 2.3734 0.0801 2.2163 2.5304 29.62 <.0001
time -0.0138 0.1573 -0.3222 0.2946 -0.09 0.9300
time*drug A -0.5406 0.2186 -0.9690 -0.1122 -2.47 0.0134
time*drug B -0.4791 0.2279 -0.9257 -0.0325 -2.10 0.0355
/* two groups*/
proc genmod data = leprosy_long;
class t id placebo;
model bacilli = time placebo*time / dist = poisson link = log;
repeated subject = id / withinsubject = t type = CS;
run;
Analysis Of GEE Parameter Estimates
Empirical Standard Error Estimates
Standard 95% Confidence
Parameter Estimate Error Limits Z Pr > |Z|
Intercept 2.3734 0.0801 2.2163 2.5304 29.62 <.0001
time -0.0108 0.1572 -0.3189 0.2974 -0.07 0.9455
time*placebo 0 -0.5141 0.1966 -0.8994 -0.1289 -2.62 0.0089
time*placebo 1 0.0000 0.0000 0.0000 0.0000 . .
## Using gee (This is closer to the SAS result.)
res.gee.toe <- gee(formula = y ~ month + trt:month,
family = binomial(link = "logit"),
id = id,
data = toenail,
corstr = "unstructured"
)
(Intercept) month month:trt
-0.5569 -0.1703 -0.0673
summary(res.gee.toe)
GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
gee S-function, version 4.13 modified 98/01/27 (1998)
Model:
Link: Logit
Variance to Mean Relation: Binomial
Correlation Structure: Unstructured
Call:
gee(formula = y ~ month + trt:month, id = id, data = toenail,
family = binomial(link = "logit"), corstr = "unstructured")
Summary of Residuals:
Min 1Q Median 3Q Max
-0.3360 -0.2524 -0.1227 -0.0334 0.9743
Coefficients:
Estimate Naive S.E. Naive z Robust S.E. Robust z
(Intercept) -0.6810 0.1205 -5.65 0.1219 -5.59
month -0.1429 0.0247 -5.78 0.0257 -5.56
month:trt -0.0795 0.0381 -2.09 0.0424 -1.88
Estimated Scale Parameter: 1.04
Number of Iterations: 5
Working Correlation
[,1] [,2] [,3] [,4] [,5] [,6] [,7]
[1,] 1.000 0.885 0.690 0.486 0.235 0.144 0.103
[2,] 0.885 1.000 0.799 0.577 0.257 0.218 0.123
[3,] 0.690 0.799 1.000 0.750 0.268 0.196 0.149
[4,] 0.486 0.577 0.750 1.000 0.348 0.257 0.194
[5,] 0.235 0.257 0.268 0.348 1.000 0.453 0.368
[6,] 0.144 0.218 0.196 0.257 0.453 1.000 0.548
[7,] 0.103 0.123 0.149 0.194 0.368 0.548 1.000
## Using geepack (This does not match SAS result.)
res.geepack.toe <- geeglm(formula = y ~ month + trt:month,
family = binomial(link = "logit"),
id = id,
## waves = visit, # This crashes R
data = toenail,
corstr = "unstructured"
)
summary(res.geepack.toe)
Call:
geeglm(formula = y ~ month + trt:month, family = binomial(link = "logit"),
data = toenail, id = id, corstr = "unstructured")
Coefficients:
Estimate Std.err Wald Pr(>|W|)
(Intercept) -0.7217 0.1234 34.22 0.0000000049 ***
month -0.1334 0.0251 28.30 0.0000001037 ***
month:trt -0.0863 0.0424 4.15 0.042 *
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Estimated Scale Parameters:
Estimate Std.err
(Intercept) 1.04 0.337
Correlation: Structure = unstructured Link = identity
Estimated Correlation Parameters:
Estimate Std.err
alpha.1:2 0.904 0.3008
alpha.1:3 0.713 0.2431
alpha.1:4 0.513 0.1896
alpha.1:5 0.246 0.1191
alpha.1:6 0.157 0.0930
alpha.1:7 0.131 0.0947
alpha.2:3 0.825 0.2754
alpha.2:4 0.609 0.2190
alpha.2:5 0.271 0.1257
alpha.2:6 0.238 0.1155
alpha.2:7 0.158 0.1029
alpha.3:4 0.790 0.2734
alpha.3:5 0.283 0.1288
alpha.3:6 0.215 0.1110
alpha.3:7 0.192 0.1137
alpha.4:5 0.367 0.1536
alpha.4:6 0.282 0.1300
alpha.4:7 0.251 0.1304
alpha.5:6 0.498 0.1994
alpha.5:7 0.475 0.2038
alpha.6:7 0.705 0.2611
Number of clusters: 294 Maximum cluster size: 7
SAS code and result
/* This matches the slides in Lecture 18*/
proc genmod descending data = toe;
class id trt (ref = '0') visit / param = ref;
model y = month trt*month / dist = binomial link = logit;
repeated subject = id / withinsubject = visit type = un;
run;
Analysis Of GEE Parameter Estimates
Empirical Standard Error Estimates
Standard 95% Confidence
Parameter Estimate Error Limits Z Pr > |Z|
Intercept -0.6981 0.1217 -0.9365 -0.4596 -5.74 <.0001
MONTH -0.1401 0.0261 -0.1913 -0.0889 -5.36 <.0001
MONTH*TRT 1 -0.0806 0.0415 -0.1620 0.0008 -1.94 0.0523