gee model for multiple sources data
## Loading required package: pacman
## ID Problem Single Parent Teacher
## 1 1 Y Y 0 NA
## 2 2 N N 1 NA
## 3 3 Y N 0 NA
## 4 4 Y Y 1 NA
## 5 5 Y N 0 NA
## 6 6 Y N 0 NA
## 'data.frame': 2501 obs. of 5 variables:
## $ ID : Factor w/ 2501 levels "1","2","3","4",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ Problem: Factor w/ 2 levels "N","Y": 2 1 2 2 2 2 2 1 1 1 ...
## $ Single : Factor w/ 2 levels "N","Y": 2 1 1 2 1 1 1 1 2 1 ...
## $ Parent : int 0 1 0 1 0 0 0 0 0 0 ...
## $ Teacher: int NA NA NA NA NA NA NA 0 NA 0 ...
## Parent 0 1
## Teacher
## 0 1030 129
## 1 165 104
165>129 老師報告學生違規和攻擊行為較多。
## Parent Teacher
## Parent 1.0000000 0.2913296
## Teacher 0.2913296 1.0000000
老師和家長的的相關為0.2913
## [1] 0.4290284
42.9%的missing data
call them informants
dtaL <- dta %>%
gather(Informant, Report, 4:5)
# sort according to child id
dtaL <- dtaL[order(dtaL$ID), ]
# get rid of annoying row names
rownames(dtaL) <- NULL
# have a look
head(dtaL)## ID Problem Single Informant Report
## 1 1 Y Y Parent 0
## 2 1 Y Y Teacher NA
## 3 2 N N Parent 1
## 4 2 N N Teacher NA
## 5 3 Y N Parent 0
## 6 3 Y N Teacher NA
#
summary(m1 <- gee(Report ~ Informant + Problem + Single + Informant:Problem,
data = dtaL, id = ID, corstr = "exchangeable", family = binomial))## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept) InformantTeacher ProblemY
## -2.1595398 0.4712270 0.5996091
## SingleY InformantTeacher:ProblemY
## 0.6332404 -0.4247098
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logit
## Variance to Mean Relation: Binomial
## Correlation Structure: Exchangeable
##
## Call:
## gee(formula = Report ~ Informant + Problem + Single + Informant:Problem,
## id = ID, data = dtaL, family = binomial, corstr = "exchangeable")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -0.2841247 -0.1765046 -0.1570826 -0.1039640 0.8960360
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E.
## (Intercept) -2.1539359 0.09016552 -23.888688 0.08888885
## InformantTeacher 0.4738391 0.11498088 4.121025 0.11798848
## ProblemY 0.5995711 0.11284664 5.313149 0.11278090
## SingleY 0.6137249 0.10596325 5.791865 0.10759321
## InformantTeacher:ProblemY -0.4572920 0.15701454 -2.912418 0.15711252
## Robust z
## (Intercept) -24.231790
## InformantTeacher 4.015977
## ProblemY 5.316247
## SingleY 5.704123
## InformantTeacher:ProblemY -2.910602
##
## Estimated Scale Parameter: 1.001019
## Number of Iterations: 2
##
## Working Correlation
## [,1] [,2]
## [1,] 1.0000000 0.2681987
## [2,] 0.2681987 1.0000000
summary(m1a <- geeglm(Report ~ Informant + Problem + Single +
Informant:Problem + Single:Problem,
data = dtaL, id = ID, corstr = "exchangeable", family = binomial))##
## Call:
## geeglm(formula = Report ~ Informant + Problem + Single + Informant:Problem +
## Single:Problem, family = binomial, data = dtaL, id = ID,
## corstr = "exchangeable")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -2.16615 0.09410 529.882 < 2e-16 ***
## InformantTeacher 0.47515 0.11819 16.162 5.81e-05 ***
## ProblemY 0.62177 0.12654 24.144 8.94e-07 ***
## SingleY 0.65789 0.15949 17.014 3.71e-05 ***
## InformantTeacher:ProblemY -0.45906 0.15722 8.526 0.0035 **
## ProblemY:SingleY -0.07945 0.21572 0.136 0.7127
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = exchangeable
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 1 0.06415
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.2668 0.03029
## Number of clusters: 2501 Maximum cluster size: 2
theoretical Correlation =0.268 ,比0.291稍微低一點
sjPlot::tab_model(m1 <- geeglm(Report ~ Informant + Problem + Single + Informant:Problem, data=dtaL,
id=ID, corstr="exchangeable", family=binomial), show.obs=F, show.ngroups = F)| Report | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.12 | 0.10 – 0.14 | <0.001 |
| Informant [Teacher] | 1.61 | 1.27 – 2.02 | <0.001 |
| Problem [Y] | 1.82 | 1.46 – 2.27 | <0.001 |
| Single [Y] | 1.85 | 1.50 – 2.28 | <0.001 |
|
Informant [Teacher] * Problem [Y] |
0.63 | 0.47 – 0.86 | 0.004 |
sjPlot::tab_model(m1a <- geeglm(Report ~ Informant + Problem + Single +
Informant:Problem + Single:Problem, data=dtaL,
id=ID, corstr="exchangeable", family=binomial), show.obs=F, show.ngroups = F)| Report | |||
|---|---|---|---|
| Predictors | Odds Ratios | CI | p |
| (Intercept) | 0.11 | 0.10 – 0.14 | <0.001 |
| Informant [Teacher] | 1.61 | 1.28 – 2.03 | <0.001 |
| Problem [Y] | 1.86 | 1.45 – 2.39 | <0.001 |
| Single [Y] | 1.93 | 1.41 – 2.64 | <0.001 |
|
Informant [Teacher] * Problem [Y] |
0.63 | 0.46 – 0.86 | 0.004 |
| Problem [Y] * Single [Y] | 0.92 | 0.61 – 1.41 | 0.713 |
geeglm可經由ln進行換算即與Gee顯示會相等
老師的report較高
# set theme
ot <- theme_set(theme_bw())
# fortify data with model fits
dta_m1 <- data.frame(na.omit(dtaL), phat = fitted(m1a))
# compare observed with fitted
ggplot(dta_m1, aes(Problem, Report, color = Informant)) +
geom_point(aes(Problem, phat),position = position_dodge(width=.2),
pch = 1, size = rel(2)) +
geom_hline(yintercept = c(mean(dta$Parent, na.rm = T),
mean(dta$Teacher, na.rm = T)), linetype = "dotted")+
stat_summary(fun.data = "mean_cl_boot", position = position_dodge(width=.2)) +
facet_wrap( ~ Single)+
labs(x = "Physical Problem", y = "Probability of report") +
theme(legend.position = c(.1, .9))由圖可知老師report的level比較高,空心=model estimated,實心=從data裡算出來的。 model prediction 與raw data沒有差太多。
## case treat occasion resp
## 1 1 1 0 1
## 2 1 1 1 1
## 3 2 1 0 1
## 4 2 1 1 1
## 5 3 1 0 1
## 6 3 1 1 1
logit(P{Yt <= j}) = αk + β1 time + β2 trt + β3 time × trt,
j = 1, 2, 3, 4; k = j - 1 for j > 1.
##
## Attaching package: 'repolr'
## The following objects are masked from 'package:geepack':
##
## ordgee, QIC
#
dta_repolr <- repolr(resp ~ treat + occasion + treat:occasion,
subjects="case", data=dta2, times=c(1,2), categories=4,
corr.mod="independence")
#
summary(dta_repolr$gee)## Length Class Mode
## 0 NULL NULL
##
## repolr: 2016-02-26 version 3.4
##
## Call:
## repolr(formula = resp ~ treat + occasion + treat:occasion, subjects = "case",
## data = dta2, times = c(1, 2), categories = 4, corr.mod = "independence")
##
## Coefficients:
## coeff se.robust z.robust p.value
## cuts1|2 -2.2671 0.2091 -10.8422 0.0000
## cuts2|3 -0.9515 0.1769 -5.3787 0.0000
## cuts3|4 0.3517 0.1751 2.0086 0.0446
## treat 0.0336 0.2369 0.1418 0.8872
## occasion 1.0381 0.1564 6.6375 0.0000
## treat:occasion 0.7077 0.2458 2.8792 0.0040
##
## Correlation Structure: independence
## Fixed Correlation: 0
Occasion effect = 1.04 for placebo, 1.04 + 0.71 = 1.75 for treatment.
Odds ratios: exp(1.04) = 2.8, exp(1.75) = 5.7
Treatment effect exp(0.03) = 1.03 initially, exp(0.03+0.71) = 2.1 at follow-up.
#
dta2$treat <- factor(ifelse(dta2$treat==1, "Active", "Placebo"))
#
dta2$occasion <- factor(ifelse(dta2$occasion==1, "Follow-up", "Initial"))
dta.tbl <- ftable(dta2$treat, dta2$occasion , dta2$resp)
dta.tbl## 1 2 3 4
##
## Active Follow-up 40 49 19 11
## Initial 12 20 40 47
## Placebo Follow-up 31 29 35 25
## Initial 14 20 35 51
plot(y[,1], y[,2], type="n",
xlab="Time to Sleep (min.)", ylab="Cumulative proportion")
#
lines(y[1:4, 1], y[1:4,2], lty=1, col="steelblue")
lines(y[5:8, 1], y[5:8,2], lty=5, col="steelblue")
lines(y[9:12, 1], y[9:12,2], lty=2, col="indianred")
lines(y[13:16,1], y[13:16,2], lty=4, col="indianred")
#
text(22, .12, "T,I")
text(22, .47, "T,F")
text(22, .20, "P,I")
text(22, .33, "P,F")
#
grid()由累積折線圖可知採用催眠藥物組和安慰劑組間起初皆以相似的時間入睡。但在兩週後,接受催眠藥物治療的人比起安慰劑組,能更迅速入眠。(TF>PF)