Introduction
The data are from the Muscatine Coronary Risk Factor (MCRF) study, a longitudinal survey of school-age children in Muscatine, Iowa. The MCRF study had the goal of examining the development and persistence of risk factors for coronary disease in children. In the MCRF study, weight and height measurements of five cohorts of children, initially aged 5-7, 7-9, 9-11, 11-13, and 13-15 years, were obtained biennially from 1977 to 1981.
Data were collected on 4856 boys and girls. On the basis of a comparison of their weight to age-gender specific norms, children were classified as obese or not obese.
The goal of the analysis is to determine whether the risk of obesity increases with age and whether patterns of change in obesity are the same for boys and girls.
Source: Woolson, R.F., & Clarke, W.R. (1984). Analysis of categorical incompletel longitudinal data. Journal of the Royal Statistical Society, Series A, 147, 87-99. Reported in Fitzmaurice, G.M., Laired, N.M., & Ware, J.H. (2004). Applied Longitudinal Data Analysis. pp. 306-312.
Column 1: ID Column 2: Gender, 0 = Male, 1 = Female Column 3: Baseline Age Column 4: Current Age, Age denotes mid-point of age-cohort Column 5: Occasions of measurements Column 6: Obesity Status, 1 = Obese, 0 = Non-Obese, . = Missing
Data management
#load packages
pacman::p_load(magrittr, dplyr, knitr, rmdformats, geepack, repolr, geesmv)
#input data
dta <- read.table("C:/Users/Ching-Fang Wu/Documents/data/muscatine-data.txt",h=F,na.strings = '.')
## V1 V2 V3 V4 V5 V6
## 1 1 0 6 6 1 1
## 2 1 0 6 8 2 1
## 3 1 0 6 10 3 1
## 4 2 0 6 6 1 1
## 5 2 0 6 8 2 1
## 6 2 0 6 10 3 1
## 'data.frame': 14568 obs. of 6 variables:
## $ ID : int 1 1 1 2 2 2 3 3 3 4 ...
## $ Gender : int 0 0 0 0 0 0 0 0 0 0 ...
## $ Age0 : int 6 6 6 6 6 6 6 6 6 6 ...
## $ Age : int 6 8 10 6 8 10 6 8 10 6 ...
## $ Occasion: int 1 2 3 1 2 3 1 2 3 1 ...
## $ Obese : int 1 1 1 1 1 1 1 1 1 1 ...
dta1t <- dta %>%
mutate(Occasion = factor(Occasion, labels=c("1977", "1979", "1981"), ordered=T), #標註測量時間
Gender = factor(ifelse(Gender == 0, "M", "F")),#Gender=0改為M
Obese = factor(ifelse(Obese == 1, "Obese", "Non-Obese")))
ftable(dta1t, row.vars = c(2, 4), col.vars = c(5,6))
## Occasion 1977 1979 1981
## Obese Non-Obese Obese Non-Obese Obese Non-Obese Obese
## Gender Age
## F 6 141 23 0 0 0 0
## 8 294 58 265 55 0 0
## 10 270 92 276 87 268 90
## 12 291 91 279 99 278 92
## 14 300 89 226 64 256 73
## 16 0 0 250 87 226 56
## 18 0 0 0 0 140 37
## M 6 174 15 0 0 0 0
## 8 289 67 296 54 0 0
## 10 312 84 298 77 308 83
## 12 281 90 299 88 290 90
## 14 307 73 233 65 269 78
## 16 0 0 251 67 224 54
## 18 0 0 0 0 153 34
par(mfrow=c(1,2))
barplot(prop.table(with(subset(dta, Gender=="0"),
ftable(Obese, Age)), m=2),
xlab="Age \n (6, 8, 10, 12, 14, 16, 18 y/o)",
ylab="Proportion",
ylim=c(0,1),
main="Male",
beside=T)
legend('topleft', c("Non-Obese","Obese"),
col=c("black","gray"),
pch=15, bty='n', cex=.5)
barplot(prop.table(with(subset(dta, Gender=="1"),
ftable(Obese, Age)), m=2),
xlab="Age \n (6, 8, 10, 12, 14, 16, 18 y/o)",
ylab="Proportion",
ylim=c(0,1),
main="Female",
beside=T)
Male肥胖比率最高在12歲(超過0.2%),14歲到18歲的肥胖比率約0.2%(略低)。 Female肥胖比率最高在10、12歲(超過0.2%),14歲到18歲的肥胖比率約0.2%(略高)。
Marginal models: GEE
##m0
dta_m <- dta %>%
mutate(Age = Age - 12,
Age2 = Age^2)
summary(m0 <- geeglm(Obese ~ Gender + Age + Age2 + Gender:Age + Gender:Age2, data=dta_m, id = ID, family = binomial, corstr = "ar1"))
##
## Call:
## geeglm(formula = Obese ~ Gender + Age + Age2 + Gender:Age + Gender:Age2,
## family = binomial, data = dta_m, id = ID, corstr = "ar1")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.205697 0.050734 564.783 < 2e-16 ***
## Gender 0.105306 0.071357 2.178 0.14001
## Age 0.042264 0.013433 9.899 0.00165 **
## Age2 -0.018092 0.003416 28.044 1.19e-07 ***
## Gender:Age 0.002900 0.018563 0.024 0.87585
## Gender:Age2 0.003495 0.004715 0.549 0.45857
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = ar1
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.9943 0.02807
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.6106 0.02036
## Number of clusters: 4856 Maximum cluster size: 3
##m1
summary(m1 <- geeglm(Obese ~ Gender + Age + Age2, data=dta_m, id = ID, family = binomial, corstr = "ar1"))
##
## Call:
## geeglm(formula = Obese ~ Gender + Age + Age2, family = binomial,
## data = dta_m, id = ID, corstr = "ar1")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.21897 0.04788 648.17 < 2e-16 ***
## Gender 0.13145 0.06288 4.37 0.037 *
## Age 0.04387 0.00926 22.43 2.2e-06 ***
## Age2 -0.01630 0.00235 48.05 4.2e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = ar1
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.994 0.028
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.61 0.0203
## Number of clusters: 4856 Maximum cluster size: 3
##Comparison
Obese | Obese | |||||
---|---|---|---|---|---|---|
Predictors | Odds Ratios | CI | p | Odds Ratios | CI | p |
(Intercept) | 0.30 | 0.27 – 0.33 | <0.001 | 0.30 | 0.27 – 0.32 | <0.001 |
Gender | 1.11 | 0.97 – 1.28 | 0.140 | 1.14 | 1.01 – 1.29 | 0.037 |
Age | 1.04 | 1.02 – 1.07 | 0.002 | 1.04 | 1.03 – 1.06 | <0.001 |
Age2 | 0.98 | 0.98 – 0.99 | <0.001 | 0.98 | 0.98 – 0.99 | <0.001 |
Gender * Age | 1.00 | 0.97 – 1.04 | 0.876 | |||
Gender * Age2 | 1.00 | 0.99 – 1.01 | 0.459 |
Estimated Probability Plot
pacman::p_load(tidyr, ggplot2,knitr,rmdformats)
dta_p <- dta %>%
mutate(Gender = factor(ifelse(Gender == 0, "Boys", "Girls")))
dtap <- data.frame(dta_p, id=row.names(dta_p))
yhat_m0 <- data.frame(id=row.names(fitted(m0)),
phat=fitted(m0))
dta_m0 <- inner_join(dtap, yhat_m0, by="id")
ggplot(dta_m0, aes(Age, group = Gender)) +
geom_line(data = dta_m0, aes(y = phat, color = Gender)) +
labs(x="Age (years)",
y="Estimated Probability of Obesity") +
scale_y_continuous(limits = c(0.1, 0.27)) +
scale_x_continuous(limits = c(6, 18),
breaks = c(6, 8, 10, 12, 14, 16, 18),
minor_breaks = NULL) +
theme_minimal()+
theme(legend.position=c(.9, .2))
估計結果女性肥胖比率高於男性
#The end