fL <- "https://content.sph.harvard.edu/fitzmaur/ala2e/muscatine-data.txt"
dta <- read.table(fL, h=F, na.strings = '.')
names(dta) <- c("ID","Gender","Age0","Age","Occasion","Obese")
head(dta)
## ID Gender Age0 Age Occasion Obese
## 1 1 0 6 6 1 1
## 2 1 0 6 8 2 1
## 3 1 0 6 10 3 1
## 4 2 0 6 6 1 1
## 5 2 0 6 8 2 1
## 6 2 0 6 10 3 1
par(mfrow=c(1,2))
barplot(prop.table(with(subset(dta, Gender=="0"),
ftable(Obese, Age)), m=2),
xlab="Age \n (6, 8, 10, 12, 14, 16, 18 y/o)",
ylab="Proportion",
ylim=c(0,1),
main="Male",
beside=T)
legend('topleft', c("Non-Obese","Obese"),
col=c("black","gray"),
pch=15, bty='n', cex=.5)
barplot(prop.table(with(subset(dta, Gender=="1"),
ftable(Obese, Age)), m=2),
xlab="Age \n (6, 8, 10, 12, 14, 16, 18 y/o)",
ylab="Proportion",
ylim=c(0,1),
main="Female",
beside=T)
直方圖顯示:男性和女性隨年齡增加肥胖比例變化模式非常相似
dta$Age2 = dta$Age^2
library(geepack)
# m0
summary(m0 <- geeglm(Obese ~ Gender + Age + Age2 + Gender:Age + Gender:Age2, data=dta, id = ID, family = binomial, corstr = "ar1"))
##
## Call:
## geeglm(formula = Obese ~ Gender + Age + Age2 + Gender:Age + Gender:Age2,
## family = binomial, data = dta, id = ID, corstr = "ar1")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -4.318069 0.496876 75.524 < 2e-16 ***
## Gender 0.573716 0.693639 0.684 0.408
## Age 0.476464 0.083021 32.937 9.52e-09 ***
## Age2 -0.018092 0.003416 28.044 1.19e-07 ***
## Gender:Age -0.080969 0.115276 0.493 0.482
## Gender:Age2 0.003495 0.004715 0.549 0.459
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = ar1
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.9943 0.02807
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.6106 0.02036
## Number of clusters: 4856 Maximum cluster size: 3
# m1
summary(m1 <- geeglm(Obese ~ Gender + Age + Gender:Age , data=dta, id = ID, family = binomial, corstr = "ar1"))
##
## Call:
## geeglm(formula = Obese ~ Gender + Age + Gender:Age, family = binomial,
## data = dta, id = ID, corstr = "ar1")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.83625 0.14992 150.01 <2e-16 ***
## Gender 0.11594 0.21094 0.30 0.5826
## Age 0.04011 0.01184 11.48 0.0007 ***
## Gender:Age 0.00148 0.01659 0.01 0.9290
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = ar1
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.996 0.0266
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.607 0.0197
## Number of clusters: 4856 Maximum cluster size: 3
# m2
summary(m2 <- geeglm(Obese ~ Gender + Age2 + Gender:Age2, data=dta, id = ID, family = binomial, corstr = "ar1"))
##
## Call:
## geeglm(formula = Obese ~ Gender + Age2 + Gender:Age2, family = binomial,
## data = dta, id = ID, corstr = "ar1")
##
## Coefficients:
## Estimate Std.err Wald Pr(>|W|)
## (Intercept) -1.535185 0.086656 313.85 <2e-16 ***
## Gender 0.113042 0.121883 0.86 0.354
## Age2 0.001176 0.000486 5.86 0.015 *
## Gender:Age2 0.000142 0.000677 0.04 0.834
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation structure = ar1
## Estimated Scale Parameters:
##
## Estimate Std.err
## (Intercept) 0.996 0.0263
## Link = identity
##
## Estimated Correlation Parameters:
## Estimate Std.err
## alpha 0.606 0.0196
## Number of clusters: 4856 Maximum cluster size: 3
## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis
## Vuong Non-Nested Hypothesis Test-Statistic:
## (test-statistic is asymptotically distributed N(0,1) under the
## null that the models are indistinguishible)
## -------------------------------------------------------------
## Vuong z-statistic H_A p-value
## Raw 3.24 model1 > model2 6e-04
## AIC-corrected 2.90 model1 > model2 0.002
## BIC-corrected 1.71 model1 > model2 0.044
## Vuong Non-Nested Hypothesis Test-Statistic:
## (test-statistic is asymptotically distributed N(0,1) under the
## null that the models are indistinguishible)
## -------------------------------------------------------------
## Vuong z-statistic H_A p-value
## Raw 3.28 model1 > model2 5e-04
## AIC-corrected 2.97 model1 > model2 0.001
## BIC-corrected 1.87 model1 > model2 0.031
## Vuong Non-Nested Hypothesis Test-Statistic:
## (test-statistic is asymptotically distributed N(0,1) under the
## null that the models are indistinguishible)
## -------------------------------------------------------------
## Vuong z-statistic H_A p-value
## Raw 1.48 model1 > model2 0.07
## AIC-corrected 1.48 model1 > model2 0.07
## BIC-corrected 1.48 model1 > model2 0.07
Obese | Obese | Obese | |||||||
---|---|---|---|---|---|---|---|---|---|
Predictors | Odds Ratios | CI | p | Odds Ratios | CI | p | Odds Ratios | CI | p |
(Intercept) | 0.01 | 0.01 – 0.04 | <0.001 | 0.16 | 0.12 – 0.21 | <0.001 | 0.22 | 0.18 – 0.26 | <0.001 |
Gender | 1.77 | 0.46 – 6.91 | 0.408 | 1.12 | 0.74 – 1.70 | 0.583 | 1.12 | 0.88 – 1.42 | 0.354 |
Age | 1.61 | 1.37 – 1.89 | <0.001 | 1.04 | 1.02 – 1.07 | 0.001 | |||
Age2 | 0.98 | 0.98 – 0.99 | <0.001 | 1.00 | 1.00 – 1.00 | 0.015 | |||
Gender * Age | 0.92 | 0.74 – 1.16 | 0.482 | 1.00 | 0.97 – 1.03 | 0.929 | |||
Gender * Age2 | 1.00 | 0.99 – 1.01 | 0.459 | 1.00 | 1.00 – 1.00 | 0.834 |
pacman::p_load(dplyr,tidyr, ggplot2)
dta1_p <- dta %>%
mutate(Gender = factor(ifelse(Gender == 0, "Boys", "Girls")))
dta1p <- data.frame(dta1_p, id=row.names(dta1_p))
yhat_m0 <- data.frame(id=row.names(fitted(m0)),
phat=fitted(m0))
dta1_m0 <- inner_join(dta1p, yhat_m0, by="id")
ggplot(dta1_m0, aes(Age, group = Gender)) +
geom_line(data = dta1_m0, aes(y = phat, color = Gender)) +
labs(x="Age (years)",
y="Estimated Probability of Obesity") +
scale_y_continuous(limits = c(0.1, 0.27)) +
scale_x_continuous(limits = c(6, 18),
breaks = c(6, 8, 10, 12, 14, 16, 18),
minor_breaks = NULL) +
theme_minimal()+
theme(legend.position=c(.9, .2))
利用modle fitted value畫男女性隨年齡增加的百分比變化圖,可清楚看出男性肥率增加的比例比女性小