1 data input

fL <- "https://content.sph.harvard.edu/fitzmaur/ala2e/muscatine-data.txt"
dta <- read.table(fL, h=F, na.strings = '.')

names(dta) <- c("ID","Gender","Age0","Age","Occasion","Obese")

head(dta)

##   ID Gender Age0 Age Occasion Obese
## 1  1      0    6   6        1     1
## 2  1      0    6   8        2     1
## 3  1      0    6  10        3     1
## 4  2      0    6   6        1     1
## 5  2      0    6   8        2     1
## 6  2      0    6  10        3     1

2 bar chart plot

par(mfrow=c(1,2))
barplot(prop.table(with(subset(dta, Gender=="0"), 
                           ftable(Obese, Age)), m=2), 
        xlab="Age \n (6, 8, 10, 12, 14, 16, 18 y/o)",
        ylab="Proportion", 
        ylim=c(0,1),
        main="Male", 
        beside=T)
legend('topleft', c("Non-Obese","Obese"), 
       col=c("black","gray"), 
       pch=15, bty='n', cex=.5)
barplot(prop.table(with(subset(dta, Gender=="1"), 
                         ftable(Obese, Age)), m=2), 
        xlab="Age \n (6, 8, 10, 12, 14, 16, 18 y/o)",
        ylab="Proportion", 
        ylim=c(0,1),
        main="Female", 
        beside=T)

直方圖顯示:男性和女性隨年齡增加肥胖比例變化模式非常相似

3 Marginal models: GEE

dta$Age2 = dta$Age^2

library(geepack)
# m0
summary(m0 <- geeglm(Obese ~ Gender + Age + Age2 + Gender:Age + Gender:Age2, data=dta, id = ID, family = binomial, corstr = "ar1"))

## 
## Call:
## geeglm(formula = Obese ~ Gender + Age + Age2 + Gender:Age + Gender:Age2, 
##     family = binomial, data = dta, id = ID, corstr = "ar1")
## 
##  Coefficients:
##              Estimate   Std.err   Wald Pr(>|W|)    
## (Intercept) -4.318069  0.496876 75.524  < 2e-16 ***
## Gender       0.573716  0.693639  0.684    0.408    
## Age          0.476464  0.083021 32.937 9.52e-09 ***
## Age2        -0.018092  0.003416 28.044 1.19e-07 ***
## Gender:Age  -0.080969  0.115276  0.493    0.482    
## Gender:Age2  0.003495  0.004715  0.549    0.459    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = ar1 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)   0.9943 0.02807
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha   0.6106 0.02036
## Number of clusters:   4856  Maximum cluster size: 3

# m1
summary(m1 <- geeglm(Obese ~ Gender + Age + Gender:Age , data=dta, id = ID, family = binomial, corstr = "ar1"))

## 
## Call:
## geeglm(formula = Obese ~ Gender + Age + Gender:Age, family = binomial, 
##     data = dta, id = ID, corstr = "ar1")
## 
##  Coefficients:
##             Estimate  Std.err   Wald Pr(>|W|)    
## (Intercept) -1.83625  0.14992 150.01   <2e-16 ***
## Gender       0.11594  0.21094   0.30   0.5826    
## Age          0.04011  0.01184  11.48   0.0007 ***
## Gender:Age   0.00148  0.01659   0.01   0.9290    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = ar1 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)    0.996  0.0266
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.607  0.0197
## Number of clusters:   4856  Maximum cluster size: 3

# m2
summary(m2 <- geeglm(Obese ~ Gender  + Age2  + Gender:Age2, data=dta, id = ID, family = binomial, corstr = "ar1"))

## 
## Call:
## geeglm(formula = Obese ~ Gender + Age2 + Gender:Age2, family = binomial, 
##     data = dta, id = ID, corstr = "ar1")
## 
##  Coefficients:
##              Estimate   Std.err   Wald Pr(>|W|)    
## (Intercept) -1.535185  0.086656 313.85   <2e-16 ***
## Gender       0.113042  0.121883   0.86    0.354    
## Age2         0.001176  0.000486   5.86    0.015 *  
## Gender:Age2  0.000142  0.000677   0.04    0.834    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation structure = ar1 
## Estimated Scale Parameters:
## 
##             Estimate Std.err
## (Intercept)    0.996  0.0263
##   Link = identity 
## 
## Estimated Correlation Parameters:
##       Estimate Std.err
## alpha    0.606  0.0196
## Number of clusters:   4856  Maximum cluster size: 3

4 modle comparisom

library(pscl)

## Classes and Methods for R developed in the
## Political Science Computational Laboratory
## Department of Political Science
## Stanford University
## Simon Jackman
## hurdle and zeroinfl functions by Achim Zeileis

vuong(m0, m1)

## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A p-value
## Raw                        3.24 model1 > model2   6e-04
## AIC-corrected              2.90 model1 > model2   0.002
## BIC-corrected              1.71 model1 > model2   0.044

vuong(m0, m2)

## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A p-value
## Raw                        3.28 model1 > model2   5e-04
## AIC-corrected              2.97 model1 > model2   0.001
## BIC-corrected              1.87 model1 > model2   0.031

vuong(m1, m2)

## Vuong Non-Nested Hypothesis Test-Statistic: 
## (test-statistic is asymptotically distributed N(0,1) under the
##  null that the models are indistinguishible)
## -------------------------------------------------------------
##               Vuong z-statistic             H_A p-value
## Raw                        1.48 model1 > model2    0.07
## AIC-corrected              1.48 model1 > model2    0.07
## BIC-corrected              1.48 model1 > model2    0.07

sjPlot::tab_model(m0, m1, m2, show.obs=F, show.ngroups = F)

	Obese			Obese			Obese
Predictors	Odds Ratios	CI	p	Odds Ratios	CI	p	Odds Ratios	CI	p
(Intercept)	0.01	0.01 – 0.04	<0.001	0.16	0.12 – 0.21	<0.001	0.22	0.18 – 0.26	<0.001
Gender	1.77	0.46 – 6.91	0.408	1.12	0.74 – 1.70	0.583	1.12	0.88 – 1.42	0.354
Age	1.61	1.37 – 1.89	<0.001	1.04	1.02 – 1.07	0.001
Age2	0.98	0.98 – 0.99	<0.001				1.00	1.00 – 1.00	0.015
Gender * Age	0.92	0.74 – 1.16	0.482	1.00	0.97 – 1.03	0.929
Gender * Age2	1.00	0.99 – 1.01	0.459				1.00	1.00 – 1.00	0.834

5 Probability Plot

pacman::p_load(dplyr,tidyr, ggplot2)

dta1_p <- dta %>% 
  mutate(Gender = factor(ifelse(Gender == 0, "Boys", "Girls")))

dta1p <- data.frame(dta1_p, id=row.names(dta1_p))

yhat_m0 <- data.frame(id=row.names(fitted(m0)), 
                      phat=fitted(m0))

dta1_m0 <- inner_join(dta1p, yhat_m0, by="id")

ggplot(dta1_m0, aes(Age, group = Gender)) +
  geom_line(data = dta1_m0, aes(y = phat, color = Gender)) + 
  labs(x="Age (years)", 
       y="Estimated Probability of Obesity") +
  scale_y_continuous(limits = c(0.1, 0.27)) +
  scale_x_continuous(limits = c(6, 18),
                        breaks = c(6, 8, 10, 12, 14, 16, 18),
                        minor_breaks = NULL) +
  theme_minimal()+
  theme(legend.position=c(.9, .2))

利用modle fitted value畫男女性隨年齡增加的百分比變化圖，可清楚看出男性肥率增加的比例比女性小

hw1

1 data input

2 bar chart plot

3 Marginal models: GEE

4 modle comparisom

5 Probability Plot