p = c("tidyverse", "magrittr", "gee", "repolr")
lapply(p, library, character.only = TRUE)
Q1
#Q1
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/cantabrian.txt", h = T)
str(dat)
## 'data.frame': 1646 obs. of 4 variables:
## $ sbj : int 1 1 2 2 3 3 4 4 5 5 ...
## $ resp : int 0 0 0 0 0 0 0 0 0 0 ...
## $ gender: Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
## $ type : Factor w/ 2 levels "GHQ","GP": 2 1 2 1 2 1 2 1 2 1 ...
dat$sbj %<>% as.factor
#Plot
ggplot(dat, aes(x = type, y = resp, color = gender))+
stat_summary(fun.data = mean_se)+
theme_bw()+
labs(x = "Source", y = "Proportion of mental illness")

dat%<>% mutate(resp_f = factor(resp))
summary(m0 <- glm(resp_f~gender*type, data = dat, family = "binomial"))
##
## Call:
## glm(formula = resp_f ~ gender * type, family = "binomial", data = dat)
##
## Deviance Residuals:
## Min 1Q Median 3Q Max
## -0.9890 -0.7531 -0.5871 -0.4925 2.0832
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -0.46089 0.09192 -5.014 5.33e-07 ***
## genderM -0.65425 0.15826 -4.134 3.57e-05 ***
## typeGP -1.20991 0.15326 -7.895 2.91e-15 ***
## genderM:typeGP 0.27649 0.26570 1.041 0.298
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## (Dispersion parameter for binomial family taken to be 1)
##
## Null deviance: 1800.1 on 1645 degrees of freedom
## Residual deviance: 1694.3 on 1642 degrees of freedom
## AIC: 1702.3
##
## Number of Fisher Scoring iterations: 4
summary(m1 <- gee(resp_f~gender*type, id = sbj, data = dat, family = binomial, corstr = "independence"))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept) genderM typeGP genderM:typeGP
## -0.4608949 -0.6542467 -1.2099119 0.2764892
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logit
## Variance to Mean Relation: Binomial
## Correlation Structure: Independent
##
## Call:
## gee(formula = resp_f ~ gender * type, id = sbj, data = dat, family = binomial,
## corstr = "independence")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -0.3867735 -0.2469136 -0.1583166 -0.1141975 0.8858025
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## (Intercept) -0.4608949 0.09203208 -5.007981 0.09192019 -5.014077
## genderM -0.6542467 0.15845725 -4.128853 0.15826459 -4.133879
## typeGP -1.2099119 0.15344615 -7.884929 0.12651216 -9.563602
## genderM:typeGP 0.2764892 0.26602667 1.039329 0.22827821 1.211194
##
## Estimated Scale Parameter: 1.002436
## Number of Iterations: 1
##
## Working Correlation
## [,1] [,2]
## [1,] 1 0
## [2,] 0 1
summary(m3 <- gee(resp_f~gender*type, id = sbj, data = dat, family = binomial, corstr = "exchangeable"))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
## (Intercept) genderM typeGP genderM:typeGP
## -0.4608949 -0.6542467 -1.2099119 0.2764892
##
## GEE: GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
## gee S-function, version 4.13 modified 98/01/27 (1998)
##
## Model:
## Link: Logit
## Variance to Mean Relation: Binomial
## Correlation Structure: Exchangeable
##
## Call:
## gee(formula = resp_f ~ gender * type, id = sbj, data = dat, family = binomial,
## corstr = "exchangeable")
##
## Summary of Residuals:
## Min 1Q Median 3Q Max
## -0.3867735 -0.2469136 -0.1583166 -0.1141975 0.8858025
##
##
## Coefficients:
## Estimate Naive S.E. Naive z Robust S.E. Robust z
## (Intercept) -0.4608949 0.09203208 -5.007981 0.09192019 -5.014077
## genderM -0.6542467 0.15845725 -4.128853 0.15826459 -4.133879
## typeGP -1.2099119 0.12963711 -9.333068 0.12651216 -9.563602
## genderM:typeGP 0.2764892 0.22488854 1.229450 0.22827821 1.211194
##
## Estimated Scale Parameter: 1.002436
## Number of Iterations: 1
##
## Working Correlation
## [,1] [,2]
## [1,] 1.0000000 0.2982266
## [2,] 0.2982266 1.0000000
Q3
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/insomnia.txt", h=T)
str(dat)
## 'data.frame': 478 obs. of 4 variables:
## $ case : int 1 1 2 2 3 3 4 4 5 5 ...
## $ treat : int 1 1 1 1 1 1 1 1 1 1 ...
## $ occasion: int 0 1 0 1 0 1 0 1 0 1 ...
## $ resp : int 1 1 1 1 1 1 1 1 1 1 ...
idx = c(1,2,3)
dat[,idx] = lapply(dat[,idx], as.factor)
#Plot
ggplot(dat, aes(x = resp))+
stat_count()+
facet_grid(~occasion)+
theme_bw()+
labs(x = "Time to sleep")

#fit the model
summary(m1 <-repolr(resp~treat*occasion, subjects = "case", data = dat, times = c(1,2), categories = 4))
##
## repolr: 2016-02-26 version 3.4
##
## Call:
## repolr(formula = resp ~ treat * occasion, subjects = "case",
## data = dat, times = c(1, 2), categories = 4)
##
## Coefficients:
## coeff se.robust z.robust p.value
## cuts1|2 -2.2671 0.2091 -10.8422 0.0000
## cuts2|3 -0.9515 0.1769 -5.3787 0.0000
## cuts3|4 0.3517 0.1751 2.0086 0.0446
## treat1 0.0336 0.2369 0.1418 0.8872
## occasion1 1.0381 0.1564 6.6375 0.0000
## treat1:occasion1 0.7077 0.2458 2.8792 0.0040
##
## Correlation Structure: independence
## Fixed Correlation: 0