p = c("tidyverse", "magrittr", "gee", "repolr")
lapply(p, library, character.only = TRUE)



Q1

#Q1
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/cantabrian.txt", h = T)
str(dat)
## 'data.frame':    1646 obs. of  4 variables:
##  $ sbj   : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ resp  : int  0 0 0 0 0 0 0 0 0 0 ...
##  $ gender: Factor w/ 2 levels "F","M": 2 2 2 2 2 2 2 2 2 2 ...
##  $ type  : Factor w/ 2 levels "GHQ","GP": 2 1 2 1 2 1 2 1 2 1 ...
dat$sbj %<>% as.factor
#Plot
ggplot(dat, aes(x = type, y = resp, color = gender))+
  stat_summary(fun.data = mean_se)+
  theme_bw()+
  labs(x = "Source", y = "Proportion of mental illness")

dat%<>% mutate(resp_f = factor(resp))

summary(m0 <- glm(resp_f~gender*type, data = dat, family = "binomial"))
## 
## Call:
## glm(formula = resp_f ~ gender * type, family = "binomial", data = dat)
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -0.9890  -0.7531  -0.5871  -0.4925   2.0832  
## 
## Coefficients:
##                Estimate Std. Error z value Pr(>|z|)    
## (Intercept)    -0.46089    0.09192  -5.014 5.33e-07 ***
## genderM        -0.65425    0.15826  -4.134 3.57e-05 ***
## typeGP         -1.20991    0.15326  -7.895 2.91e-15 ***
## genderM:typeGP  0.27649    0.26570   1.041    0.298    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 1800.1  on 1645  degrees of freedom
## Residual deviance: 1694.3  on 1642  degrees of freedom
## AIC: 1702.3
## 
## Number of Fisher Scoring iterations: 4
summary(m1 <- gee(resp_f~gender*type, id = sbj, data = dat, family = binomial, corstr = "independence"))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##    (Intercept)        genderM         typeGP genderM:typeGP 
##     -0.4608949     -0.6542467     -1.2099119      0.2764892
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     Independent 
## 
## Call:
## gee(formula = resp_f ~ gender * type, id = sbj, data = dat, family = binomial, 
##     corstr = "independence")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.3867735 -0.2469136 -0.1583166 -0.1141975  0.8858025 
## 
## 
## Coefficients:
##                  Estimate Naive S.E.   Naive z Robust S.E.  Robust z
## (Intercept)    -0.4608949 0.09203208 -5.007981  0.09192019 -5.014077
## genderM        -0.6542467 0.15845725 -4.128853  0.15826459 -4.133879
## typeGP         -1.2099119 0.15344615 -7.884929  0.12651216 -9.563602
## genderM:typeGP  0.2764892 0.26602667  1.039329  0.22827821  1.211194
## 
## Estimated Scale Parameter:  1.002436
## Number of Iterations:  1
## 
## Working Correlation
##      [,1] [,2]
## [1,]    1    0
## [2,]    0    1
summary(m3 <- gee(resp_f~gender*type, id = sbj, data = dat, family = binomial, corstr = "exchangeable"))
## Beginning Cgee S-function, @(#) geeformula.q 4.13 98/01/27
## running glm to get initial regression estimate
##    (Intercept)        genderM         typeGP genderM:typeGP 
##     -0.4608949     -0.6542467     -1.2099119      0.2764892
## 
##  GEE:  GENERALIZED LINEAR MODELS FOR DEPENDENT DATA
##  gee S-function, version 4.13 modified 98/01/27 (1998) 
## 
## Model:
##  Link:                      Logit 
##  Variance to Mean Relation: Binomial 
##  Correlation Structure:     Exchangeable 
## 
## Call:
## gee(formula = resp_f ~ gender * type, id = sbj, data = dat, family = binomial, 
##     corstr = "exchangeable")
## 
## Summary of Residuals:
##        Min         1Q     Median         3Q        Max 
## -0.3867735 -0.2469136 -0.1583166 -0.1141975  0.8858025 
## 
## 
## Coefficients:
##                  Estimate Naive S.E.   Naive z Robust S.E.  Robust z
## (Intercept)    -0.4608949 0.09203208 -5.007981  0.09192019 -5.014077
## genderM        -0.6542467 0.15845725 -4.128853  0.15826459 -4.133879
## typeGP         -1.2099119 0.12963711 -9.333068  0.12651216 -9.563602
## genderM:typeGP  0.2764892 0.22488854  1.229450  0.22827821  1.211194
## 
## Estimated Scale Parameter:  1.002436
## Number of Iterations:  1
## 
## Working Correlation
##           [,1]      [,2]
## [1,] 1.0000000 0.2982266
## [2,] 0.2982266 1.0000000



Q3

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/insomnia.txt", h=T)
str(dat)
## 'data.frame':    478 obs. of  4 variables:
##  $ case    : int  1 1 2 2 3 3 4 4 5 5 ...
##  $ treat   : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ occasion: int  0 1 0 1 0 1 0 1 0 1 ...
##  $ resp    : int  1 1 1 1 1 1 1 1 1 1 ...
idx = c(1,2,3)
dat[,idx] = lapply(dat[,idx], as.factor)

#Plot
ggplot(dat, aes(x = resp))+
  stat_count()+
  facet_grid(~occasion)+
  theme_bw()+
  labs(x = "Time to sleep")

#fit the model
summary(m1 <-repolr(resp~treat*occasion, subjects = "case", data = dat, times = c(1,2), categories = 4))
## 
## repolr: 2016-02-26 version 3.4 
## 
## Call:
## repolr(formula = resp ~ treat * occasion, subjects = "case", 
##     data = dat, times = c(1, 2), categories = 4)
## 
## Coefficients: 
##                   coeff     se.robust  z.robust  p.value 
## cuts1|2            -2.2671    0.2091   -10.8422    0.0000
## cuts2|3            -0.9515    0.1769    -5.3787    0.0000
## cuts3|4             0.3517    0.1751     2.0086    0.0446
## treat1              0.0336    0.2369     0.1418    0.8872
## occasion1           1.0381    0.1564     6.6375    0.0000
## treat1:occasion1    0.7077    0.2458     2.8792    0.0040
## 
## Correlation Structure:  independence 
## Fixed Correlation:  0