p = c("tidyverse", "magrittr", "lme4", "ordinal", "vcrpart")
lapply(p, library, character.only = TRUE)



Q1

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/starter.txt", h = T)
dat$sid %<>% as.factor
dat$item %<>% as.factor
dat$resp_f = factor(dat$resp)

#Plot raw data
ggplot(dat, aes(x = item, y = resp))+
  stat_summary(fun.data = mean_se)+
  theme_bw()+
  labs(x = "Item", y = "Average accuracy")+
  coord_flip()

#fit the model
summary(m1 <- glmer(resp_f~item+(1|sid), family = "binomial", data = dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0588413 (tol =
## 0.001, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: resp_f ~ item + (1 | sid)
##    Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##     1430     1482     -705     1410     1340 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -5.9836 -0.5793  0.3340  0.5407  3.6684 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  sid    (Intercept) 1.179    1.086   
## Number of obs: 1350, groups:  sid, 150
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)   2.8941     0.3347   8.647  < 2e-16 ***
## item2        -1.1904     0.3842  -3.098  0.00195 ** 
## item3        -1.5564     0.3763  -4.135 3.54e-05 ***
## item4        -1.6327     0.3751  -4.352 1.35e-05 ***
## item5        -1.8690     0.3721  -5.022 5.10e-07 ***
## item6        -1.9394     0.3715  -5.221 1.78e-07 ***
## item7        -1.0326     0.3887  -2.656  0.00790 ** 
## item8        -2.6924     0.3698  -7.280 3.34e-13 ***
## item9        -3.7625     0.3829  -9.826  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##       (Intr) item2  item3  item4  item5  item6  item7  item8 
## item2 -0.777                                                 
## item3 -0.800  0.684                                          
## item4 -0.803  0.687  0.704                                   
## item5 -0.814  0.694  0.712  0.715                            
## item6 -0.817  0.695  0.713  0.716  0.725                     
## item7 -0.766  0.660  0.675  0.678  0.684  0.686              
## item8 -0.836  0.703  0.722  0.726  0.735  0.738  0.692       
## item9 -0.830  0.684  0.706  0.710  0.720  0.723  0.673  0.744
## convergence code: 0
## Model failed to converge with max|grad| = 0.0588413 (tol = 0.001, component 1)



Q2

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/adolescentSmoking.txt", h = T)
idx = c(1,2,3,4)
dat[,idx] = lapply(dat[,idx], as.factor)
dat$smkreg_f = factor(dat$smkreg)

#Plot raw data
m = aggregate(smkreg~wave+sex, FUN = mean, data = dat)

ggplot(m, aes(x = wave, y = smkreg, group = sex, linetype = sex))+
  geom_point()+
  geom_line()+
  theme_bw()+
  ylim(0,.2)

#fit the model
summary(m1 <- glmer(smkreg_f~sex+parsmk+wave+(1|newid), family = "binomial", data = dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 4.75774 (tol =
## 0.001, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: binomial  ( logit )
## Formula: smkreg_f ~ sex + parsmk + wave + (1 | newid)
##    Data: dat
## 
##      AIC      BIC   logLik deviance df.resid 
##   3803.4   3867.1  -1892.7   3785.4     8721 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4829 -0.0328 -0.0209 -0.0144  4.2244 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  newid  (Intercept) 40.98    6.402   
## Number of obs: 8730, groups:  newid, 1760
## 
## Fixed effects:
##             Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  -8.0360     0.3551 -22.630  < 2e-16 ***
## sex1          0.2196     0.2694   0.815 0.414944    
## parsmk1       1.1726     0.2762   4.246 2.18e-05 ***
## wave2        -0.8710     0.2454  -3.549 0.000386 ***
## wave3        -0.3562     0.2394  -1.488 0.136775    
## wave4         0.1828     0.2352   0.777 0.437049    
## wave5         0.6205     0.2339   2.653 0.007983 ** 
## wave6         1.1000     0.2330   4.720 2.36e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##         (Intr) sex1   prsmk1 wave2  wave3  wave4  wave5 
## sex1    -0.415                                          
## parsmk1 -0.261  0.033                                   
## wave2   -0.381 -0.009  0.012                            
## wave3   -0.422 -0.007  0.009  0.642                     
## wave4   -0.465 -0.007  0.008  0.649  0.667              
## wave5   -0.504 -0.007  0.007  0.647  0.670  0.693       
## wave6   -0.548 -0.012  0.005  0.639  0.666  0.693  0.711
## convergence code: 0
## Model failed to converge with max|grad| = 4.75774 (tol = 0.001, component 1)



Q3

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/bitterness.txt", h = T)
idx = c(1,2,3,4,5)
dat[,idx] = lapply(dat[,idx], as.factor)

#Plot
ggplot(dat, aes(x = jdg, y = rating))+
  geom_boxplot()+
  theme_bw()+
  labs(x = "judge", y = "Rating")

ggplot(dat, aes(x = tmpr, y = rating, shape = cntct, linetype = cntct, group = cntct))+
  stat_summary(fun.y = mean, geom = "point")+
  stat_summary(fun.y = mean, geom = "line")+
  theme_bw()+
  labs(x = "temperature", y = "Rating")+
  scale_shape_discrete(name = "contact")+
  scale_linetype_discrete(name = "contact")

#fit the model
dat$rating_f = factor(dat$rating)
summary(m1 <- clmm(rating_f~btl+cntct+tmpr+(1|jdg), data = dat))
## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: rating_f ~ btl + cntct + tmpr + (1 | jdg)
## data:    dat
## 
##  link  threshold nobs logLik AIC    niter     max.grad cond.H 
##  logit flexible  72   -81.43 178.85 382(1149) 1.59e-06 2.1e+01
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  jdg    (Intercept) 1.304    1.142   
## Number of groups:  jdg 9 
## 
## Coefficients:
##        Estimate Std. Error z value Pr(>|z|)    
## btl1    -0.2441     0.4641  -0.526 0.598909    
## cntct1   1.8344     0.5125   3.579 0.000345 ***
## tmpr1   -3.0727     0.5967  -5.149 2.61e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##     Estimate Std. Error z value
## 1|2  -4.8252     0.9154  -5.271
## 2|3  -1.6720     0.6675  -2.505
## 3|4   1.0560     0.6439   1.640
## 4|5   2.9172     0.7459   3.911



Q5

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/geometryAL.txt", h = T)
idx = c(2,3,6,7)
dat[,idx] = lapply(dat[,idx], as.factor)

#Plot
ggplot(dat, aes(x = score))+
  geom_histogram(aes(y =..density..), binwidth = 1)+
  facet_grid(itype~board)+
  theme_bw()+
  labs(x = "Geometry score", y = "Density")

ggplot(dat, aes(x = score, fill = gender))+
  geom_histogram(binwidth =1.5, position ="dodge")+
  theme_bw()+
  scale_fill_discrete(label = c("Male", "Female"))+
  scale_x_continuous(breaks = c(0,2,4,6,8,10))+
  labs(x = "Geometry score")

dat$score %<>% as.factor
summary(m1 <- olmm(score~gender+age+mgcse+re(1|iid), 
                   data = dat, family = cumulative(link = "logit")))
## Linear Mixed Model fit by Marginal Maximum
## Likelihood with Gauss-Hermite Quadrature 
## 
##  Family: cumulative logit 
## Formula: score ~ gender + age + mgcse + re(1 | iid) 
##    Data: dat 
## 
## Goodness of fit:
##       AIC      BIC    logLik
##  110043.6 110119.3 -55012.78
## 
## Random effects:
## Subject: iid 
##             Variance  StdDev   
## (Intercept) 0.2847814 0.5336492
## Number of obs: 33276, subjects: 2317
## 
## Global fixed effects:
##           Estimate Std. Error  z value
## gender1 -0.2788466  0.0194297 -14.3515
## age      0.0021999  0.0028603   0.7691
## mgcse   -1.6664873  0.0339073 -49.1483
## 
## Category-specific fixed effects:
##                   Estimate Std. Error  z value
## 0|2:(Intercept)  -2.357155   0.022811 -103.334
## 2|4:(Intercept)  -1.243897   0.020793  -59.824
## 4|6:(Intercept)  -0.219066   0.020336  -10.772
## 6|8:(Intercept)   0.908221   0.021368   42.504
## 8|10:(Intercept)  2.390568   0.022736  105.142



Q8

dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/pirls_us.txt", h = T)
idx = c(1,2,3,4,7)
dat[,idx] = lapply(dat[,idx], as.factor)

#Plot 
ggplot(dat, aes(x = enjoy, color = enjoy))+
  geom_density()+
  facet_grid(econDisadv~watchTV)+
  theme_bw()+
  labs(title = "Extent of watching TV by economically disadvantaged")

#fit the model
summary(m1 <- clmm(enjoy~female+bornUS+watchTV+econDisadv+reading+(1|schoolid), data = dat))
## Warning: (2) Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables? 
## In addition: Absolute and relative convergence criteria were met
## Warning in update.uC(rho): Non finite negative log-likelihood
##   at iteration 48
## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: enjoy ~ female + bornUS + watchTV + econDisadv + reading + (1 |  
##     schoolid)
## data:    dat
## 
##  link  threshold nobs logLik   AIC      niter     max.grad cond.H 
##  logit flexible  4614 -5132.97 10293.93 459(1383) 1.06e+02 1.7e+07
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolid (Intercept) 1        1       
## Number of groups:  schoolid 180 
## 
## Coefficients:
##                   Estimate Std. Error z value Pr(>|z|)    
## female1          0.6698164  0.0601591  11.134  < 2e-16 ***
## bornUS1         -0.1502057  0.1140087  -1.317   0.1877    
## watchTV2         0.2216440  0.0894947   2.477   0.0133 *  
## watchTV3         0.5372828  0.0799874   6.717 1.85e-11 ***
## watchTV4         0.6960549  0.0888350   7.835 4.67e-15 ***
## watchTV5         0.9222968  0.1850988   4.983 6.27e-07 ***
## econDisadv>50%   0.2099663  0.2072481   1.013   0.3110    
## econDisadv11-25  0.1372571  0.3032294   0.453   0.6508    
## econDisadv26-50  0.1040835  0.2346631   0.444   0.6574    
## reading          0.0057379  0.0004793  11.972  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##     Estimate Std. Error z value
## 1|2   1.5512     0.3210   4.833
## 2|3   2.3216     0.3216   7.220
## 3|4   3.5819     0.3241  11.052
summary(m2 <- update(m1, .~. - bornUS))
## Warning: (2) Model is nearly unidentifiable: very large eigenvalue
##  - Rescale variables? 
## In addition: Absolute and relative convergence criteria were met
## Warning in update.uC(rho): Non finite negative log-likelihood
##   at iteration 45
## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## formula: enjoy ~ female + watchTV + econDisadv + reading + (1 | schoolid)
## data:    dat
## 
##  link  threshold nobs logLik   AIC      niter      max.grad cond.H 
##  logit flexible  4614 -5084.60 10195.20 1211(3639) 1.25e+01 3.7e+07
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolid (Intercept) 0.1606   0.4007  
## Number of groups:  schoolid 180 
## 
## Coefficients:
##                 Estimate Std. Error z value Pr(>|z|)    
## female1         0.693759   0.059283  11.703  < 2e-16 ***
## watchTV2        0.221764   0.088199   2.514   0.0119 *  
## watchTV3        0.534597   0.078333   6.825 8.81e-12 ***
## watchTV4        0.695595   0.086958   7.999 1.25e-15 ***
## watchTV5        0.921411   0.181152   5.086 3.65e-07 ***
## econDisadv>50%  0.227047   0.111260   2.041   0.0413 *  
## econDisadv11-25 0.211088   0.157781   1.338   0.1809    
## econDisadv26-50 0.125915   0.124772   1.009   0.3129    
## reading         0.005857   0.000461  12.706  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Threshold coefficients:
##     Estimate Std. Error z value
## 1|2   1.8074     0.2705   6.682
## 2|3   2.5966     0.2713   9.571
## 3|4   3.8964     0.2750  14.171
anova(m1,m2)
## Likelihood ratio tests of cumulative link models:
##  
##    formula:                                                                 
## m2 enjoy ~ female + watchTV + econDisadv + reading + (1 | schoolid)         
## m1 enjoy ~ female + bornUS + watchTV + econDisadv + reading + (1 | schoolid)
##    link: threshold:
## m2 logit flexible  
## m1 logit flexible  
## 
##    no.par   AIC  logLik LR.stat df Pr(>Chisq)
## m2     13 10195 -5084.6                      
## m1     14 10294 -5133.0 -96.733  1          1

m2 is better.
That is, gender, extent of watching TV, economical situation and reading score are suitable covariates to predict “enjoy reading”.
There is variance between schools as well.