p = c("tidyverse", "magrittr", "lme4", "ordinal", "vcrpart")
lapply(p, library, character.only = TRUE)
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/starter.txt", h = T)
dat$sid %<>% as.factor
dat$item %<>% as.factor
dat$resp_f = factor(dat$resp)
#Plot raw data
ggplot(dat, aes(x = item, y = resp))+
stat_summary(fun.data = mean_se)+
theme_bw()+
labs(x = "Item", y = "Average accuracy")+
coord_flip()
#fit the model
summary(m1 <- glmer(resp_f~item+(1|sid), family = "binomial", data = dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 0.0588413 (tol =
## 0.001, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: resp_f ~ item + (1 | sid)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 1430 1482 -705 1410 1340
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -5.9836 -0.5793 0.3340 0.5407 3.6684
##
## Random effects:
## Groups Name Variance Std.Dev.
## sid (Intercept) 1.179 1.086
## Number of obs: 1350, groups: sid, 150
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) 2.8941 0.3347 8.647 < 2e-16 ***
## item2 -1.1904 0.3842 -3.098 0.00195 **
## item3 -1.5564 0.3763 -4.135 3.54e-05 ***
## item4 -1.6327 0.3751 -4.352 1.35e-05 ***
## item5 -1.8690 0.3721 -5.022 5.10e-07 ***
## item6 -1.9394 0.3715 -5.221 1.78e-07 ***
## item7 -1.0326 0.3887 -2.656 0.00790 **
## item8 -2.6924 0.3698 -7.280 3.34e-13 ***
## item9 -3.7625 0.3829 -9.826 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) item2 item3 item4 item5 item6 item7 item8
## item2 -0.777
## item3 -0.800 0.684
## item4 -0.803 0.687 0.704
## item5 -0.814 0.694 0.712 0.715
## item6 -0.817 0.695 0.713 0.716 0.725
## item7 -0.766 0.660 0.675 0.678 0.684 0.686
## item8 -0.836 0.703 0.722 0.726 0.735 0.738 0.692
## item9 -0.830 0.684 0.706 0.710 0.720 0.723 0.673 0.744
## convergence code: 0
## Model failed to converge with max|grad| = 0.0588413 (tol = 0.001, component 1)
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/adolescentSmoking.txt", h = T)
idx = c(1,2,3,4)
dat[,idx] = lapply(dat[,idx], as.factor)
dat$smkreg_f = factor(dat$smkreg)
#Plot raw data
m = aggregate(smkreg~wave+sex, FUN = mean, data = dat)
ggplot(m, aes(x = wave, y = smkreg, group = sex, linetype = sex))+
geom_point()+
geom_line()+
theme_bw()+
ylim(0,.2)
#fit the model
summary(m1 <- glmer(smkreg_f~sex+parsmk+wave+(1|newid), family = "binomial", data = dat))
## Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control
## $checkConv, : Model failed to converge with max|grad| = 4.75774 (tol =
## 0.001, component 1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
## Approximation) [glmerMod]
## Family: binomial ( logit )
## Formula: smkreg_f ~ sex + parsmk + wave + (1 | newid)
## Data: dat
##
## AIC BIC logLik deviance df.resid
## 3803.4 3867.1 -1892.7 3785.4 8721
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4829 -0.0328 -0.0209 -0.0144 4.2244
##
## Random effects:
## Groups Name Variance Std.Dev.
## newid (Intercept) 40.98 6.402
## Number of obs: 8730, groups: newid, 1760
##
## Fixed effects:
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -8.0360 0.3551 -22.630 < 2e-16 ***
## sex1 0.2196 0.2694 0.815 0.414944
## parsmk1 1.1726 0.2762 4.246 2.18e-05 ***
## wave2 -0.8710 0.2454 -3.549 0.000386 ***
## wave3 -0.3562 0.2394 -1.488 0.136775
## wave4 0.1828 0.2352 0.777 0.437049
## wave5 0.6205 0.2339 2.653 0.007983 **
## wave6 1.1000 0.2330 4.720 2.36e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) sex1 prsmk1 wave2 wave3 wave4 wave5
## sex1 -0.415
## parsmk1 -0.261 0.033
## wave2 -0.381 -0.009 0.012
## wave3 -0.422 -0.007 0.009 0.642
## wave4 -0.465 -0.007 0.008 0.649 0.667
## wave5 -0.504 -0.007 0.007 0.647 0.670 0.693
## wave6 -0.548 -0.012 0.005 0.639 0.666 0.693 0.711
## convergence code: 0
## Model failed to converge with max|grad| = 4.75774 (tol = 0.001, component 1)
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/bitterness.txt", h = T)
idx = c(1,2,3,4,5)
dat[,idx] = lapply(dat[,idx], as.factor)
#Plot
ggplot(dat, aes(x = jdg, y = rating))+
geom_boxplot()+
theme_bw()+
labs(x = "judge", y = "Rating")
ggplot(dat, aes(x = tmpr, y = rating, shape = cntct, linetype = cntct, group = cntct))+
stat_summary(fun.y = mean, geom = "point")+
stat_summary(fun.y = mean, geom = "line")+
theme_bw()+
labs(x = "temperature", y = "Rating")+
scale_shape_discrete(name = "contact")+
scale_linetype_discrete(name = "contact")
#fit the model
dat$rating_f = factor(dat$rating)
summary(m1 <- clmm(rating_f~btl+cntct+tmpr+(1|jdg), data = dat))
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: rating_f ~ btl + cntct + tmpr + (1 | jdg)
## data: dat
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 72 -81.43 178.85 382(1149) 1.59e-06 2.1e+01
##
## Random effects:
## Groups Name Variance Std.Dev.
## jdg (Intercept) 1.304 1.142
## Number of groups: jdg 9
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## btl1 -0.2441 0.4641 -0.526 0.598909
## cntct1 1.8344 0.5125 3.579 0.000345 ***
## tmpr1 -3.0727 0.5967 -5.149 2.61e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 -4.8252 0.9154 -5.271
## 2|3 -1.6720 0.6675 -2.505
## 3|4 1.0560 0.6439 1.640
## 4|5 2.9172 0.7459 3.911
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/geometryAL.txt", h = T)
idx = c(2,3,6,7)
dat[,idx] = lapply(dat[,idx], as.factor)
#Plot
ggplot(dat, aes(x = score))+
geom_histogram(aes(y =..density..), binwidth = 1)+
facet_grid(itype~board)+
theme_bw()+
labs(x = "Geometry score", y = "Density")
ggplot(dat, aes(x = score, fill = gender))+
geom_histogram(binwidth =1.5, position ="dodge")+
theme_bw()+
scale_fill_discrete(label = c("Male", "Female"))+
scale_x_continuous(breaks = c(0,2,4,6,8,10))+
labs(x = "Geometry score")
dat$score %<>% as.factor
summary(m1 <- olmm(score~gender+age+mgcse+re(1|iid),
data = dat, family = cumulative(link = "logit")))
## Linear Mixed Model fit by Marginal Maximum
## Likelihood with Gauss-Hermite Quadrature
##
## Family: cumulative logit
## Formula: score ~ gender + age + mgcse + re(1 | iid)
## Data: dat
##
## Goodness of fit:
## AIC BIC logLik
## 110043.6 110119.3 -55012.78
##
## Random effects:
## Subject: iid
## Variance StdDev
## (Intercept) 0.2847814 0.5336492
## Number of obs: 33276, subjects: 2317
##
## Global fixed effects:
## Estimate Std. Error z value
## gender1 -0.2788466 0.0194297 -14.3515
## age 0.0021999 0.0028603 0.7691
## mgcse -1.6664873 0.0339073 -49.1483
##
## Category-specific fixed effects:
## Estimate Std. Error z value
## 0|2:(Intercept) -2.357155 0.022811 -103.334
## 2|4:(Intercept) -1.243897 0.020793 -59.824
## 4|6:(Intercept) -0.219066 0.020336 -10.772
## 6|8:(Intercept) 0.908221 0.021368 42.504
## 8|10:(Intercept) 2.390568 0.022736 105.142
dat = read.table("http://titan.ccunix.ccu.edu.tw/~psycfs/lmm/Data/pirls_us.txt", h = T)
idx = c(1,2,3,4,7)
dat[,idx] = lapply(dat[,idx], as.factor)
#Plot
ggplot(dat, aes(x = enjoy, color = enjoy))+
geom_density()+
facet_grid(econDisadv~watchTV)+
theme_bw()+
labs(title = "Extent of watching TV by economically disadvantaged")
#fit the model
summary(m1 <- clmm(enjoy~female+bornUS+watchTV+econDisadv+reading+(1|schoolid), data = dat))
## Warning: (2) Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## In addition: Absolute and relative convergence criteria were met
## Warning in update.uC(rho): Non finite negative log-likelihood
## at iteration 48
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: enjoy ~ female + bornUS + watchTV + econDisadv + reading + (1 |
## schoolid)
## data: dat
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 4614 -5132.97 10293.93 459(1383) 1.06e+02 1.7e+07
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid (Intercept) 1 1
## Number of groups: schoolid 180
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## female1 0.6698164 0.0601591 11.134 < 2e-16 ***
## bornUS1 -0.1502057 0.1140087 -1.317 0.1877
## watchTV2 0.2216440 0.0894947 2.477 0.0133 *
## watchTV3 0.5372828 0.0799874 6.717 1.85e-11 ***
## watchTV4 0.6960549 0.0888350 7.835 4.67e-15 ***
## watchTV5 0.9222968 0.1850988 4.983 6.27e-07 ***
## econDisadv>50% 0.2099663 0.2072481 1.013 0.3110
## econDisadv11-25 0.1372571 0.3032294 0.453 0.6508
## econDisadv26-50 0.1040835 0.2346631 0.444 0.6574
## reading 0.0057379 0.0004793 11.972 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 1.5512 0.3210 4.833
## 2|3 2.3216 0.3216 7.220
## 3|4 3.5819 0.3241 11.052
summary(m2 <- update(m1, .~. - bornUS))
## Warning: (2) Model is nearly unidentifiable: very large eigenvalue
## - Rescale variables?
## In addition: Absolute and relative convergence criteria were met
## Warning in update.uC(rho): Non finite negative log-likelihood
## at iteration 45
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## formula: enjoy ~ female + watchTV + econDisadv + reading + (1 | schoolid)
## data: dat
##
## link threshold nobs logLik AIC niter max.grad cond.H
## logit flexible 4614 -5084.60 10195.20 1211(3639) 1.25e+01 3.7e+07
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid (Intercept) 0.1606 0.4007
## Number of groups: schoolid 180
##
## Coefficients:
## Estimate Std. Error z value Pr(>|z|)
## female1 0.693759 0.059283 11.703 < 2e-16 ***
## watchTV2 0.221764 0.088199 2.514 0.0119 *
## watchTV3 0.534597 0.078333 6.825 8.81e-12 ***
## watchTV4 0.695595 0.086958 7.999 1.25e-15 ***
## watchTV5 0.921411 0.181152 5.086 3.65e-07 ***
## econDisadv>50% 0.227047 0.111260 2.041 0.0413 *
## econDisadv11-25 0.211088 0.157781 1.338 0.1809
## econDisadv26-50 0.125915 0.124772 1.009 0.3129
## reading 0.005857 0.000461 12.706 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Threshold coefficients:
## Estimate Std. Error z value
## 1|2 1.8074 0.2705 6.682
## 2|3 2.5966 0.2713 9.571
## 3|4 3.8964 0.2750 14.171
anova(m1,m2)
## Likelihood ratio tests of cumulative link models:
##
## formula:
## m2 enjoy ~ female + watchTV + econDisadv + reading + (1 | schoolid)
## m1 enjoy ~ female + bornUS + watchTV + econDisadv + reading + (1 | schoolid)
## link: threshold:
## m2 logit flexible
## m1 logit flexible
##
## no.par AIC logLik LR.stat df Pr(>Chisq)
## m2 13 10195 -5084.6
## m1 14 10294 -5133.0 -96.733 1 1
m2 is better.
That is, gender, extent of watching TV, economical situation and reading score are suitable covariates to predict “enjoy reading”.
There is variance between schools as well.