# Data management
# Recode gender as a factor: code 1 is labelled "Female", every other
# non-missing code is labelled "Male".
dta[["gender"]] <- factor(ifelse(dta[["gender"]] == 1, "Female", "Male"))

# Examination board, institution id and institution type are identifiers,
# so store them as factors rather than numbers.
dta[c("board", "iid", "itype")] <-
  lapply(dta[c("board", "iid", "itype")], factor)

# Show the resulting column types.
str(dta)
## 'data.frame': 33276 obs. of 7 variables:
## $ score : int 8 8 8 10 8 10 10 8 6 4 ...
## $ board : Factor w/ 7 levels "1","2","3","5",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ gender: Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 2 ...
## $ age : int 1 -6 5 -1 -5 3 3 4 -2 1 ...
## $ mgcse : num 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.657 ...
## $ itype : Factor w/ 11 levels "1","2","3","4",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ iid : Factor w/ 2317 levels "1001","1002",..: 1 1 1 1 1 1 1 1 1 2 ...
library(ggplot2)
#
# Histogram of geometry scores with the y-axis on the density scale, drawn
# in one panel per institution type (panels laid out as columns).
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(data = dta, aes(x = score, y = after_stat(density))) +
  geom_histogram(binwidth = 1) +
  facet_grid(. ~ itype) +
  labs(x = "Geometry score", y = "Density") +
  ggtitle("Institution type")

#
# Histogram of geometry scores with the y-axis on the density scale, in a
# grid of panels: institution type in rows, examination board in columns.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(data = dta, aes(x = score, y = after_stat(density))) +
  geom_histogram(binwidth = 1) +
  facet_grid(itype ~ board) +
  labs(x = "Geometry score", y = "Density") +
  ggtitle("Institution Type by Examination Board")

#
# Bar chart of score counts with side-by-side bars for each gender.
# geom_bar() counts rows per distinct x value and has no `binwidth`
# parameter; passing one only triggered an "unknown parameters" warning,
# so it is omitted here.
ggplot(dta, aes(x = score, fill = gender)) +
  geom_bar(position = "dodge") +
  labs(x = "Geometry score", y = "Count")

#
# Geometry score against GCSE score with a per-panel linear fit, in a grid
# of panels: institution type in rows, gender in columns.
# Points are small and semi-transparent to reduce overplotting.
ggplot(dta, aes(x = mgcse, y = score)) +
  geom_point(alpha = 0.3, size = 0.1) +
  stat_smooth(method = "lm") +
  facet_grid(rows = vars(itype), cols = vars(gender)) +
  labs(x = "GCSE score", y = "Geometry score")
## `geom_smooth()` using formula 'y ~ x'

# Model
#
# The cumulative link model needs a categorical response, so convert the
# integer score to a factor (levels follow the sorted score values).
dta[["score"]] <- factor(dta[["score"]])
#
library(ordinal)
#
# Cumulative link mixed model for score with fixed effects for gender, age,
# GCSE score, board and institution type, and `iid` as the random-effects
# grouping factor. Hess = TRUE computes the Hessian that summary() needs
# for the standard errors.
dta.clmm <- clmm2(
  location = score ~ gender + age + mgcse + board + itype,
  random = iid,
  data = dta,
  Hess = TRUE
)
summary(dta.clmm)
## Warning: clmm2 may not have converged:
## optimizer 'ucminf' terminated with max|gradient|: 0.0160943494036054
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## Call:
## clmm2(location = score ~ gender + age + mgcse + board + itype,
## random = iid, data = dta, Hess = TRUE)
##
## Random effects:
## Var Std.Dev
## iid 0.2347193 0.4844784
##
## Location coefficients:
## Estimate Std. Error z value Pr(>|z|)
## genderMale -0.2862 0.0218 -13.1255 < 2.22e-16
## age -0.0020 0.0029 -0.6811 0.49582285
## mgcse 1.5431 0.0407 37.9416 < 2.22e-16
## board2 -0.2158 0.0552 -3.9073 9.3326e-05
## board3 0.0574 0.0452 1.2705 0.20391396
## board5 -0.9067 0.1027 -8.8307 < 2.22e-16
## board6 -0.0038 0.0559 -0.0684 0.94547377
## board7 0.1275 0.0734 1.7370 0.08239029
## board8 0.5117 0.3426 1.4935 0.13530562
## itype2 0.0292 0.0891 0.3276 0.74319435
## itype3 -0.1547 0.1897 -0.8156 0.41470086
## itype4 0.0507 0.0496 1.0222 0.30670678
## itype5 0.0788 0.0736 1.0703 0.28450407
## itype6 -0.9874 0.2539 -3.8892 0.00010056
## itype7 0.2200 0.0532 4.1347 3.5539e-05
## itype8 -0.0442 0.1525 -0.2900 0.77181787
## itype9 0.0064 0.0612 0.1039 0.91725637
## itype10 -0.3173 0.0654 -4.8526 1.2183e-06
## itype11 -0.2345 0.1685 -1.3915 0.16408490
##
## No scale coefficients
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0|2 -2.6390 0.0481 -54.8113
## 2|4 -1.5242 0.0460 -33.1390
## 4|6 -0.4985 0.0452 -11.0218
## 6|8 0.6294 0.0452 13.9192
## 8|10 2.1112 0.0469 44.9725
##
## log-likelihood: -54914.43
## AIC: 109878.85
## Condition number of Hessian: 14033.00