# 1 Data input ----

# Read the geometry data set; the first line of the file holds the column
# names. `header = TRUE` is spelled out in full: the abbreviated `h = T`
# relies on partial argument matching and on `T`, which can be reassigned.
dta <- read.table("geometryAL.txt", header = TRUE)

# Show the first rows to check that the columns were parsed as expected.
head(dta)
##   score board gender age mgcse itype  iid
## 1     8     3      1   1 0.856     7 1001
## 2     8     3      1  -6 0.856     7 1001
## 3     8     3      1   5 0.856     7 1001
## 4    10     3      1  -1 0.856     7 1001
## 5     8     3      1  -5 0.856     7 1001
## 6    10     3      1   3 0.856     7 1001

# 2 Data management ----

# Recode gender: a value of 1 becomes "Female", any other value "Male".
dta[["gender"]] <- factor(ifelse(dta[["gender"]] == 1, "Female", "Male"))

# Convert the examination board, institution id and institution type columns
# to factors in one step; the column order of `dta` is unchanged.
dta[c("board", "iid", "itype")] <- lapply(
  dta[c("board", "iid", "itype")],
  factor
)

# Inspect the resulting column types.
str(dta)
## 'data.frame':    33276 obs. of  7 variables:
##  $ score : int  8 8 8 10 8 10 10 8 6 4 ...
##  $ board : Factor w/ 7 levels "1","2","3","5",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ gender: Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 2 ...
##  $ age   : int  1 -6 5 -1 -5 3 3 4 -2 1 ...
##  $ mgcse : num  0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.657 ...
##  $ itype : Factor w/ 11 levels "1","2","3","4",..: 7 7 7 7 7 7 7 7 7 7 ...
##  $ iid   : Factor w/ 2317 levels "1001","1002",..: 1 1 1 1 1 1 1 1 1 2 ...
library(ggplot2)

# Density-scaled histogram of the geometry score, one panel per institution
# type. `after_stat(density)` replaces the deprecated `..density..` notation.
ggplot(data = dta, aes(x = score, y = after_stat(density))) +
  geom_histogram(binwidth = 1) +
  facet_grid(. ~ itype) +
  labs(x = "Geometry score", y = "Density") +
  ggtitle("Institution type")

# Density-scaled histogram of the geometry score, with institution type in
# rows and examination board in columns. `after_stat(density)` replaces the
# deprecated `..density..` notation.
ggplot(data = dta, aes(x = score, y = after_stat(density))) +
  geom_histogram(binwidth = 1) +
  facet_grid(itype ~ board) +
  labs(x = "Geometry score", y = "Density") +
  ggtitle("Institution Type by Examination Board")

# Side-by-side bar chart of score counts for each gender.
# geom_bar() counts the rows at each distinct score and has no `binwidth`
# parameter, so that argument is not passed (it was ignored with a warning).
ggplot(dta, aes(x = score, fill = gender)) +
  geom_bar(position = "dodge") +
  labs(x = "Geometry score", y = "Count")

# Scatter plot of geometry score against the mgcse column (labelled
# "GCSE score"), overlaid with a fitted linear trend; panels are laid out with
# institution type in rows and gender in columns.
ggplot(data = dta, mapping = aes(x = mgcse, y = score)) +
  # Small, semi-transparent points keep the many overlapping rows readable.
  geom_point(alpha = I(0.3), size = 0.1) +
  stat_smooth(method = "lm") +
  facet_grid(rows = vars(itype), cols = vars(gender)) +
  labs(x = "GCSE score", y = "Geometry score")
## `geom_smooth()` using formula 'y ~ x'

# 3 Model ----

# Load the package that provides clmm2().
library(ordinal)

# Treat the score as a categorical response for the cumulative link model.
dta$score <- factor(dta$score)

# Fit a cumulative link mixed model of score on gender, age, mgcse, board and
# institution type, with `iid` as the random-effect grouping term.
# Hess = TRUE requests the Hessian, which summary() uses for standard errors.
# NOTE(review): the recorded output warns the fit may not have converged.
dta.clmm <- clmm2(
  score ~ gender + age + mgcse + board + itype,
  random = iid,
  data = dta,
  Hess = TRUE
)
summary(dta.clmm)
## Warning: clmm2 may not have converged:
##   optimizer 'ucminf' terminated with max|gradient|: 0.0160943494036054
## Cumulative Link Mixed Model fitted with the Laplace approximation
## 
## Call:
## clmm2(location = score ~ gender + age + mgcse + board + itype, 
##     random = iid, data = dta, Hess = TRUE)
## 
## Random effects:
##           Var   Std.Dev
## iid 0.2347193 0.4844784
## 
## Location coefficients:
##            Estimate Std. Error z value  Pr(>|z|)  
## genderMale  -0.2862   0.0218   -13.1255 < 2.22e-16
## age         -0.0020   0.0029    -0.6811 0.49582285
## mgcse        1.5431   0.0407    37.9416 < 2.22e-16
## board2      -0.2158   0.0552    -3.9073 9.3326e-05
## board3       0.0574   0.0452     1.2705 0.20391396
## board5      -0.9067   0.1027    -8.8307 < 2.22e-16
## board6      -0.0038   0.0559    -0.0684 0.94547377
## board7       0.1275   0.0734     1.7370 0.08239029
## board8       0.5117   0.3426     1.4935 0.13530562
## itype2       0.0292   0.0891     0.3276 0.74319435
## itype3      -0.1547   0.1897    -0.8156 0.41470086
## itype4       0.0507   0.0496     1.0222 0.30670678
## itype5       0.0788   0.0736     1.0703 0.28450407
## itype6      -0.9874   0.2539    -3.8892 0.00010056
## itype7       0.2200   0.0532     4.1347 3.5539e-05
## itype8      -0.0442   0.1525    -0.2900 0.77181787
## itype9       0.0064   0.0612     0.1039 0.91725637
## itype10     -0.3173   0.0654    -4.8526 1.2183e-06
## itype11     -0.2345   0.1685    -1.3915 0.16408490
## 
## No scale coefficients
## 
## Threshold coefficients:
##      Estimate Std. Error z value 
## 0|2   -2.6390   0.0481   -54.8113
## 2|4   -1.5242   0.0460   -33.1390
## 4|6   -0.4985   0.0452   -11.0218
## 6|8    0.6294   0.0452    13.9192
## 8|10   2.1112   0.0469    44.9725
## 
## log-likelihood: -54914.43 
## AIC: 109878.85 
## Condition number of Hessian: 14033.00
# The end