The data set contains the A-level geometry examination results of 33,276 students from 2,317 institutions in England in 1997. A mean-centred average General Certificate of Secondary Education (GCSE) score, derived from all of a student's GCSE subjects, is used as a prior-attainment covariate. Also recorded are the gender and age of each student. The cohort is aged between 18 and 19 years, with a mean of 18.5 years. Institutions were grouped into 11 categories according to their admission policy and type of funding. Eight examination boards were involved in the study, although two boards were combined into one.
Source: Fielding, A., Yang, M., & Goldstein, H. (2003). Multilevel ordinal models for examination grades. Statistical Modelling, 3, 127-153.
Column 1: Score on the A-level geometry examination (A=10, B=8, C=6, D=4, E=2, F=0, where F indicates unclassified or fail)
Column 2: The examination board ID, 1=Associate, 2=Cambridge, 3=London, 5=Oxford, 6=Joint Matriculation, 7=Oxford-Cambridge, 8=WJB
Column 3: Gender ID, 0=Male, 1=Female
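Column 4: Age in months, mean-centered (the cohort spans ages 18 to 19, so the centred integer values such as -6 and 5 are months, not years)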
Column 5: Institution average GCSE score, mean-centered
Column 6: Institution type, 1=LEA Maintained Comprehensive, 2=Maintained Selective, 3=Maintained Modern, 4=Grammar Comprehensive, 5=Grammar Selective, 6=Grammar Modern, 7=Independent selective, 8=Independent non-selective, 9=Sixth Form College, 10=Further Education College, 11=Others
Column 7: Institution ID
## score board gender age mgcse itype iid
## 1 8 3 1 1 0.856 7 1001
## 2 8 3 1 -6 0.856 7 1001
## 3 8 3 1 5 0.856 7 1001
## 4 10 3 1 -1 0.856 7 1001
## 5 8 3 1 -5 0.856 7 1001
## 6 10 3 1 3 0.856 7 1001
#
# Convert the coded columns of dta3 to factors in a single step.
# gender: a value of 1 becomes "Female", any other non-missing value "Male";
# board, iid and itype keep their numeric codes as the factor levels.
dta3 <- transform(
  dta3,
  gender = factor(ifelse(gender == 1, "Female", "Male")),
  board = factor(board),
  iid = factor(iid),
  itype = factor(itype)
)
#
# Show the structure of dta3 to confirm the column types after the factor
# conversions; the echoed console output follows as `##` comment lines.
str(dta3)
## 'data.frame': 33276 obs. of 7 variables:
## $ score : int 8 8 8 10 8 10 10 8 6 4 ...
## $ board : Factor w/ 7 levels "1","2","3","5",..: 3 3 3 3 3 3 3 3 3 3 ...
## $ gender: Factor w/ 2 levels "Female","Male": 1 1 1 1 1 1 1 1 1 2 ...
## $ age : int 1 -6 5 -1 -5 3 3 4 -2 1 ...
## $ mgcse : num 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.856 0.657 ...
## $ itype : Factor w/ 11 levels "1","2","3","4",..: 7 7 7 7 7 7 7 7 7 7 ...
## $ iid : Factor w/ 2317 levels "1001","1002",..: 1 1 1 1 1 1 1 1 1 2 ...
#
# Density histogram of the geometry score, one panel per institution type.
# after_stat(density) replaces the deprecated `..density..` notation and is
# set in the main mapping, so the bar heights are densities rather than counts.
ggplot(data = dta3, aes(x = score, y = after_stat(density))) +
  geom_histogram(binwidth = 1) +
  facet_grid(. ~ itype) +
  labs(x = "Geometry score", y = "Density") +
  ggtitle("Institution type")
#
# Density histogram of the geometry score in a grid of panels:
# institution type down the rows, examination board across the columns.
# after_stat(density) replaces the deprecated `..density..` notation.
ggplot(data = dta3, aes(x = score, y = after_stat(density))) +
  geom_histogram(binwidth = 1) +
  facet_grid(itype ~ board) +
  labs(x = "Geometry score", y = "Density") +
  ggtitle("Institution Type by Examination Board")
#
# Bar chart of score counts with the two genders drawn side by side.
# geom_bar() counts rows per distinct score and has no `binwidth` parameter;
# passing one only produced an "Ignoring unknown parameters" warning, so it
# has been dropped.
ggplot(dta3, aes(x = score, fill = gender)) +
  geom_bar(position = "dodge") +
  labs(x = "Geometry score", y = "Count")
#
# Scatter plot of geometry score against GCSE score with a fitted linear
# trend, in a grid of panels: institution type down the rows, gender across
# the columns.
# - `size`/`alpha` are the native ggplot2 names for the base-graphics style
#   `cex` and `I()`-wrapped alpha used before; the drawn points are unchanged.
# - The smoother formula is spelled out so ggplot2 does not print its
#   "using formula" message.
ggplot(dta3, aes(x = mgcse, y = score)) +
  geom_point(alpha = 0.3, size = 0.1) +
  stat_smooth(method = "lm", formula = y ~ x) +
  facet_grid(itype ~ gender) +
  labs(y = "Geometry score", x = "GCSE score")
#
# Load the ordinal package through pacman; the clmm2() model fitted below
# comes from this package.
# NOTE(review): p_load() is expected to install the package first when it is
# missing -- confirm that is acceptable in the environment this runs in.
pacman::p_load(ordinal)
#
# Fit a cumulative link mixed model of the exam score on gender, age, GCSE
# score, examination board and institution type, with institution (iid) as
# the random-effect grouping factor, then print the model summary.
#
# clmm2() requires a factor response, while str(dta3) reports score as an
# integer column. Convert it here if that has not been done already;
# factor() sorts the levels numerically (0 < 2 < ... < 10), which is the
# intended grade order. This is a no-op when score is already a factor.
if (!is.factor(dta3$score)) {
  dta3$score <- factor(dta3$score, ordered = TRUE)
}
# Hess = TRUE keeps the Hessian so that summary() can report standard errors.
dta3.clmm <- clmm2(
  score ~ gender + age + mgcse + board + itype,
  random = iid, data = dta3, Hess = TRUE
)
summary(dta3.clmm)
# NOTE(review): the recorded run below warns that the optimizer may not have
# converged (max|gradient| about 0.016); interpret the estimates with care.
## Warning: clmm2 may not have converged:
## optimizer 'ucminf' terminated with max|gradient|: 0.0160943494036054
## Cumulative Link Mixed Model fitted with the Laplace approximation
##
## Call:
## clmm2(location = score ~ gender + age + mgcse + board + itype,
## random = iid, data = dta3, Hess = TRUE)
##
## Random effects:
## Var Std.Dev
## iid 0.2347193 0.4844784
##
## Location coefficients:
## Estimate Std. Error z value Pr(>|z|)
## genderMale -0.2862 0.0218 -13.1255 < 2.22e-16
## age -0.0020 0.0029 -0.6811 0.49582285
## mgcse 1.5431 0.0407 37.9416 < 2.22e-16
## board2 -0.2158 0.0552 -3.9073 9.3326e-05
## board3 0.0574 0.0452 1.2705 0.20391396
## board5 -0.9067 0.1027 -8.8307 < 2.22e-16
## board6 -0.0038 0.0559 -0.0684 0.94547377
## board7 0.1275 0.0734 1.7370 0.08239029
## board8 0.5117 0.3426 1.4935 0.13530562
## itype2 0.0292 0.0891 0.3276 0.74319435
## itype3 -0.1547 0.1897 -0.8156 0.41470086
## itype4 0.0507 0.0496 1.0222 0.30670678
## itype5 0.0788 0.0736 1.0703 0.28450407
## itype6 -0.9874 0.2539 -3.8892 0.00010056
## itype7 0.2200 0.0532 4.1347 3.5539e-05
## itype8 -0.0442 0.1525 -0.2900 0.77181787
## itype9 0.0064 0.0612 0.1039 0.91725637
## itype10 -0.3173 0.0654 -4.8526 1.2183e-06
## itype11 -0.2345 0.1685 -1.3915 0.16408490
##
## No scale coefficients
##
## Threshold coefficients:
## Estimate Std. Error z value
## 0|2 -2.6390 0.0481 -54.8113
## 2|4 -1.5242 0.0460 -33.1390
## 4|6 -0.4985 0.0452 -11.0218
## 6|8 0.6294 0.0452 13.9192
## 8|10 2.1112 0.0469 44.9725
##
## log-likelihood: -54914.43
## AIC: 109878.85
## Condition number of Hessian: 14033.00