rm(list = ls())
library(reshape2)
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(lattice)
library(lme4)
Read in data and change from wide to long format. Then do a PCA and some basic EDA.
rm(list=ls()); cat('/14')
## /14
dat <-openxlsx::read.xlsx("df_En_Districts_For_Weighting_Scores.xlsx")
dat2 <- melt(dat,id.vars = "District")
dat2$Type <- substring(dat2$variable,1,4)
#EDA
hist(apply(dat[,-1],2,mean),xlab="Average Score all of the exams",main="Histogram of Averages for all of the exams")
hist(apply(dat[,-1],2,sd),xlab="Average Standard Deviation all of the exams",main="Histogram of sd all of the exams")
plot(apply(dat[,-1],2,mean),apply(dat[,-1],2,sd),xlab="Average Score by Test",ylab="Average sd by Test")
PCs <- eigen(cor(as.matrix(dat[,-1]))) #Calculate the principal components.
round(PCs$values/sum(PCs$values)*100,2) #% of variation each PC is responsible for.
## [1] 65.83 9.25 6.85 4.41 2.51 2.09 1.71 1.54 0.97 0.73 0.56 0.51
## [13] 0.39 0.34 0.34 0.29 0.24 0.24 0.18 0.16 0.14 0.12 0.11 0.10
## [25] 0.07 0.05 0.05 0.04 0.04 0.03 0.03 0.02 0.02 0.01 0.01 0.01
## [37] 0.01 0.01 0.00 0.00 0.00 0.00 0.00
Pvecs <- PCs$vectors
rownames(Pvecs) <- colnames(dat[,-1])
dotplot(Pvecs[,1],xlab="First Principal Component",xlim=c(-0.2,0.2)) #First principal component is overall testing skill.
dotplot(Pvecs[,2],xlab="Second Principal Component",xlim=c(-0.35,0.35))
dotplot(Pvecs[,3],xlab="Third Principal Component") #Separates Maharati from other Tests.
dotplot(Pvecs[,4],xlab="Fourth Principal Component") #Separates PIRLS from PISA.
Create mixed effects model.
library(lmerTest)
# switching 'lme4' packages to 'lmerTest' would give us the p values, but the model is exactly the same.
mod <- lmerTest::lmer(value~-1+Type+(1|variable)+(-1+Type|District),data=dat2) # intercepts for each district * test type.
summary(mod) # this model fits the data better than other models.
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ -1 + Type + (1 | variable) + (-1 + Type | District)
## Data: dat2
##
## REML criterion at convergence: 9118.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.7711 -0.5272 -0.0247 0.5763 4.5325
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## District TypeMhrt 6.835 2.614
## TypePisa 9.287 3.047 0.70
## TypePrls 13.043 3.612 0.84 0.78
## variable (Intercept) 165.264 12.856
## Residual 3.806 1.951
## Number of obs: 2021, groups: District, 47; variable, 43
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## TypeMhrt 64.421 2.378 42.113 27.085 < 2e-16 ***
## TypePisa 46.085 4.880 40.670 9.443 8.32e-12 ***
## TypePrls 74.554 5.276 40.806 14.131 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## TypMhr TypePs
## TypePisa 0.010
## TypePrls 0.013 0.007
# TypePrls - 13.043 = means more variance between districts than Pisa and Mhrt.
head(dat)
## District Pisa1 Pisa2 Pisa3 Pisa4 Pisa5 Pisa6 Pisa7 Prls1 Prls2
## 1 Jeddah 31.866 58.538 58.538 32.714 57.443 63.107 71.442 85.608 79.732
## 2 Baha 32.434 52.573 52.573 32.914 57.011 60.561 67.953 85.240 79.413
## 3 Ahsa 31.744 53.053 53.053 33.622 56.627 60.305 68.220 86.400 81.272
## 4 Eastern 33.184 51.732 51.732 33.438 53.993 58.043 68.916 84.745 78.662
## 5 Meckwah 31.681 51.000 51.000 32.006 55.241 57.713 67.264 83.466 75.997
## 6 Qunfuthah 32.896 45.210 45.210 32.349 54.111 53.683 63.305 84.043 77.783
## Prls3 Prls4 Prls5 Prls6 Mhrt1 Mhrt2 Mhrt3 Mhrt4 Mhrt5
## 1 84.049 74.421 83.762 81.551 98.42226 93.03170 90.46348 79.10931 78.46042
## 2 85.726 74.525 83.933 81.566 98.46929 93.43368 90.92052 79.28742 80.15645
## 3 82.624 75.937 84.736 82.757 97.95256 93.07151 89.16381 78.83420 77.24026
## 4 83.443 73.270 82.729 80.490 98.14617 93.13531 90.02646 78.01775 76.57968
## 5 83.984 71.977 82.432 79.676 98.09042 92.57551 88.98161 75.83941 76.46144
## 6 83.440 72.277 83.230 80.820 98.25614 92.90136 89.38549 78.15212 78.15179
## Mhrt6 Mhrt7 Mhrt8 Mhrt9 Mhrt10 Mhrt11 Mhrt12 Mhrt13
## 1 69.40237 68.57444 68.35780 56.58085 83.45310 77.27585 65.51925 61.79977
## 2 71.02271 70.14880 68.04308 57.89820 82.63135 76.70535 64.90877 61.57452
## 3 68.32753 63.76735 64.37093 54.94676 82.61093 75.71779 65.21888 62.14288
## 4 68.41887 66.38714 66.64794 55.55920 83.00960 76.22788 64.63464 59.92810
## 5 67.93017 65.65087 64.28576 55.27598 79.27085 71.85314 59.30017 55.73212
## 6 69.45304 65.46827 65.06856 53.91173 80.98060 72.65978 61.93391 59.30717
## Mhrt14 Mhrt15 Mhrt16 Mhrt17 Mhrt18 Mhrt19 Mhrt20 Mhrt21
## 1 54.28293 62.77386 62.77495 34.42063 84.11392 68.75583 71.73111 63.04461
## 2 55.40484 63.79248 64.90797 34.22887 85.50200 69.25310 74.13974 64.39619
## 3 53.03750 57.22819 57.83966 32.90279 84.01176 68.94452 70.28699 60.73228
## 4 52.85769 60.30671 59.54003 33.89671 84.01869 68.29370 69.03791 61.25340
## 5 51.88347 58.83446 57.54457 32.59880 82.12542 66.74907 69.61983 62.34093
## 6 52.80272 56.24885 59.64842 32.06101 83.06538 68.58364 72.41842 62.41565
## Mhrt22 Mhrt23 Mhrt24 Mhrt25 Mhrt26 Mhrt27 Mhrt28 Mhrt29
## 1 58.78964 72.48343 52.11203 86.12941 74.82685 70.56023 73.91031 67.05814
## 2 58.83233 72.63737 50.69955 85.34684 72.98129 68.21013 69.97188 63.76406
## 3 55.67338 68.11922 49.59083 85.98337 72.61346 68.86260 67.81397 61.55353
## 4 58.38126 71.63353 50.76682 86.67011 73.86445 69.35893 73.26773 65.92420
## 5 56.54641 67.02413 46.99435 80.28398 64.40644 64.28314 62.79315 58.35707
## 6 54.78108 66.48640 47.23540 81.61092 66.27261 64.25022 60.32173 56.86928
## Mhrt30
## 1 75.85464
## 2 71.31218
## 3 68.53863
## 4 72.96168
## 5 62.90109
## 6 63.56727
mod2 <- lmerTest::lmer(value~-1+Type+(1|variable)+(1|District),data=dat2) # intercepts for each district.
lme4::ranef(mod2)
## $District
## (Intercept)
## Afeef -4.05660842
## Aflaj -3.66339915
## Ahsa 4.68887315
## Asser 2.08521227
## Baha 6.05168864
## Beshah 0.71791698
## Bikiriah -2.48416605
## BneeTameem 1.78450720
## DahranAljanoub -1.95363706
## dawadmi -0.28073055
## Eastern 5.04771166
## Ghat -1.74479485
## HafrAlbatin 0.95291811
## Hail -1.92763609
## Jazan 0.18869470
## Jeddah 6.50572071
## Jouf 2.00767707
## Kharj -0.40114453
## Leath -0.57344702
## Madinah -0.60297890
## MahayelAseer -2.07101997
## Mahd -6.01545629
## majmaah -0.74628515
## Mecca 1.46649932
## Meckwah 2.68709912
## Methneb -0.69201284
## Najran 0.24854087
## Namas 0.64554037
## NorthernBorder 1.79570933
## Qaseem -3.04105761
## Qunfuthah 2.74785933
## Qurayat 0.48979144
## Quwayehyah -1.01402189
## Ress -1.11830913
## RijalAlmaa -1.59356618
## Riyadh 2.71556348
## Sabia -1.17406092
## SaratAbeedah -2.29712997
## Shagra 1.86804167
## Sharorah -4.32309629
## Tabuk -0.38819506
## Taif 2.43751816
## Ula -1.08712493
## Unaizah -2.60173974
## WadiAldawaser 0.44837816
## Yanbu 0.04617224
## Zulfi -1.77601541
##
## $variable
## (Intercept)
## Pisa1 -14.0717042
## Pisa2 -0.3202781
## Pisa3 -0.3202781
## Pisa4 -14.4072583
## Pisa5 6.1390701
## Pisa6 7.1048010
## Pisa7 15.8756476
## Prls1 4.0016336
## Prls2 -4.4640226
## Prls3 7.7165015
## Prls4 -8.5921402
## Prls5 2.6958409
## Prls6 -1.3578131
## Mhrt1 32.8491489
## Mhrt2 27.2400363
## Mhrt3 22.4307406
## Mhrt4 10.1951420
## Mhrt5 9.5835140
## Mhrt6 1.2858975
## Mhrt7 -2.0567105
## Mhrt8 -1.9683440
## Mhrt9 -10.3182259
## Mhrt10 14.1660003
## Mhrt11 4.9404746
## Mhrt12 -7.5445911
## Mhrt13 -11.4163461
## Mhrt14 -17.6068157
## Mhrt15 -10.4644644
## Mhrt16 -10.2495227
## Mhrt17 -32.9791367
## Mhrt18 14.7438738
## Mhrt19 -0.9955033
## Mhrt20 2.6167503
## Mhrt21 -6.6111363
## Mhrt22 -11.2374613
## Mhrt23 0.9415789
## Mhrt24 -17.8095911
## Mhrt25 15.3696916
## Mhrt26 -0.6038600
## Mhrt27 -2.1472416
## Mhrt28 -3.4290833
## Mhrt29 -7.1343323
## Mhrt30 -1.7904826
##
## with conditional variances for "District" "variable"
# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
dotplot(lme4::ranef(mod2))
## $District
##
## $variable
# This is ranking all the tests from easiest to hardiest after accounting for Test Type.
# This also shows the ranking of districts combining all of the test.
summary(mod2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ -1 + Type + (1 | variable) + (1 | District)
## Data: dat2
##
## REML criterion at convergence: 9438.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.3961 -0.6003 -0.0200 0.5866 4.3249
##
## Random effects:
## Groups Name Variance Std.Dev.
## District (Intercept) 7.004 2.647
## variable (Intercept) 165.242 12.855
## Residual 4.902 2.214
## Number of obs: 2021, groups: District, 47; variable, 43
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## TypeMhrt 64.421 2.379 42.165 27.077 < 2e-16 ***
## TypePisa 46.085 4.875 40.505 9.453 8.42e-12 ***
## TypePrls 74.554 5.264 40.433 14.164 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## TypMhr TypePs
## TypePisa 0.013
## TypePrls 0.012 0.006
# exporting the file:
FinDat <- cbind(lme4::ranef(mod2)$District,lme4::ranef(mod)$District)
colnames(FinDat)[1] <- "Exp_Score_Diff_VS_Avg_Distric"
FinDat$District <- rownames(FinDat)
#openxlsx::write.xlsx(FinDat, 'FinDat_July27_B_th2022.xlsx')
# #
dotplot(lme4::ranef(mod)) #Looking at which Districts are the best performers, what the confidence intervals for their impacts are. Also this is showing the difficulty of each test type.
## $District
##
## $variable
lme4::ranef(mod) #Print out the point estimates of each Districts impact by test type.
## $District
## TypeMhrt TypePisa TypePrls
## Afeef -3.402571002 -6.04324395 -5.0676695
## Aflaj -3.653130932 -3.74781583 -3.6357123
## Ahsa 4.167511843 4.85119904 7.3288486
## Asser 1.420155484 4.27279886 2.8843523
## Baha 6.150582178 4.80966061 7.1530712
## Beshah 0.023809098 3.83125185 0.4765280
## Bikiriah -2.347932424 -1.28636808 -4.7742657
## BneeTameem 2.343073061 0.48227513 0.4509611
## DahranAljanoub -1.171324033 -4.25342791 -3.2412197
## dawadmi 0.005164841 -0.64157089 -1.3638047
## Eastern 5.110835794 4.08212199 5.9806325
## Ghat -1.595121318 -2.53258727 -1.5542294
## HafrAlbatin 1.824926651 -1.35808667 -0.7847683
## Hail -2.534412121 0.18936851 -1.3802990
## Jazan -0.418664419 0.70422876 2.8089524
## Jeddah 6.273597893 6.99715382 7.1735524
## Jouf 0.885416120 4.10119859 5.3965234
## Kharj -0.073798456 -1.56546418 -0.6778041
## Leath -1.109550028 1.37836572 -0.1865338
## Madinah -0.570151447 0.12104376 -1.7136000
## MahayelAseer -2.287301400 -2.35528385 -0.5522396
## Mahd -5.282047220 -6.86923029 -8.9225385
## majmaah -0.680902592 -1.13055695 -0.6114212
## Mecca 1.533534555 1.55478820 1.0011961
## Meckwah 2.175174532 3.28384099 4.7048406
## Methneb -0.447064042 -1.81470311 -0.5790395
## Najran 0.389368482 -0.02302755 -0.1618975
## Namas 1.417155617 -2.97390163 1.1271626
## NorthernBorder 1.836313306 1.10619043 2.4736212
## Qaseem -2.516304658 -3.82317977 -4.8926183
## Qunfuthah 2.731606316 0.95929410 5.1498656
## Qurayat -0.417477397 3.42532846 1.6263664
## Quwayehyah -0.895008221 -0.36517168 -2.4965200
## Ress -0.350544741 -2.69247338 -3.2527878
## RijalAlmaa -2.018736310 -1.88135889 1.0613075
## Riyadh 3.155475104 1.74224335 1.6072083
## Sabia -2.246772641 2.19585752 0.2872346
## SaratAbeedah -2.649491644 0.48326430 -3.9778662
## Shagra 2.263404536 1.38888197 0.3603107
## Sharorah -4.490926808 -2.49244288 -5.7956446
## Tabuk -0.586372413 -0.51767800 0.8457324
## Taif 2.469943104 3.08601890 1.4468943
## Ula -0.667845433 -2.85924121 -1.0850597
## Unaizah -3.071298746 0.41164558 -3.9505510
## WadiAldawaser 1.057710132 -1.58232609 -0.2336864
## Yanbu 0.226601347 -1.13150965 0.5832870
## Zulfi -1.976609547 -1.51737074 -1.0366726
##
## $variable
## (Intercept)
## Pisa1 -14.0736902
## Pisa2 -0.3203233
## Pisa3 -0.3203233
## Pisa4 -14.4092917
## Pisa5 6.1399366
## Pisa6 7.1058037
## Pisa7 15.8778881
## Prls1 4.0021984
## Prls2 -4.4646526
## Prls3 7.7175905
## Prls4 -8.5933529
## Prls5 2.6962213
## Prls6 -1.3580048
## Mhrt1 32.8537849
## Mhrt2 27.2438807
## Mhrt3 22.4339063
## Mhrt4 10.1965809
## Mhrt5 9.5848665
## Mhrt6 1.2860790
## Mhrt7 -2.0570008
## Mhrt8 -1.9686218
## Mhrt9 -10.3196821
## Mhrt10 14.1679995
## Mhrt11 4.9411719
## Mhrt12 -7.5456559
## Mhrt13 -11.4179573
## Mhrt14 -17.6093006
## Mhrt15 -10.4659412
## Mhrt16 -10.2509692
## Mhrt17 -32.9837910
## Mhrt18 14.7459546
## Mhrt19 -0.9956438
## Mhrt20 2.6171196
## Mhrt21 -6.6120693
## Mhrt22 -11.2390472
## Mhrt23 0.9417118
## Mhrt24 -17.8121046
## Mhrt25 15.3718607
## Mhrt26 -0.6039452
## Mhrt27 -2.1475447
## Mhrt28 -3.4295673
## Mhrt29 -7.1353392
## Mhrt30 -1.7907353
##
## with conditional variances for "District" "variable"
qqnorm(resid(mod)) #There does not seem to be much of heteroskedasticity, and residuals seem normally distributed.
plot(mod)
# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
dotplot(lme4::ranef(mod2))
## $District
##
## $variable