Modeling District Average Test Scores

rm(list = ls())

library(reshape2)
library(lmerTest)

## Loading required package: lme4

## Loading required package: Matrix

## 
## Attaching package: 'lmerTest'

## The following object is masked from 'package:lme4':
## 
##     lmer

## The following object is masked from 'package:stats':
## 
##     step

library(lattice)
library(lme4)

Read in data and change from wide to long format. Then do a PCA and some basic EDA.

rm(list=ls()); cat('/14')

## /14

dat <-openxlsx::read.xlsx("df_En_Districts_For_Weighting_Scores.xlsx")
dat2 <- melt(dat,id.vars = "District")
dat2$Type <- substring(dat2$variable,1,4)

#EDA
hist(apply(dat[,-1],2,mean),xlab="Average Score all of the exams",main="Histogram of Averages for all of the exams")

hist(apply(dat[,-1],2,sd),xlab="Average Standard Deviation all of the exams",main="Histogram of sd all of the exams")

plot(apply(dat[,-1],2,mean),apply(dat[,-1],2,sd),xlab="Average Score by Test",ylab="Average sd by Test")

PCs <- eigen(cor(as.matrix(dat[,-1]))) #Calculate the principal components.
round(PCs$values/sum(PCs$values)*100,2) #% of variation each PC is responsible for.

##  [1] 65.83  9.25  6.85  4.41  2.51  2.09  1.71  1.54  0.97  0.73  0.56  0.51
## [13]  0.39  0.34  0.34  0.29  0.24  0.24  0.18  0.16  0.14  0.12  0.11  0.10
## [25]  0.07  0.05  0.05  0.04  0.04  0.03  0.03  0.02  0.02  0.01  0.01  0.01
## [37]  0.01  0.01  0.00  0.00  0.00  0.00  0.00

Pvecs <- PCs$vectors
rownames(Pvecs) <- colnames(dat[,-1])
dotplot(Pvecs[,1],xlab="First Principal Component",xlim=c(-0.2,0.2)) #First principal component is overall testing skill.

dotplot(Pvecs[,2],xlab="Second Principal Component",xlim=c(-0.35,0.35))

dotplot(Pvecs[,3],xlab="Third Principal Component") #Separates Maharati from other Tests.

dotplot(Pvecs[,4],xlab="Fourth Principal Component") #Separates PIRLS from PISA.

Create mixed effects model.

library(lmerTest)
# switching 'lme4' packages to 'lmerTest' would give us the p values, but the model is exactly the same.
mod <- lmerTest::lmer(value~-1+Type+(1|variable)+(-1+Type|District),data=dat2) # intercepts for each district * test type.
summary(mod) # this model fits the data better than other models.

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ -1 + Type + (1 | variable) + (-1 + Type | District)
##    Data: dat2
## 
## REML criterion at convergence: 9118.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.7711 -0.5272 -0.0247  0.5763  4.5325 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr     
##  District TypeMhrt      6.835   2.614            
##           TypePisa      9.287   3.047   0.70     
##           TypePrls     13.043   3.612   0.84 0.78
##  variable (Intercept) 165.264  12.856            
##  Residual               3.806   1.951            
## Number of obs: 2021, groups:  District, 47; variable, 43
## 
## Fixed effects:
##          Estimate Std. Error     df t value Pr(>|t|)    
## TypeMhrt   64.421      2.378 42.113  27.085  < 2e-16 ***
## TypePisa   46.085      4.880 40.670   9.443 8.32e-12 ***
## TypePrls   74.554      5.276 40.806  14.131  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##          TypMhr TypePs
## TypePisa 0.010        
## TypePrls 0.013  0.007

# TypePrls - 13.043 = means more variance between districts than Pisa and Mhrt.

head(dat)

##    District  Pisa1  Pisa2  Pisa3  Pisa4  Pisa5  Pisa6  Pisa7  Prls1  Prls2
## 1    Jeddah 31.866 58.538 58.538 32.714 57.443 63.107 71.442 85.608 79.732
## 2      Baha 32.434 52.573 52.573 32.914 57.011 60.561 67.953 85.240 79.413
## 3      Ahsa 31.744 53.053 53.053 33.622 56.627 60.305 68.220 86.400 81.272
## 4   Eastern 33.184 51.732 51.732 33.438 53.993 58.043 68.916 84.745 78.662
## 5   Meckwah 31.681 51.000 51.000 32.006 55.241 57.713 67.264 83.466 75.997
## 6 Qunfuthah 32.896 45.210 45.210 32.349 54.111 53.683 63.305 84.043 77.783
##    Prls3  Prls4  Prls5  Prls6    Mhrt1    Mhrt2    Mhrt3    Mhrt4    Mhrt5
## 1 84.049 74.421 83.762 81.551 98.42226 93.03170 90.46348 79.10931 78.46042
## 2 85.726 74.525 83.933 81.566 98.46929 93.43368 90.92052 79.28742 80.15645
## 3 82.624 75.937 84.736 82.757 97.95256 93.07151 89.16381 78.83420 77.24026
## 4 83.443 73.270 82.729 80.490 98.14617 93.13531 90.02646 78.01775 76.57968
## 5 83.984 71.977 82.432 79.676 98.09042 92.57551 88.98161 75.83941 76.46144
## 6 83.440 72.277 83.230 80.820 98.25614 92.90136 89.38549 78.15212 78.15179
##      Mhrt6    Mhrt7    Mhrt8    Mhrt9   Mhrt10   Mhrt11   Mhrt12   Mhrt13
## 1 69.40237 68.57444 68.35780 56.58085 83.45310 77.27585 65.51925 61.79977
## 2 71.02271 70.14880 68.04308 57.89820 82.63135 76.70535 64.90877 61.57452
## 3 68.32753 63.76735 64.37093 54.94676 82.61093 75.71779 65.21888 62.14288
## 4 68.41887 66.38714 66.64794 55.55920 83.00960 76.22788 64.63464 59.92810
## 5 67.93017 65.65087 64.28576 55.27598 79.27085 71.85314 59.30017 55.73212
## 6 69.45304 65.46827 65.06856 53.91173 80.98060 72.65978 61.93391 59.30717
##     Mhrt14   Mhrt15   Mhrt16   Mhrt17   Mhrt18   Mhrt19   Mhrt20   Mhrt21
## 1 54.28293 62.77386 62.77495 34.42063 84.11392 68.75583 71.73111 63.04461
## 2 55.40484 63.79248 64.90797 34.22887 85.50200 69.25310 74.13974 64.39619
## 3 53.03750 57.22819 57.83966 32.90279 84.01176 68.94452 70.28699 60.73228
## 4 52.85769 60.30671 59.54003 33.89671 84.01869 68.29370 69.03791 61.25340
## 5 51.88347 58.83446 57.54457 32.59880 82.12542 66.74907 69.61983 62.34093
## 6 52.80272 56.24885 59.64842 32.06101 83.06538 68.58364 72.41842 62.41565
##     Mhrt22   Mhrt23   Mhrt24   Mhrt25   Mhrt26   Mhrt27   Mhrt28   Mhrt29
## 1 58.78964 72.48343 52.11203 86.12941 74.82685 70.56023 73.91031 67.05814
## 2 58.83233 72.63737 50.69955 85.34684 72.98129 68.21013 69.97188 63.76406
## 3 55.67338 68.11922 49.59083 85.98337 72.61346 68.86260 67.81397 61.55353
## 4 58.38126 71.63353 50.76682 86.67011 73.86445 69.35893 73.26773 65.92420
## 5 56.54641 67.02413 46.99435 80.28398 64.40644 64.28314 62.79315 58.35707
## 6 54.78108 66.48640 47.23540 81.61092 66.27261 64.25022 60.32173 56.86928
##     Mhrt30
## 1 75.85464
## 2 71.31218
## 3 68.53863
## 4 72.96168
## 5 62.90109
## 6 63.56727

mod2 <- lmerTest::lmer(value~-1+Type+(1|variable)+(1|District),data=dat2) # intercepts for each district.
lme4::ranef(mod2)

## $District
##                (Intercept)
## Afeef          -4.05660842
## Aflaj          -3.66339915
## Ahsa            4.68887315
## Asser           2.08521227
## Baha            6.05168864
## Beshah          0.71791698
## Bikiriah       -2.48416605
## BneeTameem      1.78450720
## DahranAljanoub -1.95363706
## dawadmi        -0.28073055
## Eastern         5.04771166
## Ghat           -1.74479485
## HafrAlbatin     0.95291811
## Hail           -1.92763609
## Jazan           0.18869470
## Jeddah          6.50572071
## Jouf            2.00767707
## Kharj          -0.40114453
## Leath          -0.57344702
## Madinah        -0.60297890
## MahayelAseer   -2.07101997
## Mahd           -6.01545629
## majmaah        -0.74628515
## Mecca           1.46649932
## Meckwah         2.68709912
## Methneb        -0.69201284
## Najran          0.24854087
## Namas           0.64554037
## NorthernBorder  1.79570933
## Qaseem         -3.04105761
## Qunfuthah       2.74785933
## Qurayat         0.48979144
## Quwayehyah     -1.01402189
## Ress           -1.11830913
## RijalAlmaa     -1.59356618
## Riyadh          2.71556348
## Sabia          -1.17406092
## SaratAbeedah   -2.29712997
## Shagra          1.86804167
## Sharorah       -4.32309629
## Tabuk          -0.38819506
## Taif            2.43751816
## Ula            -1.08712493
## Unaizah        -2.60173974
## WadiAldawaser   0.44837816
## Yanbu           0.04617224
## Zulfi          -1.77601541
## 
## $variable
##        (Intercept)
## Pisa1  -14.0717042
## Pisa2   -0.3202781
## Pisa3   -0.3202781
## Pisa4  -14.4072583
## Pisa5    6.1390701
## Pisa6    7.1048010
## Pisa7   15.8756476
## Prls1    4.0016336
## Prls2   -4.4640226
## Prls3    7.7165015
## Prls4   -8.5921402
## Prls5    2.6958409
## Prls6   -1.3578131
## Mhrt1   32.8491489
## Mhrt2   27.2400363
## Mhrt3   22.4307406
## Mhrt4   10.1951420
## Mhrt5    9.5835140
## Mhrt6    1.2858975
## Mhrt7   -2.0567105
## Mhrt8   -1.9683440
## Mhrt9  -10.3182259
## Mhrt10  14.1660003
## Mhrt11   4.9404746
## Mhrt12  -7.5445911
## Mhrt13 -11.4163461
## Mhrt14 -17.6068157
## Mhrt15 -10.4644644
## Mhrt16 -10.2495227
## Mhrt17 -32.9791367
## Mhrt18  14.7438738
## Mhrt19  -0.9955033
## Mhrt20   2.6167503
## Mhrt21  -6.6111363
## Mhrt22 -11.2374613
## Mhrt23   0.9415789
## Mhrt24 -17.8095911
## Mhrt25  15.3696916
## Mhrt26  -0.6038600
## Mhrt27  -2.1472416
## Mhrt28  -3.4290833
## Mhrt29  -7.1343323
## Mhrt30  -1.7904826
## 
## with conditional variances for "District" "variable"

# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
dotplot(lme4::ranef(mod2))

## $District

## 
## $variable

# This is ranking all the tests from easiest to hardiest after accounting for Test Type.
# This also shows the ranking of districts combining all of the test.
summary(mod2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: value ~ -1 + Type + (1 | variable) + (1 | District)
##    Data: dat2
## 
## REML criterion at convergence: 9438.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.3961 -0.6003 -0.0200  0.5866  4.3249 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  District (Intercept)   7.004   2.647  
##  variable (Intercept) 165.242  12.855  
##  Residual               4.902   2.214  
## Number of obs: 2021, groups:  District, 47; variable, 43
## 
## Fixed effects:
##          Estimate Std. Error     df t value Pr(>|t|)    
## TypeMhrt   64.421      2.379 42.165  27.077  < 2e-16 ***
## TypePisa   46.085      4.875 40.505   9.453 8.42e-12 ***
## TypePrls   74.554      5.264 40.433  14.164  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##          TypMhr TypePs
## TypePisa 0.013        
## TypePrls 0.012  0.006

# exporting the file:
FinDat <- cbind(lme4::ranef(mod2)$District,lme4::ranef(mod)$District)
colnames(FinDat)[1] <- "Exp_Score_Diff_VS_Avg_Distric"
FinDat$District <- rownames(FinDat)
#openxlsx::write.xlsx(FinDat, 'FinDat_July27_B_th2022.xlsx')
# # 
dotplot(lme4::ranef(mod)) #Looking at which Districts are the best performers, what the confidence intervals for their impacts are. Also this is showing the difficulty of each test type.

## $District

## 
## $variable

lme4::ranef(mod) #Print out the point estimates of each Districts impact by test type.

## $District
##                    TypeMhrt    TypePisa   TypePrls
## Afeef          -3.402571002 -6.04324395 -5.0676695
## Aflaj          -3.653130932 -3.74781583 -3.6357123
## Ahsa            4.167511843  4.85119904  7.3288486
## Asser           1.420155484  4.27279886  2.8843523
## Baha            6.150582178  4.80966061  7.1530712
## Beshah          0.023809098  3.83125185  0.4765280
## Bikiriah       -2.347932424 -1.28636808 -4.7742657
## BneeTameem      2.343073061  0.48227513  0.4509611
## DahranAljanoub -1.171324033 -4.25342791 -3.2412197
## dawadmi         0.005164841 -0.64157089 -1.3638047
## Eastern         5.110835794  4.08212199  5.9806325
## Ghat           -1.595121318 -2.53258727 -1.5542294
## HafrAlbatin     1.824926651 -1.35808667 -0.7847683
## Hail           -2.534412121  0.18936851 -1.3802990
## Jazan          -0.418664419  0.70422876  2.8089524
## Jeddah          6.273597893  6.99715382  7.1735524
## Jouf            0.885416120  4.10119859  5.3965234
## Kharj          -0.073798456 -1.56546418 -0.6778041
## Leath          -1.109550028  1.37836572 -0.1865338
## Madinah        -0.570151447  0.12104376 -1.7136000
## MahayelAseer   -2.287301400 -2.35528385 -0.5522396
## Mahd           -5.282047220 -6.86923029 -8.9225385
## majmaah        -0.680902592 -1.13055695 -0.6114212
## Mecca           1.533534555  1.55478820  1.0011961
## Meckwah         2.175174532  3.28384099  4.7048406
## Methneb        -0.447064042 -1.81470311 -0.5790395
## Najran          0.389368482 -0.02302755 -0.1618975
## Namas           1.417155617 -2.97390163  1.1271626
## NorthernBorder  1.836313306  1.10619043  2.4736212
## Qaseem         -2.516304658 -3.82317977 -4.8926183
## Qunfuthah       2.731606316  0.95929410  5.1498656
## Qurayat        -0.417477397  3.42532846  1.6263664
## Quwayehyah     -0.895008221 -0.36517168 -2.4965200
## Ress           -0.350544741 -2.69247338 -3.2527878
## RijalAlmaa     -2.018736310 -1.88135889  1.0613075
## Riyadh          3.155475104  1.74224335  1.6072083
## Sabia          -2.246772641  2.19585752  0.2872346
## SaratAbeedah   -2.649491644  0.48326430 -3.9778662
## Shagra          2.263404536  1.38888197  0.3603107
## Sharorah       -4.490926808 -2.49244288 -5.7956446
## Tabuk          -0.586372413 -0.51767800  0.8457324
## Taif            2.469943104  3.08601890  1.4468943
## Ula            -0.667845433 -2.85924121 -1.0850597
## Unaizah        -3.071298746  0.41164558 -3.9505510
## WadiAldawaser   1.057710132 -1.58232609 -0.2336864
## Yanbu           0.226601347 -1.13150965  0.5832870
## Zulfi          -1.976609547 -1.51737074 -1.0366726
## 
## $variable
##        (Intercept)
## Pisa1  -14.0736902
## Pisa2   -0.3203233
## Pisa3   -0.3203233
## Pisa4  -14.4092917
## Pisa5    6.1399366
## Pisa6    7.1058037
## Pisa7   15.8778881
## Prls1    4.0021984
## Prls2   -4.4646526
## Prls3    7.7175905
## Prls4   -8.5933529
## Prls5    2.6962213
## Prls6   -1.3580048
## Mhrt1   32.8537849
## Mhrt2   27.2438807
## Mhrt3   22.4339063
## Mhrt4   10.1965809
## Mhrt5    9.5848665
## Mhrt6    1.2860790
## Mhrt7   -2.0570008
## Mhrt8   -1.9686218
## Mhrt9  -10.3196821
## Mhrt10  14.1679995
## Mhrt11   4.9411719
## Mhrt12  -7.5456559
## Mhrt13 -11.4179573
## Mhrt14 -17.6093006
## Mhrt15 -10.4659412
## Mhrt16 -10.2509692
## Mhrt17 -32.9837910
## Mhrt18  14.7459546
## Mhrt19  -0.9956438
## Mhrt20   2.6171196
## Mhrt21  -6.6120693
## Mhrt22 -11.2390472
## Mhrt23   0.9417118
## Mhrt24 -17.8121046
## Mhrt25  15.3718607
## Mhrt26  -0.6039452
## Mhrt27  -2.1475447
## Mhrt28  -3.4295673
## Mhrt29  -7.1353392
## Mhrt30  -1.7907353
## 
## with conditional variances for "District" "variable"

qqnorm(resid(mod)) #There does not seem to be much of heteroskedasticity, and residuals seem normally distributed.

plot(mod)

# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
# # # # # # # # # # # # # # # # # # Final Ranking
dotplot(lme4::ranef(mod2))

## $District

## 
## $variable

Modeling District Average Test Scores

Ahmed Alsayer

7/27/2022