#Load packages

library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.1     ✔ stringr 1.4.1
## ✔ readr   2.1.3     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(Hmisc) # label()
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## 
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## 
## The following objects are masked from 'package:base':
## 
##     format.pval, units
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(lmerTest)
## 
## Attaching package: 'lmerTest'
## 
## The following object is masked from 'package:lme4':
## 
##     lmer
## 
## The following object is masked from 'package:stats':
## 
##     step
library(haven)

#Load data

hsbmerged <- read_dta("hsbmerged (2).dta")

glimpse(hsbmerged)
## Rows: 7,185
## Columns: 12
## $ schoolid <chr> "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224…
## $ minority <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ female   <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1…
## $ ses      <dbl> -1.528, -0.588, -0.528, -0.668, -0.158, 0.022, -0.618, -0.998…
## $ mathach  <dbl> 5.876, 19.708, 20.349, 8.781, 17.898, 4.583, -2.832, 0.523, 1…
## $ size     <dbl> 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 8…
## $ sector   <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ pracad   <dbl> 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0…
## $ disclim  <dbl> 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597…
## $ himinty  <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ meanses  <dbl> -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.42…
## $ readach  <dbl> 51.96463, 39.28524, 43.81590, 52.01301, 38.99202, 39.04975, 5…
psych::describe(hsbmerged, 
                fast = TRUE)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
##          vars    n    mean     sd    min     max   range   se
## schoolid    1 7185     NaN     NA    Inf    -Inf    -Inf   NA
## minority    2 7185    0.27   0.45   0.00    1.00    1.00 0.01
## female      3 7185    0.53   0.50   0.00    1.00    1.00 0.01
## ses         4 7185    0.00   0.78  -3.76    2.69    6.45 0.01
## mathach     5 7185   12.75   6.88  -2.83   24.99   27.82 0.08
## size        6 7185 1056.86 604.17 100.00 2713.00 2613.00 7.13
## sector      7 7185    0.49   0.50   0.00    1.00    1.00 0.01
## pracad      8 7185    0.53   0.25   0.00    1.00    1.00 0.00
## disclim     9 7185   -0.13   0.94  -2.42    2.76    5.17 0.01
## himinty    10 7185    0.28   0.45   0.00    1.00    1.00 0.01
## meanses    11 7185    0.01   0.41  -1.19    0.83    2.02 0.00
## readach    12 7185   55.08   9.18  17.68   85.06   67.37 0.11

#designating categorical variables

hsbmerged.1 <- hsbmerged %>%
  mutate(., female.factor = as_factor(female))

hsbmerged.2 <- hsbmerged.1 %>%
  mutate(., sector.factor = as_factor(sector))

glimpse(hsbmerged.2) 
## Rows: 7,185
## Columns: 14
## $ schoolid      <chr> "1224", "1224", "1224", "1224", "1224", "1224", "1224", …
## $ minority      <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ female        <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,…
## $ ses           <dbl> -1.528, -0.588, -0.528, -0.668, -0.158, 0.022, -0.618, -…
## $ mathach       <dbl> 5.876, 19.708, 20.349, 8.781, 17.898, 4.583, -2.832, 0.5…
## $ size          <dbl> 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 8…
## $ sector        <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ pracad        <dbl> 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.…
## $ disclim       <dbl> 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, …
## $ himinty       <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ meanses       <dbl> -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, …
## $ readach       <dbl> 51.96463, 39.28524, 43.81590, 52.01301, 38.99202, 39.049…
## $ female.factor <fct> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,…
## $ sector.factor <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…

#Null Model

model.null <- lmer(mathach ~ (1|schoolid), REML = FALSE, data = hsbmerged.2)
summary(model.null)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: mathach ~ (1 | schoolid)
##    Data: hsbmerged.2
## 
##      AIC      BIC   logLik deviance df.resid 
##  47121.8  47142.4 -23557.9  47115.8     7182 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.06262 -0.75365  0.02676  0.76070  2.74184 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolid (Intercept)  8.553   2.925   
##  Residual             39.148   6.257   
## Number of obs: 7185, groups:  schoolid, 160
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  12.6371     0.2436 157.6209   51.87   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

#Calculate ICC

null.ICC <- 8.553/(8.553 + 39.148)
null.ICC
## [1] 0.1793044

ICC = 0.18 > 0.05 – suggests MLM can be used

lmerTest::rand(model.null)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## mathach ~ (1 | schoolid)
##                npar logLik   AIC    LRT Df Pr(>Chisq)    
## <none>            3 -23558 47122                         
## (1 | schoolid)    2 -24050 48104 983.92  1  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

p <.05 – suggests deleting the L2 RI is a significant worse model fit. Retain L2 RI.

#Run Conditional RI Model

model.1 <- lmer(mathach ~ female.factor + ses + size + sector.factor + (1|schoolid), REML=FALSE, data = hsbmerged.2)
summary(model.1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: mathach ~ female.factor + ses + size + sector.factor + (1 | schoolid)
##    Data: hsbmerged.2
## 
##      AIC      BIC   logLik deviance df.resid 
##  46564.2  46612.3 -23275.1  46550.2     7178 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2032 -0.7404  0.0323  0.7584  2.8130 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  schoolid (Intercept)  3.27    1.808   
##  Residual             36.81    6.067   
## Number of obs: 7185, groups:  schoolid, 160
## 
## Fixed effects:
##                  Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)     1.168e+01  4.543e-01  1.613e+02  25.707  < 2e-16 ***
## female.factor1 -1.201e+00  1.641e-01  6.464e+03  -7.318 2.81e-13 ***
## ses             2.345e+00  1.050e-01  6.643e+03  22.325  < 2e-16 ***
## size            4.972e-04  2.894e-04  1.506e+02   1.718   0.0878 .  
## sector.factor1  2.378e+00  3.633e-01  1.465e+02   6.544 9.40e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) fml.f1 ses    size  
## femal.fctr1 -0.202                     
## ses          0.028  0.057              
## size        -0.858  0.016 -0.008       
## sectr.fctr1 -0.671  0.006 -0.089  0.447
library(modelsummary)
## 
## Attaching package: 'modelsummary'
## The following object is masked from 'package:Hmisc':
## 
##     Mean
library(broom.mixed)

models <- list(model.null, model.1)
modelsummary(models, output = "markdown")
Model 1 Model 2
(Intercept) 12.637 11.679
(0.244) (0.454)
female.factor1 -1.201
(0.164)
ses 2.345
(0.105)
size 0.000
(0.000)
sector.factor1 2.378
(0.363)
SD (Intercept schoolid) 2.925 1.808
SD (Observations) 6.257 6.067
Num.Obs. 7185 7185
R2 Marg. 0.000 0.127
R2 Cond. 0.179 0.198
AIC 47122.8 46585.3
BIC 47143.4 46633.5
ICC 0.2 0.1
RMSE 6.19 6.01

#Model Comparison via LRT

anova(model.null, model.1)
## Data: hsbmerged.2
## Models:
## model.null: mathach ~ (1 | schoolid)
## model.1: mathach ~ female.factor + ses + size + sector.factor + (1 | schoolid)
##            npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)    
## model.null    3 47122 47142 -23558    47116                         
## model.1       7 46564 46612 -23275    46550 565.65  4  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

p < .05 - suggests that the inclusion of predictors was a significant improvement in model fit.

#Calculate WG-PRV #Also see spreadsheet

((39.148-38.845)/39.148)*100
## [1] 0.7739859

The inclusion of the predictors reduced the within-group variance by 0.77%.

#Calculate BG-PRV

((8.553-8.109)/8.553)*100
## [1] 5.191161

The inclusion of the predictors reduced the between-group variance by 5.19%. Model is better at reducing between-group variance than within-group variance.

#Calculate Multi-level R^2

((1-(38.845+8.109)/(39.148+8.553)))
## [1] 0.01566005

#Turning off scientific notation

options(scipen=999)

#Add random slope for ses

model.2.ses.slp <- lmer(mathach ~ female.factor + ses + size + sector.factor + 
                          (ses|schoolid), REML = FALSE, data = hsbmerged.2)
summary(model.2.ses.slp)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: mathach ~ female.factor + ses + size + sector.factor + (ses |  
##     schoolid)
##    Data: hsbmerged.2
## 
##      AIC      BIC   logLik deviance df.resid 
##  46560.0  46621.9 -23271.0  46542.0     7176 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2053 -0.7384  0.0281  0.7570  2.8322 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  schoolid (Intercept)  3.5435  1.8824       
##           ses          0.3548  0.5956   0.60
##  Residual             36.6025  6.0500       
## Number of obs: 7185, groups:  schoolid, 160
## 
## Fixed effects:
##                    Estimate   Std. Error           df t value
## (Intercept)      11.4758217    0.4505476  157.9527209  25.471
## female.factor1   -1.1940658    0.1641275 6560.5508919  -7.275
## ses               2.3527077    0.1151975  160.8800989  20.423
## size              0.0004656    0.0002850  141.5985243   1.634
## sector.factor1    2.7891409    0.3651071  149.5881056   7.639
##                            Pr(>|t|)    
## (Intercept)    < 0.0000000000000002 ***
## female.factor1    0.000000000000386 ***
## ses            < 0.0000000000000002 ***
## size                          0.104    
## sector.factor1    0.000000000002416 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) fml.f1 ses    size  
## femal.fctr1 -0.207                     
## ses          0.109  0.055              
## size        -0.850  0.019 -0.006       
## sectr.fctr1 -0.660  0.009 -0.078  0.436

#LRT

lmerTest::rand(model.2.ses.slp)
## ANOVA-like table for random-effects: Single term deletions
## 
## Model:
## mathach ~ female.factor + ses + size + sector.factor + (ses | schoolid)
##                         npar logLik   AIC    LRT Df Pr(>Chisq)  
## <none>                     9 -23271 46560                       
## ses in (ses | schoolid)    7 -23275 46564 8.1673  2    0.01685 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
models2 <- list(model.null, model.1,model.2.ses.slp)
modelsummary(models2, output = "markdown")
Model 1 Model 2 Model 3
(Intercept) 12.637 11.679 11.476
(0.244) (0.454) (0.451)
female.factor1 -1.201 -1.194
(0.164) (0.164)
ses 2.345 2.353
(0.105) (0.115)
size 0.000 0.000
(0.000) (0.000)
sector.factor1 2.378 2.789
(0.363) (0.365)
SD (Intercept schoolid) 2.925 1.808 1.882
SD (ses schoolid) 0.596
Cor (Intercept~ses schoolid) 0.604
SD (Observations) 6.257 6.067 6.050
Num.Obs. 7185 7185 7185
R2 Marg. 0.000 0.127 0.138
R2 Cond. 0.179 0.198 0.218
AIC 47122.8 46585.3 46580.9
BIC 47143.4 46633.5 46642.9
ICC 0.2 0.1 0.1
RMSE 6.19 6.01 5.99