#Load packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(Hmisc) # label()
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
##
## The following objects are masked from 'package:dplyr':
##
## src, summarize
##
## The following objects are masked from 'package:base':
##
## format.pval, units
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(lmerTest)
##
## Attaching package: 'lmerTest'
##
## The following object is masked from 'package:lme4':
##
## lmer
##
## The following object is masked from 'package:stats':
##
## step
library(haven)
#Load data
hsbmerged <- read_dta("hsbmerged (2).dta")
glimpse(hsbmerged)
## Rows: 7,185
## Columns: 12
## $ schoolid <chr> "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224…
## $ minority <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0…
## $ female <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1…
## $ ses <dbl> -1.528, -0.588, -0.528, -0.668, -0.158, 0.022, -0.618, -0.998…
## $ mathach <dbl> 5.876, 19.708, 20.349, 8.781, 17.898, 4.583, -2.832, 0.523, 1…
## $ size <dbl> 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 8…
## $ sector <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ pracad <dbl> 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0…
## $ disclim <dbl> 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597…
## $ himinty <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ meanses <dbl> -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.42…
## $ readach <dbl> 51.96463, 39.28524, 43.81590, 52.01301, 38.99202, 39.04975, 5…
psych::describe(hsbmerged,
fast = TRUE)
## Warning in FUN(newX[, i], ...): no non-missing arguments to min; returning Inf
## Warning in FUN(newX[, i], ...): no non-missing arguments to max; returning -Inf
## vars n mean sd min max range se
## schoolid 1 7185 NaN NA Inf -Inf -Inf NA
## minority 2 7185 0.27 0.45 0.00 1.00 1.00 0.01
## female 3 7185 0.53 0.50 0.00 1.00 1.00 0.01
## ses 4 7185 0.00 0.78 -3.76 2.69 6.45 0.01
## mathach 5 7185 12.75 6.88 -2.83 24.99 27.82 0.08
## size 6 7185 1056.86 604.17 100.00 2713.00 2613.00 7.13
## sector 7 7185 0.49 0.50 0.00 1.00 1.00 0.01
## pracad 8 7185 0.53 0.25 0.00 1.00 1.00 0.00
## disclim 9 7185 -0.13 0.94 -2.42 2.76 5.17 0.01
## himinty 10 7185 0.28 0.45 0.00 1.00 1.00 0.01
## meanses 11 7185 0.01 0.41 -1.19 0.83 2.02 0.00
## readach 12 7185 55.08 9.18 17.68 85.06 67.37 0.11
#designating categorical variables
hsbmerged.1 <- hsbmerged %>%
mutate(., female.factor = as_factor(female))
hsbmerged.2 <- hsbmerged.1 %>%
mutate(., sector.factor = as_factor(sector))
glimpse(hsbmerged.2)
## Rows: 7,185
## Columns: 14
## $ schoolid <chr> "1224", "1224", "1224", "1224", "1224", "1224", "1224", …
## $ minority <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,…
## $ female <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,…
## $ ses <dbl> -1.528, -0.588, -0.528, -0.668, -0.158, 0.022, -0.618, -…
## $ mathach <dbl> 5.876, 19.708, 20.349, 8.781, 17.898, 4.583, -2.832, 0.5…
## $ size <dbl> 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 8…
## $ sector <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ pracad <dbl> 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.…
## $ disclim <dbl> 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, …
## $ himinty <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
## $ meanses <dbl> -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, …
## $ readach <dbl> 51.96463, 39.28524, 43.81590, 52.01301, 38.99202, 39.049…
## $ female.factor <fct> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1,…
## $ sector.factor <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
#Null Model
model.null <- lmer(mathach ~ (1|schoolid), REML = FALSE, data = hsbmerged.2)
summary(model.null)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: mathach ~ (1 | schoolid)
## Data: hsbmerged.2
##
## AIC BIC logLik deviance df.resid
## 47121.8 47142.4 -23557.9 47115.8 7182
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.06262 -0.75365 0.02676 0.76070 2.74184
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid (Intercept) 8.553 2.925
## Residual 39.148 6.257
## Number of obs: 7185, groups: schoolid, 160
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 12.6371 0.2436 157.6209 51.87 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Calculate ICC
null.ICC <- 8.553/(8.553 + 39.148)
null.ICC
## [1] 0.1793044
ICC = 0.18 > 0.05 – suggests MLM can be used
lmerTest::rand(model.null)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## mathach ~ (1 | schoolid)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 3 -23558 47122
## (1 | schoolid) 2 -24050 48104 983.92 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p <.05 – suggests deleting the L2 RI is a significant worse model fit. Retain L2 RI.
#Run Conditional RI Model
model.1 <- lmer(mathach ~ female.factor + ses + size + sector.factor + (1|schoolid), REML=FALSE, data = hsbmerged.2)
summary(model.1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: mathach ~ female.factor + ses + size + sector.factor + (1 | schoolid)
## Data: hsbmerged.2
##
## AIC BIC logLik deviance df.resid
## 46564.2 46612.3 -23275.1 46550.2 7178
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2032 -0.7404 0.0323 0.7584 2.8130
##
## Random effects:
## Groups Name Variance Std.Dev.
## schoolid (Intercept) 3.27 1.808
## Residual 36.81 6.067
## Number of obs: 7185, groups: schoolid, 160
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 1.168e+01 4.543e-01 1.613e+02 25.707 < 2e-16 ***
## female.factor1 -1.201e+00 1.641e-01 6.464e+03 -7.318 2.81e-13 ***
## ses 2.345e+00 1.050e-01 6.643e+03 22.325 < 2e-16 ***
## size 4.972e-04 2.894e-04 1.506e+02 1.718 0.0878 .
## sector.factor1 2.378e+00 3.633e-01 1.465e+02 6.544 9.40e-10 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) fml.f1 ses size
## femal.fctr1 -0.202
## ses 0.028 0.057
## size -0.858 0.016 -0.008
## sectr.fctr1 -0.671 0.006 -0.089 0.447
library(modelsummary)
##
## Attaching package: 'modelsummary'
## The following object is masked from 'package:Hmisc':
##
## Mean
library(broom.mixed)
models <- list(model.null, model.1)
modelsummary(models, output = "markdown")
| Model 1 | Model 2 | |
|---|---|---|
| (Intercept) | 12.637 | 11.679 |
| (0.244) | (0.454) | |
| female.factor1 | -1.201 | |
| (0.164) | ||
| ses | 2.345 | |
| (0.105) | ||
| size | 0.000 | |
| (0.000) | ||
| sector.factor1 | 2.378 | |
| (0.363) | ||
| SD (Intercept schoolid) | 2.925 | 1.808 |
| SD (Observations) | 6.257 | 6.067 |
| Num.Obs. | 7185 | 7185 |
| R2 Marg. | 0.000 | 0.127 |
| R2 Cond. | 0.179 | 0.198 |
| AIC | 47122.8 | 46585.3 |
| BIC | 47143.4 | 46633.5 |
| ICC | 0.2 | 0.1 |
| RMSE | 6.19 | 6.01 |
#Model Comparison via LRT
anova(model.null, model.1)
## Data: hsbmerged.2
## Models:
## model.null: mathach ~ (1 | schoolid)
## model.1: mathach ~ female.factor + ses + size + sector.factor + (1 | schoolid)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## model.null 3 47122 47142 -23558 47116
## model.1 7 46564 46612 -23275 46550 565.65 4 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
p < .05 - suggests that the inclusion of predictors was a significant improvement in model fit.
#Calculate WG-PRV #Also see spreadsheet
((39.148-38.845)/39.148)*100
## [1] 0.7739859
The inclusion of the predictors reduced the within-group variance by 0.77%.
#Calculate BG-PRV
((8.553-8.109)/8.553)*100
## [1] 5.191161
The inclusion of the predictors reduced the between-group variance by 5.19%. Model is better at reducing between-group variance than within-group variance.
#Calculate Multi-level R^2
((1-(38.845+8.109)/(39.148+8.553)))
## [1] 0.01566005
#Turning off scientific notation
options(scipen=999)
#Add random slope for ses
model.2.ses.slp <- lmer(mathach ~ female.factor + ses + size + sector.factor +
(ses|schoolid), REML = FALSE, data = hsbmerged.2)
summary(model.2.ses.slp)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
## method [lmerModLmerTest]
## Formula: mathach ~ female.factor + ses + size + sector.factor + (ses |
## schoolid)
## Data: hsbmerged.2
##
## AIC BIC logLik deviance df.resid
## 46560.0 46621.9 -23271.0 46542.0 7176
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2053 -0.7384 0.0281 0.7570 2.8322
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## schoolid (Intercept) 3.5435 1.8824
## ses 0.3548 0.5956 0.60
## Residual 36.6025 6.0500
## Number of obs: 7185, groups: schoolid, 160
##
## Fixed effects:
## Estimate Std. Error df t value
## (Intercept) 11.4758217 0.4505476 157.9527209 25.471
## female.factor1 -1.1940658 0.1641275 6560.5508919 -7.275
## ses 2.3527077 0.1151975 160.8800989 20.423
## size 0.0004656 0.0002850 141.5985243 1.634
## sector.factor1 2.7891409 0.3651071 149.5881056 7.639
## Pr(>|t|)
## (Intercept) < 0.0000000000000002 ***
## female.factor1 0.000000000000386 ***
## ses < 0.0000000000000002 ***
## size 0.104
## sector.factor1 0.000000000002416 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) fml.f1 ses size
## femal.fctr1 -0.207
## ses 0.109 0.055
## size -0.850 0.019 -0.006
## sectr.fctr1 -0.660 0.009 -0.078 0.436
#LRT
lmerTest::rand(model.2.ses.slp)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## mathach ~ female.factor + ses + size + sector.factor + (ses | schoolid)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 9 -23271 46560
## ses in (ses | schoolid) 7 -23275 46564 8.1673 2 0.01685 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
models2 <- list(model.null, model.1,model.2.ses.slp)
modelsummary(models2, output = "markdown")
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| (Intercept) | 12.637 | 11.679 | 11.476 |
| (0.244) | (0.454) | (0.451) | |
| female.factor1 | -1.201 | -1.194 | |
| (0.164) | (0.164) | ||
| ses | 2.345 | 2.353 | |
| (0.105) | (0.115) | ||
| size | 0.000 | 0.000 | |
| (0.000) | (0.000) | ||
| sector.factor1 | 2.378 | 2.789 | |
| (0.363) | (0.365) | ||
| SD (Intercept schoolid) | 2.925 | 1.808 | 1.882 |
| SD (ses schoolid) | 0.596 | ||
| Cor (Intercept~ses schoolid) | 0.604 | ||
| SD (Observations) | 6.257 | 6.067 | 6.050 |
| Num.Obs. | 7185 | 7185 | 7185 |
| R2 Marg. | 0.000 | 0.127 | 0.138 |
| R2 Cond. | 0.179 | 0.198 | 0.218 |
| AIC | 47122.8 | 46585.3 | 46580.9 |
| BIC | 47143.4 | 46633.5 | 46642.9 |
| ICC | 0.2 | 0.1 | 0.1 |
| RMSE | 6.19 | 6.01 | 5.99 |