library(haven)
library(tidyverse)
library(Hmisc)
library(lme4)
library(lmerTest)
This week, we will continue using the hsbmerged dataset that we worked with in class. This is a sample of more than 7,000 students nested within 160 schools collected in 1982. Part 1: Reading Achievement
hsb <- read_dta("hsbmerged.dta")
glimpse(hsb)
Rows: 7,185
Columns: 12
$ schoolid <chr> "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1...
$ minority <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, ...
$ female <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, ...
$ ses <dbl> -1.528, -0.588, -0.528, -0.668, -0.158, 0.022, -0.618, -0.998, -0.888, -0.458, -1.448, -0....
$ mathach <dbl> 5.876, 19.708, 20.349, 8.781, 17.898, 4.583, -2.832, 0.523, 1.527, 21.521, 9.475, 16.057, ...
$ size <dbl> 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, ...
$ sector <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ pracad <dbl> 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, ...
$ disclim <dbl> 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597,...
$ himinty <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ...
$ meanses <dbl> -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0...
$ readach <dbl> 51.96463, 39.28524, 43.81590, 52.01301, 38.99202, 39.04975, 54.93985, 58.78317, 53.67974, ...
summary(hsb)
schoolid minority female ses mathach size
Length:7185 Min. :0.0000 Min. :0.0000 Min. :-3.758000 Min. :-2.832 Min. : 100
Class :character 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:-0.538000 1st Qu.: 7.275 1st Qu.: 565
Mode :character Median :0.0000 Median :1.0000 Median : 0.002000 Median :13.131 Median :1016
Mean :0.2747 Mean :0.5282 Mean : 0.000143 Mean :12.748 Mean :1057
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 0.602000 3rd Qu.:18.317 3rd Qu.:1436
Max. :1.0000 Max. :1.0000 Max. : 2.692000 Max. :24.993 Max. :2713
sector pracad disclim himinty meanses readach
Min. :0.0000 Min. :0.0000 Min. :-2.4160 Min. :0.00 Min. :-1.188000 Min. :17.68
1st Qu.:0.0000 1st Qu.:0.3200 1st Qu.:-0.8170 1st Qu.:0.00 1st Qu.:-0.317000 1st Qu.:49.75
Median :0.0000 Median :0.5300 Median :-0.2310 Median :0.00 Median : 0.038000 Median :56.59
Mean :0.4931 Mean :0.5345 Mean :-0.1319 Mean :0.28 Mean : 0.006138 Mean :55.08
3rd Qu.:1.0000 3rd Qu.:0.7000 3rd Qu.: 0.4600 3rd Qu.:1.00 3rd Qu.: 0.333000 3rd Qu.:61.56
Max. :1.0000 Max. :1.0000 Max. : 2.7560 Max. :1.00 Max. : 0.831000 Max. :85.06
label(hsb$readach) = "Std. Reading Score"
label(hsb$schoolid) = "School Identification Number"
hsb.clean <- hsb %>%
mutate(.,
sector.factor = as.factor(sector),
female.factor = as.factor(female),
minority.factor = as.factor(minority))
levels(hsb.clean$sector.factor)=c("Public", "Parochial")
levels(hsb.clean$female.factor)=c("Male", "Female")
levels(hsb.clean$minority.factor)=c("Non-Minority", "Minority")
str(hsb.clean$sector.factor)
Factor w/ 2 levels "Public","Parochial": 1 1 1 1 1 1 1 1 1 1 ...
table(hsb.clean$sector.factor)
Public Parochial
3642 3543
glimpse(hsb.clean)
Rows: 7,185
Columns: 15
$ schoolid <labelled> "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224"...
$ minority <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,...
$ female <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,...
$ ses <dbl> -1.528, -0.588, -0.528, -0.668, -0.158, 0.022, -0.618, -0.998, -0.888, -0.458, -1.4...
$ mathach <dbl> 5.876, 19.708, 20.349, 8.781, 17.898, 4.583, -2.832, 0.523, 1.527, 21.521, 9.475, 1...
$ size <dbl> 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842...
$ sector <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ pracad <dbl> 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35,...
$ disclim <dbl> 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597,...
$ himinty <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ meanses <dbl> -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0....
$ readach <labelled> 51.96463, 39.28524, 43.81590, 52.01301, 38.99202, 39.04975, 54.93985, 58.78317...
$ sector.factor <fct> Public, Public, Public, Public, Public, Public, Public, Public, Public, Public, Pub...
$ female.factor <fct> Female, Female, Male, Male, Male, Male, Female, Male, Female, Male, Female, Female,...
$ minority.factor <fct> Non-Minority, Non-Minority, Non-Minority, Non-Minority, Non-Minority, Non-Minority,...
summary(hsb.clean)
schoolid minority female ses mathach size
Length:7185 Min. :0.0000 Min. :0.0000 Min. :-3.758000 Min. :-2.832 Min. : 100
Class1:labelled 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:-0.538000 1st Qu.: 7.275 1st Qu.: 565
Class2:character Median :0.0000 Median :1.0000 Median : 0.002000 Median :13.131 Median :1016
Mode :character Mean :0.2747 Mean :0.5282 Mean : 0.000143 Mean :12.748 Mean :1057
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 0.602000 3rd Qu.:18.317 3rd Qu.:1436
Max. :1.0000 Max. :1.0000 Max. : 2.692000 Max. :24.993 Max. :2713
sector pracad disclim himinty meanses readach
Min. :0.0000 Min. :0.0000 Min. :-2.4160 Min. :0.00 Min. :-1.188000 Min. :17.68
1st Qu.:0.0000 1st Qu.:0.3200 1st Qu.:-0.8170 1st Qu.:0.00 1st Qu.:-0.317000 1st Qu.:49.75
Median :0.0000 Median :0.5300 Median :-0.2310 Median :0.00 Median : 0.038000 Median :56.59
Mean :0.4931 Mean :0.5345 Mean :-0.1319 Mean :0.28 Mean : 0.006138 Mean :55.08
3rd Qu.:1.0000 3rd Qu.:0.7000 3rd Qu.: 0.4600 3rd Qu.:1.00 3rd Qu.: 0.333000 3rd Qu.:61.56
Max. :1.0000 Max. :1.0000 Max. : 2.7560 Max. :1.00 Max. : 0.831000 Max. :85.06
sector.factor female.factor minority.factor
Public :3642 Male :3390 Non-Minority:5211
Parochial:3543 Female:3795 Minority :1974
describe(hsb.clean$readach)
hsb.clean$readach : Std. Reading Score Format:%9.0g
n missing distinct Info Mean Gmd .05 .10 .25 .50 .75 .90
7185 0 7180 1 55.08 10.19 37.76 42.13 49.75 56.59 61.56 65.53
.95
67.74
lowest : 17.68499 17.78988 18.98676 19.92461 20.25478, highest: 78.85266 78.97929 80.71032 80.83939 85.05837
hist(hsb.clean$readach,
main = "Distribution of Reading Achievement Scores",
sub = "N = 7,185. Students from the High School and Beyond Survey (1988).",
xlab = "Standardized Reading Achievement Score")
model.null <- lmer(readach ~ (1|schoolid), REML = FALSE, data = hsb.clean)
summary(model.null)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: readach ~ (1 | schoolid)
Data: hsb.clean
AIC BIC logLik deviance df.resid
50395.7 50416.4 -25194.9 50389.7 7182
Scaled residuals:
Min 1Q Median 3Q Max
-4.4250 -0.5740 0.0116 0.5943 4.3166
Random effects:
Groups Name Variance Std.Dev.
schoolid (Intercept) 22.94 4.789
Residual 61.06 7.814
Number of obs: 7185, groups: schoolid, 160
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 54.6081 0.3906 160.4370 139.8 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
null.ICC <- 22.94 / (22.94+61.06)
null.ICC
[1] 0.2730952
rand(model.null)
ANOVA-like table for random-effects: Single term deletions
Model:
readach ~ (1 | schoolid)
npar logLik AIC LRT Df Pr(>Chisq)
<none> 3 -25195 50396
(1 | schoolid) 2 -26126 52255 1861.8 1 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
model.regular <-lm(readach ~ NULL, data = hsb.clean)
summary(model.regular)
Call:
lm(formula = readach ~ NULL, data = hsb.clean)
Residuals:
Std. Reading Score
Min 1Q Median 3Q Max
-37.400 -5.335 1.506 6.471 29.974
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 55.0848 0.1083 508.5 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Residual standard error: 9.182 on 7184 degrees of freedom
Is it worth conducting MLM on these data? Yes, it is worth conducting MLM on these data.
In the null model, this number is measuring the adjusted mean when considering clustering. The results show us there is a clustering effect.
Part 2: SES
glimpse(hsb.clean)
Rows: 7,185
Columns: 15
$ schoolid <labelled> "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224", "1224"...
$ minority <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,...
$ female <dbl> 1, 1, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1,...
$ ses <dbl> -1.528, -0.588, -0.528, -0.668, -0.158, 0.022, -0.618, -0.998, -0.888, -0.458, -1.4...
$ mathach <dbl> 5.876, 19.708, 20.349, 8.781, 17.898, 4.583, -2.832, 0.523, 1.527, 21.521, 9.475, 1...
$ size <dbl> 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842, 842...
$ sector <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ pracad <dbl> 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35, 0.35,...
$ disclim <dbl> 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597, 1.597,...
$ himinty <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,...
$ meanses <dbl> -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0.428, -0....
$ readach <labelled> 51.96463, 39.28524, 43.81590, 52.01301, 38.99202, 39.04975, 54.93985, 58.78317...
$ sector.factor <fct> Public, Public, Public, Public, Public, Public, Public, Public, Public, Public, Pub...
$ female.factor <fct> Female, Female, Male, Male, Male, Male, Female, Male, Female, Male, Female, Female,...
$ minority.factor <fct> Non-Minority, Non-Minority, Non-Minority, Non-Minority, Non-Minority, Non-Minority,...
summary(hsb.clean)
schoolid minority female ses mathach size
Length:7185 Min. :0.0000 Min. :0.0000 Min. :-3.758000 Min. :-2.832 Min. : 100
Class1:labelled 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:-0.538000 1st Qu.: 7.275 1st Qu.: 565
Class2:character Median :0.0000 Median :1.0000 Median : 0.002000 Median :13.131 Median :1016
Mode :character Mean :0.2747 Mean :0.5282 Mean : 0.000143 Mean :12.748 Mean :1057
3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.: 0.602000 3rd Qu.:18.317 3rd Qu.:1436
Max. :1.0000 Max. :1.0000 Max. : 2.692000 Max. :24.993 Max. :2713
sector pracad disclim himinty meanses readach
Min. :0.0000 Min. :0.0000 Min. :-2.4160 Min. :0.00 Min. :-1.188000 Min. :17.68
1st Qu.:0.0000 1st Qu.:0.3200 1st Qu.:-0.8170 1st Qu.:0.00 1st Qu.:-0.317000 1st Qu.:49.75
Median :0.0000 Median :0.5300 Median :-0.2310 Median :0.00 Median : 0.038000 Median :56.59
Mean :0.4931 Mean :0.5345 Mean :-0.1319 Mean :0.28 Mean : 0.006138 Mean :55.08
3rd Qu.:1.0000 3rd Qu.:0.7000 3rd Qu.: 0.4600 3rd Qu.:1.00 3rd Qu.: 0.333000 3rd Qu.:61.56
Max. :1.0000 Max. :1.0000 Max. : 2.7560 Max. :1.00 Max. : 0.831000 Max. :85.06
sector.factor female.factor minority.factor
Public :3642 Male :3390 Non-Minority:5211
Parochial:3543 Female:3795 Minority :1974
describe(hsb.clean$ses)
hsb.clean$ses Format:%12.0g
n missing distinct Info Mean Gmd .05 .10 .25 .50 .75
7185 0 373 1 0.0001434 0.8877 -1.318 -1.038 -0.538 0.002 0.602
.90 .95
1.022 1.212
lowest : -3.758 -2.838 -2.508 -2.498 -2.468, highest: 1.652 1.732 1.762 1.832 2.692
hist(hsb.clean$ses,
main = "Distribution of SES",
sub = "N = 7,185. Students from the High School and Beyond Survey (1988).",
xlab = "SES")
model.null.ses <- lmer(ses ~ (1|schoolid), REML = FALSE, data = hsb.clean)
summary(model.null.ses)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: ses ~ (1 | schoolid)
Data: hsb.clean
AIC BIC logLik deviance df.resid
15047.0 15067.7 -7520.5 15041.0 7182
Scaled residuals:
Min 1Q Median 3Q Max
-5.4793 -0.6724 0.0179 0.7065 4.2632
Random effects:
Groups Name Variance Std.Dev.
schoolid (Intercept) 0.1597 0.3997
Residual 0.4462 0.6680
Number of obs: 7185, groups: schoolid, 160
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) -0.005674 0.032647 160.189296 -0.174 0.862
null.ICC.SES <- 0.1597 / (0.1597+0.4462)
null.ICC.SES
[1] 0.2635748
model.regular.ses <-lm(ses ~ NULL, data = hsb.clean)
summary(model.regular.ses)
Call:
lm(formula = ses ~ NULL, data = hsb.clean)
Residuals:
Min 1Q Median 3Q Max
-3.7581 -0.5381 0.0019 0.6019 2.6919
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 0.0001434 0.0091944 0.016 0.988
Residual standard error: 0.7794 on 7184 degrees of freedom
Given that this is not a typical outcome (like achievement), what does this result “mean” in your own words?
This is a tough one to answer. If you consider the ICC, it suggests that a MLM is needed. However, I think it is important how you use the NULL model. In this case, I believe that the model is trying to tell us that we “have our arrows wrong.”
This is interesting. The number is measuring the adjusted mean of SES, when considering the effect of clustering. However, the p-value is 0.988, which is showing very little significance.