This week, we continue with the data from the US Sustaining Effects Study (Carter 1984), which attempted to assess the sustaining impacts of compensatory schooling for students over time. We will dig into nonlinear growth patterns and heterogeneous variance models, two advanced topics in growth modeling.
suppressPackageStartupMessages(library(tidyverse))
library(lme4)
Loading required package: Matrix
Attaching package: ‘Matrix’
The following objects are masked from ‘package:tidyr’:
expand, pack, unpack
egmerged <- haven::read_dta("egmerged.dta")
glimpse(egmerged)
Rows: 7,230
Columns: 12
$ schoolid <dbl> 3430, 3610, 2180, 3040, 3221, 3360, 4450, 4390, 3610, 361…
$ childid <dbl> 289376791, 288278731, 292789461, 275898121, 300246721, 24…
$ year <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ grade <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ math <dbl> -3.887, -4.055, -3.525, -3.592, -3.525, -3.068, -2.596, -…
$ retained <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ female <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, …
$ black <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, …
$ hispanic <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, …
$ size <dbl> 641, 1062, 777, 724, 185, 526, 710, 532, 1062, 1062, 1062…
$ lowinc <dbl> 84.8, 97.8, 96.6, 45.3, 94.6, 100.0, 66.2, 66.4, 97.8, 97…
$ mobility <dbl> 38.2, 26.8, 44.4, 18.7, 47.4, 55.4, 25.4, 20.9, 26.8, 26.…
pivot_wider to reshape from long to wide….egmerged.wide <- egmerged %>%
pivot_wider(.,
id_cols = c("childid", "schoolid", "female", "black", "hispanic", "size", "lowinc", "mobility"),
names_from = year,
values_from = math,
names_prefix = "math_year",
)
glimpse(egmerged.wide)
Rows: 1,721
Columns: 14
$ childid <dbl> 289376791, 288278731, 292789461, 275898121, 300246721, …
$ schoolid <dbl> 3430, 3610, 2180, 3040, 3221, 3360, 4450, 4390, 3610, 3…
$ female <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1…
$ black <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1…
$ hispanic <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
$ size <dbl> 641, 1062, 777, 724, 185, 526, 710, 532, 1062, 1062, 10…
$ lowinc <dbl> 84.8, 97.8, 96.6, 45.3, 94.6, 100.0, 66.2, 66.4, 97.8, …
$ mobility <dbl> 38.2, 26.8, 44.4, 18.7, 47.4, 55.4, 25.4, 20.9, 26.8, 2…
$ math_year1 <dbl> -3.887, -4.055, -3.525, -3.592, -3.525, -3.068, -2.596,…
$ math_year2 <dbl> -2.839, -3.395, -0.365, -1.130, -2.097, -1.538, -1.218,…
$ math_year3 <dbl> -2.596, -3.395, -0.574, -2.009, -1.314, -1.757, -1.120,…
$ math_year4 <dbl> -1.236, -1.641, NA, NA, NA, -2.270, -0.419, NA, -0.522,…
$ math_year5 <dbl> -0.456, -2.263, NA, NA, NA, -1.484, -0.403, NA, -0.945,…
$ math_year6 <dbl> 0.601, -1.552, NA, NA, NA, NA, NA, NA, -0.170, -1.763, …
pivot_longer to reshape from wide to long….egmerged.long <- egmerged.wide %>%
pivot_longer(.,
cols = starts_with("math"),
names_to = "year",
values_to = "math",
names_prefix = "math_year",
)
glimpse(egmerged.long)
Rows: 10,326
Columns: 10
$ childid <dbl> 289376791, 289376791, 289376791, 289376791, 289376791, 28…
$ schoolid <dbl> 3430, 3430, 3430, 3430, 3430, 3430, 3610, 3610, 3610, 361…
$ female <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, …
$ black <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ hispanic <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ size <dbl> 641, 641, 641, 641, 641, 641, 1062, 1062, 1062, 1062, 106…
$ lowinc <dbl> 84.8, 84.8, 84.8, 84.8, 84.8, 84.8, 97.8, 97.8, 97.8, 97.…
$ mobility <dbl> 38.2, 38.2, 38.2, 38.2, 38.2, 38.2, 26.8, 26.8, 26.8, 26.…
$ year <chr> "1", "2", "3", "4", "5", "6", "1", "2", "3", "4", "5", "6…
$ math <dbl> -3.887, -2.839, -2.596, -1.236, -0.456, 0.601, -4.055, -3…
year to numeric, get ridding of observations with missing math scores…egmerged.long.clean <- egmerged.long %>%
mutate(.,
year = as.numeric(year)) %>%
na.omit()
glimpse(egmerged.long.clean)
Rows: 7,230
Columns: 10
$ childid <dbl> 289376791, 289376791, 289376791, 289376791, 289376791, 28…
$ schoolid <dbl> 3430, 3430, 3430, 3430, 3430, 3430, 3610, 3610, 3610, 361…
$ female <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, …
$ black <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
$ hispanic <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, …
$ size <dbl> 641, 641, 641, 641, 641, 641, 1062, 1062, 1062, 1062, 106…
$ lowinc <dbl> 84.8, 84.8, 84.8, 84.8, 84.8, 84.8, 97.8, 97.8, 97.8, 97.…
$ mobility <dbl> 38.2, 38.2, 38.2, 38.2, 38.2, 38.2, 26.8, 26.8, 26.8, 26.…
$ year <dbl> 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 1, 2, 3, 1, …
$ math <dbl> -3.887, -2.839, -2.596, -1.236, -0.456, 0.601, -4.055, -3…
egmerged.clean <- egmerged %>%
mutate(.,
retained.fac = as_factor(retained),
female.fac = as_factor(female),
black.fac = as_factor(black),
hispanic.fac = as_factor(hispanic),
year0 = year - 1,
year_sq = year0^2,
year_cube = year0^3)
glimpse(egmerged.clean)
Rows: 7,230
Columns: 19
$ schoolid <dbl> 3430, 3610, 2180, 3040, 3221, 3360, 4450, 4390, 3610,…
$ childid <dbl> 289376791, 288278731, 292789461, 275898121, 300246721…
$ year <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ grade <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ math <dbl> -3.887, -4.055, -3.525, -3.592, -3.525, -3.068, -2.59…
$ retained <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ female <dbl> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,…
$ black <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,…
$ hispanic <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
$ size <dbl> 641, 1062, 777, 724, 185, 526, 710, 532, 1062, 1062, …
$ lowinc <dbl> 84.8, 97.8, 96.6, 45.3, 94.6, 100.0, 66.2, 66.4, 97.8…
$ mobility <dbl> 38.2, 26.8, 44.4, 18.7, 47.4, 55.4, 25.4, 20.9, 26.8,…
$ retained.fac <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ female.fac <fct> 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0,…
$ black.fac <fct> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1,…
$ hispanic.fac <fct> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0,…
$ year0 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ year_sq <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ year_cube <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
model.null.growth <- lmer(math ~ year0 + (1|childid), data = egmerged.clean)
summary(model.null.growth)
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ year0 + (1 | childid)
Data: egmerged.clean
REML criterion at convergence: 17045.1
Scaled residuals:
Min 1Q Median 3Q Max
-3.7528 -0.5789 -0.0243 0.5598 6.0968
Random effects:
Groups Name Variance Std.Dev.
childid (Intercept) 0.8683 0.9318
Residual 0.3470 0.5891
Number of obs: 7230, groups: childid, 1721
Fixed effects:
Estimate Std. Error t value
(Intercept) -2.707304 0.028169 -96.11
year0 0.747452 0.005401 138.40
Correlation of Fixed Effects:
(Intr)
year0 -0.548
null.growth.icc <- 0.8683/(0.8683 + 0.3470)
null.growth.icc
[1] 0.7144738
model.square <- lmer(math ~ year0 + year_sq + (1|childid), data = egmerged.clean)
summary(model.square)
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ year0 + year_sq + (1 | childid)
Data: egmerged.clean
REML criterion at convergence: 16951.1
Scaled residuals:
Min 1Q Median 3Q Max
-3.6833 -0.5749 -0.0259 0.5607 6.2588
Random effects:
Groups Name Variance Std.Dev.
childid (Intercept) 0.8669 0.9311
Residual 0.3409 0.5839
Number of obs: 7230, groups: childid, 1721
Fixed effects:
Estimate Std. Error t value
(Intercept) -2.96054 0.03746 -79.03
year0 0.97320 0.02276 42.77
year_sq -0.03890 0.00381 -10.21
Correlation of Fixed Effects:
(Intr) year0
year0 -0.740
year_sq 0.662 -0.972
model.cube <- lmer(math ~ year0 + year_cube + (1|childid), data = egmerged.clean)
summary(model.cube)
Linear mixed model fit by REML ['lmerMod']
Formula: math ~ year0 + year_cube + (1 | childid)
Data: egmerged.clean
REML criterion at convergence: 16980.2
Scaled residuals:
Min 1Q Median 3Q Max
-3.6878 -0.5733 -0.0233 0.5630 6.2327
Random effects:
Groups Name Variance Std.Dev.
childid (Intercept) 0.8677 0.9315
Residual 0.3423 0.5851
Number of obs: 7230, groups: childid, 1721
Fixed effects:
Estimate Std. Error t value
(Intercept) -2.8770188 0.0339696 -84.69
year0 0.8617959 0.0139383 61.83
year_cube -0.0039513 0.0004445 -8.89
Correlation of Fixed Effects:
(Intr) year0
year0 -0.693
year_cube 0.562 -0.923
model.both <- lmer(math ~ year0 + year_sq + year_cube + (1|childid), REML = FALSE, data = egmerged.clean)
summary(model.both)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: math ~ year0 + year_sq + year_cube + (1 | childid)
Data: egmerged.clean
AIC BIC logLik deviance df.resid
16882.5 16923.8 -8435.3 16870.5 7224
Scaled residuals:
Min 1Q Median 3Q Max
-3.7115 -0.5724 -0.0298 0.5588 6.2789
Random effects:
Groups Name Variance Std.Dev.
childid (Intercept) 0.8630 0.9290
Residual 0.3378 0.5812
Number of obs: 7230, groups: childid, 1721
Fixed effects:
Estimate Std. Error t value
(Intercept) -3.223851 0.051032 -63.173
year0 1.408171 0.061794 22.788
year_sq -0.220628 0.024317 -9.073
year_cube 0.021417 0.002831 7.566
Correlation of Fixed Effects:
(Intr) year0 yer_sq
year0 -0.833
year_sq 0.749 -0.975
year_cube -0.682 0.930 -0.988
anova to test…anova(model.square, model.both)
refitting model(s) with ML (instead of REML)
Data: egmerged.clean
Models:
model.square: math ~ year0 + year_sq + (1 | childid)
model.both: math ~ year0 + year_sq + year_cube + (1 | childid)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model.square 5 16938 16972 -8463.8 16928
model.both 6 16882 16924 -8435.3 16870 57.008 1 4.34e-14 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
model.both.rand <- lmer(math ~ year0 + year_sq + year_cube + (year0|childid), REML = FALSE, data = egmerged.clean)
summary(model.both.rand)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula: math ~ year0 + year_sq + year_cube + (year0 | childid)
Data: egmerged.clean
AIC BIC logLik deviance df.resid
16529.3 16584.4 -8256.6 16513.3 7222
Scaled residuals:
Min 1Q Median 3Q Max
-3.0747 -0.5525 -0.0342 0.5293 5.6148
Random effects:
Groups Name Variance Std.Dev. Corr
childid (Intercept) 0.61268 0.7827
year0 0.02458 0.1568 0.09
Residual 0.28497 0.5338
Number of obs: 7230, groups: childid, 1721
Fixed effects:
Estimate Std. Error t value
(Intercept) -3.322138 0.048116 -69.044
year0 1.541861 0.059322 25.991
year_sq -0.269158 0.023175 -11.614
year_cube 0.026471 0.002685 9.857
Correlation of Fixed Effects:
(Intr) year0 yer_sq
year0 -0.852
year_sq 0.773 -0.974
year_cube -0.706 0.930 -0.988
rand to test…lmerTest::rand(model.both.rand)
ANOVA-like table for random-effects: Single term deletions
Model:
math ~ year0 + year_sq + year_cube + (year0 | childid)
npar logLik AIC LRT Df Pr(>Chisq)
<none> 8 -8256.6 16529
year0 in (year0 | childid) 6 -8435.3 16882 357.22 2 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
modelsummary and broom.mixed Packages to Organize Your Results:| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | |
|---|---|---|---|---|---|
| (Intercept) | -2.707 | -2.961 | -2.877 | -3.224 | -3.322 |
| (0.028) | (0.037) | (0.034) | (0.051) | (0.048) | |
| year0 | 0.747 | 0.973 | 0.862 | 1.408 | 1.542 |
| (0.005) | (0.023) | (0.014) | (0.062) | (0.059) | |
| sd__(Intercept) | 0.932 | 0.931 | 0.931 | 0.929 | 0.783 |
| sd__Observation | 0.589 | 0.584 | 0.585 | 0.581 | 0.534 |
| year_sq | -0.039 | -0.221 | -0.269 | ||
| (0.004) | (0.024) | (0.023) | |||
| year_cube | -0.004 | 0.021 | 0.026 | ||
| (0.000) | (0.003) | (0.003) | |||
| cor__(Intercept).year0 | 0.090 | ||||
| sd__year0 | 0.157 | ||||
| AIC | 17053.1 | 16961.1 | 16990.2 | 16882.5 | 16529.3 |
| BIC | 17080.7 | 16995.5 | 17024.6 | 16923.8 | 16584.4 |
| Log.Lik. | -8522.569 | -8475.554 | -8490.104 | -8435.256 | -8256.647 |
| REMLcrit | 17045.139 | 16951.108 | 16980.208 |
predicted.vals <- broom.mixed::augment(model.both.rand, effects = c("ran_pars", "fixed"), conf.int = TRUE)
glimpse(predicted.vals)
Rows: 7,230
Columns: 16
$ math <dbl> -3.887, -4.055, -3.525, -3.592, -3.525, -3.068, -2.596, …
$ year0 <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ year_sq <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ year_cube <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ childid <dbl> 289376791, 288278731, 292789461, 275898121, 300246721, 2…
$ .fitted <dbl> -4.058049, -4.418839, -2.812483, -3.365132, -3.456950, -…
$ .resid <dbl> 0.17104854, 0.36383866, -0.71251725, -0.22686787, -0.068…
$ .hat <dbl> 0.3167563, 0.3167563, 0.3360663, 0.3360663, 0.3360663, 0…
$ .cooksd <dbl> 0.017416159, 0.078800926, 0.339551761, 0.034423991, 0.00…
$ .fixed <dbl> -3.322138, -3.322138, -3.322138, -3.322138, -3.322138, -…
$ .mu <dbl> -4.058049, -4.418839, -2.812483, -3.365132, -3.456950, -…
$ .offset <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,…
$ .sqrtXwt <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ .sqrtrwt <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ .weights <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
$ .wtres <dbl> 0.17104854, 0.36383866, -0.71251725, -0.22686787, -0.068…
ggplot(data = predicted.vals, mapping = aes(x = year0, y = .fitted)) +
geom_jitter(alpha = .40, color = "seagreen1") +
geom_smooth(method = "loess", formula = 'y ~ x', color = "seagreen") +
labs(title = "Predicted Change in Math Scores Over Time",
subtitle = "With Square and Cubic Effects for Nonlinear Growth",
caption = "Notes. N = 1,731 students in the Sustaining Effects Study (Carter, 1984). Year 0 = Kindergarten.") +
ylab("Predicted Math Score") +
xlab("Year") +
theme(legend.position = "none")
predicted.vals50 <- predicted.vals %>%
arrange(childid, year0) %>%
slice(., 1:252)
ggplot(data = predicted.vals50, mapping = aes(x = year0, y = .fitted, color = as_factor(childid))) +
geom_point(alpha = .50) +
geom_line() +
labs(title = "Predicted Change in Math Scores Over Time",
subtitle = "With Square and Cubic Effects for Nonlinear Growth",
caption = "Notes. Subset of N = 50 students in the Sustaining Effects Study (Carter, 1984). Year 0 = Kindergarten.") +
ylab("Predicted Math Score") +
xlab("Year") +
theme(legend.position = "none")
library(nlme)
Attaching package: ‘nlme’
The following object is masked from ‘package:lme4’:
lmList
The following object is masked from ‘package:dplyr’:
collapse
model.null.growth.homog <- lme(math ~ year0,
random = ~ year0|childid,
data = egmerged.clean,
method = "ML")
summary(model.null.growth.homog)
Linear mixed-effects model fit by maximum likelihood
Data: egmerged.clean
Random effects:
Formula: ~year0 | childid
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.7922145 (Intr)
year0 0.1461291 0.113
Residual 0.5488263
Fixed effects: math ~ year0
Correlation:
(Intr)
year0 -0.442
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.07338195 -0.56120525 -0.03801274 0.52482971 5.46800933
Number of Observations: 7230
Number of Groups: 1721
model.null.growth.het <- lme(math ~ year0,
random = ~ year0|childid,
data = egmerged.clean,
method = "ML",
weights = varIdent(form = ~ 1 | year0)
)
summary(model.null.growth.het)
Linear mixed-effects model fit by maximum likelihood
Data: egmerged.clean
Random effects:
Formula: ~year0 | childid
Structure: General positive-definite, Log-Cholesky parametrization
StdDev Corr
(Intercept) 0.7909930 (Intr)
year0 0.1526271 0.069
Residual 0.5643307
Variance function:
Structure: Different standard deviations per stratum
Formula: ~1 | year0
Parameter estimates:
2 3 4 5 1 0
1.0000000 1.0900648 0.8026221 0.6839841 1.1024842 1.0356196
Fixed effects: math ~ year0
Correlation:
(Intr)
year0 -0.455
Standardized Within-Group Residuals:
Min Q1 Med Q3 Max
-3.62560022 -0.55340209 -0.03040342 0.53323108 5.05558475
Number of Observations: 7230
Number of Groups: 1721