#load in packages
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2 ✓ purrr 0.3.4
## ✓ tibble 3.0.3 ✓ dplyr 1.0.2
## ✓ tidyr 1.1.1 ✓ stringr 1.4.0
## ✓ readr 1.3.1 ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lmerTest)
## Loading required package: lme4
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
##
## lmer
## The following object is masked from 'package:stats':
##
## step
library(lme4)
#load in dataset
star_long <-haven::read_dta("STAR_long.dta")
glimpse(star_long)
## Rows: 8,826
## Columns: 7
## $ stdntid <dbl> 10023, 10023, 10023, 10023, 10023, 10023, 10042, 10042, 10…
## $ grade <dbl> 3, 4, 5, 6, 7, 8, 3, 4, 5, 6, 7, 8, 3, 4, 5, 6, 7, 8, 3, 4…
## $ race <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender <dbl+lbl> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ ac_mot <dbl> 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 52, 52, 52…
## $ ever_lunch <dbl+lbl> 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, …
## $ science <dbl> 632, 756, 729, 762, 778, 783, 618, 710, 738, 740, 734, 771…
#tried to Hmisc and it isn’t working? So I will check to make sure dataset is named correctly & it is.
glimpse(star_long)
## Rows: 8,826
## Columns: 7
## $ stdntid <dbl> 10023, 10023, 10023, 10023, 10023, 10023, 10042, 10042, 10…
## $ grade <dbl> 3, 4, 5, 6, 7, 8, 3, 4, 5, 6, 7, 8, 3, 4, 5, 6, 7, 8, 3, 4…
## $ race <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ gender <dbl+lbl> 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ ac_mot <dbl> 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 50, 52, 52, 52…
## $ ever_lunch <dbl+lbl> 2, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, …
## $ science <dbl> 632, 756, 729, 762, 778, 783, 618, 710, 738, 740, 734, 771…
###Cleaning: What are my variables? #science scores (science) as the DV, and grade (grade), gender (gender), race/ethnicity (race), and academic motivation (ac_mot) as predictors. #Create a new variable, grade_0, which goes from 0-5 instead of 3-8 like our current grade variable does. Why is using a time variable that starts at 0 helpful in interpreting our results?
starlong.clean <-star_long %>%
mutate(.,
race.fac = as_factor(race),
gender.fac = as_factor(gender),
freelunch.fac = as_factor(ever_lunch),
grade0 = grade-3)
#tried arrange…didn’t seem to do the same as what happened with Dr. Broda
starlong.clean %>%
arrange(., stdntid, grade0)
## # A tibble: 8,826 x 11
## stdntid grade race gender ac_mot ever_lunch science race.fac gender.fac
## <dbl> <dbl> <dbl+l> <dbl+l> <dbl> <dbl+lbl> <dbl> <fct> <fct>
## 1 10023 3 1 [WHI… 1 [MAL… 50 2 [NON-FR… 632 WHITE MALE
## 2 10023 4 1 [WHI… 1 [MAL… 50 2 [NON-FR… 756 WHITE MALE
## 3 10023 5 1 [WHI… 1 [MAL… 50 2 [NON-FR… 729 WHITE MALE
## 4 10023 6 1 [WHI… 1 [MAL… 50 2 [NON-FR… 762 WHITE MALE
## 5 10023 7 1 [WHI… 1 [MAL… 50 2 [NON-FR… 778 WHITE MALE
## 6 10023 8 1 [WHI… 1 [MAL… 50 2 [NON-FR… 783 WHITE MALE
## 7 10042 3 1 [WHI… 2 [FEM… 50 1 [FREE L… 618 WHITE FEMALE
## 8 10042 4 1 [WHI… 2 [FEM… 50 1 [FREE L… 710 WHITE FEMALE
## 9 10042 5 1 [WHI… 2 [FEM… 50 1 [FREE L… 738 WHITE FEMALE
## 10 10042 6 1 [WHI… 2 [FEM… 50 1 [FREE L… 740 WHITE FEMALE
## # … with 8,816 more rows, and 2 more variables: freelunch.fac <fct>,
## # grade0 <dbl>
#null model: science scores (science) as the DV, with scores clustered within students (stdntid). Note to self, rename the variable so it is easier to work with.
model.null <- lmer(science ~ (1|stdntid), data = starlong.clean)
summary(model.null)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: science ~ (1 | stdntid)
## Data: starlong.clean
##
## REML criterion at convergence: 97939.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1898 -0.3885 0.2381 0.6370 2.7353
##
## Random effects:
## Groups Name Variance Std.Dev.
## stdntid (Intercept) 729.4 27.01
## Residual 3361.7 57.98
## Number of obs: 8826, groups: stdntid, 1471
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 730.0741 0.9363 1469.9997 779.7 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##Calculate ICC
null.icc <- 729.4/(729.4 + 3361.7)
null.icc
## [1] 0.1782895
#Now time to run a growth null moddel for science scores with grade component (time value)
model.null.growth <- lmer(science ~ grade0 + (1|stdntid), data = starlong.clean)
summary(model.null.growth)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: science ~ grade0 + (1 | stdntid)
## Data: starlong.clean
##
## REML criterion at convergence: 90950.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0250 -0.6361 0.0105 0.6366 4.0222
##
## Random effects:
## Groups Name Variance Std.Dev.
## stdntid (Intercept) 1073 32.76
## Residual 1300 36.05
## Number of obs: 8826, groups: stdntid, 1471
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 669.3920 1.0919 2650.0494 613 <2e-16 ***
## grade0 24.2729 0.2247 7354.0000 108 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## grade0 -0.514
##ICC for null growth model
null.growth.icc <-1073 / (1073 + 1300)
null.growth.icc
## [1] 0.4521702
#Now, run a random intercept growth model with science scores (science) as the DV, and grade (grade_0), gender (gender), race/ethnicity (race), and academic motivation (ac_mot) as predictors. Interpret the results and evaluate model fit using AIC/BIC and your choice of effect size.
model.1 <- lmer(science ~ grade0 + gender + race + ac_mot + (1|stdntid), data = starlong.clean)
summary(model.1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: science ~ grade0 + gender + race + ac_mot + (1 | stdntid)
## Data: starlong.clean
##
## REML criterion at convergence: 90850.9
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0751 -0.6323 0.0104 0.6353 3.9929
##
## Random effects:
## Groups Name Variance Std.Dev.
## stdntid (Intercept) 995 31.54
## Residual 1300 36.05
## Number of obs: 8826, groups: stdntid, 1471
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 717.9111 11.2852 1474.2937 63.615 < 2e-16 ***
## grade0 24.2729 0.2247 7354.0000 108.021 < 2e-16 ***
## gender -6.4345 1.8672 1467.0000 -3.446 0.000585 ***
## race -18.4500 2.0271 1467.0000 -9.102 < 2e-16 ***
## ac_mot -0.3384 0.2294 1466.9983 -1.475 0.140412
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) grade0 gender race
## grade0 -0.050
## gender -0.048 0.000
## race -0.194 0.000 0.056
## ac_mot -0.937 0.000 -0.218 -0.034
#model fit
lmerTest::rand(model.1)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## science ~ grade0 + gender + race + ac_mot + (1 | stdntid)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 7 -45425 90865
## (1 | stdntid) 6 -46666 93343 2480.5 1 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#Run an interaction effect between ac_mot and grade_0 to test whether students with different levels of academic motivation have different growth in science scores. Interpret the results.
model.2 <- lmer(science ~ grade0 + gender + race + ac_mot + grade0:ac_mot + (1|stdntid), data = starlong.clean)
summary(model.2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: science ~ grade0 + gender + race + ac_mot + grade0:ac_mot + (1 |
## stdntid)
## Data: starlong.clean
##
## REML criterion at convergence: 90854.8
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0727 -0.6324 0.0102 0.6354 3.9923
##
## Random effects:
## Groups Name Variance Std.Dev.
## stdntid (Intercept) 995 31.54
## Residual 1300 36.06
## Number of obs: 8826, groups: stdntid, 1471
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 7.187e+02 1.318e+01 2.674e+03 54.512 < 2e-16 ***
## grade0 2.397e+01 2.735e+00 7.353e+03 8.763 < 2e-16 ***
## gender -6.434e+00 1.867e+00 1.467e+03 -3.446 0.000585 ***
## race -1.845e+01 2.027e+00 1.467e+03 -9.102 < 2e-16 ***
## ac_mot -3.537e-01 2.680e-01 2.662e+03 -1.320 0.187029
## grade0:ac_mot 6.121e-03 5.542e-02 7.353e+03 0.110 0.912047
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) grade0 gender race ac_mot
## grade0 -0.519
## gender -0.041 0.000
## race -0.166 0.000 0.056
## ac_mot -0.954 0.515 -0.187 -0.029
## grade0:c_mt 0.517 -0.997 0.000 0.000 -0.517
##Try adding a random slope for time (grade_0) at the student level.
model.3 <- lmer(science ~ grade0 + gender + race + ac_mot + (grade0|stdntid), data = starlong.clean)
## boundary (singular) fit: see ?isSingular
summary(model.3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: science ~ grade0 + gender + race + ac_mot + (grade0 | stdntid)
## Data: starlong.clean
##
## REML criterion at convergence: 90849.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0444 -0.6345 0.0095 0.6351 4.0093
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## stdntid (Intercept) 9.529e+02 30.8685
## grade0 7.319e-02 0.2705 1.00
## Residual 1.300e+03 36.0488
## Number of obs: 8826, groups: stdntid, 1471
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 717.8706 11.2795 1474.0410 63.644 <2e-16 ***
## grade0 24.2729 0.2248 7291.9444 107.980 <2e-16 ***
## gender -6.3401 1.8665 1467.3490 -3.397 0.0007 ***
## race -18.3997 2.0262 1467.3490 -9.081 <2e-16 ***
## ac_mot -0.3417 0.2293 1467.3497 -1.490 0.1364
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) grade0 gender race
## grade0 -0.048
## gender -0.048 0.000
## race -0.194 0.000 0.056
## ac_mot -0.937 0.000 -0.218 -0.034
## convergence code: 0
## boundary (singular) fit: see ?isSingular
##Use the rand function (R) to evaluate whether the slope should be treated as random.
rand(model.3)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## science ~ grade0 + gender + race + ac_mot + (grade0 | stdntid)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 9 -45425 90868
## grade0 in (grade0 | stdntid) 7 -45425 90865 1.3075 2 0.5201
##this looks weird, I will try again
lmerTest::rand(model.3)
## ANOVA-like table for random-effects: Single term deletions
##
## Model:
## science ~ grade0 + gender + race + ac_mot + (grade0 | stdntid)
## npar logLik AIC LRT Df Pr(>Chisq)
## <none> 9 -45425 90868
## grade0 in (grade0 | stdntid) 7 -45425 90865 1.3075 2 0.5201