##Load packages

suppressPackageStartupMessages(library(tidyverse))
library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units

##PART 1: LOADING IN DATA & RECODING

#Load in Data

star <- haven::read_dta("projectSTAR.dta")
glimpse(star)
## Rows: 6,325
## Columns: 27
## $ stdntid       <dbl> 10001, 10133, 10246, 10263, 10266, 10275, 10281, 10282, …
## $ race          <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ gender        <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2…
## $ FLAGSGK       <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ flaggk        <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ gkclasstype   <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2…
## $ gkschid       <dbl> 169229, 169280, 218562, 205492, 257899, 161176, 189382, …
## $ gksurban      <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3…
## $ gktchid       <dbl> 16922904, 16928003, 21856202, 20549204, 25789904, 161176…
## $ gktgen        <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gktrace       <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1…
## $ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2…
## $ gktcareer     <dbl+lbl>  4,  4,  4,  4,  4, NA,  4,  4,  4,  4,  4,  4,  4, …
## $ gktyears      <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16, 12, 5, 17, 10, 6…
## $ gkclasssize   <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23, 23, 22, 20, 24, …
## $ gkfreelunch   <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2…
## $ gkrepeat      <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ gkspeced      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gkspecin      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2…
## $ gkpresent     <dbl> 161, 175, 132, 178, 170, 94, 160, 154, 172, 95, 163, 172…
## $ gkabsent      <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8, 0, 31, 7, NA, 20…
## $ gktreadss     <dbl> NA, 427, 450, 483, 456, 411, 443, 448, 463, 472, 428, 54…
## $ gktmathss     <dbl> NA, 478, 494, 513, 513, 468, 473, 449, 520, 536, 484, 62…
## $ gktlistss     <dbl> NA, 509, 549, 554, 520, 571, 595, 540, 565, 595, NA, 622…
## $ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444, 480, 486, 423, 52…
## $ gkmotivraw    <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27, 24, 24, 23, 28, …
## $ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61, 55, 49, 49, 59, …
str((star))
## tibble [6,325 × 27] (S3: tbl_df/tbl/data.frame)
##  $ stdntid      : num [1:6325] 10001 10133 10246 10263 10266 ...
##   ..- attr(*, "label")= chr "STUDENT ID"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ race         : dbl+lbl [1:6325] 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,...
##    ..@ label       : chr "STUDENT RACE/ETHNICITY"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "WHITE" "BLACK" "ASIAN" "HISPANIC" ...
##  $ gender       : dbl+lbl [1:6325] 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2,...
##    ..@ label       : chr "STUDENT GENDER"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
##  $ FLAGSGK      : dbl+lbl [1:6325] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
##    ..@ label       : chr "IN STAR IN KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 0 1
##    .. ..- attr(*, "names")= chr [1:2] "NO" "YES"
##  $ flaggk       : dbl+lbl [1:6325] 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,...
##    ..@ label       : chr "ACHIEVEMENT DATA AVAILABLE GRADE K"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 0 1
##    .. ..- attr(*, "names")= chr [1:2] "NO" "YES"
##  $ gkclasstype  : dbl+lbl [1:6325] 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2,...
##    ..@ label       : chr "CLASSROOM TYPE KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:3] 1 2 3
##    .. ..- attr(*, "names")= chr [1:3] "SMALL CLASS" "REGULAR CLASS" "REGULAR + AIDE CLASS"
##  $ gkschid      : num [1:6325] 169229 169280 218562 205492 257899 ...
##   ..- attr(*, "label")= chr "KINDERGARTEN SCHOOL ID"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gksurban     : dbl+lbl [1:6325] 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3,...
##    ..@ label       : chr "SCHOOL URBANICITY KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:4] 1 2 3 4
##    .. ..- attr(*, "names")= chr [1:4] "INNER CITY" "SUBURBAN" "RURAL" "URBAN"
##  $ gktchid      : num [1:6325] 16922904 16928003 21856202 20549204 25789904 ...
##   ..- attr(*, "label")= chr "KINDERGARTEN TEACHER ID"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktgen       : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
##    ..@ label       : chr "TEACHER GENDER KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
##  $ gktrace      : dbl+lbl [1:6325]  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  ...
##    ..@ label       : chr "TEACHER RACE/ETHNICITY KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "WHITE" "BLACK" "ASIAN" "HISPANIC" ...
##  $ gkthighdegree: dbl+lbl [1:6325]  2,  2,  3,  2,  3,  2,  3,  3,  3,  3,  2,  2,  3,  ...
##    ..@ label       : chr "TEACHER HIGHEST DEGREE KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "ASSOCIATES" "BACHELORS" "MASTERS" "MASTERS +" ...
##  $ gktcareer    : dbl+lbl [1:6325]  4,  4,  4,  4,  4, NA,  4,  4,  4,  4,  4,  4,  4,  ...
##    ..@ label       : chr "TEACHER CAREER LADDER LEVEL KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:7] 1 2 3 4 5 6 7
##    .. ..- attr(*, "names")= chr [1:7] "CHOSE NO TO BE ON CAREER LADDER" "APPRENTICE" "PROBATION" "LADDER LEVEL 1" ...
##  $ gktyears     : num [1:6325] 5 7 8 3 12 2 7 14 4 6 ...
##   ..- attr(*, "label")= chr "YEARS OF TOTAL TEACHING EXPERIENCE KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkclasssize  : num [1:6325] 24 22 17 17 24 24 13 24 14 23 ...
##   ..- attr(*, "label")= chr "CLASS SIZE KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkfreelunch  : dbl+lbl [1:6325] 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2,...
##    ..@ label       : chr "FREE/REDUCED LUNCH STATUS KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "FREE LUNCH" "NON-FREE LUNCH"
##  $ gkrepeat     : dbl+lbl [1:6325] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
##    ..@ label       : chr "REPEATING KINDERGARTEN IN 1985-1986 SCHOOL YEAR"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "YES, PROMOTION RECOMMENDED" "NO, PROMOTION NOT RECOMMENDED"
##  $ gkspeced     : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
##    ..@ label       : chr "SPECIAL EDUCATION STATUS KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "YES" "NO"
##  $ gkspecin     : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2,...
##    ..@ label       : chr "PULLED OUT FOR SPECIAL INSTRUCTION KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "YES" "NO"
##  $ gkpresent    : num [1:6325] 161 175 132 178 170 94 160 154 172 95 ...
##   ..- attr(*, "label")= chr "DAYS PRESENT AT SCHOOL KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkabsent     : num [1:6325] 19 5 28 2 10 3 2 7 8 2 ...
##   ..- attr(*, "label")= chr "DAYS ABSENT FROM SCHOOL KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktreadss    : num [1:6325] NA 427 450 483 456 411 443 448 463 472 ...
##   ..- attr(*, "label")= chr "TOTAL READING SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktmathss    : num [1:6325] NA 478 494 513 513 468 473 449 520 536 ...
##   ..- attr(*, "label")= chr "TOTAL MATH SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktlistss    : num [1:6325] NA 509 549 554 520 571 595 540 565 595 ...
##   ..- attr(*, "label")= chr "TOTAL LISTENING SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkwordskillss: num [1:6325] NA 418 444 431 468 396 444 444 480 486 ...
##   ..- attr(*, "label")= chr "WORD STUDY SKILLS SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkmotivraw   : num [1:6325] 23 24 28 27 25 24 NA NA 26 27 ...
##   ..- attr(*, "label")= chr "MOTIVATION RAW SCORE SCAMIN KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkselfconcraw: num [1:6325] 52 53 56 61 54 55 NA NA 52 61 ...
##   ..- attr(*, "label")= chr "SELF-CONCEPT RAW SCORE SCAMIN KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"

#Data cleaning

star.clean <- star %>%
  mutate(.,
         classtype.fac = as_factor(gkclasstype), 
         degree.fac = as_factor(gkthighdegree))
glimpse(star.clean)
## Rows: 6,325
## Columns: 29
## $ stdntid       <dbl> 10001, 10133, 10246, 10263, 10266, 10275, 10281, 10282, …
## $ race          <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ gender        <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2…
## $ FLAGSGK       <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ flaggk        <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ gkclasstype   <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2…
## $ gkschid       <dbl> 169229, 169280, 218562, 205492, 257899, 161176, 189382, …
## $ gksurban      <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3…
## $ gktchid       <dbl> 16922904, 16928003, 21856202, 20549204, 25789904, 161176…
## $ gktgen        <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gktrace       <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1…
## $ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2…
## $ gktcareer     <dbl+lbl>  4,  4,  4,  4,  4, NA,  4,  4,  4,  4,  4,  4,  4, …
## $ gktyears      <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16, 12, 5, 17, 10, 6…
## $ gkclasssize   <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23, 23, 22, 20, 24, …
## $ gkfreelunch   <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2…
## $ gkrepeat      <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ gkspeced      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gkspecin      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2…
## $ gkpresent     <dbl> 161, 175, 132, 178, 170, 94, 160, 154, 172, 95, 163, 172…
## $ gkabsent      <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8, 0, 31, 7, NA, 20…
## $ gktreadss     <dbl> NA, 427, 450, 483, 456, 411, 443, 448, 463, 472, 428, 54…
## $ gktmathss     <dbl> NA, 478, 494, 513, 513, 468, 473, 449, 520, 536, 484, 62…
## $ gktlistss     <dbl> NA, 509, 549, 554, 520, 571, 595, 540, 565, 595, NA, 622…
## $ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444, 480, 486, 423, 52…
## $ gkmotivraw    <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27, 24, 24, 23, 28, …
## $ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61, 55, 49, 49, 59, …
## $ classtype.fac <fct> REGULAR + AIDE CLASS, REGULAR + AIDE CLASS, REGULAR + AI…
## $ degree.fac    <fct> BACHELORS, BACHELORS, MASTERS, BACHELORS, MASTERS, BACHE…
summary(star.clean$degree.fac)
## ASSOCIATES  BACHELORS    MASTERS  MASTERS + SPECIALIST   DOCTORAL       NA's 
##          0       4119       1981        161         43          0         21
summary(star.clean$classtype.fac)
##          SMALL CLASS        REGULAR CLASS REGULAR + AIDE CLASS 
##                 1900                 2194                 2231
summary(star.clean$gktmathss) 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   288.0   454.0   484.0   485.4   513.0   626.0     454

##MODEL BUILDING

#Step 1: Null model

model.null <- lmer(gktmathss ~ (1|gkschid), REML = FALSE, data = star.clean)
summary(model.null)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  60989.6  61009.6 -30491.8  60983.6     5868 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6470 -0.6540 -0.0636  0.5819  4.3669 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  450.8   21.23   
##  Residual             1826.4   42.74   
## Number of obs: 5871, groups:  gkschid, 79
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  485.500      2.459   197.5
null.ICC <- 450.8/(450.8+1826.4)
null.ICC
## [1] 0.1979624

#L1 predictor

model.1 <- lmer(gktmathss ~ gkselfconcraw + (1|gkschid), REML = FALSE, data = star.clean)
summary(model.1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49252.7  49278.5 -24622.3  49244.7     4752 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3826 -0.6680 -0.0776  0.5780  4.4658 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  460.5   21.46   
##  Residual             1759.2   41.94   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)   456.3309     7.2258  63.153
## gkselfconcraw   0.5371     0.1206   4.453
## 
## Correlation of Fixed Effects:
##             (Intr)
## gkselfcncrw -0.933

#L2 predictors

model.2a <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.2a)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49203.7  49242.5 -24595.9  49191.7     4750 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3557 -0.6580 -0.0825  0.5803  4.3301 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  464.1   21.54   
##  Residual             1739.3   41.70   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       465.9590     7.3134  63.713
## gkselfconcraw                       0.4849     0.1201   4.036
## classtype.facREGULAR CLASS         -9.6617     1.5291  -6.319
## classtype.facREGULAR + AIDE CLASS  -9.7393     1.5144  -6.431
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC
## gkselfcncrw -0.926              
## c.REGULARCL -0.161  0.056       
## c.REGULA+AC -0.154  0.048  0.527
model.2b <- lmer(gktmathss ~ gkselfconcraw + degree.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.2b)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + degree.fac + (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49244.4  49289.7 -24615.2  49230.4     4749 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3872 -0.6683 -0.0720  0.5802  4.4658 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  463.8   21.54   
##  Residual             1753.7   41.88   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)          457.2823     7.2292  63.255
## gkselfconcraw          0.5246     0.1206   4.351
## degree.facMASTERS     -1.0985     1.5939  -0.689
## degree.facMASTERS +   -6.0048     4.2379  -1.417
## degree.facSPECIALIST  28.9689     8.6214   3.360
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc d.MAST d.MAS+
## gkselfcncrw -0.930                     
## dgr.MASTERS -0.045 -0.031              
## dg.MASTERS+ -0.008 -0.017  0.141       
## d.SPECIALIS  0.023 -0.044  0.113  0.017
model.2ab <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.2ab)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |  
##     gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49202.0  49260.2 -24592.0  49184.0     4747 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3594 -0.6586 -0.0782  0.5862  4.3409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  465.1   21.57   
##  Residual             1736.4   41.67   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       466.0099     7.3119  63.733
## gkselfconcraw                       0.4775     0.1202   3.972
## classtype.facREGULAR CLASS         -9.1386     1.5455  -5.913
## classtype.facREGULAR + AIDE CLASS  -9.2654     1.5292  -6.059
## degree.facMASTERS                  -0.1069     1.5930  -0.067
## degree.facMASTERS +                -5.2694     4.2195  -1.249
## degree.facSPECIALIST               21.2655     8.6539   2.457
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.924                                   
## c.REGULARCL -0.156  0.055                            
## c.REGULA+AC -0.150  0.046  0.538                     
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078              
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013  0.143       
## d.SPECIALIS  0.000 -0.036  0.115  0.114  0.099  0.014
library(modelsummary)
## 
## Attaching package: 'modelsummary'
## The following object is masked from 'package:Hmisc':
## 
##     Mean
library(broom.mixed)

models <- list(model.null, model.1, model.2a, model.2b, model.2ab)
modelsummary(models)
Model 1 Model 2 Model 3 Model 4 Model 5
(Intercept) 485.500 456.331 465.959 457.282 466.010
(2.459) (7.226) (7.313) (7.229) (7.312)
gkselfconcraw 0.537 0.485 0.525 0.477
(0.121) (0.120) (0.121) (0.120)
classtype.facREGULAR CLASS −9.662 −9.139
(1.529) (1.545)
classtype.facREGULAR + AIDE CLASS −9.739 −9.265
(1.514) (1.529)
degree.facMASTERS −1.098 −0.107
(1.594) (1.593)
degree.facMASTERS + −6.005 −5.269
(4.238) (4.220)
degree.facSPECIALIST 28.969 21.265
(8.621) (8.654)
SD (Intercept gkschid) 21.231 21.460 21.543 21.535 21.567
SD (Observations) 42.736 41.943 41.705 41.878 41.670
Num.Obs. 5871 4756 4756 4756 4756
R2 Marg. 0.000 0.003 0.012 0.006 0.014
R2 Cond. 0.198 0.210 0.220 0.214 0.222
AIC 60985.9 49251.3 49197.4 49229.5 49182.1
BIC 61006.0 49277.2 49236.2 49274.8 49240.3
ICC 0.2 0.2 0.2 0.2 0.2
RMSE 42.46 41.64 41.40 41.57 41.37

Use the augment function from the broom.mixed Package to Get Predictions, Residuals, Cook’s Distances, Etc.:

library(broom.mixed)

diagnostics <- augment(model.2ab)

#Check normality of residuals via histogram

ggplot(data = diagnostics, mapping = aes(x = .resid)) +
  geom_histogram(binwidth = .25) + theme_classic() + 
  labs(title = "Histogram of Residuals for Math Score Model",
                      x = "Residual Value") +
  geom_vline(xintercept = c(-125, 125), linetype = "dotted")

Shapiro-Wilk Test

shapiro.test(diagnostics$.resid)
## 
##  Shapiro-Wilk normality test
## 
## data:  diagnostics$.resid
## W = 0.98919, p-value < 2.2e-16

**Suggests non-normality

Step 2: Use Residuals vs. Fitted (RVF) Plot to Assess Homoskedasticity of Errors

Basic RVF Plot, Without Groups

ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .resid)) +
  geom_point() + labs(title = "RVF Plot for Math Scores Model",
                      x = "Predicted Value, Math Scores",
                      y = "Residual Value") + theme_classic()

Use Cook’s Distance to Identify Outliers

ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .cooksd, label = gkschid)) +
  geom_point() + geom_text(nudge_x = .25) + theme_classic() + 
  labs(title = "Cook's Distance Plot for Math Scores Model",
                      x = "Predicted Value, Math Scores",
                      y = "Cook's Distance") + 
  geom_hline(yintercept = 4/4756, linetype = "dotted")

##RE-RUNNING WITH ROBUST STANDARD ERRORS

library(robustlmm)
## Warning: package 'robustlmm' was built under R version 4.2.2
model.robust <- rlmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.robust)
## Robust linear mixed model fit by DAStau 
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |      gkschid) 
##    Data: star.clean 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4841 -0.6398 -0.0407  0.6306  4.6094 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  471.5   21.71   
##  Residual             1624.9   40.31   
## Number of obs: 4756, groups: gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       463.3663     7.2912   63.55
## gkselfconcraw                       0.4916     0.1193    4.12
## classtype.facREGULAR CLASS         -7.7881     1.5335   -5.08
## classtype.facREGULAR + AIDE CLASS  -8.2842     1.5173   -5.46
## degree.facMASTERS                  -0.5478     1.5820   -0.35
## degree.facMASTERS +                -4.8965     4.1885   -1.17
## degree.facSPECIALIST               23.4328     8.5932    2.73
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.919                                   
## c.REGULARCL -0.156  0.055                            
## c.REGULA+AC -0.150  0.046  0.538                     
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078              
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013  0.143       
## d.SPECIALIS  0.000 -0.036  0.115  0.114  0.100  0.014
## 
## Robustness weights for the residuals: 
##  3779 weights are ~= 1. The remaining 977 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.292   0.638   0.817   0.778   0.931   0.999 
## 
## Robustness weights for the random effects: 
##  59 weights are ~= 1. The remaining 14 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.514   0.753   0.885   0.820   0.926   0.995 
## 
## Rho functions used for fitting:
##   Residuals:
##     eff: smoothed Huber (k = 1.345, s = 10) 
##     sig: smoothed Huber, Proposal 2 (k = 1.345, s = 10) 
##   Random Effects, variance component 1 (gkschid):
##     eff: smoothed Huber (k = 1.345, s = 10) 
##     vcp: smoothed Huber, Proposal 2 (k = 1.345, s = 10)

##RUNNING WITH OUTLIERS REMOVED (COOK’s DISTANCE >0.20)

star.trimmed <- diagnostics %>% 
  filter(., .cooksd <.20)

model.trimmed <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data=star.trimmed)
summary(model.trimmed)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |  
##     gkschid)
##    Data: star.trimmed
## 
##      AIC      BIC   logLik deviance df.resid 
##  49202.0  49260.2 -24592.0  49184.0     4747 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3594 -0.6586 -0.0782  0.5862  4.3409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  465.1   21.57   
##  Residual             1736.4   41.67   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       466.0099     7.3119  63.733
## gkselfconcraw                       0.4775     0.1202   3.972
## classtype.facREGULAR CLASS         -9.1386     1.5455  -5.913
## classtype.facREGULAR + AIDE CLASS  -9.2654     1.5292  -6.059
## degree.facMASTERS                  -0.1069     1.5930  -0.067
## degree.facMASTERS +                -5.2694     4.2195  -1.249
## degree.facSPECIALIST               21.2655     8.6539   2.457
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.924                                   
## c.REGULARCL -0.156  0.055                            
## c.REGULA+AC -0.150  0.046  0.538                     
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078              
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013  0.143       
## d.SPECIALIS  0.000 -0.036  0.115  0.114  0.099  0.014

##CREATE TABLE COMPARING ORIGINAL FINAL MODEL, ROBUST MODEL, & TRIMMED MODEL

library(modelsummary)

models <- list(model.2ab, model.robust, model.trimmed)
modelsummary(models, output = "markdown")
Model 1 Model 2 Model 3
(Intercept) 466.010 463.366 466.010
(7.312) (7.291) (7.312)
gkselfconcraw 0.477 0.492 0.477
(0.120) (0.119) (0.120)
classtype.facREGULAR CLASS -9.139 -7.788 -9.139
(1.545) (1.533) (1.545)
classtype.facREGULAR + AIDE CLASS -9.265 -8.284 -9.265
(1.529) (1.517) (1.529)
degree.facMASTERS -0.107 -0.548 -0.107
(1.593) (1.582) (1.593)
degree.facMASTERS + -5.269 -4.896 -5.269
(4.220) (4.189) (4.220)
degree.facSPECIALIST 21.265 23.433 21.265
(8.654) (8.593) (8.654)
SD (Intercept gkschid) 21.567 21.714 21.567
SD (Observations) 41.670 40.310 41.670
Num.Obs. 4756 4756 4756
R2 Marg. 0.014 0.013 0.014
R2 Cond. 0.222 0.235 0.222
AIC 49182.1 49182.1
BIC 49240.3 49240.3
ICC 0.2 0.2 0.2
RMSE 41.37 41.45 41.37