##Load packages

suppressPackageStartupMessages(library(tidyverse))
library(lme4)

## Loading required package: Matrix

## 
## Attaching package: 'Matrix'

## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack

library(Hmisc)

## Loading required package: lattice

## Loading required package: survival

## Loading required package: Formula

## 
## Attaching package: 'Hmisc'

## The following objects are masked from 'package:dplyr':
## 
##     src, summarize

## The following objects are masked from 'package:base':
## 
##     format.pval, units

##PART 1: LOADING IN DATA & RECODING

#Load in Data

star <- haven::read_dta("projectSTAR.dta")
glimpse(star)

## Rows: 6,325
## Columns: 27
## $ stdntid       <dbl> 10001, 10133, 10246, 10263, 10266, 10275, 10281, 10282, …
## $ race          <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ gender        <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2…
## $ FLAGSGK       <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ flaggk        <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ gkclasstype   <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2…
## $ gkschid       <dbl> 169229, 169280, 218562, 205492, 257899, 161176, 189382, …
## $ gksurban      <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3…
## $ gktchid       <dbl> 16922904, 16928003, 21856202, 20549204, 25789904, 161176…
## $ gktgen        <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gktrace       <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1…
## $ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2…
## $ gktcareer     <dbl+lbl>  4,  4,  4,  4,  4, NA,  4,  4,  4,  4,  4,  4,  4, …
## $ gktyears      <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16, 12, 5, 17, 10, 6…
## $ gkclasssize   <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23, 23, 22, 20, 24, …
## $ gkfreelunch   <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2…
## $ gkrepeat      <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ gkspeced      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gkspecin      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2…
## $ gkpresent     <dbl> 161, 175, 132, 178, 170, 94, 160, 154, 172, 95, 163, 172…
## $ gkabsent      <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8, 0, 31, 7, NA, 20…
## $ gktreadss     <dbl> NA, 427, 450, 483, 456, 411, 443, 448, 463, 472, 428, 54…
## $ gktmathss     <dbl> NA, 478, 494, 513, 513, 468, 473, 449, 520, 536, 484, 62…
## $ gktlistss     <dbl> NA, 509, 549, 554, 520, 571, 595, 540, 565, 595, NA, 622…
## $ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444, 480, 486, 423, 52…
## $ gkmotivraw    <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27, 24, 24, 23, 28, …
## $ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61, 55, 49, 49, 59, …

str((star))

## tibble [6,325 × 27] (S3: tbl_df/tbl/data.frame)
##  $ stdntid      : num [1:6325] 10001 10133 10246 10263 10266 ...
##   ..- attr(*, "label")= chr "STUDENT ID"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ race         : dbl+lbl [1:6325] 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,...
##    ..@ label       : chr "STUDENT RACE/ETHNICITY"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "WHITE" "BLACK" "ASIAN" "HISPANIC" ...
##  $ gender       : dbl+lbl [1:6325] 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2,...
##    ..@ label       : chr "STUDENT GENDER"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
##  $ FLAGSGK      : dbl+lbl [1:6325] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
##    ..@ label       : chr "IN STAR IN KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 0 1
##    .. ..- attr(*, "names")= chr [1:2] "NO" "YES"
##  $ flaggk       : dbl+lbl [1:6325] 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,...
##    ..@ label       : chr "ACHIEVEMENT DATA AVAILABLE GRADE K"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 0 1
##    .. ..- attr(*, "names")= chr [1:2] "NO" "YES"
##  $ gkclasstype  : dbl+lbl [1:6325] 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2,...
##    ..@ label       : chr "CLASSROOM TYPE KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:3] 1 2 3
##    .. ..- attr(*, "names")= chr [1:3] "SMALL CLASS" "REGULAR CLASS" "REGULAR + AIDE CLASS"
##  $ gkschid      : num [1:6325] 169229 169280 218562 205492 257899 ...
##   ..- attr(*, "label")= chr "KINDERGARTEN SCHOOL ID"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gksurban     : dbl+lbl [1:6325] 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3,...
##    ..@ label       : chr "SCHOOL URBANICITY KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:4] 1 2 3 4
##    .. ..- attr(*, "names")= chr [1:4] "INNER CITY" "SUBURBAN" "RURAL" "URBAN"
##  $ gktchid      : num [1:6325] 16922904 16928003 21856202 20549204 25789904 ...
##   ..- attr(*, "label")= chr "KINDERGARTEN TEACHER ID"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktgen       : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
##    ..@ label       : chr "TEACHER GENDER KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
##  $ gktrace      : dbl+lbl [1:6325]  2,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  ...
##    ..@ label       : chr "TEACHER RACE/ETHNICITY KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "WHITE" "BLACK" "ASIAN" "HISPANIC" ...
##  $ gkthighdegree: dbl+lbl [1:6325]  2,  2,  3,  2,  3,  2,  3,  3,  3,  3,  2,  2,  3,  ...
##    ..@ label       : chr "TEACHER HIGHEST DEGREE KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:6] 1 2 3 4 5 6
##    .. ..- attr(*, "names")= chr [1:6] "ASSOCIATES" "BACHELORS" "MASTERS" "MASTERS +" ...
##  $ gktcareer    : dbl+lbl [1:6325]  4,  4,  4,  4,  4, NA,  4,  4,  4,  4,  4,  4,  4,  ...
##    ..@ label       : chr "TEACHER CAREER LADDER LEVEL KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:7] 1 2 3 4 5 6 7
##    .. ..- attr(*, "names")= chr [1:7] "CHOSE NO TO BE ON CAREER LADDER" "APPRENTICE" "PROBATION" "LADDER LEVEL 1" ...
##  $ gktyears     : num [1:6325] 5 7 8 3 12 2 7 14 4 6 ...
##   ..- attr(*, "label")= chr "YEARS OF TOTAL TEACHING EXPERIENCE KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkclasssize  : num [1:6325] 24 22 17 17 24 24 13 24 14 23 ...
##   ..- attr(*, "label")= chr "CLASS SIZE KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkfreelunch  : dbl+lbl [1:6325] 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2,...
##    ..@ label       : chr "FREE/REDUCED LUNCH STATUS KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "FREE LUNCH" "NON-FREE LUNCH"
##  $ gkrepeat     : dbl+lbl [1:6325] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
##    ..@ label       : chr "REPEATING KINDERGARTEN IN 1985-1986 SCHOOL YEAR"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "YES, PROMOTION RECOMMENDED" "NO, PROMOTION NOT RECOMMENDED"
##  $ gkspeced     : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
##    ..@ label       : chr "SPECIAL EDUCATION STATUS KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "YES" "NO"
##  $ gkspecin     : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2,...
##    ..@ label       : chr "PULLED OUT FOR SPECIAL INSTRUCTION KINDERGARTEN"
##    ..@ format.stata: chr "%10.0g"
##    ..@ labels      : Named num [1:2] 1 2
##    .. ..- attr(*, "names")= chr [1:2] "YES" "NO"
##  $ gkpresent    : num [1:6325] 161 175 132 178 170 94 160 154 172 95 ...
##   ..- attr(*, "label")= chr "DAYS PRESENT AT SCHOOL KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkabsent     : num [1:6325] 19 5 28 2 10 3 2 7 8 2 ...
##   ..- attr(*, "label")= chr "DAYS ABSENT FROM SCHOOL KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktreadss    : num [1:6325] NA 427 450 483 456 411 443 448 463 472 ...
##   ..- attr(*, "label")= chr "TOTAL READING SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktmathss    : num [1:6325] NA 478 494 513 513 468 473 449 520 536 ...
##   ..- attr(*, "label")= chr "TOTAL MATH SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gktlistss    : num [1:6325] NA 509 549 554 520 571 595 540 565 595 ...
##   ..- attr(*, "label")= chr "TOTAL LISTENING SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkwordskillss: num [1:6325] NA 418 444 431 468 396 444 444 480 486 ...
##   ..- attr(*, "label")= chr "WORD STUDY SKILLS SCALE SCORE SAT KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkmotivraw   : num [1:6325] 23 24 28 27 25 24 NA NA 26 27 ...
##   ..- attr(*, "label")= chr "MOTIVATION RAW SCORE SCAMIN KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"
##  $ gkselfconcraw: num [1:6325] 52 53 56 61 54 55 NA NA 52 61 ...
##   ..- attr(*, "label")= chr "SELF-CONCEPT RAW SCORE SCAMIN KINDERGARTEN"
##   ..- attr(*, "format.stata")= chr "%10.0g"

#Data cleaning

star.clean <- star %>%
  mutate(.,
         classtype.fac = as_factor(gkclasstype), 
         degree.fac = as_factor(gkthighdegree))
glimpse(star.clean)

## Rows: 6,325
## Columns: 29
## $ stdntid       <dbl> 10001, 10133, 10246, 10263, 10266, 10275, 10281, 10282, …
## $ race          <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ gender        <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2…
## $ FLAGSGK       <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ flaggk        <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ gkclasstype   <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2…
## $ gkschid       <dbl> 169229, 169280, 218562, 205492, 257899, 161176, 189382, …
## $ gksurban      <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3…
## $ gktchid       <dbl> 16922904, 16928003, 21856202, 20549204, 25789904, 161176…
## $ gktgen        <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gktrace       <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1…
## $ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2…
## $ gktcareer     <dbl+lbl>  4,  4,  4,  4,  4, NA,  4,  4,  4,  4,  4,  4,  4, …
## $ gktyears      <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16, 12, 5, 17, 10, 6…
## $ gkclasssize   <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23, 23, 22, 20, 24, …
## $ gkfreelunch   <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2…
## $ gkrepeat      <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ gkspeced      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gkspecin      <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2…
## $ gkpresent     <dbl> 161, 175, 132, 178, 170, 94, 160, 154, 172, 95, 163, 172…
## $ gkabsent      <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8, 0, 31, 7, NA, 20…
## $ gktreadss     <dbl> NA, 427, 450, 483, 456, 411, 443, 448, 463, 472, 428, 54…
## $ gktmathss     <dbl> NA, 478, 494, 513, 513, 468, 473, 449, 520, 536, 484, 62…
## $ gktlistss     <dbl> NA, 509, 549, 554, 520, 571, 595, 540, 565, 595, NA, 622…
## $ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444, 480, 486, 423, 52…
## $ gkmotivraw    <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27, 24, 24, 23, 28, …
## $ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61, 55, 49, 49, 59, …
## $ classtype.fac <fct> REGULAR + AIDE CLASS, REGULAR + AIDE CLASS, REGULAR + AI…
## $ degree.fac    <fct> BACHELORS, BACHELORS, MASTERS, BACHELORS, MASTERS, BACHE…

summary(star.clean$degree.fac)

## ASSOCIATES  BACHELORS    MASTERS  MASTERS + SPECIALIST   DOCTORAL       NA's 
##          0       4119       1981        161         43          0         21

summary(star.clean$classtype.fac)

##          SMALL CLASS        REGULAR CLASS REGULAR + AIDE CLASS 
##                 1900                 2194                 2231

summary(star.clean$gktmathss)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max.    NA's 
##   288.0   454.0   484.0   485.4   513.0   626.0     454

##MODEL BUILDING

#Step 1: Null model

model.null <- lmer(gktmathss ~ (1|gkschid), REML = FALSE, data = star.clean)
summary(model.null)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  60989.6  61009.6 -30491.8  60983.6     5868 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.6470 -0.6540 -0.0636  0.5819  4.3669 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  450.8   21.23   
##  Residual             1826.4   42.74   
## Number of obs: 5871, groups:  gkschid, 79
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  485.500      2.459   197.5

null.ICC <- 450.8/(450.8+1826.4)
null.ICC

## [1] 0.1979624

#L1 predictor

model.1 <- lmer(gktmathss ~ gkselfconcraw + (1|gkschid), REML = FALSE, data = star.clean)
summary(model.1)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49252.7  49278.5 -24622.3  49244.7     4752 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3826 -0.6680 -0.0776  0.5780  4.4658 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  460.5   21.46   
##  Residual             1759.2   41.94   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##               Estimate Std. Error t value
## (Intercept)   456.3309     7.2258  63.153
## gkselfconcraw   0.5371     0.1206   4.453
## 
## Correlation of Fixed Effects:
##             (Intr)
## gkselfcncrw -0.933

#L2 predictors

model.2a <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.2a)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49203.7  49242.5 -24595.9  49191.7     4750 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3557 -0.6580 -0.0825  0.5803  4.3301 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  464.1   21.54   
##  Residual             1739.3   41.70   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       465.9590     7.3134  63.713
## gkselfconcraw                       0.4849     0.1201   4.036
## classtype.facREGULAR CLASS         -9.6617     1.5291  -6.319
## classtype.facREGULAR + AIDE CLASS  -9.7393     1.5144  -6.431
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC
## gkselfcncrw -0.926              
## c.REGULARCL -0.161  0.056       
## c.REGULA+AC -0.154  0.048  0.527

model.2b <- lmer(gktmathss ~ gkselfconcraw + degree.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.2b)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + degree.fac + (1 | gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49244.4  49289.7 -24615.2  49230.4     4749 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3872 -0.6683 -0.0720  0.5802  4.4658 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  463.8   21.54   
##  Residual             1753.7   41.88   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)          457.2823     7.2292  63.255
## gkselfconcraw          0.5246     0.1206   4.351
## degree.facMASTERS     -1.0985     1.5939  -0.689
## degree.facMASTERS +   -6.0048     4.2379  -1.417
## degree.facSPECIALIST  28.9689     8.6214   3.360
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc d.MAST d.MAS+
## gkselfcncrw -0.930                     
## dgr.MASTERS -0.045 -0.031              
## dg.MASTERS+ -0.008 -0.017  0.141       
## d.SPECIALIS  0.023 -0.044  0.113  0.017

model.2ab <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.2ab)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |  
##     gkschid)
##    Data: star.clean
## 
##      AIC      BIC   logLik deviance df.resid 
##  49202.0  49260.2 -24592.0  49184.0     4747 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3594 -0.6586 -0.0782  0.5862  4.3409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  465.1   21.57   
##  Residual             1736.4   41.67   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       466.0099     7.3119  63.733
## gkselfconcraw                       0.4775     0.1202   3.972
## classtype.facREGULAR CLASS         -9.1386     1.5455  -5.913
## classtype.facREGULAR + AIDE CLASS  -9.2654     1.5292  -6.059
## degree.facMASTERS                  -0.1069     1.5930  -0.067
## degree.facMASTERS +                -5.2694     4.2195  -1.249
## degree.facSPECIALIST               21.2655     8.6539   2.457
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.924                                   
## c.REGULARCL -0.156  0.055                            
## c.REGULA+AC -0.150  0.046  0.538                     
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078              
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013  0.143       
## d.SPECIALIS  0.000 -0.036  0.115  0.114  0.099  0.014

library(modelsummary)

## 
## Attaching package: 'modelsummary'

## The following object is masked from 'package:Hmisc':
## 
##     Mean

library(broom.mixed)

models <- list(model.null, model.1, model.2a, model.2b, model.2ab)
modelsummary(models)

	Model 1	Model 2	Model 3	Model 4	Model 5
(Intercept)	485.500	456.331	465.959	457.282	466.010
	(2.459)	(7.226)	(7.313)	(7.229)	(7.312)
gkselfconcraw		0.537	0.485	0.525	0.477
		(0.121)	(0.120)	(0.121)	(0.120)
classtype.facREGULAR CLASS			−9.662		−9.139
			(1.529)		(1.545)
classtype.facREGULAR + AIDE CLASS			−9.739		−9.265
			(1.514)		(1.529)
degree.facMASTERS				−1.098	−0.107
				(1.594)	(1.593)
degree.facMASTERS +				−6.005	−5.269
				(4.238)	(4.220)
degree.facSPECIALIST				28.969	21.265
				(8.621)	(8.654)
SD (Intercept gkschid)	21.231	21.460	21.543	21.535	21.567
SD (Observations)	42.736	41.943	41.705	41.878	41.670
Num.Obs.	5871	4756	4756	4756	4756
R2 Marg.	0.000	0.003	0.012	0.006	0.014
R2 Cond.	0.198	0.210	0.220	0.214	0.222
AIC	60985.9	49251.3	49197.4	49229.5	49182.1
BIC	61006.0	49277.2	49236.2	49274.8	49240.3
ICC	0.2	0.2	0.2	0.2	0.2
RMSE	42.46	41.64	41.40	41.57	41.37

Use the `augment` function from the `broom.mixed` Package to Get Predictions, Residuals, Cook’s Distances, Etc.:

library(broom.mixed)

diagnostics <- augment(model.2ab)

#Check normality of residuals via histogram

ggplot(data = diagnostics, mapping = aes(x = .resid)) +
  geom_histogram(binwidth = .25) + theme_classic() + 
  labs(title = "Histogram of Residuals for Math Score Model",
                      x = "Residual Value") +
  geom_vline(xintercept = c(-125, 125), linetype = "dotted")

Shapiro-Wilk Test

shapiro.test(diagnostics$.resid)

## 
##  Shapiro-Wilk normality test
## 
## data:  diagnostics$.resid
## W = 0.98919, p-value < 2.2e-16

**Suggests non-normality

Step 2: Use Residuals vs. Fitted (RVF) Plot to Assess Homoskedasticity of Errors

Basic RVF Plot, Without Groups

ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .resid)) +
  geom_point() + labs(title = "RVF Plot for Math Scores Model",
                      x = "Predicted Value, Math Scores",
                      y = "Residual Value") + theme_classic()

Use Cook’s Distance to Identify Outliers

ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .cooksd, label = gkschid)) +
  geom_point() + geom_text(nudge_x = .25) + theme_classic() + 
  labs(title = "Cook's Distance Plot for Math Scores Model",
                      x = "Predicted Value, Math Scores",
                      y = "Cook's Distance") + 
  geom_hline(yintercept = 4/4756, linetype = "dotted")

##RE-RUNNING WITH ROBUST STANDARD ERRORS

library(robustlmm)

## Warning: package 'robustlmm' was built under R version 4.2.2

model.robust <- rlmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data = star.clean) 
summary(model.robust)

## Robust linear mixed model fit by DAStau 
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |      gkschid) 
##    Data: star.clean 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.4841 -0.6398 -0.0407  0.6306  4.6094 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  471.5   21.71   
##  Residual             1624.9   40.31   
## Number of obs: 4756, groups: gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       463.3663     7.2912   63.55
## gkselfconcraw                       0.4916     0.1193    4.12
## classtype.facREGULAR CLASS         -7.7881     1.5335   -5.08
## classtype.facREGULAR + AIDE CLASS  -8.2842     1.5173   -5.46
## degree.facMASTERS                  -0.5478     1.5820   -0.35
## degree.facMASTERS +                -4.8965     4.1885   -1.17
## degree.facSPECIALIST               23.4328     8.5932    2.73
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.919                                   
## c.REGULARCL -0.156  0.055                            
## c.REGULA+AC -0.150  0.046  0.538                     
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078              
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013  0.143       
## d.SPECIALIS  0.000 -0.036  0.115  0.114  0.100  0.014
## 
## Robustness weights for the residuals: 
##  3779 weights are ~= 1. The remaining 977 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.292   0.638   0.817   0.778   0.931   0.999 
## 
## Robustness weights for the random effects: 
##  59 weights are ~= 1. The remaining 14 ones are summarized as
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.514   0.753   0.885   0.820   0.926   0.995 
## 
## Rho functions used for fitting:
##   Residuals:
##     eff: smoothed Huber (k = 1.345, s = 10) 
##     sig: smoothed Huber, Proposal 2 (k = 1.345, s = 10) 
##   Random Effects, variance component 1 (gkschid):
##     eff: smoothed Huber (k = 1.345, s = 10) 
##     vcp: smoothed Huber, Proposal 2 (k = 1.345, s = 10)

##RUNNING WITH OUTLIERS REMOVED (COOK’s DISTANCE >0.20)

star.trimmed <- diagnostics %>% 
  filter(., .cooksd <.20)

model.trimmed <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data=star.trimmed)
summary(model.trimmed)

## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |  
##     gkschid)
##    Data: star.trimmed
## 
##      AIC      BIC   logLik deviance df.resid 
##  49202.0  49260.2 -24592.0  49184.0     4747 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3594 -0.6586 -0.0782  0.5862  4.3409 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  gkschid  (Intercept)  465.1   21.57   
##  Residual             1736.4   41.67   
## Number of obs: 4756, groups:  gkschid, 73
## 
## Fixed effects:
##                                   Estimate Std. Error t value
## (Intercept)                       466.0099     7.3119  63.733
## gkselfconcraw                       0.4775     0.1202   3.972
## classtype.facREGULAR CLASS         -9.1386     1.5455  -5.913
## classtype.facREGULAR + AIDE CLASS  -9.2654     1.5292  -6.059
## degree.facMASTERS                  -0.1069     1.5930  -0.067
## degree.facMASTERS +                -5.2694     4.2195  -1.249
## degree.facSPECIALIST               21.2655     8.6539   2.457
## 
## Correlation of Fixed Effects:
##             (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.924                                   
## c.REGULARCL -0.156  0.055                            
## c.REGULA+AC -0.150  0.046  0.538                     
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078              
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013  0.143       
## d.SPECIALIS  0.000 -0.036  0.115  0.114  0.099  0.014

##CREATE TABLE COMPARING ORIGINAL FINAL MODEL, ROBUST MODEL, & TRIMMED MODEL

library(modelsummary)

models <- list(model.2ab, model.robust, model.trimmed)
modelsummary(models, output = "markdown")

	Model 1	Model 2	Model 3
(Intercept)	466.010	463.366	466.010
	(7.312)	(7.291)	(7.312)
gkselfconcraw	0.477	0.492	0.477
	(0.120)	(0.119)	(0.120)
classtype.facREGULAR CLASS	-9.139	-7.788	-9.139
	(1.545)	(1.533)	(1.545)
classtype.facREGULAR + AIDE CLASS	-9.265	-8.284	-9.265
	(1.529)	(1.517)	(1.529)
degree.facMASTERS	-0.107	-0.548	-0.107
	(1.593)	(1.582)	(1.593)
degree.facMASTERS +	-5.269	-4.896	-5.269
	(4.220)	(4.189)	(4.220)
degree.facSPECIALIST	21.265	23.433	21.265
	(8.654)	(8.593)	(8.654)
SD (Intercept gkschid)	21.567	21.714	21.567
SD (Observations)	41.670	40.310	41.670
Num.Obs.	4756	4756	4756
R2 Marg.	0.014	0.013	0.014
R2 Cond.	0.222	0.235	0.222
AIC	49182.1		49182.1
BIC	49240.3		49240.3
ICC	0.2	0.2	0.2
RMSE	41.37	41.45	41.37

Module 10 Content Review

Camie Tomlinson

2022-11-20

Use the `augment` function from the `broom.mixed` Package to Get Predictions, Residuals, Cook’s Distances, Etc.:

Shapiro-Wilk Test

Step 2: Use Residuals vs. Fitted (RVF) Plot to Assess Homoskedasticity of Errors

Basic RVF Plot, Without Groups

Use Cook’s Distance to Identify Outliers

Module 10 Content Review

Camie Tomlinson

2022-11-20

Use the augment function from the broom.mixed Package to Get Predictions, Residuals, Cook’s Distances, Etc.:

Shapiro-Wilk Test

Step 2: Use Residuals vs. Fitted (RVF) Plot to Assess Homoskedasticity of Errors

Basic RVF Plot, Without Groups

Use Cook’s Distance to Identify Outliers

Use the `augment` function from the `broom.mixed` Package to Get Predictions, Residuals, Cook’s Distances, Etc.: