##Load packages
suppressPackageStartupMessages(library(tidyverse))
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(Hmisc)
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
##
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
##
## src, summarize
## The following objects are masked from 'package:base':
##
## format.pval, units
##PART 1: LOADING IN DATA & RECODING
#Load in Data
star <- haven::read_dta("projectSTAR.dta")
glimpse(star)
## Rows: 6,325
## Columns: 27
## $ stdntid <dbl> 10001, 10133, 10246, 10263, 10266, 10275, 10281, 10282, …
## $ race <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ gender <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2…
## $ FLAGSGK <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ flaggk <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ gkclasstype <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2…
## $ gkschid <dbl> 169229, 169280, 218562, 205492, 257899, 161176, 189382, …
## $ gksurban <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3…
## $ gktchid <dbl> 16922904, 16928003, 21856202, 20549204, 25789904, 161176…
## $ gktgen <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gktrace <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1…
## $ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2…
## $ gktcareer <dbl+lbl> 4, 4, 4, 4, 4, NA, 4, 4, 4, 4, 4, 4, 4, …
## $ gktyears <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16, 12, 5, 17, 10, 6…
## $ gkclasssize <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23, 23, 22, 20, 24, …
## $ gkfreelunch <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2…
## $ gkrepeat <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ gkspeced <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gkspecin <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2…
## $ gkpresent <dbl> 161, 175, 132, 178, 170, 94, 160, 154, 172, 95, 163, 172…
## $ gkabsent <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8, 0, 31, 7, NA, 20…
## $ gktreadss <dbl> NA, 427, 450, 483, 456, 411, 443, 448, 463, 472, 428, 54…
## $ gktmathss <dbl> NA, 478, 494, 513, 513, 468, 473, 449, 520, 536, 484, 62…
## $ gktlistss <dbl> NA, 509, 549, 554, 520, 571, 595, 540, 565, 595, NA, 622…
## $ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444, 480, 486, 423, 52…
## $ gkmotivraw <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27, 24, 24, 23, 28, …
## $ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61, 55, 49, 49, 59, …
str((star))
## tibble [6,325 × 27] (S3: tbl_df/tbl/data.frame)
## $ stdntid : num [1:6325] 10001 10133 10246 10263 10266 ...
## ..- attr(*, "label")= chr "STUDENT ID"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ race : dbl+lbl [1:6325] 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1,...
## ..@ label : chr "STUDENT RACE/ETHNICITY"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:6] 1 2 3 4 5 6
## .. ..- attr(*, "names")= chr [1:6] "WHITE" "BLACK" "ASIAN" "HISPANIC" ...
## $ gender : dbl+lbl [1:6325] 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2,...
## ..@ label : chr "STUDENT GENDER"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
## $ FLAGSGK : dbl+lbl [1:6325] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## ..@ label : chr "IN STAR IN KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 0 1
## .. ..- attr(*, "names")= chr [1:2] "NO" "YES"
## $ flaggk : dbl+lbl [1:6325] 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1,...
## ..@ label : chr "ACHIEVEMENT DATA AVAILABLE GRADE K"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 0 1
## .. ..- attr(*, "names")= chr [1:2] "NO" "YES"
## $ gkclasstype : dbl+lbl [1:6325] 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2,...
## ..@ label : chr "CLASSROOM TYPE KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:3] 1 2 3
## .. ..- attr(*, "names")= chr [1:3] "SMALL CLASS" "REGULAR CLASS" "REGULAR + AIDE CLASS"
## $ gkschid : num [1:6325] 169229 169280 218562 205492 257899 ...
## ..- attr(*, "label")= chr "KINDERGARTEN SCHOOL ID"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gksurban : dbl+lbl [1:6325] 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3,...
## ..@ label : chr "SCHOOL URBANICITY KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:4] 1 2 3 4
## .. ..- attr(*, "names")= chr [1:4] "INNER CITY" "SUBURBAN" "RURAL" "URBAN"
## $ gktchid : num [1:6325] 16922904 16928003 21856202 20549204 25789904 ...
## ..- attr(*, "label")= chr "KINDERGARTEN TEACHER ID"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gktgen : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
## ..@ label : chr "TEACHER GENDER KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "MALE" "FEMALE"
## $ gktrace : dbl+lbl [1:6325] 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, ...
## ..@ label : chr "TEACHER RACE/ETHNICITY KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:6] 1 2 3 4 5 6
## .. ..- attr(*, "names")= chr [1:6] "WHITE" "BLACK" "ASIAN" "HISPANIC" ...
## $ gkthighdegree: dbl+lbl [1:6325] 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3, ...
## ..@ label : chr "TEACHER HIGHEST DEGREE KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:6] 1 2 3 4 5 6
## .. ..- attr(*, "names")= chr [1:6] "ASSOCIATES" "BACHELORS" "MASTERS" "MASTERS +" ...
## $ gktcareer : dbl+lbl [1:6325] 4, 4, 4, 4, 4, NA, 4, 4, 4, 4, 4, 4, 4, ...
## ..@ label : chr "TEACHER CAREER LADDER LEVEL KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:7] 1 2 3 4 5 6 7
## .. ..- attr(*, "names")= chr [1:7] "CHOSE NO TO BE ON CAREER LADDER" "APPRENTICE" "PROBATION" "LADDER LEVEL 1" ...
## $ gktyears : num [1:6325] 5 7 8 3 12 2 7 14 4 6 ...
## ..- attr(*, "label")= chr "YEARS OF TOTAL TEACHING EXPERIENCE KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gkclasssize : num [1:6325] 24 22 17 17 24 24 13 24 14 23 ...
## ..- attr(*, "label")= chr "CLASS SIZE KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gkfreelunch : dbl+lbl [1:6325] 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2,...
## ..@ label : chr "FREE/REDUCED LUNCH STATUS KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "FREE LUNCH" "NON-FREE LUNCH"
## $ gkrepeat : dbl+lbl [1:6325] 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
## ..@ label : chr "REPEATING KINDERGARTEN IN 1985-1986 SCHOOL YEAR"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "YES, PROMOTION RECOMMENDED" "NO, PROMOTION NOT RECOMMENDED"
## $ gkspeced : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
## ..@ label : chr "SPECIAL EDUCATION STATUS KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "YES" "NO"
## $ gkspecin : dbl+lbl [1:6325] 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2,...
## ..@ label : chr "PULLED OUT FOR SPECIAL INSTRUCTION KINDERGARTEN"
## ..@ format.stata: chr "%10.0g"
## ..@ labels : Named num [1:2] 1 2
## .. ..- attr(*, "names")= chr [1:2] "YES" "NO"
## $ gkpresent : num [1:6325] 161 175 132 178 170 94 160 154 172 95 ...
## ..- attr(*, "label")= chr "DAYS PRESENT AT SCHOOL KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gkabsent : num [1:6325] 19 5 28 2 10 3 2 7 8 2 ...
## ..- attr(*, "label")= chr "DAYS ABSENT FROM SCHOOL KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gktreadss : num [1:6325] NA 427 450 483 456 411 443 448 463 472 ...
## ..- attr(*, "label")= chr "TOTAL READING SCALE SCORE SAT KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gktmathss : num [1:6325] NA 478 494 513 513 468 473 449 520 536 ...
## ..- attr(*, "label")= chr "TOTAL MATH SCALE SCORE SAT KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gktlistss : num [1:6325] NA 509 549 554 520 571 595 540 565 595 ...
## ..- attr(*, "label")= chr "TOTAL LISTENING SCALE SCORE SAT KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gkwordskillss: num [1:6325] NA 418 444 431 468 396 444 444 480 486 ...
## ..- attr(*, "label")= chr "WORD STUDY SKILLS SCALE SCORE SAT KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gkmotivraw : num [1:6325] 23 24 28 27 25 24 NA NA 26 27 ...
## ..- attr(*, "label")= chr "MOTIVATION RAW SCORE SCAMIN KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
## $ gkselfconcraw: num [1:6325] 52 53 56 61 54 55 NA NA 52 61 ...
## ..- attr(*, "label")= chr "SELF-CONCEPT RAW SCORE SCAMIN KINDERGARTEN"
## ..- attr(*, "format.stata")= chr "%10.0g"
#Data cleaning
star.clean <- star %>%
mutate(.,
classtype.fac = as_factor(gkclasstype),
degree.fac = as_factor(gkthighdegree))
glimpse(star.clean)
## Rows: 6,325
## Columns: 29
## $ stdntid <dbl> 10001, 10133, 10246, 10263, 10266, 10275, 10281, 10282, …
## $ race <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1…
## $ gender <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1, 1, 2, 1, 2, 2…
## $ FLAGSGK <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ flaggk <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1…
## $ gkclasstype <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2, 3, 3, 3, 1, 2…
## $ gkschid <dbl> 169229, 169280, 218562, 205492, 257899, 161176, 189382, …
## $ gksurban <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2, 2, 2, 3, 3, 3…
## $ gktchid <dbl> 16922904, 16928003, 21856202, 20549204, 25789904, 161176…
## $ gktgen <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gktrace <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 1, 1, 1, 1…
## $ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3, 2, 2, 2, 2, 2…
## $ gktcareer <dbl+lbl> 4, 4, 4, 4, 4, NA, 4, 4, 4, 4, 4, 4, 4, …
## $ gktyears <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16, 12, 5, 17, 10, 6…
## $ gkclasssize <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23, 23, 22, 20, 24, …
## $ gkfreelunch <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 2…
## $ gkrepeat <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ gkspeced <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ gkspecin <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1, 2, 2, 2, 2, 2…
## $ gkpresent <dbl> 161, 175, 132, 178, 170, 94, 160, 154, 172, 95, 163, 172…
## $ gkabsent <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8, 0, 31, 7, NA, 20…
## $ gktreadss <dbl> NA, 427, 450, 483, 456, 411, 443, 448, 463, 472, 428, 54…
## $ gktmathss <dbl> NA, 478, 494, 513, 513, 468, 473, 449, 520, 536, 484, 62…
## $ gktlistss <dbl> NA, 509, 549, 554, 520, 571, 595, 540, 565, 595, NA, 622…
## $ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444, 480, 486, 423, 52…
## $ gkmotivraw <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27, 24, 24, 23, 28, …
## $ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61, 55, 49, 49, 59, …
## $ classtype.fac <fct> REGULAR + AIDE CLASS, REGULAR + AIDE CLASS, REGULAR + AI…
## $ degree.fac <fct> BACHELORS, BACHELORS, MASTERS, BACHELORS, MASTERS, BACHE…
summary(star.clean$degree.fac)
## ASSOCIATES BACHELORS MASTERS MASTERS + SPECIALIST DOCTORAL NA's
## 0 4119 1981 161 43 0 21
summary(star.clean$classtype.fac)
## SMALL CLASS REGULAR CLASS REGULAR + AIDE CLASS
## 1900 2194 2231
summary(star.clean$gktmathss)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 288.0 454.0 484.0 485.4 513.0 626.0 454
##MODEL BUILDING
#Step 1: Null model
model.null <- lmer(gktmathss ~ (1|gkschid), REML = FALSE, data = star.clean)
summary(model.null)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: gktmathss ~ (1 | gkschid)
## Data: star.clean
##
## AIC BIC logLik deviance df.resid
## 60989.6 61009.6 -30491.8 60983.6 5868
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.6470 -0.6540 -0.0636 0.5819 4.3669
##
## Random effects:
## Groups Name Variance Std.Dev.
## gkschid (Intercept) 450.8 21.23
## Residual 1826.4 42.74
## Number of obs: 5871, groups: gkschid, 79
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 485.500 2.459 197.5
null.ICC <- 450.8/(450.8+1826.4)
null.ICC
## [1] 0.1979624
#L1 predictor
model.1 <- lmer(gktmathss ~ gkselfconcraw + (1|gkschid), REML = FALSE, data = star.clean)
summary(model.1)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + (1 | gkschid)
## Data: star.clean
##
## AIC BIC logLik deviance df.resid
## 49252.7 49278.5 -24622.3 49244.7 4752
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3826 -0.6680 -0.0776 0.5780 4.4658
##
## Random effects:
## Groups Name Variance Std.Dev.
## gkschid (Intercept) 460.5 21.46
## Residual 1759.2 41.94
## Number of obs: 4756, groups: gkschid, 73
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 456.3309 7.2258 63.153
## gkselfconcraw 0.5371 0.1206 4.453
##
## Correlation of Fixed Effects:
## (Intr)
## gkselfcncrw -0.933
#L2 predictors
model.2a <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + (1|gkschid), REML = FALSE, data = star.clean)
summary(model.2a)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + (1 | gkschid)
## Data: star.clean
##
## AIC BIC logLik deviance df.resid
## 49203.7 49242.5 -24595.9 49191.7 4750
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3557 -0.6580 -0.0825 0.5803 4.3301
##
## Random effects:
## Groups Name Variance Std.Dev.
## gkschid (Intercept) 464.1 21.54
## Residual 1739.3 41.70
## Number of obs: 4756, groups: gkschid, 73
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 465.9590 7.3134 63.713
## gkselfconcraw 0.4849 0.1201 4.036
## classtype.facREGULAR CLASS -9.6617 1.5291 -6.319
## classtype.facREGULAR + AIDE CLASS -9.7393 1.5144 -6.431
##
## Correlation of Fixed Effects:
## (Intr) gkslfc c.REGC
## gkselfcncrw -0.926
## c.REGULARCL -0.161 0.056
## c.REGULA+AC -0.154 0.048 0.527
model.2b <- lmer(gktmathss ~ gkselfconcraw + degree.fac + (1|gkschid), REML = FALSE, data = star.clean)
summary(model.2b)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + degree.fac + (1 | gkschid)
## Data: star.clean
##
## AIC BIC logLik deviance df.resid
## 49244.4 49289.7 -24615.2 49230.4 4749
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3872 -0.6683 -0.0720 0.5802 4.4658
##
## Random effects:
## Groups Name Variance Std.Dev.
## gkschid (Intercept) 463.8 21.54
## Residual 1753.7 41.88
## Number of obs: 4756, groups: gkschid, 73
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 457.2823 7.2292 63.255
## gkselfconcraw 0.5246 0.1206 4.351
## degree.facMASTERS -1.0985 1.5939 -0.689
## degree.facMASTERS + -6.0048 4.2379 -1.417
## degree.facSPECIALIST 28.9689 8.6214 3.360
##
## Correlation of Fixed Effects:
## (Intr) gkslfc d.MAST d.MAS+
## gkselfcncrw -0.930
## dgr.MASTERS -0.045 -0.031
## dg.MASTERS+ -0.008 -0.017 0.141
## d.SPECIALIS 0.023 -0.044 0.113 0.017
model.2ab <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data = star.clean)
summary(model.2ab)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |
## gkschid)
## Data: star.clean
##
## AIC BIC logLik deviance df.resid
## 49202.0 49260.2 -24592.0 49184.0 4747
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3594 -0.6586 -0.0782 0.5862 4.3409
##
## Random effects:
## Groups Name Variance Std.Dev.
## gkschid (Intercept) 465.1 21.57
## Residual 1736.4 41.67
## Number of obs: 4756, groups: gkschid, 73
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 466.0099 7.3119 63.733
## gkselfconcraw 0.4775 0.1202 3.972
## classtype.facREGULAR CLASS -9.1386 1.5455 -5.913
## classtype.facREGULAR + AIDE CLASS -9.2654 1.5292 -6.059
## degree.facMASTERS -0.1069 1.5930 -0.067
## degree.facMASTERS + -5.2694 4.2195 -1.249
## degree.facSPECIALIST 21.2655 8.6539 2.457
##
## Correlation of Fixed Effects:
## (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.924
## c.REGULARCL -0.156 0.055
## c.REGULA+AC -0.150 0.046 0.538
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013 0.143
## d.SPECIALIS 0.000 -0.036 0.115 0.114 0.099 0.014
library(modelsummary)
##
## Attaching package: 'modelsummary'
## The following object is masked from 'package:Hmisc':
##
## Mean
library(broom.mixed)
models <- list(model.null, model.1, model.2a, model.2b, model.2ab)
modelsummary(models)
| Model 1 | Model 2 | Model 3 | Model 4 | Model 5 | |
|---|---|---|---|---|---|
| (Intercept) | 485.500 | 456.331 | 465.959 | 457.282 | 466.010 |
| (2.459) | (7.226) | (7.313) | (7.229) | (7.312) | |
| gkselfconcraw | 0.537 | 0.485 | 0.525 | 0.477 | |
| (0.121) | (0.120) | (0.121) | (0.120) | ||
| classtype.facREGULAR CLASS | −9.662 | −9.139 | |||
| (1.529) | (1.545) | ||||
| classtype.facREGULAR + AIDE CLASS | −9.739 | −9.265 | |||
| (1.514) | (1.529) | ||||
| degree.facMASTERS | −1.098 | −0.107 | |||
| (1.594) | (1.593) | ||||
| degree.facMASTERS + | −6.005 | −5.269 | |||
| (4.238) | (4.220) | ||||
| degree.facSPECIALIST | 28.969 | 21.265 | |||
| (8.621) | (8.654) | ||||
| SD (Intercept gkschid) | 21.231 | 21.460 | 21.543 | 21.535 | 21.567 |
| SD (Observations) | 42.736 | 41.943 | 41.705 | 41.878 | 41.670 |
| Num.Obs. | 5871 | 4756 | 4756 | 4756 | 4756 |
| R2 Marg. | 0.000 | 0.003 | 0.012 | 0.006 | 0.014 |
| R2 Cond. | 0.198 | 0.210 | 0.220 | 0.214 | 0.222 |
| AIC | 60985.9 | 49251.3 | 49197.4 | 49229.5 | 49182.1 |
| BIC | 61006.0 | 49277.2 | 49236.2 | 49274.8 | 49240.3 |
| ICC | 0.2 | 0.2 | 0.2 | 0.2 | 0.2 |
| RMSE | 42.46 | 41.64 | 41.40 | 41.57 | 41.37 |
augment function from the
broom.mixed Package to Get Predictions, Residuals, Cook’s
Distances, Etc.:library(broom.mixed)
diagnostics <- augment(model.2ab)
#Check normality of residuals via histogram
ggplot(data = diagnostics, mapping = aes(x = .resid)) +
geom_histogram(binwidth = .25) + theme_classic() +
labs(title = "Histogram of Residuals for Math Score Model",
x = "Residual Value") +
geom_vline(xintercept = c(-125, 125), linetype = "dotted")
shapiro.test(diagnostics$.resid)
##
## Shapiro-Wilk normality test
##
## data: diagnostics$.resid
## W = 0.98919, p-value < 2.2e-16
**Suggests non-normality
ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .resid)) +
geom_point() + labs(title = "RVF Plot for Math Scores Model",
x = "Predicted Value, Math Scores",
y = "Residual Value") + theme_classic()
ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .cooksd, label = gkschid)) +
geom_point() + geom_text(nudge_x = .25) + theme_classic() +
labs(title = "Cook's Distance Plot for Math Scores Model",
x = "Predicted Value, Math Scores",
y = "Cook's Distance") +
geom_hline(yintercept = 4/4756, linetype = "dotted")
##RE-RUNNING WITH ROBUST STANDARD ERRORS
library(robustlmm)
## Warning: package 'robustlmm' was built under R version 4.2.2
model.robust <- rlmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data = star.clean)
summary(model.robust)
## Robust linear mixed model fit by DAStau
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 | gkschid)
## Data: star.clean
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.4841 -0.6398 -0.0407 0.6306 4.6094
##
## Random effects:
## Groups Name Variance Std.Dev.
## gkschid (Intercept) 471.5 21.71
## Residual 1624.9 40.31
## Number of obs: 4756, groups: gkschid, 73
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 463.3663 7.2912 63.55
## gkselfconcraw 0.4916 0.1193 4.12
## classtype.facREGULAR CLASS -7.7881 1.5335 -5.08
## classtype.facREGULAR + AIDE CLASS -8.2842 1.5173 -5.46
## degree.facMASTERS -0.5478 1.5820 -0.35
## degree.facMASTERS + -4.8965 4.1885 -1.17
## degree.facSPECIALIST 23.4328 8.5932 2.73
##
## Correlation of Fixed Effects:
## (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.919
## c.REGULARCL -0.156 0.055
## c.REGULA+AC -0.150 0.046 0.538
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013 0.143
## d.SPECIALIS 0.000 -0.036 0.115 0.114 0.100 0.014
##
## Robustness weights for the residuals:
## 3779 weights are ~= 1. The remaining 977 ones are summarized as
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.292 0.638 0.817 0.778 0.931 0.999
##
## Robustness weights for the random effects:
## 59 weights are ~= 1. The remaining 14 ones are summarized as
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.514 0.753 0.885 0.820 0.926 0.995
##
## Rho functions used for fitting:
## Residuals:
## eff: smoothed Huber (k = 1.345, s = 10)
## sig: smoothed Huber, Proposal 2 (k = 1.345, s = 10)
## Random Effects, variance component 1 (gkschid):
## eff: smoothed Huber (k = 1.345, s = 10)
## vcp: smoothed Huber, Proposal 2 (k = 1.345, s = 10)
##RUNNING WITH OUTLIERS REMOVED (COOK’s DISTANCE >0.20)
star.trimmed <- diagnostics %>%
filter(., .cooksd <.20)
model.trimmed <- lmer(gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1|gkschid), REML = FALSE, data=star.trimmed)
summary(model.trimmed)
## Linear mixed model fit by maximum likelihood ['lmerMod']
## Formula: gktmathss ~ gkselfconcraw + classtype.fac + degree.fac + (1 |
## gkschid)
## Data: star.trimmed
##
## AIC BIC logLik deviance df.resid
## 49202.0 49260.2 -24592.0 49184.0 4747
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.3594 -0.6586 -0.0782 0.5862 4.3409
##
## Random effects:
## Groups Name Variance Std.Dev.
## gkschid (Intercept) 465.1 21.57
## Residual 1736.4 41.67
## Number of obs: 4756, groups: gkschid, 73
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 466.0099 7.3119 63.733
## gkselfconcraw 0.4775 0.1202 3.972
## classtype.facREGULAR CLASS -9.1386 1.5455 -5.913
## classtype.facREGULAR + AIDE CLASS -9.2654 1.5292 -6.059
## degree.facMASTERS -0.1069 1.5930 -0.067
## degree.facMASTERS + -5.2694 4.2195 -1.249
## degree.facSPECIALIST 21.2655 8.6539 2.457
##
## Correlation of Fixed Effects:
## (Intr) gkslfc c.REGC c.R+AC d.MAST d.MAS+
## gkselfcncrw -0.924
## c.REGULARCL -0.156 0.055
## c.REGULA+AC -0.150 0.046 0.538
## dgr.MASTERS -0.028 -0.036 -0.083 -0.078
## dg.MASTERS+ -0.003 -0.018 -0.033 -0.013 0.143
## d.SPECIALIS 0.000 -0.036 0.115 0.114 0.099 0.014
##CREATE TABLE COMPARING ORIGINAL FINAL MODEL, ROBUST MODEL, & TRIMMED MODEL
library(modelsummary)
models <- list(model.2ab, model.robust, model.trimmed)
modelsummary(models, output = "markdown")
| Model 1 | Model 2 | Model 3 | |
|---|---|---|---|
| (Intercept) | 466.010 | 463.366 | 466.010 |
| (7.312) | (7.291) | (7.312) | |
| gkselfconcraw | 0.477 | 0.492 | 0.477 |
| (0.120) | (0.119) | (0.120) | |
| classtype.facREGULAR CLASS | -9.139 | -7.788 | -9.139 |
| (1.545) | (1.533) | (1.545) | |
| classtype.facREGULAR + AIDE CLASS | -9.265 | -8.284 | -9.265 |
| (1.529) | (1.517) | (1.529) | |
| degree.facMASTERS | -0.107 | -0.548 | -0.107 |
| (1.593) | (1.582) | (1.593) | |
| degree.facMASTERS + | -5.269 | -4.896 | -5.269 |
| (4.220) | (4.189) | (4.220) | |
| degree.facSPECIALIST | 21.265 | 23.433 | 21.265 |
| (8.654) | (8.593) | (8.654) | |
| SD (Intercept gkschid) | 21.567 | 21.714 | 21.567 |
| SD (Observations) | 41.670 | 40.310 | 41.670 |
| Num.Obs. | 4756 | 4756 | 4756 |
| R2 Marg. | 0.014 | 0.013 | 0.014 |
| R2 Cond. | 0.222 | 0.235 | 0.222 |
| AIC | 49182.1 | 49182.1 | |
| BIC | 49240.3 | 49240.3 | |
| ICC | 0.2 | 0.2 | 0.2 |
| RMSE | 41.37 | 41.45 | 41.37 |