This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.
Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Ctrl+Shift+Enter.
plot(cars)
library(tidyverse)
-- Attaching packages --------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.2     v purrr   0.3.4
v tibble  3.0.3     v dplyr   1.0.2
v tidyr   1.1.1     v stringr 1.4.0
v readr   1.3.1     v forcats 0.5.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(lme4)
Loading required package: Matrix
Attaching package: 'Matrix'
The following objects are masked from 'package:tidyr':
    expand, pack, unpack
library(Hmisc)
Loading required package: lattice
Loading required package: survival
Loading required package: Formula
Attaching package: 'Hmisc'
The following objects are masked from 'package:dplyr':
    src, summarize
The following objects are masked from 'package:base':
    format.pval, units
library(haven)  # read_dta() is from haven, which is installed with the tidyverse but not attached by library(tidyverse)
projectSTAR_1_ <- read_dta("EDUS 651/projectSTAR (1).dta")
glimpse(projectSTAR_1_)
Rows: 6,325
Columns: 27
$ stdntid <dbl> 10001, 10133, 10246, 10263, 10266, 10275, ...
$ race <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 2,...
$ gender <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1, 1,...
$ FLAGSGK <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ flaggk <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ gkclasstype <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3, 2,...
$ gkschid <dbl> 169229, 169280, 218562, 205492, 257899, 16...
$ gksurban <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2, 2,...
$ gktchid <dbl> 16922904, 16928003, 21856202, 20549204, 25...
$ gktgen <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
$ gktrace <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2, 3,...
$ gktcareer <dbl+lbl> 4, 4, 4, 4, 4, NA, 4, 4, 4, 4...
$ gktyears <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16, 12...
$ gkclasssize <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23, 23...
$ gkfreelunch <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2, 2,...
$ gkrepeat <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,...
$ gkspeced <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,...
$ gkspecin <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 1,...
$ gkpresent <dbl> 161, 175, 132, 178, 170, 94, 160, 154, 172...
$ gkabsent <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8, 0,...
$ gktreadss <dbl> NA, 427, 450, 483, 456, 411, 443, 448, 463...
$ gktmathss <dbl> NA, 478, 494, 513, 513, 468, 473, 449, 520...
$ gktlistss <dbl> NA, 509, 549, 554, 520, 571, 595, 540, 565...
$ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444, 480...
$ gkmotivraw <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27, 24...
$ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61, 55...
clean.projectSTAR_1 <- projectSTAR_1_ %>%
  mutate(
    gkclasstype.fac = as_factor(gkclasstype),      # convert Stata value labels to factor levels
    gkthighdegree.fac = as_factor(gkthighdegree),
    classid = gktchid                              # teacher ID doubles as the classroom ID
  )
glimpse(clean.projectSTAR_1)
Rows: 6,325
Columns: 30
$ stdntid <dbl> 10001, 10133, 10246, 10263, 10266, 102...
$ race <dbl+lbl> 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1...
$ gender <dbl+lbl> 1, 1, 2, 2, 1, 1, 1, 1, 2, 1, 1, 1...
$ FLAGSGK <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ flaggk <dbl+lbl> 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ gkclasstype <dbl+lbl> 3, 3, 3, 1, 2, 3, 1, 3, 1, 2, 3, 3...
$ gkschid <dbl> 169229, 169280, 218562, 205492, 257899...
$ gksurban <dbl+lbl> 2, 2, 4, 2, 3, 3, 2, 2, 3, 3, 3, 2...
$ gktchid <dbl> 16922904, 16928003, 21856202, 20549204...
$ gktgen <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ gktrace <dbl+lbl> 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ gkthighdegree <dbl+lbl> 2, 2, 3, 2, 3, 2, 3, 3, 3, 3, 2, 2...
$ gktcareer <dbl+lbl> 4, 4, 4, 4, 4, NA, 4, 4, 4...
$ gktyears <dbl> 5, 7, 8, 3, 12, 2, 7, 14, 4, 6, 11, 16...
$ gkclasssize <dbl> 24, 22, 17, 17, 24, 24, 13, 24, 14, 23...
$ gkfreelunch <dbl+lbl> 2, 2, 2, 1, 2, 2, 2, 1, 1, 2, 2, 2...
$ gkrepeat <dbl+lbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
$ gkspeced <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ gkspecin <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2...
$ gkpresent <dbl> 161, 175, 132, 178, 170, 94, 160, 154,...
$ gkabsent <dbl> 19, 5, 28, 2, 10, 3, 2, 7, 8, 2, 17, 8...
$ gktreadss <dbl> NA, 427, 450, 483, 456, 411, 443, 448,...
$ gktmathss <dbl> NA, 478, 494, 513, 513, 468, 473, 449,...
$ gktlistss <dbl> NA, 509, 549, 554, 520, 571, 595, 540,...
$ gkwordskillss <dbl> NA, 418, 444, 431, 468, 396, 444, 444,...
$ gkmotivraw <dbl> 23, 24, 28, 27, 25, 24, NA, NA, 26, 27...
$ gkselfconcraw <dbl> 52, 53, 56, 61, 54, 55, NA, NA, 52, 61...
$ gkclasstype.fac <fct> REGULAR + AIDE CLASS, REGULAR + AIDE C...
$ gkthighdegree.fac <fct> BACHELORS, BACHELORS, MASTERS, BACHELO...
$ classid <dbl> 16922904, 16928003, 21856202, 20549204...
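As a quick check on the recoding (a minimal sketch added here, not part of the original output), cross-tabulating the numeric codes against the new factor confirms that as_factor() carried over the Stata value labels:
count(clean.projectSTAR_1, gkclasstype, gkclasstype.fac)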
1. Run a conditional random intercept model with math scores (gktmathss) as the DV, self-concept (gkselfconcraw) as a student-level IV, and classroom type (gkclasstype) and teacher highest degree (gkthighdegree) as teacher/classroom-level IVs. Save these model results for later.
model.1 <- lmer(gktmathss ~ gkselfconcraw + gkclasstype.fac + gkthighdegree.fac + (1 | classid),
                REML = FALSE, data = clean.projectSTAR_1)
summary(model.1)
Linear mixed model fit by maximum likelihood ['lmerMod']
Formula:
gktmathss ~ gkselfconcraw + gkclasstype.fac + gkthighdegree.fac +
(1 | classid)
Data: clean.projectSTAR_1
AIC BIC logLik deviance df.resid
49060.3 49118.5 -24521.1 49042.3 4747
Scaled residuals:
Min 1Q Median 3Q Max
-3.8420 -0.6386 -0.0638 0.5808 3.8959
Random effects:
Groups Name Variance Std.Dev.
classid (Intercept) 659.1 25.67
Residual 1550.2 39.37
Number of obs: 4756, groups: classid, 301
Fixed effects:
Estimate Std. Error t value
(Intercept) 466.5310 7.2874 64.019
gkselfconcraw 0.4449 0.1196 3.719
gkclasstype.facREGULAR CLASS -6.8528 3.8833 -1.765
gkclasstype.facREGULAR + AIDE CLASS -8.6284 3.8933 -2.216
gkthighdegree.facMASTERS 1.7043 3.5020 0.487
gkthighdegree.facMASTERS + 1.9140 9.9387 0.193
gkthighdegree.facSPECIALIST 23.2069 16.7318 1.387
Correlation of Fixed Effects:
(Intr) gkslfc g.REGC g.R+AC g.MAST g.MAS+
gkselfcncrw -0.924
g.REGULARCL -0.249 0.018
g.REGULA+AC -0.249 0.017 0.458
gkt.MASTERS -0.122 -0.008 -0.072 -0.066
gk.MASTERS+ -0.045 -0.005 -0.023 0.002 0.112
g.SPECIALIS -0.060 -0.005 0.101 0.101 0.056 0.022
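Before moving on, the variance components in the summary can be turned into an intraclass correlation (a sketch added here, not part of the assignment); by this calculation roughly 30% of the residual variation in math scores lies between classrooms:
# ICC from the variance components above: 659.1 / (659.1 + 1550.2) ≈ 0.30
vc <- as.data.frame(VarCorr(model.1))
vc$vcov[1] / sum(vc$vcov)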
library(broom.mixed)
diagnostics <- augment(model.1)
ggplot(data = diagnostics, mapping = aes(x = .resid)) +
geom_histogram(binwidth = .25) + theme_classic() +
labs(title = "Histogram of Residuals for Math Scores Model",
x = "Residual Value")
shapiro.test(diagnostics$.resid)
Shapiro-Wilk normality test
data: diagnostics$.resid
W = 0.99119, p-value < 2.2e-16
The residuals do not meet the normality assumption. The histogram is bimodal, and the Shapiro-Wilk test was significant, meaning the null hypothesis that the residuals are normally distributed can be rejected.
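As an additional check on normality (a sketch added here, using the same augmented data), a normal Q-Q plot makes the departure easier to judge than the histogram alone:
ggplot(data = diagnostics, mapping = aes(sample = .resid)) +
  stat_qq() + stat_qq_line() + theme_classic() +
  labs(title = "Q-Q Plot of Residuals for Math Scores Model")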
ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .resid)) +
geom_point() + labs(title = "RVF Plot for Math Scores Model",
x = "Predicted Value, Math Scores",
y = "Residual Value") + theme_classic()
There is funneling in the plot, which indicates heteroskedasticity. There also appears to be a curve, which is a sign of non-linearity.
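One way to check the suspected curve (a sketch added here, not part of the original assignment) is to overlay a loess smoother on the same RVF plot; a roughly flat line at zero would support linearity:
ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .resid)) +
  geom_point() + geom_smooth(method = "loess", se = FALSE) +
  labs(title = "RVF Plot with Loess Smoother, Math Scores Model",
       x = "Predicted Value, Math Scores",
       y = "Residual Value") + theme_classic()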
ggplot(data = diagnostics, mapping = aes(x = .fitted, y = .cooksd, label = classid)) +
geom_point() + geom_text(nudge_x = .25) + theme_classic() +
labs(title = "Cook's Distance Plot for Classroom Math Scores Model",
x = "Predicted Value, Math Scores",
y = "Cook's Distance") +
geom_hline(yintercept = 4/6325, linetype = "dotted")
I can't see the dotted line, and there is only one region where the observations cross from one side to the other. The cutoff of 4/6325 does not seem reasonable because it would exclude a large amount of data. There are a few outliers, though; perhaps a Cook's distance cutoff of 0.10 would work better. It is hard to tell because the points are clumped together.
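To put numbers on that impression, here is a minimal sketch (added here, not in the original) counting how many observations each cutoff would flag; note that Hmisc masks dplyr's summarize(), so the call is qualified:
dplyr::summarize(diagnostics,
                 over_4_n = sum(.cooksd > 4/6325),   # observations flagged by the 4/n rule
                 over_0.10 = sum(.cooksd > 0.10))    # observations flagged by the visual 0.10 cutoff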
library(robustlmm)
package 'robustlmm' was built under R version 4.0.3
model.robust <- rlmer(gktmathss ~ gkselfconcraw + gkclasstype.fac + gkthighdegree.fac + (1|classid), REML = FALSE, data = clean.projectSTAR_1)
summary(model.robust)
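The robust summary output is not shown above; as a quick follow-up (a sketch added here, assuming the default rlmer fit converged), the fixed effects of the two fits can be placed side by side to see how much the robust estimates move:
# Side-by-side fixed effects from the ML fit and the robust fit
cbind(ML = fixef(model.1), robust = fixef(model.robust))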