Using the variable READING as the dependent variable, use GENDER as a level-1 predictor, and use MeanSES as a level-2 predictor.
my_data <-read.table("~/Desktop/ecls.txt", fileEncoding="UTF-8-BOM", na.strings=c("","NA", "."))
names(my_data)[names(my_data) == "V1"] <- "childid"
names(my_data)[names(my_data) == "V2"] <- "S_ID"
names(my_data)[names(my_data) == "V3"] <- "gender"
names(my_data)[names(my_data) == "V4"] <- "math"
names(my_data)[names(my_data) == "V5"] <- "reading"
names(my_data)[names(my_data) == "V6"] <- "meanses"
names(my_data)[names(my_data) == "V7"] <- "pubpri"
names(my_data)[names(my_data) == "V8"] <- "risknum"
names(my_data)[names(my_data) == "V9"] <- "sample"
names(my_data)[names(my_data) == "V10"] <- "timemos"
colnames(my_data)
## [1] "childid" "S_ID" "gender" "math" "reading" "meanses" "pubpri"
## [8] "risknum" "sample" "timemos"
my_data$childid<-as.factor(my_data$childid)
my_data$S_ID<-as.factor(my_data$S_ID)
my_data$gender<-as.numeric(my_data$gender)
my_data$math<-as.numeric(my_data$math)
my_data$reading<-as.numeric(my_data$reading)
my_data$meanses<-as.numeric(my_data$meanses)
## Warning: NAs introduced by coercion
my_data$pubpri<-as.numeric(my_data$pubpri)
my_data$risknum<-as.numeric(my_data$risknum)
my_data$sample<-as.numeric(my_data$sample)
my_data$timemos<-as.numeric(my_data$timemos)
str(my_data)
## 'data.frame': 839 obs. of 10 variables:
## $ childid: Factor w/ 839 levels "15001","15003",..: 1 2 3 4 5 6 7 8 9 10 ...
## $ S_ID : Factor w/ 60 levels "15","37","53",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ gender : num 1 1 1 1 0 1 0 1 1 0 ...
## $ math : num 39.9 49.4 31.7 44.6 28.7 ...
## $ reading: num 54.2 48.2 42.8 47.2 46.3 ...
## $ meanses: num -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 ...
## $ pubpri : num 0 0 0 0 0 0 0 0 0 0 ...
## $ risknum: num 0 0 1 0 1 0 0 0 1 0 ...
## $ sample : num 4 4 4 4 4 4 4 4 4 4 ...
## $ timemos: num 20 20 20 20 20 20 20 20 20 20 ...
mod_0 <- lmer(reading ~ 1 + (1 |S_ID), data = my_data, REML = TRUE)
summary(mod_0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + (1 | S_ID)
## Data: my_data
##
## REML criterion at convergence: 6501.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.94590 -0.71673 -0.01708 0.69958 2.85534
##
## Random effects:
## Groups Name Variance Std.Dev.
## S_ID (Intercept) 51.02 7.142
## Residual 136.58 11.687
## Number of obs: 825, groups: S_ID, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 56.787 1.015 55.94
summary(mod_0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + (1 | S_ID)
## Data: my_data
##
## REML criterion at convergence: 6501.7
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.94590 -0.71673 -0.01708 0.69958 2.85534
##
## Random effects:
## Groups Name Variance Std.Dev.
## S_ID (Intercept) 51.02 7.142
## Residual 136.58 11.687
## Number of obs: 825, groups: S_ID, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 56.787 1.015 55.94
VarCorr(mod_0)
## Groups Name Std.Dev.
## S_ID (Intercept) 7.1425
## Residual 11.6868
vcov(mod_0, full = TRUE, ranpar = "var")
## 1 x 1 Matrix of class "dpoMatrix"
## (Intercept)
## (Intercept) 1.03063
mod_full <- lmer(reading ~ 1 + meanses + gender + meanses*gender +
(1 + gender|S_ID), data = my_data, REML = TRUE)
## boundary (singular) fit: see ?isSingular
summary(mod_full)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + meanses + gender + meanses * gender + (1 + gender |
## S_ID)
## Data: my_data
##
## REML criterion at convergence: 6454.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.67709 -0.70636 -0.02468 0.71716 2.88587
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## S_ID (Intercept) 2.625e+01 5.1232
## gender 6.302e-04 0.0251 1.00
## Residual 1.366e+02 11.6880
## Number of obs: 824, groups: S_ID, 60
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 56.8435 0.9132 62.247
## meanses 8.7030 1.6601 5.243
## gender -1.4299 0.8621 -1.659
## meanses:gender 0.8833 1.5418 0.573
##
## Correlation of Fixed Effects:
## (Intr) meanss gender
## meanses -0.164
## gender -0.491 0.109
## meanss:gndr 0.111 -0.488 -0.243
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
VarCorr(mod_full)
## Groups Name Std.Dev. Corr
## S_ID (Intercept) 5.123225
## gender 0.025104 1.000
## Residual 11.687959
vcov_full<-vcov(mod_full, full = TRUE, ranpar = "var")
vcov_full
## 4 x 4 Matrix of class "dpoMatrix"
## (Intercept) meanses gender meanses:gender
## (Intercept) 0.8339339 -0.2487025 -0.3866191 0.1555954
## meanses -0.2487025 2.7558375 0.1557326 -1.2486993
## gender -0.3866191 0.1557326 0.7432948 -0.3236076
## meanses:gender 0.1555954 -1.2486993 -0.3236076 2.3772256
vcov_full %>% cov2cor()
## 4 x 4 Matrix of class "dpoMatrix"
## (Intercept) meanses gender meanses:gender
## (Intercept) 1.0000000 -0.1640543 -0.4910627 0.1105085
## meanses -0.1640543 1.0000000 0.1088108 -0.4878607
## gender -0.4910627 0.1088108 1.0000000 -0.2434462
## meanses:gender 0.1105085 -0.4878607 -0.2434462 1.0000000
vc <- VarCorr(mod_full)
vc$S_ID
## (Intercept) gender
## (Intercept) 26.247433 0.1286129677
## gender 0.128613 0.0006302062
## attr(,"stddev")
## (Intercept) gender
## 5.12322490 0.02510391
## attr(,"correlation")
## (Intercept) gender
## (Intercept) 1 1
## gender 1 1
ICC(outcome = "reading", group = "S_ID", data = my_data)
## [1] 0.2719409
Level_1 <- HLMresid(mod_full, 1, type = "LS", sim = NULL, standardize = FALSE)
plot(Level_1$LS.resid ~ Level_1$fitted)
boxplot(Level_1$LS.resid)
Level_2<-HLMresid(mod_full, "S_ID", type = "LS", sim = NULL, standardize = FALSE)
plot(Level_2$gender)
plot(Level_2$`(Intercept)`)
boxplot(Level_2$`(Intercept)`)
boxplot(Level_2$gender)
residuals <- resid(mod_full)
hist(residuals)
qqnorm(residuals)
qqline(residuals)
plot(resid(mod_full) ~ fitted(mod_full))
1.The value of the ICC is 0.27, which tells us that 27% of the variance in reading scores is from school ID.
2.
Reading ij = Y 00 + Y 01 MeanSES + Y 10 Gender + Y 11 MeanSES * Gender + u 0+ u 1Gender + r 1j
Reading ij = 56.84 + 8.70 MeanSES + -.143 Gender + 0.88 MeanSES * Gender + u 0+ u 1Gender + r 1j
Covariance matrices with sibstituted estimates:
vcov(mod_0, full = TRUE, ranpar = "var")
## 1 x 1 Matrix of class "dpoMatrix"
## (Intercept)
## (Intercept) 1.03063
print(vcov_full)
## 4 x 4 Matrix of class "dpoMatrix"
## (Intercept) meanses gender meanses:gender
## (Intercept) 0.8339339 -0.2487025 -0.3866191 0.1555954
## meanses -0.2487025 2.7558375 0.1557326 -1.2486993
## gender -0.3866191 0.1557326 0.7432948 -0.3236076
## meanses:gender 0.1555954 -1.2486993 -0.3236076 2.3772256
print(vcov_full %>% cov2cor())
## 4 x 4 Matrix of class "dpoMatrix"
## (Intercept) meanses gender meanses:gender
## (Intercept) 1.0000000 -0.1640543 -0.4910627 0.1105085
## meanses -0.1640543 1.0000000 0.1088108 -0.4878607
## gender -0.4910627 0.1088108 1.0000000 -0.2434462
## meanses:gender 0.1105085 -0.4878607 -0.2434462 1.0000000
vc$S_ID
## (Intercept) gender
## (Intercept) 26.247433 0.1286129677
## gender 0.128613 0.0006302062
## attr(,"stddev")
## (Intercept) gender
## 5.12322490 0.02510391
## attr(,"correlation")
## (Intercept) gender
## (Intercept) 1 1
## gender 1 1
3. Using the HLMresid package in R (above), the residual analysis shows fairly normal distribution. There seem to be two outliers that appear in the boxplot for Level 1, but the remaining boxplots, qqplots, and residual plots seem to suggest normal, linear, distribution, suggesting homogeneity in the data.