HLM Project instructions

Using the variable READING as the dependent variable, use GENDER as a level-1 predictor, and use MeanSES as a level-2 predictor.

my_data <-read.table("~/Desktop/ecls.txt", fileEncoding="UTF-8-BOM", na.strings=c("","NA", "."))
names(my_data)[names(my_data) == "V1"] <- "childid"
names(my_data)[names(my_data) == "V2"] <- "S_ID"
names(my_data)[names(my_data) == "V3"] <- "gender"
names(my_data)[names(my_data) == "V4"] <- "math"
names(my_data)[names(my_data) == "V5"] <- "reading"
names(my_data)[names(my_data) == "V6"] <- "meanses"
names(my_data)[names(my_data) == "V7"] <- "pubpri"
names(my_data)[names(my_data) == "V8"] <- "risknum"
names(my_data)[names(my_data) == "V9"] <- "sample"
names(my_data)[names(my_data) == "V10"] <- "timemos"
colnames(my_data) 
##  [1] "childid" "S_ID"    "gender"  "math"    "reading" "meanses" "pubpri" 
##  [8] "risknum" "sample"  "timemos"
my_data$childid<-as.factor(my_data$childid)
my_data$S_ID<-as.factor(my_data$S_ID)
my_data$gender<-as.numeric(my_data$gender)
my_data$math<-as.numeric(my_data$math)
my_data$reading<-as.numeric(my_data$reading)
my_data$meanses<-as.numeric(my_data$meanses)
## Warning: NAs introduced by coercion
my_data$pubpri<-as.numeric(my_data$pubpri)
my_data$risknum<-as.numeric(my_data$risknum)
my_data$sample<-as.numeric(my_data$sample)
my_data$timemos<-as.numeric(my_data$timemos)
str(my_data)
## 'data.frame':    839 obs. of  10 variables:
##  $ childid: Factor w/ 839 levels "15001","15003",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ S_ID   : Factor w/ 60 levels "15","37","53",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gender : num  1 1 1 1 0 1 0 1 1 0 ...
##  $ math   : num  39.9 49.4 31.7 44.6 28.7 ...
##  $ reading: num  54.2 48.2 42.8 47.2 46.3 ...
##  $ meanses: num  -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 ...
##  $ pubpri : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ risknum: num  0 0 1 0 1 0 0 0 1 0 ...
##  $ sample : num  4 4 4 4 4 4 4 4 4 4 ...
##  $ timemos: num  20 20 20 20 20 20 20 20 20 20 ...

Unconditional Model

mod_0 <- lmer(reading ~ 1 + (1 |S_ID), data = my_data, REML = TRUE)
summary(mod_0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + (1 | S_ID)
##    Data: my_data
## 
## REML criterion at convergence: 6501.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.94590 -0.71673 -0.01708  0.69958  2.85534 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  S_ID     (Intercept)  51.02    7.142  
##  Residual             136.58   11.687  
## Number of obs: 825, groups:  S_ID, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   56.787      1.015   55.94
summary(mod_0)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + (1 | S_ID)
##    Data: my_data
## 
## REML criterion at convergence: 6501.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.94590 -0.71673 -0.01708  0.69958  2.85534 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  S_ID     (Intercept)  51.02    7.142  
##  Residual             136.58   11.687  
## Number of obs: 825, groups:  S_ID, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   56.787      1.015   55.94
VarCorr(mod_0)
##  Groups   Name        Std.Dev.
##  S_ID     (Intercept)  7.1425 
##  Residual             11.6868
vcov(mod_0, full = TRUE, ranpar = "var")
## 1 x 1 Matrix of class "dpoMatrix"
##             (Intercept)
## (Intercept)     1.03063

Full Model

mod_full <- lmer(reading ~ 1 + meanses + gender + meanses*gender + 
                   (1 + gender|S_ID), data = my_data, REML = TRUE)
## boundary (singular) fit: see ?isSingular
summary(mod_full)
## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + meanses + gender + meanses * gender + (1 + gender |  
##     S_ID)
##    Data: my_data
## 
## REML criterion at convergence: 6454.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.67709 -0.70636 -0.02468  0.71716  2.88587 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  S_ID     (Intercept) 2.625e+01  5.1232      
##           gender      6.302e-04  0.0251  1.00
##  Residual             1.366e+02 11.6880      
## Number of obs: 824, groups:  S_ID, 60
## 
## Fixed effects:
##                Estimate Std. Error t value
## (Intercept)     56.8435     0.9132  62.247
## meanses          8.7030     1.6601   5.243
## gender          -1.4299     0.8621  -1.659
## meanses:gender   0.8833     1.5418   0.573
## 
## Correlation of Fixed Effects:
##             (Intr) meanss gender
## meanses     -0.164              
## gender      -0.491  0.109       
## meanss:gndr  0.111 -0.488 -0.243
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular
VarCorr(mod_full)
##  Groups   Name        Std.Dev.  Corr 
##  S_ID     (Intercept)  5.123225      
##           gender       0.025104 1.000
##  Residual             11.687959
vcov_full<-vcov(mod_full, full = TRUE, ranpar = "var")
vcov_full
## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      0.8339339 -0.2487025 -0.3866191      0.1555954
## meanses         -0.2487025  2.7558375  0.1557326     -1.2486993
## gender          -0.3866191  0.1557326  0.7432948     -0.3236076
## meanses:gender   0.1555954 -1.2486993 -0.3236076      2.3772256
vcov_full %>% cov2cor()
## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      1.0000000 -0.1640543 -0.4910627      0.1105085
## meanses         -0.1640543  1.0000000  0.1088108     -0.4878607
## gender          -0.4910627  0.1088108  1.0000000     -0.2434462
## meanses:gender   0.1105085 -0.4878607 -0.2434462      1.0000000
vc <- VarCorr(mod_full)
vc$S_ID
##             (Intercept)       gender
## (Intercept)   26.247433 0.1286129677
## gender         0.128613 0.0006302062
## attr(,"stddev")
## (Intercept)      gender 
##  5.12322490  0.02510391 
## attr(,"correlation")
##             (Intercept) gender
## (Intercept)           1      1
## gender                1      1

ICC

ICC(outcome = "reading", group = "S_ID", data = my_data)
## [1] 0.2719409

Level 1 Residuals

Level_1 <- HLMresid(mod_full, 1, type = "LS", sim = NULL, standardize = FALSE)
plot(Level_1$LS.resid ~ Level_1$fitted)

boxplot(Level_1$LS.resid)

Level 2 Residuals

Level_2<-HLMresid(mod_full, "S_ID", type = "LS", sim = NULL, standardize = FALSE)
plot(Level_2$gender)

plot(Level_2$`(Intercept)`)

boxplot(Level_2$`(Intercept)`)

boxplot(Level_2$gender)

Residual Analysis

residuals <- resid(mod_full)
hist(residuals)

qqnorm(residuals)
qqline(residuals)

plot(resid(mod_full) ~ fitted(mod_full))

Assignment Questions

1.The value of the ICC is 0.27, which tells us that 27% of the variance in reading scores is from school ID.

2.

Reading ij = Y 00 + Y 01 MeanSES + Y 10 Gender + Y 11 MeanSES * Gender + u 0+ u 1Gender + r 1j

Reading ij = 56.84 + 8.70 MeanSES + -.143 Gender + 0.88 MeanSES * Gender + u 0+ u 1Gender + r 1j

Covariance matrices with sibstituted estimates:

vcov(mod_0, full = TRUE, ranpar = "var")
## 1 x 1 Matrix of class "dpoMatrix"
##             (Intercept)
## (Intercept)     1.03063
print(vcov_full)
## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      0.8339339 -0.2487025 -0.3866191      0.1555954
## meanses         -0.2487025  2.7558375  0.1557326     -1.2486993
## gender          -0.3866191  0.1557326  0.7432948     -0.3236076
## meanses:gender   0.1555954 -1.2486993 -0.3236076      2.3772256
print(vcov_full %>% cov2cor())
## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      1.0000000 -0.1640543 -0.4910627      0.1105085
## meanses         -0.1640543  1.0000000  0.1088108     -0.4878607
## gender          -0.4910627  0.1088108  1.0000000     -0.2434462
## meanses:gender   0.1105085 -0.4878607 -0.2434462      1.0000000
vc$S_ID
##             (Intercept)       gender
## (Intercept)   26.247433 0.1286129677
## gender         0.128613 0.0006302062
## attr(,"stddev")
## (Intercept)      gender 
##  5.12322490  0.02510391 
## attr(,"correlation")
##             (Intercept) gender
## (Intercept)           1      1
## gender                1      1

3. Using the HLMresid package in R (above), the residual analysis shows fairly normal distribution. There seem to be two outliers that appear in the boxplot for Level 1, but the remaining boxplots, qqplots, and residual plots seem to suggest normal, linear, distribution, suggesting homogeneity in the data.