HLM project

HLM Project instructions

Using the variable READING as the dependent variable, use GENDER as a level-1 predictor, and use MeanSES as a level-2 predictor.

my_data <-read.table("~/Desktop/ecls.txt", fileEncoding="UTF-8-BOM", na.strings=c("","NA", "."))
names(my_data)[names(my_data) == "V1"] <- "childid"
names(my_data)[names(my_data) == "V2"] <- "S_ID"
names(my_data)[names(my_data) == "V3"] <- "gender"
names(my_data)[names(my_data) == "V4"] <- "math"
names(my_data)[names(my_data) == "V5"] <- "reading"
names(my_data)[names(my_data) == "V6"] <- "meanses"
names(my_data)[names(my_data) == "V7"] <- "pubpri"
names(my_data)[names(my_data) == "V8"] <- "risknum"
names(my_data)[names(my_data) == "V9"] <- "sample"
names(my_data)[names(my_data) == "V10"] <- "timemos"
colnames(my_data)

##  [1] "childid" "S_ID"    "gender"  "math"    "reading" "meanses" "pubpri" 
##  [8] "risknum" "sample"  "timemos"

my_data$childid<-as.factor(my_data$childid)
my_data$S_ID<-as.factor(my_data$S_ID)
my_data$gender<-as.numeric(my_data$gender)
my_data$math<-as.numeric(my_data$math)
my_data$reading<-as.numeric(my_data$reading)
my_data$meanses<-as.numeric(my_data$meanses)

## Warning: NAs introduced by coercion

my_data$pubpri<-as.numeric(my_data$pubpri)
my_data$risknum<-as.numeric(my_data$risknum)
my_data$sample<-as.numeric(my_data$sample)
my_data$timemos<-as.numeric(my_data$timemos)
str(my_data)

## 'data.frame':    839 obs. of  10 variables:
##  $ childid: Factor w/ 839 levels "15001","15003",..: 1 2 3 4 5 6 7 8 9 10 ...
##  $ S_ID   : Factor w/ 60 levels "15","37","53",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ gender : num  1 1 1 1 0 1 0 1 1 0 ...
##  $ math   : num  39.9 49.4 31.7 44.6 28.7 ...
##  $ reading: num  54.2 48.2 42.8 47.2 46.3 ...
##  $ meanses: num  -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 -0.18 ...
##  $ pubpri : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ risknum: num  0 0 1 0 1 0 0 0 1 0 ...
##  $ sample : num  4 4 4 4 4 4 4 4 4 4 ...
##  $ timemos: num  20 20 20 20 20 20 20 20 20 20 ...

Unconditional Model

mod_0 <- lmer(reading ~ 1 + (1 |S_ID), data = my_data, REML = TRUE)
summary(mod_0)

## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + (1 | S_ID)
##    Data: my_data
## 
## REML criterion at convergence: 6501.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.94590 -0.71673 -0.01708  0.69958  2.85534 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  S_ID     (Intercept)  51.02    7.142  
##  Residual             136.58   11.687  
## Number of obs: 825, groups:  S_ID, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   56.787      1.015   55.94

summary(mod_0)

## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + (1 | S_ID)
##    Data: my_data
## 
## REML criterion at convergence: 6501.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.94590 -0.71673 -0.01708  0.69958  2.85534 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  S_ID     (Intercept)  51.02    7.142  
##  Residual             136.58   11.687  
## Number of obs: 825, groups:  S_ID, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   56.787      1.015   55.94

VarCorr(mod_0)

##  Groups   Name        Std.Dev.
##  S_ID     (Intercept)  7.1425 
##  Residual             11.6868

vcov(mod_0, full = TRUE, ranpar = "var")

## 1 x 1 Matrix of class "dpoMatrix"
##             (Intercept)
## (Intercept)     1.03063

Full Model

mod_full <- lmer(reading ~ 1 + meanses + gender + meanses*gender + 
                   (1 + gender|S_ID), data = my_data, REML = TRUE)

## boundary (singular) fit: see ?isSingular

summary(mod_full)

## Linear mixed model fit by REML ['lmerMod']
## Formula: reading ~ 1 + meanses + gender + meanses * gender + (1 + gender |  
##     S_ID)
##    Data: my_data
## 
## REML criterion at convergence: 6454.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.67709 -0.70636 -0.02468  0.71716  2.88587 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  S_ID     (Intercept) 2.625e+01  5.1232      
##           gender      6.302e-04  0.0251  1.00
##  Residual             1.366e+02 11.6880      
## Number of obs: 824, groups:  S_ID, 60
## 
## Fixed effects:
##                Estimate Std. Error t value
## (Intercept)     56.8435     0.9132  62.247
## meanses          8.7030     1.6601   5.243
## gender          -1.4299     0.8621  -1.659
## meanses:gender   0.8833     1.5418   0.573
## 
## Correlation of Fixed Effects:
##             (Intr) meanss gender
## meanses     -0.164              
## gender      -0.491  0.109       
## meanss:gndr  0.111 -0.488 -0.243
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see ?isSingular

VarCorr(mod_full)

##  Groups   Name        Std.Dev.  Corr 
##  S_ID     (Intercept)  5.123225      
##           gender       0.025104 1.000
##  Residual             11.687959

vcov_full<-vcov(mod_full, full = TRUE, ranpar = "var")
vcov_full

## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      0.8339339 -0.2487025 -0.3866191      0.1555954
## meanses         -0.2487025  2.7558375  0.1557326     -1.2486993
## gender          -0.3866191  0.1557326  0.7432948     -0.3236076
## meanses:gender   0.1555954 -1.2486993 -0.3236076      2.3772256

vcov_full %>% cov2cor()

## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      1.0000000 -0.1640543 -0.4910627      0.1105085
## meanses         -0.1640543  1.0000000  0.1088108     -0.4878607
## gender          -0.4910627  0.1088108  1.0000000     -0.2434462
## meanses:gender   0.1105085 -0.4878607 -0.2434462      1.0000000

vc <- VarCorr(mod_full)
vc$S_ID

##             (Intercept)       gender
## (Intercept)   26.247433 0.1286129677
## gender         0.128613 0.0006302062
## attr(,"stddev")
## (Intercept)      gender 
##  5.12322490  0.02510391 
## attr(,"correlation")
##             (Intercept) gender
## (Intercept)           1      1
## gender                1      1

ICC

ICC(outcome = "reading", group = "S_ID", data = my_data)

## [1] 0.2719409

Level 1 Residuals

Level_1 <- HLMresid(mod_full, 1, type = "LS", sim = NULL, standardize = FALSE)
plot(Level_1$LS.resid ~ Level_1$fitted)

boxplot(Level_1$LS.resid)

Level 2 Residuals

Level_2<-HLMresid(mod_full, "S_ID", type = "LS", sim = NULL, standardize = FALSE)
plot(Level_2$gender)

plot(Level_2$`(Intercept)`)

boxplot(Level_2$`(Intercept)`)

boxplot(Level_2$gender)

Residual Analysis

residuals <- resid(mod_full)
hist(residuals)

qqnorm(residuals)
qqline(residuals)

plot(resid(mod_full) ~ fitted(mod_full))

Assignment Questions

1.The value of the ICC is 0.27, which tells us that 27% of the variance in reading scores is from school ID.

Mixed model equation:

Reading _ij = Y ₀₀ + Y ₀₁ MeanSES + Y ₁₀ Gender + Y ₁₁ MeanSES * Gender + u ₀+ u ₁Gender + r _1j

Substituted estimates:

Reading _ij = 56.84 + 8.70 MeanSES + -.143 Gender + 0.88 MeanSES * Gender + u ₀+ u ₁Gender + r _1j

Covariance matrices with sibstituted estimates:

Level 1:

vcov(mod_0, full = TRUE, ranpar = "var")

## 1 x 1 Matrix of class "dpoMatrix"
##             (Intercept)
## (Intercept)     1.03063

Level 2:

print(vcov_full)

## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      0.8339339 -0.2487025 -0.3866191      0.1555954
## meanses         -0.2487025  2.7558375  0.1557326     -1.2486993
## gender          -0.3866191  0.1557326  0.7432948     -0.3236076
## meanses:gender   0.1555954 -1.2486993 -0.3236076      2.3772256

print(vcov_full %>% cov2cor())

## 4 x 4 Matrix of class "dpoMatrix"
##                (Intercept)    meanses     gender meanses:gender
## (Intercept)      1.0000000 -0.1640543 -0.4910627      0.1105085
## meanses         -0.1640543  1.0000000  0.1088108     -0.4878607
## gender          -0.4910627  0.1088108  1.0000000     -0.2434462
## meanses:gender   0.1105085 -0.4878607 -0.2434462      1.0000000

vc$S_ID

##             (Intercept)       gender
## (Intercept)   26.247433 0.1286129677
## gender         0.128613 0.0006302062
## attr(,"stddev")
## (Intercept)      gender 
##  5.12322490  0.02510391 
## attr(,"correlation")
##             (Intercept) gender
## (Intercept)           1      1
## gender                1      1

3. Using the HLMresid package in R (above), the residual analysis shows fairly normal distribution. There seem to be two outliers that appear in the boxplot for Level 1, but the remaining boxplots, qqplots, and residual plots seem to suggest normal, linear, distribution, suggesting homogeneity in the data.