Data Simulation for Multi-level Model

set.seed(2042001)
n.classrooms <- 100
n.stu.per.class <- 200
# generate data
X_ij <- runif(n.classrooms * n.stu.per.class, min = 0, max = 1)
eta_j <- rnorm(n.classrooms, mean = 0, sd = sqrt(2))
epsilon_ij <- rnorm(n.classrooms * n.stu.per.class, mean = 0, sd = sqrt(2))
# calculate outcome variable
Y_ij <- 0 + 1*X_ij + rep(eta_j, each = n.stu.per.class) + epsilon_ij
# store variables in dataframe
dat <- data.frame(
  studentid = 1:(n.classrooms * n.stu.per.class),
  classid = rep(1:n.classrooms, each = n.stu.per.class),
  predictor = X_ij,
  outcome = Y_ij
)

# Insert code to fit model and print summary
lm1<-lmerTest::lmer(outcome ~ predictor + (1|classid), data = dat,REML = T)
summary(lm1)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
##    Data: dat
## 
## REML criterion at convergence: 71227.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0143 -0.6761  0.0024  0.6711  3.7584 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classid  (Intercept) 1.893    1.376   
##  Residual             2.008    1.417   
## Number of obs: 20000, groups:  classid, 100
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept) -7.493e-03  1.391e-01  1.022e+02  -0.054    0.957    
## predictor    9.864e-01  3.496e-02  1.990e+04  28.216   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## predictor -0.126

coefs <- summary(lm1)$coef
lower <- coefs[2,1] - (coefs[2,2] * 2)
upper <- coefs[2,1] + (coefs[2,2] * 2)

dat2<-dat
Z_ij <- rbinom(n = 200*100 , size = 1, prob = 0.5)
dat2$outcome<- ifelse(Z_ij == 1, NA, dat$outcome)

lm2<-lmerTest::lmer(outcome ~ predictor + (1|classid), data = dat2)
summary(lm2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
##    Data: dat2
## 
## REML criterion at convergence: 35607.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9102 -0.6698  0.0146  0.6663  3.8709 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classid  (Intercept) 1.880    1.371   
##  Residual             2.007    1.417   
## Number of obs: 9945, groups:  classid, 100
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)   -0.02359    0.14005  105.47627  -0.168    0.867    
## predictor      1.02485    0.04963 9846.41935  20.649   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## predictor -0.177

coefs2<-summary(lm2)$coef
lower2<-coefs2[2,1]-coefs2[2,2]*2
upper2<-coefs2[2,1]+coefs2[2,2]*2

length(dat$outcome)-sum(is.na(dat2$outcome))

## [1] 9945

summary(lm2)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
##    Data: dat2
## 
## REML criterion at convergence: 35607.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.9102 -0.6698  0.0146  0.6663  3.8709 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classid  (Intercept) 1.880    1.371   
##  Residual             2.007    1.417   
## Number of obs: 9945, groups:  classid, 100
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)   -0.02359    0.14005  105.47627  -0.168    0.867    
## predictor      1.02485    0.04963 9846.41935  20.649   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## predictor -0.177

dat3<-dat
Z_ij <- rbinom(n = 200*100, size = 1, prob = dat3$predictor)
dat3$predictor <- ifelse(Z_ij == 1, NA, dat3$predictor)

lm3<-lmerTest::lmer(outcome ~ predictor + (1 | classid), data = dat3,na.action = "na.omit")
summary(lm3)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
##    Data: dat3
## 
## REML criterion at convergence: 35850.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8356 -0.6795  0.0052  0.6608  3.7058 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classid  (Intercept) 1.874    1.369   
##  Residual             2.015    1.420   
## Number of obs: 10002, groups:  classid, 100
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept) 3.442e-03  1.391e-01 1.034e+02   0.025     0.98    
## predictor   9.547e-01  6.031e-02 9.903e+03  15.831   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## predictor -0.147

coefs3<-summary(lm3)$coef
lower3<-coefs3[2,1]-coefs3[2,2]*2
upper3<-coefs3[2,1]+coefs3[2,2]*2

sum(!is.na(dat3$predictor))

## [1] 10002

dat4<-dat
expit <- function(x) exp(x)/(1+exp(x))
Z_ij4<- rbinom(n = 200*100, size = 1,prob = expit(dat4$outcome))
dat4$outcome<- ifelse(Z_ij4 == 1, NA, dat4$outcome)

lm4<- lmerTest::lmer(outcome ~ predictor + (1|classid), data = dat4)
summary(lm4)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
##    Data: dat4
## 
## REML criterion at convergence: 28257.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0870 -0.6596  0.0090  0.6679  3.1897 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  classid  (Intercept) 1.078    1.038   
##  Residual             1.539    1.240   
## Number of obs: 8522, groups:  classid, 100
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   -0.7488     0.1074  105.0594  -6.972 2.86e-10 ***
## predictor      0.7069     0.0475 8423.2269  14.881  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##           (Intr)
## predictor -0.208

coefs4<-summary(lm4)$coef
lower4 <- coefs4[2,1] - (coefs4[2,2] * 2)
upper4 <- coefs4[2,1] + (coefs4[2,2] * 2)

sum(!is.na(dat4$outcome))

## [1] 8522

length(unique(dat4[complete.cases(dat4) == 1,]$classid))

## [1] 100