set.seed(2042001)
n.classrooms <- 100
n.stu.per.class <- 200
# generate data
X_ij <- runif(n.classrooms * n.stu.per.class, min = 0, max = 1)
eta_j <- rnorm(n.classrooms, mean = 0, sd = sqrt(2))
epsilon_ij <- rnorm(n.classrooms * n.stu.per.class, mean = 0, sd = sqrt(2))
# calculate outcome variable
Y_ij <- 0 + 1*X_ij + rep(eta_j, each = n.stu.per.class) + epsilon_ij
# store variables in dataframe
dat <- data.frame(
studentid = 1:(n.classrooms * n.stu.per.class),
classid = rep(1:n.classrooms, each = n.stu.per.class),
predictor = X_ij,
outcome = Y_ij
)
# Insert code to fit model and print summary
lm1<-lmerTest::lmer(outcome ~ predictor + (1|classid), data = dat,REML = T)
summary(lm1)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
## Data: dat
##
## REML criterion at convergence: 71227.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0143 -0.6761 0.0024 0.6711 3.7584
##
## Random effects:
## Groups Name Variance Std.Dev.
## classid (Intercept) 1.893 1.376
## Residual 2.008 1.417
## Number of obs: 20000, groups: classid, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -7.493e-03 1.391e-01 1.022e+02 -0.054 0.957
## predictor 9.864e-01 3.496e-02 1.990e+04 28.216 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## predictor -0.126
coefs <- summary(lm1)$coef
lower <- coefs[2,1] - (coefs[2,2] * 2)
upper <- coefs[2,1] + (coefs[2,2] * 2)
dat2<-dat
Z_ij <- rbinom(n = 200*100 , size = 1, prob = 0.5)
dat2$outcome<- ifelse(Z_ij == 1, NA, dat$outcome)
lm2<-lmerTest::lmer(outcome ~ predictor + (1|classid), data = dat2)
summary(lm2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
## Data: dat2
##
## REML criterion at convergence: 35607.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9102 -0.6698 0.0146 0.6663 3.8709
##
## Random effects:
## Groups Name Variance Std.Dev.
## classid (Intercept) 1.880 1.371
## Residual 2.007 1.417
## Number of obs: 9945, groups: classid, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.02359 0.14005 105.47627 -0.168 0.867
## predictor 1.02485 0.04963 9846.41935 20.649 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## predictor -0.177
coefs2<-summary(lm2)$coef
lower2<-coefs2[2,1]-coefs2[2,2]*2
upper2<-coefs2[2,1]+coefs2[2,2]*2
length(dat$outcome)-sum(is.na(dat2$outcome))
## [1] 9945
summary(lm2)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
## Data: dat2
##
## REML criterion at convergence: 35607.1
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.9102 -0.6698 0.0146 0.6663 3.8709
##
## Random effects:
## Groups Name Variance Std.Dev.
## classid (Intercept) 1.880 1.371
## Residual 2.007 1.417
## Number of obs: 9945, groups: classid, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.02359 0.14005 105.47627 -0.168 0.867
## predictor 1.02485 0.04963 9846.41935 20.649 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## predictor -0.177
dat3<-dat
Z_ij <- rbinom(n = 200*100, size = 1, prob = dat3$predictor)
dat3$predictor <- ifelse(Z_ij == 1, NA, dat3$predictor)
lm3<-lmerTest::lmer(outcome ~ predictor + (1 | classid), data = dat3,na.action = "na.omit")
summary(lm3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
## Data: dat3
##
## REML criterion at convergence: 35850.3
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.8356 -0.6795 0.0052 0.6608 3.7058
##
## Random effects:
## Groups Name Variance Std.Dev.
## classid (Intercept) 1.874 1.369
## Residual 2.015 1.420
## Number of obs: 10002, groups: classid, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 3.442e-03 1.391e-01 1.034e+02 0.025 0.98
## predictor 9.547e-01 6.031e-02 9.903e+03 15.831 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## predictor -0.147
coefs3<-summary(lm3)$coef
lower3<-coefs3[2,1]-coefs3[2,2]*2
upper3<-coefs3[2,1]+coefs3[2,2]*2
sum(!is.na(dat3$predictor))
## [1] 10002
dat4<-dat
expit <- function(x) exp(x)/(1+exp(x))
Z_ij4<- rbinom(n = 200*100, size = 1,prob = expit(dat4$outcome))
dat4$outcome<- ifelse(Z_ij4 == 1, NA, dat4$outcome)
lm4<- lmerTest::lmer(outcome ~ predictor + (1|classid), data = dat4)
summary(lm4)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: outcome ~ predictor + (1 | classid)
## Data: dat4
##
## REML criterion at convergence: 28257.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -4.0870 -0.6596 0.0090 0.6679 3.1897
##
## Random effects:
## Groups Name Variance Std.Dev.
## classid (Intercept) 1.078 1.038
## Residual 1.539 1.240
## Number of obs: 8522, groups: classid, 100
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.7488 0.1074 105.0594 -6.972 2.86e-10 ***
## predictor 0.7069 0.0475 8423.2269 14.881 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr)
## predictor -0.208
coefs4<-summary(lm4)$coef
lower4 <- coefs4[2,1] - (coefs4[2,2] * 2)
upper4 <- coefs4[2,1] + (coefs4[2,2] * 2)
sum(!is.na(dat4$outcome))
## [1] 8522
length(unique(dat4[complete.cases(dat4) == 1,]$classid))
## [1] 100