Inclass Exercise 1

school-effects model version 0

pacman::p_load(mlmRev, tidyverse, lme4)

# data from mlmRev package
data(Hsb82, "mlmRev")

#
str(Hsb82)
## 'data.frame':    7185 obs. of  8 variables:
##  $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sx     : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ ses    : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ mAch   : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ meanses: num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cses   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...
# make a copy of data to the dta object 
dta <- Hsb82            

# use sensible variable names
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")
#
head(dta)
##   School Minority Gender    SES   Math   Ave_SES Sector       C_SES
## 1   1224       No Female -1.528  5.876 -0.434383 Public -1.09361702
## 2   1224       No Female -0.588 19.708 -0.434383 Public -0.15361702
## 3   1224       No   Male -0.528 20.349 -0.434383 Public -0.09361702
## 4   1224       No   Male -0.668  8.781 -0.434383 Public -0.23361702
## 5   1224       No   Male -0.158 17.898 -0.434383 Public  0.27638298
## 6   1224       No   Male  0.022  4.583 -0.434383 Public  0.45638298
##
ot <- theme_set(theme_bw())

# freedman-diaconis rule for binwidth
bw <- 2*IQR(dta$Math)/(dim(dta)[1]^(1/3))
# histograms
ggplot(dta, aes(Math)) +
  stat_bin(aes(fill = ..count..), binwidth=bw) +
  labs(x="Math achievement scores")

# seed the random number to reproduce the sample
set.seed(20160923)

# pick 60 out of 160
n <- sample(160, 60)

# box plots for a sample of 60 schools
ggplot(filter(dta, School %in% levels(School)[n]),
       aes(reorder(School, Math, mean), Math, color=Sector)) +
 geom_boxplot() +
 geom_point(size= rel(0.6)) +
 labs(x="School ID", y="Math achievement score")+
 theme(axis.text.x=element_text(size=6))+
 ggtitle("A sample of 60 schools")

#
# math mean by school
dta_mmath <- dta %>% 
             group_by(School) %>% 
             mutate( ave_math = mean(Math)) %>% 
             select( School, ave_math ) %>%  
             unique %>%
             as.data.frame

summary(dta_mmath$ave_math)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.24   10.47   12.90   12.62   14.65   19.72
sd(dta_mmath$ave_math)
## [1] 3.117651
# grand mean
summary(dta$Math)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.832   7.275  13.131  12.748  18.317  24.993
# sd for overall Math scores
sd(dta$Math)
## [1] 6.878246
# freedman-diaconis rule for binwidth
bw <- 2*IQR(dta_mmath$ave_math)/(160^(1/3))

#
ggplot(dta_mmath, aes(ave_math)) +
 geom_histogram(aes(y= ..density.., alpha= .2), binwidth=bw) + 
 geom_density(fill="NA") +
 geom_vline(xintercept=mean(dta_mmath$ave_math), linetype="longdash") +
 geom_text(aes(7, 0.17, label= "ave = 12.62, sd = 3.12")) +
 labs(x="Math achievement school means", y="Density") +
 theme(legend.position="NONE")

# baseline model
summary(m0 <- lmer(Math ~ (1 | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 47116.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0631 -0.7539  0.0267  0.7606  2.7426 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.614   2.935   
##  Residual             39.148   6.257   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6370     0.2444   51.71
# CIs for model parameters
confint(m0, method="boot")
##                 2.5 %    97.5 %
## .sig01       2.596281  3.333500
## .sigma       6.151525  6.360499
## (Intercept) 12.128943 13.114645
# default residual plot
plot(m0, xlab="Fitted values", ylab="Pearson residuals", 
     pch=20, cex=.5, type = c("p","g"))

# The end

Including the effect of a school level predictor

pacman::p_load(mlmRev, tidyverse, lme4)

# data from mlmRev package
data(Hsb82)

# first 6 lines
head(Hsb82)
##   school minrty     sx    ses   mAch   meanses sector        cses
## 1   1224     No Female -1.528  5.876 -0.434383 Public -1.09361702
## 2   1224     No Female -0.588 19.708 -0.434383 Public -0.15361702
## 3   1224     No   Male -0.528 20.349 -0.434383 Public -0.09361702
## 4   1224     No   Male -0.668  8.781 -0.434383 Public -0.23361702
## 5   1224     No   Male -0.158 17.898 -0.434383 Public  0.27638298
## 6   1224     No   Male  0.022  4.583 -0.434383 Public  0.45638298
# make a copy of data to the dta object 
dta <- Hsb82            

# names for human
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")
#
str(dta)
## 'data.frame':    7185 obs. of  8 variables:
##  $ School  : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ Minority: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Gender  : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ SES     : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ Math    : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ Ave_SES : num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ Sector  : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ C_SES   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...
# random effects anova - intercept only model
summary(m1 <- lmer(Math ~ (1 | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 47116.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0631 -0.7539  0.0267  0.7606  2.7426 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.614   2.935   
##  Residual             39.148   6.257   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6370     0.2444   51.71
# random intercept; fixed-effect is MeanSES 
summary(m2 <- lmer(Math ~ Ave_SES + (1 | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Ave_SES + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46961.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.13493 -0.75254  0.02413  0.76766  2.78515 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  2.639   1.624   
##  Residual             39.157   6.258   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6846     0.1493   84.97
## Ave_SES       5.8635     0.3615   16.22
## 
## Correlation of Fixed Effects:
##         (Intr)
## Ave_SES 0.010
# residual plot
plot(m2, xlab = "Fitted values", ylab = "Pearson residuals", 
     main = "Model 2", pch = 20, cex = .5, type = c("p", "g"))

# one regression for all
summary(m0a <- lm(Math ~ Ave_SES, data = dta))
## 
## Call:
## lm(formula = Math ~ Ave_SES, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.4520  -4.8332   0.1735   5.1117  16.2787 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.74703    0.07621  167.27   <2e-16 ***
## Ave_SES      5.71688    0.18429   31.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.46 on 7183 degrees of freedom
## Multiple R-squared:  0.1181, Adjusted R-squared:  0.118 
## F-statistic: 962.3 on 1 and 7183 DF,  p-value: < 2.2e-16
#
theme_set(theme_bw())

# individual math scores vs mean school ses
ggplot(dta, aes(Ave_SES, Math)) +
  geom_smooth(method = "lm") +
  geom_point(alpha = .2) +
  labs(x = "School mean SES", y = "Math achievement score")

# math mean by school
dta_mmath <- dta %>% 
             group_by(School) %>% 
             mutate( ave_math = mean(Math)) %>% 
             select( School, Ave_SES, ave_math ) %>%  
             unique %>%
             as.data.frame

# regression of mean math over mean ses by school
ggplot(dta_mmath, aes(Ave_SES, ave_math)) +
  geom_smooth(method = "lm") +
  geom_point(alpha = .2)+
  labs(x = "Mean School SES", y = "Mean School Math Scores")

# The end

Including the effect of a student-level predictor

# data from mlmRev package
data(Hsb82)

#
str(Hsb82)
## 'data.frame':    7185 obs. of  8 variables:
##  $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sx     : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ ses    : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ mAch   : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ meanses: num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cses   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...
# make a copy of data to the dta object 
dta <- Hsb82            

# variable names for human
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")

# see the random number generator 
set.seed(20160923)

# take a sample of size 31 from total number of schools
n <- sample(length(levels(dta$School)), 31)

#
theme_set(theme_bw())

#
ggplot(data = filter(dta, School %in% levels(School)[n]), 
       aes(C_SES, Math, color = Sector)) +
 geom_smooth(method = "lm", se = F, lwd = .6) +
 geom_point(size = rel(0.5)) +
 facet_wrap(~ School, nrow = 2) +
 labs(x = "Centered SES", y="Math achievement score")+
 theme(axis.text.x = element_text(size = 6))

##
# one regression line per school
summary(m1b <- lm(Math ~ School / C_SES - 1, data = dta))
## 
## Call:
## lm(formula = Math ~ School/C_SES - 1, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.2662  -4.3241   0.1164   4.4434  18.3617 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## School8367        4.55279    1.61953   2.811 0.004950 ** 
## School8854        4.23978    1.07122   3.958 7.64e-05 ***
## School4458        5.81140    0.87465   6.644 3.28e-11 ***
## School5762        4.32486    0.99621   4.341 1.44e-05 ***
## School6990        5.97679    0.83237   7.180 7.68e-13 ***
## School5815        7.27136    1.21194   6.000 2.08e-09 ***
## School7172        8.06682    0.91354   8.830  < 2e-16 ***
## School4868       12.31018    1.03923  11.845  < 2e-16 ***
## School7341        9.79418    0.84853  11.543  < 2e-16 ***
## School1358       11.20623    1.10635  10.129  < 2e-16 ***
## School4383       11.46568    1.21194   9.461  < 2e-16 ***
## School2305       11.13776    0.74031  15.045  < 2e-16 ***
## School8800        7.33594    1.07122   6.848 8.13e-12 ***
## School3088        9.14585    0.97033   9.425  < 2e-16 ***
## School8775        9.46700    0.87465  10.824  < 2e-16 ***
## School7890        8.34110    0.84853   9.830  < 2e-16 ***
## School6144        8.54507    0.92410   9.247  < 2e-16 ***
## School6443        9.47553    1.10635   8.565  < 2e-16 ***
## School5192       10.40950    1.14518   9.090  < 2e-16 ***
## School6808        9.28611    0.91354  10.165  < 2e-16 ***
## School2818       13.87283    0.93504  14.837  < 2e-16 ***
## School9340       11.17855    1.12526   9.934  < 2e-16 ***
## School4523        8.35174    0.88390   9.449  < 2e-16 ***
## School6816       14.53824    0.81709  17.793  < 2e-16 ***
## School2277        9.29761    0.77587  11.983  < 2e-16 ***
## School8009       14.08472    0.88390  15.935  < 2e-16 ***
## School5783       13.15034    1.12526  11.686  < 2e-16 ***
## School3013       12.61083    0.83237  15.151  < 2e-16 ***
## School7101       11.84993    1.14518  10.348  < 2e-16 ***
## School4530        9.05570    0.76345  11.861  < 2e-16 ***
## School9021       14.69666    0.80976  18.149  < 2e-16 ***
## School4511       13.40903    0.79568  16.852  < 2e-16 ***
## School2639        6.61548    0.93504   7.075 1.64e-12 ***
## School3377        9.18671    0.90333  10.170  < 2e-16 ***
## School6578       11.99400    0.80976  14.812  < 2e-16 ***
## School9347       13.53875    0.80263  16.868  < 2e-16 ***
## School3705       10.33169    0.90333  11.437  < 2e-16 ***
## School3533       10.40904    0.87465  11.901  < 2e-16 ***
## School1296        7.63596    0.87465   8.730  < 2e-16 ***
## School4350       11.85542    1.05486  11.239  < 2e-16 ***
## School9397       10.35547    0.88390  11.716  < 2e-16 ***
## School4253        9.41286    0.79568  11.830  < 2e-16 ***
## School2655       12.34517    0.84033  14.691  < 2e-16 ***
## School7342       11.16641    0.79568  14.034  < 2e-16 ***
## School9292       10.27963    1.39020   7.394 1.59e-13 ***
## School3499       13.27653    0.98302  13.506  < 2e-16 ***
## School7364       14.17214    0.91354  15.513  < 2e-16 ***
## School8983       10.99200    0.84853  12.954  < 2e-16 ***
## School5650       14.27353    0.90333  15.801  < 2e-16 ***
## School2658       13.39616    0.90333  14.830  < 2e-16 ***
## School8188       12.74097    1.10635  11.516  < 2e-16 ***
## School4410       13.47298    0.94637  14.236  < 2e-16 ***
## School9508       13.57466    1.02428  13.253  < 2e-16 ***
## School8707       12.88394    0.87465  14.730  < 2e-16 ***
## School1499        7.66036    0.83237   9.203  < 2e-16 ***
## School8477       12.52224    0.99621  12.570  < 2e-16 ***
## School1288       13.51080    1.21194  11.148  < 2e-16 ***
## School6291       10.10731    1.02428   9.868  < 2e-16 ***
## School1224        9.71545    0.88390  10.992  < 2e-16 ***
## School4292       12.86435    0.75162  17.116  < 2e-16 ***
## School8857       15.29694    0.75747  20.195  < 2e-16 ***
## School3967       12.03508    0.84033  14.322  < 2e-16 ***
## School6415       11.86020    0.82462  14.383  < 2e-16 ***
## School1317       13.17769    0.87465  15.066  < 2e-16 ***
## School2629       14.90777    0.80263  18.574  < 2e-16 ***
## School4223       14.62262    0.90333  16.187  < 2e-16 ***
## School1462       10.49556    0.80263  13.076  < 2e-16 ***
## School9550       11.08914    1.12526   9.855  < 2e-16 ***
## School6464        7.09162    1.12526   6.302 3.12e-10 ***
## School4931       13.79081    0.79568  17.332  < 2e-16 ***
## School5937       16.77597    1.12526  14.908  < 2e-16 ***
## School7919       14.84997    0.99621  14.906  < 2e-16 ***
## School3716       10.36866    0.94637  10.956  < 2e-16 ***
## School1909       14.42332    1.14518  12.595  < 2e-16 ***
## School2651       11.08432    0.98302  11.276  < 2e-16 ***
## School2467       10.14752    0.84033  12.076  < 2e-16 ***
## School1374        9.72846    1.14518   8.495  < 2e-16 ***
## School6600       11.70389    0.80976  14.453  < 2e-16 ***
## School5667       13.77823    0.77587  17.758  < 2e-16 ***
## School5720       14.28230    0.83237  17.159  < 2e-16 ***
## School3498       16.39045    0.83237  19.691  < 2e-16 ***
## School3881       11.94922    0.94637  12.626  < 2e-16 ***
## School2995        9.54611    0.89346  10.684  < 2e-16 ***
## School5838       13.68961    1.08836  12.578  < 2e-16 ***
## School3688       14.65626    0.92410  15.860  < 2e-16 ***
## School9158        8.54517    0.83237  10.266  < 2e-16 ***
## School8946       10.37509    0.79568  13.039  < 2e-16 ***
## School7232       12.54263    0.84033  14.926  < 2e-16 ***
## School2917        7.97895    0.92410   8.634  < 2e-16 ***
## School6170       14.18105    1.32234  10.724  < 2e-16 ***
## School8165       16.45122    0.86567  19.004  < 2e-16 ***
## School9104       16.83211    0.81709  20.600  < 2e-16 ***
## School2030       12.07819    0.88390  13.665  < 2e-16 ***
## School8150       14.85236    0.91354  16.258  < 2e-16 ***
## School4042       14.31542    0.75747  18.899  < 2e-16 ***
## School8357       14.38185    1.16619  12.332  < 2e-16 ***
## School8531       13.52868    0.94637  14.295  < 2e-16 ***
## School6074       13.77909    0.80976  17.016  < 2e-16 ***
## School4420       13.87416    1.07122  12.952  < 2e-16 ***
## School1906       15.98317    0.83237  19.202  < 2e-16 ***
## School3992       14.64521    0.83237  17.595  < 2e-16 ***
## School3999       10.94404    0.89346  12.249  < 2e-16 ***
## School4173       12.72466    0.91354  13.929  < 2e-16 ***
## School4325       13.24000    0.83237  15.906  < 2e-16 ***
## School5761       11.13806    0.84033  13.254  < 2e-16 ***
## School6484       12.91240    1.02428  12.606  < 2e-16 ***
## School6897       15.09763    0.86567  17.440  < 2e-16 ***
## School7635       15.06553    0.84853  17.755  < 2e-16 ***
## School7734       10.55964    1.29194   8.173 3.54e-16 ***
## School8175       11.69809    1.05486  11.090  < 2e-16 ***
## School8874       12.05503    1.00995  11.936  < 2e-16 ***
## School9225       14.66733    1.00995  14.523  < 2e-16 ***
## School2458       13.98568    0.80263  17.425  < 2e-16 ***
## School3610       15.35495    0.75747  20.271  < 2e-16 ***
## School5640       13.16011    0.80263  16.396  < 2e-16 ***
## School3838       16.06281    0.82462  19.479  < 2e-16 ***
## School9359       15.27062    0.83237  18.346  < 2e-16 ***
## School2208       15.40467    0.78231  19.691  < 2e-16 ***
## School6089       15.56958    1.05486  14.760  < 2e-16 ***
## School1477       14.22847    0.76959  18.488  < 2e-16 ***
## School2768       10.88692    1.21194   8.983  < 2e-16 ***
## School3039       16.96386    1.32234  12.829  < 2e-16 ***
## School5819       12.13890    0.85697  14.165  < 2e-16 ***
## School6397       12.79610    0.78231  16.357  < 2e-16 ***
## School1308       16.25550    1.35500  11.997  < 2e-16 ***
## School1433       19.71914    1.02428  19.252  < 2e-16 ***
## School1436       18.11161    0.91354  19.826  < 2e-16 ***
## School1461       16.84264    1.05486  15.967  < 2e-16 ***
## School1637        7.02411    1.16619   6.023 1.80e-09 ***
## School1942       18.11090    1.12526  16.095  < 2e-16 ***
## School1946       12.90844    0.97033  13.303  < 2e-16 ***
## School2336       16.51770    0.88390  18.687  < 2e-16 ***
## School2526       17.05300    0.80263  21.246  < 2e-16 ***
## School2626       13.39661    0.98302  13.628  < 2e-16 ***
## School2755       16.47651    0.88390  18.641  < 2e-16 ***
## School2771       11.84411    0.81709  14.495  < 2e-16 ***
## School2990       18.44792    0.87465  21.092  < 2e-16 ***
## School3020       14.39527    0.78891  18.247  < 2e-16 ***
## School3152       13.20904    0.84033  15.719  < 2e-16 ***
## School3332       14.27816    0.98302  14.525  < 2e-16 ***
## School3351       11.46518    0.97033  11.816  < 2e-16 ***
## School3427       19.71559    0.86567  22.775  < 2e-16 ***
## School3657        9.52118    0.84853  11.221  < 2e-16 ***
## School4642       14.59903    0.77587  18.816  < 2e-16 ***
## School5404       15.41498    0.80263  19.206  < 2e-16 ***
## School5619       15.41624    0.74590  20.668  < 2e-16 ***
## School6366       15.65640    0.79568  19.677  < 2e-16 ***
## School6469       18.45572    0.80263  22.994  < 2e-16 ***
## School7011       13.81358    1.05486  13.095  < 2e-16 ***
## School7276       12.67940    0.83237  15.233  < 2e-16 ***
## School7332       14.63610    0.87465  16.734  < 2e-16 ***
## School7345       11.33855    0.80976  14.002  < 2e-16 ***
## School7688       18.42231    0.82462  22.340  < 2e-16 ***
## School7697       15.72178    1.07122  14.677  < 2e-16 ***
## School8193       16.23226    0.92410  17.565  < 2e-16 ***
## School8202       11.71243    1.02428  11.435  < 2e-16 ***
## School8627       10.88372    0.83237  13.076  < 2e-16 ***
## School8628       16.52838    0.77587  21.303  < 2e-16 ***
## School9198       19.09229    1.08836  17.542  < 2e-16 ***
## School9586       14.86369    0.78891  18.841  < 2e-16 ***
## School8367:C_SES  0.25037    2.22488   0.113 0.910403    
## School8854:C_SES  1.93884    1.35428   1.432 0.152292    
## School4458:C_SES  1.13184    1.36793   0.827 0.408034    
## School5762:C_SES -1.01410    1.95950  -0.518 0.604803    
## School6990:C_SES  0.94769    0.98169   0.965 0.334393    
## School5815:C_SES  3.01800    2.16485   1.394 0.163335    
## School7172:C_SES  0.99448    1.36612   0.728 0.466662    
## School4868:C_SES  1.28647    1.48571   0.866 0.386576    
## School7341:C_SES  1.70370    1.00528   1.695 0.090169 .  
## School1358:C_SES  5.06801    1.71168   2.961 0.003078 ** 
## School4383:C_SES  6.18019    2.11601   2.921 0.003504 ** 
## School2305:C_SES -0.78211    1.13763  -0.687 0.491795    
## School8800:C_SES  2.56813    1.42668   1.800 0.071893 .  
## School3088:C_SES  1.79134    1.23144   1.455 0.145807    
## School8775:C_SES  1.00146    1.32013   0.759 0.448114    
## School7890:C_SES -0.65597    1.44460  -0.454 0.649784    
## School6144:C_SES  2.77027    1.30158   2.128 0.033340 *  
## School6443:C_SES -0.74335    1.46027  -0.509 0.610734    
## School5192:C_SES  1.60349    1.46102   1.098 0.272452    
## School6808:C_SES  2.27610    1.15285   1.974 0.048384 *  
## School2818:C_SES  2.80241    1.52348   1.839 0.065888 .  
## School9340:C_SES  3.30950    2.00186   1.653 0.098334 .  
## School4523:C_SES  2.38079    1.24545   1.912 0.055971 .  
## School6816:C_SES  1.35272    1.16781   1.158 0.246767    
## School2277:C_SES -2.01503    1.17745  -1.711 0.087062 .  
## School8009:C_SES  1.55687    1.58675   0.981 0.326542    
## School5783:C_SES  3.11406    1.49402   2.084 0.037165 *  
## School3013:C_SES  3.83980    1.75094   2.193 0.028341 *  
## School7101:C_SES  1.29545    1.42842   0.907 0.364484    
## School4530:C_SES  1.64743    1.23926   1.329 0.183771    
## School9021:C_SES  2.52416    1.39478   1.810 0.070384 .  
## School4511:C_SES  0.04251    1.38066   0.031 0.975438    
## School2639:C_SES -0.63010    1.52971  -0.412 0.680417    
## School3377:C_SES -0.74685    1.31955  -0.566 0.571419    
## School6578:C_SES  2.39054    1.30725   1.829 0.067492 .  
## School9347:C_SES  2.68599    1.18051   2.275 0.022920 *  
## School3705:C_SES  1.15850    1.33049   0.871 0.383933    
## School3533:C_SES -0.31177    1.64469  -0.190 0.849658    
## School1296:C_SES  1.07596    1.36610   0.788 0.430948    
## School4350:C_SES  4.37192    1.59458   2.742 0.006127 ** 
## School9397:C_SES  2.44644    1.47583   1.658 0.097430 .  
## School4253:C_SES -0.39954    1.15329  -0.346 0.729023    
## School2655:C_SES  5.22704    1.35364   3.861 0.000114 ***
## School7342:C_SES  1.01246    1.42097   0.713 0.476173    
## School9292:C_SES  0.75868    2.74811   0.276 0.782501    
## School3499:C_SES  0.99238    1.58184   0.627 0.530445    
## School7364:C_SES  0.25950    1.82833   0.142 0.887139    
## School8983:C_SES  1.38594    1.34299   1.032 0.302119    
## School5650:C_SES  0.68062    1.17460   0.579 0.562308    
## School2658:C_SES  2.62990    1.42677   1.843 0.065335 .  
## School8188:C_SES  4.39759    1.65556   2.656 0.007920 ** 
## School4410:C_SES  2.76020    1.55700   1.773 0.076312 .  
## School9508:C_SES  3.95379    1.82178   2.170 0.030019 *  
## School8707:C_SES  3.39153    1.09903   3.086 0.002037 ** 
## School1499:C_SES  3.63473    1.16428   3.122 0.001805 ** 
## School8477:C_SES  3.81216    1.25548   3.036 0.002403 ** 
## School1288:C_SES  3.25545    1.84816   1.761 0.078205 .  
## School6291:C_SES  3.98087    1.69353   2.351 0.018770 *  
## School1224:C_SES  2.50858    1.42433   1.761 0.078243 .  
## School4292:C_SES -0.16061    1.16329  -0.138 0.890195    
## School8857:C_SES  0.80222    1.43452   0.559 0.576025    
## School3967:C_SES  3.31107    1.35069   2.451 0.014255 *  
## School6415:C_SES  3.53008    1.21524   2.905 0.003686 ** 
## School1317:C_SES  1.27391    1.58930   0.802 0.422837    
## School2629:C_SES  0.22235    1.14645   0.194 0.846225    
## School4223:C_SES  2.48659    1.50770   1.649 0.099140 .  
## School1462:C_SES -0.82881    1.39839  -0.593 0.553411    
## School9550:C_SES  3.89194    1.45938   2.667 0.007675 ** 
## School6464:C_SES  1.00349    1.82459   0.550 0.582348    
## School4931:C_SES  0.91185    1.17597   0.775 0.438133    
## School5937:C_SES  1.03962    2.00531   0.518 0.604175    
## School7919:C_SES  3.98937    1.88178   2.120 0.034042 *  
## School3716:C_SES  5.86379    1.18088   4.966 7.01e-07 ***
## School1909:C_SES  2.85479    1.63722   1.744 0.081260 .  
## School2651:C_SES  4.89906    1.55631   3.148 0.001652 ** 
## School2467:C_SES  3.13713    1.65830   1.892 0.058563 .  
## School1374:C_SES  3.85432    1.66836   2.310 0.020904 *  
## School6600:C_SES  4.70429    1.16489   4.038 5.44e-05 ***
## School5667:C_SES  3.52297    1.24071   2.839 0.004532 ** 
## School5720:C_SES  2.46631    1.26524   1.949 0.051302 .  
## School3498:C_SES -0.13109    1.45712  -0.090 0.928320    
## School3881:C_SES  2.39071    1.79271   1.334 0.182388    
## School2995:C_SES  1.43231    1.01804   1.407 0.159492    
## School5838:C_SES  1.85305    1.77992   1.041 0.297872    
## School3688:C_SES  1.53672    1.72035   0.893 0.371749    
## School9158:C_SES  3.86121    1.12532   3.431 0.000604 ***
## School8946:C_SES  1.69048    1.06749   1.584 0.113331    
## School7232:C_SES  5.00160    1.47738   3.385 0.000715 ***
## School2917:C_SES  1.13585    1.02982   1.103 0.270083    
## School6170:C_SES  4.81178    1.64962   2.917 0.003547 ** 
## School8165:C_SES  1.80224    1.38480   1.301 0.193148    
## School9104:C_SES  1.49398    1.83440   0.814 0.415430    
## School2030:C_SES  1.41198    1.31420   1.074 0.282681    
## School8150:C_SES -0.18571    1.51064  -0.123 0.902162    
## School4042:C_SES  1.69362    1.20501   1.405 0.159922    
## School8357:C_SES  2.67578    1.15717   2.312 0.020788 *  
## School8531:C_SES  3.31823    1.40287   2.365 0.018043 *  
## School6074:C_SES  1.52909    1.30292   1.174 0.240603    
## School4420:C_SES  2.95866    1.58878   1.862 0.062614 .  
## School1906:C_SES  2.14551    1.36955   1.567 0.117259    
## School3992:C_SES  0.53788    1.38802   0.388 0.698389    
## School3999:C_SES  3.56698    0.98836   3.609 0.000310 ***
## School4173:C_SES  3.36567    1.45956   2.306 0.021143 *  
## School4325:C_SES  2.75605    1.03273   2.669 0.007632 ** 
## School5761:C_SES  3.10801    1.19136   2.609 0.009106 ** 
## School6484:C_SES  0.60568    1.49351   0.406 0.685093    
## School6897:C_SES  3.58049    1.17477   3.048 0.002314 ** 
## School7635:C_SES  2.44847    1.26927   1.929 0.053767 .  
## School7734:C_SES  6.03523    1.48469   4.065 4.86e-05 ***
## School8175:C_SES  1.61237    1.54334   1.045 0.296186    
## School8874:C_SES  4.09630    1.43512   2.854 0.004326 ** 
## School9225:C_SES  2.88589    1.39642   2.067 0.038806 *  
## School2458:C_SES  2.95669    1.22988   2.404 0.016241 *  
## School3610:C_SES  2.95585    1.20868   2.446 0.014489 *  
## School5640:C_SES  3.82774    1.38890   2.756 0.005868 ** 
## School3838:C_SES  0.59790    1.23391   0.485 0.628005    
## School9359:C_SES -0.83348    1.46679  -0.568 0.569895    
## School2208:C_SES  2.63664    1.31898   1.999 0.045648 *  
## School6089:C_SES  1.69245    1.51926   1.114 0.265318    
## School1477:C_SES  1.23061    1.14871   1.071 0.284075    
## School2768:C_SES  3.51228    1.37517   2.554 0.010668 *  
## School3039:C_SES  2.95567    1.92045   1.539 0.123839    
## School5819:C_SES  1.97252    1.43955   1.370 0.170658    
## School6397:C_SES  2.75901    1.03849   2.657 0.007908 ** 
## School1308:C_SES  0.12602    2.89741   0.043 0.965308    
## School1433:C_SES  1.85429    1.74798   1.061 0.288809    
## School1436:C_SES  1.60056    1.61653   0.990 0.322150    
## School1461:C_SES  6.26650    1.54684   4.051 5.15e-05 ***
## School1637:C_SES  3.11681    1.56230   1.995 0.046081 *  
## School1942:C_SES  0.08938    2.17762   0.041 0.967260    
## School1946:C_SES  3.58583    1.40776   2.547 0.010881 *  
## School2336:C_SES  1.90497    1.39610   1.364 0.172457    
## School2526:C_SES  0.15950    1.32609   0.120 0.904264    
## School2626:C_SES  4.09968    1.77861   2.305 0.021197 *  
## School2755:C_SES  0.56050    1.41945   0.395 0.692950    
## School2771:C_SES  4.26819    1.60528   2.659 0.007859 ** 
## School2990:C_SES  1.32454    1.32327   1.001 0.316883    
## School3020:C_SES  1.65368    1.21797   1.358 0.174595    
## School3152:C_SES  2.76825    1.00158   2.764 0.005727 ** 
## School3332:C_SES  2.03095    1.71772   1.182 0.237106    
## School3351:C_SES  2.45504    1.16971   2.099 0.035867 *  
## School3427:C_SES -0.48817    1.31793  -0.370 0.711094    
## School3657:C_SES  3.73591    1.10047   3.395 0.000691 ***
## School4642:C_SES  3.27239    1.17344   2.789 0.005306 ** 
## School5404:C_SES  1.21423    1.60712   0.756 0.449955    
## School5619:C_SES  5.25753    1.25841   4.178 2.98e-05 ***
## School6366:C_SES  1.51752    1.21527   1.249 0.211812    
## School6469:C_SES  1.75529    1.29272   1.358 0.174562    
## School7011:C_SES  5.07465    1.72585   2.940 0.003289 ** 
## School7276:C_SES  3.77336    1.06358   3.548 0.000391 ***
## School7332:C_SES  2.46320    1.32282   1.862 0.062634 .  
## School7345:C_SES  4.21192    0.98954   4.256 2.10e-05 ***
## School7688:C_SES  0.11634    1.47469   0.079 0.937119    
## School7697:C_SES  3.13622    1.77439   1.767 0.077190 .  
## School8193:C_SES  2.33521    1.62482   1.437 0.150704    
## School8202:C_SES  3.70590    1.47606   2.511 0.012073 *  
## School8627:C_SES  1.86956    1.18737   1.575 0.115408    
## School8628:C_SES  1.23139    1.34892   0.913 0.361339    
## School9198:C_SES  2.61055    1.69902   1.537 0.124462    
## School9586:C_SES  1.67208    1.33730   1.250 0.211217    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.06 on 6865 degrees of freedom
## Multiple R-squared:  0.8328, Adjusted R-squared:  0.825 
## F-statistic: 106.8 on 320 and 6865 DF,  p-value: < 2.2e-16
# collect regression estimates from individual regression by school
m1b_betas <- m1b %>% 
             coef %>% 
             matrix(ncol = 2) %>%
             as.data.frame %>%
             rename( b0 = V1, b1 = V2) 
#
colMeans(m1b_betas)
##        b0        b1 
## 12.620755  2.201641
#
apply(m1b_betas, 2, sd)
##       b0       b1 
## 3.117651 1.630998
#
cor(m1b_betas)
##            b0         b1
## b0 1.00000000 0.01346686
## b1 0.01346686 1.00000000
# scatter plot of regression coefficient estimates
ggplot(m1b_betas, aes(b0, b1)) +
 geom_point(pch = 20) +
 stat_smooth(method = "lm", se = F) +
 stat_ellipse() +
 annotate("text", x = 5, y = 5, label = "r = 0.013") +
 labs(x = "Intercept estimates", y = "Slope estimates")

# random intercept only
summary(m3a <- lmer(Math ~ C_SES + (1 | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ C_SES + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46724
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0969 -0.7322  0.0194  0.7572  2.9147 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.672   2.945   
##  Residual             37.010   6.084   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6361     0.2445   51.68
## C_SES         2.1912     0.1087   20.17
## 
## Correlation of Fixed Effects:
##       (Intr)
## C_SES 0.000
# random intercept and slope - correlated 
summary(m3b <- lmer(Math ~ C_SES + (C_SES | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ C_SES + (C_SES | School)
##    Data: dta
## 
## REML criterion at convergence: 46714.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.09680 -0.73193  0.01855  0.75386  2.89924 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  School   (Intercept)  8.681   2.9464       
##           C_SES        0.694   0.8331   0.02
##  Residual             36.700   6.0581       
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6362     0.2445   51.68
## C_SES         2.1932     0.1283   17.10
## 
## Correlation of Fixed Effects:
##       (Intr)
## C_SES 0.009
# random intercept and slope - uncorrelated 
summary(m3c <- lmer(Math ~ C_SES + (1 | School) + 
                           (C_SES - 1 | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ C_SES + (1 | School) + (C_SES - 1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46714.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.09607 -0.73095  0.01694  0.75391  2.90167 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.6807  2.9463  
##  School.1 C_SES        0.6941  0.8331  
##  Residual             36.7002  6.0581  
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6360     0.2445   51.68
## C_SES         2.1926     0.1283   17.09
## 
## Correlation of Fixed Effects:
##       (Intr)
## C_SES 0.000
# summary(m3c <- lme(Math ~ C_SES, 
#              random = list(School = pdDiag(~ C_SES)), data = dta))

# default residual plot
plot(m3c, xlab = "Fitted values", ylab = "Pearson residuals", 
     pch = 20, cex = .5, type = c("p", "g"))

# model comparisons
anova(m3a, m3b, m3c)
## Data: dta
## Models:
## m3a: Math ~ C_SES + (1 | School)
## m3c: Math ~ C_SES + (1 | School) + (C_SES - 1 | School)
## m3b: Math ~ C_SES + (C_SES | School)
##     npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)   
## m3a    4 46728 46756 -23360    46720                        
## m3c    5 46721 46755 -23356    46711 9.4196  1   0.002147 **
## m3b    6 46723 46764 -23356    46711 0.0134  1   0.907954   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# CIs for model parameters
confint(m3c, method="boot")
##                  2.5 %    97.5 %
## .sig01       2.6100481  3.269883
## .sig02       0.3848003  1.141918
## .sigma       5.9425454  6.150370
## (Intercept) 12.1304752 13.088948
## C_SES        1.9525498  2.439487
# The end

Including both the effects of student-level and school-level predictors

# load them
pacman::p_load(mlmRev, tidyverse, lme4, merTools)

# data from mlmRev package
data(Hsb82)

#
str(Hsb82)
## 'data.frame':    7185 obs. of  8 variables:
##  $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sx     : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ ses    : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ mAch   : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ meanses: num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cses   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...
# make a copy of data to the dta object 
dta <- Hsb82            

names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")

theme_set(theme_bw())

# Math vs SES by sector
ggplot(dta, aes(C_SES, Math, color = Sector)) +
 geom_smooth(method = "lm") +
 geom_jitter(alpha = 0.2) +
 labs(x = "Centered SES", y = "Math achievement score") +
 theme(legend.position = c(.1, .8))

# to facilitate comparison with sas output
#options(contrasts = c(factor = "contr.SAS", ordered = "contr.poly"))

# full
summary(m4a <- lmer(Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES +
        Sector:C_SES + (C_SES | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES +  
##     (C_SES | School)
##    Data: dta
## 
## REML criterion at convergence: 46503.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.15926 -0.72319  0.01704  0.75444  2.95822 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  School   (Intercept)  2.380   1.5426       
##           C_SES        0.101   0.3179   0.39
##  Residual             36.721   6.0598       
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           12.1279     0.1993  60.856
## Ave_SES                5.3329     0.3692  14.446
## SectorCatholic         1.2266     0.3063   4.005
## C_SES                  2.9450     0.1556  18.928
## Ave_SES:C_SES          1.0393     0.2989   3.477
## SectorCatholic:C_SES  -1.6427     0.2398  -6.851
## 
## Correlation of Fixed Effects:
##             (Intr) Av_SES SctrCt C_SES  A_SES:
## Ave_SES      0.256                            
## SectorCthlc -0.699 -0.356                     
## C_SES        0.075  0.019 -0.053              
## A_SES:C_SES  0.019  0.074 -0.026  0.293       
## SctrC:C_SES -0.052 -0.027  0.077 -0.696 -0.351
# reduced
summary(m4b <- lmer(Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES +
        Sector:C_SES + (1 | School), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES +  
##     (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46504.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1701 -0.7249  0.0148  0.7542  2.9655 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  2.375   1.541   
##  Residual             36.766   6.064   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           12.1282     0.1992  60.885
## Ave_SES                5.3367     0.3690  14.463
## SectorCatholic         1.2245     0.3061   4.000
## C_SES                  2.9421     0.1512  19.457
## Ave_SES:C_SES          1.0444     0.2910   3.589
## SectorCatholic:C_SES  -1.6422     0.2331  -7.045
## 
## Correlation of Fixed Effects:
##             (Intr) Av_SES SctrCt C_SES  A_SES:
## Ave_SES      0.256                            
## SectorCthlc -0.699 -0.356                     
## C_SES        0.000  0.000  0.000              
## A_SES:C_SES  0.000  0.000  0.000  0.295       
## SctrC:C_SES  0.000  0.000  0.000 -0.696 -0.351
# will refit be ML estimation
anova(m4a, m4b)
## Data: dta
## Models:
## m4b: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES + 
## m4b:     (1 | School)
## m4a: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES + 
## m4a:     (C_SES | School)
##     npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)
## m4b    8 46513 46568 -23249    46497                     
## m4a   10 46516 46585 -23248    46496 1.0016  2     0.6061
# residualual plot
plot(m4b, xlab = "Fitted values", ylab = "Pearson residuals", 
     pch = 20, cex = .5, type = c("p", "g"))

# quick summary of model parameter estimates
fastdisp(m4b)
## lmer(formula = Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + 
##     Sector:C_SES + (1 | School), data = dta)
##                      coef.est coef.se
## (Intercept)          12.13     0.20  
## Ave_SES               5.34     0.37  
## SectorCatholic        1.22     0.31  
## C_SES                 2.94     0.15  
## Ave_SES:C_SES         1.04     0.29  
## SectorCatholic:C_SES -1.64     0.23  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  School   (Intercept) 1.54    
##  Residual             6.06    
## ---
## number of obs: 7185, groups: School, 160
## AIC = 46520.8
# estimate fixed-effects parameters by simulation
m4b_fe <- FEsim(m4b, 1000)

# plot for fixed effects
plotFEsim(m4b_fe) + 
 labs(title = "Coefficient Plot", x = "Median Effect Estimate")

# estimate random-effects parameters by simulation
m4b_re <- REsim(m4b)

# normality plot for random effects
plotREsim(m4b_re)

# The end

Inclass Exercise 2

# load them
pacman::p_load(mlmRev, tidyverse, lme4, merTools)

# input data
dta <- read.csv("pts.csv")

#
head(dta)
##   School Teacher Pupil      Read_0 Read_1
## 1      1      11    18 -0.16407000 4.2426
## 2      1      11    19 -0.98126000 1.0000
## 3      1      11    20 -1.25370000 2.2361
## 4      1      11    15 -0.87230000 3.1623
## 5      1      11    17 -0.00063104 3.4641
## 6      1      11    16 -0.92678000 4.0000
# coerce variables to factor type and
# compute mean Read_0 by school
dta <- dta %>%
  mutate(School = factor(School), Teacher = factor(Teacher),
         Pupil = factor(Pupil)) %>%
  group_by( School ) %>%
  mutate(msRead_0 = mean(Read_0))

#
# compute mean Read_0 by teacher and
# centered teacher mean of Read_0 (from respective school means)
#
dta <- dta %>%
  group_by( Teacher ) %>%
  mutate(mtRead_0 = mean(Read_0), ctRead_0 = mean(Read_0) - msRead_0 )

##
# no predictor
summary(m0 <- lmer(Read_1 ~  (1 | School) + (1 | Teacher), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher)
##    Data: dta
## 
## REML criterion at convergence: 2486.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05438 -0.65686  0.06497  0.62523  2.47233 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.1370   0.3701  
##  School   (Intercept) 0.1277   0.3574  
##  Residual             1.3250   1.1511  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.3755     0.1065   31.69
# add a school-level predictor
summary(m0_s <- update(m0, . ~ . + msRead_0))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + msRead_0
##    Data: dta
## 
## REML criterion at convergence: 2467.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.06110 -0.65597  0.06822  0.62772  2.45391 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.1234   0.3513  
##  School   (Intercept) 0.0000   0.0000  
##  Residual             1.3224   1.1500  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.39822    0.06808  49.918
## msRead_0     1.00159    0.17807   5.625
## 
## Correlation of Fixed Effects:
##          (Intr)
## msRead_0 0.086 
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# add a teacher-level predictor
summary(m0_t <- update(m0, . ~ . + mtRead_0))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + mtRead_0
##    Data: dta
## 
## REML criterion at convergence: 2434.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.07539 -0.65979  0.06707  0.63259  2.49973 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Teacher  (Intercept) 3.296e-15 5.741e-08
##  School   (Intercept) 4.471e-02 2.114e-01
##  Residual             1.309e+00 1.144e+00
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.40649    0.06307  54.010
## mtRead_0     1.07319    0.11495   9.336
## 
## Correlation of Fixed Effects:
##          (Intr)
## mtRead_0 0.024 
## convergence code: 0
## boundary (singular) fit: see ?isSingular
# add a teacher-level predictor away from respective school means
summary(m0_ct <- update(m0, . ~ . + ctRead_0))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + ctRead_0
##    Data: dta
## 
## REML criterion at convergence: 2454.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1804 -0.6461  0.0791  0.6420  2.5272 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Teacher  (Intercept) 2.462e-10 1.569e-05
##  School   (Intercept) 1.764e-01 4.200e-01
##  Residual             1.311e+00 1.145e+00
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.3896     0.1029  32.932
## ctRead_0      1.1742     0.1581   7.427
## 
## Correlation of Fixed Effects:
##          (Intr)
## ctRead_0 0.000 
## convergence code: 0
## boundary (singular) fit: see ?isSingular
###

Inclass Exercise 3

pacman::p_load(mlmRev, tidyverse, lme4, merTools, sjPlot)
library(WWGbook)
data(classroom, package = "WWGbook")
head(classroom)
##   sex minority mathkind mathgain   ses yearstea mathknow housepov mathprep
## 1   1        1      448       32  0.46        1       NA    0.082     2.00
## 2   0        1      460      109 -0.27        1       NA    0.082     2.00
## 3   1        1      511       56 -0.03        1       NA    0.082     2.00
## 4   0        1      449       83 -0.38        2    -0.11    0.082     3.25
## 5   0        1      425       53 -0.03        2    -0.11    0.082     3.25
## 6   1        1      450       65  0.76        2    -0.11    0.082     3.25
##   classid schoolid childid
## 1     160        1       1
## 2     160        1       2
## 3     160        1       3
## 4     217        1       4
## 5     217        1       5
## 6     217        1       6
m0 <-lme4::lmer(mathgain~(1|schoolid/classid), data=classroom)
sjPlot::tab_model(m0, show.p=F, show.r2=F)
  mathgain
Predictors Estimates CI
(Intercept) 57.43 54.60 – 60.26
Random Effects
σ2 1028.23
τ00 classid:schoolid 99.22
τ00 schoolid 77.50
ICC 0.15
N classid 312
N schoolid 107
Observations 1190
m1<- lme4::lmer(mathgain~(1|schoolid/classid), data=classroom)
sjPlot::tab_model(m0, m1, show.p=F, show.r2=F, show.obs = F, show.ngroups = F,
                  show.se = T, show.ci = F)
  mathgain mathgain
Predictors Estimates std. Error Estimates std. Error
(Intercept) 57.43 1.44 57.43 1.44
Random Effects
σ2 1028.23 1028.23
τ00 99.22 classid:schoolid 99.22 classid:schoolid
77.50 schoolid 77.50 schoolid
ICC 0.15 0.15
m2<-lme4::lmer(mathgain~mathkind+sex+minority+ses|schoolid, data=classroom)
sjPlot::tab_model(m0, m1,m2, show.p=F, show.r2=F, show.obs = F, show.ngroups = F,
                  show.se = T, show.ci = F)
  mathgain mathgain mathgain
Predictors Estimates std. Error Estimates std. Error Estimates std. Error
(Intercept) 57.43 1.44 57.43 1.44 62.32 1.98
Random Effects
σ2 1028.23 1028.23 915.56
τ00 99.22 classid:schoolid 99.22 classid:schoolid 1072.21 schoolid
77.50 schoolid 77.50 schoolid  
τ11     0.01 schoolid.mathkind
    114.05 schoolid.sex
    678.35 schoolid.minority
    176.34 schoolid.ses
ρ01     -1.00
    -0.15
    0.19
    -0.08
ICC 0.15 0.15  

Exercise 1

pacman::p_load(mlmRev, tidyverse, lme4, nlme)
dta_e1<-read.table('iq_language.txt', h=T)
head(dta_e1)
##   School Pupil   IQ Language Group_size       IQ_c School_mean Group_mean
## 1      1 17001 15.0       46         29  3.1659379    -1.51406        5.9
## 2      1 17002 14.5       45         29  2.6659379    -1.51406        5.9
## 3      1 17003  9.5       33         29 -2.3340621    -1.51406        5.9
## 4      1 17004 11.0       46         29 -0.8340621    -1.51406        5.9
## 5      1 17005  8.0       20         29 -3.8340621    -1.51406        5.9
## 6      1 17006  9.5       30         29 -2.3340621    -1.51406        5.9
##        SES_c
## 1  -4.811981
## 2 -17.811981
## 3 -12.811981
## 4  -4.811981
## 5 -17.811981
## 6 -17.811981

m0

summary(m0 <- lmer(Language ~ (1|School), data = dta_e1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ (1 | School)
##    Data: dta_e1
## 
## REML criterion at convergence: 16253.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11618 -0.65703  0.07597  0.74128  2.50639 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 19.63    4.431   
##  Residual             64.56    8.035   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  40.3624     0.4282   94.26
  • The estimated overall mean language score is 40.36.
  • The estimated between school variance in mean language score is 19.63.
  • The variation between students within the same school is estimated to be 64.56.
  • The between school variance is about 19.63 / (19.63 + 64.56) = 0.2332 (23.3%)of the total variance. This is the intraclass correlation.

m1

summary(m1 <- lmer(Language ~ IQ_c + (1|School), data = dta_e1))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ IQ_c + (1 | School)
##    Data: dta_e1
## 
## REML criterion at convergence: 15255.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0939 -0.6375  0.0579  0.7061  3.1448 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  9.602   3.099   
##  Residual             42.245   6.500   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 40.60823    0.30819   131.8
## IQ_c         2.48759    0.07008    35.5
## 
## Correlation of Fixed Effects:
##      (Intr)
## IQ_c 0.018
sjPlot::tab_model(m0, m1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  Language Language
Predictors Estimates std. Error Estimates std. Error
(Intercept) 40.36 0.43 40.61 0.31
IQ_c 2.49 0.07
Random Effects
σ2 64.56 42.24
τ00 19.63 School 9.60 School
ICC 0.23 0.19
  • The estimated mean of language scores for all schools is 40.61 with as SD of \[\sqrt{9.60 }\]=3.098387.
  • The estimated mean of language scores for all children is 40.61 with as SD of \[\sqrt{(42.24+9.60)}\]=7.2
  • On average, there is a gain of 2.49 point on language score from an increase of one point in IQ_c.
  • After adjusting for IQ_c difference, correlation between language scores among children attending the same school is 0.19.
  • About 51.1%(=(19.63-9.60)/19.63) of variation in language scores between schools can be attributed to differences in IQ_c between schools.

Exercise 2

# load them
pacman::p_load(foreign, tidyverse, lme4, merTools)

# read spss format and covert it to data frame 
dta <- read.spss(file="eg_hlm.sav") %>% as.data.frame

# check data structure
str(dta)
## 'data.frame':    7230 obs. of  12 variables:
##  $ size    : num  380 380 380 380 380 380 380 380 380 380 ...
##  $ lowinc  : num  40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
##  $ mobility: num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
##  $ female  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ black   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ hispanic: num  1 1 1 0 0 0 0 0 1 1 ...
##  $ year    : num  0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
##  $ grade   : num  2 3 4 0 1 2 3 4 0 1 ...
##  $ math    : num  1.146 1.134 2.3 -1.303 0.439 ...
##  $ retained: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school  : Factor w/ 60 levels "        2020",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ cid     : Factor w/ 1721 levels "   101480302",..: 244 244 244 248 248 248 248 248 253 253 ...
# set black-and-white theme
theme_set(theme_bw())
#
ggplot(dta, aes(year, math, group = cid)) +
 geom_point(alpha = .1) +
 geom_line(alpha = .1) +
 stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
 stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
 labs(x = "Year of age (centered)", y = "Math score")

Model: Mathti\[X_{(j)}\]=\[b_{0j}\]+\[b_{1i(j)}\]+\[ β_1Year_ti\]+\[ε_ti(j)\], where\[b_{0j}\],j = 1, …, 60, are school random effects, and\[b_{1i(j)}\]are student random effects.

# nested specification with random intercepts only
# fixed-effect for slope
summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16759.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8093 -0.5831 -0.0270  0.5566  6.0867 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cid:school (Intercept) 0.6699   0.8185  
##  school     (Intercept) 0.1869   0.4324  
##  Residual               0.3470   0.5891  
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) -0.780482   0.061088  -12.78
## year         0.746123   0.005396  138.26
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.031
# same as
#lmer(math ~ year + (1 | school) +( 1 | school:cid) , data = dta))
  • 0.6699+0.1869+0.3470=1.2038
  • 0.6699/1.2038=0.5564878
  • 0.1869/1.2038=0.1552583
  • (0.6699+0.1869)/1.2038=0.7117461
  • The similarity with math scores of classes in the same schools controlling for years is about 71.2%.
# random slopes
summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))
## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ (year | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16553.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2231 -0.5611 -0.0308  0.5312  5.1579 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev. Corr
##  cid:school (Intercept) 0.64127  0.8008       
##             year        0.01132  0.1064   0.55
##  school     (Intercept) 0.92436  0.9614       
##             year        0.60793  0.7797   0.92
##  Residual               0.30115  0.5488       
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -1.64424    0.05462  -30.11
## convergence code: 0
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)
# compare models
anova(m1, m2)
## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
##    npar   AIC   BIC  logLik deviance Chisq Df Pr(>Chisq)    
## m1    5 16757 16792 -8373.5    16747                        
## m2    8 16566 16621 -8275.0    16550 197.1  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# merTools
# estimate random-effects parameters by simulation
m2_re <- REsim(m2)
# plot for random effects
plotREsim(m2_re)

# residual plots
plot(m1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

#
plot(m2, ylim = c(-4, 4),
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

Exercise 3

#Read data
dta_31<-read.table("demo1.txt", h=T)
dta_32<-read.table("demo2.txt", h=T)
dta_31 <- dta_31 %>% 
  group_by(School) %>% 
  mutate( ave_Score = mean(Score),
          c_Score = ave_Score - Score,
          n=n())
dta_32 <- dta_32 %>% 
  group_by(School) %>% 
  mutate( ave_Score = mean(Score),
          c_Score = ave_Score - Score,
          n=n())
ggplot(data = dta_31, aes(School, Score)) +
  geom_point(data = dta_31, aes(School, Score), colour = 'skyblue', size = 1) +
  geom_point(data = dta_32, aes(School, Score), colour = 'coral', size = 1) +
  geom_hline(yintercept = mean(dta_31$Score), colour = 'skyblue') +
  geom_hline(yintercept = mean(dta_32$Score), colour = 'coral')

m0_31

summary(m0_31 <- lmer(Score ~ (1|School), data = dta_31))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
##    Data: dta_31
## 
## REML criterion at convergence: 51.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3280 -0.4745  0.0000  0.4608  1.3553 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 1679     40.976  
##  Residual                5      2.236  
## Number of obs: 9, groups:  School, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    53.01      23.67    2.24

m0_32

summary(m0_32 <- lmer(Score ~ (1|School), data = dta_32))
## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
##    Data: dta_32
## 
## REML criterion at convergence: 80.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.36734 -0.34286 -0.02776  1.07153  1.13999 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 104.2    10.21   
##  Residual             967.8    31.11   
## Number of obs: 9, groups:  School, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    56.36      12.01   4.692

comparison

sjPlot::tab_model(m0_31, m0_32, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)
  Score Score
Predictors Estimates std. Error Estimates std. Error
(Intercept) 53.01 23.67 56.36 12.01
Random Effects
σ2 5.00 967.76
τ00 1679.06 School 104.17 School
ICC 1.00 0.10
  • The estimated mean of scores for all children in demo1 data is 53.01 with an SD of\[\sqrt{1679.06}\]=40.98

  • The estimated mean of scores for all children in demo2 data is 56.36 with an SD of\[\sqrt{104.17}\]=10.21

  • The correlation between scores in same school is 1.0 and 0.1 in demo1 data and demo2 data respectively.Because demo1 data are perfectly correlated, the unweighted grand mean in m0_21 is more convincing.

Residual Plot

plot(m0_31, ylim = c(-20, 20), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

plot(m0_32, ylim = c(-20, 20), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)