Homework

Inclass Exercise 1

school-effects model version 0

pacman::p_load(mlmRev, tidyverse, lme4)

# data from mlmRev package
data(Hsb82, "mlmRev")

#
str(Hsb82)

## 'data.frame':    7185 obs. of  8 variables:
##  $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sx     : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ ses    : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ mAch   : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ meanses: num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cses   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...

# make a copy of data to the dta object 
dta <- Hsb82            

# use sensible variable names
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")
#
head(dta)

##   School Minority Gender    SES   Math   Ave_SES Sector       C_SES
## 1   1224       No Female -1.528  5.876 -0.434383 Public -1.09361702
## 2   1224       No Female -0.588 19.708 -0.434383 Public -0.15361702
## 3   1224       No   Male -0.528 20.349 -0.434383 Public -0.09361702
## 4   1224       No   Male -0.668  8.781 -0.434383 Public -0.23361702
## 5   1224       No   Male -0.158 17.898 -0.434383 Public  0.27638298
## 6   1224       No   Male  0.022  4.583 -0.434383 Public  0.45638298

##
ot <- theme_set(theme_bw())

# freedman-diaconis rule for binwidth
bw <- 2*IQR(dta$Math)/(dim(dta)[1]^(1/3))

# histograms
ggplot(dta, aes(Math)) +
  stat_bin(aes(fill = ..count..), binwidth=bw) +
  labs(x="Math achievement scores")

# seed the random number to reproduce the sample
set.seed(20160923)

# pick 60 out of 160
n <- sample(160, 60)

# box plots for a sample of 60 schools
ggplot(filter(dta, School %in% levels(School)[n]),
       aes(reorder(School, Math, mean), Math, color=Sector)) +
 geom_boxplot() +
 geom_point(size= rel(0.6)) +
 labs(x="School ID", y="Math achievement score")+
 theme(axis.text.x=element_text(size=6))+
 ggtitle("A sample of 60 schools")

#
# math mean by school
dta_mmath <- dta %>% 
             group_by(School) %>% 
             mutate( ave_math = mean(Math)) %>% 
             select( School, ave_math ) %>%  
             unique %>%
             as.data.frame

summary(dta_mmath$ave_math)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    4.24   10.47   12.90   12.62   14.65   19.72

sd(dta_mmath$ave_math)

## [1] 3.117651

# grand mean
summary(dta$Math)

##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  -2.832   7.275  13.131  12.748  18.317  24.993

# sd for overall Math scores
sd(dta$Math)

## [1] 6.878246

# freedman-diaconis rule for binwidth
bw <- 2*IQR(dta_mmath$ave_math)/(160^(1/3))

#
ggplot(dta_mmath, aes(ave_math)) +
 geom_histogram(aes(y= ..density.., alpha= .2), binwidth=bw) + 
 geom_density(fill="NA") +
 geom_vline(xintercept=mean(dta_mmath$ave_math), linetype="longdash") +
 geom_text(aes(7, 0.17, label= "ave = 12.62, sd = 3.12")) +
 labs(x="Math achievement school means", y="Density") +
 theme(legend.position="NONE")

# baseline model
summary(m0 <- lmer(Math ~ (1 | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 47116.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0631 -0.7539  0.0267  0.7606  2.7426 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.614   2.935   
##  Residual             39.148   6.257   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6370     0.2444   51.71

# CIs for model parameters
confint(m0, method="boot")

##                 2.5 %    97.5 %
## .sig01       2.596281  3.333500
## .sigma       6.151525  6.360499
## (Intercept) 12.128943 13.114645

# default residual plot
plot(m0, xlab="Fitted values", ylab="Pearson residuals", 
     pch=20, cex=.5, type = c("p","g"))

# The end

Including the effect of a school level predictor

pacman::p_load(mlmRev, tidyverse, lme4)

# data from mlmRev package
data(Hsb82)

# first 6 lines
head(Hsb82)

##   school minrty     sx    ses   mAch   meanses sector        cses
## 1   1224     No Female -1.528  5.876 -0.434383 Public -1.09361702
## 2   1224     No Female -0.588 19.708 -0.434383 Public -0.15361702
## 3   1224     No   Male -0.528 20.349 -0.434383 Public -0.09361702
## 4   1224     No   Male -0.668  8.781 -0.434383 Public -0.23361702
## 5   1224     No   Male -0.158 17.898 -0.434383 Public  0.27638298
## 6   1224     No   Male  0.022  4.583 -0.434383 Public  0.45638298

# make a copy of data to the dta object 
dta <- Hsb82            

# names for human
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")
#
str(dta)

## 'data.frame':    7185 obs. of  8 variables:
##  $ School  : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ Minority: Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ Gender  : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ SES     : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ Math    : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ Ave_SES : num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ Sector  : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ C_SES   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...

# random effects anova - intercept only model
summary(m1 <- lmer(Math ~ (1 | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 47116.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0631 -0.7539  0.0267  0.7606  2.7426 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.614   2.935   
##  Residual             39.148   6.257   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6370     0.2444   51.71

# random intercept; fixed-effect is MeanSES 
summary(m2 <- lmer(Math ~ Ave_SES + (1 | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Ave_SES + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46961.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.13493 -0.75254  0.02413  0.76766  2.78515 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  2.639   1.624   
##  Residual             39.157   6.258   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6846     0.1493   84.97
## Ave_SES       5.8635     0.3615   16.22
## 
## Correlation of Fixed Effects:
##         (Intr)
## Ave_SES 0.010

# residual plot
plot(m2, xlab = "Fitted values", ylab = "Pearson residuals", 
     main = "Model 2", pch = 20, cex = .5, type = c("p", "g"))

# one regression for all
summary(m0a <- lm(Math ~ Ave_SES, data = dta))

## 
## Call:
## lm(formula = Math ~ Ave_SES, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.4520  -4.8332   0.1735   5.1117  16.2787 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 12.74703    0.07621  167.27   <2e-16 ***
## Ave_SES      5.71688    0.18429   31.02   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.46 on 7183 degrees of freedom
## Multiple R-squared:  0.1181, Adjusted R-squared:  0.118 
## F-statistic: 962.3 on 1 and 7183 DF,  p-value: < 2.2e-16

#
theme_set(theme_bw())

# individual math scores vs mean school ses
ggplot(dta, aes(Ave_SES, Math)) +
  geom_smooth(method = "lm") +
  geom_point(alpha = .2) +
  labs(x = "School mean SES", y = "Math achievement score")

# math mean by school
dta_mmath <- dta %>% 
             group_by(School) %>% 
             mutate( ave_math = mean(Math)) %>% 
             select( School, Ave_SES, ave_math ) %>%  
             unique %>%
             as.data.frame

# regression of mean math over mean ses by school
ggplot(dta_mmath, aes(Ave_SES, ave_math)) +
  geom_smooth(method = "lm") +
  geom_point(alpha = .2)+
  labs(x = "Mean School SES", y = "Mean School Math Scores")

# The end

Including the effect of a student-level predictor

# data from mlmRev package
data(Hsb82)

#
str(Hsb82)

## 'data.frame':    7185 obs. of  8 variables:
##  $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sx     : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ ses    : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ mAch   : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ meanses: num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cses   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...

# make a copy of data to the dta object 
dta <- Hsb82            

# variable names for human
names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")

# see the random number generator 
set.seed(20160923)

# take a sample of size 31 from total number of schools
n <- sample(length(levels(dta$School)), 31)

#
theme_set(theme_bw())

#
ggplot(data = filter(dta, School %in% levels(School)[n]), 
       aes(C_SES, Math, color = Sector)) +
 geom_smooth(method = "lm", se = F, lwd = .6) +
 geom_point(size = rel(0.5)) +
 facet_wrap(~ School, nrow = 2) +
 labs(x = "Centered SES", y="Math achievement score")+
 theme(axis.text.x = element_text(size = 6))

##
# one regression line per school
summary(m1b <- lm(Math ~ School / C_SES - 1, data = dta))

## 
## Call:
## lm(formula = Math ~ School/C_SES - 1, data = dta)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -19.2662  -4.3241   0.1164   4.4434  18.3617 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## School8367        4.55279    1.61953   2.811 0.004950 ** 
## School8854        4.23978    1.07122   3.958 7.64e-05 ***
## School4458        5.81140    0.87465   6.644 3.28e-11 ***
## School5762        4.32486    0.99621   4.341 1.44e-05 ***
## School6990        5.97679    0.83237   7.180 7.68e-13 ***
## School5815        7.27136    1.21194   6.000 2.08e-09 ***
## School7172        8.06682    0.91354   8.830  < 2e-16 ***
## School4868       12.31018    1.03923  11.845  < 2e-16 ***
## School7341        9.79418    0.84853  11.543  < 2e-16 ***
## School1358       11.20623    1.10635  10.129  < 2e-16 ***
## School4383       11.46568    1.21194   9.461  < 2e-16 ***
## School2305       11.13776    0.74031  15.045  < 2e-16 ***
## School8800        7.33594    1.07122   6.848 8.13e-12 ***
## School3088        9.14585    0.97033   9.425  < 2e-16 ***
## School8775        9.46700    0.87465  10.824  < 2e-16 ***
## School7890        8.34110    0.84853   9.830  < 2e-16 ***
## School6144        8.54507    0.92410   9.247  < 2e-16 ***
## School6443        9.47553    1.10635   8.565  < 2e-16 ***
## School5192       10.40950    1.14518   9.090  < 2e-16 ***
## School6808        9.28611    0.91354  10.165  < 2e-16 ***
## School2818       13.87283    0.93504  14.837  < 2e-16 ***
## School9340       11.17855    1.12526   9.934  < 2e-16 ***
## School4523        8.35174    0.88390   9.449  < 2e-16 ***
## School6816       14.53824    0.81709  17.793  < 2e-16 ***
## School2277        9.29761    0.77587  11.983  < 2e-16 ***
## School8009       14.08472    0.88390  15.935  < 2e-16 ***
## School5783       13.15034    1.12526  11.686  < 2e-16 ***
## School3013       12.61083    0.83237  15.151  < 2e-16 ***
## School7101       11.84993    1.14518  10.348  < 2e-16 ***
## School4530        9.05570    0.76345  11.861  < 2e-16 ***
## School9021       14.69666    0.80976  18.149  < 2e-16 ***
## School4511       13.40903    0.79568  16.852  < 2e-16 ***
## School2639        6.61548    0.93504   7.075 1.64e-12 ***
## School3377        9.18671    0.90333  10.170  < 2e-16 ***
## School6578       11.99400    0.80976  14.812  < 2e-16 ***
## School9347       13.53875    0.80263  16.868  < 2e-16 ***
## School3705       10.33169    0.90333  11.437  < 2e-16 ***
## School3533       10.40904    0.87465  11.901  < 2e-16 ***
## School1296        7.63596    0.87465   8.730  < 2e-16 ***
## School4350       11.85542    1.05486  11.239  < 2e-16 ***
## School9397       10.35547    0.88390  11.716  < 2e-16 ***
## School4253        9.41286    0.79568  11.830  < 2e-16 ***
## School2655       12.34517    0.84033  14.691  < 2e-16 ***
## School7342       11.16641    0.79568  14.034  < 2e-16 ***
## School9292       10.27963    1.39020   7.394 1.59e-13 ***
## School3499       13.27653    0.98302  13.506  < 2e-16 ***
## School7364       14.17214    0.91354  15.513  < 2e-16 ***
## School8983       10.99200    0.84853  12.954  < 2e-16 ***
## School5650       14.27353    0.90333  15.801  < 2e-16 ***
## School2658       13.39616    0.90333  14.830  < 2e-16 ***
## School8188       12.74097    1.10635  11.516  < 2e-16 ***
## School4410       13.47298    0.94637  14.236  < 2e-16 ***
## School9508       13.57466    1.02428  13.253  < 2e-16 ***
## School8707       12.88394    0.87465  14.730  < 2e-16 ***
## School1499        7.66036    0.83237   9.203  < 2e-16 ***
## School8477       12.52224    0.99621  12.570  < 2e-16 ***
## School1288       13.51080    1.21194  11.148  < 2e-16 ***
## School6291       10.10731    1.02428   9.868  < 2e-16 ***
## School1224        9.71545    0.88390  10.992  < 2e-16 ***
## School4292       12.86435    0.75162  17.116  < 2e-16 ***
## School8857       15.29694    0.75747  20.195  < 2e-16 ***
## School3967       12.03508    0.84033  14.322  < 2e-16 ***
## School6415       11.86020    0.82462  14.383  < 2e-16 ***
## School1317       13.17769    0.87465  15.066  < 2e-16 ***
## School2629       14.90777    0.80263  18.574  < 2e-16 ***
## School4223       14.62262    0.90333  16.187  < 2e-16 ***
## School1462       10.49556    0.80263  13.076  < 2e-16 ***
## School9550       11.08914    1.12526   9.855  < 2e-16 ***
## School6464        7.09162    1.12526   6.302 3.12e-10 ***
## School4931       13.79081    0.79568  17.332  < 2e-16 ***
## School5937       16.77597    1.12526  14.908  < 2e-16 ***
## School7919       14.84997    0.99621  14.906  < 2e-16 ***
## School3716       10.36866    0.94637  10.956  < 2e-16 ***
## School1909       14.42332    1.14518  12.595  < 2e-16 ***
## School2651       11.08432    0.98302  11.276  < 2e-16 ***
## School2467       10.14752    0.84033  12.076  < 2e-16 ***
## School1374        9.72846    1.14518   8.495  < 2e-16 ***
## School6600       11.70389    0.80976  14.453  < 2e-16 ***
## School5667       13.77823    0.77587  17.758  < 2e-16 ***
## School5720       14.28230    0.83237  17.159  < 2e-16 ***
## School3498       16.39045    0.83237  19.691  < 2e-16 ***
## School3881       11.94922    0.94637  12.626  < 2e-16 ***
## School2995        9.54611    0.89346  10.684  < 2e-16 ***
## School5838       13.68961    1.08836  12.578  < 2e-16 ***
## School3688       14.65626    0.92410  15.860  < 2e-16 ***
## School9158        8.54517    0.83237  10.266  < 2e-16 ***
## School8946       10.37509    0.79568  13.039  < 2e-16 ***
## School7232       12.54263    0.84033  14.926  < 2e-16 ***
## School2917        7.97895    0.92410   8.634  < 2e-16 ***
## School6170       14.18105    1.32234  10.724  < 2e-16 ***
## School8165       16.45122    0.86567  19.004  < 2e-16 ***
## School9104       16.83211    0.81709  20.600  < 2e-16 ***
## School2030       12.07819    0.88390  13.665  < 2e-16 ***
## School8150       14.85236    0.91354  16.258  < 2e-16 ***
## School4042       14.31542    0.75747  18.899  < 2e-16 ***
## School8357       14.38185    1.16619  12.332  < 2e-16 ***
## School8531       13.52868    0.94637  14.295  < 2e-16 ***
## School6074       13.77909    0.80976  17.016  < 2e-16 ***
## School4420       13.87416    1.07122  12.952  < 2e-16 ***
## School1906       15.98317    0.83237  19.202  < 2e-16 ***
## School3992       14.64521    0.83237  17.595  < 2e-16 ***
## School3999       10.94404    0.89346  12.249  < 2e-16 ***
## School4173       12.72466    0.91354  13.929  < 2e-16 ***
## School4325       13.24000    0.83237  15.906  < 2e-16 ***
## School5761       11.13806    0.84033  13.254  < 2e-16 ***
## School6484       12.91240    1.02428  12.606  < 2e-16 ***
## School6897       15.09763    0.86567  17.440  < 2e-16 ***
## School7635       15.06553    0.84853  17.755  < 2e-16 ***
## School7734       10.55964    1.29194   8.173 3.54e-16 ***
## School8175       11.69809    1.05486  11.090  < 2e-16 ***
## School8874       12.05503    1.00995  11.936  < 2e-16 ***
## School9225       14.66733    1.00995  14.523  < 2e-16 ***
## School2458       13.98568    0.80263  17.425  < 2e-16 ***
## School3610       15.35495    0.75747  20.271  < 2e-16 ***
## School5640       13.16011    0.80263  16.396  < 2e-16 ***
## School3838       16.06281    0.82462  19.479  < 2e-16 ***
## School9359       15.27062    0.83237  18.346  < 2e-16 ***
## School2208       15.40467    0.78231  19.691  < 2e-16 ***
## School6089       15.56958    1.05486  14.760  < 2e-16 ***
## School1477       14.22847    0.76959  18.488  < 2e-16 ***
## School2768       10.88692    1.21194   8.983  < 2e-16 ***
## School3039       16.96386    1.32234  12.829  < 2e-16 ***
## School5819       12.13890    0.85697  14.165  < 2e-16 ***
## School6397       12.79610    0.78231  16.357  < 2e-16 ***
## School1308       16.25550    1.35500  11.997  < 2e-16 ***
## School1433       19.71914    1.02428  19.252  < 2e-16 ***
## School1436       18.11161    0.91354  19.826  < 2e-16 ***
## School1461       16.84264    1.05486  15.967  < 2e-16 ***
## School1637        7.02411    1.16619   6.023 1.80e-09 ***
## School1942       18.11090    1.12526  16.095  < 2e-16 ***
## School1946       12.90844    0.97033  13.303  < 2e-16 ***
## School2336       16.51770    0.88390  18.687  < 2e-16 ***
## School2526       17.05300    0.80263  21.246  < 2e-16 ***
## School2626       13.39661    0.98302  13.628  < 2e-16 ***
## School2755       16.47651    0.88390  18.641  < 2e-16 ***
## School2771       11.84411    0.81709  14.495  < 2e-16 ***
## School2990       18.44792    0.87465  21.092  < 2e-16 ***
## School3020       14.39527    0.78891  18.247  < 2e-16 ***
## School3152       13.20904    0.84033  15.719  < 2e-16 ***
## School3332       14.27816    0.98302  14.525  < 2e-16 ***
## School3351       11.46518    0.97033  11.816  < 2e-16 ***
## School3427       19.71559    0.86567  22.775  < 2e-16 ***
## School3657        9.52118    0.84853  11.221  < 2e-16 ***
## School4642       14.59903    0.77587  18.816  < 2e-16 ***
## School5404       15.41498    0.80263  19.206  < 2e-16 ***
## School5619       15.41624    0.74590  20.668  < 2e-16 ***
## School6366       15.65640    0.79568  19.677  < 2e-16 ***
## School6469       18.45572    0.80263  22.994  < 2e-16 ***
## School7011       13.81358    1.05486  13.095  < 2e-16 ***
## School7276       12.67940    0.83237  15.233  < 2e-16 ***
## School7332       14.63610    0.87465  16.734  < 2e-16 ***
## School7345       11.33855    0.80976  14.002  < 2e-16 ***
## School7688       18.42231    0.82462  22.340  < 2e-16 ***
## School7697       15.72178    1.07122  14.677  < 2e-16 ***
## School8193       16.23226    0.92410  17.565  < 2e-16 ***
## School8202       11.71243    1.02428  11.435  < 2e-16 ***
## School8627       10.88372    0.83237  13.076  < 2e-16 ***
## School8628       16.52838    0.77587  21.303  < 2e-16 ***
## School9198       19.09229    1.08836  17.542  < 2e-16 ***
## School9586       14.86369    0.78891  18.841  < 2e-16 ***
## School8367:C_SES  0.25037    2.22488   0.113 0.910403    
## School8854:C_SES  1.93884    1.35428   1.432 0.152292    
## School4458:C_SES  1.13184    1.36793   0.827 0.408034    
## School5762:C_SES -1.01410    1.95950  -0.518 0.604803    
## School6990:C_SES  0.94769    0.98169   0.965 0.334393    
## School5815:C_SES  3.01800    2.16485   1.394 0.163335    
## School7172:C_SES  0.99448    1.36612   0.728 0.466662    
## School4868:C_SES  1.28647    1.48571   0.866 0.386576    
## School7341:C_SES  1.70370    1.00528   1.695 0.090169 .  
## School1358:C_SES  5.06801    1.71168   2.961 0.003078 ** 
## School4383:C_SES  6.18019    2.11601   2.921 0.003504 ** 
## School2305:C_SES -0.78211    1.13763  -0.687 0.491795    
## School8800:C_SES  2.56813    1.42668   1.800 0.071893 .  
## School3088:C_SES  1.79134    1.23144   1.455 0.145807    
## School8775:C_SES  1.00146    1.32013   0.759 0.448114    
## School7890:C_SES -0.65597    1.44460  -0.454 0.649784    
## School6144:C_SES  2.77027    1.30158   2.128 0.033340 *  
## School6443:C_SES -0.74335    1.46027  -0.509 0.610734    
## School5192:C_SES  1.60349    1.46102   1.098 0.272452    
## School6808:C_SES  2.27610    1.15285   1.974 0.048384 *  
## School2818:C_SES  2.80241    1.52348   1.839 0.065888 .  
## School9340:C_SES  3.30950    2.00186   1.653 0.098334 .  
## School4523:C_SES  2.38079    1.24545   1.912 0.055971 .  
## School6816:C_SES  1.35272    1.16781   1.158 0.246767    
## School2277:C_SES -2.01503    1.17745  -1.711 0.087062 .  
## School8009:C_SES  1.55687    1.58675   0.981 0.326542    
## School5783:C_SES  3.11406    1.49402   2.084 0.037165 *  
## School3013:C_SES  3.83980    1.75094   2.193 0.028341 *  
## School7101:C_SES  1.29545    1.42842   0.907 0.364484    
## School4530:C_SES  1.64743    1.23926   1.329 0.183771    
## School9021:C_SES  2.52416    1.39478   1.810 0.070384 .  
## School4511:C_SES  0.04251    1.38066   0.031 0.975438    
## School2639:C_SES -0.63010    1.52971  -0.412 0.680417    
## School3377:C_SES -0.74685    1.31955  -0.566 0.571419    
## School6578:C_SES  2.39054    1.30725   1.829 0.067492 .  
## School9347:C_SES  2.68599    1.18051   2.275 0.022920 *  
## School3705:C_SES  1.15850    1.33049   0.871 0.383933    
## School3533:C_SES -0.31177    1.64469  -0.190 0.849658    
## School1296:C_SES  1.07596    1.36610   0.788 0.430948    
## School4350:C_SES  4.37192    1.59458   2.742 0.006127 ** 
## School9397:C_SES  2.44644    1.47583   1.658 0.097430 .  
## School4253:C_SES -0.39954    1.15329  -0.346 0.729023    
## School2655:C_SES  5.22704    1.35364   3.861 0.000114 ***
## School7342:C_SES  1.01246    1.42097   0.713 0.476173    
## School9292:C_SES  0.75868    2.74811   0.276 0.782501    
## School3499:C_SES  0.99238    1.58184   0.627 0.530445    
## School7364:C_SES  0.25950    1.82833   0.142 0.887139    
## School8983:C_SES  1.38594    1.34299   1.032 0.302119    
## School5650:C_SES  0.68062    1.17460   0.579 0.562308    
## School2658:C_SES  2.62990    1.42677   1.843 0.065335 .  
## School8188:C_SES  4.39759    1.65556   2.656 0.007920 ** 
## School4410:C_SES  2.76020    1.55700   1.773 0.076312 .  
## School9508:C_SES  3.95379    1.82178   2.170 0.030019 *  
## School8707:C_SES  3.39153    1.09903   3.086 0.002037 ** 
## School1499:C_SES  3.63473    1.16428   3.122 0.001805 ** 
## School8477:C_SES  3.81216    1.25548   3.036 0.002403 ** 
## School1288:C_SES  3.25545    1.84816   1.761 0.078205 .  
## School6291:C_SES  3.98087    1.69353   2.351 0.018770 *  
## School1224:C_SES  2.50858    1.42433   1.761 0.078243 .  
## School4292:C_SES -0.16061    1.16329  -0.138 0.890195    
## School8857:C_SES  0.80222    1.43452   0.559 0.576025    
## School3967:C_SES  3.31107    1.35069   2.451 0.014255 *  
## School6415:C_SES  3.53008    1.21524   2.905 0.003686 ** 
## School1317:C_SES  1.27391    1.58930   0.802 0.422837    
## School2629:C_SES  0.22235    1.14645   0.194 0.846225    
## School4223:C_SES  2.48659    1.50770   1.649 0.099140 .  
## School1462:C_SES -0.82881    1.39839  -0.593 0.553411    
## School9550:C_SES  3.89194    1.45938   2.667 0.007675 ** 
## School6464:C_SES  1.00349    1.82459   0.550 0.582348    
## School4931:C_SES  0.91185    1.17597   0.775 0.438133    
## School5937:C_SES  1.03962    2.00531   0.518 0.604175    
## School7919:C_SES  3.98937    1.88178   2.120 0.034042 *  
## School3716:C_SES  5.86379    1.18088   4.966 7.01e-07 ***
## School1909:C_SES  2.85479    1.63722   1.744 0.081260 .  
## School2651:C_SES  4.89906    1.55631   3.148 0.001652 ** 
## School2467:C_SES  3.13713    1.65830   1.892 0.058563 .  
## School1374:C_SES  3.85432    1.66836   2.310 0.020904 *  
## School6600:C_SES  4.70429    1.16489   4.038 5.44e-05 ***
## School5667:C_SES  3.52297    1.24071   2.839 0.004532 ** 
## School5720:C_SES  2.46631    1.26524   1.949 0.051302 .  
## School3498:C_SES -0.13109    1.45712  -0.090 0.928320    
## School3881:C_SES  2.39071    1.79271   1.334 0.182388    
## School2995:C_SES  1.43231    1.01804   1.407 0.159492    
## School5838:C_SES  1.85305    1.77992   1.041 0.297872    
## School3688:C_SES  1.53672    1.72035   0.893 0.371749    
## School9158:C_SES  3.86121    1.12532   3.431 0.000604 ***
## School8946:C_SES  1.69048    1.06749   1.584 0.113331    
## School7232:C_SES  5.00160    1.47738   3.385 0.000715 ***
## School2917:C_SES  1.13585    1.02982   1.103 0.270083    
## School6170:C_SES  4.81178    1.64962   2.917 0.003547 ** 
## School8165:C_SES  1.80224    1.38480   1.301 0.193148    
## School9104:C_SES  1.49398    1.83440   0.814 0.415430    
## School2030:C_SES  1.41198    1.31420   1.074 0.282681    
## School8150:C_SES -0.18571    1.51064  -0.123 0.902162    
## School4042:C_SES  1.69362    1.20501   1.405 0.159922    
## School8357:C_SES  2.67578    1.15717   2.312 0.020788 *  
## School8531:C_SES  3.31823    1.40287   2.365 0.018043 *  
## School6074:C_SES  1.52909    1.30292   1.174 0.240603    
## School4420:C_SES  2.95866    1.58878   1.862 0.062614 .  
## School1906:C_SES  2.14551    1.36955   1.567 0.117259    
## School3992:C_SES  0.53788    1.38802   0.388 0.698389    
## School3999:C_SES  3.56698    0.98836   3.609 0.000310 ***
## School4173:C_SES  3.36567    1.45956   2.306 0.021143 *  
## School4325:C_SES  2.75605    1.03273   2.669 0.007632 ** 
## School5761:C_SES  3.10801    1.19136   2.609 0.009106 ** 
## School6484:C_SES  0.60568    1.49351   0.406 0.685093    
## School6897:C_SES  3.58049    1.17477   3.048 0.002314 ** 
## School7635:C_SES  2.44847    1.26927   1.929 0.053767 .  
## School7734:C_SES  6.03523    1.48469   4.065 4.86e-05 ***
## School8175:C_SES  1.61237    1.54334   1.045 0.296186    
## School8874:C_SES  4.09630    1.43512   2.854 0.004326 ** 
## School9225:C_SES  2.88589    1.39642   2.067 0.038806 *  
## School2458:C_SES  2.95669    1.22988   2.404 0.016241 *  
## School3610:C_SES  2.95585    1.20868   2.446 0.014489 *  
## School5640:C_SES  3.82774    1.38890   2.756 0.005868 ** 
## School3838:C_SES  0.59790    1.23391   0.485 0.628005    
## School9359:C_SES -0.83348    1.46679  -0.568 0.569895    
## School2208:C_SES  2.63664    1.31898   1.999 0.045648 *  
## School6089:C_SES  1.69245    1.51926   1.114 0.265318    
## School1477:C_SES  1.23061    1.14871   1.071 0.284075    
## School2768:C_SES  3.51228    1.37517   2.554 0.010668 *  
## School3039:C_SES  2.95567    1.92045   1.539 0.123839    
## School5819:C_SES  1.97252    1.43955   1.370 0.170658    
## School6397:C_SES  2.75901    1.03849   2.657 0.007908 ** 
## School1308:C_SES  0.12602    2.89741   0.043 0.965308    
## School1433:C_SES  1.85429    1.74798   1.061 0.288809    
## School1436:C_SES  1.60056    1.61653   0.990 0.322150    
## School1461:C_SES  6.26650    1.54684   4.051 5.15e-05 ***
## School1637:C_SES  3.11681    1.56230   1.995 0.046081 *  
## School1942:C_SES  0.08938    2.17762   0.041 0.967260    
## School1946:C_SES  3.58583    1.40776   2.547 0.010881 *  
## School2336:C_SES  1.90497    1.39610   1.364 0.172457    
## School2526:C_SES  0.15950    1.32609   0.120 0.904264    
## School2626:C_SES  4.09968    1.77861   2.305 0.021197 *  
## School2755:C_SES  0.56050    1.41945   0.395 0.692950    
## School2771:C_SES  4.26819    1.60528   2.659 0.007859 ** 
## School2990:C_SES  1.32454    1.32327   1.001 0.316883    
## School3020:C_SES  1.65368    1.21797   1.358 0.174595    
## School3152:C_SES  2.76825    1.00158   2.764 0.005727 ** 
## School3332:C_SES  2.03095    1.71772   1.182 0.237106    
## School3351:C_SES  2.45504    1.16971   2.099 0.035867 *  
## School3427:C_SES -0.48817    1.31793  -0.370 0.711094    
## School3657:C_SES  3.73591    1.10047   3.395 0.000691 ***
## School4642:C_SES  3.27239    1.17344   2.789 0.005306 ** 
## School5404:C_SES  1.21423    1.60712   0.756 0.449955    
## School5619:C_SES  5.25753    1.25841   4.178 2.98e-05 ***
## School6366:C_SES  1.51752    1.21527   1.249 0.211812    
## School6469:C_SES  1.75529    1.29272   1.358 0.174562    
## School7011:C_SES  5.07465    1.72585   2.940 0.003289 ** 
## School7276:C_SES  3.77336    1.06358   3.548 0.000391 ***
## School7332:C_SES  2.46320    1.32282   1.862 0.062634 .  
## School7345:C_SES  4.21192    0.98954   4.256 2.10e-05 ***
## School7688:C_SES  0.11634    1.47469   0.079 0.937119    
## School7697:C_SES  3.13622    1.77439   1.767 0.077190 .  
## School8193:C_SES  2.33521    1.62482   1.437 0.150704    
## School8202:C_SES  3.70590    1.47606   2.511 0.012073 *  
## School8627:C_SES  1.86956    1.18737   1.575 0.115408    
## School8628:C_SES  1.23139    1.34892   0.913 0.361339    
## School9198:C_SES  2.61055    1.69902   1.537 0.124462    
## School9586:C_SES  1.67208    1.33730   1.250 0.211217    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.06 on 6865 degrees of freedom
## Multiple R-squared:  0.8328, Adjusted R-squared:  0.825 
## F-statistic: 106.8 on 320 and 6865 DF,  p-value: < 2.2e-16

# collect regression estimates from individual regression by school
m1b_betas <- m1b %>% 
             coef %>% 
             matrix(ncol = 2) %>%
             as.data.frame %>%
             rename( b0 = V1, b1 = V2) 
#
colMeans(m1b_betas)

##        b0        b1 
## 12.620755  2.201641

#
apply(m1b_betas, 2, sd)

##       b0       b1 
## 3.117651 1.630998

#
cor(m1b_betas)

##            b0         b1
## b0 1.00000000 0.01346686
## b1 0.01346686 1.00000000

# scatter plot of regression coefficient estimates
ggplot(m1b_betas, aes(b0, b1)) +
 geom_point(pch = 20) +
 stat_smooth(method = "lm", se = F) +
 stat_ellipse() +
 annotate("text", x = 5, y = 5, label = "r = 0.013") +
 labs(x = "Intercept estimates", y = "Slope estimates")

# random intercept only
summary(m3a <- lmer(Math ~ C_SES + (1 | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ C_SES + (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46724
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.0969 -0.7322  0.0194  0.7572  2.9147 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.672   2.945   
##  Residual             37.010   6.084   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6361     0.2445   51.68
## C_SES         2.1912     0.1087   20.17
## 
## Correlation of Fixed Effects:
##       (Intr)
## C_SES 0.000

# random intercept and slope - correlated 
summary(m3b <- lmer(Math ~ C_SES + (C_SES | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ C_SES + (C_SES | School)
##    Data: dta
## 
## REML criterion at convergence: 46714.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.09680 -0.73193  0.01855  0.75386  2.89924 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  School   (Intercept)  8.681   2.9464       
##           C_SES        0.694   0.8331   0.02
##  Residual             36.700   6.0581       
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6362     0.2445   51.68
## C_SES         2.1932     0.1283   17.10
## 
## Correlation of Fixed Effects:
##       (Intr)
## C_SES 0.009

# random intercept and slope - uncorrelated 
summary(m3c <- lmer(Math ~ C_SES + (1 | School) + 
                           (C_SES - 1 | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ C_SES + (1 | School) + (C_SES - 1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46714.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.09607 -0.73095  0.01694  0.75391  2.90167 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  8.6807  2.9463  
##  School.1 C_SES        0.6941  0.8331  
##  Residual             36.7002  6.0581  
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  12.6360     0.2445   51.68
## C_SES         2.1926     0.1283   17.09
## 
## Correlation of Fixed Effects:
##       (Intr)
## C_SES 0.000

# summary(m3c <- lme(Math ~ C_SES, 
#              random = list(School = pdDiag(~ C_SES)), data = dta))

# default residual plot
plot(m3c, xlab = "Fitted values", ylab = "Pearson residuals", 
     pch = 20, cex = .5, type = c("p", "g"))

# model comparisons
anova(m3a, m3b, m3c)

## Data: dta
## Models:
## m3a: Math ~ C_SES + (1 | School)
## m3c: Math ~ C_SES + (1 | School) + (C_SES - 1 | School)
## m3b: Math ~ C_SES + (C_SES | School)
##     npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)   
## m3a    4 46728 46756 -23360    46720                        
## m3c    5 46721 46755 -23356    46711 9.4196  1   0.002147 **
## m3b    6 46723 46764 -23356    46711 0.0134  1   0.907954   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# CIs for model parameters
confint(m3c, method="boot")

##                  2.5 %    97.5 %
## .sig01       2.6100481  3.269883
## .sig02       0.3848003  1.141918
## .sigma       5.9425454  6.150370
## (Intercept) 12.1304752 13.088948
## C_SES        1.9525498  2.439487

# The end

Including both the effects of student-level and school-level predictors

# load them
pacman::p_load(mlmRev, tidyverse, lme4, merTools)

# data from mlmRev package
data(Hsb82)

#
str(Hsb82)

## 'data.frame':    7185 obs. of  8 variables:
##  $ school : Ord.factor w/ 160 levels "8367"<"8854"<..: 59 59 59 59 59 59 59 59 59 59 ...
##  $ minrty : Factor w/ 2 levels "No","Yes": 1 1 1 1 1 1 1 1 1 1 ...
##  $ sx     : Factor w/ 2 levels "Male","Female": 2 2 1 1 1 1 2 1 2 1 ...
##  $ ses    : num  -1.528 -0.588 -0.528 -0.668 -0.158 ...
##  $ mAch   : num  5.88 19.71 20.35 8.78 17.9 ...
##  $ meanses: num  -0.434 -0.434 -0.434 -0.434 -0.434 ...
##  $ sector : Factor w/ 2 levels "Public","Catholic": 1 1 1 1 1 1 1 1 1 1 ...
##  $ cses   : num  -1.0936 -0.1536 -0.0936 -0.2336 0.2764 ...

# make a copy of data to the dta object 
dta <- Hsb82            

names(dta) <- c("School", "Minority", "Gender", "SES", "Math",
                "Ave_SES", "Sector", "C_SES")

theme_set(theme_bw())

# Math vs SES by sector
ggplot(dta, aes(C_SES, Math, color = Sector)) +
 geom_smooth(method = "lm") +
 geom_jitter(alpha = 0.2) +
 labs(x = "Centered SES", y = "Math achievement score") +
 theme(legend.position = c(.1, .8))

# to facilitate comparison with sas output
#options(contrasts = c(factor = "contr.SAS", ordered = "contr.poly"))

# full
summary(m4a <- lmer(Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES +
        Sector:C_SES + (C_SES | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES +  
##     (C_SES | School)
##    Data: dta
## 
## REML criterion at convergence: 46503.7
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.15926 -0.72319  0.01704  0.75444  2.95822 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev. Corr
##  School   (Intercept)  2.380   1.5426       
##           C_SES        0.101   0.3179   0.39
##  Residual             36.721   6.0598       
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           12.1279     0.1993  60.856
## Ave_SES                5.3329     0.3692  14.446
## SectorCatholic         1.2266     0.3063   4.005
## C_SES                  2.9450     0.1556  18.928
## Ave_SES:C_SES          1.0393     0.2989   3.477
## SectorCatholic:C_SES  -1.6427     0.2398  -6.851
## 
## Correlation of Fixed Effects:
##             (Intr) Av_SES SctrCt C_SES  A_SES:
## Ave_SES      0.256                            
## SectorCthlc -0.699 -0.356                     
## C_SES        0.075  0.019 -0.053              
## A_SES:C_SES  0.019  0.074 -0.026  0.293       
## SctrC:C_SES -0.052 -0.027  0.077 -0.696 -0.351

# reduced
summary(m4b <- lmer(Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES +
        Sector:C_SES + (1 | School), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES +  
##     (1 | School)
##    Data: dta
## 
## REML criterion at convergence: 46504.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1701 -0.7249  0.0148  0.7542  2.9655 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  2.375   1.541   
##  Residual             36.766   6.064   
## Number of obs: 7185, groups:  School, 160
## 
## Fixed effects:
##                      Estimate Std. Error t value
## (Intercept)           12.1282     0.1992  60.885
## Ave_SES                5.3367     0.3690  14.463
## SectorCatholic         1.2245     0.3061   4.000
## C_SES                  2.9421     0.1512  19.457
## Ave_SES:C_SES          1.0444     0.2910   3.589
## SectorCatholic:C_SES  -1.6422     0.2331  -7.045
## 
## Correlation of Fixed Effects:
##             (Intr) Av_SES SctrCt C_SES  A_SES:
## Ave_SES      0.256                            
## SectorCthlc -0.699 -0.356                     
## C_SES        0.000  0.000  0.000              
## A_SES:C_SES  0.000  0.000  0.000  0.295       
## SctrC:C_SES  0.000  0.000  0.000 -0.696 -0.351

# will refit be ML estimation
anova(m4a, m4b)

## Data: dta
## Models:
## m4b: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES + 
## m4b:     (1 | School)
## m4a: Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + Sector:C_SES + 
## m4a:     (C_SES | School)
##     npar   AIC   BIC logLik deviance  Chisq Df Pr(>Chisq)
## m4b    8 46513 46568 -23249    46497                     
## m4a   10 46516 46585 -23248    46496 1.0016  2     0.6061

# residualual plot
plot(m4b, xlab = "Fitted values", ylab = "Pearson residuals", 
     pch = 20, cex = .5, type = c("p", "g"))

# quick summary of model parameter estimates
fastdisp(m4b)

## lmer(formula = Math ~ Ave_SES + Sector + C_SES + Ave_SES:C_SES + 
##     Sector:C_SES + (1 | School), data = dta)
##                      coef.est coef.se
## (Intercept)          12.13     0.20  
## Ave_SES               5.34     0.37  
## SectorCatholic        1.22     0.31  
## C_SES                 2.94     0.15  
## Ave_SES:C_SES         1.04     0.29  
## SectorCatholic:C_SES -1.64     0.23  
## 
## Error terms:
##  Groups   Name        Std.Dev.
##  School   (Intercept) 1.54    
##  Residual             6.06    
## ---
## number of obs: 7185, groups: School, 160
## AIC = 46520.8

# estimate fixed-effects parameters by simulation
m4b_fe <- FEsim(m4b, 1000)

# plot for fixed effects
plotFEsim(m4b_fe) + 
 labs(title = "Coefficient Plot", x = "Median Effect Estimate")

# estimate random-effects parameters by simulation
m4b_re <- REsim(m4b)

# normality plot for random effects
plotREsim(m4b_re)

# The end

Inclass Exercise 2

# load them
pacman::p_load(mlmRev, tidyverse, lme4, merTools)

# input data
dta <- read.csv("pts.csv")

#
head(dta)

##   School Teacher Pupil      Read_0 Read_1
## 1      1      11    18 -0.16407000 4.2426
## 2      1      11    19 -0.98126000 1.0000
## 3      1      11    20 -1.25370000 2.2361
## 4      1      11    15 -0.87230000 3.1623
## 5      1      11    17 -0.00063104 3.4641
## 6      1      11    16 -0.92678000 4.0000

# coerce variables to factor type and
# compute mean Read_0 by school
dta <- dta %>%
  mutate(School = factor(School), Teacher = factor(Teacher),
         Pupil = factor(Pupil)) %>%
  group_by( School ) %>%
  mutate(msRead_0 = mean(Read_0))

#
# compute mean Read_0 by teacher and
# centered teacher mean of Read_0 (from respective school means)
#
dta <- dta %>%
  group_by( Teacher ) %>%
  mutate(mtRead_0 = mean(Read_0), ctRead_0 = mean(Read_0) - msRead_0 )

##
# no predictor
summary(m0 <- lmer(Read_1 ~  (1 | School) + (1 | Teacher), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher)
##    Data: dta
## 
## REML criterion at convergence: 2486.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.05438 -0.65686  0.06497  0.62523  2.47233 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.1370   0.3701  
##  School   (Intercept) 0.1277   0.3574  
##  Residual             1.3250   1.1511  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.3755     0.1065   31.69

# add a school-level predictor
summary(m0_s <- update(m0, . ~ . + msRead_0))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + msRead_0
##    Data: dta
## 
## REML criterion at convergence: 2467.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.06110 -0.65597  0.06822  0.62772  2.45391 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Teacher  (Intercept) 0.1234   0.3513  
##  School   (Intercept) 0.0000   0.0000  
##  Residual             1.3224   1.1500  
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.39822    0.06808  49.918
## msRead_0     1.00159    0.17807   5.625
## 
## Correlation of Fixed Effects:
##          (Intr)
## msRead_0 0.086 
## convergence code: 0
## boundary (singular) fit: see ?isSingular

# add a teacher-level predictor
summary(m0_t <- update(m0, . ~ . + mtRead_0))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + mtRead_0
##    Data: dta
## 
## REML criterion at convergence: 2434.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.07539 -0.65979  0.06707  0.63259  2.49973 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Teacher  (Intercept) 3.296e-15 5.741e-08
##  School   (Intercept) 4.471e-02 2.114e-01
##  Residual             1.309e+00 1.144e+00
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  3.40649    0.06307  54.010
## mtRead_0     1.07319    0.11495   9.336
## 
## Correlation of Fixed Effects:
##          (Intr)
## mtRead_0 0.024 
## convergence code: 0
## boundary (singular) fit: see ?isSingular

# add a teacher-level predictor away from respective school means
summary(m0_ct <- update(m0, . ~ . + ctRead_0))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Read_1 ~ (1 | School) + (1 | Teacher) + ctRead_0
##    Data: dta
## 
## REML criterion at convergence: 2454.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1804 -0.6461  0.0791  0.6420  2.5272 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  Teacher  (Intercept) 2.462e-10 1.569e-05
##  School   (Intercept) 1.764e-01 4.200e-01
##  Residual             1.311e+00 1.145e+00
## Number of obs: 777, groups:  Teacher, 46; School, 20
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.3896     0.1029  32.932
## ctRead_0      1.1742     0.1581   7.427
## 
## Correlation of Fixed Effects:
##          (Intr)
## ctRead_0 0.000 
## convergence code: 0
## boundary (singular) fit: see ?isSingular

###

Inclass Exercise 3

pacman::p_load(mlmRev, tidyverse, lme4, merTools, sjPlot)
library(WWGbook)
data(classroom, package = "WWGbook")
head(classroom)

##   sex minority mathkind mathgain   ses yearstea mathknow housepov mathprep
## 1   1        1      448       32  0.46        1       NA    0.082     2.00
## 2   0        1      460      109 -0.27        1       NA    0.082     2.00
## 3   1        1      511       56 -0.03        1       NA    0.082     2.00
## 4   0        1      449       83 -0.38        2    -0.11    0.082     3.25
## 5   0        1      425       53 -0.03        2    -0.11    0.082     3.25
## 6   1        1      450       65  0.76        2    -0.11    0.082     3.25
##   classid schoolid childid
## 1     160        1       1
## 2     160        1       2
## 3     160        1       3
## 4     217        1       4
## 5     217        1       5
## 6     217        1       6

m0 <-lme4::lmer(mathgain~(1|schoolid/classid), data=classroom)
sjPlot::tab_model(m0, show.p=F, show.r2=F)

	mathgain
Predictors	Estimates	CI
(Intercept)	57.43	54.60 – 60.26
Random Effects
σ²	1028.23
τ₀₀ _{classid:schoolid}	99.22
τ₀₀ _schoolid	77.50
ICC	0.15
N _classid	312
N _schoolid	107
Observations	1190

m1<- lme4::lmer(mathgain~(1|schoolid/classid), data=classroom)
sjPlot::tab_model(m0, m1, show.p=F, show.r2=F, show.obs = F, show.ngroups = F,
                  show.se = T, show.ci = F)

	mathgain		mathgain
Predictors	Estimates	std. Error	Estimates	std. Error
(Intercept)	57.43	1.44	57.43	1.44
Random Effects
σ²	1028.23		1028.23
τ₀₀	99.22 _{classid:schoolid}		99.22 _{classid:schoolid}
	77.50 _schoolid		77.50 _schoolid
ICC	0.15		0.15

m2<-lme4::lmer(mathgain~mathkind+sex+minority+ses|schoolid, data=classroom)
sjPlot::tab_model(m0, m1,m2, show.p=F, show.r2=F, show.obs = F, show.ngroups = F,
                  show.se = T, show.ci = F)

	mathgain		mathgain		mathgain
Predictors	Estimates	std. Error	Estimates	std. Error	Estimates	std. Error
(Intercept)	57.43	1.44	57.43	1.44	62.32	1.98
Random Effects
σ²	1028.23		1028.23		915.56
τ₀₀	99.22 _{classid:schoolid}		99.22 _{classid:schoolid}		1072.21 _schoolid
	77.50 _schoolid		77.50 _schoolid
τ₁₁					0.01 _{schoolid.mathkind}
					114.05 _schoolid.sex
					678.35 _{schoolid.minority}
					176.34 _schoolid.ses
ρ₀₁					-1.00
					-0.15
					0.19
					-0.08
ICC	0.15		0.15

Exercise 1

pacman::p_load(mlmRev, tidyverse, lme4, nlme)
dta_e1<-read.table('iq_language.txt', h=T)
head(dta_e1)

##   School Pupil   IQ Language Group_size       IQ_c School_mean Group_mean
## 1      1 17001 15.0       46         29  3.1659379    -1.51406        5.9
## 2      1 17002 14.5       45         29  2.6659379    -1.51406        5.9
## 3      1 17003  9.5       33         29 -2.3340621    -1.51406        5.9
## 4      1 17004 11.0       46         29 -0.8340621    -1.51406        5.9
## 5      1 17005  8.0       20         29 -3.8340621    -1.51406        5.9
## 6      1 17006  9.5       30         29 -2.3340621    -1.51406        5.9
##        SES_c
## 1  -4.811981
## 2 -17.811981
## 3 -12.811981
## 4  -4.811981
## 5 -17.811981
## 6 -17.811981

m0

summary(m0 <- lmer(Language ~ (1|School), data = dta_e1))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ (1 | School)
##    Data: dta_e1
## 
## REML criterion at convergence: 16253.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11618 -0.65703  0.07597  0.74128  2.50639 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 19.63    4.431   
##  Residual             64.56    8.035   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)  40.3624     0.4282   94.26

The estimated overall mean language score is 40.36.
The estimated between school variance in mean language score is 19.63.
The variation between students within the same school is estimated to be 64.56.
The between school variance is about 19.63 / (19.63 + 64.56) = 0.2332 (23.3%)of the total variance. This is the intraclass correlation.

m1

summary(m1 <- lmer(Language ~ IQ_c + (1|School), data = dta_e1))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Language ~ IQ_c + (1 | School)
##    Data: dta_e1
## 
## REML criterion at convergence: 15255.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -4.0939 -0.6375  0.0579  0.7061  3.1448 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept)  9.602   3.099   
##  Residual             42.245   6.500   
## Number of obs: 2287, groups:  School, 131
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) 40.60823    0.30819   131.8
## IQ_c         2.48759    0.07008    35.5
## 
## Correlation of Fixed Effects:
##      (Intr)
## IQ_c 0.018

sjPlot::tab_model(m0, m1, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)

	Language		Language
Predictors	Estimates	std. Error	Estimates	std. Error
(Intercept)	40.36	0.43	40.61	0.31
IQ_c			2.49	0.07
Random Effects
σ²	64.56		42.24
τ₀₀	19.63 _School		9.60 _School
ICC	0.23		0.19

The estimated mean of language scores for all schools is 40.61 with as SD of \[\sqrt{9.60 }\]=3.098387.
The estimated mean of language scores for all children is 40.61 with as SD of \[\sqrt{(42.24+9.60)}\]=7.2
On average, there is a gain of 2.49 point on language score from an increase of one point in IQ_c.
After adjusting for IQ_c difference, correlation between language scores among children attending the same school is 0.19.
About 51.1%(=(19.63-9.60)/19.63) of variation in language scores between schools can be attributed to differences in IQ_c between schools.

Exercise 2

# load them
pacman::p_load(foreign, tidyverse, lme4, merTools)

# read spss format and covert it to data frame 
dta <- read.spss(file="eg_hlm.sav") %>% as.data.frame

# check data structure
str(dta)

## 'data.frame':    7230 obs. of  12 variables:
##  $ size    : num  380 380 380 380 380 380 380 380 380 380 ...
##  $ lowinc  : num  40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 40.3 ...
##  $ mobility: num  12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 12.5 ...
##  $ female  : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ black   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ hispanic: num  1 1 1 0 0 0 0 0 1 1 ...
##  $ year    : num  0.5 1.5 2.5 -1.5 -0.5 0.5 1.5 2.5 -1.5 -0.5 ...
##  $ grade   : num  2 3 4 0 1 2 3 4 0 1 ...
##  $ math    : num  1.146 1.134 2.3 -1.303 0.439 ...
##  $ retained: num  0 0 0 0 0 0 0 0 0 0 ...
##  $ school  : Factor w/ 60 levels "        2020",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ cid     : Factor w/ 1721 levels "   101480302",..: 244 244 244 248 248 248 248 248 253 253 ...

# set black-and-white theme
theme_set(theme_bw())

#
ggplot(dta, aes(year, math, group = cid)) +
 geom_point(alpha = .1) +
 geom_line(alpha = .1) +
 stat_smooth(aes(group = school), method = "lm", se = F, lwd = .6) +
 stat_smooth(aes(group = 1), method = "lm", se = F, col = "magenta") +
 labs(x = "Year of age (centered)", y = "Math score")

Model: Mathti\[X_{(j)}\]=\[b_{0j}\]+\[b_{1i(j)}\]+\[ β_1Year_ti\]+\[ε_ti(j)\], where\[b_{0j}\],j = 1, …, 60, are school random effects, and\[b_{1i(j)}\]are student random effects.

# nested specification with random intercepts only
# fixed-effect for slope
summary(m1 <- lmer(math ~ year + (1 | school/cid), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ year + (1 | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16759.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.8093 -0.5831 -0.0270  0.5566  6.0867 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  cid:school (Intercept) 0.6699   0.8185  
##  school     (Intercept) 0.1869   0.4324  
##  Residual               0.3470   0.5891  
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept) -0.780482   0.061088  -12.78
## year         0.746123   0.005396  138.26
## 
## Correlation of Fixed Effects:
##      (Intr)
## year -0.031

# same as
#lmer(math ~ year + (1 | school) +( 1 | school:cid) , data = dta))

0.6699+0.1869+0.3470=1.2038
0.6699/1.2038=0.5564878
0.1869/1.2038=0.1552583
(0.6699+0.1869)/1.2038=0.7117461
The similarity with math scores of classes in the same schools controlling for years is about 71.2%.

# random slopes
summary(m2 <- lmer(math ~ ( year | school/cid), data = dta))

## Linear mixed model fit by REML ['lmerMod']
## Formula: math ~ (year | school/cid)
##    Data: dta
## 
## REML criterion at convergence: 16553.9
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.2231 -0.5611 -0.0308  0.5312  5.1579 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev. Corr
##  cid:school (Intercept) 0.64127  0.8008       
##             year        0.01132  0.1064   0.55
##  school     (Intercept) 0.92436  0.9614       
##             year        0.60793  0.7797   0.92
##  Residual               0.30115  0.5488       
## Number of obs: 7230, groups:  cid:school, 1721; school, 60
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept) -1.64424    0.05462  -30.11
## convergence code: 0
## Model failed to converge with max|grad| = 0.00321233 (tol = 0.002, component 1)

# compare models
anova(m1, m2)

## Data: dta
## Models:
## m1: math ~ year + (1 | school/cid)
## m2: math ~ (year | school/cid)
##    npar   AIC   BIC  logLik deviance Chisq Df Pr(>Chisq)    
## m1    5 16757 16792 -8373.5    16747                        
## m2    8 16566 16621 -8275.0    16550 197.1  3  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# merTools
# estimate random-effects parameters by simulation
m2_re <- REsim(m2)

# plot for random effects
plotREsim(m2_re)

# residual plots
plot(m1, ylim = c(-4, 4), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

#
plot(m2, ylim = c(-4, 4),
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

Exercise 3

#Read data
dta_31<-read.table("demo1.txt", h=T)
dta_32<-read.table("demo2.txt", h=T)

dta_31 <- dta_31 %>% 
  group_by(School) %>% 
  mutate( ave_Score = mean(Score),
          c_Score = ave_Score - Score,
          n=n())
dta_32 <- dta_32 %>% 
  group_by(School) %>% 
  mutate( ave_Score = mean(Score),
          c_Score = ave_Score - Score,
          n=n())

ggplot(data = dta_31, aes(School, Score)) +
  geom_point(data = dta_31, aes(School, Score), colour = 'skyblue', size = 1) +
  geom_point(data = dta_32, aes(School, Score), colour = 'coral', size = 1) +
  geom_hline(yintercept = mean(dta_31$Score), colour = 'skyblue') +
  geom_hline(yintercept = mean(dta_32$Score), colour = 'coral')

m0_31

summary(m0_31 <- lmer(Score ~ (1|School), data = dta_31))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
##    Data: dta_31
## 
## REML criterion at convergence: 51.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.3280 -0.4745  0.0000  0.4608  1.3553 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 1679     40.976  
##  Residual                5      2.236  
## Number of obs: 9, groups:  School, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    53.01      23.67    2.24

m0_32

summary(m0_32 <- lmer(Score ~ (1|School), data = dta_32))

## Linear mixed model fit by REML ['lmerMod']
## Formula: Score ~ (1 | School)
##    Data: dta_32
## 
## REML criterion at convergence: 80.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.36734 -0.34286 -0.02776  1.07153  1.13999 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  School   (Intercept) 104.2    10.21   
##  Residual             967.8    31.11   
## Number of obs: 9, groups:  School, 3
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)    56.36      12.01   4.692

comparison

sjPlot::tab_model(m0_31, m0_32, show.p=FALSE, show.r2=FALSE, show.obs=FALSE, show.ngroups=FALSE, show.se=TRUE, show.ci=FALSE)

	Score		Score
Predictors	Estimates	std. Error	Estimates	std. Error
(Intercept)	53.01	23.67	56.36	12.01
Random Effects
σ²	5.00		967.76
τ₀₀	1679.06 _School		104.17 _School
ICC	1.00		0.10

The estimated mean of scores for all children in demo1 data is 53.01 with an SD of\[\sqrt{1679.06}\]=40.98
The estimated mean of scores for all children in demo2 data is 56.36 with an SD of\[\sqrt{104.17}\]=10.21
The correlation between scores in same school is 1.0 and 0.1 in demo1 data and demo2 data respectively.Because demo1 data are perfectly correlated, the unweighted grand mean in m0_21 is more convincing.

Residual Plot

plot(m0_31, ylim = c(-20, 20), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

plot(m0_32, ylim = c(-20, 20), 
     xlab = "Fitted values", ylab = "Pearson residuals", cex =.5)

Homework_1005

yuwenchen

10/5/2020

Inclass Exercise 1

school-effects model version 0

Including the effect of a school level predictor

Including the effect of a student-level predictor

Including both the effects of student-level and school-level predictors

Inclass Exercise 2

Inclass Exercise 3

Exercise 1

m0

m1

Exercise 2

Exercise 3

m0_31

m0_32

comparison

Residual Plot