Multilevel Model Comparison

Preparation

library(lme4)
#get descriptives
library(Hmisc)
library(lmerTest)
library(nlme)
library(dplyr)
#install.packages('VCA')
library(VCA)
library(tidyverse)
library(afex)

data<-read.csv('lab1.csv')
data<-data[,1:6]
names(data) <- c("deptid","morale","satpay","female","white","pctbelow")

attach(data)
cols<-c('deptid','female','white')
data[cols]<-lapply(data[cols],factor)
head(data)

Check for design balance. Do we have the same number of people from each department?

isBalanced(form=morale~deptid+deptid:pctbelow, data,na.rm=TRUE)

## [1] FALSE

Unbalanced data is better estimated with ML rather than REML. Both give the same fixed effects, but slightly different random effect estimates.

Intercept only, unconditional model

If the data were completely balanced, same number of people in each department, the null model will equal those from an ANOVA procedure.

model1<-lmer(morale~1+(1|deptid),data, REML=FALSE)
summary(model1)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: morale ~ 1 + (1 | deptid)
##    Data: data
## 
##      AIC      BIC   logLik deviance df.resid 
##  84088.1  84110.6 -42041.1  84082.1    13186 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.6967 -0.6565  0.0620  0.6924  2.7307 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  deptid   (Intercept)  5.359   2.315   
##  Residual             33.302   5.771   
## Number of obs: 13189, groups:  deptid, 165
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)  26.4284     0.1888 162.3666   139.9   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

ICC of null model

5.35/(5.35+33.30)

## [1] 0.1384217

Grand mean centering for all predictors

library(dplyr)
df <- data %>% 
  # Grand mean centering (CMC)
  mutate(pctbelow.gmc = pctbelow-mean(pctbelow)) %>%
  mutate(satpay.gmc = satpay-mean(satpay)) 
#View(df)
summary(df)

##      deptid          morale          satpay       female   white   
##  131    :  200   Min.   :11.00   Min.   : 0.000   0:6776   0:7306  
##  129    :  199   1st Qu.:22.00   1st Qu.: 6.000   1:6413   1:5883  
##  99     :  189   Median :27.00   Median : 9.000                    
##  232    :  182   Mean   :26.36   Mean   : 8.882                    
##  240    :  163   3rd Qu.:31.00   3rd Qu.:11.000                    
##  205    :  160   Max.   :40.00   Max.   :16.000                    
##  (Other):12096                                                     
##     pctbelow      pctbelow.gmc       satpay.gmc    
##  Min.   : 2.40   Min.   :-28.154   Min.   :-8.882  
##  1st Qu.:19.20   1st Qu.:-11.354   1st Qu.:-2.882  
##  Median :26.97   Median : -3.584   Median : 0.118  
##  Mean   :30.55   Mean   :  0.000   Mean   : 0.000  
##  3rd Qu.:38.80   3rd Qu.:  8.246   3rd Qu.: 2.118  
##  Max.   :82.90   Max.   : 52.346   Max.   : 7.118  
##

Scatterplot of satpay predict morale for within each deptid

This graph just has satpay as the sole predictor of morale

#I have made sure at the data preparation stage that deptid is of factor type, this way, ggplot2 can plot separate lines for each dept.
library(ggplot2)
ggplot(df[1:800,], aes(satpay.gmc,morale,color=deptid))+
  geom_point()+
  geom_smooth(method=lm)

Random Intercept, fixed slope, only level 1 predictor

Resembles an ANCOVA based on deptid with the covariate satpay

model2<-lmer(morale~1+satpay.gmc+female+white+(1|deptid),df,REML=FALSE)
summary(model2)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: morale ~ 1 + satpay.gmc + female + white + (1 | deptid)
##    Data: df
## 
##      AIC      BIC   logLik deviance df.resid 
##  75579.0  75623.9 -37783.5  75567.0    13183 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7618 -0.6697  0.0095  0.6835  3.5079 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  deptid   (Intercept)  1.848   1.359   
##  Residual             17.544   4.189   
## Number of obs: 13189, groups:  deptid, 165
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept) 2.602e+01  1.236e-01 2.246e+02 210.437   <2e-16 ***
## satpay.gmc  1.201e+00  1.136e-02 1.319e+04 105.790   <2e-16 ***
## female1     6.711e-04  7.400e-02 1.306e+04   0.009    0.993    
## white1      9.162e-01  7.771e-02 1.319e+04  11.790   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) stpy.g femal1
## satpay.gmc  0.069              
## female1    -0.295 -0.129       
## white1     -0.274 -0.121  0.020

ICC for random intercept only model

1.848/(1.848+17.544)

## [1] 0.09529703

Random intercept, random slope model

model3<-lmer(morale~1+satpay.gmc+female+white+(1+satpay.gmc|deptid),df,REML=FALSE)
summary(model3)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: morale ~ 1 + satpay.gmc + female + white + (1 + satpay.gmc |  
##     deptid)
##    Data: df
## 
##      AIC      BIC   logLik deviance df.resid 
##  75570.5  75630.4 -37777.2  75554.5    13181 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7704 -0.6638  0.0129  0.6866  3.5157 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  deptid   (Intercept)  1.860620 1.36405      
##           satpay.gmc   0.008085 0.08992  0.05
##  Residual             17.454995 4.17792      
## Number of obs: 13189, groups:  deptid, 165
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept) 2.603e+01  1.241e-01 2.238e+02 209.705   <2e-16 ***
## satpay.gmc  1.197e+00  1.365e-02 1.588e+02  87.730   <2e-16 ***
## female1     3.273e-03  7.392e-02 1.304e+04   0.044    0.965    
## white1      9.139e-01  7.769e-02 1.319e+04  11.764   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr) stpy.g femal1
## satpay.gmc  0.081              
## female1    -0.294 -0.107       
## white1     -0.273 -0.102  0.020

ICC for intercept and slope model

1.86/(1.86+17.455)

## [1] 0.09629821

One fixed level 2 factor, and 2 random level 1 factors

model4<-lmer(morale~satpay.gmc+female+white+pctbelow.gmc+
               (1+satpay.gmc|deptid),df,REML=FALSE)
summary(model4)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## morale ~ satpay.gmc + female + white + pctbelow.gmc + (1 + satpay.gmc |  
##     deptid)
##    Data: df
## 
##      AIC      BIC   logLik deviance df.resid 
##  75557.9  75625.3 -37770.0  75539.9    13180 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7740 -0.6633  0.0126  0.6859  3.5135 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  deptid   (Intercept)  1.691332 1.30051      
##           satpay.gmc   0.007932 0.08906  0.14
##  Residual             17.455650 4.17800      
## Number of obs: 13189, groups:  deptid, 165
## 
## Fixed effects:
##                Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)   2.594e+01  1.215e-01  2.222e+02 213.573  < 2e-16 ***
## satpay.gmc    1.196e+00  1.362e-02  1.587e+02  87.773  < 2e-16 ***
## female1       5.174e-03  7.392e-02  1.304e+04   0.070 0.944192    
## white1        9.085e-01  7.765e-02  1.318e+04  11.699  < 2e-16 ***
## pctbelow.gmc -2.718e-02  6.931e-03  1.526e+02  -3.922 0.000132 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) stpy.g femal1 white1
## satpay.gmc   0.127                     
## female1     -0.302 -0.107              
## white1      -0.276 -0.101  0.020       
## pctbelw.gmc  0.162  0.056 -0.008  0.016

ICC for fixed level 2, random level 1 factor model

1.69/(1.69+17.456)

## [1] 0.08826909

Cross level interaction model

model5<-lmer(morale~white+female+satpay.gmc+pctbelow.gmc+
               satpay.gmc*pctbelow.gmc+
               (1+satpay.gmc|deptid),df,REML=FALSE)
summary(model5)

## Linear mixed model fit by maximum likelihood . t-tests use
##   Satterthwaite's method [lmerModLmerTest]
## Formula: 
## morale ~ white + female + satpay.gmc + pctbelow.gmc + satpay.gmc *  
##     pctbelow.gmc + (1 + satpay.gmc | deptid)
##    Data: df
## 
##      AIC      BIC   logLik deviance df.resid 
##  75557.6  75632.5 -37768.8  75537.6    13179 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.7705 -0.6646  0.0137  0.6831  3.5129 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. Corr
##  deptid   (Intercept)  1.701296 1.30434      
##           satpay.gmc   0.007308 0.08549  0.12
##  Residual             17.455860 4.17802      
## Number of obs: 13189, groups:  deptid, 165
## 
## Fixed effects:
##                           Estimate Std. Error         df t value Pr(>|t|)
## (Intercept)              2.595e+01  1.219e-01  2.221e+02 212.939  < 2e-16
## white1                   9.099e-01  7.765e-02  1.318e+04  11.718  < 2e-16
## female1                  4.711e-03  7.391e-02  1.304e+04   0.064 0.949181
## satpay.gmc               1.196e+00  1.347e-02  1.516e+02  88.819  < 2e-16
## pctbelow.gmc            -2.642e-02  6.967e-03  1.528e+02  -3.792 0.000214
## satpay.gmc:pctbelow.gmc  1.224e-03  7.927e-04  1.101e+02   1.545 0.125309
##                            
## (Intercept)             ***
## white1                  ***
## female1                    
## satpay.gmc              ***
## pctbelow.gmc            ***
## satpay.gmc:pctbelow.gmc    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) white1 femal1 stpy.g pctbl.
## white1      -0.275                            
## female1     -0.301  0.020                     
## satpay.gmc   0.121 -0.102 -0.108              
## pctbelw.gmc  0.166  0.017 -0.008  0.057       
## stpy.gmc:p.  0.049  0.011 -0.003  0.032  0.065

ICC for cross-level interaction model

1.7/(1.7+15.456)

## [1] 0.0990907

Create a model comparison table

#install.packages('texreg')
library(texreg)

## Version:  1.36.23
## Date:     2017-03-03
## Author:   Philip Leifeld (University of Glasgow)
## 
## Please cite the JSS article in your publications -- see citation("texreg").

## 
## Attaching package: 'texreg'

## The following object is masked from 'package:tidyr':
## 
##     extract

htmlreg(list(model1, model2, model3, model4, model5), 
          file='texreg.doc',
          single.row = T, 
          stars = numeric(0),
          caption = "",
          custom.note = "Model 1 = null model, 
                         Model 2 = intercept only model, 
                         Model 3 = random intercept and slope model, 
                         Model 4 = fixed level 2 factor model,
                         Model 5 = cross-level interaction model
        ")

## The table was written to the file 'texreg.doc'.

Refrences:

To understand model building and subscripts https://stat.utexas.edu/images/SSC/Site/hlm_comparison-1.pdf
http://philippmasur.de/blog/2018/05/23/how-to-center-in-multilevel-models/