library(lme4)
#get descriptives
library(Hmisc)
library(lmerTest)
library(nlme)
library(dplyr)
#install.packages('VCA')
library(VCA)
library(tidyverse)
library(afex)
data<-read.csv('lab1.csv')
data<-data[,1:6]
names(data) <- c("deptid","morale","satpay","female","white","pctbelow")
attach(data)
cols<-c('deptid','female','white')
data[cols]<-lapply(data[cols],factor)
head(data)
isBalanced(form=morale~deptid+deptid:pctbelow, data,na.rm=TRUE)
## [1] FALSE
Unbalanced data is better estimated with ML rather than REML. Both give the same fixed effects, but slightly different random effect estimates.
If the data were completely balanced, same number of people in each department, the null model will equal those from an ANOVA procedure.
model1<-lmer(morale~1+(1|deptid),data, REML=FALSE)
summary(model1)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: morale ~ 1 + (1 | deptid)
## Data: data
##
## AIC BIC logLik deviance df.resid
## 84088.1 84110.6 -42041.1 84082.1 13186
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.6967 -0.6565 0.0620 0.6924 2.7307
##
## Random effects:
## Groups Name Variance Std.Dev.
## deptid (Intercept) 5.359 2.315
## Residual 33.302 5.771
## Number of obs: 13189, groups: deptid, 165
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 26.4284 0.1888 162.3666 139.9 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
5.35/(5.35+33.30)
## [1] 0.1384217
library(dplyr)
df <- data %>%
# Grand mean centering (CMC)
mutate(pctbelow.gmc = pctbelow-mean(pctbelow)) %>%
mutate(satpay.gmc = satpay-mean(satpay))
#View(df)
summary(df)
## deptid morale satpay female white
## 131 : 200 Min. :11.00 Min. : 0.000 0:6776 0:7306
## 129 : 199 1st Qu.:22.00 1st Qu.: 6.000 1:6413 1:5883
## 99 : 189 Median :27.00 Median : 9.000
## 232 : 182 Mean :26.36 Mean : 8.882
## 240 : 163 3rd Qu.:31.00 3rd Qu.:11.000
## 205 : 160 Max. :40.00 Max. :16.000
## (Other):12096
## pctbelow pctbelow.gmc satpay.gmc
## Min. : 2.40 Min. :-28.154 Min. :-8.882
## 1st Qu.:19.20 1st Qu.:-11.354 1st Qu.:-2.882
## Median :26.97 Median : -3.584 Median : 0.118
## Mean :30.55 Mean : 0.000 Mean : 0.000
## 3rd Qu.:38.80 3rd Qu.: 8.246 3rd Qu.: 2.118
## Max. :82.90 Max. : 52.346 Max. : 7.118
##
This graph just has satpay as the sole predictor of morale
#I have made sure at the data preparation stage that deptid is of factor type, this way, ggplot2 can plot separate lines for each dept.
library(ggplot2)
ggplot(df[1:800,], aes(satpay.gmc,morale,color=deptid))+
geom_point()+
geom_smooth(method=lm)
Resembles an ANCOVA based on deptid with the covariate satpay
model2<-lmer(morale~1+satpay.gmc+female+white+(1|deptid),df,REML=FALSE)
summary(model2)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: morale ~ 1 + satpay.gmc + female + white + (1 | deptid)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 75579.0 75623.9 -37783.5 75567.0 13183
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7618 -0.6697 0.0095 0.6835 3.5079
##
## Random effects:
## Groups Name Variance Std.Dev.
## deptid (Intercept) 1.848 1.359
## Residual 17.544 4.189
## Number of obs: 13189, groups: deptid, 165
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.602e+01 1.236e-01 2.246e+02 210.437 <2e-16 ***
## satpay.gmc 1.201e+00 1.136e-02 1.319e+04 105.790 <2e-16 ***
## female1 6.711e-04 7.400e-02 1.306e+04 0.009 0.993
## white1 9.162e-01 7.771e-02 1.319e+04 11.790 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) stpy.g femal1
## satpay.gmc 0.069
## female1 -0.295 -0.129
## white1 -0.274 -0.121 0.020
1.848/(1.848+17.544)
## [1] 0.09529703
model3<-lmer(morale~1+satpay.gmc+female+white+(1+satpay.gmc|deptid),df,REML=FALSE)
summary(model3)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula: morale ~ 1 + satpay.gmc + female + white + (1 + satpay.gmc |
## deptid)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 75570.5 75630.4 -37777.2 75554.5 13181
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7704 -0.6638 0.0129 0.6866 3.5157
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## deptid (Intercept) 1.860620 1.36405
## satpay.gmc 0.008085 0.08992 0.05
## Residual 17.454995 4.17792
## Number of obs: 13189, groups: deptid, 165
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.603e+01 1.241e-01 2.238e+02 209.705 <2e-16 ***
## satpay.gmc 1.197e+00 1.365e-02 1.588e+02 87.730 <2e-16 ***
## female1 3.273e-03 7.392e-02 1.304e+04 0.044 0.965
## white1 9.139e-01 7.769e-02 1.319e+04 11.764 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) stpy.g femal1
## satpay.gmc 0.081
## female1 -0.294 -0.107
## white1 -0.273 -0.102 0.020
1.86/(1.86+17.455)
## [1] 0.09629821
model4<-lmer(morale~satpay.gmc+female+white+pctbelow.gmc+
(1+satpay.gmc|deptid),df,REML=FALSE)
summary(model4)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## morale ~ satpay.gmc + female + white + pctbelow.gmc + (1 + satpay.gmc |
## deptid)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 75557.9 75625.3 -37770.0 75539.9 13180
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7740 -0.6633 0.0126 0.6859 3.5135
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## deptid (Intercept) 1.691332 1.30051
## satpay.gmc 0.007932 0.08906 0.14
## Residual 17.455650 4.17800
## Number of obs: 13189, groups: deptid, 165
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.594e+01 1.215e-01 2.222e+02 213.573 < 2e-16 ***
## satpay.gmc 1.196e+00 1.362e-02 1.587e+02 87.773 < 2e-16 ***
## female1 5.174e-03 7.392e-02 1.304e+04 0.070 0.944192
## white1 9.085e-01 7.765e-02 1.318e+04 11.699 < 2e-16 ***
## pctbelow.gmc -2.718e-02 6.931e-03 1.526e+02 -3.922 0.000132 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) stpy.g femal1 white1
## satpay.gmc 0.127
## female1 -0.302 -0.107
## white1 -0.276 -0.101 0.020
## pctbelw.gmc 0.162 0.056 -0.008 0.016
1.69/(1.69+17.456)
## [1] 0.08826909
model5<-lmer(morale~white+female+satpay.gmc+pctbelow.gmc+
satpay.gmc*pctbelow.gmc+
(1+satpay.gmc|deptid),df,REML=FALSE)
summary(model5)
## Linear mixed model fit by maximum likelihood . t-tests use
## Satterthwaite's method [lmerModLmerTest]
## Formula:
## morale ~ white + female + satpay.gmc + pctbelow.gmc + satpay.gmc *
## pctbelow.gmc + (1 + satpay.gmc | deptid)
## Data: df
##
## AIC BIC logLik deviance df.resid
## 75557.6 75632.5 -37768.8 75537.6 13179
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.7705 -0.6646 0.0137 0.6831 3.5129
##
## Random effects:
## Groups Name Variance Std.Dev. Corr
## deptid (Intercept) 1.701296 1.30434
## satpay.gmc 0.007308 0.08549 0.12
## Residual 17.455860 4.17802
## Number of obs: 13189, groups: deptid, 165
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) 2.595e+01 1.219e-01 2.221e+02 212.939 < 2e-16
## white1 9.099e-01 7.765e-02 1.318e+04 11.718 < 2e-16
## female1 4.711e-03 7.391e-02 1.304e+04 0.064 0.949181
## satpay.gmc 1.196e+00 1.347e-02 1.516e+02 88.819 < 2e-16
## pctbelow.gmc -2.642e-02 6.967e-03 1.528e+02 -3.792 0.000214
## satpay.gmc:pctbelow.gmc 1.224e-03 7.927e-04 1.101e+02 1.545 0.125309
##
## (Intercept) ***
## white1 ***
## female1
## satpay.gmc ***
## pctbelow.gmc ***
## satpay.gmc:pctbelow.gmc
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) white1 femal1 stpy.g pctbl.
## white1 -0.275
## female1 -0.301 0.020
## satpay.gmc 0.121 -0.102 -0.108
## pctbelw.gmc 0.166 0.017 -0.008 0.057
## stpy.gmc:p. 0.049 0.011 -0.003 0.032 0.065
1.7/(1.7+15.456)
## [1] 0.0990907
#install.packages('texreg')
library(texreg)
## Version: 1.36.23
## Date: 2017-03-03
## Author: Philip Leifeld (University of Glasgow)
##
## Please cite the JSS article in your publications -- see citation("texreg").
##
## Attaching package: 'texreg'
## The following object is masked from 'package:tidyr':
##
## extract
htmlreg(list(model1, model2, model3, model4, model5),
file='texreg.doc',
single.row = T,
stars = numeric(0),
caption = "",
custom.note = "Model 1 = null model,
Model 2 = intercept only model,
Model 3 = random intercept and slope model,
Model 4 = fixed level 2 factor model,
Model 5 = cross-level interaction model
")
## The table was written to the file 'texreg.doc'.
Refrences:
To understand model building and subscripts https://stat.utexas.edu/images/SSC/Site/hlm_comparison-1.pdf
http://philippmasur.de/blog/2018/05/23/how-to-center-in-multilevel-models/