library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.2 ✓ dplyr 1.0.6
## ✓ tidyr 1.1.3 ✓ stringr 1.4.0
## ✓ readr 1.4.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lme4)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
data("sleepstudy")
head(sleepstudy)
## Reaction Days Subject
## 1 249.5600 0 308
## 2 258.7047 1 308
## 3 250.8006 2 308
## 4 321.4398 3 308
## 5 356.8519 4 308
## 6 414.6901 5 308
# Some simple MM
sleepstudy_model=lmer(Reaction ~ Days + (1 | Subject), sleepstudy)
summary(sleepstudy_model)
## Linear mixed model fit by REML ['lmerMod']
## Formula: Reaction ~ Days + (1 | Subject)
## Data: sleepstudy
##
## REML criterion at convergence: 1786.5
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.2257 -0.5529 0.0109 0.5188 4.2506
##
## Random effects:
## Groups Name Variance Std.Dev.
## Subject (Intercept) 1378.2 37.12
## Residual 960.5 30.99
## Number of obs: 180, groups: Subject, 18
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 251.4051 9.7467 25.79
## Days 10.4673 0.8042 13.02
##
## Correlation of Fixed Effects:
## (Intr)
## Days -0.371
Generate new columns for diagnostics.
sleepstudy = mutate(sleepstudy,
prediction = fitted(sleepstudy_model),
resid = Reaction - prediction,
resid2 = resid^2) # Squared errors
Default diagnostic plot
plot(sleepstudy_model)
Same thing using generated columns
smoothplot = function(data, xvar, yvar, jitter = .2){
ggplot(data, aes({{xvar}}, {{yvar}})) +
geom_point(position = position_jitter(width = jitter),
alpha = .5) +
stat_smooth(method = 'loess', formula = y ~ x)
}
smoothplot(sleepstudy, prediction, resid)
Distribution of random intercepts (not exactly Gaussian, but OK)
rfx = data.frame(ranef(sleepstudy_model))
hist(rfx$condval)
Are residuals centred on zero, with homogenous variance, across predictors?
smoothplot(sleepstudy, Days, resid)
smoothplot(sleepstudy, Days, resid2)
ggplot(sleepstudy, aes(Days, resid2)) +
stat_summary() + labs(y = 'Mean Squared Error')
## No summary function supplied, defaulting to `mean_se()`
smoothplot(sleepstudy, Subject, resid)
smoothplot(sleepstudy, Subject, resid2)
ggplot(sleepstudy, aes(Subject, resid2)) +
stat_summary() + labs(y = 'Mean Squared Error')
## No summary function supplied, defaulting to `mean_se()`