Example 1
### https://www.tjmahr.com/plotting-partial-pooling-in-mixed-effects-models/
library(lme4)
## Warning: package 'lme4' was built under R version 3.6.1
## Loading required package: Matrix
#> Loading required package: Matrix
#> Loading required package: methods
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
## Warning: package 'tibble' was built under R version 3.6.2
# Convert to tibble for better printing. Convert factors to strings
sleepstudy <- sleepstudy %>%
as_tibble() %>%
mutate(Subject = as.character(Subject))
# Add two fake participants
df_sleep <- bind_rows(
sleepstudy,
data_frame(Reaction = c(286, 288), Days = 0:1, Subject = "374"),
data_frame(Reaction = 245, Days = 0, Subject = "373"))
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
## # A tibble: 183 x 3
## Reaction Days Subject
## <dbl> <dbl> <chr>
## 1 250. 0 308
## 2 259. 1 308
## 3 251. 2 308
## 4 321. 3 308
## 5 357. 4 308
## 6 415. 5 308
## 7 382. 6 308
## 8 290. 7 308
## 9 431. 8 308
## 10 466. 9 308
## # … with 173 more rows
## Reaction Days Subject
## Min. :194.3 Min. :0.000 Length:183
## 1st Qu.:255.2 1st Qu.:2.000 Class :character
## Median :287.7 Median :4.000 Mode :character
## Mean :298.1 Mean :4.432
## 3rd Qu.:336.0 3rd Qu.:7.000
## Max. :466.4 Max. :9.000
## Warning: package 'ggplot2' was built under R version 3.6.2
xlab <- "Days of sleep deprivation"
ylab <- "Average reaction time (ms)"
ggplot(df_sleep) +
aes(x = Days, y = Reaction) +
stat_smooth(method = "lm", se = FALSE) +
# Put the points on top of lines
geom_point() +
facet_wrap("Subject") +
labs(x = xlab, y = ylab) +
# We also need to help the x-axis, so it doesn't
# create gridlines/ticks on 2.5 days
scale_x_continuous(breaks = 0:4 * 2)+theme_bw()

df_no_pooling <- lmList(Reaction ~ Days | Subject, df_sleep) %>%
coef() %>%
# Subject IDs are stored as row-names. Make them an explicit column
rownames_to_column("Subject") %>%
rename(Intercept = `(Intercept)`, Slope_Days = Days) %>%
add_column(Model = "No pooling") %>%
# Remove the participant who only had one data-point
filter(Subject != "373")
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Subject Intercept Slope_Days Model
## 1 308 244.1927 21.764702 No pooling
## 2 309 205.0549 2.261785 No pooling
## 3 310 203.4842 6.114899 No pooling
## 4 330 289.6851 3.008073 No pooling
## 5 331 285.7390 5.266019 No pooling
## 6 332 264.2516 9.566768 No pooling
# Fit a model on all the data pooled together
m_pooled <- lm(Reaction ~ Days, df_sleep)
# Repeat the intercept and slope terms for each participant
df_pooled <- data_frame(
Model = "Complete pooling",
Subject = unique(df_sleep$Subject),
Intercept = coef(m_pooled)[1],
Slope_Days = coef(m_pooled)[2])
head(df_pooled)
## # A tibble: 6 x 4
## Model Subject Intercept Slope_Days
## <chr> <chr> <dbl> <dbl>
## 1 Complete pooling 308 252. 10.3
## 2 Complete pooling 309 252. 10.3
## 3 Complete pooling 310 252. 10.3
## 4 Complete pooling 330 252. 10.3
## 5 Complete pooling 331 252. 10.3
## 6 Complete pooling 332 252. 10.3
# Join the raw data so we can use plot the points and the lines.
df_models <- bind_rows(df_pooled, df_no_pooling) %>%
left_join(df_sleep, by = "Subject")
p_model_comparison <- ggplot(df_models) +
aes(x = Days, y = Reaction) +
# Set the color mapping in this layer so the points don't get a color
geom_abline(aes(intercept = Intercept, slope = Slope_Days, color = Model),
size = .75) +
geom_point() +
facet_wrap("Subject") + theme_bw()+
labs(x = xlab, y = ylab) +
scale_x_continuous(breaks = 0:4 * 2) +
# Fix the color palette
scale_color_brewer(palette = "Dark2") +
theme(legend.position = "bottom")
p_model_comparison

m <- lmer(Reaction ~ 1 + Days + (1 + Days | Subject), df_sleep)
arm::display(m)
## lmer(formula = Reaction ~ 1 + Days + (1 + Days | Subject), data = df_sleep)
## coef.est coef.se
## (Intercept) 252.54 6.43
## Days 10.45 1.54
##
## Error terms:
## Groups Name Std.Dev. Corr
## Subject (Intercept) 24.14
## Days 5.92 0.07
## Residual 25.48
## ---
## number of obs: 183, groups: Subject, 20
## AIC = 1783.4, DIC = 1787.8
## deviance = 1779.6
# Make a dataframe with the fitted effects
df_partial_pooling <- coef(m)[["Subject"]] %>%
rownames_to_column("Subject") %>%
as_tibble() %>%
rename(Intercept = `(Intercept)`, Slope_Days = Days) %>%
add_column(Model = "Partial pooling")
head(df_partial_pooling)
## # A tibble: 6 x 4
## Subject Intercept Slope_Days Model
## <chr> <dbl> <dbl> <chr>
## 1 308 254. 19.6 Partial pooling
## 2 309 212. 1.73 Partial pooling
## 3 310 213. 4.91 Partial pooling
## 4 330 275. 5.64 Partial pooling
## 5 331 274. 7.39 Partial pooling
## 6 332 261. 10.2 Partial pooling
df_models <- bind_rows(df_pooled, df_no_pooling, df_partial_pooling) %>%
left_join(df_sleep, by = "Subject")
# Replace the data-set of the last plot
p_model_comparison %+% df_models

df_zoom <- df_models %>%
filter(Subject %in% c("335", "350", "373", "374"))
p_model_comparison %+% df_zoom

# Also visualize the point for the fixed effects
df_fixef <- data_frame(
Model = "Partial pooling (average)",
Intercept = fixef(m)[1],
Slope_Days = fixef(m)[2])
# Complete pooling / fixed effects are center of gravity in the plot
df_gravity <- df_pooled %>%
distinct(Model, Intercept, Slope_Days) %>%
bind_rows(df_fixef)
df_gravity
## # A tibble: 2 x 3
## Model Intercept Slope_Days
## <chr> <dbl> <dbl>
## 1 Complete pooling 252. 10.3
## 2 Partial pooling (average) 253. 10.5
#> # A tibble: 2 x 3
#> Model Intercept Slope_Days
#> <chr> <dbl> <dbl>
#> 1 Complete pooling 252.3207 10.32766
#> 2 Partial pooling (average) 252.5426 10.45212
df_pulled <- bind_rows(df_no_pooling, df_partial_pooling)
ggplot(df_pulled) +
aes(x = Intercept, y = Slope_Days, color = Model) +
geom_point(size = 2) +
geom_point(data = df_gravity, size = 5) +
# Draw an arrow connecting the observations between models
geom_path(aes(group = Subject, color = NULL),
arrow = arrow(length = unit(.02, "npc"))) +
# Use ggrepel to jitter the labels away from the points
ggrepel::geom_text_repel(
aes(label = Subject, color = NULL),
data = df_no_pooling) +
# Don't forget 373
ggrepel::geom_text_repel(
aes(label = Subject, color = NULL),
data = filter(df_partial_pooling, Subject == "373")) +
theme(legend.position = "bottom") +
ggtitle("Pooling of regression parameters") +
xlab("Intercept estimate") +
ylab("Slope estimate") +
scale_color_brewer(palette = "Dark2")

# Extract the matrix
cov_mat <- VarCorr(m)[["Subject"]]
# Strip off some details so that just the useful part is printed
attr(cov_mat, "stddev") <- NULL
attr(cov_mat, "correlation") <- NULL
cov_mat
## (Intercept) Days
## (Intercept) 582.72951 9.89868
## Days 9.89868 35.03280
## Warning: package 'ellipse' was built under R version 3.6.2
##
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
##
## pairs
# Helper function to make a data-frame of ellipse points that
# includes the level as a column
make_ellipse <- function(cov_mat, center, level) {
ellipse(cov_mat, centre = center, level = level) %>%
as.data.frame() %>%
add_column(level = level) %>%
as_tibble()
}
center <- fixef(m)
levels <- c(.1, .3, .5, .7, .9)
# Create an ellipse dataframe for each of the levels defined
# above and combine them
df_ellipse <- levels %>%
purrr::map_df(~ make_ellipse(cov_mat, center, level = .x)) %>%
rename(Intercept = `(Intercept)`, Slope_Days = Days)
df_ellipse
## # A tibble: 500 x 3
## Intercept Slope_Days level
## <dbl> <dbl> <dbl>
## 1 261. 12.4 0.1
## 2 260. 12.6 0.1
## 3 260. 12.7 0.1
## 4 259. 12.8 0.1
## 5 258. 12.8 0.1
## 6 258. 12.9 0.1
## 7 257. 13.0 0.1
## 8 257. 13.0 0.1
## 9 256. 13.1 0.1
## 10 255. 13.1 0.1
## # … with 490 more rows
ggplot(df_pulled) +
aes(x = Intercept, y = Slope_Days, color = Model) +
# Draw contour lines from the distribution of effects
geom_path(aes(group = level, color = NULL), data = df_ellipse,
linetype = "dashed", color = "grey40") +
geom_point(data = df_gravity, size = 5) +
geom_point(size = 2) +
geom_path(aes(group = Subject, color = NULL),
arrow = arrow(length = unit(.02, "npc"))) +
theme(legend.position = "bottom") +
ggtitle("Topographic map of regression parameters") +
xlab("Intercept estimate") +
ylab("Slope estimate") +
scale_color_brewer(palette = "Dark2") +theme_bw()

last_plot() +
coord_cartesian(
xlim = range(df_pulled$Intercept),
ylim = range(df_pulled$Slope_Days),
expand = TRUE) +theme_bw()

# Euclidean distance
contour_dist <- function(xs, ys, center_x, center_y) {
x_diff <- (center_x - xs) ^ 2
y_diff <- (center_y - ys) ^ 2
sqrt(x_diff + y_diff)
}
# Find the point to label in each ellipse.
df_label_locations <- df_ellipse %>%
group_by(level) %>%
filter(Intercept < quantile(Intercept, .25),
Slope_Days < quantile(Slope_Days, .25)) %>%
# Compute distance from center.
mutate(dist = contour_dist(Intercept, Slope_Days,
fixef(m)[1], fixef(m)[2])) %>%
# Keep smallest values.
top_n(-1, wt = dist) %>%
ungroup()
# Tweak the last plot one more time!
last_plot() +
geom_text(aes(label = level, color = NULL), data = df_label_locations,
nudge_x = .5, nudge_y = .8, size = 3.5, color = "grey40")
