ME 001

Example 1

### https://www.tjmahr.com/plotting-partial-pooling-in-mixed-effects-models/


  
library(lme4)
## Warning: package 'lme4' was built under R version 3.6.1
## Loading required package: Matrix
#> Loading required package: Matrix
#> Loading required package: methods
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tibble)
## Warning: package 'tibble' was built under R version 3.6.2
# Convert to tibble for better printing. Convert factors to strings
sleepstudy <- sleepstudy %>% 
  as_tibble() %>% 
  mutate(Subject = as.character(Subject))

# Add two fake participants
df_sleep <- bind_rows(
  sleepstudy,
  data_frame(Reaction = c(286, 288), Days = 0:1, Subject = "374"),
  data_frame(Reaction = 245, Days = 0, Subject = "373"))
## Warning: `data_frame()` is deprecated, use `tibble()`.
## This warning is displayed once per session.
df_sleep
## # A tibble: 183 x 3
##    Reaction  Days Subject
##       <dbl> <dbl> <chr>  
##  1     250.     0 308    
##  2     259.     1 308    
##  3     251.     2 308    
##  4     321.     3 308    
##  5     357.     4 308    
##  6     415.     5 308    
##  7     382.     6 308    
##  8     290.     7 308    
##  9     431.     8 308    
## 10     466.     9 308    
## # … with 173 more rows
summary(df_sleep)
##     Reaction          Days         Subject         
##  Min.   :194.3   Min.   :0.000   Length:183        
##  1st Qu.:255.2   1st Qu.:2.000   Class :character  
##  Median :287.7   Median :4.000   Mode  :character  
##  Mean   :298.1   Mean   :4.432                     
##  3rd Qu.:336.0   3rd Qu.:7.000                     
##  Max.   :466.4   Max.   :9.000
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.6.2
xlab <- "Days of sleep deprivation"
ylab <- "Average reaction time (ms)"

ggplot(df_sleep) + 
  aes(x = Days, y = Reaction) + 
  stat_smooth(method = "lm", se = FALSE) +
  # Put the points on top of lines
  geom_point() +
  facet_wrap("Subject") +
  labs(x = xlab, y = ylab) + 
  # We also need to help the x-axis, so it doesn't 
  # create gridlines/ticks on 2.5 days
  scale_x_continuous(breaks = 0:4 * 2)+theme_bw()

df_no_pooling <- lmList(Reaction ~ Days | Subject, df_sleep) %>% 
  coef() %>% 
  # Subject IDs are stored as row-names. Make them an explicit column
  rownames_to_column("Subject") %>% 
  rename(Intercept = `(Intercept)`, Slope_Days = Days) %>% 
  add_column(Model = "No pooling") %>% 
  # Remove the participant who only had one data-point
  filter(Subject != "373")
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
## Warning: Unknown or uninitialised column: '(weights)'.
## Warning: Unknown or uninitialised column: '(offset)'.
head(df_no_pooling)
##   Subject Intercept Slope_Days      Model
## 1     308  244.1927  21.764702 No pooling
## 2     309  205.0549   2.261785 No pooling
## 3     310  203.4842   6.114899 No pooling
## 4     330  289.6851   3.008073 No pooling
## 5     331  285.7390   5.266019 No pooling
## 6     332  264.2516   9.566768 No pooling
# Fit a model on all the data pooled together
m_pooled <- lm(Reaction ~ Days, df_sleep) 

# Repeat the intercept and slope terms for each participant
df_pooled <- data_frame(
  Model = "Complete pooling",
  Subject = unique(df_sleep$Subject),
  Intercept = coef(m_pooled)[1], 
  Slope_Days = coef(m_pooled)[2])

head(df_pooled)
## # A tibble: 6 x 4
##   Model            Subject Intercept Slope_Days
##   <chr>            <chr>       <dbl>      <dbl>
## 1 Complete pooling 308          252.       10.3
## 2 Complete pooling 309          252.       10.3
## 3 Complete pooling 310          252.       10.3
## 4 Complete pooling 330          252.       10.3
## 5 Complete pooling 331          252.       10.3
## 6 Complete pooling 332          252.       10.3
# Join the raw data so we can use plot the points and the lines.
df_models <- bind_rows(df_pooled, df_no_pooling) %>% 
  left_join(df_sleep, by = "Subject")

p_model_comparison <- ggplot(df_models) + 
  aes(x = Days, y = Reaction) + 
  # Set the color mapping in this layer so the points don't get a color
  geom_abline(aes(intercept = Intercept, slope = Slope_Days, color = Model),
              size = .75) + 
  geom_point() +
  facet_wrap("Subject") + theme_bw()+
  labs(x = xlab, y = ylab) + 
  scale_x_continuous(breaks = 0:4 * 2) + 
  # Fix the color palette 
  scale_color_brewer(palette = "Dark2") + 
  theme(legend.position = "bottom")

p_model_comparison

m <- lmer(Reaction ~ 1 + Days + (1 + Days | Subject), df_sleep)
arm::display(m)
## lmer(formula = Reaction ~ 1 + Days + (1 + Days | Subject), data = df_sleep)
##             coef.est coef.se
## (Intercept) 252.54     6.43 
## Days         10.45     1.54 
## 
## Error terms:
##  Groups   Name        Std.Dev. Corr 
##  Subject  (Intercept) 24.14         
##           Days         5.92    0.07 
##  Residual             25.48         
## ---
## number of obs: 183, groups: Subject, 20
## AIC = 1783.4, DIC = 1787.8
## deviance = 1779.6
# Make a dataframe with the fitted effects
df_partial_pooling <- coef(m)[["Subject"]] %>% 
  rownames_to_column("Subject") %>% 
  as_tibble() %>% 
  rename(Intercept = `(Intercept)`, Slope_Days = Days) %>% 
  add_column(Model = "Partial pooling")

head(df_partial_pooling)
## # A tibble: 6 x 4
##   Subject Intercept Slope_Days Model          
##   <chr>       <dbl>      <dbl> <chr>          
## 1 308          254.      19.6  Partial pooling
## 2 309          212.       1.73 Partial pooling
## 3 310          213.       4.91 Partial pooling
## 4 330          275.       5.64 Partial pooling
## 5 331          274.       7.39 Partial pooling
## 6 332          261.      10.2  Partial pooling
df_models <- bind_rows(df_pooled, df_no_pooling, df_partial_pooling) %>% 
  left_join(df_sleep, by = "Subject")

# Replace the data-set of the last plot
p_model_comparison %+% df_models

df_zoom <- df_models %>% 
  filter(Subject %in% c("335", "350", "373", "374"))

p_model_comparison %+% df_zoom

# Also visualize the point for the fixed effects
df_fixef <- data_frame(
  Model = "Partial pooling (average)",
  Intercept = fixef(m)[1],
  Slope_Days = fixef(m)[2])

# Complete pooling / fixed effects are center of gravity in the plot
df_gravity <- df_pooled %>% 
  distinct(Model, Intercept, Slope_Days) %>% 
  bind_rows(df_fixef)
df_gravity
## # A tibble: 2 x 3
##   Model                     Intercept Slope_Days
##   <chr>                         <dbl>      <dbl>
## 1 Complete pooling               252.       10.3
## 2 Partial pooling (average)      253.       10.5
#> # A tibble: 2 x 3
#>                       Model Intercept Slope_Days
#>                       <chr>     <dbl>      <dbl>
#> 1          Complete pooling  252.3207   10.32766
#> 2 Partial pooling (average)  252.5426   10.45212

df_pulled <- bind_rows(df_no_pooling, df_partial_pooling)

ggplot(df_pulled) + 
  aes(x = Intercept, y = Slope_Days, color = Model) + 
  geom_point(size = 2) + 
  geom_point(data = df_gravity, size = 5) + 
  # Draw an arrow connecting the observations between models
  geom_path(aes(group = Subject, color = NULL), 
            arrow = arrow(length = unit(.02, "npc"))) + 
  # Use ggrepel to jitter the labels away from the points
  ggrepel::geom_text_repel(
    aes(label = Subject, color = NULL), 
    data = df_no_pooling) + 
  # Don't forget 373
  ggrepel::geom_text_repel(
    aes(label = Subject, color = NULL), 
    data = filter(df_partial_pooling, Subject == "373")) + 
  theme(legend.position = "bottom") + 
  ggtitle("Pooling of regression parameters") + 
  xlab("Intercept estimate") + 
  ylab("Slope estimate") + 
  scale_color_brewer(palette = "Dark2") 

# Extract the matrix
cov_mat <- VarCorr(m)[["Subject"]]

# Strip off some details so that just the useful part is printed
attr(cov_mat, "stddev") <- NULL
attr(cov_mat, "correlation") <- NULL
cov_mat
##             (Intercept)     Days
## (Intercept)   582.72951  9.89868
## Days            9.89868 35.03280
library(ellipse)
## Warning: package 'ellipse' was built under R version 3.6.2
## 
## Attaching package: 'ellipse'
## The following object is masked from 'package:graphics':
## 
##     pairs
# Helper function to make a data-frame of ellipse points that 
# includes the level as a column
make_ellipse <- function(cov_mat, center, level) {
  ellipse(cov_mat, centre = center, level = level) %>%
    as.data.frame() %>%
    add_column(level = level) %>% 
    as_tibble()
}

center <- fixef(m)
levels <- c(.1, .3, .5, .7, .9)

# Create an ellipse dataframe for each of the levels defined 
# above and combine them
df_ellipse <- levels %>%
  purrr::map_df(~ make_ellipse(cov_mat, center, level = .x)) %>% 
  rename(Intercept = `(Intercept)`, Slope_Days = Days)

df_ellipse
## # A tibble: 500 x 3
##    Intercept Slope_Days level
##        <dbl>      <dbl> <dbl>
##  1      261.       12.4   0.1
##  2      260.       12.6   0.1
##  3      260.       12.7   0.1
##  4      259.       12.8   0.1
##  5      258.       12.8   0.1
##  6      258.       12.9   0.1
##  7      257.       13.0   0.1
##  8      257.       13.0   0.1
##  9      256.       13.1   0.1
## 10      255.       13.1   0.1
## # … with 490 more rows
ggplot(df_pulled) + 
  aes(x = Intercept, y = Slope_Days, color = Model) + 
  # Draw contour lines from the distribution of effects
  geom_path(aes(group = level, color = NULL), data = df_ellipse, 
            linetype = "dashed", color = "grey40") + 
  geom_point(data = df_gravity, size = 5) + 
  geom_point(size = 2) + 
  geom_path(aes(group = Subject, color = NULL), 
            arrow = arrow(length = unit(.02, "npc"))) + 
  theme(legend.position = "bottom") + 
  ggtitle("Topographic map of regression parameters") + 
  xlab("Intercept estimate") + 
  ylab("Slope estimate") + 
  scale_color_brewer(palette = "Dark2") +theme_bw()

last_plot() +
  coord_cartesian(
    xlim = range(df_pulled$Intercept), 
    ylim = range(df_pulled$Slope_Days),
    expand = TRUE) +theme_bw()

# Euclidean distance
contour_dist <- function(xs, ys, center_x, center_y) {
  x_diff <- (center_x - xs) ^ 2
  y_diff <- (center_y - ys) ^ 2
  sqrt(x_diff + y_diff)
}

# Find the point to label in each ellipse.
df_label_locations <- df_ellipse %>% 
  group_by(level) %>%
  filter(Intercept < quantile(Intercept, .25), 
         Slope_Days < quantile(Slope_Days, .25)) %>% 
  # Compute distance from center.
  mutate(dist = contour_dist(Intercept, Slope_Days, 
                             fixef(m)[1], fixef(m)[2])) %>% 
  # Keep smallest values.
  top_n(-1, wt = dist) %>% 
  ungroup()

# Tweak the last plot one more time!
last_plot() +
  geom_text(aes(label = level, color = NULL), data = df_label_locations, 
            nudge_x = .5, nudge_y = .8, size = 3.5, color = "grey40")

2019-12-29