Camille Gasser - R code sample

About this file

The following data & analyses come from a longitudinal survey study with N = 41 participants. Broadly, the goal of this study was to understand how novelty in everyday life affects memory formation.

For 2 weeks, participants completed “daily diary” surveys at the end of each day. These diaries asked a variety of questions about what each person experienced on that day, including information about any novel experiences they’d engaged in (e.g., interacting with somebody new, going somewhere new). The diaries also asked participants to describe in detail 3 specific events that had happened that day. Several weeks later, participants were tested on their memory for the events they’d reported. We then looked at whether participants’ memory performance differed as a function of the novelty they experienced (both during the event itself and throughout the day).

Note that data was collected in 2 waves:

Wave A: 11/23/21 - 12/27/21
Wave B: 3/22/22 - 5/3/22

The manuscript reporting this project can be found in Chapter 3 of my dissertation @ this link.

0. Setup

#### load libraries

library(Rmisc)
library(tidyverse)
library(lme4)
library(lmerTest)
library(brms)
library(sjPlot)
library(emmeans)
library(see)
library(rstatix)
library(effsize)
library(parameters)

#### define plotting theme + helper functions

plot_theme <- theme_light() + 
  theme(panel.grid = element_blank(),
        text = element_text(size = 14,),
        legend.position = 'none',
        strip.background = element_rect(fill = 'white'),
        strip.text.x = element_text(color = 'black'))
theme_set(plot_theme)

source('https://raw.githubusercontent.com/datavizpyr/data/master/half_flat_violinplot.R')

# function that computes (between-subjects) standard error of the mean
sem <- function(x) sd(x)/sqrt(length(x)) 

# function for nicely formatting model output (for easy copy-paste into manuscript)
lmer_output <- function(mdl) {
  coef <- rownames_to_column(as.data.frame(summary(mdl)$coefficients, make.names = F), 'Parameter')
  ci <- ci_satterthwaite(mdl)
  comb <- full_join(coef, ci, by = 'Parameter')
  for (n in 1:nrow(comb)) {
    p = comb[n, 'Pr(>|t|)']
    p_str = ifelse(p < .001, 'p < .001', sprintf('p = %.3f', p))
    print(sprintf('%s: b = %.3f, SE = %.3f, CI = [%.3f, %.3f], %s',
                  comb[n, 'Parameter'], comb[n, 'Estimate'], comb[n, 'Std. Error'],
                  comb[n, 'Std. Error'], comb[n, 'Std. Error'], p_str))
  }
}

1. Load data

# change path depending on where the datafile is stored
base_dir <- '/Users/camille/Documents/career/interviews/nyc - DOHMH'
setwd(base_dir)

all_data <- read.csv('diary_data.csv', stringsAsFactors = F)
survey_data <- all_data %>%
  filter(!(is.na(test_type))) # filter out events that weren't included in the memory test

2. Data exploration

Pull out & plot all survey variables related to novelty.

Event novelty

The events participants recorded were labeled as one of three categories:

routine (something done almost every day)
periodic (something done occasionally)
new (something never done before)

# get proportion of events per participant that were routine v. periodic v. new
event_novelty <- survey_data %>%
  group_by(subject_id, group) %>%
  summarise(prop_routine = sum(freq == 'routine') / length(freq),
            prop_periodic = sum(freq == 'periodic') / length(freq),
            prop_new = sum(freq == 'new') / length(freq)) %>%
  pivot_longer(cols = starts_with('prop_'), names_to = 'freq', names_prefix = 'prop_', values_to = 'proportion')

event_novelty_group <- event_novelty %>%
  group_by(freq) %>%
  summarise(mean_prop = mean(proportion),
            sd_prop = sd(proportion))
event_novelty_group$sem_prop <- summarySEwithin(data = event_novelty,
                                              measurevar = 'proportion',
                                              withinvars = 'freq',
                                              idvar = 'subject_id')$se # within-subject SEM

# plot
ggplot(data = event_novelty, aes(x = factor(freq, levels = c('routine', 'periodic', 'new')), y = proportion, fill = freq)) +
  geom_jitter(width = 0.15, height = 0, color = 'white', size = 2.5, pch = 21, alpha = 0.8) +
  geom_errorbar(data = event_novelty_group, width = 0.6, color = 'grey30', linewidth = 1,
                aes(x = freq, y = mean_prop, ymin = mean_prop, ymax = mean_prop)) +
  geom_errorbar(data = event_novelty_group, width = 0.08, color = 'grey30', linewidth = 1,
                aes(x = freq, y = mean_prop, ymin = mean_prop - sem_prop, ymax = mean_prop + sem_prop)) +
  ylim(0, 1) +
  labs(x = 'event regularity', y = 'proportion of events') +
  theme(legend.position = 'none') +
  scale_fill_manual(values = c('#DD5A71', '#E1A079', '#F1C68D')) +
  scale_color_manual(values = c('#DD5A71', '#E1A079', '#F1C68D'))

# check difference in distribution by participant group

# get event counts in each bin
event_novelty_n <- survey_data %>%
  group_by(group) %>%
  summarise(n_routine = sum(freq == 'routine'),
            n_periodic = sum(freq == 'periodic'),
            n_new = sum(freq == 'new')) %>%
  pivot_longer(cols = starts_with('n_'), names_to = 'frequency', names_prefix = 'n_', values_to = 'count')

# run chi-squared test
freq_tab <- as.table(matrix(c(subset(event_novelty_n, group == 'A')$count, subset(event_novelty_n, group == 'B')$count), nrow = 3))
chisq.test(freq_tab)

## 
##  Pearson's Chi-squared test
## 
## data:  freq_tab
## X-squared = 50.5, df = 2, p-value = 1.082e-11

Novel locations visited

On each day, participants indicated whether they’d visited anywhere new.

# get proportion of days per participant where novel locations were v. weren't visited
novel_loc <- survey_data %>%
  group_by(subject_id, group) %>%
  summarise(prop_novel = sum(day_novelty_newLocation_bin) / length(subject_id),
            prop_notnovel = sum(day_novelty_newLocation_bin == 0) / length(subject_id)) %>%
  pivot_longer(cols = starts_with('prop_'), names_to = 'novel_cat',
               names_prefix = 'prop_', values_to = 'proportion')

group_novel_loc <- novel_loc %>%
  group_by(novel_cat) %>%
  summarise(mean_prop = mean(proportion), sd_prop = sd(proportion))

group_novel_loc$sem_prop <- summarySEwithin(data = novel_loc, measurevar = 'proportion',
                                            withinvars = 'novel_cat', idvar = 'subject_id')$se

# plot
ggplot(data = novel_loc, aes(x = novel_cat, y = proportion, fill = novel_cat)) +
  geom_jitter(width = 0.19, height = 0, color = 'white', size = 3.5, pch = 21, alpha = 0.8) +
  geom_errorbar(data = group_novel_loc, width = 0.6, color = 'grey30', linewidth = 1,
                aes(x = novel_cat, y = mean_prop, ymin = mean_prop, ymax = mean_prop)) +
  geom_errorbar(data = group_novel_loc, width = 0.08, color = 'grey30', linewidth = 1,
                aes(x = novel_cat, y = mean_prop, ymin = mean_prop - sem_prop, ymax = mean_prop + sem_prop)) +
  ylim(0, 1) +
  scale_x_discrete(labels = c('no', 'yes')) +
  labs(x = 'visited novel location?', y = 'proportion of days') +
  scale_fill_manual(values = c('#E68770', '#DD5A71')) +
  scale_color_manual(values = c('#E68770', '#DD5A71'))

# check for differences between two data collection waves

t.test(filter(novel_loc, group == 'A', novel_cat == 'novel')$proportion,
       filter(novel_loc, group == 'B', novel_cat == 'novel')$proportion, paired = F)

## 
##  Welch Two Sample t-test
## 
## data:  filter(novel_loc, group == "A", novel_cat == "novel")$proportion and filter(novel_loc, group == "B", novel_cat == "novel")$proportion
## t = -0.17619, df = 28.847, p-value = 0.8614
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.1379737  0.1160913
## sample estimates:
## mean of x mean of y 
## 0.2869067 0.2978480

# wilcox.test(filter(novel_loc, group == 'A', novel_cat == 'novel')$proportion,
#             filter(novel_loc, group == 'B', novel_cat == 'novel')$proportion, paired = F)
effsize::cohen.d(proportion ~ group, data = filter(novel_loc, novel_cat == 'novel'), paired = F)

## 
## Cohen's d
## 
## d estimate: -0.05744993 (negligible)
## 95 percent confidence interval:
##      lower      upper 
## -0.7134022  0.5985024

Novel social interactions

On each day, participants indicated whether they’d interacted with any new people.

# get proportion of days where participants did v. didn't have a novel social interaction
novel_social <- survey_data %>%
  group_by(subject_id, group) %>%
  summarise(prop_novel = sum(day_novelty_newSocial_interact_bin) / length(subject_id),
            prop_notnovel = sum(day_novelty_newSocial_interact_bin == 0) / length(subject_id)) %>%
  pivot_longer(cols = starts_with('prop_'), names_to = 'novel_cat', names_prefix = 'prop_', values_to = 'proportion')

group_novel_social <- novel_social %>%
  group_by(novel_cat) %>%
  summarise(mean_prop = mean(proportion), sd_prop = sd(proportion))
group_novel_social$sem_prop <- summarySEwithin(data = novel_social, measurevar = 'proportion',
                                               withinvars = 'novel_cat', idvar = 'subject_id')$se

# plot
ggplot(data = novel_social, aes(x = novel_cat, y = proportion, fill = novel_cat)) +
  geom_jitter(width = 0.19, height = 0, color = 'white', size = 3.5, pch = 21, alpha = 0.8) +
  geom_errorbar(data = group_novel_social, width = 0.6, color = 'grey30', linewidth = 1,
                aes(x = novel_cat, y = mean_prop, ymin = mean_prop, ymax = mean_prop)) +
  geom_errorbar(data = group_novel_social, width = 0.08, color = 'grey30', linewidth = 1,
                aes(x = novel_cat, y = mean_prop, ymin = mean_prop - sem_prop, ymax = mean_prop + sem_prop)) +
  ylim(0, 1) +
  scale_x_discrete(labels = c('no', 'yes')) +
  labs(x = 'novel social interaction?', y = 'proportion of days') +
  scale_fill_manual(values = c('#E68770', '#DD5A71')) +
  scale_color_manual(values = c('#E68770', '#DD5A71'))

# check for differences between two data collection waves
t.test(filter(novel_social, group == 'A', novel_cat == 'novel')$proportion,
       filter(novel_social, group == 'B', novel_cat == 'novel')$proportion, paired = F)

## 
##  Welch Two Sample t-test
## 
## data:  filter(novel_social, group == "A", novel_cat == "novel")$proportion and filter(novel_social, group == "B", novel_cat == "novel")$proportion
## t = -0.62463, df = 31.484, p-value = 0.5367
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.2025531  0.1075269
## sample estimates:
## mean of x mean of y 
## 0.3172795 0.3647926

# wilcox.test(filter(novel_social, group == 'A', novel_cat == 'novel')$proportion,
#             filter(novel_social, group == 'B', novel_cat == 'novel')$proportion, paired = F)
effsize::cohen.d(proportion ~ group, data = filter(novel_social, novel_cat == 'novel'), paired = F)

## 
## Cohen's d
## 
## d estimate: -0.1977999 (negligible)
## 95 percent confidence interval:
##      lower      upper 
## -0.8551132  0.4595135

Ratings of day typicality

On each day, participants rated how typical their day was on a scale from 1 to 5.

# plot
ggplot(survey_data, aes(x = similarity_typical, fill = after_stat(x), color = after_stat(x))) +
  geom_histogram(bins = 5, aes(y = after_stat(density)), alpha = 0.7) +
  labs(x = 'day typicality rating', y = 'proportion of days') +
  scale_fill_gradient(high = '#F1C68D', low = '#DD5A71') +
  scale_color_gradient(high = '#F1C68D', low = '#DD5A71')

# check for differences between two data collection waves
typic_sub <- survey_data %>%
  group_by(subject_id, group) %>%
  summarise(mean_typic = mean(similarity_typical))

t.test(filter(typic_sub, group == 'A')$mean_typic, filter(typic_sub, group == 'B')$mean_typic, paired = F)

## 
##  Welch Two Sample t-test
## 
## data:  filter(typic_sub, group == "A")$mean_typic and filter(typic_sub, group == "B")$mean_typic
## t = -1.354, df = 38.975, p-value = 0.1835
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.45511485  0.09012837
## sample estimates:
## mean of x mean of y 
##  2.316392  2.498885

effsize::cohen.d(mean_typic ~ group, data = typic_sub, paired = F)

## 
## Cohen's d
## 
## d estimate: -0.3832661 (small)
## 95 percent confidence interval:
##      lower      upper 
## -1.0446569  0.2781247

Pull out & plot all memory performance variables.

During the memory test, we measured memory performance in two ways.

“memory vividness ratings” — participants were asked to report how vividly they remembered a given event on a scale from 1 to 5
“event recall” — participants were asked to write as much as they could remember about a given event; from these descriptions, we then extracted the number of unique details that each recall contained

Details recalled

# plot distribution
ggplot(subset(survey_data, test_type == 'recall'),
       aes(x = n_episodic_details_recall, color = after_stat(x), fill = after_stat(x))) +
  geom_histogram(binwidth = 1, alpha = 0.8) +
  labs(x = 'number of details recalled\n(per event)', y = 'event count') +
  #ylim(0, 500) +
  scale_colour_gradient(low = '#6788D4', high = '#804382') +
  scale_fill_gradient(low = '#6788D4', high = '#804382')

# look at details recalled as a function of the delay between the day of the event & the memory test
details_delay <- survey_data %>%
  filter(test_type == 'recall') %>%
  select(subject_id, group, start_date, test_date, n_episodic_details_recall) %>%
  mutate(test_delay = as.numeric(ymd(test_date) - ymd(start_date)),
         test_delay.c = as.numeric(scale(test_delay, center = T, scale = F)))

mdl <- lmer(n_episodic_details_recall ~ test_delay + (1 | subject_id), data = details_delay)
summary(mdl)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: n_episodic_details_recall ~ test_delay + (1 | subject_id)
##    Data: details_delay
## 
## REML criterion at convergence: 2605
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6771 -0.6315 -0.0663  0.4843  4.8071 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 3.757    1.938   
##  Residual               4.892    2.212   
## Number of obs: 565, groups:  subject_id, 41
## 
## Fixed effects:
##              Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)   6.64661    0.60380 337.29454  11.008   <2e-16 ***
## test_delay   -0.03421    0.02269 531.07946  -1.508    0.132    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## test_delay -0.851

lmer_output(mdl)

## [1] "(Intercept): b = 6.647, SE = 0.604, CI = [0.604, 0.604], p < .001"
## [1] "test_delay: b = -0.034, SE = 0.023, CI = [0.023, 0.023], p = 0.132"

# plot
plot_model(mdl, type = 'pred', terms = 'test_delay', color = '#6788D4', alpha = 0.3, line.size = 1, title = '') +
  geom_jitter(data = details_delay, aes(x = test_delay, y = n_episodic_details_recall), width = 0, height = 0.15,
              alpha = 0.1, size = 1.5, color = 'grey50') +
  labs(x = 'encoding-test delay (in days)', y = 'number of details recalled')

Memory vividness ratings

# plot distribution
ggplot(survey_data, aes(x = test_vividness, color = after_stat(x), fill = after_stat(x))) +
  geom_histogram(binwidth = 1, alpha = 0.8) +
  labs(x = 'memory vividness rating', y = 'event count') +
  scale_colour_gradient(low = '#6788D4', high = '#804382') +
  scale_fill_gradient(low = '#6788D4', high = '#804382')

# look at vividness ratings as a function of the delay between the day of the event & the memory test
vividness_delay <- survey_data %>%
  select(subject_id, group, start_date, test_date, test_vividness) %>%
  mutate(test_delay = as.numeric(ymd(test_date) - ymd(start_date)))

vividness_delay %>%
  summarise(mean_delay = mean(test_delay),
            sd_delay = sd(test_delay))

##   mean_delay sd_delay
## 1   22.63107 4.238753

mdl <- lmer(test_vividness ~ test_delay + (1 | subject_id), data = vividness_delay)
summary(mdl)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: test_vividness ~ test_delay + (1 | subject_id)
##    Data: vividness_delay
## 
## REML criterion at convergence: 5561.1
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.40954 -0.78842  0.00264  0.81136  2.35731 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 0.2759   0.5253  
##  Residual               1.6159   1.2712  
## Number of obs: 1648, groups:  subject_id, 41
## 
## Fixed effects:
##               Estimate Std. Error         df t value Pr(>|t|)    
## (Intercept)  3.455e+00  1.945e-01  6.602e+02  17.768  < 2e-16 ***
## test_delay  -2.427e-02  7.669e-03  1.634e+03  -3.164  0.00158 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## test_delay -0.892

lmer_output(mdl)

## [1] "(Intercept): b = 3.455, SE = 0.194, CI = [0.194, 0.194], p < .001"
## [1] "test_delay: b = -0.024, SE = 0.008, CI = [0.008, 0.008], p = 0.002"

plot_model(mdl, type = 'pred', terms = 'test_delay', color = '#6788D4', alpha = 0.3, line.size = 1, title = '') +
  geom_jitter(data = vividness_delay, aes(x = test_delay, y = test_vividness), width = 0.1, height = 0.15,
              alpha = 0.1, size = 1.5, color = 'grey50') +
  scale_y_continuous(breaks = 1:5) +
  labs(x = 'encoding-test delay (in days)', y = 'memory vividness rating')

Association between episodic details & memory vividnes

# look at how the two measures of memory performance (vividness & number of details recalled) relate to each other
recall_sub <- filter(survey_data, test_type == 'recall')

mdl <- lmer(test_vividness ~ n_episodic_details_recall + (1 | subject_id), data = recall_sub)
summary(mdl)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: test_vividness ~ n_episodic_details_recall + (1 | subject_id)
##    Data: recall_sub
## 
## REML criterion at convergence: 1827.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.57075 -0.78660 -0.01065  0.65421  2.87303 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 0.1947   0.4413  
##  Residual               1.3569   1.1649  
## Number of obs: 565, groups:  subject_id, 41
## 
## Fixed effects:
##                            Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                 1.70213    0.14565 154.00474   11.69   <2e-16 ***
## n_episodic_details_recall   0.21963    0.02019 409.58921   10.88   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## n_psdc_dtl_ -0.814

plot_model(mdl, type = 'pred', terms = 'n_episodic_details_recall', color = '#625EA5',
           alpha = 0.3, line.size = 1, title = '') +
  geom_jitter(data = recall_sub, aes(x = n_episodic_details_recall, y = test_vividness),
              width = 0.1, height = 0.15, alpha = 0.15, size = 1.5, color = 'grey50') +
  scale_y_continuous(breaks = 1:5) +
  labs(x = 'number of details recalled', y = 'memory vividness rating')

3. Main analyses

Association between event regularity & memory vividness

Result: New events are remembered more vividly than periodic events, which are remembered more vividly than routine events

# run model
mdl <- lmer(test_vividness ~ freq + (freq | subject_id), data = survey_data)
summary(mdl)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: test_vividness ~ freq + (freq | subject_id)
##    Data: survey_data
## 
## REML criterion at convergence: 5335.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.83992 -0.74290  0.03798  0.75328  2.77888 
## 
## Random effects:
##  Groups     Name         Variance Std.Dev. Corr       
##  subject_id (Intercept)  0.3062   0.5533              
##             freqperiodic 0.1523   0.3902   -0.45      
##             freqroutine  0.2981   0.5460   -0.74  0.84
##  Residual                1.3896   1.1788              
## Number of obs: 1648, groups:  subject_id, 41
## 
## Fixed effects:
##              Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)   3.66444    0.11026 39.23251  33.234  < 2e-16 ***
## freqperiodic -0.80125    0.09906 42.88533  -8.089 3.62e-10 ***
## freqroutine  -1.62155    0.13526 34.07322 -11.988 8.96e-14 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) frqprd
## freqperiodc -0.648       
## freqroutine -0.676  0.669

# extract estimated marginal means for plotting
em <- emmeans(mdl, pairwise ~ freq, lmer.df = 'satterthwaite')
ci <- confint(em)
em.con <- as.data.frame(em$contrasts)
em.ci <- as.data.frame(ci$contrasts)
em.all <- cbind(em.ci, em.con$p.value)
  
em_df <- as.data.frame(em$emmeans)
ggplot(data = survey_data, aes(x = freq, y = test_vividness, fill = freq, color = freq)) +
  geom_jitter(height = 0.2, width = 0.2, alpha = 0.2, size = 1, color = 'grey') +
  geom_point(data = em_df, aes(x = freq, y = emmean), size = 4) +
  geom_errorbar(data = em_df, aes(x = freq, y = emmean, ymin = lower.CL, ymax = upper.CL),
                width = 0, linewidth = 1.5) +
  scale_fill_manual(values = c('#DD5A71', '#E68770', '#F1C68D')) +
  scale_color_manual(values = c('#DD5A71', '#E68770', '#F1C68D')) +
  labs(x = 'event regularity', y = 'memory vividness')

Association between number of details recalled & memory vividness

Result: New events are remembered in more detail than routine events.

recall_sub <- filter(survey_data, test_type == 'recall')

# run model
mdl <- lmer(n_episodic_details_recall ~ freq + (freq | subject_id), data = recall_sub)

# extract estimated marginal means for plotting
em <- emmeans(mdl, pairwise ~ freq, lmer.df = 'satterthwaite')
ci <- confint(em)
em.con <- as.data.frame(em$contrasts)
em.ci <- as.data.frame(ci$contrasts)
em.all <- cbind(em.ci, em.con$p.value)
em.all %>% mutate_if(is.numeric, round, digits = 3)

##             contrast estimate    SE     df lower.CL upper.CL em.con$p.value
## 1     new - periodic    0.669 0.321 33.296   -0.118    1.456          0.108
## 2      new - routine    1.641 0.376 37.885    0.724    2.558          0.000
## 3 periodic - routine    0.972 0.283 18.998    0.252    1.692          0.008

em_df <- as.data.frame(em$emmeans)
ggplot(data = recall_sub, aes(x = freq, y = n_episodic_details_recall, fill = freq, color = freq)) +
  geom_violinhalf(trim = F, alpha = 0.5, position = position_nudge(0.05)) +
  geom_point(data = em_df, aes(x = freq, y = emmean), size = 2, position = position_nudge(-0.05)) +
  geom_errorbar(data = em_df, aes(x = freq, y = emmean, ymin = lower.CL, ymax = upper.CL),
                width = 0, linewidth = 1, position = position_nudge(-0.05)) +
  scale_fill_manual(values = c('#DD5A71', '#E68770', '#F1C68D')) +
  scale_color_manual(values = c('#DD5A71', '#E68770', '#F1C68D')) +
  labs(x = 'event regularity', y = 'number of details recalled')

# run model controlling for details recorded at the time of the diary
mdl <- lmer(n_episodic_details_recall ~ freq + n_episodic_details_survey + (1 | subject_id), data = recall_sub)
summary(mdl)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: n_episodic_details_recall ~ freq + n_episodic_details_survey +  
##     (1 | subject_id)
##    Data: recall_sub
## 
## REML criterion at convergence: 2564.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.3853 -0.6196 -0.0834  0.5139  4.0118 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 2.756    1.660   
##  Residual               4.636    2.153   
## Number of obs: 565, groups:  subject_id, 41
## 
## Fixed effects:
##                            Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                 5.44581    0.43184 161.64544  12.611  < 2e-16 ***
## freqperiodic               -0.68902    0.24186 534.61781  -2.849  0.00456 ** 
## freqroutine                -1.62901    0.32430 539.63258  -5.023 6.92e-07 ***
## n_episodic_details_survey   0.15660    0.03686 485.27761   4.248 2.58e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) frqprd frqrtn
## freqperiodc -0.452              
## freqroutine -0.369  0.579       
## n_psdc_dtl_ -0.640  0.052  0.069

# run Bayesian model (with random slopes) for comparison
overwrite <- F
mdl_fn <- 'recalldetails_by_event_regularity_ctrl.Rdata'

if (!file.exists(mdl_fn) | overwrite)  {
  bmdl <- brm(n_episodic_details_recall ~ freq + n_episodic_details_survey + (freq + n_episodic_details_survey | subject_id),
              data = recall_sub, iter = 30000, seed = 123, control = list(adapt_delta = 0.99))
  save(bmdl, file = mdl_fn)
} else { load(mdl_fn) }

print(bmdl, digits = 3)

##  Family: gaussian 
##   Links: mu = identity; sigma = identity 
## Formula: n_episodic_details_recall ~ freq + n_episodic_details_survey + (freq + n_episodic_details_survey | subject_id) 
##    Data: recall_sub (Number of observations: 565) 
##   Draws: 4 chains, each with iter = 30000; warmup = 15000; thin = 1;
##          total post-warmup draws = 60000
## 
## Group-Level Effects: 
## ~subject_id (Number of levels: 41) 
##                                             Estimate Est.Error l-95% CI
## sd(Intercept)                                  1.987     0.447    1.180
## sd(freqperiodic)                               0.763     0.390    0.056
## sd(freqroutine)                                0.596     0.404    0.027
## sd(n_episodic_details_survey)                  0.128     0.059    0.015
## cor(Intercept,freqperiodic)                   -0.359     0.324   -0.839
## cor(Intercept,freqroutine)                    -0.274     0.398   -0.883
## cor(freqperiodic,freqroutine)                  0.225     0.438   -0.703
## cor(Intercept,n_episodic_details_survey)      -0.216     0.363   -0.749
## cor(freqperiodic,n_episodic_details_survey)   -0.038     0.404   -0.772
## cor(freqroutine,n_episodic_details_survey)    -0.037     0.436   -0.818
##                                             u-95% CI  Rhat Bulk_ESS Tail_ESS
## sd(Intercept)                                  2.939 1.000    20008    32572
## sd(freqperiodic)                               1.528 1.000    11085    18146
## sd(freqroutine)                                1.509 1.000    18807    28111
## sd(n_episodic_details_survey)                  0.246 1.000     7709    10839
## cor(Intercept,freqperiodic)                    0.434 1.000    33396    32016
## cor(Intercept,freqroutine)                     0.614 1.000    48421    43532
## cor(freqperiodic,freqroutine)                  0.888 1.000    34843    44887
## cor(Intercept,n_episodic_details_survey)       0.634 1.000    19693    24470
## cor(freqperiodic,n_episodic_details_survey)    0.741 1.000    17301    28363
## cor(freqroutine,n_episodic_details_survey)     0.777 1.000    13816    29332
## 
## Population-Level Effects: 
##                           Estimate Est.Error l-95% CI u-95% CI  Rhat Bulk_ESS
## Intercept                    5.355     0.481    4.417    6.310 1.000    38387
## freqperiodic                -0.636     0.281   -1.185   -0.081 1.000    59506
## freqroutine                 -1.613     0.354   -2.305   -0.915 1.000    66672
## n_episodic_details_survey    0.165     0.047    0.072    0.259 1.000    55299
##                           Tail_ESS
## Intercept                    42696
## freqperiodic                 45061
## freqroutine                  47241
## n_episodic_details_survey    41415
## 
## Family Specific Parameters: 
##       Estimate Est.Error l-95% CI u-95% CI  Rhat Bulk_ESS Tail_ESS
## sigma    2.100     0.071    1.965    2.244 1.000    37331    42542
## 
## Draws were sampled using sampling(NUTS). For each parameter, Bulk_ESS
## and Tail_ESS are effective sample size measures, and Rhat is the potential
## scale reduction factor on split chains (at convergence, Rhat = 1).

# extract posterior probabilities for each effect
# (i.e., the proportion of the posterior distribution for that term that falls above/below 0)
print(hypothesis(bmdl, 'freqperiodic < 0'), digits = 3)

## Hypothesis Tests for class b:
##           Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (freqperiodic) < 0   -0.636     0.281   -1.099   -0.173      78.26     0.987
##   Star
## 1    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

print(hypothesis(bmdl, 'freqroutine < 0'), digits = 3)

## Hypothesis Tests for class b:
##          Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio Post.Prob
## 1 (freqroutine) < 0   -1.613     0.354    -2.19   -1.029      19999         1
##   Star
## 1    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

print(hypothesis(bmdl, 'n_episodic_details_survey > 0'), digits = 3)

## Hypothesis Tests for class b:
##                 Hypothesis Estimate Est.Error CI.Lower CI.Upper Evid.Ratio
## 1 (n_episodic_detai... > 0    0.165     0.047    0.088    0.243   2067.966
##   Post.Prob Star
## 1         1    *
## ---
## 'CI': 90%-CI for one-sided and 95%-CI for two-sided hypotheses.
## '*': For one-sided hypotheses, the posterior probability exceeds 95%;
## for two-sided hypotheses, the value tested against lies outside the 95%-CI.
## Posterior probabilities of point hypotheses assume equal prior probabilities.

Evidence for a “novelty penumbra”

A “novelty penumbra” refers to the idea that the effect of novelty on memory extends across time, such that doing something new will boost memory for non-novel experiences that happen shortly before or after. To test this idea, we can ask whether memory for non-novel events differs based on whether or not it occurred on the same day as a new event.

Result: Vividness ratings were higher for non-novel events when they happened on the same day as a novel event.

# figure out days of novel events
novel_events <- survey_data %>%
  filter(freq == 'new') %>%
  select(subject_id, group, title, survey_day, test_vividness) %>%
  group_by(subject_id, group) %>%
  summarise(new_events = list(unique(survey_day)))

# figure out when a non-novel event comes from the same day as a novel one
check_day_same <- function(day, day_list) { day %in% day_list }
vividness_near_novel <- left_join(survey_data, novel_events, by = c('subject_id', 'group')) %>%
  filter(freq != 'new') %>%
  mutate(test_delay = as.numeric(ymd(test_date) - ymd(start_date))) %>%
  select(subject_id, group, new_events, survey_day, test_delay, title, freq, test_vividness) %>%
  mutate(novel_sameday = map2_int(survey_day, new_events, check_day_same), .after = new_events) %>%
  mutate(novel_sameday = ifelse(novel_sameday == 1, 'yes', 'no'))

# run model testing if vividness ratings show a novelty penumbra 
mdl <- lmer(test_vividness ~ novel_sameday + (1 | subject_id), data = vividness_near_novel)
summary(mdl)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: test_vividness ~ novel_sameday + (1 | subject_id)
##    Data: vividness_near_novel
## 
## REML criterion at convergence: 4252.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.37541 -0.78305 -0.02406  0.76808  2.49296 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 0.2666   0.5163  
##  Residual               1.4777   1.2156  
## Number of obs: 1292, groups:  subject_id, 41
## 
## Fixed effects:
##                   Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)      2.643e+00  9.134e-02 4.651e+01  28.941  < 2e-16 ***
## novel_samedayyes 2.201e-01  7.814e-02 1.290e+03   2.817  0.00492 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr)
## novl_smdyys -0.282

# plot
em <- emmeans(mdl, pairwise ~ novel_sameday)
em_df <- as.data.frame(em$emmeans)
ggplot(data = vividness_near_novel, aes(x = novel_sameday, y = test_vividness, fill = novel_sameday, color = novel_sameday)) +
  geom_jitter(height = 0.2, width = 0.2, alpha = 0.2, size = 1, color = 'grey') +
  geom_point(data = em_df, aes(x = novel_sameday, y = emmean), size = 3.5) +
  geom_errorbar(data = em_df, aes(x = novel_sameday, y = emmean, ymin = lower.CL, ymax = upper.CL),
                width = 0, linewidth = 1.5) +
  scale_fill_manual(values = c('#F1C68D', '#E68770')) +
  scale_color_manual(values = c('#F1C68D', '#E68770')) +
  labs(x = 'on same day as novel event?', y = 'memory vividness')

# run model testing if the number of details recalled shows a novelty penumbra

# figure out when a non-novel event comes from the same day as a novel one
recall_near_novel <- left_join(survey_data, novel_events, by = c('subject_id', 'group')) %>%
  filter(freq != 'new', test_type == 'recall') %>%
  mutate(novel_sameday = map2_int(survey_day, new_events, check_day_same), .after = new_events) %>%
  mutate(novel_sameday = ifelse(novel_sameday == 1, 'yes', 'no'))

# recall for events that occur on the same day as a novel event was reported
# model controls for the number of details recorded during the survey/diary
mdl <- lmer(n_episodic_details_recall ~ novel_sameday + n_episodic_details_survey + (1 | subject_id), data = recall_near_novel)
summary(mdl)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## n_episodic_details_recall ~ novel_sameday + n_episodic_details_survey +  
##     (1 | subject_id)
##    Data: recall_near_novel
## 
## REML criterion at convergence: 2012.3
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.6060 -0.6306 -0.1217  0.5243  3.6853 
## 
## Random effects:
##  Groups     Name        Variance Std.Dev.
##  subject_id (Intercept) 2.730    1.652   
##  Residual               4.488    2.119   
## Number of obs: 444, groups:  subject_id, 41
## 
## Fixed effects:
##                            Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)                 4.66807    0.41763 107.54482  11.178  < 2e-16 ***
## novel_samedayyes            0.30074    0.23374 417.95998   1.287  0.19892    
## n_episodic_details_survey   0.12718    0.04321 350.83764   2.943  0.00346 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) nvl_sm
## novl_smdyys -0.140       
## n_psdc_dtl_ -0.724 -0.059

# plot
em <- emmeans(mdl, pairwise ~ novel_sameday)
em_df <- as.data.frame(em$emmeans)
ggplot(data = recall_near_novel, aes(x = novel_sameday, y = n_episodic_details_recall, fill = novel_sameday, color = novel_sameday)) +
  geom_violinhalf(trim = F, alpha = 0.5, position = position_nudge(0.05)) +
  geom_point(data = em_df, aes(x = novel_sameday, y = emmean), size = 3.5, position = position_nudge(-0.05)) +
  geom_errorbar(data = em_df, aes(x = novel_sameday, y = emmean, ymin = lower.CL, ymax = upper.CL),
                width = 0, linewidth = 1.5, position = position_nudge(-0.05)) +
  scale_fill_manual(values = c('#F1C68D', '#E68770')) +
  scale_color_manual(values = c('#F1C68D', '#E68770')) +
  labs(x = 'on same day as novel event?', y = 'number of details recalled')