EM5_2_analysis

Preregistered plan

Hypotheses will be tested using Generalized Linear Mixed-Effect Models (Lo & Andrews, 2015) with Condition as a fixed effect and child (participant) as a random intercept, and with appropriate linking functions.

The model for Study Time will have an inverse Gaussian linking function because of its similar distribution to reaction times (Lo & Andrews, 2015).

The model for Memory Usage will have an identity linking function because based on the results of our previous studies, we expect it to follow a normal distribution. If there are substantive violations of the assumptions of these analyses, we will corroborate results with analogous non-parametric tests.

Load libraries and data

library(tidyverse)

Warning: package 'tidyverse' was built under R version 4.2.3

Warning: package 'ggplot2' was built under R version 4.2.3

Warning: package 'readr' was built under R version 4.2.3

Warning: package 'stringr' was built under R version 4.2.3

Warning: package 'forcats' was built under R version 4.2.3

Warning: package 'lubridate' was built under R version 4.2.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.0.10     ✔ readr     2.1.4 
✔ forcats   1.0.0      ✔ stringr   1.5.0 
✔ ggplot2   3.5.1      ✔ tibble    3.1.8 
✔ lubridate 1.9.2      ✔ tidyr     1.2.1 
✔ purrr     0.3.4      
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggpubr)
library(ggforce)

Warning: package 'ggforce' was built under R version 4.2.3

library(effectsize)
library(lme4)

Warning: package 'lme4' was built under R version 4.2.3

Loading required package: Matrix

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack

library(lmerTest)

Warning: package 'lmerTest' was built under R version 4.2.3


Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step

library(emmeans)

Warning: package 'emmeans' was built under R version 4.2.3

library(MuMIn)

Warning: package 'MuMIn' was built under R version 4.2.3

library(MCMCglmm)

Warning: package 'MCMCglmm' was built under R version 4.2.3

Loading required package: coda

Warning: package 'coda' was built under R version 4.2.3

Loading required package: ape

Warning: package 'ape' was built under R version 4.2.3


Attaching package: 'ape'

The following object is masked from 'package:ggpubr':

    rotate

library(mixedpower)
library(DHARMa)

Warning: package 'DHARMa' was built under R version 4.2.3

This is DHARMa 0.4.6. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')

# Disable scientific notation
options(scipen = 999)

# Set working directory if working locally
setwd("D:/Canna_d/EM5_stuff/EM5_April_2025")

# Read in file
data <- read.csv("Summarized_first_trips_April25.csv", header = TRUE)

# label closing trial
data$condsFile <- ifelse(data$trial == 4, 
                       paste0(data$condsFile, "Closing"), 
                       data$condsFile)

# Read in file for age and combine with data
ages <- read.csv("ages.csv", header = T)
data <- left_join(data, ages, by = "participant")

# Make output more interpretable
options(contrasts = c("contr.sum","contr.poly"))

Study time

Process data

# Average over all trials for each condition, for each child, as per preregistration
summarized_data <- data %>%
  group_by(participant, condsFile) %>%
  summarize(mean_MU = mean(meanMU, na.rm = TRUE),
            mean_StudyTime = mean(meanListTime, na.rm = TRUE),
            mean_Streak = mean(meanCorBefInc, na.rm = TRUE),
            age = first(Age),
            .groups = 'drop')

# make sure condition is a factor
summarized_data$condsFile <- as.factor(summarized_data$condsFile)

# Remove closing trials
freeResponseOnly <- summarized_data %>%
  filter(!(condsFile %in% c("noDelayClosing", "longDelayClosing")))

Model study time

# Run the model
studyTimeGlmm <- glmer(mean_StudyTime ~ condsFile + (1|participant), 
                         data = freeResponseOnly, 
                         family = inverse.gaussian(link = "identity"))

# no errors or warnings

# Looks OK 4/12/25
plot(density(freeResponseOnly$mean_StudyTime), 
     xlim=c(0, 160), ylim=c(0, 1), main='M_1 y_hat')
lines(density(predict(studyTimeGlmm, type='response')), col='red')

# Heteroskedasticity 4/12
plot(studyTimeGlmm)

# Looks good to me 4/12
residuals <- residuals(studyTimeGlmm)
qqnorm(residuals)
qqline(residuals, col = "red")

# Get an overall check
library(performance)
check_model(studyTimeGlmm)

Loading required namespace: statmod

Simulations from your fitted model produce infinite values. Consider if this is sensible

Cannot simulate residuals for models of class `glmerMod`. Please try
  `check_model(..., residual_type = "normal")` instead.

# Heterogeneity of variance.....

# Below we will compare model with one that has no fixed effects
# To see if the fixed effects (condsFile) is important at explaining the data
mixConTime_GLMM_null <- glmer(mean_StudyTime ~ 1 + (1|participant), 
                              data = freeResponseOnly, 
                              family = inverse.gaussian(link = "identity"))

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.0721721 (tol = 0.002, component 1)

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?

# Null model failed to converge?
mixTimeConAov <- cbind(car::Anova(studyTimeGlmm),
                       r.squaredGLMM(studyTimeGlmm,mixConTime_GLMM_null))

Warning: 'r.squaredGLMM' now calculates a revised statistic. See the help page.

# Compare the models
anova(mixConTime_GLMM_null,studyTimeGlmm)

Data: freeResponseOnly
Models:
mixConTime_GLMM_null: mean_StudyTime ~ 1 + (1 | participant)
studyTimeGlmm: mean_StudyTime ~ condsFile + (1 | participant)
                     npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
mixConTime_GLMM_null    3 625.04 634.05 -309.52   619.04                     
studyTimeGlmm           4 618.61 630.62 -305.30   610.61 8.4319  1   0.003687
                       
mixConTime_GLMM_null   
studyTimeGlmm        **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# The full model fits better 4/12/25

summary(studyTimeGlmm)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: inverse.gaussian  ( identity )
Formula: mean_StudyTime ~ condsFile + (1 | participant)
   Data: freeResponseOnly

     AIC      BIC   logLik deviance df.resid 
   618.6    630.6   -305.3    610.6      145 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.74842 -0.54638 -0.08409  0.47357  2.28651 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 3.08496  1.7564  
 Residual                0.02398  0.1548  
Number of obs: 149, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value             Pr(>|z|)    
(Intercept)   6.2179     0.3769  16.497 < 0.0000000000000002 ***
condsFile1    0.3175     0.1087   2.921              0.00348 ** 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr)
condsFile1 -0.001

# No delay has less study time than long delay, sig 4/12

contrasts(freeResponseOnly$condsFile)

                 [,1] [,2] [,3]
longDelay           1    0    0
longDelayClosing    0    1    0
noDelay             0    0    1
noDelayClosing     -1   -1   -1

emmeans(studyTimeGlmm, pairwise ~ condsFile)

$emmeans
 condsFile emmean    SE  df asymp.LCL asymp.UCL
 longDelay   6.54 0.392 Inf      5.77      7.30
 noDelay     5.90 0.392 Inf      5.13      6.67

Confidence level used: 0.95 

$contrasts
 contrast            estimate    SE  df z.ratio p.value
 longDelay - noDelay    0.635 0.217 Inf   2.921  0.0035

Age effects model & KS test

To check the distribution of age and to (exploratory) see if age has an effect on any DV’s.

# Run the KS test
ks.test(freeResponseOnly$age, "punif", 
        min(freeResponseOnly$age), 
        max(freeResponseOnly$age))

Warning in ks.test.default(freeResponseOnly$age, "punif",
min(freeResponseOnly$age), : ties should not be present for the
Kolmogorov-Smirnov test


    Asymptotic one-sample Kolmogorov-Smirnov test

data:  freeResponseOnly$age
D = 0.20422, p-value = 0.000008008
alternative hypothesis: two-sided

# Plot the density of actual data
plot(density(freeResponseOnly$age), 
     main = "Age Distribution vs Uniform", 
     xlab = "Age", 
     ylab = "Density", 
     xlim = c(min(freeResponseOnly$age), max(freeResponseOnly$age)),
     lwd = 2)

# Add a horizontal line for the uniform distribution's density
abline(h = 1 / (max(freeResponseOnly$age) - min(freeResponseOnly$age)), 
       col = "red", lty = 2, lwd = 2)

legend("topright", legend = c("Observed Density", "Uniform Density"),
       col = c("black", "red"), lty = c(1,2), lwd = 2)

# Center age
freeResponseOnly$age.ct <- freeResponseOnly$age - mean(freeResponseOnly$age)

# Run the age model for study time
studyTimeGlmmAge <- glmer(mean_StudyTime ~ condsFile + 
                         + age.ct + age.ct*condsFile + (1|participant), 
                         data = freeResponseOnly, 
                         family = inverse.gaussian(link = "identity"))

# no errors or warnings

# Looks OK 4/21/25
plot(density(freeResponseOnly$mean_StudyTime), 
     xlim=c(0, 160), ylim=c(0, 1), main='M_1 y_hat')
lines(density(predict(studyTimeGlmmAge, type='response')), col='red')

# Heteroskedasticity 4/21
plot(studyTimeGlmmAge)

# Looks good to me 4/21
residuals <- residuals(studyTimeGlmmAge)
qqnorm(residuals)
qqline(residuals, col = "red")

# Looks good 4/21
library(performance)
check_model(studyTimeGlmmAge)

Simulations from your fitted model produce infinite values. Consider if this is sensible

Cannot simulate residuals for models of class `glmerMod`. Please try
  `check_model(..., residual_type = "normal")` instead.

# Below we will compare model with one that has no fixed effects
# To see if the fixed effects (condsFile) is important at explaining the data
mixConTime_GLMM_null <- glmer(mean_StudyTime ~ 1 + (1|participant), 
                              data = freeResponseOnly, 
                              family = inverse.gaussian(link = "identity"))

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.0721721 (tol = 0.002, component 1)

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, : Model is nearly unidentifiable: very large eigenvalue
 - Rescale variables?

# Null model failed to converge?
mixTimeConAov <- cbind(car::Anova(studyTimeGlmmAge),
                       r.squaredGLMM(studyTimeGlmmAge,mixConTime_GLMM_null)) 

# Compare the models
anova(mixConTime_GLMM_null,studyTimeGlmmAge)

Data: freeResponseOnly
Models:
mixConTime_GLMM_null: mean_StudyTime ~ 1 + (1 | participant)
studyTimeGlmmAge: mean_StudyTime ~ condsFile + +age.ct + age.ct * condsFile + (1 | participant)
                     npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
mixConTime_GLMM_null    3 625.04 634.05 -309.52   619.04                     
studyTimeGlmmAge        6 619.37 637.39 -303.68   607.37 11.672  3   0.008597
                       
mixConTime_GLMM_null   
studyTimeGlmmAge     **
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# The full model fits better 4/21/25

summary(studyTimeGlmmAge)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: inverse.gaussian  ( identity )
Formula: mean_StudyTime ~ condsFile + +age.ct + age.ct * condsFile + (1 |  
    participant)
   Data: freeResponseOnly

     AIC      BIC   logLik deviance df.resid 
   619.4    637.4   -303.7    607.4      143 

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-1.80065 -0.52542 -0.09949  0.38394  2.36748 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 3.04963  1.7463  
 Residual                0.02351  0.1533  
Number of obs: 149, groups:  participant, 75

Fixed effects:
                  Estimate Std. Error t value             Pr(>|z|)    
(Intercept)         6.2127     0.3753  16.555 < 0.0000000000000002 ***
condsFile1          0.2716     0.1050   2.587              0.00969 ** 
age.ct             -0.3665     0.7453  -0.492              0.62289    
condsFile1:age.ct   0.4024     0.2364   1.702              0.08867 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) cndsF1 age.ct
condsFile1  -0.019              
age.ct       0.102  0.040       
cndsFl1:g.c  0.033 -0.198  0.061

# No delay has less study time than long delay, sig 4/21
# Age fixed effect non sig
# Older kids marginally study in no delay for less time

Plot study time

Age effects plot

# Define the levels of condsFile
conds_levels <- c("noDelay", "longDelay")

# Define colors for each condition
colors <- c("#0072B2", "#D55E00")

# Create an empty plot
plot(
  NA, xlim = range(freeResponseOnly$age), ylim = c(0, 20),  # Adjusted y-axis range
  xlab = "Age", ylab = "Study time (seconds)",
  main = "",
  cex.lab = 1.6,       # Axis labels (xlab, ylab)
cex.axis = 1.5,    # Tick labels
cex.main = 1.2     # Title

)

# Add points and trend lines for each condition
for (i in 1:length(conds_levels)) {
  # Filter data for the current condition
  condition_data <- freeResponseOnly[freeResponseOnly$condsFile == conds_levels[i], ]
  
  # Add points
  points(condition_data$age, condition_data$mean_StudyTime, col = adjustcolor(colors[i], alpha.f = 0.4), pch = 16)
  
  # Add a trend line (linear regression line)
  abline(lm(mean_StudyTime ~ age, data = condition_data), col = colors[i], lwd = 2)
}

# Add a legend to the plot
legend("topright", legend = conds_levels, col = colors, lty = 1, lwd = 2, pch = 16)

Memory Usage

Model for MU

# View the distribution of MU
hist(freeResponseOnly$mean_MU)

# Run the model
MU_GLMM <- glmer(mean_MU ~ condsFile + (1|participant),
                 data = freeResponseOnly, 
                    family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ condsFile + (1 | participant), data =
freeResponseOnly, : calling glmer() with family=gaussian (identity link) as a
shortcut to lmer() is deprecated; please call lmer() directly

# No errors etc. It says this is an lmer, truee

# Could be better 4/12
plot(density(freeResponseOnly$mean_MU), xlim=c(-50, 50), ylim=c(0, 0.6), 
     main='M_1 y_hat')
lines(density(predict(MU_GLMM, type='response') ), col='red')

# Heteroskedasticity 4/12
plot(MU_GLMM)

# The qq plot is quite wiggly in the upper tail 4/12
residuals <- residuals(MU_GLMM)
qqnorm(residuals)
qqline(residuals, col = "red")

# This model does not fit very well... 4/12
check_model(MU_GLMM)

MUfree_null <- glmer(mean_MU ~ 1 + (1|participant), 
                     data = freeResponseOnly, family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ 1 + (1 | participant), data = freeResponseOnly, :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

MUfreeAov <- cbind(car::Anova(MU_GLMM),
                   r.squaredGLMM(MU_GLMM,MUfree_null))

# The null not necessarily fitting better but less AIC 4/12
anova(MUfree_null,MU_GLMM)

refitting model(s) with ML (instead of REML)

Data: freeResponseOnly
Models:
MUfree_null: mean_MU ~ 1 + (1 | participant)
MU_GLMM: mean_MU ~ condsFile + (1 | participant)
            npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
MUfree_null    3 441.31 450.32 -217.65   435.31                       
MU_GLMM        4 440.48 452.50 -216.24   432.48 2.8228  1    0.09293 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# No delay has less MU but it is non-significant!
summary(MU_GLMM)

Linear mixed model fit by REML ['lmerMod']
Formula: mean_MU ~ condsFile + (1 | participant)
   Data: freeResponseOnly

REML criterion at convergence: 438.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.9526 -0.5412 -0.1849  0.4072  4.3791 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.3115   0.5581  
 Residual                0.8135   0.9020  
Number of obs: 149, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value
(Intercept)  1.72009    0.09810  17.534
condsFile1   0.12453    0.07396   1.684

Correlation of Fixed Effects:
           (Intr)
condsFile1 0.006

MUfreeAov

             Chisq Df Pr(>Chisq)        R2m       R2c
condsFile 2.834912  1 0.09223585 0.01368697 0.2867805

emmeans(MU_GLMM, pairwise ~ condsFile)

$emmeans
 condsFile emmean    SE  df lower.CL upper.CL
 longDelay   1.84 0.123 137     1.60     2.09
 noDelay     1.60 0.122 137     1.35     1.84

Degrees-of-freedom method: kenward-roger 
Confidence level used: 0.95 

$contrasts
 contrast            estimate    SE   df t.ratio p.value
 longDelay - noDelay    0.249 0.148 73.6   1.684  0.0965

Degrees-of-freedom method: kenward-roger

Streak correct GLMM

This is just an exploratory analysis. Don’t think we will do anything with this, was just comparing the results with MU.

# Try streak
#library(glmmTMB)
# Only on whole values!
#streak_NB <- glmmTMB(mean_Streak ~ condsFile + (1|participant),
#                     data = freeResponseOnly,
#                     family = nbinom2)  # or nbinom1 depending on the data

# make sure condition is a factor
data$condsFile <- as.factor(data$condsFile)

# Remove closing trials
FR_rawdata <- data %>%
  filter(!(condsFile %in% c("noDelayClosing", "longDelayClosing")))

# Run the model
streakModel <- glmer.nb(meanCorBefInc ~ condsFile + (1|participant),
                        data = FR_rawdata)

Warning in theta.ml(Y, mu, weights = object@resp$weights, limit = limit, :
iteration limit reached

# not the best 4/21
plot(density(FR_rawdata$meanCorBefInc), xlim=c(-50, 50), ylim=c(0, 0.6), 
     main='M_1 y_hat')
lines(density(predict(streakModel, type='response') ), col='red')

# ?? 4/21
plot(streakModel)

# The qq plot is quite wiggly in the tail 4/21
residuals <- residuals(streakModel)
qqnorm(residuals)
qqline(residuals, col = "red")

# Pfft what 4/21
check_model(streakModel)

streak_null <- glmer.nb(meanCorBefInc ~ 1 + (1|participant), 
                     data = FR_rawdata)

Warning in theta.ml(Y, mu, weights = object@resp$weights, limit = limit, :
iteration limit reached

streak_Aov <- cbind(car::Anova(streakModel),
                   r.squaredGLMM(streakModel,streak_null))

Warning in data.frame(..., check.names = FALSE): row names were found from a
short variable and have been discarded

# Streak model fits marginally better than null
anova(streak_null,streakModel)

Data: FR_rawdata
Models:
streak_null: meanCorBefInc ~ 1 + (1 | participant)
streakModel: meanCorBefInc ~ condsFile + (1 | participant)
            npar    AIC    BIC logLik deviance  Chisq Df Pr(>Chisq)  
streak_null    3 1436.0 1448.3 -715.0   1430.0                       
streakModel    4 1434.6 1451.0 -713.3   1426.6 3.4182  1    0.06448 .
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# No delay has less streak but it is non-significant!
summary(streakModel)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: Negative Binomial(137120.8)  ( log )
Formula: meanCorBefInc ~ condsFile + (1 | participant)
   Data: FR_rawdata

     AIC      BIC   logLik deviance df.resid 
  1434.6   1451.0   -713.3   1426.6      441 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.0088 -0.5448 -0.0923  0.4035  3.1895 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.1015   0.3185  
Number of obs: 445, groups:  participant, 75

Fixed effects:
            Estimate Std. Error z value            Pr(>|z|)    
(Intercept)  0.59859    0.05189  11.536 <0.0000000000000002 ***
condsFile1   0.06326    0.03424   1.847              0.0647 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr)
condsFile1 -0.031

streak_Aov

     Chisq Df Pr(>Chisq)         R2m       R2c
1 3.412602  1 0.06470035 0.006392880 0.1681533
2 3.412602  1 0.06470035 0.007632884 0.2007694
3 3.412602  1 0.06470035 0.005101987 0.1341987

# Also there were weird warnings

# Replicate Bill's model:
streak_lmer <- lmer(mean_Streak ~ condsFile + (1|participant),
                     data = freeResponseOnly)

plot(streak_lmer) # heteroskedatsic 4/21

summary(streak_lmer)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: mean_Streak ~ condsFile + (1 | participant)
   Data: freeResponseOnly

REML criterion at convergence: 440.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.6181 -0.5669 -0.1768  0.3657  4.6999 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.4233   0.6506  
 Residual                0.7524   0.8674  
Number of obs: 149, groups:  participant, 75

Fixed effects:
            Estimate Std. Error       df t value            Pr(>|t|)    
(Intercept)  1.92508    0.10347 74.09021  18.605 <0.0000000000000002 ***
condsFile1   0.12508    0.07115 73.72598   1.758              0.0829 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr)
condsFile1 0.006

# No delay has less of a streak, but non-sig 4/21

Age effects model

# Run the model for age effects with MU
MU_GLMMAge <- glmer(mean_MU ~ condsFile + 
                   + age.ct + age.ct*condsFile + (1|participant),
                 data = freeResponseOnly, family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ condsFile + +age.ct + age.ct * condsFile + (1 | :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

# No errors etc. It says this is an lmer, truee  

# Could be better 4/21
plot(density(freeResponseOnly$mean_MU), xlim=c(-50, 50), ylim=c(0, 0.6), 
     main='M_1 y_hat')
lines(density(predict(MU_GLMMAge, type='response') ), col='red')

# Heteroskedasticity 4/21
plot(MU_GLMMAge)

# The qq plot is quite wiggly in the upper tail 4/21
residuals <- residuals(MU_GLMMAge)
qqnorm(residuals)
qqline(residuals, col = "red")

# This model does not fit very well... 4/21
check_model(MU_GLMMAge)

MUfree_null <- glmer(mean_MU ~ 1 + (1|participant), 
                     data = freeResponseOnly, family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ 1 + (1 | participant), data = freeResponseOnly, :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

MUfreeAov <- cbind(car::Anova(MU_GLMMAge),
                   r.squaredGLMM(MU_GLMMAge,MUfree_null))

# The null not necessarily fitting better and less AIC 4/21
anova(MUfree_null,MU_GLMMAge)

refitting model(s) with ML (instead of REML)

Data: freeResponseOnly
Models:
MUfree_null: mean_MU ~ 1 + (1 | participant)
MU_GLMMAge: mean_MU ~ condsFile + +age.ct + age.ct * condsFile + (1 | participant)
            npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
MUfree_null    3 441.31 450.32 -217.65   435.31                     
MU_GLMMAge     6 443.73 461.75 -215.86   431.73 3.5778  3     0.3108

# No delay has less MU but it is non-significant!
summary(MU_GLMMAge)

Linear mixed model fit by REML ['lmerMod']
Formula: mean_MU ~ condsFile + +age.ct + age.ct * condsFile + (1 | participant)
   Data: freeResponseOnly

REML criterion at convergence: 440.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.9435 -0.5616 -0.1774  0.4750  4.2326 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.3135   0.5599  
 Residual                0.8211   0.9062  
Number of obs: 149, groups:  participant, 75

Fixed effects:
                  Estimate Std. Error t value
(Intercept)        1.72029    0.09850  17.466
condsFile1         0.12481    0.07431   1.680
age.ct            -0.14612    0.23863  -0.612
condsFile1:age.ct -0.10857    0.17988  -0.604

Correlation of Fixed Effects:
            (Intr) cndsF1 age.ct
condsFile1   0.006              
age.ct       0.001 -0.005       
cndsFl1:g.c -0.005 -0.001  0.004

MUfreeAov

                     Chisq Df Pr(>Chisq)        R2m       R2c
condsFile        2.8185851  1 0.09317851 0.01840625 0.2896174
age.ct           0.3723134  1 0.54174598 0.01840625 0.2896174
condsFile:age.ct 0.3643126  1 0.54612083 0.01840625 0.2896174

# Age not a factor at all 4/21

Memory usage plot

MU Age plot

# Define the levels of condsFile
conds_levels <- c("noDelay", "longDelay")

# Define colors for each condition
colors <- c("#0072B2", "#D55E00")

# Create an empty plot
plot(
  NA, xlim = range(freeResponseOnly$age), ylim = c(-5, 10),  # Adjusted y-axis range
  xlab = "Age", ylab = "Memory Usage (items)",
  main = "",
  cex.lab = 1.6,       # Axis labels (xlab, ylab)
cex.axis = 1.5,    # Tick labels
cex.main = 1.2     # Title

)

# Add points and trend lines for each condition
for (i in 1:length(conds_levels)) {
  # Filter data for the current condition
  condition_data <- freeResponseOnly[freeResponseOnly$condsFile == conds_levels[i], ]
  
  # Add points
  points(condition_data$age, condition_data$mean_MU, col = adjustcolor(colors[i], alpha.f = 0.4), pch = 16)
  
  # Add a trend line (linear regression line)
  abline(lm(mean_MU ~ age, data = condition_data), col = colors[i], lwd = 2)
}

# Add a legend to the plot
legend("topright", legend = conds_levels, col = colors, lty = 1, lwd = 2, pch = 16)

Free VS Total response

Process Free V Total

library(dplyr)
# Label cond-type to say if its free or total-response
summarized_data <- summarized_data %>%
  mutate(condType = case_when(
    condsFile %in% c("noDelayClosing", "longDelayClosing") ~ "Total",
    condsFile %in% c("noDelay", "longDelay") ~ "Free",
    TRUE ~ NA_character_
  ))

# Average over participant and condition
freeVtotal <- summarized_data %>%
  group_by(participant, condType) %>%
  summarize(mean_MU = mean(mean_MU, na.rm = TRUE),
            mean_StudyTime = mean(mean_StudyTime, na.rm = TRUE),
            age = first(age),
            .groups = 'drop')

Model free V total

# Observe distribution
hist(freeVtotal$mean_MU)

# Run the model
freeVtotal_glmm <- glmer(mean_MU ~ condType + (1|participant),
                 data = freeVtotal, 
                    family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ condType + (1 | participant), data = freeVtotal, :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

# No errors etc. It says this is an lmer, truee

# Could be better 4/12
plot(density(freeVtotal$mean_MU), xlim=c(-50, 50), ylim=c(0, 0.6), 
     main='M_1 y_hat')
lines(density(predict(freeVtotal_glmm, type='response') ), col='red')

# Heteroskedasticity  ALOT 4/12
plot(freeVtotal_glmm)

# The qq plot is quite wiggly in both tails 4/12
residuals <- residuals(freeVtotal_glmm)
qqnorm(residuals)
qqline(residuals, col = "red")

# This model does not fit very well... 4/12
check_model(freeVtotal_glmm)

MUfree_null <- glmer(mean_MU ~ 1 + (1|participant), 
                     data = freeVtotal, family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ 1 + (1 | participant), data = freeVtotal, family =
gaussian(link = "identity")): calling glmer() with family=gaussian (identity
link) as a shortcut to lmer() is deprecated; please call lmer() directly

MUfreeAov <- cbind(car::Anova(freeVtotal_glmm),
                   r.squaredGLMM(freeVtotal_glmm,MUfree_null))

# The null not fitting better but less AIC 4/12
anova(MUfree_null,freeVtotal_glmm)

refitting model(s) with ML (instead of REML)

Data: freeVtotal
Models:
MUfree_null: mean_MU ~ 1 + (1 | participant)
freeVtotal_glmm: mean_MU ~ condType + (1 | participant)
                npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
MUfree_null        3 638.84 647.88 -316.42   632.84                     
freeVtotal_glmm    4 639.97 652.01 -315.98   631.97 0.8786  1     0.3486

# Free response has less MU but it is non-significant!
summary(freeVtotal_glmm)

Linear mixed model fit by REML ['lmerMod']
Formula: mean_MU ~ condType + (1 | participant)
   Data: freeVtotal

REML criterion at convergence: 635.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.12767 -0.40792  0.03309  0.43621  2.86641 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.9624   0.981   
 Residual                3.1610   1.778   
Number of obs: 150, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value
(Intercept)   1.8511     0.1841  10.053
condType1    -0.1356     0.1452  -0.934

Correlation of Fixed Effects:
          (Intr)
condType1 0.000

MUfreeAov

             Chisq Df Pr(>Chisq)         R2m       R2c
condType 0.8719833  1  0.3504068 0.004466303 0.2368211

Age effects model Free V Total

# Center age
freeVtotal$age.ct <- freeVtotal$age - mean(freeVtotal$age)

# Run the model
freeVtotal_glmmAge <- glmer(mean_MU ~ condType + + age.ct + age.ct*condType
                         + (1|participant), data = freeVtotal, 
                    family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ condType + +age.ct + age.ct * condType + (1 | :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

# No errors etc. It says this is an lmer, truee

# Could be better 4/21
plot(density(freeVtotal$mean_MU), xlim=c(-50, 50), ylim=c(0, 0.6), 
     main='M_1 y_hat')
lines(density(predict(freeVtotal_glmmAge, type='response') ), col='red')

# Heteroskedasticity  4/21
plot(freeVtotal_glmmAge)

# The qq plot is quite wiggly in both tails 4/21
residuals <- residuals(freeVtotal_glmmAge)
qqnorm(residuals)
qqline(residuals, col = "red")

# This model does not fit very well... 4/21
check_model(freeVtotal_glmmAge)

MUfree_null <- glmer(mean_MU ~ 1 + (1|participant), 
                     data = freeVtotal, family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ 1 + (1 | participant), data = freeVtotal, family =
gaussian(link = "identity")): calling glmer() with family=gaussian (identity
link) as a shortcut to lmer() is deprecated; please call lmer() directly

MUfreeAov <- cbind(car::Anova(freeVtotal_glmmAge),
                   r.squaredGLMM(freeVtotal_glmmAge,MUfree_null))

# The null not fitting better and less AIC 4/21
anova(MUfree_null,freeVtotal_glmmAge)

refitting model(s) with ML (instead of REML)

Data: freeVtotal
Models:
MUfree_null: mean_MU ~ 1 + (1 | participant)
freeVtotal_glmmAge: mean_MU ~ condType + +age.ct + age.ct * condType + (1 | participant)
                   npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)
MUfree_null           3 638.84 647.88 -316.42   632.84                     
freeVtotal_glmmAge    6 641.47 659.53 -314.73   629.47 3.3756  3     0.3373

# Free response has less MU but it is non-significant!
summary(freeVtotal_glmmAge)

Linear mixed model fit by REML ['lmerMod']
Formula: mean_MU ~ condType + +age.ct + age.ct * condType + (1 | participant)
   Data: freeVtotal

REML criterion at convergence: 633.1

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-3.13533 -0.39617 -0.01993  0.39116  2.75476 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.9543   0.9769  
 Residual                3.1553   1.7763  
Number of obs: 150, groups:  participant, 75

Fixed effects:
                 Estimate Std. Error t value
(Intercept)        1.8511     0.1837  10.075
condType1         -0.1356     0.1450  -0.935
age.ct            -0.5116     0.4457  -1.148
condType1:age.ct   0.3745     0.3518   1.064

Correlation of Fixed Effects:
            (Intr) cndTy1 age.ct
condType1   0.000               
age.ct      0.000  0.000        
cndTyp1:g.c 0.000  0.000  0.000

MUfreeAov

                    Chisq Df Pr(>Chisq)        R2m       R2c
condType        0.8735523  1  0.3499738 0.02079634 0.2481882
age.ct          1.3177491  1  0.2509964 0.02079634 0.2481882
condType:age.ct 1.1331529  1  0.2871038 0.02079634 0.2481882

Free V Total plot

# memory usage -----------
# Calculate means and standard errors
means <- tapply(freeVtotal$mean_MU, freeVtotal$condType, mean)

# Calculate standard deviations and sample sizes for each condition
sds <- tapply(freeVtotal$mean_MU, freeVtotal$condType, sd)
n_per_condition <- tapply(freeVtotal$mean_MU, freeVtotal$condType, length)

# Calculate standard errors
se <- sds / sqrt(n_per_condition)

# Define the x-axis levels (to match the ggplot2 ordering)
conds_levels <- c("Free", "Total")
x_positions <- 1:length(conds_levels)

# Create an empty plot
plot(
  x_positions, means[conds_levels], 
  type = "n", xaxt = "n", ylim = c(-5, 10.0), xlim = c(0.5, 2.5),
  xlab = "", ylab = "Mean Memory Usage (items)", 
  main = "",
    cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 1.5
)

participants <- unique(freeVtotal$participant)

for (p in participants) {
  this_participant <- freeVtotal[freeVtotal$participant == p, ]
  if (nrow(this_participant) == 2) {
    # Get the x positions for this participant’s conditions
    x_vals <- match(this_participant$condType, conds_levels)
    y_vals <- this_participant$mean_MU
    lines(x_vals, y_vals, col = adjustcolor("#009E73", alpha.f = 0.3), lwd = 1)
  }
}


# Add x-axis labels normally
axis(1, at = x_positions, labels = conds_levels, cex.axis = 1.5)

# Add points for each participant
for (i in 1:length(conds_levels)) {
  condition_data <- freeVtotal[freeVtotal$condType == conds_levels[i], ]
  points(rep(x_positions[i], nrow(condition_data)), condition_data$mean_MU, 
         pch = 1, col = "#0072B2")
}

# Add points for the means
points(x_positions, means[conds_levels], pch = 10, col = "black")

# Add a solid line connecting the means
lines(x_positions, means[conds_levels], col = "black", lwd = 2)

# Add horizontal segments at the means (optional, similar to ggplot2's stat_summary)
segments(
  x_positions - 0.1, means[conds_levels], 
  x_positions + 0.1, means[conds_levels],
  col = "black", lwd = 3.0
)

# Add mean labels for each condition
text(x_positions, means[conds_levels] + 2, 
     labels = round(means[conds_levels], 2), cex = 2.5, col = "black")

# Add error bars using arrows() with specified lower and upper bounds
# Function to compute standard error
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

# Get standard errors for each condition
ses <- tapply(freeVtotal$mean_MU, freeVtotal$condType, se)

# Get means again, just to be sure
means <- tapply(freeVtotal$mean_MU, freeVtotal$condType, mean)

# Define bounds for error bars
lower_bounds <- means[conds_levels] - 2*ses[conds_levels]
upper_bounds <- means[conds_levels] + 2*ses[conds_levels]


# Add vertical error bars using arrows()
for (i in 1:length(x_positions)) {
  arrows(
    x0 = x_positions[i], y0 = lower_bounds[i], 
    x1 = x_positions[i], y1 = upper_bounds[i], 
    length = 0.1, angle = 90, code = 3, col = "#D55E00", lwd = 3.5
  )
}

Free V Total age plot

# Define the levels of condsFile
conds_levels <- c("Free", "Total")

# Define colors for each condition
colors <- c("#0072B2", "#D55E00")

# Create an empty plot
plot(
  NA, xlim = range(freeVtotal$age), ylim = c(-5, 10),  # Adjusted y-axis range
  xlab = "Age", ylab = "Memory Usage (items)",
  main = "",
  cex.lab = 1.6,       # Axis labels (xlab, ylab)
cex.axis = 1.5,    # Tick labels
cex.main = 1.2     # Title

)

# Add points and trend lines for each condition
for (i in 1:length(conds_levels)) {
  # Filter data for the current condition
  condition_data <- freeVtotal[freeVtotal$condType == conds_levels[i], ]
  
  # Add points
  points(condition_data$age, condition_data$mean_MU, col = adjustcolor(colors[i], alpha.f = 0.4), pch = 16)
  
  # Add a trend line (linear regression line)
  abline(lm(mean_MU ~ age, data = condition_data), col = colors[i], lwd = 2)
}

# Add a legend to the plot
legend("topright", legend = conds_levels, col = colors, lty = 1, lwd = 2, pch = 16)

SRCD poster plots

par(mfrow = c(1, 2))

# Rename values in the 'condsFile' column
freeResponseOnly$condsFile <- as.character(freeResponseOnly$condsFile)
freeResponseOnly$condsFile[freeResponseOnly$condsFile == "noDelay"] <- "No-Delay"
freeResponseOnly$condsFile[freeResponseOnly$condsFile == "longDelay"] <- "Long-Delay"

# study time ------------
# Calculate means
means <- tapply(freeResponseOnly$mean_StudyTime, freeResponseOnly$condsFile, mean)

# Define the x-axis levels (to match the ggplot2 ordering)
conds_levels <- c("No-Delay", "Long-Delay")
x_positions <- 1:length(conds_levels)

# Create an empty plot
plot(
  x_positions, means[conds_levels], 
  type = "n", xaxt = "n", ylim = c(0, 20.0), xlim = c(0.5, 2.5),
  xlab = "", ylab = "Mean Study Time (seconds)", 
  main = "",
    cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 1.5
)

participants <- unique(freeResponseOnly$participant)

for (p in participants) {
  this_participant <- freeResponseOnly[freeResponseOnly$participant == p, ]
  if (nrow(this_participant) == 2) {
    # Get the x positions for this participant’s conditions
    x_vals <- match(this_participant$condsFile, conds_levels)
    y_vals <- this_participant$mean_StudyTime
    lines(x_vals, y_vals, col = adjustcolor("#009E73", alpha.f = 0.3), lwd = 1)
  }
}

# Add x-axis labels normally
axis(1, at = x_positions, labels = conds_levels, cex.axis = 1.5)

# Add points for each participant
for (i in 1:length(conds_levels)) {
  condition_data <- freeResponseOnly[freeResponseOnly$condsFile == conds_levels[i], ]
  points(rep(x_positions[i], nrow(condition_data)), condition_data$mean_StudyTime, 
         pch = 1, col = "#0072B2")
}

# Add points for the means
points(x_positions, means[conds_levels], pch = 10, col = "black")

# Add a solid line connecting the means
lines(x_positions, means[conds_levels], col = "black", lwd = 2)

# Add horizontal segments at the means (optional, similar to ggplot2's stat_summary)
segments(
  x_positions - 0.1, means[conds_levels], 
  x_positions + 0.1, means[conds_levels],
  col = "black", lwd = 3.0
)

# Add mean labels for each condition
text(x_positions, means[conds_levels] + 3, 
     labels = round(means[conds_levels], 2), cex = 2.5, col = "black")

# Add error bars using arrows() with specified lower and upper bounds
# Function to compute standard error
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

# Get standard errors for each condition
ses <- tapply(freeResponseOnly$mean_StudyTime, freeResponseOnly$condsFile, se)

# Get means again, just to be sure
means <- tapply(freeResponseOnly$mean_StudyTime, freeResponseOnly$condsFile, mean)

# Define bounds for error bars
lower_bounds <- means[conds_levels] - 2*ses[conds_levels]
upper_bounds <- means[conds_levels] + 2*ses[conds_levels]


# Add vertical error bars using arrows()
for (i in 1:length(x_positions)) {
  arrows(
    x0 = x_positions[i], y0 = lower_bounds[i], 
    x1 = x_positions[i], y1 = upper_bounds[i], 
    length = 0.1, angle = 90, code = 3, col = "#D55E00", lwd = 3.5
  )
}

# memory usage -----------
# Calculate means and standard errors
means <- tapply(freeResponseOnly$mean_MU, freeResponseOnly$condsFile, mean)

# Calculate standard deviations and sample sizes for each condition
sds <- tapply(freeResponseOnly$mean_MU, freeResponseOnly$condsFile, sd)
n_per_condition <- tapply(freeResponseOnly$mean_MU, freeResponseOnly$condsFile, length)

# Calculate standard errors
se <- sds / sqrt(n_per_condition)

# Define the x-axis levels (to match the ggplot2 ordering)
conds_levels <- c("No-Delay", "Long-Delay")
x_positions <- 1:length(conds_levels)

# Create an empty plot
plot(
  x_positions, means[conds_levels], 
  type = "n", xaxt = "n", ylim = c(-1, 7.0), xlim = c(0.5, 2.5),
  xlab = "", ylab = "Mean Memory Usage (items)", 
  main = "",
    cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 1.5
)

participants <- unique(freeResponseOnly$participant)

for (p in participants) {
  this_participant <- freeResponseOnly[freeResponseOnly$participant == p, ]
  if (nrow(this_participant) == 2) {
    # Get the x positions for this participant’s conditions
    x_vals <- match(this_participant$condsFile, conds_levels)
    y_vals <- this_participant$mean_MU
    lines(x_vals, y_vals, col = adjustcolor("#009E73", alpha.f = 0.3), lwd = 1)
  }
}


# Add x-axis labels normally
axis(1, at = x_positions, labels = conds_levels, cex.axis = 1.5)

# Add points for each participant
for (i in 1:length(conds_levels)) {
  condition_data <- freeResponseOnly[freeResponseOnly$condsFile == conds_levels[i], ]
  points(rep(x_positions[i], nrow(condition_data)), condition_data$mean_MU, 
         pch = 1, col = "#0072B2")
}

# Add points for the means
points(x_positions, means[conds_levels], pch = 10, col = "black")

# Add a solid line connecting the means
lines(x_positions, means[conds_levels], col = "black", lwd = 2)

# Add horizontal segments at the means (optional, similar to ggplot2's stat_summary)
segments(
  x_positions - 0.1, means[conds_levels], 
  x_positions + 0.1, means[conds_levels],
  col = "black", lwd = 3.0
)

# Add mean labels for each condition
text(x_positions, means[conds_levels] + 2, 
     labels = round(means[conds_levels], 2), cex = 2.5, col = "black")

# Add error bars using arrows() with specified lower and upper bounds
# Function to compute standard error
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

# Get standard errors for each condition
ses <- tapply(freeResponseOnly$mean_MU, freeResponseOnly$condsFile, se)

# Get means again, just to be sure
means <- tapply(freeResponseOnly$mean_MU, freeResponseOnly$condsFile, mean)

# Define bounds for error bars
lower_bounds <- means[conds_levels] - 2*ses[conds_levels]
upper_bounds <- means[conds_levels] + 2*ses[conds_levels]


# Add vertical error bars using arrows()
for (i in 1:length(x_positions)) {
  arrows(
    x0 = x_positions[i], y0 = lower_bounds[i], 
    x1 = x_positions[i], y1 = upper_bounds[i], 
    length = 0.1, angle = 90, code = 3, col = "#D55E00", lwd = 3.5
  )
}

# Calculate means and standard errors
means <- tapply(freeVtotal$mean_MU, freeVtotal$condType, mean)

# Calculate standard deviations and sample sizes for each condition
sds <- tapply(freeVtotal$mean_MU, freeVtotal$condType, sd)
n_per_condition <- tapply(freeVtotal$mean_MU, freeVtotal$condType, length)

# Calculate standard errors
se <- sds / sqrt(n_per_condition)

# Define the x-axis levels (to match the ggplot2 ordering)
conds_levels <- c("Free", "Total")
x_positions <- 1:length(conds_levels)

# Create an empty plot
plot(
  x_positions, means[conds_levels], 
  type = "n", xaxt = "n", ylim = c(-5, 10.0), xlim = c(0.5, 2.5),
  xlab = "", ylab = "Mean Memory Usage (items)", 
  main = "",
  cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 1.5
)


participants <- unique(freeVtotal$participant)

for (p in participants) {
  this_participant <- freeVtotal[freeVtotal$participant == p, ]
  if (nrow(this_participant) == 2) {
    # Get the x positions for this participant’s conditions
    x_vals <- match(this_participant$condType, conds_levels)
    y_vals <- this_participant$mean_MU
    lines(x_vals, y_vals, col = adjustcolor("#009E73", alpha.f = 0.3), lwd = 1)
  }
}

# Add x-axis labels normally
axis(1, at = x_positions, labels = conds_levels, cex.axis = 1.6)

# Add points for each participant
for (i in 1:length(conds_levels)) {
  condition_data <- freeVtotal[freeVtotal$condType == conds_levels[i], ]
  points(rep(x_positions[i], nrow(condition_data)), condition_data$mean_MU, 
         pch = 1, col = "#0072B2")
}

# Add points for the means
points(x_positions, means[conds_levels], pch = 10, col = "black")

# Add a solid line connecting the means
lines(x_positions, means[conds_levels], col = "black", lwd = 2)

# Add horizontal segments at the means (optional, similar to ggplot2's stat_summary)
segments(
  x_positions - 0.1, means[conds_levels], 
  x_positions + 0.1, means[conds_levels],
  col = "black", lwd = 3.0
)

# Add mean labels for each condition
text(x_positions, means[conds_levels] + 2, 
     labels = round(means[conds_levels], 2), cex = 2.5, col = "black")

# Add error bars using arrows() with specified lower and upper bounds
# Function to compute standard error
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

# Get standard errors for each condition
ses <- tapply(freeVtotal$mean_MU, freeVtotal$condType, se)

# Get means again, just to be sure
means <- tapply(freeVtotal$mean_MU, freeVtotal$condType, mean)

# Define bounds for error bars
lower_bounds <- means[conds_levels] - 2*ses[conds_levels]
upper_bounds <- means[conds_levels] + 2*ses[conds_levels]


# Add vertical error bars using arrows()
for (i in 1:length(x_positions)) {
  arrows(
    x0 = x_positions[i], y0 = lower_bounds[i], 
    x1 = x_positions[i], y1 = upper_bounds[i], 
    length = 0.1, angle = 90, code = 3, col = "#D55E00", lwd = 3.5
  )
}

EM5 1 plots

par(mfrow = c(1, 2))

study_time <- read.csv("study_time.csv", header = TRUE)
MU_data <- read.csv("MU_data.csv", header = TRUE)

# Rename values in the 'condsFile' column
study_time$condsFile[study_time$condsFile == "noDelay"] <- "No-Delay"
study_time$condsFile[study_time$condsFile == "longDelay"] <- "Long-Delay"
study_time$condsFile[study_time$condsFile == "totalResponse"] <- "One-shot"

# study time ------------
# Calculate means
means <- tapply(study_time$mean_StudyTime, study_time$condsFile, mean)

# Define the x-axis levels (to match the ggplot2 ordering)
conds_levels <- c("No-Delay", "Long-Delay", "One-shot")
x_positions <- 1:length(conds_levels)

# Create an empty plot
plot(
  x_positions, means[conds_levels], 
  type = "n", xaxt = "n", ylim = c(0, 30.0), xlim = c(0.5, 3.5),
  xlab = "", ylab = "Mean Study Time (seconds)", 
  main = "",
  cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 1.1
)

participants <- unique(study_time$participant)

for (p in participants) {
  this_participant <- study_time[study_time$participant == p, ]
  if (nrow(this_participant) == 3) {
    # Get the x positions for this participant’s conditions
    x_vals <- match(this_participant$condsFile, conds_levels)
    y_vals <- this_participant$mean_StudyTime
    lines(x_vals, y_vals, col = adjustcolor("#009E73", alpha.f = 0.3), lwd = 1)
  }
}

# Add x-axis labels normally
axis(1, at = x_positions, labels = conds_levels, cex.axis = 1.2)

# Add points for each participant
for (i in 1:length(conds_levels)) {
  condition_data <- study_time[study_time$condsFile == conds_levels[i], ]
  points(rep(x_positions[i], nrow(condition_data)), condition_data$mean_StudyTime, 
         pch = 1, col = "#0072B2")
}

# Add points for the means
points(x_positions, means[conds_levels], pch = 10, col = "black")

# Add a solid line connecting the means
lines(x_positions, means[conds_levels], col = "black", lwd = 2)

# Add horizontal segments at the means (optional, similar to ggplot2's stat_summary)
segments(
  x_positions - 0.1, means[conds_levels], 
  x_positions + 0.1, means[conds_levels],
  col = "black", lwd = 3.0
)

# Add mean labels for each condition
text(x_positions, means[conds_levels] + 3, 
     labels = round(means[conds_levels], 2), cex = 2.5, col = "black")

# Add error bars using arrows() with specified lower and upper bounds
# Function to compute standard error
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

# Get standard errors for each condition
ses <- tapply(study_time$mean_StudyTime, study_time$condsFile, se)

# Get means again, just to be sure
means <- tapply(study_time$mean_StudyTime, study_time$condsFile, mean)

# Define bounds for error bars
lower_bounds <- means[conds_levels] - 2*ses[conds_levels]
upper_bounds <- means[conds_levels] + 2*ses[conds_levels]


# Add vertical error bars using arrows()
for (i in 1:length(x_positions)) {
  arrows(
    x0 = x_positions[i], y0 = lower_bounds[i], 
    x1 = x_positions[i], y1 = upper_bounds[i], 
    length = 0.1, angle = 90, code = 3, col = "#D55E00", lwd = 3.5
  )
}



# MU ------------

MU_data$condsFile[MU_data$condsFile == "noDelay"] <- "No-Delay"
MU_data$condsFile[MU_data$condsFile == "longDelay"] <- "Long-Delay"
MU_data$condsFile[MU_data$condsFile == "totalResponse"] <- "One-shot"

# Calculate means and standard errors
means <- tapply(MU_data$mean_MU, MU_data$condsFile, mean)

# Calculate standard deviations and sample sizes for each condition
sds <- tapply(MU_data$mean_MU, MU_data$condsFile, sd)
n_per_condition <- tapply(MU_data$mean_MU, MU_data$condsFile, length)

# Calculate standard errors
se <- sds / sqrt(n_per_condition)

# Define the x-axis levels (to match the ggplot2 ordering)
conds_levels <- c("No-Delay", "Long-Delay", "One-shot")
x_positions <- 1:length(conds_levels)

# Create an empty plot
plot(
  x_positions, means[conds_levels], 
  type = "n", xaxt = "n", ylim = c(-5, 10.0), xlim = c(0.5, 3.5),
  xlab = "", ylab = "Mean Memory Usage (items)", 
  main = "",
  cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 1.1
)

participants <- unique(MU_data$participant)

for (p in participants) {
  this_participant <- MU_data[MU_data$participant == p, ]
  if (nrow(this_participant) == 3) {
    # Get the x positions for this participant’s conditions
    x_vals <- match(this_participant$condsFile, conds_levels)
    y_vals <- this_participant$mean_MU
    lines(x_vals, y_vals, col = adjustcolor("#009E73", alpha.f = 0.3), lwd = 1)
  }
}

# Add x-axis labels normally
axis(1, at = x_positions, labels = conds_levels, cex.axis = 1.2)

# Add points for each participant
for (i in 1:length(conds_levels)) {
  condition_data <- MU_data[MU_data$condsFile == conds_levels[i], ]
  points(rep(x_positions[i], nrow(condition_data)), condition_data$mean_MU, 
         pch = 1, col = "#0072B2")
}

# Add points for the means
points(x_positions, means[conds_levels], pch = 10, col = "black")

# Add a solid line connecting the means
lines(x_positions, means[conds_levels], col = "black", lwd = 2)

# Add horizontal segments at the means (optional, similar to ggplot2's stat_summary)
segments(
  x_positions - 0.1, means[conds_levels], 
  x_positions + 0.1, means[conds_levels],
  col = "black", lwd = 3.0
)

# Add mean labels for each condition
text(x_positions, means[conds_levels] + 2, 
     labels = round(means[conds_levels], 2), cex = 2.5, col = "black")

# Add error bars using arrows() with specified lower and upper bounds
# Function to compute standard error
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

# Get standard errors for each condition
ses <- tapply(MU_data$mean_MU, MU_data$condsFile, se)

# Get means again, just to be sure
means <- tapply(MU_data$mean_MU, MU_data$condsFile, mean)

# Define bounds for error bars
lower_bounds <- means[conds_levels] - 2*ses[conds_levels]
upper_bounds <- means[conds_levels] + 2*ses[conds_levels]

# Add vertical error bars using arrows()
for (i in 1:length(x_positions)) {
  arrows(
    x0 = x_positions[i], y0 = lower_bounds[i], 
    x1 = x_positions[i], y1 = upper_bounds[i], 
    length = 0.1, angle = 90, code = 3, col = "#D55E00", lwd = 3.5
  )
}

Age plots

par(mfrow = c(1, 2))

# Define the levels of condsFile
conds_levels <- c("No-Delay", "Long-Delay", "One-shot")

# Define colors for each condition
colors <- c("#0072B2", "#D55E00", "#009E73")

# Create an empty plot
plot(
  NA, xlim = range(study_time$age), ylim = c(0, 15),  # Adjusted y-axis range
  xlab = "Age", ylab = "Study time (seconds)",
  main = "",
  cex.lab = 1.6,       # Axis labels (xlab, ylab)
cex.axis = 1.5,    # Tick labels
cex.main = 1.2     # Title

)

# Add points and trend lines for each condition
for (i in 1:length(conds_levels)) {
  # Filter data for the current condition
  condition_data <- study_time[study_time$condsFile == conds_levels[i], ]
  
  # Add points
  points(condition_data$age, condition_data$mean_StudyTime, col = adjustcolor(colors[i], alpha.f = 0.4), pch = 16)
  
  # Add a trend line (linear regression line)
  abline(lm(mean_StudyTime ~ age, data = condition_data), col = colors[i], lwd = 2)
}

# Add a legend to the plot
legend("topright", legend = conds_levels, col = colors, lty = 1, lwd = 2, pch = 16)

# MU age ---------------

# Define the levels of condsFile
conds_levels <- c("No-Delay", "Long-Delay", "One-shot")

# Define colors for each condition
colors <- c("#0072B2", "#D55E00", "#009E73")

# Create an empty plot
plot(
  NA, xlim = range(MU_data$age), ylim = c(-5, 10),  # Adjusted y-axis range
  xlab = "Age", ylab = "Memory usage (items)",
  main = "",
  cex.lab = 1.6,       # Axis labels (xlab, ylab)
cex.axis = 1.5,    # Tick labels
cex.main = 1.2     # Title

)

# Add points and trend lines for each condition
for (i in 1:length(conds_levels)) {
  # Filter data for the current condition
  condition_data <- MU_data[MU_data$condsFile == conds_levels[i], ]
  
  # Add points
  points(condition_data$age, condition_data$mean_MU, col = adjustcolor(colors[i], alpha.f = 0.4), pch = 16)
  
  # Add a trend line (linear regression line)
  abline(lm(mean_MU ~ age, data = condition_data), col = colors[i], lwd = 2)
}

# Add a legend to the plot
legend("topright", legend = conds_levels, col = colors, lty = 1, lwd = 2, pch = 16)

Exploratory analyses

Wilcoxon paired t-tests

# Rename values in the 'condsFile' column
freeResponseOnly$condsFile <- as.character(freeResponseOnly$condsFile)
freeResponseOnly$condsFile[freeResponseOnly$condsFile == "No-Delay"] <- "noDelay"
freeResponseOnly$condsFile[freeResponseOnly$condsFile == "Long-Delay"] <- "longDelay"

# MU firts, we need to make the data in wide format
wide_data <- freeResponseOnly %>%
  select(participant, condsFile, mean_MU) %>%
  pivot_wider(names_from = condsFile, values_from = mean_MU)

# Run the test
wilcox.test(wide_data$noDelay, wide_data$longDelay, paired = TRUE, alternative = "two.sided")


    Wilcoxon signed rank test with continuity correction

data:  wide_data$noDelay and wide_data$longDelay
V = 704.5, p-value = 0.3334
alternative hypothesis: true location shift is not equal to 0

# Study time, make it wide format
wide_data_ST <- freeResponseOnly %>%
  select(participant, condsFile, mean_StudyTime) %>%
  pivot_wider(names_from = condsFile, values_from = mean_StudyTime)

# Run the test
wilcox.test(wide_data_ST$noDelay, wide_data_ST$longDelay, paired = TRUE, alternative = "two.sided")


    Wilcoxon signed rank test with continuity correction

data:  wide_data_ST$noDelay and wide_data_ST$longDelay
V = 767, p-value = 0.0008375
alternative hypothesis: true location shift is not equal to 0

# Free V Total, make it wide
wide_data_FvT <- freeVtotal %>%
  select(participant, condType, mean_MU) %>%
  pivot_wider(names_from = condType, values_from = mean_MU)

# Run the test
wilcox.test(wide_data_FvT$Free, wide_data_FvT$Total, paired = TRUE, alternative = "two.sided")


    Wilcoxon signed rank test with continuity correction

data:  wide_data_FvT$Free and wide_data_FvT$Total
V = 1089.5, p-value = 0.2813
alternative hypothesis: true location shift is not equal to 0

Metacognition questions

# Set WD if working locally
setwd("D:/Canna_d/EM5_stuff/EM5_April_2025")

# Read in data
metacog <- read.csv("metacog_EM52.csv",header=TRUE)

# Only get participants who were included in analyses
metacog_d <- metacog[metacog$Participant %in% freeResponseOnly$participant, ]

# Get tables for kids responses
# Q: Which game did you think was harder?
metaSum <- cbind(harder= table(metacog_d$Harder_clean)) %>% data.frame()
# Q: When the store was closing, did the game get harder?
metaSumClosing <- cbind(Closing= table(metacog_d$Closing_harder_clean)) %>% data.frame()
# Q: When the store was closing, did you feel like you were guessing or not?
metaSumGuess <- cbind(Guess = table(metacog_d$Guess_clean)) %>% data.frame()

# Run binomial tests on answers
harderTest <- binom.test(metaSum$harder[2],sum(metaSum$harder),0.5)
closingTest <- binom.test(metaSumClosing$Closing[2],sum(metaSumClosing$Closing),0.5)
guessTest <- binom.test(metaSumGuess$Guess[2],sum(metaSumGuess$Guess),0.5)

Order effects

Have to do preprocessing again:

# Average over all trials for each condition, for each child, as per preregistration
summarized_data <- data %>%
  group_by(participant, condsFile, order) %>%
  summarize(mean_MU = mean(meanMU, na.rm = TRUE),
            mean_StudyTime = mean(meanListTime, na.rm = TRUE),
            mean_Streak = mean(meanCorBefInc, na.rm = TRUE),
            age = first(Age),
            .groups = 'drop')

# make sure condition and order is a factor
summarized_data$condsFile <- as.factor(summarized_data$condsFile)
summarized_data$order <- as.factor(summarized_data$order)

# Remove closing trials
freeResponseOnly <- summarized_data %>%
  filter(!(condsFile %in% c("noDelayClosing", "longDelayClosing")))

Order effects: study time

# Run model
studyTimeOrder <- glmer(mean_StudyTime ~ condsFile + order + (1|participant), 
                         data = freeResponseOnly, 
                         family = inverse.gaussian(link = "identity"))
# Check summary
summary(studyTimeOrder)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: inverse.gaussian  ( identity )
Formula: mean_StudyTime ~ condsFile + order + (1 | participant)
   Data: freeResponseOnly

     AIC      BIC   logLik deviance df.resid 
   619.4    634.4   -304.7    609.4      144 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.7848 -0.5443 -0.0570  0.4411  2.2386 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 2.94471  1.7160  
 Residual                0.02327  0.1525  
Number of obs: 149, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value             Pr(>|z|)    
(Intercept)   6.1911     0.3725  16.619 < 0.0000000000000002 ***
condsFile1    0.3143     0.1084   2.901              0.00372 ** 
order1        0.3252     0.2957   1.100              0.27135    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr) cndsF1
condsFile1 -0.002       
order1     -0.035 -0.038

# Visualize
plot(freeResponseOnly$order, freeResponseOnly$mean_StudyTime)

Order effects: MU

# Run model
MU_order <- glmer(mean_MU ~ condsFile + order + (1|participant),
                 data = freeResponseOnly, 
                    family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ condsFile + order + (1 | participant), data =
freeResponseOnly, : calling glmer() with family=gaussian (identity link) as a
shortcut to lmer() is deprecated; please call lmer() directly

# Check summary
summary(MU_order)

Linear mixed model fit by REML ['lmerMod']
Formula: mean_MU ~ condsFile + order + (1 | participant)
   Data: freeResponseOnly

REML criterion at convergence: 439.7

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.0393 -0.5121 -0.1910  0.3967  4.3142 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.3046   0.5520  
 Residual                0.8131   0.9017  
Number of obs: 149, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value
(Intercept)  1.72121    0.09762  17.632
condsFile1   0.12392    0.07394   1.676
order1       0.13014    0.09762   1.333

Correlation of Fixed Effects:
           (Intr) cndsF1
condsFile1  0.006       
order1      0.008 -0.006

# Compare null with full
MUOrder_null <- glmer(mean_MU ~ 1 + (1|participant), 
                     data = freeResponseOnly, family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ 1 + (1 | participant), data = freeResponseOnly, :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

MUfreeAov <- cbind(car::Anova(MU_order),
                   r.squaredGLMM(MU_order,MUOrder_null))
# Visualize
plot(freeResponseOnly$order, freeResponseOnly$mean_MU)

# Check p values
MUfreeAov

             Chisq Df Pr(>Chisq)        R2m       R2c
condsFile 2.808638  1 0.09375796 0.02844643 0.2932568
order     1.777299  1 0.18248139 0.02844643 0.2932568

Block effects

Whether or not performance decreases over time. Doesn’t seem to.

# Make sure trial is a factor
data$trial <- as.factor(data$trial)

# Order B, no delay first
orderB <- data[data$order == "B",]

orderB <- orderB %>%
  filter(!(condsFile %in% c("noDelayClosing", "longDelayClosing")))

ggplot(orderB, aes(x = factor(trial), y = meanListTime)) +
  geom_boxplot() +
  facet_wrap(~ condsFile) +
  labs(x = "Trial", y = "Study Time", title = "Study Time by Trial and Condition, order B") +
  theme_minimal()

ggplot(orderB, aes(x = factor(trial), y = meanMU)) +
  geom_boxplot() +
  facet_wrap(~ condsFile) +
  labs(x = "Trial", y = "MU", title = "MU by Trial and Condition, order B") +
  theme_minimal()

# Order D, long delay first
orderD <- data[data$order == "D",]

orderD <- orderD %>%
  filter(!(condsFile %in% c("noDelayClosing", "longDelayClosing")))

ggplot(orderD, aes(x = factor(trial), y = meanListTime)) +
  geom_boxplot() +
  facet_wrap(~ condsFile) +
  labs(x = "Trial", y = "Study Time", title = "Study Time by Trial and Condition, order D") +
  theme_minimal()

ggplot(orderD, aes(x = factor(trial), y = meanMU)) +
  geom_boxplot() +
  facet_wrap(~ condsFile) +
  labs(x = "Trial", y = "MU", title = "MU by Trial and Condition, order D") +
  theme_minimal()

Block effects model

Marginally significant (only study time) but would not survive corrections for multiple testing.

# Remove closing trials
data_noClosing <- data %>%
  filter(!(condsFile %in% c("noDelayClosing", "longDelayClosing")))

# Run model for study time
studyTime_block <- glmer(meanListTime ~ condsFile + trial + (1|participant), 
                         data = data_noClosing, 
                         family = inverse.gaussian(link = "identity"))
# Check summary
summary(studyTime_block)

Generalized linear mixed model fit by maximum likelihood (Laplace
  Approximation) [glmerMod]
 Family: inverse.gaussian  ( identity )
Formula: meanListTime ~ condsFile + trial + (1 | participant)
   Data: data_noClosing

     AIC      BIC   logLik deviance df.resid 
  2011.7   2036.3   -999.9   1999.7      439 

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-1.5077 -0.6487 -0.1901  0.3335  3.6372 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 2.36244  1.5370  
 Residual                0.04854  0.2203  
Number of obs: 445, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value             Pr(>|z|)    
(Intercept)  6.25143    0.36220  17.260 < 0.0000000000000002 ***
condsFile1   0.29200    0.09687   3.014              0.00258 ** 
trial1       0.28631    0.13428   2.132              0.03298 *  
trial2      -0.12617    0.11904  -1.060              0.28920    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
           (Intr) cndsF1 trial1
condsFile1 -0.009              
trial1      0.038 -0.101       
trial2     -0.015  0.054 -0.546

## MU
# Run model for MU
MU_block <- glmer(meanMU ~ condsFile + trial + (1|participant),
                 data = data_noClosing, 
                    family = gaussian(link = "identity"))

Warning in glmer(meanMU ~ condsFile + trial + (1 | participant), data =
data_noClosing, : calling glmer() with family=gaussian (identity link) as a
shortcut to lmer() is deprecated; please call lmer() directly

# Check summary
summary(MU_block)

Linear mixed model fit by REML ['lmerMod']
Formula: meanMU ~ condsFile + trial + (1 | participant)
   Data: data_noClosing

REML criterion at convergence: 1627.8

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-3.9222 -0.4627 -0.0730  0.4501  5.1315 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 0.396    0.6293  
 Residual                1.950    1.3964  
Number of obs: 445, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value
(Intercept)  1.71746    0.09837  17.460
condsFile1   0.12190    0.06630   1.839
trial1      -0.03359    0.09352  -0.359
trial2      -0.07386    0.09352  -0.790

Correlation of Fixed Effects:
           (Intr) cndsF1 trial1
condsFile1  0.010              
trial1     -0.002 -0.004       
trial2     -0.002 -0.004 -0.496

# Compare null with full
MU_null <- glmer(meanMU ~ 1 + (1|participant), 
                     data = data_noClosing, family = gaussian(link = "identity"))

Warning in glmer(meanMU ~ 1 + (1 | participant), data = data_noClosing, :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

MUfreeAov <- cbind(car::Anova(MU_block),
                   r.squaredGLMM(MU_block,MU_null))
# Check p values
MUfreeAov

             Chisq Df Pr(>Chisq)         R2m       R2c
condsFile 3.380689  1 0.06596447 0.008790228 0.1761191
trial     1.372174  2 0.50354269 0.008790228 0.1761191

No-delay Total VS Long-delay Total

# Only look at Closing trials
totalResponseOnly <- summarized_data %>%
  filter((condsFile %in% c("noDelayClosing", "longDelayClosing")))

# Look at distirbution
hist(totalResponseOnly$mean_MU)

# Run model
MUtotal_GLMM <- glmer(mean_MU ~ condsFile + (1|participant),
                 data = totalResponseOnly, 
                    family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ condsFile + (1 | participant), data =
totalResponseOnly, : calling glmer() with family=gaussian (identity link) as a
shortcut to lmer() is deprecated; please call lmer() directly

# No errors etc. It says this is an lmer, truee

# Could be better 6/6
plot(density(totalResponseOnly$mean_MU), xlim=c(-50, 50), ylim=c(0, 0.6), 
     main='M_1 y_hat')
lines(density(predict(MUtotal_GLMM, type='response') ), col='red')

# somewhat ok 6/6
plot(MUtotal_GLMM)

# The qq plot is beautiful 6/6
residuals <- residuals(MUtotal_GLMM)
qqnorm(residuals)
qqline(residuals, col = "red")

# This model fits great 6/6
check_model(MUtotal_GLMM)

MUfree_null <- glmer(mean_MU ~ 1 + (1|participant), 
                     data = totalResponseOnly, family = gaussian(link = "identity"))

Warning in glmer(mean_MU ~ 1 + (1 | participant), data = totalResponseOnly, :
calling glmer() with family=gaussian (identity link) as a shortcut to lmer() is
deprecated; please call lmer() directly

MUfreeAov <- cbind(car::Anova(MUtotal_GLMM),
                   r.squaredGLMM(MUtotal_GLMM,MUfree_null))

# The total model fits better 6/6
anova(MUfree_null,MUtotal_GLMM)

refitting model(s) with ML (instead of REML)

Data: totalResponseOnly
Models:
MUfree_null: mean_MU ~ 1 + (1 | participant)
MUtotal_GLMM: mean_MU ~ condsFile + (1 | participant)
             npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)  
MUfree_null     3 810.32 819.33 -402.16   804.32                       
MUtotal_GLMM    4 807.30 819.31 -399.65   799.30 5.0213  1    0.02504 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# No delay has less MU but it is non-significant!
summary(MUtotal_GLMM)

Linear mixed model fit by REML ['lmerMod']
Formula: mean_MU ~ condsFile + (1 | participant)
   Data: totalResponseOnly

REML criterion at convergence: 800.6

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.08868 -0.51808  0.00402  0.61783  2.18411 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept)  2.262   1.504   
 Residual                10.617   3.258   
Number of obs: 149, groups:  participant, 75

Fixed effects:
            Estimate Std. Error t value
(Intercept)   1.9914     0.3186   6.250
condsFile1    0.6047     0.2671   2.264

Correlation of Fixed Effects:
           (Intr)
condsFile1 0.007

MUfreeAov

             Chisq Df Pr(>Chisq)        R2m       R2c
condsFile 5.125439  1  0.0235776 0.02778731 0.1985702

^ Same but interaction model

# Add columns to say if its long/no delay or free/total response
summarized_data$Delay <- ifelse(grepl("longDelay", summarized_data$condsFile), "long", "no")
summarized_data$Response <- ifelse(grepl("Closing", summarized_data$condsFile), "Total", "Free")

# Make sure they're factors
summarized_data$Delay <- factor(summarized_data$Delay)
summarized_data$Response <- factor(summarized_data$Response)

# Now fit the interaction model
MU_interaction <- lmer(mean_MU ~ Delay + Response +  
                         Delay*Response + (1|participant), 
                       data = summarized_data)

#  good 6/6
plot(density(summarized_data$mean_MU), xlim=c(-50, 50), ylim=c(0, 0.6), 
     main='M_1 y_hat')
lines(density(predict(MU_interaction, type='response') ), col='red')

# somewhat ok 6/6
plot(MU_interaction)

# The qq plot is not good 6/6
residuals <- residuals(MU_interaction)
qqnorm(residuals)
qqline(residuals, col = "red")

# This model fits horrible 6/6
check_model(MU_interaction)

MUfree_null <- lmer(mean_MU ~ 1 + (1|participant), 
                     data = summarized_data)
MUfreeAov <- cbind(car::Anova(MU_interaction),
                   r.squaredGLMM(MU_interaction,MUfree_null))

# The total model fits better 6/6
anova(MUfree_null,MU_interaction)

refitting model(s) with ML (instead of REML)

Data: summarized_data
Models:
MUfree_null: mean_MU ~ 1 + (1 | participant)
MU_interaction: mean_MU ~ Delay + Response + Delay * Response + (1 | participant)
               npar    AIC    BIC  logLik deviance Chisq Df Pr(>Chisq)  
MUfree_null       3 1429.0 1440.1 -711.51   1423.0                      
MU_interaction    6 1424.7 1446.9 -706.34   1412.7 10.34  3    0.01589 *
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

# No delay has less MU but it is non-significant!
summary(MU_interaction)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: mean_MU ~ Delay + Response + Delay * Response + (1 | participant)
   Data: summarized_data

REML criterion at convergence: 1420.5

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.97992 -0.41702 -0.02921  0.40376  3.08151 

Random effects:
 Groups      Name        Variance Std.Dev.
 participant (Intercept) 1.071    1.035   
 Residual                5.930    2.435   
Number of obs: 298, groups:  participant, 75

Fixed effects:
                 Estimate Std. Error       df t value            Pr(>|t|)    
(Intercept)        1.8559     0.1850  74.3279  10.033 0.00000000000000183 ***
Delay1             0.3648     0.1412 221.9684   2.584              0.0104 *  
Response1         -0.1347     0.1411 220.5990  -0.955              0.3407    
Delay1:Response1  -0.2392     0.1411 220.5990  -1.695              0.0914 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) Delay1 Rspns1
Delay1      0.006               
Response1   0.000  0.000        
Dly1:Rspns1 0.000  0.000  0.007

MUfreeAov

                   Chisq Df  Pr(>Chisq)        R2m       R2c
Delay          6.6756533  1 0.009773868 0.02894451 0.1775105
Response       0.8903198  1 0.345390670 0.02894451 0.1775105
Delay:Response 2.8739658  1 0.090022789 0.02894451 0.1775105

# delay
Emm <- emmeans(MU_interaction,~Delay)

NOTE: Results may be misleading due to involvement in interactions

Pair <- summary(pairs(Emm,adjust="holm"))
Pair

 contrast  estimate    SE  df t.ratio p.value
 long - no     0.73 0.282 222   2.584  0.0104

Results are averaged over the levels of: Response 
Degrees-of-freedom method: kenward-roger

# response
Emm <- emmeans(MU_interaction,~Response)

NOTE: Results may be misleading due to involvement in interactions

Pair <- summary(pairs(Emm,adjust="holm"))
Pair

 contrast     estimate    SE  df t.ratio p.value
 Free - Total   -0.269 0.282 220  -0.955  0.3407

Results are averaged over the levels of: Delay 
Degrees-of-freedom method: kenward-roger

##### Prism reproduce
options(contrasts = c("contr.sum","contr.poly"))

model <-lmer(mean_MU ~ Delay*Response + (1|participant) + (1|Delay:participant) + (1|Response:participant), 
data = summarized_data, REML = TRUE)

boundary (singular) fit: see help('isSingular')

MU_interaction <- lmer(mean_MU ~ Delay + Response +  
                         Delay*Response + (1|participant), 
                       data = summarized_data)

summary(model)

Linear mixed model fit by REML. t-tests use Satterthwaite's method [
lmerModLmerTest]
Formula: 
mean_MU ~ Delay * Response + (1 | participant) + (1 | Delay:participant) +  
    (1 | Response:participant)
   Data: summarized_data

REML criterion at convergence: 1420.2

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-2.88873 -0.42050 -0.05041  0.36612  3.03416 

Random effects:
 Groups               Name        Variance       Std.Dev.  
 Response:participant (Intercept) 0.322785947531 0.56814254
 Delay:participant    (Intercept) 0.000000002831 0.00005321
 participant          (Intercept) 0.963311047033 0.98148410
 Residual                         5.715309898058 2.39067143
Number of obs: 298, groups:  
Response:participant, 150; Delay:participant, 149; participant, 75

Fixed effects:
                 Estimate Std. Error       df t value            Pr(>|t|)    
(Intercept)        1.8559     0.1850  74.3292  10.034 0.00000000000000182 ***
Delay1             0.3648     0.1386 148.1557   2.632             0.00939 ** 
Response1         -0.1349     0.1461  74.2093  -0.924             0.35863    
Delay1:Response1  -0.2394     0.1385 148.2336  -1.728             0.08604 .  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Correlation of Fixed Effects:
            (Intr) Delay1 Rspns1
Delay1      0.006               
Response1   0.000  0.000        
Dly1:Rspns1 0.000  0.000  0.007 
optimizer (nloptwrap) convergence code: 0 (OK)
boundary (singular) fit: see help('isSingular')

anova(model)

Type III Analysis of Variance Table with Satterthwaite's method
               Sum Sq Mean Sq NumDF   DenDF F value   Pr(>F)   
Delay          39.585  39.585     1 148.156  6.9262 0.009393 **
Response        4.877   4.877     1  74.209  0.8532 0.358631   
Delay:Response 17.069  17.069     1 148.234  2.9865 0.086042 . 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

MU(total) in No-delay + Long-delay plot

# Calculate means and standard errors
means <- tapply(totalResponseOnly$mean_MU, totalResponseOnly$condsFile, mean)

# Calculate standard deviations and sample sizes for each condition
sds <- tapply(totalResponseOnly$mean_MU, totalResponseOnly$condsFile, sd)
n_per_condition <- tapply(totalResponseOnly$mean_MU, totalResponseOnly$condsFile, length)

# Calculate standard errors
se <- sds / sqrt(n_per_condition)

# Define the x-axis levels (to match the ggplot2 ordering)
conds_levels <- c("noDelayClosing", "longDelayClosing")
x_positions <- 1:length(conds_levels)

# Create an empty plot
plot(
  x_positions, means[conds_levels], 
  type = "n", xaxt = "n", ylim = c(-5, 10.0), xlim = c(0.5, 2.5),
  xlab = "", ylab = "Mean Memory Usage (items)", 
  main = "",
  cex.lab = 1.5,
  cex.axis = 1.5,
  cex.main = 1.5
)


participants <- unique(totalResponseOnly$participant)

for (p in participants) {
  this_participant <- totalResponseOnly[totalResponseOnly$participant == p, ]
  if (nrow(this_participant) == 2) {
    # Get the x positions for this participant’s conditions
    x_vals <- match(this_participant$condsFile, conds_levels)
    y_vals <- this_participant$mean_MU
    lines(x_vals, y_vals, col = adjustcolor("#009E73", alpha.f = 0.3), lwd = 1)
  }
}

# Add x-axis labels normally
axis(1, at = x_positions, labels = conds_levels, cex.axis = 1.6)

# Add points for each participant
for (i in 1:length(conds_levels)) {
  condition_data <- totalResponseOnly[totalResponseOnly$condsFile == conds_levels[i], ]
  points(rep(x_positions[i], nrow(condition_data)), condition_data$mean_MU, 
         pch = 1, col = "#0072B2")
}

# Add points for the means
points(x_positions, means[conds_levels], pch = 10, col = "black")

# Add a solid line connecting the means
lines(x_positions, means[conds_levels], col = "black", lwd = 2)

# Add horizontal segments at the means (optional, similar to ggplot2's stat_summary)
segments(
  x_positions - 0.1, means[conds_levels], 
  x_positions + 0.1, means[conds_levels],
  col = "black", lwd = 3.0
)

# Add mean labels for each condition
text(x_positions, means[conds_levels] + 2, 
     labels = round(means[conds_levels], 2), cex = 2.5, col = "black")

# Add error bars using arrows() with specified lower and upper bounds
# Function to compute standard error
se <- function(x) sd(x, na.rm = TRUE) / sqrt(sum(!is.na(x)))

# Get standard errors for each condition
ses <- tapply(totalResponseOnly$mean_MU, totalResponseOnly$condsFile, se)

# Get means again, just to be sure
means <- tapply(totalResponseOnly$mean_MU, totalResponseOnly$condsFile, mean)

# Define bounds for error bars
lower_bounds <- means[conds_levels] - 2*ses[conds_levels]
upper_bounds <- means[conds_levels] + 2*ses[conds_levels]


# Add vertical error bars using arrows()
for (i in 1:length(x_positions)) {
  arrows(
    x0 = x_positions[i], y0 = lower_bounds[i], 
    x1 = x_positions[i], y1 = upper_bounds[i], 
    length = 0.1, angle = 90, code = 3, col = "#D55E00", lwd = 3.5
  )
}

Effect size

It can be difficult and/or problematic to compute effect size for mixed models perhaps, but here it goes.

library(emmeans)

# Study time
Emm <- emmeans(studyTimeGlmm,~condsFile)
Pair <- summary(pairs(Emm,adjust="holm"))
Pair

 contrast            estimate    SE  df z.ratio p.value
 longDelay - noDelay    0.635 0.217 Inf   2.921  0.0035

n_paired <- length(unique(freeResponseOnly$participant))
d <- z_to_d(z = Pair$z.ratio, n = n_paired, paired = TRUE)

# MU
Emm <- emmeans(MU_GLMM,~condsFile)
Pair <- summary(pairs(Emm,adjust="holm"))
Pair

 contrast            estimate    SE   df t.ratio p.value
 longDelay - noDelay    0.249 0.148 73.6   1.684  0.0965

Degrees-of-freedom method: kenward-roger

d_MU <- t_to_d(t=Pair$t.ratio,Pair$df,paired = TRUE)

# Free V Total
Emm <- emmeans(freeVtotal_glmm,~condType)
Pair <- summary(pairs(Emm,adjust="holm"))
Pair

 contrast     estimate   SE df t.ratio p.value
 Free - Total   -0.271 0.29 74  -0.934  0.3534

Degrees-of-freedom method: kenward-roger

d_fVt <- t_to_d(t=Pair$t.ratio,Pair$df,paired = TRUE)

# MU interaction
Emm <- emmeans(MU_interaction,~Delay)

NOTE: Results may be misleading due to involvement in interactions

Pair <- summary(pairs(Emm,adjust="holm"))
Pair

 contrast  estimate    SE  df t.ratio p.value
 long - no     0.73 0.282 222   2.584  0.0104

Results are averaged over the levels of: Response 
Degrees-of-freedom method: kenward-roger

d_fVt <- t_to_d(t=Pair$t.ratio,Pair$df,paired = TRUE)

Emm <- emmeans(MU_interaction,~Response)

NOTE: Results may be misleading due to involvement in interactions

Pair <- summary(pairs(Emm,adjust="holm"))
Pair

 contrast     estimate    SE  df t.ratio p.value
 Free - Total   -0.269 0.282 220  -0.955  0.3407

Results are averaged over the levels of: Delay 
Degrees-of-freedom method: kenward-roger

d_fVt <- t_to_d(t=Pair$t.ratio,Pair$df,paired = TRUE)

Emm <- emmeans(MU_interaction, ~ Response * Delay)
# This creates the interaction contrasts (difference of differences)
int_contrast <- contrast(Emm, interaction = "pairwise", adjust = "holm")
int_sum <- summary(int_contrast)
d_int <- t_to_d(t = int_sum$t.ratio[1], df = int_sum$df[1], paired = TRUE)