1 Overview
2 Setup
- 2.1 Libraries
- 2.2 Configuration
3 Helper functions
4 Data loading
5 Descriptive statistics
6 Late window analyses (300–700 ms)
7 Early window analyses (150–300 ms)
8 Vocabulary analyses
9 Final figures

1 Overview

For each of the two time windows (early: 150–300 ms; late: 300–700 ms), EEG mean amplitudes were exported from NetStation as a SS_Stats_*.txt file containing four condition blocks (BoubaMatch, BoubaMismatch, KikiMatch, KikiMismatch) and three scalp regions (frontal, central, parietal). Vocabulary scores at 12 months (T1) come from an Excel file (vocab_12mo.xlsx).

The pipeline is structured as:

Setup and configuration.
Helper functions (parsing, cleaning, outlier removal).
Data loading for both time windows.
Descriptive statistics.
Late window analyses (LMM, ANOVA, post-hocs, equivalence test).
Early window analyses (same + area-stratified post-hocs because of the significant three-way interaction).
Vocabulary analyses on both windows.
Final figures used in the thesis.

To re-use this pipeline with a different sample, only the file paths in the “Configuration” chunk need to be updated.

2 Setup

2.1 Libraries

library(tidyr)        
library(dplyr)       
library(readxl)       
library(ggplot2)      
library(lme4)         
library(lmerTest)    
library(emmeans)      
library(effectsize)

2.2 Configuration

# --- File paths --------------------------------------------------------------
# Late time window (300–700 ms post-onset)
path_ss_late  <- "SS_Stats_n26.txt"
# Early time window (150–300 ms post-onset)
path_ss_early <- "SS_Stats_n26_early.txt"
# Vocabulary scores at 12 months (T1)
path_vocab    <- "vocab_12mo.xlsx"

# --- Analysis parameters ----------------------------------------------------
# Names of the four experimental conditions, as they appear as block headers
# in the SS_Stats text file.
condition_blocks <- c("BoubaMatch", "BoubaMismatch", "KikiMatch", "KikiMismatch")

# Names of the three scalp regions (in the order they appear in the file).
region_cols <- c("SS_Frontal", "SS_Central", "SS_Parietal")

# Threshold (in absolute z-score) to flag outliers
outlier_z_threshold <- 3

3 Helper functions

3.1 Parsing the SS_Stats text file

The function reads a NetStation export and returns a long data frame with one row per (participant × condition × region) combination.

parse_eeg_file <- function(file_path,
                           blocks      = condition_blocks,
                           value_cols  = region_cols) {

  lines <- readLines(file_path)

  # Index of the first line of each condition block
  block_starts <- which(trimws(lines) %in% blocks)
  # Add a virtual "end" so the last block has an upper bound
  block_bounds <- c(block_starts, length(lines) + 3)

  # Helper: parse one block (one condition) into a data frame
  parse_one_block <- function(start, end, condition_name) {
    # The data rows of a block are between (start + 3) and (end - 3),
    # because each block has a 3-line header and an empty footer.
    block_lines <- lines[(start + 3):(end - 3)]

    do.call(rbind, lapply(block_lines, function(line) {
      parts  <- strsplit(trimws(line), "\\s+")[[1]]
      id     <- parts[2]                                  # participant id
      values <- as.numeric(tail(parts, length(value_cols)))

      row <- data.frame(participant = id,
                        condition   = condition_name,
                        stringsAsFactors = FALSE)
      # Fill one column per region
      for (i in seq_along(value_cols)) {
        row[[value_cols[i]]] <- values[i]
      }
      row
    }))
  }

  # Apply parse_one_block() to each condition block, then bind rows
  wide <- do.call(rbind, lapply(seq_len(length(block_bounds) - 1), function(i) {
    parse_one_block(start          = block_bounds[i],
                    end            = block_bounds[i + 1],
                    condition_name = trimws(lines[block_bounds[i]]))
  }))

  # Pivot to long format: one row per (participant × condition × region)
  wide %>%
    pivot_longer(cols      = all_of(value_cols),
                 names_to  = "area",
                 values_to = "amplitude")
}

3.2 Cleaning / preparing variables

This function adds clean participant IDs and recodes the categorical variables (condition, word_type, congruence, area).

prepare_eeg_data <- function(data) {

  # 1. Participant IDs as S1_S12, S2_S12, ...
  unique_p <- unique(data$participant)
  id_map   <- setNames(paste0("S", seq_along(unique_p), "_S12"), unique_p)
  data$participant_id <- id_map[data$participant]

  # 2. Make condition a factor
  data$condition <- factor(data$condition, levels = condition_blocks)

  # 3. Build word_type (Bouba / Kiki) from the condition label
  data$word_type <- factor(
    ifelse(grepl("Bouba", data$condition), "Bouba", "Kiki"),
    levels = c("Bouba", "Kiki")
  )

  # 4. Build congruence (Match / Mismatch) from the condition label
  data$congruence <- factor(
    ifelse(grepl("Match", data$condition), "Match", "Mismatch"),
    levels = c("Match", "Mismatch")
  )

  # 5. Recode the region labels to short, lowercase names
  data$area <- recode_factor(data$area,
                             "SS_Frontal"  = "frontal",
                             "SS_Central"  = "central",
                             "SS_Parietal" = "parietal")

  data
}

3.3 Removing outliers (z-score)

A z-score is computed within each cell defined by group_vars. Trials with |z| > threshold (default 3) are flagged as outliers and their amplitude is replaced by NA, so that later models drop them automatically. The default grouping is by area only (3 cells, ~104 observations each): a trial is treated as an outlier if its amplitude is extreme relative to the overall amplitude distribution within its scalp region. This captures recording-level artifacts.

To group by (condition × area) instead — i.e. 12 cells, ~26 observations each — call remove_outliers_zscore(data, group_vars = c("condition", "area")).

remove_outliers_zscore <- function(data,
                                   threshold  = outlier_z_threshold,
                                   group_vars = "area") {

  flagged <- data %>%
    group_by(across(all_of(group_vars))) %>%
    mutate(
      mean_amp = mean(amplitude, na.rm = TRUE),
      sd_amp   = sd(amplitude,   na.rm = TRUE),
      z_score  = (amplitude - mean_amp) / sd_amp,
      outlier  = abs(z_score) > threshold
    ) %>%
    ungroup()

  outlier_ids <- flagged %>%
    filter(outlier) %>%
    distinct(participant_id)
  message("Grouping: ", paste(group_vars, collapse = " × "))
  message("Participants with at least one outlier trial: ",
          paste(outlier_ids$participant_id, collapse = ", "))
  message("Number of trials flagged as outliers: ",
          sum(flagged$outlier, na.rm = TRUE))

  flagged %>%
    mutate(amplitude = if_else(outlier, NA_real_, amplitude)) %>%
    select(-mean_amp, -sd_amp, -z_score, -outlier)
}

4 Data loading

4.1 Late time window (300–700 ms)

d_late_raw   <- parse_eeg_file(path_ss_late)
d_late       <- prepare_eeg_data(d_late_raw)
d_late_clean <- remove_outliers_zscore(d_late)

4.2 Early time window (150–300 ms)

d_early_raw   <- parse_eeg_file(path_ss_early)
d_early       <- prepare_eeg_data(d_early_raw)
d_early_clean <- remove_outliers_zscore(d_early)

4.3 Structure check

str(d_late_clean)

## tibble [312 × 7] (S3: tbl_df/tbl/data.frame)
##  $ participant   : chr [1:312] "SS_S48_S12" "SS_S48_S12" "SS_S48_S12" "S42_SS_20250415_ref.mff" ...
##  $ condition     : Factor w/ 4 levels "BoubaMatch","BoubaMismatch",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ area          : Factor w/ 3 levels "frontal","central",..: 1 2 3 1 2 3 1 2 3 1 ...
##  $ amplitude     : num [1:312] 9.549 0.887 -1.748 1.282 -7.56 ...
##  $ participant_id: Named chr [1:312] "S1_S12" "S1_S12" "S1_S12" "S2_S12" ...
##   ..- attr(*, "names")= chr [1:312] "SS_S48_S12" "SS_S48_S12" "SS_S48_S12" "S42_SS_20250415_ref.mff" ...
##  $ word_type     : Factor w/ 2 levels "Bouba","Kiki": 1 1 1 1 1 1 1 1 1 1 ...
##  $ congruence    : Factor w/ 2 levels "Match","Mismatch": 1 1 1 1 1 1 1 1 1 1 ...

length(unique(d_late_clean$participant_id))   # number of participants

## [1] 26

5 Descriptive statistics

5.1 Sample composition

table(d_late_clean$condition)

## 
##    BoubaMatch BoubaMismatch     KikiMatch  KikiMismatch 
##            78            78            78            78

table(d_late_clean$word_type)

## 
## Bouba  Kiki 
##   156   156

table(d_late_clean$congruence)

## 
##    Match Mismatch 
##      156      156

table(d_late_clean$area)

## 
##  frontal  central parietal 
##      104      104      104

table(d_early_clean$condition)

## 
##    BoubaMatch BoubaMismatch     KikiMatch  KikiMismatch 
##            78            78            78            78

table(d_early_clean$word_type)

## 
## Bouba  Kiki 
##   156   156

table(d_early_clean$congruence)

## 
##    Match Mismatch 
##      156      156

table(d_early_clean$area)

## 
##  frontal  central parietal 
##      104      104      104

5.2 Amplitude — mean and SD per condition (late window)

d_late_clean %>%
  group_by(word_type, congruence) %>%
  summarise(M  = mean(amplitude, na.rm = TRUE),
            SD = sd(amplitude,   na.rm = TRUE),
            n  = sum(!is.na(amplitude)),
            .groups = "drop")

## # A tibble: 4 × 5
##   word_type congruence      M    SD     n
##   <fct>     <fct>       <dbl> <dbl> <int>
## 1 Bouba     Match      -2.28   6.50    74
## 2 Bouba     Mismatch    2.35   4.55    78
## 3 Kiki      Match       0.159  6.09    78
## 4 Kiki      Mismatch   -2.60   7.50    78

5.3 Amplitude — mean and SD per condition (early window)

d_early_clean %>%
  group_by(word_type, congruence) %>%
  summarise(M  = mean(amplitude, na.rm = TRUE),
            SD = sd(amplitude,   na.rm = TRUE),
            n  = sum(!is.na(amplitude)),
            .groups = "drop")

## # A tibble: 4 × 5
##   word_type congruence      M    SD     n
##   <fct>     <fct>       <dbl> <dbl> <int>
## 1 Bouba     Match      -0.687  5.16    76
## 2 Bouba     Mismatch    2.55   5.80    78
## 3 Kiki      Match       1.25   5.53    76
## 4 Kiki      Mismatch   -1.48   5.96    78

6 Late window analyses (300–700 ms)

6.1 Full 3-way LMM

Random-intercept LMM with all three factors (word_type, congruence, area) and their interactions. Amplitude is z-scored so that fixed-effect coefficients are standardised.

m_full_late <- lmer(scale(amplitude) ~ word_type * congruence * area +
                                       (1 | participant_id),
                    data = d_late_clean)
summary(m_full_late)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## scale(amplitude) ~ word_type * congruence * area + (1 | participant_id)
##    Data: d_late_clean
## 
## REML criterion at convergence: 816.2
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.54231 -0.59125  0.07641  0.56824  2.89477 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  participant_id (Intercept) 0.2044   0.4521  
##  Residual                   0.7141   0.8451  
## Number of obs: 308, groups:  participant_id, 26
## 
## Fixed effects:
##                                               Estimate Std. Error       df
## (Intercept)                                    -0.1266     0.1944 203.1168
## word_typeKiki                                   0.2533     0.2396 271.1807
## congruenceMismatch                              0.8041     0.2396 271.1807
## areacentral                                    -0.3046     0.2440 270.6383
## areaparietal                                   -0.1976     0.2396 271.1807
## word_typeKiki:congruenceMismatch               -1.3848     0.3352 270.9160
## word_typeKiki:areacentral                       0.2471     0.3383 270.6383
## word_typeKiki:areaparietal                      0.2099     0.3352 270.9160
## congruenceMismatch:areacentral                  0.1291     0.3383 270.6383
## congruenceMismatch:areaparietal                -0.3168     0.3352 270.9160
## word_typeKiki:congruenceMismatch:areacentral   -0.0289     0.4736 270.6383
## word_typeKiki:congruenceMismatch:areaparietal   0.6907     0.4714 270.7788
##                                               t value Pr(>|t|)    
## (Intercept)                                    -0.651 0.515564    
## word_typeKiki                                   1.058 0.291218    
## congruenceMismatch                              3.357 0.000902 ***
## areacentral                                    -1.249 0.212905    
## areaparietal                                   -0.825 0.410173    
## word_typeKiki:congruenceMismatch               -4.132  4.8e-05 ***
## word_typeKiki:areacentral                       0.730 0.465742    
## word_typeKiki:areaparietal                      0.626 0.531661    
## congruenceMismatch:areacentral                  0.382 0.703102    
## congruenceMismatch:areaparietal                -0.945 0.345439    
## word_typeKiki:congruenceMismatch:areacentral   -0.061 0.951390    
## word_typeKiki:congruenceMismatch:areaparietal   1.465 0.144022    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                           (Intr) wrd_tK cngrnM arcntr arprtl wr_K:M
## word_typeKk               -0.643                                   
## cngrncMsmtc               -0.643  0.521                            
## areacentral               -0.627  0.509  0.509                     
## areaparietl               -0.643  0.521  0.521  0.509              
## wrd_typKk:M                0.459 -0.715 -0.715 -0.364 -0.373       
## wrd_typKk:rc               0.452 -0.706 -0.367 -0.721 -0.367  0.505
## wrd_typKk:rp               0.459 -0.715 -0.373 -0.364 -0.715  0.511
## cngrncMsmtch:rc            0.452 -0.367 -0.706 -0.721 -0.367  0.505
## cngrncMsmtch:rp            0.459 -0.373 -0.715 -0.364 -0.715  0.511
## wrd_typKk:cngrncMsmtch:rc -0.323  0.504  0.504  0.515  0.262 -0.707
## wrd_typKk:cngrncMsmtch:rp -0.327  0.508  0.508  0.259  0.508 -0.711
##                           wrd_typKk:rc wrd_typKk:rp cngrncMsmtch:rc
## word_typeKk                                                        
## cngrncMsmtc                                                        
## areacentral                                                        
## areaparietl                                                        
## wrd_typKk:M                                                        
## wrd_typKk:rc                                                       
## wrd_typKk:rp               0.505                                   
## cngrncMsmtch:rc            0.520        0.262                      
## cngrncMsmtch:rp            0.262        0.511        0.505         
## wrd_typKk:cngrncMsmtch:rc -0.714       -0.360       -0.714         
## wrd_typKk:cngrncMsmtch:rp -0.359       -0.711       -0.359         
##                           cngrncMsmtch:rp wrd_typKk:cngrncMsmtch:rc
## word_typeKk                                                        
## cngrncMsmtc                                                        
## areacentral                                                        
## areaparietl                                                        
## wrd_typKk:M                                                        
## wrd_typKk:rc                                                       
## wrd_typKk:rp                                                       
## cngrncMsmtch:rc                                                    
## cngrncMsmtch:rp                                                    
## wrd_typKk:cngrncMsmtch:rc -0.360                                   
## wrd_typKk:cngrncMsmtch:rp -0.711           0.502

6.2 ANOVA (Type II) on the full LMM — late window

anova_late_full <- anova(m_full_late, type = 2)
anova_late_full

## Type II Analysis of Variance Table with Satterthwaite's method
##                            Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## word_type                  2.5982  2.5982     1 270.99  3.6383   0.05752 .  
## congruence                 1.7254  1.7254     1 270.99  2.4161   0.12126    
## area                       0.7548  0.3774     2 270.73  0.5285   0.59010    
## word_type:congruence      25.8750 25.8750     1 271.01 36.2327 5.669e-09 ***
## word_type:area             4.0374  2.0187     2 270.73  2.8268   0.06095 .  
## congruence:area            0.1784  0.0892     2 270.73  0.1249   0.88261    
## word_type:congruence:area  2.1401  1.0700     2 270.73  1.4984   0.22534    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

eta_squared(anova_late_full)

## # Effect Size for ANOVA (Type II)
## 
## Parameter                 | Eta2 (partial) |       95% CI
## ---------------------------------------------------------
## word_type                 |           0.01 | [0.00, 1.00]
## congruence                |       8.84e-03 | [0.00, 1.00]
## area                      |       3.89e-03 | [0.00, 1.00]
## word_type:congruence      |           0.12 | [0.06, 1.00]
## word_type:area            |           0.02 | [0.00, 1.00]
## congruence:area           |       9.22e-04 | [0.00, 1.00]
## word_type:congruence:area |           0.01 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

6.3 Residual diagnostics — full LMM, late window

par(mfrow = c(1, 2))
hist(residuals(m_full_late), breaks = 10,
     main = "Residuals — late window (full LMM)",
     xlab = "Residuals")
plot(residuals(m_full_late) ~ fitted(m_full_late),
     main = "Residuals vs fitted — late window (full LMM)",
     xlab = "Fitted", ylab = "Residuals")

par(mfrow = c(1, 1))

6.4 Reduced LMM: word_type × congruence — late window

Reference levels are reset so that the contrasts reflect the predicted direction of the bouba-kiki effect (Kiki and Mismatch as references). A separate d_late_relev data frame is used so that the original d_late_clean (with Match / Bouba as references) is left intact for later plots.

d_late_relev <- d_late_clean %>%
  mutate(
    congruence = relevel(congruence, ref = "Mismatch"),
    word_type  = relevel(word_type,  ref = "Kiki")
  )

m_int_late <- lmer(scale(amplitude) ~ word_type * congruence +
                                      (1 | participant_id),
                   data = d_late_relev)
summary(m_int_late)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: scale(amplitude) ~ word_type * congruence + (1 | participant_id)
##    Data: d_late_relev
## 
## REML criterion at convergence: 816.6
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.70158 -0.61366  0.01864  0.58411  2.90391 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  participant_id (Intercept) 0.2043   0.452   
##  Residual                   0.7190   0.848   
## Number of obs: 308, groups:  participant_id, 26
## 
## Fixed effects:
##                                Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                     -0.3110     0.1307  66.5594  -2.380  0.02019
## word_typeBouba                   0.7586     0.1358 278.6458   5.587 5.51e-08
## congruenceMatch                  0.4227     0.1358 278.6458   3.113  0.00205
## word_typeBouba:congruenceMatch  -1.1651     0.1935 279.0077  -6.021 5.44e-09
##                                   
## (Intercept)                    *  
## word_typeBouba                 ***
## congruenceMatch                ** 
## word_typeBouba:congruenceMatch ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) wrd_tB cngrnM
## word_typeBb -0.520              
## congrncMtch -0.520  0.500       
## wrd_typBb:M  0.365 -0.702 -0.702

6.5 ANOVA (Type II) on the reduced LMM — late window

anova_late_int <- anova(m_int_late, type = 2)
anova_late_int

## Type II Analysis of Variance Table with Satterthwaite's method
##                       Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## word_type             2.5947  2.5947     1 279.00  3.6086   0.05851 .  
## congruence            1.7781  1.7781     1 279.00  2.4729   0.11696    
## word_type:congruence 26.0682 26.0682     1 279.01 36.2543 5.442e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

eta_squared(anova_late_int)

## # Effect Size for ANOVA (Type II)
## 
## Parameter            | Eta2 (partial) |       95% CI
## ----------------------------------------------------
## word_type            |           0.01 | [0.00, 1.00]
## congruence           |       8.79e-03 | [0.00, 1.00]
## word_type:congruence |           0.11 | [0.06, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

6.6 Pairwise contrasts (Mismatch − Match by word_type) — late window

emm_late <- emmeans(m_int_late, ~ congruence | word_type)
contrast(emm_late, method = "pairwise", adjust = "none") %>%
  confint(level = 0.95)

## word_type = Kiki:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Mismatch - Match   -0.423 0.136 279   -0.690   -0.155
## 
## word_type = Bouba:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Mismatch - Match    0.742 0.138 280    0.471    1.014
## 
## Note: contrasts are still on the scale(-0.571, 6.54) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates. 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

6.7 Equivalence test (TOST, ROPE = [-0.2, 0.2]) — late window

equivalence_test(m_int_late, range = c(-0.2, 0.2))

## # TOST-test for Practical Equivalence
## 
##   ROPE: [-0.20 0.20]
## 
## Parameter                              |         90% CI |   SGPV | Equivalence |      p
## ---------------------------------------------------------------------------------------
## (Intercept)                            | [-0.53, -0.10] | 0.157  |    Rejected | 0.802 
## word type [Bouba]                      | [ 0.53,  0.98] | < .001 |    Rejected | > .999
## congruence [Match]                     | [ 0.20,  0.65] | 0.026  |    Rejected | 0.949 
## word type [Bouba] × congruence [Match] | [-1.48, -0.85] | < .001 |    Rejected | > .999

7 Early window analyses (150–300 ms)

7.1 Full 3-way LMM

m_full_early <- lmer(scale(amplitude) ~ word_type * congruence * area +
                                        (1 | participant_id),
                     data = d_early_clean)
summary(m_full_early)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## scale(amplitude) ~ word_type * congruence * area + (1 | participant_id)
##    Data: d_early_clean
## 
## REML criterion at convergence: 781.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1650 -0.6263  0.0513  0.5585  2.8725 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  participant_id (Intercept) 0.2047   0.4525  
##  Residual                   0.6295   0.7934  
## Number of obs: 308, groups:  participant_id, 26
## 
## Fixed effects:
##                                                Estimate Std. Error        df
## (Intercept)                                     0.15795    0.18201 183.67383
## word_typeKiki                                   0.53488    0.22241 270.86864
## congruenceMismatch                              0.66513    0.22241 270.86864
## areacentral                                    -0.41045    0.22441 270.62517
## areaparietal                                   -0.65583    0.22241 270.86864
## word_typeKiki:congruenceMismatch               -1.48461    0.31287 270.74831
## word_typeKiki:areacentral                      -0.07916    0.31595 270.72942
## word_typeKiki:areaparietal                     -0.47127    0.31452 270.84104
## congruenceMismatch:areacentral                  0.07694    0.31430 270.62516
## congruenceMismatch:areaparietal                -0.37558    0.31287 270.74831
## word_typeKiki:congruenceMismatch:areacentral    0.22426    0.44347 270.67810
## word_typeKiki:congruenceMismatch:areaparietal   1.09485    0.44246 270.73433
##                                               t value Pr(>|t|)    
## (Intercept)                                     0.868  0.38663    
## word_typeKiki                                   2.405  0.01685 *  
## congruenceMismatch                              2.991  0.00304 ** 
## areacentral                                    -1.829  0.06850 .  
## areaparietal                                   -2.949  0.00347 ** 
## word_typeKiki:congruenceMismatch               -4.745 3.38e-06 ***
## word_typeKiki:areacentral                      -0.251  0.80234    
## word_typeKiki:areaparietal                     -1.498  0.13519    
## congruenceMismatch:areacentral                  0.245  0.80681    
## congruenceMismatch:areaparietal                -1.200  0.23103    
## word_typeKiki:congruenceMismatch:areacentral    0.506  0.61348    
## word_typeKiki:congruenceMismatch:areaparietal   2.474  0.01396 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                           (Intr) wrd_tK cngrnM arcntr arprtl wr_K:M
## word_typeKk               -0.624                                   
## cngrncMsmtc               -0.624  0.511                            
## areacentral               -0.616  0.504  0.504                     
## areaparietl               -0.624  0.511  0.511  0.504              
## wrd_typKk:M                0.443 -0.711 -0.711 -0.359 -0.363       
## wrd_typKk:rc               0.438 -0.703 -0.358 -0.710 -0.358  0.500
## wrd_typKk:rp               0.441 -0.707 -0.361 -0.357 -0.707  0.503
## cngrncMsmtch:rc            0.440 -0.360 -0.707 -0.714 -0.360  0.502
## cngrncMsmtch:rp            0.443 -0.363 -0.711 -0.359 -0.711  0.505
## wrd_typKk:cngrncMsmtch:rc -0.312  0.501  0.501  0.506  0.255 -0.705
## wrd_typKk:cngrncMsmtch:rp -0.314  0.503  0.503  0.254  0.503 -0.707
##                           wrd_typKk:rc wrd_typKk:rp cngrncMsmtch:rc
## word_typeKk                                                        
## cngrncMsmtc                                                        
## areacentral                                                        
## areaparietl                                                        
## wrd_typKk:M                                                        
## wrd_typKk:rc                                                       
## wrd_typKk:rp               0.497                                   
## cngrncMsmtch:rc            0.507        0.255                      
## cngrncMsmtch:rp            0.255        0.503        0.502         
## wrd_typKk:cngrncMsmtch:rc -0.712       -0.354       -0.709         
## wrd_typKk:cngrncMsmtch:rp -0.353       -0.711       -0.355         
##                           cngrncMsmtch:rp wrd_typKk:cngrncMsmtch:rc
## word_typeKk                                                        
## cngrncMsmtc                                                        
## areacentral                                                        
## areaparietl                                                        
## wrd_typKk:M                                                        
## wrd_typKk:rc                                                       
## wrd_typKk:rp                                                       
## cngrncMsmtch:rc                                                    
## cngrncMsmtch:rp                                                    
## wrd_typKk:cngrncMsmtch:rc -0.356                                   
## wrd_typKk:cngrncMsmtch:rp -0.707           0.498

7.2 ANOVA (Type II) on the full LMM — early window

anova_early_full <- anova(m_full_early, type = 2)
anova_early_full

## Type II Analysis of Variance Table with Satterthwaite's method
##                           Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## word_type                  2.442  2.4421     1 270.86  3.8793   0.04990 *  
## congruence                 0.130  0.1298     1 270.84  0.2061   0.65017    
## area                      33.426 16.7129     2 270.74 26.5493 2.961e-11 ***
## word_type:congruence      20.950 20.9503     1 270.87 33.2805 2.175e-08 ***
## word_type:area             0.087  0.0433     2 270.73  0.0688   0.93354    
## congruence:area            0.566  0.2832     2 270.74  0.4498   0.63823    
## word_type:congruence:area  4.298  2.1491     2 270.74  3.4139   0.03434 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

eta_squared(anova_early_full)

## # Effect Size for ANOVA (Type II)
## 
## Parameter                 | Eta2 (partial) |       95% CI
## ---------------------------------------------------------
## word_type                 |           0.01 | [0.00, 1.00]
## congruence                |       7.61e-04 | [0.00, 1.00]
## area                      |           0.16 | [0.10, 1.00]
## word_type:congruence      |           0.11 | [0.06, 1.00]
## word_type:area            |       5.08e-04 | [0.00, 1.00]
## congruence:area           |       3.31e-03 | [0.00, 1.00]
## word_type:congruence:area |           0.02 | [0.00, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

7.3 Residual diagnostics — full LMM, early window

par(mfrow = c(1, 2))
hist(residuals(m_full_early), breaks = 10,
     main = "Residuals — early window (full LMM)",
     xlab = "Residuals")
plot(residuals(m_full_early) ~ fitted(m_full_early),
     main = "Residuals vs fitted — early window (full LMM)",
     xlab = "Fitted", ylab = "Residuals")

par(mfrow = c(1, 1))

7.4 Reduced LMM: word_type × congruence — early window

d_early_relev <- d_early_clean %>%
  mutate(
    congruence = relevel(congruence, ref = "Mismatch"),
    word_type  = relevel(word_type,  ref = "Kiki")
  )

m_int_early <- lmer(scale(amplitude) ~ word_type * congruence +
                                       (1 | participant_id),
                    data = d_early_relev)
summary(m_int_early)

## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: scale(amplitude) ~ word_type * congruence + (1 | participant_id)
##    Data: d_early_relev
## 
## REML criterion at convergence: 827.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1619 -0.6591  0.0126  0.5953  3.2610 
## 
## Random effects:
##  Groups         Name        Variance Std.Dev.
##  participant_id (Intercept) 0.1947   0.4412  
##  Residual                   0.7490   0.8655  
## Number of obs: 308, groups:  participant_id, 26
## 
## Fixed effects:
##                                Estimate Std. Error       df t value Pr(>|t|)
## (Intercept)                     -0.3254     0.1307  69.8197  -2.489 0.015192
## word_typeBouba                   0.6935     0.1386 278.5599   5.004 9.95e-07
## congruenceMatch                  0.4860     0.1395 278.7199   3.483 0.000576
## word_typeBouba:congruenceMatch  -1.0555     0.1974 278.8489  -5.347 1.87e-07
##                                   
## (Intercept)                    *  
## word_typeBouba                 ***
## congruenceMatch                ***
## word_typeBouba:congruenceMatch ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) wrd_tB cngrnM
## word_typeBb -0.530              
## congrncMtch -0.526  0.497       
## wrd_typBb:M  0.372 -0.702 -0.707

7.5 ANOVA (Type II) on the reduced LMM — early window

anova_early_int <- anova(m_int_early, type = 2)
anova_early_int

## Type II Analysis of Variance Table with Satterthwaite's method
##                       Sum Sq Mean Sq NumDF  DenDF F value    Pr(>F)    
## word_type             2.2913  2.2913     1 278.84  3.0591   0.08138 .  
## congruence            0.1336  0.1336     1 278.81  0.1784   0.67309    
## word_type:congruence 21.4105 21.4105     1 278.85 28.5852 1.868e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

eta_squared(anova_early_int)

## # Effect Size for ANOVA (Type II)
## 
## Parameter            | Eta2 (partial) |       95% CI
## ----------------------------------------------------
## word_type            |           0.01 | [0.00, 1.00]
## congruence           |       6.39e-04 | [0.00, 1.00]
## word_type:congruence |           0.09 | [0.05, 1.00]
## 
## - One-sided CIs: upper bound fixed at [1.00].

7.6 Pairwise contrasts (Mismatch − Match by word_type) — early window

emm_early <- emmeans(m_int_early, ~ congruence | word_type)
contrast(emm_early, method = "pairwise", adjust = "none") %>%
  confint(level = 0.95)

## word_type = Kiki:
##  contrast         estimate   SE  df lower.CL upper.CL
##  Mismatch - Match   -0.486 0.14 279   -0.761   -0.211
## 
## word_type = Bouba:
##  contrast         estimate   SE  df lower.CL upper.CL
##  Mismatch - Match    0.569 0.14 279    0.295    0.844
## 
## Note: contrasts are still on the scale(0.411, 5.82) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates. 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

7.7 Equivalence test (TOST, ROPE = [-0.2, 0.2]) — early window

equivalence_test(m_int_early, range = c(-0.2, 0.2))

## # TOST-test for Practical Equivalence
## 
##   ROPE: [-0.20 0.20]
## 
## Parameter                              |         90% CI |   SGPV | Equivalence |      p
## ---------------------------------------------------------------------------------------
## (Intercept)                            | [-0.54, -0.11] | 0.127  |    Rejected | 0.831 
## word type [Bouba]                      | [ 0.46,  0.92] | < .001 |    Rejected | > .999
## congruence [Match]                     | [ 0.26,  0.72] | 0.007  |    Rejected | 0.979 
## word type [Bouba] × congruence [Match] | [-1.38, -0.73] | < .001 |    Rejected | > .999

7.8 Area-stratified post-hocs — early window

Because the three-way Word × Trial × Area interaction is (marginally) significant in the early window, we follow up with per-area pairwise contrasts to identify which scalp regions drive the effect.

emm_area_early <- emmeans(m_full_early, ~ congruence | word_type * area)
contrast(emm_area_early, method = "pairwise", adjust = "none") %>%
  confint(level = 0.95)

## word_type = Bouba, area = frontal:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Match - Mismatch   -0.665 0.222 271  -1.1030   -0.227
## 
## word_type = Kiki, area = frontal:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Match - Mismatch    0.819 0.220 271   0.3863    1.253
## 
## word_type = Bouba, area = central:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Match - Mismatch   -0.742 0.222 271  -1.1799   -0.304
## 
## word_type = Kiki, area = central:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Match - Mismatch    0.518 0.222 271   0.0804    0.956
## 
## word_type = Bouba, area = parietal:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Match - Mismatch   -0.290 0.220 271  -0.7228    0.144
## 
## word_type = Kiki, area = parietal:
##  contrast         estimate    SE  df lower.CL upper.CL
##  Match - Mismatch    0.100 0.222 271  -0.3376    0.538
## 
## Note: contrasts are still on the scale(0.411, 5.82) scale. Consider using
##       regrid() if you want contrasts of back-transformed estimates. 
## Degrees-of-freedom method: kenward-roger 
## Confidence level used: 0.95

8 Vocabulary analyses

8.1 Loading and joining vocabulary data

Three subscales of the parental CDI are used : word comprehension (receptive_vocab_T1), word production (productive_vocab_T1) and onomatopoeic word comprehension (symbolic_vocab_T1).

vocab_raw <- read_excel(path_vocab)
message("Columns found in the vocabulary file: ",
        paste(names(vocab_raw), collapse = ", "))

desired_cols <- c("participant_id",
                  "receptive_vocab_T1",
                  "productive_vocab_T1",
                  "symbolic_vocab_T1",
                  "Age",
                  "Gender")
available_cols <- intersect(desired_cols, names(vocab_raw))

vocab_T1 <- vocab_raw %>%
  select(all_of(available_cols)) %>%
  mutate(across(any_of(c("receptive_vocab_T1",
                         "productive_vocab_T1",
                         "symbolic_vocab_T1")),
                ~ as.numeric(.)))

vocab_outcomes <- intersect(
  c("receptive_vocab_T1", "productive_vocab_T1", "symbolic_vocab_T1"),
  names(vocab_T1)
)
vocab_outcomes

## [1] "receptive_vocab_T1"  "productive_vocab_T1" "symbolic_vocab_T1"

8.2 Building the per-participant amplitude differences

For each participant and each word type, we compute the average incongruent − congruent (Mismatch − Match) amplitude across the three scalp regions and word type, then join the vocabulary scores.

build_amp_diff <- function(data) {
  data %>%
    group_by(participant_id, word_type, congruence) %>%
    summarise(mean_amp = mean(amplitude, na.rm = TRUE),
              .groups = "drop") %>%
    pivot_wider(names_from  = c(word_type, congruence),
                values_from = mean_amp,
                names_sep   = "_") %>%
    mutate(
      bouba_diff = Bouba_Mismatch - Bouba_Match,
      kiki_diff  = Kiki_Mismatch  - Kiki_Match
    ) %>%
    select(participant_id, bouba_diff, kiki_diff)
}

d_amp_late  <- build_amp_diff(d_late_clean)  %>% left_join(vocab_T1, by = "participant_id")
d_amp_early <- build_amp_diff(d_early_clean) %>% left_join(vocab_T1, by = "participant_id")

8.3 Sanity check: participants with no vocabulary match

d_amp_late %>% anti_join(vocab_T1, by = "participant_id")

## # A tibble: 0 × 8
## # ℹ 8 variables: participant_id <chr>, bouba_diff <dbl>, kiki_diff <dbl>,
## #   receptive_vocab_T1 <dbl>, productive_vocab_T1 <dbl>,
## #   symbolic_vocab_T1 <dbl>, Age <dbl>, Gender <chr>

8.4 Descriptive statistics — vocabulary

vocab_T1 %>%
  summarise(across(where(is.numeric),
                   list(M  = ~ mean(., na.rm = TRUE),
                        SD = ~ sd(.,   na.rm = TRUE),
                        n  = ~ sum(!is.na(.))),
                   .names = "{.col}_{.fn}"))

## # A tibble: 1 × 12
##   receptive_vocab_T1_M receptive_vocab_T1_SD receptive_vocab_T1_n
##                  <dbl>                 <dbl>                <int>
## 1                 39.1                  46.1                   24
## # ℹ 9 more variables: productive_vocab_T1_M <dbl>,
## #   productive_vocab_T1_SD <dbl>, productive_vocab_T1_n <int>,
## #   symbolic_vocab_T1_M <dbl>, symbolic_vocab_T1_SD <dbl>,
## #   symbolic_vocab_T1_n <int>, Age_M <dbl>, Age_SD <dbl>, Age_n <int>

# Age summary in months (assuming Age is in days)
if ("Age" %in% names(vocab_T1)) {
  vocab_T1 %>%
    summarise(
      mean_month   = mean(Age,   na.rm = TRUE) / 30.44,
      median_month = median(Age, na.rm = TRUE) / 30.44,
      min_month    = min(Age,    na.rm = TRUE) / 30.44,
      max_month    = max(Age,    na.rm = TRUE) / 30.44
    )
}

## # A tibble: 1 × 4
##   mean_month median_month min_month max_month
##        <dbl>        <dbl>     <dbl>     <dbl>
## 1       12.1         12.0      11.5      12.7

8.5 Linear models — vocabulary predicted by amplitude difference

For each (word_type effect × vocabulary subscale × time window) we fit a simple linear regression on z-scored variables: scale(vocab) ~ scale(amplitude_diff).

fit_vocab_lm <- function(data, vocab_var, amp_var) {
  # Variables are z-scored inside the formula, so the resulting
  # coefficients in summary() are already standardised betas.
  formula <- as.formula(
    paste0("scale(", vocab_var, ") ~ scale(", amp_var, ")")
  )
  m <- lm(formula, data = data)
  cat("Model:", vocab_var, "~", amp_var, "\n")
  print(summary(m))
  invisible(m)
}

8.5.1 Late window — bouba effect

for (v in vocab_outcomes) fit_vocab_lm(d_amp_late, v, "bouba_diff")

## Model: receptive_vocab_T1 ~ bouba_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8361 -0.5141 -0.3227  0.1130  3.3377 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)
## (Intercept)        0.0003929  0.2088210   0.002    0.999
## scale(bouba_diff) -0.0139051  0.2541153  -0.055    0.957
## 
## Residual standard error: 1.022 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0001361,  Adjusted R-squared:  -0.04531 
## F-statistic: 0.002994 on 1 and 22 DF,  p-value: 0.9569
## 
## Model: productive_vocab_T1 ~ bouba_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2064 -0.5989 -0.3836  0.5991  1.9297 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -0.01063    0.19816  -0.054    0.958
## scale(bouba_diff)  0.37625    0.24114   1.560    0.133
## 
## Residual standard error: 0.9702 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.09963,    Adjusted R-squared:  0.05871 
## F-statistic: 2.435 on 1 and 22 DF,  p-value: 0.133
## 
## Model: symbolic_vocab_T1 ~ bouba_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1920 -0.5339 -0.3270  0.1966  2.8790 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)        -0.0127     0.1934  -0.066   0.9483  
## scale(bouba_diff)   0.4494     0.2354   1.909   0.0694 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.947 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1421, Adjusted R-squared:  0.1031 
## F-statistic: 3.644 on 1 and 22 DF,  p-value: 0.06939

8.5.2 Late window — kiki effect

for (v in vocab_outcomes) fit_vocab_lm(d_amp_late, v, "kiki_diff")

## Model: receptive_vocab_T1 ~ kiki_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.8265 -0.5083 -0.3870  0.1065  3.2532 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -0.01091    0.21090  -0.052    0.959
## scale(kiki_diff)  0.07831    0.24078   0.325    0.748
## 
## Residual standard error: 1.02 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.004786,   Adjusted R-squared:  -0.04045 
## F-statistic: 0.1058 on 1 and 22 DF,  p-value: 0.7481
## 
## Model: productive_vocab_T1 ~ kiki_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9394 -0.6965 -0.4870  0.4214  2.5339 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.01883    0.20989   0.090    0.929
## scale(kiki_diff) -0.13522    0.23963  -0.564    0.578
## 
## Residual standard error: 1.015 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01427,    Adjusted R-squared:  -0.03054 
## F-statistic: 0.3184 on 1 and 22 DF,  p-value: 0.5783
## 
## Model: symbolic_vocab_T1 ~ kiki_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9020 -0.5692 -0.3406  0.1373  2.7563 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)      -0.02192    0.20935  -0.105    0.918
## scale(kiki_diff)  0.15737    0.23902   0.658    0.517
## 
## Residual standard error: 1.013 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01932,    Adjusted R-squared:  -0.02525 
## F-statistic: 0.4335 on 1 and 22 DF,  p-value: 0.5171

8.5.3 Early window — bouba effect

for (v in vocab_outcomes) fit_vocab_lm(d_amp_early, v, "bouba_diff")

## Model: receptive_vocab_T1 ~ bouba_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9827 -0.6012 -0.2597  0.1003  3.3321 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)       -0.00581    0.20795  -0.028    0.978
## scale(bouba_diff) -0.11577    0.23838  -0.486    0.632
## 
## Residual standard error: 1.017 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01061,    Adjusted R-squared:  -0.03437 
## F-statistic: 0.2359 on 1 and 22 DF,  p-value: 0.632
## 
## Model: productive_vocab_T1 ~ bouba_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -0.9255 -0.6140 -0.2976  0.2102  2.4130 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)  
## (Intercept)         0.0223     0.1920   0.116   0.9086  
## scale(bouba_diff)   0.4444     0.2201   2.019   0.0559 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9392 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.1563, Adjusted R-squared:  0.1179 
## F-statistic: 4.075 on 1 and 22 DF,  p-value: 0.05586
## 
## Model: symbolic_vocab_T1 ~ bouba_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.70243 -0.62902 -0.42485  0.09248  2.92679 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.004691   0.208334   0.023    0.982
## scale(bouba_diff) 0.093469   0.238823   0.391    0.699
## 
## Residual standard error: 1.019 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.006914,   Adjusted R-squared:  -0.03823 
## F-statistic: 0.1532 on 1 and 22 DF,  p-value: 0.6993

8.5.4 Early window — kiki effect

for (v in vocab_outcomes) fit_vocab_lm(d_amp_early, v, "kiki_diff")

## Model: receptive_vocab_T1 ~ kiki_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.1677 -0.5326 -0.2368  0.1602  3.3522 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.00729    0.20697   0.035    0.972
## scale(kiki_diff) -0.13680    0.20723  -0.660    0.516
## 
## Residual standard error: 1.012 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.01942,    Adjusted R-squared:  -0.02515 
## F-statistic: 0.4358 on 1 and 22 DF,  p-value: 0.516
## 
## Model: productive_vocab_T1 ~ kiki_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.2767 -0.5757 -0.3543  0.3225  2.5063 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.01207    0.20337   0.059    0.953
## scale(kiki_diff) -0.22639    0.20363  -1.112    0.278
## 
## Residual standard error: 0.9949 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.0532, Adjusted R-squared:  0.01016 
## F-statistic: 1.236 on 1 and 22 DF,  p-value: 0.2782
## 
## Model: symbolic_vocab_T1 ~ kiki_diff 
## 
## Call:
## lm(formula = formula, data = data)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.77109 -0.62971 -0.35582  0.05431  2.92863 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)
## (Intercept)       0.002434   0.208783   0.012    0.991
## scale(kiki_diff) -0.045662   0.209046  -0.218    0.829
## 
## Residual standard error: 1.021 on 22 degrees of freedom
##   (2 observations deleted due to missingness)
## Multiple R-squared:  0.002164,   Adjusted R-squared:  -0.04319 
## F-statistic: 0.04771 on 1 and 22 DF,  p-value: 0.8291

8.6 Pearson correlations (complementary summary)

cor_vocab <- function(data, vocab_var, amp_var) {
  ct <- cor.test(data[[vocab_var]], data[[amp_var]],
                 method = "pearson", use = "complete.obs")
  data.frame(
    vocab = vocab_var,
    amp   = amp_var,
    r     = unname(ct$estimate),
    p     = ct$p.value,
    n     = sum(complete.cases(data[, c(vocab_var, amp_var)]))
  )
}

cor_results <- bind_rows(
  lapply(vocab_outcomes, function(v) {
    bind_rows(
      cor_vocab(d_amp_late,  v, "bouba_diff") %>% mutate(window = "late"),
      cor_vocab(d_amp_late,  v, "kiki_diff")  %>% mutate(window = "late"),
      cor_vocab(d_amp_early, v, "bouba_diff") %>% mutate(window = "early"),
      cor_vocab(d_amp_early, v, "kiki_diff")  %>% mutate(window = "early")
    )
  })
)
cor_results

##                  vocab        amp           r          p  n window
## 1   receptive_vocab_T1 bouba_diff -0.01166546 0.95685575 24   late
## 2   receptive_vocab_T1  kiki_diff  0.06917767 0.74806395 24   late
## 3   receptive_vocab_T1 bouba_diff -0.10299092 0.63201508 24  early
## 4   receptive_vocab_T1  kiki_diff -0.13936381 0.51603168 24  early
## 5  productive_vocab_T1 bouba_diff  0.31565005 0.13295996 24   late
## 6  productive_vocab_T1  kiki_diff -0.11944201 0.57827697 24   late
## 7  productive_vocab_T1 bouba_diff  0.39534087 0.05586260 24  early
## 8  productive_vocab_T1  kiki_diff -0.23064284 0.27823256 24  early
## 9    symbolic_vocab_T1 bouba_diff  0.37697912 0.06938726 24   late
## 10   symbolic_vocab_T1  kiki_diff  0.13900822 0.51711363 24   late
## 11   symbolic_vocab_T1 bouba_diff  0.08315239 0.69928522 24  early
## 12   symbolic_vocab_T1  kiki_diff -0.04651945 0.82910605 24  early

9 Final figures

The figures below are similar to the ones reported in the thesis.

9.1 Figure 1 — boxplots faceted by area × word_type (late window)

label_word <- c(Bouba = "Bouba-type trials",
                Kiki  = "Kiki-type trials")
label_area <- c(frontal  = "Frontal area",
                central  = "Central area",
                parietal = "Parietal area")

plot_area_by_wordtype <- function(data, plot_title) {
  ggplot(data, aes(x = congruence, y = amplitude)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
    geom_boxplot(aes(fill = congruence),
                 width = 0.3, outlier.shape = NA, alpha = 0.3,
                 position = position_dodge(width = 0.4)) +
    geom_line(aes(group = participant_id), colour = "grey80", size = 0.3) +
    geom_point(aes(color = congruence),
               position = position_jitterdodge(jitter.width = 0.1,
                                               dodge.width  = 0.4),
               size = 1.5, alpha = 0.6) +
    stat_summary(fun = mean, geom = "point",
                 shape = 18, size = 3.5, color = "black",
                 position = position_dodge(width = 0.4)) +
    scale_x_discrete(labels = c(Match = "Congruent", Mismatch = "Incongruent")) +
    scale_color_manual(values = c(Match = "darkgreen", Mismatch = "firebrick")) +
    scale_fill_manual( values = c(Match = "darkgreen", Mismatch = "firebrick")) +
    facet_grid(area ~ word_type,
               labeller = labeller(word_type = label_word,
                                   area      = label_area),
               scales = "free_y") +
    theme_minimal(base_size = 14) +
    labs(x = "Congruence", y = "Mean amplitude (µV)",
         title = plot_title) +
    theme(
      legend.position    = "none",
      strip.text.x       = element_text(size = 14, face = "bold"),
      strip.text.y       = element_text(size = 14, face = "bold", angle = 0),
      panel.grid.major.y = element_line(color = "grey90"),
      panel.grid.minor   = element_blank(),
      panel.spacing      = unit(1.5, "lines"),
      plot.title         = element_text(hjust = 0.5, face = "bold")
    )
}

plot_area_by_wordtype(d_late_clean, "Late window (300–700 ms)")

9.2 Figure 2 — boxplots faceted by area × word_type (early window)

plot_area_by_wordtype(d_early_clean, "Early window (150–300 ms)")

9.3 Figure 3 — pooled-areas boxplots (one point per participant, late window)

plot_pooled <- function(data, plot_title) {
  data %>%
    group_by(participant_id, word_type, congruence) %>%
    summarise(amplitude = mean(amplitude, na.rm = TRUE), .groups = "drop") %>%
    ggplot(aes(x = congruence, y = amplitude)) +
    geom_boxplot(aes(fill = congruence),
                 width = 0.2, outlier.shape = NA, alpha = 0.4, coef = 2.5) +
    geom_line(aes(group = participant_id), colour = "grey70", size = 0.4) +
    geom_point(aes(color = congruence),
               position = position_jitter(width = 0.05),
               size = 1.8, alpha = 0.7) +
    stat_summary(fun = mean, geom = "point",
                 shape = 18, size = 3, color = "black") +
    scale_x_discrete(labels = c(Match = "Congruent", Mismatch = "Incongruent")) +
    scale_color_manual(values = c(Match = "darkgreen", Mismatch = "firebrick")) +
    scale_fill_manual( values = c(Match = "darkgreen", Mismatch = "firebrick")) +
    facet_wrap(~ word_type) +
    theme_classic(base_size = 14) +
    labs(x = "Congruence", y = "Mean amplitude (µV)",
         title = plot_title) +
    theme(legend.position = "none",
          strip.text      = element_text(size = 14, face = "bold"),
          plot.title      = element_text(hjust = 0.5, face = "bold"))
}

plot_pooled(d_late_clean, "Late window (300–700 ms)")

9.4 Figure 4 — pooled-areas boxplots (early window)

plot_pooled(d_early_clean, "Early window (150–300 ms)")

9.5 Figure 5 — vocabulary × amplitude difference (illustrative scatter)

We plot one combination as illustration. To plot a different combination, change vocab_var, effect_var, and the data frame (d_amp_late or d_amp_early).

plot_vocab_scatter <- function(data, vocab_var, effect_var,
                                xlab, ylab, plot_title) {
  data <- data %>% filter(!is.na(.data[[vocab_var]]),
                          !is.na(.data[[effect_var]]))
  r <- cor(data[[vocab_var]], data[[effect_var]])
  p <- cor.test(data[[vocab_var]], data[[effect_var]])$p.value

  ggplot(data, aes(x = .data[[vocab_var]], y = .data[[effect_var]])) +
    geom_point(color = "#336699", size = 2, alpha = 0.7) +
    geom_smooth(method = "lm", se = TRUE,
                color = "black", fill = "#CCCCCC") +
    theme_classic(base_size = 13) +
    labs(x = xlab, y = ylab,
         title    = plot_title,
         subtitle = paste0("r = ", round(r, 3),
                           ", p = ", signif(p, 3))) +
    theme(plot.title = element_text(face = "bold", hjust = 0.5))
}

plot_vocab_scatter(
  d_amp_late, "receptive_vocab_T1", "bouba_diff",
  xlab = "Receptive vocabulary at 12 months",
  ylab = "Bouba effect (Mismatch − Match, late window)",
  plot_title = "Receptive vocabulary and bouba effect (late window)"
)

Bouba-Kiki (N = 26)

Constance Jourdan

2026-05-12