For each of the two time windows (early: 150–300 ms; late: 300–700
ms), EEG mean amplitudes were exported from NetStation as a
SS_Stats_*.txt file containing four condition blocks
(BoubaMatch, BoubaMismatch,
KikiMatch, KikiMismatch) and three scalp
regions (frontal, central, parietal). Vocabulary scores at 12 months
(T1) come from an Excel file (vocab_12mo.xlsx).
The pipeline is structured as:
To re-use this pipeline with a different sample, only the file paths in the “Configuration” chunk need to be updated.
library(tidyr)
library(dplyr)
library(readxl)
library(ggplot2)
library(lme4)
library(lmerTest)
library(emmeans)
library(effectsize)
# --- File paths --------------------------------------------------------------
# Late time window (300–700 ms post-onset)
path_ss_late <- "SS_Stats_n26.txt"
# Early time window (150–300 ms post-onset)
path_ss_early <- "SS_Stats_n26_early.txt"
# Vocabulary scores at 12 months (T1)
path_vocab <- "vocab_12mo.xlsx"
# --- Analysis parameters ----------------------------------------------------
# Names of the four experimental conditions, as they appear as block headers
# in the SS_Stats text file.
condition_blocks <- c("BoubaMatch", "BoubaMismatch", "KikiMatch", "KikiMismatch")
# Names of the three scalp regions (in the order they appear in the file).
region_cols <- c("SS_Frontal", "SS_Central", "SS_Parietal")
# Threshold (in absolute z-score) to flag outliers
outlier_z_threshold <- 3
The function reads a NetStation export and returns a long data frame with one row per (participant × condition × region) combination.
parse_eeg_file <- function(file_path,
blocks = condition_blocks,
value_cols = region_cols) {
lines <- readLines(file_path)
# Index of the first line of each condition block
block_starts <- which(trimws(lines) %in% blocks)
# Add a virtual "end" so the last block has an upper bound
block_bounds <- c(block_starts, length(lines) + 3)
# Helper: parse one block (one condition) into a data frame
parse_one_block <- function(start, end, condition_name) {
# The data rows of a block are between (start + 3) and (end - 3),
# because each block has a 3-line header and an empty footer.
block_lines <- lines[(start + 3):(end - 3)]
do.call(rbind, lapply(block_lines, function(line) {
parts <- strsplit(trimws(line), "\\s+")[[1]]
id <- parts[2] # participant id
values <- as.numeric(tail(parts, length(value_cols)))
row <- data.frame(participant = id,
condition = condition_name,
stringsAsFactors = FALSE)
# Fill one column per region
for (i in seq_along(value_cols)) {
row[[value_cols[i]]] <- values[i]
}
row
}))
}
# Apply parse_one_block() to each condition block, then bind rows
wide <- do.call(rbind, lapply(seq_len(length(block_bounds) - 1), function(i) {
parse_one_block(start = block_bounds[i],
end = block_bounds[i + 1],
condition_name = trimws(lines[block_bounds[i]]))
}))
# Pivot to long format: one row per (participant × condition × region)
wide %>%
pivot_longer(cols = all_of(value_cols),
names_to = "area",
values_to = "amplitude")
}
This function adds clean participant IDs and recodes the categorical variables (condition, word_type, congruence, area).
prepare_eeg_data <- function(data) {
# 1. Participant IDs as S1_S12, S2_S12, ...
unique_p <- unique(data$participant)
id_map <- setNames(paste0("S", seq_along(unique_p), "_S12"), unique_p)
data$participant_id <- id_map[data$participant]
# 2. Make condition a factor
data$condition <- factor(data$condition, levels = condition_blocks)
# 3. Build word_type (Bouba / Kiki) from the condition label
data$word_type <- factor(
ifelse(grepl("Bouba", data$condition), "Bouba", "Kiki"),
levels = c("Bouba", "Kiki")
)
# 4. Build congruence (Match / Mismatch) from the condition label
data$congruence <- factor(
ifelse(grepl("Match", data$condition), "Match", "Mismatch"),
levels = c("Match", "Mismatch")
)
# 5. Recode the region labels to short, lowercase names
data$area <- recode_factor(data$area,
"SS_Frontal" = "frontal",
"SS_Central" = "central",
"SS_Parietal" = "parietal")
data
}
A z-score is computed within each cell defined by
group_vars. Trials with |z| > threshold
(default 3) are flagged as outliers and their amplitude is replaced by
NA, so that later models drop them automatically. The
default grouping is by area only (3 cells, ~104
observations each): a trial is treated as an outlier if its amplitude is
extreme relative to the overall amplitude distribution within its scalp
region. This captures recording-level artifacts.
To group by (condition × area) instead — i.e. 12 cells, ~26
observations each — call
remove_outliers_zscore(data, group_vars = c("condition", "area")).
remove_outliers_zscore <- function(data,
threshold = outlier_z_threshold,
group_vars = "area") {
flagged <- data %>%
group_by(across(all_of(group_vars))) %>%
mutate(
mean_amp = mean(amplitude, na.rm = TRUE),
sd_amp = sd(amplitude, na.rm = TRUE),
z_score = (amplitude - mean_amp) / sd_amp,
outlier = abs(z_score) > threshold
) %>%
ungroup()
outlier_ids <- flagged %>%
filter(outlier) %>%
distinct(participant_id)
message("Grouping: ", paste(group_vars, collapse = " × "))
message("Participants with at least one outlier trial: ",
paste(outlier_ids$participant_id, collapse = ", "))
message("Number of trials flagged as outliers: ",
sum(flagged$outlier, na.rm = TRUE))
flagged %>%
mutate(amplitude = if_else(outlier, NA_real_, amplitude)) %>%
select(-mean_amp, -sd_amp, -z_score, -outlier)
}
d_late_raw <- parse_eeg_file(path_ss_late)
d_late <- prepare_eeg_data(d_late_raw)
d_late_clean <- remove_outliers_zscore(d_late)
d_early_raw <- parse_eeg_file(path_ss_early)
d_early <- prepare_eeg_data(d_early_raw)
d_early_clean <- remove_outliers_zscore(d_early)
str(d_late_clean)
## tibble [312 × 7] (S3: tbl_df/tbl/data.frame)
## $ participant : chr [1:312] "SS_S48_S12" "SS_S48_S12" "SS_S48_S12" "S42_SS_20250415_ref.mff" ...
## $ condition : Factor w/ 4 levels "BoubaMatch","BoubaMismatch",..: 1 1 1 1 1 1 1 1 1 1 ...
## $ area : Factor w/ 3 levels "frontal","central",..: 1 2 3 1 2 3 1 2 3 1 ...
## $ amplitude : num [1:312] 9.549 0.887 -1.748 1.282 -7.56 ...
## $ participant_id: Named chr [1:312] "S1_S12" "S1_S12" "S1_S12" "S2_S12" ...
## ..- attr(*, "names")= chr [1:312] "SS_S48_S12" "SS_S48_S12" "SS_S48_S12" "S42_SS_20250415_ref.mff" ...
## $ word_type : Factor w/ 2 levels "Bouba","Kiki": 1 1 1 1 1 1 1 1 1 1 ...
## $ congruence : Factor w/ 2 levels "Match","Mismatch": 1 1 1 1 1 1 1 1 1 1 ...
length(unique(d_late_clean$participant_id)) # number of participants
## [1] 26
table(d_late_clean$condition)
##
## BoubaMatch BoubaMismatch KikiMatch KikiMismatch
## 78 78 78 78
table(d_late_clean$word_type)
##
## Bouba Kiki
## 156 156
table(d_late_clean$congruence)
##
## Match Mismatch
## 156 156
table(d_late_clean$area)
##
## frontal central parietal
## 104 104 104
table(d_early_clean$condition)
##
## BoubaMatch BoubaMismatch KikiMatch KikiMismatch
## 78 78 78 78
table(d_early_clean$word_type)
##
## Bouba Kiki
## 156 156
table(d_early_clean$congruence)
##
## Match Mismatch
## 156 156
table(d_early_clean$area)
##
## frontal central parietal
## 104 104 104
d_late_clean %>%
group_by(word_type, congruence) %>%
summarise(M = mean(amplitude, na.rm = TRUE),
SD = sd(amplitude, na.rm = TRUE),
n = sum(!is.na(amplitude)),
.groups = "drop")
## # A tibble: 4 × 5
## word_type congruence M SD n
## <fct> <fct> <dbl> <dbl> <int>
## 1 Bouba Match -2.28 6.50 74
## 2 Bouba Mismatch 2.35 4.55 78
## 3 Kiki Match 0.159 6.09 78
## 4 Kiki Mismatch -2.60 7.50 78
d_early_clean %>%
group_by(word_type, congruence) %>%
summarise(M = mean(amplitude, na.rm = TRUE),
SD = sd(amplitude, na.rm = TRUE),
n = sum(!is.na(amplitude)),
.groups = "drop")
## # A tibble: 4 × 5
## word_type congruence M SD n
## <fct> <fct> <dbl> <dbl> <int>
## 1 Bouba Match -0.687 5.16 76
## 2 Bouba Mismatch 2.55 5.80 78
## 3 Kiki Match 1.25 5.53 76
## 4 Kiki Mismatch -1.48 5.96 78
Random-intercept LMM with all three factors (word_type, congruence, area) and their interactions. Amplitude is z-scored so that fixed-effect coefficients are standardised.
m_full_late <- lmer(scale(amplitude) ~ word_type * congruence * area +
(1 | participant_id),
data = d_late_clean)
summary(m_full_late)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## scale(amplitude) ~ word_type * congruence * area + (1 | participant_id)
## Data: d_late_clean
##
## REML criterion at convergence: 816.2
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.54231 -0.59125 0.07641 0.56824 2.89477
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.2044 0.4521
## Residual 0.7141 0.8451
## Number of obs: 308, groups: participant_id, 26
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) -0.1266 0.1944 203.1168
## word_typeKiki 0.2533 0.2396 271.1807
## congruenceMismatch 0.8041 0.2396 271.1807
## areacentral -0.3046 0.2440 270.6383
## areaparietal -0.1976 0.2396 271.1807
## word_typeKiki:congruenceMismatch -1.3848 0.3352 270.9160
## word_typeKiki:areacentral 0.2471 0.3383 270.6383
## word_typeKiki:areaparietal 0.2099 0.3352 270.9160
## congruenceMismatch:areacentral 0.1291 0.3383 270.6383
## congruenceMismatch:areaparietal -0.3168 0.3352 270.9160
## word_typeKiki:congruenceMismatch:areacentral -0.0289 0.4736 270.6383
## word_typeKiki:congruenceMismatch:areaparietal 0.6907 0.4714 270.7788
## t value Pr(>|t|)
## (Intercept) -0.651 0.515564
## word_typeKiki 1.058 0.291218
## congruenceMismatch 3.357 0.000902 ***
## areacentral -1.249 0.212905
## areaparietal -0.825 0.410173
## word_typeKiki:congruenceMismatch -4.132 4.8e-05 ***
## word_typeKiki:areacentral 0.730 0.465742
## word_typeKiki:areaparietal 0.626 0.531661
## congruenceMismatch:areacentral 0.382 0.703102
## congruenceMismatch:areaparietal -0.945 0.345439
## word_typeKiki:congruenceMismatch:areacentral -0.061 0.951390
## word_typeKiki:congruenceMismatch:areaparietal 1.465 0.144022
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) wrd_tK cngrnM arcntr arprtl wr_K:M
## word_typeKk -0.643
## cngrncMsmtc -0.643 0.521
## areacentral -0.627 0.509 0.509
## areaparietl -0.643 0.521 0.521 0.509
## wrd_typKk:M 0.459 -0.715 -0.715 -0.364 -0.373
## wrd_typKk:rc 0.452 -0.706 -0.367 -0.721 -0.367 0.505
## wrd_typKk:rp 0.459 -0.715 -0.373 -0.364 -0.715 0.511
## cngrncMsmtch:rc 0.452 -0.367 -0.706 -0.721 -0.367 0.505
## cngrncMsmtch:rp 0.459 -0.373 -0.715 -0.364 -0.715 0.511
## wrd_typKk:cngrncMsmtch:rc -0.323 0.504 0.504 0.515 0.262 -0.707
## wrd_typKk:cngrncMsmtch:rp -0.327 0.508 0.508 0.259 0.508 -0.711
## wrd_typKk:rc wrd_typKk:rp cngrncMsmtch:rc
## word_typeKk
## cngrncMsmtc
## areacentral
## areaparietl
## wrd_typKk:M
## wrd_typKk:rc
## wrd_typKk:rp 0.505
## cngrncMsmtch:rc 0.520 0.262
## cngrncMsmtch:rp 0.262 0.511 0.505
## wrd_typKk:cngrncMsmtch:rc -0.714 -0.360 -0.714
## wrd_typKk:cngrncMsmtch:rp -0.359 -0.711 -0.359
## cngrncMsmtch:rp wrd_typKk:cngrncMsmtch:rc
## word_typeKk
## cngrncMsmtc
## areacentral
## areaparietl
## wrd_typKk:M
## wrd_typKk:rc
## wrd_typKk:rp
## cngrncMsmtch:rc
## cngrncMsmtch:rp
## wrd_typKk:cngrncMsmtch:rc -0.360
## wrd_typKk:cngrncMsmtch:rp -0.711 0.502
anova_late_full <- anova(m_full_late, type = 2)
anova_late_full
## Type II Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## word_type 2.5982 2.5982 1 270.99 3.6383 0.05752 .
## congruence 1.7254 1.7254 1 270.99 2.4161 0.12126
## area 0.7548 0.3774 2 270.73 0.5285 0.59010
## word_type:congruence 25.8750 25.8750 1 271.01 36.2327 5.669e-09 ***
## word_type:area 4.0374 2.0187 2 270.73 2.8268 0.06095 .
## congruence:area 0.1784 0.0892 2 270.73 0.1249 0.88261
## word_type:congruence:area 2.1401 1.0700 2 270.73 1.4984 0.22534
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eta_squared(anova_late_full)
## # Effect Size for ANOVA (Type II)
##
## Parameter | Eta2 (partial) | 95% CI
## ---------------------------------------------------------
## word_type | 0.01 | [0.00, 1.00]
## congruence | 8.84e-03 | [0.00, 1.00]
## area | 3.89e-03 | [0.00, 1.00]
## word_type:congruence | 0.12 | [0.06, 1.00]
## word_type:area | 0.02 | [0.00, 1.00]
## congruence:area | 9.22e-04 | [0.00, 1.00]
## word_type:congruence:area | 0.01 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
par(mfrow = c(1, 2))
hist(residuals(m_full_late), breaks = 10,
main = "Residuals — late window (full LMM)",
xlab = "Residuals")
plot(residuals(m_full_late) ~ fitted(m_full_late),
main = "Residuals vs fitted — late window (full LMM)",
xlab = "Fitted", ylab = "Residuals")
par(mfrow = c(1, 1))
Reference levels are reset so that the contrasts reflect the
predicted direction of the bouba-kiki effect (Kiki and Mismatch as
references). A separate d_late_relev data frame is used so
that the original d_late_clean (with Match / Bouba as
references) is left intact for later plots.
d_late_relev <- d_late_clean %>%
mutate(
congruence = relevel(congruence, ref = "Mismatch"),
word_type = relevel(word_type, ref = "Kiki")
)
m_int_late <- lmer(scale(amplitude) ~ word_type * congruence +
(1 | participant_id),
data = d_late_relev)
summary(m_int_late)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: scale(amplitude) ~ word_type * congruence + (1 | participant_id)
## Data: d_late_relev
##
## REML criterion at convergence: 816.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -2.70158 -0.61366 0.01864 0.58411 2.90391
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.2043 0.452
## Residual 0.7190 0.848
## Number of obs: 308, groups: participant_id, 26
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.3110 0.1307 66.5594 -2.380 0.02019
## word_typeBouba 0.7586 0.1358 278.6458 5.587 5.51e-08
## congruenceMatch 0.4227 0.1358 278.6458 3.113 0.00205
## word_typeBouba:congruenceMatch -1.1651 0.1935 279.0077 -6.021 5.44e-09
##
## (Intercept) *
## word_typeBouba ***
## congruenceMatch **
## word_typeBouba:congruenceMatch ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) wrd_tB cngrnM
## word_typeBb -0.520
## congrncMtch -0.520 0.500
## wrd_typBb:M 0.365 -0.702 -0.702
anova_late_int <- anova(m_int_late, type = 2)
anova_late_int
## Type II Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## word_type 2.5947 2.5947 1 279.00 3.6086 0.05851 .
## congruence 1.7781 1.7781 1 279.00 2.4729 0.11696
## word_type:congruence 26.0682 26.0682 1 279.01 36.2543 5.442e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eta_squared(anova_late_int)
## # Effect Size for ANOVA (Type II)
##
## Parameter | Eta2 (partial) | 95% CI
## ----------------------------------------------------
## word_type | 0.01 | [0.00, 1.00]
## congruence | 8.79e-03 | [0.00, 1.00]
## word_type:congruence | 0.11 | [0.06, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
emm_late <- emmeans(m_int_late, ~ congruence | word_type)
contrast(emm_late, method = "pairwise", adjust = "none") %>%
confint(level = 0.95)
## word_type = Kiki:
## contrast estimate SE df lower.CL upper.CL
## Mismatch - Match -0.423 0.136 279 -0.690 -0.155
##
## word_type = Bouba:
## contrast estimate SE df lower.CL upper.CL
## Mismatch - Match 0.742 0.138 280 0.471 1.014
##
## Note: contrasts are still on the scale(-0.571, 6.54) scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
equivalence_test(m_int_late, range = c(-0.2, 0.2))
## # TOST-test for Practical Equivalence
##
## ROPE: [-0.20 0.20]
##
## Parameter | 90% CI | SGPV | Equivalence | p
## ---------------------------------------------------------------------------------------
## (Intercept) | [-0.53, -0.10] | 0.157 | Rejected | 0.802
## word type [Bouba] | [ 0.53, 0.98] | < .001 | Rejected | > .999
## congruence [Match] | [ 0.20, 0.65] | 0.026 | Rejected | 0.949
## word type [Bouba] × congruence [Match] | [-1.48, -0.85] | < .001 | Rejected | > .999
m_full_early <- lmer(scale(amplitude) ~ word_type * congruence * area +
(1 | participant_id),
data = d_early_clean)
summary(m_full_early)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula:
## scale(amplitude) ~ word_type * congruence * area + (1 | participant_id)
## Data: d_early_clean
##
## REML criterion at convergence: 781.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1650 -0.6263 0.0513 0.5585 2.8725
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.2047 0.4525
## Residual 0.6295 0.7934
## Number of obs: 308, groups: participant_id, 26
##
## Fixed effects:
## Estimate Std. Error df
## (Intercept) 0.15795 0.18201 183.67383
## word_typeKiki 0.53488 0.22241 270.86864
## congruenceMismatch 0.66513 0.22241 270.86864
## areacentral -0.41045 0.22441 270.62517
## areaparietal -0.65583 0.22241 270.86864
## word_typeKiki:congruenceMismatch -1.48461 0.31287 270.74831
## word_typeKiki:areacentral -0.07916 0.31595 270.72942
## word_typeKiki:areaparietal -0.47127 0.31452 270.84104
## congruenceMismatch:areacentral 0.07694 0.31430 270.62516
## congruenceMismatch:areaparietal -0.37558 0.31287 270.74831
## word_typeKiki:congruenceMismatch:areacentral 0.22426 0.44347 270.67810
## word_typeKiki:congruenceMismatch:areaparietal 1.09485 0.44246 270.73433
## t value Pr(>|t|)
## (Intercept) 0.868 0.38663
## word_typeKiki 2.405 0.01685 *
## congruenceMismatch 2.991 0.00304 **
## areacentral -1.829 0.06850 .
## areaparietal -2.949 0.00347 **
## word_typeKiki:congruenceMismatch -4.745 3.38e-06 ***
## word_typeKiki:areacentral -0.251 0.80234
## word_typeKiki:areaparietal -1.498 0.13519
## congruenceMismatch:areacentral 0.245 0.80681
## congruenceMismatch:areaparietal -1.200 0.23103
## word_typeKiki:congruenceMismatch:areacentral 0.506 0.61348
## word_typeKiki:congruenceMismatch:areaparietal 2.474 0.01396 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) wrd_tK cngrnM arcntr arprtl wr_K:M
## word_typeKk -0.624
## cngrncMsmtc -0.624 0.511
## areacentral -0.616 0.504 0.504
## areaparietl -0.624 0.511 0.511 0.504
## wrd_typKk:M 0.443 -0.711 -0.711 -0.359 -0.363
## wrd_typKk:rc 0.438 -0.703 -0.358 -0.710 -0.358 0.500
## wrd_typKk:rp 0.441 -0.707 -0.361 -0.357 -0.707 0.503
## cngrncMsmtch:rc 0.440 -0.360 -0.707 -0.714 -0.360 0.502
## cngrncMsmtch:rp 0.443 -0.363 -0.711 -0.359 -0.711 0.505
## wrd_typKk:cngrncMsmtch:rc -0.312 0.501 0.501 0.506 0.255 -0.705
## wrd_typKk:cngrncMsmtch:rp -0.314 0.503 0.503 0.254 0.503 -0.707
## wrd_typKk:rc wrd_typKk:rp cngrncMsmtch:rc
## word_typeKk
## cngrncMsmtc
## areacentral
## areaparietl
## wrd_typKk:M
## wrd_typKk:rc
## wrd_typKk:rp 0.497
## cngrncMsmtch:rc 0.507 0.255
## cngrncMsmtch:rp 0.255 0.503 0.502
## wrd_typKk:cngrncMsmtch:rc -0.712 -0.354 -0.709
## wrd_typKk:cngrncMsmtch:rp -0.353 -0.711 -0.355
## cngrncMsmtch:rp wrd_typKk:cngrncMsmtch:rc
## word_typeKk
## cngrncMsmtc
## areacentral
## areaparietl
## wrd_typKk:M
## wrd_typKk:rc
## wrd_typKk:rp
## cngrncMsmtch:rc
## cngrncMsmtch:rp
## wrd_typKk:cngrncMsmtch:rc -0.356
## wrd_typKk:cngrncMsmtch:rp -0.707 0.498
anova_early_full <- anova(m_full_early, type = 2)
anova_early_full
## Type II Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## word_type 2.442 2.4421 1 270.86 3.8793 0.04990 *
## congruence 0.130 0.1298 1 270.84 0.2061 0.65017
## area 33.426 16.7129 2 270.74 26.5493 2.961e-11 ***
## word_type:congruence 20.950 20.9503 1 270.87 33.2805 2.175e-08 ***
## word_type:area 0.087 0.0433 2 270.73 0.0688 0.93354
## congruence:area 0.566 0.2832 2 270.74 0.4498 0.63823
## word_type:congruence:area 4.298 2.1491 2 270.74 3.4139 0.03434 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eta_squared(anova_early_full)
## # Effect Size for ANOVA (Type II)
##
## Parameter | Eta2 (partial) | 95% CI
## ---------------------------------------------------------
## word_type | 0.01 | [0.00, 1.00]
## congruence | 7.61e-04 | [0.00, 1.00]
## area | 0.16 | [0.10, 1.00]
## word_type:congruence | 0.11 | [0.06, 1.00]
## word_type:area | 5.08e-04 | [0.00, 1.00]
## congruence:area | 3.31e-03 | [0.00, 1.00]
## word_type:congruence:area | 0.02 | [0.00, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
par(mfrow = c(1, 2))
hist(residuals(m_full_early), breaks = 10,
main = "Residuals — early window (full LMM)",
xlab = "Residuals")
plot(residuals(m_full_early) ~ fitted(m_full_early),
main = "Residuals vs fitted — early window (full LMM)",
xlab = "Fitted", ylab = "Residuals")
par(mfrow = c(1, 1))
d_early_relev <- d_early_clean %>%
mutate(
congruence = relevel(congruence, ref = "Mismatch"),
word_type = relevel(word_type, ref = "Kiki")
)
m_int_early <- lmer(scale(amplitude) ~ word_type * congruence +
(1 | participant_id),
data = d_early_relev)
summary(m_int_early)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: scale(amplitude) ~ word_type * congruence + (1 | participant_id)
## Data: d_early_relev
##
## REML criterion at convergence: 827.4
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -3.1619 -0.6591 0.0126 0.5953 3.2610
##
## Random effects:
## Groups Name Variance Std.Dev.
## participant_id (Intercept) 0.1947 0.4412
## Residual 0.7490 0.8655
## Number of obs: 308, groups: participant_id, 26
##
## Fixed effects:
## Estimate Std. Error df t value Pr(>|t|)
## (Intercept) -0.3254 0.1307 69.8197 -2.489 0.015192
## word_typeBouba 0.6935 0.1386 278.5599 5.004 9.95e-07
## congruenceMatch 0.4860 0.1395 278.7199 3.483 0.000576
## word_typeBouba:congruenceMatch -1.0555 0.1974 278.8489 -5.347 1.87e-07
##
## (Intercept) *
## word_typeBouba ***
## congruenceMatch ***
## word_typeBouba:congruenceMatch ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Correlation of Fixed Effects:
## (Intr) wrd_tB cngrnM
## word_typeBb -0.530
## congrncMtch -0.526 0.497
## wrd_typBb:M 0.372 -0.702 -0.707
anova_early_int <- anova(m_int_early, type = 2)
anova_early_int
## Type II Analysis of Variance Table with Satterthwaite's method
## Sum Sq Mean Sq NumDF DenDF F value Pr(>F)
## word_type 2.2913 2.2913 1 278.84 3.0591 0.08138 .
## congruence 0.1336 0.1336 1 278.81 0.1784 0.67309
## word_type:congruence 21.4105 21.4105 1 278.85 28.5852 1.868e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
eta_squared(anova_early_int)
## # Effect Size for ANOVA (Type II)
##
## Parameter | Eta2 (partial) | 95% CI
## ----------------------------------------------------
## word_type | 0.01 | [0.00, 1.00]
## congruence | 6.39e-04 | [0.00, 1.00]
## word_type:congruence | 0.09 | [0.05, 1.00]
##
## - One-sided CIs: upper bound fixed at [1.00].
emm_early <- emmeans(m_int_early, ~ congruence | word_type)
contrast(emm_early, method = "pairwise", adjust = "none") %>%
confint(level = 0.95)
## word_type = Kiki:
## contrast estimate SE df lower.CL upper.CL
## Mismatch - Match -0.486 0.14 279 -0.761 -0.211
##
## word_type = Bouba:
## contrast estimate SE df lower.CL upper.CL
## Mismatch - Match 0.569 0.14 279 0.295 0.844
##
## Note: contrasts are still on the scale(0.411, 5.82) scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
equivalence_test(m_int_early, range = c(-0.2, 0.2))
## # TOST-test for Practical Equivalence
##
## ROPE: [-0.20 0.20]
##
## Parameter | 90% CI | SGPV | Equivalence | p
## ---------------------------------------------------------------------------------------
## (Intercept) | [-0.54, -0.11] | 0.127 | Rejected | 0.831
## word type [Bouba] | [ 0.46, 0.92] | < .001 | Rejected | > .999
## congruence [Match] | [ 0.26, 0.72] | 0.007 | Rejected | 0.979
## word type [Bouba] × congruence [Match] | [-1.38, -0.73] | < .001 | Rejected | > .999
Because the three-way Word × Trial × Area interaction is (marginally) significant in the early window, we follow up with per-area pairwise contrasts to identify which scalp regions drive the effect.
emm_area_early <- emmeans(m_full_early, ~ congruence | word_type * area)
contrast(emm_area_early, method = "pairwise", adjust = "none") %>%
confint(level = 0.95)
## word_type = Bouba, area = frontal:
## contrast estimate SE df lower.CL upper.CL
## Match - Mismatch -0.665 0.222 271 -1.1030 -0.227
##
## word_type = Kiki, area = frontal:
## contrast estimate SE df lower.CL upper.CL
## Match - Mismatch 0.819 0.220 271 0.3863 1.253
##
## word_type = Bouba, area = central:
## contrast estimate SE df lower.CL upper.CL
## Match - Mismatch -0.742 0.222 271 -1.1799 -0.304
##
## word_type = Kiki, area = central:
## contrast estimate SE df lower.CL upper.CL
## Match - Mismatch 0.518 0.222 271 0.0804 0.956
##
## word_type = Bouba, area = parietal:
## contrast estimate SE df lower.CL upper.CL
## Match - Mismatch -0.290 0.220 271 -0.7228 0.144
##
## word_type = Kiki, area = parietal:
## contrast estimate SE df lower.CL upper.CL
## Match - Mismatch 0.100 0.222 271 -0.3376 0.538
##
## Note: contrasts are still on the scale(0.411, 5.82) scale. Consider using
## regrid() if you want contrasts of back-transformed estimates.
## Degrees-of-freedom method: kenward-roger
## Confidence level used: 0.95
Three subscales of the parental CDI are used : word comprehension
(receptive_vocab_T1), word production
(productive_vocab_T1) and onomatopoeic word comprehension
(symbolic_vocab_T1).
vocab_raw <- read_excel(path_vocab)
message("Columns found in the vocabulary file: ",
paste(names(vocab_raw), collapse = ", "))
desired_cols <- c("participant_id",
"receptive_vocab_T1",
"productive_vocab_T1",
"symbolic_vocab_T1",
"Age",
"Gender")
available_cols <- intersect(desired_cols, names(vocab_raw))
vocab_T1 <- vocab_raw %>%
select(all_of(available_cols)) %>%
mutate(across(any_of(c("receptive_vocab_T1",
"productive_vocab_T1",
"symbolic_vocab_T1")),
~ as.numeric(.)))
vocab_outcomes <- intersect(
c("receptive_vocab_T1", "productive_vocab_T1", "symbolic_vocab_T1"),
names(vocab_T1)
)
vocab_outcomes
## [1] "receptive_vocab_T1" "productive_vocab_T1" "symbolic_vocab_T1"
For each participant and each word type, we compute the average incongruent − congruent (Mismatch − Match) amplitude across the three scalp regions and word type, then join the vocabulary scores.
build_amp_diff <- function(data) {
data %>%
group_by(participant_id, word_type, congruence) %>%
summarise(mean_amp = mean(amplitude, na.rm = TRUE),
.groups = "drop") %>%
pivot_wider(names_from = c(word_type, congruence),
values_from = mean_amp,
names_sep = "_") %>%
mutate(
bouba_diff = Bouba_Mismatch - Bouba_Match,
kiki_diff = Kiki_Mismatch - Kiki_Match
) %>%
select(participant_id, bouba_diff, kiki_diff)
}
d_amp_late <- build_amp_diff(d_late_clean) %>% left_join(vocab_T1, by = "participant_id")
d_amp_early <- build_amp_diff(d_early_clean) %>% left_join(vocab_T1, by = "participant_id")
d_amp_late %>% anti_join(vocab_T1, by = "participant_id")
## # A tibble: 0 × 8
## # ℹ 8 variables: participant_id <chr>, bouba_diff <dbl>, kiki_diff <dbl>,
## # receptive_vocab_T1 <dbl>, productive_vocab_T1 <dbl>,
## # symbolic_vocab_T1 <dbl>, Age <dbl>, Gender <chr>
vocab_T1 %>%
summarise(across(where(is.numeric),
list(M = ~ mean(., na.rm = TRUE),
SD = ~ sd(., na.rm = TRUE),
n = ~ sum(!is.na(.))),
.names = "{.col}_{.fn}"))
## # A tibble: 1 × 12
## receptive_vocab_T1_M receptive_vocab_T1_SD receptive_vocab_T1_n
## <dbl> <dbl> <int>
## 1 39.1 46.1 24
## # ℹ 9 more variables: productive_vocab_T1_M <dbl>,
## # productive_vocab_T1_SD <dbl>, productive_vocab_T1_n <int>,
## # symbolic_vocab_T1_M <dbl>, symbolic_vocab_T1_SD <dbl>,
## # symbolic_vocab_T1_n <int>, Age_M <dbl>, Age_SD <dbl>, Age_n <int>
# Age summary in months (assuming Age is in days)
if ("Age" %in% names(vocab_T1)) {
vocab_T1 %>%
summarise(
mean_month = mean(Age, na.rm = TRUE) / 30.44,
median_month = median(Age, na.rm = TRUE) / 30.44,
min_month = min(Age, na.rm = TRUE) / 30.44,
max_month = max(Age, na.rm = TRUE) / 30.44
)
}
## # A tibble: 1 × 4
## mean_month median_month min_month max_month
## <dbl> <dbl> <dbl> <dbl>
## 1 12.1 12.0 11.5 12.7
For each (word_type effect × vocabulary subscale × time window) we
fit a simple linear regression on z-scored variables:
scale(vocab) ~ scale(amplitude_diff).
fit_vocab_lm <- function(data, vocab_var, amp_var) {
# Variables are z-scored inside the formula, so the resulting
# coefficients in summary() are already standardised betas.
formula <- as.formula(
paste0("scale(", vocab_var, ") ~ scale(", amp_var, ")")
)
m <- lm(formula, data = data)
cat("Model:", vocab_var, "~", amp_var, "\n")
print(summary(m))
invisible(m)
}
for (v in vocab_outcomes) fit_vocab_lm(d_amp_late, v, "bouba_diff")
## Model: receptive_vocab_T1 ~ bouba_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8361 -0.5141 -0.3227 0.1130 3.3377
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0003929 0.2088210 0.002 0.999
## scale(bouba_diff) -0.0139051 0.2541153 -0.055 0.957
##
## Residual standard error: 1.022 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0001361, Adjusted R-squared: -0.04531
## F-statistic: 0.002994 on 1 and 22 DF, p-value: 0.9569
##
## Model: productive_vocab_T1 ~ bouba_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2064 -0.5989 -0.3836 0.5991 1.9297
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01063 0.19816 -0.054 0.958
## scale(bouba_diff) 0.37625 0.24114 1.560 0.133
##
## Residual standard error: 0.9702 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.09963, Adjusted R-squared: 0.05871
## F-statistic: 2.435 on 1 and 22 DF, p-value: 0.133
##
## Model: symbolic_vocab_T1 ~ bouba_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1920 -0.5339 -0.3270 0.1966 2.8790
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0127 0.1934 -0.066 0.9483
## scale(bouba_diff) 0.4494 0.2354 1.909 0.0694 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.947 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1421, Adjusted R-squared: 0.1031
## F-statistic: 3.644 on 1 and 22 DF, p-value: 0.06939
for (v in vocab_outcomes) fit_vocab_lm(d_amp_late, v, "kiki_diff")
## Model: receptive_vocab_T1 ~ kiki_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.8265 -0.5083 -0.3870 0.1065 3.2532
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.01091 0.21090 -0.052 0.959
## scale(kiki_diff) 0.07831 0.24078 0.325 0.748
##
## Residual standard error: 1.02 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.004786, Adjusted R-squared: -0.04045
## F-statistic: 0.1058 on 1 and 22 DF, p-value: 0.7481
##
## Model: productive_vocab_T1 ~ kiki_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9394 -0.6965 -0.4870 0.4214 2.5339
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01883 0.20989 0.090 0.929
## scale(kiki_diff) -0.13522 0.23963 -0.564 0.578
##
## Residual standard error: 1.015 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.01427, Adjusted R-squared: -0.03054
## F-statistic: 0.3184 on 1 and 22 DF, p-value: 0.5783
##
## Model: symbolic_vocab_T1 ~ kiki_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9020 -0.5692 -0.3406 0.1373 2.7563
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.02192 0.20935 -0.105 0.918
## scale(kiki_diff) 0.15737 0.23902 0.658 0.517
##
## Residual standard error: 1.013 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.01932, Adjusted R-squared: -0.02525
## F-statistic: 0.4335 on 1 and 22 DF, p-value: 0.5171
for (v in vocab_outcomes) fit_vocab_lm(d_amp_early, v, "bouba_diff")
## Model: receptive_vocab_T1 ~ bouba_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9827 -0.6012 -0.2597 0.1003 3.3321
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.00581 0.20795 -0.028 0.978
## scale(bouba_diff) -0.11577 0.23838 -0.486 0.632
##
## Residual standard error: 1.017 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.01061, Adjusted R-squared: -0.03437
## F-statistic: 0.2359 on 1 and 22 DF, p-value: 0.632
##
## Model: productive_vocab_T1 ~ bouba_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.9255 -0.6140 -0.2976 0.2102 2.4130
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0223 0.1920 0.116 0.9086
## scale(bouba_diff) 0.4444 0.2201 2.019 0.0559 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9392 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.1563, Adjusted R-squared: 0.1179
## F-statistic: 4.075 on 1 and 22 DF, p-value: 0.05586
##
## Model: symbolic_vocab_T1 ~ bouba_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.70243 -0.62902 -0.42485 0.09248 2.92679
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.004691 0.208334 0.023 0.982
## scale(bouba_diff) 0.093469 0.238823 0.391 0.699
##
## Residual standard error: 1.019 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.006914, Adjusted R-squared: -0.03823
## F-statistic: 0.1532 on 1 and 22 DF, p-value: 0.6993
for (v in vocab_outcomes) fit_vocab_lm(d_amp_early, v, "kiki_diff")
## Model: receptive_vocab_T1 ~ kiki_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.1677 -0.5326 -0.2368 0.1602 3.3522
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.00729 0.20697 0.035 0.972
## scale(kiki_diff) -0.13680 0.20723 -0.660 0.516
##
## Residual standard error: 1.012 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.01942, Adjusted R-squared: -0.02515
## F-statistic: 0.4358 on 1 and 22 DF, p-value: 0.516
##
## Model: productive_vocab_T1 ~ kiki_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.2767 -0.5757 -0.3543 0.3225 2.5063
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.01207 0.20337 0.059 0.953
## scale(kiki_diff) -0.22639 0.20363 -1.112 0.278
##
## Residual standard error: 0.9949 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.0532, Adjusted R-squared: 0.01016
## F-statistic: 1.236 on 1 and 22 DF, p-value: 0.2782
##
## Model: symbolic_vocab_T1 ~ kiki_diff
##
## Call:
## lm(formula = formula, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.77109 -0.62971 -0.35582 0.05431 2.92863
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.002434 0.208783 0.012 0.991
## scale(kiki_diff) -0.045662 0.209046 -0.218 0.829
##
## Residual standard error: 1.021 on 22 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.002164, Adjusted R-squared: -0.04319
## F-statistic: 0.04771 on 1 and 22 DF, p-value: 0.8291
cor_vocab <- function(data, vocab_var, amp_var) {
ct <- cor.test(data[[vocab_var]], data[[amp_var]],
method = "pearson", use = "complete.obs")
data.frame(
vocab = vocab_var,
amp = amp_var,
r = unname(ct$estimate),
p = ct$p.value,
n = sum(complete.cases(data[, c(vocab_var, amp_var)]))
)
}
cor_results <- bind_rows(
lapply(vocab_outcomes, function(v) {
bind_rows(
cor_vocab(d_amp_late, v, "bouba_diff") %>% mutate(window = "late"),
cor_vocab(d_amp_late, v, "kiki_diff") %>% mutate(window = "late"),
cor_vocab(d_amp_early, v, "bouba_diff") %>% mutate(window = "early"),
cor_vocab(d_amp_early, v, "kiki_diff") %>% mutate(window = "early")
)
})
)
cor_results
## vocab amp r p n window
## 1 receptive_vocab_T1 bouba_diff -0.01166546 0.95685575 24 late
## 2 receptive_vocab_T1 kiki_diff 0.06917767 0.74806395 24 late
## 3 receptive_vocab_T1 bouba_diff -0.10299092 0.63201508 24 early
## 4 receptive_vocab_T1 kiki_diff -0.13936381 0.51603168 24 early
## 5 productive_vocab_T1 bouba_diff 0.31565005 0.13295996 24 late
## 6 productive_vocab_T1 kiki_diff -0.11944201 0.57827697 24 late
## 7 productive_vocab_T1 bouba_diff 0.39534087 0.05586260 24 early
## 8 productive_vocab_T1 kiki_diff -0.23064284 0.27823256 24 early
## 9 symbolic_vocab_T1 bouba_diff 0.37697912 0.06938726 24 late
## 10 symbolic_vocab_T1 kiki_diff 0.13900822 0.51711363 24 late
## 11 symbolic_vocab_T1 bouba_diff 0.08315239 0.69928522 24 early
## 12 symbolic_vocab_T1 kiki_diff -0.04651945 0.82910605 24 early
The figures below are similar to the ones reported in the thesis.
label_word <- c(Bouba = "Bouba-type trials",
Kiki = "Kiki-type trials")
label_area <- c(frontal = "Frontal area",
central = "Central area",
parietal = "Parietal area")
plot_area_by_wordtype <- function(data, plot_title) {
ggplot(data, aes(x = congruence, y = amplitude)) +
geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
geom_boxplot(aes(fill = congruence),
width = 0.3, outlier.shape = NA, alpha = 0.3,
position = position_dodge(width = 0.4)) +
geom_line(aes(group = participant_id), colour = "grey80", size = 0.3) +
geom_point(aes(color = congruence),
position = position_jitterdodge(jitter.width = 0.1,
dodge.width = 0.4),
size = 1.5, alpha = 0.6) +
stat_summary(fun = mean, geom = "point",
shape = 18, size = 3.5, color = "black",
position = position_dodge(width = 0.4)) +
scale_x_discrete(labels = c(Match = "Congruent", Mismatch = "Incongruent")) +
scale_color_manual(values = c(Match = "darkgreen", Mismatch = "firebrick")) +
scale_fill_manual( values = c(Match = "darkgreen", Mismatch = "firebrick")) +
facet_grid(area ~ word_type,
labeller = labeller(word_type = label_word,
area = label_area),
scales = "free_y") +
theme_minimal(base_size = 14) +
labs(x = "Congruence", y = "Mean amplitude (µV)",
title = plot_title) +
theme(
legend.position = "none",
strip.text.x = element_text(size = 14, face = "bold"),
strip.text.y = element_text(size = 14, face = "bold", angle = 0),
panel.grid.major.y = element_line(color = "grey90"),
panel.grid.minor = element_blank(),
panel.spacing = unit(1.5, "lines"),
plot.title = element_text(hjust = 0.5, face = "bold")
)
}
plot_area_by_wordtype(d_late_clean, "Late window (300–700 ms)")
plot_area_by_wordtype(d_early_clean, "Early window (150–300 ms)")
plot_pooled <- function(data, plot_title) {
data %>%
group_by(participant_id, word_type, congruence) %>%
summarise(amplitude = mean(amplitude, na.rm = TRUE), .groups = "drop") %>%
ggplot(aes(x = congruence, y = amplitude)) +
geom_boxplot(aes(fill = congruence),
width = 0.2, outlier.shape = NA, alpha = 0.4, coef = 2.5) +
geom_line(aes(group = participant_id), colour = "grey70", size = 0.4) +
geom_point(aes(color = congruence),
position = position_jitter(width = 0.05),
size = 1.8, alpha = 0.7) +
stat_summary(fun = mean, geom = "point",
shape = 18, size = 3, color = "black") +
scale_x_discrete(labels = c(Match = "Congruent", Mismatch = "Incongruent")) +
scale_color_manual(values = c(Match = "darkgreen", Mismatch = "firebrick")) +
scale_fill_manual( values = c(Match = "darkgreen", Mismatch = "firebrick")) +
facet_wrap(~ word_type) +
theme_classic(base_size = 14) +
labs(x = "Congruence", y = "Mean amplitude (µV)",
title = plot_title) +
theme(legend.position = "none",
strip.text = element_text(size = 14, face = "bold"),
plot.title = element_text(hjust = 0.5, face = "bold"))
}
plot_pooled(d_late_clean, "Late window (300–700 ms)")
plot_pooled(d_early_clean, "Early window (150–300 ms)")
We plot one combination as illustration. To plot a different
combination, change vocab_var, effect_var, and
the data frame (d_amp_late or
d_amp_early).
plot_vocab_scatter <- function(data, vocab_var, effect_var,
xlab, ylab, plot_title) {
data <- data %>% filter(!is.na(.data[[vocab_var]]),
!is.na(.data[[effect_var]]))
r <- cor(data[[vocab_var]], data[[effect_var]])
p <- cor.test(data[[vocab_var]], data[[effect_var]])$p.value
ggplot(data, aes(x = .data[[vocab_var]], y = .data[[effect_var]])) +
geom_point(color = "#336699", size = 2, alpha = 0.7) +
geom_smooth(method = "lm", se = TRUE,
color = "black", fill = "#CCCCCC") +
theme_classic(base_size = 13) +
labs(x = xlab, y = ylab,
title = plot_title,
subtitle = paste0("r = ", round(r, 3),
", p = ", signif(p, 3))) +
theme(plot.title = element_text(face = "bold", hjust = 0.5))
}
plot_vocab_scatter(
d_amp_late, "receptive_vocab_T1", "bouba_diff",
xlab = "Receptive vocabulary at 12 months",
ylab = "Bouba effect (Mismatch − Match, late window)",
plot_title = "Receptive vocabulary and bouba effect (late window)"
)