pacman::p_load(
  "tidyverse", "readxl",
  "psych", "gt")

# Helper to calculate hit/accuracy proportions for a given trial type.
calc_accuracy_prop <- function(data, trial_type, success_value) {
  responses <- data$response_binary[data$type == trial_type]
  responses <- responses[!is.na(responses)]
  if (!length(responses)) {
    return(NA_real_)
  }
  if (success_value == 1) {
    mean(responses == 1)
  } else {
    mean(responses == 0)
  }
}

# Data quality check function to validate data completeness.
check_data_quality <- function(path, min_trials = 50) {
  data <- read_csv(path, show_col_types = FALSE)
  trial_data <- data |> filter(type %in% c("target", "filler", "nonword"))

  n_trials <- nrow(trial_data)
  n_responses <- sum(!is.na(trial_data$response_binary))
  response_rate <- if (n_trials > 0) n_responses / n_trials else 0

  is_valid <- n_trials >= min_trials & response_rate > 0.5

  tibble(
    n_trials = n_trials,
    n_responses = n_responses,
    response_rate = response_rate,
    is_valid = is_valid,
    reason = case_when(
      n_trials < min_trials ~ "Insufficient trials",
      response_rate <= 0.5 ~ "Low response rate",
      TRUE ~ "OK"
    )
  )
}

# Load trial-level data from a result file.
load_trial_data <- function(path) {
  read_csv(path, show_col_types = FALSE) |>
    filter(type %in% c("target", "filler", "nonword")) |>
    mutate(
      rt = as.numeric(rt),
      response_binary = as.numeric(response_binary)
    ) |>
    select(type, rt, response_binary)
}

# Summarise a single participant's trial data into descriptive statistics.
summarise_results <- function(trial_data) {
  if (is.null(trial_data)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = NA_real_,
      hit_filler_total = NA_real_,
      correct_rejection_total = NA_real_
    ))
  }

  accuracy_values <- tibble(
    hit_target_total = calc_accuracy_prop(trial_data, "target", 1),
    hit_filler_total = calc_accuracy_prop(trial_data, "filler", 1),
    correct_rejection_total = calc_accuracy_prop(trial_data, "nonword", 0)
  )

  valid_rt <- trial_data$rt[!is.na(trial_data$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = accuracy_values$hit_target_total,
      hit_filler_total = accuracy_values$hit_filler_total,
      correct_rejection_total = accuracy_values$correct_rejection_total
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    hit_target_total = accuracy_values$hit_target_total,
    hit_filler_total = accuracy_values$hit_filler_total,
    correct_rejection_total = accuracy_values$correct_rejection_total
  )
}

# Collect file paths for both modalities.
audio_paths <- tibble(
  modality = "Onset_Audio",
  path = list.files("Onset_Audio", pattern = "_audio_results\\.csv$", full.names = TRUE)
)

written_paths <- tibble(
  modality = "Onset_Written",
  path = list.files("Onset_Written", pattern = "_written_results\\.csv$", full.names = TRUE)
)

# Participant metadata.
participant_meta <- read_excel("Participant_List.xlsx") |>
  mutate(
    Subject_ID = as.integer(Subject_ID),
    participant_id = sprintf("Sub_%02d", Subject_ID))

# Perform data quality checks on all files.
all_paths <- bind_rows(audio_paths, written_paths)
quality_check <- all_paths |>
  mutate(quality = map(path, check_data_quality)) |>
  unnest(quality)

# Process excluded participants for detailed reporting.
excluded_participants <- quality_check |>
  filter(!is_valid) |>
  select(modality, path, n_trials, n_responses, response_rate, reason) |>
  mutate(
    participant_id = str_extract(basename(path), "Sub_\\d+"),
    Subject_ID = as.integer(str_remove(participant_id, "Sub_"))
  ) |>
  left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
  mutate(Condition = coalesce(Condition, "Unknown"))

# Load and summarize excluded participants' data for detailed statistics.
if (nrow(excluded_participants) > 0) {
  cat("\n=== EXCLUDED PARTICIPANTS ===\n")
  print(excluded_participants |> select(modality, participant_id, Condition, n_trials, n_responses, reason))
  cat("\n")

  excluded_summary_table <- excluded_participants |>
    mutate(data = map(path, possibly(load_trial_data, otherwise = NULL))) |>
    mutate(summary = map(data, summarise_results)) |>
    unnest(cols = summary) |>
    arrange(modality, Subject_ID)
}
## 
## === EXCLUDED PARTICIPANTS ===
## # A tibble: 1 × 6
##   modality      participant_id Condition n_trials n_responses reason            
##   <chr>         <chr>          <chr>        <int>       <int> <chr>             
## 1 Onset_Written Sub_12         Viewing          7           0 Insufficient tria…
# Combine, annotate, and load all result files (excluding invalid data).
valid_paths <- quality_check |>
  filter(is_valid) |>
  pull(path)

cat("\n=== QUALITY CHECK SUMMARY ===\n")
## 
## === QUALITY CHECK SUMMARY ===
cat("Total files checked:", nrow(quality_check), "\n")
## Total files checked: 48
cat("Valid files:", length(valid_paths), "\n")
## Valid files: 47
cat("Excluded files:", nrow(quality_check) - length(valid_paths), "\n\n")
## Excluded files: 1
results_index <- bind_rows(audio_paths, written_paths) |>
  filter(path %in% valid_paths) |>
  mutate(
    participant_id = str_extract(basename(path), "Sub_\\d+"),
    Subject_ID = as.integer(str_remove(participant_id, "Sub_")),
    data = map(path, load_trial_data)) |>
  left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
  mutate(Condition = coalesce(Condition, "Unknown"))

cat("Results index rows:", nrow(results_index), "\n\n")
## Results index rows: 47
# Participant-level summaries used for ldt_gt.
summary_table <- results_index |>
  mutate(summary = map(data, summarise_results)) |>
  unnest(cols = summary) |>
  arrange(modality, Subject_ID)

# Trial-level data used to compute grand descriptive statistics.
trial_level_data <- results_index |>
  select(modality, participant_id, Subject_ID, Condition, data) |>
  unnest(cols = data)

# Compute descriptive statistics for a grouped set of trials.
summarise_group_stats <- function(df, trial_type) {
  participant_count <- n_distinct(df$participant_id)

  success_value <- if (!is.na(trial_type) && trial_type == "nonword") 0 else 1

  responses <- df$response_binary[!is.na(df$response_binary)]
  accuracy_rate <- if (!length(responses)) {
    NA_real_
  } else if (success_value == 1) {
    mean(responses == 1)
  } else {
    mean(responses == 0)
  }

  valid_rt <- df$rt[!is.na(df$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      participants = participant_count,
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      accuracy = accuracy_rate
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    participants = participant_count,
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    accuracy = accuracy_rate
  )
}

grand_modality_stats <- trial_level_data |>
  group_by(modality, Condition, type) |>
  group_modify(~ summarise_group_stats(.x, .y$type[[1]])) |>
  arrange(modality, Condition, type) |>
  ungroup()

# Build a gt table grouped by modality.
summary_table |>
  select(
    modality,
    participant_id,
    Condition,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt(groupname_col = "modality") |>
  cols_label(
    participant_id = "Participant",
    Condition = "Condition",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Lexical Decision Task Descriptive Statistics**"),
    subtitle = "Valid participants only")
Lexical Decision Task Descriptive Statistics
Valid participants only
Participant Condition Trials (n) RT Mean (ms) RT SD (ms) RT Median (ms) RT Min (ms) RT Max (ms) Hit (Target) Hit (Filler) Correct Rejection (Nonword)
Onset_Audio
Sub_01 Reading 94 1,626.5 447.7 1,617.5 295.6 2,759.3 77.1% 100.0% 31.4%
Sub_02 Reading_While_Listening 96 1,525.6 465.5 1,463.8 833.0 2,827.3 44.4% 95.8% 69.4%
Sub_03 Viewing 95 1,620.6 421.3 1,494.8 991.7 2,782.0 68.6% 95.8% 47.2%
Sub_04 Reading 96 1,851.9 412.0 1,884.1 1,017.3 2,822.4 47.2% 79.2% 77.8%
Sub_05 Reading_While_Listening 92 1,401.2 369.4 1,372.8 803.7 2,594.2 45.7% 91.7% 45.5%
Sub_06 Viewing 88 1,703.3 460.2 1,544.8 1,054.3 2,824.0 97.1% 100.0% 50.0%
Sub_07 Reading 84 2,075.4 380.3 2,086.2 1,148.9 2,994.7 75.0% 95.2% 35.5%
Sub_08 Reading_While_Listening 96 1,718.1 365.5 1,648.1 1,111.3 2,792.3 13.9% 75.0% 83.3%
Sub_09 Viewing 95 1,611.1 374.5 1,532.7 993.3 2,698.4 50.0% 100.0% 33.3%
Sub_10 Reading 91 1,729.2 528.4 1,676.9 938.7 2,952.4 85.7% 100.0% 78.1%
Sub_11 Reading_While_Listening 95 1,509.1 323.0 1,461.6 1,017.7 2,735.1 100.0% 100.0% 77.1%
Sub_12 Viewing 96 1,331.0 231.5 1,301.0 844.4 1,969.2 91.7% 100.0% 52.8%
Sub_13 Reading 92 1,887.8 553.1 1,817.8 1,036.9 2,956.6 82.4% 91.7% 52.9%
Sub_14 Reading_While_Listening 95 1,669.3 465.2 1,676.7 633.1 2,803.1 83.3% 95.7% 86.1%
Sub_15 Viewing 96 1,615.4 360.4 1,535.9 1,078.2 2,746.1 30.6% 87.5% 69.4%
Sub_16 Reading 77 1,892.0 525.7 1,860.3 972.5 2,977.9 66.7% 94.4% 41.4%
Sub_17 Reading_While_Listening 96 1,411.9 283.3 1,364.8 875.1 2,212.2 75.0% 83.3% 75.0%
Sub_18 Viewing 94 1,841.5 307.6 1,806.2 1,291.6 2,832.7 37.1% 73.9% 58.3%
Sub_19 Reading 86 1,773.0 442.7 1,724.8 932.2 2,847.5 67.6% 90.9% 56.7%
Sub_20 Reading_While_Listening 90 1,725.7 532.1 1,626.4 914.9 2,966.9 100.0% 100.0% 64.5%
Sub_21 Viewing 96 1,339.7 266.7 1,313.5 841.2 1,995.6 61.1% 91.7% 100.0%
Sub_22 Reading 70 1,959.0 522.1 2,013.7 1,040.4 2,888.5 65.4% 77.3% 36.4%
Sub_23 Reading_While_Listening 93 1,510.4 364.4 1,457.5 871.1 2,982.3 22.9% 70.8% 67.6%
Sub_24 Viewing 94 1,701.4 501.5 1,716.8 892.5 2,960.3 62.9% 95.7% 41.7%
Onset_Written
Sub_01 Reading 94 1,211.3 376.6 1,101.3 702.8 2,642.6 85.7% 100.0% 82.9%
Sub_02 Reading_While_Listening 95 1,272.9 483.9 1,082.9 734.9 2,449.9 62.9% 100.0% 88.9%
Sub_03 Viewing 94 1,508.2 472.5 1,373.3 803.7 2,929.9 85.7% 100.0% 62.9%
Sub_04 Reading 96 1,448.6 397.8 1,400.4 849.3 2,787.3 61.1% 95.8% 100.0%
Sub_05 Reading_While_Listening 95 1,518.5 522.4 1,464.1 774.0 3,012.0 65.7% 100.0% 72.2%
Sub_06 Viewing 74 1,570.5 609.3 1,442.3 740.3 2,865.7 100.0% 100.0% 77.8%
Sub_07 Reading 82 1,888.5 519.9 1,888.0 745.5 2,945.3 96.8% 100.0% 53.3%
Sub_08 Reading_While_Listening 95 1,457.4 370.2 1,409.3 789.3 2,584.0 5.7% 100.0% 100.0%
Sub_09 Viewing 88 1,697.6 538.0 1,652.5 839.1 2,981.4 88.6% 95.0% 45.5%
Sub_10 Reading 96 1,475.6 585.9 1,302.1 789.3 2,908.7 94.4% 100.0% 75.0%
Sub_11 Reading_While_Listening 93 1,260.6 360.7 1,189.6 775.8 2,779.5 97.1% 95.8% 94.1%
Sub_13 Reading 90 1,857.1 559.6 1,876.0 880.1 2,997.8 85.3% 100.0% 62.5%
Sub_14 Reading_While_Listening 96 1,274.3 334.1 1,216.1 792.0 2,262.9 97.2% 95.8% 88.9%
Sub_15 Viewing 96 1,443.9 356.1 1,371.4 849.0 2,414.6 50.0% 100.0% 100.0%
Sub_16 Reading 90 1,473.8 480.5 1,387.0 729.1 2,889.1 85.7% 100.0% 63.6%
Sub_17 Reading_While_Listening 88 1,601.6 499.4 1,478.9 506.5 2,918.6 80.0% 95.8% 79.4%
Sub_18 Viewing 89 1,910.0 437.4 1,850.0 1,176.5 2,963.0 60.6% 100.0% 67.6%
Sub_19 Reading 87 1,539.0 522.7 1,434.5 765.8 2,924.8 60.0% 100.0% 72.7%
Sub_20 Reading_While_Listening 94 1,497.6 482.1 1,372.6 813.6 3,015.2 100.0% 100.0% 76.5%
Sub_21 Viewing 96 1,149.9 294.2 1,061.0 726.1 2,030.8 66.7% 100.0% 100.0%
Sub_22 Reading 94 1,511.4 525.9 1,447.0 768.1 2,937.3 75.0% 100.0% 88.9%
Sub_23 Reading_While_Listening 95 1,617.4 390.3 1,572.8 852.3 2,924.0 16.7% 75.0% 97.1%
Sub_24 Viewing 94 1,773.0 570.9 1,779.0 883.7 2,987.4 50.0% 100.0% 88.9%
# Build a gt table for excluded participants (if any).
if (exists("excluded_summary_table") && nrow(excluded_summary_table) > 0) {
  excluded_summary_table |>
    select(
      modality,
      participant_id,
      Condition,
      rt_n,
      rt_mean,
      rt_sd,
      rt_median,
      rt_min,
      rt_max,
      hit_target_total,
      hit_filler_total,
      correct_rejection_total) |>
    gt(groupname_col = "modality") |>
    cols_label(
      participant_id = "Participant",
      Condition = "Condition",
      rt_n = "Trials (n)",
      rt_mean = "RT Mean (ms)",
      rt_sd = "RT SD (ms)",
      rt_median = "RT Median (ms)",
      rt_min = "RT Min (ms)",
      rt_max = "RT Max (ms)",
      hit_target_total = "Hit (Target)",
      hit_filler_total = "Hit (Filler)",
      correct_rejection_total = "Correct Rejection (Nonword)") |>
    fmt_number(
      columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
      decimals = 1) |>
    fmt_percent(
      columns = c(hit_target_total, hit_filler_total,
                  correct_rejection_total),
      decimals = 1) |>
    tab_style(
      style = list(
        cell_fill(color = "#ffe6e6"),
        cell_text(style = "italic")
      ),
      locations = cells_body(columns = everything())
    ) |>
    sub_missing(everything(), missing_text = "NA") |>
    tab_header(
      title = md("**EXCLUDED Participants - Descriptive Statistics**"),
      subtitle = "Data quality issues detected")
}
EXCLUDED Participants - Descriptive Statistics
Data quality issues detected
Participant Condition Trials (n) RT Mean (ms) RT SD (ms) RT Median (ms) RT Min (ms) RT Max (ms) Hit (Target) Hit (Filler) Correct Rejection (Nonword)
Onset_Written
Sub_12 Viewing 0 NA NA NA NA NA NA NA NA
grand_modality_stats |>
  mutate(
    modality = str_replace_all(modality, "_", " "),
    trial_type = str_to_title(type)) |>
  select(
    modality,
    Condition,
    trial_type,
    participants,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    accuracy) |>
  gt() |>
  cols_label(
    modality = "Modality",
    Condition = "Condition",
    trial_type = "Trial Type",
    participants = "Participants (n)",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    accuracy = "Accuracy") |>
  fmt_number(columns = c(participants, rt_n), decimals = 0) |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(columns = accuracy, decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Grand Descriptive Statistics by Modality**"))
Grand Descriptive Statistics by Modality
Modality Condition Trial Type Participants (n) Trials (n) RT Mean (ms) RT SD (ms) RT Median (ms) RT Min (ms) RT Max (ms) Accuracy
Onset Audio Reading Filler 8 179 1,536.4 424.5 1,413.2 932.2 2,882.5 91.1%
Onset Audio Reading Nonword 8 249 2,025.1 438.4 2,030.5 1,045.7 2,994.7 52.2%
Onset Audio Reading Target 8 262 1,878.1 491.0 1,863.4 295.6 2,977.9 71.0%
Onset Audio Reading_While_Listening Filler 8 191 1,396.9 394.0 1,306.2 803.7 2,803.1 89.0%
Onset Audio Reading_While_Listening Nonword 8 277 1,732.8 434.0 1,673.5 871.1 2,982.3 71.5%
Onset Audio Reading_While_Listening Target 8 285 1,497.5 355.8 1,445.4 633.1 2,754.3 60.7%
Onset Audio Viewing Filler 8 187 1,397.2 362.0 1,298.1 844.4 2,703.3 93.0%
Onset Audio Viewing Nonword 8 284 1,677.3 374.7 1,623.6 1,031.0 2,944.8 56.7%
Onset Audio Viewing Target 8 283 1,638.8 428.0 1,546.3 841.2 2,960.3 62.2%
Onset Written Reading Filler 8 185 1,203.1 428.5 1,029.8 702.8 2,937.3 99.5%
Onset Written Reading Nonword 8 271 1,752.6 498.9 1,735.3 845.4 2,997.8 75.6%
Onset Written Reading Target 8 273 1,566.5 531.3 1,467.1 729.1 2,975.0 80.6%
Onset Written Reading_While_Listening Filler 8 192 1,213.5 397.2 1,095.5 734.9 2,723.1 95.3%
Onset Written Reading_While_Listening Nonword 8 281 1,603.3 461.6 1,530.6 506.5 3,015.2 87.2%
Onset Written Reading_While_Listening Target 8 278 1,421.0 416.3 1,337.0 822.2 2,924.0 65.5%
Onset Written Viewing Filler 7 154 1,268.8 397.4 1,190.0 726.1 2,981.4 99.4%
Onset Written Viewing Nonword 7 237 1,698.0 494.0 1,671.7 878.0 2,963.0 78.1%
Onset Written Viewing Target 7 240 1,646.7 555.9 1,530.9 740.3 2,987.4 71.2%
# Boxplots to visualise reaction time distributions alongside the summary tables.
rt_boxplot_data <- trial_level_data |>
  filter(!is.na(rt)) |>
  mutate(
    Condition = fct_explicit_na(Condition, "Unknown"),
    modality_pretty = str_replace_all(modality, "_", " "),
    trial_type = str_to_title(type))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Condition = fct_explicit_na(Condition, "Unknown")`.
## Caused by warning:
## ! `fct_explicit_na()` was deprecated in forcats 1.0.0.
## ℹ Please use `fct_na_value_to_level()` instead.
# Participant-level totals for correct target responses.
target_accuracy_summary <- trial_level_data |>
  filter(type == "target") |>
  mutate(
    Condition = fct_explicit_na(Condition, "Unknown"),
    modality_pretty = str_replace_all(modality, "_", " ")
  ) |>
  drop_na(response_binary) |>
  group_by(modality_pretty, participant_id, Condition) |>
  summarise(
    trials = n(),
    total_correct = sum(response_binary == 1),
    accuracy_rate = total_correct / trials,
    .groups = "drop"
  ) |>
  arrange(modality_pretty, participant_id)

# Condition-level descriptive statistics for participant accuracy rates.
target_accuracy_descriptives <- target_accuracy_summary |>
  group_by(modality_pretty, Condition) |>
  summarise(
    n_participants = n(),
    accuracy_mean = mean(accuracy_rate),
    accuracy_sd = sd(accuracy_rate),
    accuracy_min = min(accuracy_rate),
    accuracy_max = max(accuracy_rate),
    .groups = "drop"
  )

rt_boxplot_data |>
  ggplot(aes(x = trial_type, y = rt, fill = trial_type)) +
  geom_boxplot(alpha = 0.7, outlier.alpha = 0.4, width = 0.6) +
  facet_wrap(~ modality_pretty) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Reaction Time Distributions by Modality and Trial Type",
    x = "Trial Type",
    y = "Reaction Time (ms)") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank())

rt_boxplot_data |>
  ggplot(aes(x = Condition, y = rt, fill = Condition)) +
  geom_boxplot(alpha = 0.7, outlier.alpha = 0.4, width = 0.6) +
  facet_grid(trial_type ~ modality_pretty) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Reaction Time Distributions by Condition, Modality, and Trial Type",
    x = "Condition",
    y = "Reaction Time (ms)",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank())

target_accuracy_summary |>
  mutate(
    Condition = fct_drop(Condition)
  ) |>
  ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
  geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
  facet_wrap(~ modality_pretty, scales = "free_y") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Distribution of Correct Target Responses by Condition",
    x = "Condition",
    y = "Correct Target Responses",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1))

target_accuracy_summary |>
  mutate(
    Condition = fct_drop(Condition)
  ) |>
  filter(modality_pretty == "Onset Audio") |> 
  ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
  geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Correct Target Responses: Audio Lexical Decision Task",
    x = "Condition",
    y = "Correct Target Responses: Max = 36",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1))

target_accuracy_summary |>
  mutate(
    Condition = fct_drop(Condition)
  ) |>
  filter(modality_pretty == "Onset Written") |>
  ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
  geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Correct Target Responses: Written Lexical Decision Task",
    x = "Condition",
    y = "Correct Target Responses: Max = 36",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1))

# Display descriptive statistics table for target accuracy by condition.
target_accuracy_descriptives |>
  gt() |>
  cols_label(
    modality_pretty = "Modality",
    Condition = "Condition",
    n_participants = "Participants (n)",
    accuracy_mean = "Accuracy Mean",
    accuracy_sd = "Accuracy SD",
    accuracy_min = "Accuracy Min",
    accuracy_max = "Accuracy Max") |>
  fmt_number(columns = n_participants, decimals = 0) |>
  fmt_percent(
    columns = c(accuracy_mean, accuracy_sd, accuracy_min, accuracy_max),
    decimals = 1) |>
  tab_style(
    style = cell_fill(color = "#ffcccb"),
    locations = cells_body(
      columns = c(accuracy_mean, accuracy_min),
      rows = accuracy_mean < 0.5 | accuracy_min < 0.5
    )
  ) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Target Accuracy Descriptive Statistics by Condition**"))
Target Accuracy Descriptive Statistics by Condition
Modality Condition Participants (n) Accuracy Mean Accuracy SD Accuracy Min Accuracy Max
Onset Audio Reading 8 70.9% 12.1% 47.2% 85.7%
Onset Audio Reading_While_Listening 8 60.7% 33.6% 13.9% 100.0%
Onset Audio Viewing 8 62.4% 23.6% 30.6% 97.1%
Onset Written Reading 8 80.5% 14.0% 60.0% 96.8%
Onset Written Reading_While_Listening 8 65.7% 36.6% 5.7% 100.0%
Onset Written Viewing 7 71.7% 19.9% 50.0% 100.0%
# RT outlier detection using IQR method by participant and condition.
rt_outlier_summary <- trial_level_data |>
  filter(!is.na(rt)) |>
  group_by(modality, participant_id, Condition) |>
  summarise(
    n_trials = n(),
    q1 = quantile(rt, 0.25),
    q3 = quantile(rt, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 3 * iqr,
    upper_bound = q3 + 3 * iqr,
    n_outliers = sum(rt < lower_bound | rt > upper_bound),
    outlier_rate = n_outliers / n_trials,
    .groups = "drop"
  ) |>
  arrange(desc(outlier_rate))

# Display participants with high outlier rates (>10%).
high_outlier_participants <- rt_outlier_summary |>
  filter(outlier_rate > 0.10)

if (nrow(high_outlier_participants) > 0) {
  cat("\n=== PARTICIPANTS WITH HIGH RT OUTLIER RATES (>10%) ===\n")
  print(high_outlier_participants |> select(modality, participant_id, Condition, n_trials, n_outliers, outlier_rate))
  cat("\n")
}

# Visualize RT distributions to identify extreme outliers.
trial_level_data |>
  filter(!is.na(rt)) |>
  mutate(
    Condition = fct_explicit_na(Condition, "Unknown"),
    modality_pretty = str_replace_all(modality, "_", " ")
  ) |>
  ggplot(aes(x = participant_id, y = rt, fill = Condition)) +
  geom_boxplot(alpha = 0.7, outlier.size = 1, outlier.alpha = 0.5) +
  facet_wrap(~ modality_pretty, scales = "free_x") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "RT Distribution by Participant (Outlier Detection)",
    x = "Participant",
    y = "Reaction Time (ms)",
    fill = "Condition") +
  theme_bw(base_size = 10) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  )