R_LDT_Analysis.r

pacman::p_load(
  "tidyverse", "readxl",
  "psych", "gt")

# Helper to calculate hit/accuracy proportions for a given trial type.
calc_accuracy_prop <- function(data, trial_type, success_value) {
  responses <- data$response_binary[data$type == trial_type]
  responses <- responses[!is.na(responses)]
  if (!length(responses)) {
    return(NA_real_)
  }
  if (success_value == 1) {
    mean(responses == 1)
  } else {
    mean(responses == 0)
  }
}

# Data quality check function to validate data completeness.
check_data_quality <- function(path, min_trials = 50) {
  data <- read_csv(path, show_col_types = FALSE)
  trial_data <- data |> filter(type %in% c("target", "filler", "nonword"))

  n_trials <- nrow(trial_data)
  n_responses <- sum(!is.na(trial_data$response_binary))
  response_rate <- if (n_trials > 0) n_responses / n_trials else 0

  is_valid <- n_trials >= min_trials & response_rate > 0.5

  tibble(
    n_trials = n_trials,
    n_responses = n_responses,
    response_rate = response_rate,
    is_valid = is_valid,
    reason = case_when(
      n_trials < min_trials ~ "Insufficient trials",
      response_rate <= 0.5 ~ "Low response rate",
      TRUE ~ "OK"
    )
  )
}

# Load trial-level data from a result file.
load_trial_data <- function(path) {
  read_csv(path, show_col_types = FALSE) |>
    filter(type %in% c("target", "filler", "nonword")) |>
    mutate(
      rt = as.numeric(rt),
      response_binary = as.numeric(response_binary)
    ) |>
    select(type, rt, response_binary)
}

# Summarise a single participant's trial data into descriptive statistics.
summarise_results <- function(trial_data) {
  if (is.null(trial_data)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = NA_real_,
      hit_filler_total = NA_real_,
      correct_rejection_total = NA_real_
    ))
  }

  accuracy_values <- tibble(
    hit_target_total = calc_accuracy_prop(trial_data, "target", 1),
    hit_filler_total = calc_accuracy_prop(trial_data, "filler", 1),
    correct_rejection_total = calc_accuracy_prop(trial_data, "nonword", 0)
  )

  valid_rt <- trial_data$rt[!is.na(trial_data$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = accuracy_values$hit_target_total,
      hit_filler_total = accuracy_values$hit_filler_total,
      correct_rejection_total = accuracy_values$correct_rejection_total
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    hit_target_total = accuracy_values$hit_target_total,
    hit_filler_total = accuracy_values$hit_filler_total,
    correct_rejection_total = accuracy_values$correct_rejection_total
  )
}

# Collect file paths for both modalities.
audio_paths <- tibble(
  modality = "Onset_Audio",
  path = list.files("Onset_Audio", pattern = "_audio_results\\.csv$", full.names = TRUE)
)

written_paths <- tibble(
  modality = "Onset_Written",
  path = list.files("Onset_Written", pattern = "_written_results\\.csv$", full.names = TRUE)
)

# Participant metadata.
participant_meta <- read_excel("Participant_List.xlsx") |>
  mutate(
    Subject_ID = as.integer(Subject_ID),
    participant_id = sprintf("Sub_%02d", Subject_ID))

# Perform data quality checks on all files.
all_paths <- bind_rows(audio_paths, written_paths)
quality_check <- all_paths |>
  mutate(quality = map(path, check_data_quality)) |>
  unnest(quality)

# Process excluded participants for detailed reporting.
excluded_participants <- quality_check |>
  filter(!is_valid) |>
  select(modality, path, n_trials, n_responses, response_rate, reason) |>
  mutate(
    participant_id = str_extract(basename(path), "Sub_\\d+"),
    Subject_ID = as.integer(str_remove(participant_id, "Sub_"))
  ) |>
  left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
  mutate(Condition = coalesce(Condition, "Unknown"))

# Load and summarize excluded participants' data for detailed statistics.
if (nrow(excluded_participants) > 0) {
  cat("\n=== EXCLUDED PARTICIPANTS ===\n")
  print(excluded_participants |> select(modality, participant_id, Condition, n_trials, n_responses, reason))
  cat("\n")

  excluded_summary_table <- excluded_participants |>
    mutate(data = map(path, possibly(load_trial_data, otherwise = NULL))) |>
    mutate(summary = map(data, summarise_results)) |>
    unnest(cols = summary) |>
    arrange(modality, Subject_ID)
}

## 
## === EXCLUDED PARTICIPANTS ===
## # A tibble: 1 × 6
##   modality      participant_id Condition n_trials n_responses reason            
##   <chr>         <chr>          <chr>        <int>       <int> <chr>             
## 1 Onset_Written Sub_12         Viewing          7           0 Insufficient tria…

# Combine, annotate, and load all result files (excluding invalid data).
valid_paths <- quality_check |>
  filter(is_valid) |>
  pull(path)

cat("\n=== QUALITY CHECK SUMMARY ===\n")

## 
## === QUALITY CHECK SUMMARY ===

cat("Total files checked:", nrow(quality_check), "\n")

## Total files checked: 48

cat("Valid files:", length(valid_paths), "\n")

## Valid files: 47

cat("Excluded files:", nrow(quality_check) - length(valid_paths), "\n\n")

## Excluded files: 1

results_index <- bind_rows(audio_paths, written_paths) |>
  filter(path %in% valid_paths) |>
  mutate(
    participant_id = str_extract(basename(path), "Sub_\\d+"),
    Subject_ID = as.integer(str_remove(participant_id, "Sub_")),
    data = map(path, load_trial_data)) |>
  left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
  mutate(Condition = coalesce(Condition, "Unknown"))

cat("Results index rows:", nrow(results_index), "\n\n")

## Results index rows: 47

# Participant-level summaries used for ldt_gt.
summary_table <- results_index |>
  mutate(summary = map(data, summarise_results)) |>
  unnest(cols = summary) |>
  arrange(modality, Subject_ID)

# Trial-level data used to compute grand descriptive statistics.
trial_level_data <- results_index |>
  select(modality, participant_id, Subject_ID, Condition, data) |>
  unnest(cols = data)

# Compute descriptive statistics for a grouped set of trials.
summarise_group_stats <- function(df, trial_type) {
  participant_count <- n_distinct(df$participant_id)

  success_value <- if (!is.na(trial_type) && trial_type == "nonword") 0 else 1

  responses <- df$response_binary[!is.na(df$response_binary)]
  accuracy_rate <- if (!length(responses)) {
    NA_real_
  } else if (success_value == 1) {
    mean(responses == 1)
  } else {
    mean(responses == 0)
  }

  valid_rt <- df$rt[!is.na(df$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      participants = participant_count,
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      accuracy = accuracy_rate
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    participants = participant_count,
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    accuracy = accuracy_rate
  )
}

grand_modality_stats <- trial_level_data |>
  group_by(modality, Condition, type) |>
  group_modify(~ summarise_group_stats(.x, .y$type[[1]])) |>
  arrange(modality, Condition, type) |>
  ungroup()

# Build a gt table grouped by modality.
summary_table |>
  select(
    modality,
    participant_id,
    Condition,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt(groupname_col = "modality") |>
  cols_label(
    participant_id = "Participant",
    Condition = "Condition",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Lexical Decision Task Descriptive Statistics**"),
    subtitle = "Valid participants only")

Participant	Condition	Trials (n)	RT Mean (ms)	RT SD (ms)	RT Median (ms)	RT Min (ms)	RT Max (ms)	Hit (Target)	Hit (Filler)	Correct Rejection (Nonword)
Lexical Decision Task Descriptive Statistics
Valid participants only
Onset_Audio
Sub_01	Reading	94	1,626.5	447.7	1,617.5	295.6	2,759.3	77.1%	100.0%	31.4%
Sub_02	Reading_While_Listening	96	1,525.6	465.5	1,463.8	833.0	2,827.3	44.4%	95.8%	69.4%
Sub_03	Viewing	95	1,620.6	421.3	1,494.8	991.7	2,782.0	68.6%	95.8%	47.2%
Sub_04	Reading	96	1,851.9	412.0	1,884.1	1,017.3	2,822.4	47.2%	79.2%	77.8%
Sub_05	Reading_While_Listening	92	1,401.2	369.4	1,372.8	803.7	2,594.2	45.7%	91.7%	45.5%
Sub_06	Viewing	88	1,703.3	460.2	1,544.8	1,054.3	2,824.0	97.1%	100.0%	50.0%
Sub_07	Reading	84	2,075.4	380.3	2,086.2	1,148.9	2,994.7	75.0%	95.2%	35.5%
Sub_08	Reading_While_Listening	96	1,718.1	365.5	1,648.1	1,111.3	2,792.3	13.9%	75.0%	83.3%
Sub_09	Viewing	95	1,611.1	374.5	1,532.7	993.3	2,698.4	50.0%	100.0%	33.3%
Sub_10	Reading	91	1,729.2	528.4	1,676.9	938.7	2,952.4	85.7%	100.0%	78.1%
Sub_11	Reading_While_Listening	95	1,509.1	323.0	1,461.6	1,017.7	2,735.1	100.0%	100.0%	77.1%
Sub_12	Viewing	96	1,331.0	231.5	1,301.0	844.4	1,969.2	91.7%	100.0%	52.8%
Sub_13	Reading	92	1,887.8	553.1	1,817.8	1,036.9	2,956.6	82.4%	91.7%	52.9%
Sub_14	Reading_While_Listening	95	1,669.3	465.2	1,676.7	633.1	2,803.1	83.3%	95.7%	86.1%
Sub_15	Viewing	96	1,615.4	360.4	1,535.9	1,078.2	2,746.1	30.6%	87.5%	69.4%
Sub_16	Reading	77	1,892.0	525.7	1,860.3	972.5	2,977.9	66.7%	94.4%	41.4%
Sub_17	Reading_While_Listening	96	1,411.9	283.3	1,364.8	875.1	2,212.2	75.0%	83.3%	75.0%
Sub_18	Viewing	94	1,841.5	307.6	1,806.2	1,291.6	2,832.7	37.1%	73.9%	58.3%
Sub_19	Reading	86	1,773.0	442.7	1,724.8	932.2	2,847.5	67.6%	90.9%	56.7%
Sub_20	Reading_While_Listening	90	1,725.7	532.1	1,626.4	914.9	2,966.9	100.0%	100.0%	64.5%
Sub_21	Viewing	96	1,339.7	266.7	1,313.5	841.2	1,995.6	61.1%	91.7%	100.0%
Sub_22	Reading	70	1,959.0	522.1	2,013.7	1,040.4	2,888.5	65.4%	77.3%	36.4%
Sub_23	Reading_While_Listening	93	1,510.4	364.4	1,457.5	871.1	2,982.3	22.9%	70.8%	67.6%
Sub_24	Viewing	94	1,701.4	501.5	1,716.8	892.5	2,960.3	62.9%	95.7%	41.7%
Onset_Written
Sub_01	Reading	94	1,211.3	376.6	1,101.3	702.8	2,642.6	85.7%	100.0%	82.9%
Sub_02	Reading_While_Listening	95	1,272.9	483.9	1,082.9	734.9	2,449.9	62.9%	100.0%	88.9%
Sub_03	Viewing	94	1,508.2	472.5	1,373.3	803.7	2,929.9	85.7%	100.0%	62.9%
Sub_04	Reading	96	1,448.6	397.8	1,400.4	849.3	2,787.3	61.1%	95.8%	100.0%
Sub_05	Reading_While_Listening	95	1,518.5	522.4	1,464.1	774.0	3,012.0	65.7%	100.0%	72.2%
Sub_06	Viewing	74	1,570.5	609.3	1,442.3	740.3	2,865.7	100.0%	100.0%	77.8%
Sub_07	Reading	82	1,888.5	519.9	1,888.0	745.5	2,945.3	96.8%	100.0%	53.3%
Sub_08	Reading_While_Listening	95	1,457.4	370.2	1,409.3	789.3	2,584.0	5.7%	100.0%	100.0%
Sub_09	Viewing	88	1,697.6	538.0	1,652.5	839.1	2,981.4	88.6%	95.0%	45.5%
Sub_10	Reading	96	1,475.6	585.9	1,302.1	789.3	2,908.7	94.4%	100.0%	75.0%
Sub_11	Reading_While_Listening	93	1,260.6	360.7	1,189.6	775.8	2,779.5	97.1%	95.8%	94.1%
Sub_13	Reading	90	1,857.1	559.6	1,876.0	880.1	2,997.8	85.3%	100.0%	62.5%
Sub_14	Reading_While_Listening	96	1,274.3	334.1	1,216.1	792.0	2,262.9	97.2%	95.8%	88.9%
Sub_15	Viewing	96	1,443.9	356.1	1,371.4	849.0	2,414.6	50.0%	100.0%	100.0%
Sub_16	Reading	90	1,473.8	480.5	1,387.0	729.1	2,889.1	85.7%	100.0%	63.6%
Sub_17	Reading_While_Listening	88	1,601.6	499.4	1,478.9	506.5	2,918.6	80.0%	95.8%	79.4%
Sub_18	Viewing	89	1,910.0	437.4	1,850.0	1,176.5	2,963.0	60.6%	100.0%	67.6%
Sub_19	Reading	87	1,539.0	522.7	1,434.5	765.8	2,924.8	60.0%	100.0%	72.7%
Sub_20	Reading_While_Listening	94	1,497.6	482.1	1,372.6	813.6	3,015.2	100.0%	100.0%	76.5%
Sub_21	Viewing	96	1,149.9	294.2	1,061.0	726.1	2,030.8	66.7%	100.0%	100.0%
Sub_22	Reading	94	1,511.4	525.9	1,447.0	768.1	2,937.3	75.0%	100.0%	88.9%
Sub_23	Reading_While_Listening	95	1,617.4	390.3	1,572.8	852.3	2,924.0	16.7%	75.0%	97.1%
Sub_24	Viewing	94	1,773.0	570.9	1,779.0	883.7	2,987.4	50.0%	100.0%	88.9%

# Build a gt table for excluded participants (if any).
if (exists("excluded_summary_table") && nrow(excluded_summary_table) > 0) {
  excluded_summary_table |>
    select(
      modality,
      participant_id,
      Condition,
      rt_n,
      rt_mean,
      rt_sd,
      rt_median,
      rt_min,
      rt_max,
      hit_target_total,
      hit_filler_total,
      correct_rejection_total) |>
    gt(groupname_col = "modality") |>
    cols_label(
      participant_id = "Participant",
      Condition = "Condition",
      rt_n = "Trials (n)",
      rt_mean = "RT Mean (ms)",
      rt_sd = "RT SD (ms)",
      rt_median = "RT Median (ms)",
      rt_min = "RT Min (ms)",
      rt_max = "RT Max (ms)",
      hit_target_total = "Hit (Target)",
      hit_filler_total = "Hit (Filler)",
      correct_rejection_total = "Correct Rejection (Nonword)") |>
    fmt_number(
      columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
      decimals = 1) |>
    fmt_percent(
      columns = c(hit_target_total, hit_filler_total,
                  correct_rejection_total),
      decimals = 1) |>
    tab_style(
      style = list(
        cell_fill(color = "#ffe6e6"),
        cell_text(style = "italic")
      ),
      locations = cells_body(columns = everything())
    ) |>
    sub_missing(everything(), missing_text = "NA") |>
    tab_header(
      title = md("**EXCLUDED Participants - Descriptive Statistics**"),
      subtitle = "Data quality issues detected")
}

Participant	Condition	Trials (n)	RT Mean (ms)	RT SD (ms)	RT Median (ms)	RT Min (ms)	RT Max (ms)	Hit (Target)	Hit (Filler)	Correct Rejection (Nonword)
EXCLUDED Participants - Descriptive Statistics
Data quality issues detected
Onset_Written
Sub_12	Viewing	0	NA	NA	NA	NA	NA	NA	NA	NA

grand_modality_stats |>
  mutate(
    modality = str_replace_all(modality, "_", " "),
    trial_type = str_to_title(type)) |>
  select(
    modality,
    Condition,
    trial_type,
    participants,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    accuracy) |>
  gt() |>
  cols_label(
    modality = "Modality",
    Condition = "Condition",
    trial_type = "Trial Type",
    participants = "Participants (n)",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    accuracy = "Accuracy") |>
  fmt_number(columns = c(participants, rt_n), decimals = 0) |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(columns = accuracy, decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Grand Descriptive Statistics by Modality**"))

Modality	Condition	Trial Type	Participants (n)	Trials (n)	RT Mean (ms)	RT SD (ms)	RT Median (ms)	RT Min (ms)	RT Max (ms)	Accuracy
Grand Descriptive Statistics by Modality
Onset Audio	Reading	Filler	8	179	1,536.4	424.5	1,413.2	932.2	2,882.5	91.1%
Onset Audio	Reading	Nonword	8	249	2,025.1	438.4	2,030.5	1,045.7	2,994.7	52.2%
Onset Audio	Reading	Target	8	262	1,878.1	491.0	1,863.4	295.6	2,977.9	71.0%
Onset Audio	Reading_While_Listening	Filler	8	191	1,396.9	394.0	1,306.2	803.7	2,803.1	89.0%
Onset Audio	Reading_While_Listening	Nonword	8	277	1,732.8	434.0	1,673.5	871.1	2,982.3	71.5%
Onset Audio	Reading_While_Listening	Target	8	285	1,497.5	355.8	1,445.4	633.1	2,754.3	60.7%
Onset Audio	Viewing	Filler	8	187	1,397.2	362.0	1,298.1	844.4	2,703.3	93.0%
Onset Audio	Viewing	Nonword	8	284	1,677.3	374.7	1,623.6	1,031.0	2,944.8	56.7%
Onset Audio	Viewing	Target	8	283	1,638.8	428.0	1,546.3	841.2	2,960.3	62.2%
Onset Written	Reading	Filler	8	185	1,203.1	428.5	1,029.8	702.8	2,937.3	99.5%
Onset Written	Reading	Nonword	8	271	1,752.6	498.9	1,735.3	845.4	2,997.8	75.6%
Onset Written	Reading	Target	8	273	1,566.5	531.3	1,467.1	729.1	2,975.0	80.6%
Onset Written	Reading_While_Listening	Filler	8	192	1,213.5	397.2	1,095.5	734.9	2,723.1	95.3%
Onset Written	Reading_While_Listening	Nonword	8	281	1,603.3	461.6	1,530.6	506.5	3,015.2	87.2%
Onset Written	Reading_While_Listening	Target	8	278	1,421.0	416.3	1,337.0	822.2	2,924.0	65.5%
Onset Written	Viewing	Filler	7	154	1,268.8	397.4	1,190.0	726.1	2,981.4	99.4%
Onset Written	Viewing	Nonword	7	237	1,698.0	494.0	1,671.7	878.0	2,963.0	78.1%
Onset Written	Viewing	Target	7	240	1,646.7	555.9	1,530.9	740.3	2,987.4	71.2%

# Boxplots to visualise reaction time distributions alongside the summary tables.
rt_boxplot_data <- trial_level_data |>
  filter(!is.na(rt)) |>
  mutate(
    Condition = fct_explicit_na(Condition, "Unknown"),
    modality_pretty = str_replace_all(modality, "_", " "),
    trial_type = str_to_title(type))

## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Condition = fct_explicit_na(Condition, "Unknown")`.
## Caused by warning:
## ! `fct_explicit_na()` was deprecated in forcats 1.0.0.
## ℹ Please use `fct_na_value_to_level()` instead.

# Participant-level totals for correct target responses.
target_accuracy_summary <- trial_level_data |>
  filter(type == "target") |>
  mutate(
    Condition = fct_explicit_na(Condition, "Unknown"),
    modality_pretty = str_replace_all(modality, "_", " ")
  ) |>
  drop_na(response_binary) |>
  group_by(modality_pretty, participant_id, Condition) |>
  summarise(
    trials = n(),
    total_correct = sum(response_binary == 1),
    accuracy_rate = total_correct / trials,
    .groups = "drop"
  ) |>
  arrange(modality_pretty, participant_id)

# Condition-level descriptive statistics for participant accuracy rates.
target_accuracy_descriptives <- target_accuracy_summary |>
  group_by(modality_pretty, Condition) |>
  summarise(
    n_participants = n(),
    accuracy_mean = mean(accuracy_rate),
    accuracy_sd = sd(accuracy_rate),
    accuracy_min = min(accuracy_rate),
    accuracy_max = max(accuracy_rate),
    .groups = "drop"
  )

rt_boxplot_data |>
  ggplot(aes(x = trial_type, y = rt, fill = trial_type)) +
  geom_boxplot(alpha = 0.7, outlier.alpha = 0.4, width = 0.6) +
  facet_wrap(~ modality_pretty) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Reaction Time Distributions by Modality and Trial Type",
    x = "Trial Type",
    y = "Reaction Time (ms)") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank())

rt_boxplot_data |>
  ggplot(aes(x = Condition, y = rt, fill = Condition)) +
  geom_boxplot(alpha = 0.7, outlier.alpha = 0.4, width = 0.6) +
  facet_grid(trial_type ~ modality_pretty) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Reaction Time Distributions by Condition, Modality, and Trial Type",
    x = "Condition",
    y = "Reaction Time (ms)",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank())

target_accuracy_summary |>
  mutate(
    Condition = fct_drop(Condition)
  ) |>
  ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
  geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
  facet_wrap(~ modality_pretty, scales = "free_y") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Distribution of Correct Target Responses by Condition",
    x = "Condition",
    y = "Correct Target Responses",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1))

target_accuracy_summary |>
  mutate(
    Condition = fct_drop(Condition)
  ) |>
  filter(modality_pretty == "Onset Audio") |> 
  ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
  geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Correct Target Responses: Audio Lexical Decision Task",
    x = "Condition",
    y = "Correct Target Responses: Max = 36",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1))

target_accuracy_summary |>
  mutate(
    Condition = fct_drop(Condition)
  ) |>
  filter(modality_pretty == "Onset Written") |>
  ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
  geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "Correct Target Responses: Written Lexical Decision Task",
    x = "Condition",
    y = "Correct Target Responses: Max = 36",
    fill = "Condition") +
  theme_bw(base_size = 12) +
  theme(
    legend.position = "bottom",
    panel.grid.minor = element_blank(),
    axis.text.x = element_text(angle = 45, hjust = 1))

# Display descriptive statistics table for target accuracy by condition.
target_accuracy_descriptives |>
  gt() |>
  cols_label(
    modality_pretty = "Modality",
    Condition = "Condition",
    n_participants = "Participants (n)",
    accuracy_mean = "Accuracy Mean",
    accuracy_sd = "Accuracy SD",
    accuracy_min = "Accuracy Min",
    accuracy_max = "Accuracy Max") |>
  fmt_number(columns = n_participants, decimals = 0) |>
  fmt_percent(
    columns = c(accuracy_mean, accuracy_sd, accuracy_min, accuracy_max),
    decimals = 1) |>
  tab_style(
    style = cell_fill(color = "#ffcccb"),
    locations = cells_body(
      columns = c(accuracy_mean, accuracy_min),
      rows = accuracy_mean < 0.5 | accuracy_min < 0.5
    )
  ) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Target Accuracy Descriptive Statistics by Condition**"))

Modality	Condition	Participants (n)	Accuracy Mean	Accuracy SD	Accuracy Min	Accuracy Max
Target Accuracy Descriptive Statistics by Condition
Onset Audio	Reading	8	70.9%	12.1%	47.2%	85.7%
Onset Audio	Reading_While_Listening	8	60.7%	33.6%	13.9%	100.0%
Onset Audio	Viewing	8	62.4%	23.6%	30.6%	97.1%
Onset Written	Reading	8	80.5%	14.0%	60.0%	96.8%
Onset Written	Reading_While_Listening	8	65.7%	36.6%	5.7%	100.0%
Onset Written	Viewing	7	71.7%	19.9%	50.0%	100.0%

# RT outlier detection using IQR method by participant and condition.
rt_outlier_summary <- trial_level_data |>
  filter(!is.na(rt)) |>
  group_by(modality, participant_id, Condition) |>
  summarise(
    n_trials = n(),
    q1 = quantile(rt, 0.25),
    q3 = quantile(rt, 0.75),
    iqr = q3 - q1,
    lower_bound = q1 - 3 * iqr,
    upper_bound = q3 + 3 * iqr,
    n_outliers = sum(rt < lower_bound | rt > upper_bound),
    outlier_rate = n_outliers / n_trials,
    .groups = "drop"
  ) |>
  arrange(desc(outlier_rate))

# Display participants with high outlier rates (>10%).
high_outlier_participants <- rt_outlier_summary |>
  filter(outlier_rate > 0.10)

if (nrow(high_outlier_participants) > 0) {
  cat("\n=== PARTICIPANTS WITH HIGH RT OUTLIER RATES (>10%) ===\n")
  print(high_outlier_participants |> select(modality, participant_id, Condition, n_trials, n_outliers, outlier_rate))
  cat("\n")
}

# Visualize RT distributions to identify extreme outliers.
trial_level_data |>
  filter(!is.na(rt)) |>
  mutate(
    Condition = fct_explicit_na(Condition, "Unknown"),
    modality_pretty = str_replace_all(modality, "_", " ")
  ) |>
  ggplot(aes(x = participant_id, y = rt, fill = Condition)) +
  geom_boxplot(alpha = 0.7, outlier.size = 1, outlier.alpha = 0.5) +
  facet_wrap(~ modality_pretty, scales = "free_x") +
  scale_fill_brewer(palette = "Set2") +
  labs(
    title = "RT Distribution by Participant (Outlier Detection)",
    x = "Participant",
    y = "Reaction Time (ms)",
    fill = "Condition") +
  theme_bw(base_size = 10) +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    legend.position = "bottom",
    panel.grid.minor = element_blank()
  )

R_LDT_Analysis.r

tohokusla

2025-10-27