R_LDT_Analysis.r

pacman::p_load(
  "tidyverse", "readxl",
  "psych", "gt")

# Helper to calculate hit/accuracy proportions for a given trial type.
calc_accuracy_prop <- function(data, trial_type, success_value) {
  responses <- data$response_binary[data$type == trial_type]
  responses <- responses[!is.na(responses)]
  if (!length(responses)) {
    return(NA_real_)
  }
  if (success_value == 1) {
    mean(responses == 1)
  } else {
    mean(responses == 0)
  }
}

# Load trial-level data from a result file.
load_trial_data <- function(path) {
  read_csv(path, show_col_types = FALSE) |>
    filter(type %in% c("target", "filler", "nonword")) |>
    mutate(
      rt = as.numeric(rt),
      response_binary = as.numeric(response_binary)
    ) |>
    select(type, rt, response_binary)
}

# Summarise a single participant's trial data into descriptive statistics.
summarise_results <- function(trial_data) {
  if (is.null(trial_data)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = NA_real_,
      hit_filler_total = NA_real_,
      correct_rejection_total = NA_real_
    ))
  }

  accuracy_values <- tibble(
    hit_target_total = calc_accuracy_prop(trial_data, "target", 1),
    hit_filler_total = calc_accuracy_prop(trial_data, "filler", 1),
    correct_rejection_total = calc_accuracy_prop(trial_data, "nonword", 0)
  )

  valid_rt <- trial_data$rt[!is.na(trial_data$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = accuracy_values$hit_target_total,
      hit_filler_total = accuracy_values$hit_filler_total,
      correct_rejection_total = accuracy_values$correct_rejection_total
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    hit_target_total = accuracy_values$hit_target_total,
    hit_filler_total = accuracy_values$hit_filler_total,
    correct_rejection_total = accuracy_values$correct_rejection_total
  )
}

# Collect file paths for both modalities.
audio_paths <- tibble(
  modality = "Onset_Audio",
  path = list.files("Onset_Audio", pattern = "_audio_results\\.csv$", full.names = TRUE)
)

written_paths <- tibble(
  modality = "Onset_Written",
  path = list.files("Onset_Written", pattern = "_written_results\\.csv$", full.names = TRUE)
)

# Participant metadata.
participant_meta <- read_excel("Participant_List.xlsx") |>
  mutate(
    Subject_ID = as.integer(Subject_ID),
    participant_id = sprintf("Sub_%02d", Subject_ID))

# Combine, annotate, and load all result files.
results_index <- bind_rows(audio_paths, written_paths) |>
  mutate(
    participant_id = str_extract(basename(path), "Sub_\\d+"),
    Subject_ID = as.integer(str_remove(participant_id, "Sub_")),
    data = map(path, load_trial_data)) |>
  left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
  mutate(Condition = coalesce(Condition, "Unknown"))

# Participant-level summaries used for ldt_gt.
summary_table <- results_index |>
  mutate(summary = map(data, summarise_results)) |>
  unnest(summary) |>
  arrange(modality, Subject_ID)

# Trial-level data used to compute grand descriptive statistics.
trial_level_data <- results_index |>
  select(modality, participant_id, Subject_ID, Condition, data) |>
  unnest(data)

# Compute descriptive statistics for a grouped set of trials.
summarise_group_stats <- function(df) {
  participant_count <- n_distinct(df$participant_id)

  accuracy_values <- tibble(
    hit_target_total = calc_accuracy_prop(df, "target", 1),
    hit_filler_total = calc_accuracy_prop(df, "filler", 1),
    correct_rejection_total = calc_accuracy_prop(df, "nonword", 0))

  valid_rt <- df$rt[!is.na(df$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      participants = participant_count,
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = accuracy_values$hit_target_total,
      hit_filler_total = accuracy_values$hit_filler_total,
      correct_rejection_total = accuracy_values$correct_rejection_total
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    participants = participant_count,
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    hit_target_total = accuracy_values$hit_target_total,
    hit_filler_total = accuracy_values$hit_filler_total,
    correct_rejection_total = accuracy_values$correct_rejection_total
  )
}

grand_modality_stats <- trial_level_data |>
  group_by(modality) |>
  group_modify(~ summarise_group_stats(.x)) |>
  ungroup()

grand_condition_stats <- trial_level_data |>
  group_by(Condition) |>
  group_modify(~ summarise_group_stats(.x)) |>
  ungroup() |>
  arrange(Condition)

# Build a gt table grouped by modality.
summary_table |>
  select(
    modality,
    participant_id,
    Condition,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt(groupname_col = "modality") |>
  cols_label(
    participant_id = "Participant",
    Condition = "Condition",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Lexical Decision Task Descriptive Statistics**"))

Participant	Condition	Trials (n)	RT Mean (ms)	RT SD (ms)	RT Median (ms)	RT Min (ms)	RT Max (ms)	Hit (Target)	Hit (Filler)	Correct Rejection (Nonword)
Lexical Decision Task Descriptive Statistics
Onset_Audio
Sub_01	Reading	94	1,626.5	447.7	1,617.5	295.6	2,759.3	77.1%	100.0%	31.4%
Sub_02	Reading_While_Listening	96	1,525.6	465.5	1,463.8	833.0	2,827.3	44.4%	95.8%	69.4%
Sub_03	Viewing	95	1,620.6	421.3	1,494.8	991.7	2,782.0	68.6%	95.8%	47.2%
Sub_04	Reading	96	1,851.9	412.0	1,884.1	1,017.3	2,822.4	47.2%	79.2%	77.8%
Sub_05	Reading_While_Listening	92	1,401.2	369.4	1,372.8	803.7	2,594.2	45.7%	91.7%	45.5%
Sub_06	Viewing	88	1,703.3	460.2	1,544.8	1,054.3	2,824.0	97.1%	100.0%	50.0%
Sub_07	Reading	84	2,075.4	380.3	2,086.2	1,148.9	2,994.7	75.0%	95.2%	35.5%
Sub_08	Reading_While_Listening	96	1,718.1	365.5	1,648.1	1,111.3	2,792.3	13.9%	75.0%	83.3%
Sub_09	Viewing	95	1,611.1	374.5	1,532.7	993.3	2,698.4	50.0%	100.0%	33.3%
Sub_10	Reading	91	1,729.2	528.4	1,676.9	938.7	2,952.4	85.7%	100.0%	78.1%
Sub_11	Reading_While_Listening	95	1,509.1	323.0	1,461.6	1,017.7	2,735.1	100.0%	100.0%	77.1%
Sub_12	Viewing	96	1,331.0	231.5	1,301.0	844.4	1,969.2	91.7%	100.0%	52.8%
Onset_Written
Sub_01	Reading	94	1,211.3	376.6	1,101.3	702.8	2,642.6	85.7%	100.0%	82.9%
Sub_02	Reading_While_Listening	95	1,272.9	483.9	1,082.9	734.9	2,449.9	62.9%	100.0%	88.9%
Sub_03	Viewing	94	1,508.2	472.5	1,373.3	803.7	2,929.9	85.7%	100.0%	62.9%
Sub_04	Reading	96	1,448.6	397.8	1,400.4	849.3	2,787.3	61.1%	95.8%	100.0%
Sub_05	Reading_While_Listening	95	1,518.5	522.4	1,464.1	774.0	3,012.0	65.7%	100.0%	72.2%
Sub_06	Viewing	74	1,570.5	609.3	1,442.3	740.3	2,865.7	100.0%	100.0%	77.8%
Sub_07	Reading	82	1,888.5	519.9	1,888.0	745.5	2,945.3	96.8%	100.0%	53.3%
Sub_08	Reading_While_Listening	95	1,457.4	370.2	1,409.3	789.3	2,584.0	5.7%	100.0%	100.0%
Sub_09	Viewing	88	1,697.6	538.0	1,652.5	839.1	2,981.4	88.6%	95.0%	45.5%
Sub_10	Reading	96	1,475.6	585.9	1,302.1	789.3	2,908.7	94.4%	100.0%	75.0%
Sub_11	Reading_While_Listening	93	1,260.6	360.7	1,189.6	775.8	2,779.5	97.1%	95.8%	94.1%
Sub_12	Viewing	0	NA	NA	NA	NA	NA	NA	NA	NA

grand_modality_stats |>
  select(
    modality,
    participants,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt() |>
  cols_label(
    modality = "Modality",
    participants = "Participants (n)",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(columns = c(participants, rt_n), decimals = 0) |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Grand Descriptive Statistics by Modality**"))

Modality	Participants (n)	Trials (n)	RT Mean (ms)	RT SD (ms)	RT Median (ms)	RT Min (ms)	RT Max (ms)	Hit (Target)	Hit (Filler)	Correct Rejection (Nonword)
Grand Descriptive Statistics by Modality
Onset_Audio	12	1,118	1,637.5	443.9	1,557.3	295.6	2,994.7	66.1%	94.3%	57.0%
Onset_Written	12	1,002	1,474.7	512.0	1,369.3	702.8	3,012.0	76.3%	98.8%	78.1%

grand_condition_stats |>
  select(
    Condition,
    participants,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt() |>
  cols_label(
    Condition = "Condition",
    participants = "Participants (n)",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(columns = c(participants, rt_n), decimals = 0) |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Grand Descriptive Statistics by Condition**"))

Condition	Participants (n)	Trials (n)	RT Mean (ms)	RT SD (ms)	RT Median (ms)	RT Min (ms)	RT Max (ms)	Hit (Target)	Hit (Filler)	Correct Rejection (Nonword)
Grand Descriptive Statistics by Condition
Reading	4	733	1,653.2	527.8	1,630.8	295.6	2,994.7	77.5%	96.2%	67.5%
Reading_While_Listening	4	757	1,459.1	434.5	1,393.6	734.9	3,012.0	54.4%	94.8%	79.1%
Viewing	4	630	1,574.7	464.4	1,473.8	740.3	2,981.4	82.6%	98.7%	51.9%

R_LDT_Analysis.r

tohokusla

2025-10-13