pacman::p_load(
  "tidyverse", "readxl",
  "psych", "gt")

# Helper to calculate hit/accuracy proportions for a given trial type.
calc_accuracy_prop <- function(data, trial_type, success_value) {
  responses <- data$response_binary[data$type == trial_type]
  responses <- responses[!is.na(responses)]
  if (!length(responses)) {
    return(NA_real_)
  }
  if (success_value == 1) {
    mean(responses == 1)
  } else {
    mean(responses == 0)
  }
}

# Load trial-level data from a result file.
load_trial_data <- function(path) {
  read_csv(path, show_col_types = FALSE) |>
    filter(type %in% c("target", "filler", "nonword")) |>
    mutate(
      rt = as.numeric(rt),
      response_binary = as.numeric(response_binary)
    ) |>
    select(type, rt, response_binary)
}

# Summarise a single participant's trial data into descriptive statistics.
summarise_results <- function(trial_data) {
  if (is.null(trial_data)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = NA_real_,
      hit_filler_total = NA_real_,
      correct_rejection_total = NA_real_
    ))
  }

  accuracy_values <- tibble(
    hit_target_total = calc_accuracy_prop(trial_data, "target", 1),
    hit_filler_total = calc_accuracy_prop(trial_data, "filler", 1),
    correct_rejection_total = calc_accuracy_prop(trial_data, "nonword", 0)
  )

  valid_rt <- trial_data$rt[!is.na(trial_data$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = accuracy_values$hit_target_total,
      hit_filler_total = accuracy_values$hit_filler_total,
      correct_rejection_total = accuracy_values$correct_rejection_total
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    hit_target_total = accuracy_values$hit_target_total,
    hit_filler_total = accuracy_values$hit_filler_total,
    correct_rejection_total = accuracy_values$correct_rejection_total
  )
}

# Collect file paths for both modalities.
audio_paths <- tibble(
  modality = "Onset_Audio",
  path = list.files("Onset_Audio", pattern = "_audio_results\\.csv$", full.names = TRUE)
)

written_paths <- tibble(
  modality = "Onset_Written",
  path = list.files("Onset_Written", pattern = "_written_results\\.csv$", full.names = TRUE)
)

# Participant metadata.
participant_meta <- read_excel("Participant_List.xlsx") |>
  mutate(
    Subject_ID = as.integer(Subject_ID),
    participant_id = sprintf("Sub_%02d", Subject_ID))

# Combine, annotate, and load all result files.
results_index <- bind_rows(audio_paths, written_paths) |>
  mutate(
    participant_id = str_extract(basename(path), "Sub_\\d+"),
    Subject_ID = as.integer(str_remove(participant_id, "Sub_")),
    data = map(path, load_trial_data)) |>
  left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
  mutate(Condition = coalesce(Condition, "Unknown"))

# Participant-level summaries used for ldt_gt.
summary_table <- results_index |>
  mutate(summary = map(data, summarise_results)) |>
  unnest(summary) |>
  arrange(modality, Subject_ID)

# Trial-level data used to compute grand descriptive statistics.
trial_level_data <- results_index |>
  select(modality, participant_id, Subject_ID, Condition, data) |>
  unnest(data)

# Compute descriptive statistics for a grouped set of trials.
summarise_group_stats <- function(df) {
  participant_count <- n_distinct(df$participant_id)

  accuracy_values <- tibble(
    hit_target_total = calc_accuracy_prop(df, "target", 1),
    hit_filler_total = calc_accuracy_prop(df, "filler", 1),
    correct_rejection_total = calc_accuracy_prop(df, "nonword", 0))

  valid_rt <- df$rt[!is.na(df$rt)]

  if (!length(valid_rt)) {
    return(tibble(
      participants = participant_count,
      rt_n = 0,
      rt_mean = NA_real_,
      rt_sd = NA_real_,
      rt_median = NA_real_,
      rt_trimmed = NA_real_,
      rt_min = NA_real_,
      rt_max = NA_real_,
      rt_skew = NA_real_,
      rt_kurtosis = NA_real_,
      rt_se = NA_real_,
      hit_target_total = accuracy_values$hit_target_total,
      hit_filler_total = accuracy_values$hit_filler_total,
      correct_rejection_total = accuracy_values$correct_rejection_total
    ))
  }

  rt_desc <- psych::describe(valid_rt)

  tibble(
    participants = participant_count,
    rt_n = rt_desc$n,
    rt_mean = rt_desc$mean,
    rt_sd = rt_desc$sd,
    rt_median = rt_desc$median,
    rt_trimmed = rt_desc$trimmed,
    rt_min = rt_desc$min,
    rt_max = rt_desc$max,
    rt_skew = rt_desc$skew,
    rt_kurtosis = rt_desc$kurtosis,
    rt_se = rt_desc$se,
    hit_target_total = accuracy_values$hit_target_total,
    hit_filler_total = accuracy_values$hit_filler_total,
    correct_rejection_total = accuracy_values$correct_rejection_total
  )
}

grand_modality_stats <- trial_level_data |>
  group_by(modality) |>
  group_modify(~ summarise_group_stats(.x)) |>
  ungroup()

grand_condition_stats <- trial_level_data |>
  group_by(Condition) |>
  group_modify(~ summarise_group_stats(.x)) |>
  ungroup() |>
  arrange(Condition)

# Build a gt table grouped by modality.
summary_table |>
  select(
    modality,
    participant_id,
    Condition,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt(groupname_col = "modality") |>
  cols_label(
    participant_id = "Participant",
    Condition = "Condition",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Lexical Decision Task Descriptive Statistics**"))
Lexical Decision Task Descriptive Statistics
Participant Condition Trials (n) RT Mean (ms) RT SD (ms) RT Median (ms) RT Min (ms) RT Max (ms) Hit (Target) Hit (Filler) Correct Rejection (Nonword)
Onset_Audio
Sub_01 Reading 94 1,626.5 447.7 1,617.5 295.6 2,759.3 77.1% 100.0% 31.4%
Sub_02 Reading_While_Listening 96 1,525.6 465.5 1,463.8 833.0 2,827.3 44.4% 95.8% 69.4%
Sub_03 Viewing 95 1,620.6 421.3 1,494.8 991.7 2,782.0 68.6% 95.8% 47.2%
Sub_04 Reading 96 1,851.9 412.0 1,884.1 1,017.3 2,822.4 47.2% 79.2% 77.8%
Sub_05 Reading_While_Listening 92 1,401.2 369.4 1,372.8 803.7 2,594.2 45.7% 91.7% 45.5%
Sub_06 Viewing 88 1,703.3 460.2 1,544.8 1,054.3 2,824.0 97.1% 100.0% 50.0%
Sub_07 Reading 84 2,075.4 380.3 2,086.2 1,148.9 2,994.7 75.0% 95.2% 35.5%
Sub_08 Reading_While_Listening 96 1,718.1 365.5 1,648.1 1,111.3 2,792.3 13.9% 75.0% 83.3%
Sub_09 Viewing 95 1,611.1 374.5 1,532.7 993.3 2,698.4 50.0% 100.0% 33.3%
Sub_10 Reading 91 1,729.2 528.4 1,676.9 938.7 2,952.4 85.7% 100.0% 78.1%
Sub_11 Reading_While_Listening 95 1,509.1 323.0 1,461.6 1,017.7 2,735.1 100.0% 100.0% 77.1%
Sub_12 Viewing 96 1,331.0 231.5 1,301.0 844.4 1,969.2 91.7% 100.0% 52.8%
Onset_Written
Sub_01 Reading 94 1,211.3 376.6 1,101.3 702.8 2,642.6 85.7% 100.0% 82.9%
Sub_02 Reading_While_Listening 95 1,272.9 483.9 1,082.9 734.9 2,449.9 62.9% 100.0% 88.9%
Sub_03 Viewing 94 1,508.2 472.5 1,373.3 803.7 2,929.9 85.7% 100.0% 62.9%
Sub_04 Reading 96 1,448.6 397.8 1,400.4 849.3 2,787.3 61.1% 95.8% 100.0%
Sub_05 Reading_While_Listening 95 1,518.5 522.4 1,464.1 774.0 3,012.0 65.7% 100.0% 72.2%
Sub_06 Viewing 74 1,570.5 609.3 1,442.3 740.3 2,865.7 100.0% 100.0% 77.8%
Sub_07 Reading 82 1,888.5 519.9 1,888.0 745.5 2,945.3 96.8% 100.0% 53.3%
Sub_08 Reading_While_Listening 95 1,457.4 370.2 1,409.3 789.3 2,584.0 5.7% 100.0% 100.0%
Sub_09 Viewing 88 1,697.6 538.0 1,652.5 839.1 2,981.4 88.6% 95.0% 45.5%
Sub_10 Reading 96 1,475.6 585.9 1,302.1 789.3 2,908.7 94.4% 100.0% 75.0%
Sub_11 Reading_While_Listening 93 1,260.6 360.7 1,189.6 775.8 2,779.5 97.1% 95.8% 94.1%
Sub_12 Viewing 0 NA NA NA NA NA NA NA NA
grand_modality_stats |>
  select(
    modality,
    participants,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt() |>
  cols_label(
    modality = "Modality",
    participants = "Participants (n)",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(columns = c(participants, rt_n), decimals = 0) |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Grand Descriptive Statistics by Modality**"))
Grand Descriptive Statistics by Modality
Modality Participants (n) Trials (n) RT Mean (ms) RT SD (ms) RT Median (ms) RT Min (ms) RT Max (ms) Hit (Target) Hit (Filler) Correct Rejection (Nonword)
Onset_Audio 12 1,118 1,637.5 443.9 1,557.3 295.6 2,994.7 66.1% 94.3% 57.0%
Onset_Written 12 1,002 1,474.7 512.0 1,369.3 702.8 3,012.0 76.3% 98.8% 78.1%
grand_condition_stats |>
  select(
    Condition,
    participants,
    rt_n,
    rt_mean,
    rt_sd,
    rt_median,
    rt_min,
    rt_max,
    hit_target_total,
    hit_filler_total,
    correct_rejection_total) |>
  gt() |>
  cols_label(
    Condition = "Condition",
    participants = "Participants (n)",
    rt_n = "Trials (n)",
    rt_mean = "RT Mean (ms)",
    rt_sd = "RT SD (ms)",
    rt_median = "RT Median (ms)",
    rt_min = "RT Min (ms)",
    rt_max = "RT Max (ms)",
    hit_target_total = "Hit (Target)",
    hit_filler_total = "Hit (Filler)",
    correct_rejection_total = "Correct Rejection (Nonword)") |>
  fmt_number(columns = c(participants, rt_n), decimals = 0) |>
  fmt_number(
    columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
    decimals = 1) |>
  fmt_percent(
    columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
    decimals = 1) |>
  sub_missing(everything(), missing_text = "NA") |>
  tab_header(
    title = md("**Grand Descriptive Statistics by Condition**"))
Grand Descriptive Statistics by Condition
Condition Participants (n) Trials (n) RT Mean (ms) RT SD (ms) RT Median (ms) RT Min (ms) RT Max (ms) Hit (Target) Hit (Filler) Correct Rejection (Nonword)
Reading 4 733 1,653.2 527.8 1,630.8 295.6 2,994.7 77.5% 96.2% 67.5%
Reading_While_Listening 4 757 1,459.1 434.5 1,393.6 734.9 3,012.0 54.4% 94.8% 79.1%
Viewing 4 630 1,574.7 464.4 1,473.8 740.3 2,981.4 82.6% 98.7% 51.9%