pacman::p_load(
"tidyverse", "readxl",
"psych", "gt")
# Helper to calculate hit/accuracy proportions for a given trial type.
calc_accuracy_prop <- function(data, trial_type, success_value) {
responses <- data$response_binary[data$type == trial_type]
responses <- responses[!is.na(responses)]
if (!length(responses)) {
return(NA_real_)
}
if (success_value == 1) {
mean(responses == 1)
} else {
mean(responses == 0)
}
}
# Data quality check function to validate data completeness.
check_data_quality <- function(path, min_trials = 50) {
data <- read_csv(path, show_col_types = FALSE)
trial_data <- data |> filter(type %in% c("target", "filler", "nonword"))
n_trials <- nrow(trial_data)
n_responses <- sum(!is.na(trial_data$response_binary))
response_rate <- if (n_trials > 0) n_responses / n_trials else 0
is_valid <- n_trials >= min_trials & response_rate > 0.5
tibble(
n_trials = n_trials,
n_responses = n_responses,
response_rate = response_rate,
is_valid = is_valid,
reason = case_when(
n_trials < min_trials ~ "Insufficient trials",
response_rate <= 0.5 ~ "Low response rate",
TRUE ~ "OK"
)
)
}
# Load trial-level data from a result file.
load_trial_data <- function(path) {
read_csv(path, show_col_types = FALSE) |>
filter(type %in% c("target", "filler", "nonword")) |>
mutate(
rt = as.numeric(rt),
response_binary = as.numeric(response_binary)
) |>
select(type, rt, response_binary)
}
# Summarise a single participant's trial data into descriptive statistics.
summarise_results <- function(trial_data) {
if (is.null(trial_data)) {
return(tibble(
rt_n = 0,
rt_mean = NA_real_,
rt_sd = NA_real_,
rt_median = NA_real_,
rt_trimmed = NA_real_,
rt_min = NA_real_,
rt_max = NA_real_,
rt_skew = NA_real_,
rt_kurtosis = NA_real_,
rt_se = NA_real_,
hit_target_total = NA_real_,
hit_filler_total = NA_real_,
correct_rejection_total = NA_real_
))
}
accuracy_values <- tibble(
hit_target_total = calc_accuracy_prop(trial_data, "target", 1),
hit_filler_total = calc_accuracy_prop(trial_data, "filler", 1),
correct_rejection_total = calc_accuracy_prop(trial_data, "nonword", 0)
)
valid_rt <- trial_data$rt[!is.na(trial_data$rt)]
if (!length(valid_rt)) {
return(tibble(
rt_n = 0,
rt_mean = NA_real_,
rt_sd = NA_real_,
rt_median = NA_real_,
rt_trimmed = NA_real_,
rt_min = NA_real_,
rt_max = NA_real_,
rt_skew = NA_real_,
rt_kurtosis = NA_real_,
rt_se = NA_real_,
hit_target_total = accuracy_values$hit_target_total,
hit_filler_total = accuracy_values$hit_filler_total,
correct_rejection_total = accuracy_values$correct_rejection_total
))
}
rt_desc <- psych::describe(valid_rt)
tibble(
rt_n = rt_desc$n,
rt_mean = rt_desc$mean,
rt_sd = rt_desc$sd,
rt_median = rt_desc$median,
rt_trimmed = rt_desc$trimmed,
rt_min = rt_desc$min,
rt_max = rt_desc$max,
rt_skew = rt_desc$skew,
rt_kurtosis = rt_desc$kurtosis,
rt_se = rt_desc$se,
hit_target_total = accuracy_values$hit_target_total,
hit_filler_total = accuracy_values$hit_filler_total,
correct_rejection_total = accuracy_values$correct_rejection_total
)
}
# Collect file paths for both modalities.
audio_paths <- tibble(
modality = "Onset_Audio",
path = list.files("Onset_Audio", pattern = "_audio_results\\.csv$", full.names = TRUE)
)
written_paths <- tibble(
modality = "Onset_Written",
path = list.files("Onset_Written", pattern = "_written_results\\.csv$", full.names = TRUE)
)
# Participant metadata.
participant_meta <- read_excel("Participant_List.xlsx") |>
mutate(
Subject_ID = as.integer(Subject_ID),
participant_id = sprintf("Sub_%02d", Subject_ID))
# Perform data quality checks on all files.
all_paths <- bind_rows(audio_paths, written_paths)
quality_check <- all_paths |>
mutate(quality = map(path, check_data_quality)) |>
unnest(quality)
# Process excluded participants for detailed reporting.
excluded_participants <- quality_check |>
filter(!is_valid) |>
select(modality, path, n_trials, n_responses, response_rate, reason) |>
mutate(
participant_id = str_extract(basename(path), "Sub_\\d+"),
Subject_ID = as.integer(str_remove(participant_id, "Sub_"))
) |>
left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
mutate(Condition = coalesce(Condition, "Unknown"))
# Load and summarize excluded participants' data for detailed statistics.
if (nrow(excluded_participants) > 0) {
cat("\n=== EXCLUDED PARTICIPANTS ===\n")
print(excluded_participants |> select(modality, participant_id, Condition, n_trials, n_responses, reason))
cat("\n")
excluded_summary_table <- excluded_participants |>
mutate(data = map(path, possibly(load_trial_data, otherwise = NULL))) |>
mutate(summary = map(data, summarise_results)) |>
unnest(cols = summary) |>
arrange(modality, Subject_ID)
}
##
## === EXCLUDED PARTICIPANTS ===
## # A tibble: 1 × 6
## modality participant_id Condition n_trials n_responses reason
## <chr> <chr> <chr> <int> <int> <chr>
## 1 Onset_Written Sub_12 Viewing 7 0 Insufficient tria…
# Combine, annotate, and load all result files (excluding invalid data).
valid_paths <- quality_check |>
filter(is_valid) |>
pull(path)
cat("\n=== QUALITY CHECK SUMMARY ===\n")
##
## === QUALITY CHECK SUMMARY ===
cat("Total files checked:", nrow(quality_check), "\n")
## Total files checked: 48
cat("Valid files:", length(valid_paths), "\n")
## Valid files: 47
cat("Excluded files:", nrow(quality_check) - length(valid_paths), "\n\n")
## Excluded files: 1
results_index <- bind_rows(audio_paths, written_paths) |>
filter(path %in% valid_paths) |>
mutate(
participant_id = str_extract(basename(path), "Sub_\\d+"),
Subject_ID = as.integer(str_remove(participant_id, "Sub_")),
data = map(path, load_trial_data)) |>
left_join(select(participant_meta, Subject_ID, Condition), by = "Subject_ID") |>
mutate(Condition = coalesce(Condition, "Unknown"))
cat("Results index rows:", nrow(results_index), "\n\n")
## Results index rows: 47
# Participant-level summaries used for ldt_gt.
summary_table <- results_index |>
mutate(summary = map(data, summarise_results)) |>
unnest(cols = summary) |>
arrange(modality, Subject_ID)
# Trial-level data used to compute grand descriptive statistics.
trial_level_data <- results_index |>
select(modality, participant_id, Subject_ID, Condition, data) |>
unnest(cols = data)
# Compute descriptive statistics for a grouped set of trials.
summarise_group_stats <- function(df, trial_type) {
participant_count <- n_distinct(df$participant_id)
success_value <- if (!is.na(trial_type) && trial_type == "nonword") 0 else 1
responses <- df$response_binary[!is.na(df$response_binary)]
accuracy_rate <- if (!length(responses)) {
NA_real_
} else if (success_value == 1) {
mean(responses == 1)
} else {
mean(responses == 0)
}
valid_rt <- df$rt[!is.na(df$rt)]
if (!length(valid_rt)) {
return(tibble(
participants = participant_count,
rt_n = 0,
rt_mean = NA_real_,
rt_sd = NA_real_,
rt_median = NA_real_,
rt_trimmed = NA_real_,
rt_min = NA_real_,
rt_max = NA_real_,
rt_skew = NA_real_,
rt_kurtosis = NA_real_,
rt_se = NA_real_,
accuracy = accuracy_rate
))
}
rt_desc <- psych::describe(valid_rt)
tibble(
participants = participant_count,
rt_n = rt_desc$n,
rt_mean = rt_desc$mean,
rt_sd = rt_desc$sd,
rt_median = rt_desc$median,
rt_trimmed = rt_desc$trimmed,
rt_min = rt_desc$min,
rt_max = rt_desc$max,
rt_skew = rt_desc$skew,
rt_kurtosis = rt_desc$kurtosis,
rt_se = rt_desc$se,
accuracy = accuracy_rate
)
}
grand_modality_stats <- trial_level_data |>
group_by(modality, Condition, type) |>
group_modify(~ summarise_group_stats(.x, .y$type[[1]])) |>
arrange(modality, Condition, type) |>
ungroup()
# Build a gt table grouped by modality.
summary_table |>
select(
modality,
participant_id,
Condition,
rt_n,
rt_mean,
rt_sd,
rt_median,
rt_min,
rt_max,
hit_target_total,
hit_filler_total,
correct_rejection_total) |>
gt(groupname_col = "modality") |>
cols_label(
participant_id = "Participant",
Condition = "Condition",
rt_n = "Trials (n)",
rt_mean = "RT Mean (ms)",
rt_sd = "RT SD (ms)",
rt_median = "RT Median (ms)",
rt_min = "RT Min (ms)",
rt_max = "RT Max (ms)",
hit_target_total = "Hit (Target)",
hit_filler_total = "Hit (Filler)",
correct_rejection_total = "Correct Rejection (Nonword)") |>
fmt_number(
columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
decimals = 1) |>
fmt_percent(
columns = c(hit_target_total, hit_filler_total, correct_rejection_total),
decimals = 1) |>
sub_missing(everything(), missing_text = "NA") |>
tab_header(
title = md("**Lexical Decision Task Descriptive Statistics**"),
subtitle = "Valid participants only")
| Lexical Decision Task Descriptive Statistics |
| Valid participants only |
| Participant |
Condition |
Trials (n) |
RT Mean (ms) |
RT SD (ms) |
RT Median (ms) |
RT Min (ms) |
RT Max (ms) |
Hit (Target) |
Hit (Filler) |
Correct Rejection (Nonword) |
| Onset_Audio |
| Sub_01 |
Reading |
94 |
1,626.5 |
447.7 |
1,617.5 |
295.6 |
2,759.3 |
77.1% |
100.0% |
31.4% |
| Sub_02 |
Reading_While_Listening |
96 |
1,525.6 |
465.5 |
1,463.8 |
833.0 |
2,827.3 |
44.4% |
95.8% |
69.4% |
| Sub_03 |
Viewing |
95 |
1,620.6 |
421.3 |
1,494.8 |
991.7 |
2,782.0 |
68.6% |
95.8% |
47.2% |
| Sub_04 |
Reading |
96 |
1,851.9 |
412.0 |
1,884.1 |
1,017.3 |
2,822.4 |
47.2% |
79.2% |
77.8% |
| Sub_05 |
Reading_While_Listening |
92 |
1,401.2 |
369.4 |
1,372.8 |
803.7 |
2,594.2 |
45.7% |
91.7% |
45.5% |
| Sub_06 |
Viewing |
88 |
1,703.3 |
460.2 |
1,544.8 |
1,054.3 |
2,824.0 |
97.1% |
100.0% |
50.0% |
| Sub_07 |
Reading |
84 |
2,075.4 |
380.3 |
2,086.2 |
1,148.9 |
2,994.7 |
75.0% |
95.2% |
35.5% |
| Sub_08 |
Reading_While_Listening |
96 |
1,718.1 |
365.5 |
1,648.1 |
1,111.3 |
2,792.3 |
13.9% |
75.0% |
83.3% |
| Sub_09 |
Viewing |
95 |
1,611.1 |
374.5 |
1,532.7 |
993.3 |
2,698.4 |
50.0% |
100.0% |
33.3% |
| Sub_10 |
Reading |
91 |
1,729.2 |
528.4 |
1,676.9 |
938.7 |
2,952.4 |
85.7% |
100.0% |
78.1% |
| Sub_11 |
Reading_While_Listening |
95 |
1,509.1 |
323.0 |
1,461.6 |
1,017.7 |
2,735.1 |
100.0% |
100.0% |
77.1% |
| Sub_12 |
Viewing |
96 |
1,331.0 |
231.5 |
1,301.0 |
844.4 |
1,969.2 |
91.7% |
100.0% |
52.8% |
| Sub_13 |
Reading |
92 |
1,887.8 |
553.1 |
1,817.8 |
1,036.9 |
2,956.6 |
82.4% |
91.7% |
52.9% |
| Sub_14 |
Reading_While_Listening |
95 |
1,669.3 |
465.2 |
1,676.7 |
633.1 |
2,803.1 |
83.3% |
95.7% |
86.1% |
| Sub_15 |
Viewing |
96 |
1,615.4 |
360.4 |
1,535.9 |
1,078.2 |
2,746.1 |
30.6% |
87.5% |
69.4% |
| Sub_16 |
Reading |
77 |
1,892.0 |
525.7 |
1,860.3 |
972.5 |
2,977.9 |
66.7% |
94.4% |
41.4% |
| Sub_17 |
Reading_While_Listening |
96 |
1,411.9 |
283.3 |
1,364.8 |
875.1 |
2,212.2 |
75.0% |
83.3% |
75.0% |
| Sub_18 |
Viewing |
94 |
1,841.5 |
307.6 |
1,806.2 |
1,291.6 |
2,832.7 |
37.1% |
73.9% |
58.3% |
| Sub_19 |
Reading |
86 |
1,773.0 |
442.7 |
1,724.8 |
932.2 |
2,847.5 |
67.6% |
90.9% |
56.7% |
| Sub_20 |
Reading_While_Listening |
90 |
1,725.7 |
532.1 |
1,626.4 |
914.9 |
2,966.9 |
100.0% |
100.0% |
64.5% |
| Sub_21 |
Viewing |
96 |
1,339.7 |
266.7 |
1,313.5 |
841.2 |
1,995.6 |
61.1% |
91.7% |
100.0% |
| Sub_22 |
Reading |
70 |
1,959.0 |
522.1 |
2,013.7 |
1,040.4 |
2,888.5 |
65.4% |
77.3% |
36.4% |
| Sub_23 |
Reading_While_Listening |
93 |
1,510.4 |
364.4 |
1,457.5 |
871.1 |
2,982.3 |
22.9% |
70.8% |
67.6% |
| Sub_24 |
Viewing |
94 |
1,701.4 |
501.5 |
1,716.8 |
892.5 |
2,960.3 |
62.9% |
95.7% |
41.7% |
| Onset_Written |
| Sub_01 |
Reading |
94 |
1,211.3 |
376.6 |
1,101.3 |
702.8 |
2,642.6 |
85.7% |
100.0% |
82.9% |
| Sub_02 |
Reading_While_Listening |
95 |
1,272.9 |
483.9 |
1,082.9 |
734.9 |
2,449.9 |
62.9% |
100.0% |
88.9% |
| Sub_03 |
Viewing |
94 |
1,508.2 |
472.5 |
1,373.3 |
803.7 |
2,929.9 |
85.7% |
100.0% |
62.9% |
| Sub_04 |
Reading |
96 |
1,448.6 |
397.8 |
1,400.4 |
849.3 |
2,787.3 |
61.1% |
95.8% |
100.0% |
| Sub_05 |
Reading_While_Listening |
95 |
1,518.5 |
522.4 |
1,464.1 |
774.0 |
3,012.0 |
65.7% |
100.0% |
72.2% |
| Sub_06 |
Viewing |
74 |
1,570.5 |
609.3 |
1,442.3 |
740.3 |
2,865.7 |
100.0% |
100.0% |
77.8% |
| Sub_07 |
Reading |
82 |
1,888.5 |
519.9 |
1,888.0 |
745.5 |
2,945.3 |
96.8% |
100.0% |
53.3% |
| Sub_08 |
Reading_While_Listening |
95 |
1,457.4 |
370.2 |
1,409.3 |
789.3 |
2,584.0 |
5.7% |
100.0% |
100.0% |
| Sub_09 |
Viewing |
88 |
1,697.6 |
538.0 |
1,652.5 |
839.1 |
2,981.4 |
88.6% |
95.0% |
45.5% |
| Sub_10 |
Reading |
96 |
1,475.6 |
585.9 |
1,302.1 |
789.3 |
2,908.7 |
94.4% |
100.0% |
75.0% |
| Sub_11 |
Reading_While_Listening |
93 |
1,260.6 |
360.7 |
1,189.6 |
775.8 |
2,779.5 |
97.1% |
95.8% |
94.1% |
| Sub_13 |
Reading |
90 |
1,857.1 |
559.6 |
1,876.0 |
880.1 |
2,997.8 |
85.3% |
100.0% |
62.5% |
| Sub_14 |
Reading_While_Listening |
96 |
1,274.3 |
334.1 |
1,216.1 |
792.0 |
2,262.9 |
97.2% |
95.8% |
88.9% |
| Sub_15 |
Viewing |
96 |
1,443.9 |
356.1 |
1,371.4 |
849.0 |
2,414.6 |
50.0% |
100.0% |
100.0% |
| Sub_16 |
Reading |
90 |
1,473.8 |
480.5 |
1,387.0 |
729.1 |
2,889.1 |
85.7% |
100.0% |
63.6% |
| Sub_17 |
Reading_While_Listening |
88 |
1,601.6 |
499.4 |
1,478.9 |
506.5 |
2,918.6 |
80.0% |
95.8% |
79.4% |
| Sub_18 |
Viewing |
89 |
1,910.0 |
437.4 |
1,850.0 |
1,176.5 |
2,963.0 |
60.6% |
100.0% |
67.6% |
| Sub_19 |
Reading |
87 |
1,539.0 |
522.7 |
1,434.5 |
765.8 |
2,924.8 |
60.0% |
100.0% |
72.7% |
| Sub_20 |
Reading_While_Listening |
94 |
1,497.6 |
482.1 |
1,372.6 |
813.6 |
3,015.2 |
100.0% |
100.0% |
76.5% |
| Sub_21 |
Viewing |
96 |
1,149.9 |
294.2 |
1,061.0 |
726.1 |
2,030.8 |
66.7% |
100.0% |
100.0% |
| Sub_22 |
Reading |
94 |
1,511.4 |
525.9 |
1,447.0 |
768.1 |
2,937.3 |
75.0% |
100.0% |
88.9% |
| Sub_23 |
Reading_While_Listening |
95 |
1,617.4 |
390.3 |
1,572.8 |
852.3 |
2,924.0 |
16.7% |
75.0% |
97.1% |
| Sub_24 |
Viewing |
94 |
1,773.0 |
570.9 |
1,779.0 |
883.7 |
2,987.4 |
50.0% |
100.0% |
88.9% |
# Build a gt table for excluded participants (if any).
if (exists("excluded_summary_table") && nrow(excluded_summary_table) > 0) {
excluded_summary_table |>
select(
modality,
participant_id,
Condition,
rt_n,
rt_mean,
rt_sd,
rt_median,
rt_min,
rt_max,
hit_target_total,
hit_filler_total,
correct_rejection_total) |>
gt(groupname_col = "modality") |>
cols_label(
participant_id = "Participant",
Condition = "Condition",
rt_n = "Trials (n)",
rt_mean = "RT Mean (ms)",
rt_sd = "RT SD (ms)",
rt_median = "RT Median (ms)",
rt_min = "RT Min (ms)",
rt_max = "RT Max (ms)",
hit_target_total = "Hit (Target)",
hit_filler_total = "Hit (Filler)",
correct_rejection_total = "Correct Rejection (Nonword)") |>
fmt_number(
columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
decimals = 1) |>
fmt_percent(
columns = c(hit_target_total, hit_filler_total,
correct_rejection_total),
decimals = 1) |>
tab_style(
style = list(
cell_fill(color = "#ffe6e6"),
cell_text(style = "italic")
),
locations = cells_body(columns = everything())
) |>
sub_missing(everything(), missing_text = "NA") |>
tab_header(
title = md("**EXCLUDED Participants - Descriptive Statistics**"),
subtitle = "Data quality issues detected")
}
| EXCLUDED Participants - Descriptive Statistics |
| Data quality issues detected |
| Participant |
Condition |
Trials (n) |
RT Mean (ms) |
RT SD (ms) |
RT Median (ms) |
RT Min (ms) |
RT Max (ms) |
Hit (Target) |
Hit (Filler) |
Correct Rejection (Nonword) |
| Onset_Written |
| Sub_12 |
Viewing |
0 |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
NA |
grand_modality_stats |>
mutate(
modality = str_replace_all(modality, "_", " "),
trial_type = str_to_title(type)) |>
select(
modality,
Condition,
trial_type,
participants,
rt_n,
rt_mean,
rt_sd,
rt_median,
rt_min,
rt_max,
accuracy) |>
gt() |>
cols_label(
modality = "Modality",
Condition = "Condition",
trial_type = "Trial Type",
participants = "Participants (n)",
rt_n = "Trials (n)",
rt_mean = "RT Mean (ms)",
rt_sd = "RT SD (ms)",
rt_median = "RT Median (ms)",
rt_min = "RT Min (ms)",
rt_max = "RT Max (ms)",
accuracy = "Accuracy") |>
fmt_number(columns = c(participants, rt_n), decimals = 0) |>
fmt_number(
columns = c(rt_mean, rt_sd, rt_median, rt_min, rt_max),
decimals = 1) |>
fmt_percent(columns = accuracy, decimals = 1) |>
sub_missing(everything(), missing_text = "NA") |>
tab_header(
title = md("**Grand Descriptive Statistics by Modality**"))
| Grand Descriptive Statistics by Modality |
| Modality |
Condition |
Trial Type |
Participants (n) |
Trials (n) |
RT Mean (ms) |
RT SD (ms) |
RT Median (ms) |
RT Min (ms) |
RT Max (ms) |
Accuracy |
| Onset Audio |
Reading |
Filler |
8 |
179 |
1,536.4 |
424.5 |
1,413.2 |
932.2 |
2,882.5 |
91.1% |
| Onset Audio |
Reading |
Nonword |
8 |
249 |
2,025.1 |
438.4 |
2,030.5 |
1,045.7 |
2,994.7 |
52.2% |
| Onset Audio |
Reading |
Target |
8 |
262 |
1,878.1 |
491.0 |
1,863.4 |
295.6 |
2,977.9 |
71.0% |
| Onset Audio |
Reading_While_Listening |
Filler |
8 |
191 |
1,396.9 |
394.0 |
1,306.2 |
803.7 |
2,803.1 |
89.0% |
| Onset Audio |
Reading_While_Listening |
Nonword |
8 |
277 |
1,732.8 |
434.0 |
1,673.5 |
871.1 |
2,982.3 |
71.5% |
| Onset Audio |
Reading_While_Listening |
Target |
8 |
285 |
1,497.5 |
355.8 |
1,445.4 |
633.1 |
2,754.3 |
60.7% |
| Onset Audio |
Viewing |
Filler |
8 |
187 |
1,397.2 |
362.0 |
1,298.1 |
844.4 |
2,703.3 |
93.0% |
| Onset Audio |
Viewing |
Nonword |
8 |
284 |
1,677.3 |
374.7 |
1,623.6 |
1,031.0 |
2,944.8 |
56.7% |
| Onset Audio |
Viewing |
Target |
8 |
283 |
1,638.8 |
428.0 |
1,546.3 |
841.2 |
2,960.3 |
62.2% |
| Onset Written |
Reading |
Filler |
8 |
185 |
1,203.1 |
428.5 |
1,029.8 |
702.8 |
2,937.3 |
99.5% |
| Onset Written |
Reading |
Nonword |
8 |
271 |
1,752.6 |
498.9 |
1,735.3 |
845.4 |
2,997.8 |
75.6% |
| Onset Written |
Reading |
Target |
8 |
273 |
1,566.5 |
531.3 |
1,467.1 |
729.1 |
2,975.0 |
80.6% |
| Onset Written |
Reading_While_Listening |
Filler |
8 |
192 |
1,213.5 |
397.2 |
1,095.5 |
734.9 |
2,723.1 |
95.3% |
| Onset Written |
Reading_While_Listening |
Nonword |
8 |
281 |
1,603.3 |
461.6 |
1,530.6 |
506.5 |
3,015.2 |
87.2% |
| Onset Written |
Reading_While_Listening |
Target |
8 |
278 |
1,421.0 |
416.3 |
1,337.0 |
822.2 |
2,924.0 |
65.5% |
| Onset Written |
Viewing |
Filler |
7 |
154 |
1,268.8 |
397.4 |
1,190.0 |
726.1 |
2,981.4 |
99.4% |
| Onset Written |
Viewing |
Nonword |
7 |
237 |
1,698.0 |
494.0 |
1,671.7 |
878.0 |
2,963.0 |
78.1% |
| Onset Written |
Viewing |
Target |
7 |
240 |
1,646.7 |
555.9 |
1,530.9 |
740.3 |
2,987.4 |
71.2% |
# Boxplots to visualise reaction time distributions alongside the summary tables.
rt_boxplot_data <- trial_level_data |>
filter(!is.na(rt)) |>
mutate(
Condition = fct_explicit_na(Condition, "Unknown"),
modality_pretty = str_replace_all(modality, "_", " "),
trial_type = str_to_title(type))
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `Condition = fct_explicit_na(Condition, "Unknown")`.
## Caused by warning:
## ! `fct_explicit_na()` was deprecated in forcats 1.0.0.
## ℹ Please use `fct_na_value_to_level()` instead.
# Participant-level totals for correct target responses.
target_accuracy_summary <- trial_level_data |>
filter(type == "target") |>
mutate(
Condition = fct_explicit_na(Condition, "Unknown"),
modality_pretty = str_replace_all(modality, "_", " ")
) |>
drop_na(response_binary) |>
group_by(modality_pretty, participant_id, Condition) |>
summarise(
trials = n(),
total_correct = sum(response_binary == 1),
accuracy_rate = total_correct / trials,
.groups = "drop"
) |>
arrange(modality_pretty, participant_id)
# Condition-level descriptive statistics for participant accuracy rates.
target_accuracy_descriptives <- target_accuracy_summary |>
group_by(modality_pretty, Condition) |>
summarise(
n_participants = n(),
accuracy_mean = mean(accuracy_rate),
accuracy_sd = sd(accuracy_rate),
accuracy_min = min(accuracy_rate),
accuracy_max = max(accuracy_rate),
.groups = "drop"
)
rt_boxplot_data |>
ggplot(aes(x = trial_type, y = rt, fill = trial_type)) +
geom_boxplot(alpha = 0.7, outlier.alpha = 0.4, width = 0.6) +
facet_wrap(~ modality_pretty) +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Reaction Time Distributions by Modality and Trial Type",
x = "Trial Type",
y = "Reaction Time (ms)") +
theme_bw(base_size = 12) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank())

rt_boxplot_data |>
ggplot(aes(x = Condition, y = rt, fill = Condition)) +
geom_boxplot(alpha = 0.7, outlier.alpha = 0.4, width = 0.6) +
facet_grid(trial_type ~ modality_pretty) +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Reaction Time Distributions by Condition, Modality, and Trial Type",
x = "Condition",
y = "Reaction Time (ms)",
fill = "Condition") +
theme_bw(base_size = 12) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank())

target_accuracy_summary |>
mutate(
Condition = fct_drop(Condition)
) |>
ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
facet_wrap(~ modality_pretty, scales = "free_y") +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Distribution of Correct Target Responses by Condition",
x = "Condition",
y = "Correct Target Responses",
fill = "Condition") +
theme_bw(base_size = 12) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))

target_accuracy_summary |>
mutate(
Condition = fct_drop(Condition)
) |>
filter(modality_pretty == "Onset Audio") |>
ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Correct Target Responses: Audio Lexical Decision Task",
x = "Condition",
y = "Correct Target Responses: Max = 36",
fill = "Condition") +
theme_bw(base_size = 12) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))

target_accuracy_summary |>
mutate(
Condition = fct_drop(Condition)
) |>
filter(modality_pretty == "Onset Written") |>
ggplot(aes(x = Condition, y = total_correct, fill = Condition)) +
geom_boxplot(alpha = 0.75, outlier.alpha = 0.6, width = 0.55) +
scale_fill_brewer(palette = "Set2") +
labs(
title = "Correct Target Responses: Written Lexical Decision Task",
x = "Condition",
y = "Correct Target Responses: Max = 36",
fill = "Condition") +
theme_bw(base_size = 12) +
theme(
legend.position = "bottom",
panel.grid.minor = element_blank(),
axis.text.x = element_text(angle = 45, hjust = 1))

# Display descriptive statistics table for target accuracy by condition.
target_accuracy_descriptives |>
gt() |>
cols_label(
modality_pretty = "Modality",
Condition = "Condition",
n_participants = "Participants (n)",
accuracy_mean = "Accuracy Mean",
accuracy_sd = "Accuracy SD",
accuracy_min = "Accuracy Min",
accuracy_max = "Accuracy Max") |>
fmt_number(columns = n_participants, decimals = 0) |>
fmt_percent(
columns = c(accuracy_mean, accuracy_sd, accuracy_min, accuracy_max),
decimals = 1) |>
tab_style(
style = cell_fill(color = "#ffcccb"),
locations = cells_body(
columns = c(accuracy_mean, accuracy_min),
rows = accuracy_mean < 0.5 | accuracy_min < 0.5
)
) |>
sub_missing(everything(), missing_text = "NA") |>
tab_header(
title = md("**Target Accuracy Descriptive Statistics by Condition**"))
| Target Accuracy Descriptive Statistics by Condition |
| Modality |
Condition |
Participants (n) |
Accuracy Mean |
Accuracy SD |
Accuracy Min |
Accuracy Max |
| Onset Audio |
Reading |
8 |
70.9% |
12.1% |
47.2% |
85.7% |
| Onset Audio |
Reading_While_Listening |
8 |
60.7% |
33.6% |
13.9% |
100.0% |
| Onset Audio |
Viewing |
8 |
62.4% |
23.6% |
30.6% |
97.1% |
| Onset Written |
Reading |
8 |
80.5% |
14.0% |
60.0% |
96.8% |
| Onset Written |
Reading_While_Listening |
8 |
65.7% |
36.6% |
5.7% |
100.0% |
| Onset Written |
Viewing |
7 |
71.7% |
19.9% |
50.0% |
100.0% |
# RT outlier detection using IQR method by participant and condition.
rt_outlier_summary <- trial_level_data |>
filter(!is.na(rt)) |>
group_by(modality, participant_id, Condition) |>
summarise(
n_trials = n(),
q1 = quantile(rt, 0.25),
q3 = quantile(rt, 0.75),
iqr = q3 - q1,
lower_bound = q1 - 3 * iqr,
upper_bound = q3 + 3 * iqr,
n_outliers = sum(rt < lower_bound | rt > upper_bound),
outlier_rate = n_outliers / n_trials,
.groups = "drop"
) |>
arrange(desc(outlier_rate))
# Display participants with high outlier rates (>10%).
high_outlier_participants <- rt_outlier_summary |>
filter(outlier_rate > 0.10)
if (nrow(high_outlier_participants) > 0) {
cat("\n=== PARTICIPANTS WITH HIGH RT OUTLIER RATES (>10%) ===\n")
print(high_outlier_participants |> select(modality, participant_id, Condition, n_trials, n_outliers, outlier_rate))
cat("\n")
}
# Visualize RT distributions to identify extreme outliers.
trial_level_data |>
filter(!is.na(rt)) |>
mutate(
Condition = fct_explicit_na(Condition, "Unknown"),
modality_pretty = str_replace_all(modality, "_", " ")
) |>
ggplot(aes(x = participant_id, y = rt, fill = Condition)) +
geom_boxplot(alpha = 0.7, outlier.size = 1, outlier.alpha = 0.5) +
facet_wrap(~ modality_pretty, scales = "free_x") +
scale_fill_brewer(palette = "Set2") +
labs(
title = "RT Distribution by Participant (Outlier Detection)",
x = "Participant",
y = "Reaction Time (ms)",
fill = "Condition") +
theme_bw(base_size = 10) +
theme(
axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
legend.position = "bottom",
panel.grid.minor = element_blank()
)
