library(readxl)
library(dplyr)
library(tidyverse)
library(openxlsx)
library(stringr)
library(purrr)
library(janitor)
library(tidyr)
# Define directories and file path ----
# Root folder holding the E-Prime data used by this script.
base_eprime_dir <- "/Users/vivi/UT/Thesis/Data/E-Prime"

# Raw input file and the folder the processed output is written to
# (the output folder is the base folder itself).
eprime_raw_file <- file.path(base_eprime_dir, "all_excluded2.csv")
eprime_processed_dir <- file.path(base_eprime_dir)

# Create the output folder (including missing parents) when it is absent.
if (isFALSE(dir.exists(eprime_processed_dir))) {
  dir.create(eprime_processed_dir, recursive = TRUE)
  message(paste("Created E-Prime output directory:", eprime_processed_dir))
}
# E-Prime Data Preprocessing ----
# Read the semicolon-separated E-Prime export.
A_B <- read.csv(eprime_raw_file, sep = ";")

# Keep only the rows recorded under the "responsprocedure" procedure.
A_B_R <- filter(A_B, procedure == "responsprocedure")
# Build a trial-level table (one row per subject x session x trial) with
# accuracy, mean RT in seconds, and sequence length, keeping only trials
# with accuracy >= 0.8 and a defined RT.
eprime_df_cleaned <- A_B_R %>%
  # 1. Coerce the columns used below to numeric (non-numeric entries become NA).
  mutate(
    feedback.ACC = as.numeric(feedback.ACC),
    feedback.RT = as.numeric(feedback.RT),
    sub.trial.number = as.numeric(sub.trial.number)
  ) %>%
  # 2. Number the trials within each session/subject. Assumes that
  #    sub.trial.number == 1 marks the first row of a new trial and that rows
  #    are in presentation order.
  group_by(session, subject) %>%
  mutate(trial = cumsum(sub.trial.number == 1)) %>%
  ungroup() %>%
  # 3. Collapse to exactly one row per trial. Only scalar summaries go here:
  #    computing `sequence` with case_when() inside summarise() would yield one
  #    value per sub-trial row (session is a vector within the group) and thus
  #    duplicate every trial row.
  group_by(subject, session, trial) %>%
  summarise(
    trial.acc = sum(feedback.ACC, na.rm = TRUE) / n(),
    trial.RT = mean(feedback.RT, na.rm = TRUE),
    # Largest sub-trial index in the trial; used as the length for sessions 4-5.
    max_sub_trial = max(sub.trial.number),
    .groups = "drop"
  ) %>%
  # 4. Derive the sequence length per trial and convert RT to seconds.
  mutate(
    sequence = case_when(
      session == 1 ~ 6,
      session == 2 ~ 12,
      session == 3 ~ 18,
      # Sessions 4 and 5: length is the maximum sub.trial.number of the trial.
      session %in% c(4, 5) ~ max_sub_trial,
      TRUE ~ NA_real_ # Should not happen for sessions 1-5
    ),
    trial.RTS = trial.RT / 1000
  ) %>%
  # 5. Drop inaccurate trials and trials without a usable RT.
  filter(trial.acc >= 0.8 & !is.na(trial.RTS)) %>%
  # 6. Keep only the output columns (drops the max_sub_trial helper).
  select(subject, session, trial, sequence, trial.acc, trial.RTS)
# Debugging aid: warn if any row was left without a sequence length.
if (anyNA(eprime_df_cleaned$sequence)) {
  warning("WARNING: Not all rows have a defined sequence length after processing. Check Session mapping.")
}
# Write the cleaned trial-level table (all subjects and sessions) to an
# Excel workbook in the output directory.
out_file_eprime <- file.path(eprime_processed_dir, "rt_cleaned_merged.xlsx")
# openxlsx names this argument `rowNames`; `row.names` is not one of its
# arguments and would be silently ignored.
write.xlsx(eprime_df_cleaned, file = out_file_eprime, rowNames = FALSE)
message(paste("E-Prime data cleaned and saved:", out_file_eprime))