library(readxl)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.1 ✔ readr 2.1.5
## ✔ ggplot2 4.0.0 ✔ stringr 1.5.2
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
library(stringr)
library(purrr)
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(tidyr)
Define directories and file paths
# Root folder holding the E-Prime data; the raw export is read from here.
base_eprime_dir <- "/Users/vivi/UT/Thesis/Data/E-Prime"
eprime_raw_file <- file.path(base_eprime_dir, "all_excluded2.csv")

# Processed output is written to the same folder as the raw data.
eprime_processed_dir <- base_eprime_dir

# Create the output folder (and any missing parents) on first use.
if (!dir.exists(eprime_processed_dir)) {
  dir.create(eprime_processed_dir, recursive = TRUE)
  message(paste("Created E-Prime output directory:", eprime_processed_dir))
}
E-Prime Data Preprocessing
# Load the raw E-Prime export (semicolon-delimited) and keep only the rows
# whose `procedure` equals "responsprocedure".
A_B <- read.csv(eprime_raw_file, sep = ";")
A_B_R <- A_B %>% filter(procedure == "responsprocedure")

# Build a trial-level table: exactly one row per subject x session x trial,
# holding mean accuracy (percent), mean RT and the sequence length.
eprime_df_cleaned <- A_B_R %>%
  # 1. Coerce the columns used below to numeric.
  mutate(
    feedback.ACC = as.numeric(feedback.ACC),
    feedback.RT = as.numeric(feedback.RT),
    sub.trial.number = as.numeric(sub.trial.number)
  ) %>%
  # 2. Number the trials within each session/subject. Relies on the assumption
  #    that sub.trial.number == 1 marks the first row of a new trial, so the
  #    running count of those rows is the trial index.
  group_by(session, subject) %>%
  mutate(trial = cumsum(sub.trial.number == 1)) %>%
  ungroup() %>%
  # 3. Collapse to one row per trial. Only scalar summaries are computed here:
  #    putting a case_when() on the per-row `session` vector inside summarise()
  #    returns one value per input row, which duplicates every trial and
  #    triggers dplyr's "more than 1 row per summarise() group" deprecation.
  group_by(subject, session, trial) %>%
  summarise(
    trial.acc = mean(feedback.ACC, na.rm = TRUE) * 100,
    trial.RT = mean(feedback.RT, na.rm = TRUE),
    max_sub_trial = max(sub.trial.number),
    .groups = "drop"
  ) %>%
  # 4. Derive the sequence length and the rescaled RT on the ungrouped,
  #    one-row-per-trial table, where `session` is an ordinary column.
  mutate(
    sequence = case_when(
      session == 1 ~ 6,
      session == 2 ~ 12,
      session == 3 ~ 18,
      # Sessions 4 and 5: length is the largest sub.trial.number in the trial.
      session %in% c(4, 5) ~ max_sub_trial,
      TRUE ~ NA_real_ # Should not happen for sessions 1-5
    ),
    # RT divided by 1000 (presumably milliseconds -> seconds; confirm units).
    trial.RTS = trial.RT / 1000
  ) %>%
  # NOTE(review): trial.acc is on a 0-100 scale, so an 80% accuracy cut-off
  # would be `trial.acc >= 80`, not `>= 0.8`, if this filter is re-enabled.
  # filter(trial.acc >= 0.8 & !is.na(trial.RTS)) %>%
  # Final column selection (drops the helper columns).
  select(subject, session, trial, sequence, trial.acc, trial.RTS)
# Debugging aid: warn before saving if any trial ended up without a
# sequence length (i.e. its session fell outside the mapped values).
if (anyNA(eprime_df_cleaned$sequence)) {
  warning("WARNING: Not all rows have a defined sequence length after processing. Check Session mapping.")
}
# Save the cleaned trial-level E-Prime data as an Excel workbook.
out_file_eprime <- file.path(eprime_processed_dir, "rt_cleaned_merged.xlsx")
# openxlsx names this argument `rowNames`; the base-R style `row.names`
# only triggers a "Please use 'rowNames'" warning.
write.xlsx(eprime_df_cleaned, file = out_file_eprime, rowNames = FALSE)
message(paste("E-Prime data cleaned and saved:", out_file_eprime))
## E-Prime data cleaned and saved: /Users/vivi/UT/Thesis/Data/E-Prime/rt_cleaned_merged.xlsx