library(readxl)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.1     ✔ readr     2.1.5
## ✔ ggplot2   4.0.0     ✔ stringr   1.5.2
## ✔ lubridate 1.9.4     ✔ tibble    3.3.0
## ✔ purrr     1.1.0     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(openxlsx)
library(stringr)
library(purrr)
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(tidyr)

Define directories and file path

# Root folder for the E-Prime export; the raw CSV is read from here and the
# processed output is written back into the same folder.
base_eprime_dir <- "/Users/vivi/UT/Thesis/Data/E-Prime"
eprime_raw_file <- file.path(base_eprime_dir, "all_excluded2.csv")
eprime_processed_dir <- base_eprime_dir

# Create the output folder (including missing parents) when it is absent,
# and report that it was created.
if (!dir.exists(eprime_processed_dir)) {
  dir.create(eprime_processed_dir, recursive = TRUE)
  message("Created E-Prime output directory: ", eprime_processed_dir)
}

E-Prime Data Preprocessing

# Read the semicolon-separated E-Prime export, then keep only the rows whose
# `procedure` column equals "responsprocedure".
A_B <- read.csv(file = eprime_raw_file, sep = ";")
A_B_R <- filter(A_B, procedure == "responsprocedure")

# Calculate Trial Numbers, Accuracy, and RT
#
# Collapses the response rows in A_B_R to exactly ONE row per
# subject / session / trial, with columns:
#   trial      running trial index within each session x subject
#   sequence   sequence length for the trial (see step 3)
#   trial.acc  mean of feedback.ACC over the trial, multiplied by 100
#   trial.RTS  mean of feedback.RT over the trial, divided by 1000
eprime_df_cleaned <- A_B_R %>%
  # 1. Numeric conversions needed by the steps below
  mutate(
    feedback.ACC = as.numeric(feedback.ACC),
    feedback.RT = as.numeric(feedback.RT),
    # Must be numeric for the max() in step 3
    sub.trial.number = as.numeric(sub.trial.number)
  ) %>%
  # 2. Trial numbering: assumes sub.trial.number == 1 marks the first row of a
  #    new trial and that rows are in presentation order within each
  #    session x subject.
  group_by(session, subject) %>%
  mutate(trial = cumsum(sub.trial.number == 1)) %>%
  ungroup() %>%
  # 3. Trial-level metrics and sequence length
  group_by(subject, session, trial) %>%
  summarise(
    trial.acc = mean(feedback.ACC, na.rm = TRUE) * 100,
    trial.RT = mean(feedback.RT, na.rm = TRUE),
    # `session` is constant within a group but is still a vector with one
    # element per row. Reduce it to a scalar with first() so that
    # summarise() returns a single row per trial instead of one row per
    # sub-trial (which duplicated every trial and triggered dplyr's
    # "more than 1 row per summarise() group" deprecation warning).
    sequence = case_when(
      first(session) == 1 ~ 6,
      first(session) == 2 ~ 12,
      first(session) == 3 ~ 18,
      # Sessions 4 and 5: length is the largest sub.trial.number in the trial.
      # NA is deliberately not removed so that an incomplete trial yields NA
      # and is caught by the sequence NA check that follows this pipeline.
      first(session) %in% c(4, 5) ~ max(sub.trial.number),
      TRUE ~ NA_real_ # Should not happen for sessions 1-5
    ),
    .groups = "drop"
  ) %>%
  # 4. Convert RT to seconds (assumes feedback.RT is recorded in ms)
  mutate(trial.RTS = trial.RT / 1000) %>%
  # Optional exclusion, currently disabled. trial.acc is scaled by 100 above,
  # so an 80 % cut-off must be written as 80, not 0.8:
  # filter(trial.acc >= 80 & !is.na(trial.RTS)) %>%

  # Final cleanup and selection
  select(subject, session, trial, sequence, trial.acc, trial.RTS)
# Debugging aid: warn before saving if any trial ended up without a
# sequence length.
if (anyNA(eprime_df_cleaned$sequence)) {
  warning("WARNING: Not all rows have a defined sequence length after processing. Check Session mapping.")
}

# Save the cleaned trial-level E-Prime data as an Excel workbook in the
# processed-data folder and report where it was written.
out_file_eprime <- file.path(eprime_processed_dir, "rt_cleaned_merged.xlsx")
# openxlsx spells this argument `rowNames`; the base-R style `row.names` is
# rejected with "Please use 'rowNames' instead of 'row.names'".
write.xlsx(eprime_df_cleaned, file = out_file_eprime, rowNames = FALSE)
message(paste("E-Prime data cleaned and saved:", out_file_eprime))
## E-Prime data cleaned and saved: /Users/vivi/UT/Thesis/Data/E-Prime/rt_cleaned_merged.xlsx