library(readxl)
library(dplyr)
library(tidyverse)
library(openxlsx)
library(stringr)
library(purrr)
library(janitor)
# Configuration: ID of the participant whose recordings this script processes.
# Every input and output path below is derived from this value.
PARTICIPANT_ID <- 2

# Define directories and file paths ----

# Root folder that is searched (recursively) for the raw Xsens workbooks.
base_dir <- "/Users/vivi/UT/Thesis/Data/Xsens/Participants"

# Per-participant output folder for the execution-period files.
xsens_exec_dir <- file.path(
  base_dir,
  "Movement Execution Period",
  paste0("ID", PARTICIPANT_ID)
)

# Cleaned E-Prime reaction-time workbook used by the merge step.
eprime_clean_file <- file.path(
  "/Users/vivi/UT/Thesis/Data/E-Prime",
  "rt_cleaned_merged.xlsx"
)

# Make sure the output folder exists before anything is written to it.
if (!dir.exists(xsens_exec_dir)) {
  dir.create(xsens_exec_dir, recursive = TRUE)
  message("Created Xsens output directory: ", xsens_exec_dir)
}

# Xsens execution period preprocessing ----

# Announce which participant is about to be processed.
message(
  "Processing movement execution data for Participant ID: ",
  PARTICIPANT_ID
)

#' Preprocess one participant's Xsens workbooks down to the movement
#' execution period and write the result to disk.
#'
#' Searches the global `base_dir` recursively for files named
#' `ID{pid}-{block}.xlsx`, joins the "Center of Mass" and "Markers" sheets of
#' each file on `frame`, numbers trials by counting go markers within each
#' block, keeps each trial's rows from its go marker up to `post_step_rows`
#' rows after its last step marker, and adds centre-of-mass vector magnitudes
#' plus per-trial RMS summaries. The result is written to
#' `{base_dir}/Movement Execution Period/ID{pid}/ID{pid}_execution_timeseries.xlsx`.
#'
#' @param pid Participant ID as it appears in the file names.
#' @param go_marker Marker value that starts a new trial.
#' @param step_markers Marker values that count as a step.
#' @param post_step_rows Number of rows kept after the last step marker of a
#'   trial. NOTE(review): presumably a fixed frame window tied to the recording
#'   rate -- confirm before changing.
#' @return The processed (ungrouped) data frame, or `NULL` when no input file
#'   is found or no block could be read.
process_participant_exec <- function(pid,
                                     go_marker = 27,
                                     step_markers = c(14, 15, 16, 17),
                                     post_step_rows = 65) {
  message(paste("Starting processing for Subject", pid, "..."))

  # 1. Locate the per-block workbooks -----------------------------------------
  # list.files() matches `pattern` against the file name only, so anchoring
  # with ^ stops e.g. "ID12-1.xlsx" from being picked up for participant 2.
  file_regex <- sprintf("^ID%s-([0-9]+)\\.xlsx$", pid)
  participant_files <- list.files(
    base_dir,
    pattern = file_regex,
    recursive = TRUE,
    full.names = TRUE
  )

  if (length(participant_files) == 0) {
    warning(
      sprintf("No files found for Subject %s in %s. Skipping.", pid, base_dir),
      call. = FALSE
    )
    return(NULL)
  }

  # Take the block number from the file name itself (not the full path) via
  # the capture group, so multi-digit participant IDs and ID-like directory
  # names cannot produce NA or wrong block numbers.
  block_numbers <- as.numeric(
    sub(file_regex, "\\1", basename(participant_files))
  )

  # 2. Read, clean and combine the blocks -------------------------------------
  # Reads one workbook; returns NULL (with a warning) when the file cannot be
  # read or carries no marker column, so one bad block does not abort the run.
  read_block <- function(file_path, block_num) {
    tryCatch(
      {
        com <- read_excel(file_path, sheet = "Center of Mass") %>%
          clean_names()
        markers <- read_excel(file_path, sheet = "Markers") %>%
          clean_names()
        block_df <- left_join(com, markers, by = "frame")

        # Fall back to the alternative marker column name when needed.
        if (!"marker_text" %in% names(block_df) &&
              "ns1_marker" %in% names(block_df)) {
          block_df <- rename(block_df, marker_text = ns1_marker)
        }

        if ("marker_text" %in% names(block_df)) {
          # Coerce markers to numeric per block so that blocks whose marker
          # column was read with different types can still be row-bound.
          mutate(
            block_df,
            Subject = as.integer(pid),
            Block = block_num,
            marker_text = as.numeric(trimws(as.character(marker_text)))
          )
        } else {
          warning(
            sprintf(
              "Marker column missing in Block %s of Subject %s. Skipping.",
              block_num, pid
            ),
            call. = FALSE
          )
          NULL
        }
      },
      error = function(e) {
        warning(
          sprintf(
            "Error reading file %s: %s",
            basename(file_path), conditionMessage(e)
          ),
          call. = FALSE
        )
        NULL
      }
    )
  }

  # bind_rows() ignores the NULL entries left by skipped blocks.
  combined_data <- purrr::map2(participant_files, block_numbers, read_block) %>%
    bind_rows()

  if (nrow(combined_data) == 0) {
    message(
      sprintf("No valid data combined for Subject %s. Returning NULL.", pid)
    )
    return(NULL)
  }

  # 3. Trial numbering --------------------------------------------------------
  # Each go marker starts a new trial; rows before the first go marker of a
  # block get Trial == 0 and are dropped.
  combined_data_numbered <- combined_data %>%
    group_by(Subject, Block) %>%
    mutate(
      # Tolerance comparison because the markers are stored as doubles.
      is_go = abs(marker_text - go_marker) < 0.001,
      Trial = as.integer(cumsum(replace_na(is_go, FALSE)))
    ) %>%
    ungroup() %>%
    filter(Trial > 0)

  # 4. Restrict each trial to the movement execution period -------------------
  exec_data <- combined_data_numbered %>%
    mutate(is_step = marker_text %in% step_markers) %>%
    group_by(Subject, Block, Trial) %>%
    mutate(
      row_in_trial = row_number(),
      # NA (instead of max() on an empty set, which warns and yields -Inf)
      # marks trials that contain no step marker at all.
      last_step_row = if (any(is_step)) {
        max(row_in_trial[is_step])
      } else {
        NA_integer_
      }
    ) %>%
    # Trials without any step marker are removed entirely.
    filter(
      !is.na(last_step_row),
      row_in_trial <= last_step_row + post_step_rows
    ) %>%
    ungroup()

  # 5. Centre-of-mass magnitudes and per-trial RMS ----------------------------
  final_df <- exec_data %>%
    # 5a. Euclidean norm of the x/y/z components, row by row.
    mutate(
      COM_pos = sqrt(co_m_pos_x^2 + co_m_pos_y^2 + co_m_pos_z^2),
      COM_vel = sqrt(co_m_vel_x^2 + co_m_vel_y^2 + co_m_vel_z^2),
      COM_acc = sqrt(co_m_acc_x^2 + co_m_acc_y^2 + co_m_acc_z^2)
    ) %>%
    # 5b. Trial-level RMS, repeated on every row of the trial.
    group_by(Subject, Block, Trial) %>%
    mutate(
      RMS_acc = sqrt(mean(COM_acc^2, na.rm = TRUE)),
      RMS_vel = sqrt(mean(COM_vel^2, na.rm = TRUE)),
      RMS_pos = sqrt(mean(COM_pos^2, na.rm = TRUE))
    ) %>%
    # mutate() has no `.groups` argument, so the grouping is dropped here.
    ungroup() %>%
    select(
      Subject, Block, Trial, row_in_trial,
      time_code = frame, Marker = marker_text,
      co_m_acc_x, co_m_acc_y, co_m_acc_z, # component columns kept for Lyapunov
      COM_pos, COM_vel, COM_acc,
      RMS_acc, RMS_vel, RMS_pos
    )

  # 6. Write the time series --------------------------------------------------
  # The output folder is derived from `pid` (not from a global) so the file
  # always lands in the folder of the participant that was processed.
  out_dir <- file.path(
    base_dir, "Movement Execution Period", paste0("ID", pid)
  )
  dir.create(out_dir, recursive = TRUE, showWarnings = FALSE)

  out_file_ts <- file.path(
    out_dir,
    sprintf("ID%s_execution_timeseries.xlsx", pid)
  )
  write.xlsx(final_df, out_file_ts)
  message(paste("Successfully wrote time series data to:", out_file_ts))

  message(
    sprintf("Successfully processed and saved all files for Subject %s.", pid)
  )
  final_df
}

# Run the execution-period preprocessing for the configured participant and
# keep the returned data frame (NULL when nothing could be processed).
processed_data <- process_participant_exec(pid = PARTICIPANT_ID)

# Xsens and E-Prime Merge ----

message("Starting merge for Participant ID: ", PARTICIPANT_ID)

# 1. Locate the execution-period time-series workbook of this participant.
xsens_processed_dir <- file.path(
  base_dir,
  "Movement Execution Period",
  paste0("ID", PARTICIPANT_ID)
)

execution_list <- list.files(
  xsens_processed_dir, # only this participant's folder is searched
  pattern = sprintf("^ID%s_execution_timeseries\\.xlsx$", PARTICIPANT_ID),
  full.names = TRUE
)

if (length(execution_list) == 0) {
  stop(sprintf("Execution time-series file for ID %s not found in %s.", PARTICIPANT_ID, xsens_processed_dir))
}

# Load the Xsens time series and bring the join keys into the same factor
# representation that is used for the E-Prime data below.
execution_df <- read_excel(execution_list[[1]])
execution_df <- mutate(
  execution_df,
  Subject = as.factor(Subject),
  Block = factor(Block, levels = as.character(seq_len(5))), # max 5 blocks assumed
  Trial = factor(Trial, levels = as.character(seq_len(48))), # max 48 trials assumed
  time_code = as.character(time_code),
  # Force the CoM magnitude and RMS columns to numeric.
  across(c(starts_with("COM_"), starts_with("RMS_")), as.numeric)
)

# 2. Load the E-Prime reaction-time data and reduce it to one row per trial.
# NOTE: bad trials are deliberately NOT removed here; they are merged so the
# corresponding Xsens data can be flagged.
rt_data <- read_excel(eprime_clean_file) %>%
  # Current participant only, and only rows that carry sequence information.
  filter(subject == PARTICIPANT_ID, !is.na(sequence)) %>%
  mutate(
    Subject = as.factor(subject),
    Block = factor(session, levels = as.character(seq_len(5))),
    Trial = factor(trial, levels = as.character(seq_len(48))),
    Sequence = as.factor(sequence)
  ) %>%
  # First row of every Subject/Block/Trial combination, all columns kept.
  distinct(Subject, Block, Trial, .keep_all = TRUE) %>%
  # Join keys plus the sequence label and the trial-level RT column.
  select(Subject, Block, Trial, Sequence, trial.RTS)

# 3. Attach the single per-trial RT/sequence row to every Xsens time point of
# that trial; time points without sequence information are discarded. This is
# the merged data needed for the Lyapunov analysis.
execution_merged <- left_join(
  execution_df,
  rt_data,
  by = c("Subject", "Block", "Trial")
) %>%
  filter(!is.na(Sequence)) %>%
  # The analysis script (load_data_fix.R) relies on these columns being present.
  select(
    Subject, Block, Trial, Sequence, row_in_trial, time_code, Marker,
    co_m_acc_x, co_m_acc_y, co_m_acc_z, COM_acc,
    RMS_acc, RMS_vel, RMS_pos, trial.RTS
  )

# 4. Save the merged table next to the time-series file.
out_file_merged <- file.path(
  xsens_processed_dir,
  sprintf("ID%s_execution_merged.xlsx", PARTICIPANT_ID)
)

write.xlsx(execution_merged, out_file_merged)

message("Successfully merged Xsens time-series with RT data.")
message("File saved to: ", out_file_merged)