library(readxl)
library(dplyr)
library(tidyr)
library(openxlsx)
library(janitor)
library(purrr)
#CONFIGURATION: SET TARGET PARTICIPANT ID
PARTICIPANT_ID <- 2 

Define directories

base_dir <- "/Users/vivi/UT/Thesis/Data/Xsens/Participants" 
xsens_prep_dir <- file.path(base_dir, "Movement Preparation Period", paste0("ID", PARTICIPANT_ID))
eprime_clean_file <- file.path("/Users/vivi/UT/Thesis/Data/E-Prime", "rt_cleaned_merged.xlsx")

if (!dir.exists(xsens_prep_dir)) {
  dir.create(xsens_prep_dir, recursive = TRUE)
  message(paste("Created Xsens Preparation output directory:", xsens_prep_dir))
}

Xsens Movement Preparation Preprocessing

message(paste("Processing movement preparation data for Participant ID:", PARTICIPANT_ID))

process_participant_prep <- function(pid) {
  
  message(paste("Starting processing for Subject", pid, "..."))
  
  # 1. Detect and filter files for the current participant 
  # Using recursive=TRUE to search the base_dir and all subdirectories
  all_files <- list.files(base_dir, pattern = sprintf("ID%s-[0-9]+\\.xlsx$", pid), 
                          recursive = TRUE, full.names = TRUE)
  
  # Filter files to ensure we only get ID{pid}-{Block}.xlsx files
  participant_files <- all_files[grepl(sprintf("/ID%s-[0-9]+\\.xlsx$", pid), all_files)]
  
  # Extract block numbers from filenames
  block_numbers <- as.numeric(str_extract(participant_files, "(?<=ID[0-9]-)[0-9]+"))

  if (length(participant_files) == 0) {
    warning(sprintf("No files found for Subject %s in %s. Skipping.", pid, base_dir))
    return(NULL)
  }
  
  # 2. Read, Clean, and Combine Data from Separate Sheets
  combined_data <- purrr::map2_dfr(
    participant_files, block_numbers,
    ~{
      # Read Center of Mass and Markers sheets and clean names (e.g., 'co_m_pos_x')
      com <- read_excel(.x, sheet = "Center of Mass") %>% clean_names()
      markers <- read_excel(.x, sheet = "Markers") %>% clean_names()
      
      # Merge based on "frame" column
      merged_df <- left_join(com, markers, by = "frame") %>%
        mutate(Subject = as.integer(pid), Block = .y)
      
      # Ensure marker column is named consistently 
      if (!"marker_text" %in% colnames(merged_df)) {
        if ("ns1_marker" %in% colnames(merged_df)) {
            merged_df <- merged_df %>% rename(marker_text = ns1_marker)
        } else {
             warning(sprintf("Marker column not found in Block %s.", .y))
             return(NULL)
        }
      }
      return(merged_df)
    }
  )
  
  if (is.null(combined_data) || nrow(combined_data) == 0) {
    message(sprintf("No valid data combined for Subject %s.", pid))
    return(NULL)
  }
  
  # 3. Trial Numbering and Numeric Conversion
  combined_data <- combined_data %>%
    mutate(marker_text = as.numeric(trimws(as.character(marker_text))))
  
  combined_data_numbered <- combined_data %>%
    group_by(Subject, Block) %>%
    mutate(
      is_go = (abs(marker_text - 27) < 0.001), 
      Trial = as.integer(cumsum(replace_na(is_go, FALSE))),
      # Add a simple row index *before* filtering to keep track of row order within the block
      row_index = row_number() 
    ) %>%
    ungroup()
    
  # 4. Filter to Movement PREPARATION Period (150 frames BEFORE GO)
  
  # Find the global index of every GO marker (within the block/subject grouping)
  go_indices_df <- combined_data_numbered %>%
    filter(is_go) %>%
    # Select the index *of the row that has the GO marker*
    select(Subject, Block, Trial, go_index = row_index)
  
  # Calculate the start and end row indices for the 150-frame window
  prep_windows <- go_indices_df %>%
    # Skip Trial 0 if it exists (though Trial should be >= 1 here)
    filter(Trial > 0) %>%
    mutate(
      start_index = go_index - 201, # The row 150 frames *before* GO
      end_index   = go_index - 1    # The row immediately before GO
    ) %>%
    # Use row_number() to generate a consistent trial index for the join later
    select(Subject, Block, Trial, start_index, end_index)
  
  # Now, join the main data back to these index windows and filter
  prep_data <- purrr::map_dfr(1:nrow(prep_windows), ~ {
    window <- prep_windows[.x, ]
    combined_data_numbered %>%
      # Filter to the specific index range for this trial
      filter(
        Subject == window$Subject,
        Block == window$Block,
        row_index > window$start_index, 
        row_index <= window$end_index
      ) %>%
      # Add the trial number and block number from the window (crucial if data is missing)
      mutate(
        Trial = window$Trial,
        Block = window$Block,
        # Recalculate the row index within the 150-frame preparation period
        row_in_trial = row_number() 
      )
  })

  # DEBUG CHECK: Ensure we have data
  if (nrow(prep_data) == 0) {
    message(sprintf("ERROR: Preparation period filter produced zero rows for Subject %s.", pid))
    # Check if this error is because the start_index was <= 0
    if (any(prep_windows$start_index <= 0)) {
        message("NOTE: Found Trials where the GO marker occurs too early in the data block.")
    }
    return(NULL)
  }
  
  # # 5. Center-of-Mass Calculations (RMS is calculated per trial time series)
  final_df <- prep_data %>%
    # 5a. Calculate Time-Series Vector Magnitudes
    mutate(
      COM_pos = sqrt(co_m_pos_x^2 + co_m_pos_y^2 + co_m_pos_z^2),
      COM_vel = sqrt(co_m_vel_x^2 + co_m_vel_y^2 + co_m_vel_z^2),
      COM_acc = sqrt(co_m_acc_x^2 + co_m_acc_y^2 + co_m_acc_z^2)
    ) %>%
    group_by(Subject, Block, Trial) %>%
    mutate(
      # 5b. Calculate Trial-Level RMS Summary (RMS values are repeated for all rows in the trial)
      RMS_acc = sqrt(mean(COM_acc^2, na.rm = TRUE)),
      RMS_vel = sqrt(mean(COM_vel^2, na.rm = TRUE)),
      RMS_pos = sqrt(mean(COM_pos^2, na.rm = TRUE)),
      .groups = 'drop'
    ) %>%
    # Select final columns, including the new magnitude columns
    select(
      Subject, Block, Trial, row_in_trial, time_code = "frame", Marker = marker_text, 
      co_m_acc_x, co_m_acc_y, co_m_acc_z, # Keep component columns for Lyapunov
      COM_pos, COM_vel, COM_acc, 
      RMS_acc, RMS_vel, RMS_pos # Keep the trial-level RMS columns
    )

  # 6. Write to Files
  # File 1: Time Series Data (Movement Preparation Period)
  out_file_ts <- file.path(xsens_prep_dir, sprintf("ID%s_preparation_timeseries.xlsx", pid))
  write.xlsx(final_df, out_file_ts)
  message(paste("Successfully wrote time series data to:", out_file_ts))
  
  message(sprintf("Successfully processed and saved all files for Subject %s.", pid))
  return(final_df) # Return the processed data frame
}

# Execution
processed_data <- process_participant_prep(PARTICIPANT_ID)

Xsens and E-Prime Merge

message(paste("Starting merge for Participant ID:", PARTICIPANT_ID))

# 1. Grab the preparation-period time-series file
preparation_list <- list.files(
  path       = xsens_prep_dir, 
  pattern    = sprintf("^ID%s_preparation_timeseries\\.xlsx$", PARTICIPANT_ID),
  full.names = TRUE
)

if (length(preparation_list) == 0) {
  stop(sprintf("Preparation time-series file for ID %s not found.", PARTICIPANT_ID, xsens_prep_dir))
}

# Read and format the Xsens time-series data
preparation_df <- read_excel(preparation_list[1]) %>%
  mutate(
    Subject = as.factor(Subject),
    Block   = factor(Block, levels = as.character(1:5)), 
    Trial   = factor(Trial, levels = as.character(1:48)), 
    time_code = as.character(time_code), 
    # Ensure all CoM columns are numeric
    across(starts_with("COM_"), as.numeric),
     # Ensure all RMS columns are numeric
    across(starts_with("RMS_"), as.numeric)
  )

# 2. Import and simplify RT data (E-Prime)
rt_data <- read_excel(eprime_clean_file) %>%
  filter(subject == PARTICIPANT_ID) %>%
  mutate(
    # NOTE: Assuming the E-Prime file has columns 'subject', 'session', 'trial'
    Subject = as.factor(subject),
    Block   = factor(session, levels = as.character(1:5)),
    Trial   = factor(trial, levels = as.character(1:48)),
    Sequence = as.factor(sequence)
  ) %>%
  # Keep one row per trial
  distinct(Subject, Block, Trial, .keep_all = TRUE) %>%
  # Select the required merge keys
  select(Subject, Block, Trial, Sequence, trial.RTS) 

# 3. Merge Xsens time-series with RT data
preparation_merged <- preparation_df %>%
  left_join(rt_data, by = c("Subject", "Block", "Trial")) %>%
  filter(!is.na(Sequence)) %>%
  select(
    Subject, Block, Trial, Sequence, row_in_trial, time_code, Marker, 
    co_m_acc_x, co_m_acc_y, co_m_acc_z, COM_acc,
    RMS_acc, RMS_vel, RMS_pos, trial.RTS 
  )

# 4. Write out the merged file
out_file_merged <- file.path(xsens_prep_dir, sprintf("ID%s_preparation_merged.xlsx", PARTICIPANT_ID))

write.xlsx(preparation_merged, out_file_merged)

message(paste("Successfully merged Xsens preparation time-series with RT data."))
message(paste("File saved to:", out_file_merged))