library(readxl)
library(dplyr)
library(tidyr)
library(openxlsx)
library(janitor)
library(purrr)
library(stringr)

Configuration: Define directories and file paths

#CONFIGURATION: SET TARGET PARTICIPANT ID
PARTICIPANT_ID <- 2

base_dir <- "/Volumes/PHILIPS UFD" 
xsens_prep_dir <- file.path("/Users/vivi/UT/Thesis/Data/Xsens/Participants", "Movement Preparation Period", paste0("ID", PARTICIPANT_ID))
eprime_clean_file <- file.path("/Users/vivi/UT/Thesis/Data/E-Prime", "rt_cleaned_merged.xlsx")

if (!dir.exists(xsens_prep_dir)) {
  dir.create(xsens_prep_dir, recursive = TRUE)
  message(paste("Created Xsens Preparation output directory:", xsens_prep_dir))
}

Xsens Movement Preparation Preprocessing

message(paste("Processing movement preparation data for Participant ID:", PARTICIPANT_ID))

process_participant_prep <- function(pid) {
  
  message(paste("Starting processing for Subject", pid, "..."))
  
  # 1. Detect and filter files for the current participant 
  # Using recursive=TRUE to search the base_dir and all subdirectories
  all_files <- list.files(base_dir, pattern = sprintf("ID%s-[0-9]+\\.xlsx$", pid), 
                          recursive = TRUE, full.names = TRUE)
  
  # Filter files to ensure we only get ID{pid}-{Block}.xlsx files
  #participant_files <- all_files[grepl(sprintf("/ID%s-[0-9]+\\.xlsx$", pid), all_files)]
  participant_files <- all_files[grepl(sprintf("ID%s\\b-[0-9]+\\.xlsx$", pid), all_files)]  
  
  # Extract block numbers from filenames
  block_numbers <- as.numeric(str_extract(participant_files, "(?<=ID[0-9]{1,2}-)[0-9]+"))

  if (length(participant_files) == 0) {
    warning(sprintf("No files found for Subject %s in %s. Skipping.", pid, base_dir))
    return(NULL)
  }
  
  # 2. Read, Clean, and Combine Data from Separate Sheets
  combined_data <- purrr::map2_dfr(
    participant_files, block_numbers,
    ~{
      # Read Center of Mass and Markers sheets and clean names (e.g., 'co_m_pos_x')
      com <- read_excel(.x, sheet = "Center of Mass") %>% clean_names()
      markers <- read_excel(.x, sheet = "Markers") %>% clean_names()
      
      # Merge based on "frame" column
      merged_df <- left_join(com, markers, by = "frame") %>%
        mutate(Subject = as.integer(pid), Block = .y)
      
      # Ensure marker column is named consistently 
      if (!"marker_text" %in% colnames(merged_df)) {
        if ("ns1_marker" %in% colnames(merged_df)) {
            merged_df <- merged_df %>% rename(marker_text = ns1_marker)
        } else {
             warning(sprintf("Marker column not found in Block %s.", .y))
             return(NULL)
        }
      }
      return(merged_df)
    }
  )
  
  if (is.null(combined_data) || nrow(combined_data) == 0) {
    message(sprintf("No valid data combined for Subject %s.", pid))
    return(NULL)
  }
  
  # 3. Trial Numbering and Numeric Conversion

  combined_data <- combined_data %>%
    mutate(marker_text = as.numeric(trimws(as.character(marker_text))))

  combined_data_numbered <- combined_data %>%
    group_by(Subject, Block) %>%
    mutate(
      is_go = (abs(marker_text - 27) < 0.001), 
      Trial = as.integer(cumsum(replace_na(is_go, FALSE))),
      # Add a simple row index *before* filtering to keep track of row order within the block
      row_index = row_number() 
    ) %>%
    ungroup()

  # Apply filter
  combined_data_numbered <- combined_data_numbered %>%
    filter(Trial > 0)

  # 4. Filter to Movement PREPARATION Period (150 frames BEFORE GO)
  # Find the global index of every GO marker (within the block/subject grouping)
  go_indices_df <- combined_data_numbered %>%
    filter(is_go) %>%
    # Select the index *of the row that has the GO marker*
    select(Subject, Block, Trial, go_index = row_index)

  # Calculate the start and end row indices for the 150-frame window
  prep_windows <- go_indices_df %>%
    mutate(
      start_index = go_index - 151, # The row 150 frames *before* GO
      end_index   = go_index - 1    # The row immediately before GO
    ) %>%
    # Use row_number() to generate a consistent trial index for the join later
    select(Subject, Block, Trial, start_index, end_index)

# Now, join the main data back to these index windows and filter
  if (nrow(prep_windows) > 0) { # <--- ADD THIS CHECK
    prep_data <- purrr::map_dfr(1:nrow(prep_windows), ~ {
      window <- prep_windows[.x, ]
      combined_data_numbered %>%
        # Filter to the specific index range for this trial
        filter(
          Subject == window$Subject,
          Block == window$Block,
          row_index > window$start_index, 
          row_index <= window$end_index
        ) %>%
        # Add the trial number and block number from the window (crucial if data is missing)
        mutate(
          Trial = window$Trial,
          Block = window$Block,
          # Recalculate the row index within the 150-frame preparation period
          row_in_trial = row_number() 
        )
    })
  } else {
    # If no prep windows found, set prep_data to an empty dataframe
    message(sprintf("WARNING: No valid preparation windows found for Subject %s.", pid))
    prep_data <- tibble() # Or a structure matching what it should contain
  }


  # DEBUG CHECK: Ensure we have data
  if (nrow(prep_data) == 0) {
    message(sprintf("ERROR: Preparation period filter produced zero rows for Subject %s.", pid))
    if (any(prep_windows$start_index <= 0)) {
    message("NOTE: Found Trials where the GO marker occurs too early in the data block.")
    }
    return(NULL)
  }

  # 5. Center-of-Mass Calculations (RMS is calculated per trial time series)
  final_df <- prep_data %>%
    # 5a. Calculate Time-Series Vector Magnitudes
    mutate(
      COM_pos = sqrt(co_m_pos_x^2 + co_m_pos_y^2 + co_m_pos_z^2),
      COM_vel = sqrt(co_m_vel_x^2 + co_m_vel_y^2 + co_m_vel_z^2),
      COM_acc = sqrt(co_m_acc_x^2 + co_m_acc_y^2 + co_m_acc_z^2)
    ) %>%
    group_by(Subject, Block, Trial) %>%
    mutate(
      # 5b. Calculate Trial-Level RMS Summary (RMS values are repeated for all rows in the trial)
      RMS_acc = sqrt(mean(COM_acc^2, na.rm = TRUE)),
      RMS_vel = sqrt(mean(COM_vel^2, na.rm = TRUE)),
      RMS_pos = sqrt(mean(COM_pos^2, na.rm = TRUE)),
      .groups = 'drop'
    ) %>%
    # Select final columns, including the new magnitude columns
    select(
      Subject, Block, Trial, row_in_trial, time_code = "frame", Marker = marker_text, 
      co_m_acc_x, co_m_acc_y, co_m_acc_z, # Keep component columns for Lyapunov
      COM_pos, COM_vel, COM_acc, 
      RMS_acc, RMS_vel, RMS_pos # Keep the trial-level RMS columns
    )

  # 6. Write to Files
  # File 1: Time Series Data (Movement Preparation Period)
  out_file_ts <- file.path(xsens_prep_dir, sprintf("ID%s_preparation_timeseries.xlsx", pid))
  write.xlsx(final_df, out_file_ts)
  message(paste("Successfully wrote time series data to:", out_file_ts))
  
  message(sprintf("Successfully processed and saved all files for Subject %s.", pid))
  return(final_df) # Return the processed data frame
}

# Execution
processed_prep_data <- process_participant_prep(PARTICIPANT_ID)

Xsens and E-Prime Merge

message(paste("Starting merge for Participant ID:", PARTICIPANT_ID))

# 1. Grab the preparation-period time-series file
preparation_list <- list.files(
  path       = xsens_prep_dir, 
  pattern    = sprintf("^ID%s_preparation_timeseries\\.xlsx$", PARTICIPANT_ID),
  full.names = TRUE
)

if (length(preparation_list) == 0) {
  stop(sprintf("Preparation time-series file for ID %s not found.", PARTICIPANT_ID, xsens_prep_dir))
}

# Read and format the Xsens time-series data
preparation_df <- read_excel(preparation_list[1]) %>%
  mutate(
    Subject = as.factor(Subject),
    Block   = factor(Block, levels = as.character(1:5)), 
    Trial   = factor(Trial, levels = as.character(1:48)), 
    time_code = as.character(time_code), 
    # Ensure all CoM columns are numeric
    across(starts_with("COM_"), as.numeric),
     # Ensure all RMS columns are numeric
    across(starts_with("RMS_"), as.numeric)
  )

# 2. Import and simplify RT data (E-Prime)
rt_data <- read_excel(eprime_clean_file) %>%
  filter(subject == PARTICIPANT_ID) %>%
  mutate(
    # NOTE: Assuming the E-Prime file has columns 'subject', 'session', 'trial'
    Subject = as.factor(subject),
    Block   = factor(session, levels = as.character(1:5)),
    Trial   = factor(trial, levels = as.character(1:48)),
    Sequence = as.factor(sequence),
    Trial_Acc = as.numeric(trial.acc)
  ) %>%
  # Keep one row per trial
  distinct(Subject, Block, Trial, .keep_all = TRUE) %>%
  # Select the required merge keys
  select(Subject, Block, Trial, Sequence, trial.RTS, Trial_Acc) 

# 3. Merge Xsens time-series with RT data
preparation_merged <- preparation_df %>%
  left_join(rt_data, by = c("Subject", "Block", "Trial")) %>%
  filter(!is.na(Sequence)) %>%
  select(
    Subject, Block, Trial, Sequence, row_in_trial, time_code, Marker, 
    trial.RTS, Trial_Acc, COM_acc,
    RMS_acc, RMS_vel, RMS_pos
  )

# 4. Write out the merged file
out_file_merged <- file.path(xsens_prep_dir, sprintf("ID%s_preparation_merged.xlsx", PARTICIPANT_ID))

write.xlsx(preparation_merged, out_file_merged)

message(paste("Successfully merged Xsens preparation time-series with RT data."))
message(paste("File saved to:", out_file_merged))