library(readxl)
library(dplyr)
library(tidyverse)
library(openxlsx)
library(stringr)
library(purrr)
library(janitor)
# ---- Configuration: set target participant ID ----
# Participant whose recordings are processed and merged by this script.
PARTICIPANT_ID <- 2

# ---- Define directories and file paths ----
# Root folder holding the participants' Xsens workbooks.
base_dir <- "/Users/vivi/UT/Thesis/Data/Xsens/Participants"
# Output folder for this participant's execution-period files.
xsens_exec_dir <- file.path(base_dir, "Movement Execution Period", paste0("ID", PARTICIPANT_ID))
# E-Prime workbook that is merged with the Xsens data further down.
eprime_clean_file <- file.path("/Users/vivi/UT/Thesis/Data/E-Prime", "rt_cleaned_merged.xlsx")

# Create the output folder (and any missing parents) on first use.
if (!dir.exists(xsens_exec_dir)) {
  dir.create(xsens_exec_dir, recursive = TRUE)
  message(paste("Created Xsens output directory:", xsens_exec_dir))
}

# ---- Xsens execution period preprocessing ----
message(paste("Processing movement execution data for Participant ID:", PARTICIPANT_ID))
#' Read one Xsens block workbook and tag it with subject and block.
#'
#' Joins the "Center of Mass" and "Markers" sheets on `frame`, makes sure the
#' marker column is called `marker_text`, and coerces it to numeric so that
#' blocks whose marker column was guessed as different types by readxl can
#' still be row-bound afterwards.
#'
#' @param file_path Path to an `ID<pid>-<block>.xlsx` workbook.
#' @param block_num Block number parsed from the file name.
#' @param pid Participant ID.
#' @return A data frame, or `NULL` (with a warning) when the workbook cannot be
#'   read or has no marker column.
.read_xsens_block <- function(file_path, block_num, pid) {
  tryCatch(
    {
      com <- read_excel(file_path, sheet = "Center of Mass") %>% clean_names()
      markers <- read_excel(file_path, sheet = "Markers") %>% clean_names()

      merged_df <- left_join(com, markers, by = "frame") %>%
        mutate(Subject = as.integer(pid), Block = block_num)

      # Some exports name the marker column differently; normalise it.
      if (!"marker_text" %in% colnames(merged_df)) {
        if (!"ns1_marker" %in% colnames(merged_df)) {
          warning(sprintf("Marker column missing in Block %s of Subject %s. Skipping.", block_num, pid))
          return(NULL)
        }
        merged_df <- merged_df %>% rename(marker_text = ns1_marker)
      }

      # Coerce per file (not after combining) so bind_rows() never sees a
      # character marker column in one block and a numeric one in another.
      merged_df %>%
        mutate(marker_text = as.numeric(trimws(as.character(marker_text))))
    },
    error = function(e) {
      warning(sprintf("Error reading file %s: %s", basename(file_path), conditionMessage(e)))
      NULL
    }
  )
}

#' Preprocess the movement-execution period for one participant.
#'
#' Finds every `ID<pid>-<block>.xlsx` workbook below the global `base_dir`,
#' combines the blocks, numbers trials within each block (a new trial starts at
#' every row whose marker equals 27), keeps each trial from its first row up to
#' `post_step_rows` rows after the last step marker (14, 15, 16 or 17), computes
#' centre-of-mass vector magnitudes and per-trial RMS values, and writes the
#' result to `ID<pid>_execution_timeseries.xlsx` in the global `xsens_exec_dir`.
#'
#' Trials without any step marker are dropped. Rows before the first marker-27
#' row of a block are dropped.
#'
#' @param pid Participant ID used in the file names.
#' @param post_step_rows Number of rows kept after the last step marker of a
#'   trial. Defaults to 65, the value previously hard-coded.
#' @return The processed (ungrouped) data frame, or `NULL` when no input files
#'   or no valid rows were found.
process_participant_exec <- function(pid, post_step_rows = 65) {
  message(paste("Starting processing for Subject", pid, "..."))

  # 1. Detect files for the current participant.
  # list.files() matches `pattern` against file names only, so anchoring with ^
  # keeps e.g. ID12-1.xlsx out of the result for pid = 2.
  participant_files <- list.files(
    base_dir,
    pattern = sprintf("^ID%s-[0-9]+\\.xlsx$", pid),
    recursive = TRUE,
    full.names = TRUE
  )

  if (length(participant_files) == 0) {
    warning(sprintf("No files found for Subject %s in %s. Skipping.", pid, base_dir))
    return(NULL)
  }

  # Block number = digits between "ID<pid>-" and ".xlsx" in the file name.
  # Parsed from the base name so multi-digit IDs and directory names that
  # happen to contain "ID<n>-" cannot produce a wrong or missing block.
  block_numbers <- as.numeric(
    sub(
      sprintf("^ID%s-([0-9]+)\\.xlsx$", pid),
      "\\1",
      basename(participant_files)
    )
  )

  # 2. Read, clean, and combine data. Unreadable blocks come back as NULL and
  # are dropped by bind_rows().
  combined_data <- purrr::map2(
    participant_files, block_numbers,
    function(path, block) .read_xsens_block(path, block, pid)
  ) %>%
    bind_rows()

  if (nrow(combined_data) == 0) {
    message(sprintf("No valid data combined for Subject %s. Returning NULL.", pid))
    return(NULL)
  }

  # 3. Trial numbering: running count of marker-27 rows within each block.
  combined_data_numbered <- combined_data %>%
    group_by(Subject, Block) %>%
    mutate(
      # Tolerance comparison instead of == on a double.
      is_go = abs(marker_text - 27) < 0.001,
      # NA markers must not break the running count.
      Trial = as.integer(cumsum(replace_na(is_go, FALSE)))
    ) %>%
    ungroup() %>%
    filter(Trial > 0)

  # 4. Filter to the movement execution period.
  exec_data <- combined_data_numbered %>%
    mutate(is_step = marker_text %in% c(14, 15, 16, 17)) %>%
    group_by(Subject, Block, Trial) %>%
    mutate(
      row_in_trial = row_number(),
      # NA (rather than max() of an empty set, which warns and yields -Inf)
      # marks trials that contain no step marker.
      last_step_row = if (any(is_step)) max(row_in_trial[is_step]) else NA_integer_
    ) %>%
    filter(
      !is.na(last_step_row),
      row_in_trial <= last_step_row + post_step_rows
    ) %>%
    ungroup()

  # 5. Centre-of-mass calculations.
  final_df <- exec_data %>%
    # 5a. Per-row vector magnitudes.
    mutate(
      COM_pos = sqrt(co_m_pos_x^2 + co_m_pos_y^2 + co_m_pos_z^2),
      COM_vel = sqrt(co_m_vel_x^2 + co_m_vel_y^2 + co_m_vel_z^2),
      COM_acc = sqrt(co_m_acc_x^2 + co_m_acc_y^2 + co_m_acc_z^2)
    ) %>%
    # 5b. Trial-level RMS, repeated on every row of the trial.
    group_by(Subject, Block, Trial) %>%
    mutate(
      RMS_acc = sqrt(mean(COM_acc^2, na.rm = TRUE)),
      RMS_vel = sqrt(mean(COM_vel^2, na.rm = TRUE)),
      RMS_pos = sqrt(mean(COM_pos^2, na.rm = TRUE))
    ) %>%
    # mutate() has no `.groups` argument; drop the grouping explicitly.
    ungroup() %>%
    select(
      Subject, Block, Trial, row_in_trial,
      time_code = frame, Marker = marker_text,
      co_m_acc_x, co_m_acc_y, co_m_acc_z, # component columns kept for Lyapunov
      COM_pos, COM_vel, COM_acc,
      RMS_acc, RMS_vel, RMS_pos
    )

  # 6. Write the execution-period time series.
  out_file_ts <- file.path(xsens_exec_dir, sprintf("ID%s_execution_timeseries.xlsx", pid))
  write.xlsx(final_df, out_file_ts)
  message(paste("Successfully wrote time series data to:", out_file_ts))
  message(sprintf("Successfully processed and saved all files for Subject %s.", pid))

  final_df
}
# Execution: run the preprocessing for the configured participant.
# The result is NULL when no input files or no valid rows were found.
processed_data <- process_participant_exec(PARTICIPANT_ID)
# ---- Xsens and E-Prime merge ----
message(paste("Starting merge for Participant ID:", PARTICIPANT_ID))

# Merge keys are stored as factors with fixed levels. Any Block/Trial value
# outside these ranges becomes NA and is excluded from the merge.
max_blocks <- 5 # assumed maximum number of blocks
max_trials <- 48 # assumed maximum number of trials per block
block_levels <- as.character(seq_len(max_blocks))
trial_levels <- as.character(seq_len(max_trials))

# 1. Grab the execution-period time-series file for the current participant
xsens_processed_dir <- file.path(base_dir, "Movement Execution Period", paste0("ID", PARTICIPANT_ID))
execution_list <- list.files(
  path = xsens_processed_dir, # look in the specific ID directory
  pattern = sprintf("^ID%s_execution_timeseries\\.xlsx$", PARTICIPANT_ID),
  full.names = TRUE
)
if (length(execution_list) == 0) {
  stop(sprintf("Execution time-series file for ID %s not found in %s.", PARTICIPANT_ID, xsens_processed_dir))
}

# Read and format the Xsens time-series data
execution_df <- read_excel(execution_list[1]) %>%
  mutate(
    # Convert merge keys to factors so they match the E-Prime keys below
    Subject = as.factor(Subject),
    Block = factor(Block, levels = block_levels),
    Trial = factor(Trial, levels = trial_levels),
    time_code = as.character(time_code),
    # Ensure all CoM and RMS columns are numeric
    across(starts_with("COM_"), as.numeric),
    across(starts_with("RMS_"), as.numeric)
  )

# Surface rows that fell outside the assumed ranges instead of losing them
# silently in the merge.
if (anyNA(execution_df$Block) || anyNA(execution_df$Trial)) {
  warning(sprintf(
    "Xsens data for ID %s contains Block/Trial values that are missing or outside 1-%s / 1-%s; those rows are dropped from the merge.",
    PARTICIPANT_ID, max_blocks, max_trials
  ))
}

# 2. Import and simplify RT data (E-Prime)
rt_data <- read_excel(eprime_clean_file) %>%
  # Keep only the current participant
  filter(subject == PARTICIPANT_ID) %>%
  # NOTE: Do NOT filter bad trials here. We need to merge them to flag Xsens data.
  filter(!is.na(sequence)) %>%
  mutate(
    Subject = as.factor(subject),
    Block = factor(session, levels = block_levels),
    Trial = factor(trial, levels = trial_levels),
    Sequence = as.factor(sequence)
  ) %>%
  # Keep one row per trial so the join below is many-to-one
  distinct(Subject, Block, Trial, .keep_all = TRUE) %>%
  # Merge keys plus the trial-level columns carried into the output
  select(Subject, Block, Trial, Sequence, trial.RTS)

if (nrow(rt_data) == 0) {
  warning(sprintf(
    "No E-Prime rows found for ID %s in %s; the merged file will be empty.",
    PARTICIPANT_ID, eprime_clean_file
  ))
}

# 3. Merge Xsens time-series with RT sequence data
# Every Xsens row of a trial receives that trial's single RT/sequence row.
# na_matches = "never": by default dplyr joins treat NA keys as equal, which
# would attach an unrelated RT row to Xsens rows whose Block/Trial became NA.
execution_merged <- execution_df %>%
  left_join(rt_data, by = c("Subject", "Block", "Trial"), na_matches = "never") %>%
  # Drop Xsens rows that have no matching E-Prime trial
  filter(!is.na(Sequence)) %>%
  # The analysis script (load_data_fix.R) relies on these columns being present:
  select(
    Subject, Block, Trial, Sequence, row_in_trial, time_code, Marker,
    co_m_acc_x, co_m_acc_y, co_m_acc_z, COM_acc,
    RMS_acc, RMS_vel, RMS_pos, trial.RTS
  )

# 4. Write out the merged file
out_file_merged <- file.path(xsens_processed_dir, sprintf("ID%s_execution_merged.xlsx", PARTICIPANT_ID))
write.xlsx(execution_merged, out_file_merged)
message("Successfully merged Xsens time-series with RT data.")
message(paste("File saved to:", out_file_merged))