── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#1. load all xsens data per ID
# List of file paths and corresponding block numbersfile_paths <-c("/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-001.xlsx","/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-002.xlsx","/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-003.xlsx","/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-004.xlsx","/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-005.xlsx")# Optional: extract filenames for output laterfile_names <-str_extract(basename(file_paths), "id22-\\d+")# Loop over each file and processfor (i inseq_along(file_paths)) { file_path <- file_paths[i] block_number <- i # Assign Block 1–5 based on order# Read the sheets markers <-read_excel(file_path, sheet ="Markers") com <-read_excel(file_path, sheet ="Center of Mass")# Merge the datasets (left join to keep all Center of Mass rows) merged_data <-left_join(com, markers, by ="Frame")# Add Block column merged_data <- merged_data %>%mutate(Block = block_number)# Clean Marker Text to ensure it's numeric merged_data$`Marker Text`<-as.numeric(trimws(merged_data$`Marker Text`))# Find the first index where Marker Text is 28 start_index <-which(merged_data$`Marker Text`==28)[1]# Filter from that index onward (if 28 exists)if (!is.na(start_index)) { filtered_data <- merged_data[start_index:nrow(merged_data), ] } else {warning(paste("No '28' marker found in:", file_path)) filtered_data <- merged_data }# Add time columns filtered_data <- filtered_data %>%mutate(ms = Frame * (1000/60),s = Frame /60 )# Define your output folderoutput_folder <-"/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv"# Save each processed file into that folderwrite.csv( filtered_data,file =file.path(output_folder, paste0(file_names[i], "_processed.csv")),row.names =FALSE)# Write to CSV (optional)write.csv(filtered_data, paste0(file_names[i], "_processed.csv"), row.names =FALSE)}
Warning: No '28' marker found in:
/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-001.xlsx
#2. merge files, so that all blocks are in one dataset
# List of file namesfile_list <-c("/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-001_processed.csv","/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-002_processed.csv","/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-003_processed.csv","/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-004_processed.csv","/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-005_processed.csv")# Load and merge all filesid22_all <-do.call(rbind, lapply(file_list, read.csv))
#2.1 check if merged correctly, there should be around 48 times the markers “27” & “30” per block
# Make sure 'Marker Text' is numericid22_all$`Marker.Text`<-as.numeric(id22_all$`Marker.Text`)# Count 27s per blockcount_27_per_block <- id22_all %>%group_by(Block) %>%summarise(count_27 =sum(`Marker.Text`==27, na.rm =TRUE))# Count 30s per blockcount_30_per_block <- id22_all %>%group_by(Block) %>%summarise(count_30 =sum(`Marker.Text`==30, na.rm =TRUE))# View resultprint(count_27_per_block)
#2.1.1 check if marker amounts are roughly right, will be further processed later
# Count each unique value in "Marker Text" per Blockmarker_counts_per_block <- id22_all %>%group_by(Block, `Marker.Text`) %>%summarise(count =n(), .groups ="drop")# View resultprint(marker_counts_per_block)
# Define your output pathoutput_folder <-"/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged"# adjust as neededoutput_file <-file.path(output_folder, "id22-all.csv")# Save the CSVwrite.csv(id22_all, output_file, row.names =FALSE)
#4. add trialnumbers and load file if necessary
id22_all <-read.csv("/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged/id22-all.csv")# Ensure Marker.Text is numeric (again, just in case)id22_all$`Marker.Text`<-as.numeric(id22_all$`Marker.Text`)# Add trial numbers that increment at Marker.Text == 27 and reset for each blockid22_all <- id22_all %>%group_by(Block) %>%mutate(trial_start =ifelse(`Marker.Text`==27, 1, NA), # Flag where trial startstrial =cumsum(!is.na(trial_start)), # Count up at each 27trial =ifelse(trial ==0, NA, trial) # Replace 0 with NA if needed ) %>%fill(trial, .direction ="down") %>%# Fill trial number forwardungroup() %>%select(-trial_start) # Optional: remove helper column
#5.finding out which trials are to be excluded due to missing datapoints. #first it will be looked at if the amount of data deviates for a trial within a block too much #additionally the amount of steps per trial are checked
# Step 1: Count number of rows (data points) per trialtrial_lengths <- id22_all %>%group_by(Block, trial) %>%summarise(n_rows =n(),.groups ="drop" )# Step 2: Identify outlier trials based on row countstrial_stats <- trial_lengths %>%group_by(Block) %>%mutate(median_rows =median(n_rows, na.rm =TRUE),iqr_rows =IQR(n_rows, na.rm =TRUE),lower_bound = median_rows -1.5* iqr_rows,upper_bound = median_rows +1.5* iqr_rows,is_row_outlier = n_rows < lower_bound | n_rows > upper_bound ) %>%ungroup()# Step 3: Count marker 5–8 occurrences per trialmarker_counts <- id22_all %>%filter(`Marker.Text`%in%5:8) %>%group_by(Block, trial, `Marker.Text`) %>%summarise(count =n(), .groups ="drop") %>%pivot_wider(names_from =`Marker.Text`,names_prefix ="marker_",values_from = count,values_fill =0 ) %>%mutate(marker_total = marker_5 + marker_6 + marker_7 + marker_8)# Step 4: Merge row stats and marker counts into one summary tabletrial_quality_summary <- trial_stats %>%left_join(marker_counts, by =c("Block", "trial"))# Step 5: (Optional) View only suspicious trialsflagged_trials <- trial_quality_summary %>%filter(is_row_outlier | marker_total !=48) # assumes 48 presses expected per trial# Output full and flagged summariesprint(trial_quality_summary)
# Step 2: Create table of bad trialsbad_trials <- df_lc %>%filter(trial.acc ==0) %>%select(subject, session, trial) %>%mutate(bad_trial =TRUE)# Step 3: Add subject column to id22_all if needed (assumes 1 subject here, e.g., subject 3)# Adjust this line as needed if you're processing per participantid22_all$subject <-22# Replace with actual subject number if known# Step 4: Join to mark bad trialsid22_all <- id22_all %>%left_join(bad_trials, by =c("subject", "Block"="session", "trial")) %>%mutate(bad_trial =ifelse(is.na(bad_trial), FALSE, bad_trial))# View a few rowshead(id22_all)
# Step 1: Get trial starts (marker 27)trial_starts <- id22_all %>%filter(Marker.Text ==27) %>%select(subject, Block, trial, ms) %>%rename(start_trial = trial, start_ms = ms)# Step 2: Assign 1100ms before each trial start to that trial (even if it was previously NA or deleted)# Add a unique row ID for trackingid22_all <- id22_all %>%mutate(row_id =row_number())# Get all rows that fall within 1100ms before any trial startpre_trial_buffer <- id22_all %>%filter(!is.na(ms)) %>%inner_join(trial_starts, by =c("subject", "Block")) %>%filter(ms >= (start_ms -1100) & ms < start_ms) %>%select(row_id, reassigned_trial = start_trial)
Warning in inner_join(., trial_starts, by = c("subject", "Block")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
Warning in left_join(., trials_to_remove_df %>% mutate(to_remove = TRUE), : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 148514 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
"many-to-many"` to silence this warning.
# Define output folderoutput_folder <-"/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged/Cleaned"# Define file name and full pathoutput_file <-file.path(output_folder, "id22_clean.csv")# Write the filewrite.csv(id22_clean, output_file, row.names =FALSE)