PreProcess2

library(tidyverse)
Warning: package 'tidyr' was built under R version 4.4.1
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.1     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(broom.mixed)
Warning: package 'broom.mixed' was built under R version 4.4.1
library(devtools)
Loading required package: usethis
library(brms)
Loading required package: Rcpp
Loading 'brms' package (version 2.21.0). Useful instructions
can be found by typing help('brms'). A more detailed introduction
to the package is available through vignette('brms_overview').

Attaching package: 'brms'

The following object is masked from 'package:stats':

    ar
library(mascutils)

Attaching package: 'mascutils'

The following object is masked from 'package:tidyr':

    expand_grid

The following object is masked from 'package:base':

    mode
library(bayr)
Registered S3 methods overwritten by 'bayr':
  method             from     
  coef.brmsfit       brms     
  knit_print.tbl_obs mascutils
  predict.brmsfit    brms     
  print.tbl_obs      mascutils

Attaching package: 'bayr'

The following objects are masked from 'package:mascutils':

    as_tbl_obs, discard_all_na, discard_redundant, expand_grid,
    go_arrange, go_first, left_union, reorder_levels, rescale_centered,
    rescale_unit, rescale_zero_one, update_by, z_score, z_trans

The following objects are masked from 'package:brms':

    fixef, ranef

The following object is masked from 'package:tidyr':

    expand_grid
library(readxl)
Warning: package 'readxl' was built under R version 4.4.1
library(ggplot2)
library(effects)
Loading required package: carData
Warning: package 'carData' was built under R version 4.4.1
lattice theme set by effectsTheme()
See ?effectsTheme for details.
library(lme4)
Warning: package 'lme4' was built under R version 4.4.1
Loading required package: Matrix
Warning: package 'Matrix' was built under R version 4.4.1

Attaching package: 'Matrix'

The following objects are masked from 'package:tidyr':

    expand, pack, unpack


Attaching package: 'lme4'

The following objects are masked from 'package:bayr':

    fixef, ranef

The following object is masked from 'package:brms':

    ngrps
library(haven)
library(lattice)
Warning: package 'lattice' was built under R version 4.4.1
library(car)
Warning: package 'car' was built under R version 4.4.1

Attaching package: 'car'

The following object is masked from 'package:mascutils':

    logit

The following object is masked from 'package:dplyr':

    recode

The following object is masked from 'package:purrr':

    some
library(knitr)
Warning: package 'knitr' was built under R version 4.4.1
library(reshape2)

Attaching package: 'reshape2'

The following object is masked from 'package:tidyr':

    smiths
library(dplyr)
library(forcats)
library(DHARMa)
Warning: package 'DHARMa' was built under R version 4.4.1
This is DHARMa 0.4.7. For overview type '?DHARMa'. For recent changes, type news(package = 'DHARMa')
library(Hmisc)
Warning: package 'Hmisc' was built under R version 4.4.1

Attaching package: 'Hmisc'

The following objects are masked from 'package:dplyr':

    src, summarize

The following objects are masked from 'package:base':

    format.pval, units
library(phia)
library(lsmeans)
Loading required package: emmeans
Warning: package 'emmeans' was built under R version 4.4.1
Welcome to emmeans.
Caution: You lose important information if you filter this package's results.
See '? untidy'

Attaching package: 'emmeans'

The following object is masked from 'package:devtools':

    test

The 'lsmeans' package is now basically a front end for 'emmeans'.
Users are encouraged to switch the rest of the way.
See help('transition') for more information, including how to
convert old 'lsmeans' objects and scripts to work with 'emmeans'.
library(emmeans)
library(multcomp)
Warning: package 'multcomp' was built under R version 4.4.1
Loading required package: mvtnorm
Loading required package: survival

Attaching package: 'survival'

The following object is masked from 'package:brms':

    kidney

Loading required package: TH.data
Warning: package 'TH.data' was built under R version 4.4.1
Loading required package: MASS
Warning: package 'MASS' was built under R version 4.4.1

Attaching package: 'MASS'

The following object is masked from 'package:dplyr':

    select


Attaching package: 'TH.data'

The following object is masked from 'package:MASS':

    geyser
library(plotly)

Attaching package: 'plotly'

The following object is masked from 'package:MASS':

    select

The following object is masked from 'package:Hmisc':

    subplot

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
library(lmerTest)

Attaching package: 'lmerTest'

The following object is masked from 'package:lme4':

    lmer

The following object is masked from 'package:stats':

    step
library(tinytex)
library(ggthemes)
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(magrittr)

Attaching package: 'magrittr'

The following object is masked from 'package:purrr':

    set_names

The following object is masked from 'package:tidyr':

    extract
library(afex)
Warning: package 'afex' was built under R version 4.4.1
************
Welcome to afex. For support visit: http://afex.singmann.science/
- Functions for ANOVAs: aov_car(), aov_ez(), and aov_4()
- Methods for calculating p-values with mixed(): 'S', 'KR', 'LRT', and 'PB'
- 'afex_aov' and 'mixed' objects can be passed to emmeans() for follow-up tests
- Get and set global package options with: afex_options()
- Set sum-to-zero contrasts globally: set_sum_contrasts()
- For example analyses see: browseVignettes("afex")
************

Attaching package: 'afex'

The following object is masked from 'package:lme4':

    lmer
library(purrr)
library(stringr)
A_B <- read.csv("/Users/can/Documents/Uni/Thesis/Data/E-Prime/all_excluded2.csv", sep = ";")
Q_D <- read.csv("/Users/can/Documents/Uni/Thesis/Data/E-Prime/Questionaire_Data.csv", sep = ";")
##Make other dataframes

#df <- df %>% mutate(feedback.ACC = as.numeric(feedback.ACC))

##Only include response-procedure entries

A_B_R <- A_B %>% filter(procedure == "responsprocedure")


##Create trial numbers 

df <- A_B_R %>%
  group_by(session, subject) %>%
  mutate(trial = rep(1:48, each = case_when(
    # first(session) keeps `each` a single value per group, which avoids the
    # "first element used of 'each' argument" warnings rep() would otherwise throw
    first(session) == 1  ~ 6,
    first(session) == 2  ~ 12,
    first(session) == 18 ~ 18,   # NB: possibly intended as session 3; session 3 currently falls through to the default
    TRUE ~ n() / 48              # default: derive responses per trial from the group size
  ))[1:n()]) %>%  # trim to the group length
  ungroup()
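
# Optional sanity check (a sketch, not part of the original pipeline): every
# subject x session combination should now contain 48 distinct trial numbers.
df %>%
  distinct(subject, session, trial) %>%
  count(subject, session, name = "n_trials")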
##Create Trial Accuracy 

df <- df %>% mutate(feedback.ACC = as.numeric(feedback.ACC), feedback.RT = as.numeric(feedback.RT))
df <- df %>%
  group_by(subject, session, trial) %>%
  mutate(trial.acc = sum(feedback.ACC, na.rm = TRUE) / n()) %>%
  ungroup()

##Make trial avg RT
df <- df %>%
  group_by(subject, session, trial) %>%
  mutate(trial.RT = sum(feedback.RT, na.rm = TRUE) / n()) %>%
  ungroup()


write.csv(df,"all_df.csv", row.names = FALSE)
all_df <- read.csv("/Users/can/Documents/Uni/Thesis/Data/E-Prime/all_df.csv")

# Convert trial.RT from milliseconds to seconds
all_df <- all_df %>%
  mutate(trial.RTS = trial.RT / 1000)

# Aggregate data to have one row per trial
all_df <- all_df %>%
  group_by(subject, session, trial) %>%
  summarise(
    trial.RT = mean(trial.RT, na.rm = TRUE),   # Mean RT per trial
    trial.RTS = mean(trial.RTS, na.rm = TRUE), # Mean RT in seconds
    trial.acc = mean(trial.acc, na.rm = TRUE)  # Mean accuracy per trial (a proportion; already constant within each trial)
  ) %>%
  ungroup()
`summarise()` has grouped output by 'subject', 'session'. You can override
using the `.groups` argument.
# Add flag to all_df
all_df <- all_df %>%
  mutate(bad_trial = trial.acc < 0.8)
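
# Optional overview (sketch): how many trials per session fall below the 80%
# accuracy cut-off.
all_df %>%
  group_by(session) %>%
  summarise(n_bad = sum(bad_trial), prop_bad = mean(bad_trial), .groups = "drop")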

#1. load all xsens data per ID

# List of file paths and corresponding block numbers
file_paths <- c(
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-001.xlsx",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-002.xlsx",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-003.xlsx",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-004.xlsx",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-005.xlsx"
)

# Optional: extract filenames for output later
file_names <- str_extract(basename(file_paths), "id22-\\d+")

# Loop over each file and process
for (i in seq_along(file_paths)) {
  
  file_path <- file_paths[i]
  block_number <- i  # Assign Block 1–5 based on order
  
  # Read the sheets
  markers <- read_excel(file_path, sheet = "Markers")
  com <- read_excel(file_path, sheet = "Center of Mass")
  
  # Merge the datasets (left join to keep all Center of Mass rows)
  merged_data <- left_join(com, markers, by = "Frame")
  
  # Add Block column
  merged_data <- merged_data %>%
    mutate(Block = block_number)
  
  # Clean Marker Text to ensure it's numeric
  merged_data$`Marker Text` <- as.numeric(trimws(merged_data$`Marker Text`))
  
  # Find the first index where Marker Text is 28
  start_index <- which(merged_data$`Marker Text` == 28)[1]
  
  # Filter from that index onward (if 28 exists)
  if (!is.na(start_index)) {
    filtered_data <- merged_data[start_index:nrow(merged_data), ]
  } else {
    warning(paste("No '28' marker found in:", file_path))
    filtered_data <- merged_data
  }
  
  # Add time columns
  filtered_data <- filtered_data %>%
    mutate(
      ms = Frame * (1000 / 60),
      s = Frame / 60
    )
  
  # Define the output folder
  output_folder <- "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv"
  
  # Save each processed file into that folder (a second copy in the working
  # directory is not needed, since later steps read from this folder)
  write.csv(
    filtered_data,
    file = file.path(output_folder, paste0(file_names[i], "_processed.csv")),
    row.names = FALSE
  )
}
Warning: No '28' marker found in:
/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx/id22-001.xlsx
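
# Sketch (assumes the xlsx folder holds only this participant's block files):
# the five hard-coded paths above could also be collected automatically.
# `xlsx_files` is a hypothetical name; seq_along(xlsx_files) would then replace
# seq_along(file_paths) in the loop.
xlsx_files <- list.files(
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/xlsx",
  pattern = "^id22-\\d+\\.xlsx$",
  full.names = TRUE
)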

#2. merge files, so that all blocks are in one dataset

# List of file names

file_list <- c(
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-001_processed.csv",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-002_processed.csv",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-003_processed.csv",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-004_processed.csv",
  "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/id22-005_processed.csv"
)
# Load and merge all files
id22_all <- do.call(rbind, lapply(file_list, read.csv))

#2.1 check if the blocks merged correctly; there should be around 48 occurrences each of markers "27" and "30" per block

# Make sure 'Marker Text' is numeric
id22_all$`Marker.Text` <- as.numeric(id22_all$`Marker.Text`)

# Count 27s per block
count_27_per_block <- id22_all %>%
  group_by(Block) %>%
  summarise(count_27 = sum(`Marker.Text` == 27, na.rm = TRUE))

# Count 30s per block
count_30_per_block <- id22_all %>%
  group_by(Block) %>%
  summarise(count_30 = sum(`Marker.Text` == 30, na.rm = TRUE))

# View result
print(count_27_per_block)
# A tibble: 5 × 2
  Block count_27
  <int>    <int>
1     1       48
2     2       48
3     3       44
4     4       48
5     5       47
print(count_30_per_block)
# A tibble: 5 × 2
  Block count_30
  <int>    <int>
1     1       48
2     2       48
3     3       44
4     4       48
5     5       47

#2.1.1 check whether the marker counts are roughly right; they will be processed further later

# Count each unique value in "Marker Text" per Block
marker_counts_per_block <- id22_all %>%
  group_by(Block, `Marker.Text`) %>%
  summarise(count = n(), .groups = "drop")

# View result
print(marker_counts_per_block)
# A tibble: 88 × 3
   Block Marker.Text count
   <int>       <dbl> <int>
 1     1           5   104
 2     1           6   104
 3     1           7    52
 4     1           8    52
 5     1          14    48
 6     1          15    94
 7     1          16    48
 8     1          17    98
 9     1          24     4
10     1          25     5
# ℹ 78 more rows

#3. save the merged data so it can be processed further at any time

# Define your output path
output_folder <- "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged"  # adjust as needed
output_file <- file.path(output_folder, "id22-all.csv")

# Save the CSV
write.csv(id22_all, output_file, row.names = FALSE)

#4. add trial numbers (reload the merged file first if necessary)

id22_all <- read.csv("/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged/id22-all.csv")

# Ensure Marker.Text is numeric (again, just in case)
id22_all$`Marker.Text` <- as.numeric(id22_all$`Marker.Text`)

# Add trial numbers that increment at Marker.Text == 27 and reset for each block
id22_all <- id22_all %>%
  group_by(Block) %>%
  mutate(
    trial_start = ifelse(`Marker.Text` == 27, 1, NA),        # Flag where trial starts
    trial = cumsum(!is.na(trial_start)),                     # Count up at each 27
    trial = ifelse(trial == 0, NA, trial)                    # Replace 0 with NA if needed
  ) %>%
  fill(trial, .direction = "down") %>%                       # Fill trial number forward
  ungroup() %>%
  select(-trial_start)                                       # Optional: remove helper column
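
# Optional check (sketch): the number of distinct trials per block should match
# count_27_per_block computed above (48, 48, 44, 48, 47).
id22_all %>%
  filter(!is.na(trial)) %>%
  distinct(Block, trial) %>%
  count(Block, name = "n_trials")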

#5. Find out which trials are to be excluded due to missing data points.
#   First, check whether the amount of data for a trial deviates too much from the rest of its block;
#   additionally, check the number of steps per trial.

# Step 1: Count number of rows (data points) per trial
trial_lengths <- id22_all %>%
  group_by(Block, trial) %>%
  summarise(
    n_rows = n(),
    .groups = "drop"
  )

# Step 2: Identify outlier trials based on row counts
trial_stats <- trial_lengths %>%
  group_by(Block) %>%
  mutate(
    median_rows = median(n_rows, na.rm = TRUE),
    iqr_rows = IQR(n_rows, na.rm = TRUE),
    lower_bound = median_rows - 1.5 * iqr_rows,
    upper_bound = median_rows + 1.5 * iqr_rows,
    is_row_outlier = n_rows < lower_bound | n_rows > upper_bound
  ) %>%
  ungroup()

# Step 3: Count marker 5–8 occurrences per trial
marker_counts <- id22_all %>%
  filter(`Marker.Text` %in% 5:8) %>%
  group_by(Block, trial, `Marker.Text`) %>%
  summarise(count = n(), .groups = "drop") %>%
  pivot_wider(
    names_from = `Marker.Text`,
    names_prefix = "marker_",
    values_from = count,
    values_fill = 0
  ) %>%
  mutate(marker_total = marker_5 + marker_6 + marker_7 + marker_8)

# Step 4: Merge row stats and marker counts into one summary table
trial_quality_summary <- trial_stats %>%
  left_join(marker_counts, by = c("Block", "trial"))

# Step 5: (Optional) View only suspicious trials
flagged_trials <- trial_quality_summary %>%
  filter(is_row_outlier | marker_total != 48)  # assumes 48 presses expected per trial

# Output full and flagged summaries
print(trial_quality_summary)
# A tibble: 240 × 13
   Block trial n_rows median_rows iqr_rows lower_bound upper_bound
   <int> <int>  <int>       <dbl>    <dbl>       <dbl>       <dbl>
 1     1     1   1419         899      204         593        1205
 2     1     2   1145         899      204         593        1205
 3     1     3   1054         899      204         593        1205
 4     1     4   1064         899      204         593        1205
 5     1     5   1270         899      204         593        1205
 6     1     6   1011         899      204         593        1205
 7     1     7    998         899      204         593        1205
 8     1     8   1003         899      204         593        1205
 9     1     9   1008         899      204         593        1205
10     1    10   1633         899      204         593        1205
# ℹ 230 more rows
# ℹ 6 more variables: is_row_outlier <lgl>, marker_5 <int>, marker_6 <int>,
#   marker_7 <int>, marker_8 <int>, marker_total <int>
print(flagged_trials)
# A tibble: 238 × 13
   Block trial n_rows median_rows iqr_rows lower_bound upper_bound
   <int> <int>  <int>       <dbl>    <dbl>       <dbl>       <dbl>
 1     1     1   1419         899      204         593        1205
 2     1     2   1145         899      204         593        1205
 3     1     3   1054         899      204         593        1205
 4     1     4   1064         899      204         593        1205
 5     1     5   1270         899      204         593        1205
 6     1     6   1011         899      204         593        1205
 7     1     7    998         899      204         593        1205
 8     1     8   1003         899      204         593        1205
 9     1     9   1008         899      204         593        1205
10     1    10   1633         899      204         593        1205
# ℹ 228 more rows
# ℹ 6 more variables: is_row_outlier <lgl>, marker_5 <int>, marker_6 <int>,
#   marker_7 <int>, marker_8 <int>, marker_total <int>
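
# Optional summary (sketch): how many trials are flagged per block.
flagged_trials %>%
  count(Block, name = "n_flagged")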

#6. Merge the behavioural trial metrics into the motion data (the hard-coded subject ID in Step 3 below must be adjusted per participant)

# Step 1: Convert RTs, keep accuracy as proportion
all_df <- all_df %>%
  mutate(trial.RTS = trial.RT / 1000)

# Step 2: Aggregate to one row per trial with true accuracy
all_df <- all_df %>%
  group_by(subject, session, trial) %>%
  summarise(
    trial.RT = mean(trial.RT, na.rm = TRUE),
    trial.RTS = mean(trial.RTS, na.rm = TRUE),
    trial.acc = mean(trial.acc, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(
    bad_trial = trial.acc < 0.8  # flag if accuracy < 80%
  )

# Step 3: Add subject ID to motion data (if not already present)
id22_all$subject <- 22  # Replace '22' with the actual participant ID if different

# Step 4: Join trial-level info into id22_all
id22_all <- id22_all %>%
  left_join(
    all_df %>%
      select(subject, session, trial, trial.acc, trial.RT, trial.RTS, bad_trial),
    by = c("subject", "Block" = "session", "trial")
  )

# Step 5: Add subject info to trial_quality_summary (if needed)
trial_quality_summary <- trial_quality_summary %>%
  left_join(
    id22_all %>%
      select(Block, trial, subject) %>%
      distinct(),
    by = c("Block", "trial")
  )

# Step 6: Join full trial metrics into trial_quality_summary
trial_quality_summary <- trial_quality_summary %>%
  left_join(
    all_df %>%
      select(subject, session, trial, trial.acc, trial.RT, trial.RTS, bad_trial),
    by = c("subject", "Block" = "session", "trial")
  )

# ✅ Done! trial_quality_summary and id22_all now include:
# - trial.acc (as a proportion)
# - trial.RT, trial.RTS
# - bad_trial (TRUE if accuracy < 0.8)

# Optional preview:
head(trial_quality_summary)
# A tibble: 6 × 18
  Block trial n_rows median_rows iqr_rows lower_bound upper_bound is_row_outlier
  <int> <int>  <int>       <dbl>    <dbl>       <dbl>       <dbl> <lgl>         
1     1     1   1419         899      204         593        1205 TRUE          
2     1     2   1145         899      204         593        1205 FALSE         
3     1     3   1054         899      204         593        1205 FALSE         
4     1     4   1064         899      204         593        1205 FALSE         
5     1     5   1270         899      204         593        1205 TRUE          
6     1     6   1011         899      204         593        1205 FALSE         
# ℹ 10 more variables: marker_5 <int>, marker_6 <int>, marker_7 <int>,
#   marker_8 <int>, marker_total <int>, subject <dbl>, trial.acc <dbl>,
#   trial.RT <dbl>, trial.RTS <dbl>, bad_trial <lgl>
head(id22_all)
# A tibble: 6 × 20
  Frame CoM.pos.x CoM.pos.y CoM.pos.z CoM.vel.x CoM.vel.y  CoM.vel.z CoM.acc.x
  <dbl>     <dbl>     <dbl>     <dbl>     <dbl>     <dbl>      <dbl>     <dbl>
1     0    -0.779     0.580     0.966 -0.00229   -0.00205 -0.00252     0.00214
2     1    -0.779     0.580     0.966  0.000161  -0.00304 -0.00206     0.0838 
3     2    -0.779     0.580     0.966  0.00236   -0.00394 -0.00161     0.106  
4     3    -0.779     0.579     0.966  0.00421   -0.00462 -0.00112     0.119  
5     4    -0.779     0.579     0.966  0.00529   -0.00471 -0.000598    0.110  
6     5    -0.779     0.579     0.966  0.00600   -0.00469 -0.0000529   0.111  
# ℹ 12 more variables: CoM.acc.y <dbl>, CoM.acc.z <dbl>, Marker.Text <dbl>,
#   Block <int>, ms <dbl>, s <dbl>, trial <int>, subject <dbl>,
#   trial.acc <dbl>, trial.RT <dbl>, trial.RTS <dbl>, bad_trial <lgl>
# Step 1: Prepare trial metrics (accuracy, RTs, and bad_trial flag for accuracy < 0.8)
trial_metrics <- all_df %>%
  select(subject, session, trial, trial.acc, trial.RT, trial.RTS) %>%
  mutate(bad_trial = trial.acc < 0.8)



# Step 3: Add subject info to trial_quality_summary (if not already present)
trial_subject_info <- id22_all %>%
  select(Block, trial, subject) %>%
  distinct()

trial_quality_summary <- trial_quality_summary %>%
  select(-matches("subject\\.?[xy]?$")) %>%
  left_join(trial_subject_info, by = c("Block", "trial"))

# Step 4: Merge trial_metrics into trial_quality_summary
trial_quality_summary <- trial_quality_summary %>%
  left_join(trial_metrics, by = c("subject", "Block" = "session", "trial"))

# ✅ Done — trial_quality_summary now includes trial.acc, RTs, and bad_trial (based on < 0.8)
head(trial_quality_summary)
# A tibble: 6 × 22
  Block trial n_rows median_rows iqr_rows lower_bound upper_bound is_row_outlier
  <int> <int>  <int>       <dbl>    <dbl>       <dbl>       <dbl> <lgl>         
1     1     1   1419         899      204         593        1205 TRUE          
2     1     2   1145         899      204         593        1205 FALSE         
3     1     3   1054         899      204         593        1205 FALSE         
4     1     4   1064         899      204         593        1205 FALSE         
5     1     5   1270         899      204         593        1205 TRUE          
6     1     6   1011         899      204         593        1205 FALSE         
# ℹ 14 more variables: marker_5 <int>, marker_6 <int>, marker_7 <int>,
#   marker_8 <int>, marker_total <int>, trial.acc.x <dbl>, trial.RT.x <dbl>,
#   trial.RTS.x <dbl>, bad_trial.x <lgl>, subject <dbl>, trial.acc.y <dbl>,
#   trial.RT.y <dbl>, trial.RTS.y <dbl>, bad_trial.y <lgl>
# Step 1: Renumber trials within blocks in all_df and keep accuracy & timing
trial_metrics <- all_df %>%
  group_by(subject, session) %>%
  mutate(trial = row_number()) %>%
  ungroup() %>%
  select(subject, session, trial, trial.acc, trial.RT, trial.RTS) %>%
  mutate(bad_trial = trial.acc == 0)

# Step 2: Add subject info to trial_quality_summary (if not already present)
trial_subject_info <- id22_all %>%
  select(Block, trial, subject) %>%
  distinct()

trial_quality_summary <- trial_quality_summary %>%
  select(-matches("subject\\.?[xy]?$")) %>%
  left_join(trial_subject_info, by = c("Block", "trial"))

# Step 3: Join accuracy + RT info from all_df
trial_quality_summary <- trial_quality_summary %>%
  left_join(trial_metrics, by = c("subject", "Block" = "session", "trial"))



# ✅ Done
head(trial_quality_summary)
# A tibble: 6 × 26
  Block trial n_rows median_rows iqr_rows lower_bound upper_bound is_row_outlier
  <int> <int>  <int>       <dbl>    <dbl>       <dbl>       <dbl> <lgl>         
1     1     1   1419         899      204         593        1205 TRUE          
2     1     2   1145         899      204         593        1205 FALSE         
3     1     3   1054         899      204         593        1205 FALSE         
4     1     4   1064         899      204         593        1205 FALSE         
5     1     5   1270         899      204         593        1205 TRUE          
6     1     6   1011         899      204         593        1205 FALSE         
# ℹ 18 more variables: marker_5 <int>, marker_6 <int>, marker_7 <int>,
#   marker_8 <int>, marker_total <int>, trial.acc.x <dbl>, trial.RT.x <dbl>,
#   trial.RTS.x <dbl>, bad_trial.x <lgl>, trial.acc.y <dbl>, trial.RT.y <dbl>,
#   trial.RTS.y <dbl>, bad_trial.y <lgl>, subject <dbl>, trial.acc <dbl>,
#   trial.RT <dbl>, trial.RTS <dbl>, bad_trial <lgl>
wrong_trials_summary <- trial_quality_summary %>%
  filter(bad_trial.x == TRUE) %>%
  group_by(subject, Block) %>%
  summarise(
    bad_trials = paste(sort(trial), collapse = ", "),
    n_bad_trials = n(),
    .groups = "drop"
  )

# View the result
print(wrong_trials_summary)
# A tibble: 5 × 4
  subject Block bad_trials                                          n_bad_trials
    <dbl> <int> <chr>                                                      <int>
1      22     1 1, 2, 4, 12, 41                                                5
2      22     2 1, 2, 3, 5, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22…           25
3      22     3 1, 2, 3, 4, 5, 6, 7, 10, 12, 13, 14, 15, 16, 18, 1…           36
4      22     4 2, 12, 17, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28,…           16
5      22     5 1, 2, 3, 4, 6, 9, 10, 11, 14, 28, 47                          11
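
# Sketch: the accuracy-flagged trials could also be pulled programmatically
# rather than typed by hand (note that the hand-typed removal list in Step 4
# below contains additional trials beyond these). `acc_flagged` is a
# hypothetical name.
acc_flagged <- trial_quality_summary %>%
  filter(bad_trial.x) %>%
  distinct(Block, trial)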
# Step 1: Get trial starts (marker 27)
trial_starts <- id22_all %>%
  filter(Marker.Text == 27) %>%
  select(subject, Block, trial, ms) %>%
  rename(start_trial = trial, start_ms = ms)

# Step 2: Assign the pre-trial window (1600 ms before each trial start, per the filter below) to that trial (even if it was previously NA or deleted)
# Add a unique row ID for tracking
id22_all <- id22_all %>%
  mutate(row_id = row_number())

# Get all rows that fall within 1100ms before any trial start
pre_trial_buffer <- id22_all %>%
  filter(!is.na(ms)) %>%
  inner_join(trial_starts, by = c("subject", "Block")) %>%
  filter(ms >= (start_ms - 1600) & ms < start_ms) %>%
  select(row_id, reassigned_trial = start_trial)
Warning in inner_join(., trial_starts, by = c("subject", "Block")): Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.
# Step 3: Apply the reassigned trial numbers to the original data
id22_all <- id22_all %>%
  left_join(pre_trial_buffer, by = "row_id") %>%
  mutate(
    trial = ifelse(!is.na(reassigned_trial), reassigned_trial, trial)
  ) %>%
  select(-row_id, -reassigned_trial)

#insert the problematic trials identified above and delete them if desired

# Step 4: Define trials to remove
trials_to_remove_list <- list(
  "1" = c(1, 2, 4, 12, 41,10, 15, 26, 45, 48),
  "2" = c(1, 2, 3, 5, 11, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 28, 29, 30, 31, 32, 34, 40, 48,13, 41, 47,48),
  "3" = c(1, 2, 3, 4, 5, 6, 7, 10, 12, 13, 14, 15, 16, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42,44,45,46,47,48),
  "4" = c(6,21,48),
  "5" = c(31,47,48)
)

trials_to_remove_df <- bind_rows(
  lapply(names(trials_to_remove_list), function(block) {
    data.frame(Block = as.integer(block), trial = trials_to_remove_list[[block]])
  })
)

# Step 5: Filter out unwanted trials (excluding reassigned pre-trial buffers)
id22_mixed <- id22_all %>%
  left_join(trials_to_remove_df %>% mutate(to_remove = TRUE), by = c("Block", "trial")) %>%
  filter(is.na(to_remove)) %>%
  select(-to_remove)
Warning in left_join(., trials_to_remove_df %>% mutate(to_remove = TRUE), : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 148484 of `x` matches multiple rows in `y`.
ℹ Row 1 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship =
  "many-to-many"` to silence this warning.

#proceed with this if trials were deleted

# Define which marker values count as steps
step_markers <- c(14, 15, 16, 17)

# Merge trial-level RTs from trial_quality_summary into the motion data
id22_mixed <- id22_mixed %>%
  left_join(
    trial_quality_summary %>%
      select(subject, Block, trial, trial.RT, trial.RTS),
    by = c("subject", "Block", "trial")
  )

# Drop leftover helper columns from the earlier joins

id22_mixed <- id22_mixed %>%
  select(-matches("^trial\\.[xy]$"), -bad_trial)

#cleaned

# Define output folder
output_folder <- "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged/Cleaned"

# Define file name and full path
output_file <- file.path(output_folder, "id22_mixed.csv")

# Write the file
write.csv(id22_mixed, output_file, row.names = FALSE)
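
# Optional check (sketch): re-read the written file and confirm the row count matches.
stopifnot(nrow(read.csv(output_file)) == nrow(id22_mixed))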