Group CoM

library(tidyverse)
library(tidyr)
library(dplyr)
library(readr)
library(purrr)
library(ggplot2)
library(e1071)
library(emmeans)
library(lme4)
library(lmerTest)
library(patchwork)
# Define the path to your folder containing the CSV files
data_folder <- "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged/Cleaned"  # <-- Replace with your actual path

# Get a list of all *_clean.csv files
file_list <- list.files(data_folder, pattern = "_clean\\.csv$", full.names = TRUE)

# Read and combine all data files into one dataframe
all_data <- file_list %>%
  map_dfr(read_csv)
# Filter for Blocks 1–3
block_data <- all_data %>%
  filter(Block %in% c(1, 2, 3))

# Function to tag each row with a phase
tag_trial_phases <- function(df) {
  df %>%
    group_by(subject, Block, trial) %>%
    mutate(
      start_ms = ms[which(Marker.Text == 27)[1]],
      end_ms = ms[which(Marker.Text == 26)[1]],
      phase = case_when(
        !is.na(start_ms) & !is.na(end_ms) & ms >= start_ms & ms <= end_ms ~ "Execution",
        !is.na(start_ms) & ms >= (start_ms - 1000) & ms < start_ms ~ "Preparation",
        TRUE ~ NA_character_
      )
    ) %>%
    ungroup() %>%
    filter(!is.na(phase))  # keep only phase-tagged rows
}
tagged_data <- tag_trial_phases(block_data)

rms_data <- tagged_data %>%
  group_by(subject, Block, trial, phase) %>%
  summarise(
    rms_x = sqrt(mean(CoM.acc.x^2, na.rm = TRUE)),
    rms_y = sqrt(mean(CoM.acc.y^2, na.rm = TRUE)),
    rms_z = sqrt(mean(CoM.acc.z^2, na.rm = TRUE)),
    .groups = "drop"
  )
# Recalculate trial numbers (if not sequentially clean)
rms_data <- rms_data %>%
  group_by(subject, Block, phase) %>%
  mutate(TrialInBlock = row_number()) %>%
  ungroup()

# Group-level mean and CI
group_rms_summary <- rms_data %>%
  group_by(Block, TrialInBlock, phase) %>%
  summarise(
    mean_rms_x = mean(rms_x, na.rm = TRUE),
    se_rms_x = sd(rms_x, na.rm = TRUE) / sqrt(n()),
    mean_rms_y = mean(rms_y, na.rm = TRUE),
    se_rms_y = sd(rms_y, na.rm = TRUE) / sqrt(n()),
    mean_rms_z = mean(rms_z, na.rm = TRUE),
    se_rms_z = sd(rms_z, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )
ggplot(group_rms_summary, aes(x = TrialInBlock, y = mean_rms_x, color = as.factor(Block))) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_rms_x - se_rms_x, ymax = mean_rms_x + se_rms_x, fill = as.factor(Block)), alpha = 0.2, color = NA) +
  facet_wrap(~ phase, nrow = 1) +
  ylim(0, 2) +
  labs(
    title = "Mean RMS of CoM Acceleration - X Axis",
    x = "Trial",
    y = "RMS Acceleration",
    color = "Block",
    fill = "Block"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

ggplot(group_rms_summary, aes(x = TrialInBlock, y = mean_rms_y, color = as.factor(Block))) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_rms_y - se_rms_y, ymax = mean_rms_y + se_rms_y, fill = as.factor(Block)), alpha = 0.2, color = NA) +
  facet_wrap(~ phase, nrow = 1) +
  ylim(0, 2) +
  labs(
    title = "Mean RMS of CoM Acceleration - Y Axis",
    x = "Trial",
    y = "RMS Acceleration",
    color = "Block",
    fill = "Block"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

ggplot(group_rms_summary, aes(x = TrialInBlock, y = mean_rms_z, color = as.factor(Block))) +
  geom_line(size = 1) +
  geom_ribbon(aes(ymin = mean_rms_z - se_rms_z, ymax = mean_rms_z + se_rms_z, fill = as.factor(Block)), alpha = 0.2, color = NA) +
  facet_wrap(~ phase, nrow = 1) +
  ylim(0, 2) +
  labs(
    title = "Mean RMS of CoM Acceleration - Z Axis",
    x = "Trial",
    y = "RMS Acceleration",
    color = "Block",
    fill = "Block"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

#RMS Acceleration Execution

ggplot(filter(rms_data, phase == "Execution"), aes(x = factor(Block), y = rms_x, fill = phase)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
  ylim(0, 2.5) +
  labs(
    title = "Execution Phase: CoM Acceleration RMS - X Axis",
    x = "Block",
    y = "RMS Acceleration"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12), legend.position = "none")
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).

ggplot(filter(rms_data, phase == "Execution"), aes(x = factor(Block), y = rms_y, fill = phase)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
  ylim(0, 2.5) +
  labs(
    title = "Execution Phase: CoM Acceleration RMS - Y Axis",
    x = "Block",
    y = "RMS Acceleration"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12), legend.position = "none")
Warning: Removed 9 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 9 rows containing missing values or values outside the scale range
(`geom_point()`).

ggplot(filter(rms_data, phase == "Execution"), aes(x = factor(Block), y = rms_z, fill = phase)) +
  geom_boxplot(alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
  ylim(0, 2.5) +
  labs(
    title = "Execution Phase: CoM Acceleration RMS - Z Axis",
    x = "Block",
    y = "RMS Acceleration"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12), legend.position = "none")
Warning: Removed 113 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 113 rows containing missing values or values outside the scale range
(`geom_point()`).

#RMS acceleration preparation phase

# Define preparation window in milliseconds
prep_window_ms <- 1000

# Extract preparation phase directly from all_data (for Blocks 1–3 only)
prep_data <- all_data %>%
  filter(Block %in% c(1, 2, 3)) %>%
  group_split(subject, Block, trial) %>%
  map_dfr(function(trial_df) {
    exec_start_row <- which(trial_df$Marker.Text == 27)[1]
    
    # Only proceed if Marker 27 exists and there's time before it
    if (!is.na(exec_start_row) && exec_start_row > 1) {
      exec_start_ms <- trial_df$ms[exec_start_row]
      
      # Extract 1000ms before execution start
      prep_df <- trial_df %>%
        filter(ms >= (exec_start_ms - prep_window_ms) & ms < exec_start_ms) %>%
        mutate(phase = "Preparation")
      
      return(prep_df)
    } else {
      return(NULL)
    }
  })

# Summarize RMS CoM acceleration per trial
prep_rms <- prep_data %>%
  group_by(subject, Block, trial, phase) %>%
  summarise(
    rms_x = sqrt(mean(CoM.acc.x^2, na.rm = TRUE)),
    rms_y = sqrt(mean(CoM.acc.y^2, na.rm = TRUE)),
    rms_z = sqrt(mean(CoM.acc.z^2, na.rm = TRUE)),
    .groups = "drop"
  )


# Boxplot for X Axis
ggplot(prep_rms, aes(x = factor(Block), y = rms_x)) +
  geom_boxplot(fill = "skyblue", alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
  ylim(0, 0.5) +
  labs(
    title = "Preparation Phase: CoM RMS - X Axis",
    x = "Block",
    y = "RMS Acceleration"
  ) +
  theme_minimal()
Warning: Removed 57 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 57 rows containing missing values or values outside the scale range
(`geom_point()`).

# Boxplot for Y Axis
ggplot(prep_rms, aes(x = factor(Block), y = rms_y)) +
  geom_boxplot(fill = "salmon", alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
  ylim(0, 0.5) +
  labs(
    title = "Preparation Phase: CoM RMS - Y Axis",
    x = "Block",
    y = "RMS Acceleration"
  ) +
  theme_minimal()
Warning: Removed 73 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 73 rows containing missing values or values outside the scale range
(`geom_point()`).

# Boxplot for Z Axis
ggplot(prep_rms, aes(x = factor(Block), y = rms_z)) +
  geom_boxplot(fill = "seagreen", alpha = 0.7, outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
  ylim(0, 0.5) +
  labs(
    title = "Preparation Phase: CoM RMS - Z Axis",
    x = "Block",
    y = "RMS Acceleration"
  ) +
  theme_minimal()
Warning: Removed 93 rows containing non-finite outside the scale range
(`stat_boxplot()`).
Warning: Removed 93 rows containing missing values or values outside the scale range
(`geom_point()`).

Linear regression over trials - Trainingblocks

#Block1 
block1_data <- rms_data %>%
  filter(Block == 1, phase == "Execution")  # or Preparation if you want

# Model: RMS ~ Trial (continuous)
model_block1_x <- lm(rms_x ~ trial, data = block1_data)
summary(model_block1_x)

Call:
lm(formula = rms_x ~ trial, data = block1_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.5141 -0.2920 -0.1502  0.1589  1.6521 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept) 0.8300725  0.0369117  22.488   <2e-16 ***
trial       0.0007399  0.0012844   0.576    0.565    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.3972 on 601 degrees of freedom
Multiple R-squared:  0.0005519, Adjusted R-squared:  -0.001111 
F-statistic: 0.3319 on 1 and 601 DF,  p-value: 0.5648
ggplot(block1_data, aes(x = trial, y = rms_x)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 1 (X-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(block1_data, aes(x = trial, y = rms_y)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 1 (Y-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).

ggplot(block1_data, aes(x = trial, y = rms_z)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 1 (Z-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 58 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 58 rows containing missing values or values outside the scale range
(`geom_point()`).

#Block2
block2_data <- rms_data %>%
  filter(Block == 2, phase == "Execution")  # or Preparation if you want

# Model: RMS ~ Trial (continuous)
model_block2_x <- lm(rms_x ~ trial, data = block2_data)
summary(model_block2_x)

Call:
lm(formula = rms_x ~ trial, data = block2_data)

Residuals:
    Min      1Q  Median      3Q     Max 
-0.6407 -0.2614 -0.1076  0.1207  1.8593 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  1.013137   0.041411  24.465  < 2e-16 ***
trial       -0.008165   0.001441  -5.668 2.48e-08 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.4117 on 485 degrees of freedom
Multiple R-squared:  0.06213,   Adjusted R-squared:  0.0602 
F-statistic: 32.13 on 1 and 485 DF,  p-value: 2.477e-08
ggplot(block2_data, aes(x = trial, y = rms_x)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 2 (X-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 2 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 2 rows containing missing values or values outside the scale range
(`geom_point()`).

ggplot(block2_data, aes(x = trial, y = rms_y)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 2 (Y-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 7 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 7 rows containing missing values or values outside the scale range
(`geom_point()`).

ggplot(block2_data, aes(x = trial, y = rms_z)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 2 (Z-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 42 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 42 rows containing missing values or values outside the scale range
(`geom_point()`).

# Block 3
block3_data <- rms_data %>%
  filter(Block == 3, phase == "Execution")  # or Preparation if you want

# Model: RMS ~ Trial (continuous)
model_block3_x <- lm(rms_x ~ trial, data = block3_data)
summary(model_block3_x)

Call:
lm(formula = rms_x ~ trial, data = block3_data)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.51321 -0.17133 -0.03764  0.08784  1.16343 

Coefficients:
             Estimate Std. Error t value Pr(>|t|)    
(Intercept)  0.915119   0.032380  28.262  < 2e-16 ***
trial       -0.010157   0.001263  -8.042 1.25e-14 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.2835 on 364 degrees of freedom
Multiple R-squared:  0.1509,    Adjusted R-squared:  0.1485 
F-statistic: 64.68 on 1 and 364 DF,  p-value: 1.254e-14
ggplot(block3_data, aes(x = trial, y = rms_x)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 3 (X-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(block3_data, aes(x = trial, y = rms_y)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 3 (Y-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'

ggplot(block3_data, aes(x = trial, y = rms_z)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  ylim(0, 2.5) +
  labs(
    title = "Trend of CoM RMS Acceleration Across Trials - Block 3 (Z-axis)",
    x = "Trial", y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 13 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 13 rows containing missing values or values outside the scale range
(`geom_point()`).

# Block 1
block1_long <- block1_data %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

p1 <- ggplot(block1_long, aes(x = trial, y = RMS)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  facet_wrap(~ Axis, nrow = 1) +
  ylim(0, 2.5) +
  labs(
    title = "Block 1",
    x = "Trial",
    y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()

# Block 2
block2_long <- block2_data %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

p2 <- ggplot(block2_long, aes(x = trial, y = RMS)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "green") +
  facet_wrap(~ Axis, nrow = 1) +
  ylim(0, 2.5) +
  labs(
    title = "Block 2",
    x = "Trial",
    y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()

# Block 3
block3_long <- block3_data %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

p3 <- ggplot(block3_long, aes(x = trial, y = RMS)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  facet_wrap(~ Axis, nrow = 1) +
  ylim(0, 2.5) +
  labs(
    title = "Block 3",
    x = "Trial",
    y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
# Combine the three plots horizontally
(p1 | p2 | p3) +
  plot_annotation(title = "CoM RMS Acceleration Trends Across Trials - All Blocks",
                  theme = theme(plot.title = element_text(hjust = 0.5, size = 16)))
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 60 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 60 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 51 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 51 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 13 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 13 rows containing missing values or values outside the scale range
(`geom_point()`).

# --- Filter for Preparation Phase
prep_data <- rms_data %>% filter(phase == "Preparation")

# --- Block 1
block1_prep <- prep_data %>% filter(Block == 1)

block1_long <- block1_prep %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

p1 <- ggplot(block1_long, aes(x = trial, y = RMS)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  facet_wrap(~ Axis, nrow = 1) +
  ylim(0, 1) +
  labs(
    title = "Block 1",
    x = "Trial",
    y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()


# --- Block 2
block2_prep <- prep_data %>% filter(Block == 2)

block2_long <- block2_prep %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

p2 <- ggplot(block2_long, aes(x = trial, y = RMS)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "green") +
  facet_wrap(~ Axis, nrow = 1) +
  ylim(0, 1) +
  labs(
    title = "Block 2",
    x = "Trial",
    y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()


# --- Block 3
block3_prep <- prep_data %>% filter(Block == 3)

block3_long <- block3_prep %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

p3 <- ggplot(block3_long, aes(x = trial, y = RMS)) +
  geom_point(alpha = 0.6) +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  facet_wrap(~ Axis, nrow = 1) +
  ylim(0, 1) +
  labs(
    title = "Block 3",
    x = "Trial",
    y = "RMS Acceleration (m/s²)"
  ) +
  theme_minimal()
(p1 | p2 | p3) +
  plot_annotation(
    title = "CoM RMS Acceleration Trends Across Trials - Preparation Phase (All Blocks)",
    theme = theme(plot.title = element_text(hjust = 0.5, size = 16))
  )
`geom_smooth()` using formula = 'y ~ x'
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 38 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 38 rows containing missing values or values outside the scale range
(`geom_point()`).
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 62 rows containing non-finite outside the scale range
(`stat_smooth()`).
Warning: Removed 62 rows containing missing values or values outside the scale range
(`geom_point()`).

# Only execution data
execution_data <- tagged_data %>%
  filter(phase == "Execution")

# Define step markers (14, 15, 16, 17)
step_markers <- c(14, 15, 16, 17)

# Extract step-wise RMS per axis
step_rms_df <- execution_data %>%
  filter(Marker.Text %in% step_markers) %>%
  group_by(subject, Block, trial, Marker.Text) %>%
  summarise(
    rms_x = sqrt(mean(CoM.acc.x^2, na.rm = TRUE)),
    rms_y = sqrt(mean(CoM.acc.y^2, na.rm = TRUE)),
    rms_z = sqrt(mean(CoM.acc.z^2, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  # reshape to long format
  tidyr::pivot_longer(cols = starts_with("rms_"), names_to = "Axis", values_to = "RMS") %>%
  mutate(
    Axis = gsub("rms_", "", Axis),
    Step = ave(Marker.Text, subject, Block, trial, FUN = seq_along)  # Step count per trial
  )


# ANOVA per axis for steps 1–6
anova_results <- step_rms_df %>%
  filter(Step <= 6) %>%
  group_by(Axis) %>%
  group_map(~ {
    cat("\nANOVA for Axis:", unique(.x$Axis), "\n")
    model <- aov(RMS ~ as.factor(Block) + Error(subject/(Block)), data = .x)
    print(summary(model))
  })
Warning: Unknown or uninitialised column: `Axis`.

ANOVA for Axis: 

Error: subject
                 Df Sum Sq Mean Sq
as.factor(Block)  1 0.0389  0.0389

Error: subject:Block
                 Df Sum Sq Mean Sq
as.factor(Block)  1  20.74   20.74

Error: Within
                   Df Sum Sq Mean Sq F value Pr(>F)
as.factor(Block)    2    1.3  0.6494   1.515   0.22
Residuals        2907 1245.7  0.4285               
Warning: Unknown or uninitialised column: `Axis`.

ANOVA for Axis: 

Error: subject
                 Df Sum Sq Mean Sq
as.factor(Block)  1  1.116   1.116

Error: subject:Block
                 Df Sum Sq Mean Sq
as.factor(Block)  1  3.294   3.294

Error: Within
                   Df Sum Sq Mean Sq F value Pr(>F)  
as.factor(Block)    2    3.1  1.5527   4.067 0.0172 *
Residuals        2907 1109.7  0.3818                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Warning: Unknown or uninitialised column: `Axis`.

ANOVA for Axis: 

Error: subject
                 Df Sum Sq Mean Sq
as.factor(Block)  1 0.2048  0.2048

Error: subject:Block
                 Df Sum Sq Mean Sq
as.factor(Block)  1  20.78   20.78

Error: Within
                   Df Sum Sq Mean Sq F value Pr(>F)  
as.factor(Block)    2     10   5.026   3.897 0.0204 *
Residuals        2907   3749   1.290                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
y_limits_exec <- range(step_rms_df$RMS[step_rms_df$Step <= 6], na.rm = TRUE)

step_rms_df %>%
  filter(Step <= 6) %>%
  ggplot(aes(x = as.factor(Block), y = RMS, fill = as.factor(Block))) +
  geom_boxplot(outlier.shape = NA, alpha = 0.8) +
  #geom_jitter(width = 0.2, alpha = 0.4) +
  facet_wrap(~Axis) +
  ylim(0, 3.5) +
  labs(
    title = "RMS CoM Acceleration for First 6 Steps by Block",
    x = "Block", y = "RMS Acceleration (m/s²)", fill = "Block"
  ) +
  theme_minimal()
Warning: Removed 161 rows containing non-finite outside the scale range
(`stat_boxplot()`).

rms_summary_table <- rms_data %>%
  group_by(Block, phase) %>%
  summarise(
    mean_rms_x = mean(rms_x, na.rm = TRUE),
    sd_rms_x = sd(rms_x, na.rm = TRUE),
    mean_rms_y = mean(rms_y, na.rm = TRUE),
    sd_rms_y = sd(rms_y, na.rm = TRUE),
    mean_rms_z = mean(rms_z, na.rm = TRUE),
    sd_rms_z = sd(rms_z, na.rm = TRUE),
    .groups = "drop"
  )

# View the result
print(rms_summary_table)
# A tibble: 6 × 8
  Block phase       mean_rms_x sd_rms_x mean_rms_y sd_rms_y mean_rms_z sd_rms_z
  <dbl> <chr>            <dbl>    <dbl>      <dbl>    <dbl>      <dbl>    <dbl>
1     1 Execution       0.849    0.397      0.896    0.498      1.39     0.744 
2     1 Preparation     0.0598   0.0575     0.0584   0.0590     0.0572   0.0839
3     2 Execution       0.804    0.425      0.857    0.473      1.41     0.764 
4     2 Preparation     0.112    0.227      0.121    0.235      0.158    0.430 
5     3 Execution       0.684    0.307      0.717    0.313      1.23     0.566 
6     3 Preparation     0.185    0.327      0.196    0.356      0.284    0.605 

#SD

rms_data_long <- rms_data %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

# Now group by Block, Phase, Trial, and Axis
rms_summary_phase <- rms_data_long %>%
  group_by(Block, phase, trial, Axis) %>%
  summarise(
    mean_RMS = mean(RMS, na.rm = TRUE),
    sd_RMS = sd(RMS, na.rm = TRUE),
    n = n(),
    se_RMS = sd_RMS / sqrt(n),
    .groups = "drop"
  )


mean_sd_per_block_phase <- rms_summary_phase %>%
  group_by(Block, phase, Axis) %>%
  summarise(
    mean_sd = mean(sd_RMS, na.rm = TRUE),
    .groups = "drop"
  )



ggplot(mean_sd_per_block_phase, aes(x = factor(Block), y = mean_sd, fill = phase)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.7) +
  facet_wrap(~ Axis, nrow = 1) +
  ylim(0, 1) +
  labs(
    title = "Mean Standard Deviation of CoM RMS - Preparation vs Execution",
    x = "Block",
    y = "Mean SD of RMS Acceleration (m/s²)",
    fill = "Phase"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    strip.text = element_text(face = "bold"),
    legend.position = "top"
  )

sd in execution is going down while in preparation its going up

#Early preparation phase increases in variability might reflect exploratory control—the system is still learning how to best prepare for the upcoming motor act. This aligns with findings from Stergiou & Decker, who describe optimal movement variability as a balance between stability and flexibility, typically peaking mid-learning​

#Decreased execution phase variability could signal motor consolidation. The system has found efficient movement patterns and now reduces variability to exploit them—a concept echoed in dynamical systems theory and observed in Hildebrandt’s analysis of Hurst exponents as well

rms_distribution_summary <- rms_data %>%
  group_by(subject, Block, phase) %>%
  summarise(
    mean_x = mean(rms_x, na.rm = TRUE),
    sd_x   = sd(rms_x, na.rm = TRUE),
    cv_x   = sd_x / mean_x,
    skew_x = skewness(rms_x, na.rm = TRUE),
    kurt_x = kurtosis(rms_x, na.rm = TRUE),

    mean_y = mean(rms_y, na.rm = TRUE),
    sd_y   = sd(rms_y, na.rm = TRUE),
    cv_y   = sd_y / mean_y,
    skew_y = skewness(rms_y, na.rm = TRUE),
    kurt_y = kurtosis(rms_y, na.rm = TRUE),

    mean_z = mean(rms_z, na.rm = TRUE),
    sd_z   = sd(rms_z, na.rm = TRUE),
    cv_z   = sd_z / mean_z,
    skew_z = skewness(rms_z, na.rm = TRUE),
    kurt_z = kurtosis(rms_z, na.rm = TRUE),
    .groups = "drop"
  )

#CV

# CV - X Axis
ggplot(rms_distribution_summary, aes(x = factor(Block), y = cv_x, color = phase)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4) +
  ylim(0, 3.5) +
  labs(
    title = "Coefficient of Variation (RMS X-Axis)",
    x = "Block",
    y = "CV"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

# CV - Y Axis
ggplot(rms_distribution_summary, aes(x = factor(Block), y = cv_y, color = phase)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4) +
  ylim(0, 3.5) +
  labs(
    title = "Coefficient of Variation (RMS Y-Axis)",
    x = "Block",
    y = "CV"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

# CV - Z Axis
ggplot(rms_distribution_summary, aes(x = factor(Block), y = cv_z, color = phase)) +
  geom_boxplot(outlier.shape = NA) +
  geom_jitter(width = 0.2, alpha = 0.4) +
  ylim(0, 3.5) +
  labs(
    title = "Coefficient of Variation (RMS Z-Axis)",
    x = "Block",
    y = "CV"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

#Skewness = 0 → perfectly symmetric distribution #Skewness > 0 → right-skewed (more small values, few large outliers) #Skewness < 0 → left-skewed (more large values, few small outliers)

ggplot(rms_distribution_summary, aes(x = skew_x, fill = phase)) +
  geom_density(alpha = 0.6) +
  facet_wrap(~ Block, nrow = 1) +
  xlim(-2.5, 5.5) +
  labs(
    title = "Distribution of Skewness - CoM RMS X Axis",
    x = "Skewness",
    y = "Density",
    fill = "Phase"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

ggplot(rms_distribution_summary, aes(x = skew_y, fill = phase)) +
  geom_density(alpha = 0.6) +
  facet_wrap(~ Block, nrow = 1) +
  xlim(-2.5, 5.5) +
  labs(
    title = "Distribution of Skewness - CoM RMS Y Axis",
    x = "Skewness",
    y = "Density",
    fill = "Phase"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

ggplot(rms_distribution_summary, aes(x = skew_z, fill = phase)) +
  geom_density(alpha = 0.6) +
  facet_wrap(~ Block, nrow = 1) +
  xlim(-2.5, 5.5) +
  labs(
    title = "Distribution of Skewness - CoM RMS Z Axis",
    x = "Skewness",
    y = "Density",
    fill = "Phase"
  ) +
  theme_minimal() +
  theme(text = element_text(size = 12))

#anova trials over block

# List to collect results
anova_results <- list()

# Helper function to run lm and anova per axis
analyze_block <- function(data, block_number) {
  
  cat("\n\n--- ANOVA Results for Block", block_number, "---\n")
  
  for (axis in c("rms_x", "rms_y", "rms_z")) {
    
    cat("\nAxis:", axis, "\n")
    
    model <- lm(as.formula(paste(axis, "~ trial")), data = data)
    anova_result <- anova(model)
    
    print(anova_result)
    
    # Save model and anova into list
    anova_results[[paste0("Block", block_number, "_", axis)]] <<- list(model = model, anova = anova_result)
  }
}

# --- Apply to each block

# Block 1
block1_data <- rms_data %>%
  filter(Block == 1, phase == "Execution")

analyze_block(block1_data, block_number = 1)


--- ANOVA Results for Block 1 ---

Axis: rms_x 
Analysis of Variance Table

Response: rms_x
           Df Sum Sq  Mean Sq F value Pr(>F)
trial       1  0.052 0.052354  0.3319 0.5648
Residuals 601 94.814 0.157761               

Axis: rms_y 
Analysis of Variance Table

Response: rms_y
           Df  Sum Sq Mean Sq F value  Pr(>F)  
trial       1   0.708 0.70831  2.8702 0.09075 .
Residuals 601 148.317 0.24678                  
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Axis: rms_z 
Analysis of Variance Table

Response: rms_z
           Df Sum Sq Mean Sq F value Pr(>F)  
trial       1   2.65 2.65465  4.8271 0.0284 *
Residuals 601 330.52 0.54994                 
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Block 2
block2_data <- rms_data %>%
  filter(Block == 2, phase == "Execution")

analyze_block(block2_data, block_number = 2)


--- ANOVA Results for Block 2 ---

Axis: rms_x 
Analysis of Variance Table

Response: rms_x
           Df Sum Sq Mean Sq F value    Pr(>F)    
trial       1  5.445  5.4450  32.131 2.477e-08 ***
Residuals 485 82.190  0.1695                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Axis: rms_y 
Analysis of Variance Table

Response: rms_y
           Df  Sum Sq Mean Sq F value    Pr(>F)    
trial       1   6.078  6.0782   28.69 1.314e-07 ***
Residuals 485 102.751  0.2119                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Axis: rms_z 
Analysis of Variance Table

Response: rms_z
           Df  Sum Sq Mean Sq F value  Pr(>F)    
trial       1   9.253  9.2533  16.358 6.1e-05 ***
Residuals 485 274.358  0.5657                    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Block 3
block3_data <- rms_data %>%
  filter(Block == 3, phase == "Execution")

analyze_block(block3_data, block_number = 3)


--- ANOVA Results for Block 3 ---

Axis: rms_x 
Analysis of Variance Table

Response: rms_x
           Df  Sum Sq Mean Sq F value    Pr(>F)    
trial       1  5.1971  5.1971  64.675 1.254e-14 ***
Residuals 364 29.2498  0.0804                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Axis: rms_y 
Analysis of Variance Table

Response: rms_y
           Df  Sum Sq Mean Sq F value    Pr(>F)    
trial       1  6.0423  6.0423  73.772 2.603e-16 ***
Residuals 364 29.8137  0.0819                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Axis: rms_z 
Analysis of Variance Table

Response: rms_z
           Df Sum Sq Mean Sq F value    Pr(>F)    
trial       1 17.195 17.1947  62.669 2.984e-14 ***
Residuals 364 99.872  0.2744                      
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Compute mean and standard deviation per Trial × Axis × Block
rms_summary <- rms_data_long %>%
  group_by(Block, trial, Axis) %>%
  summarise(
    mean_RMS = mean(RMS, na.rm = TRUE),
    sd_RMS = sd(RMS, na.rm = TRUE),
    n = n(),
    se_RMS = sd_RMS / sqrt(n),  # Standard Error
    .groups = "drop"
  )

# Filter for X axis
rms_summary_x <- rms_summary %>% filter(Axis == "x")

ggplot(rms_summary_x, aes(x = trial, y = mean_RMS, color = factor(Block))) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = mean_RMS - se_RMS, ymax = mean_RMS + se_RMS), width = 0.2) +
  ylim(0, 2.5) +
  labs(
    title = "CoM RMS Acceleration (X Axis) - Blocks 1, 2, 3",
    x = "Trial",
    y = "Mean RMS Acceleration (m/s²)",
    color = "Block"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    legend.position = "top"
  )

# Filter for Y axis
rms_summary_y <- rms_summary %>% filter(Axis == "y")

ggplot(rms_summary_y, aes(x = trial, y = mean_RMS, color = factor(Block))) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = mean_RMS - se_RMS, ymax = mean_RMS + se_RMS), width = 0.2) +
  ylim(0, 2.5) +
  labs(
    title = "CoM RMS Acceleration (Y Axis) - Blocks 1, 2, 3",
    x = "Trial",
    y = "Mean RMS Acceleration (m/s²)",
    color = "Block"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    legend.position = "top"
  )

# Filter for Z axis
rms_summary_z <- rms_summary %>% filter(Axis == "z")

ggplot(rms_summary_z, aes(x = trial, y = mean_RMS, color = factor(Block))) +
  geom_point(size = 3) +
  geom_errorbar(aes(ymin = mean_RMS - se_RMS, ymax = mean_RMS + se_RMS), width = 0.2) +
  ylim(0, 2.5) +
  labs(
    title = "CoM RMS Acceleration (Z Axis) - Blocks 1, 2, 3",
    x = "Trial",
    y = "Mean RMS Acceleration (m/s²)",
    color = "Block"
  ) +
  theme_minimal() +
  theme(
    text = element_text(size = 14),
    legend.position = "top"
  )

2best and 2worst participants comparison

# Filter data for Subjects 7, 17, 20, 22 and reshape
compare_subjects1 <- rms_data %>%
  filter(subject %in% c(7, 17, 20, 22)) %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(
    Axis = gsub("rms_", "", Axis),
    subject = as.factor(subject)
  )


compare_summary1 <- compare_subjects1 %>%
  group_by(subject, phase, Block, Axis) %>%
  summarise(
    mean_RMS = mean(RMS, na.rm = TRUE),
    sd_RMS = sd(RMS, na.rm = TRUE),
    cv_RMS = sd_RMS / mean_RMS,
    .groups = "drop"
  )



ggplot(compare_summary1, aes(x = factor(Block), y = mean_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 1.5) +
  labs(
    title = "Mean CoM RMS Acceleration (Subjects 7, 17, 20, 22)",
    x = "Block",
    y = "Mean RMS (m/s²)",
    fill = "Subject"
  ) +
  theme_minimal()

ggplot(compare_summary1, aes(x = factor(Block), y = sd_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 0.6) +
  labs(
    title = "Standard Deviation of CoM RMS (Subjects 7, 17, 20, 22)",
    x = "Block",
    y = "SD of RMS (m/s²)",
    fill = "Subject"
  ) +
  theme_minimal()

ggplot(compare_summary1, aes(x = factor(Block), y = cv_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 1) +
  labs(
    title = "Coefficient of Variation of CoM RMS (Subjects 7, 17, 20, 22)",
    x = "Block",
    y = "CV of RMS",
    fill = "Subject"
  ) +
  theme_minimal()
Warning: Removed 30 rows containing missing values or values outside the scale range
(`geom_bar()`).

#best & worst

# Filter for the two participants
compare_subjects2 <- rms_data %>%
  filter(subject %in% c(17, 22)) %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(
    Axis = gsub("rms_", "", Axis),
    subject = as.factor(subject)
  )




compare_summary2 <- compare_subjects2 %>%
  group_by(subject, phase, Block, Axis) %>%
  summarise(
    mean_RMS = mean(RMS, na.rm = TRUE),
    sd_RMS = sd(RMS, na.rm = TRUE),
    cv_RMS = sd_RMS / mean_RMS,
    .groups = "drop"
  )


ggplot(compare_summary2, aes(x = factor(Block), y = mean_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 1) +
  labs(
    title = "Mean CoM RMS Acceleration: Subject 17 vs 22",
    x = "Block",
    y = "Mean RMS (m/s²)",
    fill = "Subject"
  ) +
  theme_minimal()

ggplot(compare_summary2, aes(x = factor(Block), y = sd_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 0.5) +
  labs(
    title = "Standard Deviation of CoM RMS: Subject 17 vs 22",
    x = "Block",
    y = "SD of RMS (m/s²)",
    fill = "Subject"
  ) +
  theme_minimal()

ggplot(compare_summary2, aes(x = factor(Block), y = cv_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 1) +
  labs(
    title = "Coefficient of Variation of CoM RMS: Subject 17 vs 22",
    x = "Block",
    y = "CV of RMS",
    fill = "Subject"
  ) +
  theme_minimal()
Warning: Removed 15 rows containing missing values or values outside the scale range
(`geom_bar()`).

the two best 17(best) & 7 (2nd best)

# Filter for the two participants
compare_subjects3 <- rms_data %>%
  filter(subject %in% c(17, 7)) %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(
    Axis = gsub("rms_", "", Axis),
    subject = as.factor(subject)
  )




compare_summary3 <- compare_subjects3 %>%
  group_by(subject, phase, Block, Axis) %>%
  summarise(
    mean_RMS = mean(RMS, na.rm = TRUE),
    sd_RMS = sd(RMS, na.rm = TRUE),
    cv_RMS = sd_RMS / mean_RMS,
    .groups = "drop"
  )


ggplot(compare_summary3, aes(x = factor(Block), y = mean_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 1.5) +
  labs(
    title = "Mean CoM RMS Acceleration: Subject 17 vs 22",
    x = "Block",
    y = "Mean RMS (m/s²)",
    fill = "Subject"
  ) +
  theme_minimal()

ggplot(compare_summary3, aes(x = factor(Block), y = sd_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 0.5) +
  labs(
    title = "Standard Deviation of CoM RMS: Subject 17 vs 22",
    x = "Block",
    y = "SD of RMS (m/s²)",
    fill = "Subject"
  ) +
  theme_minimal()
Warning: Removed 1 row containing missing values or values outside the scale range
(`geom_bar()`).

ggplot(compare_summary3, aes(x = factor(Block), y = cv_RMS, fill = subject)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_grid(phase ~ Axis) +
  ylim(0, 1) +
  labs(
    title = "Coefficient of Variation of CoM RMS: Subject 17 vs 22",
    x = "Block",
    y = "CV of RMS",
    fill = "Subject"
  ) +
  theme_minimal()
Warning: Removed 14 rows containing missing values or values outside the scale range
(`geom_bar()`).