Group CoM Analysis 2

library(tidyverse)
library(tidyr)
library(dplyr)
library(readr)
library(purrr)
library(ggplot2)
library(e1071)
library(emmeans)
library(lme4)
library(lmerTest)
library(patchwork)
library(brms)
library(bayesplot)

#Root mean square
#Root mean square (RMS) of acceleration is an often-used value in gait analysis research to quantify the magnitude of body segment accelerations(Menz et al., 2003; Mizuike et al., 2009; Sekine et al., 2013; Senden et al., 2012). RMS can be easily computed with the raw accelerometer data and is seen as an uncomplicated approach to analyse the magnitude of accelerations in each axis(Mizuike et al., 2009; Sekine et al., 2013)). Although this study does not directly analyses gait performance, the movements performed in the ds-dsp task resemble walking movements and therefore it is seen as a suitable approach for the following analysis. In the present study, RMS of the center of mass acceleration is used to evaluate the movement characteristics across task phases and sequence lengths, providing insights into movement control and paired with its standard deviation movement variability.

# -------- Load Data --------
# Cleaned
clean_folder <- "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged/Cleaned"
clean_files <- list.files(clean_folder, pattern = "_clean\\.csv$", full.names = TRUE)
all_data_clean <- map_dfr(clean_files, read_csv)

# Uncleaned
unclean_folder <- "/Users/can/Documents/Uni/Thesis/Data/Xsens/cleaned_csv/merged/Cleaned"
unclean_files <- list.files(unclean_folder, pattern = "_all\\.csv$", full.names = TRUE)
all_data_unclean <- map_dfr(unclean_files, read_csv)

# -------- Tag Phases --------
tag_trial_phases <- function(df) {
  df %>%
    group_by(subject, Block, trial) %>%
    mutate(
      start_ms = ms[which(Marker.Text == 27)[1]],
      end_ms = ms[which(Marker.Text == 26)[1]],
      phase = case_when(
        !is.na(start_ms) & !is.na(end_ms) & ms >= start_ms & ms <= end_ms ~ "Execution",
        !is.na(start_ms) & ms >= (start_ms - 1000) & ms < start_ms ~ "Preparation",
        TRUE ~ NA_character_
      )
    ) %>%
    ungroup() %>%
    filter(!is.na(phase))
}

# Apply to both datasets
clean_tagged <- tag_trial_phases(all_data_clean)
unclean_tagged <- tag_trial_phases(all_data_unclean)

# -------- Combine & Flag Dataset --------
clean_tagged <- clean_tagged %>% mutate(DataType = "Cleaned")
unclean_tagged <- unclean_tagged %>% mutate(DataType = "Uncleaned")

combined_tagged <- bind_rows(clean_tagged, unclean_tagged)

# -------- Compute RMS --------
rms_data <- combined_tagged %>%
  group_by(DataType, subject, Block, trial, phase) %>%
  summarise(
    rms_x = sqrt(mean(CoM.acc.x^2, na.rm = TRUE)),
    rms_y = sqrt(mean(CoM.acc.y^2, na.rm = TRUE)),
    rms_z = sqrt(mean(CoM.acc.z^2, na.rm = TRUE)),
    .groups = "drop"
  ) %>%
  group_by(DataType, subject, Block, phase) %>%
  arrange(trial) %>%
  mutate(TrialInBlock = row_number()) %>%
  ungroup()

# -------- Summary Stats --------
group_rms_summary <- rms_data %>%
  group_by(DataType, Block, TrialInBlock, phase) %>%
  summarise(
    mean_rms_x = mean(rms_x, na.rm = TRUE),
    se_rms_x = sd(rms_x, na.rm = TRUE) / sqrt(n()),
    mean_rms_y = mean(rms_y, na.rm = TRUE),
    se_rms_y = sd(rms_y, na.rm = TRUE) / sqrt(n()),
    mean_rms_z = mean(rms_z, na.rm = TRUE),
    se_rms_z = sd(rms_z, na.rm = TRUE) / sqrt(n()),
    .groups = "drop"
  )

# -------- Compute RMS Function --------
compute_rms <- function(tagged_df) {
  tagged_df %>%
    group_by(subject, Block, trial, phase) %>%
    summarise(
      rms_x = sqrt(mean(CoM.acc.x^2, na.rm = TRUE)),
      rms_y = sqrt(mean(CoM.acc.y^2, na.rm = TRUE)),
      rms_z = sqrt(mean(CoM.acc.z^2, na.rm = TRUE)),
      .groups = "drop"
    ) %>%
    group_by(subject, Block, phase) %>%
    arrange(trial) %>%
    mutate(TrialInBlock = row_number()) %>%
    ungroup()
}

# -------- Plotting (Cleaned & Uncleaned Separately) --------

plot_rms_axis <- function(data, axis, y_label, title_prefix) {
  axis_mean <- paste0("mean_rms_", axis)
  axis_se <- paste0("se_rms_", axis)

  ggplot(data, aes(x = TrialInBlock, y = .data[[axis_mean]], color = as.factor(Block))) +
    geom_line(size = 1) +
    geom_ribbon(aes(ymin = .data[[axis_mean]] - .data[[axis_se]],
                    ymax = .data[[axis_mean]] + .data[[axis_se]],
                    fill = as.factor(Block)),
                alpha = 0.2, color = NA) +
    facet_wrap(~ phase, nrow = 1) +
    geom_vline(xintercept = max(data$TrialInBlock[data$Block == 3], na.rm = TRUE), linetype = "dashed") +
    ylim(0, 2) +
    labs(
      title = paste(title_prefix, "-", toupper(axis), "Axis"),
      x = "Trial",
      y = y_label,
      color = "Block",
      fill = "Block"
    ) +
    theme_minimal() +
    theme(text = element_text(size = 12))
}

# Filter and plot for each dataset type
for (dt in unique(group_rms_summary$DataType)) {
  dt_data <- group_rms_summary %>% filter(DataType == dt)
  print(plot_rms_axis(dt_data, "x", "RMS Acceleration", paste("Mean RMS of CoM Acceleration (", dt, ")")))
  print(plot_rms_axis(dt_data, "y", "RMS Acceleration", paste("Mean RMS of CoM Acceleration (", dt, ")")))
  print(plot_rms_axis(dt_data, "z", "RMS Acceleration", paste("Mean RMS of CoM Acceleration (", dt, ")")))
}

#RMS Acceleration Box Plots

# -------- Execution Phase Boxplots (All Blocks) --------
for (dt in unique(rms_data$DataType)) {
  exec_data <- rms_data %>% filter(DataType == dt, phase == "Execution")
  for (axis in c("x", "y", "z")) {
    axis_col <- paste0("rms_", axis)
    gg <- ggplot(exec_data, aes(x = factor(Block), y = .data[[axis_col]], fill = phase)) +
      geom_boxplot(alpha = 0.7, outlier.shape = NA) +
      geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
      geom_vline(xintercept = 3.5, linetype = "dashed", color = "black") +
      ylim(0, 2.5) +
      labs(
        title = paste("Execution Phase: CoM Acceleration RMS -", toupper(axis), "Axis (", dt, ")"),
        x = "Block",
        y = "RMS Acceleration"
      ) +
      theme_minimal() +
      theme(text = element_text(size = 12), legend.position = "none")
    print(gg)
  }
}

# -------- Preparation Phase Extraction & Boxplots --------
prep_window_ms <- 1000

extract_preparation_phase <- function(df) {
  df %>%
    group_split(subject, Block, trial) %>%
    map_dfr(function(trial_df) {
      exec_start_row <- which(trial_df$Marker.Text == 27)[1]
      if (!is.na(exec_start_row) && exec_start_row > 1) {
        exec_start_ms <- trial_df$ms[exec_start_row]
        prep_df <- trial_df %>%
          filter(ms >= (exec_start_ms - prep_window_ms) & ms < exec_start_ms) %>%
          mutate(phase = "Preparation")
        return(prep_df)
      } else {
        return(NULL)
      }
    })
}

for (dt in unique(combined_tagged$DataType)) {
  df <- combined_tagged %>% filter(DataType == dt)
  prep_data <- extract_preparation_phase(df)
  prep_rms <- prep_data %>%
    group_by(subject, Block, trial, phase) %>%
    summarise(
      rms_x = sqrt(mean(CoM.acc.x^2, na.rm = TRUE)),
      rms_y = sqrt(mean(CoM.acc.y^2, na.rm = TRUE)),
      rms_z = sqrt(mean(CoM.acc.z^2, na.rm = TRUE)),
      .groups = "drop"
    )

  for (axis in c("x", "y", "z")) {
    axis_col <- paste0("rms_", axis)
    gg <- ggplot(prep_rms, aes(x = factor(Block), y = .data[[axis_col]])) +
      geom_boxplot(fill = ifelse(axis == "x", "skyblue", ifelse(axis == "y", "salmon", "seagreen")),
                   alpha = 0.7, outlier.shape = NA) +
      geom_jitter(width = 0.2, alpha = 0.4, size = 0.6) +
      geom_vline(xintercept = 3.5, linetype = "dashed", color = "black") +
      ylim(0, 0.5) +
      labs(
        title = paste("Preparation Phase: CoM RMS -", toupper(axis), "Axis (", dt, ")"),
        x = "Block",
        y = "RMS Acceleration"
      ) +
      theme_minimal()
    print(gg)
  }
}

#RMS Acceleration trends in Block

# -------- Acceleration Trends over Trials (Execution & Preparation) --------

plot_acc_trends <- function(df, data_type, phase_filter, y_limit, title_text) {
  plot_list <- list()
  filtered_df <- df %>% filter(DataType == data_type)
  for (block in 1:5) {
    block_data <- filtered_df %>% filter(phase == phase_filter, Block == block)
    if (nrow(block_data) > 0) {
      block_long <- block_data %>%
        pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
        mutate(Axis = gsub("rms_", "", Axis))

      p <- ggplot(block_long, aes(x = trial, y = RMS)) +
        geom_point(alpha = 0.6) +
        geom_smooth(method = "lm", se = TRUE, color = case_when(
          block == 1 ~ "blue",
          block == 2 ~ "green",
          block == 3 ~ "red",
          block == 4 ~ "purple",
          block == 5 ~ "orange"
        )) +
        facet_wrap(~ Axis, nrow = 1) +
        ylim(0, y_limit) +
        labs(
          title = paste("Block", block),
          x = "Trial",
          y = "RMS Acceleration (m/s²)"
        ) +
        theme_minimal()

      plot_list[[paste0("Block", block)]] <- p
    }
  }

  if (length(plot_list) > 0) {
    wrap_plots(plot_list, nrow = 1) +
      plot_annotation(
        title = paste(title_text, "-", data_type),
        theme = theme(plot.title = element_text(hjust = 0.5, size = 16))
      )
  } else {
    message("No plots to display. Check that data exists for the requested phase and blocks.")
  }
}

# Execution & Preparation Trends for both datasets
for (type in c("Cleaned", "Uncleaned")) {
  print(plot_acc_trends(rms_data, type, "Execution", 2.5, "CoM RMS Acceleration Trends Across Trials - Execution Phase (All Blocks)"))
  print(plot_acc_trends(rms_data, type, "Preparation", 1, "CoM RMS Acceleration Trends Across Trials - Preparation Phase (All Blocks)"))
}

SD Acceleration Boxplots (movement variability)

# -------- Standard Deviation Summary & Plot --------
rms_data_long <- rms_data %>%
  pivot_longer(cols = c(rms_x, rms_y, rms_z), names_to = "Axis", values_to = "RMS") %>%
  mutate(Axis = gsub("rms_", "", Axis))

rms_summary_phase <- rms_data_long %>%
  group_by(DataType, Block, phase, trial, Axis) %>%
  summarise(
    mean_RMS = mean(RMS, na.rm = TRUE),
    sd_RMS = sd(RMS, na.rm = TRUE),
    n = n(),
    se_RMS = sd_RMS / sqrt(n),
    .groups = "drop"
  )

mean_sd_per_block_phase <- rms_summary_phase %>%
  group_by(DataType, Block, phase, Axis) %>%
  summarise(mean_sd = mean(sd_RMS, na.rm = TRUE), .groups = "drop")

# Plot for both datasets
for (dtype in unique(mean_sd_per_block_phase$DataType)) {
  plot_data <- mean_sd_per_block_phase %>% filter(DataType == dtype)
  print(
    ggplot(plot_data, aes(x = factor(Block), y = mean_sd, fill = phase)) +
      geom_bar(stat = "identity", position = "dodge", width = 0.7) +
      geom_vline(xintercept = 3.5, linetype = "dashed", color = "black") +
      facet_wrap(~ Axis, nrow = 1) +
      ylim(0, 1) +
      labs(
        title = paste("Mean Standard Deviation of CoM RMS -", dtype),
        x = "Block",
        y = "Mean SD of RMS Acceleration (m/s²)",
        fill = "Phase"
      ) +
      theme_minimal() +
      theme(
        text = element_text(size = 14),
        strip.text = element_text(face = "bold"),
        legend.position = "top"
      )
  )
}

#CV Acceleration

# -------- CV, Skewness, Kurtosis Summary --------
rms_distribution_summary <- rms_data %>%
  group_by(DataType, subject, Block, phase) %>%
  summarise(
    mean_x = mean(rms_x, na.rm = TRUE),
    sd_x   = sd(rms_x, na.rm = TRUE),
    cv_x   = sd_x / mean_x,
    skew_x = skewness(rms_x, na.rm = TRUE),
    kurt_x = kurtosis(rms_x, na.rm = TRUE),

    mean_y = mean(rms_y, na.rm = TRUE),
    sd_y   = sd(rms_y, na.rm = TRUE),
    cv_y   = sd_y / mean_y,
    skew_y = skewness(rms_y, na.rm = TRUE),
    kurt_y = kurtosis(rms_y, na.rm = TRUE),

    mean_z = mean(rms_z, na.rm = TRUE),
    sd_z   = sd(rms_z, na.rm = TRUE),
    cv_z   = sd_z / mean_z,
    skew_z = skewness(rms_z, na.rm = TRUE),
    kurt_z = kurtosis(rms_z, na.rm = TRUE),
    .groups = "drop"
  )

# -------- CV Plots --------
cv_plot <- function(df, axis_label, dtype_label) {
  ggplot(df, aes(x = factor(Block), y = .data[[paste0("cv_", axis_label)]], color = phase)) +
    geom_boxplot(outlier.shape = NA) +
    geom_jitter(width = 0.2, alpha = 0.4) +
    ylim(0, 3.5) +
    labs(
      title = paste("Coefficient of Variation (RMS", toupper(axis_label), "-Axis) -", dtype_label),
      x = "Block",
      y = "CV",
      color = "Phase"
    ) +
    theme_minimal() +
    theme(text = element_text(size = 12))
}

# Generate and print for both datasets
for (dtype in unique(rms_distribution_summary$DataType)) {
  df <- rms_distribution_summary %>% filter(DataType == dtype)
  print(cv_plot(df, "x", dtype))
  print(cv_plot(df, "y", dtype))
  print(cv_plot(df, "z", dtype))
}

#step level rms

# -------- Step-Level RMS Per Block & Axis (Corrected & Separated) --------
step_counts <- tibble(
  Block = c(1, 2, 3, 4, 5),
  Steps = c(6, 12, 18, 18, 18)
)

# Helper function to compute step-wise RMS for one dataset
generate_step_rms <- function(data, label) {
  data %>%
    filter(phase == "Execution", Marker.Text %in% c(14, 15, 16, 17)) %>%
    inner_join(step_counts, by = "Block") %>%
    group_by(subject, Block, trial) %>%
    mutate(Step = cut_number(row_number(), n = unique(Steps), labels = FALSE)) %>%
    pivot_longer(cols = starts_with("CoM.acc."), names_to = "AxisRaw", values_to = "Acc") %>%
    mutate(Axis = case_when(
      AxisRaw == "CoM.acc.x" ~ "x",
      AxisRaw == "CoM.acc.y" ~ "y",
      AxisRaw == "CoM.acc.z" ~ "z"
    )) %>%
    group_by(subject, Block, Step, Axis) %>%
    summarise(
      rms = sqrt(mean(Acc^2, na.rm = TRUE)),
      .groups = "drop"
    ) %>%
    group_by(Block, Step, Axis) %>%
    summarise(
      mean_rms = mean(rms, na.rm = TRUE),
      sd_rms = sd(rms, na.rm = TRUE),
      DataType = label,
      .groups = "drop"
    )
}





step_rms_cleaned <- generate_step_rms(clean_tagged, "Cleaned")
step_rms_uncleaned <- generate_step_rms(unclean_tagged, "Uncleaned")

# -------- Plotting Function --------
plot_step_rms_dataset <- function(data, dataset_label) {
  plots <- lapply(c("x", "y", "z"), function(axis) {
    ggplot(filter(data, Axis == axis),
           aes(x = as.numeric(Step), y = mean_rms)) +
      geom_point(color = "black", size = 2) +
      geom_errorbar(aes(ymin = mean_rms - sd_rms, ymax = mean_rms + sd_rms),
                    width = 0.3) +
      facet_wrap(~ Block, scales = "free_x") +
      labs(
        title = paste(dataset_label, "- RMS Acceleration per Step (", toupper(axis), "Axis)", sep = ""),
        x = "Step Number",
        y = "RMS Acceleration (m/s²)"
      ) +
      ylim(0, 2.5) +
      theme_minimal() +
      theme(text = element_text(size = 12))
  })
  walk(plots, print)
}

# Generate and print plots
plot_step_rms_dataset(step_rms_cleaned, "Cleaned")

plot_step_rms_dataset(step_rms_uncleaned, "Uncleaned")

##Models #1. LMM to assess whether block and phase significantly influence rms (per axis)

# --- Function: Run Random Intercept LMMs and Extract ANOVA P-Values ---
extract_rms_interceptonly_pvalues <- function(data, label) {
  tagged <- tag_trial_phases(data)
  rms_data <- compute_rms(tagged) %>%
    mutate(Block = factor(Block), subject = as.factor(subject))

  # Fit LMMs with random intercepts for subject and TrialInBlock
  model_x <- lmer(rms_x ~ Block * phase + (1 | subject) + (1 | TrialInBlock), data = rms_data, REML = FALSE)
  model_y <- lmer(rms_y ~ Block * phase + (1 | subject) + (1 | TrialInBlock), data = rms_data, REML = FALSE)
  model_z <- lmer(rms_z ~ Block * phase + (1 | subject) + (1 | TrialInBlock), data = rms_data, REML = FALSE)

  # Extract ANOVA results
  anova_x <- anova(model_x)
  anova_y <- anova(model_y)
  anova_z <- anova(model_z)

  # Format output
  tibble(
    Dataset = label,
    Axis = c("X", "Y", "Z"),
    `Block p-value` = c(anova_x["Block", "Pr(>F)"], anova_y["Block", "Pr(>F)"], anova_z["Block", "Pr(>F)"]),
    `Phase p-value` = c(anova_x["phase", "Pr(>F)"], anova_y["phase", "Pr(>F)"], anova_z["phase", "Pr(>F)"]),
    `Interaction p-value` = c(anova_x["Block:phase", "Pr(>F)"], anova_y["Block:phase", "Pr(>F)"], anova_z["Block:phase", "Pr(>F)"])
  )
}


# Run LMMs with random intercepts for cleaned and uncleaned data
clean_pvals_interceptonly <- extract_rms_interceptonly_pvalues(all_data_clean, "Cleaned")

boundary (singular) fit: see help('isSingular')
boundary (singular) fit: see help('isSingular')
boundary (singular) fit: see help('isSingular')

unclean_pvals_interceptonly <- extract_rms_interceptonly_pvalues(all_data_unclean, "Uncleaned")

# Combine and show results
combined_interceptonly_pvals <- bind_rows(clean_pvals_interceptonly, unclean_pvals_interceptonly)
print(combined_interceptonly_pvals)

# A tibble: 6 × 5
  Dataset   Axis  `Block p-value` `Phase p-value` `Interaction p-value`
  <chr>     <chr>           <dbl>           <dbl>                 <dbl>
1 Cleaned   X            5.47e- 8               0              1.30e-46
2 Cleaned   Y            9.06e-10               0              2.46e-44
3 Cleaned   Z            2.24e- 4               0              5.00e-25
4 Uncleaned X            6.11e-19               0              5.85e-68
5 Uncleaned Y            1.15e-21               0              3.53e-63
6 Uncleaned Z            1.16e-14               0              4.19e-36

#results:
#block p-value <0.05 :This suggests learning or adaptation effects across blocks
#phase p-value 0 because it is either execution or preparation 
#interaction <0.05 :this suggest the effect of block is different depending on phase

#clean vs unclean dataset: unclean p-values< clean p-values: probably because of more variability

#2. Random slope model to assess whether block and phase significantly influence rms (per axis)

# --- Function to Run Random Slope LMMs and Extract ANOVA P-Values ---
extract_rms_randomslope_pvalues <- function(data, label) {
  tagged <- tag_trial_phases(data)
  rms_data <- compute_rms(tagged) %>%
    mutate(Block = factor(Block), subject = as.factor(subject))

  # Random slope for Block | subject and random intercept for TrialInBlock
  rsmodel_x <- lmer(rms_x ~ Block * phase + (1 + Block | subject) + (1 | TrialInBlock), data = rms_data, REML = FALSE)
  rsmodel_y <- lmer(rms_y ~ Block * phase + (1 + Block | subject) + (1 | TrialInBlock), data = rms_data, REML = FALSE)
  rsmodel_z <- lmer(rms_z ~ Block * phase + (1 + Block | subject) + (1 | TrialInBlock), data = rms_data, REML = FALSE)

  # Get p-values
  rsanova_x <- anova(rsmodel_x)
  rsanova_y <- anova(rsmodel_y)
  rsanova_z <- anova(rsmodel_z)

  # Format output
  tibble(
    Dataset = label,
    Axis = c("X", "Y", "Z"),
    `Block p-value` = c(rsanova_x["Block", "Pr(>F)"], rsanova_y["Block", "Pr(>F)"], rsanova_z["Block", "Pr(>F)"]),
    `Phase p-value` = c(rsanova_x["phase", "Pr(>F)"], rsanova_y["phase", "Pr(>F)"], rsanova_z["phase", "Pr(>F)"]),
    `Interaction p-value` = c(rsanova_x["Block:phase", "Pr(>F)"], rsanova_y["Block:phase", "Pr(>F)"], rsanova_z["Block:phase", "Pr(>F)"])
  )
}

# Run for cleaned data
#models_clean <- fit_random_slope_models(all_data_clean, "Cleaned")
clean_pvals_randomslope <- extract_rms_randomslope_pvalues(all_data_clean, "Cleaned")

boundary (singular) fit: see help('isSingular')

Warning: Model failed to converge with 1 negative eigenvalue: -2.3e+01

boundary (singular) fit: see help('isSingular')

# Run for uncleaned data
#models_unclean <- fit_random_slope_models(all_data_unclean, "Uncleaned")
unclean_pvals_randomslope <- extract_rms_randomslope_pvalues(all_data_unclean, "Uncleaned")

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.00319449 (tol = 0.002, component 1)

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.00268203 (tol = 0.002, component 1)

combined_randomslope_pvals <- bind_rows(clean_pvals_randomslope, unclean_pvals_randomslope)

# Show all p-values
print(combined_randomslope_pvals)

# A tibble: 6 × 5
  Dataset   Axis  `Block p-value` `Phase p-value` `Interaction p-value`
  <chr>     <chr>           <dbl>           <dbl>                 <dbl>
1 Cleaned   X             0.176                 0              1.97e-50
2 Cleaned   Y             0.368                 0              4.81e-49
3 Cleaned   Z             0.341                 0              4.02e-27
4 Uncleaned X             0.0157                0              7.21e-73
5 Uncleaned Y             0.00940               0              1.52e-69
6 Uncleaned Z             0.0262                0              6.72e-41

#compared to the first model this also includes a random slope for Block within subjects

#results:
#block p-value        >0.05 (cleaned dataset)   :This suggests no learning or adaptation effects across blocks after accounting for between subject
#block p-value        <0.05 (uncleaned dataset) :This suggests learning or adaptation effects across blocks after accounting for between subject variation
#phase p-value            0                     :because it is either execution or preparation 
#interaction          <0.05                     :this suggest the effect of block is different depending on phase

#clean vs unclean dataset: unclean p-values< clean p-values: probably because of more variability

#3. Model: CoM RMS Acceleration changes over time

# Function to run LMM and extract p-values for RMS change over time
extract_learning_pvalues <- function(data, label) {
  tagged <- tag_trial_phases(data)
  rms_data <- compute_rms(tagged) %>%
    mutate(
      Block = factor(Block),
      subject = factor(subject),
      phase = factor(phase)
    )
  
  # Fit models with interaction
  rmscotmodel_x <- lmer(rms_x ~ TrialInBlock * Block * phase + (1 + TrialInBlock | subject), data = rms_data)
  rmscotmodel_y <- lmer(rms_y ~ TrialInBlock * Block * phase + (1 + TrialInBlock | subject), data = rms_data)
  rmscotmodel_z <- lmer(rms_z ~ TrialInBlock * Block * phase + (1 + TrialInBlock | subject), data = rms_data)

  # Get ANOVA tables with p-values
  rmscotanova_x <- anova(rmscotmodel_x)
  rmscotanova_y <- anova(rmscotmodel_y)
  rmscotanova_z <- anova(rmscotmodel_z)

  # Format as tidy tibble
  tibble(
    Dataset = label,
    Axis = c("X", "Y", "Z"),
    `TrialInBlock p-value` = c(rmscotanova_x["TrialInBlock", "Pr(>F)"],
                                rmscotanova_y["TrialInBlock", "Pr(>F)"],
                                rmscotanova_z["TrialInBlock", "Pr(>F)"]),
    `Block p-value` = c(rmscotanova_x["Block", "Pr(>F)"],
                        rmscotanova_y["Block", "Pr(>F)"],
                        rmscotanova_z["Block", "Pr(>F)"]),
    `Phase p-value` = c(rmscotanova_x["phase", "Pr(>F)"],
                        rmscotanova_y["phase", "Pr(>F)"],
                        rmscotanova_z["phase", "Pr(>F)"]),
    `TrialInBlock:Block p` = c(rmscotanova_x["TrialInBlock:Block", "Pr(>F)"],
                               rmscotanova_y["TrialInBlock:Block", "Pr(>F)"],
                               rmscotanova_z["TrialInBlock:Block", "Pr(>F)"]),
    `TrialInBlock:Phase p` = c(rmscotanova_x["TrialInBlock:phase", "Pr(>F)"],
                               rmscotanova_y["TrialInBlock:phase", "Pr(>F)"],
                               rmscotanova_z["TrialInBlock:phase", "Pr(>F)"]),
    `Block:Phase p` = c(rmscotanova_x["Block:phase", "Pr(>F)"],
                        rmscotanova_y["Block:phase", "Pr(>F)"],
                        rmscotanova_z["Block:phase", "Pr(>F)"]),
    `3-way p-value` = c(rmscotanova_x["TrialInBlock:Block:phase", "Pr(>F)"],
                        rmscotanova_y["TrialInBlock:Block:phase", "Pr(>F)"],
                        rmscotanova_z["TrialInBlock:Block:phase", "Pr(>F)"])
  )
}

pvals_learning_clean <- extract_learning_pvalues(all_data_clean, "Cleaned")

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.134932 (tol = 0.002, component 1)

boundary (singular) fit: see help('isSingular')

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge with max|grad| = 0.0856732 (tol = 0.002, component 1)

pvals_learning_unclean <- extract_learning_pvalues(all_data_unclean, "Uncleaned")

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
unable to evaluate scaled gradient

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge: degenerate Hessian with 1 negative eigenvalues

Warning: Model failed to converge with 1 negative eigenvalue: -3.8e+00

boundary (singular) fit: see help('isSingular')

Warning: Model failed to converge with 1 negative eigenvalue: -6.0e+05

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
unable to evaluate scaled gradient

Warning in checkConv(attr(opt, "derivs"), opt$par, ctrl = control$checkConv, :
Model failed to converge: degenerate Hessian with 1 negative eigenvalues

Warning: Model failed to converge with 1 negative eigenvalue: -3.7e+00

combined_learning_pvals <- bind_rows(pvals_learning_clean, pvals_learning_unclean)

# Print results
print(combined_learning_pvals)

# A tibble: 6 × 9
  Dataset   Axis  `TrialInBlock p-value` `Block p-value` `Phase p-value`
  <chr>     <chr>                  <dbl>           <dbl>           <dbl>
1 Cleaned   X                      0.878        2.84e- 6               0
2 Cleaned   Y                      0.705        1.46e- 4               0
3 Cleaned   Z                      0.360        5.34e- 3               0
4 Uncleaned X                      0.495        6.70e-13               0
5 Uncleaned Y                      0.267        5.02e- 8               0
6 Uncleaned Z                      0.816        4.05e- 6               0
# ℹ 4 more variables: `TrialInBlock:Block p` <dbl>,
#   `TrialInBlock:Phase p` <dbl>, `Block:Phase p` <dbl>, `3-way p-value` <dbl>

#results:
#trial in block p-value         >0.05   : Participants do not change rms within block
#block p-value                  <0.05   : RMS differs significantly between blocks
#phase p-value                          : 0 because it is either execution or preparation 
#Trial in block x Block         >0.05   : no significant changes across blocks                    (except x & z in uncleaned)
#Trial in block x phase         <0.05   : as before different phases differ
#Block x phase                  <0.05   : changes in blocks differ across phases
#trial in block x block x phase <0.05   : trial in block changes across both blocks and phases

#clean vs unclean dataset: unclean p-values< clean p-values: probably because of more variability

#4. Model: CoM SD of Acceleration changes over time

# -------- Compute SD Function --------
compute_sd <- function(tagged_df) {
  tagged_df %>%
    group_by(subject, Block, trial, phase) %>%
    summarise(
      sd_x = sd(CoM.acc.x, na.rm = TRUE),
      sd_y = sd(CoM.acc.y, na.rm = TRUE),
      sd_z = sd(CoM.acc.z, na.rm = TRUE),
      .groups = "drop"
    ) %>%
    group_by(subject, Block, phase) %>%
    arrange(trial) %>%
    mutate(TrialInBlock = row_number()) %>%
    ungroup()
}

# -------- Run SD LMM Analysis --------
run_sd_model_analysis <- function(data, label) {
  tagged <- tag_trial_phases(data)
  sd_data <- compute_sd(tagged)
  sd_data <- sd_data %>% mutate(Block = factor(Block), subject = as.factor(subject))

  sdcotmodel_x <- lmer(sd_x ~ TrialInBlock * Block * phase + (1 + Block | subject), data = sd_data)
  sdcotmodel_y <- lmer(sd_y ~ TrialInBlock * Block * phase + (1 + Block | subject), data = sd_data)
  sdcotmodel_z <- lmer(sd_z ~ TrialInBlock * Block * phase + (1 + Block | subject), data = sd_data)

  sdcotanova_x <- anova(sdcotmodel_x)
  sdcotanova_y <- anova(sdcotmodel_y)
  sdcotanova_z <- anova(sdcotmodel_z)

  # Format as tidy tibble
  tibble(
    Dataset = label,
    Axis = c("X", "Y", "Z"),
    `TrialInBlock p-value` = c(sdcotanova_x["TrialInBlock", "Pr(>F)"],
                                sdcotanova_y["TrialInBlock", "Pr(>F)"],
                                sdcotanova_z["TrialInBlock", "Pr(>F)"]),
    `Block p-value` = c(sdcotanova_x["Block", "Pr(>F)"],
                        sdcotanova_y["Block", "Pr(>F)"],
                        sdcotanova_z["Block", "Pr(>F)"]),
    `Phase p-value` = c(sdcotanova_x["phase", "Pr(>F)"],
                        sdcotanova_y["phase", "Pr(>F)"],
                        sdcotanova_z["phase", "Pr(>F)"]),
    `TrialInBlock:Block p` = c(sdcotanova_x["TrialInBlock:Block", "Pr(>F)"],
                               sdcotanova_y["TrialInBlock:Block", "Pr(>F)"],
                               sdcotanova_z["TrialInBlock:Block", "Pr(>F)"]),
    `TrialInBlock:Phase p` = c(sdcotanova_x["TrialInBlock:phase", "Pr(>F)"],
                               sdcotanova_y["TrialInBlock:phase", "Pr(>F)"],
                               sdcotanova_z["TrialInBlock:phase", "Pr(>F)"]),
    `Block:Phase p` = c(sdcotanova_x["Block:phase", "Pr(>F)"],
                        sdcotanova_y["Block:phase", "Pr(>F)"],
                        sdcotanova_z["Block:phase", "Pr(>F)"]),
    `3-way p-value` = c(sdcotanova_x["TrialInBlock:Block:phase", "Pr(>F)"],
                        sdcotanova_y["TrialInBlock:Block:phase", "Pr(>F)"],
                        sdcotanova_z["TrialInBlock:Block:phase", "Pr(>F)"])
  )
}

# --- Run and combine results ---
sd_clean_pvals <- run_sd_model_analysis(all_data_clean, "Cleaned")
sd_unclean_pvals <- run_sd_model_analysis(all_data_unclean, "Uncleaned")

boundary (singular) fit: see help('isSingular')

sd_combined_pvals <- bind_rows(sd_clean_pvals, sd_unclean_pvals)

# --- Show results ---
print(sd_combined_pvals)

# A tibble: 6 × 9
  Dataset   Axis  `TrialInBlock p-value` `Block p-value` `Phase p-value`
  <chr>     <chr>                  <dbl>           <dbl>           <dbl>
1 Cleaned   X                    0.860          0.0738                 0
2 Cleaned   Y                    0.470          0.152                  0
3 Cleaned   Z                    0.135          0.360                  0
4 Uncleaned X                    0.00415        0.000940               0
5 Uncleaned Y                    0.0158         0.0391                 0
6 Uncleaned Z                    0.296          0.0328                 0
# ℹ 4 more variables: `TrialInBlock:Block p` <dbl>,
#   `TrialInBlock:Phase p` <dbl>, `Block:Phase p` <dbl>, `3-way p-value` <dbl>

#results:
#trial in block p-value         >0.05   : Participants do not change rms within block               (except x & y in uncleaned dataset)
#block p-value                  >0.05   : RMS does not differ significantly between blocks          (except in uncleaned dataset)
#phase p-value                          : 0 because it is either execution or preparation 
#Trial in block x Block         >0.05   : no significant changes across blocks                      (except in uncleaned dataset)
#Trial in block x phase         <0.05   : as before different phases differ                         
#Block x phase                  <0.05   : changes in blocks differ across phases
#trial in block x block x phase <0.05   : trial in block changes across both blocks and phases

#clean vs unclean dataset: unclean p-values< clean p-values: probably because of more variability

5. LMM step level

# ------------------------------
# Create step_rms datasets for model
# ------------------------------

# Define function to extract step-level RMS from a dataset
extract_step_rms <- function(df, label) {
  df %>%
    filter(phase == "Execution", Marker.Text %in% c(14, 15, 16, 17)) %>%
    group_by(subject, Block, trial, Marker.Text) %>%
    summarise(
      rms_x = sqrt(mean(CoM.acc.x^2, na.rm = TRUE)),
      rms_y = sqrt(mean(CoM.acc.y^2, na.rm = TRUE)),
      rms_z = sqrt(mean(CoM.acc.z^2, na.rm = TRUE)),
      .groups = "drop"
    ) %>%
    pivot_longer(cols = starts_with("rms_"), names_to = "Axis", values_to = "RMS") %>%
    mutate(
      Axis = gsub("rms_", "", Axis),
      Step = case_when(
        Block == 1 ~ rep(1:6, length.out = n()),
        Block == 2 ~ rep(1:12, length.out = n()),
        Block == 3 ~ rep(1:18, length.out = n()),
        Block %in% c(4, 5) ~ rep(1:18, length.out = n()),
        TRUE ~ NA_real_
      ),
      Dataset = label
    )
}

# Tag phases first
tagged_clean <- tag_trial_phases(all_data_clean)
tagged_unclean <- tag_trial_phases(all_data_unclean)

# Extract step-level RMS for both datasets
step_rms_cleaned <- extract_step_rms(tagged_clean, "Cleaned")
step_rms_uncleaned <- extract_step_rms(tagged_unclean, "Uncleaned")

# ------------------------------
# Fit model and extract p-values
# ------------------------------

run_step_model <- function(df, label) {
  df <- df %>% mutate(Block = factor(Block), subject = as.factor(subject))

  smodel_x <- lmer(RMS ~ Step * Block + (1 | subject), data = filter(df, Axis == "x"))
  smodel_y <- lmer(RMS ~ Step * Block + (1 | subject), data = filter(df, Axis == "y"))
  smodel_z <- lmer(RMS ~ Step * Block + (1 | subject), data = filter(df, Axis == "z"))

  sanova_x <- anova(smodel_x)
  sanova_y <- anova(smodel_y)
  sanova_z <- anova(smodel_z)

  tibble(
    Dataset = label,
    Axis = c("X", "Y", "Z"),
    `Step p-value` = c(sanova_x["Step", "Pr(>F)"], sanova_y["Step", "Pr(>F)"], sanova_z["Step", "Pr(>F)"]),
    `Block p-value` = c(sanova_x["Block", "Pr(>F)"], sanova_y["Block", "Pr(>F)"], sanova_z["Block", "Pr(>F)"]),
    `Interaction p-value` = c(sanova_x["Step:Block", "Pr(>F)"], sanova_y["Step:Block", "Pr(>F)"], sanova_z["Step:Block", "Pr(>F)"])
  )
}

# Run both
step_clean_pvals <- run_step_model(step_rms_cleaned, "Cleaned")
step_unclean_pvals <- run_step_model(step_rms_uncleaned, "Uncleaned")

# Combine and display
step_model_results <- bind_rows(step_clean_pvals, step_unclean_pvals)
print(step_model_results)

# A tibble: 6 × 5
  Dataset   Axis  `Step p-value` `Block p-value` `Interaction p-value`
  <chr>     <chr>          <dbl>           <dbl>                 <dbl>
1 Cleaned   X            0.706          7.60e-11             0.0443   
2 Cleaned   Y            0.682          1.23e- 8             0.000487 
3 Cleaned   Z            0.0440         4.00e- 5             0.000731 
4 Uncleaned X            0.262          4.84e-12             0.153    
5 Uncleaned Y            0.479          7.79e- 7             0.000201 
6 Uncleaned Z            0.00920        1.12e- 4             0.0000448

#results:
#step p-value                   >0.05   : rms does not vary by step position in sequence (except z-axis)
#block p-value                  <0.05   : RMS differs significantly between blocks      
#step x block                   <0.05   : step specific rms changes differ across blocks

#clean vs unclean dataset: unclean p-values< clean p-values: probably because of more variability