The Relationship Between Locomotor Profile and Acute Responses to High-Intensity Interval Training (HIIT) in Highly-Trained Soccer Players

Example R code used for statistical analyses and data visualisations

Authors

Kieran Smith

Matthew D. Wright

Paul Chesterton

Robbie Dawson

Rhiannah McCourt

Alastair Hamilton

Jonathan M. Taylor

Published

March 28, 2026

Data & Code

The data supporting this study’s findings are available upon request or openly via OSF repository at: https://osf.io/keu47

The code used to analyse the data used in this study is available upon request or openly via OSF repository at: https://osf.io/x2vy4

The code blocks below use code-link. All packages and syntax are hyperlinked to their official documentation. Click on package names or functions throughout the code to learn more.

Packages

The following packages were used

library(readxl) # read excel file
library(janitor) # clean col names
library(tidyverse) # install and load core packages from the tidyverse
library(lubridate) # reformat dates and times
library(writexl) # write excel files/sheets
library(lme4) # fitting and analyzing mixed-effects models
library(robustlmm) # fitting and analyzing robust mixed-effects models
library(broom.mixed) # tidy mixed model results 
library(performance) # regression diagnostics
library(binom) # calculate confidence intervals
library(ggalluvial) # create alluvial plots
library(ggnewscale) # plot fill and colour scales
library(grateful) # citation of R packages

Load Data

Load data into tidy format and clean names

  player_info <- read_excel("data.xlsx", sheet = "player_info") %>%
    clean_names()
  
  enviro_cond <- read_excel("data.xlsx", sheet = "enviro_cond") %>%
    clean_names()
  
  gps_and_drpe <- read_excel("data.xlsx", sheet = "gps_and_drpe") %>%
    clean_names()
  
  gps_raw_sensor <- read_csv("gps_raw_sensor.csv") %>%
    clean_names()
  
  quals <- read_excel("data.xlsx", sheet = "quals") %>%
    clean_names()
 
  # view player_info df
  head(player_info)
  # view enviro_cond df
  head(enviro_cond)
  # view gps_and_drpe df
  head(gps_and_drpe)
  # view gps_raw_sensor df
  head(gps_raw_sensor)
  # view quals df
  head(quals)

Descriptive Statistics

Calculate descriptive statistics for: participant characteristics, player positions, environmental conditions, GPS quality, player locomotor profiles, training load summary and dRPE time-on-task

  # participant characteristics -----
  # function to calculate participant summary stats
  summary_stats <- function(player_info, group_vars) {
    player_info %>%
      pivot_longer(
        cols = c(age_years, stature_cm, body_mass_kg),
        names_to = "variable",
        values_to = "value"
      ) %>%
      group_by(across(all_of(group_vars)), variable) %>%
      summarise(
        n = round(n(), 1),
        mean = round(mean(value, na.rm = TRUE), 1),
        sd = round(sd(value, na.rm = TRUE), 1),
        min = round(min(value, na.rm = TRUE), 1),
        max = round(max(value, na.rm = TRUE), 1),
        .groups = "drop"
      ) %>%
      mutate(variable = factor(
        variable,
        levels = c("age_years", "stature_cm", "body_mass_kg")
      )) %>%
      arrange(across(all_of(group_vars)), variable)
  }
  
  # overall summary (without squad grouping)
  overall_characteristics <- player_info %>%
    summary_stats(group_vars = character(0)) %>%
    mutate(squad = "All")
  
  # squad-level summary
  squad_characteristics <- player_info %>%
    summary_stats(group_vars = "squad")
  
  # combine summaries
  parcharacteristics <- bind_rows(overall_characteristics, squad_characteristics) %>%
    select(variable, squad, n, mean, sd, min, max)
  
  # view
  head(parcharacteristics)
  # count of players' positional groups -----
  positionalgroup_counts <- player_info %>%
    count(positional_group, name = "count")
  
  # view
  head(positionalgroup_counts)
  # environmental conditions ----
  enviro_summary <- enviro_cond %>%
    select(-club, -session, -date) %>%
    mutate(wind_speed_ms = wind_speed_mph * 0.44704) %>%  # convert wind speed from mph to ms
    select(-wind_speed_mph) %>%  # drop original mph column
    pivot_longer(cols = everything(),
                 names_to = "variable",
                 values_to = "value") %>%
    group_by(variable) %>%
    summarise(
      n = n(),
      mean = round(mean(value, na.rm = TRUE), 1),
      sd = round(sd(value, na.rm = TRUE), 1),
      min = round(min(value, na.rm = TRUE), 1),
      max = round(max(value, na.rm = TRUE), 1),
      .groups = "drop"
    ) %>%
    mutate(variable = factor(
      variable,
      levels = c(
        "temp_c",
        "humidity_per",
        "baro_pressure_hpa",
        "wind_speed_ms"
      )
    )) %>%
    arrange(variable)
  
  # view
  head(enviro_summary)
  # gps quality ----
  gps_quality <- gps_raw_sensor %>%
    filter(period != "40m Sprints") %>%
    select(`positional_quality_percent`, `hdop`, `number_sats`) %>%
    pivot_longer(cols = everything(),
                 names_to = "variable",
                 values_to = "value") %>%
    mutate(variable = factor(variable, levels = c("positional_quality_percent", "hdop", "number_sats"))) %>%
    group_by(variable) %>%
    summarise(
      mean = round(mean(value, na.rm = TRUE), 2),
      sd = round(sd(value, na.rm = TRUE), 2),
      min = round(min(value, na.rm = TRUE), 2),
      max = round(max(value, na.rm = TRUE), 2)
    )
  
  # view
  head(gps_quality)
  # player locomotor profiles ----
  # variables to summarise (and their display order)
  vars <- c("mas_ms", "mss_ms", "asr_ms")
  
  # pivot longer for easier summarising
  pp_long <- player_info %>%
    select(positional_group, profile, all_of(vars)) %>%
    pivot_longer(
      cols = all_of(vars),
      names_to = "variable",
      values_to = "value"
    ) %>%
    mutate(variable = factor(variable, levels = vars))
  
  # summary function
  make_block <- function(data, level_label, group_col, group_val = NULL) {
    # group_col can be "positional_group", "profile", or NULL (for overall)
    if (is.null(group_col)) {
      data %>%
        group_by(variable) %>%
        summarise(
          n = n(),
          mean = round(mean(value, na.rm = TRUE), 2),
          sd   = round(sd(value, na.rm = TRUE), 2),
          min  = round(min(value, na.rm = TRUE), 2),
          max  = round(max(value, na.rm = TRUE), 2),
          .groups = "drop"
        ) %>%
        pivot_longer(n:max, names_to = "stat", values_to = "val") %>%
        mutate(level = level_label, group = group_val %||% "All") %>%
        select(level, group, stat, variable, val) %>%
        pivot_wider(
          names_from = variable,
          values_from = val
        )
    } else {
      data %>%
        group_by(.data[[group_col]], variable) %>%
        summarise(
          n = n(),
          mean = round(mean(value, na.rm = TRUE), 2),
          sd   = round(sd(value, na.rm = TRUE), 2),
          min  = round(min(value, na.rm = TRUE), 2),
          max  = round(max(value, na.rm = TRUE), 2),
          .groups = "drop"
        ) %>%
        pivot_longer(n:max, names_to = "stat", values_to = "val") %>%
        mutate(level = level_label, group = .data[[group_col]]) %>%
        select(level, group, stat, variable, val) %>%
        pivot_wider(
          names_from = variable,
          values_from = val
        )
    }
  }
  
  # summarise each group
  overall_block   <- make_block(pp_long, "Overall",          group_col = NULL,              group_val = "All")
  by_pos_block    <- make_block(pp_long, "Positional group", group_col = "positional_group")

  # combine into one df + order
  player_profiles_comb <- bind_rows(overall_block, by_pos_block) %>%
    mutate(
      level = factor(level, levels = c("Overall", "Positional group")),
      stat  = factor(stat,  levels = c("n", "mean", "sd", "min", "max"))
    ) %>%
    arrange(level, group, stat)
  
  # view
  head(player_profiles_comb)
  # training load summary by session ----
  # variables to summarise
  tl_vars <- c(
    "period_name", "total_duration", "total_distance", "total_player_load",
     "meters_per_minute", "accel_decel_efforts", "hs_distance",
    "velocity_band_6_total_distance", "s_rpe", "d_rpe_b", 
    "d_rpe_l", "max_hr_percent_max", "avg_hr_percent_max"
  )
  
  # function: parse various time formats to numeric minutes for analysis
  parse_to_minutes <- function(x) {
    # if already difftime
    if (inherits(x, "difftime")) return(as.numeric(x, units = "mins"))
    # if already POSIXt, take h:m:s component only
    if (inherits(x, "POSIXt")) return(hour(x) * 60 + minute(x) + second(x) / 60)
    # if numeric already (assume minutes)
    if (is.numeric(x)) return(x)
    # if character (try hms or ms)
    if (is.character(x)) {
      x <- str_trim(x)
      # try hms() -> returns Duration in seconds
      parsed <- suppressWarnings(hms(x))
      if (all(is.na(parsed))) parsed <- suppressWarnings(ms(x))
      return(as.numeric(parsed) / 60)
    }
    # fallback
    suppressWarnings(as.numeric(x))
  }
  
  # function: format numeric minutes → "mm:ss"
  fmt_mmss <- function(mins) {
    ifelse(
      is.na(mins),
      NA_character_,
      sprintf("%02d:%02d", floor(mins), round((mins %% 1) * 60))
    )
  }
  
  # summarise training load data
  tl_summary <- gps_and_drpe %>%
    filter(!period_name %in% c("40m Sprints", "6minDT")) %>%
    select(all_of(tl_vars)) %>%               # use lubridate to parse time cols
    rename(sprint_distance = velocity_band_6_total_distance) %>%
    mutate( 
      total_duration     = parse_to_minutes(total_duration),
      period_name = factor(period_name, levels = c("LI-HIIT", "SI-HIIT", "RST"))
    ) %>%
    group_by(period_name) %>%
    summarise(across(
      where(is.numeric),
      list(
        mean = ~ round(mean(.x, na.rm = TRUE), 2),
        sd   = ~ round(sd(.x,   na.rm = TRUE), 2),
        min  = ~ round(min(.x,  na.rm = TRUE), 2),
        max  = ~ round(max(.x,  na.rm = TRUE), 2)
      ),
      .names = "{.col}_{.fn}"
    )) %>%
    pivot_longer(cols = -period_name,
                 names_to = c("variable", "stat"),
                 names_sep = "_(?=[^_]+$)") %>%
    pivot_wider(names_from = variable, values_from = value) %>%
    mutate(stat = factor(stat, levels = c("mean", "sd", "min", "max"))) %>%
    arrange(period_name, stat) %>%              # format durations back to mm:ss
    mutate(
      total_duration = fmt_mmss(total_duration)
    )
  
  # apply rounding
  tl_summary <- tl_summary %>%
    mutate(
      # ---- round to 0 decimals ----
      total_distance                  = round(total_distance, 0),
      meters_per_minute               = round(meters_per_minute, 0),
      hs_distance                     = round(hs_distance, 0),
      sprint_distance                 = round(sprint_distance, 0),
      total_player_load               = round(total_player_load, 0),
      accel_decel_efforts             = round(accel_decel_efforts, 0),
      s_rpe                           = round(s_rpe, 0),
      d_rpe_b                         = round(d_rpe_b, 0),
      d_rpe_l                         = round(d_rpe_l, 0),

      # ---- round to 2 decimals ----
      avg_hr_percent_max              = round(avg_hr_percent_max, 2),
      max_hr_percent_max              = round(max_hr_percent_max, 2)
    )
  
  # view
  head(tl_summary)
  # summarise dRPE time-on-task across all sessions ----
  drpe_tot_summary <- gps_and_drpe %>%
    filter(!period_name %in% c("40m Sprints", "6minDT")) %>%
    select(d_rpe_time_on_task) %>%
    mutate(d_rpe_time_on_task = parse_to_minutes(d_rpe_time_on_task)) %>%
    summarise(
      n    = sum(!is.na(d_rpe_time_on_task)),
      mean = round(mean(d_rpe_time_on_task, na.rm = TRUE), 2),
      sd   = round(sd(d_rpe_time_on_task,   na.rm = TRUE), 2),
      min  = round(min(d_rpe_time_on_task,  na.rm = TRUE), 2),
      max  = round(max(d_rpe_time_on_task,  na.rm = TRUE), 2)
    ) %>%
    mutate(
      mean = fmt_mmss(mean),
      sd   = fmt_mmss(sd),
      min  = fmt_mmss(min),
      max  = fmt_mmss(max)
    )
  
  # view
  head(drpe_tot_summary)

Plot player locomotor profiles

# plot player profiles ----
  # order positional groups & control spacing between bars
  x_order <- c("All", "Goalkeeper", "Defender", "Midfielder", "Forward")
  x_space <- 1.30
  
  # prepare summary (all + positional groups) from original object
  plot_data <- player_profiles_comb %>%
    filter(
      stat %in% c("mean", "sd", "n"),
      (level == "Positional group") | (level == "Overall" & group == "All")
    ) %>%
    select(level, group, stat, mas_ms, asr_ms, mss_ms) %>%
    mutate(
      level = str_trim(as.character(level)),
      group = str_trim(as.character(group))
    ) %>%
    pivot_wider(
      names_from  = stat,
      values_from = c(mas_ms, asr_ms, mss_ms),
      names_sep   = "_"
    ) %>%
    mutate(
      group_plot = case_when(
        level == "Overall" ~ "All",
        level == "Positional group" ~ group
      ),
      group_plot = factor(group_plot, levels = x_order),
      
      x_label = if_else(
        group_plot == "All",
        paste0("All\n(n = ", mas_ms_n, ")"),
        paste0(group_plot, "\n(n = ", mas_ms_n, ")")
      ),
      
      # labels
      mas_lab = if_else(
        group_plot == "Goalkeeper",
        paste0("MAS\n", sprintf("%.2f", mas_ms_mean)),
        paste0("MAS\n", sprintf("%.2f ± %.2f", mas_ms_mean, mas_ms_sd))
      ),
      asr_lab = if_else(
        group_plot == "Goalkeeper",
        paste0("ASR\n", sprintf("%.2f", asr_ms_mean)),
        paste0("ASR\n", sprintf("%.2f ± %.2f", asr_ms_mean, asr_ms_sd))
      ),
      mss_lab = if_else(
        group_plot == "Goalkeeper",
        paste0("MSS\n", sprintf("%.2f", mss_ms_mean)),
        paste0("MSS\n", sprintf("%.2f ± %.2f", mss_ms_mean, mss_ms_sd))
      ),
      
      # numeric x positions with extra spacing
      x_num = as.numeric(group_plot) * x_space
    )
  
  # named x labels (keyed by group_plot)
  x_labs <- setNames(plot_data$x_label, as.character(plot_data$group_plot))
  
  # stacked rectangles for MAS and ASR
  bar_width <- 0.8
  bar_half  <- bar_width / 2
  
  rect_data <- bind_rows(
    plot_data %>%
      transmute(
        group_plot,
        x_num,
        component = factor("MAS", levels = c("MAS", "ASR")),
        ymin = 0,
        ymax = mas_ms_mean
      ),
    plot_data %>%
      transmute(
        group_plot,
        x_num,
        component = factor("ASR", levels = c("MAS", "ASR")),
        ymin = mas_ms_mean,
        ymax = mas_ms_mean + asr_ms_mean
      )
  )
  
  # colours
  speed_cols <- c("MAS" = "#D9D9D9", "ASR" = "#4D4D4D")
  
  # headroom for MSS labels
  max_stack  <- max(plot_data$mas_ms_mean + plot_data$asr_ms_mean, na.rm = TRUE)
  mss_offset <- 0.05 * max_stack
  y_top      <- max_stack + mss_offset + 0.10 * max_stack
  
  x_min <- min(plot_data$x_num - bar_half)
  x_max <- max(plot_data$x_num + bar_half)
  
  # plot
  profiles_plot <- ggplot(rect_data, aes(fill = component)) +
    geom_rect(
      aes(
        xmin = x_num - bar_half,
        xmax = x_num + bar_half,
        ymin = ymin,
        ymax = ymax
      ),
      colour = "black",
      linewidth = 0.3
    ) +
    scale_fill_manual(values = speed_cols) +
    scale_y_continuous(
      limits = c(0, y_top),
      breaks = seq(0, ceiling(y_top), by = 2),
      expand = expansion(mult = c(0, 0))
    ) +
    scale_x_continuous(
      limits = c(x_min, x_max),
      breaks = plot_data$x_num,
      labels = x_labs
    ) +
    labs(
      x = "Positional Group",
      y = expression(bold("Speed (m·s"^-1*")"))
    ) +
    theme_minimal(base_size = 14) +
    theme(
      panel.grid.minor.x = element_blank(),
      legend.position = "none",
      axis.text.y  = element_text(size = 15, face = "plain"),
      axis.text.x  = element_text(size = 15, face = "plain"),
      axis.title.y = element_text(size = 20),
      axis.title.x = element_text(size = 20, face = "bold", margin = margin(t = 12)),
      plot.margin  = margin(5.5, 60, 5.5, 5.5)
    ) +
    
    # MAS label
    geom_text(
      data = plot_data,
      aes(x = x_num, y = mas_ms_mean / 2, label = mas_lab),
      inherit.aes = FALSE,
      size = 5, colour = "black", fontface = "plain"
    ) +
    
    # ASR label
    geom_text(
      data = plot_data,
      aes(x = x_num, y = mas_ms_mean + asr_ms_mean / 2, label = asr_lab),
      inherit.aes = FALSE,
      size = 5, colour = "white", fontface = "plain"
    ) +
    
    # MSS label
    geom_text(
      data = plot_data,
      aes(
        x = x_num,
        y = (mas_ms_mean + asr_ms_mean) + mss_offset,
        label = mss_lab
      ),
      inherit.aes = FALSE,
      size = 5, colour = "black", fontface = "plain"
    ) +
    coord_cartesian(clip = "off")
  
  # view plot
  profiles_plot

Pearson’s Correlations

Transform data for analysis

  # define variables for analysis ----
  # locomotor profile variables
  locomotor_vars <- c("mas_ms", "mss_ms", "asr_ms")
  
  # training load variables
  tl_vars <- c(
    "player_load_per_metre", "s_rpe", "d_rpe_b", 
    "d_rpe_l", "avg_hr_percent_max"
  )
  
  # HR variables
  hr_vars <- c(
    "avg_hr_percent_max"
  )
  
  # organise data ----
  analysis_data <- player_info %>%
    select(
      player_id, position, positional_group, mas_ms, mss_ms, asr_ms
    ) %>%
    right_join(
      gps_and_drpe %>%
        filter(!period_name %in% c("40m Sprints", "6minDT")) %>%
        select(
          player_id, period_name, player_load_per_metre, s_rpe, d_rpe_b, 
          d_rpe_l, avg_hr_percent_max
        ),
      by = "player_id"
    )
  
  # view
  head(analysis_data)
  # transform data_long
  analysis_data_long <-
    analysis_data %>%
    pivot_longer(
      cols = all_of(tl_vars),
      names_to = "tl_variable",
      values_to = "value"
    ) %>%
    # remove values < 50 for HR variables
    filter(!(tl_variable %in% hr_vars & value <= 50))
  
  # view
  head(analysis_data_long)

Calculate correlations between locomotor profiling, and acute response measures within each HIIT session

  # function to run cor.test with locomotor and TL var combo's and tidy results
  run_corr <- function(outcome, var, data) {
    
    # keep only rows for the chosen training-load variable
    data <- data %>%
      dplyr::filter(tl_variable == var)
    
    # outcome = mas_ms / mss_ms / asr_ms (wide columns)
    x <- data[[outcome]]
    
    # training-load variable is stored in `value`
    y <- data[["value"]]
    
    ct <- cor.test(
      x, y,
      use = "pairwise.complete.obs",
      method = "pearson",
      conf.level = 0.95
    )
    
    tibble(
      Outcome  = outcome,
      Variable = var,
      Estimate = unname(ct$estimate),
      Conf.Low = ct$conf.int[1],
      Conf.High = ct$conf.int[2],
      `± 95% CL` = (ct$conf.int[2] - ct$conf.int[1]) / 2,
      DF       = unname(ct$parameter),
      P.Value  = ct$p.value
    )
  }
  
  # run for all variables and combine
  combined_pccs <-
    analysis_data_long %>%
    # keep only TL variables of interest
    dplyr::filter(tl_variable %in% tl_vars) %>%
    dplyr::group_by(period_name) %>%
    dplyr::group_modify(~ {
      # only use TL vars that actually appear in this period_name
      vars_in_group <- intersect(tl_vars, unique(.x$tl_variable))
      
      tidyr::crossing(
        Outcome  = locomotor_vars,
        Variable = vars_in_group
      ) %>%
        purrr::pmap_dfr(\(Outcome, Variable) run_corr(Outcome, Variable, data = .x))
    }) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(
      dplyr::across(dplyr::where(is.numeric), ~ round(.x, 2))
    )
  
  # view and export
  head(combined_pccs)  

Robust Mixed-Effects Models

Fit robust linear mixed-effects models for each combination of locomotor profile variable and acute training response measure

 # reduce tl_vars to vars in analysis_data_long
  tl_vars <- analysis_data_long %>%
    dplyr::distinct(tl_variable) %>%
    dplyr::pull(tl_variable) %>%
    sort()
  
  # run models ----
  # function to run robust mixed-effects model for each tl and locomotor var combo
  run_robust_lmm <- function(tl, x, data = analysis_data_long) {
    
    # subset to one TL variable (e.g., s_rpe, player_load_per_metre)
    df <- data %>% filter(tl_variable == tl)
    
    # model formulas
    formula_full <- as.formula(paste0("value ~ ", x, " * period_name + (1 | player_id)"))
    formula_null <- as.formula("value ~ period_name + (1 | player_id)")
    
    # robust model for estimates / CIs
    model_full_rlmer <- rlmer(formula_full, data = df)
    
    # standard lmer models for R2/ICC + ANOVA
    model_full_lmer <- lmer(formula_full, data = df, REML = FALSE)
    model_null_lmer <- lmer(formula_null, data = df, REML = FALSE)
    
    # fixed effects from robust model
    fixed <- broom.mixed::tidy(
      model_full_rlmer,
      effects = "fixed",
      conf.int = TRUE
    )
    
    # fixed-effect variance–covariance matrix from robust model
    vcov_fixed <- as.matrix(vcov(model_full_rlmer))
    
    # random effects variance
    random_effects <- as.data.frame(VarCorr(model_full_lmer))
    
    # R² and ICC
    r2_vals  <- performance::r2(model_full_lmer)
    icc_vals <- performance::icc(model_full_lmer)
    
    # nested model comparison using lmer
    cmp <- anova(model_null_lmer, model_full_lmer)
    
    tibble(
      tl_variable      = tl,
      locomotor_var     = x,
      fixed_effects    = list(fixed),
      vcov_fixed       = list(vcov_fixed),
      random_effects   = list(random_effects),
      r2               = list(r2_vals),
      icc              = list(icc_vals),
      model_comparison = list(cmp)
    )
  }
  
  # run for all var combinations
  robust_lmm_results <-
    map_df(locomotor_vars, function(x) {
      map_df(tl_vars, function(tl) {
        run_robust_lmm(tl, x, analysis_data_long)
      })
    })
  
  # view
  head(robust_lmm_results)
  # fixed-effects summary (slopes, CIs, p-values) ----
  fe_summary <- robust_lmm_results %>%
    unnest(fixed_effects) %>%
    rename(
      term      = term,
      estimate  = estimate,
      std_error = std.error,
      conf_low  = conf.low,
      conf_high = conf.high
    ) %>%
    select(
      tl_variable,
      locomotor_var,
      effect,
      term,
      estimate,
      std_error,
      statistic,
      conf_low,
      conf_high
    )
  
  # apply rounding
  fe_summary <- fe_summary %>%
    mutate(
      across(
        c(estimate, std_error, conf_low, conf_high),
        ~ dplyr::case_when(
          tl_variable %in% c("s_rpe", "d_rpe_b", "d_rpe_l") ~ round(.x, 0),
          tl_variable %in% c("player_load_per_metre", "avg_hr_percent_max") ~ round(.x, 2),
          TRUE ~ .x
        )
      )
    )
  
  # view
  head(fe_summary)
  # model comparisons ----
  model_comp <- robust_lmm_results %>%
    select(tl_variable, locomotor_var, model_comparison) %>%
    unnest(model_comparison) %>%
    # keep the row corresponding to the comparison (usually the second row)
    group_by(tl_variable, locomotor_var) %>%
    slice_tail(n = 1) %>%
    ungroup()
  
  # apply rounding
  model_comp <- model_comp %>%
    mutate(
      across(any_of(c("AIC","BIC","logLik","-2*log(L)","Chisq")), ~ round(.x, 2)),
      across(any_of(c("Df","npar")), ~ round(.x, 0)),
      across(any_of(c("Pr(>Chisq)")), ~ signif(.x, 3))
    )
  
  # view
  head(model_comp)
  # R² and ICC summary ----
  r2_icc_summary <- robust_lmm_results %>% 
    mutate(
      r2_marginal    = map_dbl(r2, ~ {
        if (is.null(.x)) return(NA_real_)
        val <- .x$R2_marginal
        if (is.null(val) || length(val) == 0) return(NA_real_)
        as.numeric(val[1])
      }),
      r2_conditional = map_dbl(r2, ~ {
        if (is.null(.x)) return(NA_real_)
        val <- .x$R2_conditional
        if (is.null(val) || length(val) == 0) return(NA_real_)
        as.numeric(val[1])
      }),
      icc_value      = map_dbl(icc, ~ {
        if (is.null(.x)) return(NA_real_)
        
        # list-style output
        if (is.list(.x)) {
          if ("ICC" %in% names(.x))            return(as.numeric(.x$ICC[1]))
          if ("ICC_adjusted" %in% names(.x))   return(as.numeric(.x$ICC_adjusted[1]))
          if ("ICC_conditional" %in% names(.x))return(as.numeric(.x$ICC_conditional[1]))
        }
        
        # data.frame / tibble-style output
        if (is.data.frame(.x)) {
          if ("ICC" %in% names(.x))            return(as.numeric(.x$ICC[1]))
          if ("ICC_adjusted" %in% names(.x))   return(as.numeric(.x$ICC_adjusted[1]))
          if ("ICC_conditional" %in% names(.x))return(as.numeric(.x$ICC_conditional[1]))
        }
        
        NA_real_
      })
    ) %>%
    select(
      tl_variable,
      locomotor_var,
      r2_marginal,
      r2_conditional,
      icc_value
    )
  
  # apply rounding
  r2_icc_summary <- r2_icc_summary %>%
    mutate(across(c(r2_marginal, r2_conditional, icc_value), ~ round(.x, 2)))
  
  # view
  head(r2_icc_summary)
  # extract HIIT session-specific slopes ----
  # function to extract slopes
  get_slopes <- function(fixed_df, vcov_mat, locomotor_var, z = 1.96) {
    
    # build linear combination vector "a" for a' beta
    lincomb_ci <- function(terms) {
      # terms = named numeric vector: c("termname1"=1, "termname2"=1, ...)
      coef_names <- colnames(vcov_mat)
      a <- rep(0, length(coef_names))
      names(a) <- coef_names
      
      # check terms exist
      missing_terms <- setdiff(names(terms), coef_names)
      if (length(missing_terms) > 0) {
        stop("These terms were not found in vcov matrix: ", paste(missing_terms, collapse = ", "))
      }
      
      a[names(terms)] <- terms
      
      # estimates from fixed_df
      beta_hat <- fixed_df$estimate
      names(beta_hat) <- fixed_df$term
      
      # ensure all needed betas exist in fixed_df
      missing_betas <- setdiff(names(terms), names(beta_hat))
      if (length(missing_betas) > 0) {
        stop("These terms were not found in fixed effects table: ", paste(missing_betas, collapse = ", "))
      }
      
      est <- sum(beta_hat[names(terms)] * terms)
      
      var_est <- as.numeric(t(a) %*% vcov_mat %*% a)
      se <- sqrt(var_est)
      
      tibble(
        estimate = est,
        se = se,
        conf_low = est - z * se,
        conf_high = est + z * se
      )
    }
    
    # term names used in model output
    term_main <- locomotor_var
    term_SI   <- paste0(locomotor_var, ":period_nameSI-HIIT")
    term_RST  <- paste0(locomotor_var, ":period_nameRST")
    
    li <- lincomb_ci(stats::setNames(1, term_main))
    si <- lincomb_ci(stats::setNames(c(1, 1), c(term_main, term_SI)))
    rs <- lincomb_ci(stats::setNames(c(1, 1), c(term_main, term_RST)))
    
    tibble(
      li_hiit      = li$estimate,
      li_hiit_conf_low  = li$conf_low,
      li_hiit_conf_high = li$conf_high,
      
      si_hiit      = si$estimate,
      si_hiit_conf_low  = si$conf_low,
      si_hiit_conf_high = si$conf_high,
      
      rst          = rs$estimate,
      rst_conf_low      = rs$conf_low,
      rst_conf_high     = rs$conf_high
    )
  }
  
  slopes <- robust_lmm_results %>%
    mutate(
      slopes = pmap(
        list(fixed_effects, vcov_fixed, locomotor_var),
        ~ get_slopes(fixed_df = ..1, vcov_mat = ..2, locomotor_var = ..3)
      )
    ) %>%
    unnest(slopes) %>%
    select(
      tl_variable,
      locomotor_var,
      li_hiit,
      li_hiit_conf_low,
      li_hiit_conf_high,
      si_hiit,
      si_hiit_conf_low,
      si_hiit_conf_high,
      rst,
      rst_conf_low,
      rst_conf_high)
  
  # apply rounding
  slopes <- slopes %>%
    mutate(across(where(is.numeric), ~ round(.x, 2)))
  
  # view
  head(slopes)

Player Preferences

Clean survey data

  # clean & extract 1st-ranked responses ----
  quals_clean <- quals %>%
    select(
      player_id,
      most_difficult =
        please_rank_the_hiit_sessions_in_order_of_perceived_exertion_most_difficult_to_least_difficult,
      most_enjoyable =
        please_rank_the_hiit_sessions_in_order_of_most_to_least_enjoyable,
      self_profile =
        what_physical_profile_do_you_believe_you_are
    ) %>%
    
    # extract FIRST choice only (most difficult / most enjoyable)
    mutate(
      most_difficult = str_trim(str_split(most_difficult, ";", simplify = TRUE)[, 1]),
      most_enjoyable = str_trim(str_split(most_enjoyable, ";", simplify = TRUE)[, 1])
    )
  
  # tidy responses ----
  quals_clean <- quals_clean %>%
    mutate(
      most_difficult = case_when(
        str_detect(most_difficult, "Long") ~ "LI-HIIT",
        str_detect(most_difficult, "Short") ~ "SI-HIIT",
        str_detect(most_difficult, "Repeated") ~ "RST",
        TRUE ~ most_difficult
      ),
      most_enjoyable = case_when(
        str_detect(most_enjoyable, "Long") ~ "LI-HIIT",
        str_detect(most_enjoyable, "Short") ~ "SI-HIIT",
        str_detect(most_enjoyable, "Repeated") ~ "RST",
        TRUE ~ most_enjoyable
      ),
      self_profile = case_when(
        str_detect(self_profile, "Speed") ~ "Speed",
        str_detect(self_profile, "Hybrid") ~ "Hybrid",
        str_detect(self_profile, "Endurance") ~ "Endurance",
        TRUE ~ self_profile
      )
    )
  
  # view
  head(quals_clean)

Calculate proportions

  # proportions + Wilson CI function ----
  prop_wilson <- function(data, var, question_label) {
    data %>%
      count({{ var }}) %>%
      mutate(
        question = question_label,
        n_total  = sum(n),
        prop     = n / n_total
      ) %>%
      bind_cols(
        binom.confint(
          x = .$n,
          n = .$n_total,
          methods = "wilson"
        )[, c("lower", "upper")]
      ) %>%
      rename(response = {{ var }}) %>%
      select(
        question,
        response,
        n,
        n_total,
        prop,
        lower,
        upper
      )
  }
  
  # build combined table ----
  survey_proportions <- bind_rows(
    prop_wilson(
      quals_clean,
      most_difficult,
      "Most difficult HIIT session"
    ),
    prop_wilson(
      quals_clean,
      most_enjoyable,
      "Most enjoyable HIIT session"
    ),
    prop_wilson(
      quals_clean,
      self_profile,
      "Self-perceived locomotor profile"
    )
  ) %>%
    mutate(
      proportion_ci = sprintf(
        "%.0f%% (%.0f to %.0f%%)",
        prop * 100,
        lower * 100,
        upper * 100
      )
    ) %>%
    select(
      question,
      response,
      n,
      n_total,
      proportion_ci
    )
  
  # view
  head(survey_proportions)

Plot survey proportions

  # collapsed-flow alluvial in 1 figure (facetted: difficult vs enjoyable) ----
  # varible orders (bottom -> top stack order)
  profile_levels <- c("Speed", "Hybrid", "Endurance")
  hiit_levels    <- c("RST", "SI-HIIT", "LI-HIIT")
  
  # palettes ----
  profile_cols <- c(
    Endurance = "#2E7D32",
    Hybrid    = "#EF6C00",
    Speed     = "#C62828"
  )
  
  hiit_cols <- c(
    `LI-HIIT` = "#E0F7FA",
    `SI-HIIT` = "#26C6DA",
    RST       = "#006064"
  )
  
  # helpers ----
  first_rank <- function(x) {
    x %>% as.character() %>%
      str_split(";", simplify = TRUE) %>%
      .[, 1] %>% str_squish() %>% na_if("")
  }
  
  recode_hiit <- function(x) {
    x <- str_squish(as.character(x))
    case_when(
      str_detect(str_to_lower(x), "long intervals|6 x 2|2 min") ~ "LI-HIIT",
      str_detect(str_to_lower(x), "short intervals|30 s|8 x 30") ~ "SI-HIIT",
      str_detect(str_to_lower(x), "repeated sprint|out/back|15 m") ~ "RST",
      x %in% hiit_levels ~ x,
      TRUE ~ NA_character_
    )
  }
  
  recode_profile <- function(x) {
    x <- str_squish(as.character(x))
    case_when(
      str_detect(str_to_lower(x), "endurance") ~ "Endurance",
      str_detect(str_to_lower(x), "hybrid")    ~ "Hybrid",
      str_detect(str_to_lower(x), "speed")     ~ "Speed",
      x %in% profile_levels ~ x,
      TRUE ~ NA_character_
    )
  }
  
  # 1) player-level responses ----
  sankey_player <- quals %>%
    transmute(
      player_id = player_id,
      profile   = recode_profile(what_physical_profile_do_you_believe_you_are),
      difficult = recode_hiit(first_rank(
        please_rank_the_hiit_sessions_in_order_of_perceived_exertion_most_difficult_to_least_difficult
      )),
      enjoyable = recode_hiit(first_rank(
        please_rank_the_hiit_sessions_in_order_of_most_to_least_enjoyable
      ))
    ) %>%
    filter(!is.na(profile), !is.na(difficult), !is.na(enjoyable)) %>%
    mutate(
      profile   = factor(profile,   levels = profile_levels),
      difficult = factor(difficult, levels = hiit_levels),
      enjoyable = factor(enjoyable, levels = hiit_levels)
    )
  
  # 2) long form for the two questions (collapsed to profile -> HIIT) ----
  twoq_long <- sankey_player %>%
    select(player_id, profile, difficult, enjoyable) %>%
    pivot_longer(
      cols = c(difficult, enjoyable),
      names_to = "question",
      values_to = "resp"
    ) %>%
    mutate(
      question = recode(
        question,
        difficult = "A. Which HIIT session was most difficult?",
        enjoyable = "B. Which HIIT session was most enjoyable?"
      ),
      resp = factor(resp, levels = hiit_levels)
    )
  
  # 3) collapsed flow counts per facet ----
  sankey_counts_2 <- twoq_long %>%
    count(question, profile, resp, name = "freq")
  
  # 4) stratum totals + manual midpoints per facet (stage 1,2) ----
  stratum_totals_2 <- bind_rows(
    twoq_long %>%
      count(question, stratum = profile, name = "n") %>%
      mutate(stage_num = 1L, stratum = factor(stratum, levels = profile_levels)),
    twoq_long %>%
      count(question, stratum = resp, name = "n") %>%
      mutate(stage_num = 2L, stratum = factor(stratum, levels = hiit_levels))
  ) %>%
    group_by(question, stage_num) %>%
    mutate(
      n_total = sum(n),
      prop    = n / n_total
    ) %>%
    ungroup() %>%
    group_by(question, stage_num) %>%
    group_modify(~{
      ci <- binom.confint(x = .x$n, n = .x$n_total, methods = "wilson")[, c("lower","upper")]
      bind_cols(.x, ci)
    }) %>%
    ungroup() %>%
    mutate(
      label = paste0(
        "n = ", n, ";\n",
        sprintf("%.0f%% (%.0f to %.0f%%)", prop*100, lower*100, upper*100)
      )
    ) %>%
    arrange(question, stage_num, desc(as.numeric(stratum))) %>%
    group_by(question, stage_num) %>%
    mutate(
      ymin = cumsum(n) - n,
      ymax = cumsum(n),
      ymid = (ymin + ymax) / 2
    ) %>%
    ungroup() %>%
    mutate(stage_num = as.numeric(stage_num))
  
  # Keep ALL labels on "most difficult" facet,
  # but on "most enjoyable" facet keep HIIT (stage 2) labels only
  
  # 4b) outside stratum labels (left for profiles, right for HIIT) ----
  profile_outside_labels <- stratum_totals_2 %>%
    filter(stage_num == 1) %>%
    mutate(
      x_lab = 0.75,                    # further LEFT of the first bar
      lab   = as.character(stratum),
      hjust = 1
    )
  
  hiit_outside_labels <- stratum_totals_2 %>%
    filter(stage_num == 2) %>%
    mutate(
      x_lab = 2.25,                    # further RIGHT of the second bar
      lab   = as.character(stratum),
      hjust = 0
    )
  
  # 5) plot (facetted) ----
  survey_alluvial_collapsed <- ggplot(
    sankey_counts_2,
    aes(axis1 = profile, axis2 = resp, y = freq)
  ) +
    # flows (no legend)
    geom_alluvium(
      aes(fill = profile),
      alpha = 0.30,
      width = 0.18,
      colour = NA,
      show.legend = FALSE
    ) +
    scale_fill_manual(values = profile_cols, guide = "none") +
    
    ggnewscale::new_scale_fill() +
    
    # STRATA (no legend)
    geom_stratum(
      aes(fill = after_stat(stratum)),
      width  = 0.45,
      colour = "#37474F",
      linewidth = 0.3,
      show.legend = FALSE
    ) +
    scale_fill_manual(values = c(profile_cols, hiit_cols), guide = "none") +
    
    # midpoint n/% labels inside segments
    geom_label(
      data = stratum_totals_2,
      aes(x = stage_num, y = ymid, label = label),
      inherit.aes = FALSE,
      fill = "white",
      colour = "black",
      label.size = 0.45,
      size = 5.2
    ) +
    
    # outside stratum labels (vertical alignment via ymid; outside via x_lab)
    geom_text(
      data = profile_outside_labels,
      aes(x = x_lab, y = ymid, label = lab),
      inherit.aes = FALSE,
      hjust = 1,
      size = 5,
      fontface = "plain"
    ) +
    geom_text(
      data = hiit_outside_labels,
      aes(x = x_lab, y = ymid, label = lab),
      inherit.aes = FALSE,
      hjust = 0,
      size = 5,
      fontface = "plain"
    ) +
    
    # column headings
    scale_x_discrete(labels = NULL, expand = expansion(add = 0.15)) +
    annotate("text", x = 1, y = Inf, label = "Self-Perceived Locomotor Profile",
             vjust = 2, fontface = "bold", size = 5) +
    annotate("text", x = 2, y = Inf, label = "HIIT Session",
             vjust = 2, fontface = "bold", size = 5) +
    
    # allow outside labels to render
    coord_cartesian(xlim = c(0.55, 2.45), clip = "off") +
    
    facet_wrap(~question, ncol = 2) +
    
    theme(
      panel.background = element_blank(),
      axis.text = element_blank(),
      axis.title = element_blank(),
      axis.ticks = element_blank(),
      legend.position = "none",                 # <- remove legend
      strip.text = element_text(size = 13, face = "bold"),
      # extra left/right margin to fit outside labels
      plot.margin  = margin(40, 60, 10, 60)
    )
  
  # view
  survey_alluvial_collapsed

RStudio Version, R Version, Environment and Package Versions

Posit team (2026). RStudio: Integrated Development Environment for R. Posit Software, PBC, Boston, MA. URL http://www.posit.co/.

$mode
[1] "desktop"

$version
[1] ‘2026.1.2.418’

$long_version
[1] "2026.01.2+418"

$release_name
[1] "Apple Blossom"

R version 4.5.3 (2026-03-11)
Platform: aarch64-apple-darwin20
Running under: macOS Tahoe 26.3.1

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] grateful_0.3.0      ggnewscale_0.5.2    ggalluvial_0.12.6   scales_1.4.0        binom_1.1-1.1       performance_0.16.0  broom.mixed_0.2.9.7 robustlmm_3.4-2     lme4_2.0-1          Matrix_1.7-5       
[11] viridis_0.6.5       viridisLite_0.4.3   broom_1.0.12        patchwork_1.3.2     writexl_1.5.4       lubridate_1.9.5     forcats_1.0.1       stringr_1.6.0       dplyr_1.2.0         purrr_1.2.1        
[21] readr_2.2.0         tidyr_1.3.2         tibble_3.3.1        ggplot2_4.0.2       tidyverse_2.0.0     janitor_2.2.1       readxl_1.4.5       

loaded via a namespace (and not attached):
 [1] tidyselect_1.2.1   farver_2.1.2       S7_0.2.1           fastmap_1.2.0      digest_0.6.39      timechange_0.4.0   estimability_1.5.1 lifecycle_1.0.5    fastGHQuad_1.0.1   magrittr_2.0.4     compiler_4.5.3    
[12] rlang_1.1.7        tools_4.5.3        yaml_2.3.12        knitr_1.51         labeling_0.4.3     bit_4.6.0          RColorBrewer_1.1-3 rsconnect_1.7.0    withr_3.0.2        grid_4.5.3         xtable_1.8-8      
[23] future_1.70.0      emmeans_2.0.2      globals_0.19.1     MASS_7.3-65        insight_1.4.6      cli_3.6.5          mvtnorm_1.3-6      rmarkdown_2.31     crayon_1.5.3       ragg_1.5.2         reformulas_0.4.4  
[34] generics_0.1.4     otel_0.2.0         rstudioapi_0.18.0  robustbase_0.99-7  tzdb_0.5.0         minqa_1.2.8        splines_4.5.3      parallel_4.5.3     cellranger_1.1.0   vctrs_0.7.2        boot_1.3-32       
[45] hms_1.1.4          bit64_4.6.0-1      listenv_0.10.1     systemfonts_1.3.2  glue_1.8.0         parallelly_1.46.1  nloptr_2.2.1       DEoptimR_1.1-4     codetools_0.2-20   stringi_1.8.7      gtable_0.3.6      
[56] pillar_1.11.1      furrr_0.3.1        htmltools_0.5.9    R6_2.6.1           textshaping_1.0.5  Rdpack_2.6.6       evaluate_1.0.5     vroom_1.7.0        lattice_0.22-9     rbibutils_2.4.1    backports_1.5.0   
[67] snakecase_0.11.1   renv_1.2.0         Rcpp_1.1.1         coda_0.19-4.1      gridExtra_2.3      nlme_3.1-168       xfun_0.57          pkgconfig_2.0.3