Pacing Strategies and Variability During Time- vs Distance-Based Trials for Estimating Maximal Aerobic Speed in Academy Soccer Players

Example R code used for statistical analyses and data visualisations

Authors

Kieran Smith

Tzlil Shushan

Matthew D. Wright

Paul Chesterton

Alastair Hamilton

Jonathan M. Taylor

Published

July 7, 2025

Data & Code

The data supporting this study’s findings are available upon request or openly via OSF repository at: https://osf.io/k86je

The code used to analyse the data used in this study is available upon request or openly via OSF repository at: https://osf.io/wm7s4

The code blocks below use code-link. All packages and syntax are hyperlinked to their official documentation. Click on package names or functions throughout the code to learn more.

Packages

The following packages were used

library(readxl) # read excel file
library(tidyverse) # install and load core packages from the tidyverse
library(writexl) # write excel files/sheets
library(roll) # calculate rolling statistics for time-series data
library(lme4) # fitting and analyzing mixed models
library(lspline) # manually set knots within a model to create linear splines 
library(patchwork) # arange multi-panel plots
library(grateful) # citation of R packages

Load Data

Load data into tidy format

  participant_info <- read_excel("data.xlsx", sheet = "participant_info")
  enviro_cond <- read_excel("data.xlsx", sheet = "enviro_cond")
  data <- read_excel("data.xlsx", sheet = "gps_raw_data")

  # view participant_info df
  head(participant_info)
  # view enviro_cond df
  head(enviro_cond)
  # view data df
  head(data)

Descriptive Statistics

Calculate descriptive statistics for: participant characteristics, player positions, environmental conditions, GPS quality, MAS and within-player differences between tests for Total Distance

  # participant characteristics -----
  # function to calculate participant summary stats
  summary_stats <- function(data, group_vars) {
    participant_info %>%
      pivot_longer(
        cols = c(age_years, stature_cm, body_mass_kg),
        names_to = "variable",
        values_to = "value"
      ) %>%
      group_by(across(all_of(group_vars)), variable) %>%
      summarise(
        n = round(n(), 1),
        mean = round(mean(value, na.rm = TRUE), 1),
        sd = round(sd(value, na.rm = TRUE), 1),
        min = round(min(value, na.rm = TRUE), 1),
        max = round(max(value, na.rm = TRUE), 1),
        .groups = "drop"
      ) %>%
      mutate(variable = factor(
        variable,
        levels = c("age_years", "stature_cm", "body_mass_kg")
      )) %>%
      arrange(across(all_of(group_vars)), variable)
  }
  
  # overall summary (without squad grouping)
  overall_characteristics <- participant_info %>%
    summary_stats(group_vars = character(0)) %>%
    mutate(squad = "All")
  
  # squad-level summary
  squad_characteristics <- participant_info %>%
    summary_stats(group_vars = "squad")
  
  # combine summaries
  parcharacteristics <- bind_rows(overall_characteristics, squad_characteristics) %>%
    select(variable, squad, n, mean, sd, min, max)
  
  # view
  head(parcharacteristics)
  # count of player positions -----
  position_counts <- participant_info %>%
    count(position, name = "count")
  
  # view
  head(position_counts)
  # environmental conditions ----
  enviro_summary <- enviro_cond %>%
    select(-player_id, -test, -wind_direction) %>%  # exclude player_id to avoid type mismatch
    mutate(wind_speed_km_hr = wind_speed_mph * 1.60934) %>%  # convert wind speed from mph to kmh
    select(-wind_speed_mph) %>%  # drop original mph column
    pivot_longer(cols = everything(),
                 names_to = "variable",
                 values_to = "value") %>%
    group_by(variable) %>%
    summarise(
      n = n(),
      mean = round(mean(value, na.rm = TRUE), 1),
      sd = round(sd(value, na.rm = TRUE), 1),
      min = round(min(value, na.rm = TRUE), 1),
      max = round(max(value, na.rm = TRUE), 1),
      .groups = "drop"
    ) %>%
    mutate(variable = factor(
      variable,
      levels = c(
        "temp_c",
        "humidity_percent",
        "pressure_mbar",
        "wind_speed_km_hr"
      )
    )) %>%
    arrange(variable)
  
  # view
  head(enviro_summary)
  # gps quality ----
  gps_quality <- data %>%
    select(`Positional Quality (%)`, HDOP, `#Sats`) %>%
    pivot_longer(cols = everything(),
                 names_to = "variable",
                 values_to = "value") %>%
    mutate(variable = factor(variable, levels = c("Positional Quality (%)", "HDOP", "#Sats"))) %>%
    group_by(variable) %>%
    summarise(
      mean = round(mean(value, na.rm = TRUE), 2),
      sd = round(sd(value, na.rm = TRUE), 2),
      min = round(min(value, na.rm = TRUE), 2),
      max = round(max(value, na.rm = TRUE), 2)
    )
  
  # view
  head(gps_quality)
  # mas ----
  data <- data %>%
    group_by(player_id, test) %>%
    mutate(
      total_dist = max(Odometer),
      # total distance as captured by the gps
      time = max(Seconds),
      # time in seconds
      velocity_mean_roll = roll_mean(Velocity, 10)
    )  %>% # mas from gps output (instantaneous rolling mean velocity)
    ungroup() %>%
    glimpse()
Rows: 209,958
Columns: 17
$ player_id                <chr> "player_14", "player_14", "player_14", "playe…
$ test                     <chr> "1800mTT", "1800mTT", "1800mTT", "1800mTT", "…
$ Timestamp                <dttm> 2024-08-09 10:56:37, 2024-08-09 10:56:37, 20…
$ Seconds                  <dbl> 0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, …
$ Velocity                 <dbl> 3.56, 4.94, 6.26, 7.33, 8.18, 9.50, 10.70, 11…
$ Acceleration             <dbl> 1.9098673, 2.3974025, 2.7080548, 2.7785430, 2…
$ Odometer                 <dbl> 0.00, 0.26, 0.57, 0.88, 1.17, 1.60, 2.00, 2.3…
$ Latitude                 <dbl> 54.03289, 54.03289, 54.03289, 54.03289, 54.03…
$ Longitude                <dbl> -1.103038, -1.103041, -1.103045, -1.103050, -…
$ `Heart Rate`             <dbl> 105, 105, 105, 105, 105, 105, 105, 105, 105, …
$ `Player Load`            <dbl> 0.0, 0.0, 0.1, 0.1, 0.1, 0.2, 0.3, 0.3, 0.3, …
$ `Positional Quality (%)` <dbl> 71.5, 70.8, 71.2, 73.7, 74.1, 72.5, 73.4, 72.…
$ HDOP                     <dbl> 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.69, 0.6…
$ `#Sats`                  <dbl> 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 1…
$ total_dist               <dbl> 1764.78, 1764.78, 1764.78, 1764.78, 1764.78, …
$ time                     <dbl> 425.9, 425.9, 425.9, 425.9, 425.9, 425.9, 425…
$ velocity_mean_roll       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 8.731, 9.…
  data_summary <- data %>%
    group_by(player_id, test) %>% # group for player id and test
    summarise(
      total_dist = round(first(total_dist), 0),
      # total distance
      velocity_mean_roll = round(mean(velocity_mean_roll, na.rm = T), 2)
    )  # mas from gps output (instantaneous rolling mean velocity)
  
  mas_summary <- data_summary %>%
    ungroup() %>%  # ensure no group_by is active
    summarise(
      variable = "mas",
      mean = round(mean(velocity_mean_roll, na.rm = TRUE), 2),
      sd   = round(sd(velocity_mean_roll, na.rm = TRUE), 2),
      min  = round(min(velocity_mean_roll, na.rm = TRUE), 2),
      max  = round(max(velocity_mean_roll, na.rm = TRUE), 2)
    )
  
  # view
  head(mas_summary)
  # total distance within-player differences between tests ----
  td_diff_summary <- data_summary %>%
    select(player_id, test, total_dist) %>%
    pivot_wider(names_from = test, values_from = total_dist) %>%
    mutate(diff = `1800mTT` - `6minDT`) %>%
    ungroup() %>%  # again, make sure it's not grouped
    summarise(
      variable = "total_dist",
      mean = round(mean(diff, na.rm = TRUE), 0),
      sd   = round(sd(diff, na.rm = TRUE), 0),
      min  = round(min(diff, na.rm = TRUE), 0),
      max  = round(max(diff, na.rm = TRUE), 0)
    )
  
  # view
  head(td_diff_summary)

Percentage Completion

Calculate 10% completion stages, summarise velocity for each 10% interval and identify peak mean velocity for each test

  # calculate 10% completion stages for each player's tests
  data <- data %>%
    group_by(player_id, test) %>%
    mutate(
      # create a new column to capture the percentage completion groups
      test_duration = max(Seconds),
      # total duration of test
      percent_completion = (Seconds / test_duration) * 100,
      # percent completion calculation
      interval_group = cut(
        percent_completion,
        breaks = seq(0, 100, by = 10),
        # breaks for exact intervals
        labels = paste0(seq(10, 100, by = 10), "%"),
        # match labels to breaks
        right = FALSE,
        # include lower bound in the interval
        include.lowest = TRUE  # ensure 0.0 seconds is included in the first interval
      )
    ) %>%
    ungroup()
  
  # summarise mean and sd of velocity for each 10% interval group for all tests
  df_velocity_summary_group <- data %>%
    group_by(test, interval_group) %>%
    summarize(
      mean_velocity = round(mean(velocity_mean_roll, na.rm = TRUE), 2),
      sd_velocity = round(sd(velocity_mean_roll, na.rm = TRUE), 2),
      .groups = "drop"
    )
  
  # identify peak mean velocity for each test and mutate peak_velocity_stage col
  df_velocity_summary_group <- df_velocity_summary_group %>%
    group_by(test) %>%
    mutate(peak_velocity_stage = if_else(
      mean_velocity == max(mean_velocity, na.rm = TRUE),
      paste0(as.character(interval_group), " stage"),
      NA_character_
    )) %>%
    ungroup()
  
  # view
  head(df_velocity_summary_group)

Visualise Pacing Performance

Convert velocity summary for plotting

  # summarise mean velocity for each 10% interval group by player
  df_velocity_summary <- data %>%
    group_by(player_id, test, interval_group) %>%
    summarize(mean_velocity = mean(velocity_mean_roll, na.rm = TRUE),
              .groups = "drop")
  
  # convert interval_group col to numeric for plotting
  df_velocity_summary <- df_velocity_summary %>%
    mutate(interval_group_numeric = as.numeric(gsub("%", "", interval_group)))
  
  # view
  head(df_velocity_summary)

Visualise group-level pacing profiles by test

  # group-level pacing profiles by test ----
  df_velocity_summary_group %>%
    ggplot(aes(x = interval_group, y = mean_velocity)) +
    # individual player data (light points)
    geom_point(
      data = df_velocity_summary,
      aes(x = interval_group, y = mean_velocity, group = player_id),
      shape = 21,
      size = 2,
      colour = "grey50",
      fill = "grey80",
      alpha = 0.3
    ) +
    # group-level mean data (dark points)
    geom_point(
      shape = 21,
      size = 3.2,
      colour = "grey25",
      fill = "#6CA0DC"
    ) +
    # loess smoothed curve (group average)
    geom_smooth(
      aes(group = 1),
      method = "loess",
      se = FALSE,
      colour = "#e35a5a",
      size = 0.6,
      linetype = "solid"
    ) +
    # labels for profiles
    geom_text(
      aes(
        x = 5.5,
        y = 19,
        label = case_when(
          test == "1800mTT" ~ "U-Shape Profile",
          test == "6minDT" ~ "Reverse J-Shape Profile",
          TRUE ~ ""
        )
      ),
      vjust = 0,
      size = 6,
      colour = "grey25",
      fontface = "bold"
    ) +
    # mean velocity labels @ top
    geom_text(
      aes(
        x = interval_group,
        y = 21.5,
        label = round(mean_velocity, 1)
      ),
      vjust = 0,
      size = 6,
      colour = "grey25",
      fontface = "bold"
    ) +
    # facet by test
    facet_wrap( ~ test, ncol = 4) +
    # theme and labels
    theme_classic() +
    labs(x = "Interval (%)",
         y = expression(bold("Mean Velocity (km·h"^-1 * ")")),
         title = "") +
    scale_y_continuous(limits = c(10, 22), breaks = seq(12, 20, 1)) +
    theme(
      axis.text.x = element_text(angle = 45, hjust = 1),
      axis.title.x = element_text(size = 20, face = "bold"),
      axis.title.y = element_text(size = 20, face = "bold"),
      axis.text = element_text(size = 16),
      strip.background = element_rect(fill = "grey80"),
      strip.text = element_text(size = 16, face = "bold")
    )

Visualise individual pacing profiles by player and test

  # individual pacing profiles by player and test ----
  df_velocity_summary %>%
    filter(test %in% c("1800mTT", "6minDT")) %>%
    ggplot(aes(
      x = interval_group,
      y = mean_velocity,
      group = interaction(player_id, test),
      color = test
    )) +
    # loess smoothed curve (individual trends)
    geom_smooth(
      aes(group = interaction(player_id, test), color = test),
      method = "loess",
      se = FALSE,
      size = 0.6,
      linetype = "solid"
    ) +
    # adjust y-axis limits & breaks
    scale_y_continuous(limits = c(10, 21), breaks = seq(12, 20, 1)) +
    # mean velocity labels & position
    geom_text(
      aes(
        x = interval_group,
        y = ifelse(test == "1800mTT", 11, 10),
        # Set 1800mTT and 6minDT label position
        label = round(mean_velocity, 1),
        color = test
      ),
      vjust = 0.5,
      size = 5,
      fontface = "bold"
    ) +
    # facet by player_id
    facet_wrap( ~ player_id, ncol = 4) +
    # theme & labels
    theme_classic() +
    labs(x = "Interval (%)",
         y = expression(bold("Mean Velocity (km·h"^-1 * ")")),
         color = "Test") +
    scale_color_manual(values = c("1800mTT" = "#1b9e77", "6minDT" = "#d95f02")) +  # Custom colors for differentiation
    theme(
      axis.text.x = element_text(
        angle = 45,
        hjust = 1,
        size = 16,
        face = "plain"
      ),
      axis.text.y = element_text(size = 16, face = "plain"),
      axis.title.x = element_text(size = 20, face = "bold"),
      axis.title.y = element_text(size = 20, face = "bold"),
      legend.text = element_text(size = 16, face = "plain"),
      legend.title = element_text(size = 16, face = "bold"),
      strip.background = element_rect(fill = "grey80"),
      strip.text = element_text(size = 16, face = "bold"),
      legend.position = "bottom",
      legend.background = element_rect(fill = "gray95", colour = "gray95")
    )

Re-Scale Dataset For 10% Change

Re-scale dataset for 10% change, round seconds for each 10 Hz data row and create clean dataset (for analysis & modelling use)

  # add column for 10% completion
  data <- data %>%
    mutate(Perc_stan = percent_completion / 10)
  
  # add column with seconds rounded for each 10 Hz data row
  data <- data %>%
    group_by(player_id, test) %>%
    mutate(
      Seconds_rounded = ceiling(Seconds),
      velocity_mean_roll_clean = ave(velocity_mean_roll, Seconds_rounded, FUN = mean)
    ) %>%
    ungroup()
  
  # create clean dataset (for analysis & modelling use)
  data_clean <- data %>%
    group_by(player_id, test, Seconds_rounded) %>%
    slice_tail(n = 1) %>%
    ungroup()
  
  # view
  head(data_clean)

Summarise Velocity in Each Interval by Test and Overall

Summarise velocity in each interval by test and phase & overall for each phase

  # by test and phase
  desc_summary <- data_clean %>%
    mutate(
      Phase = case_when(
        percent_completion >= 0 & percent_completion <= 30 ~ "0–30%",
        percent_completion >= 31 & percent_completion <= 80 ~ "30–80%",
        percent_completion >= 81 & percent_completion <= 100 ~ "80–100%",
        TRUE ~ NA_character_
      )
    )
  
  desc_summary <- desc_summary %>%
    filter(Phase != "NA") %>%
    group_by(test, Phase) %>%
    summarise(mean = round(mean(velocity_mean_roll_clean, na.rm = T), 2), sd = round(sd(velocity_mean_roll_clean, na.rm = T), 2)) %>% glimpse()
Rows: 6
Columns: 4
Groups: test [2]
$ test  <chr> "1800mTT", "1800mTT", "1800mTT", "6minDT", "6minDT", "6minDT"
$ Phase <chr> "0–30%", "30–80%", "80–100%", "0–30%", "30–80%", "80–100%"
$ mean  <dbl> 16.42, 14.99, 16.05, 16.88, 15.30, 15.37
$ sd    <dbl> 3.09, 1.33, 1.91, 2.40, 1.36, 1.74
  # overall for each phase
  overall_summary <- desc_summary %>%
    group_by(Phase) %>%
    summarise(test = "Overall",
              mean = round(mean(mean, na.rm = TRUE), 2),
              sd = round(mean(sd, na.rm = TRUE), 2)) # Taking the mean of SDs as an approximation
  
  # combine by test summary with overall summary
  desc_summary <- bind_rows(desc_summary, overall_summary)
  
  # view
  head(desc_summary)

Spline Linear Mixed-Effects Model

Spline linear mixed-effects model

  # position knots @ 30% and 80%
  model_spline <- lmer(
    velocity_mean_roll_clean ~ lspline(Perc_stan, c(3, 8)) * test +
      (0 + lspline(Perc_stan, c(3, 8)) |
         player_id:test),
    data = data_clean,
    control = lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e5))
  ) # bound optimization by quadratic approximation optimiser & increase iterations
  
  # model summary
  summary(model_spline)
Linear mixed model fit by REML ['lmerMod']
Formula: velocity_mean_roll_clean ~ lspline(Perc_stan, c(3, 8)) * test +  
    (0 + lspline(Perc_stan, c(3, 8)) | player_id:test)
   Data: data_clean
Control: lmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))

REML criterion at convergence: 80608

Scaled residuals: 
     Min       1Q   Median       3Q      Max 
-11.2348  -0.3593   0.0213   0.3967   4.6000 

Random effects:
 Groups         Name                         Variance Std.Dev. Corr       
 player_id:test lspline(Perc_stan, c(3, 8))1 0.13318  0.3649              
                lspline(Perc_stan, c(3, 8))2 0.04299  0.2073   -0.42      
                lspline(Perc_stan, c(3, 8))3 0.49150  0.7011   -0.12 -0.28
 Residual                                    2.67685  1.6361              
Number of obs: 20941, groups:  player_id:test, 54

Fixed effects:
                                        Estimate Std. Error t value
(Intercept)                             17.93244    0.05261 340.873
lspline(Perc_stan, c(3, 8))1            -0.95434    0.07415 -12.871
lspline(Perc_stan, c(3, 8))2            -0.01759    0.04151  -0.424
lspline(Perc_stan, c(3, 8))3             1.05452    0.14064   7.498
test6minDT                               0.49795    0.07745   6.429
lspline(Perc_stan, c(3, 8))1:test6minDT -0.04585    0.10530  -0.435
lspline(Perc_stan, c(3, 8))2:test6minDT -0.04748    0.05888  -0.806
lspline(Perc_stan, c(3, 8))3:test6minDT -0.79906    0.19951  -4.005

Correlation of Fixed Effects:
               (Intr) ls(P_,c(3,8))1 ls(P_,c(3,8))2 ls(P_,c(3,8))3 tst6DT
ls(P_,c(3,8))1 -0.287                                                    
ls(P_,c(3,8))2  0.075 -0.437                                             
ls(P_,c(3,8))3 -0.023 -0.095         -0.301                              
test6minDT     -0.679  0.195         -0.051          0.015               
l(P_,c(3,8))1:  0.202 -0.704          0.308          0.067         -0.297
l(P_,c(3,8))2: -0.053  0.308         -0.705          0.212          0.078
l(P_,c(3,8))3:  0.016  0.067          0.212         -0.705         -0.024
               l(P_,c(3,8))1: l(P_,c(3,8))2:
ls(P_,c(3,8))1                              
ls(P_,c(3,8))2                              
ls(P_,c(3,8))3                              
test6minDT                                  
l(P_,c(3,8))1:                              
l(P_,c(3,8))2: -0.438                       
l(P_,c(3,8))3: -0.094         -0.303        

Calculate fixed-effect estimates and confidence intervals

  # fixed effect values
  fe_values <- fixef(model_spline)
  
  # create data frame with all estimates
  fe_values_estimates <- data.frame(
    "Test" = rep(c("1800mTT", "6minDT"), 4),
    "Estimate" = c(
      fe_values[1],
      fe_values[1] + fe_values[5],
      fe_values[2],
      fe_values[2] + fe_values[6],
      fe_values[3],
      fe_values[3] + fe_values[7],
      fe_values[4],
      fe_values[4] + fe_values[8]
    ),
    "Interval" = c(
      "Start",
      "Start",
      "0–30%",
      "0–30%",
      "30–80%",
      "30–80%",
      "80–100%",
      "80–100%"
    )
  )
  
  
  # ci's for every slope within test ----
  fe_cis <- confint(
    model_spline,
    parm = c(
      "(Intercept)",
      "lspline(Perc_stan, c(3, 8))1",
      "lspline(Perc_stan, c(3, 8))2",
      "lspline(Perc_stan, c(3, 8))3",
      "test6minDT",
      "lspline(Perc_stan, c(3, 8))1:test6minDT",
      "lspline(Perc_stan, c(3, 8))2:test6minDT",
      "lspline(Perc_stan, c(3, 8))3:test6minDT"
    )
  )
  
  # create data frame with ci's
  fe_cis_values <- data.frame(
    "Lower_CI" = c(
      fe_cis[1],
      fe_cis[1] + fe_cis[5],
      # Create data frame with all estimates
      fe_cis[2],
      fe_cis[2] + fe_cis[6],
      fe_cis[3],
      fe_cis[3] + fe_cis[7],
      fe_cis[4],
      fe_cis[4] + fe_cis[8]
    ),
    "Upper_CI" = c(
      fe_cis[9],
      fe_cis[9] + fe_cis[13],
      fe_cis[10],
      fe_cis[10] + fe_cis[14],
      fe_cis[11],
      fe_cis[11] + fe_cis[15],
      fe_cis[12],
      fe_cis[12] + fe_cis[16]
    )
  )
  
  # merge data sets into one df
  fe_values_overall <- cbind(fe_values_estimates, fe_cis_values)
  fe_values_overall <- fe_values_overall %>%
    mutate(across(where(is.numeric), ~ round(.x, 2)))
  
  # view
  head(fe_values_overall)

Visualise spline linear mixed-effects model (estimated starting velocity [intercept] and rate of change in velocity [slope])

  # custom colour palette
  custom_colors <- c("1800mTT" = "#1b9e77", "6minDT" = "#d95f02")  # Teal & Orange
  
  
  # fixed-effect estimates scatter plot ----
  # estimated starting velocity (intercept)
  p_fe_int <- fe_values_overall %>%
    filter(Interval == "Start") %>%
    ggplot(aes(x = Interval, y = Estimate, fill = Test)) +
    geom_errorbar(
      aes(ymin = Lower_CI, ymax = Upper_CI),
      colour = "grey40",
      width = 0.05,
      position = position_dodge(width = 0.25)
    ) +
    geom_point(
      position = position_dodge(width = 0.25),
      size = 3.2,
      shape = 21,
      stroke = .35
    ) +
    scale_fill_manual(values = custom_colors) +
    scale_y_continuous(limits = c(17.25, 18.75),
                       breaks = seq(17.5, 18.5, .5)) +
    labs(
      subtitle = "A",
      x = "",
      y = expression(bold("Mean Velocity (km·h"^-1 * ")")),
      fill = "Test"
    ) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(size = 16, face = "plain"),
      axis.text.y = element_text(size = 16, face = "plain"),
      axis.title.x = element_text(size = 20, face = "bold"),
      axis.title.y = element_text(size = 20, face = "bold"),
      legend.text = element_text(size = 16, face = "plain"),
      legend.title = element_text(size = 16, face = "bold"),
      axis.ticks.x = element_blank(),
      axis.line.x.bottom = element_line(size = .25, colour = "grey25"),
      axis.line.y.left = element_line(size = .25, colour = "grey25"),
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_line(linetype = "dashed", colour = "grey90"),
      panel.grid.minor.y = element_blank(),
      plot.subtitle = element_text(size = 30, face = "bold"),
      panel.border = element_blank(),
      panel.background = element_blank(),
      legend.position = "bottom",
      legend.background = element_rect(fill = "gray95", colour = "gray95")
    )
  
  # rate of change in velocity (slope)
  p_fe_slope <- fe_values_overall %>%
    filter(Interval != "Start") %>%
    ggplot(aes(x = Interval, y = Estimate, fill = Test)) +
    geom_hline(yintercept = 0,
               linetype = "dashed",
               color = "black") +
    geom_errorbar(
      aes(ymin = Lower_CI, ymax = Upper_CI),
      colour = "grey40",
      width = 0.1,
      position = position_dodge(width = 0.25)
    ) +
    geom_point(
      position = position_dodge(width = 0.25),
      size = 3.2,
      shape = 21,
      stroke = .35
    ) +
    scale_fill_manual(values = custom_colors) +
    labs(
      subtitle = "B",
      x = "Phase",
      y = expression(bold("Rate of Change in Velocity (km·h"^-1 * ")")),
      fill = "Test"
    ) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(size = 16, face = "plain"),
      axis.text.y = element_text(size = 16, face = "plain"),
      axis.title.x = element_text(size = 20, face = "bold"),
      axis.title.y = element_text(size = 20, face = "bold"),
      legend.text = element_text(size = 16, face = "plain"),
      legend.title = element_text(size = 16, face = "bold"),
      axis.ticks.x = element_blank(),
      axis.line.x.bottom = element_line(size = .25, colour = "grey25"),
      axis.line.y.left = element_line(size = .25, colour = "grey25"),
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_line(linetype = "dashed", colour = "grey90"),
      panel.grid.minor.y = element_blank(),
      plot.subtitle = element_text(size = 30, face = "bold"),
      panel.border = element_blank(),
      panel.background = element_blank(),
      legend.position = "bottom",
      legend.background = element_rect(fill = "gray95", colour = "gray95")
    )
  
  # arrange into 2 panel plot
  combined_plot <- p_fe_int / p_fe_slope +
    plot_layout(ncol = 1, guides = "collect") &
    theme(legend.position = "bottom")
  
  # print plot
  combined_plot

Within & Between-Athlete Variability

Extract random-slope estimates and convert for plotting

  # extract random slopes
  random_slopes <- as.data.frame(ranef(model_spline)$`player_id:test`)
  
  random_slopes <- random_slopes %>%
    mutate(
      player_id_test = rownames(random_slopes),
      player_id = sub(":.*", "", player_id_test),
      Test = sub(".*:", "", player_id_test)
    ) %>% glimpse()
Rows: 54
Columns: 6
$ `lspline(Perc_stan, c(3, 8))1` <dbl> 0.229285644, 0.353964071, 0.230131312, …
$ `lspline(Perc_stan, c(3, 8))2` <dbl> 0.058269903, -0.081269252, -0.017329422…
$ `lspline(Perc_stan, c(3, 8))3` <dbl> -0.42615749, 0.16454158, -0.08454698, -…
$ player_id_test                 <chr> "player_01:1800mTT", "player_01:6minDT"…
$ player_id                      <chr> "player_01", "player_01", "player_02", …
$ Test                           <chr> "1800mTT", "6minDT", "1800mTT", "6minDT…
  # convert to long format for plotting
  random_slopes_long <- pivot_longer(random_slopes, cols = starts_with("lspline")) %>%
    mutate(
      phase = case_when(
        name == "lspline(Perc_stan, c(3, 8))1" ~ "0–30%",
        name == "lspline(Perc_stan, c(3, 8))2" ~ "30–80%",
        name == "lspline(Perc_stan, c(3, 8))3" ~ "80–100%",
        TRUE ~ NA_character_
      )
    )
  
  # view
  head(random_slopes_long)

Forest plot of random-slope estimates (individual-level deviations from the group mean across all phases and tests)

  # random-slope estimates forest plot ----
  ggplot(random_slopes_long, aes(x = value, y = player_id, fill = Test)) +
    geom_vline(xintercept = 0,
               linetype = "solid",
               color = "black") +  # Reference line at 0
    geom_point(
      shape = 21,
      alpha = .8,
      size = 3.2,
      colour = "black"
    ) +  # Show estimated slopes
    scale_fill_manual(values = c("#1b9e77", "#d95f02")) +  # Custom fill colours for test conditions
    labs(
      subtitle = "",
      y = "Player",
      x = expression(bold("Mean Velocity Change (km·h"^-1 * ")")),
      fill = "Test"
    ) +
    facet_wrap( ~ phase) +
    xlim(-2, 2) +
    theme_minimal() +
    theme(
      axis.text.x = element_text(size = 16, face = "plain"),
      axis.text.y = element_blank(),
      axis.title.x = element_text(size = 20, face = "bold"),
      axis.title.y = element_text(size = 20, face = "bold"),
      legend.text = element_text(size = 16, face = "plain"),
      legend.title = element_text(size = 16, face = "bold"),
      axis.ticks.x = element_blank(),
      axis.line.x.bottom = element_line(size = .25, colour = "grey25"),
      axis.line.y.left = element_line(size = .25, colour = "grey25"),
      panel.grid.major.x = element_blank(),
      panel.grid.major.y = element_line(linetype = "dashed", colour = "grey90"),
      panel.grid.minor.y = element_blank(),
      plot.subtitle = element_text(face = "bold"),
      panel.border = element_blank(),
      panel.background = element_blank(),
      strip.background = element_rect(fill = "grey80"),
      strip.text = element_text(size = 16, face = "bold"),
      legend.position = "bottom",
      legend.background = element_rect(fill = "gray95", colour = "gray95")
    )

Extract between-athlete estimate SD for each spline interval and within test

  # extract between-athlete estimate SD for each spline interval and within test
  between_athlete_sd <- random_slopes_long %>%
    group_by(Test, phase) %>%
    summarise(sd_between = sd(value)) %>%
    mutate(across(where(is.numeric), ~ round(.x, 2)))
  
  between_athlete_sd
  # view
  head(between_athlete_sd)

Extract within-athlete estimate SD for each spline interval across tests

  # extract within-athlete estimate SD for each spline interval across tests
  within_athlete_diff <- random_slopes_long %>%
    group_by(player_id, phase) %>%
    summarise(diff = diff(value))
  
  within_athlete_sd <- within_athlete_diff %>%
    group_by(phase) %>%
    summarise(sd_within = sd(diff)) %>%
    mutate(across(where(is.numeric), ~ round(.x, 2)))
  
  # view
  head(within_athlete_sd)

Extract variance components and calculate intra-class correlation coefficient (ICC) and percentages

  # extract variance components ----
  variance_components <- as.data.frame(VarCorr(model_spline))
  
  # between-individual variance (random intercept variance)
  between_variance <- round(variance_components$vcov[1], 2)
  
  # within-individual variance (residual variance)
  within_variance <- round(attr(VarCorr(model_spline), "sc")^2, 2)
  
  # calculate icc (intra-class correlation coefficient)
  ICC <- round(between_variance / (between_variance + within_variance), 2)
  
  # calculate % of total variance
  variance_explained <- round((between_variance / (between_variance + within_variance)) * 100, 2)
  within_variance_explained <- round(100 - variance_explained, 2)
  
  # combine into df
  variance_df <- data.frame(
    Component = c(
      "Between-Athlete Variance",
      "Within-Athlete Variance",
      "Total Variance Explained (ICC)"
    ),
    Estimate = c(between_variance, within_variance, ICC),
    Percentage = c(
      paste0(variance_explained, "%"),
      paste0(within_variance_explained, "%"),
      paste0(variance_explained, "%")
    )
  )
  
  # view
  head(variance_df)

RStudio Version, R Version, Environment and Package Versions

Posit team (2025). RStudio: Integrated Development Environment for R. Posit Software, PBC, Boston, MA. URL http://www.posit.co/.

$mode
[1] "desktop"

$version
[1] ‘2025.5.1.513’

$long_version
[1] "2025.05.1+513"

$release_name
[1] "Mariposa Orchid"

R version 4.5.0 (2025-04-11)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.5

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.5-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.1

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] grateful_0.2.12 patchwork_1.3.0 lspline_1.0-0   lme4_1.1-37     Matrix_1.7-3    roll_1.1.7      writexl_1.5.4   lubridate_1.9.4 forcats_1.0.0   stringr_1.5.1   dplyr_1.1.4     purrr_1.0.4     readr_2.1.5    
[14] tidyr_1.3.1     tibble_3.3.0    ggplot2_3.5.2   tidyverse_2.0.0 readxl_1.4.5   

loaded via a namespace (and not attached):
 [1] generics_0.1.4      stringi_1.8.7       lattice_0.22-7      hms_1.1.3           magrittr_2.0.3      grid_4.5.0          timechange_0.3.0    RColorBrewer_1.1-3  cellranger_1.1.0    scales_1.4.0       
[11] reformulas_0.4.1    Rdpack_2.6.4        cli_3.6.5           rlang_1.1.6         rbibutils_2.3       splines_4.5.0       withr_3.0.2         tools_4.5.0         tzdb_0.5.0          nloptr_2.2.1       
[21] minqa_1.2.8         boot_1.3-31         vctrs_0.6.5         R6_2.6.1            lifecycle_1.0.4     MASS_7.3-65         pkgconfig_2.0.3     RcppParallel_5.1.10 pillar_1.10.2       gtable_0.3.6       
[31] glue_1.8.0          Rcpp_1.0.14         tidyselect_1.2.1    rstudioapi_0.17.1   farver_2.1.2        nlme_3.1-168        compiler_4.5.0