Estimating Maximal Aerobic Speed in Academy Soccer Players: A Comparison Between Time-Trial Methods and the 30-15 Intermittent Fitness Test

Example R code used for statistical analyses and data visualisations

Authors

Kieran Smith

Matthew D. Wright

Paul Chesterton

Jonathan M. Taylor

Published

April 5, 2025

Data & Code

The data supporting this study’s findings are available upon request or openly via OSF repository at: https://osf.io/gqutc

The code used to analyse the data used in this study is available upon request or openly via OSF repository at: https://osf.io/8u6yj

The code blocks below use code-link. All packages and syntax are hyperlinked to their official documentation. Click on package names or functions throughout the code to learn more.

Packages

The following packages were used

library(readxl) # read excel file
library(janitor) # clean column names
library(tidyverse) # install and load core packages from the tidyverse
library(writexl) # write excel files/sheets
library(patchwork) # arange plots
library(broom) # convert statistical objects into tidy tibbles
library(performance) # model checking
library(lme4) # fitting and analyzing mixed models
library(robustlmm) # robust linear mixed effects models
library(emmeans) # estimated marginal means
library(robustbase) # basic robust statistics
library(grateful) # citation of R packages

Load Data & Clean Names

Load data into tidy format and clean names

  # participant charcteristics & 1800mTT ----
  TT18 <- read_excel("study1data.xlsx", sheet = "1800 m TT")
  names <- names(TT18 %>% clean_names)
  colnames(TT18) <- names
  
  TT18 <-
    TT18 %>% subset(
      select = c(
        player_id,
        playing_level,
        squad,
        position,
        age_years,
        stature_cm,
        body_mass_kg,
        x1800_m_tt_finish_time_s,
        mas_m_s,
        avg_heart_rate_b_min,
        maximum_heart_rate_b_min,
        temperature_c,
        humidity_percent,
        pressure_mbar,
        wind_speed_mph
      )
    ) %>%
    rename(
      TT18_mas_m_s = mas_m_s,
      TT18_avg_heart_rate_b_min = avg_heart_rate_b_min,
      TT18_peak_heart_rate_b_min = maximum_heart_rate_b_min
    )
  
  # 6minDT ----
  DT6 <- read_excel("study1data.xlsx", sheet = "6 min DT")
  
  names <- names(DT6 %>% clean_names)
  colnames(DT6) <- names
  
  DT6 <-
    DT6 %>% subset(
      select = c(
        player_id,
        x6_min_dt_total_distance_m,
        mas_m_s,
        avg_heart_rate_b_min,
        maximum_heart_rate_b_min,
        temperature_c,
        humidity_percent,
        pressure_mbar,
        wind_speed_mph
      )
    ) %>%
    rename(
      DT6_mas_m_s = mas_m_s,
      DT6_avg_heart_rate_b_min = avg_heart_rate_b_min,
      DT6_peak_heart_rate_b_min = maximum_heart_rate_b_min
    )
  
  # 30-15IFT ----
  IFT <- read_excel("study1data.xlsx", sheet = "30-15 IFT")
  names <- names(IFT %>% clean_names)
  colnames(IFT) <- names
  IFT <-
    IFT %>% subset(
      select = c(
        player_id,
        v_ift_km_hr,
        mas_m_s,
        temperature_c,
        humidity_percent,
        pressure_mbar
      )
    ) %>%
    rename(ift_mas_m_s = mas_m_s)
  
  # join 3 df's into single df "data" ----
  data <- TT18 %>%
    full_join(DT6, by = "player_id") %>%
    full_join(IFT, by = "player_id")
  
  # omit na values
  data <- data %>%
    filter(!is.na(TT18_mas_m_s),
           !is.na(DT6_mas_m_s),
           !is.na(ift_mas_m_s))

  # view data df
  head(data)

Participant Characteristics

Calculate participant characteristics

  # overall summary (without squad grouping)
  overall_characteristics <- data %>%
    select(age_years, stature_cm, body_mass_kg) %>%
    gather(key = "variable", value = "value") %>%
    group_by(variable) %>%
    summarise(
      n = round(n(), 1),
      mean = round(mean(value, na.rm = TRUE), 1),
      sd = round(sd(value, na.rm = TRUE), 1),
      min = round(min(value, na.rm = TRUE), 1),
      max = round(max(value, na.rm = TRUE), 1)
    ) %>%
    ungroup() %>%                               # Ensure no grouping for subsequent operations
    mutate(squad = "All", # Add 'All' as the squad
           variable = factor(variable, levels = c(
             "age_years", "stature_cm", "body_mass_kg"
           ))) %>%
    arrange(variable)
  
  # create squad level participant characteristics df
  parcharacteristics <- data %>%
    select(squad, age_years, stature_cm, body_mass_kg) %>%            # include squad column
    gather(key = "variable", value = "value", -squad) %>%             # organise columns
    group_by(squad, variable) %>%
    summarise(
      # select summary statistics and round
      n = round(n(), 1),
      mean = round(mean(value, na.rm = TRUE), 1),
      sd = round(sd(value, na.rm = TRUE), 1),
      min = round(min(value, na.rm = TRUE), 1),
      max = round(max(value, na.rm = TRUE), 1)
    ) %>%
    ungroup() %>%
    # factor the variable column to order rows
    mutate(variable = factor(variable, levels = c(
      "age_years", "stature_cm", "body_mass_kg"
    ))) %>%
    # arrange the df by squad and variable columns
    arrange(squad, variable)
  
  # combine squad-level and overall-level summaries
  parcharacteristics <- bind_rows(overall_characteristics, parcharacteristics) %>%
    # Rearrange the columns in the specified order
    select(variable, squad, n, mean, sd, min, max)

  # view participant characteristics df
  head(parcharacteristics)

Descriptive Statistics

Descriptive statistics for field-based testing outcomes

  # count of player positions ----
  position_counts <- data %>%
    group_by(position) %>%
    summarise(count = n())
  
  # view position_counts df
  head(position_counts)
  # conditions across testing sessions ----
  conditions <- data %>%
    select(
      player_id,
      contains("temperature"),
      contains("humidity"),
      contains("pressure"),
      contains("wind_speed")
    ) %>%
    pivot_longer(cols = -player_id,
                 names_to = "measurement_type",
                 values_to = "value") %>%
    mutate(
      condition = case_when(
        str_detect(measurement_type, "temperature") ~ "temperature_c",
        str_detect(measurement_type, "humidity") ~ "humidity_percent",
        str_detect(measurement_type, "pressure") ~ "pressure_mbar",
        str_detect(measurement_type, "wind_speed") ~ "wind_speed_km_hr"
      ),
      # convert wind speed from mph to km_hr
      value = ifelse(
        str_detect(measurement_type, "wind_speed"),
        value * 1.60934,
        value
      )
    ) %>%
    group_by(player_id, condition) %>%
    summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
    pivot_wider(names_from = condition, values_from = value)
  
  # avg_cond df
  avg_cond <- conditions %>%
    select(temperature_c,
           humidity_percent,
           pressure_mbar,
           wind_speed_km_hr) %>%
    pivot_longer(cols = everything(),
                 names_to = "variable",
                 values_to = "value") %>%
    group_by(variable) %>%
    summarise(
      n = round(n(), 1),
      mean = round(mean(value, na.rm = TRUE), 1),
      sd = round(sd(value, na.rm = TRUE), 1),
      min = round(min(value, na.rm = TRUE), 1),
      max = round(max(value, na.rm = TRUE), 1)
    ) %>%
    ungroup() %>%
    mutate(variable = factor(
      variable,
      levels = c(
        "temperature_c",
        "humidity_percent",
        "pressure_mbar",
        "wind_speed_km_hr"
      )
    )) %>%
    arrange(variable)

  # view avg_cond df
  head(avg_cond)
  # descriptive stats df ----
  descriptivestats <-
    data %>%                                                       # select data
    select(
      # select columns
      x1800_m_tt_finish_time_s,
      TT18_mas_m_s,
      TT18_avg_heart_rate_b_min,
      TT18_peak_heart_rate_b_min,
      x6_min_dt_total_distance_m,
      DT6_mas_m_s,
      DT6_avg_heart_rate_b_min,
      DT6_peak_heart_rate_b_min,
      v_ift_km_hr,
      ift_mas_m_s
    ) %>%
    gather(key = "variable", value = "value") %>%             # organise columns
    group_by(variable) %>%
    summarise(
      # select summary statistics and round
      mean = round(mean(value, na.rm = TRUE), 2),
      sd = round(sd(value, na.rm = TRUE), 2),
      min = round(min(value, na.rm = TRUE), 2),
      max = round(max(value, na.rm = TRUE), 2)
    )

  # view descriptive stats df
  head(descriptivestats)

MAS Distribution

Wrangle data long and plot box and whisker plot of distribution of 1800mTT, 6minDT & 30-15IFT derived MAS

  # organise data long ----
  data2 <- data %>%
    select(player_id, "Position" = position, ift_mas_m_s, DT6_mas_m_s, TT18_mas_m_s)
  
  data_long <- pivot_longer(
    data2,
    cols = c(ift_mas_m_s, DT6_mas_m_s, TT18_mas_m_s),
    names_to = "Test",
    values_to = "MAS"
  )
  
  # view data_long df
  head(data_long)
  # create plot theme
  plottheme <- theme(
    plot.margin = margin(10, 10, 10, 10),
    text = element_text(family = "Arial"),
    plot.title = element_text(size = 28, face = "bold"),
    plot.subtitle = element_text(size = 20, ),
    plot.caption = element_text(size = 16, ),
    axis.title.x = element_text(size = 20, face = "bold"),
    axis.title.y = element_text(size = 20, face = "bold"),
    axis.text = element_text(size = 16, face = "plain"),
    legend.title = element_text(size = 20, face = "bold"),
    legend.text = element_text(size = 20),
    legend.position = "bottom",
    legend.background = element_rect(fill = "gray95", colour = "gray95")
  )
  
  # box and whisker plot ----
  # colourblind palette for boxplot
  cb_box_palette <- c(
    'TT18_mas_m_s' = "#E69F00",
    'DT6_mas_m_s'  = "#56B4E9",
    'ift_mas_m_s'  = "#009E73"
  )
  
  # plot
  plot1 <-
    ggplot(data_long, aes(x = factor(
      Test, levels = c('TT18_mas_m_s', 'DT6_mas_m_s', 'ift_mas_m_s')
    ), y = MAS)) +
    geom_boxplot(staplewidth = 0.2,
                 fill = cb_box_palette,
                 alpha = 0.35) +
    geom_jitter(
      aes(fill = Position),
      # fill controls the inside colour
      shape = 21,
      # shape with border (e.g., circle with outline)
      colour = "black",
      # border colour
      stroke = 0.5,
      # thickness of the outline
      alpha = 0.7,
      size = 2.5,
      position = position_jitter(width = 0.3, height = 0)  # avoid overlap
    ) +
    theme_minimal() +
    scale_color_hue(h = c(0, 240)) +
    plottheme +
    
    # title, subtitle, caption, x and y axis labels, text & legend
    labs(x = "Test", y = expression(bold("MAS (m·s"^-1 * ")")), fill = "Position") +
    scale_x_discrete(labels = c('1800mTT', '6minDT', '30-15IFT')) +
    scale_y_continuous(
      breaks = scales::breaks_width(0.2),
      labels = scales::number_format(accuracy = 0.1)
    )
  
  
  # print plot
  plot1

Relationships & Linear Models

Associations between MAS estimates and linear models of relationships between 1800mTT, 6minDT and 30-15IFT derived MAS. A: Scatter plot of 1800mTT and 6minDT derived MAS.; B: Scatter plot of 1800mTT and 30-15IFT derived MAS.; C: Scatter plot of 6minDT and 30-15IFT derived MAS

  # pearson's product-moment correlation coefficients ----
  # 1800mTT & 6minDT
  cor1 <-
    cor.test(
      data$TT18_mas_m_s,
      data$DT6_mas_m_s,
      use = "pairwise.complete.obs",
      method = "pearson",
      conf.level = 0.95
    )
  
  # 1800mTT & 30-15IFT
  cor2 <-
    cor.test(
      data$TT18_mas_m_s,
      data$ift_mas_m_s,
      use = "pairwise.complete.obs",
      method = "pearson",
      conf.level = 0.95
    )
  
  # 6minDT & 30-15IFT
  cor3 <-
    cor.test(
      data$DT6_mas_m_s,
      data$ift_mas_m_s,
      use = "pairwise.complete.obs",
      method = "pearson",
      conf.level = 0.95
    )
  
  # extract statistics from each correlation test
  pcc1 <- data.frame(
    Test = "1800mTT & 6minDT",
    Estimate = cor1$estimate,
    P.Value = cor1$p.value,
    DF = cor3$parameter,
    Conf.Low = cor1$conf.int[1],
    Conf.High = cor1$conf.int[2]
  )
  
  pcc2 <- data.frame(
    Test = "1800mTT & 30-15IFT",
    Estimate = cor2$estimate,
    P.Value = cor2$p.value,
    DF = cor3$parameter,
    Conf.Low = cor2$conf.int[1],
    Conf.High = cor2$conf.int[2]
  )
  
  pcc3 <- data.frame(
    Test = "6minDT & 30-15IFT",
    Estimate = cor3$estimate,
    P.Value = cor3$p.value,
    DF = cor3$parameter,
    Conf.Low = cor3$conf.int[1],
    Conf.High = cor3$conf.int[2]
  )
  
  # combine results into df
  combined_pccs <- rbind(pcc1, pcc2, pcc3)
  
  # add "± 95% CL" column
  combined_pccs$`± 95% CL` <-
    (combined_pccs$Conf.High - combined_pccs$Conf.Low) / 2
  
  # round all numeric values to 2 decimals
  combined_pccs <- combined_pccs %>%
    mutate_if(is.numeric, round, 2)

  # view combined_pccs df
  head(combined_pccs)
  # correlation label plot annotation function ----
  cor_label <- function(cor_test) {
    bquote(italic(r) == .(round(cor_test$estimate, 2)) * ";" ~
             "95%CI: " ~ .(round(cor_test$conf.int[1], 2)) ~
             " to " ~ .(round(cor_test$conf.int[2], 2)))
  }
  
  cor1_label <- cor_label(cor1)
  cor2_label <- cor_label(cor2)
  cor3_label <- cor_label(cor3)

  # linear model plots with correlation annotations
  # 1800mTT & 6minDT
  plot2 <-
    ggplot(
      data2,
      aes(x = TT18_mas_m_s, y = DT6_mas_m_s, linetype = "Linear prediction with \n95% confidence interval")
    ) +
    geom_point(
      aes(fill = Position),
      shape = 21,
      colour = "black",
      stroke = 0.5,
      alpha = 0.7,
      size = 3,
      position = position_jitter(width = 0.1, height = 0.1)
    ) +
    geom_smooth(
      method = "lm",
      level = 0.95,
      linewidth = 1.5,
      colour = "dodgerblue4",
      fill = "grey",
      se = TRUE
    ) +
    theme_minimal() +
    scale_color_hue(h = c(0, 240)) +
    scale_x_continuous(
      breaks = scales::breaks_width(0.2),
      labels = scales::number_format(accuracy = 0.1)
    ) +
    scale_y_continuous(
      breaks = scales::breaks_width(0.2),
      labels = scales::number_format(accuracy = 0.1)
    ) +
    plottheme +
    
    # title, subtitle, caption, x and y axis labels, text & legend
    labs(
      title = "1800mTT & 6minDT",
      subtitle = cor1_label,
      caption = "",
      x = expression(bold("1800mTT MAS (m·s"^-1 * ")")),
      y = expression(bold("6minDT MAS (m·s"^-1 * ")")),
      linetype = "Trend Line",
      colour = "Position"
    )
  
  # 1800mTT & 30-15IFT
  plot3 <-
    ggplot(
      data2,
      aes(x = TT18_mas_m_s, y = ift_mas_m_s, linetype = "Linear prediction with \n95% confidence interval")
    ) +
    geom_point(
      aes(fill = Position),
      shape = 21,
      colour = "black",
      stroke = 0.5,
      alpha = 0.7,
      size = 3,
      position = position_jitter(width = 0.1, height = 0.1)
    ) +
    geom_smooth(
      method = "lm",
      level = 0.95,
      linewidth = 1.5,
      colour = "dodgerblue4",
      fill = "grey",
      se = TRUE
    ) +
    theme_minimal() +
    scale_color_hue(h = c(0, 240)) +
    scale_x_continuous(
      breaks = scales::breaks_width(0.2),
      labels = scales::number_format(accuracy = 0.1)
    ) +
    scale_y_continuous(
      breaks = scales::breaks_width(0.2),
      labels = scales::number_format(accuracy = 0.1)
    ) +
    plottheme +
    
    # title, subtitle, caption, x and y axis labels, text & legend
    labs(
      title = "1800mTT & 30-15IFT",
      subtitle = cor2_label,
      caption = "",
      x = expression(bold("1800mTT MAS (m·s"^-1 * ")")),
      y = expression(bold("30-15IFT MAS (m·s"^-1 * ")")),
      linetype = "Trend Line",
      colour = "Position"
    )
  
  # 6minDT & 30-15IFT
  plot4 <-
    ggplot(
      data2,
      aes(x = DT6_mas_m_s, y = ift_mas_m_s, linetype = "Linear prediction with \n95% confidence interval")
    ) +
    geom_point(
      aes(fill = Position),
      shape = 21,
      colour = "black",
      stroke = 0.5,
      alpha = 0.7,
      size = 3,
      position = position_jitter(width = 0.1, height = 0.1)
    ) +
    geom_smooth(
      method = "lm",
      level = 0.95,
      linewidth = 1.5,
      colour = "dodgerblue4",
      fill = "grey",
      se = TRUE
    ) +
    theme_minimal() +
    scale_color_hue(h = c(0, 240)) +
    scale_x_continuous(
      breaks = scales::breaks_width(0.2),
      labels = scales::number_format(accuracy = 0.1)
    ) +
    scale_y_continuous(
      breaks = scales::breaks_width(0.2),
      labels = scales::number_format(accuracy = 0.1)
    ) +
    plottheme +
    
    # title, subtitle, caption, x and y axis labels, text & legend
    labs(
      title = "6minDT & 30-15IFT",
      subtitle = cor3_label,
      caption = "",
      x = expression(bold("6minDT MAS (m·s"^-1 * ")")),
      y = expression(bold("30-15IFT MAS (m·s"^-1 * ")")),
      linetype = "Trend Line",
      colour = "Position"
    )
  
  # combine into multi panel plot
  combined_plot <- plot2 + plot3 + plot4 + plot_layout(guides = "collect") +
    plot_annotation(tag_levels = "A") &
    theme(plot.tag = element_text(size = 30, face = "bold"),
          legend.position = 'bottom')
  
  # print plot
  combined_plot

Robust Repeated Measures ANOVAs and Estimated Marginal Means

MAS and HR robust repeated measures ANOVAs and estimated marginal means


MAS linear model

  # MAS linear model ----
  lm1 <- lm(MAS ~ Test, data = data_long)
  summary(lm1)

Call:
lm(formula = MAS ~ Test, data = data_long)

Residuals:
     Min       1Q   Median       3Q      Max 
-0.61588 -0.20456  0.04579  0.17370  0.39090 

Coefficients:
                 Estimate Std. Error t value Pr(>|t|)    
(Intercept)       4.38582    0.04658  94.148  < 2e-16 ***
Testift_mas_m_s   0.55905    0.06588   8.486 1.42e-12 ***
TestTT18_mas_m_s  0.10394    0.06588   1.578    0.119    
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Residual standard error: 0.2375 on 75 degrees of freedom
Multiple R-squared:  0.5207,    Adjusted R-squared:  0.5079 
F-statistic: 40.74 on 2 and 75 DF,  p-value: 1.053e-12

Check normality

  # check normality of model
  check_model(lm1)

Check Heteroscedasticity

  # check heteroscedasticity of models
  check_heteroscedasticity(lm1)
OK: Error variance appears to be homoscedastic (p = 0.510).

MAS linear mixed-effects model

  # MAS linear mixed-effects model ----
  lmm1 <- lmer(MAS ~ 1 + Test + (1 | player_id), data = data_long)
  summary(lmm1)
Linear mixed model fit by REML ['lmerMod']
Formula: MAS ~ 1 + Test + (1 | player_id)
   Data: data_long

REML criterion at convergence: -27.6

Scaled residuals: 
    Min      1Q  Median      3Q     Max 
-2.4625 -0.4238  0.1466  0.5151  2.7330 

Random effects:
 Groups    Name        Variance Std.Dev.
 player_id (Intercept) 0.03799  0.1949  
 Residual              0.01843  0.1358  
Number of obs: 78, groups:  player_id, 26

Fixed effects:
                 Estimate Std. Error t value
(Intercept)       4.38582    0.04658   94.15
Testift_mas_m_s   0.55905    0.03765   14.85
TestTT18_mas_m_s  0.10394    0.03765    2.76

Correlation of Fixed Effects:
            (Intr) Tst___
Tstft_ms_m_ -0.404       
TstTT18_m__ -0.404  0.500

MAS robust repeated measures ANOVA

  # MAS robust repeated measures ANOVA ----
  rlmm1 <- rlmer(MAS ~ 1 + Test + (1 | player_id), data = data_long)
  rlmm1_summary <- summary(rlmm1)
  
  # extract statistics into df and round to 2 decimals
  rlmm1_df <- data.frame(
    Term = rownames(rlmm1_summary$coefficients),
    Estimate = round(rlmm1_summary$coefficients[, "Estimate"], 2),
    StdError = round(rlmm1_summary$coefficients[, "Std. Error"], 2),
    tValue = round(rlmm1_summary$coefficients[, "t value"], 2)
  )

  # view rlmm1_df df
  head(rlmm1_df)

Estimated marginal mean differences in MAS between tests

  # MAS estimated marginal means ----
  emm <- emmeans(rlmm1, pairwise ~ Test, level = 0.95)
  emm_summary <-
    summary(emm, infer = c(TRUE, TRUE))  # include p-values
  
  # extract statistics into df's
  emm_means <- as.data.frame(emm_summary$emmeans)
  emm_contrasts <- as.data.frame(emm_summary$contrasts)
  
  # combine results into a single df
  combined_emm <- bind_rows(emm_means %>% mutate(Type = "Means"),
                            emm_contrasts %>% mutate(Type = "Contrasts"))
  
  # calculate new column "± 95% CL"
  combined_emm$`± 95% CL` <-
    (combined_emm$asymp.UCL - combined_emm$asymp.LCL) / 2
  
  # reorder columns to position "± 95% CL" after asymp.UCL column
  combined_emm <- combined_emm %>%
    dplyr::relocate(`± 95% CL`, .after = asymp.UCL)
  
  # format p-value function
  format_p_value <- function(p) {
    if (is.na(p)) {
      return(NA)
    } else if (p < 0.0001) {
      return("<.0001")
    } else if (p < 0.10) {
      return(format(signif(p, digits = 1), scientific = FALSE))
    } else {
      return(format(signif(p, digits = 2), scientific = FALSE))
    }
  }
  
  # format p-values using custom function so they appear the same as in console
  combined_emm$p.value <- sapply(combined_emm$p.value, format_p_value)
  
  # round all other numeric values to 2 decimals
  combined_emm <- combined_emm %>%
    dplyr::mutate(across(where(is.numeric) &
                           !any_of("p.value"), round, 2))

  # view combined_emm df
  head(combined_emm)

HR indices robust repeated measures ANOVA

  # organise HR data long ----
  hrdata <- data %>%
    select(
      player_id,
      TT18_avg_heart_rate_b_min,
      TT18_peak_heart_rate_b_min,
      DT6_avg_heart_rate_b_min,
      DT6_peak_heart_rate_b_min
    )
  
  hrdata_long <- pivot_longer(
    hrdata,
    cols = c(
      TT18_avg_heart_rate_b_min,
      TT18_peak_heart_rate_b_min,
      DT6_avg_heart_rate_b_min,
      DT6_peak_heart_rate_b_min
    ),
    names_to = "Test",
    values_to = "HR"
  ) %>% drop_na()
  
  # view hrdata_long df
  head(hrdata_long)
  # HR robust repeated measures ANOVA ----
  rlmm2 <- rlmer(HR ~ 1 + Test + (1 | player_id), data = hrdata_long)
  rlmm2_summary <- summary(rlmm2)
  
  # extract statistics into df and round to 2 decimals
  rlmm2_df <- data.frame(
    Term = rownames(rlmm2_summary$coefficients),
    Estimate = round(rlmm2_summary$coefficients[, "Estimate"], 2),
    StdError = round(rlmm2_summary$coefficients[, "Std. Error"], 2),
    tValue = round(rlmm2_summary$coefficients[, "t value"], 2)
  )
  
  # view rlmm2_df
  head(rlmm2_df)

Estimated marginal mean differences in HR indices between tests

  # HR estimated marginal means ----
  hremm <- emmeans(rlmm2, pairwise ~ Test, level = 0.95)
  hremm_summary <-
    summary(hremm, infer = c(TRUE, TRUE))  # include p-values
  
  # extract statistics into df's
  hremm_means <- as.data.frame(hremm_summary$emmeans)
  hremm_contrasts <- as.data.frame(hremm_summary$contrasts)
  hremm_contrasts <- hremm_contrasts %>%
    slice(c(2, 5))
  
  # combine results into a single df
  combined_hremm <- bind_rows(hremm_means %>% mutate(Type = "Means"),
                              hremm_contrasts %>% mutate(Type = "Contrasts"))
  
  # calculate new column "± 95% CL"
  combined_hremm$`± 95% CL` <-
    (combined_hremm$asymp.UCL - combined_hremm$asymp.LCL) / 2
  
  # reorder columns to position "± 95% CL" after asymp.UCL column
  combined_hremm <- combined_hremm %>%
    dplyr::relocate(`± 95% CL`, .after = asymp.UCL)
  
  # format p-values using custom function so they appear the same as in console
  combined_hremm$p.value <- sapply(combined_hremm$p.value, format_p_value)
  
  # round all other numeric values to 2 decimals
  combined_hremm <- combined_hremm %>%
    dplyr::mutate(across(where(is.numeric) &
                           !any_of("p.value"), round, 0))
  
  # view combined_hremm df
  head(combined_hremm)

Percentage Correspondence

Time and distance trial percentage correspondence to vIFT calculated using raw ratio of observed data

  # select relevant columns and convert m_s to km_hr
  relevant_data <- data %>%
    select(v_ift_km_hr, DT6_mas_m_s, TT18_mas_m_s) %>%
    mutate(DT6_km_hr = DT6_mas_m_s * 3.6, TT18_km_hr = TT18_mas_m_s * 3.6) %>%
    select(v_ift_km_hr, DT6_km_hr, TT18_km_hr) # keep relevant columns
  
  # calculate percentage correspondence using raw ratio of observed data
  percentages_dt6 <- relevant_data$DT6_km_hr / relevant_data$v_ift_km_hr * 100
  percentages_tt18 <- relevant_data$TT18_km_hr / relevant_data$v_ift_km_hr * 100
  
  # calculate mean and SD for DT6_km_hr
  mean_percentage_dt6 <- round(mean(percentages_dt6, na.rm = TRUE), 0)
  sd_percentage_dt6 <- round(sd(percentages_dt6, na.rm = TRUE), 0)
  
  # calculate mean and SD for TT18_km_hr
  mean_percentage_tt18 <- round(mean(percentages_tt18, na.rm = TRUE), 0)
  sd_percentage_tt18 <- round(sd(percentages_tt18, na.rm = TRUE), 0)
  
  # combine results into df
  percent_correspondence <- data.frame(
    Metric = c("DT6_km_hr", "TT18_km_hr"),
    Mean_Percentage = c(mean_percentage_dt6, mean_percentage_tt18),
    SD_Percentage = c(sd_percentage_dt6, sd_percentage_tt18)
  )

  # view percent_correspondence df
  head(percent_correspondence)

Regression Equations

Regression equations and standard error for estimating 6minDT and 1800mTT MAS from vIFT

  # robust model for DT6_mas_m_s and vIFT
  model_dt6 <- lmrob(DT6_km_hr ~ v_ift_km_hr, data = relevant_data)
  percentages_dt6 <- (coef(model_dt6)["v_ift_km_hr"] * relevant_data$v_ift_km_hr) / relevant_data$DT6_km_hr * 100
  mean_percentage_dt6 <- round(mean(percentages_dt6, na.rm = TRUE), 0)
  sd_percentage_dt6 <- round(sd(percentages_dt6, na.rm = TRUE), 0)
  
  # robust model for TT18_mas_m_s and vIFT
  model_tt18 <- lmrob(TT18_km_hr ~ v_ift_km_hr, data = relevant_data)
  percentages_tt18 <- (coef(model_tt18)["v_ift_km_hr"] * relevant_data$v_ift_km_hr) / relevant_data$TT18_km_hr * 100
  mean_percentage_tt18 <- round(mean(percentages_tt18, na.rm = TRUE), 0)
  sd_percentage_tt18 <- round(sd(percentages_tt18, na.rm = TRUE), 0)
  
  # view models
  summary(model_dt6)

Call:
lmrob(formula = DT6_km_hr ~ v_ift_km_hr, data = relevant_data)
 \--> method = "MM"
Residuals:
      Min        1Q    Median        3Q       Max 
-1.307065 -0.465018 -0.002212  0.434420  1.704811 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)   3.2409     3.2372   1.001  0.32675    
v_ift_km_hr   0.6125     0.1543   3.969  0.00057 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Robust residual standard error: 0.6865 
Multiple R-squared:  0.4198,    Adjusted R-squared:  0.3956 
Convergence in 12 IRWLS iterations

Robustness weights: 
 2 weights are ~= 1. The remaining 24 ones are summarized as
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
 0.5170  0.9105  0.9596  0.9223  0.9810  0.9969 
Algorithmic parameters: 
       tuning.chi                bb        tuning.psi        refine.tol 
        1.548e+00         5.000e-01         4.685e+00         1.000e-07 
          rel.tol         scale.tol         solve.tol          zero.tol 
        1.000e-07         1.000e-10         1.000e-07         1.000e-10 
      eps.outlier             eps.x warn.limit.reject warn.limit.meanrw 
        3.846e-03         4.002e-11         5.000e-01         5.000e-01 
     nResample         max.it       best.r.s       k.fast.s          k.max 
           500             50              2              1            200 
   maxit.scale      trace.lev            mts     compute.rd fast.s.large.n 
           200              0           1000              0           2000 
                  psi           subsampling                   cov 
           "bisquare"         "nonsingular"         ".vcov.avar1" 
compute.outlier.stats 
                 "SM" 
seed : int(0) 
  summary(model_tt18)

Call:
lmrob(formula = TT18_km_hr ~ v_ift_km_hr, data = relevant_data)
 \--> method = "MM"
Residuals:
     Min       1Q   Median       3Q      Max 
-2.26882 -0.46439  0.03882  0.25385  1.05721 

Coefficients:
            Estimate Std. Error t value Pr(>|t|)    
(Intercept)  -0.9326     3.7461  -0.249    0.806    
v_ift_km_hr   0.8418     0.1758   4.789  7.1e-05 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Robust residual standard error: 0.3675 
Multiple R-squared:  0.7095,    Adjusted R-squared:  0.6974 
Convergence in 31 IRWLS iterations

Robustness weights: 
 observation 7 is an outlier with |weight| = 0 ( < 0.0038); 
 one weight is ~= 1. The remaining 24 ones are summarized as
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
0.09798 0.77590 0.91800 0.81470 0.97880 0.99590 
Algorithmic parameters: 
       tuning.chi                bb        tuning.psi        refine.tol 
        1.548e+00         5.000e-01         4.685e+00         1.000e-07 
          rel.tol         scale.tol         solve.tol          zero.tol 
        1.000e-07         1.000e-10         1.000e-07         1.000e-10 
      eps.outlier             eps.x warn.limit.reject warn.limit.meanrw 
        3.846e-03         4.002e-11         5.000e-01         5.000e-01 
     nResample         max.it       best.r.s       k.fast.s          k.max 
           500             50              2              1            200 
   maxit.scale      trace.lev            mts     compute.rd fast.s.large.n 
           200              0           1000              0           2000 
                  psi           subsampling                   cov 
           "bisquare"         "nonsingular"         ".vcov.avar1" 
compute.outlier.stats 
                 "SM" 
seed : int(0) 
  # create regression equation + residual standard error function
  generate_equation <- function(model) {
    coefficients <- coef(model)
    response_name <- as.character(formula(model)[[2]]) # Extract response variable name
    terms <- sapply(2:length(coefficients), function(i) {
      sign <- ifelse(coefficients[i] >= 0, " + ", " - ")
      paste0(sign, abs(round(coefficients[i], 2)), "*", names(coefficients)[i])
    })
    terms <- paste0(terms, collapse = "")
    intercept <- round(coefficients[1], 2)
    equation <- paste0(response_name, " = ", intercept, terms)
    
    # extract residual standard error and round to 2 decimals
    residual_se <- round(summary(model)$sigma, 2)
    
    return(list(equation = equation, residual_se = residual_se))
  }
  
  # generate equations and residual standard errors for 6minDT & 1800mTT
  result1 <- generate_equation(model_dt6)
  result2 <- generate_equation(model_tt18)
  
  equation1 <- result1$equation
  equation2 <- result2$equation
  se1 <- result1$residual_se
  se2 <- result2$residual_se
  
  # create df with equations and residual standard errors
  equations_df <- data.frame(
    Model = c("Model 1", "Model 2"),
    Equation = c(equation1, equation2),
    `Standard Error` = c(se1, se2)
  )
  
  # view equations_df
  head(equations_df)

RStudio Version, R Version, Environment and Package Versions

Posit team (2025). RStudio: Integrated Development Environment for R. Posit Software, PBC, Boston, MA. URL http://www.posit.co/.

$mode
[1] "desktop"

$version
[1] ‘2024.12.1.563’

$long_version
[1] "2024.12.1+563"

$release_name
[1] "Kousa Dogwood"

R version 4.4.3 (2025-02-28)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.3.2

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib;  LAPACK version 3.12.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/London
tzcode source: internal

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] grateful_0.2.11     robustbase_0.99-4-1 emmeans_1.11.0      robustlmm_3.3-1     lme4_1.1-36         Matrix_1.7-3        performance_0.13.0  broom_1.0.7         patchwork_1.3.0     writexl_1.5.2      
[11] lubridate_1.9.4     forcats_1.0.0       stringr_1.5.1       dplyr_1.1.4         purrr_1.0.4         readr_2.1.5         tidyr_1.3.1         tibble_3.2.1        ggplot2_3.5.1       tidyverse_2.0.0    
[21] janitor_2.2.1       readxl_1.4.5       

loaded via a namespace (and not attached):
 [1] gtable_0.3.6       bayestestR_0.15.2  xfun_0.51          ggrepel_0.9.6      insight_1.1.0      lattice_0.22-6     tzdb_0.5.0         vctrs_0.6.5        tools_4.4.3        Rdpack_2.6.3       generics_0.1.3    
[12] datawizard_1.0.1   DEoptimR_1.1-3-1   pkgconfig_2.0.3    lifecycle_1.0.4    compiler_4.4.3     farver_2.1.2       munsell_0.5.1      fastGHQuad_1.0.1   codetools_0.2-20   snakecase_0.11.1   crayon_1.5.3      
[23] pillar_1.10.1      nloptr_2.2.1       MASS_7.3-65        rsconnect_1.3.4    reformulas_0.4.0   boot_1.3-31        nlme_3.1-167       tidyselect_1.2.1   mvtnorm_1.3-3      stringi_1.8.4      labeling_0.4.3    
[34] splines_4.4.3      grid_4.4.3         colorspace_2.1-1   cli_3.6.4          magrittr_2.0.3     withr_3.0.2        scales_1.3.0       backports_1.5.0    estimability_1.5.1 timechange_0.3.0   cellranger_1.1.0  
[45] hms_1.1.3          coda_0.19-4.1      evaluate_1.0.3     knitr_1.50         rbibutils_2.3      mgcv_1.9-1         rlang_1.1.5        Rcpp_1.0.14        xtable_1.8-4       glue_1.8.0         see_0.11.0        
[56] rstudioapi_0.17.1  minqa_1.2.8        R6_2.6.1