library(readxl) # read excel file
library(janitor) # clean column names
library(tidyverse) # install and load core packages from the tidyverse
library(writexl) # write excel files/sheets
library(patchwork) # arange plots
library(broom) # convert statistical objects into tidy tibbles
library(performance) # model checking
library(lme4) # fitting and analyzing mixed models
library(robustlmm) # robust linear mixed effects models
library(emmeans) # estimated marginal means
library(robustbase) # basic robust statistics
library(grateful) # citation of R packages
The data supporting this study’s findings are available upon request or openly via OSF repository at: https://osf.io/gqutc
The code used to analyse the data used in this study is available upon request or openly via OSF repository at: https://osf.io/8u6yj
The code blocks below use code-link
. All packages and syntax are hyperlinked to their official documentation. Click on package
names or functions
throughout the code to learn more.
Packages
The following packages were used
Load Data & Clean Names
Load data into tidy format and clean names
# participant charcteristics & 1800mTT ----
TT18 <- read_excel("study1data.xlsx", sheet = "1800 m TT")
names <- names(TT18 %>% clean_names)
colnames(TT18) <- names
TT18 <-
TT18 %>% subset(
select = c(
player_id,
playing_level,
squad,
position,
age_years,
stature_cm,
body_mass_kg,
x1800_m_tt_finish_time_s,
mas_m_s,
avg_heart_rate_b_min,
maximum_heart_rate_b_min,
temperature_c,
humidity_percent,
pressure_mbar,
wind_speed_mph
)
) %>%
rename(
TT18_mas_m_s = mas_m_s,
TT18_avg_heart_rate_b_min = avg_heart_rate_b_min,
TT18_peak_heart_rate_b_min = maximum_heart_rate_b_min
)
# 6minDT ----
DT6 <- read_excel("study1data.xlsx", sheet = "6 min DT")
names <- names(DT6 %>% clean_names)
colnames(DT6) <- names
DT6 <-
DT6 %>% subset(
select = c(
player_id,
x6_min_dt_total_distance_m,
mas_m_s,
avg_heart_rate_b_min,
maximum_heart_rate_b_min,
temperature_c,
humidity_percent,
pressure_mbar,
wind_speed_mph
)
) %>%
rename(
DT6_mas_m_s = mas_m_s,
DT6_avg_heart_rate_b_min = avg_heart_rate_b_min,
DT6_peak_heart_rate_b_min = maximum_heart_rate_b_min
)
# 30-15IFT ----
IFT <- read_excel("study1data.xlsx", sheet = "30-15 IFT")
names <- names(IFT %>% clean_names)
colnames(IFT) <- names
IFT <-
IFT %>% subset(
select = c(
player_id,
v_ift_km_hr,
mas_m_s,
temperature_c,
humidity_percent,
pressure_mbar
)
) %>%
rename(ift_mas_m_s = mas_m_s)
# join 3 df's into single df "data" ----
data <- TT18 %>%
full_join(DT6, by = "player_id") %>%
full_join(IFT, by = "player_id")
# omit na values
data <- data %>%
filter(!is.na(TT18_mas_m_s),
!is.na(DT6_mas_m_s),
!is.na(ift_mas_m_s))
# view data df
head(data)
Participant Characteristics
Calculate participant characteristics
# overall summary (without squad grouping)
overall_characteristics <- data %>%
select(age_years, stature_cm, body_mass_kg) %>%
gather(key = "variable", value = "value") %>%
group_by(variable) %>%
summarise(
n = round(n(), 1),
mean = round(mean(value, na.rm = TRUE), 1),
sd = round(sd(value, na.rm = TRUE), 1),
min = round(min(value, na.rm = TRUE), 1),
max = round(max(value, na.rm = TRUE), 1)
) %>%
ungroup() %>% # Ensure no grouping for subsequent operations
mutate(squad = "All", # Add 'All' as the squad
variable = factor(variable, levels = c(
"age_years", "stature_cm", "body_mass_kg"
))) %>%
arrange(variable)
# create squad level participant characteristics df
parcharacteristics <- data %>%
select(squad, age_years, stature_cm, body_mass_kg) %>% # include squad column
gather(key = "variable", value = "value", -squad) %>% # organise columns
group_by(squad, variable) %>%
summarise(
# select summary statistics and round
n = round(n(), 1),
mean = round(mean(value, na.rm = TRUE), 1),
sd = round(sd(value, na.rm = TRUE), 1),
min = round(min(value, na.rm = TRUE), 1),
max = round(max(value, na.rm = TRUE), 1)
) %>%
ungroup() %>%
# factor the variable column to order rows
mutate(variable = factor(variable, levels = c(
"age_years", "stature_cm", "body_mass_kg"
))) %>%
# arrange the df by squad and variable columns
arrange(squad, variable)
# combine squad-level and overall-level summaries
parcharacteristics <- bind_rows(overall_characteristics, parcharacteristics) %>%
# Rearrange the columns in the specified order
select(variable, squad, n, mean, sd, min, max)
# view participant characteristics df
head(parcharacteristics)
Descriptive Statistics
Descriptive statistics for field-based testing outcomes
# count of player positions ----
position_counts <- data %>%
group_by(position) %>%
summarise(count = n())
# view position_counts df
head(position_counts)
# conditions across testing sessions ----
conditions <- data %>%
select(
player_id,
contains("temperature"),
contains("humidity"),
contains("pressure"),
contains("wind_speed")
) %>%
pivot_longer(cols = -player_id,
names_to = "measurement_type",
values_to = "value") %>%
mutate(
condition = case_when(
str_detect(measurement_type, "temperature") ~ "temperature_c",
str_detect(measurement_type, "humidity") ~ "humidity_percent",
str_detect(measurement_type, "pressure") ~ "pressure_mbar",
str_detect(measurement_type, "wind_speed") ~ "wind_speed_km_hr"
),
# convert wind speed from mph to km_hr
value = ifelse(
str_detect(measurement_type, "wind_speed"),
value * 1.60934,
value
)
) %>%
group_by(player_id, condition) %>%
summarize(value = mean(value, na.rm = TRUE), .groups = "drop") %>%
pivot_wider(names_from = condition, values_from = value)
# avg_cond df
avg_cond <- conditions %>%
select(temperature_c,
humidity_percent,
pressure_mbar,
wind_speed_km_hr) %>%
pivot_longer(cols = everything(),
names_to = "variable",
values_to = "value") %>%
group_by(variable) %>%
summarise(
n = round(n(), 1),
mean = round(mean(value, na.rm = TRUE), 1),
sd = round(sd(value, na.rm = TRUE), 1),
min = round(min(value, na.rm = TRUE), 1),
max = round(max(value, na.rm = TRUE), 1)
) %>%
ungroup() %>%
mutate(variable = factor(
variable,
levels = c(
"temperature_c",
"humidity_percent",
"pressure_mbar",
"wind_speed_km_hr"
)
)) %>%
arrange(variable)
# view avg_cond df
head(avg_cond)
# descriptive stats df ----
descriptivestats <-
data %>% # select data
select(
# select columns
x1800_m_tt_finish_time_s,
TT18_mas_m_s,
TT18_avg_heart_rate_b_min,
TT18_peak_heart_rate_b_min,
x6_min_dt_total_distance_m,
DT6_mas_m_s,
DT6_avg_heart_rate_b_min,
DT6_peak_heart_rate_b_min,
v_ift_km_hr,
ift_mas_m_s
) %>%
gather(key = "variable", value = "value") %>% # organise columns
group_by(variable) %>%
summarise(
# select summary statistics and round
mean = round(mean(value, na.rm = TRUE), 2),
sd = round(sd(value, na.rm = TRUE), 2),
min = round(min(value, na.rm = TRUE), 2),
max = round(max(value, na.rm = TRUE), 2)
)
# view descriptive stats df
head(descriptivestats)
MAS Distribution
Wrangle data long and plot box and whisker plot of distribution of 1800mTT, 6minDT & 30-15IFT derived MAS
# organise data long ----
data2 <- data %>%
select(player_id, "Position" = position, ift_mas_m_s, DT6_mas_m_s, TT18_mas_m_s)
data_long <- pivot_longer(
data2,
cols = c(ift_mas_m_s, DT6_mas_m_s, TT18_mas_m_s),
names_to = "Test",
values_to = "MAS"
)
# view data_long df
head(data_long)
# create plot theme
plottheme <- theme(
plot.margin = margin(10, 10, 10, 10),
text = element_text(family = "Arial"),
plot.title = element_text(size = 28, face = "bold"),
plot.subtitle = element_text(size = 20, ),
plot.caption = element_text(size = 16, ),
axis.title.x = element_text(size = 20, face = "bold"),
axis.title.y = element_text(size = 20, face = "bold"),
axis.text = element_text(size = 16, face = "plain"),
legend.title = element_text(size = 20, face = "bold"),
legend.text = element_text(size = 20),
legend.position = "bottom",
legend.background = element_rect(fill = "gray95", colour = "gray95")
)
# box and whisker plot ----
# colourblind palette for boxplot
cb_box_palette <- c(
'TT18_mas_m_s' = "#E69F00",
'DT6_mas_m_s' = "#56B4E9",
'ift_mas_m_s' = "#009E73"
)
# plot
plot1 <-
ggplot(data_long, aes(x = factor(
Test, levels = c('TT18_mas_m_s', 'DT6_mas_m_s', 'ift_mas_m_s')
), y = MAS)) +
geom_boxplot(staplewidth = 0.2,
fill = cb_box_palette,
alpha = 0.35) +
geom_jitter(
aes(fill = Position),
# fill controls the inside colour
shape = 21,
# shape with border (e.g., circle with outline)
colour = "black",
# border colour
stroke = 0.5,
# thickness of the outline
alpha = 0.7,
size = 2.5,
position = position_jitter(width = 0.3, height = 0) # avoid overlap
) +
theme_minimal() +
scale_color_hue(h = c(0, 240)) +
plottheme +
# title, subtitle, caption, x and y axis labels, text & legend
labs(x = "Test", y = expression(bold("MAS (m·s"^-1 * ")")), fill = "Position") +
scale_x_discrete(labels = c('1800mTT', '6minDT', '30-15IFT')) +
scale_y_continuous(
breaks = scales::breaks_width(0.2),
labels = scales::number_format(accuracy = 0.1)
)
# print plot
plot1
Relationships & Linear Models
Associations between MAS estimates and linear models of relationships between 1800mTT, 6minDT and 30-15IFT derived MAS. A: Scatter plot of 1800mTT and 6minDT derived MAS.; B: Scatter plot of 1800mTT and 30-15IFT derived MAS.; C: Scatter plot of 6minDT and 30-15IFT derived MAS
# pearson's product-moment correlation coefficients ----
# 1800mTT & 6minDT
cor1 <-
cor.test(
data$TT18_mas_m_s,
data$DT6_mas_m_s,
use = "pairwise.complete.obs",
method = "pearson",
conf.level = 0.95
)
# 1800mTT & 30-15IFT
cor2 <-
cor.test(
data$TT18_mas_m_s,
data$ift_mas_m_s,
use = "pairwise.complete.obs",
method = "pearson",
conf.level = 0.95
)
# 6minDT & 30-15IFT
cor3 <-
cor.test(
data$DT6_mas_m_s,
data$ift_mas_m_s,
use = "pairwise.complete.obs",
method = "pearson",
conf.level = 0.95
)
# extract statistics from each correlation test
pcc1 <- data.frame(
Test = "1800mTT & 6minDT",
Estimate = cor1$estimate,
P.Value = cor1$p.value,
DF = cor3$parameter,
Conf.Low = cor1$conf.int[1],
Conf.High = cor1$conf.int[2]
)
pcc2 <- data.frame(
Test = "1800mTT & 30-15IFT",
Estimate = cor2$estimate,
P.Value = cor2$p.value,
DF = cor3$parameter,
Conf.Low = cor2$conf.int[1],
Conf.High = cor2$conf.int[2]
)
pcc3 <- data.frame(
Test = "6minDT & 30-15IFT",
Estimate = cor3$estimate,
P.Value = cor3$p.value,
DF = cor3$parameter,
Conf.Low = cor3$conf.int[1],
Conf.High = cor3$conf.int[2]
)
# combine results into df
combined_pccs <- rbind(pcc1, pcc2, pcc3)
# add "± 95% CL" column
combined_pccs$`± 95% CL` <-
(combined_pccs$Conf.High - combined_pccs$Conf.Low) / 2
# round all numeric values to 2 decimals
combined_pccs <- combined_pccs %>%
mutate_if(is.numeric, round, 2)
# view combined_pccs df
head(combined_pccs)
# correlation label plot annotation function ----
cor_label <- function(cor_test) {
bquote(italic(r) == .(round(cor_test$estimate, 2)) * ";" ~
"95%CI: " ~ .(round(cor_test$conf.int[1], 2)) ~
" to " ~ .(round(cor_test$conf.int[2], 2)))
}
cor1_label <- cor_label(cor1)
cor2_label <- cor_label(cor2)
cor3_label <- cor_label(cor3)
# linear model plots with correlation annotations
# 1800mTT & 6minDT
plot2 <-
ggplot(
data2,
aes(x = TT18_mas_m_s, y = DT6_mas_m_s, linetype = "Linear prediction with \n95% confidence interval")
) +
geom_point(
aes(fill = Position),
shape = 21,
colour = "black",
stroke = 0.5,
alpha = 0.7,
size = 3,
position = position_jitter(width = 0.1, height = 0.1)
) +
geom_smooth(
method = "lm",
level = 0.95,
linewidth = 1.5,
colour = "dodgerblue4",
fill = "grey",
se = TRUE
) +
theme_minimal() +
scale_color_hue(h = c(0, 240)) +
scale_x_continuous(
breaks = scales::breaks_width(0.2),
labels = scales::number_format(accuracy = 0.1)
) +
scale_y_continuous(
breaks = scales::breaks_width(0.2),
labels = scales::number_format(accuracy = 0.1)
) +
plottheme +
# title, subtitle, caption, x and y axis labels, text & legend
labs(
title = "1800mTT & 6minDT",
subtitle = cor1_label,
caption = "",
x = expression(bold("1800mTT MAS (m·s"^-1 * ")")),
y = expression(bold("6minDT MAS (m·s"^-1 * ")")),
linetype = "Trend Line",
colour = "Position"
)
# 1800mTT & 30-15IFT
plot3 <-
ggplot(
data2,
aes(x = TT18_mas_m_s, y = ift_mas_m_s, linetype = "Linear prediction with \n95% confidence interval")
) +
geom_point(
aes(fill = Position),
shape = 21,
colour = "black",
stroke = 0.5,
alpha = 0.7,
size = 3,
position = position_jitter(width = 0.1, height = 0.1)
) +
geom_smooth(
method = "lm",
level = 0.95,
linewidth = 1.5,
colour = "dodgerblue4",
fill = "grey",
se = TRUE
) +
theme_minimal() +
scale_color_hue(h = c(0, 240)) +
scale_x_continuous(
breaks = scales::breaks_width(0.2),
labels = scales::number_format(accuracy = 0.1)
) +
scale_y_continuous(
breaks = scales::breaks_width(0.2),
labels = scales::number_format(accuracy = 0.1)
) +
plottheme +
# title, subtitle, caption, x and y axis labels, text & legend
labs(
title = "1800mTT & 30-15IFT",
subtitle = cor2_label,
caption = "",
x = expression(bold("1800mTT MAS (m·s"^-1 * ")")),
y = expression(bold("30-15IFT MAS (m·s"^-1 * ")")),
linetype = "Trend Line",
colour = "Position"
)
# 6minDT & 30-15IFT
plot4 <-
ggplot(
data2,
aes(x = DT6_mas_m_s, y = ift_mas_m_s, linetype = "Linear prediction with \n95% confidence interval")
) +
geom_point(
aes(fill = Position),
shape = 21,
colour = "black",
stroke = 0.5,
alpha = 0.7,
size = 3,
position = position_jitter(width = 0.1, height = 0.1)
) +
geom_smooth(
method = "lm",
level = 0.95,
linewidth = 1.5,
colour = "dodgerblue4",
fill = "grey",
se = TRUE
) +
theme_minimal() +
scale_color_hue(h = c(0, 240)) +
scale_x_continuous(
breaks = scales::breaks_width(0.2),
labels = scales::number_format(accuracy = 0.1)
) +
scale_y_continuous(
breaks = scales::breaks_width(0.2),
labels = scales::number_format(accuracy = 0.1)
) +
plottheme +
# title, subtitle, caption, x and y axis labels, text & legend
labs(
title = "6minDT & 30-15IFT",
subtitle = cor3_label,
caption = "",
x = expression(bold("6minDT MAS (m·s"^-1 * ")")),
y = expression(bold("30-15IFT MAS (m·s"^-1 * ")")),
linetype = "Trend Line",
colour = "Position"
)
# combine into multi panel plot
combined_plot <- plot2 + plot3 + plot4 + plot_layout(guides = "collect") +
plot_annotation(tag_levels = "A") &
theme(plot.tag = element_text(size = 30, face = "bold"),
legend.position = 'bottom')
# print plot
combined_plot
Robust Repeated Measures ANOVAs and Estimated Marginal Means
MAS and HR robust repeated measures ANOVAs and estimated marginal means
MAS linear model
Call:
lm(formula = MAS ~ Test, data = data_long)
Residuals:
Min 1Q Median 3Q Max
-0.61588 -0.20456 0.04579 0.17370 0.39090
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 4.38582 0.04658 94.148 < 2e-16 ***
Testift_mas_m_s 0.55905 0.06588 8.486 1.42e-12 ***
TestTT18_mas_m_s 0.10394 0.06588 1.578 0.119
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Residual standard error: 0.2375 on 75 degrees of freedom
Multiple R-squared: 0.5207, Adjusted R-squared: 0.5079
F-statistic: 40.74 on 2 and 75 DF, p-value: 1.053e-12
Check normality
# check normality of model
check_model(lm1)
Check Heteroscedasticity
# check heteroscedasticity of models
check_heteroscedasticity(lm1)
OK: Error variance appears to be homoscedastic (p = 0.510).
MAS linear mixed-effects model
# MAS linear mixed-effects model ----
lmm1 <- lmer(MAS ~ 1 + Test + (1 | player_id), data = data_long)
summary(lmm1)
Linear mixed model fit by REML ['lmerMod']
Formula: MAS ~ 1 + Test + (1 | player_id)
Data: data_long
REML criterion at convergence: -27.6
Scaled residuals:
Min 1Q Median 3Q Max
-2.4625 -0.4238 0.1466 0.5151 2.7330
Random effects:
Groups Name Variance Std.Dev.
player_id (Intercept) 0.03799 0.1949
Residual 0.01843 0.1358
Number of obs: 78, groups: player_id, 26
Fixed effects:
Estimate Std. Error t value
(Intercept) 4.38582 0.04658 94.15
Testift_mas_m_s 0.55905 0.03765 14.85
TestTT18_mas_m_s 0.10394 0.03765 2.76
Correlation of Fixed Effects:
(Intr) Tst___
Tstft_ms_m_ -0.404
TstTT18_m__ -0.404 0.500
MAS robust repeated measures ANOVA
# MAS robust repeated measures ANOVA ----
rlmm1 <- rlmer(MAS ~ 1 + Test + (1 | player_id), data = data_long)
rlmm1_summary <- summary(rlmm1)
# extract statistics into df and round to 2 decimals
rlmm1_df <- data.frame(
Term = rownames(rlmm1_summary$coefficients),
Estimate = round(rlmm1_summary$coefficients[, "Estimate"], 2),
StdError = round(rlmm1_summary$coefficients[, "Std. Error"], 2),
tValue = round(rlmm1_summary$coefficients[, "t value"], 2)
)
# view rlmm1_df df
head(rlmm1_df)
Estimated marginal mean differences in MAS between tests
# MAS estimated marginal means ----
emm <- emmeans(rlmm1, pairwise ~ Test, level = 0.95)
emm_summary <-
summary(emm, infer = c(TRUE, TRUE)) # include p-values
# extract statistics into df's
emm_means <- as.data.frame(emm_summary$emmeans)
emm_contrasts <- as.data.frame(emm_summary$contrasts)
# combine results into a single df
combined_emm <- bind_rows(emm_means %>% mutate(Type = "Means"),
emm_contrasts %>% mutate(Type = "Contrasts"))
# calculate new column "± 95% CL"
combined_emm$`± 95% CL` <-
(combined_emm$asymp.UCL - combined_emm$asymp.LCL) / 2
# reorder columns to position "± 95% CL" after asymp.UCL column
combined_emm <- combined_emm %>%
dplyr::relocate(`± 95% CL`, .after = asymp.UCL)
# format p-value function
format_p_value <- function(p) {
if (is.na(p)) {
return(NA)
} else if (p < 0.0001) {
return("<.0001")
} else if (p < 0.10) {
return(format(signif(p, digits = 1), scientific = FALSE))
} else {
return(format(signif(p, digits = 2), scientific = FALSE))
}
}
# format p-values using custom function so they appear the same as in console
combined_emm$p.value <- sapply(combined_emm$p.value, format_p_value)
# round all other numeric values to 2 decimals
combined_emm <- combined_emm %>%
dplyr::mutate(across(where(is.numeric) &
!any_of("p.value"), round, 2))
# view combined_emm df
head(combined_emm)
HR indices robust repeated measures ANOVA
# organise HR data long ----
hrdata <- data %>%
select(
player_id,
TT18_avg_heart_rate_b_min,
TT18_peak_heart_rate_b_min,
DT6_avg_heart_rate_b_min,
DT6_peak_heart_rate_b_min
)
hrdata_long <- pivot_longer(
hrdata,
cols = c(
TT18_avg_heart_rate_b_min,
TT18_peak_heart_rate_b_min,
DT6_avg_heart_rate_b_min,
DT6_peak_heart_rate_b_min
),
names_to = "Test",
values_to = "HR"
) %>% drop_na()
# view hrdata_long df
head(hrdata_long)
# HR robust repeated measures ANOVA ----
rlmm2 <- rlmer(HR ~ 1 + Test + (1 | player_id), data = hrdata_long)
rlmm2_summary <- summary(rlmm2)
# extract statistics into df and round to 2 decimals
rlmm2_df <- data.frame(
Term = rownames(rlmm2_summary$coefficients),
Estimate = round(rlmm2_summary$coefficients[, "Estimate"], 2),
StdError = round(rlmm2_summary$coefficients[, "Std. Error"], 2),
tValue = round(rlmm2_summary$coefficients[, "t value"], 2)
)
# view rlmm2_df
head(rlmm2_df)
Estimated marginal mean differences in HR indices between tests
# HR estimated marginal means ----
hremm <- emmeans(rlmm2, pairwise ~ Test, level = 0.95)
hremm_summary <-
summary(hremm, infer = c(TRUE, TRUE)) # include p-values
# extract statistics into df's
hremm_means <- as.data.frame(hremm_summary$emmeans)
hremm_contrasts <- as.data.frame(hremm_summary$contrasts)
hremm_contrasts <- hremm_contrasts %>%
slice(c(2, 5))
# combine results into a single df
combined_hremm <- bind_rows(hremm_means %>% mutate(Type = "Means"),
hremm_contrasts %>% mutate(Type = "Contrasts"))
# calculate new column "± 95% CL"
combined_hremm$`± 95% CL` <-
(combined_hremm$asymp.UCL - combined_hremm$asymp.LCL) / 2
# reorder columns to position "± 95% CL" after asymp.UCL column
combined_hremm <- combined_hremm %>%
dplyr::relocate(`± 95% CL`, .after = asymp.UCL)
# format p-values using custom function so they appear the same as in console
combined_hremm$p.value <- sapply(combined_hremm$p.value, format_p_value)
# round all other numeric values to 2 decimals
combined_hremm <- combined_hremm %>%
dplyr::mutate(across(where(is.numeric) &
!any_of("p.value"), round, 0))
# view combined_hremm df
head(combined_hremm)
Percentage Correspondence
Time and distance trial percentage correspondence to vIFT calculated using raw ratio of observed data
# select relevant columns and convert m_s to km_hr
relevant_data <- data %>%
select(v_ift_km_hr, DT6_mas_m_s, TT18_mas_m_s) %>%
mutate(DT6_km_hr = DT6_mas_m_s * 3.6, TT18_km_hr = TT18_mas_m_s * 3.6) %>%
select(v_ift_km_hr, DT6_km_hr, TT18_km_hr) # keep relevant columns
# calculate percentage correspondence using raw ratio of observed data
percentages_dt6 <- relevant_data$DT6_km_hr / relevant_data$v_ift_km_hr * 100
percentages_tt18 <- relevant_data$TT18_km_hr / relevant_data$v_ift_km_hr * 100
# calculate mean and SD for DT6_km_hr
mean_percentage_dt6 <- round(mean(percentages_dt6, na.rm = TRUE), 0)
sd_percentage_dt6 <- round(sd(percentages_dt6, na.rm = TRUE), 0)
# calculate mean and SD for TT18_km_hr
mean_percentage_tt18 <- round(mean(percentages_tt18, na.rm = TRUE), 0)
sd_percentage_tt18 <- round(sd(percentages_tt18, na.rm = TRUE), 0)
# combine results into df
percent_correspondence <- data.frame(
Metric = c("DT6_km_hr", "TT18_km_hr"),
Mean_Percentage = c(mean_percentage_dt6, mean_percentage_tt18),
SD_Percentage = c(sd_percentage_dt6, sd_percentage_tt18)
)
# view percent_correspondence df
head(percent_correspondence)
Regression Equations
Regression equations and standard error for estimating 6minDT and 1800mTT MAS from vIFT
# robust model for DT6_mas_m_s and vIFT
model_dt6 <- lmrob(DT6_km_hr ~ v_ift_km_hr, data = relevant_data)
percentages_dt6 <- (coef(model_dt6)["v_ift_km_hr"] * relevant_data$v_ift_km_hr) / relevant_data$DT6_km_hr * 100
mean_percentage_dt6 <- round(mean(percentages_dt6, na.rm = TRUE), 0)
sd_percentage_dt6 <- round(sd(percentages_dt6, na.rm = TRUE), 0)
# robust model for TT18_mas_m_s and vIFT
model_tt18 <- lmrob(TT18_km_hr ~ v_ift_km_hr, data = relevant_data)
percentages_tt18 <- (coef(model_tt18)["v_ift_km_hr"] * relevant_data$v_ift_km_hr) / relevant_data$TT18_km_hr * 100
mean_percentage_tt18 <- round(mean(percentages_tt18, na.rm = TRUE), 0)
sd_percentage_tt18 <- round(sd(percentages_tt18, na.rm = TRUE), 0)
# view models
summary(model_dt6)
Call:
lmrob(formula = DT6_km_hr ~ v_ift_km_hr, data = relevant_data)
\--> method = "MM"
Residuals:
Min 1Q Median 3Q Max
-1.307065 -0.465018 -0.002212 0.434420 1.704811
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) 3.2409 3.2372 1.001 0.32675
v_ift_km_hr 0.6125 0.1543 3.969 0.00057 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Robust residual standard error: 0.6865
Multiple R-squared: 0.4198, Adjusted R-squared: 0.3956
Convergence in 12 IRWLS iterations
Robustness weights:
2 weights are ~= 1. The remaining 24 ones are summarized as
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.5170 0.9105 0.9596 0.9223 0.9810 0.9969
Algorithmic parameters:
tuning.chi bb tuning.psi refine.tol
1.548e+00 5.000e-01 4.685e+00 1.000e-07
rel.tol scale.tol solve.tol zero.tol
1.000e-07 1.000e-10 1.000e-07 1.000e-10
eps.outlier eps.x warn.limit.reject warn.limit.meanrw
3.846e-03 4.002e-11 5.000e-01 5.000e-01
nResample max.it best.r.s k.fast.s k.max
500 50 2 1 200
maxit.scale trace.lev mts compute.rd fast.s.large.n
200 0 1000 0 2000
psi subsampling cov
"bisquare" "nonsingular" ".vcov.avar1"
compute.outlier.stats
"SM"
seed : int(0)
summary(model_tt18)
Call:
lmrob(formula = TT18_km_hr ~ v_ift_km_hr, data = relevant_data)
\--> method = "MM"
Residuals:
Min 1Q Median 3Q Max
-2.26882 -0.46439 0.03882 0.25385 1.05721
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept) -0.9326 3.7461 -0.249 0.806
v_ift_km_hr 0.8418 0.1758 4.789 7.1e-05 ***
---
Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Robust residual standard error: 0.3675
Multiple R-squared: 0.7095, Adjusted R-squared: 0.6974
Convergence in 31 IRWLS iterations
Robustness weights:
observation 7 is an outlier with |weight| = 0 ( < 0.0038);
one weight is ~= 1. The remaining 24 ones are summarized as
Min. 1st Qu. Median Mean 3rd Qu. Max.
0.09798 0.77590 0.91800 0.81470 0.97880 0.99590
Algorithmic parameters:
tuning.chi bb tuning.psi refine.tol
1.548e+00 5.000e-01 4.685e+00 1.000e-07
rel.tol scale.tol solve.tol zero.tol
1.000e-07 1.000e-10 1.000e-07 1.000e-10
eps.outlier eps.x warn.limit.reject warn.limit.meanrw
3.846e-03 4.002e-11 5.000e-01 5.000e-01
nResample max.it best.r.s k.fast.s k.max
500 50 2 1 200
maxit.scale trace.lev mts compute.rd fast.s.large.n
200 0 1000 0 2000
psi subsampling cov
"bisquare" "nonsingular" ".vcov.avar1"
compute.outlier.stats
"SM"
seed : int(0)
# create regression equation + residual standard error function
generate_equation <- function(model) {
coefficients <- coef(model)
response_name <- as.character(formula(model)[[2]]) # Extract response variable name
terms <- sapply(2:length(coefficients), function(i) {
sign <- ifelse(coefficients[i] >= 0, " + ", " - ")
paste0(sign, abs(round(coefficients[i], 2)), "*", names(coefficients)[i])
})
terms <- paste0(terms, collapse = "")
intercept <- round(coefficients[1], 2)
equation <- paste0(response_name, " = ", intercept, terms)
# extract residual standard error and round to 2 decimals
residual_se <- round(summary(model)$sigma, 2)
return(list(equation = equation, residual_se = residual_se))
}
# generate equations and residual standard errors for 6minDT & 1800mTT
result1 <- generate_equation(model_dt6)
result2 <- generate_equation(model_tt18)
equation1 <- result1$equation
equation2 <- result2$equation
se1 <- result1$residual_se
se2 <- result2$residual_se
# create df with equations and residual standard errors
equations_df <- data.frame(
Model = c("Model 1", "Model 2"),
Equation = c(equation1, equation2),
`Standard Error` = c(se1, se2)
)
# view equations_df
head(equations_df)
RStudio Version, R Version, Environment and Package Versions
Posit team (2025). RStudio: Integrated Development Environment for R. Posit Software, PBC, Boston, MA. URL http://www.posit.co/.
$mode
[1] "desktop"
$version
[1] ‘2024.12.1.563’
$long_version
[1] "2024.12.1+563"
$release_name
[1] "Kousa Dogwood"
R version 4.4.3 (2025-02-28)
Platform: aarch64-apple-darwin20
Running under: macOS Sequoia 15.3.2
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.4-arm64/Resources/lib/libRlapack.dylib; LAPACK version 3.12.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/London
tzcode source: internal
attached base packages:
[1] stats graphics grDevices utils datasets methods base
other attached packages:
[1] grateful_0.2.11 robustbase_0.99-4-1 emmeans_1.11.0 robustlmm_3.3-1 lme4_1.1-36 Matrix_1.7-3 performance_0.13.0 broom_1.0.7 patchwork_1.3.0 writexl_1.5.2
[11] lubridate_1.9.4 forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4 purrr_1.0.4 readr_2.1.5 tidyr_1.3.1 tibble_3.2.1 ggplot2_3.5.1 tidyverse_2.0.0
[21] janitor_2.2.1 readxl_1.4.5
loaded via a namespace (and not attached):
[1] gtable_0.3.6 bayestestR_0.15.2 xfun_0.51 ggrepel_0.9.6 insight_1.1.0 lattice_0.22-6 tzdb_0.5.0 vctrs_0.6.5 tools_4.4.3 Rdpack_2.6.3 generics_0.1.3
[12] datawizard_1.0.1 DEoptimR_1.1-3-1 pkgconfig_2.0.3 lifecycle_1.0.4 compiler_4.4.3 farver_2.1.2 munsell_0.5.1 fastGHQuad_1.0.1 codetools_0.2-20 snakecase_0.11.1 crayon_1.5.3
[23] pillar_1.10.1 nloptr_2.2.1 MASS_7.3-65 rsconnect_1.3.4 reformulas_0.4.0 boot_1.3-31 nlme_3.1-167 tidyselect_1.2.1 mvtnorm_1.3-3 stringi_1.8.4 labeling_0.4.3
[34] splines_4.4.3 grid_4.4.3 colorspace_2.1-1 cli_3.6.4 magrittr_2.0.3 withr_3.0.2 scales_1.3.0 backports_1.5.0 estimability_1.5.1 timechange_0.3.0 cellranger_1.1.0
[45] hms_1.1.3 coda_0.19-4.1 evaluate_1.0.3 knitr_1.50 rbibutils_2.3 mgcv_1.9-1 rlang_1.1.5 Rcpp_1.0.14 xtable_1.8-4 glue_1.8.0 see_0.11.0
[56] rstudioapi_0.17.1 minqa_1.2.8 R6_2.6.1