# make sure these are installed first!
library(readxl) # for reading Excel files
library(janitor) # for cleaning column names
library(tidyverse) # for data wrangling and plotting
library(reshape2) # for melting correlation matrix
library(corrplot)
library(GGally)Vald
Profiling the Stretch-Shortening Cycle
Context
We might be interested to understand how an athlete produces force. Some athletes are very strong and produce maximal force when given time to do so, others maybe more powerful.
The stretch-shortening cycle is critical to force production in most sports, some athletes are skilled in utilsing stored elastic energy from the eccentric part of the cycle to enhance concentric impulse & power. Others may rely on their ability to contract concentrically.
We might also be interested in how athletes utilise the stretch-shortening cycle in slower (e.g. counter-movement jumps) and faster (drop jump, repeated-jump tests).
Data
Here we will look at how we can import data, downloaded from the Vald Hub, for the following tests:
Counter-movement Jump - hands on hips
10-5 Repeated Jump Test
Isometric Mid-Thigh Pull
The data is for girls’ football players from under 12s, under 14s and under 16s age groups, the “IDs” excel sheet has their general information:
Code
The data wrangling challenge here is to combine data from 4 different places into a complete data sheet.
Load packages
Import data
To start with we should import our player ID sheet - importing the data is quite easy BUT we have data of births and they are in the American order MM-DD-YYYY - this is a default on Excel on-line but we need to ensure R knows the data order.
To do so we are importing all columns as text and then telling R exactly what we want to do with each date. It’s a bit complicated but it works!
# 1) Read everything as text so we control parsing ourselves
raw_ID <- read_excel(
"IDs.xlsx",
col_types = c("text", "text", "text", "text") # e.g., ID, Name, DOB, Group
) %>%
select(-Group) %>%
clean_names()
# 2) Parse DOB robustly:
# - Excel serial numbers (pure digits) -> as.Date with Excel origin
# - Text dates in m/d/Y (with or without leading zeros) -> lubridate::mdy
ID <- raw_ID %>%
mutate(
dob = case_when(
# Excel serial numbers (all digits). Some files may have decimals, so handle that too.
str_detect(dob, "^\\d+(\\.\\d+)?$") ~ as.Date(as.numeric(dob), origin = "1899-12-30"),
# Text dates like 2/28/2012 or 05/03/2012 -> force mdy interpretation
str_detect(dob, "^\\d{1,2}/\\d{1,2}/\\d{4}$") ~ mdy(dob),
TRUE ~ NA_Date_
)
)
ID <- clean_names(ID)For counter-movement Jump things are a little easier:
cmj<- read_csv("CMJ.csv",
col_types = cols(Date = col_date(format = "%d/%m/%Y")))But I want to select a few metrics I am interested in (you can choose your own)
cmj<- clean_names(cmj) %>%
select(id = name,
date,
cmj_height = jump_height_imp_mom_cm,
cmj_rsi = rsi_modified_imp_mom_m_s,
ct = contraction_time_ms,
epv = eccentric_peak_velocity_m_s,
rel_pp = peak_power_bm_w_kg,
con_imp = concentric_impulse_n_s)
head(cmj)# A tibble: 6 × 8
id date cmj_height cmj_rsi ct epv rel_pp con_imp
<chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 ETC 57 2026-01-06 26.2 0.49 537 -1.09 46.5 131.
2 RTC 88 2026-01-06 26.4 0.34 772 -1.26 40.7 173.
3 RTC 85 2026-01-06 21 0.34 630 -0.54 40.4 113.
4 ETC 58 2026-01-06 23.4 0.37 628 -1.03 42.2 118.
5 RTC 64 2026-01-06 25.1 0.32 775 -1.07 41.5 129.
6 RTC 86 2026-01-06 27.2 0.36 748 -0.99 44.2 137.
Let’s now combine these files
I have used merge() and I have chosen the common row (ID) - in the code below I have also filtered to ensure I am only looking at players who are part of the 2025 - 2026 squad.
data <- merge(ID, cmj, by = "id", all.x = TRUE) %>%
filter(year == "25_26")
head(data) id dob year date cmj_height cmj_rsi ct epv rel_pp
1 ETC 01 2012-02-28 25_26 <NA> NA NA NA NA NA
2 ETC 02 2012-03-05 25_26 <NA> NA NA NA NA NA
3 ETC 05 2013-04-25 25_26 2026-01-05 18.1 0.29 636 -1.09 33.3
4 ETC 07 2011-01-11 25_26 <NA> NA NA NA NA NA
5 ETC 101 2014-08-17 25_26 2026-01-05 23.0 0.38 604 -0.78 41.3
6 ETC 102 2014-08-05 25_26 2026-01-05 13.3 0.20 667 -1.02 26.5
con_imp
1 NA
2 NA
3 111.2
4 NA
5 95.7
6 84.7
I might want to get rid of any rows with N/A - these occur when a player is in the squad (in the ID sheet) but didn’t attend testing. To do this we can add in another filter:
data <- merge(ID, cmj, by = "id", all.x = TRUE) %>%
filter(year == "25_26")%>%
filter(!is.na(cmj_height))
head(data) id dob year date cmj_height cmj_rsi ct epv rel_pp
1 ETC 05 2013-04-25 25_26 2026-01-05 18.1 0.29 636 -1.09 33.3
2 ETC 101 2014-08-17 25_26 2026-01-05 23.0 0.38 604 -0.78 41.3
3 ETC 102 2014-08-05 25_26 2026-01-05 13.3 0.20 667 -1.02 26.5
4 ETC 104 2015-02-24 25_26 2026-01-05 18.2 0.32 570 -1.13 32.1
5 ETC 105 2013-11-27 25_26 2026-01-05 14.0 0.23 602 -0.75 31.2
6 ETC 107 2014-09-23 25_26 2026-01-05 17.6 0.30 595 -0.91 36.1
con_imp
1 111.2
2 95.7
3 84.7
4 58.0
5 114.8
6 93.5
We can continue to do this for our other tests (RJT.csv and imtp.csv)
Data exploration
Here we will look at how to explore data through correlation analysis - we might be interested to know which metrics correlate with each other, are some telling us something very similar are others telling us something different. For now I will focus purely on the counter-movement jump.
We can visualize correlations in ggplot using geom_point() and stats_smooth() choosing either a linear model (method - “lm”) or a smooth loess curve (method = loess)
ggplot(cmj) +
aes(x = cmj_rsi, y = cmj_height) +
geom_point(colour = "purple", size = 2) +
geom_smooth(method = "lm", se = TRUE, colour = "grey2") +
labs(
x = "Reactive Strength Index Modified (m/s)",
y = "Countermovement Jump Height (cm)",
title = "Relationships between CMJ measures"
) +
theme_minimal() +
theme(
plot.title = element_text(size = 14L,
face = "bold"),
axis.title.y = element_text(size = 12L,
face = "bold"),
axis.title.x = element_text(size = 12L,
face = "bold")
)`geom_smooth()` using formula = 'y ~ x'
We can also run a simple Pearson’s correlation:
cor.test(cmj$cmj_rsi, cmj$cmj_height, method = "pearson")
Pearson's product-moment correlation
data: cmj$cmj_rsi and cmj$cmj_height
t = 6.5344, df = 36, p-value = 1.35e-07
alternative hypothesis: true correlation is not equal to 0
95 percent confidence interval:
0.5453099 0.8549509
sample estimates:
cor
0.7365867
Correlating all items in a data set
We might just want to look at the correlations between each of our metrics
We can do this simply by creating a subseted data set and then using one of a few methods (I’ve selected all comments between cmj_height and con_imp:
# Select numeric columns you want to correlate
perf_data <- data %>%
select(cmj_height:con_imp)I’ve used corrplot here
# Use corrplot
corrplot(cor(perf_data, use = "pairwise.complete.obs"),
method = "circle",
type = "upper",
addCoef.col = "black",
tl.col = "black",
tl.srt = 45,
lowCI.mat = TRUE,
uppCI.mat = TRUE)And here I’ve got rid of the correlations between the same metrics which = 1
# Use corrplot with custom limits for the color legend to improve visualisation
corr_matrix <- cor(perf_data, use = "pairwise.complete.obs")
corrplot(
corr_matrix,
method = "circle",
type = "upper",
diag = FALSE,
addCoef.col = "black",
tl.col = "black",
tl.srt = 45,
cl.lim = c(-0.8, 0.8) # <- force legend limits (corrplot argument)
)Warning in text.default(pos.xlabel[, 1], pos.xlabel[, 2], newcolnames, srt =
tl.srt, : "cl.lim" is not a graphical parameter
Warning in text.default(pos.ylabel[, 1], pos.ylabel[, 2], newrownames, col =
tl.col, : "cl.lim" is not a graphical parameter
Warning in title(title, ...): "cl.lim" is not a graphical parameter
Another method that is quite nice is uses the package “GGally” - this is cool as it gives us the correlation, the scatter plot and the distribution all in one plot.
ggpairs(
data %>% select(cmj_height:con_imp),
lower = list(continuous = wrap("smooth", method = "lm", se = TRUE)),
upper = list(continuous = wrap("cor")),
title = "Pairwise CMJ Performance Relationships"
) +
theme_classic()