South Africa has a rich (and often under-recognised) history in Olympic weightlifting. The South African Weightlifting Federation (SAWF) wants to recognise historic bests and showcase performance standards for the next generation.
Primary task: Identify and rank the best male and female weightlifters by Sinclair score (from their best recorded total).
Secondary questions
Stakeholders: SAWF, national coaches/selectors, sport scientists/analysts, athletes, and the public.
Sources
Fields: athlete, year, bodyweight, snatch, clean_and_jerk, total, sinclair.
Credibility & limitations
# ---- Libraries ----
library(googlesheets4)
library(dplyr)
library(tidyr)
library(readr)
library(janitor)
library(ggplot2)
# Turn off Google authentication: a publicly readable sheet needs no token,
# and this keeps auth prompts out of the knit.
suppressMessages(gs4_deauth())

# ---- Global styling (consistent theme & colours) ----
# Default theme for every plot drawn after this point.
theme_set(theme_minimal(base_size = 13))

# Accent colours used for points and error bars.
wal_blue_dark <- "#003566"  # Men
wal_red_dark <- "#A4161A"   # Women

# End points of the per-gender gradients (low value -> high value).
male_fill_low <- "#90E0EF"
male_fill_high <- "#0077B6"
fem_fill_low <- "#F8C8DC"
fem_fill_high <- "#C9184A"
# ---- Data import ----
sheet_url <- "https://docs.google.com/spreadsheets/d/1zhDv-Kvhj86CT6qwcwV1uRskGmXYio7xsqrdtH_sS-g/edit?usp=sharing"

# ---- Basic cleaning & types ----
# Columns that must be numeric for the rankings and ratios further down.
num_cols <- c("year", "bodyweight", "snatch", "clean_and_jerk", "total", "sinclair")

# Each sheet goes through the same steps: read the tab, clean the column
# names, coerce the measurement columns to numeric (entries that cannot be
# parsed become NA and the coercion warning is muted), then keep only rows
# that have a Sinclair score and both lifts.
mens_results <- read_sheet(sheet_url, sheet = "Men") |>
  clean_names() |>
  mutate(across(all_of(num_cols), \(col) suppressWarnings(as.numeric(col)))) |>
  drop_na(sinclair, snatch, clean_and_jerk)

womens_results <- read_sheet(sheet_url, sheet = "Women") |>
  clean_names() |>
  mutate(across(all_of(num_cols), \(col) suppressWarnings(as.numeric(col)))) |>
  drop_na(sinclair, snatch, clean_and_jerk)

# Quick peek
dplyr::glimpse(mens_results)
## Rows: 37
## Columns: 7
## $ athlete <chr> "Darryn Anthony", "Bennie Oldewage", "Greg Shushu", "Go…
## $ year <dbl> 2008, 1977, 2008, 2019, 2014, 2025, 2025, 1988, 1985, 2…
## $ bodyweight <dbl> 76.70, 87.00, 66.26, 125.00, 55.35, 87.65, 92.97, 95.00…
## $ snatch <dbl> 143.0, 150.0, 125.0, 176.0, 103.0, 146.0, 142.0, 145.0,…
## $ clean_and_jerk <dbl> 172.0, 185.0, 160.0, 201.0, 135.0, 173.0, 180.0, 180.0,…
## $ total <dbl> 315.0, 335.0, 285.0, 377.0, 238.0, 319.0, 322.0, 325.0,…
## $ sinclair <dbl> 412.28, 409.52, 408.86, 400.35, 389.33, 388.51, 381.25,…
# Same quick structural check for the women's table.
dplyr::glimpse(womens_results)
## Rows: 19
## Columns: 7
## $ athlete <chr> "Mona Pretorius", "Anneke Spies Burger", "Johanni Talja…
## $ year <dbl> 2018, 2022, 2018, 2010, 2025, 2018, 2008, 2025, 2018, 2…
## $ bodyweight <dbl> 62.77, 58.89, 58.85, 47.99, 68.65, 63.94, 73.03, 63.00,…
## $ snatch <dbl> 91, 85, 84, 68, 91, 83, 83, 81, 78, 69, 75, 83, 74, 71,…
## $ clean_and_jerk <dbl> 115, 105, 105, 94, 112, 110, 120, 97, 99, 93, 90, 98, 9…
## $ total <dbl> 206, 190, 189, 162, 203, 193, 203, 178, 177, 162, 165, …
## $ sinclair <dbl> 271.02, 260.30, 259.04, 257.49, 253.52, 251.09, 245.34,…
I standardised weight-class bins, created the snatch:clean-and-jerk ratio, and built compact Top-10 tables.
# Weight-class bins are left-closed: a lifter weighing exactly 75 kg lands in
# "75–82.5 kg". The heaviest bin tests bodyweight explicitly instead of using
# a catch-all branch. Rows are only filtered on sinclair, snatch and
# clean_and_jerk, so bodyweight can still be NA here; an NA bodyweight must
# produce an NA class rather than being counted in the top class.

# Men classes
mens_results <- mens_results |>
  mutate(
    weight_class = case_when(
      bodyweight < 65 ~ "<65 kg",
      bodyweight < 75 ~ "65–75 kg",
      bodyweight < 82.5 ~ "75–82.5 kg",
      bodyweight < 90 ~ "82.5–90 kg",
      bodyweight < 100 ~ "90–100 kg",
      bodyweight >= 100 ~ "100+ kg"
    ),
    # Fix the level order so plots and tables list classes light to heavy.
    weight_class = factor(
      weight_class,
      levels = c("<65 kg", "65–75 kg", "75–82.5 kg", "82.5–90 kg", "90–100 kg", "100+ kg")
    )
  )

# Women classes
womens_results <- womens_results |>
  mutate(
    weight_class = case_when(
      bodyweight < 55 ~ "<55 kg",
      bodyweight < 59 ~ "55–59 kg",
      bodyweight < 64 ~ "59–64 kg",
      bodyweight < 71 ~ "64–71 kg",
      bodyweight < 81 ~ "71–81 kg",
      bodyweight >= 81 ~ "81+ kg"
    ),
    weight_class = factor(
      weight_class,
      levels = c("<55 kg", "55–59 kg", "59–64 kg", "64–71 kg", "71–81 kg", "81+ kg")
    )
  )

# Ratios: snatch as a fraction of clean & jerk.
mens_results <- mens_results |> mutate(snatch_cj_ratio = snatch / clean_and_jerk)
womens_results <- womens_results |> mutate(snatch_cj_ratio = snatch / clean_and_jerk)

# Top-10 by Sinclair: sort descending and keep the first ten rows.
top10_men <- mens_results |> arrange(desc(sinclair)) |> slice_head(n = 10)
top10_women <- womens_results |> arrange(desc(sinclair)) |> slice_head(n = 10)
# Horizontal bar chart of the ten highest men's Sinclair scores, best at the
# top. Bar shade follows the score, and the rounded value is printed in white
# just inside the end of each bar.
top10_men |>
  ggplot(aes(x = reorder(athlete, sinclair), y = sinclair)) +
  geom_col(aes(fill = sinclair), show.legend = FALSE) +
  geom_text(
    aes(label = round(sinclair, 1)),
    hjust = 1.2,
    colour = "white",
    size = 4,
    fontface = "bold"
  ) +
  scale_fill_gradient(low = male_fill_low, high = male_fill_high) +
  coord_flip() +
  labs(
    title = "Top 10 Weightlifters (All-Time) — Men",
    subtitle = "Sinclair scores displayed inside bars",
    x = "Athlete",
    y = "Sinclair Score"
  ) +
  theme_minimal(base_size = 13) +
  # Centre the title block; must come after theme_minimal() to take effect.
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
# Horizontal bar chart of the ten highest women's Sinclair scores, best at the
# top. Bar shade follows the score, and the rounded value is printed in white
# just inside the end of each bar.
top10_women |>
  ggplot(aes(x = reorder(athlete, sinclair), y = sinclair)) +
  geom_col(aes(fill = sinclair), show.legend = FALSE) +
  geom_text(
    aes(label = round(sinclair, 1)),
    hjust = 1.2,
    colour = "white",
    size = 4,
    fontface = "bold"
  ) +
  scale_fill_gradient(low = fem_fill_low, high = fem_fill_high) +
  coord_flip() +
  labs(
    title = "Top 10 Weightlifters (All-Time) — Women",
    subtitle = "Sinclair scores displayed inside bars",
    x = "Athlete",
    y = "Sinclair Score"
  ) +
  theme_minimal(base_size = 13) +
  # Centre the title block; must come after theme_minimal() to take effect.
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
(Optional lollipop view for men: each Top-10 athlete's Sinclair score relative to the Top-10 mean)
# Lollipop chart: each Top-10 man's Sinclair score relative to the Top-10 mean.
mean_sinclair_men <- mean(top10_men[["sinclair"]], na.rm = TRUE)

# Label which side of the mean each lifter is on; this drives the point colour.
top10_men_plot <- dplyr::mutate(
  top10_men,
  above_mean = ifelse(sinclair >= mean_sinclair_men, "Above mean", "Below mean")
)

ggplot(top10_men_plot, aes(x = reorder(athlete, sinclair), y = sinclair)) +
  # Stem from the mean line out to the athlete's score.
  geom_segment(
    aes(xend = athlete, y = mean_sinclair_men, yend = sinclair),
    colour = male_fill_low,
    linewidth = 1.1,
    alpha = 0.6
  ) +
  geom_point(aes(colour = above_mean), size = 4) +
  geom_hline(
    yintercept = mean_sinclair_men,
    linetype = "dashed",
    colour = "grey40"
  ) +
  coord_flip() +
  scale_colour_manual(
    values = c("Above mean" = wal_blue_dark, "Below mean" = male_fill_low),
    guide = "none"
  ) +
  labs(
    title = "Top 10 Weightlifters — Men (Deviation from Mean Sinclair)",
    subtitle = paste0("Dashed line = mean Sinclair (", round(mean_sinclair_men, 1), ")"),
    x = "Athlete",
    y = "Sinclair Score"
  ) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
(Optional lollipop view for women: each Top-10 athlete's Sinclair score relative to the Top-10 mean)
# Lollipop chart: each Top-10 woman's Sinclair score relative to the Top-10
# mean.
mean_sinclair_women <- mean(top10_women[["sinclair"]], na.rm = TRUE)

# Label which side of the mean each lifter is on; this drives the point colour.
top10_women_plot <- dplyr::mutate(
  top10_women,
  above_mean = ifelse(sinclair >= mean_sinclair_women, "Above mean", "Below mean")
)

ggplot(top10_women_plot, aes(x = reorder(athlete, sinclair), y = sinclair)) +
  # Stem from the mean line out to the athlete's score.
  geom_segment(
    aes(xend = athlete, y = mean_sinclair_women, yend = sinclair),
    colour = fem_fill_low,
    linewidth = 1.1,
    alpha = 0.6
  ) +
  geom_point(aes(colour = above_mean), size = 4) +
  geom_hline(
    yintercept = mean_sinclair_women,
    linetype = "dashed",
    colour = "grey40"
  ) +
  coord_flip() +
  scale_colour_manual(
    values = c("Above mean" = wal_red_dark, "Below mean" = fem_fill_low),
    guide = "none"
  ) +
  labs(
    title = "Top 10 Weightlifters — Women (Deviation from Mean Sinclair)",
    subtitle = paste0("Dashed line = mean Sinclair (", round(mean_sinclair_women, 1), ")"),
    x = "Athlete",
    y = "Sinclair Score"
  ) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
# Bodyweight against Sinclair, one point per row in the men's table.
mens_results |>
  ggplot(aes(x = bodyweight, y = sinclair)) +
  geom_point(colour = male_fill_high, alpha = 0.8) +
  labs(
    title = "Bodyweight vs Sinclair — Men",
    x = "Bodyweight (kg)",
    y = "Sinclair Score"
  )

# Same view for the women's table.
womens_results |>
  ggplot(aes(x = bodyweight, y = sinclair)) +
  geom_point(colour = fem_fill_high, alpha = 0.8) +
  labs(
    title = "Bodyweight vs Sinclair — Women",
    x = "Bodyweight (kg)",
    y = "Sinclair Score"
  )
# Per-class summary for men: mean and best Sinclair, mean total, and the
# number of rows behind each figure, ordered from highest mean Sinclair down.
avg_by_class_men <- mens_results |>
  group_by(weight_class) |>
  summarise(
    avg_sinclair = mean(sinclair, na.rm = TRUE),
    max_sinclair = max(sinclair, na.rm = TRUE),
    avg_total = mean(total, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) |>
  arrange(desc(avg_sinclair))

# Horizontal bars of the class means, with the sample size written just past
# the end of each bar.
avg_by_class_men |>
  ggplot(aes(x = reorder(weight_class, avg_sinclair), y = avg_sinclair)) +
  geom_col(aes(fill = avg_sinclair), show.legend = FALSE) +
  geom_text(aes(label = paste0("n=", n)), hjust = -0.1, size = 3) +
  # Extend the value axis 10% past the tallest bar to leave room for "n=" labels.
  coord_flip(
    ylim = c(0, max(avg_by_class_men[["avg_sinclair"]], na.rm = TRUE) * 1.10)
  ) +
  scale_fill_gradient(low = male_fill_low, high = male_fill_high) +
  labs(
    title = "Average Sinclair by Weight Class — Men",
    x = "Weight Class",
    y = "Average Sinclair"
  )
# Per-class summary for women: mean and best Sinclair, mean total, and the
# number of rows behind each figure, ordered from highest mean Sinclair down.
avg_by_class_women <- womens_results |>
  group_by(weight_class) |>
  summarise(
    avg_sinclair = mean(sinclair, na.rm = TRUE),
    max_sinclair = max(sinclair, na.rm = TRUE),
    avg_total = mean(total, na.rm = TRUE),
    n = n(),
    .groups = "drop"
  ) |>
  arrange(desc(avg_sinclair))

# Horizontal bars of the class means, with the sample size written just past
# the end of each bar.
avg_by_class_women |>
  ggplot(aes(x = reorder(weight_class, avg_sinclair), y = avg_sinclair)) +
  geom_col(aes(fill = avg_sinclair), show.legend = FALSE) +
  geom_text(aes(label = paste0("n=", n)), hjust = -0.1, size = 3) +
  # Extend the value axis 10% past the tallest bar to leave room for "n=" labels.
  coord_flip(
    ylim = c(0, max(avg_by_class_women[["avg_sinclair"]], na.rm = TRUE) * 1.10)
  ) +
  scale_fill_gradient(low = fem_fill_low, high = fem_fill_high) +
  labs(
    title = "Average Sinclair by Weight Class — Women",
    x = "Weight Class",
    y = "Average Sinclair"
  )
# Rows usable for the time view: integer year, and no missing year, score or
# weight class.
men_time <- mens_results |>
  mutate(year = as.integer(year)) |>
  drop_na(year, sinclair, weight_class)

# One point per result, coloured by class. Higher scores are drawn more
# opaque, and points are jittered horizontally only so same-year results
# separate without distorting the score.
men_time |>
  ggplot(aes(x = year, y = sinclair, colour = weight_class)) +
  geom_point(
    aes(alpha = sinclair),
    size = 3,
    position = position_jitter(width = 0.2, height = 0)
  ) +
  scale_alpha_continuous(range = c(0.3, 1), guide = "none") +
  labs(
    title = "Sinclair Over Time by Weight Class — Men",
    x = "Year",
    y = "Sinclair Score",
    colour = "Weight Class"
  ) +
  theme(legend.position = "bottom")

# Same preparation and plot for women.
women_time <- womens_results |>
  mutate(year = as.integer(year)) |>
  drop_na(year, sinclair, weight_class)

women_time |>
  ggplot(aes(x = year, y = sinclair, colour = weight_class)) +
  geom_point(
    aes(alpha = sinclair),
    size = 3,
    position = position_jitter(width = 0.2, height = 0)
  ) +
  scale_alpha_continuous(range = c(0.3, 1), guide = "none") +
  labs(
    title = "Sinclair Over Time by Weight Class — Women",
    x = "Year",
    y = "Sinclair Score",
    colour = "Weight Class"
  ) +
  theme(legend.position = "bottom")
(Optional companion: yearly means with sample size to contextualise any “trend” claims.)
# Yearly mean Sinclair for men, with the number of results behind each mean.
trend_by_year_men <- men_time |>
  group_by(year) |>
  summarise(avg_sinclair = mean(sinclair, na.rm = TRUE), n = n(), .groups = "drop")

# Line through the yearly means; point area scales with the sample size so
# years resting on a single result are visibly less reliable.
ggplot(trend_by_year_men, aes(x = year, y = avg_sinclair)) +
  # Line width is set with `linewidth`; `size` for lines has been deprecated
  # since ggplot2 3.4.0 and warns on current versions.
  geom_line(linewidth = 1) +
  geom_point(aes(size = n), colour = wal_blue_dark) +
  scale_size(range = c(2, 6), guide = "none") +
  labs(title = "Average Sinclair by Year — Men (Point Size = Sample Size)",
       x = "Year", y = "Average Sinclair")

# Same summary and plot for women.
trend_by_year_women <- women_time |>
  group_by(year) |>
  summarise(avg_sinclair = mean(sinclair, na.rm = TRUE), n = n(), .groups = "drop")

ggplot(trend_by_year_women, aes(x = year, y = avg_sinclair)) +
  geom_line(linewidth = 1) +
  geom_point(aes(size = n), colour = wal_red_dark) +
  scale_size(range = c(2, 6), guide = "none") +
  labs(title = "Average Sinclair by Year — Women (Point Size = Sample Size)",
       x = "Year", y = "Average Sinclair")
# Whole-dataset relationship (context)
# Correlation between the snatch:C&J ratio and Sinclair over every retained
# row, using only rows where both values are present.
cor_m_all <- with(
  mens_results,
  cor(snatch_cj_ratio, sinclair, use = "complete.obs")
)
cor_w_all <- with(
  womens_results,
  cor(snatch_cj_ratio, sinclair, use = "complete.obs")
)
paste("Correlation (Men, all):", round(cor_m_all, 3))
## [1] "Correlation (Men, all): 0.148"
paste("Correlation (Women, all):", round(cor_w_all, 3))
## [1] "Correlation (Women, all): -0.228"

# Ratio against Sinclair, coloured by weight class — men.
mens_results |>
  ggplot(aes(x = snatch_cj_ratio, y = sinclair)) +
  geom_point(aes(colour = weight_class), size = 3, alpha = 0.85) +
  labs(
    title = "Snatch:C&J Ratio vs Sinclair — Men",
    x = "Snatch ÷ Clean & Jerk Ratio",
    y = "Sinclair Score",
    colour = "Weight Class"
  )

# Ratio against Sinclair, coloured by weight class — women.
womens_results |>
  ggplot(aes(x = snatch_cj_ratio, y = sinclair)) +
  geom_point(aes(colour = weight_class), size = 3, alpha = 0.85) +
  labs(
    title = "Snatch:C&J Ratio vs Sinclair — Women",
    x = "Snatch ÷ Clean & Jerk Ratio",
    y = "Sinclair Score",
    colour = "Weight Class"
  )
The snatch-to-clean-and-jerk ratio reflects how
balanced a lifter is between explosive power (snatch) and total strength
(clean & jerk).
In elite-level lifters, this ratio typically falls within a narrow range
— too low suggests a strength bias, while too high suggests technical
limitations or poor heavy-lift efficiency.
In this analysis, the term “golden ratio” refers
not to the mathematical constant (≈1.618), but to an
empirical balance zone — the average
snatch-to-clean-and-jerk ratio achieved by South Africa’s top
performers.
By examining this range (mean ± SD and median values), we can identify a
performance sweet spot that maximises Sinclair
efficiency without over-emphasising either lift.
# Top-10 correlations
# (Re)compute the ratio on the Top-10 tables so this chunk is self-contained.
top10_men <- mutate(top10_men, snatch_cj_ratio = snatch / clean_and_jerk)
top10_women <- mutate(top10_women, snatch_cj_ratio = snatch / clean_and_jerk)

cor_m_top10 <- with(
  top10_men,
  cor(snatch_cj_ratio, sinclair, use = "complete.obs")
)
cor_w_top10 <- with(
  top10_women,
  cor(snatch_cj_ratio, sinclair, use = "complete.obs")
)
paste("Correlation (Men, Top-10):", round(cor_m_top10, 3))
## [1] "Correlation (Men, Top-10): 0.437"
paste("Correlation (Women, Top-10):", round(cor_w_top10, 3))
## [1] "Correlation (Women, Top-10): 0.115"

# Mean/Median/SD for the ratio among elites (empirical “golden zone”)
# One summary row per gender, tagged so the two rows can be stacked.
men_ratio_summary <- top10_men |>
  summarise(
    mean_ratio = mean(snatch_cj_ratio, na.rm = TRUE),
    median_ratio = median(snatch_cj_ratio, na.rm = TRUE),
    sd_ratio = sd(snatch_cj_ratio, na.rm = TRUE),
    gender = "Men"
  )
women_ratio_summary <- top10_women |>
  summarise(
    mean_ratio = mean(snatch_cj_ratio, na.rm = TRUE),
    median_ratio = median(snatch_cj_ratio, na.rm = TRUE),
    sd_ratio = sd(snatch_cj_ratio, na.rm = TRUE),
    gender = "Women"
  )

# Stack, round every numeric column to three decimals, and put gender first.
ratio_summary <- bind_rows(men_ratio_summary, women_ratio_summary) |>
  mutate(across(where(is.numeric), \(value) round(value, 3))) |>
  select(gender, mean_ratio, median_ratio, sd_ratio)
ratio_summary
# Mean ± SD (error bar and solid circle) and median (faded triangle) of the
# Top-10 snatch:C&J ratio, one column per gender.
ggplot(ratio_summary, aes(x = gender)) +
  # `linewidth` replaces the line-width use of `size`, deprecated since
  # ggplot2 3.4.0.
  geom_errorbar(aes(ymin = mean_ratio - sd_ratio, ymax = mean_ratio + sd_ratio, colour = gender),
                width = 0.1, linewidth = 1.1) +
  geom_point(aes(y = mean_ratio, colour = gender), size = 5, shape = 19) +
  geom_point(aes(y = median_ratio, colour = gender), size = 4, shape = 17, alpha = 0.5) +
  scale_colour_manual(values = c("Men" = wal_blue_dark, "Women" = wal_red_dark)) +
  scale_y_continuous(breaks = seq(0.72, 0.86, by = 0.02)) +
  # Set the viewing window on the coordinate system rather than as scale
  # limits: scale limits turn out-of-range values into NA, which silently
  # removes an error bar whose end passes 0.72 or 0.86. Zooming keeps the bar
  # and clips it at the panel edge.
  coord_cartesian(ylim = c(0.72, 0.86)) +
  labs(title = "Snatch:C&J Ratio Summary (Top-10)",
       subtitle = "Dark circles = Mean ± SD | Transparent triangles = Median",
       x = "Gender", y = "Snatch ÷ Clean & Jerk Ratio", colour = "Gender")
# Distribution of the snatch:C&J ratio across all retained men's rows
# (12 bins, white outlines to separate adjacent bars).
mens_results |>
  ggplot(aes(x = snatch_cj_ratio)) +
  geom_histogram(
    bins = 12,
    fill = male_fill_high,
    colour = "white",
    alpha = 0.85
  ) +
  labs(
    title = "Distribution of Snatch:C&J Ratio — Men",
    x = "Snatch ÷ C&J Ratio",
    y = "Count"
  )

# Same histogram for women.
womens_results |>
  ggplot(aes(x = snatch_cj_ratio)) +
  geom_histogram(
    bins = 12,
    fill = fem_fill_high,
    colour = "white",
    alpha = 0.85
  ) +
  labs(
    title = "Distribution of Snatch:C&J Ratio — Women",
    x = "Snatch ÷ C&J Ratio",
    y = "Count"
  )
For coaches & selectors
For SAWF
For analysts
# Record the R version, platform and package versions used for this render.
sessionInfo()
## R version 4.5.2 (2025-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64
## Running under: Windows 11 x64 (build 26200)
##
## Matrix products: default
## LAPACK version 3.12.1
##
## locale:
## [1] LC_COLLATE=English_South Africa.utf8 LC_CTYPE=English_South Africa.utf8
## [3] LC_MONETARY=English_South Africa.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_South Africa.utf8
##
## time zone: Africa/Johannesburg
## tzcode source: internal
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] ggplot2_4.0.0 janitor_2.2.1 readr_2.1.5
## [4] tidyr_1.3.1 dplyr_1.1.4 googlesheets4_1.1.2
##
## loaded via a namespace (and not attached):
## [1] sass_0.4.10 generics_0.1.4 stringi_1.8.7 hms_1.1.4
## [5] digest_0.6.37 magrittr_2.0.4 evaluate_1.0.5 grid_4.5.2
## [9] timechange_0.3.0 RColorBrewer_1.1-3 fastmap_1.2.0 cellranger_1.1.0
## [13] jsonlite_2.0.0 googledrive_2.1.2 httr_1.4.7 purrr_1.1.0
## [17] scales_1.4.0 jquerylib_0.1.4 cli_3.6.5 rlang_1.1.6
## [21] withr_3.0.2 cachem_1.1.0 yaml_2.3.10 tools_4.5.2
## [25] tzdb_0.5.0 gargle_1.6.0 curl_7.0.0 vctrs_0.6.5
## [29] R6_2.6.1 lifecycle_1.0.4 lubridate_1.9.4 snakecase_0.11.1
## [33] stringr_1.5.2 fs_1.6.6 pkgconfig_2.0.3 pillar_1.11.1
## [37] bslib_0.9.0 gtable_0.3.6 glue_1.8.0 xfun_0.54
## [41] tibble_3.3.0 tidyselect_1.2.1 rstudioapi_0.17.1 knitr_1.50
## [45] farver_2.1.2 htmltools_0.5.8.1 labeling_0.4.3 rmarkdown_2.30
## [49] compiler_4.5.2 S7_0.2.0