# ---- Packages ----
library(dplyr)
library(lubridate)
library(ggplot2)
library(readr)
library(arrow)

# ---- Data loading ----
# Directory that holds the two raw input files.
data_dir <- "Z:/SAEPROJET"

# Men's results: load() restores the objects stored in the .rdata file into
# the current environment.
load(file.path(data_dir, "donneehomme.rdata"))

# The rest of the script works on an object called `donnee_homme`; fail early
# with a clear message if the file did not provide it.
# NOTE(review): confirm the object name actually saved in the .rdata file.
if (!exists("donnee_homme")) {
  stop("`donnee_homme` was not created by load()", call. = FALSE)
}

# Women's results, read from a Parquet file with arrow.
donnee_femme <- read_parquet(file.path(data_dir, "donneefemme.parquet"))

# ---- Men: add the sex column and fix the Date_naiss / Sexe inversion ----

# Tag every row of the men's table with sex "H".
donnee_homme$Sexe <- "H"

# Step 1: exchange the *values* of Date_naiss and Sexe, using `m` as a
# temporary column that is dropped afterwards.
donnee_homme <- donnee_homme %>%
  mutate(
    m = Date_naiss,
    Date_naiss = Sexe,
    Sexe = m
  ) %>%
  select(-m)

# Step 2: exchange the *names* of the same two columns, using `n` as a
# temporary name.
# NOTE(review): taken together, steps 1 and 2 leave each name attached to the
# values it had before step 1 and only swap the positions of the two columns.
# Sexe was also overwritten with "H" just above. Confirm this is the intended
# correction.
donnee_homme <- donnee_homme %>%
  rename(
    n = Date_naiss,
    Date_naiss = Sexe
  ) %>%
  rename(Sexe = n)

# ---- Women: ensure the expected columns exist, then merge with men ----

# Add the sex column when the Parquet file does not carry one.
if (!"Sexe" %in% names(donnee_femme)) {
  donnee_femme$Sexe <- "F"
}

# Add an empty birth-date column when it is missing, so both tables share
# the same set of columns.
if (!"Date_naiss" %in% names(donnee_femme)) {
  donnee_femme$Date_naiss <- NA
}

# Every column of the men's table must exist on the women's side before the
# two tables can be aligned and stacked.
colonnes_manquantes <- setdiff(names(donnee_homme), names(donnee_femme))
if (length(colonnes_manquantes) > 0) {
  stop(
    "Columns missing from donnee_femme: ",
    paste(colonnes_manquantes, collapse = ", "),
    call. = FALSE
  )
}

# Put the women's columns in the same order as the men's, then stack the two
# tables (men first).
donnee_femme <- donnee_femme[, names(donnee_homme)]
donnee_propre <- rbind(donnee_homme, donnee_femme)

# ---- Derived variables on the merged table ----

# Both date columns parsed once as Date vectors.
Date1 <- as.Date(donnee_propre$Date_naiss)
Date2 <- as.Date(donnee_propre$Date_compet)

# Exact age, in (fractional) years, between birth and competition.
age_exact <- as.numeric(time_length(interval(Date1, Date2), unit = "year"))

# Fractional part of the exact age, in [0, 1).
decimale <- age_exact - floor(age_exact)

donnee_propre <- donnee_propre %>%
  mutate(
    # Perf with its sign flipped and divided by 100.
    Perf_propre = -Perf / 100,
    # Age rounded to whole years (the exact value only feeds `decimale`).
    Age = round(age_exact),
    # Competition date stored as a Date.
    Date_compet = Date2,
    # Season: competitions from September onward count in the next year.
    Saison = ifelse(
      month(Date_compet) >= 9,
      year(Date_compet) + 1,
      year(Date_compet)
    ),
    # Year of birth.
    Annee_naiss = year(Date1),
    # Relative month, 1..12, from the fractional part of the age.
    # The previous formula, ceiling(decimale * 10) + 1, produced values
    # 1..11 and therefore quarters covering 0.2 / 0.3 / 0.3 / 0.2 of the
    # year instead of four equal quarters.
    Mois = floor(decimale * 12) + 1,
    # Quarter 1..4 of the relative month (months 1-3 -> 1, ..., 10-12 -> 4).
    # NOTE(review): this quarter comes from the fractional age at the
    # competition date, not from the calendar month of Date_naiss - confirm
    # that this is the intended definition of the "birth quarter".
    Trimestre = as.integer(ceiling(Mois / 3)),
    # Relative age: season minus year of birth.
    AgeRelatif = Saison - Annee_naiss
  ) %>%
  # Keep strictly positive ages only. filter() also drops rows whose Age is
  # NA, whereas `df[df$Age > 0, ]` would turn them into all-NA rows.
  filter(Age > 0) %>%
  # Age category derived from the relative age; NA outside the listed ranges.
  mutate(
    Categ_age = case_when(
      AgeRelatif >= 9 & AgeRelatif <= 14 ~ "Cadet",
      AgeRelatif >= 15 & AgeRelatif <= 17 ~ "Junior",
      AgeRelatif >= 18 & AgeRelatif <= 22 ~ "Espoir",
      AgeRelatif >= 23 ~ "Senior",
      TRUE ~ NA_character_
    )
  )

# ---- Outlier detection (1.5 * IQR rule) ----

# Within each Discipline x Sexe x Categ_age group, compute the quartiles of
# Perf and flag values outside [Q1 - 1.5 * IQR, Q3 + 1.5 * IQR].
donnees_iqr <- donnee_propre %>%
  group_by(Discipline, Sexe, Categ_age) %>%
  mutate(
    Q1 = quantile(Perf, 0.25, na.rm = TRUE),
    Q3 = quantile(Perf, 0.75, na.rm = TRUE),
    IQR = Q3 - Q1,
    borneinf = Q1 - 1.5 * IQR,
    bornesup = Q3 + 1.5 * IQR,
    aberrant = Perf < borneinf | Perf > bornesup
  ) %>%
  ungroup()

# Split on the flag. Rows whose flag is NA (e.g. missing Perf) end up in
# neither table, because filter() drops NA conditions.
valeurs_aberrantes <- donnees_iqr %>%
  filter(aberrant)
donnees_normales <- donnees_iqr %>%
  filter(!aberrant)

cat(sprintf("Données prêtes : %d lignes normales\n", nrow(donnees_normales)))

# ---- Shared plotting constants ----

# Order of the age categories on the x axis.
ordre_cat <- c("Cadet", "Junior", "Espoir", "Senior")

# Colour palette. Each assignment sits on its own line so that a trailing
# comment cannot swallow the next assignment.
COLH <- "#00BCD4"  # cyan - men
COLF <- "#E05A5A"  # red - women
COLH2 <- "#B8E8F0" # light cyan - men, T4
COLF2 <- "#F9B8B8" # light pink - women, T4

# Shared ggplot2 theme for every slide figure: minimal base theme (size 14),
# white backgrounds, legend at the bottom, no vertical or minor grid lines,
# and no title/subtitle.
THEME_PPTX <- theme_minimal(base_size = 14) +
  theme(
    strip.text = element_text(face = "bold", size = 13),
    strip.background = element_rect(fill = "#F0EEEB", color = NA),
    legend.position = "bottom",
    legend.text = element_text(size = 13),
    legend.key.size = unit(0.9, "cm"),
    panel.grid.minor = element_blank(),
    panel.grid.major = element_line(color = "grey90", linewidth = 0.5),
    panel.grid.major.x = element_blank(),
    axis.text = element_text(size = 11, color = "#444444"),
    axis.title = element_text(size = 12, color = "#444444"),
    plot.background = element_rect(fill = "white", color = NA),
    panel.background = element_rect(fill = "white", color = NA),
    plot.title = element_blank(),
    plot.subtitle = element_blank()
  )

# ---- Fig 1 (slide 5): share of performances per quarter, Cadet ----

cat("Génération Fig 1 (slide 5)...\n")

# Percentage of Cadet performances in each quarter, computed per sex.
# Explicit `levels = 1:4` keeps the T1..T4 labels valid even when one quarter
# has no rows; rows without a quarter are excluded, as in Fig 2.
df_fig1 <- donnees_normales %>%
  filter(Categ_age == "Cadet", !is.na(Trimestre)) %>%
  count(Sexe, Trimestre) %>%
  group_by(Sexe) %>%
  mutate(pct = n / sum(n) * 100) %>%
  ungroup() %>%
  mutate(
    Trimestre = factor(Trimestre, levels = 1:4, labels = paste0("T", 1:4)),
    Sexe = ifelse(Sexe == "H", "Hommes", "Femmes")
  )

# Per-sex subsets used for the annotations below.
fig1_hommes <- df_fig1[df_fig1$Sexe == "Hommes", ]
fig1_femmes <- df_fig1[df_fig1$Sexe == "Femmes", ]

# T1 - T4 gaps (percentage points) shown in the annotation labels.
ecartH <- fig1_hommes$pct[fig1_hommes$Trimestre == "T1"] -
  fig1_hommes$pct[fig1_hommes$Trimestre == "T4"]
ecartF <- fig1_femmes$pct[fig1_femmes$Trimestre == "T1"] -
  fig1_femmes$pct[fig1_femmes$Trimestre == "T4"]

# Tallest bar per sex: the annotation segments and labels sit just above it.
max_pct_h <- max(fig1_hommes$pct)
max_pct_f <- max(fig1_femmes$pct)

fig1 <- ggplot(df_fig1, aes(x = Trimestre, y = pct, fill = Sexe)) +
  geom_col(position = position_dodge(0.72), width = 0.65) +
  # Reference line at 25 %, i.e. an even split over four quarters.
  geom_hline(
    yintercept = 25, linetype = "dashed",
    color = "#333333", linewidth = 0.9
  ) +
  geom_text(
    aes(label = paste0(round(pct, 1), "%")),
    position = position_dodge(0.72),
    vjust = -0.5, size = 4, fontface = "bold", color = "#333333"
  ) +
  annotate(
    "text",
    x = 4.5, y = 25.7, label = "25% équitable",
    size = 3.5, hjust = 1, color = "#666666", fontface = "italic"
  ) +
  # Men: bracket from T1 to T4 plus its label.
  annotate(
    "segment",
    x = 0.82, xend = 3.82,
    y = max_pct_h + 2.5, yend = max_pct_h + 2.5,
    color = COLH, linewidth = 1.2
  ) +
  annotate(
    "text",
    x = 2.32, y = max_pct_h + 3.8,
    label = paste0("Écart Hommes : +", round(ecartH, 1), " pts"),
    size = 3.8, color = COLH, fontface = "bold"
  ) +
  # Women: bracket from T1 to T4 plus its label.
  annotate(
    "segment",
    x = 1.18, xend = 4.18,
    y = max_pct_f + 0.8, yend = max_pct_f + 0.8,
    color = COLF, linewidth = 1.2
  ) +
  annotate(
    "text",
    x = 2.68, y = max_pct_f + 2.1,
    label = paste0("Écart Femmes : +", round(ecartF, 1), " pts"),
    size = 3.8, color = COLF, fontface = "bold"
  ) +
  scale_fill_manual(
    values = c("Femmes" = COLF, "Hommes" = COLH),
    name = NULL
  ) +
  scale_y_continuous(
    labels = function(x) paste0(x, "%"),
    expand = expansion(mult = c(0, 0.20)),
    limits = c(0, NA)
  ) +
  labs(x = NULL, y = "% des performances — Catégorie Cadet") +
  THEME_PPTX

ggsave("slide5fig1repartitionCadet.png", fig1, width = 9, height = 6, dpi = 200)
cat("slide5fig1repartitionCadet.png\n")

# ---- Fig 2 (slide 6): mean Perf_propre per quarter, Cadet vs Senior ----

cat("Génération Fig 2 (slide 6)...\n")

# Mean Perf_propre per category x sex x quarter, for Cadet and Senior only.
# Explicit `levels = 1:4` keeps the T1..T4 labels valid even when a quarter
# is absent from the data.
df_fig2 <- donnees_normales %>%
  filter(Categ_age %in% c("Cadet", "Senior"), !is.na(Trimestre)) %>%
  mutate(Sexe = ifelse(Sexe == "H", "Hommes", "Femmes")) %>%
  group_by(Categ_age, Sexe, Trimestre) %>%
  summarise(Perfmoy = mean(Perf_propre, na.rm = TRUE), .groups = "drop") %>%
  mutate(
    Trimestre = factor(Trimestre, levels = 1:4, labels = paste0("T", 1:4)),
    Categ_age = factor(
      Categ_age,
      levels = c("Cadet", "Senior"),
      labels = c("Cadet (~14 ans)", "Senior (RAE disparu ?)")
    )
  )

# One line per sex, one facet per category, each facet with its own y scale.
fig2 <- ggplot(
  df_fig2,
  aes(x = Trimestre, y = Perfmoy, color = Sexe, group = Sexe)
) +
  geom_line(linewidth = 1.8) +
  # Filled points with a white outline.
  geom_point(
    size = 5, shape = 21, aes(fill = Sexe),
    color = "white", stroke = 2.5
  ) +
  facet_wrap(~ Categ_age, scales = "free_y") +
  scale_color_manual(
    values = c("Femmes" = COLF, "Hommes" = COLH),
    name = NULL
  ) +
  scale_fill_manual(
    values = c("Femmes" = COLF, "Hommes" = COLH),
    name = NULL
  ) +
  labs(x = NULL, y = "Perfpropre moyenne") +
  THEME_PPTX +
  # Re-enable the vertical grid lines that the shared theme removes.
  theme(panel.grid.major.x = element_line(color = "grey90", linewidth = 0.5))

ggsave("slide6fig2courbesperf.png", fig2, width = 11, height = 6, dpi = 200)
cat("slide6fig2courbesperf.png\n")

# ---- Fig 4 (slide 7, left): Perf_propre boxplots, T1 vs T4, Cadet ----

cat("Génération Fig 4 (slide 7 — gauche)...\n")

# Disciplines kept for the comparison, and their display labels.
# NOTE(review): these values must match the Discipline column exactly -
# confirm the spelling used in the data (e.g. "triplesaut").
disciplinescommunes <- c(
  "longueur", "javelot", "hauteur", "triplesaut", "disque"
)
disc_labels <- c(
  longueur = "Longueur",
  javelot = "Javelot",
  hauteur = "Hauteur",
  triplesaut = "Triple saut",
  disque = "Disque"
)

# Cadet rows from quarters 1 and 4 in the selected disciplines, with a
# combined quarter/sex group used for both the x axis and the fill.
df_fig4 <- donnees_normales %>%
  filter(
    Trimestre %in% c(1, 4),
    Categ_age == "Cadet",
    Discipline %in% disciplinescommunes
  ) %>%
  mutate(
    Sexe = ifelse(Sexe == "H", "Hommes", "Femmes"),
    Groupe = paste0("T", Trimestre, " — ", Sexe),
    Groupe = factor(
      Groupe,
      levels = c("T1 — Hommes", "T4 — Hommes", "T1 — Femmes", "T4 — Femmes")
    ),
    Discipline = factor(
      Discipline,
      levels = disciplinescommunes,
      labels = disc_labels
    )
  )

fig4 <- ggplot(df_fig4, aes(x = Groupe, y = Perf_propre, fill = Groupe)) +
  # Outlier points are hidden.
  geom_boxplot(outlier.shape = NA, linewidth = 0.5, width = 0.65) +
  # Median value printed as text on each box.
  stat_summary(
    fun = median, geom = "text",
    aes(label = round(after_stat(y), 2)),
    color = "white", vjust = 0.5, size = 3.2, fontface = "bold"
  ) +
  facet_wrap(~ Discipline, nrow = 1, scales = "free_y") +
  scale_fill_manual(
    values = c(
      "T1 — Hommes" = COLH, "T4 — Hommes" = COLH2,
      "T1 — Femmes" = COLF, "T4 — Femmes" = COLF2
    ),
    labels = c(
      "T1 — Hommes" = "T1 Hommes", "T4 — Hommes" = "T4 Hommes",
      "T1 — Femmes" = "T1 Femmes", "T4 — Femmes" = "T4 Femmes"
    ),
    name = NULL
  ) +
  scale_x_discrete(
    labels = c(
      "T1 — Hommes" = "T1\nH", "T4 — Hommes" = "T4\nH",
      "T1 — Femmes" = "T1\nF", "T4 — Femmes" = "T4\nF"
    )
  ) +
  labs(x = NULL, y = "Perfpropre — Catégorie Cadet") +
  THEME_PPTX

ggsave(
  "slide7fig4boxplotsdisciplines.png", fig4,
  width = 14, height = 6, dpi = 200
)
cat("slide7fig4boxplotsdisciplines.png\n")

# ---- Fig 5 (slide 7, right): relative rank vs relative age ----

cat("Génération Fig 5 (slide 7 — droite)...\n")

# Per Categ_age x Sexe x Saison group, add to donnees_normales:
#   Rangrelatif    - percent rank of Perf_propre within the group,
#   Agemoycat      - mean Age of the group,
#   Agerelatifmois - deviation of Age from that mean, in months.
# Both grouped columns share the same grouping, so they are computed in a
# single pass.
donnees_normales <- donnees_normales %>%
  group_by(Categ_age, Sexe, Saison) %>%
  mutate(
    Rangrelatif = percent_rank(Perf_propre),
    Agemoycat = mean(Age, na.rm = TRUE)
  ) %>%
  ungroup() %>%
  mutate(Agerelatifmois = (Age - Agemoycat) * 12)

# Reproducible sample of up to 4000 rows per category x sex.
set.seed(42)
df_fig5 <- donnees_normales %>%
  filter(Categ_age %in% c("Cadet", "Junior", "Espoir")) %>%
  mutate(Sexe = ifelse(Sexe == "H", "Hommes", "Femmes")) %>%
  group_by(Categ_age, Sexe) %>%
  slice_sample(n = 4000, replace = FALSE) %>%
  ungroup() %>%
  mutate(
    Categ_age = factor(
      Categ_age,
      levels = c("Cadet", "Junior", "Espoir"),
      labels = c("Cadet (~14 ans)", "Junior (~16 ans)", "Espoir (~18 ans)")
    )
  )

fig5 <- ggplot(
  df_fig5,
  aes(x = Agerelatifmois, y = Rangrelatif, color = Sexe)
) +
  geom_point(alpha = 0.05, size = 0.5) +
  # Linear trend per sex with its confidence band.
  geom_smooth(
    method = "lm", se = TRUE, linewidth = 2,
    aes(fill = Sexe), alpha = 0.15
  ) +
  # Reference line at the median rank.
  geom_hline(
    yintercept = 0.5, linetype = "dashed",
    color = "#555555", linewidth = 0.7
  ) +
  facet_wrap(~ Categ_age, nrow = 1) +
  scale_color_manual(
    values = c("Femmes" = COLF, "Hommes" = COLH),
    name = NULL
  ) +
  scale_fill_manual(
    values = c("Femmes" = COLF, "Hommes" = COLH),
    name = NULL
  ) +
  scale_y_continuous(
    breaks = c(0, 0.25, 0.5, 0.75, 1),
    labels = c("0", "25%", "50%", "75%", "100%")
  ) +
  labs(
    x = "ÂgeRelatif (mois)",
    y = "Rang relatif (percentile de Perfpropre)"
  ) +
  THEME_PPTX +
  # Re-enable the vertical grid lines that the shared theme removes.
  theme(panel.grid.major.x = element_line(color = "grey90", linewidth = 0.5))

ggsave("slide7fig5rangagerelatif.png", fig5, width = 12, height = 6, dpi = 200)
cat("slide7fig5rangagerelatif.png\n")

# ---- Fig 8 (slide 8, left): raw vs age-adjusted Perf_propre, Cadet ----

cat("Génération Fig 8 (slide 8 — gauche)...\n")

# Within each Sexe x Discipline group that has at least 10 rows:
#   Agecentre   - Age centred on the group mean,
#   Pente       - slope of the linear fit Perf_propre ~ Agecentre,
#   Perfajustee - Perf_propre with the fitted age effect removed.
df_fig8 <- donnees_normales %>%
  filter(
    Categ_age == "Cadet",
    Discipline %in% disciplinescommunes
  ) %>%
  mutate(Sexe = ifelse(Sexe == "H", "Hommes", "Femmes")) %>%
  group_by(Sexe, Discipline) %>%
  filter(n() >= 10) %>%
  mutate(
    Agecentre = Age - mean(Age, na.rm = TRUE),
    Pente = coef(lm(Perf_propre ~ Agecentre))[2],
    Perfajustee = Perf_propre - Pente * Agecentre
  ) %>%
  ungroup() %>%
  mutate(
    Discipline = factor(
      Discipline,
      levels = disciplinescommunes,
      labels = disc_labels
    )
  )

# Reproducible sample of up to 800 rows per sex x discipline.
set.seed(42)
df_fig8s <- df_fig8 %>%
  group_by(Sexe, Discipline) %>%
  slice_sample(n = 800, replace = FALSE) %>%
  ungroup()

fig8 <- ggplot(
  df_fig8s,
  aes(x = Perf_propre, y = Perfajustee, color = Sexe)
) +
  geom_point(alpha = 0.15, size = 0.8) +
  # Identity line: points on it are unchanged by the adjustment.
  geom_abline(
    slope = 1, intercept = 0, linetype = "dashed",
    color = "#333333", linewidth = 0.9
  ) +
  geom_smooth(method = "lm", se = FALSE, linewidth = 1.5) +
  facet_wrap(~ Discipline, nrow = 1, scales = "free") +
  scale_color_manual(
    values = c("Femmes" = COLF, "Hommes" = COLH),
    name = NULL
  ) +
  labs(
    x = "Perfpropre (brute)",
    y = "Perfpropre ajustée (cat. Cadet)"
  ) +
  THEME_PPTX +
  # Re-enable the vertical grid lines that the shared theme removes.
  theme(panel.grid.major.x = element_line(color = "grey90", linewidth = 0.5))

ggsave("slide8fig8perfajustee.png", fig8, width = 14, height = 5.5, dpi = 200)
cat("slide8fig8perfajustee.png\n")

# ---- Fig 9 (slide 8, right): raw ranking vs "fair" ranking ----

cat("Génération Fig 9 (slide 8 — droite)...\n")

df_fig9 <- donnees_normales %>%
  filter(Categ_age %in% c("Cadet", "Junior")) %>%
  mutate(Sexe = ifelse(Sexe == "H", "Hommes", "Femmes")) %>%
  # Raw rank: percent rank within Categ_age x Sexe x Saison.
  group_by(Categ_age, Sexe, Saison) %>%
  mutate(Rangbrut = percent_rank(Perf_propre)) %>%
  ungroup() %>%
  # Fair rank: percent rank within the same group further split by age
  # rounded to the nearest half year.
  # NOTE(review): Age is already rounded to whole years earlier in the
  # script, so Agearrondi equals Age here - confirm whether the unrounded
  # age was meant to be used.
  mutate(Agearrondi = round(Age * 2) / 2) %>%
  group_by(Categ_age, Sexe, Saison, Agearrondi) %>%
  mutate(Rangequitable = percent_rank(Perf_propre)) %>%
  ungroup() %>%
  mutate(
    Categ_age = factor(
      Categ_age,
      levels = c("Cadet", "Junior"),
      labels = c("Cadet (~14 ans)", "Junior (~16 ans)")
    )
  )

# Reproducible sample of up to 3000 rows per category x sex.
set.seed(42)
df_fig9s <- df_fig9 %>%
  group_by(Categ_age, Sexe) %>%
  slice_sample(n = 3000, replace = FALSE) %>%
  ungroup()

fig9 <- ggplot(
  df_fig9s,
  aes(x = Rangbrut, y = Rangequitable, color = Sexe)
) +
  geom_point(alpha = 0.08, size = 0.6) +
  # Identity line: points on it have the same rank under both rankings.
  geom_abline(
    slope = 1, intercept = 0, linetype = "dashed",
    color = "#333333", linewidth = 1
  ) +
  facet_grid(Sexe ~ Categ_age) +
  scale_color_manual(
    values = c("Femmes" = COLF, "Hommes" = COLH),
    name = NULL
  ) +
  scale_x_continuous(
    breaks = c(0, 0.5, 1),
    labels = c("0", "50%", "100%")
  ) +
  scale_y_continuous(
    breaks = c(0, 0.5, 1),
    labels = c("0", "50%", "100%")
  ) +
  labs(
    x = "Rang brut (Categage entière)",
    y = "Rang équitable (± 0.5 an d'âge)"
  ) +
  THEME_PPTX +
  # Facets already separate the sexes, so the legend is hidden; vertical
  # grid lines are re-enabled.
  theme(
    legend.position = "none",
    panel.grid.major.x = element_line(color = "grey90", linewidth = 0.5)
  )

ggsave(
  "slide8fig9classementequitable.png", fig9,
  width = 9, height = 7, dpi = 200
)
cat("slide8fig9classementequitable.png\n")

# ---- Final summary: which PNG goes on which slide ----
# The last file name is the one actually written by ggsave() for Fig 9.
cat("\n TOUS LES GRAPHIQUES GÉNÉRÉS\n\n")
cat("Fichiers à insérer dans le PPTX :\n")
cat(" SLIDE 5 → slide5fig1repartitionCadet.png\n")
cat(" SLIDE 6 → slide6fig2courbesperf.png\n")
cat(" SLIDE 7 → slide7fig4boxplotsdisciplines.png (gauche)\n")
cat(" SLIDE 7 → slide7fig5rangagerelatif.png (droite)\n")
cat(" SLIDE 8 → slide8fig8perfajustee.png (gauche)\n")
cat(" SLIDE 8 → slide8fig9classementequitable.png (droite)\n")