Code Transparency Statement This document contains the complete analysis code for the development of a clinical risk prediction tool for atrial fibrillation (AF) in endurance athletes using multimodal atrial myopathy markers. All code is provided in the interest of transparency, reproducibility, and to facilitate independent verification of our findings. Researchers are welcome to use and adapt this code for their own studies.

Corresponding Author: Professor Andre La Gerche: andre.lagerche@svi.edu.au

Citation: Spencer LW, D’Ambrosio P, et al. Using atrial myopathy metrics to devise a risk prediction tool for atrial fibrillation in endurance athletes. Manuscript in submission for European Heart Journal.

AF risk prediction model: We have developed a AF risk prediction tool available at: https://heart-lab.org/atrial-fibrillation

1 Analysis Setup and Data Preparation

1.1 R packages

# Data manipulation
library(tidyverse)
library(dplyr)
library(readr)

# Statistical analysis
library(tableone)
library(broom)
library(pROC)
library(ResourceSelection)

# Visualisation
library(ggplot2)
library(ggpubr)
library(gridExtra)
library(scales)
library(patchwork)
library(viridis)
library(fmsb)

# Output
library(openxlsx)
library(writexl)
library(knitr)
library(kableExtra)

# Set options
options(scipen = 999)  # To avoid scientific notation
set.seed(123)  # Reproducibility

1.2 Filter and Create Analysis Cohorts

# Remove Atrial Myopathy related pathology
NoCM <- filter(AM_paper, `AM related pathology` != 1)

# Main athlete cohort (excluding young/middle-age controls)
Athletes_NoCM <- filter(NoCM, 
                        Cohort != "young control" & 
                        Cohort != "middle age control")

# Age groups and AF status variables
Athletes_NoCM <- Athletes_NoCM %>%
  mutate(
    Age = as.numeric(Age),
    AgeGroup = factor(ifelse(Age <= 45, "≤45", ">45"), 
                     levels = c("≤45", ">45")),
    AFgroup = factor(ifelse(AF == 1, "AF+", "AF-"), 
                    levels = c("AF-", "AF+")),
    AF_numeric = as.numeric(AF)
  )

# Control and athlete groups for supplementary analysis
controls <- filter(NoCM, EICR == "Control")
athletes <- filter(NoCM, EICR == "Athlete")

# Cohort summary table
cohort_summary <- data.frame(
  Group = c("Athletes (excluding cardiomyopathy)", 
            "  AF+", 
            "  AF-",
            "  Age ≤45 years",
            "  Age >45 years",
            "Controls"),
  N = c(
    nrow(Athletes_NoCM),
    sum(Athletes_NoCM$AF == 1, na.rm = TRUE),
    sum(Athletes_NoCM$AF == 0, na.rm = TRUE),
    sum(Athletes_NoCM$AgeGroup == "≤45", na.rm = TRUE),
    sum(Athletes_NoCM$AgeGroup == ">45", na.rm = TRUE),
    nrow(controls)
  )
)

kable(cohort_summary,
      col.names = c("Cohort Description", "N"),
      align = c("l", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), 
                full_width = FALSE)

Cohort Description	N
Athletes (excluding cardiomyopathy)	353
AF+	96
AF-	257
Age ≤45 years	201
Age >45 years	152
Controls	97

1.3 Variable Definitions

# Table 1 variables
table1_vars <- c(
  # Demographics
  "Age", "sex", "bmi", "Bsa", "averagehr1", "minhr1", "brad_burden_pct1",
  "htn", "rest_SBP", "rest_DBP", "high_cholesterol", "diabetes",
  "ex_smoker", "etoh sd/wk",
  # Medications
  "bblocker", "nondihydropyridine ccb", "flec", "sotalol", "amio",
  # Exercise
  "vo2_kg_peak", "vo2_absolute", "Total MET hrs/wk", "Exercise Duration (yr)",
  # Sport
   "Rowing", "Cycling", "Running", "Other"
)

# Table 2 variables
table2_vars <- c(
  # LV
  "enddiastolic_volume_ml_lv", "endsystolic_volume_ml_lv", 
  "lvef_bip_q", "g_peak_slavg_",
    # LA
    "laesv_bip_ml", "LAVi", "la/lv",
    "LA Resevoir Strain", "LA Conduit Strain", "LA Contractile Strain",
    # RV
    "RVEDV", "RVESV", "RVEF",
    # Doppler
    "MV E Vel (m/s)", "MV A Vel (m/s)", "MV E/A Ratio", 
    "Sept E' (cm/s)", "E/E'",
    # ECG
    "P wave Duration", "P Wave Axis", "PTFV1 (ms*mV)", "Adv IAB",
    "ECTOPIES LS",
    # Biomarkers
    "bnp", "troponin1",
    # Genetic
    "AF centile", "highest quartile", "highest decile"
  )
  
  #Sport
  Athletes_NoCM <- Athletes_NoCM %>%
    mutate(
      Rowing = as.numeric(`Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other` == 1),
      Cycling = as.numeric(`Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other` == 2),
      Running = as.numeric(`Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other` == 3),
      Other = as.numeric(`Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other` %in% c(4, 5, 6, 7))
    )
  
  # Supplementary variables
  supp_vars <- c(
    "% predicted vo2", "peak rer",
    "LAVi > 34", "LAVi > 48", "LAVi > 60",
    "Prevalence of P-wave duration >120 ms", "High PTFV1",
    "Prevalence of >=100 PACs/24 h",
    "AF PRS 6238158var PGS000016", "highest quintile", "98th percentile",
    "Rare Variant"
  )
  
  all_variables <- unique(c(table1_vars, table2_vars, supp_vars))
  all_variables <- all_variables[all_variables %in% names(Athletes_NoCM)]
  
  # Non-normal distributions
  nonnormal_vars <- c("Age", "bmi", "Bsa", "rest_SBP", "rest_DBP", "minhr1",
                      "brad_burden_pct1", "Exercise Duration (yr)", "Total MET hrs/wk",
                      "laesv_bip_ml", "LAVi", "la/lv", "P wave Duration", "PTFV1 (ms*mV)",
                      "ECTOPIES LS", "bnp", "troponin1", "AF centile", 
                      "AF PRS 6238158var PGS000016", "etoh sd/wk")
  
  # Categorical variables
  categorical_vars <- c("sex", "htn", "high_cholesterol", "diabetes", "ex_smoker",
                        "bblocker", "nondihydropyridine ccb", "flec", "sotalol", "amio",
                        "Rowing", "Cycling", "Running", "Other", 
                        "LAVi > 34", "LAVi > 48", "LAVi > 60",
                        "Prevalence of P-wave duration >120 ms", "High PTFV1",
                        "Prevalence of >=100 PACs/24 h",
                        "Adv IAB", "highest quartile", "highest quintile", 
                        "highest decile", "98th percentile", "Rare Variant")

1.4 Statistical Functions

  calculate_p_value <- function(data, var, group_var, is_categorical = FALSE, is_nonnormal = NULL) {
    if (!var %in% names(data) || !group_var %in% names(data)) return(NA)
    
    clean_data <- data %>% filter(!is.na(.data[[var]]) & !is.na(.data[[group_var]]))
    if (nrow(clean_data) < 3) return(NA)
    
    group_levels <- unique(clean_data[[group_var]])
    if (length(group_levels) < 2) return(NA)
    
    tryCatch({
      if (is_categorical) {
        tab <- table(as.factor(clean_data[[var]]), as.factor(clean_data[[group_var]]))
        if (any(dim(tab) < 2) || sum(tab) < 5) return(NA)
        
        if (any(tab < 5)) {
          test_result <- fisher.test(tab, simulate.p.value = TRUE)
        } else {
          test_result <- chisq.test(tab)
        }
        return(test_result$p.value)
      } else {
        var_numeric <- as.numeric(clean_data[[var]])
        if (all(is.na(var_numeric))) return(NA)
        
        if (is.null(is_nonnormal)) {
          n <- length(var_numeric[!is.na(var_numeric)])
          if (n >= 3 && n <= 5000) {
            shapiro_result <- tryCatch(shapiro.test(var_numeric),
                                      error = function(e) list(p.value = 0.049))
            is_nonnormal <- shapiro_result$p.value < 0.05
          } else {
            is_nonnormal <- TRUE
          }
        }
        
        if (is_nonnormal) {
          test_result <- wilcox.test(var_numeric ~ clean_data[[group_var]])
        } else {
          test_result <- t.test(var_numeric ~ clean_data[[group_var]])
        }
        return(test_result$p.value)
      }
    }, error = function(e) NA)
  }
  
  calculate_interaction_p <- function(data, var, is_categorical = FALSE) {
    required_vars <- c(var, "AF_numeric", "AgeGroup")
    if (!all(required_vars %in% names(data))) return(NA)
    
    clean_data <- data %>% 
      filter(!is.na(.data[[var]]) & !is.na(AF_numeric) & !is.na(AgeGroup))
    if (nrow(clean_data) < 20) return(NA)
    
    tryCatch({
      if (is_categorical) {
        model <- glm(as.factor(clean_data[[var]]) ~ AgeGroup * AFgroup,
                    data = clean_data, family = binomial())
      } else {
        model <- lm(as.numeric(clean_data[[var]]) ~ AgeGroup * AFgroup, 
                   data = clean_data)
      }
      
      anova_result <- anova(model)
      int_row <- grep(":", rownames(anova_result))
      if (length(int_row) > 0) {
        p_col <- which(names(anova_result) %in% c("Pr(>F)", "Pr(>Chi)"))
        if (length(p_col) > 0) return(anova_result[int_row[1], p_col[1]])
      }
      return(NA)
    }, error = function(e) NA)
  }
  
  format_stats <- function(data, var, is_cat, is_nonnorm) {
    if (!var %in% names(data) || nrow(data) == 0) return("--")
    
    tryCatch({
      if (is_cat) {
        var_data <- data[[var]]
        
        if (is.logical(var_data)) {
          count <- sum(var_data == TRUE, na.rm = TRUE)
          total <- sum(!is.na(var_data))
        } else if (is.numeric(var_data) && all(var_data %in% c(0, 1, NA), na.rm = TRUE)) {
          count <- sum(var_data == 1, na.rm = TRUE)
          total <- sum(!is.na(var_data))
        } else {
          tab <- table(var_data, useNA = "no")
          if (length(tab) > 0) {
            count <- max(tab)
            total <- sum(tab)
          } else return("0 (0%)")
        }
        
        if (total == 0) return("0 (0%)")
        return(paste0(count, " (", round(100*count/total, 1), "%)"))
      } else {
        values <- as.numeric(data[[var]])
        values <- values[!is.na(values)]
        if (length(values) == 0) return("--")
        
        decimals <- ifelse(var == "la/lv", 2, 1)
        
        if (is_nonnorm) {
          med <- median(values)
          q1 <- quantile(values, 0.25)
          q3 <- quantile(values, 0.75)
          return(paste0(round(med, decimals), " [", 
                       round(q1, decimals), "-", round(q3, decimals), "]"))
        } else {
          m <- mean(values)
          s <- sd(values)
          return(paste0(round(m, decimals), " ± ", round(s, decimals)))
        }
      }
    }, error = function(e) "--")
  }
  
  format_p_value <- function(p) {
    if (is.null(p) || length(p) == 0 || is.na(p)) return("--")
    p_num <- as.numeric(p)
    if (is.na(p_num)) return("--")
    if (p_num < 0.001) return("<0.001")
    if (p_num < 0.01) return(sprintf("%.3f", p_num))
    return(sprintf("%.3f", p_num))
  }
  
  calculate_sport_p_value <- function(data, group_var) {
    sport_var <- "Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other"
    
    if (!sport_var %in% names(data) || !group_var %in% names(data)) return(NA)
    
    clean_data <- data %>% 
      filter(!is.na(.data[[sport_var]]) & !is.na(.data[[group_var]]))
    
    if (nrow(clean_data) < 3) return(NA)
    
    tryCatch({
      
      clean_data$sport_collapsed <- ifelse(
        clean_data[[sport_var]] %in% c(4, 5, 6, 7), 
        4, 
        clean_data[[sport_var]]
      )
      
      tab <- table(clean_data$sport_collapsed, clean_data[[group_var]])
      
      if (any(dim(tab) < 2)) return(NA)
      
      if (any(tab < 5)) {
        test_result <- fisher.test(tab, simulate.p.value = TRUE)
      } else {
        test_result <- chisq.test(tab)
      }
      return(test_result$p.value)
    }, error = function(e) NA)
  }

1.5 Helper Functions

  assign_category <- function(var) {
    # Table 1 categories
    if (var %in% c("Age", "sex", "bmi", "Bsa", "averagehr1", "minhr1", 
                   "brad_burden_pct1", "htn", "rest_SBP", "rest_DBP",
                   "high_cholesterol", "diabetes", "ex_smoker", "etoh sd/wk")) {
      return("Demographics")
    } else if (var %in% c("bblocker", "nondihydropyridine ccb", "flec", 
                          "sotalol", "amio")) {
      return("Anti-arrhythmic Medication use, n (%)")
    } else if (var %in% c("vo2_kg_peak", "vo2_absolute", "Total MET hrs/wk", 
                          "Exercise Duration (yr)")) {
      return("Exercise Capacity")
    } else if (var == "Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other" ||
               var %in% c("Rowing", "Cycling", "Running", "Other")) {
      return("Primary Sport, n (%)")
    } else if (var %in% c("enddiastolic_volume_ml_lv", "endsystolic_volume_ml_lv",
                          "lvef_bip_q", "g_peak_slavg_")) {
      return("Left ventricular parameters")
    } else if (var %in% c("laesv_bip_ml", "LAVi", "la/lv", "LAVi > 34", 
                          "LAVi > 48", "LAVi > 60",
                          "LA Resevoir Strain", "LA Conduit Strain", 
                          "LA Contractile Strain")) {
      return("Left atrial parameters")
    } else if (var %in% c("RVEDV", "RVESV", "RVEF")) {
      return("Right Ventricular Parameters")
    } else if (var %in% c("MV E Vel (m/s)", "MV A Vel (m/s)", "MV E/A Ratio",
                          "Sept E' (cm/s)", "E/E'")) {
      return("Doppler Parameters")
    } else if (var %in% c("P wave Duration", "P Wave Axis", "PTFV1 (ms*mV)",
                          "Prevalence of P-wave duration >120 ms", "High PTFV1",
                          "Adv IAB", "ECTOPIES LS")) {
      return("Electrocardiographic")
    } else if (var %in% c("bnp", "troponin1")) {
      return("Biochemical")
    } else if (var %in% c("AF centile", "AF PRS 6238158var PGS000016",
                          "highest quartile", "highest quintile", "highest decile",
                          "98th percentile", "Rare Variant")) {
      return("Genetic")
    } else {
      return("Other")
    }
  }
  
  clean_variable_names <- function(var) {
    case_when(
      var == "Age" ~ "Age, years",
      var == "sex" ~ "Male sex, n (%)",
      var == "bmi" ~ "BMI, kg/m²",
      var == "Bsa" ~ "BSA, m²",
      var == "averagehr1" ~ "Average HR, bpm",
      var == "minhr1" ~ "Minimum HR, bpm",
      var == "brad_burden_pct1" ~ "Bradycardia burden, % (24h)",
      var == "htn" ~ "Hypertension, n (%)",
      var == "rest_SBP" ~ "SBP, mmHg",
      var == "rest_DBP" ~ "DBP, mmHg",
      var == "high_cholesterol" ~ "Dyslipidaemia, n (%)",
      var == "diabetes" ~ "Diabetes, n (%)",
      var == "ex_smoker" ~ "History of smoking, n (%)",
      var == "etoh sd/wk" ~ "Alcohol, drinks/week",
      var == "bblocker" ~ "Beta-blockers",
      var == "nondihydropyridine ccb" ~ "Non-dihydropyridine CCB's",
      var == "flec" ~ "Flecainide",
      var == "sotalol" ~ "Sotalol",
      var == "amio" ~ "Amiodarone",
      var == "vo2_kg_peak" ~ "VO₂peak, mL/kg/min",
      var == "vo2_absolute" ~ "VO₂peak, L/min",
      var == "% predicted vo2" ~ "% of predicted exercise capacity",
      var == "peak rer" ~ "Respiratory exchange ratio",
      var == "Total MET hrs/wk" ~ "Total weekly MET-hours",
      var == "Exercise Duration (yr)" ~ "Yrs of endurance training",
      var == "Rowing" ~ "Rowing",
      var == "Cycling" ~ "Cycling",
      var == "Running" ~ "Running",
      var == "Other" ~ "Other",
      var == "Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other" ~ "Primary Sport, n (%)",
      var == "enddiastolic_volume_ml_lv" ~ "LVEDV, mL",
      var == "endsystolic_volume_ml_lv" ~ "LVESV, mL",
      var == "lvef_bip_q" ~ "LVEF, %",
      var == "g_peak_slavg_" ~ "LV strain, %",
      var == "laesv_bip_ml" ~ "LAESV, mL",
      var == "LAVi" ~ "LAVi, mL/m²",
      var == "LAVi > 34" ~ "Mildly abnormal >34 mL/m², n (%)",
      var == "LAVi > 48" ~ "Severely abnormal >48 mL/m², n (%)",
      var == "LAVi > 60" ~ "Extremely abnormal >60 mL/m², n (%)",
      var == "la/lv" ~ "LA:LV",
      var == "LA Resevoir Strain" ~ "Reservoir strain, %",
      var == "LA Conduit Strain" ~ "Conduit strain, %",
      var == "LA Contractile Strain" ~ "Contractile strain, %",
      var == "RVEDV" ~ "RVEDV, mL",
      var == "RVESV" ~ "RVESV, mL",
      var == "RVEF" ~ "RVEF, %",
      var == "MV E Vel (m/s)" ~ "E velocity, m/s",
      var == "MV A Vel (m/s)" ~ "A velocity, m/s",
      var == "MV E/A Ratio" ~ "E/A ratio",
      var == "Sept E' (cm/s)" ~ "E′ velocity, cm/s",
      var == "E/E'" ~ "E/E' ratio",
      var == "P wave Duration" ~ "P wave duration, ms",
      var == "Prevalence of P-wave duration >120 ms" ~ "P-wave duration >120 ms, n (%)",
      var == "P Wave Axis" ~ "P wave axis, °",
      var == "PTFV1 (ms*mV)" ~ "PTFV₁, mV·ms",
      var == "High PTFV1" ~ "PTFV₁ ≥4 mV·ms, n (%)",
      var == "Adv IAB" ~ "Advanced IAB, n (%)",
      var == "ECTOPIES LS" ~ "Atrial ectopy, per 24 h",
      var == "Prevalence of >=100 PACs/24 h" ~ "≥100 PACs per 24 h, n (%)",
      var == "bnp" ~ "BNP, pg/mL",
      var == "troponin1" ~ "cTnI, ng/L",
      var == "AF centile" ~ "AF-PRS percentile",
      var == "AF PRS 6238158var PGS000016" ~ "AF-PRS",
      var == "highest quartile" ~ "AF-PRS highest quartile",
      var == "highest quintile" ~ "AF-PRS highest quintile",
      var == "highest decile" ~ "AF-PRS highest decile",
      var == "98th percentile" ~ "AF-PRS ≥98th percentile",
      var == "Rare Variant" ~ "Rare Variant",
      TRUE ~ var
    )
  }

1.6 Table Setup

results_table <- data.frame(Variable = character(), stringsAsFactors = FALSE)

for (var in all_variables) {
  if (!var %in% names(Athletes_NoCM)) next
  
  is_cat <- var %in% categorical_vars
  is_nonnorm <- var %in% nonnormal_vars
  
  af_overall <- Athletes_NoCM %>% filter(AF == 1)
  noaf_overall <- Athletes_NoCM %>% filter(AF == 0)
  
  young_data <- Athletes_NoCM %>% filter(AgeGroup == "≤45")
  af_young <- young_data %>% filter(AF == 1)
  noaf_young <- young_data %>% filter(AF == 0)
  
  old_data <- Athletes_NoCM %>% filter(AgeGroup == ">45")
  af_old <- old_data %>% filter(AF == 1)
  noaf_old <- old_data %>% filter(AF == 0)
  
  # Calculate p-values - Sport
  if (var %in% c("Rowing", "Cycling", "Running", "Other")) {
    if (var == "Rowing") {
      p_overall <- format_p_value(calculate_sport_p_value(Athletes_NoCM, "AFgroup"))
      p_young <- format_p_value(calculate_sport_p_value(young_data, "AFgroup"))
      p_old <- format_p_value(calculate_sport_p_value(old_data, "AFgroup"))
      p_interaction <- format_p_value(calculate_interaction_p(Athletes_NoCM, 
        "Sport: 1=row, 2=cyc, 3=run, 4=CCS, 5=swim, 6=tri, 7=other", is_cat = TRUE))
    } else {
      p_overall <- ""
      p_young <- ""
      p_old <- ""
      p_interaction <- ""
    }
  } else {
    p_overall <- format_p_value(calculate_p_value(Athletes_NoCM, var, "AFgroup", is_cat, is_nonnorm))
    p_young <- format_p_value(calculate_p_value(young_data, var, "AFgroup", is_cat, is_nonnorm))
    p_old <- format_p_value(calculate_p_value(old_data, var, "AFgroup", is_cat, is_nonnorm))
    p_interaction <- format_p_value(calculate_interaction_p(Athletes_NoCM, var, is_cat))
  }
  
  results_table <- rbind(results_table, data.frame(
    Variable = var,
    AF_Overall = format_stats(af_overall, var, is_cat, is_nonnorm),
    NoAF_Overall = format_stats(noaf_overall, var, is_cat, is_nonnorm),
    P_Overall = p_overall,
    AF_Young = format_stats(af_young, var, is_cat, is_nonnorm),
    NoAF_Young = format_stats(noaf_young, var, is_cat, is_nonnorm),
    P_Young = p_young,
    AF_Old = format_stats(af_old, var, is_cat, is_nonnorm),
    NoAF_Old = format_stats(noaf_old, var, is_cat, is_nonnorm),
    P_Old = p_old,
    P_Interaction = p_interaction,
    stringsAsFactors = FALSE
  ))
}

results_table$Category <- sapply(results_table$Variable, assign_category)
results_table$Variable <- sapply(results_table$Variable, clean_variable_names)

n_af_overall <- sum(Athletes_NoCM$AF == 1, na.rm = TRUE)
n_noaf_overall <- sum(Athletes_NoCM$AF == 0, na.rm = TRUE)
n_af_young <- sum(Athletes_NoCM$AF == 1 & Athletes_NoCM$AgeGroup == "≤45", na.rm = TRUE)
n_noaf_young <- sum(Athletes_NoCM$AF == 0 & Athletes_NoCM$AgeGroup == "≤45", na.rm = TRUE)
n_af_old <- sum(Athletes_NoCM$AF == 1 & Athletes_NoCM$AgeGroup == ">45", na.rm = TRUE)
n_noaf_old <- sum(Athletes_NoCM$AF == 0 & Athletes_NoCM$AgeGroup == ">45", na.rm = TRUE)

# Define category orders
category_order_table1 <- c("Demographics", "Anti-arrhythmic Medication use, n (%)", 
                           "Exercise Capacity", "Primary Sport, n (%)")
category_order_table2 <- c("Left ventricular parameters", "Left atrial parameters",
                           "Right Ventricular Parameters", "Doppler Parameters",
                           "Electrocardiographic", "Biochemical", "Genetic")

# Ordered category tables
table1_with_cat <- results_table %>% 
  filter(Category %in% category_order_table1) %>%
  mutate(Category = factor(Category, levels = category_order_table1, ordered = TRUE)) %>%
  arrange(Category)

table2_with_cat <- results_table %>% 
  filter(Category %in% category_order_table2) %>%
  mutate(Category = factor(Category, levels = category_order_table2, ordered = TRUE)) %>%
  arrange(Category)

# Create display tables
table_1 <- table1_with_cat %>% select(-Category)
table_2 <- table2_with_cat %>% select(-Category)

# Rename columns
colnames(table_1) <- c(
  "Characteristic",
  paste0("AF (n=", n_af_overall, ")"),
  paste0("No-AF (n=", n_noaf_overall, ")"),
  "p value",
  paste0("AF (n=", n_af_young, ")"),
  paste0("No-AF (n=", n_noaf_young, ")"),
  "p value ",
  paste0("AF (n=", n_af_old, ")"),
  paste0("No-AF (n=", n_noaf_old, ")"),
  "p value  ",
  "Interaction p value"
)

colnames(table_2) <- colnames(table_1)

cat("\n# Table 1\n\n")

2 Table 1

kable(table_1, 
      caption = "Table 1. Baseline Characteristics of Endurance Athletes Stratified by Age and Atrial Fibrillation Status",
      align = c("l", rep("c", ncol(table_1)-1))) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = TRUE, font_size = 10) %>%
  add_header_above(c(" " = 1, 
                     "Overall" = 3, 
                     "Younger Athletes (≤45 years)" = 3, 
                     "Older Athletes (>45 years)" = 3, 
                     " " = 1)) %>%
  pack_rows(index = table(table1_with_cat$Category)) %>%
  add_footnote(c("Data are presented as mean ± SD, median (IQR), or n (%)",
                 "P-values from Wilcoxon rank-sum test (continuous) or chi-square/Fisher exact test (categorical)",
                 "Interaction p-values derived from two-way ANOVA testing for age×AF interaction effects"),
               notation = "none")

Table 1. Baseline Characteristics of Endurance Athletes Stratified by Age and Atrial Fibrillation Status
	Overall			Younger Athletes (≤45 years)			Older Athletes (>45 years)
Characteristic	AF (n=96)	No-AF (n=257)	p value	AF (n=25)	No-AF (n=176)	p value	AF (n=71)	No-AF (n=81)	p value	Interaction p value
Demographics
Age, years	60 [45-68]	31 [20-51]	<0.001	35 [30-43]	22 [18.8-32]	<0.001	64 [57-68.5]	60 [52-66]	0.009	0.026
Male sex, n (%)	89 (92.7%)	173 (67.3%)	<0.001	21 (84%)	120 (68.2%)	0.159	68 (95.8%)	53 (65.4%)	<0.001	0.063
BMI, kg/m²	24.6 [23.5-26.7]	23 [21.2-24.9]	<0.001	24.5 [23.8-25.6]	22.1 [20.5-23.6]	<0.001	24.8 [23.4-27.4]	25 [23.3-27.4]	0.762	<0.001
BSA, m²	2 [1.9-2.1]	1.9 [1.8-2]	<0.001	2 [1.9-2.1]	1.9 [1.7-2]	<0.001	2 [1.9-2.1]	2 [1.9-2.2]	0.638	0.004
Average HR, bpm	64.8 ± 9	66.9 ± 9.1	0.084	64.5 ± 8.3	66.6 ± 9.6	0.282	64.9 ± 9.4	67.5 ± 7.6	0.104	0.852
Minimum HR, bpm	45 [40-49]	43 [39-48]	0.332	44.5 [40.2-48.8]	42 [38-47]	0.263	45 [40-50]	46 [43-50]	0.140	0.091
Bradycardia burden, % (24h)	4.5 [0.2-23.4]	7.4 [0.4-28.3]	0.167	12.5 [0.4-26.7]	16.9 [0.8-33]	0.304	3.5 [0.2-20.2]	1.6 [0.2-8.6]	0.270	0.121
Hypertension, n (%)	14 (14.9%)	17 (6.7%)	0.029	2 (8.3%)	3 (1.7%)	0.111	12 (17.1%)	14 (17.5%)	1.000	0.127
SBP, mmHg	132 [124-143.2]	123 [115-134]	<0.001	126 [122-135]	122 [113-131]	0.027	133 [126.5-145.5]	130 [119-138]	0.043	0.884
DBP, mmHg	76 [69.8-82.2]	66 [60.8-75]	<0.001	70 [64-76]	64 [59-70]	0.007	78 [72-83]	75 [68-80]	0.068	0.305
Dyslipidaemia, n (%)	23 (24.5%)	22 (8.6%)	<0.001	3 (12.5%)	1 (0.6%)	0.006	20 (28.6%)	21 (26.2%)	0.893	0.006
Diabetes, n (%)	2 (2.1%)	0 (0%)	0.072	0 (0%)	0 (0%)	–	2 (2.9%)	0 (0%)	0.216	1.000
History of smoking, n (%)	22 (23.4%)	25 (9.8%)	0.002	2 (8.3%)	4 (2.3%)	0.155	20 (28.6%)	21 (26.2%)	0.893	0.226
Alcohol, drinks/week	3 [0-7]	0 [0-5]	<0.001	0.5 [0-3.5]	0 [0-0]	0.007	4.5 [1.2-8.8]	7 [4-12]	0.061	0.047
Anti-arrhythmic Medication use, n (%)
Beta-blockers	11 (11.7%)	1 (0.4%)	<0.001	2 (8.3%)	0 (0%)	0.014	9 (12.9%)	1 (1.2%)	0.006	0.218
Non-dihydropyridine CCB’s	2 (50%)	0 (0%)	1.000	0 (0%)	0 (0%)	–	2 (50%)	0 (0%)	1.000	–
Flecainide	12 (12.5%)	1 (0.4%)	<0.001	3 (12%)	0 (0%)	0.002	9 (12.7%)	1 (1.2%)	0.006	0.154
Sotalol	2 (2.1%)	0 (0%)	0.074	0 (0%)	0 (0%)	–	2 (2.8%)	0 (0%)	0.219	1.000
Amiodarone	1 (1%)	0 (0%)	0.274	0 (0%)	0 (0%)	–	1 (1.4%)	0 (0%)	0.470	1.000
Exercise Capacity
VO₂peak, mL/kg/min	41.3 ± 11.5	51.7 ± 14.2	<0.001	51.6 ± 8.9	58.7 ± 10.1	<0.001	37.6 ± 10	36.5 ± 8.9	0.469	0.002
VO₂peak, L/min	3378.2 ± 947.3	3778.3 ± 995.5	<0.001	4209.6 ± 809	4148.5 ± 876.5	0.729	3085.5 ± 812.3	2974 ± 731	0.378	0.821
Total weekly MET-hours	74 [45-91]	87 [55-109.5]	0.007	81 [57-106]	98 [74-122]	0.075	74 [44-86.5]	64 [41-87]	0.535	0.056
Yrs of endurance training	26 [17-36.5]	17 [8.5-26]	<0.001	15 [10-18]	11 [4-18]	0.066	30 [20-40]	29 [20-39]	0.395	0.567
Primary Sport, n (%)
Rowing	43 (44.8%)	125 (48.6%)	0.127	5 (20%)	47 (26.7%)	0.342	38 (53.5%)	78 (96.3%)	<0.001	<0.001
Cycling	23 (24%)	77 (30%)		9 (36%)	75 (42.6%)		14 (19.7%)	2 (2.5%)
Running	10 (10.4%)	26 (10.1%)		3 (12%)	25 (14.2%)		7 (9.9%)	1 (1.2%)
Other	20 (20.8%)	29 (11.3%)		8 (32%)	29 (16.5%)		12 (16.9%)	0 (0%)
Data are presented as mean ± SD, median (IQR), or n (%)
P-values from Wilcoxon rank-sum test (continuous) or chi-square/Fisher exact test (categorical)
Interaction p-values derived from two-way ANOVA testing for age×AF interaction effects

cat("\n# Table 2\n\n")

3 Table 2

kable(table_2, 
      caption = "Table 2. Cardiac Structure, Function, and Electrophysiological Parameters in Endurance Athletes by Age and Atrial Fibrillation Status",
      align = c("l", rep("c", ncol(table_2)-1))) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = TRUE, font_size = 10) %>%
  add_header_above(c(" " = 1, 
                     "Overall" = 3, 
                     "Younger Athletes (≤45 years)" = 3, 
                     "Older Athletes (>45 years)" = 3, 
                     " " = 1)) %>%
  pack_rows(index = table(table2_with_cat$Category)) %>%
  add_footnote(c("Data are presented as mean ± SD, median (IQR), or n (%)",
                 "Bolded values indicate statistically significant pairwise comparisons (p<0.05)",
                 "Interaction p-values test whether AF associations differ between age groups"),
               notation = "none")

Table 2. Cardiac Structure, Function, and Electrophysiological Parameters in Endurance Athletes by Age and Atrial Fibrillation Status
	Overall			Younger Athletes (≤45 years)			Older Athletes (>45 years)
Characteristic	AF (n=96)	No-AF (n=257)	p value	AF (n=25)	No-AF (n=176)	p value	AF (n=71)	No-AF (n=81)	p value	Interaction p value
Left ventricular parameters
LVEDV, mL	157.4 ± 40.5	170.4 ± 46.5	0.011	173.7 ± 45.3	183.3 ± 44.2	0.328	151.6 ± 37.2	142.3 ± 38.5	0.136	0.094
LVESV, mL	64 ± 17.8	69.9 ± 24.1	0.013	68.7 ± 21.1	75.5 ± 24.5	0.147	62.4 ± 16.4	57.9 ± 18.2	0.113	0.051
LVEF, %	58.1 ± 5.4	58.1 ± 5.6	0.986	60.2 ± 4.8	57.9 ± 5.7	0.040	57.4 ± 5.4	58.6 ± 5.5	0.182	0.020
LV strain, %	-18.2 ± 2.8	-19.2 ± 2.1	0.001	-18.7 ± 2	-19.1 ± 2.1	0.291	-18 ± 3	-19.3 ± 2.1	0.003	0.173
Left atrial parameters
LAESV, mL	90 [77.8-118.5]	79 [66-95]	<0.001	84 [73-100]	79 [66-93]	0.116	97 [78.5-122.5]	81 [66-98]	<0.001	0.058
LAVi, mL/m²	46.2 [38.7-55.9]	41.9 [34.5-48.4]	<0.001	41.8 [37.5-49.6]	41.9 [34.8-48.4]	0.694	47.7 [39.9-58.6]	41.9 [32.7-47.8]	<0.001	0.004
LA:LV	0.59 [0.49-0.75]	0.46 [0.39-0.56]	<0.001	0.5 [0.45-0.6]	0.44 [0.38-0.49]	0.001	0.64 [0.52-0.84]	0.59 [0.5-0.69]	0.034	0.391
Reservoir strain, %	25.8 ± 8.9	32.2 ± 5.9	<0.001	33.2 ± 6.3	33.6 ± 5.5	0.770	23.4 ± 8.4	29.2 ± 5.4	<0.001	0.003
Conduit strain, %	15.2 ± 6.3	20.9 ± 6.5	<0.001	21.6 ± 5.5	23.6 ± 5.2	0.127	13.1 ± 5.1	15.2 ± 5.2	0.015	0.908
Contractile strain, %	10.7 ± 5.6	11.4 ± 4.3	0.311	11.5 ± 3.3	10 ± 3.4	0.064	10.4 ± 6.2	14.1 ± 4.9	<0.001	<0.001
Mildly abnormal >34 mL/m², n (%)	87 (90.6%)	196 (76.3%)	0.004	20 (80%)	137 (77.8%)	1.000	67 (94.4%)	59 (72.8%)	<0.001	0.030
Severely abnormal >48 mL/m², n (%)	41 (42.7%)	66 (25.7%)	0.003	7 (28%)	46 (26.1%)	1.000	34 (47.9%)	20 (24.7%)	0.005	0.107
Extremely abnormal >60 mL/m², n (%)	18 (18.8%)	8 (3.1%)	<0.001	2 (8%)	7 (4%)	0.310	16 (22.5%)	1 (1.2%)	<0.001	0.039
Right Ventricular Parameters
RVEDV, mL	144.1 ± 42.5	152.3 ± 48.7	0.132	163.5 ± 47.7	165.1 ± 47.4	0.877	137 ± 38.5	124.8 ± 39.3	0.059	0.250
RVESV, mL	78.7 ± 26.6	76 ± 25.5	0.417	86 ± 28.1	79.7 ± 24.3	0.300	76 ± 25.7	68.2 ± 26.4	0.075	0.844
RVEF, %	45.4 ± 8.3	49.5 ± 8.9	<0.001	47 ± 8.4	51.2 ± 8.9	0.031	44.8 ± 8.3	45.8 ± 7.9	0.470	0.173
Doppler Parameters
E velocity, m/s	0.6 ± 0.2	0.7 ± 0.2	<0.001	0.7 ± 0.2	0.8 ± 0.2	0.003	0.6 ± 0.2	0.6 ± 0.1	0.226	0.001
A velocity, m/s	0.5 ± 0.1	0.4 ± 0.1	0.019	0.4 ± 0.1	0.4 ± 0.1	0.184	0.5 ± 0.2	0.5 ± 0.1	0.029	0.009
E/A ratio	1.6 ± 1.3	1.9 ± 0.9	0.012	1.8 ± 0.5	2.3 ± 0.8	<0.001	1.5 ± 1.6	1.2 ± 0.4	0.122	<0.001
E′ velocity, cm/s	9.3 ± 2.9	11.4 ± 3.4	<0.001	11.5 ± 2.7	13 ± 2.5	0.015	8.6 ± 2.5	8 ± 2.2	0.104	0.001
E/E’ ratio	6.2 ± 2.4	5.7 ± 1.4	0.055	5 ± 1.1	5.2 ± 1	0.362	6.6 ± 2.6	6.6 ± 1.7	0.913	0.692
Electrocardiographic
P wave duration, ms	124 [114-131]	111 [101-121]	<0.001	117 [98-129]	106 [96.8-115.2]	0.017	126 [117-132]	120 [112-129]	0.053	0.220
P wave axis, °	51.3 ± 32.4	49.6 ± 23.9	0.667	50.1 ± 35.1	48.4 ± 24.8	0.822	51.8 ± 31.6	52 ± 21.5	0.956	0.790
PTFV₁, mV·ms	2.3 [1-4]	1.9 [1-3]	0.049	2 [1.2-3.2]	2 [1.3-3.1]	0.775	2.4 [0-4.3]	1.6 [0-2.4]	<0.001	0.016
Advanced IAB, n (%)	12 (15%)	16 (6.3%)	0.026	0 (0%)	2 (1.1%)	1.000	12 (21.8%)	14 (17.5%)	0.687	0.412
Atrial ectopy, per 24 h	118 [20-521]	15 [6-47]	<0.001	15.5 [7-35.2]	10 [5-34]	0.299	239 [69-919]	28 [12-98]	<0.001	0.001
P-wave duration >120 ms, n (%)	51 (62.2%)	74 (28.8%)	<0.001	12 (48%)	32 (18.2%)	0.002	39 (68.4%)	42 (51.9%)	0.077	0.208
PTFV₁ ≥4 mV·ms, n (%)	21 (25.6%)	23 (8.9%)	<0.001	3 (12%)	19 (10.8%)	0.742	18 (31.6%)	4 (4.9%)	<0.001	0.011
Biochemical
BNP, pg/mL	24.8 [12.7-55.8]	13.9 [8.5-24]	<0.001	11 [7.9-21.3]	10 [7-17.5]	0.540	38.4 [18.1-63.5]	22.4 [14-37.8]	0.002	0.004
cTnI, ng/L	4 [2-7]	3 [2-8.2]	0.025	3 [2-4]	3 [2-12]	0.285	6 [3-8]	2 [2-4]	<0.001	0.504
Genetic
AF-PRS percentile	64.5 [33-87.2]	51 [29-73]	0.016	73 [53-78]	49.5 [29-76.2]	0.065	62 [30.5-88.5]	52 [29-71]	0.092	0.698
AF-PRS highest quartile	34 (35.4%)	60 (23.3%)	0.032	7 (28%)	45 (25.6%)	0.987	27 (38%)	15 (18.5%)	0.012	0.145
AF-PRS highest decile	22 (22.9%)	26 (10.1%)	0.003	5 (20%)	19 (10.8%)	0.318	17 (23.9%)	7 (8.6%)	0.018	0.512
AF-PRS	30.2 [30.1-30.3]	30.2 [30.1-30.2]	0.015	30.2 [30.2-30.3]	30.2 [30.1-30.3]	0.066	30.2 [30.1-30.3]	30.2 [30.1-30.2]	0.091	0.989
AF-PRS highest quintile	29 (30.2%)	53 (20.6%)	0.079	6 (24%)	40 (22.7%)	1.000	23 (32.4%)	13 (16%)	0.030	0.175
AF-PRS ≥98th percentile	8 (8.5%)	6 (2.4%)	0.022	2 (8.3%)	4 (2.3%)	0.155	6 (8.6%)	2 (2.5%)	0.147	0.960
Rare Variant	17 (18.1%)	32 (12.5%)	0.251	4 (16.7%)	16 (9.1%)	0.273	13 (18.6%)	16 (20%)	0.989	0.303
Data are presented as mean ± SD, median (IQR), or n (%)
Bolded values indicate statistically significant pairwise comparisons (p<0.05)
Interaction p-values test whether AF associations differ between age groups

4 Clinical Risk Prediction Models

# Binary variables for clinical model
clinical_mapping <- list(
  "LAVi_34" = "LAVi > 34",
  "LAVi_60" = "LAVi > 60",
  "P_wave_120" = "Prevalence of P-wave duration >120 ms",
  "PTFV1_high" = "High PTFV1",
  "PAC_100" = "Prevalence of >=100 PACs/24 h",
  "highest_decile" = "highest decile"
)

Athletes_NoCM$Age_decades <- Athletes_NoCM$Age / 10
Athletes_NoCM$High_normal_BP <- as.integer(Athletes_NoCM$rest_SBP >= 130)

for (new_name in names(clinical_mapping)) {
  old_name <- clinical_mapping[[new_name]]
  if (old_name %in% names(Athletes_NoCM)) {
    Athletes_NoCM[[new_name]] <- as.integer(Athletes_NoCM[[old_name]] == 1)
  } else {
    Athletes_NoCM[[new_name]] <- NA_integer_
  }
}

# Exclude athletes with AF during monitoring
Athletes_modelling <- Athletes_NoCM %>%
  filter(is.na(`AF during ECG`) | `AF during ECG` != 1) %>%
  filter(is.na(`AF Throughout`) | `AF Throughout` != 1)

# Filter complete cases for modelling
Athletes_analysis <- Athletes_modelling %>%
  filter(!is.na(AF) & !is.na(Age) & !is.na(`AF centile`)) %>%
  filter(complete.cases(select(., Age_decades, High_normal_BP, LAVi_34, 
                              LAVi_60, P_wave_120, PTFV1_high, PAC_100)))

#Exclusions

# Calculate exclusions for Model B (Clinical-Only)
n_excluded_monitoring <- nrow(Athletes_NoCM) - nrow(Athletes_modelling)
n_excluded_monitoring_af <- sum((Athletes_NoCM$`AF during ECG` == 1 | 
                                 Athletes_NoCM$`AF Throughout` == 1) & 
                                Athletes_NoCM$AF == 1, na.rm = TRUE)
n_excluded_monitoring_noaf <- n_excluded_monitoring - n_excluded_monitoring_af

n_excluded_missing_clinical <- nrow(Athletes_modelling) - nrow(Athletes_analysis)
n_excluded_missing_clinical_af <- sum(Athletes_modelling$AF == 1, na.rm = TRUE) - 
                                   sum(Athletes_analysis$AF == 1)
n_excluded_missing_clinical_noaf <- n_excluded_missing_clinical - n_excluded_missing_clinical_af

# Calculate Model A (Genetic-Only) - needs PRS data
Athletes_model_A <- Athletes_modelling %>%
  filter(!is.na(AF) & !is.na(`highest decile`))
n_model_a <- nrow(Athletes_model_A)
n_model_a_af <- sum(Athletes_model_A$AF == 1)
n_model_a_noaf <- sum(Athletes_model_A$AF == 0)

# Calculate Model C (Clinical + Genetic) - needs both clinical and PRS
Athletes_model_C <- Athletes_modelling %>%
  filter(!is.na(AF) & !is.na(Age) & !is.na(`highest decile`)) %>%
  filter(complete.cases(select(., Age_decades, High_normal_BP, LAVi_34, 
                                LAVi_60, P_wave_120, PTFV1_high, PAC_100, highest_decile)))
n_model_c <- nrow(Athletes_model_C)
n_model_c_af <- sum(Athletes_model_C$AF == 1)
n_model_c_noaf <- sum(Athletes_model_C$AF == 0)

# Calculate how many excluded due to missing PRS (difference between B and C)
n_excluded_prs <- nrow(Athletes_analysis) - n_model_c
n_excluded_prs_af <- sum(Athletes_analysis$AF == 1) - n_model_c_af
n_excluded_prs_noaf <- n_excluded_prs - n_excluded_prs_af

# Exclusion Table
exclusion_flow <- data.frame(
  Step = c("Starting cohort", 
           "Excluded: AF during ECG/Holter", 
           "Excluded: Missing clinical data",
           "Model A: Genetic-Only",
           "Model B: Clinical-Only", 
           "Excluded: Missing PRS data",
           "Model C: Clinical + Genetic"),
  N = c(
    nrow(Athletes_NoCM),
    n_excluded_monitoring,
    n_excluded_missing_clinical,
    n_model_a,
    nrow(Athletes_analysis),
    n_excluded_prs,
    n_model_c
  ),
  AF_cases = c(
    sum(Athletes_NoCM$AF == 1, na.rm = TRUE),
    n_excluded_monitoring_af,
    n_excluded_missing_clinical_af,
    n_model_a_af,
    sum(Athletes_analysis$AF == 1),
    n_excluded_prs_af,
    n_model_c_af
  ),
  Non_AF = c(
    sum(Athletes_NoCM$AF == 0, na.rm = TRUE),
    n_excluded_monitoring_noaf,
    n_excluded_missing_clinical_noaf,
    n_model_a_noaf,
    sum(Athletes_analysis$AF == 0),
    n_excluded_prs_noaf,
    n_model_c_noaf
  )
)

kable(exclusion_flow,
      col.names = c("Cohort Stage", "Total (n)", "AF cases (n)", "Non-AF (n)"),
      align = c("l", "c", "c", "c"),
      caption = "Study Population Flow for Prediction Models") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE) %>%
  row_spec(c(4, 5, 7), bold = TRUE, background = "#f0f0f0")  # Highlight the three models

Study Population Flow for Prediction Models
Cohort Stage	Total (n)	AF cases (n)	Non-AF (n)
Starting cohort	353	96	257
Excluded: AF during ECG/Holter	15	15	0
Excluded: Missing clinical data	10	4	6
Model A: Genetic-Only	338	81	257
Model B: Clinical-Only	328	77	251
Excluded: Missing PRS data	0	0	0
Model C: Clinical + Genetic	328	77	251

# What data was missing
incomplete_only <- Athletes_modelling %>%
  filter(!complete.cases(select(., AF, Age, `AF centile`, Age_decades, High_normal_BP, 
                                LAVi_34, LAVi_60, P_wave_120, PTFV1_high, PAC_100)))

missing_summary <- data.frame(
  Variable = c("Age", "AF-PRS", "SBP ≥130 mmHg", "LAVi >34 mL/m²", 
               "LAVi >60 mL/m²", "P-wave >120 ms", "PTFV₁ ≥4 mV·ms", "PACs ≥100/24h"),
  Missing_n = c(
    sum(is.na(incomplete_only$Age)),
    sum(is.na(incomplete_only$`AF centile`)),
    sum(is.na(incomplete_only$High_normal_BP)),
    sum(is.na(incomplete_only$LAVi_34)),
    sum(is.na(incomplete_only$LAVi_60)),
    sum(is.na(incomplete_only$P_wave_120)),
    sum(is.na(incomplete_only$PTFV1_high)),
    sum(is.na(incomplete_only$PAC_100))
  )
)

missing_summary$Pct_Excluded <- round((missing_summary$Missing_n / nrow(incomplete_only)) * 100, 1)

kable(missing_summary, 
      col.names = c("Variable", "Missing (n)", "% of Excluded Cases"),
      align = c("l", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Variable	Missing (n)	% of Excluded Cases
Age	0	0
AF-PRS	0	0
SBP ≥130 mmHg	1	10
LAVi >34 mL/m²	0	0
LAVi >60 mL/m²	0	0
P-wave >120 ms	0	0
PTFV₁ ≥4 mV·ms	0	0
PACs ≥100/24h	9	90

4.1 Model A: Genetic-Only

model_A <- glm(AF ~ highest_decile, data = Athletes_analysis, family = binomial())
Athletes_analysis$predicted_prob_A <- predict(model_A, type = "response")

roc_A <- pROC::roc(Athletes_analysis$AF, Athletes_analysis$predicted_prob_A, quiet = TRUE)
auc_A <- as.numeric(pROC::auc(roc_A))
ci_A <- pROC::ci.auc(roc_A)

model_A_summary <- broom::tidy(model_A, conf.int = TRUE, exponentiate = TRUE) %>%
  filter(term != "(Intercept)")

cat("Model A:Genetic Risk (Highest Decile)\n\n")

## Model A:Genetic Risk (Highest Decile)

cat("AUC:", round(auc_A, 3), "(95% CI:", round(ci_A[1], 3), "-", round(ci_A[3], 3), ")\n")

## AUC: 0.561 (95% CI: 0.51 - 0.611 )

cat("OR:", round(model_A_summary$estimate, 2), 
    "(95% CI:", round(model_A_summary$conf.low, 2), "-", round(model_A_summary$conf.high, 2), ")\n")

## OR: 2.56 (95% CI: 1.28 - 5.03 )

cat("P-value:", format_p_value(model_A_summary$p.value), "\n")

## P-value: 0.007

4.2 Model B: Clinical-Only

# Fit model
model_B <- glm(AF ~ Age_decades + High_normal_BP + LAVi_34 + LAVi_60 +
                P_wave_120 + PTFV1_high + PAC_100,
              data = Athletes_analysis, family = binomial())

Athletes_analysis$predicted_prob_B <- predict(model_B, type = "response")

# ROC analysis
roc_B <- pROC::roc(Athletes_analysis$AF, Athletes_analysis$predicted_prob_B, quiet = TRUE)
auc_B <- as.numeric(pROC::auc(roc_B))
ci_B <- pROC::ci.auc(roc_B)

cat("Model B Performance\n\n")

## Model B Performance

cat("- AUC:", round(auc_B, 3), "(95% CI:", round(ci_B[1], 3), "-", round(ci_B[3], 3), ")\n\n")

## - AUC: 0.844 (95% CI: 0.791 - 0.897 )

# Derive point system
beta_values <- coef(model_B)[-1]
min_beta <- min(abs(beta_values))
derived_points <- round(abs(beta_values) / min_beta)

# Create coefficient table
model_B_coefs <- broom::tidy(model_B, conf.int = TRUE, exponentiate = FALSE)
model_B_or <- broom::tidy(model_B, conf.int = TRUE, exponentiate = TRUE)

model_B_summary <- model_B_coefs %>%
  filter(term != "(Intercept)") %>%
  mutate(
    Variable = case_when(
      term == "Age_decades" ~ "Age (per decade)",
      term == "High_normal_BP" ~ "SBP ≥130 mmHg",
      term == "LAVi_34" ~ "LAVi 35-60 mL/m²",
      term == "LAVi_60" ~ "LAVi >60 mL/m²",
      term == "P_wave_120" ~ "P-wave >120 ms",
      term == "PTFV1_high" ~ "PTFV₁ ≥4 mV·ms",
      term == "PAC_100" ~ "PACs ≥100/24h"
    ),
    OR = model_B_or$estimate[-1],
    OR_lower = model_B_or$conf.low[-1],
    OR_upper = model_B_or$conf.high[-1],
    Points = derived_points,
    Points_Display = if_else(term == "LAVi_60",
      paste0("+", derived_points[4], " (", derived_points[3] + derived_points[4], " total)"),
      as.character(derived_points))
  ) %>%
  select(Variable, `β` = estimate, SE = std.error, OR, 
         `OR CI lower` = OR_lower, `OR CI upper` = OR_upper, 
         `P value` = p.value, `Points Assigned` = Points_Display)

# Calculate risk scores
Athletes_analysis$clinical_score_B <- with(Athletes_analysis,
  Age_decades * derived_points[1] +
    High_normal_BP * derived_points[2] +
    if_else(LAVi_60 == 1, derived_points[3] + derived_points[4],
            if_else(LAVi_34 == 1, derived_points[3], 0)) +
    P_wave_120 * derived_points[5] +
    PTFV1_high * derived_points[6] +
    PAC_100 * derived_points[7]
)

# Model calibration
hl_test <- hoslem.test(Athletes_analysis$AF, Athletes_analysis$predicted_prob_B, g = 10)
cat("Calibration: Hosmer-Lemeshow χ² =", round(hl_test$statistic, 2), 
    ", p =", round(hl_test$p.value, 3), 
    if_else(hl_test$p.value > 0.05, "(good fit)\n\n", "(poor fit)\n\n"))

## Calibration: Hosmer-Lemeshow χ² = 6.56 , p = 0.584 (good fit)

# Optimal cutoff
coords_result <- coords(roc_B, "best", ret = c("threshold", "sensitivity", "specificity", "ppv", "npv"))
cat("Optimal Cutoff (Youden): Threshold =", round(coords_result$threshold, 3), "\n")

## Optimal Cutoff (Youden): Threshold = 0.22

cat("Sensitivity:", round(coords_result$sensitivity * 100, 1), 
    "% | Specificity:", round(coords_result$specificity * 100, 1), "%\n")

## Sensitivity: 80.5 % | Specificity: 77.3 %

cat("PPV:", round(coords_result$ppv * 100, 1), 
    "% | NPV:", round(coords_result$npv * 100, 1), "%\n\n")

## PPV: 52.1 % | NPV: 92.8 %

# Performance at clinical thresholds
thresholds <- c(0.10, 0.40, 0.70)
threshold_names <- c("Low-Moderate (10%)", "Moderate-High (40%)", "High-Very High (70%)")

perf_table <- map_dfr(1:length(thresholds), function(i) {
  predicted_class <- if_else(Athletes_analysis$predicted_prob_B >= thresholds[i], 1, 0)
  tp <- sum(predicted_class == 1 & Athletes_analysis$AF == 1)
  tn <- sum(predicted_class == 0 & Athletes_analysis$AF == 0)
  fp <- sum(predicted_class == 1 & Athletes_analysis$AF == 0)
  fn <- sum(predicted_class == 0 & Athletes_analysis$AF == 1)
  
  tibble(
    Threshold = threshold_names[i],
    Sensitivity = paste0(round(100 * tp/(tp+fn), 1), "%"),
    Specificity = paste0(round(100 * tn/(tn+fp), 1), "%"),
    PPV = paste0(round(100 * tp/(tp+fp), 1), "%"),
    NPV = paste0(round(100 * tn/(tn+fn), 1), "%")
  )
})

kable(perf_table, caption = "Model Performance at Clinical Thresholds") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Model Performance at Clinical Thresholds
Threshold	Sensitivity	Specificity	PPV	NPV
Low-Moderate (10%)	92.2%	55.4%	38.8%	95.9%
Moderate-High (40%)	58.4%	90.4%	65.2%	87.6%
High-Very High (70%)	29.9%	98.4%	85.2%	82.1%

# Risk stratification
prob_thresholds <- c(0.10, 0.40, 0.70)
cutoffs <- round(map_dbl(prob_thresholds, ~min(Athletes_analysis$clinical_score_B[Athletes_analysis$predicted_prob_B >= .x], na.rm = TRUE)))
score_breaks <- c(-Inf, cutoffs, Inf)

Athletes_analysis <- Athletes_analysis %>%
  mutate(
    clinical_score_B_rounded = round(clinical_score_B),
    risk_category = cut(clinical_score_B_rounded, breaks = score_breaks,
                        labels = c("Low", "Moderate", "High", "Very High"),
                        include.lowest = TRUE)
  )

risk_summary <- Athletes_analysis %>%
  group_by(risk_category) %>%
  summarise(
    `Score Range` = paste0(min(clinical_score_B_rounded), "–", max(clinical_score_B_rounded)),
    N = n(),
    `AF Cases` = sum(AF),
    `AF Prevalence (%)` = round(mean(AF) * 100, 1),
    `Mean Age (years)` = round(mean(Age), 1),
    `High BP (%)` = round(mean(High_normal_BP) * 100, 1),
    `LAVi >34 (%)` = round(mean(LAVi_34 | LAVi_60) * 100, 1),
    `P-wave >120ms (%)` = round(mean(P_wave_120) * 100, 1),
    `High PTFV1 (%)` = round(mean(PTFV1_high) * 100, 1),
    `PACs ≥100/24h (%)` = round(mean(PAC_100) * 100, 1),
    .groups = "drop"
  ) %>%
  rename(`Risk Category` = risk_category)

4.3 Model C: Clinical-Genetic

model_C <- glm(AF ~ Age_decades + High_normal_BP + LAVi_34 + LAVi_60 +
                P_wave_120 + PTFV1_high + PAC_100 + highest_decile,
              data = Athletes_analysis, family = binomial())

Athletes_analysis$predicted_prob_C <- predict(model_C, type = "response")
roc_C <- pROC::roc(Athletes_analysis$AF, Athletes_analysis$predicted_prob_C, quiet = TRUE)
auc_C <- as.numeric(pROC::auc(roc_C))
ci_C <- pROC::ci.auc(roc_C)
delong_BC <- pROC::roc.test(roc_B, roc_C, method = "delong")

cat("Model C: Clinical + Genetic\n\n")

## Model C: Clinical + Genetic

cat("- AUC:", round(auc_C, 3), "(95% CI:", round(ci_C[1], 3), "-", round(ci_C[3], 3), ")\n")

## - AUC: 0.85 (95% CI: 0.798 - 0.902 )

cat("- Comparison to Model B: p =", format(delong_BC$p.value, digits = 3), "\n")

## - Comparison to Model B: p = 0.324

5 Table 3

kable(model_B_summary, digits = 3,
      caption = paste0("Table 3. Multivariable Logistic Regression Model Predicting Atrial Fibrillation (n=", nrow(Athletes_analysis), ")")) %>%
  kable_styling(bootstrap_options = c("striped", "hover"))

Table 3. Multivariable Logistic Regression Model Predicting Atrial Fibrillation (n=328)
Variable	β	SE	OR	OR CI lower	OR CI upper	P value	Points Assigned
Age (per decade)	0.243	0.104	1.276	1.044	1.570	0.019	1
SBP ≥130 mmHg	0.971	0.338	2.640	1.372	5.190	0.004	4
LAVi 35-60 mL/m²	0.858	0.468	2.359	0.989	6.290	0.066	4
LAVi >60 mL/m²	0.878	0.642	2.406	0.673	8.473	0.171	+4 (8 total)
P-wave >120 ms	0.993	0.348	2.699	1.371	5.392	0.004	4
PTFV₁ ≥4 mV·ms	0.861	0.446	2.367	0.978	5.666	0.053	4
PACs ≥100/24h	1.535	0.358	4.644	2.311	9.444	0.000	6

6 Table 4

kable(risk_summary, 
      caption = "Table 4. Clinical Risk Stratification Categories") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = FALSE, font_size = 10)

Table 4. Clinical Risk Stratification Categories
Risk Category	Score Range	N	AF Cases	AF Prevalence (%)	Mean Age (years)	High BP (%)	LAVi >34 (%)	P-wave >120ms (%)	High PTFV1 (%)	PACs ≥100/24h (%)
Low	2–10	150	6	4.0	27.3	20.0	65.3	10.0	4.7	0.7
Moderate	11–17	110	26	23.6	44.8	58.2	87.3	47.3	11.8	21.8
High	18–24	46	26	56.5	59.9	60.9	95.7	76.1	23.9	63.0
Very High	25–32	22	19	86.4	67.7	86.4	95.5	90.9	40.9	100.0

7 Central Figure:

AF Polygenic Risk Score Distribution

if ("AF PRS 6238158var PGS000016" %in% names(Athletes_NoCM)) {
  ggplot(Athletes_NoCM, aes(x = factor(AF), y = `AF PRS 6238158var PGS000016`, 
                            fill = factor(AF))) +
    geom_violin(trim = FALSE, alpha = 0.8, color = NA) +
    geom_boxplot(width = 0.1, color = "black", fill = "white") +
    stat_compare_means(method = "t.test", label = "p.format", 
                       comparisons = list(c("0", "1")),
                       bracket.size = 0.7, size = 5, tip.length = 0.02) +
    scale_fill_manual(values = c("0" = "#0072B2", "1" = "#990000")) +
    scale_x_discrete(labels = c("No AF", "AF")) +
    labs(x = "", y = "AF Polygenic Risk Score") +
    theme_classic(base_size = 20) +
    theme(legend.position = "none")
} else {
  cat("AF PRS variable not found in dataset\n")
}

Legend = AF Polygenic Risk Score Distribution by AF Status

8 Figure 1

Figure 1. Bar Plot showing the prevalence of abnormal atrial myopathy markers in athletes with (n=96) and without (n=257) Atrial Fibrillation

# Ensure AF_status exists
Athletes_NoCM$AF_status <- factor(Athletes_NoCM$AF, levels = c(0, 1), labels = c("No AF", "AF"))

# Panel A: Individual criterion prevalence
criteria_prevalence <- Athletes_NoCM %>%
  group_by(AF_status) %>%
  summarise(
    `SBP ≥130 mmHg` = mean(High_normal_BP, na.rm = TRUE) * 100,
    `LAVi 35-60 mL/m²` = mean(LAVi_34 & !LAVi_60, na.rm = TRUE) * 100,
    `LAVi >60 mL/m²` = mean(LAVi_60, na.rm = TRUE) * 100,
    `P-wave >120 ms` = mean(P_wave_120, na.rm = TRUE) * 100,
      `PTFV₁ ≥4 mV·ms` = mean(PTFV1_high, na.rm = TRUE) * 100,
    `PACs ≥100/24h` = mean(PAC_100, na.rm = TRUE) * 100,
    `AF-PRS (top decile)` = mean(highest_decile, na.rm = TRUE) * 100,
    .groups = "drop"
  ) %>%
  pivot_longer(cols = -AF_status, names_to = "Criterion", values_to = "Percentage")

fig1a <- ggplot(criteria_prevalence, aes(x = reorder(Criterion, -Percentage), 
                                          y = Percentage, fill = AF_status)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), 
           color = "black", alpha = 0.8, width = 0.8) +
  scale_x_discrete(labels = function(x) {
    x <- gsub("PTFV₁", "PTFV1", x)  # Replace subscript with regular 1
    return(x)
  }) +
  scale_fill_manual(values = c("No AF" = "#0072B2", "AF" = "#990000")) +
  labs(x = "Atrial Myopathy Criterion", y = "Prevalence (%)", fill = "") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 35, hjust = 1),
        legend.position = "bottom")

# Panel B: Number of criteria met (overlap)
Athletes_NoCM$criteria_count <- rowSums(
  select(Athletes_NoCM, High_normal_BP, LAVi_34, LAVi_60, 
         P_wave_120, PTFV1_high, PAC_100, highest_decile),
  na.rm = TRUE
)

Athletes_NoCM$criteria_group <- factor(
  case_when(
    Athletes_NoCM$criteria_count == 0 ~ "0",
    Athletes_NoCM$criteria_count == 1 ~ "1",
    Athletes_NoCM$criteria_count == 2 ~ "2",
    Athletes_NoCM$criteria_count == 3 ~ "3",
    Athletes_NoCM$criteria_count == 4 ~ "4",
    Athletes_NoCM$criteria_count >= 5 ~ "≥5"
  ),
  levels = c("0", "1", "2", "3", "4", "≥5")
)

criteria_overlap <- Athletes_NoCM %>%
  count(AF_status, criteria_group) %>%
  group_by(AF_status) %>%
  mutate(percentage = n / sum(n) * 100) %>%
  ungroup() %>%
  complete(AF_status, criteria_group, fill = list(n = 0, percentage = 0))

fig1b <- ggplot(criteria_overlap, aes(x = criteria_group, y = percentage, 
                                       fill = AF_status)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.9), 
           color = "black", alpha = 0.8, width = 0.8) +
  geom_text(aes(label = paste0(round(percentage, 1), "%")),
            position = position_dodge(width = 0.9), vjust = -0.5, size = 3) +
  scale_fill_manual(values = c("No AF" = "#0072B2", "AF" = "#990000")) +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(x = "Number of Criteria Met", y = "Percentage (%)", fill = "") +
  theme_minimal(base_size = 12) +
  theme(legend.position = "bottom")

# Combine panels
ggarrange(fig1a, fig1b, ncol = 2, labels = c("A", "B"), 
          common.legend = TRUE, legend = "bottom")

Legend = Abnormal Atrial Myopathy Markers – AF-PRS = atrial fibrillation polygenic risk score; LAVi = left atrial volume index; PAC = premature atrial complex; PTFV1 = P wave terminal force velocity in lead V1; P-wave = P wave duration; SBP = systolic blood pressure.

9 Figure 2

Figure 2. Receiver-operating characteristic (ROC) curves for Model A (Genetic-Only) Model B (Clinical-Only), and Model C (Clinical-Genetic).

# Model A ROC
p2a <- ggplot(data.frame(fpr = 1 - roc_A$specificities,
                        tpr = roc_A$sensitivities),
             aes(x = fpr, y = tpr)) +
  geom_line(color = "#e9c46a", size = 1.2) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  annotate("text", x = 0.6, y = 0.2,
           label = paste0("AUC = ", round(auc_A, 3),
                         "\n95% CI: ", round(ci_A[1], 3), "–", 
                         round(ci_A[3], 3)),
           size = 4) +
  labs(title = "Model A: Genetic-Only",
       x = "1 - Specificity", y = "Sensitivity") +
  coord_equal() +
  theme_minimal(base_size = 14)

# Model B ROC
p2b <- ggplot(data.frame(fpr = 1 - roc_B$specificities,
                        tpr = roc_B$sensitivities),
             aes(x = fpr, y = tpr)) +
  geom_line(color = "#a7c957", size = 1.2) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  annotate("text", x = 0.6, y = 0.2,
           label = paste0("AUC = ", round(auc_B, 3),
                         "\n95% CI: ", round(ci_B[1], 3), "–", 
                         round(ci_B[3], 3)),
           size = 4) +
  labs(title = "Model B: Clinical-Only",
       x = "1 - Specificity", y = "Sensitivity") +
  coord_equal() +
  theme_minimal(base_size = 14)

# Model C ROC
p2c <- ggplot(data.frame(fpr = 1 - roc_C$specificities,
                        tpr = roc_C$sensitivities),
             aes(x = fpr, y = tpr)) +
  geom_line(color = "#6a994e", size = 1.2) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed") +
  annotate("text", x = 0.6, y = 0.2,
           label = paste0("AUC = ", round(auc_C, 3),
                         "\n95% CI: ", round(ci_C[1], 3), "–", 
                         round(ci_C[3], 3)),
           size = 4) +
  labs(title = "Model C: Clinical-Genetic",
       x = "1 - Specificity", y = "Sensitivity") +
  coord_equal() +
  theme_minimal(base_size = 14)

# Combine plots
gridExtra::grid.arrange(p2a, p2b, p2c, ncol = 3)

Legend = AUC values (with 95% CIs) are annotated for each model. “Genetic Only” refers to AF centile as a continuous percentile predictor. “Clinical-Only” utilises clinical markers (systolic blood pressure ≥ 130 mmHg; P wave duration > 120 ms; P terminal force in V1 ≥ 4.0 mV·ms; left atrial volume index > 34 mL/m² and > 60 mL/m²; and PACs ≥ 100/24 h. “Clinical Genetic” combines Clinical markers) with the AF-PRS dichotomised at the top decile (≥ 90th percentile).

10 Supplementary Tables

controls <- filter(NoCM, EICR == "Control")
athletes <- filter(NoCM, EICR == "Athlete")

# Helper function for age-adjusted p-values
calculate_age_adjusted_p <- function(data, var, group_var, is_categorical = FALSE) {
  if (!all(c(var, group_var, "Age") %in% names(data))) return(NA)
  clean_data <- data %>% filter(!is.na(.data[[var]]) & !is.na(.data[[group_var]]) & !is.na(Age))
  if (nrow(clean_data) < 10 || var == "Age") return(NA)
  
  tryCatch({
    if (is_categorical) {
      model <- glm(as.factor(clean_data[[var]]) ~ clean_data[[group_var]] + Age, 
                  data = clean_data, family = binomial())
    } else {
      model <- lm(as.numeric(clean_data[[var]]) ~ clean_data[[group_var]] + Age, 
                 data = clean_data)
    }
    coef_summary <- summary(model)$coefficients
    group_row <- grep(group_var, rownames(coef_summary))[1]
    if (is.na(group_row)) group_row <- 2
    return(coef_summary[group_row, 4])
  }, error = function(e) NA)
}

# Create comparison table
supp_table_athletes_controls <- data.frame(Variable = character(), stringsAsFactors = FALSE)
combined_data <- rbind(athletes %>% mutate(group = "Athletes"), controls %>% mutate(group = "Controls"))

for (var in all_variables) {
  if (!var %in% names(athletes) || !var %in% names(controls)) next
  
  is_cat <- var %in% categorical_vars
  is_nonnorm <- var %in% nonnormal_vars
  
  supp_table_athletes_controls <- rbind(supp_table_athletes_controls, data.frame(
    Variable = var,
    Athletes = format_stats(athletes, var, is_cat, is_nonnorm),
    Controls = format_stats(controls, var, is_cat, is_nonnorm),
    P_value = format_p_value(calculate_p_value(combined_data, var, "group", is_cat, is_nonnorm)),
    P_age_adj = format_p_value(calculate_age_adjusted_p(combined_data, var, "group", is_cat)),
    stringsAsFactors = FALSE
  ))
}

supp_table_athletes_controls$Category <- sapply(supp_table_athletes_controls$Variable, assign_category)
supp_table_athletes_controls$Variable <- sapply(supp_table_athletes_controls$Variable, clean_variable_names)

# Define correct order within each category
variable_order <- c(
  # Demographics
  "Age, years", "Male sex, n (%)", "BMI, kg/m²", "BSA, m²", "Average HR, bpm", "Minimum HR, bpm", 
  "Bradycardia burden, % (24h)", "Hypertension, n (%)", "SBP, mmHg", "DBP, mmHg", 
  "Dyslipidaemia, n (%)", "Diabetes, n (%)", "History of smoking, n (%)", "Alcohol, drinks/week",
  # Exercise Capacity
  "VO₂peak, mL/kg/min", "VO₂peak, L/min", "% of predicted exercise capacity", "Respiratory exchange ratio",
  "Total weekly MET-hours", "Years of endurance training",
  # Left ventricular parameters
  "LVEDV, mL", "LVESV, mL", "LVEF, %", "LV strain, %",
  # Left atrial parameters
  "LAESV, mL", "LAVi, mL/m²", "Mildly abnormal >34 mL/m², n (%)", "Severely abnormal >48 mL/m², n (%)",
  "Extremely abnormal >60 mL/m², n (%)", "LA:LV", "Reservoir strain, %", "Conduit strain, %", "Contractile strain, %",
  # Right Ventricular Parameters
  "RVEDV, mL", "RVESV, mL", "RVEF, %",
  # Doppler Parameters
  "E velocity, m/s", "A velocity, m/s", "E/A ratio", "E′ velocity, cm/s", "E/E' ratio",
  # Electrocardiographic
  "P wave duration, ms", "P-wave duration >120 ms, n (%)", "P wave axis, °", "PTFV₁, mV·ms",
  "PTFV₁ ≥4 mV·ms, n (%)", "Advanced IAB, n (%)", "PACs per 24 h", "≥100 PACs per 24 h, n (%)",
  # Biochemical
  "BNP, pg/mL", "cTnI, ng/L"
)

# Order the table by the defined sequence
supp_table_athletes_controls$Variable <- factor(supp_table_athletes_controls$Variable, 
                                                 levels = variable_order)
supp_table_athletes_controls <- supp_table_athletes_controls %>% arrange(Variable)
supp_table_athletes_controls$Variable <- as.character(supp_table_athletes_controls$Variable)

# Split and display with correct ordering
table1_cats <- c("Demographics", "Exercise Capacity")

# For table 2, create nested structure for echocardiographic parameters
supp_1_cat <- supp_table_athletes_controls %>% 
  filter(Category %in% table1_cats) %>%
  mutate(Category = factor(Category, levels = table1_cats))

# Create hierarchical grouping for Table 2
supp_2_cat <- supp_table_athletes_controls %>% 
  filter(Category %in% c("Left ventricular parameters", "Left atrial parameters", 
                         "Right Ventricular Parameters", "Doppler Parameters", 
                         "Electrocardiographic", "Biochemical"))

# Add parent category for nested structure
supp_2_cat <- supp_2_cat %>%
  mutate(ParentCategory = case_when(
    Category %in% c("Left ventricular parameters", "Left atrial parameters") ~ "Echocardiographic",
    TRUE ~ Category
  ))

supp_1 <- supp_1_cat %>% select(-Category)
supp_2 <- supp_2_cat %>% select(-Category, -ParentCategory)

colnames(supp_1) <- c("Characteristic", paste0("Athletes (n=", nrow(athletes), ")"), 
                      paste0("Controls (n=", nrow(controls), ")"), "p-value", "Age-adjusted p value")
colnames(supp_2) <- colnames(supp_1)

cat("\n## Supplementary Table 1\n\n")

10.1 Supplementary Table 1

print(kable(supp_1, caption = "Supplementary Table 1. Baseline Demographics: Endurance Athletes versus Non-athletic Controls (n=450)",
      align = c("l", "c", "c", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE, font_size = 10) %>%
  pack_rows(index = table(supp_1_cat$Category)[table1_cats]) %>%
  add_footnote(c("Data are presented as mean ± standard deviation, median (interquartile range), or n (%). P values were calculated using unpaired t-tests for normally distributed continuous variables, Mann-Whitney U tests for non-normally distributed continuous variables, and chi-square tests for categorical variables. Age-adjusted p values were calculated using multivariable regression models with age as a covariate to account for age differences between groups.\nAbbreviations: BMI, body mass index; BSA, body surface area; DBP, diastolic blood pressure; HR, heart rate; SBP, systolic blood pressure; VO₂peak, peak oxygen uptake."),
               notation = "none"))

Supplementary Table 1. Baseline Demographics: Endurance Athletes versus Non-athletic Controls (n=450)
Characteristic	Athletes (n=353)	Controls (n=97)	p-value	Age-adjusted p value
Demographics
Age, years	41 [22-60]	34 [22-54]	0.315	–
Male sex, n (%)	262 (74.2%)	62 (63.9%)	0.061	0.071
BMI, kg/m²	23.5 [21.9-25.5]	24.5 [22.3-27]	0.023	<0.001
BSA, m²	1.9 [1.8-2.1]	1.9 [1.7-2]	0.123	0.122
Average HR, bpm	66.4 ± 9.1	73.6 ± 8	<0.001	<0.001
Minimum HR, bpm	44 [39-49]	51 [45-55]	<0.001	<0.001
Bradycardia burden, % (24h)	6.4 [0.4-26.9]	0 [0-3]	<0.001	<0.001
Hypertension, n (%)	31 (8.9%)	1 (1.6%)	0.067	0.847
SBP, mmHg	126 [117-135]	121 [115-129]	0.006	0.106
DBP, mmHg	69 [62-77.2]	69 [65-76]	0.656	0.045
Dyslipidaemia, n (%)	45 (12.9%)	0 (0%)	<0.001	0.985
Diabetes, n (%)	2 (0.6%)	0 (0%)	1.000	0.998
History of smoking, n (%)	47 (13.5%)	10 (14.5%)	0.972	0.021
Alcohol, drinks/week	1 [0-6]	3 [3-3]	0.531	0.467
Exercise Capacity
VO₂peak, mL/kg/min	48.9 ± 14.3	35.8 ± 9.2	<0.001	<0.001
VO₂peak, L/min	3669.5 ± 997.4	2758.8 ± 865.9	<0.001	<0.001
% of predicted exercise capacity	126.4 ± 20.8	97.1 ± 22	<0.001	<0.001
Respiratory exchange ratio	1.3 ± 0.1	1.3 ± 0.1	<0.001	<0.001
Total weekly MET-hours	81 [49-104]	–	–	–
Years of endurance training	20 [10-30]	–	–	–
Data are presented as mean ± standard deviation, median (interquartile range), or n (%). P values were calculated using unpaired t-tests for normally distributed continuous variables, Mann-Whitney U tests for non-normally distributed continuous variables, and chi-square tests for categorical variables. Age-adjusted p values were calculated using multivariable regression models with age as a covariate to account for age differences between groups. Abbreviations: BMI, body mass index; BSA, body surface area; DBP, diastolic blood pressure; HR, heart rate; SBP, systolic blood pressure; VO₂peak, peak oxygen uptake.

cat("\n## Supplementary Table 2\n\n")

10.2 Supplementary Table 2

# Create pack_rows structure for Table 2 - all categories at same level
# Use the same approach as Table 1 for consistency

# Define the order of categories for Table 2
table2_cat_order <- c("Left ventricular parameters", "Left atrial parameters", 
                      "Right Ventricular Parameters", "Doppler Parameters", 
                      "Electrocardiographic", "Biochemical")

# Ensure categories are in the right order
supp_2_cat <- supp_2_cat %>%
  mutate(Category = factor(Category, levels = table2_cat_order))

table2_output <- kable(supp_2, caption = "Supplementary Table 2. Cardiac Parameters: Athletes versus Controls (n=450)",
      align = c("l", "c", "c", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE, font_size = 10) %>%
  pack_rows(index = table(supp_2_cat$Category)[table2_cat_order])

print(table2_output %>%
  add_footnote(c("Data are presented as mean ± standard deviation, median (interquartile range), or n (%). Statistical comparisons as per Supplementary Table 1.\nAbbreviations: BNP, brain natriuretic peptide; cTnI, cardiac troponin I; E/A, mitral inflow early/late diastolic velocity ratio; E′, mitral annular early diastolic velocity; EDV, end-diastolic volume; ESV, end-systolic volume; IAB, interatrial block; LA, left atrium; LAVi, left atrial volume index; LA:LV, left atrial to left ventricular end-diastolic volume ratio; LV, left ventricle; LVEF, left ventricular ejection fraction; PAC, premature atrial contraction; PTFV₁, P-wave terminal force in lead V1; RV, right ventricle; RVEF, right ventricular ejection fraction."), 
               notation = "none"))

Supplementary Table 2. Cardiac Parameters: Athletes versus Controls (n=450)
Characteristic	Athletes (n=353)	Controls (n=97)	p-value	Age-adjusted p value
Left ventricular parameters
LVEDV, mL	166.9 ± 45.3	118.2 ± 36	<0.001	<0.001
LVESV, mL	68.3 ± 22.7	50.1 ± 17	<0.001	<0.001
LVEF, %	58.1 ± 5.5	57.4 ± 4.6	0.639	0.714
LV strain, %	-18.9 ± 2.3	-19 ± 2	0.662	0.796
Left atrial parameters
LAESV, mL	83 [69-98.2]	55.4 [47.2-68.5]	<0.001	<0.001
LAVi, mL/m²	42.9 [35.7-49.8]	29.7 [25.4-35.7]	<0.001	<0.001
Mildly abnormal >34 mL/m², n (%)	283 (80.2%)	27 (28.7%)	<0.001	<0.001
Severely abnormal >48 mL/m², n (%)	107 (30.3%)	4 (4.3%)	<0.001	<0.001
Extremely abnormal >60 mL/m², n (%)	26 (7.4%)	1 (1.1%)	0.025	0.071
LA:LV	0.49 [0.42-0.62]	0.48 [0.41-0.58]	0.508	0.857
Reservoir strain, %	30.4 ± 7.4	34 ± 6.3	<0.001	<0.001
Conduit strain, %	19.3 ± 6.9	21 ± 5.7	0.021	0.385
Contractile strain, %	11.2 ± 4.7	13 ± 3.8	<0.001	<0.001
Right Ventricular Parameters
RVEDV, mL	150.2 ± 47.2	90.9 ± 35.7	<0.001	<0.001
RVESV, mL	76.7 ± 25.8	51.7 ± 20.9	<0.001	<0.001
RVEF, %	48.4 ± 8.9	42.6 ± 8.4	<0.001	<0.001
Doppler Parameters
E velocity, m/s	0.7 ± 0.2	0.7 ± 0.2	0.452	0.824
A velocity, m/s	0.4 ± 0.1	0.5 ± 0.1	0.032	<0.001
E/A ratio	1.8 ± 1	1.7 ± 0.7	0.089	0.010
E′ velocity, cm/s	10.9 ± 3.4	10.8 ± 3.4	0.962	0.021
E/E’ ratio	5.8 ± 1.7	6.3 ± 2.1	0.032	<0.001
Electrocardiographic
P wave duration, ms	114 [102.5-125]	107.5 [102-112.2]	<0.001	<0.001
P-wave duration >120 ms, n (%)	125 (36.9%)	12 (12.6%)	<0.001	<0.001
P wave axis, °	50 ± 26.1	54.3 ± 31	0.237	0.178
PTFV₁, mV·ms	2 [1-3.2]	1.8 [0.8-3]	0.987	0.944
PTFV₁ ≥4 mV·ms, n (%)	44 (13%)	7 (8.6%)	0.376	0.344
Advanced IAB, n (%)	28 (8.4%)	0 (0%)	0.022	0.989
PACs per 24 h	20 [7-91.5]	6 [2-24.5]	<0.001	0.345
≥100 PACs per 24 h, n (%)	76 (23%)	7 (8.3%)	0.004	0.047
Biochemical
BNP, pg/mL	15.7 [9.6-29.2]	10 [5.5-13.4]	<0.001	0.465
cTnI, ng/L	3 [2-8]	2 [1-3]	<0.001	0.266
Data are presented as mean ± standard deviation, median (interquartile range), or n (%). Statistical comparisons as per Supplementary Table 1. Abbreviations: BNP, brain natriuretic peptide; cTnI, cardiac troponin I; E/A, mitral inflow early/late diastolic velocity ratio; E′, mitral annular early diastolic velocity; EDV, end-diastolic volume; ESV, end-systolic volume; IAB, interatrial block; LA, left atrium; LAVi, left atrial volume index; LA:LV, left atrial to left ventricular end-diastolic volume ratio; LV, left ventricle; LVEF, left ventricular ejection fraction; PAC, premature atrial contraction; PTFV₁, P-wave terminal force in lead V1; RV, right ventricle; RVEF, right ventricular ejection fraction.

# Define risk factors and their thresholds based on your document
risk_factors <- list(
  "Age" = list(var = "Age", threshold = 45, type = "continuous", cutoff_type = ">"),
  "SBP" = list(var = "rest_SBP", threshold = 130, type = "continuous", cutoff_type = "≥"),
  "LAVi_normal" = list(var = "LAVi", threshold = c(16, 34), type = "range", label = "Normal Range (16-34)"),
  "LAVi_severe" = list(var = "LAVi", threshold = 48, type = "continuous", cutoff_type = "≥", label = "Severely dilated (≥48)"),
  "LAVi_extreme" = list(var = "LAVi", threshold = 60, type = "continuous", cutoff_type = "≥", label = "Extremely dilated (≥60)"),
  "P_wave" = list(var = "P wave Duration", threshold = 120, type = "continuous", cutoff_type = ">"),
  "PTFV1" = list(var = "PTFV1 (ms*mV)", threshold = 4, type = "continuous", cutoff_type = "≥"),
  "PACs" = list(var = "ECTOPIES LS", threshold = 100, type = "continuous", cutoff_type = "≥"),
  "PRS_Q4" = list(var = "highest quartile", threshold = 1, type = "binary"),
  "PRS_D10" = list(var = "highest decile", threshold = 1, type = "binary"),
  "PRS_P98" = list(var = "98th percentile", threshold = 1, type = "binary")
)

# Function to calculate OR and CI
calculate_or_ci <- function(data, var_info) {
  clean_data <- data %>% filter(!is.na(AF) & !is.na(.data[[var_info$var]]))
  
  if (nrow(clean_data) < 10) return(NULL)
  
  tryCatch({
    # Create binary exposure variable based on threshold
    if (var_info$type == "range") {
      clean_data$exposure <- as.numeric(clean_data[[var_info$var]] >= var_info$threshold[1] & 
                                        clean_data[[var_info$var]] <= var_info$threshold[2])
    } else if (var_info$type == "binary") {
      clean_data$exposure <- as.numeric(clean_data[[var_info$var]] == var_info$threshold)
    } else {
      if (var_info$cutoff_type == "≥" | var_info$cutoff_type == ">") {
        clean_data$exposure <- as.numeric(clean_data[[var_info$var]] >= var_info$threshold)
      } else {
        clean_data$exposure <- as.numeric(clean_data[[var_info$var]] > var_info$threshold)
      }
    }
    
    # Fit logistic regression
    model <- glm(AF ~ exposure, data = clean_data, family = binomial())
    
    # Get results
    results <- broom::tidy(model, conf.int = TRUE, exponentiate = TRUE) %>%
      filter(term == "exposure")
    
    # Count cases
    af_exposed <- sum(clean_data$AF == 1 & clean_data$exposure == 1, na.rm = TRUE)
    noaf_exposed <- sum(clean_data$AF == 0 & clean_data$exposure == 1, na.rm = TRUE)
    
    return(list(
      n_af = af_exposed,
      pct_af = round(100 * af_exposed / sum(clean_data$AF == 1, na.rm = TRUE), 0),
      n_noaf = noaf_exposed,
      pct_noaf = round(100 * noaf_exposed / sum(clean_data$AF == 0, na.rm = TRUE), 0),
      or = results$estimate,
      ci_lower = results$conf.low,
      ci_upper = results$conf.high,
      p_value = results$p.value
    ))
  }, error = function(e) NULL)
}

# Build table
supp_table_3 <- data.frame(
  Criterion = character(),
  Threshold = character(),
  AF = character(),
  NoAF = character(),
  OR_CI = character(),
  P_value = character(),
  stringsAsFactors = FALSE
)

# Process each risk factor
for (rf_name in names(risk_factors)) {
  rf <- risk_factors[[rf_name]]
  
  if (!rf$var %in% names(Athletes_NoCM)) next
  
  result <- calculate_or_ci(Athletes_NoCM, rf)
  
  if (!is.null(result)) {
    # Format criterion name
    criterion <- case_when(
      rf_name == "Age" ~ "Age",
      rf_name == "SBP" ~ "Systolic Blood Pressure",
      rf_name == "LAVi_normal" ~ "Left Atrial Volume Index",
      rf_name == "LAVi_severe" ~ "Left Atrial Volume Index",
      rf_name == "LAVi_extreme" ~ "Left Atrial Volume Index",
      rf_name == "P_wave" ~ "P-wave duration",
      rf_name == "PTFV1" ~ "PTFV₁",
      rf_name == "PACs" ~ "Premature Atrial Contractions",
      rf_name == "PRS_Q4" ~ "AF-PRS",
      rf_name == "PRS_D10" ~ "AF-PRS",
      rf_name == "PRS_P98" ~ "AF-PRS",
      TRUE ~ rf_name
    )
    
    # Format threshold
    threshold <- if (!is.null(rf$label)) {
      rf$label
    } else if (rf$type == "range") {
      paste0(rf$threshold[1], "-", rf$threshold[2], " mL/m²")
    } else if (rf_name == "Age") {
      ">45 years"
    } else if (rf_name == "SBP") {
      "≥130 mmHg"
    } else if (rf_name == "P_wave") {
      ">120 ms"
    } else if (rf_name == "PTFV1") {
      "≥4 mV·ms"
    } else if (rf_name == "PACs") {
      "≥100/24h"
    } else if (rf_name == "PRS_Q4") {
      "Highest Quartile (Q4)"
    } else if (rf_name == "PRS_D10") {
      "Highest Decile (D10)"
    } else if (rf_name == "PRS_P98") {
      "98th Percentile"
    } else {
      paste0(rf$cutoff_type, rf$threshold)
    }
    
    supp_table_3 <- rbind(supp_table_3, data.frame(
      Criterion = criterion,
      Threshold = threshold,
      AF = paste0(result$n_af, " (", result$pct_af, "%)"),
      NoAF = paste0(result$n_noaf, " (", result$pct_noaf, "%)"),
      OR_CI = paste0(sprintf("%.1f", result$or), " (", 
                    sprintf("%.1f", result$ci_lower), "–",
                    sprintf("%.1f", result$ci_upper), ")"),
      P_value = format_p_value(result$p_value),
      stringsAsFactors = FALSE
    ))
  }
}

# Rename columns
colnames(supp_table_3) <- c(
  "Criterion",
  "Threshold",
  paste0("AF (n=", sum(Athletes_NoCM$AF == 1, na.rm = TRUE), ")"),
  paste0("No AF (n=", sum(Athletes_NoCM$AF == 0, na.rm = TRUE), ")"),
  "Odds Ratio (95% CI)",
  "p value"
)

cat("\n## Supplementary Table 3\n\n")

10.3 Supplementary Table 3

print(kable(supp_table_3,
      caption = "Supplementary Table 3. Univariable Risk Factors for Atrial Fibrillation Development",
      align = c("l", "l", "c", "c", "c", "c")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = TRUE, font_size = 10) %>%
  add_footnote(c("Legend = Data are presented as n (%). Odds ratios with 95% confidence intervals represent the increased odds of atrial fibrillation for athletes meeting each threshold criterion compared to those below threshold. P values were calculated using logistic regression. Abbreviations: AF, atrial fibrillation; CI, confidence interval; LAVi, left atrial volume index; OR, odds ratio; PAC, premature atrial contraction; PRS, polygenic risk score; PTFV₁, P-wave terminal force in lead V1; Q4, fourth quartile; D10, tenth decile."), notation = "none"))

Supplementary Table 3. Univariable Risk Factors for Atrial Fibrillation Development
Criterion	Threshold	AF (n=96)	No AF (n=257)	Odds Ratio (95% CI)	p value
Age	>45 years	74 (77%)	85 (33%)	6.8 (4.0–11.9)	<0.001
Systolic Blood Pressure	≥130 mmHg	58 (60%)	95 (37%)	2.6 (1.6–4.2)	<0.001
Left Atrial Volume Index	Normal Range (16-34)	9 (9%)	62 (24%)	0.3 (0.1–0.7)	0.003
Left Atrial Volume Index	Severely dilated (≥48)	41 (43%)	66 (26%)	2.2 (1.3–3.5)	0.002
Left Atrial Volume Index	Extremely dilated (≥60)	18 (19%)	8 (3%)	7.2 (3.1–18.1)	<0.001
P-wave duration	>120 ms	51 (62%)	74 (29%)	4.1 (2.4–6.9)	<0.001
PTFV₁	≥4 mV·ms	21 (26%)	23 (9%)	3.5 (1.8–6.7)	<0.001
Premature Atrial Contractions	≥100/24h	45 (56%)	33 (13%)	8.3 (4.7–14.9)	<0.001
AF-PRS	Highest Quartile (Q4)	34 (35%)	60 (23%)	1.8 (1.1–3.0)	0.023
AF-PRS	Highest Decile (D10)	22 (23%)	26 (10%)	2.6 (1.4–4.9)	0.002
AF-PRS	98th Percentile	8 (9%)	6 (2%)	3.9 (1.3–12.0)	0.015
Legend = Data are presented as n (%). Odds ratios with 95% confidence intervals represent the increased odds of atrial fibrillation for athletes meeting each threshold criterion compared to those below threshold. P values were calculated using logistic regression. Abbreviations: AF, atrial fibrillation; CI, confidence interval; LAVi, left atrial volume index; OR, odds ratio; PAC, premature atrial contraction; PRS, polygenic risk score; PTFV₁, P-wave terminal force in lead V1; Q4, fourth quartile; D10, tenth decile.

# Use the already-calculated Model C dataset from the exclusions section
Athletes_complete <- Athletes_model_C

cat("Complete cases for multivariable model:", nrow(Athletes_complete), "\n")

Complete cases for multivariable model: 328

cat("AF cases:", sum(Athletes_complete$AF), "\n\n")

AF cases: 77

if (nrow(Athletes_complete) >= 50) {
  # Fit multivariable model
  mv_model <- glm(AF ~ Age_decades + High_normal_BP + LAVi_34 + LAVi_60 + 
                    P_wave_120 + PTFV1_high + PAC_100 + highest_decile,
                  data = Athletes_complete,
                  family = binomial())
  
  # Get coefficients
  mv_coefs <- broom::tidy(mv_model, conf.int = TRUE, exponentiate = FALSE) %>%
    filter(term != "(Intercept)")
  
  mv_or <- broom::tidy(mv_model, conf.int = TRUE, exponentiate = TRUE) %>%
    filter(term != "(Intercept)")
  
  # Calculate points (from beta coefficients)
  beta_values <- mv_coefs$estimate
  min_beta <- min(abs(beta_values))
  points <- round(abs(beta_values) / min_beta)
  
  # Create summary table
  supp_table_4 <- data.frame(
    Variable = c("Age (per 10 years)", "SBP ≥130 mmHg", "LAVi >34 mL/m²",
                 "LAVi >60 mL/m²", "P-wave >120 ms", "PTFV₁ ≥4 mV·ms",
                 "PACs ≥100/24h", "AF-PRS top decile"),
    Beta = sprintf("%.3f", mv_coefs$estimate),
    SE = sprintf("%.3f", mv_coefs$std.error),
    OR = sprintf("%.2f", mv_or$estimate),
    CI_lower = sprintf("%.2f", mv_or$conf.low),
    CI_upper = sprintf("%.2f", mv_or$conf.high),
    P_value = sapply(mv_coefs$p.value, format_p_value),
    Points = points,
    stringsAsFactors = FALSE
  )
  
  colnames(supp_table_4) <- c(
    "Variable",
    "β (coefficient)",
    "SE",
    "OR",
    "CI lower",
    "CI upper",
    "p value",
    "Points Assigned"
  )
  
  cat("\n## Supplementary Table 4\n\n")
  print(kable(supp_table_4,
        caption = paste0("Supplementary Table 4. Multivariable Logistic Regression Model Predicting Atrial Fibrillation in Endurance Athletes (n=", nrow(Athletes_complete), ")"),
        align = c("l", "c", "c", "c", "c", "c", "c", "c")) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                  full_width = TRUE, font_size = 10) %>%
    add_footnote(c("Extended model incorporating genetic risk assessment. Statistical presentation as per Table 3. Abbreviations: AF, atrial fibrillation; CI, confidence interval; LAVi, left atrial volume index; OR, odds ratio; PAC, premature atrial contraction; PRS, polygenic risk score; PTFV₁, P-wave terminal force in lead V1; SBP, systolic blood pressure."),
               notation = "none"))
  
  # Print model performance
  cat("\n**Model Performance:**\n")
  cat("- AIC:", round(AIC(mv_model), 1), "\n")
  cat("- Deviance:", round(deviance(mv_model), 1), "\n")
  
} else {
  cat("Insufficient complete cases for multivariable modelling\n")
}

10.4 Supplementary Table 4

Supplementary Table 4. Multivariable Logistic Regression Model Predicting Atrial Fibrillation in Endurance Athletes (n=328)
Variable	β (coefficient)	SE	OR	CI lower	CI upper	p value	Points Assigned
Age (per 10 years)	0.247	0.105	1.28	1.05	1.58	0.018	1
SBP ≥130 mmHg	0.970	0.341	2.64	1.36	5.23	0.004	4
LAVi >34 mL/m²	0.857	0.465	2.36	0.99	6.24	0.065	3
LAVi >60 mL/m²	0.983	0.658	2.67	0.72	9.68	0.135	4
P-wave >120 ms	0.970	0.354	2.64	1.32	5.33	0.006	4
PTFV₁ ≥4 mV·ms	0.731	0.458	2.08	0.83	5.08	0.111	3
PACs ≥100/24h	1.555	0.365	4.74	2.33	9.78	<0.001	6
AF-PRS top decile	0.948	0.459	2.58	1.04	6.33	0.039	4
Extended model incorporating genetic risk assessment. Statistical presentation as per Table 3. Abbreviations: AF, atrial fibrillation; CI, confidence interval; LAVi, left atrial volume index; OR, odds ratio; PAC, premature atrial contraction; PRS, polygenic risk score; PTFV₁, P-wave terminal force in lead V1; SBP, systolic blood pressure.

Model Performance: - AIC: 266.9 - Deviance: 248.9

# Find matching data
matched_cases <- matched_controls <- data.frame()
for (dataset in list(Athletes_NoCM, NoCM, AM_paper)) {
  if ("Matching" %in% names(dataset)) {
    temp <- dataset %>% filter(!is.na(Matching) & Matching %in% c("Case", "Control"))
    if (nrow(temp) > 0) {
      matched_cases <- temp %>% filter(Matching == "Case")
      matched_controls <- temp %>% filter(Matching == "Control")
      break
    }
  }
}

if (nrow(matched_cases) > 0 && nrow(matched_controls) > 0) {
  supp_matched <- data.frame(Variable = character(), stringsAsFactors = FALSE)
  matched_combined <- rbind(matched_cases %>% mutate(group = "AF"), matched_controls %>% mutate(group = "No AF"))
  
  for (var in all_variables) {
    if (!var %in% names(matched_cases) || !var %in% names(matched_controls)) next
    is_cat <- var %in% categorical_vars
    is_nonnorm <- var %in% nonnormal_vars
    
    supp_matched <- rbind(supp_matched, data.frame(
      Variable = var,
      AF = format_stats(matched_cases, var, is_cat, is_nonnorm),
      NoAF = format_stats(matched_controls, var, is_cat, is_nonnorm),
      P = format_p_value(calculate_p_value(matched_combined, var, "group", is_cat, is_nonnorm)),
      stringsAsFactors = FALSE
    ))
  }
  
  supp_matched$Category <- sapply(supp_matched$Variable, assign_category)
  supp_matched$Variable <- sapply(supp_matched$Variable, clean_variable_names)
  
  # Order variables the same way as Tables 1 and 2
  supp_matched$Variable <- factor(supp_matched$Variable, levels = variable_order)
  supp_matched <- supp_matched %>% arrange(Variable)
  supp_matched$Variable <- as.character(supp_matched$Variable)
  
  # Table 5: Demographics and Exercise Capacity
  table5_cats <- c("Demographics", "Exercise Capacity")
  supp_5_cat <- supp_matched %>% 
    filter(Category %in% table5_cats) %>%
    mutate(Category = factor(Category, levels = table5_cats))
  
  # Table 6: Cardiac Parameters with same structure as Table 2
  table6_cat_order <- c("Left ventricular parameters", "Left atrial parameters", 
                        "Right Ventricular Parameters", "Doppler Parameters", 
                        "Electrocardiographic", "Biochemical")
  
  supp_6_cat <- supp_matched %>% 
    filter(Category %in% table6_cat_order) %>%
    mutate(Category = factor(Category, levels = table6_cat_order))
  
  supp_5 <- supp_5_cat %>% select(-Category)
  supp_6 <- supp_6_cat %>% select(-Category)
  
  colnames(supp_5) <- c("Characteristic", paste0("AF (n=", nrow(matched_cases), ")"), 
                        paste0("No AF (n=", nrow(matched_controls), ")"), "p value")
  colnames(supp_6) <- colnames(supp_5)
  
  cat("\n## Supplementary Table 5\n\n")
  print(kable(supp_5, caption = "Supplementary Table 5. Age- and Sex-Matched Analysis: Baseline Characteristics (n=160)",
        align = c("l", "c", "c", "c")) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE, font_size = 10) %>%
    pack_rows(index = table(supp_5_cat$Category)[table5_cats]) %>%
    add_footnote(c("Data presentation and statistical methods as per Supplementary Table 1. Cases and controls were matched 1:1 on age (±5 years) and sex.\nAbbreviations: As per Supplementary Table 1; AF, atrial fibrillation."), 
                 notation = "none"))
  
  cat("\n## Supplementary Table 6\n\n")
  print(kable(supp_6, caption = "Supplementary Table 6. Age- and Sex-Matched Analysis: Cardiac Parameters (n=160)",
        align = c("l", "c", "c", "c")) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = TRUE, font_size = 10) %>%
    pack_rows(index = table(supp_6_cat$Category)[table6_cat_order]) %>%
    add_footnote(c("Data presentation, statistical methods and abbreviations as per Supplementary Table 2."), 
                 notation = "none"))
} else {
  cat("Warning: No matched data available\n")
}

10.5 Supplementary Table 5

Supplementary Table 5. Age- and Sex-Matched Analysis: Baseline Characteristics (n=160)
Characteristic	AF (n=80)	No AF (n=80)	p value
Demographics
Age, years	56.5 [43.8-64.2]	55 [43-65]	0.868
Male sex, n (%)	73 (91.2%)	73 (91.2%)	1.000
BMI, kg/m²	24.5 [23.2-25.7]	24.6 [22.9-26.7]	0.929
BSA, m²	2 [1.9-2.1]	2.1 [1.9-2.2]	0.328
Average HR, bpm	63.5 ± 6.8	65.9 ± 7.1	0.048
Minimum HR, bpm	45 [40-48.5]	46 [41-49]	0.322
Bradycardia burden, % (24h)	5.6 [0.4-23.5]	2.5 [0.3-21.8]	0.474
Hypertension, n (%)	9 (11.5%)	14 (17.5%)	0.403
SBP, mmHg	131 [124-140.5]	130 [122-137]	0.581
DBP, mmHg	74.5 [69-81]	74 [65-79.2]	0.384
Dyslipidaemia, n (%)	17 (21.8%)	20 (25%)	0.774
Diabetes, n (%)	2 (2.6%)	0 (0%)	0.242
History of smoking, n (%)	16 (20.5%)	13 (16.2%)	0.627
Alcohol, drinks/week	3 [0-7.8]	5.5 [1.8-12]	0.057
Exercise Capacity
VO₂peak, mL/kg/min	43.9 ± 10.2	44.1 ± 14.3	0.921
VO₂peak, L/min	3570.1 ± 888.2	3590.7 ± 961.9	0.888
% of predicted exercise capacity	124.3 ± 17.9	123.8 ± 23.2	0.875
Respiratory exchange ratio	1.3 ± 0.1	1.3 ± 0.1	0.179
Total weekly MET-hours	78 [56-94]	71 [44-94]	0.391
Years of endurance training	26 [16.5-36]	22 [15-32]	0.421
Data presentation and statistical methods as per Supplementary Table 1. Cases and controls were matched 1:1 on age (±5 years) and sex. Abbreviations: As per Supplementary Table 1; AF, atrial fibrillation.

10.6 Supplementary Table 6

Supplementary Table 6. Age- and Sex-Matched Analysis: Cardiac Parameters (n=160)
Characteristic	AF (n=80)	No AF (n=80)	p value
Left ventricular parameters
LVEDV, mL	162.7 ± 39.8	166.4 ± 44.1	0.585
LVESV, mL	65.8 ± 17.3	69 ± 21.7	0.303
LVEF, %	58.4 ± 4.8	57.4 ± 5.3	0.213
LV strain, %	-18.5 ± 2.2	-18.8 ± 2.1	0.387
Left atrial parameters
LAESV, mL	89.5 [76.8-111.8]	84 [68.5-97]	0.010
LAVi, mL/m²	45.8 [38.1-54.6]	41.9 [32.7-47.9]	0.003
Mildly abnormal >34 mL/m², n (%)	72 (90%)	57 (71.2%)	0.005
Severely abnormal >48 mL/m², n (%)	32 (40%)	20 (25%)	0.063
Extremely abnormal >60 mL/m², n (%)	12 (15%)	1 (1.2%)	0.002
LA:LV	0.56 [0.48-0.71]	0.51 [0.41-0.62]	0.010
Reservoir strain, %	27.1 ± 8	29.5 ± 5.9	0.047
Conduit strain, %	16 ± 6.3	16.3 ± 6	0.763
Contractile strain, %	11.2 ± 4.7	13.4 ± 5.1	0.008
Right Ventricular Parameters
RVEDV, mL	147.7 ± 43.5	144 ± 44	0.595
RVESV, mL	80.3 ± 27.2	76.9 ± 25.9	0.433
RVEF, %	45.7 ± 8.5	46.3 ± 8.6	0.688
Doppler Parameters
E velocity, m/s	0.6 ± 0.2	0.6 ± 0.2	0.290
A velocity, m/s	0.4 ± 0.1	0.5 ± 0.1	0.194
E/A ratio	1.6 ± 1.4	1.4 ± 0.6	0.271
E′ velocity, cm/s	9.6 ± 2.8	8.7 ± 2.4	0.034
E/E’ ratio	5.9 ± 1.9	6.1 ± 1.7	0.680
Electrocardiographic
P wave duration, ms	124 [112-131]	120 [109.8-128]	0.246
P-wave duration >120 ms, n (%)	45 (60.8%)	41 (51.2%)	0.302
P wave axis, °	50.3 ± 33.8	50 ± 22.9	0.953
PTFV₁, mV·ms	2.4 [1.2-4.1]	2 [0.9-3]	0.020
PTFV₁ ≥4 mV·ms, n (%)	21 (28.4%)	6 (7.5%)	0.001
Advanced IAB, n (%)	11 (15.3%)	13 (16.2%)	1.000
PACs per 24 h	107 [20-480]	25.5 [9-73.5]	<0.001
≥100 PACs per 24 h, n (%)	38 (54.3%)	16 (20.5%)	<0.001
Biochemical
BNP, pg/mL	22.5 [11.2-52.8]	15.8 [7.3-29.1]	0.005
cTnI, ng/L	4 [2-7]	3 [2-5]	0.012
Data presentation, statistical methods and abbreviations as per Supplementary Table 2.

For Tables 7 & 8 (Genetic Analysis):

# Filter extreme PRS groups
high_prs <- Athletes_NoCM %>% filter(!is.na(`AF decile`) & `AF decile` == 10)
low_prs <- Athletes_NoCM %>% filter(!is.na(`AF decile`) & `AF decile` == 1)

if (nrow(high_prs) > 0 && nrow(low_prs) > 0) {
  
  # Build comparison table
  supp_prs <- data.frame(Variable = character(), stringsAsFactors = FALSE)
  prs_combined <- rbind(
    high_prs %>% mutate(PRS_group = "High"), 
    low_prs %>% mutate(PRS_group = "Low")
  )
  
  for (var in all_variables) {
    if (!var %in% names(high_prs) || !var %in% names(low_prs)) next
    
    is_cat <- var %in% categorical_vars
    is_nonnorm <- var %in% nonnormal_vars
    
    supp_prs <- rbind(supp_prs, data.frame(
      Variable = var,
      High = format_stats(high_prs, var, is_cat, is_nonnorm),
      Low = format_stats(low_prs, var, is_cat, is_nonnorm),
      P = format_p_value(calculate_p_value(prs_combined, var, "PRS_group", is_cat, is_nonnorm)),
      stringsAsFactors = FALSE
    ))
  }
  
  # Assign categories and clean names
  supp_prs$Category <- sapply(supp_prs$Variable, assign_category)
  supp_prs$Variable <- sapply(supp_prs$Variable, clean_variable_names)
  
  # Order variables the same way as other tables
  supp_prs$Variable <- factor(supp_prs$Variable, levels = variable_order)
  supp_prs <- supp_prs %>% arrange(Variable)
  supp_prs$Variable <- as.character(supp_prs$Variable)
  
  # Table 7: Demographics and Exercise Capacity
  table7_cats <- c("Demographics", "Exercise Capacity")
  supp_7_cat <- supp_prs %>% 
    filter(Category %in% table7_cats) %>%
    mutate(Category = factor(Category, levels = table7_cats))
  
  # Table 8: Cardiac Parameters with same structure as Tables 2 and 6
  table8_cat_order <- c("Left ventricular parameters", "Left atrial parameters",
                        "Right Ventricular Parameters", "Doppler Parameters",
                        "Electrocardiographic", "Biochemical")
  
  supp_8_cat <- supp_prs %>% 
    filter(Category %in% table8_cat_order) %>%
    mutate(Category = factor(Category, levels = table8_cat_order))
  
  # Remove category column for display
  supp_7 <- supp_7_cat %>% select(-Category)
  supp_8 <- supp_8_cat %>% select(-Category)
  
  # Rename columns
  colnames(supp_7) <- c("Characteristic", 
                        paste0("AF-PRS ≥90th percentile (n=", nrow(high_prs), ")"),
                        paste0("AF-PRS <10th percentile (n=", nrow(low_prs), ")"), 
                        "p value")
  colnames(supp_8) <- colnames(supp_7)
  
  # Display tables
  cat("\n## Supplementary Table 7\n\n")
  print(kable(supp_7, 
              caption = "Supplementary Table 7. Extreme Genetic Risk Analysis: Baseline Characteristics",
              align = c("l", "c", "c", "c")) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), 
                  full_width = TRUE, font_size = 10) %>%
    pack_rows(index = table(supp_7_cat$Category)[table7_cats]) %>%
    add_footnote(c("Comparison of athletes in highest (≥90th percentile) versus lowest (<10th percentile) deciles of AF-PRS. Data presentation and statistical methods as per Supplementary Table 1.\nAbbreviations: As per Supplementary Table 1; AF, atrial fibrillation; PRS, polygenic risk score."),
                 notation = "none"))
  
  cat("\n## Supplementary Table 8\n\n")
  print(kable(supp_8, 
              caption = "Supplementary Table 8. Extreme Genetic Risk Analysis: Cardiac Parameters",
              align = c("l", "c", "c", "c")) %>%
    kable_styling(bootstrap_options = c("striped", "hover", "condensed"), 
                  full_width = TRUE, font_size = 10) %>%
    pack_rows(index = table(supp_8_cat$Category)[table8_cat_order]) %>%
    add_footnote(c("Data presentation and statistical methods as per Supplementary Table 2.\nAbbreviations: As per Supplementary Table 2."),
                 notation = "none"))
  
} else {
  cat("Insufficient PRS data for extreme risk analysis\n")
}

10.7 Supplementary Table 7

Supplementary Table 7. Extreme Genetic Risk Analysis: Baseline Characteristics
Characteristic	AF-PRS ≥90th percentile (n=48)	AF-PRS <10th percentile (n=29)	p value
Demographics
Age, years	45.5 [22.2-65.2]	42 [22-59]	0.666
Male sex, n (%)	40 (83.3%)	24 (82.8%)	1.000
BMI, kg/m²	24.2 [22.4-26.4]	24 [22.3-26.3]	0.785
BSA, m²	2 [1.9-2.1]	2 [1.8-2.1]	0.829
Average HR, bpm	67.9 ± 10.4	64.8 ± 7.4	0.153
Minimum HR, bpm	45 [40.2-49]	45 [39.5-48.5]	0.575
Bradycardia burden, % (24h)	2.1 [0.4-14]	4.5 [0.3-26]	0.351
Hypertension, n (%)	6 (13.3%)	2 (7.1%)	0.702
SBP, mmHg	130 [120.5-135]	123 [118-131]	0.161
DBP, mmHg	74 [62.5-79]	71 [62-76]	0.401
Dyslipidaemia, n (%)	7 (15.6%)	5 (17.9%)	1.000
Diabetes, n (%)	1 (2.2%)	0 (0%)	1.000
History of smoking, n (%)	6 (13.3%)	3 (10.7%)	1.000
Alcohol, drinks/week	0 [0-3]	3 [0-8]	0.132
Exercise Capacity
VO₂peak, mL/kg/min	45.5 ± 15	47.1 ± 13.2	0.624
VO₂peak, L/min	3548.8 ± 1089.1	3633.6 ± 988.2	0.727
% of predicted exercise capacity	119.6 ± 19.9	121.2 ± 20.7	0.750
Respiratory exchange ratio	1.3 ± 0.1	1.3 ± 0.1	0.291
Total weekly MET-hours	77 [46-105.8]	84 [58-104]	0.874
Years of endurance training	20 [9.2-30]	19 [15-20]	0.994
Comparison of athletes in highest (≥90th percentile) versus lowest (<10th percentile) deciles of AF-PRS. Data presentation and statistical methods as per Supplementary Table 1. Abbreviations: As per Supplementary Table 1; AF, atrial fibrillation; PRS, polygenic risk score.

10.8 Supplementary Table 8

Supplementary Table 8. Extreme Genetic Risk Analysis: Cardiac Parameters
Characteristic	AF-PRS ≥90th percentile (n=48)	AF-PRS <10th percentile (n=29)	p value
Left ventricular parameters
LVEDV, mL	160.9 ± 45.4	164.2 ± 32.6	0.714
LVESV, mL	68 ± 21.8	65 ± 17.5	0.518
LVEF, %	56.6 ± 6.1	58.9 ± 5.4	0.108
LV strain, %	-18.2 ± 2.6	-18.7 ± 2.1	0.412
Left atrial parameters
LAESV, mL	84.5 [71-98.2]	81 [72-95]	0.829
LAVi, mL/m²	43.6 [35.8-49.1]	42.7 [35.2-48.6]	0.946
Mildly abnormal >34 mL/m², n (%)	38 (79.2%)	22 (75.9%)	0.956
Severely abnormal >48 mL/m², n (%)	14 (29.2%)	8 (27.6%)	1.000
Extremely abnormal >60 mL/m², n (%)	4 (8.3%)	1 (3.4%)	0.645
LA:LV	0.53 [0.43-0.69]	0.49 [0.43-0.64]	0.444
Reservoir strain, %	28.8 ± 8.9	31 ± 7.3	0.262
Conduit strain, %	18.1 ± 6.7	19.5 ± 7.6	0.458
Contractile strain, %	10.7 ± 5.8	11.6 ± 4.8	0.496
Right Ventricular Parameters
RVEDV, mL	145.2 ± 40.9	131.6 ± 33.2	0.120
RVESV, mL	77.3 ± 20.9	68.7 ± 16.3	0.050
RVEF, %	45.8 ± 9.2	46.8 ± 9.8	0.666
Doppler Parameters
E velocity, m/s	0.7 ± 0.2	0.7 ± 0.2	0.321
A velocity, m/s	0.4 ± 0.1	0.4 ± 0.1	0.733
E/A ratio	1.7 ± 0.8	1.8 ± 1	0.813
E′ velocity, cm/s	10.2 ± 3.3	10 ± 3.2	0.783
E/E’ ratio	6 ± 1.6	5.7 ± 1.4	0.465
Electrocardiographic
P wave duration, ms	114 [104-129]	118 [110-125]	0.991
P-wave duration >120 ms, n (%)	20 (46.5%)	12 (41.4%)	0.851
P wave axis, °	56.1 ± 21	49.7 ± 31.8	0.359
PTFV₁, mV·ms	2 [1.2-3.4]	1.4 [0-2.8]	0.107
PTFV₁ ≥4 mV·ms, n (%)	9 (20.9%)	4 (13.8%)	0.541
Advanced IAB, n (%)	5 (12.2%)	0 (0%)	0.075
PACs per 24 h	34.5 [11.8-109.8]	16.5 [6-67.5]	0.127
≥100 PACs per 24 h, n (%)	12 (27.3%)	6 (22.2%)	0.846
Biochemical
BNP, pg/mL	12.6 [9.4-34.7]	12.4 [5.7-30.1]	0.438
cTnI, ng/L	3 [2-9]	2 [2-4]	0.489
Data presentation and statistical methods as per Supplementary Table 2. Abbreviations: As per Supplementary Table 2.

11 Supplementary Figures

11.1 Supplementary Figure 1

Supplementary Figure 1. Distribution of Clinical Risk Scores by AF Status

if (!exists("cutoffs")) {
  prob_thresholds <- c(0.10, 0.40, 0.70)
  cutoffs <- round(map_dbl(prob_thresholds, 
    ~min(Athletes_analysis$clinical_score_B[Athletes_analysis$predicted_prob_B >= .x], na.rm = TRUE)))
}

ggplot(Athletes_analysis, aes(x = clinical_score_B_rounded,
                              fill = factor(AF, labels = c("No AF", "AF")))) +
  geom_histogram(binwidth = 1, position = "dodge", 
                 alpha = 0.8, color = "black") +
  # Use dynamic cutoffs instead of hardcoded values
  geom_segment(aes(x = cutoffs[1], xend = cutoffs[1], y = 0, yend = Inf),
               linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_segment(aes(x = cutoffs[2], xend = cutoffs[2], y = 0, yend = Inf),
               linetype = "dashed", color = "black", linewidth = 0.8) +
  geom_segment(aes(x = cutoffs[3], xend = cutoffs[3], y = 0, yend = Inf),
               linetype = "dashed", color = "black", linewidth = 0.8) +
  annotate("text", 
           x = c(cutoffs[1]/2, 
                 mean(c(cutoffs[1], cutoffs[2])), 
                 mean(c(cutoffs[2], cutoffs[3])), 
                 cutoffs[3] + 3), 
           y = -2, 
           label = c("Low", "Moderate", "High", "Very High"),
           size = 3.5, fontface = "italic") +
  
  scale_fill_manual(values = c("No AF" = "#0072B2", "AF" = "#990000")) +
  scale_y_continuous(expand = expansion(mult = c(0.08, 0.12))) +
  labs(x = "Clinical Risk Score",
       y = "Number of Athletes",
       fill = "AF Status") +
  coord_cartesian(clip = "off") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom",
        plot.margin = margin(t = 10, r = 10, b = 25, l = 10))

Legend = Distribution of Clinical Risk Scores by AF Status

11.2 Supplementary Figure 2

Supplementary Figure 2. AF prevalence by risk category

risk_prev <- Athletes_analysis %>%
  group_by(risk_category) %>%
  summarise(
    N = n(),
    AF_Cases = sum(AF),
    AF_Prevalence = mean(AF) * 100,
    CI_lower = binom.test(AF_Cases, N)$conf.int[1] * 100,
    CI_upper = binom.test(AF_Cases, N)$conf.int[2] * 100,
    .groups = "drop"
  )

ggplot(risk_prev, aes(x = risk_category, y = AF_Prevalence)) +
  geom_col(fill = "#6a994e", alpha = 0.8) +
  geom_errorbar(aes(ymin = CI_lower, ymax = CI_upper),
                width = 0.2, size = 0.6) +
  geom_text(aes(y = CI_upper, label = paste0(round(AF_Prevalence, 1), "%")),
            vjust = -2.5, size = 4, color = "black") +
  geom_text(aes(y = CI_upper, label = paste0("(", AF_Cases, "/", N, ")")),
            vjust = -1.0, size = 4, color = "black") +
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(x = "Risk Category",
       y = "AF Prevalence (%)") +
  theme_minimal(base_size = 14) +
  theme(panel.grid.major.x = element_blank())

11.3 Supplementary Figure 3

Supplementary Figure 3. Profile of Atrial Remodelling Markers by Clinical Risk Category

if (!"risk_category" %in% names(Athletes_analysis)) {
  
  # Define the probability thresholds
  prob_thresholds <- c(0.10, 0.40, 0.70)
  
  # Calculate the corresponding clinical score cutoffs dynamically
  cutoffs <- round(purrr::map_dbl(prob_thresholds, 
      ~min(Athletes_analysis$clinical_score_B[Athletes_analysis$predicted_prob_B >= .x], na.rm = TRUE)))
      
  # Define the breaks for the cut function
  score_breaks <- c(-Inf, cutoffs, Inf)

  # Create the risk categories using the rounded integer score for consistency
  Athletes_analysis <- Athletes_analysis %>%
    mutate(
      clinical_score_B_rounded = round(clinical_score_B),
      risk_category = cut(clinical_score_B_rounded, breaks = score_breaks,
                          labels = c("Low", "Moderate", "High", "Very High"),
                          include.lowest = TRUE)
    )
}

# Prepare radar chart data with Age >45 as binary prevalence
radar_data_C <- Athletes_analysis %>%
  group_by(risk_category) %>%
  summarise(
    `Age >45 years (%)` = mean(Age > 45, na.rm = TRUE) * 100,
    `SBP ≥130 mmHg (%)` = mean(High_normal_BP, na.rm = TRUE) * 100,
    `LAVi 35-60 mL/m² (%)` = mean(LAVi_34, na.rm = TRUE) * 100,
    `LAVi >60 mL/m² (%)` = mean(LAVi_60, na.rm = TRUE) * 100,
    `P wave >120ms (%)` = mean(P_wave_120, na.rm = TRUE) * 100,
    `PTFV1 ≥4.0 mV·ms (%)` = mean(PTFV1_high, na.rm = TRUE) * 100,
    `PACs ≥100/24h (%)` = mean(PAC_100, na.rm = TRUE) * 100,
    .groups = "drop"
  )

# Convert to data frame with row names
radar_df <- as.data.frame(radar_data_C[, -1])
rownames(radar_df) <- radar_data_C$risk_category

# Add max and min rows for fmsb package (all variables now 0-100%)
max_row <- rep(100, ncol(radar_df))
min_row <- rep(0, ncol(radar_df))
radar_chart_data <- rbind(max_row, min_row, radar_df)

# Define colors for each risk category
colors_border <- c("#0072B2", "#67A9CF", "#F4A582", "#C00000")
colors_fill <- adjustcolor(colors_border, alpha.f = 0.3)

# Set up 2x2 plotting layout
par(mfrow = c(2, 2), mar = c(1, 1, 3, 1))

# Create radar chart for each risk category
for (i in 1:4) {
  radarchart(
    radar_chart_data[c(1, 2, i + 2), ],
    axistype = 1,
    pcol = colors_border[i],
    pfcol = colors_fill[i],
    plwd = 2,
    cglcol = "grey",
    cglty = 1,
    cglwd = 0.8,
    axislabcol = "grey",
    vlcex = 0.9,
    title = rownames(radar_chart_data)[i + 2],
    title.col = "black",
    cex.main = 1.5
  )
}

# Reset plotting parameters
par(mfrow = c(1, 1))

11.4 Supplementary Figure 4

Supplementary Figure 4. Clinical risk score cutoffs corresponding to predicted AF probability thresholds

# Define the probability thresholds used for risk stratification in Table 4
prob_thresholds <- c(0.10, 0.40, 0.70)

# Calculate the corresponding clinical score cutoffs dynamically
# This ensures the vertical lines in the plot match the table's risk categories
cutoffs <- round(purrr::map_dbl(prob_thresholds, 
    ~min(Athletes_analysis$clinical_score_B[Athletes_analysis$predicted_prob_B >= .x], na.rm = TRUE)))

# Generate the plot
ggplot(Athletes_analysis, aes(x = clinical_score_B, 
                             y = predicted_prob_B * 100)) +
  geom_point(aes(color = factor(AF, labels = c("No AF", "AF"))), 
             alpha = 0.6) +
  geom_smooth(method = "loess", se = TRUE, color = "black") +
  # Use the thresholds variable for the horizontal lines
  geom_hline(yintercept = prob_thresholds * 100, 
             linetype = "dashed", alpha = 0.5) +
  # Use the calculated cutoffs variable for the vertical lines
  geom_vline(xintercept = cutoffs, 
             linetype = "dashed", alpha = 0.5) +
  scale_color_manual(values = c("No AF" = "#0072B2", "AF" = "#990000")) +
  labs(x = "Clinical Risk Score",
       y = "Predicted AF Probability (%)",
       color = "AF Status") +
  theme_minimal(base_size = 14) +
  theme(legend.position = "bottom")

Legend: Predicted AF probabilities were stratified into four probability thresholds: <10%, 10-40%, 40-70%, and >70%.

Using atrial myopathy metrics to devise a risk prediction tool for atrial fibrillation in endurance athletes

October 2025