## 1 Universidad Autónoma de Manizales, Manizales, Colombia
## 2 Fundación Universitaria del Área Andina, Seccional Pereira, Colombia
##
## This document contains the complete R script used for data cleaning, scoring, and descriptive analysis of the Canadian Assessment of Physical Literacy – second edition (CAPL-2) in a sample of Colombian schoolchildren. The script is provided to ensure transparency, reproducibility, and traceability of all analytical procedures reported in the manuscript.

#########################################################
## CAPL-2 – COMPLETE SCRIPT (PEDOMETER + QUESTIONNAIRES)
## Supplementary Material 1
## 
## Purpose:
## - Import raw CAPL-2 data from an Excel template.
## - Clean and recode variables to CAPL standard format.
## - Compute all CAPL-2 domain scores using the capl package.
## - Export results and generate descriptive plots.
##
## Requirements:
## - R version >= 4.3.2
## - Packages: capl, readxl, dplyr, stringr, tidyr, writexl, ggplot2
## - Raw data file: "Plantilla_CAPL2_ (2).xlsx" in the working directory.
#########################################################

## Run this script in a fresh R session (e.g. via `Rscript`, or after
## "Restart R" in RStudio) rather than clearing the workspace here.
## `rm(list = ls())` would delete the caller's objects whenever this file is
## source()d, yet it leaves attached packages and options untouched, so it
## does not provide a clean, reproducible starting state anyway.

## 0. Set working directory (edit to your local path) -----------------------
## setwd("~/CAPL2_validation_article")

library(capl)
library(readxl)
library(dplyr)
library(stringr)
library(tidyr)
library(writexl)

## 1. Import raw data from Excel -------------------------------------------
# Path of the Excel template holding the raw CAPL-2 records (relative to the
# working directory).
raw_file <- "Plantilla_CAPL2_ (2).xlsx"

# Fail early with an explicit message instead of letting a missing file
# surface later as an unrelated error in the renaming step.
if (!file.exists(raw_file)) {
  stop(
    "Raw data file not found in '", getwd(), "': ", raw_file,
    call. = FALSE
  )
}

raw_data <- import_capl_data(
  file_path  = raw_file,
  sheet_name = "Sheet1"
)

# Every later step assumes a table; stop here if the import did not return one.
if (!is.data.frame(raw_data)) {
  stop("Importing '", raw_file, "' did not return a data frame.", call. = FALSE)
}

# names(raw_data)   # uncomment if you want to inspect variable names

## 2. Helper function to keep times as "HH:MM" -----------------------------
# Normalise clock times to zero-padded 24-hour "HH:MM" strings.
#
# Args:
#   x: A vector of times. Supported inputs:
#      - POSIXct / POSIXlt date-times (only hour and minute are kept);
#      - numeric Excel serials (days since 1899-12-30, time as day fraction);
#      - character strings containing an "h:mm" or "h:mm:ss" token, optionally
#        followed by an am/pm marker (e.g. "1899-12-31 08:30:00", "7:10 PM").
#
# Returns:
#   A character vector with the same length as `x`. Elements that are empty,
#   "NA"/"na", contain no time token, or hold an out-of-range time
#   (hour > 23 or minute > 59) are returned as NA.
fix_time_char <- function(x) {
  # Date-time objects: drop the date and the seconds.
  if (inherits(x, "POSIXt")) {
    return(format(x, "%H:%M"))
  }

  # Excel serial numbers: convert days to seconds from the Excel origin.
  if (is.numeric(x)) {
    return(format(
      as.POSIXct(x * 86400, origin = "1899-12-30", tz = "UTC"),
      "%H:%M"
    ))
  }

  # Everything else is handled as text.
  x_chr <- trimws(as.character(x))
  x_chr[x_chr %in% c("", "NA", "na")] <- NA_character_

  # Groups: 1 = hour, 2 = minute, 3 = optional seconds, 5 = "a"/"p" of am/pm.
  time_re <- paste0(
    "([0-9]{1,2}):([0-9]{2})(:[0-9]{2})?",
    "[[:space:]]*(([AaPp])\\.?[[:space:]]*[Mm]\\.?)?"
  )

  # First time token in each string (NA input yields no match).
  hit <- regexpr(time_re, x_chr)
  found <- !is.na(hit) & hit > 0
  token <- regmatches(x_chr, hit)

  hours <- as.integer(sub(time_re, "\\1", token))
  minutes <- as.integer(sub(time_re, "\\2", token))
  meridiem <- tolower(sub(time_re, "\\5", token))

  # Convert 12-hour clock values so the result is always 24-hour.
  is_pm <- meridiem == "p" & hours < 12L
  hours[is_pm] <- hours[is_pm] + 12L
  hours[meridiem == "a" & hours == 12L] <- 0L

  # Seconds and any surrounding text are discarded; impossible times become NA.
  valid <- hours <= 23L & minutes <= 59L
  out <- rep(NA_character_, length(x_chr))
  out[found] <- ifelse(
    valid,
    sprintf("%02d:%02d", hours, minutes),
    NA_character_
  )
  out
}

## 3. Rename variables to CAPL standard names ------------------------------
## Note: Spanish labels correspond to the original Colombian data template.
# Lookup of CAPL standard names (vector names) to the column headers used in
# the Colombian Excel template (vector values). The per-day pedometer columns
# follow a fixed "<label> <day>" pattern, so they are generated for days 1-7.
days <- 1:7

capl_names <- c(
  id     = "id",
  age    = "Edad",
  gender = "Sexo",

  # Physical competence protocols
  pacer_lap_distance = "Distancia del test PACER (15 o 20 m)",
  pacer_laps         = "Vueltas PACER",
  plank_time         = "Tiempo plancha (segundos)",

  camsa_skill_score1 = "Puntaje CAMSA intento 1",
  camsa_time1        = "Tiempo CAMSA intento 1 (seg)",
  camsa_skill_score2 = "Puntaje CAMSA intento 2",
  camsa_time2        = "Tiempo CAMSA intento 2 (seg)",

  # Pedometer diary (one column per day for each measure)
  setNames(paste0("Pasos día ", days), paste0("steps", days)),
  setNames(paste0("Hora inicio día ", days), paste0("time_on", days)),
  setNames(paste0("Hora fin día ", days), paste0("time_off", days)),
  setNames(paste0("Minutos sin uso día ", days), paste0("non_wear_time", days)),

  self_report_pa = "Días activos últimos 7 (≥60 min)",

  # CSAPPA items already carry their CAPL names; listing them keeps the
  # check that these columns exist in the import.
  setNames(paste0("csappa", 1:6), paste0("csappa", 1:6)),

  # Intrinsic motivation
  why_active1 = "Ser activo es divertido",
  why_active2 = "Disfruto estar activo",
  why_active3 = "Me gusta estar activo",

  # Perceived competence
  feelings_about_pa1 = "Soy bueno en juegos activos",
  feelings_about_pa2 = "Hago bien las actividades",
  feelings_about_pa3 = "Tengo buenas habilidades",

  # Knowledge and understanding
  pa_guideline = "Guía de actividad física",
  crf_means    = "Significado de resistencia cardiorrespiratoria",
  ms_means     = "Significado de fuerza muscular",
  sports_skill = "Mejorar una habilidad deportiva",

  pa_is             = "La PA es,,,",
  pa_is_also        = "La PA también es,,,",
  improve           = "Mejorar,,,",
  increase          = "Incrementar,,,",
  when_cooling_down = "Al enfriarse hace,,,",
  heart_rate        = "Frecuencia cardíaca ="
)

# all_of() makes the rename fail if any expected template column is absent.
capl_raw <- rename(raw_data, all_of(capl_names))

## 4. Clean types (numeric / character) ------------------------------------
# Accepted spellings for each sex. Values are compared after trimming and
# lower-casing, so variants such as " F " or "MASCULINO" are recognised
# instead of silently becoming NA.
boy_labels  <- c("m", "masculino", "hombre", "niño", "boy", "1")
girl_labels <- c("f", "femenino", "mujer", "niña", "girl", "2")

capl_raw <- capl_raw %>%
  mutate(
    # age
    age = as.numeric(age),

    # gender -> 'boy' / 'girl' (anything unrecognised becomes NA)
    gender = str_to_lower(str_trim(as.character(gender))),
    gender = case_when(
      gender %in% boy_labels  ~ "boy",
      gender %in% girl_labels ~ "girl",
      TRUE ~ NA_character_
    ),

    # Fitness and motor-skill protocols
    pacer_lap_distance = as.numeric(pacer_lap_distance),
    pacer_laps         = as.numeric(pacer_laps),
    plank_time         = as.numeric(plank_time),

    camsa_skill_score1 = as.numeric(camsa_skill_score1),
    camsa_time1        = as.numeric(camsa_time1),
    camsa_skill_score2 = as.numeric(camsa_skill_score2),
    camsa_time2        = as.numeric(camsa_time2),

    # Daily step counts: blank cells become NA before the numeric conversion
    across(
      starts_with("steps"),
      ~ .x |> as.character() |> str_trim() |> na_if("") |> as.numeric()
    ),

    # Remove impossible step counts > 30000. replace() keeps the column
    # numeric even when every value is NA (ifelse() would return logical).
    across(
      starts_with("steps"),
      ~ replace(.x, which(.x > 30000), NA_real_)
    ),

    # Non-wear minutes: blanks and "na"/"no" markers (any letter case) are
    # set to NA up front so the numeric conversion does not warn about them.
    across(
      starts_with("non_wear_time"),
      ~ {
        txt <- str_trim(as.character(.x))
        txt[str_to_lower(txt) %in% c("", "na", "no")] <- NA_character_
        as.numeric(txt)
      }
    ),

    self_report_pa = as.numeric(self_report_pa),

    across(starts_with("csappa"), as.numeric),

    # NOTE: this is a positional range. It converts every column located
    # between why_active1 and heart_rate in the imported sheet, so it relies
    # on the template's column order.
    across(why_active1:heart_rate, as.numeric)
  ) %>%
  # Convert on/off times to "HH:MM" character format
  mutate(
    across(
      c(starts_with("time_on"), starts_with("time_off")),
      fix_time_char
    )
  )

## 5. Add missing CAPL variables (if required by capl) ---------------------
# Let capl add whichever of its expected variables the data set lacks.
capl_raw <- capl_raw %>%
  get_missing_capl_variables()

## 6. Compute all CAPL-2 scores --------------------------------------------
# sort = "asis" is passed through to capl::get_capl() unchanged
# (presumably keeps the existing column order; confirm in the capl manual).
capl_results <- capl_raw %>%
  get_capl(sort = "asis")

## 7. Quick check of pedometer + domain statuses ---------------------------
# Identifier, pedometer summary, and score/status pair of each domain plus
# the overall CAPL result, shown for the first rows as a sanity check.
check_cols <- c(
  "id", "age", "gender",
  "valid_days", "step_average", "step_score",
  "pc_score", "pc_status",
  "db_score", "db_status",
  "mc_score", "mc_status",
  "ku_score", "ku_status",
  "capl_score", "capl_interpretation", "capl_status"
)

capl_results %>%
  select(all_of(check_cols)) %>%
  head()

## 8. Export full results to Excel -----------------------------------------
# Written to the working directory.
write_xlsx(x = capl_results, path = "CAPL2_final_results.xlsx")

## 9. Descriptive plots -----------------------------------------------------
library(ggplot2)

## 9.1. Keep only children with complete CAPL status -----------------------
# Category order used for the interpretation factor.
capl_levels <- c("beginning", "progressing", "achieving", "excelling")

# Short English labels for plots
interpretation_short <- c(
  beginning   = "Beginning",
  progressing = "Progressing",
  achieving   = "Achieving",
  excelling   = "Excelling"
)

# Longer English descriptions (optional for tables/text)
interpretation_long <- c(
  beginning   = "BEGINNING\nBelow the recommended physical literacy level.",
  progressing = "PROGRESSING\nApproaching the level of peers of the same age.",
  achieving   = "ACHIEVING\nMeets the minimum recommended level.",
  excelling   = "EXCELLING\nExceeds the minimum recommended level."
)

sex_labels <- c(boy = "Boy", girl = "Girl")

# Only children whose overall CAPL status is "complete" are kept; the
# interpretation becomes an ordered-level factor and the English label
# columns are derived from it. Values without a label become NA.
capl_complete <- capl_results %>%
  filter(capl_status == "complete") %>%
  mutate(
    capl_interpretation = factor(capl_interpretation, levels = capl_levels),
    capl_interpretation_short = dplyr::recode(
      capl_interpretation,
      !!!interpretation_short,
      .default = NA_character_
    ),
    capl_interpretation_long = dplyr::recode(
      capl_interpretation,
      !!!interpretation_long,
      .default = NA_character_
    ),
    sex_en = dplyr::recode(gender, !!!sex_labels, .default = NA_character_)
  )

## 9.3. Histogram of total CAPL-2 score ------------------------------------
# Mean total score, computed once. Passing it as a constant draws a single
# reference line; mapping it through aes() would repeat the same line once
# for every row of the data.
mean_capl_score <- mean(capl_complete$capl_score, na.rm = TRUE)

# Histogram of the total CAPL-2 score (5-point bins) with the sample mean
# marked by a dashed vertical line.
ggplot(capl_complete, aes(x = capl_score)) +
  geom_histogram(binwidth = 5, color = "black", fill = "grey80") +
  geom_vline(
    xintercept = mean_capl_score,
    linetype = "dashed",
    linewidth = 1
  ) +
  labs(
    title = "Distribution of total physical literacy score (CAPL-2)",
    x = "Total CAPL-2 score",
    y = "Frequency"
  ) +
  theme_minimal(base_size = 12)

## 9.4. Bar plot: CAPL categories by sex -----------------------------------
# Number of children in each CAPL-2 category, with side-by-side bars per sex.
capl_complete %>%
  ggplot(mapping = aes(x = capl_interpretation_short, fill = sex_en)) +
  geom_bar(position = "dodge", colour = "black") +
  ggtitle("CAPL-2 categories by sex") +
  xlab("CAPL-2 category") +
  ylab("Number of children") +
  labs(fill = "Sex") +
  theme_minimal(base_size = 12)

## 10. Domain-level plots ---------------------------------------------------

## 10.1. Keep children with at least one complete domain -------------------
# A child is kept when at least one of the four domain statuses is "complete".
capl_domains <- capl_results %>%
  filter(
    if_any(
      c(pc_status, db_status, mc_status, ku_status),
      ~ .x == "complete"
    )
  ) %>%
  mutate(sex_en = recode(gender, boy = "Boy", girl = "Girl"))

## 10.2. Reshape domains to long format ------------------------------------
# English display names for the four domain score columns.
domain_labels <- c(
  pc_score = "Physical competence (PC)",
  db_score = "Daily behavior (DB)",
  mc_score = "Motivation and confidence (MC)",
  ku_score = "Knowledge and understanding (KU)"
)

# One row per child and domain: `domain` holds the score column name,
# `score` its value and `domain_en` the display label.
domains_long <- capl_domains %>%
  select(id, sex_en, pc_score, db_score, mc_score, ku_score) %>%
  pivot_longer(
    cols = ends_with("_score"),
    names_to = "domain",
    values_to = "score"
  ) %>%
  mutate(domain_en = recode(domain, !!!domain_labels))

## 10.3. Boxplot: domain scores --------------------------------------------
# One box per physical literacy domain; x labels are tilted to avoid overlap.
domains_long %>%
  ggplot(mapping = aes(x = domain_en, y = score)) +
  geom_boxplot(fill = "grey85") +
  ggtitle("Distribution of scores by physical literacy domain") +
  xlab("Domain") +
  ylab("Score") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

## 10.4. Boxplot: domain scores by sex -------------------------------------
# Same domain boxplots, split by sex with dodged boxes inside each domain.
domains_long %>%
  ggplot(mapping = aes(x = domain_en, y = score, fill = sex_en)) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  ggtitle("CAPL-2 domain scores by sex") +
  xlab("Domain") +
  ylab("Score") +
  labs(fill = "Sex") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1))

## 11. Daily steps by sex (pedometer domain) --------------------------------
# Children whose daily behaviour status is "complete", with an English sex
# label (values other than boy/girl become NA).
steps_by_sex <- capl_results %>%
  filter(db_status == "complete") %>%
  mutate(
    sex_en = dplyr::recode(
      gender,
      boy = "Boy",
      girl = "Girl",
      .default = NA_character_
    )
  )

# Boxplot of the average daily step count for each sex.
ggplot(steps_by_sex, aes(x = sex_en, y = step_average)) +
  geom_boxplot(fill = "grey85") +
  ggtitle("Average valid daily steps by sex") +
  xlab("Sex") +
  ylab("Steps/day") +
  theme_minimal(base_size = 12)