## 1 Universidad Autónoma de Manizales, Manizales, Colombia
## 2 Fundación Universitaria del Área Andina, Seccional Pereira, Colombia
##
## This document contains the complete R script used for data cleaning,
## scoring, and descriptive analysis of the Canadian Assessment of Physical
## Literacy – second edition (CAPL-2) in a sample of Colombian schoolchildren.
## The script is provided to ensure transparency, reproducibility, and
## traceability of all analytical procedures reported in the manuscript.
#########################################################
## CAPL-2 – COMPLETE SCRIPT (PEDOMETER + QUESTIONNAIRES)
## Supplementary Material 1
##
## Purpose:
## - Import raw CAPL-2 data from an Excel template.
## - Clean and recode variables to CAPL standard format.
## - Compute all CAPL-2 domain scores using the capl package.
## - Export results and generate descriptive plots.
##
## Requirements:
## - R version >= 4.3.2
## - Packages: capl, readxl, dplyr, stringr, tidyr, writexl, ggplot2
## - Raw data file: "Plantilla_CAPL2_ (2).xlsx" in the working directory.
#########################################################
## NOTE: the global workspace is deliberately NOT cleared here.
## `rm(list = ls())` only deletes objects in the global environment (attached
## packages and options are left as they were), so it does not give a clean
## state, and it destroys the objects of anyone who sources this script.
## For a reproducible run, start a fresh R session before executing the
## script (e.g. via `Rscript` or "Restart R" in the IDE).
## 0. Set working directory (edit to your local path) -----------------------
## setwd("~/CAPL2_validation_article")
library(capl)
library(readxl)
library(dplyr)
library(stringr)
library(tidyr)
library(writexl)
## 1. Import raw data from Excel -------------------------------------------
## The path is checked before importing so that a missing or misplaced
## template stops the script here with a clear message instead of failing
## later, in the renaming step, with an unrelated-looking error.
raw_file <- "Plantilla_CAPL2_ (2).xlsx"
if (!file.exists(raw_file)) {
  stop(
    "Raw data file '", raw_file, "' was not found in '", getwd(), "'. ",
    "Set the working directory to the folder that contains it.",
    call. = FALSE
  )
}
raw_data <- import_capl_data(
  file_path = raw_file,
  sheet_name = "Sheet1"
)
## Guard against an import that did not produce a table (for example when
## the sheet name does not exist in the workbook).
if (!is.data.frame(raw_data)) {
  stop(
    "Importing sheet 'Sheet1' from '", raw_file,
    "' did not return a data frame.",
    call. = FALSE
  )
}
# names(raw_data) # uncomment if you want to inspect variable names
## 2. Helper function to keep times as "HH:MM" -----------------------------
# Normalise clock times to zero-padded 24-hour "HH:MM" strings.
#
# Arguments:
#   x  A vector of times: POSIXct/POSIXlt date-times, numeric Excel serials
#      (fraction of a day, optionally with a date part), or text such as
#      "8:30", "1899-12-31 08:30:00" or "8:30 p. m.".
#
# Returns:
#   A character vector the same length as `x`. Blank, "NA" and "na" entries
#   become NA. Text in which no h:mm pattern is found is returned trimmed
#   but otherwise unchanged, so later validation can still see it.
fix_time_char <- function(x) {
  # Date-times: keep only hour and minutes
  if (inherits(x, "POSIXt")) {
    return(format(x, "%H:%M"))
  }
  # Numeric (Excel time format): fraction of a day since 1899-12-30
  if (is.numeric(x)) {
    # Round to whole seconds first. A serial stored with limited precision
    # (e.g. 0.35416666 for 08:30) lands a fraction of a second early, and
    # formatting truncates, which would otherwise report 08:29.
    seconds <- round(x * 86400)
    return(format(
      as.POSIXct(seconds, origin = "1899-12-30", tz = "UTC"),
      "%H:%M"
    ))
  }
  # Anything else is handled as text
  x_chr <- trimws(as.character(x), whitespace = "[\\h\\v]")
  x_chr[x_chr %in% c("", "NA", "na")] <- NA

  # Locate the first h:mm or h:mm:ss token in each string (NA stays NA)
  hit <- regexpr("[0-9]{1,2}:[0-9]{2}(:[0-9]{2})?", x_chr)
  found <- !is.na(hit) & hit > 0L

  if (any(found)) {
    src <- x_chr[found]
    start <- hit[found]
    width <- attr(hit, "match.length")[found]

    pieces <- strsplit(substring(src, start, start + width - 1L), ":",
                       fixed = TRUE)
    hour <- as.integer(vapply(pieces, function(p) p[1L], character(1)))
    minute <- vapply(pieces, function(p) p[2L], character(1))

    # Text right after the clock token may carry an AM/PM marker
    # ("PM", "p.m.", "p. m."); convert those to the 24-hour clock instead
    # of leaving the marker attached or dropping it.
    rest <- substring(src, start + width)
    is_pm <- grepl("^\\s*p\\.?\\s*m", rest, ignore.case = TRUE, perl = TRUE)
    is_am <- grepl("^\\s*a\\.?\\s*m", rest, ignore.case = TRUE, perl = TRUE)
    shift <- is_pm & hour < 12L
    hour[shift] <- hour[shift] + 12L
    hour[is_am & hour == 12L] <- 0L

    # Seconds and any surrounding text are dropped; hours are zero-padded
    x_chr[found] <- sprintf("%02d:%s", hour, minute)
  }
  x_chr
}
## 3. Rename variables to CAPL standard names ------------------------------
## Note: Spanish labels correspond to the original Colombian data template.
## The lookup below reads as: CAPL name = "column header in the template".
capl_raw <- rename(
  raw_data,
  all_of(c(
    id = "id",
    age = "Edad",
    gender = "Sexo",
    # Physical competence protocols
    pacer_lap_distance = "Distancia del test PACER (15 o 20 m)",
    pacer_laps = "Vueltas PACER",
    plank_time = "Tiempo plancha (segundos)",
    camsa_skill_score1 = "Puntaje CAMSA intento 1",
    camsa_time1 = "Tiempo CAMSA intento 1 (seg)",
    camsa_skill_score2 = "Puntaje CAMSA intento 2",
    camsa_time2 = "Tiempo CAMSA intento 2 (seg)",
    # Pedometer: daily step counts
    steps1 = "Pasos día 1",
    steps2 = "Pasos día 2",
    steps3 = "Pasos día 3",
    steps4 = "Pasos día 4",
    steps5 = "Pasos día 5",
    steps6 = "Pasos día 6",
    steps7 = "Pasos día 7",
    # Pedometer: time the device was put on
    time_on1 = "Hora inicio día 1",
    time_on2 = "Hora inicio día 2",
    time_on3 = "Hora inicio día 3",
    time_on4 = "Hora inicio día 4",
    time_on5 = "Hora inicio día 5",
    time_on6 = "Hora inicio día 6",
    time_on7 = "Hora inicio día 7",
    # Pedometer: time the device was taken off
    time_off1 = "Hora fin día 1",
    time_off2 = "Hora fin día 2",
    time_off3 = "Hora fin día 3",
    time_off4 = "Hora fin día 4",
    time_off5 = "Hora fin día 5",
    time_off6 = "Hora fin día 6",
    time_off7 = "Hora fin día 7",
    # Pedometer: minutes not worn
    non_wear_time1 = "Minutos sin uso día 1",
    non_wear_time2 = "Minutos sin uso día 2",
    non_wear_time3 = "Minutos sin uso día 3",
    non_wear_time4 = "Minutos sin uso día 4",
    non_wear_time5 = "Minutos sin uso día 5",
    non_wear_time6 = "Minutos sin uso día 6",
    non_wear_time7 = "Minutos sin uso día 7",
    self_report_pa = "Días activos últimos 7 (≥60 min)",
    # CSAPPA (headers already use the CAPL names)
    csappa1 = "csappa1",
    csappa2 = "csappa2",
    csappa3 = "csappa3",
    csappa4 = "csappa4",
    csappa5 = "csappa5",
    csappa6 = "csappa6",
    # Intrinsic motivation
    why_active1 = "Ser activo es divertido",
    why_active2 = "Disfruto estar activo",
    why_active3 = "Me gusta estar activo",
    # Perceived competence
    feelings_about_pa1 = "Soy bueno en juegos activos",
    feelings_about_pa2 = "Hago bien las actividades",
    feelings_about_pa3 = "Tengo buenas habilidades",
    # Knowledge and understanding
    pa_guideline = "Guía de actividad física",
    crf_means = "Significado de resistencia cardiorrespiratoria",
    ms_means = "Significado de fuerza muscular",
    sports_skill = "Mejorar una habilidad deportiva",
    pa_is = "La PA es,,,",
    pa_is_also = "La PA también es,,,",
    improve = "Mejorar,,,",
    increase = "Incrementar,,,",
    when_cooling_down = "Al enfriarse hace,,,",
    heart_rate = "Frecuencia cardíaca ="
  ))
)
## 4. Clean types (numeric / character) ------------------------------------
## Coerces every CAPL input to the type used downstream: numeric for scores,
## counts and questionnaire items, 'boy'/'girl' for gender, and "HH:MM" text
## for pedometer on/off times.
capl_raw <- capl_raw %>%
  mutate(
    # age
    age = as.numeric(age),
    # gender -> 'boy' / 'girl'
    # Labels are trimmed and lower-cased before matching, so variants such
    # as "MASCULINO", "Niña " or a numeric 1 are recognised instead of being
    # silently turned into NA.
    gender = str_to_lower(str_trim(as.character(gender))),
    gender = case_when(
      gender %in% c("m", "masculino", "hombre", "niño", "boy", "1") ~ "boy",
      gender %in% c("f", "femenino", "mujer", "niña", "girl", "2") ~ "girl",
      TRUE ~ NA_character_
    ),
    pacer_lap_distance = as.numeric(pacer_lap_distance),
    pacer_laps = as.numeric(pacer_laps),
    plank_time = as.numeric(plank_time),
    camsa_skill_score1 = as.numeric(camsa_skill_score1),
    camsa_time1 = as.numeric(camsa_time1),
    camsa_skill_score2 = as.numeric(camsa_skill_score2),
    camsa_time2 = as.numeric(camsa_time2),
    # Step counts: blank cells become NA, everything else is parsed as number
    across(starts_with("steps"),
           ~ .x |> as.character() |> str_trim() |> na_if("") |> as.numeric()),
    # Remove impossible step counts > 30000
    across(starts_with("steps"),
           ~ ifelse(.x > 30000, NA, .x)),
    # Non-wear minutes: blank cells and "na"/"no" style entries become NA
    across(
      starts_with("non_wear_time"),
      ~ .x |>
        as.character() |>
        str_trim() |>
        na_if("") |>
        na_if("na") |>
        na_if("NA") |>
        na_if("Na") |>
        na_if("no") |>
        as.numeric()
    ),
    self_report_pa = as.numeric(self_report_pa),
    across(starts_with("csappa"), as.numeric),
    # Questionnaire items are listed by name rather than as the positional
    # range why_active1:heart_rate, so the result does not depend on the
    # column order of the Excel sheet (a range would also coerce any
    # unrelated column that happens to sit between the two ends).
    across(
      all_of(c(
        "why_active1", "why_active2", "why_active3",
        "feelings_about_pa1", "feelings_about_pa2", "feelings_about_pa3",
        "pa_guideline", "crf_means", "ms_means", "sports_skill",
        "pa_is", "pa_is_also", "improve", "increase",
        "when_cooling_down", "heart_rate"
      )),
      as.numeric
    )
  ) %>%
  # Convert on/off times to "HH:MM" character format
  mutate(
    across(starts_with("time_on"), fix_time_char),
    across(starts_with("time_off"), fix_time_char)
  )
## 5. Add missing CAPL variables (if required by capl) ---------------------
capl_raw <- capl_raw |> get_missing_capl_variables()
## 6. Compute all CAPL-2 scores --------------------------------------------
## sort = "asis" is passed through to get_capl() unchanged.
capl_results <- capl_raw |> get_capl(sort = "asis")
## 7. Quick check of pedometer + domain statuses ---------------------------
## Prints the first rows of the key identification, pedometer, domain and
## overall columns.
head(
  select(
    capl_results,
    id, age, gender,
    valid_days, step_average, step_score,
    pc_score, pc_status,
    db_score, db_status,
    mc_score, mc_status,
    ku_score, ku_status,
    capl_score, capl_interpretation, capl_status
  ),
  n = 6L
)
## 8. Export full results to Excel -----------------------------------------
write_xlsx(x = capl_results, path = "CAPL2_final_results.xlsx")
## 9. Descriptive plots -----------------------------------------------------
library(ggplot2)
## 9.1. Keep only children with complete CAPL status -----------------------
## 9.2. Order and recode interpretations in English ------------------------
## Both steps run in one pipeline: rows are filtered to
## capl_status == "complete", then the interpretation is turned into an
## ordered set of categories with short and long English labels.
capl_levels <- c("beginning", "progressing", "achieving", "excelling")
capl_complete <- capl_results |>
  filter(capl_status == "complete") |>
  mutate(
    capl_interpretation = factor(capl_interpretation, levels = capl_levels),
    # Short English labels for plots
    capl_interpretation_short = factor(
      capl_interpretation,
      levels = capl_levels,
      labels = c("Beginning", "Progressing", "Achieving", "Excelling")
    ),
    # Longer English descriptions (optional for tables/text)
    capl_interpretation_long = factor(
      capl_interpretation,
      levels = capl_levels,
      labels = c(
        "BEGINNING\nBelow the recommended physical literacy level.",
        "PROGRESSING\nApproaching the level of peers of the same age.",
        "ACHIEVING\nMeets the minimum recommended level.",
        "EXCELLING\nExceeds the minimum recommended level."
      )
    ),
    # English sex label; anything other than 'boy'/'girl' becomes NA
    sex_en = case_when(
      gender == "boy" ~ "Boy",
      gender == "girl" ~ "Girl",
      TRUE ~ NA_character_
    )
  )
## 9.3. Histogram of total CAPL-2 score ------------------------------------
## The dashed line marks the sample mean. It is computed once and passed as
## a constant: mapping it inside aes() makes ggplot2 draw one identical
## line per row of the data.
ggplot(capl_complete, aes(x = capl_score)) +
  geom_histogram(binwidth = 5, color = "black", fill = "grey80") +
  geom_vline(
    xintercept = mean(capl_complete$capl_score, na.rm = TRUE),
    linetype = "dashed", linewidth = 1
  ) +
  labs(
    title = "Distribution of total physical literacy score (CAPL-2)",
    x = "Total CAPL-2 score",
    y = "Frequency"
  ) +
  theme_minimal(base_size = 12)
## 9.4. Bar plot: CAPL categories by sex -----------------------------------
## Counts of children per CAPL-2 category, with side-by-side bars per sex.
ggplot(
  data = capl_complete,
  mapping = aes(x = capl_interpretation_short, fill = sex_en)
) +
  geom_bar(colour = "black", position = "dodge") +
  theme_minimal(base_size = 12) +
  labs(
    fill = "Sex",
    y = "Number of children",
    x = "CAPL-2 category",
    title = "CAPL-2 categories by sex"
  )
## 10. Domain-level plots ---------------------------------------------------
## 10.1. Keep children with at least one complete domain -------------------
## A row is kept when any of the four domain status columns equals
## "complete"; all four domain scores of that row are carried forward.
capl_domains <- capl_results |>
  filter(
    if_any(
      c(pc_status, db_status, mc_status, ku_status),
      ~ .x == "complete"
    )
  ) |>
  mutate(sex_en = recode(gender, boy = "Boy", girl = "Girl"))
## 10.2. Reshape domains to long format ------------------------------------
## One row per child and domain, with a readable English domain label.
domains_long <- capl_domains |>
  select(id, sex_en, pc_score, db_score, mc_score, ku_score) |>
  pivot_longer(
    cols = -c(id, sex_en),
    names_to = "domain",
    values_to = "score"
  ) |>
  mutate(
    domain_en = recode(
      domain,
      pc_score = "Physical competence (PC)",
      db_score = "Daily behavior (DB)",
      mc_score = "Motivation and confidence (MC)",
      ku_score = "Knowledge and understanding (KU)"
    )
  )
## 10.3. Boxplot: domain scores --------------------------------------------
## One box per domain, all children pooled.
ggplot(data = domains_long, mapping = aes(x = domain_en, y = score)) +
  geom_boxplot(fill = "grey85") +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1)) +
  labs(
    y = "Score",
    x = "Domain",
    title = "Distribution of scores by physical literacy domain"
  )
## 10.4. Boxplot: domain scores by sex -------------------------------------
## Same layout as above, with boxes split and coloured by sex.
ggplot(
  data = domains_long,
  mapping = aes(x = domain_en, y = score, fill = sex_en)
) +
  geom_boxplot(position = position_dodge(width = 0.8)) +
  theme_minimal(base_size = 12) +
  theme(axis.text.x = element_text(angle = 20, hjust = 1)) +
  labs(
    fill = "Sex",
    y = "Score",
    x = "Domain",
    title = "CAPL-2 domain scores by sex"
  )
## 11. Daily steps by sex (pedometer domain) --------------------------------
## Uses only rows whose daily behaviour status is "complete" and compares
## the average step count between boys and girls.
capl_results |>
  filter(db_status == "complete") |>
  mutate(
    # English sex label; anything other than 'boy'/'girl' becomes NA
    sex_en = case_when(
      gender == "boy" ~ "Boy",
      gender == "girl" ~ "Girl",
      TRUE ~ NA_character_
    )
  ) |>
  ggplot(mapping = aes(x = sex_en, y = step_average)) +
  geom_boxplot(fill = "grey85") +
  theme_minimal(base_size = 12) +
  labs(
    y = "Steps/day",
    x = "Sex",
    title = "Average valid daily steps by sex"
  )