install_if_needed <- function(pkgs) {
to_install <- pkgs[!pkgs %in% rownames(installed.packages())]
if (length(to_install)) install.packages(to_install, dependencies = TRUE)
}
req <- c("tidyverse","readxl","janitor","stringr","forcats","gt","gtsummary","ggpubr","rstatix","fs","broom")
install_if_needed(req)
invisible(lapply(req, library, character.only = TRUE))
## Warning: package 'tidyverse' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
## Warning: package 'readxl' was built under R version 4.4.3
## Warning: package 'janitor' was built under R version 4.4.3
##
## Adjuntando el paquete: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
##
##
## Adjuntando el paquete: 'rstatix'
##
## The following object is masked from 'package:janitor':
##
## make_clean_names
##
## The following object is masked from 'package:stats':
##
## filter
options(width = 120, scipen = 999)
fs::dir_create("outputs/tables"); fs::dir_create("outputs/figures"); fs::dir_create("outputs/logs")
excel_path <- "C:/Users/fidel/OneDrive - UNIVERSIDAD AUTONOMA DE SINALOA/MDATOS/trabajo nuevocotix/Pacientes_Fractura_Filtrados_con_ID (2).xlsx"
stopifnot(file.exists(excel_path))
raw <- readxl::read_excel(excel_path, sheet = 1) %>% janitor::clean_names()
nm <- names(raw)
col_tx_cons <- grep("tratamiento.*conserv", nm, ignore.case = TRUE, value = TRUE)[1]
col_tx_quir <- grep("tratamiento.*quir", nm, ignore.case = TRUE, value = TRUE)[1]
if (is.na(col_tx_cons) || is.na(col_tx_quir)) {
# fallback estricto: últimas 2 columnas
last2 <- tail(nm, 2)
col_tx_cons <- last2[1]; col_tx_quir <- last2[2]
message(sprintf("[Aviso] No se encontraron columnas de tratamiento por nombre. Se asumen las últimas dos: %s, %s", col_tx_cons, col_tx_quir))
}
normalize_text <- function(x) stringr::str_to_upper(stringr::str_squish(stringr::str_trim(as.character(x))), locale = "es")
x <- raw %>%
mutate(across(where(is.character), ~na_if(.x, ""))) %>%
# Renombres suaves si existen
rename(
id = any_of(c("id_paciente","id")),
edad = any_of(c("edad")),
sexo = any_of(c("sexo")),
t_score = any_of(c("t_score","tscore","t_score_dxa")),
segmento_fractura_afectado = any_of(c("segmento_fractura_afectado","segmento_afectado","segmento")),
nivel_fractura_afectado = any_of(c("nivel_fractura_afectado","nivel")),
clas_ao_spine = any_of(c("clasificacion_ao_spine_of","clas_ao","clas_ao_spine_of"))
) %>%
mutate(
# Sexo F/M
sexo = factor(case_when(
normalize_text(sexo) %in% c("F","FEMENINO") ~ "F",
normalize_text(sexo) %in% c("M","MASCULINO") ~ "M",
TRUE ~ NA_character_
), levels = c("F","M")),
# Numéricos
edad = suppressWarnings(as.numeric(edad)),
t_score = suppressWarnings(as.numeric(t_score)),
# Segmento afectado (torácica/lumbar)
segmento_afectado = case_when(
str_detect(normalize_text(segmento_fractura_afectado), "TORAC|DOR|^T[0-9]|") ~ "TORÁCICA",
str_detect(normalize_text(segmento_fractura_afectado), "LUMB|^L[0-9]") ~ "LUMBAR",
TRUE ~ NA_character_
),
# Clasificación AO-OF (extraer OF1-OF5)
ao_raw = normalize_text(clas_ao_spine),
ao_tmp = str_extract(ao_raw, "OF\\s*[1-5]|^[1-5]"),
ao_tmp = str_replace_all(ao_tmp, "\\s+", ""),
clas_ao_of = factor(case_when(
ao_tmp %in% c("1","OF1") ~ "OF1",
ao_tmp %in% c("2","OF2") ~ "OF2",
ao_tmp %in% c("3","OF3") ~ "OF3",
ao_tmp %in% c("4","OF4") ~ "OF4",
ao_tmp %in% c("5","OF5") ~ "OF5",
TRUE ~ NA_character_
), levels = paste0("OF", 1:5), ordered = TRUE)
) %>%
select(-any_of(c("ao_raw","ao_tmp")))
norm_bin <- function(v) {
vv <- normalize_text(v)
out <- case_when(
vv %in% c("1","SI","SÍ","TRUE","T","X","PRESENTE") ~ 1,
vv %in% c("0","NO","FALSE","F","AUSENTE") ~ 0,
is.na(v) ~ NA_real_,
TRUE ~ suppressWarnings(as.numeric(v))
)
as.integer(out)
}
x <- x %>% mutate(
tx_cons = norm_bin(.data[[col_tx_cons]]),
tx_quir = norm_bin(.data[[col_tx_quir]]),
tipo_tratamiento = factor(case_when(
tx_cons == 1 & (is.na(tx_quir) | tx_quir == 0) ~ "Conservador",
tx_quir == 1 & (is.na(tx_cons) | tx_cons == 0) ~ "Quirúrgico",
TRUE ~ NA_character_
), levels = c("Conservador","Quirúrgico"))
)
x <- x %>% mutate(
grupo_edad = cut(edad, breaks = c(50,60,70,80,120), right = FALSE,
labels = c("50–59","60–69","70–79","≥80"))
)
if ("id" %in% names(x)) {
dup <- x %>% count(id) %>% filter(n > 1)
if (nrow(dup) > 0) {
readr::write_csv(x %>% filter(id %in% dup$id), "outputs/tables/01_duplicados.csv")
x <- x %>% distinct(id, .keep_all = TRUE)
}
}
# Rangos plausibles (ajustables al protocolo)
x <- x %>% mutate(
edad = if_else(!is.na(edad) & (edad < 50 | edad > 110), NA_real_, edad),
t_score = if_else(!is.na(t_score) & (t_score < -6 | t_score > 2), NA_real_, t_score)
)
# Faltantes
N <- nrow(x)
miss <- x %>% summarise(across(everything(), ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "variable", values_to = "n_na") %>%
mutate(pct = round(100*n_na/N,1)) %>% arrange(desc(pct))
readr::write_csv(miss, "outputs/tables/02_missingness.csv")
# --- 7) Descriptivos (Tabla global y por tratamiento) --------
# Global
tabla_global <- x %>%
select(edad, grupo_edad, sexo, t_score, segmento_afectado, clas_ao_of, tipo_tratamiento) %>%
gtsummary::tbl_summary(
missing = "ifany",
statistic = list(
gtsummary::all_continuous() ~ "{mean} \\u00B1 {sd}",
gtsummary::all_categorical() ~ "{n} ({p}%)"
),
digits = gtsummary::all_continuous() ~ 1
) %>% gtsummary::bold_labels()
tabla_global
| Characteristic | N = 1741 |
|---|---|
| edad | 64.5 \u00B1 10.4 |
| grupo_edad | |
| 50–59 | 53 (30%) |
| 60–69 | 76 (44%) |
| 70–79 | 23 (13%) |
| ≥80 | 22 (13%) |
| sexo | |
| F | 124 (71%) |
| M | 50 (29%) |
| t_score | -2.9 \u00B1 0.5 |
| segmento_afectado | |
| TORÁCICA | 174 (100%) |
| clas_ao_of | |
| OF1 | 38 (22%) |
| OF2 | 68 (39%) |
| OF3 | 34 (20%) |
| OF4 | 26 (15%) |
| OF5 | 8 (4.6%) |
| tipo_tratamiento | |
| Conservador | 132 (76%) |
| Quirúrgico | 42 (24%) |
| 1 Mean \u00B1 SD; n (%) | |
suppressMessages(tabla_global %>% gtsummary::as_gt() %>% gt::gtsave("outputs/tables/10_descriptivo_global.html"))
# Por tipo de tratamiento (si hay ≥2 categorías)
{
tabla_por_trat <- x %>%
select(tipo_tratamiento, edad, t_score, grupo_edad, sexo, segmento_afectado, clas_ao_of) %>%
gtsummary::tbl_summary(
by = tipo_tratamiento, missing = "ifany",
statistic = list(
gtsummary::all_continuous() ~ "{mean} \\{sd}",
gtsummary::all_categorical() ~ "{n} ({p}%)"
),
digits = gtsummary::all_continuous() ~ 1
) %>%
gtsummary::add_p(test = list(
gtsummary::all_continuous() ~ "t.test",
gtsummary::all_categorical() ~ "chisq.test"
)) %>%
gtsummary::bold_labels()
suppressMessages(tabla_por_trat %>% gtsummary::as_gt() %>% gt::gtsave("outputs/tables/11_descriptivo_por_trat.html"))
}
## The following errors were returned during `add_p()`:
## ✖ For variable `segmento_afectado` (`tipo_tratamiento`) and "statistic", "p.value", and "parameter" statistics: 'x' and
## 'y' must have at least 2 levels
## The following warnings were returned during `add_p()`:
## ! For variable `clas_ao_of` (`tipo_tratamiento`) and "statistic", "p.value", and "parameter" statistics: Chi-squared
## approximation may be incorrect
tabla_por_trat
| Characteristic | Conservador N = 1321 |
Quirúrgico N = 421 |
p-value2 |
|---|---|---|---|
| edad | 65.6 \10.8 | 61.2 \8.2 | 0.007 |
| t_score | -2.9 \0.5 | -3.1 \0.5 | 0.025 |
| grupo_edad | 0.015 | ||
| 50–59 | 35 (27%) | 18 (43%) | |
| 60–69 | 59 (45%) | 17 (40%) | |
| 70–79 | 16 (12%) | 7 (17%) | |
| ≥80 | 22 (17%) | 0 (0%) | |
| sexo | 0.3 | ||
| F | 97 (73%) | 27 (64%) | |
| M | 35 (27%) | 15 (36%) | |
| segmento_afectado | |||
| TORÁCICA | 132 (100%) | 42 (100%) | |
| clas_ao_of | <0.001 | ||
| OF1 | 38 (29%) | 0 (0%) | |
| OF2 | 67 (51%) | 1 (2.4%) | |
| OF3 | 22 (17%) | 12 (29%) | |
| OF4 | 5 (3.8%) | 21 (50%) | |
| OF5 | 0 (0%) | 8 (19%) | |
| 1 Mean \SD; n (%) | |||
| 2 Welch Two Sample t-test; Pearson’s Chi-squared test | |||
cat_tests <- list()
cat_vars <- c("sexo","segmento_afectado","clas_ao_of")
for (v in intersect(cat_vars, names(x))) {
df <- x %>% filter(!is.na(.data[[v]]), !is.na(tipo_tratamiento))
if (n_distinct(df[[v]]) >= 2 && n_distinct(df$tipo_tratamiento) >= 2 && nrow(df) > 0) {
tab <- table(df[[v]], df$tipo_tratamiento)
if (all(tab >= 5)) {
tt <- suppressWarnings(broom::tidy(chisq.test(tab)))
tt$test <- "chisq"
} else {
ft <- fisher.test(tab)
tt <- tibble::tibble(statistic = unname(ft$estimate %||% NA_real_), p.value = ft$p.value, method = "Fisher")
tt$test <- "fisher"
}
tt$variable <- v
cat_tests[[v]] <- tt
}
}
if (length(cat_tests)) readr::write_csv(bind_rows(cat_tests), "outputs/tables/12_tests_categoricas_vs_trat.csv")
# t de Student para edad y t_score vs tratamiento
cont_tests <- list()
for (v in c("edad","t_score")) {
if (v %in% names(x)) {
df <- x %>% filter(!is.na(.data[[v]]), !is.na(tipo_tratamiento))
if (n_distinct(df$tipo_tratamiento) >= 2 && nrow(df) > 2) {
tt <- broom::tidy(t.test(df[[v]] ~ df$tipo_tratamiento))
tt$variable <- v
cont_tests[[v]] <- tt
}
}
}
if (length(cont_tests)) readr::write_csv(bind_rows(cont_tests), "outputs/tables/13_ttests_vs_trat.csv")
cont_tests
## $edad
## # A tibble: 1 × 11
## estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative variable
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 4.36 65.6 61.2 2.78 0.00665 90.7 1.24 7.48 Welch Two Sample t-t… two.sided edad
##
## $t_score
## # A tibble: 1 × 11
## estimate estimate1 estimate2 statistic p.value parameter conf.low conf.high method alternative variable
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 0.189 -2.88 -3.07 2.29 0.0251 68.4 0.0243 0.353 Welch Two Sample t-t… two.sided t_score
# OR (modelo logístico) para probabilidad de tratamiento quirúrgico vs sexo (si procede)
{
df_or <- x %>% filter(!is.na(sexo), !is.na(tipo_tratamiento)) %>% mutate(
quir = if_else(tipo_tratamiento == "Quirúrgico", 1, 0)
)
if (n_distinct(df_or$quir) == 2 && n_distinct(df_or$sexo) == 2) {
fit <- glm(quir ~ sexo, data = df_or, family = binomial())
or_tbl <- broom::tidy(fit, conf.int = TRUE, exponentiate = TRUE)
readr::write_csv(or_tbl, "outputs/tables/14_or_quirurgico_vs_sexo.csv")
}
}
or_tbl
## # A tibble: 2 × 7
## term estimate std.error statistic p.value conf.low conf.high
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 (Intercept) 0.278 0.218 -5.88 0.00000000417 0.178 0.420
## 2 sexoM 1.54 0.378 1.14 0.253 0.724 3.21
p_trat <- x %>% filter(!is.na(tipo_tratamiento)) %>% count(tipo_tratamiento) %>%
ggplot(aes(x = tipo_tratamiento, y = n)) + geom_col() + theme_minimal() +
labs(x = "Tipo de tratamiento", y = "Frecuencia")
try(ggsave("outputs/figures/01_tratamiento_bar.png", p_trat, width = 6, height = 4, dpi = 300), silent = TRUE)
p_trat
# Boxplots por tratamiento con p de t.test (edad, t_score)
{
if ("edad" %in% names(x)) {
p_age <- ggpubr::ggboxplot(x, x = "tipo_tratamiento", y = "edad", fill = "tipo_tratamiento") +
ggpubr::stat_compare_means(method = "t.test", label = "p.format") +
labs(x = "Tipo de tratamiento", y = "Edad (años)")
try(ggsave("outputs/figures/02_edad_por_trat.png", p_age, width = 6, height = 4, dpi = 300), silent = TRUE)
}
if ("t_score" %in% names(x)) {
p_ts <- ggpubr::ggboxplot(x, x = "tipo_tratamiento", y = "t_score", fill = "tipo_tratamiento") +
ggpubr::stat_compare_means(method = "t.test", label = "p.format") +
labs(x = "Tipo de tratamiento", y = "T-score")
try(ggsave("outputs/figures/03_tscore_por_trat.png", p_ts, width = 6, height = 4, dpi = 300), silent = TRUE)
}
}
p_age
# Barras: AO/OF por tratamiento con p (Chi^2/Fisher)
if (all(c("clas_ao_of","tipo_tratamiento") %in% names(x))) {
dfp <- x %>% filter(!is.na(clas_ao_of), !is.na(tipo_tratamiento))
if (nrow(dfp) > 0 && n_distinct(dfp$tipo_tratamiento) >= 2) {
tab <- table(dfp$clas_ao_of, dfp$tipo_tratamiento)
if (all(tab >= 5)) {
pval <- suppressWarnings(chisq.test(tab)$p.value)
subt <- paste0("Chi^2: p=", signif(pval, 3))
} else {
pval <- fisher.test(tab)$p.value
subt <- paste0("Fisher: p=", signif(pval, 3))
}
p_of <- dfp %>% count(clas_ao_of, tipo_tratamiento) %>%
ggplot(aes(x = clas_ao_of, y = n, fill = tipo_tratamiento)) + geom_col(position = position_dodge()) +
theme_minimal() + labs(x = "AO/OF", y = "Casos", subtitle = subt)
try(ggsave("outputs/figures/04_ao_of_por_trat.png", p_of, width = 7, height = 4, dpi = 300), silent = TRUE)
}
}
p_of