library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Read the unified positivity-analysis workbook; BASE is the working name used
# by every later step.
BD_unificada_analisis_positividad_2 <- read_excel(
  "BD unificada_analisis positividad_2.xls"
)
BASE <- BD_unificada_analisis_positividad_2

# --- Clean-up of a variable that already exists in the data set ---
# Values equal to 0 in ult_tamizaje_agr are recoded to NA; which() drops rows
# that are already missing, so they are left untouched.
BASE$ult_tamizaje_agr <- replace(
  BASE$ult_tamizaje_agr,
  which(BASE$ult_tamizaje_agr == 0),
  NA
)
# --- Main categorical variables ---
# Each *_cat column is a labelled factor built from the numeric code of the
# source column; codes outside the listed levels become NA.
BASE <- BASE %>%
  mutate(
    metodo_int_treat_cat = factor(metodo_int_treat, levels = c(0, 1),
                                  labels = c("Clinician", "Self-collected")),
    departamento_cat = factor(departamento, levels = c(1, 2),
                              labels = c("Putumayo", "Choco")),
    etnia_cat = factor(etnia_agr, levels = c(1, 2, 3),
                       labels = c("Indigena", "Afrocolombiano", "Otro")),
    # NOTE: check with her whether this should be levels = c(0, 1) and
    # labels = c("Tamizaje rutinario", "Campaña"), according to what she
    # said most recently.
    estrategia_toma_cat = factor(estrategia_toma, levels = c(1, 2),
                                 labels = c("Campaña", "Tamizaje rutinario")),
    # Residence codes 1-2 are collapsed into "Distante" and 3-4 into "No_D";
    # any other code (or a missing value) stays NA.
    lugar_de_residencia_cat = factor(
      case_when(
        lugar_de_residencia %in% c(1, 2) ~ "Distante",
        lugar_de_residencia %in% c(3, 4) ~ "No_D",
        TRUE ~ NA_character_
      ),
      levels = c("Distante", "No_D")
    )
  )
# --- HPV groups by oncogenic risk ---
# Each flag is 1 when at least one genotype column of the group equals 1 and
# 0 otherwise. %in% never returns NA, so a missing genotype counts as 0.
BASE <- BASE %>%
  mutate(
    HPV_gr1a = as.integer(is.element(vph_16, 1)),
    HPV_gr1b = as.integer(
      rowSums(cbind(vph_18 %in% 1, vph_45 %in% 1)) > 0
    ),
    HPV_gr2 = as.integer(
      rowSums(cbind(
        vph_31 %in% 1, vph_33 %in% 1, vph_35 %in% 1,
        vph_52 %in% 1, vph_58 %in% 1
      )) > 0
    ),
    HPV_gr3 = as.integer(
      rowSums(cbind(
        vph_39 %in% 1, vph_51 %in% 1, vph_56 %in% 1, vph_59 %in% 1
      )) > 0
    )
  )
# --- Grouped education level ---
# Codes 1 and 6 -> "Primaria o menos", 2 and 3 -> "Secundaria-tecnico",
# 4 and 5 -> "Profesional o mayor"; anything else becomes NA. The level order
# below fixes the order of the factor.
BASE <- BASE %>%
  mutate(
    nivel_educativo_agr = factor(
      case_when(
        nivel_educativo %in% c(1, 6) ~ "Primaria o menos",
        nivel_educativo %in% c(2, 3) ~ "Secundaria-tecnico",
        nivel_educativo %in% c(4, 5) ~ "Profesional o mayor",
        TRUE ~ NA_character_
      ),
      levels = c(
        "Primaria o menos",
        "Secundaria-tecnico",
        "Profesional o mayor"
      )
    )
  )
# --- Age categories ---
# Ages are binned into [26, 35), [35, 50) and [50, Inf). Half-open intervals
# are used so that a non-integer age (e.g. 34.5) cannot fall between two
# categories; for whole-number ages the result is the same as 26-34 / 35-49 /
# >=50. Ages below 26 and missing ages stay NA.
BASE <- BASE %>%
  mutate(
    edad_cat = factor(
      case_when(
        edad >= 26 & edad < 35 ~ 2,
        edad >= 35 & edad < 50 ~ 3,
        edad >= 50 ~ 4,
        TRUE ~ NA_real_
      ),
      levels = c(2, 3, 4),
      labels = c("26-34", "35-49", ">=50")
    )
  )
Tabla
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.4.3
# Columns summarised in the cross table: the recoded categorical variables
# followed by the four HPV risk-group flags.
vars_recodificadas <- c(
  "metodo_int_treat_cat", "departamento_cat", "etnia_cat",
  "nivel_educativo_agr", "edad_cat", "ult_tamizaje_agr",
  paste0("HPV_gr", c("1a", "1b", "2", "3"))
)
# Cross table of every recoded variable by HPV result (resultado_vph), built
# separately for each residence x sampling-strategy stratum and merged side by
# side. Rows with a missing stratum variable are dropped first.
tabla_cruzada <- BASE %>%
  filter(!is.na(lugar_de_residencia_cat), !is.na(estrategia_toma_cat)) %>%
  dplyr::select(
    lugar_de_residencia_cat,
    estrategia_toma_cat,
    resultado_vph,
    all_of(vars_recodificadas)
  ) %>%
  # tbl_summary() infers the summary type inside each stratum. A 0/1 column
  # that is all 0 in one stratum, or a numeric code with a level absent from
  # one stratum, is then summarised with different rows there and the merged
  # table no longer lines up. Fixing the types before stratifying keeps the
  # same rows in every stratum:
  #   * ult_tamizaje_agr becomes a factor, so every level observed in the
  #     filtered data is shown (with a zero count where absent);
  #   * the HPV flags become logical, which is summarised as a single
  #     TRUE row in every stratum.
  mutate(
    ult_tamizaje_agr = factor(ult_tamizaje_agr),
    across(starts_with("HPV_gr"), as.logical)
  ) %>%
  tbl_strata(
    strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
    .tbl_fun = ~ .x %>%
      tbl_summary(
        by = resultado_vph,
        statistic = all_categorical() ~ "{n} ({p}%)",
        missing = "no",
        digits = all_categorical() ~ c(0, 1)
      ) %>%
      add_overall(last = TRUE, col_label = "**Total**") %>%
      modify_header(label ~ "**Variable**") %>%
      bold_labels(),
    .header = "**{strata}**"
  )
## 1 missing rows in the "resultado_vph" column have been removed.
## 3 missing rows in the "resultado_vph" column have been removed.
# Print the stratified cross table.
tabla_cruzada
| Variable |
Distante, Campaña
|
Distante, Tamizaje rutinario
|
No_D, Campaña
|
No_D, Tamizaje rutinario
|
||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 N = 1281 |
1 N = 451 |
Total1 | 0 N = 161 |
1 N = 51 |
Total1 | 0 N = 1591 |
1 N = 531 |
Total1 | 0 N = 911 |
1 N = 311 |
Total1 | |
| metodo_int_treat_cat | ||||||||||||
| Clinician | 35 (27.3%) | 14 (31.1%) | 49 (28.3%) | 14 (87.5%) | 1 (20.0%) | 15 (71.4%) | 76 (47.8%) | 21 (39.6%) | 97 (45.8%) | 11 (12.1%) | 0 (0.0%) | 11 (9.0%) |
| Self-collected | 93 (72.7%) | 31 (68.9%) | 124 (71.7%) | 2 (12.5%) | 4 (80.0%) | 6 (28.6%) | 83 (52.2%) | 32 (60.4%) | 115 (54.2%) | 80 (87.9%) | 31 (100.0%) | 111 (91.0%) |
| departamento_cat | ||||||||||||
| Putumayo | 93 (72.7%) | 31 (68.9%) | 124 (71.7%) | 14 (87.5%) | 1 (20.0%) | 15 (71.4%) | 76 (47.8%) | 21 (39.6%) | 97 (45.8%) | 80 (87.9%) | 31 (100.0%) | 111 (91.0%) |
| Choco | 35 (27.3%) | 14 (31.1%) | 49 (28.3%) | 2 (12.5%) | 4 (80.0%) | 6 (28.6%) | 83 (52.2%) | 32 (60.4%) | 115 (54.2%) | 11 (12.1%) | 0 (0.0%) | 11 (9.0%) |
| etnia_cat | ||||||||||||
| Indigena | 36 (28.1%) | 14 (31.1%) | 50 (28.9%) | 1 (6.3%) | 1 (20.0%) | 2 (9.5%) | 25 (15.7%) | 5 (9.4%) | 30 (14.2%) | 25 (27.5%) | 10 (32.3%) | 35 (28.7%) |
| Afrocolombiano | 35 (27.3%) | 16 (35.6%) | 51 (29.5%) | 5 (31.3%) | 4 (80.0%) | 9 (42.9%) | 84 (52.8%) | 28 (52.8%) | 112 (52.8%) | 10 (11.0%) | 0 (0.0%) | 10 (8.2%) |
| Otro | 57 (44.5%) | 15 (33.3%) | 72 (41.6%) | 10 (62.5%) | 0 (0.0%) | 10 (47.6%) | 50 (31.4%) | 20 (37.7%) | 70 (33.0%) | 56 (61.5%) | 21 (67.7%) | 77 (63.1%) |
| nivel_educativo_agr | ||||||||||||
| Primaria o menos | 19 (15.2%) | 7 (16.3%) | 26 (15.5%) | 2 (14.3%) | 1 (20.0%) | 3 (15.8%) | 35 (22.0%) | 6 (11.3%) | 41 (19.3%) | 25 (27.5%) | 5 (16.1%) | 30 (24.6%) |
| Secundaria-tecnico | 69 (55.2%) | 23 (53.5%) | 92 (54.8%) | 9 (64.3%) | 3 (60.0%) | 12 (63.2%) | 95 (59.7%) | 39 (73.6%) | 134 (63.2%) | 50 (54.9%) | 21 (67.7%) | 71 (58.2%) |
| Profesional o mayor | 37 (29.6%) | 13 (30.2%) | 50 (29.8%) | 3 (21.4%) | 1 (20.0%) | 4 (21.1%) | 29 (18.2%) | 8 (15.1%) | 37 (17.5%) | 16 (17.6%) | 5 (16.1%) | 21 (17.2%) |
| edad_cat | ||||||||||||
| 26-34 | 31 (24.2%) | 15 (33.3%) | 46 (26.6%) | 5 (31.3%) | 2 (40.0%) | 7 (33.3%) | 44 (27.7%) | 20 (37.7%) | 64 (30.2%) | 19 (20.9%) | 11 (35.5%) | 30 (24.6%) |
| 35-49 | 95 (74.2%) | 28 (62.2%) | 123 (71.1%) | 11 (68.8%) | 3 (60.0%) | 14 (66.7%) | 112 (70.4%) | 31 (58.5%) | 143 (67.5%) | 70 (76.9%) | 18 (58.1%) | 88 (72.1%) |
| >=50 | 2 (1.6%) | 2 (4.4%) | 4 (2.3%) | 0 (0.0%) | 0 (0.0%) | 0 (0.0%) | 3 (1.9%) | 2 (3.8%) | 5 (2.4%) | 2 (2.2%) | 2 (6.5%) | 4 (3.3%) |
| ult_tamizaje_agr | ||||||||||||
| 1 | 19 (15.0%) | 4 (9.1%) | 23 (13.5%) | 0 (0.0%) | 3 (60.0%) | 3 (14.3%) | 8 (5.1%) | 1 (1.9%) | 9 (4.3%) | |||
| 2 | 70 (55.1%) | 30 (68.2%) | 100 (58.5%) | 12 (75.0%) | 1 (20.0%) | 13 (61.9%) | 111 (70.7%) | 40 (75.5%) | 151 (71.9%) | 62 (68.1%) | 25 (80.6%) | 87 (71.3%) |
| 3 | 38 (29.9%) | 10 (22.7%) | 48 (28.1%) | 4 (25.0%) | 1 (20.0%) | 5 (23.8%) | 38 (24.2%) | 12 (22.6%) | 50 (23.8%) | 29 (31.9%) | 6 (19.4%) | 35 (28.7%) |
| HPV_gr1a | 0 (0.0%) | 9 (20.0%) | 9 (5.2%) | 0 (0.0%) | 8 (15.1%) | 8 (3.8%) | 0 (0.0%) | 5 (16.1%) | 5 (4.1%) | |||
| HPV_gr1b | 0 (0.0%) | 3 (6.7%) | 3 (1.7%) | 0 (0.0%) | 7 (13.2%) | 7 (3.3%) | 0 (0.0%) | 8 (25.8%) | 8 (6.6%) | |||
| HPV_gr2 | 0 (0.0%) | 27 (60.0%) | 27 (15.6%) | 0 (0.0%) | 5 (100.0%) | 5 (23.8%) | 0 (0.0%) | 28 (52.8%) | 28 (13.2%) | 0 (0.0%) | 14 (45.2%) | 14 (11.5%) |
| HPV_gr3 | 0 (0.0%) | 21 (46.7%) | 21 (12.1%) | 0 (0.0%) | 2 (40.0%) | 2 (9.5%) | 0 (0.0%) | 32 (60.4%) | 32 (15.1%) | 0 (0.0%) | 19 (61.3%) | 19 (15.6%) |
| 0 | 16 (100.0%) | 5 (100.0%) | 21 (100.0%) | |||||||||
| 0 | 16 (100.0%) | 5 (100.0%) | 21 (100.0%) | |||||||||
| 1 n (%) | ||||||||||||
library(gt)
## Warning: package 'gt' was built under R version 4.4.3
# Convert the gtsummary table to a gt object and write it to an HTML file.
gtsave(as_gt(tabla_cruzada), "tabla_cruzada_vph.html")
# Word export, currently disabled:
# gtsave(as_gt(tabla_cruzada), "tabla_cruzada_vph.docx")
library(coin)
## Warning: package 'coin' was built under R version 4.4.3
## Cargando paquete requerido: survival
# Permutation independence tests of the HPV result against the sampling
# strategy, the residence category and one additional variable at a time.
# Rows without an HPV result are removed up front.
BASE_filt <- BASE %>% filter(!is.na(resultado_vph))
vars_a_probar <- c(
  "metodo_int_treat_cat",
  "departamento_cat",
  "etnia_cat",
  "nivel_educativo_agr",
  "edad_cat",
  "ult_tamizaje_agr",
  "HPV_gr1a",
  "HPV_gr1b",
  "HPV_gr2",
  "HPV_gr3"
)
# NOTE(review): the p-values come from Monte Carlo resampling, so they change
# slightly between runs unless a seed is set before the loop.
# NOTE(review): the formula below tests resultado_vph against the three
# right-hand-side variables jointly (a single maxT statistic). If the aim is
# the association with `v` within each strategy/residence stratum, as in the
# cross table, a blocked formula (`resultado_vph ~ v | block`) may be what is
# wanted -- confirm with the analyst.
for (v in vars_a_probar) {
  cat("\n==============================\n")
  cat("Combinación: estrategia_toma_cat + lugar_de_residencia_cat +", v, "\n")
  # Complete cases for the three explanatory variables of this iteration.
  datos_v <- BASE_filt %>%
    filter(
      !is.na(.data[[v]]),
      !is.na(estrategia_toma_cat),
      !is.na(lugar_de_residencia_cat)
    )
  formula_v <- as.formula(
    paste("resultado_vph ~ estrategia_toma_cat + lugar_de_residencia_cat +", v)
  )
  test_resultado <- independence_test(
    formula_v,
    data = datos_v,
    # `B` is deprecated in coin's approximate(); `nresample` is the current
    # name for the number of Monte Carlo replicates.
    distribution = approximate(nresample = 10000)
  )
  print(test_resultado)
}
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + metodo_int_treat_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, metodo_int_treat_cat
## maxT = 1.6312, p-value = 0.277
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + departamento_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, departamento_cat
## maxT = 0.85552, p-value = 0.7677
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + etnia_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, etnia_cat
## maxT = 0.42687, p-value = 0.989
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + nivel_educativo_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, nivel_educativo_agr
## maxT = 1.619, p-value = 0.3822
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + edad_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, edad_cat
## maxT = 2.9118, p-value = 0.0203
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + ult_tamizaje_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, ult_tamizaje_agr
## maxT = 0.95529, p-value = 0.726
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr1a
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr1a
## maxT = 8.208, p-value < 1e-04
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr1b
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr1b
## maxT = 7.3952, p-value < 1e-04
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr2
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr2
## maxT = 15.892, p-value < 1e-04
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr3
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr3
## maxT = 15.892, p-value < 1e-04
## alternative hypothesis: two.sided