Ajuste de la base de datos.

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# Read the unified positivity workbook and keep a working copy in BASE,
# leaving the object holding the imported data untouched.
BD_unificada_analisis_positividad_2 <- readxl::read_excel(
  path = "BD unificada_analisis positividad_2.xls"
)
BASE <- BD_unificada_analisis_positividad_2

# --- Cleaning of a variable that already exists in the data set ---
# A code of 0 in ult_tamizaje_agr is recoded as missing.
BASE$ult_tamizaje_agr <- replace(
  BASE$ult_tamizaje_agr,
  which(BASE$ult_tamizaje_agr == 0),
  NA
)

# --- Main categorical variables ---
# Turn the numeric codes into labelled factors; any code outside the declared
# levels becomes NA.
BASE <- mutate(
  BASE,
  metodo_int_treat_cat = factor(
    metodo_int_treat,
    labels = c("Clinician", "Self-collected"),
    levels = c(0, 1)
  ),
  departamento_cat = factor(
    departamento,
    labels = c("Putumayo", "Choco"),
    levels = c(1, 2)
  ),
  etnia_cat = factor(
    etnia_agr,
    labels = c("Indigena", "Afrocolombiano", "Otro"),
    levels = c(1, 2, 3)
  ),
  # TODO(review): confirm with the data owner whether this should instead be
  # levels = c(0, 1) with labels = c("Tamizaje rutinario", "Campaña"),
  # according to her latest indication.
  estrategia_toma_cat = factor(
    estrategia_toma,
    labels = c("Campaña", "Tamizaje rutinario"),
    levels = c(1, 2)
  ),
  # Residence codes 1-2 are grouped as "Distante" and 3-4 as "No_D";
  # anything else is left missing.
  lugar_de_residencia_cat = factor(
    case_when(
      lugar_de_residencia %in% c(1, 2) ~ "Distante",
      lugar_de_residencia %in% c(3, 4) ~ "No_D",
      TRUE ~ NA_character_
    ),
    levels = c("Distante", "No_D")
  )
)

# --- HPV groups by oncogenic risk ---
# Each flag is 1L when at least one genotype of the group is coded 1 and 0L
# otherwise. A missing genotype result counts as "not 1" because %in% never
# returns NA.
BASE <- local({
  is_coded_one <- function(genotype) genotype %in% 1

  mutate(
    BASE,
    HPV_gr1a = as.integer(is_coded_one(vph_16)),
    HPV_gr1b = as.integer(is_coded_one(vph_18) | is_coded_one(vph_45)),
    HPV_gr2 = as.integer(
      is_coded_one(vph_31) | is_coded_one(vph_33) | is_coded_one(vph_35) |
        is_coded_one(vph_52) | is_coded_one(vph_58)
    ),
    HPV_gr3 = as.integer(
      is_coded_one(vph_39) | is_coded_one(vph_51) | is_coded_one(vph_56) |
        is_coded_one(vph_59)
    )
  )
})

# --- Grouped educational level ---
# Codes 1 and 6 -> "Primaria o menos", 2 and 3 -> "Secundaria-tecnico",
# 4 and 5 -> "Profesional o mayor"; any other code is left missing.
BASE <- mutate(
  BASE,
  nivel_educativo_agr = factor(
    case_when(
      nivel_educativo %in% c(1, 6) ~ "Primaria o menos",
      nivel_educativo %in% c(2, 3) ~ "Secundaria-tecnico",
      nivel_educativo %in% c(4, 5) ~ "Profesional o mayor",
      TRUE ~ NA_character_
    ),
    levels = c("Primaria o menos", "Secundaria-tecnico", "Profesional o mayor")
  )
)

# --- Categorised age ---
# Half-open bins [26, 35), [35, 50) and [50, Inf) so that non-integer ages
# (e.g. 34.5) fall into a category instead of silently becoming NA, which the
# previous closed integer ranges (<= 34 / >= 35, <= 49 / >= 50) allowed.
# Ages below 26 and missing ages remain NA, as before.
BASE <- BASE %>%
  mutate(
    edad_cat = cut(
      edad,
      breaks = c(26, 35, 50, Inf),
      labels = c("26-34", "35-49", ">=50"),
      right = FALSE
    )
  )

Tabla

library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.4.3
# Columns summarised in the stratified table: the recoded categorical
# variables followed by the four HPV risk-group flags.
vars_recodificadas <- c(
  "metodo_int_treat_cat", "departamento_cat", "etnia_cat",
  "nivel_educativo_agr", "edad_cat", "ult_tamizaje_agr",
  paste0("HPV_gr", c("1a", "1b", "2", "3"))
)

# Descriptive table of the recoded variables by HPV result (resultado_vph),
# stratified by residence category and sampling strategy.
tabla_cruzada <- BASE %>%
  filter(
    !is.na(lugar_de_residencia_cat),
    !is.na(estrategia_toma_cat),
    # tbl_summary() drops rows with a missing `by` value anyway; excluding
    # them here makes that exclusion explicit.
    !is.na(resultado_vph)
  ) %>%
  dplyr::select(
    lugar_de_residencia_cat,
    estrategia_toma_cat,
    resultado_vph,
    all_of(vars_recodificadas)
  ) %>%
  # Fix the variable types BEFORE stratifying so every stratum yields the same
  # rows and the merged table stays aligned:
  #  * a factor keeps all ult_tamizaje_agr levels even when one is unobserved
  #    in a stratum (otherwise that row is blank for that stratum);
  #  * logical HPV flags are expected to be summarised on a single (TRUE) row
  #    even in a stratum with no positives, whereas an all-zero numeric column
  #    was being shown as a separate categorical "0" row.
  mutate(
    ult_tamizaje_agr = factor(ult_tamizaje_agr),
    across(starts_with("HPV_gr"), as.logical)
  ) %>%
  tbl_strata(
    strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
    .tbl_fun = ~ .x %>%
      tbl_summary(
        by = resultado_vph,
        statistic = all_categorical() ~ "{n} ({p}%)",
        missing = "no",
        digits = all_categorical() ~ c(0, 1)
      ) %>%
      add_overall(last = TRUE, col_label = "**Total**") %>%
      modify_header(label ~ "**Variable**") %>%
      bold_labels(),
    .header = "**{strata}**"
  )
## 1 missing rows in the "resultado_vph" column have been removed.
## 3 missing rows in the "resultado_vph" column have been removed.
# Display the stratified table.
tabla_cruzada
Variable
Distante, Campaña
Distante, Tamizaje rutinario
No_D, Campaña
No_D, Tamizaje rutinario
0
N = 128
1
1
N = 45
1
Total1 0
N = 16
1
1
N = 5
1
Total1 0
N = 159
1
1
N = 53
1
Total1 0
N = 91
1
1
N = 31
1
Total1
metodo_int_treat_cat











    Clinician 35 (27.3%) 14 (31.1%) 49 (28.3%) 14 (87.5%) 1 (20.0%) 15 (71.4%) 76 (47.8%) 21 (39.6%) 97 (45.8%) 11 (12.1%) 0 (0.0%) 11 (9.0%)
    Self-collected 93 (72.7%) 31 (68.9%) 124 (71.7%) 2 (12.5%) 4 (80.0%) 6 (28.6%) 83 (52.2%) 32 (60.4%) 115 (54.2%) 80 (87.9%) 31 (100.0%) 111 (91.0%)
departamento_cat











    Putumayo 93 (72.7%) 31 (68.9%) 124 (71.7%) 14 (87.5%) 1 (20.0%) 15 (71.4%) 76 (47.8%) 21 (39.6%) 97 (45.8%) 80 (87.9%) 31 (100.0%) 111 (91.0%)
    Choco 35 (27.3%) 14 (31.1%) 49 (28.3%) 2 (12.5%) 4 (80.0%) 6 (28.6%) 83 (52.2%) 32 (60.4%) 115 (54.2%) 11 (12.1%) 0 (0.0%) 11 (9.0%)
etnia_cat











    Indigena 36 (28.1%) 14 (31.1%) 50 (28.9%) 1 (6.3%) 1 (20.0%) 2 (9.5%) 25 (15.7%) 5 (9.4%) 30 (14.2%) 25 (27.5%) 10 (32.3%) 35 (28.7%)
    Afrocolombiano 35 (27.3%) 16 (35.6%) 51 (29.5%) 5 (31.3%) 4 (80.0%) 9 (42.9%) 84 (52.8%) 28 (52.8%) 112 (52.8%) 10 (11.0%) 0 (0.0%) 10 (8.2%)
    Otro 57 (44.5%) 15 (33.3%) 72 (41.6%) 10 (62.5%) 0 (0.0%) 10 (47.6%) 50 (31.4%) 20 (37.7%) 70 (33.0%) 56 (61.5%) 21 (67.7%) 77 (63.1%)
nivel_educativo_agr











    Primaria o menos 19 (15.2%) 7 (16.3%) 26 (15.5%) 2 (14.3%) 1 (20.0%) 3 (15.8%) 35 (22.0%) 6 (11.3%) 41 (19.3%) 25 (27.5%) 5 (16.1%) 30 (24.6%)
    Secundaria-tecnico 69 (55.2%) 23 (53.5%) 92 (54.8%) 9 (64.3%) 3 (60.0%) 12 (63.2%) 95 (59.7%) 39 (73.6%) 134 (63.2%) 50 (54.9%) 21 (67.7%) 71 (58.2%)
    Profesional o mayor 37 (29.6%) 13 (30.2%) 50 (29.8%) 3 (21.4%) 1 (20.0%) 4 (21.1%) 29 (18.2%) 8 (15.1%) 37 (17.5%) 16 (17.6%) 5 (16.1%) 21 (17.2%)
edad_cat











    26-34 31 (24.2%) 15 (33.3%) 46 (26.6%) 5 (31.3%) 2 (40.0%) 7 (33.3%) 44 (27.7%) 20 (37.7%) 64 (30.2%) 19 (20.9%) 11 (35.5%) 30 (24.6%)
    35-49 95 (74.2%) 28 (62.2%) 123 (71.1%) 11 (68.8%) 3 (60.0%) 14 (66.7%) 112 (70.4%) 31 (58.5%) 143 (67.5%) 70 (76.9%) 18 (58.1%) 88 (72.1%)
    >=50 2 (1.6%) 2 (4.4%) 4 (2.3%) 0 (0.0%) 0 (0.0%) 0 (0.0%) 3 (1.9%) 2 (3.8%) 5 (2.4%) 2 (2.2%) 2 (6.5%) 4 (3.3%)
ult_tamizaje_agr











    1 19 (15.0%) 4 (9.1%) 23 (13.5%) 0 (0.0%) 3 (60.0%) 3 (14.3%) 8 (5.1%) 1 (1.9%) 9 (4.3%)


    2 70 (55.1%) 30 (68.2%) 100 (58.5%) 12 (75.0%) 1 (20.0%) 13 (61.9%) 111 (70.7%) 40 (75.5%) 151 (71.9%) 62 (68.1%) 25 (80.6%) 87 (71.3%)
    3 38 (29.9%) 10 (22.7%) 48 (28.1%) 4 (25.0%) 1 (20.0%) 5 (23.8%) 38 (24.2%) 12 (22.6%) 50 (23.8%) 29 (31.9%) 6 (19.4%) 35 (28.7%)
HPV_gr1a 0 (0.0%) 9 (20.0%) 9 (5.2%)


0 (0.0%) 8 (15.1%) 8 (3.8%) 0 (0.0%) 5 (16.1%) 5 (4.1%)
HPV_gr1b 0 (0.0%) 3 (6.7%) 3 (1.7%)


0 (0.0%) 7 (13.2%) 7 (3.3%) 0 (0.0%) 8 (25.8%) 8 (6.6%)
HPV_gr2 0 (0.0%) 27 (60.0%) 27 (15.6%) 0 (0.0%) 5 (100.0%) 5 (23.8%) 0 (0.0%) 28 (52.8%) 28 (13.2%) 0 (0.0%) 14 (45.2%) 14 (11.5%)
HPV_gr3 0 (0.0%) 21 (46.7%) 21 (12.1%) 0 (0.0%) 2 (40.0%) 2 (9.5%) 0 (0.0%) 32 (60.4%) 32 (15.1%) 0 (0.0%) 19 (61.3%) 19 (15.6%)
    0


16 (100.0%) 5 (100.0%) 21 (100.0%)





    0


16 (100.0%) 5 (100.0%) 21 (100.0%)





1 n (%)
library(gt)
## Warning: package 'gt' was built under R version 4.4.3
# Convert the gtsummary table to a gt object and write it out as HTML.
tabla_gt <- as_gt(tabla_cruzada)
gtsave(tabla_gt, "tabla_cruzada_vph.html")

#tabla_cruzada %>%
#  as_gt() %>%
#  gtsave("tabla_cruzada_vph.docx")
library(coin)
## Warning: package 'coin' was built under R version 4.4.3
## Cargando paquete requerido: survival
# Analysis set for the tests: only rows with a recorded HPV result.
BASE_filt <- dplyr::filter(BASE, !is.na(resultado_vph))

# Covariates tested one at a time alongside sampling strategy and residence.
vars_a_probar <- c(
  "metodo_int_treat_cat", "departamento_cat", "etnia_cat",
  "nivel_educativo_agr", "edad_cat", "ult_tamizaje_agr",
  paste0("HPV_gr", c("1a", "1b", "2", "3"))
)

# Permutation-based independence tests (coin) of resultado_vph against the
# sampling strategy, the residence category and one additional covariate at a
# time. The null distribution is approximated by Monte Carlo resampling, so a
# fixed seed is set to make the reported p-values reproducible between runs.
set.seed(12345)

for (v in vars_a_probar) {
  cat("\n==============================\n")
  cat("Combinación: estrategia_toma_cat + lugar_de_residencia_cat +", v, "\n")

  # Keep only rows with complete data for the three covariates of this model.
  datos_v <- BASE_filt %>%
    filter(
      !is.na(.data[[v]]),
      !is.na(estrategia_toma_cat),
      !is.na(lugar_de_residencia_cat)
    )

  formula_v <- as.formula(
    paste("resultado_vph ~ estrategia_toma_cat + lugar_de_residencia_cat +", v)
  )

  test_resultado <- independence_test(
    formula_v,
    data = datos_v,
    # `nresample` replaces the deprecated `B` argument of approximate(),
    # which emitted a deprecation warning on every iteration.
    distribution = approximate(nresample = 10000)
  )

  print(test_resultado)
}
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + metodo_int_treat_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, metodo_int_treat_cat
## maxT = 1.6312, p-value = 0.277
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + departamento_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, departamento_cat
## maxT = 0.85552, p-value = 0.7677
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + etnia_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, etnia_cat
## maxT = 0.42687, p-value = 0.989
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + nivel_educativo_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, nivel_educativo_agr
## maxT = 1.619, p-value = 0.3822
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + edad_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, edad_cat
## maxT = 2.9118, p-value = 0.0203
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + ult_tamizaje_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, ult_tamizaje_agr
## maxT = 0.95529, p-value = 0.726
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr1a
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr1a
## maxT = 8.208, p-value < 1e-04
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr1b
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr1b
## maxT = 7.3952, p-value < 1e-04
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr2
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr2
## maxT = 15.892, p-value < 1e-04
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + HPV_gr3
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, HPV_gr3
## maxT = 15.892, p-value < 1e-04
## alternative hypothesis: two.sided