AUTOTOMA

Ajuste base de datos.

library(readxl)

## Warning: package 'readxl' was built under R version 4.4.3

library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

BD_unificada_analisis_positividad_2 <- read_excel("BD unificada_analisis positividad_2.xls")
BASE <- BD_unificada_analisis_positividad_2

# --- Limpieza de variable ya existente en la base ---
BASE$ult_tamizaje_agr[BASE$ult_tamizaje_agr == 0] <- NA

# --- Variables categóricas principales ---
BASE <- BASE %>%
  mutate(
    metodo_toma_cat = factor(
      metodo_toma,
      levels = c(0, 1),
      labels = c("Clinician", "Self-collected")
    ),
    departamento_cat = factor(
      departamento,
      levels = c(1, 2),
      labels = c("Putumayo", "Choco")
    ),
    etnia_cat = factor(
      etnia_agr,
      levels = c(1, 2, 3),
      labels = c("Indigena", "Afrocolombiano", "Otro")
    ),
    # OJO: revisar con ella si esto debe ser levels = c(0,1) y
    # labels = c("Tamizaje rutinario", "Campaña") según lo que dijo ahora
    estrategia_toma_cat = factor(
      estrategia_toma,
      levels = c(1, 2),
      labels = c("Campaña", "Tamizaje rutinario")
    ),
    lugar_de_residencia_cat = case_when(
      lugar_de_residencia %in% c(1, 2) ~ "Distante",
      lugar_de_residencia %in% c(3, 4) ~ "No_D",
      TRUE ~ NA_character_
    ),
    lugar_de_residencia_cat = factor(
      lugar_de_residencia_cat,
      levels = c("Distante", "No_D")
    )
  )

# --- Grupos de VPH por riesgo oncogénico ---
BASE <- BASE %>%
  mutate(
    HPV_gr1a = as.integer(vph_16 %in% 1),
    HPV_gr1b = as.integer(vph_18 %in% 1 | vph_45 %in% 1),
    HPV_gr2  = as.integer(vph_31 %in% 1 | vph_33 %in% 1 | vph_35 %in% 1 | vph_52 %in% 1 | vph_58 %in% 1),
    HPV_gr3  = as.integer(vph_39 %in% 1 | vph_51 %in% 1 | vph_56 %in% 1 | vph_59 %in% 1)
  )

# --- Nivel educativo agrupado ---
BASE <- BASE %>%
  mutate(
    nivel_educativo_agr = case_when(
      nivel_educativo %in% c(1, 6) ~ 1,
      nivel_educativo %in% c(2, 3) ~ 2,
      nivel_educativo %in% c(4, 5) ~ 3,
      TRUE ~ NA_real_
    ),
    nivel_educativo_agr = factor(
      nivel_educativo_agr,
      levels = c(1, 2, 3),
      labels = c("Primaria o menos", "Secundaria-tecnico", "Profesional o mayor")
    )
  )

# --- Edad categorizada ---
BASE <- BASE %>%
  mutate(
    edad_cat = case_when(
      edad >= 26 & edad <= 34 ~ 2,
      edad >= 35 & edad <= 49 ~ 3,
      edad >= 50              ~ 4,
      TRUE ~ NA_real_
    ),
    edad_cat = factor(
      edad_cat,
      levels = c(2, 3, 4),
      labels = c("26-34", "35-49", ">=50")
    )
  )

Tabla

library(gtsummary)

## Warning: package 'gtsummary' was built under R version 4.4.3

vars_recodificadas <- c(
  "metodo_toma_cat",
  "departamento_cat",
  "etnia_cat",
  "nivel_educativo_agr",
  "edad_cat",
  "ult_tamizaje_agr"
)

tabla_cruzada <- BASE %>%
  filter(!is.na(lugar_de_residencia_cat), !is.na(estrategia_toma_cat)) %>%
  dplyr::select(
    lugar_de_residencia_cat,
    estrategia_toma_cat,
    resultado_vph,
    all_of(vars_recodificadas)
  ) %>%
  tbl_strata(
    strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
    .tbl_fun = ~ .x %>%
      tbl_summary(
        by = resultado_vph,
        statistic = all_categorical() ~ "{n} ({p}%)",
        missing = "no",
        digits = all_categorical() ~ c(0, 1)
      ) %>%
      add_overall(last = TRUE, col_label = "**Total**") %>%
      modify_header(label ~ "**Variable**") %>%
      bold_labels(),
    .header = "**{strata}**"
  )

## 1 missing rows in the "resultado_vph" column have been removed.
## 3 missing rows in the "resultado_vph" column have been removed.

tabla_cruzada

Variable	Distante, Campaña			Distante, Tamizaje rutinario			No_D, Campaña			No_D, Tamizaje rutinario
Variable	0 N = 128¹	1 N = 45¹	Total¹	0 N = 16¹	1 N = 5¹	Total¹	0 N = 159¹	1 N = 53¹	Total¹	0 N = 91¹	1 N = 31¹	Total¹
metodo_toma_cat
Clinician	22 (17.2%)	4 (8.9%)	26 (15.0%)	14 (87.5%)	1 (20.0%)	15 (71.4%)	76 (47.8%)	21 (39.6%)	97 (45.8%)	11 (12.1%)	0 (0.0%)	11 (9.0%)
Self-collected	106 (82.8%)	41 (91.1%)	147 (85.0%)	2 (12.5%)	4 (80.0%)	6 (28.6%)	83 (52.2%)	32 (60.4%)	115 (54.2%)	80 (87.9%)	31 (100.0%)	111 (91.0%)
departamento_cat
Putumayo	93 (72.7%)	31 (68.9%)	124 (71.7%)	14 (87.5%)	1 (20.0%)	15 (71.4%)	76 (47.8%)	21 (39.6%)	97 (45.8%)	80 (87.9%)	31 (100.0%)	111 (91.0%)
Choco	35 (27.3%)	14 (31.1%)	49 (28.3%)	2 (12.5%)	4 (80.0%)	6 (28.6%)	83 (52.2%)	32 (60.4%)	115 (54.2%)	11 (12.1%)	0 (0.0%)	11 (9.0%)
etnia_cat
Indigena	36 (28.1%)	14 (31.1%)	50 (28.9%)	1 (6.3%)	1 (20.0%)	2 (9.5%)	25 (15.7%)	5 (9.4%)	30 (14.2%)	25 (27.5%)	10 (32.3%)	35 (28.7%)
Afrocolombiano	35 (27.3%)	16 (35.6%)	51 (29.5%)	5 (31.3%)	4 (80.0%)	9 (42.9%)	84 (52.8%)	28 (52.8%)	112 (52.8%)	10 (11.0%)	0 (0.0%)	10 (8.2%)
Otro	57 (44.5%)	15 (33.3%)	72 (41.6%)	10 (62.5%)	0 (0.0%)	10 (47.6%)	50 (31.4%)	20 (37.7%)	70 (33.0%)	56 (61.5%)	21 (67.7%)	77 (63.1%)
nivel_educativo_agr
Primaria o menos	19 (15.2%)	7 (16.3%)	26 (15.5%)	2 (14.3%)	1 (20.0%)	3 (15.8%)	35 (22.0%)	6 (11.3%)	41 (19.3%)	25 (27.5%)	5 (16.1%)	30 (24.6%)
Secundaria-tecnico	69 (55.2%)	23 (53.5%)	92 (54.8%)	9 (64.3%)	3 (60.0%)	12 (63.2%)	95 (59.7%)	39 (73.6%)	134 (63.2%)	50 (54.9%)	21 (67.7%)	71 (58.2%)
Profesional o mayor	37 (29.6%)	13 (30.2%)	50 (29.8%)	3 (21.4%)	1 (20.0%)	4 (21.1%)	29 (18.2%)	8 (15.1%)	37 (17.5%)	16 (17.6%)	5 (16.1%)	21 (17.2%)
edad_cat
26-34	31 (24.2%)	15 (33.3%)	46 (26.6%)	5 (31.3%)	2 (40.0%)	7 (33.3%)	44 (27.7%)	20 (37.7%)	64 (30.2%)	19 (20.9%)	11 (35.5%)	30 (24.6%)
35-49	95 (74.2%)	28 (62.2%)	123 (71.1%)	11 (68.8%)	3 (60.0%)	14 (66.7%)	112 (70.4%)	31 (58.5%)	143 (67.5%)	70 (76.9%)	18 (58.1%)	88 (72.1%)
>=50	2 (1.6%)	2 (4.4%)	4 (2.3%)	0 (0.0%)	0 (0.0%)	0 (0.0%)	3 (1.9%)	2 (3.8%)	5 (2.4%)	2 (2.2%)	2 (6.5%)	4 (3.3%)
ult_tamizaje_agr
1	19 (15.0%)	4 (9.1%)	23 (13.5%)	0 (0.0%)	3 (60.0%)	3 (14.3%)	8 (5.1%)	1 (1.9%)	9 (4.3%)
2	70 (55.1%)	30 (68.2%)	100 (58.5%)	12 (75.0%)	1 (20.0%)	13 (61.9%)	111 (70.7%)	40 (75.5%)	151 (71.9%)	62 (68.1%)	25 (80.6%)	87 (71.3%)
3	38 (29.9%)	10 (22.7%)	48 (28.1%)	4 (25.0%)	1 (20.0%)	5 (23.8%)	38 (24.2%)	12 (22.6%)	50 (23.8%)	29 (31.9%)	6 (19.4%)	35 (28.7%)
¹ n (%)

library(gt)

## Warning: package 'gt' was built under R version 4.4.3

tabla_cruzada %>%
  as_gt() %>%
  gtsave("tabla_cruzada_vph.html")


library(flextable)

## Warning: package 'flextable' was built under R version 4.4.3

## 
## Adjuntando el paquete: 'flextable'

## The following object is masked from 'package:gtsummary':
## 
##     continuous_summary

library(officer)

## Warning: package 'officer' was built under R version 4.4.3

## 
## Adjuntando el paquete: 'officer'

## The following object is masked from 'package:readxl':
## 
##     read_xlsx

tabla_cruzada %>%
  as_flex_table() %>%
  save_as_docx(path = "tabla_cruzada_vph.docx")

Base filtrada por positividad

vars_recodificadas2 <- c(
  "HPV_gr1a",
  "HPV_gr1b",
  "HPV_gr2",
  "HPV_gr3"
)

BASEfilt <- BASE %>% filter(resultado_vph == 1)

tabla_cruzada2 <- BASEfilt %>%
  filter(!is.na(lugar_de_residencia_cat), !is.na(estrategia_toma_cat)) %>%
  dplyr::select(
    lugar_de_residencia_cat,
    estrategia_toma_cat,
    resultado_vph,
    all_of(vars_recodificadas2)
  ) %>%
  tbl_strata(
    strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
    .tbl_fun = ~ .x %>%
      tbl_summary(
        by = resultado_vph,
        statistic = all_categorical() ~ "{n} ({p}%)",
        missing = "no",
        digits = all_categorical() ~ c(0, 1)
      ) %>%
      add_overall(last = TRUE, col_label = "**Total**") %>%
      modify_header(label ~ "**Variable**") %>%
      bold_labels(),
    .header = "**{strata}**"
  )

tabla_cruzada2

Variable	Distante, Campaña		Distante, Tamizaje rutinario		No_D, Campaña		No_D, Tamizaje rutinario
Variable	1 N = 45¹	Total¹	1 N = 5¹	Total¹	1 N = 53¹	Total¹	1 N = 31¹	Total¹
HPV_gr1a	9 (20.0%)	9 (20.0%)			8 (15.1%)	8 (15.1%)	5 (16.1%)	5 (16.1%)
HPV_gr1b	3 (6.7%)	3 (6.7%)			7 (13.2%)	7 (13.2%)	8 (25.8%)	8 (25.8%)
HPV_gr2	27 (60.0%)	27 (60.0%)			28 (52.8%)	28 (52.8%)	14 (45.2%)	14 (45.2%)
HPV_gr3	21 (46.7%)	21 (46.7%)	2 (40.0%)	2 (40.0%)	32 (60.4%)	32 (60.4%)	19 (61.3%)	19 (61.3%)
0			5 (100.0%)	5 (100.0%)
0			5 (100.0%)	5 (100.0%)
1			5 (100.0%)	5 (100.0%)
¹ n (%)

library(coin)

## Warning: package 'coin' was built under R version 4.4.3

## Cargando paquete requerido: survival

BASE_filt2 <- BASE %>% filter(!is.na(resultado_vph))

vars_a_probar <- c(
  "metodo_toma_cat",
  "departamento_cat",
  "etnia_cat",
  "nivel_educativo_agr",
  "edad_cat",
  "ult_tamizaje_agr"
)

for (v in vars_a_probar) {
  
  cat("\n==============================\n")
  cat("Combinación: estrategia_toma_cat + lugar_de_residencia_cat +", v, "\n")
  
  datos_v <- BASE_filt2 %>% 
    filter(!is.na(.data[[v]]), !is.na(estrategia_toma_cat), !is.na(lugar_de_residencia_cat))
  
  formula_v <- as.formula(paste("resultado_vph ~ estrategia_toma_cat + lugar_de_residencia_cat +", v))
  
  test_resultado <- independence_test(
    formula_v,
    data = datos_v,
    distribution = approximate(B = 10000)
  )
  
  print(test_resultado)
}

## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + metodo_toma_cat

## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead

## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, metodo_toma_cat
## maxT = 2.6226, p-value = 0.0264
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + departamento_cat

## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead

## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, departamento_cat
## maxT = 0.85552, p-value = 0.7639
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + etnia_cat

## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead

## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, etnia_cat
## maxT = 0.42687, p-value = 0.9888
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + nivel_educativo_agr

## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead

## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, nivel_educativo_agr
## maxT = 1.619, p-value = 0.3862
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + edad_cat

## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead

## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, edad_cat
## maxT = 2.9118, p-value = 0.019
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + ult_tamizaje_agr

## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead

## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, ult_tamizaje_agr
## maxT = 0.95529, p-value = 0.7369
## alternative hypothesis: two.sided

Cochran Mantel Haenzel

# Crear el estrato combinado
BASE_filt3 <- BASE %>%
  filter(!is.na(resultado_vph)) %>%
  mutate(
    estrato_mh = paste(estrategia_toma_cat, lugar_de_residencia_cat, sep = " × ")
  )

# Variables a probar
vars_a_probar <- c(
  "metodo_toma_cat",
  "departamento_cat",
  "etnia_cat",
  "nivel_educativo_agr",
  "edad_cat",
  "ult_tamizaje_agr"
)

# Loop Mantel-Haenszel
for (v in vars_a_probar) {
  
  cat("\n==============================\n")
  cat("Variable:", v, "\n")
  
  datos_v <- BASE_filt3 %>% filter(!is.na(.data[[v]]))
  
  tabla_3d <- table(
    datos_v[[v]],
    datos_v$resultado_vph,
    datos_v$estrato_mh
  )
  
  test_resultado <- mantelhaen.test(tabla_3d)
  
  print(test_resultado)
}

## 
## ==============================
## Variable: metodo_toma_cat 
## 
##  Mantel-Haenszel chi-squared test with continuity correction
## 
## data:  tabla_3d
## Mantel-Haenszel X-squared = 6.9963, df = 1, p-value = 0.008168
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.226441 3.449382
## sample estimates:
## common odds ratio 
##          2.056809 
## 
## 
## ==============================
## Variable: departamento_cat 
## 
##  Mantel-Haenszel chi-squared test with continuity correction
## 
## data:  tabla_3d
## Mantel-Haenszel X-squared = 0.73526, df = 1, p-value = 0.3912
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8008463 1.9393628
## sample estimates:
## common odds ratio 
##          1.246247 
## 
## 
## ==============================
## Variable: etnia_cat 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 0.25847, df = 2, p-value = 0.8788
## 
## 
## ==============================
## Variable: nivel_educativo_agr 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 3.2215, df = 2, p-value = 0.1997
## 
## 
## ==============================
## Variable: edad_cat 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 9.6248, df = 2, p-value = 0.008128
## 
## 
## ==============================
## Variable: ult_tamizaje_agr 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 2.2629, df = 2, p-value = 0.3226

Modelo multivariado

library(sandwich)

## Warning: package 'sandwich' was built under R version 4.4.3

library(lmtest)

## Warning: package 'lmtest' was built under R version 4.4.3

## Cargando paquete requerido: zoo

## 
## Adjuntando el paquete: 'zoo'

## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric

BASE_modelo <- BASE %>%
  filter(!is.na(resultado_vph)) %>%
  mutate(resultado_vph_num = as.numeric(as.character(resultado_vph)))

# Modelo de Poisson
modelo_poisson <- glm(
  resultado_vph_num ~ metodo_toma_cat +
    departamento_cat +
    etnia_cat +
    nivel_educativo_agr +
    edad_cat +
    ult_tamizaje_agr +
    lugar_de_residencia_cat +
    estrategia_toma_cat,
  data = BASE_modelo,
  family = poisson(link = "log")
)

modelo_poisson2 <- glm(
  resultado_vph_num ~ metodo_toma_cat +
    edad_cat,
  data = BASE_modelo,
  family = poisson(link = "log")
)

coeftest(modelo_poisson2, vcov = sandwich)

## 
## z test of coefficients:
## 
##                               Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                   -1.48423    0.20277 -7.3199 2.481e-13 ***
## metodo_toma_catSelf-collected  0.49156    0.19285  2.5490   0.01080 *  
## edad_cat35-49                 -0.41950    0.15351 -2.7328   0.00628 ** 
## edad_cat>=50                   0.28109    0.32668  0.8604   0.38954    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

tabla_modelo <- tbl_regression(
  modelo_poisson2,
  exponentiate = TRUE,
  add_estimate_to_reference_rows = TRUE,
  tidy_fun = function(x, ...) {
    coeftest(x, vcov = sandwich(x)) %>%
      broom::tidy(conf.int = TRUE) %>%
      mutate(
        conf.low  = exp(estimate - 1.96 * std.error),
        conf.high = exp(estimate + 1.96 * std.error),
        estimate  = exp(estimate)
      )
  }
) %>%
  add_global_p() %>%
  bold_p(t = 0.05) %>%
  bold_labels() %>%
  modify_caption("**Modelo de Poisson robusto - Razones de Prevalencia**")

tabla_modelo

**Modelo de Poisson robusto - Razones de Prevalencia**
Characteristic	IRR	95% CI	p-value
metodo_toma_cat			0.018
Clinician	1.00	—
Self-collected	1.63	1.12, 2.39
edad_cat			0.037
26-34	1.00	—
35-49	0.66	0.49, 0.89
>=50	1.32	0.70, 2.51
Abbreviations: CI = Confidence Interval, IRR = Incidence Rate Ratio

Medidas de ajuste

Deviance

summary(modelo_poisson2)$deviance       # devianza del modelo

## [1] 355.3452

summary(modelo_poisson2)$null.deviance  # devianza nula

## [1] 367.4967

Explica poca varianza

1 - (summary(modelo_poisson2)$deviance / summary(modelo_poisson2)$null.deviance)

## [1] 0.03306565

#library(AER)
#dispersiontest(modelo_poisson2)

#sobredispersion del modelo

AIC(modelo_poisson2)

## [1] 631.3452

AUTOTOMA_DOS

Nicolas Martínez Ramos

2026-06-16

Ajuste base de datos.

Base filtrada por positividad

Modelo multivariado