Ajuste base de datos.

library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
BD_unificada_analisis_positividad_2 <- read_excel("BD unificada_analisis positividad_2.xls")
BASE <- BD_unificada_analisis_positividad_2

# --- Limpieza de variable ya existente en la base ---
BASE$ult_tamizaje_agr[BASE$ult_tamizaje_agr == 0] <- NA

# --- Variables categóricas principales ---
BASE <- BASE %>%
  mutate(
    metodo_toma_cat = factor(
      metodo_toma,
      levels = c(0, 1),
      labels = c("Clinician", "Self-collected")
    ),
    departamento_cat = factor(
      departamento,
      levels = c(1, 2),
      labels = c("Putumayo", "Choco")
    ),
    etnia_cat = factor(
      etnia_agr,
      levels = c(1, 2, 3),
      labels = c("Indigena", "Afrocolombiano", "Otro")
    ),
    # OJO: revisar con ella si esto debe ser levels = c(0,1) y
    # labels = c("Tamizaje rutinario", "Campaña") según lo que dijo ahora
    estrategia_toma_cat = factor(
      estrategia_toma,
      levels = c(1, 2),
      labels = c("Campaña", "Tamizaje rutinario")
    ),
    lugar_de_residencia_cat = case_when(
      lugar_de_residencia %in% c(1, 2) ~ "Distante",
      lugar_de_residencia %in% c(3, 4) ~ "No_D",
      TRUE ~ NA_character_
    ),
    lugar_de_residencia_cat = factor(
      lugar_de_residencia_cat,
      levels = c("Distante", "No_D")
    )
  )

# --- Grupos de VPH por riesgo oncogénico ---
BASE <- BASE %>%
  mutate(
    HPV_gr1a = as.integer(vph_16 %in% 1),
    HPV_gr1b = as.integer(vph_18 %in% 1 | vph_45 %in% 1),
    HPV_gr2  = as.integer(vph_31 %in% 1 | vph_33 %in% 1 | vph_35 %in% 1 | vph_52 %in% 1 | vph_58 %in% 1),
    HPV_gr3  = as.integer(vph_39 %in% 1 | vph_51 %in% 1 | vph_56 %in% 1 | vph_59 %in% 1)
  )

# --- Nivel educativo agrupado ---
BASE <- BASE %>%
  mutate(
    nivel_educativo_agr = case_when(
      nivel_educativo %in% c(1, 6) ~ 1,
      nivel_educativo %in% c(2, 3) ~ 2,
      nivel_educativo %in% c(4, 5) ~ 3,
      TRUE ~ NA_real_
    ),
    nivel_educativo_agr = factor(
      nivel_educativo_agr,
      levels = c(1, 2, 3),
      labels = c("Primaria o menos", "Secundaria-tecnico", "Profesional o mayor")
    )
  )

# --- Edad categorizada ---
BASE <- BASE %>%
  mutate(
    edad_cat = case_when(
      edad >= 26 & edad <= 34 ~ 2,
      edad >= 35 & edad <= 49 ~ 3,
      edad >= 50              ~ 4,
      TRUE ~ NA_real_
    ),
    edad_cat = factor(
      edad_cat,
      levels = c(2, 3, 4),
      labels = c("26-34", "35-49", ">=50")
    )
  )

Tabla

library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.4.3
vars_recodificadas <- c(
  "metodo_toma_cat",
  "departamento_cat",
  "etnia_cat",
  "nivel_educativo_agr",
  "edad_cat",
  "ult_tamizaje_agr"
)

tabla_cruzada <- BASE %>%
  filter(!is.na(lugar_de_residencia_cat), !is.na(estrategia_toma_cat)) %>%
  dplyr::select(
    lugar_de_residencia_cat,
    estrategia_toma_cat,
    resultado_vph,
    all_of(vars_recodificadas)
  ) %>%
  tbl_strata(
    strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
    .tbl_fun = ~ .x %>%
      tbl_summary(
        by = resultado_vph,
        statistic = all_categorical() ~ "{n} ({p}%)",
        missing = "no",
        digits = all_categorical() ~ c(0, 1)
      ) %>%
      add_overall(last = TRUE, col_label = "**Total**") %>%
      modify_header(label ~ "**Variable**") %>%
      bold_labels(),
    .header = "**{strata}**"
  )
## 1 missing rows in the "resultado_vph" column have been removed.
## 3 missing rows in the "resultado_vph" column have been removed.
tabla_cruzada
Variable
Distante, Campaña
Distante, Tamizaje rutinario
No_D, Campaña
No_D, Tamizaje rutinario
0
N = 128
1
1
N = 45
1
Total1 0
N = 16
1
1
N = 5
1
Total1 0
N = 159
1
1
N = 53
1
Total1 0
N = 91
1
1
N = 31
1
Total1
metodo_toma_cat











    Clinician 22 (17.2%) 4 (8.9%) 26 (15.0%) 14 (87.5%) 1 (20.0%) 15 (71.4%) 76 (47.8%) 21 (39.6%) 97 (45.8%) 11 (12.1%) 0 (0.0%) 11 (9.0%)
    Self-collected 106 (82.8%) 41 (91.1%) 147 (85.0%) 2 (12.5%) 4 (80.0%) 6 (28.6%) 83 (52.2%) 32 (60.4%) 115 (54.2%) 80 (87.9%) 31 (100.0%) 111 (91.0%)
departamento_cat











    Putumayo 93 (72.7%) 31 (68.9%) 124 (71.7%) 14 (87.5%) 1 (20.0%) 15 (71.4%) 76 (47.8%) 21 (39.6%) 97 (45.8%) 80 (87.9%) 31 (100.0%) 111 (91.0%)
    Choco 35 (27.3%) 14 (31.1%) 49 (28.3%) 2 (12.5%) 4 (80.0%) 6 (28.6%) 83 (52.2%) 32 (60.4%) 115 (54.2%) 11 (12.1%) 0 (0.0%) 11 (9.0%)
etnia_cat











    Indigena 36 (28.1%) 14 (31.1%) 50 (28.9%) 1 (6.3%) 1 (20.0%) 2 (9.5%) 25 (15.7%) 5 (9.4%) 30 (14.2%) 25 (27.5%) 10 (32.3%) 35 (28.7%)
    Afrocolombiano 35 (27.3%) 16 (35.6%) 51 (29.5%) 5 (31.3%) 4 (80.0%) 9 (42.9%) 84 (52.8%) 28 (52.8%) 112 (52.8%) 10 (11.0%) 0 (0.0%) 10 (8.2%)
    Otro 57 (44.5%) 15 (33.3%) 72 (41.6%) 10 (62.5%) 0 (0.0%) 10 (47.6%) 50 (31.4%) 20 (37.7%) 70 (33.0%) 56 (61.5%) 21 (67.7%) 77 (63.1%)
nivel_educativo_agr











    Primaria o menos 19 (15.2%) 7 (16.3%) 26 (15.5%) 2 (14.3%) 1 (20.0%) 3 (15.8%) 35 (22.0%) 6 (11.3%) 41 (19.3%) 25 (27.5%) 5 (16.1%) 30 (24.6%)
    Secundaria-tecnico 69 (55.2%) 23 (53.5%) 92 (54.8%) 9 (64.3%) 3 (60.0%) 12 (63.2%) 95 (59.7%) 39 (73.6%) 134 (63.2%) 50 (54.9%) 21 (67.7%) 71 (58.2%)
    Profesional o mayor 37 (29.6%) 13 (30.2%) 50 (29.8%) 3 (21.4%) 1 (20.0%) 4 (21.1%) 29 (18.2%) 8 (15.1%) 37 (17.5%) 16 (17.6%) 5 (16.1%) 21 (17.2%)
edad_cat











    26-34 31 (24.2%) 15 (33.3%) 46 (26.6%) 5 (31.3%) 2 (40.0%) 7 (33.3%) 44 (27.7%) 20 (37.7%) 64 (30.2%) 19 (20.9%) 11 (35.5%) 30 (24.6%)
    35-49 95 (74.2%) 28 (62.2%) 123 (71.1%) 11 (68.8%) 3 (60.0%) 14 (66.7%) 112 (70.4%) 31 (58.5%) 143 (67.5%) 70 (76.9%) 18 (58.1%) 88 (72.1%)
    >=50 2 (1.6%) 2 (4.4%) 4 (2.3%) 0 (0.0%) 0 (0.0%) 0 (0.0%) 3 (1.9%) 2 (3.8%) 5 (2.4%) 2 (2.2%) 2 (6.5%) 4 (3.3%)
ult_tamizaje_agr











    1 19 (15.0%) 4 (9.1%) 23 (13.5%) 0 (0.0%) 3 (60.0%) 3 (14.3%) 8 (5.1%) 1 (1.9%) 9 (4.3%)


    2 70 (55.1%) 30 (68.2%) 100 (58.5%) 12 (75.0%) 1 (20.0%) 13 (61.9%) 111 (70.7%) 40 (75.5%) 151 (71.9%) 62 (68.1%) 25 (80.6%) 87 (71.3%)
    3 38 (29.9%) 10 (22.7%) 48 (28.1%) 4 (25.0%) 1 (20.0%) 5 (23.8%) 38 (24.2%) 12 (22.6%) 50 (23.8%) 29 (31.9%) 6 (19.4%) 35 (28.7%)
1 n (%)
library(gt)
## Warning: package 'gt' was built under R version 4.4.3
tabla_cruzada %>%
  as_gt() %>%
  gtsave("tabla_cruzada_vph.html")


library(flextable)
## Warning: package 'flextable' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'flextable'
## The following object is masked from 'package:gtsummary':
## 
##     continuous_summary
library(officer)
## Warning: package 'officer' was built under R version 4.4.3
## 
## Adjuntando el paquete: 'officer'
## The following object is masked from 'package:readxl':
## 
##     read_xlsx
tabla_cruzada %>%
  as_flex_table() %>%
  save_as_docx(path = "tabla_cruzada_vph.docx")

Base filtrada por positividad

vars_recodificadas2 <- c(
  "HPV_gr1a",
  "HPV_gr1b",
  "HPV_gr2",
  "HPV_gr3"
)

BASEfilt <- BASE %>% filter(resultado_vph == 1)

tabla_cruzada2 <- BASEfilt %>%
  filter(!is.na(lugar_de_residencia_cat), !is.na(estrategia_toma_cat)) %>%
  dplyr::select(
    lugar_de_residencia_cat,
    estrategia_toma_cat,
    resultado_vph,
    all_of(vars_recodificadas2)
  ) %>%
  tbl_strata(
    strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
    .tbl_fun = ~ .x %>%
      tbl_summary(
        by = resultado_vph,
        statistic = all_categorical() ~ "{n} ({p}%)",
        missing = "no",
        digits = all_categorical() ~ c(0, 1)
      ) %>%
      add_overall(last = TRUE, col_label = "**Total**") %>%
      modify_header(label ~ "**Variable**") %>%
      bold_labels(),
    .header = "**{strata}**"
  )

tabla_cruzada2
Variable
Distante, Campaña
Distante, Tamizaje rutinario
No_D, Campaña
No_D, Tamizaje rutinario
1
N = 45
1
Total1 1
N = 5
1
Total1 1
N = 53
1
Total1 1
N = 31
1
Total1
HPV_gr1a 9 (20.0%) 9 (20.0%)

8 (15.1%) 8 (15.1%) 5 (16.1%) 5 (16.1%)
HPV_gr1b 3 (6.7%) 3 (6.7%)

7 (13.2%) 7 (13.2%) 8 (25.8%) 8 (25.8%)
HPV_gr2 27 (60.0%) 27 (60.0%)

28 (52.8%) 28 (52.8%) 14 (45.2%) 14 (45.2%)
HPV_gr3 21 (46.7%) 21 (46.7%) 2 (40.0%) 2 (40.0%) 32 (60.4%) 32 (60.4%) 19 (61.3%) 19 (61.3%)
    0

5 (100.0%) 5 (100.0%)



    0

5 (100.0%) 5 (100.0%)



    1

5 (100.0%) 5 (100.0%)



1 n (%)
library(coin)
## Warning: package 'coin' was built under R version 4.4.3
## Cargando paquete requerido: survival
BASE_filt2 <- BASE %>% filter(!is.na(resultado_vph))

vars_a_probar <- c(
  "metodo_toma_cat",
  "departamento_cat",
  "etnia_cat",
  "nivel_educativo_agr",
  "edad_cat",
  "ult_tamizaje_agr"
)

for (v in vars_a_probar) {
  
  cat("\n==============================\n")
  cat("Combinación: estrategia_toma_cat + lugar_de_residencia_cat +", v, "\n")
  
  datos_v <- BASE_filt2 %>% 
    filter(!is.na(.data[[v]]), !is.na(estrategia_toma_cat), !is.na(lugar_de_residencia_cat))
  
  formula_v <- as.formula(paste("resultado_vph ~ estrategia_toma_cat + lugar_de_residencia_cat +", v))
  
  test_resultado <- independence_test(
    formula_v,
    data = datos_v,
    distribution = approximate(B = 10000)
  )
  
  print(test_resultado)
}
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + metodo_toma_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, metodo_toma_cat
## maxT = 2.6226, p-value = 0.0264
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + departamento_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, departamento_cat
## maxT = 0.85552, p-value = 0.7639
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + etnia_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, etnia_cat
## maxT = 0.42687, p-value = 0.9888
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + nivel_educativo_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, nivel_educativo_agr
## maxT = 1.619, p-value = 0.3862
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + edad_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, edad_cat
## maxT = 2.9118, p-value = 0.019
## alternative hypothesis: two.sided
## 
## 
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + ult_tamizaje_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
## 
##  Approximative General Independence Test
## 
## data:  resultado_vph by
##   estrategia_toma_cat, lugar_de_residencia_cat, ult_tamizaje_agr
## maxT = 0.95529, p-value = 0.7369
## alternative hypothesis: two.sided

Cochran Mantel Haenzel

# Crear el estrato combinado
BASE_filt3 <- BASE %>%
  filter(!is.na(resultado_vph)) %>%
  mutate(
    estrato_mh = paste(estrategia_toma_cat, lugar_de_residencia_cat, sep = " × ")
  )

# Variables a probar
vars_a_probar <- c(
  "metodo_toma_cat",
  "departamento_cat",
  "etnia_cat",
  "nivel_educativo_agr",
  "edad_cat",
  "ult_tamizaje_agr"
)

# Loop Mantel-Haenszel
for (v in vars_a_probar) {
  
  cat("\n==============================\n")
  cat("Variable:", v, "\n")
  
  datos_v <- BASE_filt3 %>% filter(!is.na(.data[[v]]))
  
  tabla_3d <- table(
    datos_v[[v]],
    datos_v$resultado_vph,
    datos_v$estrato_mh
  )
  
  test_resultado <- mantelhaen.test(tabla_3d)
  
  print(test_resultado)
}
## 
## ==============================
## Variable: metodo_toma_cat 
## 
##  Mantel-Haenszel chi-squared test with continuity correction
## 
## data:  tabla_3d
## Mantel-Haenszel X-squared = 6.9963, df = 1, p-value = 0.008168
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
##  1.226441 3.449382
## sample estimates:
## common odds ratio 
##          2.056809 
## 
## 
## ==============================
## Variable: departamento_cat 
## 
##  Mantel-Haenszel chi-squared test with continuity correction
## 
## data:  tabla_3d
## Mantel-Haenszel X-squared = 0.73526, df = 1, p-value = 0.3912
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
##  0.8008463 1.9393628
## sample estimates:
## common odds ratio 
##          1.246247 
## 
## 
## ==============================
## Variable: etnia_cat 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 0.25847, df = 2, p-value = 0.8788
## 
## 
## ==============================
## Variable: nivel_educativo_agr 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 3.2215, df = 2, p-value = 0.1997
## 
## 
## ==============================
## Variable: edad_cat 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 9.6248, df = 2, p-value = 0.008128
## 
## 
## ==============================
## Variable: ult_tamizaje_agr 
## 
##  Cochran-Mantel-Haenszel test
## 
## data:  tabla_3d
## Cochran-Mantel-Haenszel M^2 = 2.2629, df = 2, p-value = 0.3226

Modelo multivariado

library(sandwich)
## Warning: package 'sandwich' was built under R version 4.4.3
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Cargando paquete requerido: zoo
## 
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
BASE_modelo <- BASE %>%
  filter(!is.na(resultado_vph)) %>%
  mutate(resultado_vph_num = as.numeric(as.character(resultado_vph)))

# Modelo de Poisson
modelo_poisson <- glm(
  resultado_vph_num ~ metodo_toma_cat +
    departamento_cat +
    etnia_cat +
    nivel_educativo_agr +
    edad_cat +
    ult_tamizaje_agr +
    lugar_de_residencia_cat +
    estrategia_toma_cat,
  data = BASE_modelo,
  family = poisson(link = "log")
)

modelo_poisson2 <- glm(
  resultado_vph_num ~ metodo_toma_cat +
    edad_cat,
  data = BASE_modelo,
  family = poisson(link = "log")
)

coeftest(modelo_poisson2, vcov = sandwich)
## 
## z test of coefficients:
## 
##                               Estimate Std. Error z value  Pr(>|z|)    
## (Intercept)                   -1.48423    0.20277 -7.3199 2.481e-13 ***
## metodo_toma_catSelf-collected  0.49156    0.19285  2.5490   0.01080 *  
## edad_cat35-49                 -0.41950    0.15351 -2.7328   0.00628 ** 
## edad_cat>=50                   0.28109    0.32668  0.8604   0.38954    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tabla_modelo <- tbl_regression(
  modelo_poisson2,
  exponentiate = TRUE,
  add_estimate_to_reference_rows = TRUE,
  tidy_fun = function(x, ...) {
    coeftest(x, vcov = sandwich(x)) %>%
      broom::tidy(conf.int = TRUE) %>%
      mutate(
        conf.low  = exp(estimate - 1.96 * std.error),
        conf.high = exp(estimate + 1.96 * std.error),
        estimate  = exp(estimate)
      )
  }
) %>%
  add_global_p() %>%
  bold_p(t = 0.05) %>%
  bold_labels() %>%
  modify_caption("**Modelo de Poisson robusto - Razones de Prevalencia**")

tabla_modelo
Modelo de Poisson robusto - Razones de Prevalencia
Characteristic IRR 95% CI p-value
metodo_toma_cat

0.018
    Clinician 1.00
    Self-collected 1.63 1.12, 2.39
edad_cat

0.037
    26-34 1.00
    35-49 0.66 0.49, 0.89
    >=50 1.32 0.70, 2.51
Abbreviations: CI = Confidence Interval, IRR = Incidence Rate Ratio

Medidas de ajuste

Deviance

summary(modelo_poisson2)$deviance       # devianza del modelo
## [1] 355.3452
summary(modelo_poisson2)$null.deviance  # devianza nula
## [1] 367.4967

Explica poca varianza

1 - (summary(modelo_poisson2)$deviance / summary(modelo_poisson2)$null.deviance)
## [1] 0.03306565
#library(AER)
#dispersiontest(modelo_poisson2)

#sobredispersion del modelo 
AIC(modelo_poisson2)
## [1] 631.3452