library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
BD_unificada_analisis_positividad_2 <- read_excel("BD unificada_analisis positividad_2.xls")
BASE <- BD_unificada_analisis_positividad_2
# --- Limpieza de variable ya existente en la base ---
BASE$ult_tamizaje_agr[BASE$ult_tamizaje_agr == 0] <- NA
# --- Variables categóricas principales ---
BASE <- BASE %>%
mutate(
metodo_toma_cat = factor(
metodo_toma,
levels = c(0, 1),
labels = c("Clinician", "Self-collected")
),
departamento_cat = factor(
departamento,
levels = c(1, 2),
labels = c("Putumayo", "Choco")
),
etnia_cat = factor(
etnia_agr,
levels = c(1, 2, 3),
labels = c("Indigena", "Afrocolombiano", "Otro")
),
# OJO: revisar con ella si esto debe ser levels = c(0,1) y
# labels = c("Tamizaje rutinario", "Campaña") según lo que dijo ahora
estrategia_toma_cat = factor(
estrategia_toma,
levels = c(1, 2),
labels = c("Campaña", "Tamizaje rutinario")
),
lugar_de_residencia_cat = case_when(
lugar_de_residencia %in% c(1, 2) ~ "Distante",
lugar_de_residencia %in% c(3, 4) ~ "No_D",
TRUE ~ NA_character_
),
lugar_de_residencia_cat = factor(
lugar_de_residencia_cat,
levels = c("Distante", "No_D")
)
)
# --- Grupos de VPH por riesgo oncogénico ---
BASE <- BASE %>%
mutate(
HPV_gr1a = as.integer(vph_16 %in% 1),
HPV_gr1b = as.integer(vph_18 %in% 1 | vph_45 %in% 1),
HPV_gr2 = as.integer(vph_31 %in% 1 | vph_33 %in% 1 | vph_35 %in% 1 | vph_52 %in% 1 | vph_58 %in% 1),
HPV_gr3 = as.integer(vph_39 %in% 1 | vph_51 %in% 1 | vph_56 %in% 1 | vph_59 %in% 1)
)
# --- Nivel educativo agrupado ---
BASE <- BASE %>%
mutate(
nivel_educativo_agr = case_when(
nivel_educativo %in% c(1, 6) ~ 1,
nivel_educativo %in% c(2, 3) ~ 2,
nivel_educativo %in% c(4, 5) ~ 3,
TRUE ~ NA_real_
),
nivel_educativo_agr = factor(
nivel_educativo_agr,
levels = c(1, 2, 3),
labels = c("Primaria o menos", "Secundaria-tecnico", "Profesional o mayor")
)
)
# --- Edad categorizada ---
BASE <- BASE %>%
mutate(
edad_cat = case_when(
edad >= 26 & edad <= 34 ~ 2,
edad >= 35 & edad <= 49 ~ 3,
edad >= 50 ~ 4,
TRUE ~ NA_real_
),
edad_cat = factor(
edad_cat,
levels = c(2, 3, 4),
labels = c("26-34", "35-49", ">=50")
)
)
Tabla
library(gtsummary)
## Warning: package 'gtsummary' was built under R version 4.4.3
vars_recodificadas <- c(
"metodo_toma_cat",
"departamento_cat",
"etnia_cat",
"nivel_educativo_agr",
"edad_cat",
"ult_tamizaje_agr"
)
tabla_cruzada <- BASE %>%
filter(!is.na(lugar_de_residencia_cat), !is.na(estrategia_toma_cat)) %>%
dplyr::select(
lugar_de_residencia_cat,
estrategia_toma_cat,
resultado_vph,
all_of(vars_recodificadas)
) %>%
tbl_strata(
strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
.tbl_fun = ~ .x %>%
tbl_summary(
by = resultado_vph,
statistic = all_categorical() ~ "{n} ({p}%)",
missing = "no",
digits = all_categorical() ~ c(0, 1)
) %>%
add_overall(last = TRUE, col_label = "**Total**") %>%
modify_header(label ~ "**Variable**") %>%
bold_labels(),
.header = "**{strata}**"
)
## 1 missing rows in the "resultado_vph" column have been removed.
## 3 missing rows in the "resultado_vph" column have been removed.
tabla_cruzada
| Variable |
Distante, Campaña
|
Distante, Tamizaje rutinario
|
No_D, Campaña
|
No_D, Tamizaje rutinario
|
||||||||
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 N = 1281 |
1 N = 451 |
Total1 | 0 N = 161 |
1 N = 51 |
Total1 | 0 N = 1591 |
1 N = 531 |
Total1 | 0 N = 911 |
1 N = 311 |
Total1 | |
| metodo_toma_cat | ||||||||||||
| Clinician | 22 (17.2%) | 4 (8.9%) | 26 (15.0%) | 14 (87.5%) | 1 (20.0%) | 15 (71.4%) | 76 (47.8%) | 21 (39.6%) | 97 (45.8%) | 11 (12.1%) | 0 (0.0%) | 11 (9.0%) |
| Self-collected | 106 (82.8%) | 41 (91.1%) | 147 (85.0%) | 2 (12.5%) | 4 (80.0%) | 6 (28.6%) | 83 (52.2%) | 32 (60.4%) | 115 (54.2%) | 80 (87.9%) | 31 (100.0%) | 111 (91.0%) |
| departamento_cat | ||||||||||||
| Putumayo | 93 (72.7%) | 31 (68.9%) | 124 (71.7%) | 14 (87.5%) | 1 (20.0%) | 15 (71.4%) | 76 (47.8%) | 21 (39.6%) | 97 (45.8%) | 80 (87.9%) | 31 (100.0%) | 111 (91.0%) |
| Choco | 35 (27.3%) | 14 (31.1%) | 49 (28.3%) | 2 (12.5%) | 4 (80.0%) | 6 (28.6%) | 83 (52.2%) | 32 (60.4%) | 115 (54.2%) | 11 (12.1%) | 0 (0.0%) | 11 (9.0%) |
| etnia_cat | ||||||||||||
| Indigena | 36 (28.1%) | 14 (31.1%) | 50 (28.9%) | 1 (6.3%) | 1 (20.0%) | 2 (9.5%) | 25 (15.7%) | 5 (9.4%) | 30 (14.2%) | 25 (27.5%) | 10 (32.3%) | 35 (28.7%) |
| Afrocolombiano | 35 (27.3%) | 16 (35.6%) | 51 (29.5%) | 5 (31.3%) | 4 (80.0%) | 9 (42.9%) | 84 (52.8%) | 28 (52.8%) | 112 (52.8%) | 10 (11.0%) | 0 (0.0%) | 10 (8.2%) |
| Otro | 57 (44.5%) | 15 (33.3%) | 72 (41.6%) | 10 (62.5%) | 0 (0.0%) | 10 (47.6%) | 50 (31.4%) | 20 (37.7%) | 70 (33.0%) | 56 (61.5%) | 21 (67.7%) | 77 (63.1%) |
| nivel_educativo_agr | ||||||||||||
| Primaria o menos | 19 (15.2%) | 7 (16.3%) | 26 (15.5%) | 2 (14.3%) | 1 (20.0%) | 3 (15.8%) | 35 (22.0%) | 6 (11.3%) | 41 (19.3%) | 25 (27.5%) | 5 (16.1%) | 30 (24.6%) |
| Secundaria-tecnico | 69 (55.2%) | 23 (53.5%) | 92 (54.8%) | 9 (64.3%) | 3 (60.0%) | 12 (63.2%) | 95 (59.7%) | 39 (73.6%) | 134 (63.2%) | 50 (54.9%) | 21 (67.7%) | 71 (58.2%) |
| Profesional o mayor | 37 (29.6%) | 13 (30.2%) | 50 (29.8%) | 3 (21.4%) | 1 (20.0%) | 4 (21.1%) | 29 (18.2%) | 8 (15.1%) | 37 (17.5%) | 16 (17.6%) | 5 (16.1%) | 21 (17.2%) |
| edad_cat | ||||||||||||
| 26-34 | 31 (24.2%) | 15 (33.3%) | 46 (26.6%) | 5 (31.3%) | 2 (40.0%) | 7 (33.3%) | 44 (27.7%) | 20 (37.7%) | 64 (30.2%) | 19 (20.9%) | 11 (35.5%) | 30 (24.6%) |
| 35-49 | 95 (74.2%) | 28 (62.2%) | 123 (71.1%) | 11 (68.8%) | 3 (60.0%) | 14 (66.7%) | 112 (70.4%) | 31 (58.5%) | 143 (67.5%) | 70 (76.9%) | 18 (58.1%) | 88 (72.1%) |
| >=50 | 2 (1.6%) | 2 (4.4%) | 4 (2.3%) | 0 (0.0%) | 0 (0.0%) | 0 (0.0%) | 3 (1.9%) | 2 (3.8%) | 5 (2.4%) | 2 (2.2%) | 2 (6.5%) | 4 (3.3%) |
| ult_tamizaje_agr | ||||||||||||
| 1 | 19 (15.0%) | 4 (9.1%) | 23 (13.5%) | 0 (0.0%) | 3 (60.0%) | 3 (14.3%) | 8 (5.1%) | 1 (1.9%) | 9 (4.3%) | |||
| 2 | 70 (55.1%) | 30 (68.2%) | 100 (58.5%) | 12 (75.0%) | 1 (20.0%) | 13 (61.9%) | 111 (70.7%) | 40 (75.5%) | 151 (71.9%) | 62 (68.1%) | 25 (80.6%) | 87 (71.3%) |
| 3 | 38 (29.9%) | 10 (22.7%) | 48 (28.1%) | 4 (25.0%) | 1 (20.0%) | 5 (23.8%) | 38 (24.2%) | 12 (22.6%) | 50 (23.8%) | 29 (31.9%) | 6 (19.4%) | 35 (28.7%) |
| 1 n (%) | ||||||||||||
library(gt)
## Warning: package 'gt' was built under R version 4.4.3
tabla_cruzada %>%
as_gt() %>%
gtsave("tabla_cruzada_vph.html")
library(flextable)
## Warning: package 'flextable' was built under R version 4.4.3
##
## Adjuntando el paquete: 'flextable'
## The following object is masked from 'package:gtsummary':
##
## continuous_summary
library(officer)
## Warning: package 'officer' was built under R version 4.4.3
##
## Adjuntando el paquete: 'officer'
## The following object is masked from 'package:readxl':
##
## read_xlsx
tabla_cruzada %>%
as_flex_table() %>%
save_as_docx(path = "tabla_cruzada_vph.docx")
vars_recodificadas2 <- c(
"HPV_gr1a",
"HPV_gr1b",
"HPV_gr2",
"HPV_gr3"
)
BASEfilt <- BASE %>% filter(resultado_vph == 1)
tabla_cruzada2 <- BASEfilt %>%
filter(!is.na(lugar_de_residencia_cat), !is.na(estrategia_toma_cat)) %>%
dplyr::select(
lugar_de_residencia_cat,
estrategia_toma_cat,
resultado_vph,
all_of(vars_recodificadas2)
) %>%
tbl_strata(
strata = c(lugar_de_residencia_cat, estrategia_toma_cat),
.tbl_fun = ~ .x %>%
tbl_summary(
by = resultado_vph,
statistic = all_categorical() ~ "{n} ({p}%)",
missing = "no",
digits = all_categorical() ~ c(0, 1)
) %>%
add_overall(last = TRUE, col_label = "**Total**") %>%
modify_header(label ~ "**Variable**") %>%
bold_labels(),
.header = "**{strata}**"
)
tabla_cruzada2
| Variable |
Distante, Campaña
|
Distante, Tamizaje rutinario
|
No_D, Campaña
|
No_D, Tamizaje rutinario
|
||||
|---|---|---|---|---|---|---|---|---|
| 1 N = 451 |
Total1 | 1 N = 51 |
Total1 | 1 N = 531 |
Total1 | 1 N = 311 |
Total1 | |
| HPV_gr1a | 9 (20.0%) | 9 (20.0%) | 8 (15.1%) | 8 (15.1%) | 5 (16.1%) | 5 (16.1%) | ||
| HPV_gr1b | 3 (6.7%) | 3 (6.7%) | 7 (13.2%) | 7 (13.2%) | 8 (25.8%) | 8 (25.8%) | ||
| HPV_gr2 | 27 (60.0%) | 27 (60.0%) | 28 (52.8%) | 28 (52.8%) | 14 (45.2%) | 14 (45.2%) | ||
| HPV_gr3 | 21 (46.7%) | 21 (46.7%) | 2 (40.0%) | 2 (40.0%) | 32 (60.4%) | 32 (60.4%) | 19 (61.3%) | 19 (61.3%) |
| 0 | 5 (100.0%) | 5 (100.0%) | ||||||
| 0 | 5 (100.0%) | 5 (100.0%) | ||||||
| 1 | 5 (100.0%) | 5 (100.0%) | ||||||
| 1 n (%) | ||||||||
library(coin)
## Warning: package 'coin' was built under R version 4.4.3
## Cargando paquete requerido: survival
BASE_filt2 <- BASE %>% filter(!is.na(resultado_vph))
vars_a_probar <- c(
"metodo_toma_cat",
"departamento_cat",
"etnia_cat",
"nivel_educativo_agr",
"edad_cat",
"ult_tamizaje_agr"
)
for (v in vars_a_probar) {
cat("\n==============================\n")
cat("Combinación: estrategia_toma_cat + lugar_de_residencia_cat +", v, "\n")
datos_v <- BASE_filt2 %>%
filter(!is.na(.data[[v]]), !is.na(estrategia_toma_cat), !is.na(lugar_de_residencia_cat))
formula_v <- as.formula(paste("resultado_vph ~ estrategia_toma_cat + lugar_de_residencia_cat +", v))
test_resultado <- independence_test(
formula_v,
data = datos_v,
distribution = approximate(B = 10000)
)
print(test_resultado)
}
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + metodo_toma_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, metodo_toma_cat
## maxT = 2.6226, p-value = 0.0264
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + departamento_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, departamento_cat
## maxT = 0.85552, p-value = 0.7639
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + etnia_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, etnia_cat
## maxT = 0.42687, p-value = 0.9888
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + nivel_educativo_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, nivel_educativo_agr
## maxT = 1.619, p-value = 0.3862
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + edad_cat
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, edad_cat
## maxT = 2.9118, p-value = 0.019
## alternative hypothesis: two.sided
##
##
## ==============================
## Combinación: estrategia_toma_cat + lugar_de_residencia_cat + ult_tamizaje_agr
## Warning in approximate(B = 10000): 'B' is deprecated; use 'nresample' instead
##
## Approximative General Independence Test
##
## data: resultado_vph by
## estrategia_toma_cat, lugar_de_residencia_cat, ult_tamizaje_agr
## maxT = 0.95529, p-value = 0.7369
## alternative hypothesis: two.sided
Cochran Mantel Haenzel
# Crear el estrato combinado
BASE_filt3 <- BASE %>%
filter(!is.na(resultado_vph)) %>%
mutate(
estrato_mh = paste(estrategia_toma_cat, lugar_de_residencia_cat, sep = " × ")
)
# Variables a probar
vars_a_probar <- c(
"metodo_toma_cat",
"departamento_cat",
"etnia_cat",
"nivel_educativo_agr",
"edad_cat",
"ult_tamizaje_agr"
)
# Loop Mantel-Haenszel
for (v in vars_a_probar) {
cat("\n==============================\n")
cat("Variable:", v, "\n")
datos_v <- BASE_filt3 %>% filter(!is.na(.data[[v]]))
tabla_3d <- table(
datos_v[[v]],
datos_v$resultado_vph,
datos_v$estrato_mh
)
test_resultado <- mantelhaen.test(tabla_3d)
print(test_resultado)
}
##
## ==============================
## Variable: metodo_toma_cat
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: tabla_3d
## Mantel-Haenszel X-squared = 6.9963, df = 1, p-value = 0.008168
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 1.226441 3.449382
## sample estimates:
## common odds ratio
## 2.056809
##
##
## ==============================
## Variable: departamento_cat
##
## Mantel-Haenszel chi-squared test with continuity correction
##
## data: tabla_3d
## Mantel-Haenszel X-squared = 0.73526, df = 1, p-value = 0.3912
## alternative hypothesis: true common odds ratio is not equal to 1
## 95 percent confidence interval:
## 0.8008463 1.9393628
## sample estimates:
## common odds ratio
## 1.246247
##
##
## ==============================
## Variable: etnia_cat
##
## Cochran-Mantel-Haenszel test
##
## data: tabla_3d
## Cochran-Mantel-Haenszel M^2 = 0.25847, df = 2, p-value = 0.8788
##
##
## ==============================
## Variable: nivel_educativo_agr
##
## Cochran-Mantel-Haenszel test
##
## data: tabla_3d
## Cochran-Mantel-Haenszel M^2 = 3.2215, df = 2, p-value = 0.1997
##
##
## ==============================
## Variable: edad_cat
##
## Cochran-Mantel-Haenszel test
##
## data: tabla_3d
## Cochran-Mantel-Haenszel M^2 = 9.6248, df = 2, p-value = 0.008128
##
##
## ==============================
## Variable: ult_tamizaje_agr
##
## Cochran-Mantel-Haenszel test
##
## data: tabla_3d
## Cochran-Mantel-Haenszel M^2 = 2.2629, df = 2, p-value = 0.3226
library(sandwich)
## Warning: package 'sandwich' was built under R version 4.4.3
library(lmtest)
## Warning: package 'lmtest' was built under R version 4.4.3
## Cargando paquete requerido: zoo
##
## Adjuntando el paquete: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
BASE_modelo <- BASE %>%
filter(!is.na(resultado_vph)) %>%
mutate(resultado_vph_num = as.numeric(as.character(resultado_vph)))
# Modelo de Poisson
modelo_poisson <- glm(
resultado_vph_num ~ metodo_toma_cat +
departamento_cat +
etnia_cat +
nivel_educativo_agr +
edad_cat +
ult_tamizaje_agr +
lugar_de_residencia_cat +
estrategia_toma_cat,
data = BASE_modelo,
family = poisson(link = "log")
)
modelo_poisson2 <- glm(
resultado_vph_num ~ metodo_toma_cat +
edad_cat,
data = BASE_modelo,
family = poisson(link = "log")
)
coeftest(modelo_poisson2, vcov = sandwich)
##
## z test of coefficients:
##
## Estimate Std. Error z value Pr(>|z|)
## (Intercept) -1.48423 0.20277 -7.3199 2.481e-13 ***
## metodo_toma_catSelf-collected 0.49156 0.19285 2.5490 0.01080 *
## edad_cat35-49 -0.41950 0.15351 -2.7328 0.00628 **
## edad_cat>=50 0.28109 0.32668 0.8604 0.38954
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
tabla_modelo <- tbl_regression(
modelo_poisson2,
exponentiate = TRUE,
add_estimate_to_reference_rows = TRUE,
tidy_fun = function(x, ...) {
coeftest(x, vcov = sandwich(x)) %>%
broom::tidy(conf.int = TRUE) %>%
mutate(
conf.low = exp(estimate - 1.96 * std.error),
conf.high = exp(estimate + 1.96 * std.error),
estimate = exp(estimate)
)
}
) %>%
add_global_p() %>%
bold_p(t = 0.05) %>%
bold_labels() %>%
modify_caption("**Modelo de Poisson robusto - Razones de Prevalencia**")
tabla_modelo
| Characteristic | IRR | 95% CI | p-value |
|---|---|---|---|
| metodo_toma_cat | 0.018 | ||
| Clinician | 1.00 | — | |
| Self-collected | 1.63 | 1.12, 2.39 | |
| edad_cat | 0.037 | ||
| 26-34 | 1.00 | — | |
| 35-49 | 0.66 | 0.49, 0.89 | |
| >=50 | 1.32 | 0.70, 2.51 | |
| Abbreviations: CI = Confidence Interval, IRR = Incidence Rate Ratio | |||
Medidas de ajuste
Deviance
summary(modelo_poisson2)$deviance # devianza del modelo
## [1] 355.3452
summary(modelo_poisson2)$null.deviance # devianza nula
## [1] 367.4967
Explica poca varianza
1 - (summary(modelo_poisson2)$deviance / summary(modelo_poisson2)$null.deviance)
## [1] 0.03306565
#library(AER)
#dispersiontest(modelo_poisson2)
#sobredispersion del modelo
AIC(modelo_poisson2)
## [1] 631.3452