Analisis descriptivo inicial

Solo se exploró la base entregada, estos resultados no necesariamente corresponden a los resultados de la Tabla 1.

library(readxl)
BASE<- read_excel("BASE ISCED 2.xlsx")
library(summarytools)
print(dfSummary(BASE), method = 'render')

Data Frame Summary

BASE

Dimensions: 199 x 16
Duplicates: 0
No Variable Stats / Values Freqs (% of Valid) Graph Valid Missing
1 idntfccion [character]
1. 1014290760
2. 1022394788
3. 32831227
4. 37315402
5. 39533573
6. 40012133
7. 53176876
8. 1000334669
9. 1014198124
10. 1014213026
[ 182 others ]
2(1.0%)
2(1.0%)
2(1.0%)
2(1.0%)
2(1.0%)
2(1.0%)
2(1.0%)
1(0.5%)
1(0.5%)
1(0.5%)
182(91.5%)
199 (100.0%) 0 (0.0%)
2 Edad [numeric]
Mean (sd) : 54.7 (13)
min ≤ med ≤ max:
21 ≤ 57 ≤ 94
IQR (CV) : 19 (0.2)
51 distinct values 199 (100.0%) 0 (0.0%)
3 IMC kg/m2 [numeric]
Mean (sd) : 23.8 (3.4)
min ≤ med ≤ max:
16.9 ≤ 23.3 ≤ 32.8
IQR (CV) : 5.1 (0.1)
129 distinct values 199 (100.0%) 0 (0.0%)
4 Genero [character]
1. Femeninmo
2. femenino
3. Femenino
4. masculino
5. Masculino
1(0.5%)
80(40.2%)
86(43.2%)
18(9.0%)
14(7.0%)
199 (100.0%) 0 (0.0%)
5 Diabetes [numeric]
Min : 0
Mean : 0.1
Max : 1
0:189(95.0%)
1:10(5.0%)
199 (100.0%) 0 (0.0%)
6 HTA [numeric]
Min : 0
Mean : 0.1
Max : 1
0:174(87.4%)
1:25(12.6%)
199 (100.0%) 0 (0.0%)
7 ISCED MEDIANA [numeric]
Mean (sd) : 1502.1 (578.6)
min ≤ med ≤ max:
255 ≤ 1515 ≤ 4080
IQR (CV) : 810 (0.4)
113 distinct values 199 (100.0%) 0 (0.0%)
8 Tiempo de exposición ácida % [character]
1. 0.1
2. 2.1
3. 2.4
4. 2.2999999999999998
5. 0.3
6. 3.1
7. 3.6
8. 0.2
9. 2.6
10. 0.8
[ 63 others ]
16(8.0%)
11(5.5%)
11(5.5%)
10(5.0%)
9(4.5%)
9(4.5%)
7(3.5%)
6(3.0%)
6(3.0%)
5(2.5%)
109(54.8%)
199 (100.0%) 0 (0.0%)
9 N de eventos de reflujo [numeric]
Mean (sd) : 58.3 (78)
min ≤ med ≤ max:
1 ≤ 39 ≤ 730
IQR (CV) : 38 (1.3)
80 distinct values 199 (100.0%) 0 (0.0%)
10 Índice de síntomas % [numeric]
Mean (sd) : 28.9 (36)
min ≤ med ≤ max:
0 ≤ 1.4 ≤ 100
IQR (CV) : 50 (1.2)
42 distinct values 199 (100.0%) 0 (0.0%)
11 SAP % [character]
1. 0
2. 100
3. 1.2
4. 0.1
5. 0.5
6. 1.4
7. 99
8. 98
9. 2.2999999999999998
10. 2.4
[ 33 others ]
66(33.2%)
23(11.6%)
11(5.5%)
10(5.0%)
8(4.0%)
8(4.0%)
8(4.0%)
7(3.5%)
5(2.5%)
5(2.5%)
48(24.1%)
199 (100.0%) 0 (0.0%)
12 Diagnóstico [character]
1. NERD
2. Normal
3. PIROSIS FUNCIONAL
4. REFLUJO HIPERSENSIBLE
48(24.1%)
50(25.1%)
51(25.6%)
50(25.1%)
199 (100.0%) 0 (0.0%)
13 Pirosis [character]
1. N
2. S
50(25.1%)
149(74.9%)
199 (100.0%) 0 (0.0%)
14 Regurgitación [character]
1. N
2. S
83(41.7%)
116(58.3%)
199 (100.0%) 0 (0.0%)
15 Dolor toracico [character]
1. N
2. S
107(54.3%)
90(45.7%)
197 (99.0%) 2 (1.0%)
16 Disfagia [character]
1. N
2. s
3. S
115(60.5%)
2(1.1%)
73(38.4%)
190 (95.5%) 9 (4.5%)

Generated by summarytools 1.1.4 (R version 4.4.2)
2025-09-18

Limpieza base de datos

#Limpieza de variables 

#IMC
BASE$`IMC kg/m2`<- round(BASE$`IMC kg/m2`,2)

#Genero 
BASE$Genero <- tolower(BASE$Genero)
BASE$Genero[BASE$Genero== "femeninmo"] <- "femenino"

#Tiempo de exposición ácida 
BASE$`Tiempo de exposición ácida %` <- as.numeric(BASE$`Tiempo de exposición ácida %`)

library(dplyr)
BASE <- BASE %>% mutate(TEA_cat = cut(`Tiempo de exposición ácida %`, breaks = c(-Inf, 6, Inf), right = F,labels = c("Menor a 6", "mayor o igual a 6")))


#SAP
BASE$`SAP %` <- as.numeric(BASE$`SAP %`)

BASE <- BASE %>% mutate(SAP_cat = cut(`SAP %`, breaks = c(-Inf, 95, Inf), right = F,labels = c("Negativo", "Positivo")))

#Indice de síntomas

BASE <- BASE %>% mutate(Indice_sintomas_cat = cut(`Índice de síntomas %`, breaks = c(-Inf, 50, Inf), right = F,labels = c("Negativo", "Positivo")))


#Se recodifica el diagnostico 
BASE$Diagnóstico <- tolower(BASE$Diagnóstico)

BASE$Diagnóstico <- factor(BASE$Diagnóstico, levels = c("nerd", "reflujo hipersensible", "pirosis funcional", "normal") )

#Disfagia

BASE$Disfagia <- toupper(BASE$Disfagia)


#Se crea la tabla 1 cruzando por "Diagnóstico" 


#dput(names(BASE))
myVars <- c("Edad", "IMC kg/m2", "Genero", "Diabetes", "HTA", 
"ISCED MEDIANA", "Tiempo de exposición ácida %", "TEA_cat", "N de eventos de reflujo", "Indice_sintomas_cat", "SAP_cat", "Pirosis", "Regurgitación", "Dolor toracico", "Disfagia")

catVars <-  c("Genero", "Diabetes", "HTA", "TEA_cat", "Indice_sintomas_cat", "SAP_cat", "Pirosis", "Regurgitación", "Dolor toracico", "Disfagia")

library(tableone)
tab <- CreateTableOne(vars = myVars, factorVars= catVars, data = BASE, strata = "Diagnóstico", includeNA = F, addOverall = T, testNonNormal = T)


table1 <- as.data.frame(print(tab, showAllLevels= TRUE, printToggle = FALSE, noSpaces = TRUE))
table1 <- table1[,c(-8)]
colnames(table1)[7] <-"Valorp"


limpiar_nombres <- function(nombres) {
  nombres %>%
    gsub("\\(X.*?\\)", "", .) %>%              # Quitar (X%) 
    gsub("X", "", .) %>%                       # Quitar X sueltas
    gsub("\\.{1,}", " ", .) %>%                # Puntos a espacios
    gsub("\\s+", " ", .) %>%                   # Múltiples espacios a uno
    trimws(.)                                   # Quitar espacios inicio/final
}

rownames(table1) <- limpiar_nombres(rownames(table1))
library(openxlsx)
write.xlsx(table1, "tabla1.xlsx", rowNames = TRUE, colnames = TRUE )

library(tibble)
library(reactable)

tabla_reactable <- table1 %>%
  rownames_to_column("Variable") %>%
  reactable(
    pagination = FALSE,
    defaultPageSize = nrow(table1),  # Alternativa: especificar número exacto de filas
    
    columns = list(
      Variable = colDef(
        style = list(backgroundColor = "#2C3E50", color = "white", fontWeight = "bold"),
        width = 180,             # Un poco más angosta
        align = "left"
      )
    ),
    theme = reactableTheme(
      headerStyle = list(
        backgroundColor = "#34495E", 
        color = "white", 
        fontWeight = "bold",
        fontSize = "13px"
      ),
      cellStyle = list(fontSize = "11px"),  # Texto más pequeño para que quepa mejor
      stripedColor = "#F8F9FA"
    ),
    striped = TRUE,
    highlight = TRUE,
    bordered = TRUE,
    compact = TRUE,
    wrap = FALSE,              # No envolver texto
    resizable = TRUE           # Columnas redimensionables
  )

tabla_reactable

Revisando en donde hay diferencias variables cuantitativas

Se revisa la normalidad de las variables, solo se distribuyen de manera normal la edad y el IMC.

by(BASE$Edad, BASE$Diagnóstico, shapiro.test) #edad es normal 
## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.96838, p-value = 0.2188
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.97301, p-value = 0.3054
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.98626, p-value = 0.8157
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.95411, p-value = 0.05045
by(BASE$`IMC kg/m2`, BASE$Diagnóstico, shapiro.test) #IMC ES NORMAL
## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.97812, p-value = 0.5029
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.95561, p-value = 0.05824
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.96767, p-value = 0.1767
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.92746, p-value = 0.004436
by(BASE$`ISCED MEDIANA`, BASE$Diagnóstico, shapiro.test) #NO ES NORMAL
## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.7555, p-value = 1.467e-07
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.93565, p-value = 0.009096
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.91522, p-value = 0.001412
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.9093, p-value = 0.0009887
by(BASE$`Tiempo de exposición ácida %`, BASE$Diagnóstico, shapiro.test) #NO ES NORMAL
## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.61113, p-value = 4.834e-10
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.88265, p-value = 0.0001348
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.83636, p-value = 5.656e-06
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.79977, p-value = 8.662e-07
by(BASE$`N de eventos de reflujo`, BASE$Diagnóstico, shapiro.test) #NO ES NORMAL
## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.54462, p-value = 5.567e-11
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.95359, p-value = 0.04799
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.94036, p-value = 0.0127
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.91157, p-value = 0.001185

Diferencias variable edad

Las diferencias están en la variable: REFLUJO HIPERSENSIBLE-Normal

anova_res <- aov(Edad ~ Diagnóstico, data = BASE)
tukey_res <- TukeyHSD(anova_res)

library(broom)
library(dplyr)

tukey_table <- broom::tidy(tukey_res) %>%
  mutate(across(where(is.numeric), round, 4))  # redondeo opcional

library(gt)

tukey_table %>%
  gt() %>%
  tab_header(
    title = md("**Comparación de medias EDAD (ANOVA + Tukey HSD)**"),
    subtitle = md("Prueba post-hoc de pares")
  ) %>%
  fmt_number(
    columns = vars(estimate, conf.low, conf.high, adj.p.value),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", color = "darkblue", align = "center")
    ),
    locations = cells_column_labels(everything())
  ) %>%
  opt_table_outline() %>%
  tab_options(
    table.border.top.color    = "black",
    table.border.bottom.color = "black",
    table.font.size = 14
  )
Comparación de medias EDAD (ANOVA + Tukey HSD)
Prueba post-hoc de pares
term contrast null.value estimate conf.low conf.high adj.p.value
Diagnóstico reflujo hipersensible-nerd 0 −5.7408 −12.4349 0.9532 0.1208
Diagnóstico pirosis funcional-nerd 0 −2.5404 −9.2023 4.1214 0.7563
Diagnóstico normal-nerd 0 0.9792 −5.7149 7.6732 0.9814
Diagnóstico pirosis funcional-reflujo hipersensible 0 3.2004 −3.3924 9.7932 0.5908
Diagnóstico normal-reflujo hipersensible 0 6.7200 0.0946 13.3454 0.0454
Diagnóstico normal-pirosis funcional 0 3.5196 −3.0732 10.1124 0.5113

Diferencias variable IMC

Hubo diferencias en todos los diagnósticos menos en PIROSISFUNCIONAL vs NERD y REFLUJO HIPERSENSIBLE vs PIROSIS FUNCIONAL

anova_res2 <- aov(`IMC kg/m2` ~ Diagnóstico, data = BASE)
tukey_res2 <- TukeyHSD(anova_res2)


tukey_table2 <- broom::tidy(tukey_res2) %>%
  mutate(across(where(is.numeric), round, 4))  # redondeo opcional

tukey_table2 %>%
  gt() %>%
  tab_header(
    title = md("**Comparación de medias IMC (ANOVA + Tukey HSD)**"),
    subtitle = md("Prueba post-hoc de pares")
  ) %>%
  fmt_number(
    columns = vars(estimate, conf.low, conf.high, adj.p.value),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", color = "darkblue", align = "center")
    ),
    locations = cells_column_labels(everything())
  ) %>%
  opt_table_outline() %>%
  tab_options(
    table.border.top.color    = "black",
    table.border.bottom.color = "black",
    table.font.size = 14
  )
Comparación de medias IMC (ANOVA + Tukey HSD)
Prueba post-hoc de pares
term contrast null.value estimate conf.low conf.high adj.p.value
Diagnóstico reflujo hipersensible-nerd 0 −2.0205 −3.5635 −0.4774 0.0046
Diagnóstico pirosis funcional-nerd 0 −1.4395 −2.9752 0.0961 0.0750
Diagnóstico normal-nerd 0 −4.5819 −6.1249 −3.0388 0.0000
Diagnóstico pirosis funcional-reflujo hipersensible 0 0.5810 −0.9388 2.1007 0.7550
Diagnóstico normal-reflujo hipersensible 0 −2.5614 −4.0886 −1.0342 0.0001
Diagnóstico normal-pirosis funcional 0 −3.1424 −4.6621 −1.6226 0.0000

Diferencias variable ISCED

Hubo diferencias entre: NERD - Normal; NERD - PIROSIS FUNCIONAL; NERD - REFLUJO HIPERSENSIBLE

library(FSA)

# Correr la prueba de Dunn
dunn_res <- dunnTest(`ISCED MEDIANA` ~ Diagnóstico, 
                     data = BASE, 
                     method = "bonferroni")

# Extraer la tabla de resultados
tabla_dunn <- dunn_res$res

# Pasar a gt para embellecer
tabla_dunn %>%
  gt() %>%
  tab_header(
    title = "Prueba de Dunn post-hoc ISCED",
    subtitle = "Ajuste por Bonferroni"
  ) %>%
  cols_label(
    Comparison = "Comparación",
    Z          = "Estadístico Z",
    P.unadj    = "p-valor sin ajuste",
    P.adj      = "p-valor ajustado"
  ) %>%
  fmt_number(
    columns = c(Z, P.unadj, P.adj),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", align = "center")
    ),
    locations = cells_column_labels(everything())
  )
Prueba de Dunn post-hoc ISCED
Ajuste por Bonferroni
Comparación Estadístico Z p-valor sin ajuste p-valor ajustado
nerd - normal −9.1017 0.0000 0.0000
nerd - pirosis funcional −8.2831 0.0000 0.0000
normal - pirosis funcional 0.8716 0.3834 1.0000
nerd - reflujo hipersensible −7.1096 0.0000 0.0000
normal - reflujo hipersensible 2.0127 0.0441 0.2649
pirosis funcional - reflujo hipersensible 1.1510 0.2497 1.0000

Difencias TEA

Hubo diferencias menos en las comparaciones

Normal - REFLUJO HIPERSENSIBLE PIROSIS FUNCIONAL - REFLUJO HIPERSENSIBLE

dunn_res2 <- dunnTest(`Tiempo de exposición ácida %` ~ Diagnóstico, 
                     data = BASE, 
                     method = "bonferroni")

tabla_dunn2 <- dunn_res2$res

tabla_dunn2 %>%
  gt() %>%
  tab_header(
    title = "Prueba de Dunn post-hoc TEA",
    subtitle = "Ajuste por Bonferroni"
  ) %>%
  cols_label(
    Comparison = "Comparación",
    Z          = "Estadístico Z",
    P.unadj    = "p-valor sin ajuste",
    P.adj      = "p-valor ajustado"
  ) %>%
  fmt_number(
    columns = c(Z, P.unadj, P.adj),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", align = "center")
    ),
    locations = cells_column_labels(everything())
  )
Prueba de Dunn post-hoc TEA
Ajuste por Bonferroni
Comparación Estadístico Z p-valor sin ajuste p-valor ajustado
nerd - normal 6.1111 0.0000 0.0000
nerd - pirosis funcional 10.2968 0.0000 0.0000
normal - pirosis funcional 4.1996 0.0000 0.0002
nerd - reflujo hipersensible 8.4981 0.0000 0.0000
normal - reflujo hipersensible 2.4118 0.0159 0.0953
pirosis funcional - reflujo hipersensible −1.7759 0.0757 0.4544

Diferencias N de eventos de reflujo

Hubo diferencias entre NERD - Normal; NERD - PIROSIS FUNCIONAL; NERD - REFLUJO HIPERSENSIBLE;

dunn_res3 <- dunnTest(`N de eventos de reflujo` ~ Diagnóstico, 
                     data = BASE, 
                     method = "bonferroni")

tabla_dunn3 <- dunn_res3$res

tabla_dunn3 %>%
  gt() %>%
  tab_header(
    title = "Prueba de Dunn post-hoc N de eventos de reflujo",
    subtitle = "Ajuste por Bonferroni"
  ) %>%
  cols_label(
    Comparison = "Comparación",
    Z          = "Estadístico Z",
    P.unadj    = "p-valor sin ajuste",
    P.adj      = "p-valor ajustado"
  ) %>%
  fmt_number(
    columns = c(Z, P.unadj, P.adj),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", align = "center")
    ),
    locations = cells_column_labels(everything())
  )
Prueba de Dunn post-hoc N de eventos de reflujo
Ajuste por Bonferroni
Comparación Estadístico Z p-valor sin ajuste p-valor ajustado
nerd - normal 6.1644 0.0000 0.0000
nerd - pirosis funcional 6.1326 0.0000 0.0000
normal - pirosis funcional −0.0623 0.9503 1.0000
nerd - reflujo hipersensible 3.6307 0.0003 0.0017
normal - reflujo hipersensible −2.5600 0.0105 0.0628
pirosis funcional - reflujo hipersensible −2.5104 0.0121 0.0724

Graficando el ISCED por cada diagnóstico

png("BOXPLOT.png", width = 500, height = 450)

library(ggplot2)
library(ggpubr)
library(viridis)      # Paletas científicas hermosas
library(ggthemes)     # Temas profesionales
library(scales)       # Para formatear ejes
library(RColorBrewer) # Más paletas de colores
library(dplyr)


# Primero calculamos los conteos
conteos <- BASE %>% 
  count(`Diagnóstico`) %>% 
  mutate(label = paste0(`Diagnóstico`, "\n(n=", n, ")"))


# Creamos el factor con las nuevas etiquetas
BASE$Diagnostico_con_n <- factor(BASE$`Diagnóstico`,
                                levels = conteos$`Diagnóstico`,
                                labels = conteos$label)

boxplot_medico <- ggplot(BASE, aes(x = Diagnostico_con_n, y = `ISCED MEDIANA`, fill = `Diagnóstico`)) +
  
  # Violin plot suave de fondo
  geom_violin(
    alpha = 0.3, 
    trim = TRUE, 
    scale = "width",
    color = NA
  ) +
  
  # Boxplot encima
  geom_boxplot(
    width = 0.3,
    alpha = 0.8,
    outlier.shape = 21,
    outlier.size = 1,
    outlier.fill = "lightgrey",
    outlier.stroke = 1,
    color = "gray20",
    size = 1
  ) +
  
  # Paleta médica profesional
  scale_fill_brewer(type = "qual", palette = "Set2", direction = 1) +
  
  scale_y_continuous(
    breaks = pretty_breaks(n = 7),
    expand = expansion(mult = c(0.05, 0.1))
  ) +
  
  labs(
    title = "Distribución ISCED por Diagnóstico",
    subtitle = "Análisis comparativo | n = 199",
    x = "Diagnóstico pH-metría",
    y = "ISCED"
  ) +
  
  theme_classic(base_size = 12) +
  theme(
    plot.title = element_text(
      size = 18, 
      face = "bold", 
      hjust = 0.5,
      margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 13, 
      hjust = 0.5, 
      color = "#555555",
      face = "italic",
      margin = margin(b = 20)
    ),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text.x = element_text(size = 8, face = "bold"),
    axis.text.y = element_text(size = 10),
    legend.position = "none",
    axis.line = element_line(color = "#333333", size = 0.8),
    plot.margin = margin(30, 30, 30, 30)
  )
boxplot_medico
dev.off()
## png 
##   2
boxplot_medico

# Calculamos estadísticas básicas
stats_simple <- BASE %>%
  group_by(`Diagnóstico`) %>%
  summarise(
    n = n(),
    media = mean(`ISCED MEDIANA`, na.rm = TRUE),
    mediana = median(`ISCED MEDIANA`, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  # Creamos las etiquetas con diagnóstico y n
  mutate(
    etiqueta_eje = paste0(`Diagnóstico`, "\n(n=", n, ")")
  )

# Convertimos a factor para mantener el orden
stats_simple$etiqueta_eje <- factor(stats_simple$etiqueta_eje, 
                                   levels = stats_simple$etiqueta_eje)

png("BARRASPLOT.png", width = 500, height = 450)
# Gráfico de barras simple con n en el eje X
barras_simple_n <- ggplot(stats_simple, aes(x = etiqueta_eje, y = media, fill = `Diagnóstico`)) +
  
  # Barras principales
  geom_col(
    alpha = 0.8,
    width = 0.7,
    color = "gray20",
    size = 0.8
  ) +
  
  # Solo el valor de la media encima de las barras
  geom_text(
    aes(label = round(media, 1)),
    vjust = -0.5,
    size = 4.5,
    fontface = "bold",
    color = "gray30"
  ) +
  
  # Paleta médica profesional
  scale_fill_brewer(type = "qual", palette = "Set2", direction = 1) +
  
  scale_y_continuous(
    breaks = pretty_breaks(n = 6),
    expand = expansion(mult = c(0, 0.12)),
    limits = c(0, NA)
  ) +
  
  labs(
    title = "ISCED Promedio por Diagnóstico",
    subtitle = "Comparación de medias | Análisis n=199",
    x = "Diagnóstico pH-metría",
    y = "ISCED"
  ) +
  
  theme_classic(base_size = 12) +
  theme(
    plot.title = element_text(
      size = 18, 
      face = "bold", 
      hjust = 0.5,
      margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 13, 
      hjust = 0.5, 
      color = "#555555",
      face = "italic",
      margin = margin(b = 20)
    ),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text.x = element_text(
      size = 9,           # Reducido un poco por tener más texto
      face = "bold", 
      color = "gray20",
      lineheight = 0.9    # Espaciado entre líneas
    ),
    axis.text.y = element_text(size = 10, color = "gray20"),
    legend.position = "none",
    axis.line = element_line(color = "#333333", size = 0.8),
    plot.margin = margin(30, 30, 40, 30),  # Más margen abajo para las etiquetas
    panel.grid.major.y = element_line(color = "gray90", size = 0.5, linetype = "dashed")
  )
barras_simple_n
dev.off()
## png 
##   2
barras_simple_n

Analisis de correlacion TEA - ISCED

library(ggplot2)
library(ggpubr)

#CORRELACION 
cor_test <- cor.test(BASE$`ISCED MEDIANA`, BASE$`Tiempo de exposición ácida %`, method = "spearman")
png("COR1.png", width = 400, height = 350)
cor_1 <- ggplot(BASE, aes(x = `Tiempo de exposición ácida %`, y = `ISCED MEDIANA`)) +
  geom_point(color = "#2C3E50", size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "#E74C3C", fill = "salmon", alpha = 0.3, size = 1.1) +
  labs(
    title = "Correlación entre ISCED y la TEA",
    x = "Tiempo de exposición ácida %",
    y = "ISCED MEDIANA",
    caption = paste0("r = ", round(cor_test$estimate, 3),
                     ", p = ", format.pval(cor_test$p.value, digits = 2, eps = .001))
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 10),
    plot.caption = element_text(face = "italic", size = 12, hjust = 0.5)
  )
cor_1 
dev.off()
## png 
##   2
cor_1 

Analisis de correlacion número de eventos de reflujo e ISCED

#CORRELACION 
cor_test2 <- cor.test(BASE$`ISCED MEDIANA`, BASE$`N de eventos de reflujo`, method = "spearman")

png("COR2.png", width = 400, height = 350)
cor_2 <- ggplot(BASE, aes(x = `N de eventos de reflujo`, y = `ISCED MEDIANA`)) +
  geom_point(color = "#2C3E50", size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "blue", fill = "skyblue", alpha = 0.3, size = 1.1) +
  labs(
    title = "Correlación entre ISCED y eventos de reflujo",
    x = "Eventos de refujo",
    y = "ISCED MEDIANA",
    caption = paste0("r = ", round(cor_test2$estimate, 3),
                     ", p = ", format.pval(cor_test2$p.value, digits = 2, eps = .001))
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 10),
    plot.caption = element_text(face = "italic", size = 12, hjust = 0.5)
  )
cor_2
dev.off()
## png 
##   2
cor_2

Punto de corte

##### CURVA ROC

library(pROC)

roc_curve1 <- roc(BASE$TEA_cat, BASE$`ISCED MEDIANA`, levels = c("Menor a 6", "mayor o igual a 6"), direction = ">")
auc_value <- auc(roc_curve1) #AUC
ci_auc <- ci.auc(roc_curve1) #IC AUC
optimal_cutoff <- coords(roc_curve1, "best", ret = "threshold") #PUNTO DE CORTE
ci_cutoff <- ci.coords(roc_curve1, "best", ret = "threshold") #IC PUNTO CORTE


#SENSIBILIDAD Y ESPECIFICIDAD 
coords_at_cutoff <- coords(roc_curve1, x = 1032.5, ret = c("sensitivity", "specificity"))
sensitivity_at_cutoff <- as.numeric(coords_at_cutoff["sensitivity"])  # Convertir a numérico
specificity_at_cutoff <- as.numeric(coords_at_cutoff["specificity"])    # Convertir a numérico
png("ROC.png", width = 400, height = 350)
# CÓDIGO ROC ARREGLADO
roc_plot <- ggroc(roc_curve1, color = "#1f77b4", size = 1) +
  
  # Línea de referencia ROJA (como pediste)
  geom_abline(slope = 1, intercept = 1, linetype = "dashed", color = "#d62728", size = 0.5) +
  
  labs(
    title = "Curva ROC",
    x = "1 - Especificidad",
    y = "Sensibilidad"
  ) +
  
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "#1f77b4")
  ) +
  
  # Punto del cutoff (asumiendo que tienes estas variables)
  geom_point(aes(x = specificity_at_cutoff, y = sensitivity_at_cutoff), 
             color = "blue", size = 3, alpha = 0.8) +
  
  # Anotación del cutoff (posición fija corregida)
  annotate("text", x = 0.9, y = 0.85,
           label = paste0("1032.5", 
                          "\n(0,924 - 0,952)"),
           color = "black", size = 4, hjust = 0, face = "bold") +
  
  # AUC con posición fija (SIN text_x ni text_y)
  annotate("text", x = 0.4, y = 0.3,  # Posición fija en la esquina inferior
           label = "AUC = 0,951\nIC: (0,919 - 0,982)",
           color = "black", size = 4, hjust = 0, face = "bold")
roc_plot
dev.off()
## png 
##   2
roc_plot

ORs bivariados = Desenlace tener ISCED mediana igual o menor a 1032.5

*NERD VS NORMAL”

BASE <- BASE %>% mutate(ISED_cat = cut(`ISCED MEDIANA`, breaks = c(-Inf, 1032.5, Inf), right = T, labels = c("Menor igual a 1032.5", "Mayor a 1032.5")))

NERDvsNORMAL <- cbind(c(45, 3), c(3, 47))
# Etiquetas para mayor claridad
rownames(NERDvsNORMAL) <- c("ISED <= 1032.5", "ISED > 1032.5")
colnames(NERDvsNORMAL) <- c("NERD", "Normal")

# Mostrar la tabla
library(kableExtra)
kable(NERDvsNORMAL, 
      caption = "NERD VS NORMAL")
NERD VS NORMAL
NERD Normal
ISED <= 1032.5 45 3
ISED > 1032.5 3 47
library(epiR)
epi.2by2(cbind(c(45, 3), c(3, 47)), method = "case.control", conf.level = 0.95) #nERD VS NORMAL 
##              Outcome+    Outcome-      Total                 Odds
## Exposed +          45           3         48  15.00 (5.86 to Inf)
## Exposed -           3          47         50  0.06 (0.00 to 0.16)
## Total              48          50         98  0.96 (0.63 to 1.45)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Exposure odds ratio                            235.00 (45.06, 1225.71)
## Attrib fraction (est) in the exposed (%)      99.57 (97.87, 99.91)
## Attrib fraction (est) in the population (%)   93.35 (88.77, 100.00)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 75.461 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval

PIROSIS FUNCIONAL VS NORMAL

PIROSOSvsNORMAL <- cbind(c(0, 51), c(3, 47))
# Etiquetas para mayor claridad
rownames(PIROSOSvsNORMAL) <- c("ISED <= 1032.5", "ISED > 1032.5")
colnames(PIROSOSvsNORMAL) <- c("PIROSIS FUNCIONAL", "Normal")

# Mostrar la tabla
library(kableExtra)
kable(PIROSOSvsNORMAL, 
      caption = "PIROSIS FUNCIONAL VS NORMAL")
PIROSIS FUNCIONAL VS NORMAL
PIROSIS FUNCIONAL Normal
ISED <= 1032.5 0 3
ISED > 1032.5 51 47
epi.2by2(PIROSOSvsNORMAL, method = "case.control", conf.level = 0.95) #PIROSIS FUNCIONAL VS NORMAL 
##              Outcome+    Outcome-      Total                 Odds
## Exposed +           0           3          3  0.00 (0.00 to 0.00)
## Exposed -          51          47         98  1.09 (0.72 to 1.65)
## Total              51          50        101  1.02 (0.68 to 1.52)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Exposure odds ratio                            0.00 (0.00, NaN)
## Attrib fraction (est) in the exposed (%)      -Inf (-Inf, 18.47)
## Attrib fraction (est) in the population (%)   -6.38 (-8.11, -5.26)
## -------------------------------------------------------------------
## Yates corrected chi2 test that OR = 1: chi2(1) = 1.415 Pr>chi2 = 0.234
## Fisher exact test that OR = 1: Pr>chi2 = 0.118
##  Wald confidence limits
##  CI: confidence interval

Reflujo hipersensible vs NORMAL

REFLUJOvsNORMAL <- cbind(c(4,46), c(3, 47))

# Etiquetas para mayor claridad
rownames(REFLUJOvsNORMAL) <- c("ISED <= 1032.5", "ISED > 1032.5")
colnames(REFLUJOvsNORMAL) <- c("Reflujo hipersensible", "Normal")

# Mostrar la tabla
library(kableExtra)
kable(REFLUJOvsNORMAL, 
      caption = "Reflujo hipersensible VS NORMAL")
Reflujo hipersensible VS NORMAL
Reflujo hipersensible Normal
ISED <= 1032.5 4 3
ISED > 1032.5 46 47
epi.2by2(REFLUJOvsNORMAL, method = "case.control", conf.level = 0.95) #Reflujo hipersensibleL VS NORMAL 
##              Outcome+    Outcome-      Total                 Odds
## Exposed +           4           3          7  1.33 (0.17 to 6.00)
## Exposed -          46          47         93  0.98 (0.66 to 1.45)
## Total              50          50        100  1.00 (0.67 to 1.50)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Exposure odds ratio                            1.36 (0.29, 6.43)
## Attrib fraction (est) in the exposed (%)      26.60 (-211.57, 82.62)
## Attrib fraction (est) in the population (%)   2.13 (0.89, 3.51)
## -------------------------------------------------------------------
## Yates corrected chi2 test that OR = 1: chi2(1) = 0.000 Pr>chi2 = 1.000
## Fisher exact test that OR = 1: Pr>chi2 = 1.000
##  Wald confidence limits
##  CI: confidence interval