ISCED

Analisis descriptivo inicial

Solo se exploró la base entregada, estos resultados no necesariamente corresponden a los resultados de la Tabla 1.

library(readxl)
BASE<- read_excel("BASE ISCED 2.xlsx")
library(summarytools)
print(dfSummary(BASE), method = 'render')

Data Frame Summary

BASE

Dimensions: 199 x 16
Duplicates: 0

Variable

Stats / Values

Freqs (% of Valid)

Graph

Valid

Missing

idntfccion [character]

1. 1014290760
2. 1022394788
3. 32831227
4. 37315402
5. 39533573
6. 40012133
7. 53176876
8. 1000334669
9. 1014198124
10. 1014213026
[ 182 others ]

2	(	1.0%	)
2	(	1.0%	)
2	(	1.0%	)
2	(	1.0%	)
2	(	1.0%	)
2	(	1.0%	)
2	(	1.0%	)
1	(	0.5%	)
1	(	0.5%	)
1	(	0.5%	)
182	(	91.5%	)

199 (100.0%)

0 (0.0%)

Edad [numeric]

Mean (sd) : 54.7 (13)

min ≤ med ≤ max:

21 ≤ 57 ≤ 94

IQR (CV) : 19 (0.2)

51 distinct values

199 (100.0%)

0 (0.0%)

IMC kg/m2 [numeric]

Mean (sd) : 23.8 (3.4)

min ≤ med ≤ max:

16.9 ≤ 23.3 ≤ 32.8

IQR (CV) : 5.1 (0.1)

129 distinct values

199 (100.0%)

0 (0.0%)

Genero [character]

1. Femeninmo

2. femenino

3. Femenino

4. masculino

5. Masculino

1	(	0.5%	)
80	(	40.2%	)
86	(	43.2%	)
18	(	9.0%	)
14	(	7.0%	)

199 (100.0%)

0 (0.0%)

Diabetes [numeric]

Min : 0

Mean : 0.1

Max : 1

0	:	189	(	95.0%	)
1	:	10	(	5.0%	)

199 (100.0%)

0 (0.0%)

HTA [numeric]

Min : 0

Mean : 0.1

Max : 1

0	:	174	(	87.4%	)
1	:	25	(	12.6%	)

199 (100.0%)

0 (0.0%)

ISCED MEDIANA [numeric]

Mean (sd) : 1502.1 (578.6)

min ≤ med ≤ max:

255 ≤ 1515 ≤ 4080

IQR (CV) : 810 (0.4)

113 distinct values

199 (100.0%)

0 (0.0%)

Tiempo de exposición ácida % [character]

1. 0.1
2. 2.1
3. 2.4
4. 2.2999999999999998
5. 0.3
6. 3.1
7. 3.6
8. 0.2
9. 2.6
10. 0.8
[ 63 others ]

16	(	8.0%	)
11	(	5.5%	)
11	(	5.5%	)
10	(	5.0%	)
9	(	4.5%	)
9	(	4.5%	)
7	(	3.5%	)
6	(	3.0%	)
6	(	3.0%	)
5	(	2.5%	)
109	(	54.8%	)

199 (100.0%)

0 (0.0%)

N de eventos de reflujo [numeric]

Mean (sd) : 58.3 (78)

min ≤ med ≤ max:

1 ≤ 39 ≤ 730

IQR (CV) : 38 (1.3)

80 distinct values

199 (100.0%)

0 (0.0%)

Índice de síntomas % [numeric]

Mean (sd) : 28.9 (36)

min ≤ med ≤ max:

0 ≤ 1.4 ≤ 100

IQR (CV) : 50 (1.2)

42 distinct values

199 (100.0%)

0 (0.0%)

SAP % [character]

1. 0
2. 100
3. 1.2
4. 0.1
5. 0.5
6. 1.4
7. 99
8. 98
9. 2.2999999999999998
10. 2.4
[ 33 others ]

66	(	33.2%	)
23	(	11.6%	)
11	(	5.5%	)
10	(	5.0%	)
8	(	4.0%	)
8	(	4.0%	)
8	(	4.0%	)
7	(	3.5%	)
5	(	2.5%	)
5	(	2.5%	)
48	(	24.1%	)

199 (100.0%)

0 (0.0%)

Diagnóstico [character]

1. NERD

2. Normal

3. PIROSIS FUNCIONAL

4. REFLUJO HIPERSENSIBLE

48	(	24.1%	)
50	(	25.1%	)
51	(	25.6%	)
50	(	25.1%	)

199 (100.0%)

0 (0.0%)

Pirosis [character]

1. N

2. S

50	(	25.1%	)
149	(	74.9%	)

199 (100.0%)

0 (0.0%)

Regurgitación [character]

1. N

2. S

83	(	41.7%	)
116	(	58.3%	)

199 (100.0%)

0 (0.0%)

Dolor toracico [character]

1. N

2. S

107	(	54.3%	)
90	(	45.7%	)

197 (99.0%)

2 (1.0%)

Disfagia [character]

1. N

2. s

3. S

115	(	60.5%	)
2	(	1.1%	)
73	(	38.4%	)

190 (95.5%)

9 (4.5%)

Generated by summarytools 1.1.4 (R version 4.4.2)
2025-09-18

Limpieza base de datos

#Limpieza de variables 

#IMC
BASE$`IMC kg/m2`<- round(BASE$`IMC kg/m2`,2)

#Genero 
BASE$Genero <- tolower(BASE$Genero)
BASE$Genero[BASE$Genero== "femeninmo"] <- "femenino"

#Tiempo de exposición ácida 
BASE$`Tiempo de exposición ácida %` <- as.numeric(BASE$`Tiempo de exposición ácida %`)

library(dplyr)
BASE <- BASE %>% mutate(TEA_cat = cut(`Tiempo de exposición ácida %`, breaks = c(-Inf, 6, Inf), right = F,labels = c("Menor a 6", "mayor o igual a 6")))


#SAP
BASE$`SAP %` <- as.numeric(BASE$`SAP %`)

BASE <- BASE %>% mutate(SAP_cat = cut(`SAP %`, breaks = c(-Inf, 95, Inf), right = F,labels = c("Negativo", "Positivo")))

#Indice de síntomas

BASE <- BASE %>% mutate(Indice_sintomas_cat = cut(`Índice de síntomas %`, breaks = c(-Inf, 50, Inf), right = F,labels = c("Negativo", "Positivo")))


#Se recodifica el diagnostico 
BASE$Diagnóstico <- tolower(BASE$Diagnóstico)

BASE$Diagnóstico <- factor(BASE$Diagnóstico, levels = c("nerd", "reflujo hipersensible", "pirosis funcional", "normal") )

#Disfagia

BASE$Disfagia <- toupper(BASE$Disfagia)


#Se crea la tabla 1 cruzando por "Diagnóstico" 


#dput(names(BASE))
myVars <- c("Edad", "IMC kg/m2", "Genero", "Diabetes", "HTA", 
"ISCED MEDIANA", "Tiempo de exposición ácida %", "TEA_cat", "N de eventos de reflujo", "Indice_sintomas_cat", "SAP_cat", "Pirosis", "Regurgitación", "Dolor toracico", "Disfagia")

catVars <-  c("Genero", "Diabetes", "HTA", "TEA_cat", "Indice_sintomas_cat", "SAP_cat", "Pirosis", "Regurgitación", "Dolor toracico", "Disfagia")

library(tableone)
tab <- CreateTableOne(vars = myVars, factorVars= catVars, data = BASE, strata = "Diagnóstico", includeNA = F, addOverall = T, testNonNormal = T)


table1 <- as.data.frame(print(tab, showAllLevels= TRUE, printToggle = FALSE, noSpaces = TRUE))
table1 <- table1[,c(-8)]
colnames(table1)[7] <-"Valorp"


limpiar_nombres <- function(nombres) {
  nombres %>%
    gsub("\\(X.*?\\)", "", .) %>%              # Quitar (X%) 
    gsub("X", "", .) %>%                       # Quitar X sueltas
    gsub("\\.{1,}", " ", .) %>%                # Puntos a espacios
    gsub("\\s+", " ", .) %>%                   # Múltiples espacios a uno
    trimws(.)                                   # Quitar espacios inicio/final
}

rownames(table1) <- limpiar_nombres(rownames(table1))
library(openxlsx)
write.xlsx(table1, "tabla1.xlsx", rowNames = TRUE, colnames = TRUE )

library(tibble)
library(reactable)

tabla_reactable <- table1 %>%
  rownames_to_column("Variable") %>%
  reactable(
    pagination = FALSE,
    defaultPageSize = nrow(table1),  # Alternativa: especificar número exacto de filas
    
    columns = list(
      Variable = colDef(
        style = list(backgroundColor = "#2C3E50", color = "white", fontWeight = "bold"),
        width = 180,             # Un poco más angosta
        align = "left"
      )
    ),
    theme = reactableTheme(
      headerStyle = list(
        backgroundColor = "#34495E", 
        color = "white", 
        fontWeight = "bold",
        fontSize = "13px"
      ),
      cellStyle = list(fontSize = "11px"),  # Texto más pequeño para que quepa mejor
      stripedColor = "#F8F9FA"
    ),
    striped = TRUE,
    highlight = TRUE,
    bordered = TRUE,
    compact = TRUE,
    wrap = FALSE,              # No envolver texto
    resizable = TRUE           # Columnas redimensionables
  )

tabla_reactable

Revisando en donde hay diferencias variables cuantitativas

Se revisa la normalidad de las variables, solo se distribuyen de manera normal la edad y el IMC.

by(BASE$Edad, BASE$Diagnóstico, shapiro.test) #edad es normal

## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.96838, p-value = 0.2188
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.97301, p-value = 0.3054
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.98626, p-value = 0.8157
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.95411, p-value = 0.05045

by(BASE$`IMC kg/m2`, BASE$Diagnóstico, shapiro.test) #IMC ES NORMAL

## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.97812, p-value = 0.5029
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.95561, p-value = 0.05824
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.96767, p-value = 0.1767
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.92746, p-value = 0.004436

by(BASE$`ISCED MEDIANA`, BASE$Diagnóstico, shapiro.test) #NO ES NORMAL

## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.7555, p-value = 1.467e-07
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.93565, p-value = 0.009096
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.91522, p-value = 0.001412
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.9093, p-value = 0.0009887

by(BASE$`Tiempo de exposición ácida %`, BASE$Diagnóstico, shapiro.test) #NO ES NORMAL

## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.61113, p-value = 4.834e-10
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.88265, p-value = 0.0001348
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.83636, p-value = 5.656e-06
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.79977, p-value = 8.662e-07

by(BASE$`N de eventos de reflujo`, BASE$Diagnóstico, shapiro.test) #NO ES NORMAL

## BASE$Diagnóstico: nerd
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.54462, p-value = 5.567e-11
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: reflujo hipersensible
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.95359, p-value = 0.04799
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: pirosis funcional
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.94036, p-value = 0.0127
## 
## ------------------------------------------------------------ 
## BASE$Diagnóstico: normal
## 
##  Shapiro-Wilk normality test
## 
## data:  dd[x, ]
## W = 0.91157, p-value = 0.001185

Diferencias variable edad

Las diferencias están en la variable: REFLUJO HIPERSENSIBLE-Normal

anova_res <- aov(Edad ~ Diagnóstico, data = BASE)
tukey_res <- TukeyHSD(anova_res)

library(broom)
library(dplyr)

tukey_table <- broom::tidy(tukey_res) %>%
  mutate(across(where(is.numeric), round, 4))  # redondeo opcional

library(gt)

tukey_table %>%
  gt() %>%
  tab_header(
    title = md("**Comparación de medias EDAD (ANOVA + Tukey HSD)**"),
    subtitle = md("Prueba post-hoc de pares")
  ) %>%
  fmt_number(
    columns = vars(estimate, conf.low, conf.high, adj.p.value),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", color = "darkblue", align = "center")
    ),
    locations = cells_column_labels(everything())
  ) %>%
  opt_table_outline() %>%
  tab_options(
    table.border.top.color    = "black",
    table.border.bottom.color = "black",
    table.font.size = 14
  )

term	contrast	null.value	estimate	conf.low	conf.high	adj.p.value
Comparación de medias EDAD (ANOVA + Tukey HSD)
Prueba post-hoc de pares
Diagnóstico	reflujo hipersensible-nerd	0	−5.7408	−12.4349	0.9532	0.1208
Diagnóstico	pirosis funcional-nerd	0	−2.5404	−9.2023	4.1214	0.7563
Diagnóstico	normal-nerd	0	0.9792	−5.7149	7.6732	0.9814
Diagnóstico	pirosis funcional-reflujo hipersensible	0	3.2004	−3.3924	9.7932	0.5908
Diagnóstico	normal-reflujo hipersensible	0	6.7200	0.0946	13.3454	0.0454
Diagnóstico	normal-pirosis funcional	0	3.5196	−3.0732	10.1124	0.5113

Diferencias variable IMC

Hubo diferencias en todos los diagnósticos menos en PIROSISFUNCIONAL vs NERD y REFLUJO HIPERSENSIBLE vs PIROSIS FUNCIONAL

anova_res2 <- aov(`IMC kg/m2` ~ Diagnóstico, data = BASE)
tukey_res2 <- TukeyHSD(anova_res2)


tukey_table2 <- broom::tidy(tukey_res2) %>%
  mutate(across(where(is.numeric), round, 4))  # redondeo opcional

tukey_table2 %>%
  gt() %>%
  tab_header(
    title = md("**Comparación de medias IMC (ANOVA + Tukey HSD)**"),
    subtitle = md("Prueba post-hoc de pares")
  ) %>%
  fmt_number(
    columns = vars(estimate, conf.low, conf.high, adj.p.value),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", color = "darkblue", align = "center")
    ),
    locations = cells_column_labels(everything())
  ) %>%
  opt_table_outline() %>%
  tab_options(
    table.border.top.color    = "black",
    table.border.bottom.color = "black",
    table.font.size = 14
  )

term	contrast	null.value	estimate	conf.low	conf.high	adj.p.value
Comparación de medias IMC (ANOVA + Tukey HSD)
Prueba post-hoc de pares
Diagnóstico	reflujo hipersensible-nerd	0	−2.0205	−3.5635	−0.4774	0.0046
Diagnóstico	pirosis funcional-nerd	0	−1.4395	−2.9752	0.0961	0.0750
Diagnóstico	normal-nerd	0	−4.5819	−6.1249	−3.0388	0.0000
Diagnóstico	pirosis funcional-reflujo hipersensible	0	0.5810	−0.9388	2.1007	0.7550
Diagnóstico	normal-reflujo hipersensible	0	−2.5614	−4.0886	−1.0342	0.0001
Diagnóstico	normal-pirosis funcional	0	−3.1424	−4.6621	−1.6226	0.0000

Diferencias variable ISCED

Hubo diferencias entre: NERD - Normal; NERD - PIROSIS FUNCIONAL; NERD - REFLUJO HIPERSENSIBLE

library(FSA)

# Correr la prueba de Dunn
dunn_res <- dunnTest(`ISCED MEDIANA` ~ Diagnóstico, 
                     data = BASE, 
                     method = "bonferroni")

# Extraer la tabla de resultados
tabla_dunn <- dunn_res$res

# Pasar a gt para embellecer
tabla_dunn %>%
  gt() %>%
  tab_header(
    title = "Prueba de Dunn post-hoc ISCED",
    subtitle = "Ajuste por Bonferroni"
  ) %>%
  cols_label(
    Comparison = "Comparación",
    Z          = "Estadístico Z",
    P.unadj    = "p-valor sin ajuste",
    P.adj      = "p-valor ajustado"
  ) %>%
  fmt_number(
    columns = c(Z, P.unadj, P.adj),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", align = "center")
    ),
    locations = cells_column_labels(everything())
  )

Comparación	Estadístico Z	p-valor sin ajuste	p-valor ajustado
Prueba de Dunn post-hoc ISCED
Ajuste por Bonferroni
nerd - normal	−9.1017	0.0000	0.0000
nerd - pirosis funcional	−8.2831	0.0000	0.0000
normal - pirosis funcional	0.8716	0.3834	1.0000
nerd - reflujo hipersensible	−7.1096	0.0000	0.0000
normal - reflujo hipersensible	2.0127	0.0441	0.2649
pirosis funcional - reflujo hipersensible	1.1510	0.2497	1.0000

Difencias TEA

Hubo diferencias menos en las comparaciones

Normal - REFLUJO HIPERSENSIBLE PIROSIS FUNCIONAL - REFLUJO HIPERSENSIBLE

dunn_res2 <- dunnTest(`Tiempo de exposición ácida %` ~ Diagnóstico, 
                     data = BASE, 
                     method = "bonferroni")

tabla_dunn2 <- dunn_res2$res

tabla_dunn2 %>%
  gt() %>%
  tab_header(
    title = "Prueba de Dunn post-hoc TEA",
    subtitle = "Ajuste por Bonferroni"
  ) %>%
  cols_label(
    Comparison = "Comparación",
    Z          = "Estadístico Z",
    P.unadj    = "p-valor sin ajuste",
    P.adj      = "p-valor ajustado"
  ) %>%
  fmt_number(
    columns = c(Z, P.unadj, P.adj),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", align = "center")
    ),
    locations = cells_column_labels(everything())
  )

Comparación	Estadístico Z	p-valor sin ajuste	p-valor ajustado
Prueba de Dunn post-hoc TEA
Ajuste por Bonferroni
nerd - normal	6.1111	0.0000	0.0000
nerd - pirosis funcional	10.2968	0.0000	0.0000
normal - pirosis funcional	4.1996	0.0000	0.0002
nerd - reflujo hipersensible	8.4981	0.0000	0.0000
normal - reflujo hipersensible	2.4118	0.0159	0.0953
pirosis funcional - reflujo hipersensible	−1.7759	0.0757	0.4544

Diferencias N de eventos de reflujo

Hubo diferencias entre NERD - Normal; NERD - PIROSIS FUNCIONAL; NERD - REFLUJO HIPERSENSIBLE;

dunn_res3 <- dunnTest(`N de eventos de reflujo` ~ Diagnóstico, 
                     data = BASE, 
                     method = "bonferroni")

tabla_dunn3 <- dunn_res3$res

tabla_dunn3 %>%
  gt() %>%
  tab_header(
    title = "Prueba de Dunn post-hoc N de eventos de reflujo",
    subtitle = "Ajuste por Bonferroni"
  ) %>%
  cols_label(
    Comparison = "Comparación",
    Z          = "Estadístico Z",
    P.unadj    = "p-valor sin ajuste",
    P.adj      = "p-valor ajustado"
  ) %>%
  fmt_number(
    columns = c(Z, P.unadj, P.adj),
    decimals = 4
  ) %>%
  tab_style(
    style = list(
      cell_text(weight = "bold", align = "center")
    ),
    locations = cells_column_labels(everything())
  )

Comparación	Estadístico Z	p-valor sin ajuste	p-valor ajustado
Prueba de Dunn post-hoc N de eventos de reflujo
Ajuste por Bonferroni
nerd - normal	6.1644	0.0000	0.0000
nerd - pirosis funcional	6.1326	0.0000	0.0000
normal - pirosis funcional	−0.0623	0.9503	1.0000
nerd - reflujo hipersensible	3.6307	0.0003	0.0017
normal - reflujo hipersensible	−2.5600	0.0105	0.0628
pirosis funcional - reflujo hipersensible	−2.5104	0.0121	0.0724

Graficando el ISCED por cada diagnóstico

png("BOXPLOT.png", width = 500, height = 450)

library(ggplot2)
library(ggpubr)
library(viridis)      # Paletas científicas hermosas
library(ggthemes)     # Temas profesionales
library(scales)       # Para formatear ejes
library(RColorBrewer) # Más paletas de colores
library(dplyr)


# Primero calculamos los conteos
conteos <- BASE %>% 
  count(`Diagnóstico`) %>% 
  mutate(label = paste0(`Diagnóstico`, "\n(n=", n, ")"))


# Creamos el factor con las nuevas etiquetas
BASE$Diagnostico_con_n <- factor(BASE$`Diagnóstico`,
                                levels = conteos$`Diagnóstico`,
                                labels = conteos$label)

boxplot_medico <- ggplot(BASE, aes(x = Diagnostico_con_n, y = `ISCED MEDIANA`, fill = `Diagnóstico`)) +
  
  # Violin plot suave de fondo
  geom_violin(
    alpha = 0.3, 
    trim = TRUE, 
    scale = "width",
    color = NA
  ) +
  
  # Boxplot encima
  geom_boxplot(
    width = 0.3,
    alpha = 0.8,
    outlier.shape = 21,
    outlier.size = 1,
    outlier.fill = "lightgrey",
    outlier.stroke = 1,
    color = "gray20",
    size = 1
  ) +
  
  # Paleta médica profesional
  scale_fill_brewer(type = "qual", palette = "Set2", direction = 1) +
  
  scale_y_continuous(
    breaks = pretty_breaks(n = 7),
    expand = expansion(mult = c(0.05, 0.1))
  ) +
  
  labs(
    title = "Distribución ISCED por Diagnóstico",
    subtitle = "Análisis comparativo | n = 199",
    x = "Diagnóstico pH-metría",
    y = "ISCED"
  ) +
  
  theme_classic(base_size = 12) +
  theme(
    plot.title = element_text(
      size = 18, 
      face = "bold", 
      hjust = 0.5,
      margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 13, 
      hjust = 0.5, 
      color = "#555555",
      face = "italic",
      margin = margin(b = 20)
    ),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text.x = element_text(size = 8, face = "bold"),
    axis.text.y = element_text(size = 10),
    legend.position = "none",
    axis.line = element_line(color = "#333333", size = 0.8),
    plot.margin = margin(30, 30, 30, 30)
  )
boxplot_medico
dev.off()

## png 
##   2

boxplot_medico

# Calculamos estadísticas básicas
stats_simple <- BASE %>%
  group_by(`Diagnóstico`) %>%
  summarise(
    n = n(),
    media = mean(`ISCED MEDIANA`, na.rm = TRUE),
    mediana = median(`ISCED MEDIANA`, na.rm = TRUE),
    .groups = 'drop'
  ) %>%
  # Creamos las etiquetas con diagnóstico y n
  mutate(
    etiqueta_eje = paste0(`Diagnóstico`, "\n(n=", n, ")")
  )

# Convertimos a factor para mantener el orden
stats_simple$etiqueta_eje <- factor(stats_simple$etiqueta_eje, 
                                   levels = stats_simple$etiqueta_eje)

png("BARRASPLOT.png", width = 500, height = 450)
# Gráfico de barras simple con n en el eje X
barras_simple_n <- ggplot(stats_simple, aes(x = etiqueta_eje, y = media, fill = `Diagnóstico`)) +
  
  # Barras principales
  geom_col(
    alpha = 0.8,
    width = 0.7,
    color = "gray20",
    size = 0.8
  ) +
  
  # Solo el valor de la media encima de las barras
  geom_text(
    aes(label = round(media, 1)),
    vjust = -0.5,
    size = 4.5,
    fontface = "bold",
    color = "gray30"
  ) +
  
  # Paleta médica profesional
  scale_fill_brewer(type = "qual", palette = "Set2", direction = 1) +
  
  scale_y_continuous(
    breaks = pretty_breaks(n = 6),
    expand = expansion(mult = c(0, 0.12)),
    limits = c(0, NA)
  ) +
  
  labs(
    title = "ISCED Promedio por Diagnóstico",
    subtitle = "Comparación de medias | Análisis n=199",
    x = "Diagnóstico pH-metría",
    y = "ISCED"
  ) +
  
  theme_classic(base_size = 12) +
  theme(
    plot.title = element_text(
      size = 18, 
      face = "bold", 
      hjust = 0.5,
      margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 13, 
      hjust = 0.5, 
      color = "#555555",
      face = "italic",
      margin = margin(b = 20)
    ),
    axis.title = element_text(size = 13, face = "bold"),
    axis.text.x = element_text(
      size = 9,           # Reducido un poco por tener más texto
      face = "bold", 
      color = "gray20",
      lineheight = 0.9    # Espaciado entre líneas
    ),
    axis.text.y = element_text(size = 10, color = "gray20"),
    legend.position = "none",
    axis.line = element_line(color = "#333333", size = 0.8),
    plot.margin = margin(30, 30, 40, 30),  # Más margen abajo para las etiquetas
    panel.grid.major.y = element_line(color = "gray90", size = 0.5, linetype = "dashed")
  )
barras_simple_n
dev.off()

## png 
##   2

barras_simple_n

Analisis de correlacion TEA - ISCED

library(ggplot2)
library(ggpubr)

#CORRELACION 
cor_test <- cor.test(BASE$`ISCED MEDIANA`, BASE$`Tiempo de exposición ácida %`, method = "spearman")
png("COR1.png", width = 400, height = 350)
cor_1 <- ggplot(BASE, aes(x = `Tiempo de exposición ácida %`, y = `ISCED MEDIANA`)) +
  geom_point(color = "#2C3E50", size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "#E74C3C", fill = "salmon", alpha = 0.3, size = 1.1) +
  labs(
    title = "Correlación entre ISCED y la TEA",
    x = "Tiempo de exposición ácida %",
    y = "ISCED MEDIANA",
    caption = paste0("r = ", round(cor_test$estimate, 3),
                     ", p = ", format.pval(cor_test$p.value, digits = 2, eps = .001))
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 10),
    plot.caption = element_text(face = "italic", size = 12, hjust = 0.5)
  )
cor_1 
dev.off()

## png 
##   2

cor_1

Analisis de correlacion número de eventos de reflujo e ISCED

#CORRELACION 
cor_test2 <- cor.test(BASE$`ISCED MEDIANA`, BASE$`N de eventos de reflujo`, method = "spearman")

png("COR2.png", width = 400, height = 350)
cor_2 <- ggplot(BASE, aes(x = `N de eventos de reflujo`, y = `ISCED MEDIANA`)) +
  geom_point(color = "#2C3E50", size = 2, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "blue", fill = "skyblue", alpha = 0.3, size = 1.1) +
  labs(
    title = "Correlación entre ISCED y eventos de reflujo",
    x = "Eventos de refujo",
    y = "ISCED MEDIANA",
    caption = paste0("r = ", round(cor_test2$estimate, 3),
                     ", p = ", format.pval(cor_test2$p.value, digits = 2, eps = .001))
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5, size = 10),
    plot.caption = element_text(face = "italic", size = 12, hjust = 0.5)
  )
cor_2
dev.off()

## png 
##   2

cor_2

Punto de corte

##### CURVA ROC

library(pROC)

roc_curve1 <- roc(BASE$TEA_cat, BASE$`ISCED MEDIANA`, levels = c("Menor a 6", "mayor o igual a 6"), direction = ">")
auc_value <- auc(roc_curve1) #AUC
ci_auc <- ci.auc(roc_curve1) #IC AUC
optimal_cutoff <- coords(roc_curve1, "best", ret = "threshold") #PUNTO DE CORTE
ci_cutoff <- ci.coords(roc_curve1, "best", ret = "threshold") #IC PUNTO CORTE


#SENSIBILIDAD Y ESPECIFICIDAD 
coords_at_cutoff <- coords(roc_curve1, x = 1032.5, ret = c("sensitivity", "specificity"))
sensitivity_at_cutoff <- as.numeric(coords_at_cutoff["sensitivity"])  # Convertir a numérico
specificity_at_cutoff <- as.numeric(coords_at_cutoff["specificity"])    # Convertir a numérico

png("ROC.png", width = 400, height = 350)
# CÓDIGO ROC ARREGLADO
roc_plot <- ggroc(roc_curve1, color = "#1f77b4", size = 1) +
  
  # Línea de referencia ROJA (como pediste)
  geom_abline(slope = 1, intercept = 1, linetype = "dashed", color = "#d62728", size = 0.5) +
  
  labs(
    title = "Curva ROC",
    x = "1 - Especificidad",
    y = "Sensibilidad"
  ) +
  
  theme_minimal() +
  theme(
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.line = element_line(color = "#1f77b4")
  ) +
  
  # Punto del cutoff (asumiendo que tienes estas variables)
  geom_point(aes(x = specificity_at_cutoff, y = sensitivity_at_cutoff), 
             color = "blue", size = 3, alpha = 0.8) +
  
  # Anotación del cutoff (posición fija corregida)
  annotate("text", x = 0.9, y = 0.85,
           label = paste0("1032.5", 
                          "\n(0,924 - 0,952)"),
           color = "black", size = 4, hjust = 0, face = "bold") +
  
  # AUC con posición fija (SIN text_x ni text_y)
  annotate("text", x = 0.4, y = 0.3,  # Posición fija en la esquina inferior
           label = "AUC = 0,951\nIC: (0,919 - 0,982)",
           color = "black", size = 4, hjust = 0, face = "bold")
roc_plot
dev.off()

## png 
##   2

roc_plot

ORs bivariados = Desenlace tener ISCED mediana igual o menor a 1032.5

*NERD VS NORMAL”

BASE <- BASE %>% mutate(ISED_cat = cut(`ISCED MEDIANA`, breaks = c(-Inf, 1032.5, Inf), right = T, labels = c("Menor igual a 1032.5", "Mayor a 1032.5")))

NERDvsNORMAL <- cbind(c(45, 3), c(3, 47))
# Etiquetas para mayor claridad
rownames(NERDvsNORMAL) <- c("ISED <= 1032.5", "ISED > 1032.5")
colnames(NERDvsNORMAL) <- c("NERD", "Normal")

# Mostrar la tabla
library(kableExtra)
kable(NERDvsNORMAL, 
      caption = "NERD VS NORMAL")

NERD VS NORMAL
	NERD	Normal
ISED <= 1032.5	45	3
ISED > 1032.5	3	47

library(epiR)
epi.2by2(cbind(c(45, 3), c(3, 47)), method = "case.control", conf.level = 0.95) #nERD VS NORMAL

##              Outcome+    Outcome-      Total                 Odds
## Exposed +          45           3         48  15.00 (5.86 to Inf)
## Exposed -           3          47         50  0.06 (0.00 to 0.16)
## Total              48          50         98  0.96 (0.63 to 1.45)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Exposure odds ratio                            235.00 (45.06, 1225.71)
## Attrib fraction (est) in the exposed (%)      99.57 (97.87, 99.91)
## Attrib fraction (est) in the population (%)   93.35 (88.77, 100.00)
## -------------------------------------------------------------------
## Uncorrected chi2 test that OR = 1: chi2(1) = 75.461 Pr>chi2 = <0.001
## Fisher exact test that OR = 1: Pr>chi2 = <0.001
##  Wald confidence limits
##  CI: confidence interval

PIROSIS FUNCIONAL VS NORMAL

PIROSOSvsNORMAL <- cbind(c(0, 51), c(3, 47))
# Etiquetas para mayor claridad
rownames(PIROSOSvsNORMAL) <- c("ISED <= 1032.5", "ISED > 1032.5")
colnames(PIROSOSvsNORMAL) <- c("PIROSIS FUNCIONAL", "Normal")

# Mostrar la tabla
library(kableExtra)
kable(PIROSOSvsNORMAL, 
      caption = "PIROSIS FUNCIONAL VS NORMAL")

PIROSIS FUNCIONAL VS NORMAL
	PIROSIS FUNCIONAL	Normal
ISED <= 1032.5	0	3
ISED > 1032.5	51	47

epi.2by2(PIROSOSvsNORMAL, method = "case.control", conf.level = 0.95) #PIROSIS FUNCIONAL VS NORMAL

##              Outcome+    Outcome-      Total                 Odds
## Exposed +           0           3          3  0.00 (0.00 to 0.00)
## Exposed -          51          47         98  1.09 (0.72 to 1.65)
## Total              51          50        101  1.02 (0.68 to 1.52)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Exposure odds ratio                            0.00 (0.00, NaN)
## Attrib fraction (est) in the exposed (%)      -Inf (-Inf, 18.47)
## Attrib fraction (est) in the population (%)   -6.38 (-8.11, -5.26)
## -------------------------------------------------------------------
## Yates corrected chi2 test that OR = 1: chi2(1) = 1.415 Pr>chi2 = 0.234
## Fisher exact test that OR = 1: Pr>chi2 = 0.118
##  Wald confidence limits
##  CI: confidence interval

Reflujo hipersensible vs NORMAL

REFLUJOvsNORMAL <- cbind(c(4,46), c(3, 47))

# Etiquetas para mayor claridad
rownames(REFLUJOvsNORMAL) <- c("ISED <= 1032.5", "ISED > 1032.5")
colnames(REFLUJOvsNORMAL) <- c("Reflujo hipersensible", "Normal")

# Mostrar la tabla
library(kableExtra)
kable(REFLUJOvsNORMAL, 
      caption = "Reflujo hipersensible VS NORMAL")

Reflujo hipersensible VS NORMAL
	Reflujo hipersensible	Normal
ISED <= 1032.5	4	3
ISED > 1032.5	46	47

epi.2by2(REFLUJOvsNORMAL, method = "case.control", conf.level = 0.95) #Reflujo hipersensibleL VS NORMAL

##              Outcome+    Outcome-      Total                 Odds
## Exposed +           4           3          7  1.33 (0.17 to 6.00)
## Exposed -          46          47         93  0.98 (0.66 to 1.45)
## Total              50          50        100  1.00 (0.67 to 1.50)
## 
## Point estimates and 95% CIs:
## -------------------------------------------------------------------
## Exposure odds ratio                            1.36 (0.29, 6.43)
## Attrib fraction (est) in the exposed (%)      26.60 (-211.57, 82.62)
## Attrib fraction (est) in the population (%)   2.13 (0.89, 3.51)
## -------------------------------------------------------------------
## Yates corrected chi2 test that OR = 1: chi2(1) = 0.000 Pr>chi2 = 1.000
## Fisher exact test that OR = 1: Pr>chi2 = 1.000
##  Wald confidence limits
##  CI: confidence interval

ISCED

Nicolas Martínez Ramos

2025-09-13

Analisis descriptivo inicial

Data Frame Summary

BASE

Revisando en donde hay diferencias variables cuantitativas

Diferencias variable edad

Diferencias variable IMC

Diferencias variable ISCED

Difencias TEA

Diferencias N de eventos de reflujo

Graficando el ISCED por cada diagnóstico

Analisis de correlacion TEA - ISCED

Analisis de correlacion número de eventos de reflujo e ISCED

Punto de corte

ORs bivariados = Desenlace tener ISCED mediana igual o menor a 1032.5