Análisis de la Mortalidad Infantil

Este informe presenta un análisis inferencial sobre la tasa de mortalidad infantil a partir de datos de calidad del agua. Se ajusta una distribución uniforme a los datos filtrados y se estima la probabilidad de ciertos eventos.

options(repos = c(CRAN = "https://cran.rstudio.com"))
install.packages("readxl")  
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readxl'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readxl\libs\x64\readxl.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readxl\libs\x64\readxl.dll:
## Permission denied
## Warning: restored 'readxl'
## 
## The downloaded binary packages are in
##  C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("readr")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readr\libs\x64\readr.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readr\libs\x64\readr.dll:
## Permission denied
## Warning: restored 'readr'
## 
## The downloaded binary packages are in
##  C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("knitr")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'knitr' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("kableExtra")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'kableExtra' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("rmarkdown")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'rmarkdown' successfully unpacked and MD5 sums checked
## 
## The downloaded binary packages are in
##  C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
library(readxl)
library(readr)
library(knitr)
library(kableExtra)
library(rmarkdown)

datos <- read_csv("C:/Users/Usuario/Downloads/water_pollution_disease (2).csv")
## Rows: 3000 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (4): Country, Region, Water Source Type, Water Treatment Method
## dbl (20): Year, Contaminant Level (ppm), pH Level, Turbidity (NTU), Dissolve...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 24
##   Country   Region   Year `Water Source Type` Contaminant Level (pp…¹ `pH Level`
##   <chr>     <chr>   <dbl> <chr>                                 <dbl>      <dbl>
## 1 Mexico    North    2015 Lake                                   6.06       7.12
## 2 Brazil    West     2017 Well                                   5.24       7.84
## 3 Indonesia Central  2022 Pond                                   0.24       6.43
## 4 Nigeria   East     2016 Well                                   7.91       6.71
## 5 Mexico    South    2005 Well                                   0.12       8.16
## 6 Ethiopia  West     2013 Tap                                    2.93       8.21
## # ℹ abbreviated name: ¹​`Contaminant Level (ppm)`
## # ℹ 18 more variables: `Turbidity (NTU)` <dbl>,
## #   `Dissolved Oxygen (mg/L)` <dbl>, `Nitrate Level (mg/L)` <dbl>,
## #   `Lead Concentration (µg/L)` <dbl>, `Bacteria Count (CFU/mL)` <dbl>,
## #   `Water Treatment Method` <chr>,
## #   `Access to Clean Water (% of Population)` <dbl>,
## #   `Diarrheal Cases per 100,000 people` <dbl>, …

Histograma de Mortalidad Infantil (filtrada)

# VARIABLE CUANTITATIVA CONTINUA 
str(datos)
## spc_tbl_ [3,000 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ Country                                      : chr [1:3000] "Mexico" "Brazil" "Indonesia" "Nigeria" ...
##  $ Region                                       : chr [1:3000] "North" "West" "Central" "East" ...
##  $ Year                                         : num [1:3000] 2015 2017 2022 2016 2005 ...
##  $ Water Source Type                            : chr [1:3000] "Lake" "Well" "Pond" "Well" ...
##  $ Contaminant Level (ppm)                      : num [1:3000] 6.06 5.24 0.24 7.91 0.12 2.93 0.06 3.76 0.63 9.14 ...
##  $ pH Level                                     : num [1:3000] 7.12 7.84 6.43 6.71 8.16 8.21 6.11 6.42 6.29 6.45 ...
##  $ Turbidity (NTU)                              : num [1:3000] 3.93 4.79 0.79 1.96 4.22 4.03 3.12 1.35 1.42 0.62 ...
##  $ Dissolved Oxygen (mg/L)                      : num [1:3000] 4.28 3.86 3.42 3.12 9.15 8.66 6.97 9.99 9.67 7.59 ...
##  $ Nitrate Level (mg/L)                         : num [1:3000] 8.28 15.74 36.67 36.92 49.35 ...
##  $ Lead Concentration (µg/L)                    : num [1:3000] 7.89 14.68 9.96 6.77 12.51 ...
##  $ Bacteria Count (CFU/mL)                      : num [1:3000] 3344 2122 2330 3779 4182 ...
##  $ Water Treatment Method                       : chr [1:3000] "Filtration" "Boiling" "None" "Boiling" ...
##  $ Access to Clean Water (% of Population)      : num [1:3000] 33.6 89.5 35.3 57.5 36.6 ...
##  $ Diarrheal Cases per 100,000 people           : num [1:3000] 472 122 274 3 466 258 208 397 265 261 ...
##  $ Cholera Cases per 100,000 people             : num [1:3000] 33 27 39 33 31 22 23 0 23 2 ...
##  $ Typhoid Cases per 100,000 people             : num [1:3000] 44 8 50 13 68 55 90 10 29 38 ...
##  $ Infant Mortality Rate (per 1,000 live births): num [1:3000] 76.2 77.3 48.5 95.7 58.8 ...
##  $ GDP per Capita (USD)                         : num [1:3000] 57057 17220 86022 31166 25661 ...
##  $ Healthcare Access Index (0-100)              : num [1:3000] 96.9 84.7 58.4 39.1 23 ...
##  $ Urbanization Rate (%)                        : num [1:3000] 84.6 73.4 72.9 71.1 55.5 ...
##  $ Sanitation Coverage (% of Population)        : num [1:3000] 63.2 29.1 93.6 94.2 69.2 ...
##  $ Rainfall (mm per year)                       : num [1:3000] 2800 1572 2074 937 2295 ...
##  $ Temperature (°C)                             : num [1:3000] 4.94 16.93 21.73 3.79 31.44 ...
##  $ Population Density (people per km²)          : num [1:3000] 593 234 57 555 414 775 584 111 538 250 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   Country = col_character(),
##   ..   Region = col_character(),
##   ..   Year = col_double(),
##   ..   `Water Source Type` = col_character(),
##   ..   `Contaminant Level (ppm)` = col_double(),
##   ..   `pH Level` = col_double(),
##   ..   `Turbidity (NTU)` = col_double(),
##   ..   `Dissolved Oxygen (mg/L)` = col_double(),
##   ..   `Nitrate Level (mg/L)` = col_double(),
##   ..   `Lead Concentration (µg/L)` = col_double(),
##   ..   `Bacteria Count (CFU/mL)` = col_double(),
##   ..   `Water Treatment Method` = col_character(),
##   ..   `Access to Clean Water (% of Population)` = col_double(),
##   ..   `Diarrheal Cases per 100,000 people` = col_double(),
##   ..   `Cholera Cases per 100,000 people` = col_double(),
##   ..   `Typhoid Cases per 100,000 people` = col_double(),
##   ..   `Infant Mortality Rate (per 1,000 live births)` = col_double(),
##   ..   `GDP per Capita (USD)` = col_double(),
##   ..   `Healthcare Access Index (0-100)` = col_double(),
##   ..   `Urbanization Rate (%)` = col_double(),
##   ..   `Sanitation Coverage (% of Population)` = col_double(),
##   ..   `Rainfall (mm per year)` = col_double(),
##   ..   `Temperature (°C)` = col_double(),
##   ..   `Population Density (people per km²)` = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
mortalidad_infantil <- datos$`Infant Mortality Rate (per 1,000 live births)`
mortalidad_infantil <- na.omit(mortalidad_infantil)

# Limitar la variable para evitar outliers (percentil 5% al 95%)
lim_inf <- quantile(mortalidad_infantil, 0.05)
lim_sup <- quantile(mortalidad_infantil, 0.95)
mortalidad_filtrada <- mortalidad_infantil[mortalidad_infantil >= lim_inf & mortalidad_infantil <= lim_sup]

# Crear histograma con breaks de Sturges
Histograma <- hist(mortalidad_filtrada, freq = FALSE, breaks = "Sturges",
                   main = "Modelo de probabilidad",
                   ylab = "Densidad de probabilidad",
                   xlab = "Mortalidad Infantil (por 1,000 nacidos vivos)",
                   col = "orange")

# Parámetros para la distribución uniforme ajustada
min_uni <- lim_inf
max_uni <- lim_sup

# Graficar la curva de la distribución uniforme ajustada
curve(dunif(x, min_uni, max_uni), col = "red", lwd = 2, add = TRUE)

# Frecuencias observadas absolutas del histograma (FO)
FO_l <- Histograma$counts

# Calcular frecuencias esperadas (FE) según la distribución uniforme para cada intervalo
FE_l <- numeric(length(FO_l))
for (i in 1:length(FO_l)) {
  p_i <- punif(Histograma$breaks[i + 1], min_uni, max_uni) - punif(Histograma$breaks[i], min_uni, max_uni)
  FE_l[i] <- p_i * length(mortalidad_filtrada)
}

# Test de correlación de Pearson entre FO y FE
pearson_r <- cor(FO_l, FE_l)
cat("Coeficiente de correlación de Pearson:", round(pearson_r, 4), "\n")
## Coeficiente de correlación de Pearson: 0.9508
# Test de Chi-cuadrado
X2 <- sum((FO_l - FE_l)^2 / FE_l)
cat("Estadístico Chi-cuadrado:", round(X2, 4), "\n")
## Estadístico Chi-cuadrado: 16.3051
# Valor crítico Chi-cuadrado con nivel 99% y grados de libertad = número de clases - 1
chi_critico <- qchisq(0.99, df = length(FO_l) - 1)
cat("Valor crítico Chi-cuadrado (99%):", round(chi_critico, 4), "\n")
## Valor crítico Chi-cuadrado (99%): 34.8053
# Estimar la probabilidad de que la mortalidad 
P1 <- (punif(60, min_uni, max_uni) - punif(40, min_uni, max_uni)) * 100
P1
## [1] 22.66405
# Mostrar resultado en gráfico tipo presentación
Sys.setlocale("LC_CTYPE", "en_US.UTF-8")  # Evita problemas de codificación de caracteres
## [1] "en_US.UTF-8"
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")  # Crear un gráfico vacío
text(x = 1, y = 1,
     labels = paste0("¿Cuál es la probabilidad de que\nla mortalidad infantil\nesté entre 40 y 60?\n\n",
                     "R: ", round(P1, 2), " (%)"),
     cex = 1.5, col = "blue", font = 6)

# Secuencia general para toda la curva
x <- seq(min_uni, max_uni, length.out = 1000)

# Altura constante de la densidad uniforme
densidad_uniforme <- dunif(x, min = min_uni, max = max_uni)

# Crear la secuencia para sombrear el área entre 40 y 60
x_area <- seq(40, 60, length.out = 100)
y_area <- dunif(x_area, min = min_uni, max = max_uni)

# Graficar la distribución uniforme
plot(x, densidad_uniforme, type = "l", col = "red", lwd = 2,
     main = "Distribución uniforme ajustada a la mortalidad infantil",
     xlab = "Mortalidad infantil (por 1,000 nacidos vivos)",
     ylab = "Densidad de probabilidad",
     xaxt = "n", ylim = c(0, max(densidad_uniforme) + 0.002))

# Rellenar área bajo la curva entre 40 y 60
polygon(c(40, x_area, 60), c(0, y_area, 0),
        col = rgb(0, 0, 1, alpha = 0.3), border = NA)

# Eje x personalizado
axis(1, at = c(round(min_uni), 40, 60, round(max_uni)),
     labels = c(round(min_uni), "40", "60", round(max_uni)))

# leyenda
legend(x = 65, y = 0.013,
       legend = c("Distribución uniforme", "Área bajo la curva entre 40 y 60"),
       col = c("red", rgb(0, 0, 1, alpha = 0.3)),
       lty = c(1, NA),
       lwd = 2,
       pch = c(NA, 15),
       pt.cex = 2,
       cex = 0.9,        # Tamaño del texto
       bty = "o",        # Borde visible (cuadro)
       text.col = "black")  # Color del texto

# Estimación por intervalos de confianza para mortalidad infantil
media_mortalidad <- mean(mortalidad_filtrada)
desv_mortalidad <- sd(mortalidad_filtrada)
n_mortalidad <- length(mortalidad_filtrada)

# Cálculo de intervalo de confianza (95%)
limite_inf <- media_mortalidad - 2 * (desv_mortalidad / sqrt(n_mortalidad))
limite_sup <- media_mortalidad + 2 * (desv_mortalidad / sqrt(n_mortalidad))

limite_inf  
## [1] 49.80654
limite_sup  
## [1] 51.78219
# Crear tabla resumen del modelo uniforme aplicado a mortalidad infantil
rechazo <- X2 > chi_critico 
tabla_modelos_1 <- data.frame(
  "Mortalidad Infantil (por 1,000 nacidos vivos)" = c(paste0("[", round(min_uni, 2), " , ", round(max_uni, 2), "]"), ""),
  "Modelo" = c("Uniforme", ""),
  "Parámetros" = c(paste0("a = ", round(min_uni, 2), ", b = ", round(max_uni, 2)), ""),
  "Test Pearson" = c(round(pearson_r, 2), ""),
  "Test Chi-cuadrado" = c(ifelse(rechazo == TRUE, "Reprobado", "Aprobado"), ""),
  "Dominio del Modelo" = c("x ∈ [a ; b]", "")
)

# Renombrar columnas para mejor presentación
colnames(tabla_modelos_1) <- c("Rango de la Variable", "Modelo",
                               "Parámetros", "Test de Pearson",
                               "Test Chi-cuadrado", "Dominio del Modelo")
tabla_intervalos <- data.frame(
  "Intervalo de Confianza" = c("Límite Inferior", "Límite Superior"),
  "Grado Confianza (%)" = c("95 %", ""),
  "Mortalidad Infantil (por 1,000)" = c(round(limite_inf, 2), round(limite_sup, 2))
)

# Mostrar título como texto gráfico
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(x = 1, y = 1,
     labels = "CONCLUSIONES",
     cex = 2,
     col = "blue",
     font = 6)

kable(tabla_modelos_1, align = 'c',
      caption = "Conclusiones del Modelo Uniforme para Mortalidad Infantil")
Conclusiones del Modelo Uniforme para Mortalidad Infantil
Rango de la Variable Modelo Parámetros Test de Pearson Test Chi-cuadrado Dominio del Modelo
[6.85 , 95.09] Uniforme a = 6.85, b = 95.09 0.95 Aprobado x ∈ [a ; b]
kable(tabla_intervalos, align = 'c',
      caption = "Intervalos de Confianza de la Media Poblacional")
Intervalos de Confianza de la Media Poblacional
Intervalo.de.Confianza Grado.Confianza…. Mortalidad.Infantil..por.1.000.
Límite Inferior 95 % 49.81
Límite Superior 51.78