Análisis de la Mortalidad Infantil
Este informe presenta un análisis inferencial sobre la tasa de
mortalidad infantil a partir de datos de calidad del agua. Se ajusta una
distribución uniforme a los datos filtrados y se estima la probabilidad
de ciertos eventos.
options(repos = c(CRAN = "https://cran.rstudio.com"))
install.packages("readxl")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readxl' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readxl'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readxl\libs\x64\readxl.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readxl\libs\x64\readxl.dll:
## Permission denied
## Warning: restored 'readxl'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("readr")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'readr' successfully unpacked and MD5 sums checked
## Warning: cannot remove prior installation of package 'readr'
## Warning in file.copy(savedcopy, lib, recursive = TRUE): problema al copiar
## C:\Users\Usuario\AppData\Local\R\win-library\4.4\00LOCK\readr\libs\x64\readr.dll
## a C:\Users\Usuario\AppData\Local\R\win-library\4.4\readr\libs\x64\readr.dll:
## Permission denied
## Warning: restored 'readr'
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("knitr")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'knitr' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("kableExtra")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'kableExtra' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
install.packages("rmarkdown")
## Installing package into 'C:/Users/Usuario/AppData/Local/R/win-library/4.4'
## (as 'lib' is unspecified)
## package 'rmarkdown' successfully unpacked and MD5 sums checked
##
## The downloaded binary packages are in
## C:\Users\Usuario\AppData\Local\Temp\RtmpueowC2\downloaded_packages
library(readxl)
library(readr)
library(knitr)
library(kableExtra)
library(rmarkdown)
datos <- read_csv("C:/Users/Usuario/Downloads/water_pollution_disease (2).csv")
## Rows: 3000 Columns: 24
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): Country, Region, Water Source Type, Water Treatment Method
## dbl (20): Year, Contaminant Level (ppm), pH Level, Turbidity (NTU), Dissolve...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
head(datos)
## # A tibble: 6 × 24
## Country Region Year `Water Source Type` Contaminant Level (pp…¹ `pH Level`
## <chr> <chr> <dbl> <chr> <dbl> <dbl>
## 1 Mexico North 2015 Lake 6.06 7.12
## 2 Brazil West 2017 Well 5.24 7.84
## 3 Indonesia Central 2022 Pond 0.24 6.43
## 4 Nigeria East 2016 Well 7.91 6.71
## 5 Mexico South 2005 Well 0.12 8.16
## 6 Ethiopia West 2013 Tap 2.93 8.21
## # ℹ abbreviated name: ¹`Contaminant Level (ppm)`
## # ℹ 18 more variables: `Turbidity (NTU)` <dbl>,
## # `Dissolved Oxygen (mg/L)` <dbl>, `Nitrate Level (mg/L)` <dbl>,
## # `Lead Concentration (µg/L)` <dbl>, `Bacteria Count (CFU/mL)` <dbl>,
## # `Water Treatment Method` <chr>,
## # `Access to Clean Water (% of Population)` <dbl>,
## # `Diarrheal Cases per 100,000 people` <dbl>, …
Histograma de Mortalidad Infantil (filtrada)
# VARIABLE CUANTITATIVA CONTINUA
str(datos)
## spc_tbl_ [3,000 × 24] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ Country : chr [1:3000] "Mexico" "Brazil" "Indonesia" "Nigeria" ...
## $ Region : chr [1:3000] "North" "West" "Central" "East" ...
## $ Year : num [1:3000] 2015 2017 2022 2016 2005 ...
## $ Water Source Type : chr [1:3000] "Lake" "Well" "Pond" "Well" ...
## $ Contaminant Level (ppm) : num [1:3000] 6.06 5.24 0.24 7.91 0.12 2.93 0.06 3.76 0.63 9.14 ...
## $ pH Level : num [1:3000] 7.12 7.84 6.43 6.71 8.16 8.21 6.11 6.42 6.29 6.45 ...
## $ Turbidity (NTU) : num [1:3000] 3.93 4.79 0.79 1.96 4.22 4.03 3.12 1.35 1.42 0.62 ...
## $ Dissolved Oxygen (mg/L) : num [1:3000] 4.28 3.86 3.42 3.12 9.15 8.66 6.97 9.99 9.67 7.59 ...
## $ Nitrate Level (mg/L) : num [1:3000] 8.28 15.74 36.67 36.92 49.35 ...
## $ Lead Concentration (µg/L) : num [1:3000] 7.89 14.68 9.96 6.77 12.51 ...
## $ Bacteria Count (CFU/mL) : num [1:3000] 3344 2122 2330 3779 4182 ...
## $ Water Treatment Method : chr [1:3000] "Filtration" "Boiling" "None" "Boiling" ...
## $ Access to Clean Water (% of Population) : num [1:3000] 33.6 89.5 35.3 57.5 36.6 ...
## $ Diarrheal Cases per 100,000 people : num [1:3000] 472 122 274 3 466 258 208 397 265 261 ...
## $ Cholera Cases per 100,000 people : num [1:3000] 33 27 39 33 31 22 23 0 23 2 ...
## $ Typhoid Cases per 100,000 people : num [1:3000] 44 8 50 13 68 55 90 10 29 38 ...
## $ Infant Mortality Rate (per 1,000 live births): num [1:3000] 76.2 77.3 48.5 95.7 58.8 ...
## $ GDP per Capita (USD) : num [1:3000] 57057 17220 86022 31166 25661 ...
## $ Healthcare Access Index (0-100) : num [1:3000] 96.9 84.7 58.4 39.1 23 ...
## $ Urbanization Rate (%) : num [1:3000] 84.6 73.4 72.9 71.1 55.5 ...
## $ Sanitation Coverage (% of Population) : num [1:3000] 63.2 29.1 93.6 94.2 69.2 ...
## $ Rainfall (mm per year) : num [1:3000] 2800 1572 2074 937 2295 ...
## $ Temperature (°C) : num [1:3000] 4.94 16.93 21.73 3.79 31.44 ...
## $ Population Density (people per km²) : num [1:3000] 593 234 57 555 414 775 584 111 538 250 ...
## - attr(*, "spec")=
## .. cols(
## .. Country = col_character(),
## .. Region = col_character(),
## .. Year = col_double(),
## .. `Water Source Type` = col_character(),
## .. `Contaminant Level (ppm)` = col_double(),
## .. `pH Level` = col_double(),
## .. `Turbidity (NTU)` = col_double(),
## .. `Dissolved Oxygen (mg/L)` = col_double(),
## .. `Nitrate Level (mg/L)` = col_double(),
## .. `Lead Concentration (µg/L)` = col_double(),
## .. `Bacteria Count (CFU/mL)` = col_double(),
## .. `Water Treatment Method` = col_character(),
## .. `Access to Clean Water (% of Population)` = col_double(),
## .. `Diarrheal Cases per 100,000 people` = col_double(),
## .. `Cholera Cases per 100,000 people` = col_double(),
## .. `Typhoid Cases per 100,000 people` = col_double(),
## .. `Infant Mortality Rate (per 1,000 live births)` = col_double(),
## .. `GDP per Capita (USD)` = col_double(),
## .. `Healthcare Access Index (0-100)` = col_double(),
## .. `Urbanization Rate (%)` = col_double(),
## .. `Sanitation Coverage (% of Population)` = col_double(),
## .. `Rainfall (mm per year)` = col_double(),
## .. `Temperature (°C)` = col_double(),
## .. `Population Density (people per km²)` = col_double()
## .. )
## - attr(*, "problems")=<externalptr>
mortalidad_infantil <- datos$`Infant Mortality Rate (per 1,000 live births)`
mortalidad_infantil <- na.omit(mortalidad_infantil)
# Limitar la variable para evitar outliers (percentil 5% al 95%)
lim_inf <- quantile(mortalidad_infantil, 0.05)
lim_sup <- quantile(mortalidad_infantil, 0.95)
mortalidad_filtrada <- mortalidad_infantil[mortalidad_infantil >= lim_inf & mortalidad_infantil <= lim_sup]
# Crear histograma con breaks de Sturges
Histograma <- hist(mortalidad_filtrada, freq = FALSE, breaks = "Sturges",
main = "Modelo de probabilidad",
ylab = "Densidad de probabilidad",
xlab = "Mortalidad Infantil (por 1,000 nacidos vivos)",
col = "orange")
# Parámetros para la distribución uniforme ajustada
min_uni <- lim_inf
max_uni <- lim_sup
# Graficar la curva de la distribución uniforme ajustada
curve(dunif(x, min_uni, max_uni), col = "red", lwd = 2, add = TRUE)

# Frecuencias observadas absolutas del histograma (FO)
FO_l <- Histograma$counts
# Calcular frecuencias esperadas (FE) según la distribución uniforme para cada intervalo
FE_l <- numeric(length(FO_l))
for (i in 1:length(FO_l)) {
p_i <- punif(Histograma$breaks[i + 1], min_uni, max_uni) - punif(Histograma$breaks[i], min_uni, max_uni)
FE_l[i] <- p_i * length(mortalidad_filtrada)
}
# Test de correlación de Pearson entre FO y FE
pearson_r <- cor(FO_l, FE_l)
cat("Coeficiente de correlación de Pearson:", round(pearson_r, 4), "\n")
## Coeficiente de correlación de Pearson: 0.9508
# Test de Chi-cuadrado
X2 <- sum((FO_l - FE_l)^2 / FE_l)
cat("Estadístico Chi-cuadrado:", round(X2, 4), "\n")
## Estadístico Chi-cuadrado: 16.3051
# Valor crítico Chi-cuadrado con nivel 99% y grados de libertad = número de clases - 1
chi_critico <- qchisq(0.99, df = length(FO_l) - 1)
cat("Valor crítico Chi-cuadrado (99%):", round(chi_critico, 4), "\n")
## Valor crítico Chi-cuadrado (99%): 34.8053
# Estimar la probabilidad de que la mortalidad
P1 <- (punif(60, min_uni, max_uni) - punif(40, min_uni, max_uni)) * 100
P1
## [1] 22.66405
# Mostrar resultado en gráfico tipo presentación
Sys.setlocale("LC_CTYPE", "en_US.UTF-8") # Evita problemas de codificación de caracteres
## [1] "en_US.UTF-8"
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "") # Crear un gráfico vacío
text(x = 1, y = 1,
labels = paste0("¿Cuál es la probabilidad de que\nla mortalidad infantil\nesté entre 40 y 60?\n\n",
"R: ", round(P1, 2), " (%)"),
cex = 1.5, col = "blue", font = 6)

# Secuencia general para toda la curva
x <- seq(min_uni, max_uni, length.out = 1000)
# Altura constante de la densidad uniforme
densidad_uniforme <- dunif(x, min = min_uni, max = max_uni)
# Crear la secuencia para sombrear el área entre 40 y 60
x_area <- seq(40, 60, length.out = 100)
y_area <- dunif(x_area, min = min_uni, max = max_uni)
# Graficar la distribución uniforme
plot(x, densidad_uniforme, type = "l", col = "red", lwd = 2,
main = "Distribución uniforme ajustada a la mortalidad infantil",
xlab = "Mortalidad infantil (por 1,000 nacidos vivos)",
ylab = "Densidad de probabilidad",
xaxt = "n", ylim = c(0, max(densidad_uniforme) + 0.002))
# Rellenar área bajo la curva entre 40 y 60
polygon(c(40, x_area, 60), c(0, y_area, 0),
col = rgb(0, 0, 1, alpha = 0.3), border = NA)
# Eje x personalizado
axis(1, at = c(round(min_uni), 40, 60, round(max_uni)),
labels = c(round(min_uni), "40", "60", round(max_uni)))
# leyenda
legend(x = 65, y = 0.013,
legend = c("Distribución uniforme", "Área bajo la curva entre 40 y 60"),
col = c("red", rgb(0, 0, 1, alpha = 0.3)),
lty = c(1, NA),
lwd = 2,
pch = c(NA, 15),
pt.cex = 2,
cex = 0.9, # Tamaño del texto
bty = "o", # Borde visible (cuadro)
text.col = "black") # Color del texto

# Estimación por intervalos de confianza para mortalidad infantil
media_mortalidad <- mean(mortalidad_filtrada)
desv_mortalidad <- sd(mortalidad_filtrada)
n_mortalidad <- length(mortalidad_filtrada)
# Cálculo de intervalo de confianza (95%)
limite_inf <- media_mortalidad - 2 * (desv_mortalidad / sqrt(n_mortalidad))
limite_sup <- media_mortalidad + 2 * (desv_mortalidad / sqrt(n_mortalidad))
limite_inf
## [1] 49.80654
limite_sup
## [1] 51.78219
# Crear tabla resumen del modelo uniforme aplicado a mortalidad infantil
rechazo <- X2 > chi_critico
tabla_modelos_1 <- data.frame(
"Mortalidad Infantil (por 1,000 nacidos vivos)" = c(paste0("[", round(min_uni, 2), " , ", round(max_uni, 2), "]"), ""),
"Modelo" = c("Uniforme", ""),
"Parámetros" = c(paste0("a = ", round(min_uni, 2), ", b = ", round(max_uni, 2)), ""),
"Test Pearson" = c(round(pearson_r, 2), ""),
"Test Chi-cuadrado" = c(ifelse(rechazo == TRUE, "Reprobado", "Aprobado"), ""),
"Dominio del Modelo" = c("x ∈ [a ; b]", "")
)
# Renombrar columnas para mejor presentación
colnames(tabla_modelos_1) <- c("Rango de la Variable", "Modelo",
"Parámetros", "Test de Pearson",
"Test Chi-cuadrado", "Dominio del Modelo")
tabla_intervalos <- data.frame(
"Intervalo de Confianza" = c("Límite Inferior", "Límite Superior"),
"Grado Confianza (%)" = c("95 %", ""),
"Mortalidad Infantil (por 1,000)" = c(round(limite_inf, 2), round(limite_sup, 2))
)
# Mostrar título como texto gráfico
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(x = 1, y = 1,
labels = "CONCLUSIONES",
cex = 2,
col = "blue",
font = 6)

kable(tabla_modelos_1, align = 'c',
caption = "Conclusiones del Modelo Uniforme para Mortalidad Infantil")
Conclusiones del Modelo Uniforme para Mortalidad
Infantil
| [6.85 , 95.09] |
Uniforme |
a = 6.85, b = 95.09 |
0.95 |
Aprobado |
x ∈ [a ; b] |
|
|
|
|
|
|
kable(tabla_intervalos, align = 'c',
caption = "Intervalos de Confianza de la Media Poblacional")
Intervalos de Confianza de la Media Poblacional
| Límite Inferior |
95 % |
49.81 |
| Límite Superior |
|
51.78 |