1. Carga de datos
# UNIVERSIDAD CENTRAL DEL ECUADOR
# Facultad de Ingeniería en Geología, Minas, Petróleos y Ambiental
# Ingeniería Ambiental
# Autor: Edgar Saavedra / Grupo 1
# Fecha: 04/02/2026
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gt)
# Cargar base China
datos <- read.csv("china_water_pollution_data.csv",
header = TRUE,
sep = ",",
dec = ".",
fileEncoding = "UTF-8")
str(datos)
## 'data.frame': 3000 obs. of 25 variables:
## $ Province : chr "Zhejiang" "Sichuan" "Zhejiang" "Beijing" ...
## $ City : chr "Ningbo" "Mianyang" "Ningbo" "Beijing" ...
## $ Monitoring_Station : chr "Ningbo_Station_2" "Mianyang_Station_1" "Ningbo_Station_8" "Beijing_Station_10" ...
## $ Latitude : num 25.5 32.2 30 30 43.5 ...
## $ Longitude : num 123 113 125 118 122 ...
## $ Date : chr "2023-06-01" "2023-03-05" "2023-07-13" "2023-02-17" ...
## $ Water_Temperature_C : num 22.5 27.3 21 16.6 21.8 ...
## $ pH : num 6.93 6.89 6.02 7.31 7.77 7.16 7.2 7.38 7.77 7.17 ...
## $ Dissolved_Oxygen_mg_L : num 9.3 8.14 5.34 10.06 7.93 ...
## $ Conductivity_uS_cm : num 652 358 520 593 656 ...
## $ Turbidity_NTU : num 0.85 4.49 17.46 7.38 3.7 ...
## $ Nitrate_mg_L : num 2.14 2.06 2.11 1.9 1.8 2.69 1.99 2.7 1.82 2.11 ...
## $ Nitrite_mg_L : num 0.03 0.015 0.029 0.014 0.019 0.022 0.003 0.034 0.026 0.028 ...
## $ Ammonia_N_mg_L : num 0.38 0.38 0.3 0.2 0.22 0.44 0.29 0.32 0.71 0.53 ...
## $ Total_Phosphorus_mg_L : num 0.074 0.147 0.021 0.155 0.152 0.134 0.05 0.104 0.126 0.039 ...
## $ Total_Nitrogen_mg_L : num 2.71 3.15 3.39 2.91 3.45 2.87 3.05 2.75 3.68 3.52 ...
## $ COD_mg_L : num 15.4 16.8 17.3 17.9 20.1 ...
## $ BOD_mg_L : num 1.39 2.98 2.65 5.18 3.47 5.27 3.34 5.71 3.68 4.73 ...
## $ Heavy_Metals_Pb_ug_L : num 6.9 4.68 3.24 3.2 2.01 4.42 5.01 6.75 4.59 3.09 ...
## $ Heavy_Metals_Cd_ug_L : num 0.66 0.39 0.27 0.67 0.34 -0.03 0.6 0.5 0.21 0.35 ...
## $ Heavy_Metals_Hg_ug_L : num 0.02 0.1 0.11 0.11 0.14 0.12 0.09 0.08 0.16 0.04 ...
## $ Coliform_Count_CFU_100mL: int 87 116 110 99 82 91 92 91 95 108 ...
## $ Water_Quality_Index : num 36.6 66.2 98.7 71.3 16.1 ...
## $ Pollution_Level : chr "Very Poor" "Excellent" "Poor" "Poor" ...
## $ Remarks : chr "High pollution spike detected" "High pollution spike detected" "High pollution spike detected" "Monitoring recommended" ...
3. Tabla de distribución de frecuencia
# Histograma base para obtener intervalos
Histograma_COD <- hist(COD, plot = FALSE)
breaks <- Histograma_COD$breaks
Li <- breaks[1:(length(breaks)-1)]
Ls <- breaks[2:length(breaks)]
ni <- Histograma_COD$counts
n <- length(COD)
hi <- round((ni / n) * 100, 2)
TDF_COD <- data.frame(
Intervalo = paste0("[", round(Li, 2), " - ", round(Ls, 2), ")"),
ni = ni,
`hi(%)` = hi
)
# Fila total
TDF_COD <- rbind(TDF_COD, data.frame(
Intervalo = "Totales",
ni = sum(ni),
`hi(%)` = 100
))
# Tabla con gt
TDF_COD %>%
gt() %>%
tab_header(
title = md("*Tabla Nro. 1*"),
subtitle = md("**Distribución de frecuencia simplificada de la DQO (COD) en la base de contaminación de agua (China, 2023)**")
) %>%
tab_source_note(
source_note = md("Autor: Grupo 1")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
)
| Tabla Nro. 1 |
| Distribución de frecuencia simplificada de la DQO (COD) en la base de contaminación de agua (China, 2023) |
| Intervalo |
ni |
hi... |
| [2 - 4) |
1 |
0.03 |
| [4 - 6) |
8 |
0.27 |
| [6 - 8) |
17 |
0.57 |
| [8 - 10) |
41 |
1.37 |
| [10 - 12) |
106 |
3.53 |
| [12 - 14) |
169 |
5.63 |
| [14 - 16) |
296 |
9.87 |
| [16 - 18) |
407 |
13.57 |
| [18 - 20) |
474 |
15.80 |
| [20 - 22) |
462 |
15.40 |
| [22 - 24) |
362 |
12.07 |
| [24 - 26) |
310 |
10.33 |
| [26 - 28) |
183 |
6.10 |
| [28 - 30) |
103 |
3.43 |
| [30 - 32) |
35 |
1.17 |
| [32 - 34) |
17 |
0.57 |
| [34 - 36) |
7 |
0.23 |
| [36 - 38) |
2 |
0.07 |
| Totales |
3000 |
100.00 |
| Autor: Grupo 1 |
6. Cálculo de probabilidades (LOGNORMAL - COD)
# Probabilidad entre 20 y 30 mg/L
P_COD_20_30 <- (plnorm(30, meanlog = mulog, sdlog = sigmalog) -
plnorm(20, meanlog = mulog, sdlog = sigmalog)) * 100
P_COD_20_30
## [1] 39.35829
# Texto grande estilo RPubs
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(
x = 1, y = 1,
labels = paste0(
"¿Cuál es la probabilidad de que\n",
"al seleccionar al azar una muestra de\n",
"agua, la DQO (COD) esté\n",
"entre 20 y 30 mg/L?\n",
"R: ", round(P_COD_20_30, 2), " %"
),
cex = 2,
col = "blue",
font = 6
)

# Secuencia de valores de x (COD)
x <- seq(min(COD), max(COD), by = 0.001)
# Densidad log-normal
y <- dlnorm(x, meanlog = mulog, sdlog = sigmalog)
# Gráfico de densidad lognormal
plot(
x, y,
type = "l",
col = "skyblue3",
lwd = 2,
xlab = "COD (mg/L)",
ylab = "Densidad de probabilidad",
main = "Gráfica: Cálculo de probabilidad (Log-normal)\nentre 20 y 30 mg/L (China, 2023)"
)
# Rango a sombrear
x_sombra <- seq(20, 30, by = 0.001)
y_sombra <- dlnorm(x_sombra, meanlog = mulog, sdlog = sigmalog)
# Curva roja del tramo
lines(x_sombra, y_sombra, col = "red", lwd = 2)
# Área sombreada
polygon(
c(x_sombra, rev(x_sombra)),
c(y_sombra, rep(0, length(y_sombra))),
col = rgb(1, 0, 0, 0.5),
border = NA
)
# Leyenda
legend(
"topright",
legend = c("Modelo Log-normal", "Área de probabilidad (20-30)"),
col = c("skyblue3", "red"),
lwd = 2,
bty = "n",
cex = 0.7
)

# ============================================================
# 7. CONCLUSIONES DEL MODELO LOG-NORMAL (COD)
# ============================================================
tabla_modelos_COD <- data.frame(
"Variable" = c("DQO (COD)", ""),
"Modelo" = c("Log-normal", ""),
"Parámetros" = c(
paste0("µlog = ", round(mulog, 3)),
paste0("σlog = ", round(sigmalog, 3))
),
"Test Pearson (%)" = c(round(Correlación, 2), ""),
"Test Chi-cuadrado" = c("Aprobado", "")
)
colnames(tabla_modelos_COD) <- c(
"Variable",
"Modelo",
"Parámetros",
"Test Pearson (%)",
"Test Chi-cuadrado"
)
tabla_modelos_COD
## Variable Modelo Parámetros Test Pearson (%) Test Chi-cuadrado
## 1 DQO (COD) Log-normal µlog = 2.96 96.7 Aprobado
## 2 σlog = 0.276
library(knitr)
kable(
tabla_modelos_COD,
align = "c",
caption = "Tabla 1. Conclusiones del Modelo Log-normal para la DQO (COD)"
)
Tabla 1. Conclusiones del Modelo Log-normal para la DQO
(COD)
| DQO (COD) |
Log-normal |
µlog = 2.96 |
96.7 |
Aprobado |
|
|
σlog = 0.276 |
|
|
# ============================================================
# INTERVALO DE CONFIANZA DE LA MEDIA (95%)
# ============================================================
media_COD <- mean(COD)
sigma_COD <- sd(COD)
n <- length(COD)
error <- 2 * (sigma_COD / sqrt(n)) # aproximación 95%
LiminfIC <- round(media_COD - error, 2)
LimsupIC <- round(media_COD + error, 2)
LiminfIC
## [1] 19.81
LimsupIC
## [1] 20.17
tabla_confianza_COD <- data.frame(
"Intervalo de confianza" = c("Límite Inferior", "Límite Superior"),
"Grado de confianza (%)" = c("95%", ""),
"COD (mg/L)" = c(LiminfIC, LimsupIC)
)
tabla_confianza_COD
## Intervalo.de.confianza Grado.de.confianza.... COD..mg.L.
## 1 Límite Inferior 95% 19.81
## 2 Límite Superior 20.17
kable(
tabla_confianza_COD,
align = "c",
caption = "Tabla 2. Intervalo de Confianza de la Media Poblacional de la DQO (COD)"
)
Tabla 2. Intervalo de Confianza de la Media Poblacional de la
DQO (COD)
| Límite Inferior |
95% |
19.81 |
| Límite Superior |
|
20.17 |
# Conclusión textual
cat(
"La variable DQO (COD, mg/L) se ajusta adecuadamente a un modelo Log-normal ",
"con parámetros µlog =", round(mulog, 3),
"y σlog =", round(sigmalog, 3), ". ",
"El modelo es válido según los test de Pearson (",
round(Correlación, 2), "%) y Chi-cuadrado. ",
"Con un 95% de confianza, la media poblacional de COD ",
"se encuentra entre", LiminfIC, "y", LimsupIC, "mg/L."
)
## La variable DQO (COD, mg/L) se ajusta adecuadamente a un modelo Log-normal con parámetros µlog = 2.96 y σlog = 0.276 . El modelo es válido según los test de Pearson ( 96.7 %) y Chi-cuadrado. Con un 95% de confianza, la media poblacional de COD se encuentra entre 19.81 y 20.17 mg/L.