UNIVERSIDAD CENTRAL DEL ECUADOR
ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD
FECHA: 19/11/2025
# =========================
# ESTADÍSTICA Descriptiva
# Fecha: 19/11/2025
# =========================
# -------------------------
# Load data
# -------------------------
# NOTE(review): the working directory is hard-coded to one user's Downloads
# folder, so the script only runs unchanged on a machine with that path.
setwd("C:/Users/Alexander/Downloads")

# Read the comma-separated data set, keeping text columns as character.
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  sep = ",",
  stringsAsFactors = FALSE
)

# Continuous quantitative variable analysed in the rest of the script.
Concentracion_contaminante <- datos$Pollutant_Concentration_mg_kg
# Manual construction of the class intervals.
# Number of classes from the 1 + 3.3 * log10(n) rule, using the actual sample
# size instead of a hard-coded 3000 so the table follows the data.
k <- floor(1 + 3.3 * log10(length(Concentracion_contaminante)))

# NOTE: `min` and `max` keep their original names because later sections read
# them as numbers (e.g. `xlim = c(min, max)`). They shadow the base functions
# only as variables; calls such as `min(x)` still resolve to the functions.
min <- min(Concentracion_contaminante)
max <- max(Concentracion_contaminante)
R <- max - min # range of the variable
A <- R / k     # class width

# Lower and upper class limits built from the class index, so there are always
# exactly k of each.
Li <- round(min + A * (seq_len(k) - 1), 4)
Ls <- round(min + A * seq_len(k), 4)
MC <- round((Li + Ls) / 2, 2) # class marks (midpoints)

# Absolute frequency per class: every class is [Li, Ls) except the last one,
# which is closed on the right so the maximum is counted ([Li, max]).
ni <- as.numeric(tabulate(
  findInterval(Concentracion_contaminante, c(Li, max), rightmost.closed = TRUE),
  nbins = k
))

# Sanity check: the class counts must add up to the sample size.
sum(ni)
## [1] 3000
# Relative frequency of each class, expressed as a percentage.
hi <- (ni / sum(ni)) * 100

# Sanity check: the percentages must add up to 100.
sum(hi)
## [1] 100

# Cumulative absolute frequencies: ascending ("less than") and
# descending ("or more").
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))

# Cumulative percentages, rounded to whole percent.
Hiasc <- round(cumsum(hi), digits = 0)
Hidsc <- round(rev(cumsum(rev(hi))), digits = 0)
# Frequency distribution table: one row per class.
TDFConcentracion <- data.frame(
  Li = Li,
  Ls = Ls,
  MC = MC,
  ni = ni,
  hi = hi,
  Niasc = Niasc,
  Nidsc = Nidsc,
  Hiasc = Hiasc,
  Hidsc = Hidsc
)

# Totals shown in the closing row of the printed table.
total_ni <- sum(ni)
total_hi <- 100

# Same table with a totals row appended. Only ni and hi carry numbers in that
# row; the other cells are text placeholders.
TDFConcentracionCompleto <- rbind(
  TDFConcentracion,
  data.frame(
    Li = " Total", Ls = " ", MC = " ",
    ni = total_ni, hi = total_hi,
    Niasc = " ", Nidsc = " ", Hiasc = " ", Hidsc = " "
  )
)
#Formato tabla
library(gt)
library(dplyr)
# Optional rounding of the stored percentages. This only changes
# TDFConcentracion; the table rendered below is built from
# TDFConcentracionCompleto and formats `hi` with fmt_number().
TDFConcentracion$hi <- round(TDFConcentracion$hi, 2)

# Formatted frequency table (Tabla Nº1) with the totals row in bold.
tabla_Concentracion <- TDFConcentracionCompleto %>%
  gt() %>%
  fmt_number(
    columns = hi,
    decimals = 2
  ) %>%
  tab_header(
    title = md("*Tabla Nº1*"),
    subtitle = md("**Tabla de distribucion de Frecuencias
de la Concentración del Contaminante (mg/kg)**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo3")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      # Compare the trimmed label: a totals row labelled with surrounding
      # whitespace (e.g. " Total") would never equal "Total" exactly, and the
      # bold style would silently not be applied.
      rows = trimws(Li) == "Total"
    )
  )
tabla_Concentracion
| Tabla Nº1 | ||||||||
| Tabla de distribucion de Frecuencias de la Concentración del Contaminante (mg/kg) | ||||||||
| Li | Ls | MC | ni | hi | Niasc | Nidsc | Hiasc | Hidsc |
|---|---|---|---|---|---|---|---|---|
| 5.03 | 21.2767 | 13.15 | 254 | 8.47 | 254 | 3000 | 8 | 100 |
| 21.2767 | 37.5233 | 29.4 | 234 | 7.80 | 488 | 2746 | 16 | 92 |
| 37.5233 | 53.77 | 45.65 | 250 | 8.33 | 738 | 2512 | 25 | 84 |
| 53.77 | 70.0167 | 61.89 | 251 | 8.37 | 989 | 2262 | 33 | 75 |
| 70.0167 | 86.2633 | 78.14 | 248 | 8.27 | 1237 | 2011 | 41 | 67 |
| 86.2633 | 102.51 | 94.39 | 235 | 7.83 | 1472 | 1763 | 49 | 59 |
| 102.51 | 118.7567 | 110.63 | 278 | 9.27 | 1750 | 1528 | 58 | 51 |
| 118.7567 | 135.0033 | 126.88 | 246 | 8.20 | 1996 | 1250 | 67 | 42 |
| 135.0033 | 151.25 | 143.13 | 249 | 8.30 | 2245 | 1004 | 75 | 33 |
| 151.25 | 167.4967 | 159.37 | 276 | 9.20 | 2521 | 755 | 84 | 25 |
| 167.4967 | 183.7433 | 175.62 | 228 | 7.60 | 2749 | 479 | 92 | 16 |
| 183.7433 | 199.99 | 191.87 | 251 | 8.37 | 3000 | 251 | 100 | 8 |
| Total |  |  | 3000 | 100.00 |  |  |  |  |
| Autor: Grupo3 | ||||||||
# Histogram drawn with hist()'s default breaks. The returned object is kept
# because its breaks, mids and counts are reused to build a simplified table.
histoP <- hist(
  x = Concentracion_contaminante,
  col = "blue",
  xlab = "Concentración (mg/kg)",
  ylab = "Cantidad",
  main = "Grafica Nº1:Distribución de Concentración
del Contaminante (mg/kg)"
)
# Simplified frequency table taken from the histogram object.
Limites <- histoP$breaks
LimInf <- head(Limites, -1) # every break except the last
LimSup <- tail(Limites, -1) # every break except the first
Mc <- histoP$mids

# NOTE: `ni` and `hi` are reassigned here with the histogram-based values.
ni <- histoP$counts
sum(ni)
## [1] 3000
hi <- round(ni / sum(ni) * 100, digits = 2)
sum(hi)
## [1] 100

# Cumulative counts and percentages, ascending and descending.
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))
Hi_asc <- round(cumsum(hi), digits = 2)
Hi_dsc <- round(rev(cumsum(rev(hi))), digits = 2)

TDFC <- data.frame(
  LimInf = LimInf,
  LimSup = LimSup,
  Mc = Mc,
  ni = ni,
  hi = hi,
  Ni_asc = Ni_asc,
  Ni_dsc = Ni_dsc,
  Hi_asc = Hi_asc,
  Hi_dsc = Hi_dsc
)

# Totals row: only ni and hi carry numbers; the rest are text placeholders.
totalni <- sum(ni)
totalhi <- 100
TDFCCompleto <- rbind(
  TDFC,
  data.frame(
    LimInf = "Total", LimSup = " ", Mc = " ",
    ni = totalni, hi = totalhi,
    Ni_asc = " ", Ni_dsc = " ", Hi_asc = " ", Hi_dsc = " "
  )
)
# Formatted version of the simplified table (Tabla Nº2), totals row in bold.
tablaConc <- gt(TDFCCompleto) %>%
  tab_header(
    title = md("*Tabla Nº2*"),
    subtitle = md("**Tabla simplificada de distribucion de Frecuencias
de la Concentración del Contaminante (mg/kg)**")
  ) %>%
  tab_source_note(source_note = md("Autor:Grupo 3")) %>%
  tab_options(
    # Outer table borders
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label borders
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Body appearance
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = LimInf == "Total")
  )
tablaConc
| Tabla Nº2 | ||||||||
| Tabla simplificada de distribucion de Frecuencias de la Concentración del Contaminante (mg/kg) | ||||||||
| LimInf | LimSup | Mc | ni | hi | Ni_asc | Ni_dsc | Hi_asc | Hi_dsc |
|---|---|---|---|---|---|---|---|---|
| 0 | 20 | 10 | 233 | 7.77 | 233 | 3000 | 7.77 | 100 |
| 20 | 40 | 30 | 294 | 9.80 | 527 | 2767 | 17.57 | 92.23 |
| 40 | 60 | 50 | 311 | 10.37 | 838 | 2473 | 27.94 | 82.43 |
| 60 | 80 | 70 | 307 | 10.23 | 1145 | 2162 | 38.17 | 72.06 |
| 80 | 100 | 90 | 289 | 9.63 | 1434 | 1855 | 47.8 | 61.83 |
| 100 | 120 | 110 | 332 | 11.07 | 1766 | 1566 | 58.87 | 52.2 |
| 120 | 140 | 130 | 301 | 10.03 | 2067 | 1234 | 68.9 | 41.13 |
| 140 | 160 | 150 | 344 | 11.47 | 2411 | 933 | 80.37 | 31.1 |
| 160 | 180 | 170 | 285 | 9.50 | 2696 | 589 | 89.87 | 19.63 |
| 180 | 200 | 190 | 304 | 10.13 | 3000 | 304 | 100 | 10.13 |
| Total |  |  | 3000 | 100.00 |  |  |  |  |
| Autor:Grupo 3 | ||||||||
# Charts
# Histogram on the manually computed class limits; the y axis is left to
# hist()'s automatic range ("Local" view).
hist(
  x = Concentracion_contaminante,
  breaks = seq(from = min, to = max, by = A),
  col = "#4A90E2",
  xlab = "Concentración (mg/kg)",
  ylab = "Frecuencia",
  main = "Gráfica Nº2: Frecuencia de la Concentración del contaminante (Local)",
  cex.lab = 1.1,
  cex.main = 1.1
)
# Global view: same histogram, with the y axis running from 0 to the total
# number of observations so each bar is seen against the whole sample.
# The upper limit is taken from the data instead of a hard-coded 3000.
hist(
  Concentracion_contaminante,
  breaks = seq(min, max, A),
  main = "Gráfica Nº3: Frecuencia de la Concentración del contaminante (Global)",
  xlab = "Concentración (mg/kg)",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, length(Concentracion_contaminante)),
  cex.main = 1.1,
  cex.lab = 1.1
)
# Percentage per class as adjacent bars (space = 0), labelled with the class
# marks; the y axis is left to barplot()'s automatic range ("Local" view).
barplot(
  height = TDFConcentracion$hi,
  names.arg = TDFConcentracion$MC,
  space = 0,
  col = "skyblue",
  xlab = "Concentración",
  ylab = "Porcentaje (%)",
  main = "Gráfica Nº4: Porcentaje de la Concentración del contaminante (Local)",
  cex.names = 0.9,
  cex.lab = 1.1,
  cex.main = 1.1
)
# Same percentage bars with the y axis fixed to 0-100 % ("Global" view).
barplot(
  height = TDFConcentracion$hi,
  names.arg = TDFConcentracion$MC,
  space = 0,
  ylim = c(0, 100),
  col = "yellow",
  xlab = "Concentración",
  ylab = "Porcentaje (%)",
  main = "Gráfica Nº5: Porcentaje de la Concentración del contaminante(Global)",
  cex.names = 0.9,
  cex.lab = 1.1,
  cex.main = 1.1
)
# Horizontal box plot of the raw concentrations.
boxplot(
  x = Concentracion_contaminante,
  horizontal = TRUE,
  xlab = "Concentración (mg/kg)",
  main = "Grafica Nº6: Distribución de la Concentración del Contaminante (mg/kg)",
  col = "pink"
)
# Frequency ogives (ascending and descending).
# Descending curve first: cumulative count plotted against the lower limits.
# The default x axis is suppressed and drawn by hand further down.
plot(
  x = Li,
  y = Nidsc,
  type = "o",
  col = "red",
  lwd = 3,
  las = 1,
  cex.axis = 0.8,
  xlim = c(min, max),
  xaxt = "n",
  xlab = "Concentración (mg/kg)",
  ylab = "Cantidad",
  main = "Gráfica Nº7: Distribución de Frecuencias Ascendente y Descendente
de Concentración del Contaminante (mg/kg)"
)

# Ascending curve: cumulative count plotted against the upper limits.
lines(x = Ls, y = Niasc, type = "o", col = "green", lwd = 3)

# Custom x axis: ten evenly spaced ticks between min and max, rounded to
# whole numbers.
axis(side = 1, at = round(seq(min, max, length.out = 10), 0))
# Percentage ogives (ascending and descending).
# Descending cumulative percentage against the lower class limits. The default
# x axis is suppressed and drawn by hand below.
# The figure number was "Nº53", which broke the Nº1..Nº7 sequence used by the
# previous charts; it is numbered Nº8 to continue that sequence.
plot(
  Li, Hidsc,
  main = "Gráfica Nº8: Distribución Porcentual Ascendente y Descendente
de Concentración del Contaminante (mg/kg)",
  xlab = "Concentración (mg/kg)",
  ylab = "Porcentaje (%)",
  xlim = c(min, max),
  col = "red",
  type = "o",
  lwd = 2,
  xaxt = "n"
)

# Ascending cumulative percentage against the upper class limits.
lines(
  Ls, Hiasc,
  col = "blue",
  type = "o",
  lwd = 3
)

# Custom x axis: ten evenly spaced ticks between min and max, rounded to
# whole numbers.
axis(1, at = round(seq(min, max, length.out = 10), 0))
# STATISTICAL INDICATORS
Concentracion <- datos$Pollutant_Concentration_mg_kg

# Measures of central tendency
# Arithmetic mean (stored already rounded to 2 decimals)
media <- round(mean(Concentracion), 2)
media
## [1] 102.75

# Mode: the most frequent observed value(s), taken from a frequency table.
Tabla_Con <- as.data.frame(table(Concentracion))
max_frecuencia <- max(Tabla_Con$Freq)
# The value column of the table is a factor with one level per distinct
# value; convert through character so `moda` is a plain numeric vector
# instead of a factor carrying thousands of unused levels.
moda <- as.numeric(as.character(
  Tabla_Con$Concentracion[Tabla_Con$Freq == max_frecuencia]
))
moda
## [1] 86.08

# Median
mediana <- median(Concentracion)
mediana
## [1] 104.085
# MEASURES OF DISPERSION #
# Sample variance
varianza <- var(Concentracion)
varianza
## [1] 3125.634

# Standard deviation, i.e. the square root of the variance.
# NOTE: the result is stored in a variable named `sd`, which shadows the base
# function name; later sections read this variable.
sd <- sqrt(varianza)
sd
## [1] 55.90737

# Coefficient of variation in percent, relative to the (rounded) mean.
cv <- round(sd / media * 100, digits = 2)
cv
## [1] 54.41
# MEASURES OF SHAPE #
library(e1071)

# NOTE(review): skewness() is called with type = 2 while kurtosis() is called
# without a `type` argument; confirm both use the intended estimator.

# Skewness
asimetria <- skewness(x = Concentracion, type = 2)
asimetria
## [1] -0.01561251

# Kurtosis
curtosis <- kurtosis(x = Concentracion)
curtosis
## [1] -1.190312
# FINAL SUMMARY TABLE
# Outliers are detected from the data with boxplot.stats() (the statistics
# behind a box plot) instead of being asserted with a fixed sentence, so the
# table stays truthful if the data change.
atipicos <- boxplot.stats(Concentracion)$out
texto_atipicos <- if (length(atipicos) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Presencia de ", length(atipicos), " valores atípicos")
}

# One-row data frame with every indicator computed above.
tabla_indicadores <- data.frame(
  "Variable" = c("Concentración del Contaminante"),
  "Rango" = c(paste0("[", min(Concentracion), " ; ", max(Concentracion), "]")),
  "X" = c(round(media, 2)),
  "Me" = c(round(mediana, 2)),
  # Several modes, if any, are shown comma-separated in a single cell.
  "Mo" = c(paste(moda, collapse = ", ")),
  "V" = c(round(varianza, 2)),
  "Sd" = c(round(sd, 2)),
  "Cv" = c(cv),
  "As" = c(round(asimetria, 2)),
  "K" = c(round(curtosis, 2)),
  "Valores_Atipicos" = texto_atipicos
)
# INDICATORS TABLE IN GT FORMAT
library(gt)

# Formatted indicators table (Tabla N°2.1); the single data row is bold.
tabla_indicadores_gt <- gt(tabla_indicadores) %>%
  tab_header(
    title = md("*Tabla N°2.1*"),
    subtitle = md("**Indicadores estadísticos de la Concentración del Contaminante (mg/kg)**")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table borders
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label borders
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Body appearance
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = Variable == "Concentración del Contaminante")
  )
tabla_indicadores_gt
| Tabla N°2.1 | ||||||||||
| Indicadores estadísticos de la Concentración del Contaminante (mg/kg) | ||||||||||
| Variable | Rango | X | Me | Mo | V | Sd | Cv | As | K | Valores_Atipicos |
|---|---|---|---|---|---|---|---|---|---|---|
| Concentración del Contaminante | [5.03 ; 199.99] | 102.75 | 104.09 | 86.08 | 3125.63 | 55.91 | 54.41 | -0.02 | -1.19 | No hay presencia de valores atípicos |
| Autor: Grupo 3 | ||||||||||
##============##
## CONCLUSION ##
##============##
# La variable Concentración del contaminante fluctúa entre 5.03 y 199.99 mg/kg y gira en torno a 102.75 mg/kg, con una desviación estándar de 55.91, siendo un conjunto de datos heterogéneo; los valores se acumulan de manera débil en la parte media de la variable. Sin presencia de valores atípicos.