UNIVERSIDAD CENTRAL DEL ECUADOR
ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD
FECHA: 19/11/2025
# =========================
# ESTADÍSTICA Descriptiva
# Fecha: 19/11/2025
# =========================
# -------------------------
# Cargar datos
# -------------------------
# Point the session at the folder that holds the CSV, then read the whole
# file into `datos` as a comma-separated table, keeping text columns as
# plain character vectors (no factor conversion).
setwd("C:/Users/Alexander/Downloads")
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  stringsAsFactors = FALSE,
  sep = ","
)
# ================================
# VARIABLE CUANTITATIVA CONTINUA
# ================================
# Soil pH column used for the whole analysis.
Suelo_pH <- datos$Soil_pH
# Fail early with a clear message instead of an obscure seq() error later on.
stopifnot(is.numeric(Suelo_pH), length(Suelo_pH) > 0, !anyNA(Suelo_pH))
# -------------------------
# Manual construction of the class intervals
# -------------------------
# Number of classes from Sturges' rule (coefficient 3.3), truncated to an
# integer. The sample size is taken from the data rather than hard-coded.
k <- floor(1 + (3.3 * log10(length(Suelo_pH))))
# NOTE: `min`, `max`, `R`, `A` and `k` keep these names because later
# sections of the script read them. The assignments shadow base::min/max as
# variables only; calls such as min(x) still resolve to the base functions.
min <- min(Suelo_pH)
max <- max(Suelo_pH)
R <- max - min # range of the variable
A <- R / k # class width
# Build exactly k lower and upper limits from the class index, so the number
# of classes never depends on where seq(by = A) stops after float
# accumulation. Limits are rounded to 4 decimals, as before.
Li <- round(min + A * (seq_len(k) - 1), 4)
Ls <- round(min + A * seq_len(k), 4)
# Class marks (midpoints).
MC <- round((Li + Ls) / 2, 2)
# Absolute frequency per class: every class is [Li, Ls) ...
ni <- vapply(
  seq_along(Li),
  function(i) sum(Suelo_pH >= Li[i] & Suelo_pH < Ls[i]),
  numeric(1)
)
# ... except the last one, which is closed on the right so that observations
# equal to the maximum are counted.
ni[length(Li)] <- sum(Suelo_pH >= Li[length(Li)] & Suelo_pH <= max)
# Relative frequency in percent (left unrounded).
hi <- ni / sum(ni) * 100
# Ascending / descending cumulative absolute frequencies.
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
# Ascending / descending cumulative percentages, rounded to whole percents.
Hiasc <- round(cumsum(hi))
Hidsc <- round(rev(cumsum(rev(hi))))
TDF_Suelo_pH <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)
# Append a "Total" row. The blank strings make rbind() coerce the affected
# columns to character, so this object is for presentation only.
TDF_Suelo_pH_Completo <- rbind(
  TDF_Suelo_pH,
  data.frame(
    Li = "Total", Ls = " ", MC = " ",
    ni = sum(ni), hi = 100,
    Niasc = " ", Nidsc = " ",
    Hiasc = " ", Hidsc = " "
  )
)
# =========================
# TABLA Nº1
# =========================
library(gt)
library(dplyr)
# Build the presentation version of the frequency table one layer at a time:
# start from the data frame that already carries the "Total" row, then add
# the header and the source note.
tabla_Suelo_pH <- gt(TDF_Suelo_pH_Completo)
tabla_Suelo_pH <- tab_header(
  tabla_Suelo_pH,
  title = md("*Tabla Nº1*"),
  subtitle = md("**Tabla de distribución de Frecuencias del pH del Suelo**")
)
tabla_Suelo_pH <- tab_source_note(
  tabla_Suelo_pH,
  source_note = md("Autor: Grupo 3")
)
# Evaluating the object at top level prints/renders the table.
tabla_Suelo_pH
| Tabla Nº1 | ||||||||
| Tabla de distribución de Frecuencias del pH del Suelo | ||||||||
| Li | Ls | MC | ni | hi | Niasc | Nidsc | Hiasc | Hidsc |
|---|---|---|---|---|---|---|---|---|
| 4.5 | 4.8333 | 4.67 | 290 | 9.666667 | 290 | 3000 | 10 | 100 |
| 4.8333 | 5.1667 | 5 | 248 | 8.266667 | 538 | 2710 | 18 | 90 |
| 5.1667 | 5.5 | 5.33 | 249 | 8.300000 | 787 | 2462 | 26 | 82 |
| 5.5 | 5.8333 | 5.67 | 232 | 7.733333 | 1019 | 2213 | 34 | 74 |
| 5.8333 | 6.1667 | 6 | 265 | 8.833333 | 1284 | 1981 | 43 | 66 |
| 6.1667 | 6.5 | 6.33 | 248 | 8.266667 | 1532 | 1716 | 51 | 57 |
| 6.5 | 6.8333 | 6.67 | 261 | 8.700000 | 1793 | 1468 | 60 | 49 |
| 6.8333 | 7.1667 | 7 | 238 | 7.933333 | 2031 | 1207 | 68 | 40 |
| 7.1667 | 7.5 | 7.33 | 228 | 7.600000 | 2259 | 969 | 75 | 32 |
| 7.5 | 7.8333 | 7.67 | 252 | 8.400000 | 2511 | 741 | 84 | 25 |
| 7.8333 | 8.1667 | 8 | 251 | 8.366667 | 2762 | 489 | 92 | 16 |
| 8.1667 | 8.5 | 8.33 | 238 | 7.933333 | 3000 | 238 | 100 | 8 |
| Total |   |   | 3000 | 100.000000 |   |   |   |   |
| Autor: Grupo 3 | ||||||||
# =========================
# HISTOGRAMA Nº1
# =========================
# Draw the histogram of soil pH with hist()'s default class selection and
# keep the returned object: its breaks, midpoints and counts are read by the
# simplified frequency table that follows.
histoP <- hist(
  x = Suelo_pH,
  col = "blue",
  xlab = "pH del Suelo",
  ylab = "Cantidad",
  main = "Gráfica Nº1: Distribución del pH del Suelo"
)
# =========================
# TABLA SIMPLIFICADA BASADA EN EL HISTOGRAMA
# =========================
# Class limits, midpoints and counts are taken from the histogram object,
# so this table always matches what the first histogram draws.
Limites <- histoP$breaks
LimInf <- head(Limites, -1) # every break except the last one
LimSup <- Limites[-1] # every break except the first one
Mc <- histoP$mids
# NOTE: from here on `ni` and `hi` hold the histogram-based frequencies and
# replace the values computed for the manual table.
ni <- histoP$counts
sum(ni)
## [1] 3000
# Relative frequency in percent. The unrounded values are kept separately so
# that the cumulative columns do not accumulate rounding error.
hi_exacto <- ni / sum(ni) * 100
hi <- round(hi_exacto, 2)
# The rounded percentages need not add up to exactly 100.
sum(hi)
## [1] 100.01
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))
# Accumulate the exact percentages and round once at the end. Summing the
# rounded `hi` made the cumulative columns reach 100.01.
Hi_asc <- round(cumsum(hi_exacto), 2)
Hi_dsc <- round(rev(cumsum(rev(hi_exacto))), 2)
TDF_Histo_Suelo_pH <- data.frame(
  LimInf, LimSup, Mc, ni, hi, Ni_asc, Ni_dsc, Hi_asc, Hi_dsc
)
totalni <- sum(ni)
totalhi <- 100
# Append a "Total" row. The blank strings make rbind() coerce the affected
# columns to character, so this object is for presentation only.
TDF_Histo_Suelo_pH_completo <- rbind(
  TDF_Histo_Suelo_pH,
  data.frame(
    LimInf = "Total",
    LimSup = " ", Mc = " ", ni = totalni,
    hi = totalhi, Ni_asc = " ", Ni_dsc = " ",
    Hi_asc = " ", Hi_dsc = " "
  )
)
# Build the presentation version of the histogram-based table one layer at
# a time instead of through a single pipe chain.
tabla_Histo <- gt(TDF_Histo_Suelo_pH_completo)
# Title and subtitle.
tabla_Histo <- tab_header(
  tabla_Histo,
  title = md("*Tabla Nº2*"),
  subtitle = md("**Tabla simplificada de distribución del pH del Suelo**")
)
# Footer with the authorship note.
tabla_Histo <- tab_source_note(
  tabla_Histo,
  source_note = md("Autor: Grupo 3")
)
# Borders and row striping.
tabla_Histo <- tab_options(
  tabla_Histo,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE
)
# Bold text on the row whose LimInf cell reads "Total".
tabla_Histo <- tab_style(
  tabla_Histo,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = LimInf == "Total")
)
# Evaluating the object at top level prints/renders the table.
tabla_Histo
| Tabla Nº2 | ||||||||
| Tabla simplificada de distribución del pH del Suelo | ||||||||
| LimInf | LimSup | Mc | ni | hi | Ni_asc | Ni_dsc | Hi_asc | Hi_dsc |
|---|---|---|---|---|---|---|---|---|
| 4.5 | 5 | 4.75 | 426 | 14.20 | 426 | 3000 | 14.2 | 100.01 |
| 5 | 5.5 | 5.25 | 368 | 12.27 | 794 | 2574 | 26.47 | 85.81 |
| 5.5 | 6 | 5.75 | 371 | 12.37 | 1165 | 2206 | 38.84 | 73.54 |
| 6 | 6.5 | 6.25 | 377 | 12.57 | 1542 | 1835 | 51.41 | 61.17 |
| 6.5 | 7 | 6.75 | 380 | 12.67 | 1922 | 1458 | 64.08 | 48.6 |
| 7 | 7.5 | 7.25 | 339 | 11.30 | 2261 | 1078 | 75.38 | 35.93 |
| 7.5 | 8 | 7.75 | 379 | 12.63 | 2640 | 739 | 88.01 | 24.63 |
| 8 | 8.5 | 8.25 | 360 | 12.00 | 3000 | 360 | 100.01 | 12 |
| Total |   |   | 3000 | 100.00 |   |   |   |   |
| Autor: Grupo 3 | ||||||||
# =========================
# HISTOGRAMA Nº2 (LOCAL)
# =========================
# Histogram of soil pH on the k manually computed classes, with the y axis
# scaled to the data ("local" view).
# - Breaks are built with length.out = k + 1 so the last break is exactly
#   `max`; stepping by A can stop a hair short after float accumulation.
# - right = FALSE makes the classes left-closed [a, b), with the last one
#   also closed on the right (include.lowest defaults to TRUE). This is the
#   rule the manual frequency table uses, so the bars match its `ni` column.
hist(
  Suelo_pH,
  breaks = seq(min, max, length.out = k + 1),
  right = FALSE,
  main = "Gráfica Nº2: Frecuencia del pH del Suelo (Local)",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "#4A90E2"
)
# =========================
# HISTOGRAMA Nº3 (GLOBAL)
# =========================
# Histogram of soil pH on the k manually computed classes, with the y axis
# running from 0 to the total number of observations ("global" view), so
# each bar is seen relative to the whole sample.
# - The upper y limit is taken from the data instead of a hard-coded 3000.
# - Breaks are built with length.out = k + 1 so the last break is exactly
#   `max`; stepping by A can stop a hair short after float accumulation.
# - right = FALSE makes the classes left-closed [a, b), with the last one
#   also closed on the right (include.lowest defaults to TRUE). This is the
#   rule the manual frequency table uses, so the bars match its `ni` column.
hist(
  Suelo_pH,
  breaks = seq(min, max, length.out = k + 1),
  right = FALSE,
  main = "Gráfica Nº3: Frecuencia del pH del Suelo (Global)",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, length(Suelo_pH))
)
# =========================
# GRÁFICA PORCENTUAL LOCAL
# (Basada en Tabla 2)
# =========================
# Percentage bar chart taken from the histogram-based table: one bar per
# class, bars touching (space = 0), labelled with the class midpoints, and
# the y axis left to scale to the data ("local" view).
with(
  TDF_Histo_Suelo_pH,
  barplot(
    height = hi,
    names.arg = Mc,
    space = 0,
    col = "skyblue",
    main = "Gráfica Nº4: Porcentaje del pH del Suelo (Local)",
    xlab = "Intervalos de pH",
    ylab = "Porcentaje (%)",
    cex.names = 0.9,
    cex.main = 1.1,
    cex.lab = 1.1
  )
)
# =========================
# GRÁFICA PORCENTUAL GLOBAL
# (Basada en Tabla 2)
# =========================
# Percentage bar chart taken from the histogram-based table: one bar per
# class, bars touching (space = 0), labelled with the class midpoints, and
# the y axis fixed to 0-100 % ("global" view).
with(
  TDF_Histo_Suelo_pH,
  barplot(
    height = hi,
    names.arg = Mc,
    space = 0,
    ylim = c(0, 100),
    col = "yellow",
    main = "Gráfica Nº5: Porcentaje del pH del Suelo (Global)",
    xlab = "Intervalos de pH",
    ylab = "Porcentaje (%)",
    cex.names = 0.9,
    cex.main = 1.1,
    cex.lab = 1.1
  )
)
# =========================
# BOXPLOT
# =========================
# Horizontal box plot of soil pH.
# The figure number in the title is 6: numbers 4 and 5 are already used by
# the two percentage bar charts, so the previous "Nº4" was a duplicate.
boxplot(
  Suelo_pH,
  horizontal = TRUE,
  col = "pink",
  main = "Gráfica Nº6: Distribución del pH del Suelo",
  xlab = "pH"
)
# =========================
# OJIVAS
# =========================
# Ogives (cumulative frequency curves) from the manual frequency table.
# The figure number in the title is 7: number 5 is already used by the
# second percentage bar chart, so the previous "Nº5" was a duplicate.
# Descending ogive (red): descending cumulative counts plotted against the
# lower class limits. The default x axis is suppressed (xaxt = "n") and
# drawn by hand below.
plot(
  Li, Nidsc,
  main = "Gráfica Nº7: Ojiva Ascendente y Descendente del pH del Suelo",
  xlab = "pH",
  ylab = "Cantidad",
  xlim = c(min, max),
  col = "red",
  type = "o",
  lwd = 3,
  xaxt = "n"
)
# Ascending ogive (green): ascending cumulative counts plotted against the
# upper class limits, added to the same plot.
lines(Ls, Niasc, col = "green", type = "o", lwd = 3)
# Custom x axis: 10 evenly spaced ticks between the minimum and the maximum.
axis(1, at = round(seq(min, max, length.out = 10), 2))
# =========================
# INDICADORES ESTADÍSTICOS
# =========================
# Arithmetic mean, rounded to 2 decimals for reporting.
media <- round(mean(Suelo_pH), 2)
# Mode(s): tabulate every distinct value and keep those with the highest
# count. `moda` is a factor (it comes from as.data.frame(table(...))) and
# may contain several values when there are ties.
Tabla_pH <- as.data.frame(table(Suelo_pH))
max_frecuencia <- max(Tabla_pH$Freq)
moda <- Tabla_pH$Suelo_pH[Tabla_pH$Freq == max_frecuencia]
mediana <- median(Suelo_pH)
varianza <- var(Suelo_pH)
# NOTE: `sd` keeps this name because the summary table reads it. It shadows
# base sd() as a variable only; calls to sd(x) still work.
sd <- sd(Suelo_pH)
# Coefficient of variation in percent, computed from the unrounded mean so
# the 2-decimal rounding of `media` does not leak into the result.
cv <- round((sd / mean(Suelo_pH)) * 100, 2)
library(e1071)
# Skewness and kurtosis use the same estimator (type = 2). Kurtosis was
# previously left at e1071's default type, which did not match skewness.
asimetria <- skewness(Suelo_pH, type = 2)
curtosis <- kurtosis(Suelo_pH, type = 2)
# =========================
# TABLA RESUMEN FINAL
# =========================
# Outlier statement derived from the data instead of being hard-coded.
# boxplot.stats() applies the same 1.5 * IQR whisker rule that boxplot()
# uses by default, so the text agrees with the box plot of this variable.
valores_atipicos <- boxplot.stats(Suelo_pH)$out
texto_atipicos <- if (length(valores_atipicos) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Hay presencia de ", length(valores_atipicos), " valores atípicos")
}
# One-row summary of the indicators computed above. Values are rounded to
# 2 decimals here; several modes are shown as a comma-separated list.
tabla_indicadores <- data.frame(
  "Variable" = c("pH del Suelo"),
  "Rango" = c(paste0("[", min(Suelo_pH), " ; ", max(Suelo_pH), "]")),
  "X" = c(media),
  "Me" = c(round(mediana, 2)),
  "Mo" = c(paste(moda, collapse = ", ")),
  "V" = c(round(varianza, 2)),
  "Sd" = c(round(sd, 2)),
  "Cv" = c(cv),
  "As" = c(round(asimetria, 2)),
  "K" = c(round(curtosis, 2)),
  "Valores_Atipicos" = texto_atipicos
)
# Presentation version of the summary, with header and source note.
tabla_indicadores_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nº3*"),
    subtitle = md("**Indicadores estadísticos del pH del Suelo**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 3")
  )
# Evaluating the object at top level prints/renders the table.
tabla_indicadores_gt
| Tabla Nº3 | ||||||||||
| Indicadores estadísticos del pH del Suelo | ||||||||||
| Variable | Rango | X | Me | Mo | V | Sd | Cv | As | K | Valores_Atipicos |
|---|---|---|---|---|---|---|---|---|---|---|
| pH del Suelo | [4.5 ; 8.5] | 6.46 | 6.45 | 4.81, 5.81 | 1.36 | 1.17 | 18.05 | 0.03 | -1.2 | No hay presencia de valores atípicos |
| Autor: Grupo 3 | ||||||||||
##============##
## CONCLUSION ##
##============##
# La variable pH del Suelo fluctúa entre 4.5 y 8.5 y gira en torno a 6.46, con una desviación estándar de 1.17, siendo un conjunto de datos homogéneo; los valores se acumulan de manera débil en la parte media de la variable. Sin presencia de valores atípicos.