UNIVERSIDAD CENTRAL DEL ECUADOR

ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD

FECHA: 19/11/2025

# =========================
# ESTADÍSTICA DESCRIPTIVA
# Fecha: 19/11/2025
# =========================

# -------------------------
# Load the data set
# -------------------------
# The CSV is addressed by bare file name, so the session is first pointed at
# the folder that contains it.
setwd("C:/Users/Alexander/Downloads")

# Comma-separated file; text columns stay as character vectors (no factors).
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  sep = ",",
  stringsAsFactors = FALSE
)

# ================================
# CONTINUOUS QUANTITATIVE VARIABLE
# ================================

# Soil pH readings taken from the loaded data set.
Suelo_pH <- datos$Soil_pH

# -------------------------
# Manual class computation
# -------------------------

# Number of classes from the 1 + 3.3 * log10(n) rule. The sample size is taken
# from the data (non-missing readings) instead of being hard-coded.
n_obs <- sum(!is.na(Suelo_pH))
k <- floor(1 + (3.3 * log10(n_obs)))

# NOTE: `min` and `max` deliberately keep their original names because later
# sections read them as plain numbers. They shadow the base functions only as
# values; calls such as min(x) still resolve to the functions.
min <- min(Suelo_pH, na.rm = TRUE)
max <- max(Suelo_pH, na.rm = TRUE)

R <- max - min  # range of the variable
A <- R / k      # class width

# Limits are built by class index rather than with seq(by = A), so exactly k
# classes are produced regardless of floating-point drift.
Li <- round(min + (seq_len(k) - 1) * A, 4)
Ls <- round(min + seq_len(k) * A, 4)
MC <- round((Li + Ls) / 2, 2)

# Absolute frequencies: every class is [Li, Ls) except the last one, which is
# closed on the right so the maximum reading is counted.
ultima <- length(Li)
ni <- vapply(
  seq_along(Li),
  function(i) {
    if (i == ultima) {
      sum(Suelo_pH >= Li[i] & Suelo_pH <= max, na.rm = TRUE)
    } else {
      sum(Suelo_pH >= Li[i] & Suelo_pH < Ls[i], na.rm = TRUE)
    }
  },
  numeric(1)
)

# Relative frequencies (%). Cumulative percentages are accumulated from the
# unrounded values and rounded only at the end, so rounding error does not
# build up; everything is shown with two decimals.
hi_exacto <- ni / sum(ni) * 100
hi <- round(hi_exacto, 2)

Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- round(cumsum(hi_exacto), 2)
Hidsc <- round(rev(cumsum(rev(hi_exacto))), 2)

TDF_Suelo_pH <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)

# Display version with a "Total" row. The blank strings turn the affected
# columns into character, so this object is for presentation only.
TDF_Suelo_pH_Completo <- rbind(
  TDF_Suelo_pH,
  data.frame(Li = "Total", Ls = " ", MC = " ",
             ni = sum(ni), hi = 100,
             Niasc = " ", Nidsc = " ",
             Hiasc = " ", Hidsc = " ")
)

# =========================
# TABLA Nº1
# =========================

library(gt)
library(dplyr)

# Render the manual frequency table with gt, one step at a time:
# base table -> title/subtitle -> source note.
tabla_Suelo_pH <- gt(TDF_Suelo_pH_Completo)

tabla_Suelo_pH <- tab_header(
  tabla_Suelo_pH,
  title = md("*Tabla Nº1*"),
  subtitle = md("**Tabla de distribución de Frecuencias del pH del Suelo**")
)

tabla_Suelo_pH <- tab_source_note(
  tabla_Suelo_pH,
  source_note = md("Autor: Grupo 3")
)

# Print the finished table.
tabla_Suelo_pH
Tabla Nº1
Tabla de distribución de Frecuencias del pH del Suelo
Li Ls MC ni hi Niasc Nidsc Hiasc Hidsc
4.5 4.8333 4.67 290 9.666667 290 3000 10 100
4.8333 5.1667 5 248 8.266667 538 2710 18 90
5.1667 5.5 5.33 249 8.300000 787 2462 26 82
5.5 5.8333 5.67 232 7.733333 1019 2213 34 74
5.8333 6.1667 6 265 8.833333 1284 1981 43 66
6.1667 6.5 6.33 248 8.266667 1532 1716 51 57
6.5 6.8333 6.67 261 8.700000 1793 1468 60 49
6.8333 7.1667 7 238 7.933333 2031 1207 68 40
7.1667 7.5 7.33 228 7.600000 2259 969 75 32
7.5 7.8333 7.67 252 8.400000 2511 741 84 25
7.8333 8.1667 8 251 8.366667 2762 489 92 16
8.1667 8.5 8.33 238 7.933333 3000 238 100 8
Total 3000 100.000000
Autor: Grupo 3
# =========================
# HISTOGRAM No. 1
# =========================

# Draw the histogram with hist()'s default class breaks and keep the returned
# object: its breaks, midpoints and counts feed the simplified table below.
histoP <- hist(
  x = Suelo_pH,
  col = "blue",
  main = "Gráfica Nº1: Distribución del pH del Suelo",
  xlab = "pH del Suelo",
  ylab = "Cantidad"
)

# =========================
# SIMPLIFIED TABLE BASED ON THE HISTOGRAM
# =========================

# Class limits, midpoints and counts come straight from the hist() object.
Limites <- histoP$breaks
LimInf <- Limites[-length(Limites)]  # every break except the last
LimSup <- Limites[-1]                # every break except the first
Mc <- histoP$mids
ni <- histoP$counts
sum(ni)  # sanity check: should equal the number of observations

# Relative frequencies (%). The rounded column is for display only; its sum can
# drift from 100 by a few hundredths.
hi_exacto <- ni / sum(ni) * 100
hi <- round(hi_exacto, 2)
sum(hi)

Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))

# Cumulative percentages are accumulated from the unrounded values and rounded
# once at the end, so they finish at exactly 100 rather than inheriting the
# rounding drift of `hi`.
Hi_asc <- round(cumsum(hi_exacto), 2)
Hi_dsc <- round(rev(cumsum(rev(hi_exacto))), 2)

TDF_Histo_Suelo_pH <- data.frame(LimInf, LimSup, Mc, ni, hi, Ni_asc, Ni_dsc, Hi_asc, Hi_dsc)

totalni <- sum(ni)
totalhi <- 100

# Display version with a "Total" row. The blank strings turn the affected
# columns into character, so this object is for presentation only.
TDF_Histo_Suelo_pH_completo <- rbind(
  TDF_Histo_Suelo_pH,
  data.frame(LimInf = "Total",
             LimSup = " ", Mc = " ", ni = totalni,
             hi = totalhi, Ni_asc = " ", Ni_dsc = " ",
             Hi_asc = " ", Hi_dsc = " ")
)

# Render the histogram-based table with gt, one step at a time.
tabla_Histo <- gt(TDF_Histo_Suelo_pH_completo)

# Title and subtitle.
tabla_Histo <- tab_header(
  tabla_Histo,
  title = md("*Tabla Nº2*"),
  subtitle = md("**Tabla simplificada de distribución del pH del Suelo**")
)

# Authorship note below the table.
tabla_Histo <- tab_source_note(
  tabla_Histo,
  source_note = md("Autor: Grupo 3")
)

# Borders and row striping.
tabla_Histo <- tab_options(
  tabla_Histo,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE
)

# Bold text on the row whose LimInf cell reads "Total".
tabla_Histo <- tab_style(
  tabla_Histo,
  style = cell_text(weight = "bold"),
  locations = cells_body(
    rows = LimInf == "Total"
  )
)

# Print the finished table.
tabla_Histo
Tabla Nº2
Tabla simplificada de distribución del pH del Suelo
LimInf LimSup Mc ni hi Ni_asc Ni_dsc Hi_asc Hi_dsc
4.5 5 4.75 426 14.20 426 3000 14.2 100.01
5 5.5 5.25 368 12.27 794 2574 26.47 85.81
5.5 6 5.75 371 12.37 1165 2206 38.84 73.54
6 6.5 6.25 377 12.57 1542 1835 51.41 61.17
6.5 7 6.75 380 12.67 1922 1458 64.08 48.6
7 7.5 7.25 339 11.30 2261 1078 75.38 35.93
7.5 8 7.75 379 12.63 2640 739 88.01 24.63
8 8.5 8.25 360 12.00 3000 360 100.01 12
Total 3000 100.00
Autor: Grupo 3
# =========================
# HISTOGRAM No. 2 (LOCAL)
# =========================

# Same k equal-width classes as the manual table. The breaks are generated
# with length.out so the first and last break are exactly `min` and `max`;
# stepping with by = A can land a few ulps short of `max`, in which case hist()
# stops because the largest reading falls outside the breaks.
hist(
  Suelo_pH,
  breaks = seq(min, max, length.out = k + 1),
  main = "Gráfica Nº2: Frecuencia del pH del Suelo (Local)",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "#4A90E2"
)

# =========================
# HISTOGRAM No. 3 (GLOBAL)
# =========================

# Same classes as the local histogram, but the y axis spans the whole sample
# so each bar is read against the total number of observations. The upper
# limit is taken from the data instead of a hard-coded sample size, and the
# breaks use length.out so they end exactly at `max`.
hist(
  Suelo_pH,
  breaks = seq(min, max, length.out = k + 1),
  main = "Gráfica Nº3: Frecuencia del pH del Suelo (Global)",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, length(Suelo_pH))
)

# =========================
# PERCENTAGE CHART (LOCAL)
# (Based on Table 2)
# =========================

# Bars with no gap between them (space = 0), one per class of the
# histogram-based table, each labelled with its class midpoint.
barplot(
  height = TDF_Histo_Suelo_pH[["hi"]],
  names.arg = TDF_Histo_Suelo_pH[["Mc"]],
  space = 0,
  col = "skyblue",
  main = "Gráfica Nº4: Porcentaje del pH del Suelo (Local)",
  xlab = "Intervalos de pH",
  ylab = "Porcentaje (%)",
  cex.main = 1.1,
  cex.lab = 1.1,
  cex.names = 0.9
)

# =========================
# PERCENTAGE CHART (GLOBAL)
# (Based on Table 2)
# =========================

# Same bars as the local chart, but with the y axis fixed to 0-100 so each
# class is read against the full percentage scale.
barplot(
  height = TDF_Histo_Suelo_pH[["hi"]],
  names.arg = TDF_Histo_Suelo_pH[["Mc"]],
  space = 0,
  ylim = c(0, 100),
  col = "yellow",
  main = "Gráfica Nº5: Porcentaje del pH del Suelo (Global)",
  xlab = "Intervalos de pH",
  ylab = "Porcentaje (%)",
  cex.main = 1.1,
  cex.lab = 1.1,
  cex.names = 0.9
)

# =========================
# BOXPLOT
# =========================

# Horizontal box-and-whisker plot of the raw readings. The figure is numbered
# 6 because numbers 4 and 5 are already used by the percentage bar charts.
boxplot(
  Suelo_pH,
  horizontal = TRUE,
  col = "pink",
  main = "Gráfica Nº6: Distribución del pH del Suelo",
  xlab = "pH"
)

# =========================
# OGIVES
# =========================

# Descending ogive: cumulative "or more" counts plotted at the lower class
# limits. The y range covers both cumulative series so neither curve can be
# clipped. The figure is numbered 7 because number 5 is already used by the
# global percentage bar chart.
plot(
  Li, Nidsc,
  main = "Gráfica Nº7: Ojiva Ascendente y Descendente del pH del Suelo",
  xlab = "pH",
  ylab = "Cantidad",
  xlim = c(min, max),
  ylim = range(Niasc, Nidsc),
  col = "red",
  type = "o",
  lwd = 3,
  xaxt = "n"  # the x axis is drawn manually below
)

# Ascending ogive: cumulative "up to" counts plotted at the upper class limits.
lines(Ls, Niasc, col = "green", type = "o", lwd = 3)

# Ten evenly spaced tick marks across the observed pH range.
axis(1, at = round(seq(min, max, length.out = 10), 2))

# Legend so the two curves can be told apart.
legend(
  "top",
  legend = c("Descendente", "Ascendente"),
  col = c("red", "green"),
  lty = 1,
  pch = 1,
  lwd = 3,
  bty = "n"
)

# =========================
# STATISTICAL INDICATORS
# =========================

# Central tendency.
media <- round(mean(Suelo_pH), 2)

# Mode(s): every distinct reading that reaches the highest frequency.
Tabla_pH <- as.data.frame(table(Suelo_pH))
max_frecuencia <- max(Tabla_pH$Freq)
moda <- as.character(Tabla_pH$Suelo_pH[Tabla_pH$Freq == max_frecuencia])

mediana <- median(Suelo_pH)

# Dispersion. NOTE: `sd` keeps its original name because the summary table
# reads it; it shadows the base function only as a value.
varianza <- var(Suelo_pH)
sd <- sd(Suelo_pH)

# Coefficient of variation computed from the unrounded mean, so the rounding
# applied to `media` for display does not leak into the result.
cv <- round((sd / mean(Suelo_pH)) * 100, 2)

# Shape. Both estimators use the same type (2) so skewness and kurtosis are
# computed under one convention instead of mixing type 2 with the default.
library(e1071)
asimetria <- skewness(Suelo_pH, type = 2)
curtosis <- kurtosis(Suelo_pH, type = 2)


# =========================
# FINAL SUMMARY TABLE
# =========================

# Outlier statement derived from the data rather than hard-coded: count the
# points beyond the boxplot whiskers using boxplot.stats() with its default
# coefficient. With no outliers the text is the same sentence as before.
n_atipicos <- length(boxplot.stats(Suelo_pH)$out)
texto_atipicos <- if (n_atipicos == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Presencia de ", n_atipicos, " valores atípicos")
}

# One-row summary of every indicator, rounded to two decimals for display.
tabla_indicadores <- data.frame(
  Variable = "pH del Suelo",
  Rango = paste0("[", min(Suelo_pH), " ; ", max(Suelo_pH), "]"),
  X = media,
  Me = round(mediana, 2),
  Mo = paste(moda, collapse = ", "),  # several modes are joined with commas
  V = round(varianza, 2),
  Sd = round(sd, 2),
  Cv = cv,
  As = round(asimetria, 2),
  K = round(curtosis, 2),
  Valores_Atipicos = texto_atipicos
)

# Render the indicator summary with gt, one step at a time:
# base table -> title/subtitle -> source note.
tabla_indicadores_gt <- gt(tabla_indicadores)

tabla_indicadores_gt <- tab_header(
  tabla_indicadores_gt,
  title = md("*Tabla Nº3*"),
  subtitle = md("**Indicadores estadísticos del pH del Suelo**")
)

tabla_indicadores_gt <- tab_source_note(
  tabla_indicadores_gt,
  source_note = md("Autor: Grupo 3")
)

# Print the finished table.
tabla_indicadores_gt
Tabla Nº3
Indicadores estadísticos del pH del Suelo
Variable Rango X Me Mo V Sd Cv As K Valores_Atipicos
pH del Suelo [4.5 ; 8.5] 6.46 6.45 4.81, 5.81 1.36 1.17 18.05 0.03 -1.2 No hay presencia de valores atípicos
Autor: Grupo 3
##============##
## CONCLUSION ##
##============##
# La variable pH del Suelo fluctúa entre 4.5 y 8.5 y gira en torno a 6.46, con una desviación estándar de 1.17, siendo un conjunto de datos homogéneo; los valores se acumulan de manera débil en la parte media de la variable, sin presencia de valores atípicos.