UNIVERSIDAD CENTRAL DEL ECUADOR

ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD

FECHA: 7/12/2025

# Estadística Descriptiva
# 6/12/2025

# Load the soil pollution data set: comma-separated, "." as the decimal mark,
# and the first row holding the column names.
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  sep = ",",
  dec = ".",
  header = TRUE
)

# Extract the continuous quantitative variable under study (soil pH).
Suelo_pH <- datos$Soil_pH

# Frequency distribution table built by hand.

# Number of classes from Sturges' rule, k = 1 + 3.3 * log10(n), truncated to
# an integer. n is read from the data instead of a hard-coded sample size.
k <- floor(1 + (3.3 * log10(length(Suelo_pH))))

# NOTE: `min` and `max` keep their original names because the plotting code
# further down reads them. They shadow the base functions only as variables;
# calls such as min(x) still resolve to the functions.
min <- min(Suelo_pH)
max <- max(Suelo_pH)
R <- max - min  # range of the data
A <- R / k      # class width

# k + 1 equally spaced limits. Using length.out guarantees exactly k classes,
# whereas stepping with `by = A` depends on floating-point accumulation.
limites <- seq(from = min, to = max, length.out = k + 1)
Li <- round(limites[-length(limites)], 4)  # lower class limits
Ls <- round(limites[-1], 4)                # upper class limits
MC <- round((Li + Ls) / 2, 2)              # class marks (midpoints)

# Absolute frequencies. Classes are [Li, Ls) except the last one, which is
# closed on the right so that the maximum value is counted.
ni <- tabulate(
  findInterval(Suelo_pH, c(Li, max), rightmost.closed = TRUE),
  nbins = length(Li)
)
sum(ni)
## [1] 3000
# Relative frequencies in percent, rounded to 2 decimals for display only.
hi <- round(ni / sum(ni) * 100, 2)

sum(hi)
## [1] 100
# Cumulative frequencies (ascending and descending).
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
# Cumulative percentages are derived from the cumulative counts, not from the
# rounded `hi`, so rounding errors do not add up across classes.
Hiasc <- round(Niasc / sum(ni) * 100, 2)
Hidsc <- round(Nidsc / sum(ni) * 100, 2)

TDFSuelo_pH <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)

# Totals shown in the last row of the table.
total_ni <- sum(ni)
total_hi <- 100

# "Total" row: only ni and hi carry values; every other cell is a blank
# string, which turns those columns into character columns after rbind().
fila_total <- data.frame(
  Li = "Total", Ls = " ", MC = " ",
  ni = total_ni, hi = total_hi,
  Niasc = " ", Nidsc = " ",
  Hiasc = " ", Hidsc = " "
)

# Frequency table with the totals row appended at the bottom.
TDFSuelo_pHCompleto <- rbind(TDFSuelo_pH, fila_total)

# Formato tabla
library(gt)
library(dplyr)

# Render Table 1 (frequency distribution of soil pH) with gt.
# The former re-rounding of TDFSuelo_pH$hi was dropped: the column is already
# rounded to 2 decimals when it is created, and the table rendered here is
# built from TDFSuelo_pHCompleto, which already existed at that point.
tabla_Suelo_pH <- TDFSuelo_pHCompleto %>%
  gt() %>%
  # Always show the relative frequency with two decimals.
  fmt_number(
    columns = hi,
    decimals = 2
  ) %>%
  tab_header(
    title = md("*Tabla Nº1*"),
    subtitle = md("**Tabla de distribución del pH del Suelo**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 3")
  ) %>%
  # Border, striping and rule styling.
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  # Highlight the totals row in bold.
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = Li == "Total"
    )
  )

tabla_Suelo_pH
Tabla Nº1
Tabla de distribución del pH del Suelo
Li Ls MC ni hi Niasc Nidsc Hiasc Hidsc
4.5 4.8333 4.67 290 9.67 290 3000 9.67 100
4.8333 5.1667 5 248 8.27 538 2710 17.94 90.33
5.1667 5.5 5.33 249 8.30 787 2462 26.24 82.06
5.5 5.8333 5.67 232 7.73 1019 2213 33.97 73.76
5.8333 6.1667 6 265 8.83 1284 1981 42.8 66.03
6.1667 6.5 6.33 248 8.27 1532 1716 51.07 57.2
6.5 6.8333 6.67 261 8.70 1793 1468 59.77 48.93
6.8333 7.1667 7 238 7.93 2031 1207 67.7 40.23
7.1667 7.5 7.33 228 7.60 2259 969 75.3 32.3
7.5 7.8333 7.67 252 8.40 2511 741 83.7 24.7
7.8333 8.1667 8 251 8.37 2762 489 92.07 16.3
8.1667 8.5 8.33 238 7.93 3000 238 100 7.93
Total 3000 100.00
Autor: Grupo 3
# Histogram with the default class limits chosen by hist(); the returned
# object is kept because the simplified table below is built from it.
histoP <- hist(
  Suelo_pH,
  main = "Gráfica Nº1: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Cantidad",
  col = "blue"
)

# Simplified table based on the histogram.
# NOTE: `ni` and `hi` are reassigned here and replace the vectors of the
# manual table; the TDFSuelo_pH data frame keeps its own copies.
Limites <- histoP$breaks
LimInf <- head(Limites, -1)  # every break except the last
LimSup <- tail(Limites, -1)  # every break except the first
Mc <- histoP$mids
ni <- histoP$counts
sum(ni)
## [1] 3000
# Relative frequencies in percent, rounded to 2 decimals for display only;
# their sum can miss 100 by a rounding residue.
hi <- round(ni / sum(ni) * 100, 2)
sum(hi)
## [1] 100.01
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))
# Cumulative percentages come from the cumulative counts rather than from the
# rounded `hi`, so the series ends (or starts) at exactly 100 instead of
# carrying the rounding residue (previously 100.01).
Hi_asc <- round(Ni_asc / sum(ni) * 100, 2)
Hi_dsc <- round(Ni_dsc / sum(ni) * 100, 2)

TDF_Histo_Suelo_pH <- data.frame(LimInf, LimSup, Mc, ni, hi, Ni_asc, Ni_dsc, Hi_asc, Hi_dsc)

# Totals shown in the last row of the simplified table.
totalni <- sum(ni)
totalhi <- 100

# "Total" row: only ni and hi carry values; the remaining cells are blank
# strings, which turns those columns into character columns after rbind().
fila_total_histo <- data.frame(
  LimInf = "Total", LimSup = " ", Mc = " ",
  ni = totalni, hi = totalhi,
  Ni_asc = " ", Ni_dsc = " ",
  Hi_asc = " ", Hi_dsc = " "
)

# Simplified table with the totals row appended at the bottom.
TDF_Histo_Suelo_pH_completo <- rbind(TDF_Histo_Suelo_pH, fila_total_histo)

# Render Table 2 (simplified frequency distribution) with gt, one step at a
# time: base table, header, source note, styling options, bold totals row.
tabla_Histo <- gt(TDF_Histo_Suelo_pH_completo)

tabla_Histo <- tab_header(
  tabla_Histo,
  title = md("*Tabla Nº2*"),
  subtitle = md("**Tabla simplificada de distribución del pH del Suelo**")
)

tabla_Histo <- tab_source_note(
  tabla_Histo,
  source_note = md("Autor: Grupo 3")
)

tabla_Histo <- tab_options(
  tabla_Histo,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE
)

# Highlight the totals row in bold.
tabla_Histo <- tab_style(
  tabla_Histo,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = LimInf == "Total")
)

tabla_Histo
Tabla Nº2
Tabla simplificada de distribución del pH del Suelo
LimInf LimSup Mc ni hi Ni_asc Ni_dsc Hi_asc Hi_dsc
4.5 5 4.75 426 14.20 426 3000 14.2 100.01
5 5.5 5.25 368 12.27 794 2574 26.47 85.81
5.5 6 5.75 371 12.37 1165 2206 38.84 73.54
6 6.5 6.25 377 12.57 1542 1835 51.41 61.17
6.5 7 6.75 380 12.67 1922 1458 64.08 48.6
7 7.5 7.25 339 11.30 2261 1078 75.38 35.93
7.5 8 7.75 379 12.63 2640 739 88.01 24.63
8 8.5 8.25 360 12.00 3000 360 100.01 12
Total 3000 100.00
Autor: Grupo 3
# Charts

# Class limits of the manual table, shared by both histograms.
limites_clase <- seq(min, max, A)

# Histogram on a local scale (y axis fitted to the class counts).
hist(
  Suelo_pH,
  breaks = limites_clase,
  main = "Gráfica Nº2: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "#4A90E2"
)

# Histogram on a global scale: the y axis runs up to the sample size, taken
# from the data instead of a hard-coded 3000.
hist(
  Suelo_pH,
  breaks = limites_clase,
  main = "Gráfica Nº3: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, length(Suelo_pH))
)

# Relative frequency (%) per class on a local scale; bars touch each other
# (space = 0) and are labelled with the class marks.
barplot(
  TDFSuelo_pH$hi,
  space = 0,
  col = "skyblue",
  main = "Gráfica Nº4: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Porcentaje (%)",
  names.arg = TDFSuelo_pH$MC,
  cex.names = 0.9
)

# Same chart on a global scale (y axis from 0 to 100 %).
# The title typo "Distribucoión" was corrected to "Distribución".
barplot(
  TDFSuelo_pH$hi,
  space = 0,
  col = "yellow",
  main = "Gráfica Nº5: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Porcentaje (%)",
  names.arg = TDFSuelo_pH$MC,
  ylim = c(0, 100)
)

# Boxplot
# Plots Suelo_pH directly: the previously referenced `Suelo_pH_out` is not
# defined anywhere in this script, so the call stopped with "object not found".
# NOTE(review): if a filtered vector was intended, define it before this call.
boxplot(
  Suelo_pH,
  horizontal = TRUE,
  col = "pink",
  main = "Gráfica Nº6: Distribución del pH del Suelo",
  xlab = "pH",
  outline = TRUE,
  pch = 19
)

# Frequency ogives (ascending and descending)

# Descending cumulative counts, plotted against the lower class limits (red).
plot(
  x = Li, y = Nidsc,
  type = "o",
  col = "red",
  lwd = 3,
  xlim = c(min, max),
  xlab = "pH",
  ylab = "Cantidad",
  main = "Gráfica Nº7: Distribución Ascendente y Descendente del pH del Suelo"
)

# Ascending cumulative counts, added against the upper class limits (green).
lines(x = Ls, y = Niasc, type = "o", col = "green", lwd = 3)

# Percentage ogives (ascending and descending)

# Descending cumulative percentages, plotted against the lower class limits
# (red).
plot(
  x = Li, y = Hidsc,
  type = "o",
  col = "red",
  lwd = 2,
  xlim = c(min, max),
  xlab = "pH",
  ylab = "Porcentaje (%)",
  main = "Gráfica Nº8: Distribución Ascendente y Descendente del pH del Suelo"
)

# Ascending cumulative percentages, added against the upper class limits
# (blue).
lines(x = Ls, y = Hiasc, type = "o", col = "blue", lwd = 3)

# INDICADORES ESTADÍSTICOS

# Indicadores de Tendencia Central

# Arithmetic mean, rounded to 2 decimals for reporting.
media <- round(mean(Suelo_pH, na.rm = TRUE), 2)
media
## [1] 6.46
# Mode from the frequency table of distinct values: every value whose count
# equals the highest count (there may be more than one).
Tabla_pH <- as.data.frame(table(Suelo_pH))
max_frecuencia <- max(Tabla_pH$Freq)
# table() stores the values as factor levels; convert the labels back to
# numbers so `moda` is a numeric vector rather than a factor.
moda <- as.numeric(
  as.character(Tabla_pH$Suelo_pH[Tabla_pH$Freq == max_frecuencia])
)
moda
## [1] 4.81 5.81
# Median
mediana <- median(Suelo_pH, na.rm = TRUE)
mediana
## [1] 6.45
# INDICADORES DE DISPERSIÓN #

# Variance
varianza <- var(Suelo_pH, na.rm = TRUE)
varianza
## [1] 1.360267
# Standard deviation
# NOTE: the variable keeps the name `sd` because the summary table reads it;
# it shadows the function only as a variable, so sd(x) calls still work.
sd <- sd(Suelo_pH, na.rm = TRUE)
sd
## [1] 1.166305
# Coefficient of variation (%), computed from the unrounded mean so that the
# 2-decimal rounding of `media` does not leak into the result.
cv <- round((sd / mean(Suelo_pH, na.rm = TRUE)) * 100, 2)
cv
## [1] 18.05  (recorded when cv still used the rounded mean; re-run to refresh)
# INDICADORES DE FORMA #

library(e1071)

# Skewness (e1071 estimator type 2)
asimetria <- skewness(Suelo_pH, type = 2, na.rm = TRUE)
asimetria
## [1] 0.03019663
# Kurtosis, using the same estimator type as the skewness so both shape
# indicators follow one convention (the call previously fell back to the
# package default, type = 3).
curtosis <- kurtosis(Suelo_pH, type = 2, na.rm = TRUE)
curtosis
## [1] -1.203665  (recorded with the default type = 3; re-run to refresh)
# Final summary table: one row holding every indicator computed above,
# rounded to 2 decimals where the stored value is not already rounded.
rango_pH <- range(Suelo_pH, na.rm = TRUE)  # c(minimum, maximum)

tabla_indicadores <- data.frame(
  Variable = "Suelo_pH",
  Rango = paste0("[", rango_pH[1], " ; ", rango_pH[2], "]"),
  X = media,
  Me = round(mediana, 2),
  Mo = paste(moda, collapse = ", "),  # all modes in a single cell
  V = round(varianza, 2),
  Sd = round(sd, 2),
  Cv = cv,
  As = round(asimetria, 2),
  K = round(curtosis, 2),
  Valores_Atipicos = "--"
)


# TABLA EN FORMATO GT
library(gt)

# Render the indicator summary with gt, one step at a time: base table,
# header, source note, styling options, bold data row.
tabla_indicadores_gt <- gt(tabla_indicadores)

tabla_indicadores_gt <- tab_header(
  tabla_indicadores_gt,
  title = md("*Tabla N°4.1*"),
  subtitle = md("**Indicadores estadísticos del Suelo pH**")
)

tabla_indicadores_gt <- tab_source_note(
  tabla_indicadores_gt,
  source_note = md("Autor: Grupo 3")
)

# Border, striping and rule styling.
tabla_indicadores_gt <- tab_options(
  tabla_indicadores_gt,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)

# Show the Suelo_pH row in bold.
tabla_indicadores_gt <- tab_style(
  tabla_indicadores_gt,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Variable == "Suelo_pH")
)

tabla_indicadores_gt
Tabla N°4.1
Indicadores estadísticos del Suelo pH
Variable Rango X Me Mo V Sd Cv As K Valores_Atipicos
Suelo_pH [4.5 ; 8.5] 6.46 6.45 4.81, 5.81 1.36 1.17 18.05 0.03 -1.2 --
Autor: Grupo 3