UNIVERSIDAD CENTRAL DEL ECUADOR

ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD

FECHA: 19/11/2025

# =========================
# ESTADÍSTICA Descriptiva
# Fecha: 19/11/2025
# =========================

# -------------------------
# Cargar datos
# -------------------------
# Load the soil-pollution dataset.
# The full path is built explicitly instead of calling setwd(), so running
# this script no longer changes the working directory of the R session.
dir_datos <- "C:/Users/Alexander/Downloads"
ruta_datos <- file.path(dir_datos, "soil_pollution_diseases.csv")

# Fail early with a clear message when the file is not where it is expected.
if (!file.exists(ruta_datos)) {
  stop("No se encontró el archivo de datos: ", ruta_datos, call. = FALSE)
}

datos <- read.csv(ruta_datos,
                  sep = ",",
                  stringsAsFactors = FALSE)

# ================================
# VARIABLE CUANTITATIVA CONTINUA
# ================================


# Extract the Temperature_C column as a plain vector.
# `[[` is used instead of `$` so the column name has to match exactly
# (`$` accepts partial names on data frames).
Temperatura <- datos[["Temperature_C"]]

# Every later step (min/max, class limits, hist) needs a non-empty numeric
# vector without NA, so stop here with a clear failure if that does not hold.
stopifnot(
  is.numeric(Temperatura),
  length(Temperatura) > 0,
  !anyNA(Temperatura)
)

# ================================
# TABLA Nº1 (Cálculo manual)
# ================================

# Number of classes by Sturges' rule, k = 1 + 3.3 * log10(n), computed from
# the actual sample size instead of a hard-coded 3000.
n_obs <- length(Temperatura)
k <- floor(1 + (3.3 * log10(n_obs)))

# Range (R) and class width (A). The extremes get their own names so the base
# functions min() and max() are not shadowed by numeric variables.
temp_min <- min(Temperatura)
temp_max <- max(Temperatura)

R <- temp_max - temp_min
A <- R / k

# Exactly k + 1 class boundaries. length.out fixes the number of points and
# keeps both endpoints exact, which stepping with `by = A` cannot guarantee
# when A is not exactly representable in floating point.
cortes <- seq(from = temp_min, to = temp_max, length.out = k + 1)

# Lower/upper class limits (rounded to 4 decimals for display) and class marks.
Li <- round(cortes[-length(cortes)], 4)
Ls <- round(cortes[-1], 4)
MC <- round((Li + Ls) / 2, 2)

# Absolute frequencies. Classes are [Li, Ls); the last class is closed on the
# right so the maximum is counted. Counting is done against the unrounded
# boundaries so display rounding cannot move a value into another class.
clase <- findInterval(Temperatura, cortes, rightmost.closed = TRUE)
ni <- tabulate(clase, nbins = k)

# Relative frequencies in percent.
hi <- ni / sum(ni) * 100

# Ascending / descending cumulative frequencies (absolute and percent).
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- round(cumsum(hi))
Hidsc <- round(rev(cumsum(rev(hi))))

# Frequency-distribution table of Temperatura: one row per class.
TDF_Temperatura <- data.frame(
  Li = Li,
  Ls = Ls,
  MC = MC,
  ni = ni,
  hi = hi,
  Niasc = Niasc,
  Nidsc = Nidsc,
  Hiasc = Hiasc,
  Hidsc = Hidsc
)

# Same table with a closing "Total" row. The text cells in that row make
# rbind() convert the affected columns to character (display-only table).
TDF_Temperatura_Completo <- rbind(
  TDF_Temperatura,
  data.frame(
    Li = "Total",
    Ls = " ",
    MC = " ",
    ni = sum(ni),
    hi = 100,
    Niasc = " ",
    Nidsc = " ",
    Hiasc = " ",
    Hidsc = " "
  )
)

library(gt)
library(dplyr)

# Render Table Nº1 with gt: build the table, then attach the header and the
# source note (written as nested calls instead of a pipe chain).
tabla_Temperatura <- tab_source_note(
  tab_header(
    gt(TDF_Temperatura_Completo),
    title = md("*Tabla Nº1*"),
    subtitle = md("**Tabla de distribución de Frecuencias de la Temperatura (°C)**")
  ),
  source_note = md("Autor: Grupo 3")
)

# Top-level reference so the table is printed.
tabla_Temperatura
Tabla Nº1
Tabla de distribución de Frecuencias de la Temperatura (°C)
Li Ls MC ni hi Niasc Nidsc Hiasc Hidsc
10 12.9167 11.46 258 8.600000 258 3000 9 100
12.9167 15.8333 14.38 266 8.866667 524 2742 17 91
15.8333 18.75 17.29 251 8.366667 775 2476 26 83
18.75 21.6667 20.21 241 8.033333 1016 2225 34 74
21.6667 24.5833 23.12 262 8.733333 1278 1984 43 66
24.5833 27.5 26.04 260 8.666667 1538 1722 51 57
27.5 30.4167 28.96 234 7.800000 1772 1462 59 49
30.4167 33.3333 31.88 257 8.566667 2029 1228 68 41
33.3333 36.25 34.79 253 8.433333 2282 971 76 32
36.25 39.1667 37.71 249 8.300000 2531 718 84 24
39.1667 42.0833 40.62 218 7.266667 2749 469 92 16
42.0833 45 43.54 251 8.366667 3000 251 100 8
Total 3000 100.000000
Autor: Grupo 3
# =========================
# HISTOGRAMA Nº1 (7 intervalos)
# =========================

# Histogram Nº1 with exactly 7 classes.
# hist() treats a single number in `breaks` only as a suggestion (the real
# cut points are chosen by pretty()), so the 8 class boundaries are passed
# explicitly to guarantee 7 equal-width intervals from minimum to maximum.
n_intervalos <- 7
cortes_hist <- seq(min(Temperatura), max(Temperatura),
                   length.out = n_intervalos + 1)

# The returned object is kept because Table Nº2 is built from its breaks,
# mids and counts.
histoP <- hist(
  Temperatura,
  breaks = cortes_hist,
  main = "Gráfica Nº1: Distribución de la Temperatura (°C)",
  xlab = "Temperatura (°C)",
  ylab = "Cantidad",
  col = "blue"
)

# =========================
# TABLA Nº2 (Basada en Histograma)
# =========================

# Class limits, class marks and counts taken from the histogram object.
Limites <- histoP$breaks
LimInf <- Limites[-length(Limites)]
LimSup <- Limites[-1]
Mc <- histoP$mids
ni <- histoP$counts

# Relative frequencies in percent. The unrounded values are kept for the
# cumulative sums; rounding happens once at the end, so rounding error does
# not accumulate along the cumulative columns.
hi_exacto <- ni / sum(ni) * 100
hi <- round(hi_exacto, 2)

# Ascending / descending cumulative frequencies (absolute and percent).
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))
Hi_asc <- round(cumsum(hi_exacto), 2)
Hi_dsc <- round(rev(cumsum(rev(hi_exacto))), 2)

# Histogram-based frequency table: one row per class.
TDF_Histo_Temperatura <- data.frame(
  LimInf = LimInf,
  LimSup = LimSup,
  Mc = Mc,
  ni = ni,
  hi = hi,
  Ni_asc = Ni_asc,
  Ni_dsc = Ni_dsc,
  Hi_asc = Hi_asc,
  Hi_dsc = Hi_dsc
)

# Same table with a closing "Total" row. The text cells in that row make
# rbind() convert the affected columns to character (display-only table).
TDF_Histo_Temperatura_Completo <- rbind(
  TDF_Histo_Temperatura,
  data.frame(
    LimInf = "Total",
    LimSup = " ",
    Mc = " ",
    ni = sum(ni),
    hi = 100,
    Ni_asc = " ",
    Ni_dsc = " ",
    Hi_asc = " ",
    Hi_dsc = " "
  )
)

# Render Table Nº2 with gt: build the table, then attach the header and the
# source note (written as nested calls instead of a pipe chain).
tabla_Histo_Temperatura <- tab_source_note(
  tab_header(
    gt(TDF_Histo_Temperatura_Completo),
    title = md("*Tabla Nº2*"),
    subtitle = md("**Tabla simplificada de distribución de la Temperatura (7 intervalos)**")
  ),
  source_note = md("Autor: Grupo 3")
)

# Top-level reference so the table is printed.
tabla_Histo_Temperatura
Tabla Nº2
Tabla simplificada de distribución de la Temperatura (7 intervalos)
LimInf LimSup Mc ni hi Ni_asc Ni_dsc Hi_asc Hi_dsc
10 15 12.5 448 14.93 448 3000 14.93 100
15 20 17.5 451 15.03 899 2552 29.96 85.07
20 25 22.5 429 14.30 1328 2101 44.26 70.04
25 30 27.5 417 13.90 1745 1672 58.16 55.74
30 35 32.5 428 14.27 2173 1255 72.43 41.84
35 40 37.5 414 13.80 2587 827 86.23 27.57
40 45 42.5 413 13.77 3000 413 100 13.77
Total 3000 100.00
Autor: Grupo 3
# =========================
# HISTOGRAMA Nº2 (LOCAL)
# =========================

# Histogram Nº2 (local scale: the y axis is fitted to the class counts).
# Explicit boundaries give the same 7 equal-width classes used by the
# 7-interval table; a bare `breaks = 7` is only a suggestion to hist().
hist(
  Temperatura,
  breaks = seq(min(Temperatura), max(Temperatura), length.out = 8),
  main = "Gráfica Nº2: Frecuencia de la Temperatura (Local)",
  xlab = "Temperatura (°C)",
  ylab = "Frecuencia",
  col = "#4A90E2"
)

# =========================
# HISTOGRAMA Nº3 (GLOBAL)
# =========================

# Histogram Nº3 (global scale: the y axis runs from 0 to the sample size).
# Explicit boundaries give the same 7 equal-width classes used by the
# 7-interval table; a bare `breaks = 7` is only a suggestion to hist().
# The upper y limit is the number of observations rather than a fixed 3000.
hist(
  Temperatura,
  breaks = seq(min(Temperatura), max(Temperatura), length.out = 8),
  main = "Gráfica Nº3: Frecuencia de la Temperatura (Global)",
  xlab = "Temperatura (°C)",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, length(Temperatura))
)

# =========================
# GRÁFICAS PORCENTUALES (Basadas en Tabla 2)
# =========================

# Bar chart Nº4: percentage per class on a local scale (y axis fitted to the
# data). Bars touch (space = 0) and are labelled with the class marks.
with(
  TDF_Histo_Temperatura,
  barplot(
    height = hi,
    names.arg = Mc,
    space = 0,
    main = "Gráfica Nº4: Porcentaje de la Temperatura (Local)",
    xlab = "Intervalos",
    ylab = "Porcentaje (%)",
    col = "skyblue"
  )
)

# Bar chart Nº5: percentage per class on a global scale (y axis fixed to
# 0-100). Bars touch (space = 0) and are labelled with the class marks.
with(
  TDF_Histo_Temperatura,
  barplot(
    height = hi,
    names.arg = Mc,
    space = 0,
    ylim = c(0, 100),
    main = "Gráfica Nº5: Porcentaje de la Temperatura (Global)",
    xlab = "Intervalos",
    ylab = "Porcentaje (%)",
    col = "yellow"
  )
)

# =========================
# BOXPLOT
# =========================

# Box plot Nº6 of Temperatura, drawn horizontally.
boxplot(
  x = Temperatura,
  main = "Gráfica Nº6: Distribución de la Temperatura",
  xlab = "Temperatura (°C)",
  col = "pink",
  horizontal = TRUE
)

# =========================
# OJIVAS
# =========================

# Ogives of absolute cumulative frequency.
# The descending curve is drawn at the lower class limits and the ascending
# curve at the upper limits. The axis ranges are set from BOTH series;
# otherwise plot() sizes the x axis from LimInf only and the last ascending
# point (at the final upper limit) falls outside the plotting region.
plot(
  LimInf, Ni_dsc,
  main = "Gráfica Nº7: Ojiva Ascendente y Descendente de la Temperatura",
  xlab = "Temperatura (°C)",
  ylab = "Cantidad",
  col = "red",
  type = "o",
  lwd = 2,
  xlim = range(LimInf, LimSup),
  ylim = c(0, max(Ni_asc, Ni_dsc))
)

lines(LimSup, Ni_asc, col = "green", type = "o", lwd = 2)

# Legend identifying each curve, as in the percentage ogive plot.
legend(
  "right",
  legend = c("Ojiva Ascendente", "Ojiva Descendente"),
  col = c("green", "red"),
  pch = 1,
  lty = 1,
  lwd = 2
)

# =========================
# OJIVAS PORCENTUALES vs TEMPERATURA
# =========================

# Ascending ogive (plotted at the upper class limits).
# xlim covers lower and upper limits together; otherwise the x axis is sized
# from LimSup only and the first point of the descending ogive is clipped.
# The figure is numbered Nº8 because Nº5 is already used by the global
# percentage bar chart.
plot(
  LimSup,
  Hi_asc,
  type = "o",
  col = "blue",
  pch = 16,
  xlim = range(LimInf, LimSup),
  ylim = c(0, 100),
  main = "Gráfica Nº8: Ojivas Porcentuales - Temperatura (°C)",
  xlab = "Temperatura (°C)",
  ylab = "Frecuencia Acumulada (%)"
)

# Descending ogive (plotted at the lower class limits).
lines(
  LimInf,
  Hi_dsc,
  type = "o",
  col = "red",
  pch = 17
)

# Reference grid behind the two curves.
grid()

# Legend matching the colours and point symbols used above.
legend(
  "right",
  legend = c("Ojiva Ascendente (%)", "Ojiva Descendente (%)"),
  col = c("blue", "red"),
  pch = c(16, 17),
  lty = 1
)

# =========================
# INDICADORES ESTADÍSTICOS
# =========================

# Central tendency. The unrounded mean is kept so derived indicators are not
# computed from an already-rounded value.
media_exacta <- mean(Temperatura)
media <- round(media_exacta, 2)
mediana <- median(Temperatura)

# Mode(s): the value(s) with the highest frequency among the raw observations.
Tabla_Temp <- as.data.frame(table(Temperatura))
max_frecuencia <- max(Tabla_Temp$Freq)
moda <- Tabla_Temp$Temperatura[Tabla_Temp$Freq == max_frecuencia]

# Dispersion. The standard deviation gets its own name so the function
# stats::sd() is not shadowed by a numeric variable.
varianza <- var(Temperatura)
desv_est <- sd(Temperatura)
cv <- round((desv_est / media_exacta) * 100, 2)

# Shape. Both indicators use the same estimator (type = 2) so skewness and
# kurtosis are computed consistently.
library(e1071)
asimetria <- skewness(Temperatura, type = 2)
curtosis <- kurtosis(Temperatura, type = 2)

# Outliers, detected with the same 1.5 * IQR whisker rule that boxplot() uses
# by default, instead of hard-coding the conclusion.
atipicos <- boxplot.stats(Temperatura)$out
texto_atipicos <- if (length(atipicos) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Hay ", length(atipicos), " valores atípicos")
}


# =========================
# FINAL SUMMARY TABLE
# =========================

# One-row summary of all indicators; numeric values are rounded to 2 decimals
# for display.
tabla_indicadores <- data.frame(
  "Variable" = c("Temperatura"),
  "Rango" = c(paste0("[", min(Temperatura), " ; ", max(Temperatura), "]")),
  "X" = c(media),
  "Me" = c(round(mediana, 2)),
  "Mo" = c(paste(moda, collapse = ", ")),
  "V" = c(round(varianza, 2)),
  "Sd" = c(round(desv_est, 2)),
  "Cv" = c(cv),
  "As" = c(round(asimetria, 2)),
  "K" = c(round(curtosis, 2)),
  "Valores_Atipicos" = texto_atipicos
)

# Render Table Nº3 with gt: build the table, then attach the header and the
# source note (written as nested calls instead of a pipe chain).
tabla_indicadores_gt <- tab_source_note(
  tab_header(
    gt(tabla_indicadores),
    title = md("*Tabla Nº3*"),
    subtitle = md("**Indicadores estadísticos de la Temperatura (°C)**")
  ),
  source_note = md("Autor: Grupo 3")
)

# Top-level reference so the table is printed.
tabla_indicadores_gt
Tabla Nº3
Indicadores estadísticos de la Temperatura (°C)
Variable Rango X Me Mo V Sd Cv As K Valores_Atipicos
Temperatura [10 ; 45] 27.23 27 40.5 101.7 10.08 37.03 0.03 -1.2 No hay presencia de valores atípicos
Autor: Grupo 3
##============##
## CONCLUSION ##
##============##
# La variable Temperatura fluctúa entre 10 y 45 °C y gira en torno a 27.23 °C, con una desviación estándar de 10.08 °C, siendo un conjunto de datos homogéneo; los valores se acumulan de manera débil en la parte media de la variable, sin presencia de valores atípicos.