UNIVERSIDAD CENTRAL DEL ECUADOR
ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD
FECHA: 19/11/2025
# =========================
# ESTADÍSTICA Descriptiva
# Fecha: 19/11/2025
# =========================
# -------------------------
# Cargar datos
# -------------------------
# Load the soil-pollution dataset from the working directory.
# NOTE(review): setwd() with an absolute, user-specific path only works on the
# original author's machine. It is kept unchanged so the working directory
# seen by the rest of the script stays the same; consider a project-relative
# path instead.
setwd("C:/Users/Alexander/Downloads")
datos <- read.csv(
  "soil_pollution_diseases.csv",
  sep = ",",
  stringsAsFactors = FALSE
)
# ================================
# CONTINUOUS QUANTITATIVE VARIABLE
# ================================
# Fail early, with a clear message, if the expected column is missing or was
# not parsed as numeric. `[[` is used instead of `$` because `$` on a
# data.frame does partial name matching.
if (!"Temperature_C" %in% names(datos)) {
  stop(
    "La columna 'Temperature_C' no existe en el archivo de datos.",
    call. = FALSE
  )
}
Temperatura <- datos[["Temperature_C"]]
if (!is.numeric(Temperatura)) {
  stop("La columna 'Temperature_C' no es numérica.", call. = FALSE)
}
# ================================
# TABLA Nº1 (Cálculo manual)
# ================================
# Build the manual frequency-distribution table (Table 1) for Temperatura.
# The class limits need a non-empty numeric vector without missing values.
stopifnot(
  is.numeric(Temperatura),
  length(Temperatura) > 0,
  !anyNA(Temperatura)
)

# Number of classes by Sturges' rule, k = 1 + 3.3 * log10(n), using the actual
# sample size rather than a hard-coded n.
n_obs <- length(Temperatura)
k <- floor(1 + (3.3 * log10(n_obs)))

# Named temp_min / temp_max so the base functions min() and max() are not
# shadowed by variables.
temp_min <- min(Temperatura)
temp_max <- max(Temperatura)
R <- temp_max - temp_min # range of the variable
A <- R / k               # class width

# A single sequence of k + 1 boundaries guarantees exactly k classes and that
# Li and Ls always have the same length. Limits are rounded to 4 decimals, the
# precision shown in the table.
limites <- round(seq(from = temp_min, to = temp_max, length.out = k + 1), 4)
Li <- limites[-length(limites)]
Ls <- limites[-1]
MC <- round((Li + Ls) / 2, 2) # class marks (midpoints)

# Absolute frequencies. Classes are [Li, Ls), except the last one, which is
# closed on the right so that the maximum value is counted.
ni <- vapply(
  seq_along(Li),
  function(i) {
    if (i == length(Li)) {
      sum(Temperatura >= Li[i] & Temperatura <= temp_max)
    } else {
      sum(Temperatura >= Li[i] & Temperatura < Ls[i])
    }
  },
  numeric(1)
)

# Relative (percentage) and cumulative frequencies, ascending and descending.
hi <- ni / sum(ni) * 100
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- round(cumsum(hi))
Hidsc <- round(rev(cumsum(rev(hi))))

TDF_Temperatura <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)

# Append a "Total" row for display. Because this row holds text, rbind()
# coerces the affected columns to character.
TDF_Temperatura_Completo <- rbind(
  TDF_Temperatura,
  data.frame(
    Li = "Total", Ls = " ", MC = " ",
    ni = sum(ni), hi = 100,
    Niasc = " ", Nidsc = " ",
    Hiasc = " ", Hidsc = " "
  )
)
library(gt)
library(dplyr)
# Render Table 1 with gt: start from the completed frequency table, then add
# the header (title and subtitle) and the source note, one step at a time.
tabla_Temperatura <- gt(TDF_Temperatura_Completo)
tabla_Temperatura <- tab_header(
  tabla_Temperatura,
  title = md("*Tabla Nº1*"),
  subtitle = md("**Tabla de distribución de Frecuencias de la Temperatura (°C)**")
)
tabla_Temperatura <- tab_source_note(
  tabla_Temperatura,
  source_note = md("Autor: Grupo 3")
)
# Auto-print the table object so it is displayed.
tabla_Temperatura
| Tabla Nº1 | ||||||||
| Tabla de distribución de Frecuencias de la Temperatura (°C) | ||||||||
| Li | Ls | MC | ni | hi | Niasc | Nidsc | Hiasc | Hidsc |
|---|---|---|---|---|---|---|---|---|
| 10 | 12.9167 | 11.46 | 258 | 8.600000 | 258 | 3000 | 9 | 100 |
| 12.9167 | 15.8333 | 14.38 | 266 | 8.866667 | 524 | 2742 | 17 | 91 |
| 15.8333 | 18.75 | 17.29 | 251 | 8.366667 | 775 | 2476 | 26 | 83 |
| 18.75 | 21.6667 | 20.21 | 241 | 8.033333 | 1016 | 2225 | 34 | 74 |
| 21.6667 | 24.5833 | 23.12 | 262 | 8.733333 | 1278 | 1984 | 43 | 66 |
| 24.5833 | 27.5 | 26.04 | 260 | 8.666667 | 1538 | 1722 | 51 | 57 |
| 27.5 | 30.4167 | 28.96 | 234 | 7.800000 | 1772 | 1462 | 59 | 49 |
| 30.4167 | 33.3333 | 31.88 | 257 | 8.566667 | 2029 | 1228 | 68 | 41 |
| 33.3333 | 36.25 | 34.79 | 253 | 8.433333 | 2282 | 971 | 76 | 32 |
| 36.25 | 39.1667 | 37.71 | 249 | 8.300000 | 2531 | 718 | 84 | 24 |
| 39.1667 | 42.0833 | 40.62 | 218 | 7.266667 | 2749 | 469 | 92 | 16 |
| 42.0833 | 45 | 43.54 | 251 | 8.366667 | 3000 | 251 | 100 | 8 |
| Total | 3000 | 100.000000 | ||||||
| Autor: Grupo 3 | ||||||||
# =========================
# HISTOGRAMA Nº1 (7 intervalos)
# =========================
# Histogram 1 with exactly 7 equal-width classes.
# hist() treats a single number in `breaks` as a suggestion only (the actual
# breakpoints are set to "pretty" values), so explicit breakpoints are passed
# to guarantee the class count. The returned object is kept in `histoP`
# because Table 2 is built from its breaks, mids and counts.
n_clases <- 7
cortes <- seq(
  from = min(Temperatura),
  to = max(Temperatura),
  length.out = n_clases + 1
)
histoP <- hist(
  Temperatura,
  breaks = cortes,
  main = "Gráfica Nº1: Distribución de la Temperatura (°C)",
  xlab = "Temperatura (°C)",
  ylab = "Cantidad",
  col = "blue"
)
# =========================
# TABLA Nº2 (Basada en Histograma)
# =========================
# Build Table 2 from the classes computed by the histogram object `histoP`.
Limites <- histoP$breaks
LimInf <- Limites[-length(Limites)] # lower limit of each class
LimSup <- Limites[-1]               # upper limit of each class
Mc <- histoP$mids                   # class marks (midpoints)
ni <- histoP$counts                 # absolute frequencies
hi <- round(ni / sum(ni) * 100, 2)  # percentage per class

# Cumulative counts, ascending and descending.
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))

# Cumulative percentages are derived from the cumulative counts. Summing the
# already-rounded `hi` values would accumulate rounding error from class to
# class.
Hi_asc <- round(Ni_asc / sum(ni) * 100, 2)
Hi_dsc <- round(Ni_dsc / sum(ni) * 100, 2)

TDF_Histo_Temperatura <- data.frame(
  LimInf, LimSup, Mc, ni, hi, Ni_asc, Ni_dsc, Hi_asc, Hi_dsc
)

# Append a "Total" row for display. Because this row holds text, rbind()
# coerces the affected columns to character.
TDF_Histo_Temperatura_Completo <- rbind(
  TDF_Histo_Temperatura,
  data.frame(
    LimInf = "Total",
    LimSup = " ", Mc = " ",
    ni = sum(ni),
    hi = 100,
    Ni_asc = " ",
    Ni_dsc = " ",
    Hi_asc = " ",
    Hi_dsc = " "
  )
)
# Render Table 2 with gt: start from the completed histogram-based table, then
# add the header (title and subtitle) and the source note step by step.
tabla_Histo_Temperatura <- gt(TDF_Histo_Temperatura_Completo)
tabla_Histo_Temperatura <- tab_header(
  tabla_Histo_Temperatura,
  title = md("*Tabla Nº2*"),
  subtitle = md("**Tabla simplificada de distribución de la Temperatura (7 intervalos)**")
)
tabla_Histo_Temperatura <- tab_source_note(
  tabla_Histo_Temperatura,
  source_note = md("Autor: Grupo 3")
)
# Auto-print the table object so it is displayed.
tabla_Histo_Temperatura
| Tabla Nº2 | ||||||||
| Tabla simplificada de distribución de la Temperatura (7 intervalos) | ||||||||
| LimInf | LimSup | Mc | ni | hi | Ni_asc | Ni_dsc | Hi_asc | Hi_dsc |
|---|---|---|---|---|---|---|---|---|
| 10 | 15 | 12.5 | 448 | 14.93 | 448 | 3000 | 14.93 | 100 |
| 15 | 20 | 17.5 | 451 | 15.03 | 899 | 2552 | 29.96 | 85.07 |
| 20 | 25 | 22.5 | 429 | 14.30 | 1328 | 2101 | 44.26 | 70.04 |
| 25 | 30 | 27.5 | 417 | 13.90 | 1745 | 1672 | 58.16 | 55.74 |
| 30 | 35 | 32.5 | 428 | 14.27 | 2173 | 1255 | 72.43 | 41.84 |
| 35 | 40 | 37.5 | 414 | 13.80 | 2587 | 827 | 86.23 | 27.57 |
| 40 | 45 | 42.5 | 413 | 13.77 | 3000 | 413 | 100 | 13.77 |
| Total | 3000 | 100.00 | ||||||
| Autor: Grupo 3 | ||||||||
# =========================
# HISTOGRAMA Nº2 (LOCAL)
# =========================
# Histogram 2 (local scale: the y axis is fitted to the class counts).
# The breakpoints of `histoP` are reused so this chart always shows the same
# classes as Table 2; a scalar `breaks` is only a suggestion to hist().
hist(
  Temperatura,
  breaks = histoP$breaks,
  main = "Gráfica Nº2: Frecuencia de la Temperatura (Local)",
  xlab = "Temperatura (°C)",
  ylab = "Frecuencia",
  col = "#4A90E2"
)
# =========================
# HISTOGRAMA Nº3 (GLOBAL)
# =========================
# Histogram 3 (global scale: the y axis is fixed to 0-3000 via `ylim`).
# The breakpoints of `histoP` are reused so this chart always shows the same
# classes as Table 2; a scalar `breaks` is only a suggestion to hist().
hist(
  Temperatura,
  breaks = histoP$breaks,
  main = "Gráfica Nº3: Frecuencia de la Temperatura (Global)",
  xlab = "Temperatura (°C)",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, 3000)
)
# =========================
# GRÁFICAS PORCENTUALES (Basadas en Tabla 2)
# =========================
# Percentage bar charts built from Table 2. Both charts plot the per-class
# percentage (`hi`) labelled with the class marks (`Mc`), with no gap between
# bars; with() resolves both column names inside TDF_Histo_Temperatura.

# Chart 4: local scale (y axis fitted to the data).
with(
  TDF_Histo_Temperatura,
  barplot(
    hi,
    names.arg = Mc,
    main = "Gráfica Nº4: Porcentaje de la Temperatura (Local)",
    xlab = "Intervalos",
    ylab = "Porcentaje (%)",
    col = "skyblue",
    space = 0
  )
)

# Chart 5: global scale (y axis fixed to 0-100 %).
with(
  TDF_Histo_Temperatura,
  barplot(
    hi,
    names.arg = Mc,
    main = "Gráfica Nº5: Porcentaje de la Temperatura (Global)",
    xlab = "Intervalos",
    ylab = "Porcentaje (%)",
    col = "yellow",
    space = 0,
    ylim = c(0, 100)
  )
)
# =========================
# BOXPLOT
# =========================
# Chart 6: horizontal box-and-whisker plot of the temperature values.
boxplot(
  Temperatura,
  main = "Gráfica Nº6: Distribución de la Temperatura",
  xlab = "Temperatura (°C)",
  col = "pink",
  horizontal = TRUE
)
# =========================
# OJIVAS
# =========================
# Chart 7: ascending and descending ogives of the cumulative counts.
# The descending ogive is drawn against the lower class limits and the
# ascending one against the upper limits. Axis limits are set from BOTH series;
# otherwise plot() sizes the axes from the first series only and the last
# point of the ascending ogive (x = max(LimSup)) falls outside the plot region.
plot(
  LimInf, Ni_dsc,
  main = "Gráfica Nº7: Ojiva Ascendente y Descendente de la Temperatura",
  xlab = "Temperatura (°C)",
  ylab = "Cantidad",
  col = "red",
  type = "o",
  lwd = 2,
  xlim = range(c(LimInf, LimSup)),
  ylim = range(c(Ni_asc, Ni_dsc))
)
lines(LimSup, Ni_asc, col = "green", type = "o", lwd = 2)
# Legend so the two curves can be told apart, matching the percentage ogives.
legend(
  "right",
  legend = c("Ojiva Ascendente", "Ojiva Descendente"),
  col = c("green", "red"),
  pch = 1,
  lty = 1,
  lwd = 2
)
# =========================
# OJIVAS PORCENTUALES vs TEMPERATURA
# =========================
# Ojiva Ascendente (usar límites superiores)
# Chart 8: ascending and descending ogives of the cumulative percentages.
# (Numbered 8: "Nº5" is already used by the percentage bar chart and the
# previous chart is Nº7.)
# Ascending ogive: plotted against the upper class limits. `xlim` spans both
# the lower and upper limits; otherwise the axis is sized from LimSup only and
# the first point of the descending ogive (x = min(LimInf)) is clipped.
plot(
  LimSup,
  Hi_asc,
  type = "o",
  col = "blue",
  pch = 16,
  xlim = range(c(LimInf, LimSup)),
  ylim = c(0, 100),
  main = "Gráfica Nº8: Ojivas Porcentuales - Temperatura (°C)",
  xlab = "Temperatura (°C)",
  ylab = "Frecuencia Acumulada (%)"
)
# Descending ogive: plotted against the lower class limits.
lines(
  LimInf,
  Hi_dsc,
  type = "o",
  col = "red",
  pch = 17
)
grid()
legend(
  "right",
  legend = c("Ojiva Ascendente (%)", "Ojiva Descendente (%)"),
  col = c("blue", "red"),
  pch = c(16, 17),
  lty = 1
)
# =========================
# INDICADORES ESTADÍSTICOS
# =========================
# Summary statistics of Temperatura and the one-row data frame used for
# Table 3.

# Central tendency.
media <- round(mean(Temperatura), 2)
mediana <- median(Temperatura)

# Mode(s): the value(s) with the highest frequency. as.character() turns the
# factor produced by table() into plain labels.
Tabla_Temp <- as.data.frame(table(Temperatura))
max_frecuencia <- max(Tabla_Temp$Freq)
moda <- as.character(
  Tabla_Temp$Temperatura[Tabla_Temp$Freq == max_frecuencia]
)

# Dispersion. The standard deviation is stored as `desv_estandar` so the base
# function sd() is not shadowed. The coefficient of variation uses the
# unrounded mean to avoid propagating the rounding of `media`.
varianza <- var(Temperatura)
desv_estandar <- sd(Temperatura)
cv <- round((desv_estandar / mean(Temperatura)) * 100, 2)

# Shape. Both coefficients use the same estimator (`type = 2`) so they are
# comparable; kurtosis() would otherwise fall back to its default type.
library(e1071)
asimetria <- skewness(Temperatura, type = 2)
curtosis <- kurtosis(Temperatura, type = 2)

# Outliers are detected with boxplot.stats(), i.e. the same default whisker
# rule that boxplot() draws, instead of hard-coding the conclusion.
atipicos <- boxplot.stats(Temperatura)$out
texto_atipicos <- if (length(atipicos) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Se detectaron ", length(atipicos), " valores atípicos")
}

# =========================
# FINAL SUMMARY TABLE
# =========================
tabla_indicadores <- data.frame(
  "Variable" = c("Temperatura"),
  "Rango" = c(paste0("[", min(Temperatura), " ; ", max(Temperatura), "]")),
  "X" = c(media),
  "Me" = c(round(mediana, 2)),
  "Mo" = c(paste(moda, collapse = ", ")),
  "V" = c(round(varianza, 2)),
  "Sd" = c(round(desv_estandar, 2)),
  "Cv" = c(cv),
  "As" = c(round(asimetria, 2)),
  "K" = c(round(curtosis, 2)),
  "Valores_Atipicos" = texto_atipicos
)
# Render Table 3 with gt: start from the indicator summary, then add the
# header (title and subtitle) and the source note step by step.
tabla_indicadores_gt <- gt(tabla_indicadores)
tabla_indicadores_gt <- tab_header(
  tabla_indicadores_gt,
  title = md("*Tabla Nº3*"),
  subtitle = md("**Indicadores estadísticos de la Temperatura (°C)**")
)
tabla_indicadores_gt <- tab_source_note(
  tabla_indicadores_gt,
  source_note = md("Autor: Grupo 3")
)
# Auto-print the table object so it is displayed.
tabla_indicadores_gt
| Tabla Nº3 | ||||||||||
| Indicadores estadísticos de la Temperatura (°C) | ||||||||||
| Variable | Rango | X | Me | Mo | V | Sd | Cv | As | K | Valores_Atipicos |
|---|---|---|---|---|---|---|---|---|---|---|
| Temperatura | [10 ; 45] | 27.23 | 27 | 40.5 | 101.7 | 10.08 | 37.03 | 0.03 | -1.2 | No hay presencia de valores atípicos |
| Autor: Grupo 3 | ||||||||||
##============##
## CONCLUSIÓN ##
##============##
# La variable Temperatura fluctúa entre 10 y 45 y gira en torno a 27.23, con una desviación estándar de 10.08, siendo un conjunto de datos homogéneo; los valores se acumulan de manera débil en la parte media de la variable, sin presencia de valores atípicos.