UNIVERSIDAD CENTRAL DEL ECUADOR
ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD
FECHA: 19/11/2025
library(dplyr)
# Read the soil-pollution records and the per-country centroid table.
datos <- read.csv("soil_pollution_diseases.csv")
centroides <- read.csv("centroides_10_paises.csv")

# Relabel "United States" as "USA" in the centroid table so that it agrees
# with the label used as the join key below.
centroides$Pais <- replace(
  centroides$Pais,
  centroides$Pais == "United States",
  "USA"
)

# The join key must carry the same name in both tables.
names(centroides)[names(centroides) == "Pais"] <- "Country"

# Keep every row of `datos` and attach the matching centroid columns.
datos_final <- left_join(datos, centroides, by = "Country")
# ---- Frequency distribution table of Longitud (manual construction) ----
# Variable under study, taken from the joined data set.
Longitud <- as.numeric(datos_final$Longitud)

# Extremes and range. The results are stored as minL / maxL so that no
# numeric variable masks the base functions min() and max().
minL <- min(Longitud, na.rm = TRUE)
maxL <- max(Longitud, na.rm = TRUE)
R <- maxL - minL

# Number of classes (Sturges' rule) over the non-missing observations,
# and the resulting class width.
K <- floor(1 + 3.33 * log10(sum(!is.na(Longitud))))
A <- R / K

# Exact (unrounded) class limits. `length.out` guarantees exactly K + 1
# limits that start at minL and end at maxL; stepping with `by = A` can
# produce one limit too few or too many through floating-point error.
breaks <- seq(minL, maxL, length.out = K + 1)

# Count with the EXACT limits. Classes are [Li, Ls) and the last one is
# closed on both sides, [Li, Ls]. Counting against rounded limits silently
# drops observations that sit at the true minimum/maximum (the rounded lower
# limit can be larger than the true minimum), so rounding is applied only to
# the values that are displayed.
clases <- cut(
  Longitud,
  breaks = breaks,
  include.lowest = TRUE,
  right = FALSE
)
ni <- as.numeric(table(clases))

# Every non-missing observation must land in exactly one class.
stopifnot(sum(ni) == sum(!is.na(Longitud)))

# Limits as displayed (2 decimals) and class marks.
Li <- round(breaks[-length(breaks)], 2)
Ls <- round(breaks[-1], 2)
Mc <- (Li + Ls) / 2

# Relative frequency (%) and ascending / descending cumulative frequencies.
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# The "(%)" suffix belongs to the percentage columns (hi, Hi), not to the
# cumulative counts (Ni).
TDF_Longitud <- data.frame(
  "Li" = Li,
  "Ls" = Ls,
  "Mc" = Mc,
  "ni" = ni,
  "hi(%)" = round(hi, 2),
  "Ni_asc" = Ni_asc,
  "Ni_desc" = Ni_desc,
  "Hi_asc(%)" = round(Hi_asc, 2),
  "Hi_desc(%)" = round(Hi_desc, 2),
  check.names = FALSE
)

# Totals row. It is a character vector appended by position, so after the
# rbind() every column of the table is of type character.
totales <- c(
  Li = "-",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Longitud <- rbind(TDF_Longitud, totales)
# Render the frequency table as a formatted gt table (Table 15).
library(dplyr)
library(gt)
gt(TDF_Longitud) %>%
  tab_header(
    subtitle = md("**Tabla de la Longitud (° ′ ″)**"),
    title = md("*Tabla Nro. 15*")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer frame of the table
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Title area
    heading.border.bottom.width = px(2),
    heading.border.bottom.color = "black",
    # Column-label row
    column_labels.border.top.color = "black",
    column_labels.border.bottom.width = px(2),
    column_labels.border.bottom.color = "black",
    # Body rows
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 15 | ||||||||
| Tabla de la Longitud (° ′ ″) | ||||||||
| Li | Ls | Mc | ni | hi | Ni_asc(%) | Ni_desc(%) | Hi_asc | Hi_desc |
|---|---|---|---|---|---|---|---|---|
| -102.55 | -82.86 | -92.705 | 317 | 11.8 | 317 | 2686 | 11.8 | 100 |
| -82.86 | -63.16 | -73.01 | 0 | 0 | 317 | 2369 | 11.8 | 88.2 |
| -63.16 | -43.47 | -53.315 | 293 | 10.91 | 610 | 2369 | 22.71 | 88.2 |
| -43.47 | -23.78 | -33.625 | 0 | 0 | 610 | 2076 | 22.71 | 77.29 |
| -23.78 | -4.08 | -13.93 | 0 | 0 | 610 | 2076 | 22.71 | 77.29 |
| -4.08 | 15.61 | 5.765 | 592 | 22.04 | 1202 | 2076 | 44.75 | 77.29 |
| 15.61 | 35.31 | 25.46 | 0 | 0 | 1202 | 1484 | 44.75 | 55.25 |
| 35.31 | 55 | 45.155 | 271 | 10.09 | 1473 | 1484 | 54.84 | 55.25 |
| 55 | 74.69 | 64.845 | 305 | 11.36 | 1778 | 1213 | 66.2 | 45.16 |
| 74.69 | 94.39 | 84.54 | 311 | 11.58 | 2089 | 908 | 77.77 | 33.8 |
| 94.39 | 114.08 | 104.235 | 319 | 11.88 | 2408 | 597 | 89.65 | 22.23 |
| 114.08 | 133.78 | 123.93 | 278 | 10.35 | 2686 | 278 | 100 | 10.35 |
| - | - | - | 2686 | 100 | - | - | - | - |
| Autor: Grupo 3 | ||||||||
# Exploratory histogram with the default binning of hist().
hist(Longitud)

# ---- Simplified frequency table taken from hist() ----
# hist() chooses "pretty" class limits; `breaks = 8` is only a suggestion for
# the number of classes. plot = FALSE returns the binning without drawing.
Hist_Longitud <- hist(Longitud, breaks = 8, plot = FALSE)

# Number of class limits (one more than the number of classes).
k <- length(Hist_Longitud$breaks)

# Lower / upper limit of each class and absolute frequencies.
Li <- Hist_Longitud$breaks[-k]
Ls <- Hist_Longitud$breaks[-1]
ni <- Hist_Longitud$counts
sum(ni)
## [1] 3000

# Class marks and relative frequencies (as proportions, not percentages).
Mc <- Hist_Longitud$mids
hi <- ni / sum(ni)
sum(hi)
## [1] 1

# Ascending / descending cumulative frequencies.
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Display table; proportions are converted to percentages here.
TDF_Longitud <- data.frame(
  "Lim inf" = round(Li, 2),
  "Lim sup" = round(Ls, 2),
  "MC" = round(Mc, 2),
  "ni" = ni,
  "hi(%)" = round(hi * 100, 2),
  "Ni asc" = Ni_asc,
  "Ni desc" = Ni_desc,
  "Hi asc(%)" = round(Hi_asc * 100, 2),
  "Hi desc(%)" = round(Hi_desc * 100, 2),
  check.names = FALSE
)

# Totals row. The percentage total is computed from the unrounded
# proportions; adding the already-rounded column can show 99.99 or 100.01.
# The vector is appended by position and turns every column into character.
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi) * 100, 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Longitud <- rbind(TDF_Longitud, totales)
# Render the simplified frequency table as a formatted gt table (Table 16).
library(dplyr)
library(gt)
gt(TDF_Longitud) %>%
  tab_header(
    subtitle = md("**Tabla simplificada de la Longitud (° ′ ″)**"),
    title = md("*Tabla Nro. 16*")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer frame of the table
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Title area
    heading.border.bottom.width = px(2),
    heading.border.bottom.color = "black",
    # Column-label row
    column_labels.border.top.color = "black",
    column_labels.border.bottom.width = px(2),
    column_labels.border.bottom.color = "black",
    # Body rows
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 16 | ||||||||
| Tabla simplificada de la Longitud (° ′ ″) | ||||||||
| Lim inf | Lim sup | MC | ni | hi(%) | Ni asc | Ni desc | Hi asc(%) | Hi desc(%) |
|---|---|---|---|---|---|---|---|---|
| -150 | -100 | -125 | 314 | 10.47 | 314 | 3000 | 10.47 | 100 |
| -100 | -50 | -75 | 610 | 20.33 | 924 | 2686 | 30.8 | 89.53 |
| -50 | 0 | -25 | 0 | 0 | 924 | 2076 | 30.8 | 69.2 |
| 0 | 50 | 25 | 863 | 28.77 | 1787 | 2076 | 59.57 | 69.2 |
| 50 | 100 | 75 | 616 | 20.53 | 2403 | 1213 | 80.1 | 40.43 |
| 100 | 150 | 125 | 597 | 19.9 | 3000 | 597 | 100 | 19.9 |
| TOTAL | - | - | 3000 | 100 | - | - | - | - |
| Autor: Grupo 3 | ||||||||
# Histogram of absolute frequencies (local scale).
# Bars, y-limit and x-axis ticks all use the binning stored in Hist_Longitud,
# i.e. the classes of the simplified frequency table. Previously the bars
# used `breaks = 10`, the y-limit came from the `breaks = 8` counts and the
# ticks from a third, default-binned histogram, so the three could disagree.
hist(Longitud,
  breaks = Hist_Longitud$breaks,
  main = "Gráfica N°56:Distribución de la Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Cantidad",
  ylim = c(0, max(Hist_Longitud$counts)),
  col = "purple",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n"
)
# Custom x axis: one tick per class limit.
axis(1,
  at = Hist_Longitud$breaks,
  labels = Hist_Longitud$breaks,
  las = 1,
  cex.axis = 0.9
)
# Charts
# Histogram of absolute frequencies (global scale: the y axis runs up to the
# total number of observations).
# The bars are drawn with the class limits of Hist_Longitud so that they line
# up with the axis ticks below, which are taken from the same object.
# Previously the bars used `breaks = 10` while the ticks came from
# Hist_Longitud, so ticks did not have to fall on bar edges.
hist(Longitud,
  breaks = Hist_Longitud$breaks,
  main = "Gráfica N°57: Distribución de la\nlongitud (° ′ ″) ",
  xlab = "Longitud (° ′ ″)",
  ylab = "Cantidad",
  ylim = c(0, length(Longitud)),
  col = "green",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n"
)
# Custom x axis: one tick per class limit.
axis(1,
  at = Hist_Longitud$breaks,
  labels = Hist_Longitud$breaks,
  las = 1,
  cex.axis = 0.9
)
# Bar chart of relative frequencies, global scale (y axis from 0 to 100).
# Drop the totals row: its MC cell holds "-" instead of a class mark.
datos_grafico <- TDF_Longitud[
  !is.element(TDF_Longitud[["MC"]], c("-", "TOTAL")),
]
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°58: Distribución porcentual de la
Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 100),
  col = "skyblue",
  space = 0
)
# Bar chart of relative frequencies, local scale (y axis from 0 to 40).
# Drop the totals row: its MC cell holds "-" instead of a class mark.
datos_grafico <- TDF_Longitud[
  !is.element(TDF_Longitud[["MC"]], c("-", "TOTAL")),
]
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°59: Distribución porcentual de la Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 40),
  col = "red",
  space = 0
)
# Horizontal box plot of Longitud; points beyond the whiskers are drawn
# (outline = TRUE) as open circles (pch = 1).
boxplot(
  Longitud,
  main = "Gráfica N°60:Distribución de la
Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  col = "pink",
  horizontal = TRUE,
  pch = 1,
  outline = TRUE
)
# Ascending and descending ogives (cumulative absolute frequencies).
# Descending curve: plotted against the lower class limits (red).
# Ascending curve: plotted against the upper class limits (green).
# The x range and the ticks are derived from the class limits themselves.
# The previous fixed range 0..900 belonged to a different variable and cut
# off every class with a negative limit. The y range covers both curves so
# that the first point of the ascending curve is not clipped.
plot(Li, Ni_desc,
  main = "Gráfica N°61: Distribución Ascendente y descendente\nde la Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Cantidad",
  xlim = range(Li, Ls),
  ylim = c(0, max(Ni_asc, Ni_desc)),
  col = "red",
  cex.axis = 0.8,
  type = "o",
  lwd = 3,
  las = 1,
  xaxt = "n"
)
lines(Ls, Ni_asc,
  col = "green",
  type = "o",
  lwd = 3
)
# One tick per class limit.
axis(1, at = unique(c(Li, Ls)))
# Ascending and descending ogives in percent (cumulative relative
# frequencies; Hi_asc / Hi_desc are proportions, hence the * 100).
# Descending curve: plotted against the lower class limits (red).
# Ascending curve: plotted against the upper class limits (blue).
# The x range and the ticks are derived from the class limits themselves.
# The previous fixed range 0..900 belonged to a different variable and cut
# off every class with a negative limit. The y axis is fixed to 0..100 so
# that both curves are fully visible.
plot(Li, Hi_desc * 100,
  main = "Gráfica N°62: Distribución Ascendente y Descendente\nde la Longitud (° ′ ″)",
  xlab = " Longitud (° ′ ″)",
  ylab = "Porcentaje",
  xlim = range(Li, Ls),
  ylim = c(0, 100),
  col = "red",
  type = "o",
  lwd = 2,
  xaxt = "n"
)
lines(Ls, Hi_asc * 100,
  col = "blue",
  type = "o",
  lwd = 3
)
# One tick per class limit.
axis(1, at = unique(c(Li, Ls)))
# ---- Summary indicators for Longitud ----
library(e1071)
library(gt)

# Variable under study, taken again from the joined data set.
Longitud <- as.numeric(datos_final$Longitud)

# --- Central tendency ---
# Arithmetic mean, rounded for display.
media_long <- round(mean(Longitud, na.rm = TRUE), 2)

# Mode: value(s) with the highest count in the frequency table of the raw
# data. table() stores the values as factor levels, so moda_long is a factor
# and may hold more than one value when there are ties.
Tabla_Long <- as.data.frame(table(Longitud))
max_frec_long <- max(Tabla_Long$Freq)
moda_long <- Tabla_Long$Longitud[Tabla_Long$Freq == max_frec_long]

# Median
mediana_long <- median(Longitud, na.rm = TRUE)

# --- Dispersion ---
varianza_long <- var(Longitud, na.rm = TRUE)
sd_long <- sd(Longitud, na.rm = TRUE)

# Coefficient of variation (%). It is computed from the unrounded mean:
# dividing by the mean already rounded to 2 decimals carries the rounding
# error into the result. Only the final value is rounded.
cv_long <- round((sd_long / mean(Longitud, na.rm = TRUE)) * 100, 2)

# --- Shape ---
asimetria_long <- skewness(Longitud, type = 2, na.rm = TRUE)
# NOTE(review): skewness() is called with type = 2 while kurtosis() is left
# at its default type, so the two shape indicators use different estimator
# variants. Confirm which one the report should use.
curtosis_long <- kurtosis(Longitud, na.rm = TRUE)

# --- Outliers ---
# Derived from the data with the box-plot rule (points beyond 1.5 * IQR from
# the hinges) instead of being a fixed statement. With no outliers the text
# is the same sentence that was previously hard-coded.
atipicos_long <- boxplot.stats(Longitud)$out
texto_atipicos_long <- if (length(atipicos_long) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Hay ", length(atipicos_long), " valores atípicos")
}

# --- Final summary table ---
tabla_indicadores_longitud <- data.frame(
  "Variable" = c("Longitud"),
  "Rango" = c(paste0(
    "[", round(min(Longitud, na.rm = TRUE), 2),
    " ; ", round(max(Longitud, na.rm = TRUE), 2), "]"
  )),
  "X" = c(media_long),
  "Me" = c(round(mediana_long, 2)),
  "Mo" = c(paste(moda_long, collapse = ", ")),
  "V" = c(round(varianza_long, 2)),
  "Sd" = c(round(sd_long, 2)),
  "Cv" = c(cv_long),
  "As" = c(round(asimetria_long, 2)),
  "K" = c(round(curtosis_long, 2)),
  "Valores_Atipicos" = texto_atipicos_long
)
# Build the formatted gt table of indicators and keep it in a variable.
tabla_indicadores_longitud_gt <- gt(tabla_indicadores_longitud) %>%
  tab_header(
    subtitle = md("*Indicadores estadísticos de la variable Longitud*"),
    title = md("Tabla N°9")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer frame of the table
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Title area
    heading.border.bottom.width = px(2),
    heading.border.bottom.color = "black",
    # Column-label row
    column_labels.border.top.color = "black",
    column_labels.border.bottom.width = px(2),
    column_labels.border.bottom.color = "black",
    # Body rows
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  # Bold text for the row whose Variable cell is "Longitud".
  tab_style(
    locations = cells_body(rows = Variable == "Longitud"),
    style = cell_text(weight = "bold")
  )

# Display the table.
tabla_indicadores_longitud_gt
| Tabla N°9 | ||||||||||
| Indicadores estadísticos de la variable Longitud | ||||||||||
| Variable | Rango | X | Me | Mo | V | Sd | Cv | As | K | Valores_Atipicos |
|---|---|---|---|---|---|---|---|---|---|---|
| Longitud | [-102.55 ; 133.78] | 18.1 | 10.45 | 104.1954 | 6102.56 | 78.12 | 431.6 | -0.26 | -1.21 | No hay presencia de valores atípicos |
| Autor: Grupo 3 | ||||||||||