UNIVERSIDAD CENTRAL DEL ECUADOR
ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD
FECHA: 19/11/2025
library(dplyr)
# Read the soil-pollution records and the country centroid table.
datos <- read.csv("soil_pollution_diseases.csv")
centroides <- read.csv("centroides_10_paises.csv")

# Relabel "United States" as "USA" in the centroid table's country column.
centroides[["Pais"]][centroides[["Pais"]] == "United States"] <- "USA"

# Rename that column from "Pais" to "Country", the key used for the join.
names(centroides)[names(centroides) == "Pais"] <- "Country"

# Attach the centroid columns to every record, keeping all rows of `datos`.
datos_final <- left_join(datos, centroides, by = "Country")
# ---- Manual frequency distribution table for Latitud ----
# Latitude values from the joined data set, coerced to numeric.
Latitud <- as.numeric(datos_final$Latitud)

# Range, number of classes (Sturges' rule) and class width.
# NA values are ignored. The extremes are stored only in minL/maxL so the base
# functions min() and max() are not shadowed by variables of the same name.
n_validos <- sum(!is.na(Latitud))
minL <- min(Latitud, na.rm = TRUE)
maxL <- max(Latitud, na.rm = TRUE)
R <- maxL - minL
K <- floor(1 + 3.33 * log10(n_validos))
A <- R / K

# Exact (unrounded) class limits: K + 1 equally spaced points running from the
# observed minimum to the observed maximum, which always yields K classes.
breaks <- seq(minL, maxL, length.out = K + 1)
Li <- breaks[-length(breaks)]
Ls <- breaks[-1]
Mc <- (Li + Ls) / 2

# Absolute frequencies. Classes are [Li, Ls) except the last one, which is
# closed on both sides. Counting uses the exact limits: comparing against
# limits rounded to 2 decimals can leave out the observations that sit on the
# minimum (e.g. a minimum of -25.2744 is not >= -25.27).
clases <- cut(
  Latitud,
  breaks = breaks,
  include.lowest = TRUE,
  right = FALSE
)
ni <- as.numeric(table(clases))

# Relative (percentage) and cumulative frequencies.
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Rounding is applied only to the values shown in the table.
TDF_Latitud <- data.frame(
  round(Li, 2), round(Ls, 2), round(Mc, 2), ni, round(hi, 2),
  Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)
# The "(%)" suffix belongs to the percentage columns (hi, Hi_asc, Hi_desc),
# not to the cumulative counts.
colnames(TDF_Latitud) <- c(
  "Li", "Ls", "Mc", "ni", "hi(%)",
  "Ni_asc", "Ni_desc", "Hi_asc(%)", "Hi_desc(%)"
)

# Totals row. Appending a character vector turns every column into character,
# which is acceptable because the table is only used for display.
totales <- c(
  Li = "-",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Latitud <- rbind(TDF_Latitud, totales)
# TABLA
library(dplyr)
library(gt)
# Render the frequency table with gt: markdown title and subtitle, an author
# note, black table/heading borders, gray lines between body rows and striped
# rows.
gt(TDF_Latitud) %>%
  tab_header(
    subtitle = md("**Tabla de la Latitud (° ′ ″)**"),
    title = md("*Tabla Nro. 13*")
  ) %>%
  tab_source_note(md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table frame
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label rules
    heading.border.bottom.width = px(2),
    heading.border.bottom.color = "black",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.width = px(2),
    column_labels.border.bottom.color = "black",
    # Table body
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  )
| Tabla Nro. 13 | ||||||||
| Tabla de la Latitud (° ′ ″) | ||||||||
| Li | Ls | Mc | ni | hi | Ni_asc(%) | Ni_desc(%) | Hi_asc | Hi_desc |
|---|---|---|---|---|---|---|---|---|
| -25.27 | -18.9 | -22.085 | 0 | 0 | 0 | 2722 | 0 | 100 |
| -18.9 | -12.53 | -15.715 | 293 | 10.76 | 293 | 2722 | 10.76 | 100 |
| -12.53 | -6.16 | -9.345 | 0 | 0 | 293 | 2429 | 10.76 | 89.24 |
| -6.16 | 0.21 | -2.975 | 271 | 9.96 | 564 | 2429 | 20.72 | 89.24 |
| 0.21 | 6.58 | 3.395 | 0 | 0 | 564 | 2158 | 20.72 | 79.28 |
| 6.58 | 12.95 | 9.765 | 309 | 11.35 | 873 | 2158 | 32.07 | 79.28 |
| 12.95 | 19.32 | 16.135 | 0 | 0 | 873 | 1849 | 32.07 | 67.93 |
| 19.32 | 25.69 | 22.505 | 625 | 22.96 | 1498 | 1849 | 55.03 | 67.93 |
| 25.69 | 32.06 | 28.875 | 305 | 11.2 | 1803 | 1224 | 66.24 | 44.97 |
| 32.06 | 38.43 | 35.245 | 636 | 23.37 | 2439 | 919 | 89.6 | 33.76 |
| 38.43 | 44.8 | 41.615 | 0 | 0 | 2439 | 283 | 89.6 | 10.4 |
| 44.8 | 51.17 | 47.985 | 283 | 10.4 | 2722 | 283 | 100 | 10.4 |
| - | - | - | 2722 | 100 | - | - | - | - |
| Autor: Grupo 3 | ||||||||
# Exploratory histogram with default settings.
hist(Latitud)

# ---- Simplified table built from hist()'s own classes ----
# Compute the histogram without drawing it. `breaks = 8` is only a suggestion;
# hist() chooses the final break points.
Hist_Latitud <- hist(Latitud, breaks = 8, plot = FALSE)
# Number of break points (one more than the number of classes).
k <- length(Hist_Latitud$breaks)
# Lower limits drop the last break point, upper limits drop the first one.
Li <- Hist_Latitud$breaks[-k]
Ls <- Hist_Latitud$breaks[-1]
ni <- Hist_Latitud$counts
sum(ni)
## [1] 3000
Mc <- Hist_Latitud$mids
# Relative frequencies are kept as proportions here and converted to
# percentages only when the table is assembled.
hi <- ni / sum(ni)
sum(hi)
## [1] 1
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

TDF_Latitud <- data.frame(
  Li = round(Li, 2),
  Ls = round(Ls, 2),
  Mc = round(Mc, 2),
  ni = ni,
  hi = round(hi * 100, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc * 100, 2),
  Hi_desc = round(Hi_desc * 100, 2)
)
colnames(TDF_Latitud) <- c(
  "Lim inf", "Lim sup", "MC", "ni", "hi(%)",
  "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)"
)

# Totals row. The percentage total comes from the exact proportions rather
# than from the rounded column, so rounding error cannot push it off 100.
# Appending a character vector turns every column into character.
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi) * 100, 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Latitud <- rbind(TDF_Latitud, totales)
# TABLA
library(dplyr)
library(gt)
# Render the simplified frequency table with gt: markdown title and subtitle,
# an author note, black table/heading borders, gray lines between body rows
# and striped rows.
gt(TDF_Latitud) %>%
  tab_header(
    subtitle = md("**Tabla simplificada de la Latitud (° ′ ″)**"),
    title = md("*Tabla Nro. 14*")
  ) %>%
  tab_source_note(md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table frame
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label rules
    heading.border.bottom.width = px(2),
    heading.border.bottom.color = "black",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.width = px(2),
    column_labels.border.bottom.color = "black",
    # Table body
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  )
| Tabla Nro. 14 | ||||||||
| Tabla simplificada de la Latitud (° ′ ″) | ||||||||
| Lim inf | Lim sup | MC | ni | hi(%) | Ni asc | Ni desc | Hi asc(%) | Hi desc(%) |
|---|---|---|---|---|---|---|---|---|
| -30 | -20 | -25 | 278 | 9.27 | 278 | 3000 | 9.27 | 100 |
| -20 | -10 | -15 | 293 | 9.77 | 571 | 2722 | 19.03 | 90.73 |
| -10 | 0 | -5 | 271 | 9.03 | 842 | 2429 | 28.07 | 80.97 |
| 0 | 10 | 5 | 309 | 10.3 | 1151 | 2158 | 38.37 | 71.93 |
| 10 | 20 | 15 | 0 | 0 | 1151 | 1849 | 38.37 | 61.63 |
| 20 | 30 | 25 | 625 | 20.83 | 1776 | 1849 | 59.2 | 61.63 |
| 30 | 40 | 35 | 941 | 31.37 | 2717 | 1224 | 90.57 | 40.8 |
| 40 | 50 | 45 | 0 | 0 | 2717 | 283 | 90.57 | 9.43 |
| 50 | 60 | 55 | 283 | 9.43 | 3000 | 283 | 100 | 9.43 |
| TOTAL | - | - | 3000 | 100 | - | - | - | - |
| Autor: Grupo 3 | ||||||||
# Histogram (Gráfica N°49), y-axis scaled to the tallest class.
# Bars, y-axis limit and x-axis ticks all come from the same precomputed
# histogram object (Hist_Latitud), so the ticks always sit on the bar edges
# and the tallest bar always fits. Re-binning with a different `breaks` value
# for each of those pieces can make them disagree.
hist(Latitud,
  breaks = Hist_Latitud$breaks,
  main = "Gráfica N°49:Distribución para la Latitud (° ′ ″)",
  xlab = "Latitud (° ′ ″)",
  ylab = "Cantidad",
  ylim = c(0, max(Hist_Latitud$counts)),
  col = "purple",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n" # the default x-axis is suppressed and drawn explicitly below
)
axis(1,
  at = Hist_Latitud$breaks,
  labels = Hist_Latitud$breaks,
  las = 1,
  cex.axis = 0.9
)
# Charts
# Global histogram of counts (Gráfica N°50): the y-axis runs from 0 to the
# total number of observations.
# The bars use the break points stored in Hist_Latitud, the same ones used
# for the x-axis ticks, instead of a separate `breaks = 10` suggestion that
# only matches them when hist() happens to choose identical break points.
hist(Latitud,
  breaks = Hist_Latitud$breaks,
  main = "Gráfica N°50: Distribución para la\nLatitud",
  xlab = "Latitud (° ′ ″)",
  ylab = "Cantidad",
  ylim = c(0, length(Latitud)),
  col = "green",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n" # the default x-axis is suppressed and drawn explicitly below
)
axis(1,
  at = Hist_Latitud$breaks,
  labels = Hist_Latitud$breaks,
  las = 1,
  cex.axis = 0.9
)
# Global percentage chart (Gráfica N°51): y-axis fixed at 0-100.
# Keep only the class rows; the totals row carries "-" in the MC column.
datos_grafico <- TDF_Latitud[
  !is.element(TDF_Latitud[["MC"]], c("-", "TOTAL")),
]
# The table columns are character after the totals row was appended, so the
# percentages are converted back to numeric. Bars are labelled with the class
# marks and drawn without gaps.
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°51: Distribución porcentual de la Latitud",
  xlab = "Latitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 100),
  col = "skyblue",
  space = 0
)
# Local percentage chart (Gráfica N°52): y-axis fixed at 0-40.
# Keep only the class rows; the totals row carries "-" in the MC column.
datos_grafico <- TDF_Latitud[
  !is.element(TDF_Latitud[["MC"]], c("-", "TOTAL")),
]
# The table columns are character after the totals row was appended, so the
# percentages are converted back to numeric. Bars are labelled with the class
# marks and drawn without gaps.
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°52: Distribución porcentual de la Latitud",
  xlab = "Latitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 40),
  col = "red",
  space = 0
)
# Box plot (Gráfica N°53): horizontal box, outlying points drawn as open
# circles.
boxplot(
  Latitud,
  main = "Gráfica N°53:Distribución de la Latitud",
  xlab = "Latitud (° ′ ″)",
  col = "pink",
  pch = 1,
  outline = TRUE,
  horizontal = TRUE
)
# Ascending and descending ogives of absolute frequencies (Gráfica N°54).
# The descending cumulative counts are drawn at the lower class limits (red)
# and the ascending ones at the upper class limits (green).
# The x-axis spans the class limits themselves: a fixed 0-900 window would
# hide every class with a negative latitude and squeeze the rest into a thin
# strip. The y-axis starts at 0 and reaches the largest cumulative count so
# both curves fit.
plot(Li, Ni_desc,
  main = "Gráfica N°54: Distribución Ascendente y descendente\npara la Latitud (° ′ ″)",
  xlab = "Latitud (° ′ ″)",
  ylab = "Cantidad",
  xlim = range(Li, Ls),
  ylim = c(0, max(Ni_asc, Ni_desc)),
  col = "red",
  cex.axis = 0.8,
  type = "o",
  lwd = 3,
  las = 1,
  xaxt = "n" # the default x-axis is suppressed and drawn explicitly below
)
lines(Ls, Ni_asc,
  col = "green",
  type = "o",
  lwd = 3
)
# One tick per class limit: every lower limit plus the last upper limit.
axis(1, at = c(Li, Ls[length(Ls)]))
# Ascending and descending ogives in percent (Gráfica N°55).
# Hi_asc / Hi_desc hold proportions, hence the multiplication by 100.
# The descending curve is drawn at the lower class limits (red) and the
# ascending one at the upper class limits (blue).
# The x-axis spans the class limits themselves: a fixed 0-900 window would
# hide every class with a negative latitude. The y-axis covers 0-100 so both
# curves fit.
plot(Li, Hi_desc * 100,
  main = "Gráfica N°55: Distribución Ascendente y Descendente\nporcentual para la Latitud (° ′ ″) ",
  xlab = "Latitud (° ′ ″)",
  ylab = "Porcentaje",
  xlim = range(Li, Ls),
  ylim = c(0, 100),
  col = "red",
  type = "o",
  lwd = 2,
  xaxt = "n" # the default x-axis is suppressed and drawn explicitly below
)
lines(Ls, Hi_asc * 100,
  col = "blue",
  type = "o",
  lwd = 3
)
# One tick per class limit: every lower limit plus the last upper limit.
axis(1, at = c(Li, Ls[length(Ls)]))
# INDICADORES
library(e1071)
library(gt)
# Variable under study.
Latitud <- as.numeric(datos_final$Latitud)

# --- CENTRAL TENDENCY ---
# Arithmetic mean (rounded for display).
media_lat <- round(mean(Latitud, na.rm = TRUE), 2)
# Mode(s): every distinct value that reaches the highest observed frequency.
Tabla_Lat <- as.data.frame(table(Latitud))
max_frec_lat <- max(Tabla_Lat$Freq)
moda_lat <- Tabla_Lat$Latitud[Tabla_Lat$Freq == max_frec_lat]
# Median.
mediana_lat <- median(Latitud, na.rm = TRUE)

# --- DISPERSION ---
varianza_lat <- var(Latitud, na.rm = TRUE)
sd_lat <- sd(Latitud, na.rm = TRUE)
# Coefficient of variation, computed from the unrounded mean so the rounding
# of media_lat does not leak into it. abs() keeps it non-negative if the mean
# is negative, which is possible for latitudes.
cv_lat <- round(sd_lat / abs(mean(Latitud, na.rm = TRUE)) * 100, 2)

# --- SHAPE ---
# Skewness and kurtosis use the same estimator family (type = 2) so the two
# figures are comparable.
asimetria_lat <- skewness(Latitud, type = 2, na.rm = TRUE)
curtosis_lat <- kurtosis(Latitud, type = 2, na.rm = TRUE)

# --- OUTLIERS ---
# Derived from the data with the box-plot rule (boxplot.stats, default
# coef = 1.5) instead of being stated as a fixed sentence.
atipicos_lat <- boxplot.stats(Latitud)$out
texto_atipicos_lat <- if (length(atipicos_lat) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Hay ", length(atipicos_lat), " valores atípicos")
}

# --- FINAL SUMMARY TABLE ---
tabla_indicadores_latitud <- data.frame(
  "Variable" = c("Latitud"),
  "Rango" = c(paste0(
    "[", round(min(Latitud, na.rm = TRUE), 2),
    " ; ", round(max(Latitud, na.rm = TRUE), 2), "]"
  )),
  "X" = c(media_lat),
  "Me" = c(round(mediana_lat, 2)),
  "Mo" = c(paste(moda_lat, collapse = ", ")),
  "V" = c(round(varianza_lat, 2)),
  "Sd" = c(round(sd_lat, 2)),
  "Cv" = c(cv_lat),
  "As" = c(round(asimetria_lat, 2)),
  "K" = c(round(curtosis_lat, 2)),
  "Valores_Atipicos" = texto_atipicos_lat
)
# Build the gt version of the indicators table: markdown title and subtitle,
# an author note, black table/heading borders, gray lines between body rows,
# striped rows, and bold text on the "Latitud" row.
tabla_indicadores_latitud_gt <- gt(tabla_indicadores_latitud) %>%
  tab_header(
    subtitle = md("*Indicadores estadísticos de la variable Latitud*"),
    title = md("Tabla N°14.1")
  ) %>%
  tab_source_note(md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table frame
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label rules
    heading.border.bottom.width = px(2),
    heading.border.bottom.color = "black",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.width = px(2),
    column_labels.border.bottom.color = "black",
    # Table body
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  ) %>%
  tab_style(
    locations = cells_body(rows = Variable == "Latitud"),
    style = cell_text(weight = "bold")
  )

# Print the table.
tabla_indicadores_latitud_gt
| Tabla N°14.1 | ||||||||||
| Indicadores estadísticos de la variable Latitud | ||||||||||
| Variable | Rango | X | Me | Mo | V | Sd | Cv | As | K | Valores_Atipicos |
|---|---|---|---|---|---|---|---|---|---|---|
| Latitud | [-25.27 ; 51.17] | 17.46 | 23.63 | 35.8617 | 508.22 | 22.54 | 129.12 | -0.49 | -0.8 | No hay presencia de valores atípicos |
| Autor: Grupo 3 | ||||||||||