UNIVERSIDAD CENTRAL DEL ECUADOR

ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD

FECHA: 19/11/2025

library(dplyr)

# Load the disease records and the country centroid table.
datos <- read.csv("soil_pollution_diseases.csv")
centroides <- read.csv("centroides_10_paises.csv")

# Harmonise the centroid table with the main dataset: relabel
# "United States" as "USA", then expose the country column as "Country".
centroides$Pais <- replace(
  centroides$Pais,
  centroides$Pais == "United States",
  "USA"
)
names(centroides)[names(centroides) == "Pais"] <- "Country"

# Attach the centroid columns to every record, matching on Country.
datos_final <- left_join(datos, centroides, by = "Country")


# Latitude of every record (column of the joined dataset), as numeric.
Latitud <- as.numeric(datos_final$Latitud)

# Range, number of classes (1 + 3.33 * log10(n)) and class width.
# Missing values are ignored, as in the indicator section further down.
minL <- min(Latitud, na.rm = TRUE)
maxL <- max(Latitud, na.rm = TRUE)
R <- maxL - minL
K <- floor(1 + 3.33 * log10(sum(!is.na(Latitud))))
A <- R / K

# Class limits WITHOUT rounding. Counting must use these exact limits:
# rounding them first (e.g. the minimum rounded up to 2 decimals) pushes
# observations that sit right on a limit outside of every class.
breaks <- seq(minL, maxL, length.out = K + 1)

# Classes are [Li, Ls), except the last one which is closed: [Li, Ls].
clases <- cut(
  Latitud,
  breaks = breaks,
  include.lowest = TRUE,
  right = FALSE
)

# Frequency distribution table

# Absolute frequency of each class.
ni <- as.numeric(table(clases))

# Limits are rounded for display only; the class mark is their midpoint.
Li <- round(breaks[-length(breaks)], 2)
Ls <- round(breaks[-1], 2)
Mc <- (Li + Ls) / 2

# Relative frequency (%) and cumulative frequencies, ascending and descending.
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

TDF_Latitud <- data.frame(
  Li, Ls, Mc, ni, round(hi, 2), Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)

# "(%)" marks the percentage columns (hi, Hi_asc, Hi_desc); Ni_* are counts.
colnames(TDF_Latitud) <- c(
  "Li", "Ls", "Mc", "ni", "hi(%)", "Ni_asc", "Ni_desc", "Hi_asc(%)", "Hi_desc(%)"
)

# Totals row. It is a character vector, so rbind() turns every column of the
# table into character; values are matched to columns by position.
totales <- c(
  Li = "-",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = sum(hi),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)

TDF_Latitud <- rbind(TDF_Latitud, totales)

# TABLA 
library(dplyr)
library(gt)

# Render the frequency table with gt, one step at a time: base table, heading,
# author note, then the border/striping theme. The last line displays it.
tabla_13_gt <- gt(TDF_Latitud)

tabla_13_gt <- tab_header(
  tabla_13_gt,
  title = md("*Tabla Nro. 13*"),
  subtitle = md("**Tabla de la Latitud (° ′ ″)**")
)

tabla_13_gt <- tab_source_note(
  tabla_13_gt,
  source_note = md("Autor: Grupo 3")
)

tabla_13_gt <- tab_options(
  tabla_13_gt,
  # outer table borders
  table.border.top.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  # heading and column labels
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  # body
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)

tabla_13_gt
Tabla Nro. 13
Tabla de la Latitud (° ′ ″)
Li Ls Mc ni hi Ni_asc(%) Ni_desc(%) Hi_asc Hi_desc
-25.27 -18.9 -22.085 0 0 0 2722 0 100
-18.9 -12.53 -15.715 293 10.76 293 2722 10.76 100
-12.53 -6.16 -9.345 0 0 293 2429 10.76 89.24
-6.16 0.21 -2.975 271 9.96 564 2429 20.72 89.24
0.21 6.58 3.395 0 0 564 2158 20.72 79.28
6.58 12.95 9.765 309 11.35 873 2158 32.07 79.28
12.95 19.32 16.135 0 0 873 1849 32.07 67.93
19.32 25.69 22.505 625 22.96 1498 1849 55.03 67.93
25.69 32.06 28.875 305 11.2 1803 1224 66.24 44.97
32.06 38.43 35.245 636 23.37 2439 919 89.6 33.76
38.43 44.8 41.615 0 0 2439 283 89.6 10.4
44.8 51.17 47.985 283 10.4 2722 283 100 10.4
- - - 2722 100 - - - -
Autor: Grupo 3
# Quick look at the default histogram of the variable.
hist(Latitud)

# Simplified table built from the histogram classes

# Compute the histogram without drawing it; its breaks, counts and midpoints
# define the classes of the simplified table.
Hist_Latitud <- hist(Latitud, breaks = 8, plot = FALSE)
k <- length(Hist_Latitud$breaks)
Li <- Hist_Latitud$breaks[-k]   # lower limits: every break except the last
Ls <- Hist_Latitud$breaks[-1]   # upper limits: every break except the first
ni <- Hist_Latitud$counts
sum(ni)
## [1] 3000
Mc <- Hist_Latitud$mids
hi <- ni / sum(ni)              # relative frequency as a proportion (0-1)
sum(hi)
## [1] 1
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Proportions are converted to percentages only for display.
TDF_Latitud <- data.frame(
  Li = round(Li, 2),
  Ls = round(Ls, 2),
  Mc = round(Mc, 2),
  ni = ni,
  hi = round(hi * 100, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc * 100, 2),
  Hi_desc = round(Hi_desc * 100, 2)
)
colnames(TDF_Latitud) <- c(
  "Lim inf", "Lim sup", "MC", "ni", "hi(%)",
  "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)"
)

# Totals row. It is a character vector, so rbind() turns every column of the
# table into character; values are matched to columns by position.
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(as.numeric(TDF_Latitud$ni)),
  hi = sum(as.numeric(TDF_Latitud$`hi(%)`)),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)

TDF_Latitud <- rbind(TDF_Latitud, totales)

# TABLA 
library(dplyr)
library(gt)

# Render the simplified frequency table with gt, one step at a time: base
# table, heading, author note, then the border/striping theme. The last line
# displays it.
tabla_14_gt <- gt(TDF_Latitud)

tabla_14_gt <- tab_header(
  tabla_14_gt,
  title = md("*Tabla Nro. 14*"),
  subtitle = md("**Tabla simplificada de la Latitud (° ′ ″)**")
)

tabla_14_gt <- tab_source_note(
  tabla_14_gt,
  source_note = md("Autor: Grupo 3")
)

tabla_14_gt <- tab_options(
  tabla_14_gt,
  # outer table borders
  table.border.top.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  # heading and column labels
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  # body
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)

tabla_14_gt
Tabla Nro. 14
Tabla simplificada de la Latitud (° ′ ″)
Lim inf Lim sup MC ni hi(%) Ni asc Ni desc Hi asc(%) Hi desc(%)
-30 -20 -25 278 9.27 278 3000 9.27 100
-20 -10 -15 293 9.77 571 2722 19.03 90.73
-10 0 -5 271 9.03 842 2429 28.07 80.97
0 10 5 309 10.3 1151 2158 38.37 71.93
10 20 15 0 0 1151 1849 38.37 61.63
20 30 25 625 20.83 1776 1849 59.2 61.63
30 40 35 941 31.37 2717 1224 90.57 40.8
40 50 45 0 0 2717 283 90.57 9.43
50 60 55 283 9.43 3000 283 100 9.43
TOTAL - - 3000 100 - - - -
Autor: Grupo 3
# Histogram

# Bars, axis ticks and the y limit are all taken from Hist_Latitud (the
# histogram object behind the simplified table), so the ticks always fall on
# the bar edges and the tallest bar always fits.
hist(Latitud, breaks = Hist_Latitud$breaks,
     main = "Gráfica N°49:Distribución para la Latitud (° ′ ″)",
     xlab = "Latitud (° ′ ″)",
     ylab = "Cantidad",
     ylim = c(0, max(Hist_Latitud$counts)),
     col = "purple",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # default x axis suppressed; drawn below on the class limits
axis(1, at = Hist_Latitud$breaks,
     labels = Hist_Latitud$breaks, las = 1,
     cex.axis = 0.9)

#Gráficas

# Global ni chart: absolute frequencies on a y axis that goes up to the total
# number of observations.

# The bars use the same breaks as the axis ticks (Hist_Latitud$breaks), so the
# ticks always fall on the bar edges.
hist(Latitud, breaks = Hist_Latitud$breaks,
     main = "Gráfica N°50: Distribución para la 
     Latitud",
     xlab = "Latitud (° ′ ″)",
     ylab = "Cantidad",
     ylim = c(0, length(Latitud)),
     col = "green",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # default x axis suppressed; drawn below on the class limits
axis(1, at = Hist_Latitud$breaks,
     labels = Hist_Latitud$breaks, las = 1,
     cex.axis = 0.9)

# Global hi chart: percentage of each class on a 0-100 % axis.

# Keep only the class rows; the totals row carries "-" in the MC column.
datos_grafico <- subset(TDF_Latitud, !is.element(MC, c("-", "TOTAL")))

# The table columns are character after the totals row was appended, so the
# percentages are converted back to numbers for plotting.
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°51: Distribución porcentual de la Latitud",
  xlab = "Latitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 100),
  col = "skyblue",
  space = 0
)

# Local hi chart: same percentages as the global chart, zoomed to a 0-40 % axis.

# Keep only the class rows; the totals row carries "-" in the MC column.
datos_grafico <- subset(TDF_Latitud, !is.element(MC, c("-", "TOTAL")))

# The table columns are character after the totals row was appended, so the
# percentages are converted back to numbers for plotting.
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°52: Distribución porcentual de la Latitud",
  xlab = "Latitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 40),
  col = "red",
  space = 0
)

# Box plot

# Horizontal box plot of the variable; outliers, if any, are drawn as open
# circles (pch = 1).
boxplot(
  x = Latitud,
  main = "Gráfica N°53:Distribución de la Latitud",
  xlab = "Latitud (° ′ ″)",
  col = "pink",
  pch = 1,
  outline = TRUE,
  horizontal = TRUE
)

# Ascending and descending ogives (cumulative absolute frequencies)

# Descending ogive: cumulative count plotted at the lower class limits.
# The x range and the tick marks are derived from the class limits themselves
# so that every class is visible, and the y range starts at 0 and reaches the
# largest cumulative count so that neither curve is clipped.
plot(Li, Ni_desc,
     main = "Gráfica N°54: Distribución Ascendente y descendente 
      para la Latitud (° ′ ″)",
     xlab = "Latitud (° ′ ″)",
     ylab = "Cantidad",
     xlim = range(Li, Ls),
     ylim = c(0, max(Ni_asc, Ni_desc)),
     col = "red",
     cex.axis = 0.8,
     type = "o",
     lwd = 3,
     las = 1,
     xaxt = "n")  # default x axis suppressed; drawn below on the class limits
# Ascending ogive: cumulative count plotted at the upper class limits.
lines(Ls, Ni_asc,
      col = "green",
      type = "o",
      lwd = 3)
axis(1, at = unique(c(Li, Ls)))

# Ascending and descending ogives (cumulative percentages)

# Hi_asc / Hi_desc are proportions, hence the * 100.
# Descending ogive: cumulative percentage plotted at the lower class limits.
# The x range and the tick marks are derived from the class limits themselves
# so that every class is visible; the y axis covers the full 0-100 % range.
plot(Li, Hi_desc * 100,
     main = "Gráfica N°55: Distribución Ascendente y Descendente 
     porcentual para la Latitud (° ′ ″) ",
     xlab = "Latitud (° ′ ″)",
     ylab = "Porcentaje",
     xlim = range(Li, Ls),
     ylim = c(0, 100),
     col = "red",
     type = "o",
     lwd = 2,
     xaxt = "n")  # default x axis suppressed; drawn below on the class limits
# Ascending ogive: cumulative percentage plotted at the upper class limits.
lines(Ls, Hi_asc * 100,
      col = "blue",
      type = "o",
      lwd = 3)
axis(1, at = unique(c(Li, Ls)))

# INDICADORES


library(e1071)
library(gt)

# Variable
Latitud <- as.numeric(datos_final$Latitud)

# --- MEASURES OF CENTRAL TENDENCY ---

# Arithmetic mean (rounded for display)
media_lat <- round(mean(Latitud, na.rm = TRUE), 2)

# Mode: the value(s) with the highest count in the frequency table.
# Tabla_Lat$Latitud is a factor, so the mode is kept as its label.
Tabla_Lat <- as.data.frame(table(Latitud))
max_frec_lat <- max(Tabla_Lat$Freq)
moda_lat <- Tabla_Lat$Latitud[Tabla_Lat$Freq == max_frec_lat]

# Median
mediana_lat <- median(Latitud, na.rm = TRUE)

# --- MEASURES OF DISPERSION ---

# Variance
varianza_lat <- var(Latitud, na.rm = TRUE)

# Standard deviation
sd_lat <- sd(Latitud, na.rm = TRUE)

# Coefficient of variation (%), computed from the unrounded mean so that the
# rounding of media_lat does not leak into the result.
cv_lat <- round((sd_lat / mean(Latitud, na.rm = TRUE)) * 100, 2)

# --- MEASURES OF SHAPE ---

# NOTE(review): skewness is requested with type = 2 while kurtosis uses the
# package default type; confirm that mixing estimator types is intended.

# Skewness
asimetria_lat <- skewness(Latitud, type = 2, na.rm = TRUE)

# Kurtosis
curtosis_lat <- kurtosis(Latitud, na.rm = TRUE)

# --- OUTLIERS ---

# Points beyond 1.5 * IQR from the hinges, i.e. the points a default box plot
# draws individually. The summary text is derived from the data instead of
# being hard-coded.
atipicos_lat <- boxplot.stats(Latitud)$out
valores_atipicos_lat <- if (length(atipicos_lat) == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste0("Presencia de ", length(atipicos_lat), " valores atípicos")
}

# --- FINAL SUMMARY TABLE ---

tabla_indicadores_latitud <- data.frame(
  "Variable" = c("Latitud"),
  "Rango" = c(paste0("[", round(min(Latitud, na.rm = TRUE), 2),
                     " ; ", round(max(Latitud, na.rm = TRUE), 2), "]")),
  "X"  = c(media_lat),
  "Me" = c(round(mediana_lat, 2)),
  "Mo" = c(paste(moda_lat, collapse = ", ")),
  "V"  = c(round(varianza_lat, 2)),
  "Sd" = c(round(sd_lat, 2)),
  "Cv" = c(cv_lat),
  "As" = c(round(asimetria_lat, 2)),
  "K"  = c(round(curtosis_lat, 2)),
  "Valores_Atipicos" = valores_atipicos_lat
)

# Render the indicator summary with gt, one step at a time: base table,
# heading, author note, border/striping theme, and finally bold text on the
# row of the variable. The last line displays the table.
tabla_indicadores_latitud_gt <- gt(tabla_indicadores_latitud)

tabla_indicadores_latitud_gt <- tab_header(
  tabla_indicadores_latitud_gt,
  title = md("Tabla N°14.1"),
  subtitle = md("*Indicadores estadísticos de la variable Latitud*")
)

tabla_indicadores_latitud_gt <- tab_source_note(
  tabla_indicadores_latitud_gt,
  source_note = md("Autor: Grupo 3")
)

tabla_indicadores_latitud_gt <- tab_options(
  tabla_indicadores_latitud_gt,
  # outer table borders
  table.border.top.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  # heading and column labels
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  # body
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)

tabla_indicadores_latitud_gt <- tab_style(
  tabla_indicadores_latitud_gt,
  style = cell_text(weight = "bold"),
  locations = cells_body(
    rows = Variable == "Latitud"
  )
)

tabla_indicadores_latitud_gt
Tabla N°14.1
Indicadores estadísticos de la variable Latitud
Variable Rango X Me Mo V Sd Cv As K Valores_Atipicos
Latitud [-25.27 ; 51.17] 17.46 23.63 35.8617 508.22 22.54 129.12 -0.49 -0.8 No hay presencia de valores atípicos
Autor: Grupo 3