UNIVERSIDAD CENTRAL DEL ECUADOR

PROYECTO: ESTUDIO ESTADÍSTICO DE LA CONTAMINACIÓN DEL SUELO Y SU IMPACTO EN LA SALUD

FECHA: 06/12/2025

library(dplyr)

# Load the two input tables: the soil-pollution/disease records and the
# per-country centroid coordinates.
centroides <- read.csv("centroides_10_paises.csv")
datos <- read.csv("soil_pollution_diseases.csv")

# Relabel "United States" as "USA" in the centroid table's country column.
centroides$Pais <- replace(
  centroides$Pais,
  centroides$Pais == "United States",
  "USA"
)

# Rename the country column to "Country", the key used for the join below.
names(centroides)[names(centroides) == "Pais"] <- "Country"

# Attach the centroid columns to each record by country (left join).
datos_final <- left_join(datos, centroides, by = "Country")


# Longitude values taken from the joined data set, coerced to numeric.
Longitud <- as.numeric(datos_final$Longitud)

# Range, number of classes (K) and class width (A).
# Missing values are ignored here; they are also dropped by cut()/table()
# below, so the class counts add up to the number of non-missing values.
minL <- min(Longitud, na.rm = TRUE)
maxL <- max(Longitud, na.rm = TRUE)
R <- maxL - minL
# The class limits below require a finite, non-zero range.
stopifnot(is.finite(R), R > 0)
K <- floor(1 + 3.33 * log10(sum(!is.na(Longitud))))
A <- R / K

# Class limits WITHOUT rounding. Counting must use these exact limits:
# rounding them first can push the lowest limit above the smallest
# observation (or the highest limit below the largest one), which silently
# drops those observations from the table.
breaks <- seq(minL, maxL, length.out = K + 1)

# Classes are closed on the left, [Li, Ls), except the last one, which is
# closed on both sides so the maximum is counted.
clases <- cut(
  Longitud,
  breaks = breaks,
  include.lowest = TRUE,
  right = FALSE
)

# Frequency distribution table
Li <- breaks[-length(breaks)]  # lower class limits (exact)
Ls <- breaks[-1]               # upper class limits (exact)
Mc <- (Li + Ls) / 2            # class midpoints
ni <- as.numeric(table(clases))

# Relative (percentage) and cumulative frequencies.
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Values are rounded only for display, after all counting is done.
TDF_Longitud <- data.frame(
  round(Li, 2), round(Ls, 2), round(Mc, 2), ni, round(hi, 2),
  Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)

# "(%)" marks the percentage columns (hi, Hi_asc, Hi_desc); Ni_asc and
# Ni_desc are cumulative counts.
colnames(TDF_Longitud) <- c(
  "Li", "Ls", "Mc", "ni", "hi(%)",
  "Ni_asc", "Ni_desc", "Hi_asc(%)", "Hi_desc(%)"
)

# Build the totals row: only the ni and hi columns carry a sum, every other
# cell is a "-" placeholder. Mixing text and numbers makes this a character
# vector.
totales <- c("-", "-", "-", sum(ni), sum(hi), "-", "-", "-", "-")
names(totales) <- c(
  "Li", "Ls", "Mc", "ni", "hi",
  "Ni_asc", "Ni_desc", "Hi_asc", "Hi_desc"
)

# Append the totals row at the bottom of the frequency table.
TDF_Longitud <- rbind(TDF_Longitud, totales)

# TABLE
library(dplyr)
library(gt)

# Build the gt table step by step: header, source note, then border and
# striping options.
tabla_9 <- gt(TDF_Longitud)

tabla_9 <- tab_header(
  tabla_9,
  title = md("*Tabla Nro. 9*"),
  subtitle = md("**Tabla de Frecuencias de la Longitud (° ′ ″)**")
)

tabla_9 <- tab_source_note(
  tabla_9,
  source_note = md("Autor: Grupo 3")
)

tabla_9 <- tab_options(
  tabla_9,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)

# Print the finished table.
tabla_9

Li	Ls	Mc	ni	hi	Ni_asc(%)	Ni_desc(%)	Hi_asc	Hi_desc
Tabla Nro. 9
Tabla de Frecuencias de la Longitud (° ′ ″)
-102.55	-82.86	-92.705	317	11.8	317	2686	11.8	100
-82.86	-63.16	-73.01	0	0	317	2369	11.8	88.2
-63.16	-43.47	-53.315	293	10.91	610	2369	22.71	88.2
-43.47	-23.78	-33.625	0	0	610	2076	22.71	77.29
-23.78	-4.08	-13.93	0	0	610	2076	22.71	77.29
-4.08	15.61	5.765	592	22.04	1202	2076	44.75	77.29
15.61	35.31	25.46	0	0	1202	1484	44.75	55.25
35.31	55	45.155	271	10.09	1473	1484	54.84	55.25
55	74.69	64.845	305	11.36	1778	1213	66.2	45.16
74.69	94.39	84.54	311	11.58	2089	908	77.77	33.8
94.39	114.08	104.235	319	11.88	2408	597	89.65	22.23
114.08	133.78	123.93	278	10.35	2686	278	100	10.35
-	-	-	2686	100	-	-	-	-
Autor: Grupo 3

# Quick look at the distribution with the default histogram settings.
hist(Longitud)

# Simplified table built from the histogram

# Let hist() choose the class limits (about 8 classes requested) and return
# the class structure without drawing anything.
Hist_Longitud <- hist(Longitud, breaks = 8, plot = FALSE)
k <- length(Hist_Longitud$breaks)  # number of class limits
Li <- head(Hist_Longitud$breaks, -1)  # lower limits: all breaks but the last
Ls <- Hist_Longitud$breaks[-1]        # upper limits: all breaks but the first
ni <- Hist_Longitud$counts
sum(ni)

## [1] 3000

Mc <- Hist_Longitud$mids
hi <- ni / sum(ni)  # relative frequency as a fraction (not a percentage)
sum(hi)

## [1] 1

# Cumulative frequencies, ascending and descending.
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Assemble the table; fractions are converted to percentages and rounded
# for display only.
TDF_Longitud <- data.frame(
  Li = round(Li, 2),
  Ls = round(Ls, 2),
  Mc = round(Mc, 2),
  ni = ni,
  hi = round(hi * 100, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc * 100, 2),
  Hi_desc = round(Hi_desc * 100, 2)
)
colnames(TDF_Longitud) <- c(
  "Lim inf", "Lim sup", "MC", "ni", "hi(%)",
  "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)"
)

# Build the totals row: the first cell is the "TOTAL" label, the ni and
# hi(%) cells hold the column sums, and the remaining cells are "-"
# placeholders. Mixing text and numbers makes this a character vector.
totales <- c(
  "TOTAL",
  "-",
  "-",
  sum(as.numeric(TDF_Longitud[["ni"]])),
  sum(as.numeric(TDF_Longitud[["hi(%)"]])),
  "-",
  "-",
  "-",
  "-"
)
names(totales) <- c(
  "Li", "Ls", "Mc", "ni", "hi",
  "Ni_asc", "Ni_desc", "Hi_asc", "Hi_desc"
)

# Append the totals row at the bottom of the simplified table.
TDF_Longitud <- rbind(TDF_Longitud, totales)



# TABLE
library(dplyr)
library(gt)

# Build the gt table step by step: header, source note, then border and
# striping options.
tabla_10 <- gt(TDF_Longitud)

tabla_10 <- tab_header(
  tabla_10,
  title = md("*Tabla Nro. 10*"),
  subtitle = md("**Tabla simplificada de Frecuencias de la Longitud (° ′ ″)**")
)

tabla_10 <- tab_source_note(
  tabla_10,
  source_note = md("Autor: Grupo 3")
)

tabla_10 <- tab_options(
  tabla_10,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)

# Print the finished table.
tabla_10

Lim inf	Lim sup	MC	ni	hi(%)	Ni asc	Ni desc	Hi asc(%)	Hi desc(%)
Tabla Nro. 10
Tabla simplificada de Frecuencias de la Longitud (° ′ ″)
-150	-100	-125	314	10.47	314	3000	10.47	100
-100	-50	-75	610	20.33	924	2686	30.8	89.53
-50	0	-25	0	0	924	2076	30.8	69.2
0	50	25	863	28.77	1787	2076	59.57	69.2
50	100	75	616	20.53	2403	1213	80.1	40.43
100	150	125	597	19.9	3000	597	100	19.9
TOTAL	-	-	3000	100	-	-	-	-
Autor: Grupo 3

# Histogram

# Compute the histogram once, without drawing, so that the bars, the y-axis
# limit and the x-axis ticks all come from the same set of class limits.
Hist_G56 <- hist(Longitud, breaks = 10, plot = FALSE)

plot(Hist_G56,
     main = "Gráfica N°56:Distribución de frecuencias para la Longitud (° ′ ″)",
     xlab = "Longitud (° ′ ″)",
     ylab = "Cantidad",
     ylim = c(0, max(Hist_G56$counts)),  # tallest bar of THIS histogram
     col = "purple",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # default x axis suppressed; drawn explicitly below

# One tick per class limit of the plotted histogram.
axis(1, at = Hist_G56$breaks,
     labels = Hist_G56$breaks, las = 1,
     cex.axis = 0.9)

# Charts

# Global ni chart: the y axis spans the whole sample size.

# Compute the histogram once, without drawing, so that the x-axis ticks are
# the class limits of the bars actually drawn.
Hist_G57 <- hist(Longitud, breaks = 10, plot = FALSE)

plot(Hist_G57,
     main = "Gráfica N°57: Distribución de frecuencias para la 
     longitud (° ′ ″) ",
     xlab = "Longitud (° ′ ″)",
     ylab = "Cantidad",
     ylim = c(0, length(Longitud)),
     col = "green",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # default x axis suppressed; drawn explicitly below

# One tick per class limit of the plotted histogram.
axis(1, at = Hist_G57$breaks,
     labels = Hist_G57$breaks, las = 1,
     cex.axis = 0.9)

# Global hi chart: percentage per class on a 0-100 scale.

# Keep only the class rows; the totals row has "-" in the MC column.
datos_grafico <- subset(TDF_Longitud, !(MC %in% c("-", "TOTAL")))

# The table columns are text after the totals row was appended, so the
# percentages are converted back to numbers for plotting.
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°58: Distribución porcentual de frecuencias de la 
  Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 100),
  col = "skyblue",
  space = 0
)

# Local hi chart: same percentages, with the y axis limited to 0-40.

# Keep only the class rows; the totals row has "-" in the MC column.
datos_grafico <- subset(TDF_Longitud, !(MC %in% c("-", "TOTAL")))

# The table columns are text after the totals row was appended, so the
# percentages are converted back to numbers for plotting.
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°59: Distribución porcentual de frecuencias de la Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 40),
  col = "red",
  space = 0
)

# Box plot of the longitude values, drawn horizontally with outliers shown
# as open circles.
boxplot(
  x = Longitud,
  main = "Gráfica N°60:Distribución de frecuencia para la 
        Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  col = "pink",
  horizontal = TRUE,
  outline = TRUE,
  pch = 1
)

# Ascending and descending ogive (cumulative counts).
# The descending curve is plotted against the lower class limits and the
# ascending curve against the upper class limits.

plot(Li, Ni_desc,
     main = "Gráfica N°61: Distribución de frecuencias Ascendente y descendente 
      para la Longitud (° ′ ″)",
     xlab = "Longitud (° ′ ″)",
     ylab = "Cantidad",
     # Axis ranges come from the data: the x axis covers every class limit
     # (including negative ones) and the y axis covers both curves.
     xlim = range(Li, Ls),
     ylim = c(0, max(Ni_asc, Ni_desc)),
     col = "red",
     cex.axis = 0.8,
     type = "o",
     lwd = 3,
     las = 1,
     xaxt = "n")  # default x axis suppressed; drawn explicitly below
lines(Ls, Ni_asc,
      col = "green",
      type = "o",
      lwd = 3)

# One tick per class limit.
axis(1, at = unique(c(Li, Ls)))

# Ascending and descending ogive in percentages.
# Hi_asc and Hi_desc are multiplied by 100 for display. The descending
# curve is plotted against the lower class limits and the ascending curve
# against the upper class limits.

plot(Li, Hi_desc * 100,
     main = "Gráfica N°62: Distribución de frecuencia Ascendente y Descendente 
     porcentual para la Longitud (° ′ ″)",
     xlab = " Longitud (° ′ ″)",
     ylab = "Porcentaje",
     # Axis ranges come from the data: the x axis covers every class limit
     # (including negative ones) and the y axis covers the full 0-100 scale.
     xlim = range(Li, Ls),
     ylim = c(0, 100),
     col = "red",
     type = "o",
     lwd = 2,
     xaxt = "n")  # default x axis suppressed; drawn explicitly below
lines(Ls, Hi_asc * 100,
      col = "blue",
      type = "o",
      lwd = 3)

# One tick per class limit.
axis(1, at = unique(c(Li, Ls)))