FECHA: 06/12/2025
library(dplyr)
# Load the pollution/disease records and the country centroid table
datos <- read.csv("soil_pollution_diseases.csv")
centroides <- read.csv("centroides_10_paises.csv")

# The centroid file spells the country as "United States"; relabel it "USA"
es_estados_unidos <- centroides$Pais == "United States"
centroides$Pais[es_estados_unidos] <- "USA"

# Rename the key column so both tables share the name "Country"
names(centroides)[names(centroides) == "Pais"] <- "Country"

# Attach the centroid columns to every record (rows of `datos` are all kept)
datos_final <- left_join(datos, centroides, by = "Country")
# Frequency distribution table for Latitud using K equal-width classes.
#
# Classes are counted against the UNROUNDED limits and rounded only for
# display. Rounding the limits first pushed the lowest limit above the true
# minimum, so every observation at the minimum was silently dropped.
Latitud <- as.numeric(datos_final$Latitud)

# A left join leaves NA where no centroid matched; ignore those when sizing
# the classes (Latitud itself is left untouched for later code).
n_validos <- sum(!is.na(Latitud))
stopifnot(n_validos > 0)

minL <- min(Latitud, na.rm = TRUE)
maxL <- max(Latitud, na.rm = TRUE)
R <- maxL - minL                              # range
K <- floor(1 + 3.33 * log10(n_validos))       # number of classes
A <- R / K                                    # class width

# Class limits WITHOUT rounding; seq(length.out =) always yields K + 1 limits
breaks <- seq(minL, maxL, length.out = K + 1)

# Intervals are [Li, Ls) and the last one is [Li, Ls], the same convention
# the former manual counting loop used.
clases <- cut(
  Latitud,
  breaks = breaks,
  include.lowest = TRUE,
  right = FALSE
)
ni <- as.numeric(table(clases))

# Lower limits, upper limits and class marks taken from the same breaks
Li <- breaks[-length(breaks)]
Ls <- breaks[-1]
Mc <- (Li + Ls) / 2

# Relative (%) and cumulative frequencies
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Rounding happens here, for presentation only
TDF_Latitud <- data.frame(
  Li = round(Li, 2),
  Ls = round(Ls, 2),
  Mc = round(Mc, 2),
  ni = ni,
  hi = round(hi, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc, 2),
  Hi_desc = round(Hi_desc, 2)
)
# Ni columns are counts and hi/Hi columns are percentages, so the "(%)"
# suffix belongs on the latter.
colnames(TDF_Latitud) <- c(
  "Li", "Ls", "Mc", "ni", "hi(%)",
  "Ni_asc", "Ni_desc", "Hi_asc(%)", "Hi_desc(%)"
)

# Totals row; binding a character row turns every column into text, which is
# fine because the table is only used for display from here on.
totales <- c(
  Li = "-",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Latitud <- rbind(TDF_Latitud, totales)
# TABLA
library(dplyr)
library(gt)
# Render Table 7 with gt. The steps are applied one at a time inside local()
# so the intermediate object stays out of the workspace; the value of the
# last call is returned visibly and therefore printed.
local({
  tabla <- gt(TDF_Latitud)

  # Title and subtitle
  tabla <- tab_header(
    tabla,
    title = md("*Tabla Nro. 7*"),
    subtitle = md("**Tabla de Frecuencias de la Latitud (° ′ ″)**")
  )

  # Source note under the table
  tabla <- tab_source_note(tabla, source_note = md("Autor: Grupo 3"))

  # Borders, rules and row striping
  tab_options(
    tabla,
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  )
})
| Tabla Nro. 7 |
| Tabla de Frecuencias de la Latitud (° ′ ″) |
| Li |
Ls |
Mc |
ni |
hi |
Ni_asc(%) |
Ni_desc(%) |
Hi_asc |
Hi_desc |
| -25.27 |
-18.9 |
-22.085 |
0 |
0 |
0 |
2722 |
0 |
100 |
| -18.9 |
-12.53 |
-15.715 |
293 |
10.76 |
293 |
2722 |
10.76 |
100 |
| -12.53 |
-6.16 |
-9.345 |
0 |
0 |
293 |
2429 |
10.76 |
89.24 |
| -6.16 |
0.21 |
-2.975 |
271 |
9.96 |
564 |
2429 |
20.72 |
89.24 |
| 0.21 |
6.58 |
3.395 |
0 |
0 |
564 |
2158 |
20.72 |
79.28 |
| 6.58 |
12.95 |
9.765 |
309 |
11.35 |
873 |
2158 |
32.07 |
79.28 |
| 12.95 |
19.32 |
16.135 |
0 |
0 |
873 |
1849 |
32.07 |
67.93 |
| 19.32 |
25.69 |
22.505 |
625 |
22.96 |
1498 |
1849 |
55.03 |
67.93 |
| 25.69 |
32.06 |
28.875 |
305 |
11.2 |
1803 |
1224 |
66.24 |
44.97 |
| 32.06 |
38.43 |
35.245 |
636 |
23.37 |
2439 |
919 |
89.6 |
33.76 |
| 38.43 |
44.8 |
41.615 |
0 |
0 |
2439 |
283 |
89.6 |
10.4 |
| 44.8 |
51.17 |
47.985 |
283 |
10.4 |
2722 |
283 |
100 |
10.4 |
| - |
- |
- |
2722 |
100 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# Quick look at the default histogram
hist(Latitud)

# Simplified frequency table: let hist() choose the class limits and reuse
# its breaks, counts and midpoints instead of computing them by hand.
# plot = FALSE (not F, which can be reassigned) returns the object only.
Hist_Latitud <- hist(Latitud, breaks = 8, plot = FALSE)
k <- length(Hist_Latitud$breaks)  # number of break points (classes + 1)
Li <- head(Hist_Latitud$breaks, -1)  # every break except the last
Ls <- tail(Hist_Latitud$breaks, -1)  # every break except the first
ni <- Hist_Latitud$counts
sum(ni)
## [1] 3000
Mc <- Hist_Latitud$mids
hi <- ni / sum(ni)  # relative frequency as a fraction (0-1)
sum(hi)
## [1] 1
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Fractions become percentages here; rounding is for display only
TDF_Latitud <- data.frame(
  Li = round(Li, 2),
  Ls = round(Ls, 2),
  Mc = round(Mc, 2),
  ni = ni,
  hi = round(hi * 100, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc * 100, 2),
  Hi_desc = round(Hi_desc * 100, 2)
)
colnames(TDF_Latitud) <- c(
  "Lim inf", "Lim sup", "MC", "ni", "hi(%)",
  "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)"
)

# Totals row. The percentage total is taken from the unrounded fractions so
# accumulated rounding error cannot show up as 99.99 or 100.01.
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi) * 100, 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Latitud <- rbind(TDF_Latitud, totales)
# TABLA
library(dplyr)
library(gt)
# Render Table 8 with gt. The steps are applied one at a time inside local()
# so the intermediate object stays out of the workspace; the value of the
# last call is returned visibly and therefore printed.
local({
  tabla <- gt(TDF_Latitud)

  # Title and subtitle
  tabla <- tab_header(
    tabla,
    title = md("*Tabla Nro. 8*"),
    subtitle = md("**Tabla simplificada de Frecuencias de la Latitud (° ′ ″)**")
  )

  # Source note under the table
  tabla <- tab_source_note(tabla, source_note = md("Autor: Grupo 3"))

  # Borders, rules and row striping
  tab_options(
    tabla,
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE
  )
})
| Tabla Nro. 8 |
| Tabla simplificada de Frecuencias de la Latitud (° ′ ″) |
| Lim inf |
Lim sup |
MC |
ni |
hi(%) |
Ni asc |
Ni desc |
Hi asc(%) |
Hi desc(%) |
| -30 |
-20 |
-25 |
278 |
9.27 |
278 |
3000 |
9.27 |
100 |
| -20 |
-10 |
-15 |
293 |
9.77 |
571 |
2722 |
19.03 |
90.73 |
| -10 |
0 |
-5 |
271 |
9.03 |
842 |
2429 |
28.07 |
80.97 |
| 0 |
10 |
5 |
309 |
10.3 |
1151 |
2158 |
38.37 |
71.93 |
| 10 |
20 |
15 |
0 |
0 |
1151 |
1849 |
38.37 |
61.63 |
| 20 |
30 |
25 |
625 |
20.83 |
1776 |
1849 |
59.2 |
61.63 |
| 30 |
40 |
35 |
941 |
31.37 |
2717 |
1224 |
90.57 |
40.8 |
| 40 |
50 |
45 |
0 |
0 |
2717 |
283 |
90.57 |
9.43 |
| 50 |
60 |
55 |
283 |
9.43 |
3000 |
283 |
100 |
9.43 |
| TOTAL |
- |
- |
3000 |
100 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# Histogram (Gráfica 49)
# The classes are computed once so that the bars, the y-limit and the x-axis
# ticks all come from the same breaks. Previously the ticks came from a
# default-breaks histogram and the y-limit from a different table, so they
# could disagree with the bars actually drawn.
Hist_Latitud_10 <- hist(Latitud, breaks = 10, plot = FALSE)

hist(Latitud,
     breaks = Hist_Latitud_10$breaks,
     main = "Gráfica N°49:Distribución para la Latitud (° ′ ″)",
     xlab = "Latitud (° ′ ″)",
     ylab = "Cantidad",
     ylim = c(0, max(Hist_Latitud_10$counts)),
     col = "purple",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # x axis is drawn by hand below

# One tick per class limit
axis(1,
     at = Hist_Latitud_10$breaks,
     labels = Hist_Latitud_10$breaks,
     las = 1,
     cex.axis = 0.9)

# Charts
# Global absolute-frequency chart (Gráfica 50): the y axis spans the whole
# sample size so each bar can be read against the total.
# The x-axis ticks are taken from the same histogram that draws the bars;
# before, they came from a histogram built with a different `breaks` value.
Hist_Latitud_10 <- hist(Latitud, breaks = 10, plot = FALSE)

hist(Latitud,
     breaks = Hist_Latitud_10$breaks,
     main = "Gráfica N°50: Distribución para la\nLatitud",
     xlab = "Latitud (° ′ ″)",
     ylab = "Cantidad",
     ylim = c(0, length(Latitud)),
     col = "green",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # x axis is drawn by hand below

# One tick per class limit
axis(1,
     at = Hist_Latitud_10$breaks,
     labels = Hist_Latitud_10$breaks,
     las = 1,
     cex.axis = 0.9)

# Global relative-frequency chart (Gráfica 51): percentages on a 0-100 scale.
# Keep only the class rows; the totals row carries "-" in the MC column.
datos_grafico <- subset(TDF_Latitud, !(MC %in% c("-", "TOTAL")))

# The table columns are text after the totals row was bound, so the
# percentages are converted back to numbers for plotting.
barplot(
  height = as.numeric(datos_grafico$`hi(%)`),
  names.arg = datos_grafico$MC,
  main = "Gráfica N°51: Distribución porcentual de frecuencias de la Latitud",
  xlab = "Latitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 100),
  col = "skyblue",
  space = 0
)

# Local relative-frequency chart (Gráfica 52): same bars as the global chart
# but with the y axis limited to 0-40 so the differences are easier to see.
# Keep only the class rows; the totals row carries "-" in the MC column.
datos_grafico <- subset(TDF_Latitud, !(MC %in% c("-", "TOTAL")))

# The table columns are text after the totals row was bound, so the
# percentages are converted back to numbers for plotting.
barplot(
  height = as.numeric(datos_grafico$`hi(%)`),
  names.arg = datos_grafico$MC,
  main = "Gráfica N°52: Distribución porcentual de frecuencias de la Latitud",
  xlab = "Latitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 40),
  col = "red",
  space = 0
)

# Box plot (Gráfica 53): horizontal box of Latitud, outliers drawn as
# open circles.
boxplot(
  x = Latitud,
  main = "Gráfica N°53:Distribución de frecuencia para la Latitud",
  xlab = "Latitud (° ′ ″)",
  horizontal = TRUE,
  outline = TRUE,
  pch = 1,
  col = "pink"
)

# Ascending and descending ogives, absolute frequencies (Gráfica 54).
# Descending counts are plotted at the lower class limits (red) and ascending
# counts at the upper class limits (green).
#
# The axes are derived from the data: x spans the class limits and y runs
# from 0 to the largest cumulative count. The former fixed 0-900 x range did
# not fit latitude values (negative classes were cut off), and the automatic
# y range only considered the descending series.
plot(Li, Ni_desc,
     main = "Gráfica N°54: Distribución de frecuencias Ascendente y descendente\npara la Latitud (° ′ ″)",
     xlab = "Latitud (° ′ ″)",
     ylab = "Cantidad",
     xlim = range(Li, Ls),
     ylim = c(0, max(Ni_asc, Ni_desc)),
     col = "red",
     cex.axis = 0.8,
     type = "o",
     lwd = 3,
     las = 1,
     xaxt = "n")  # x axis is drawn by hand below
lines(Ls, Ni_asc,
      col = "green",
      type = "o",
      lwd = 3)

# One tick per class limit
axis(1, at = unique(c(Li, Ls)))

# Ascending and descending ogives, percentages (Gráfica 55).
# Hi_asc / Hi_desc are multiplied by 100 here to be shown as percentages.
# Descending values are plotted at the lower class limits (red) and
# ascending values at the upper class limits (blue).
#
# The axes are derived from the data: x spans the class limits and y is the
# full 0-100 percentage scale. The former fixed 0-900 x range did not fit
# latitude values (negative classes were cut off).
plot(Li, Hi_desc * 100,
     main = "Gráfica N°55: Distribución de frecuencia Ascendente y Descendente\nporcentual para la Latitud (° ′ ″) ",
     xlab = "Latitud (° ′ ″)",
     ylab = "Porcentaje",
     xlim = range(Li, Ls),
     ylim = c(0, 100),
     col = "red",
     type = "o",
     lwd = 2,
     xaxt = "n")  # x axis is drawn by hand below
lines(Ls, Hi_asc * 100,
      col = "blue",
      type = "o",
      lwd = 3)

# One tick per class limit
axis(1, at = unique(c(Li, Ls)))
