FECHA: 06/12/2025
library(dplyr)
# Read the observations and the per-country centroid table from disk
datos <- read.csv("soil_pollution_diseases.csv")
centroides <- read.csv("centroides_10_paises.csv")
# Relabel "United States" as "USA" in the centroid table
centroides$Pais <- replace(
  centroides$Pais,
  centroides$Pais == "United States",
  "USA"
)
# Rename the "Pais" column to "Country", the key used for the join below
names(centroides)[names(centroides) == "Pais"] <- "Country"
# Left join: every row of `datos` is kept and gains the centroid columns
datos_final <- left_join(datos, centroides, by = "Country")
# ---- Frequency distribution table for Longitud (manual construction) ----
# Longitude column of the joined data, coerced to numeric.
Longitud <- as.numeric(datos_final$Longitud)

# Range of the data. NA values are skipped so that a missing longitude
# cannot turn every limit into NA.
minL <- min(Longitud, na.rm = TRUE)
maxL <- max(Longitud, na.rm = TRUE)
R <- maxL - minL
# Number of classes from Sturges' rule, 1 + 3.33 * log10(n), using the number
# of non-missing observations, and the resulting class width.
K <- floor(1 + 3.33 * log10(sum(!is.na(Longitud))))
A <- R / K

# Exact (unrounded) class limits: K + 1 equally spaced cut points.
breaks <- seq(minL, maxL, length.out = K + 1)
Li <- breaks[seq_len(K)]
Ls <- breaks[seq_len(K) + 1]
Mc <- (Li + Ls) / 2

# Absolute frequencies, counted against the EXACT limits. Counting against
# limits rounded to 2 decimals can move the first limit above the true
# minimum (or the last one below the true maximum) and silently drop those
# observations. Classes are [Li, Ls), except the last one which is [Li, Ls].
clases <- cut(
  Longitud,
  breaks = breaks,
  include.lowest = TRUE,
  right = FALSE
)
ni <- as.numeric(table(clases))

# Relative frequencies (in percent) and cumulative frequencies
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Rounding is applied only here, for display.
TDF_Longitud <- data.frame(
  round(Li, 2), round(Ls, 2), round(Mc, 2), ni, round(hi, 2),
  Ni_asc, Ni_desc, round(Hi_asc, 2), round(Hi_desc, 2)
)
# "(%)" marks the percentage columns (hi, Hi_*); the Ni_* columns are counts.
colnames(TDF_Longitud) <- c(
  "Li", "Ls", "Mc", "ni", "hi(%)",
  "Ni_asc", "Ni_desc", "Hi_asc(%)", "Hi_desc(%)"
)

# Totals row. rbind() places a plain vector by position, and the "-" entries
# turn every column into character, which is fine for a display-only table.
totales <- c(
  Li = "-",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Longitud <- rbind(TDF_Longitud, totales)
# TABLE
library(dplyr)
library(gt)
# Build the gt table one step at a time: header, source note, border styling
tabla_9 <- gt(TDF_Longitud)
tabla_9 <- tab_header(
  tabla_9,
  title = md("*Tabla Nro. 9*"),
  subtitle = md("**Tabla de Frecuencias de la Longitud (° ′ ″)**")
)
tabla_9 <- tab_source_note(tabla_9, source_note = md("Autor: Grupo 3"))
tabla_9 <- tab_options(
  tabla_9,
  # outer table borders
  table.border.top.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  # heading and column-label rules
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  # table body
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)
# A bare top-level value is displayed, exactly as the piped expression was
tabla_9
## Tabla Nro. 9
## Tabla de Frecuencias de la Longitud (° ′ ″)
##
## | Li      | Ls     | Mc      | ni   | hi    | Ni_asc(%) | Ni_desc(%) | Hi_asc | Hi_desc |
## |---------|--------|---------|------|-------|-----------|------------|--------|---------|
## | -102.55 | -82.86 | -92.705 | 317  | 11.8  | 317       | 2686       | 11.8   | 100     |
## | -82.86  | -63.16 | -73.01  | 0    | 0     | 317       | 2369       | 11.8   | 88.2    |
## | -63.16  | -43.47 | -53.315 | 293  | 10.91 | 610       | 2369       | 22.71  | 88.2    |
## | -43.47  | -23.78 | -33.625 | 0    | 0     | 610       | 2076       | 22.71  | 77.29   |
## | -23.78  | -4.08  | -13.93  | 0    | 0     | 610       | 2076       | 22.71  | 77.29   |
## | -4.08   | 15.61  | 5.765   | 592  | 22.04 | 1202      | 2076       | 44.75  | 77.29   |
## | 15.61   | 35.31  | 25.46   | 0    | 0     | 1202      | 1484       | 44.75  | 55.25   |
## | 35.31   | 55     | 45.155  | 271  | 10.09 | 1473      | 1484       | 54.84  | 55.25   |
## | 55      | 74.69  | 64.845  | 305  | 11.36 | 1778      | 1213       | 66.2   | 45.16   |
## | 74.69   | 94.39  | 84.54   | 311  | 11.58 | 2089      | 908        | 77.77  | 33.8    |
## | 94.39   | 114.08 | 104.235 | 319  | 11.88 | 2408      | 597        | 89.65  | 22.23   |
## | 114.08  | 133.78 | 123.93  | 278  | 10.35 | 2686      | 278        | 100    | 10.35   |
## | -       | -      | -       | 2686 | 100   | -         | -          | -      | -       |
##
## Autor: Grupo 3
# Quick look at the raw variable with hist()'s default binning
hist(Longitud)

# ---- Simplified table taken from hist() ----
# Ask hist() for about 8 classes; it picks the break points itself.
# plot = FALSE returns the histogram object without drawing anything.
Hist_Longitud <- hist(Longitud, breaks = 8, plot = FALSE)
# NOTE: k is the number of break points, i.e. the number of classes plus one
k <- length(Hist_Longitud$breaks)
# Lower limits are all breaks but the last; upper limits all but the first
Li <- Hist_Longitud$breaks[-k]
Ls <- Hist_Longitud$breaks[-1]
ni <- Hist_Longitud$counts
sum(ni)
## [1] 3000
Mc <- Hist_Longitud$mids
# hi and the Hi_* vectors are kept as proportions (0-1); they are turned into
# percentages only when the table is built.
hi <- ni / sum(ni)
sum(hi)
## [1] 1
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
TDF_Longitud <- data.frame(
  Li = round(Li, 2),
  Ls = round(Ls, 2),
  Mc = round(Mc, 2),
  ni = ni,
  hi = round(hi * 100, 2),
  Ni_asc = Ni_asc,
  Ni_desc = Ni_desc,
  Hi_asc = round(Hi_asc * 100, 2),
  Hi_desc = round(Hi_desc * 100, 2)
)
colnames(TDF_Longitud) <- c(
  "Lim inf", "Lim sup", "MC", "ni", "hi(%)",
  "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)"
)
# Totals row. The totals come from the unrounded vectors, so rounding error
# in the displayed percentages cannot make the total drift away from 100.
# rbind() places a plain vector by position, so its names are only labels.
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi) * 100, 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Longitud <- rbind(TDF_Longitud, totales)
# TABLE
library(dplyr)
library(gt)
# Build the gt table one step at a time: header, source note, border styling
tabla_10 <- gt(TDF_Longitud)
tabla_10 <- tab_header(
  tabla_10,
  title = md("*Tabla Nro. 10*"),
  subtitle = md("**Tabla simplificada de Frecuencias de la Longitud (° ′ ″)**")
)
tabla_10 <- tab_source_note(tabla_10, source_note = md("Autor: Grupo 3"))
tabla_10 <- tab_options(
  tabla_10,
  # outer table borders
  table.border.top.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  # heading and column-label rules
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  # table body
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)
# A bare top-level value is displayed, exactly as the piped expression was
tabla_10
## Tabla Nro. 10
## Tabla simplificada de Frecuencias de la Longitud (° ′ ″)
##
## | Lim inf | Lim sup | MC   | ni   | hi(%) | Ni asc | Ni desc | Hi asc(%) | Hi desc(%) |
## |---------|---------|------|------|-------|--------|---------|-----------|------------|
## | -150    | -100    | -125 | 314  | 10.47 | 314    | 3000    | 10.47     | 100        |
## | -100    | -50     | -75  | 610  | 20.33 | 924    | 2686    | 30.8      | 89.53      |
## | -50     | 0       | -25  | 0    | 0     | 924    | 2076    | 30.8      | 69.2       |
## | 0       | 50      | 25   | 863  | 28.77 | 1787   | 2076    | 59.57     | 69.2       |
## | 50      | 100     | 75   | 616  | 20.53 | 2403   | 1213    | 80.1      | 40.43      |
## | 100     | 150     | 125  | 597  | 19.9  | 3000   | 597     | 100       | 19.9       |
## | TOTAL   | -       | -    | 3000 | 100   | -      | -       | -         | -          |
##
## Autor: Grupo 3
# Histogram (local scale)
# The histogram is computed once and reused, so the bars, the y-limit and the
# x-axis ticks all come from the same binning. Taking the ticks from a second
# hist() call with different `breaks`, or the y-limit from another table's
# counts, can misalign the ticks or clip the tallest bar.
Hist_56 <- hist(Longitud, breaks = 10, plot = FALSE)
plot(Hist_56,
  main = "Gráfica N°56:Distribución de frecuencias para la Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Cantidad",
  ylim = c(0, max(Hist_56$counts)),
  col = "purple",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  # the default x axis is suppressed and redrawn below at the class limits
  xaxt = "n"
)
axis(1,
  at = Hist_56$breaks,
  labels = Hist_56$breaks,
  las = 1,
  cex.axis = 0.9
)

# Charts
# Global ni chart: counts drawn on a y axis that goes up to the sample size
# The histogram is computed once so the x-axis ticks sit on the limits of the
# bars that are actually drawn, not on those of a different binning.
Hist_57 <- hist(Longitud, breaks = 10, plot = FALSE)
plot(Hist_57,
  main = "Gráfica N°57: Distribución de frecuencias para la
longitud (° ′ ″) ",
  xlab = "Longitud (° ′ ″)",
  ylab = "Cantidad",
  ylim = c(0, length(Longitud)),
  col = "green",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  # the default x axis is suppressed and redrawn below at the class limits
  xaxt = "n"
)
axis(1,
  at = Hist_57$breaks,
  labels = Hist_57$breaks,
  las = 1,
  cex.axis = 0.9
)

# Global hi chart: percentage per class on a fixed 0-100 y axis
# Drop the totals row, identified by "-" (or "TOTAL") in the MC column
datos_grafico <- TDF_Longitud[
  !is.element(TDF_Longitud$MC, c("-", "TOTAL")),
]
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°58: Distribución porcentual de frecuencias de la
Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 100),
  col = "skyblue",
  # space = 0 draws the bars touching each other
  space = 0
)

# Local hi chart: percentage per class on a 0-40 y axis
# Drop the totals row, identified by "-" (or "TOTAL") in the MC column
datos_grafico <- TDF_Longitud[
  !is.element(TDF_Longitud$MC, c("-", "TOTAL")),
]
barplot(
  height = as.numeric(datos_grafico[["hi(%)"]]),
  names.arg = datos_grafico[["MC"]],
  main = "Gráfica N°59: Distribución porcentual de frecuencias de la Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Porcentaje",
  ylim = c(0, 40),
  col = "red",
  # space = 0 draws the bars touching each other
  space = 0
)

# Box plot of the variable, drawn horizontally
boxplot(
  x = Longitud,
  main = "Gráfica N°60:Distribución de frecuencia para la
Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  col = "pink",
  horizontal = TRUE,
  # outliers are drawn, as open circles
  outline = TRUE,
  pch = 1
)

# Ascending and descending ogive (cumulative counts)
# Both axis ranges are derived from the data being drawn:
#   - x spans the class limits (longitude takes negative values, so a fixed
#     0-900 window would cut off part of the curve);
#   - y starts at 0 and reaches the largest cumulative count, so neither
#     curve is clipped.
plot(Li, Ni_desc,
  main = "Gráfica N°61: Distribución de frecuencias Ascendente y descendente
para la Longitud (° ′ ″)",
  xlab = "Longitud (° ′ ″)",
  ylab = "Cantidad",
  xlim = range(Li, Ls),
  ylim = c(0, max(Ni_asc, Ni_desc)),
  col = "red",
  cex.axis = 0.8,
  type = "o",
  lwd = 3,
  las = 1,
  # the default x axis is suppressed and redrawn below at the class limits
  xaxt = "n"
)
# Ascending curve, plotted against the upper class limits
lines(Ls, Ni_asc,
  col = "green",
  type = "o",
  lwd = 3
)
axis(1, at = unique(c(Li, Ls)))

# Ascending and descending ogive in percent
# Hi_desc and Hi_asc are multiplied by 100 to plot them as percentages.
# x spans the class limits (longitude takes negative values, so a fixed
# 0-900 window would cut off part of the curve); y covers 0-100 so that
# neither curve is clipped.
plot(Li, Hi_desc * 100,
  main = "Gráfica N°62: Distribución de frecuencia Ascendente y Descendente
porcentual para la Longitud (° ′ ″)",
  xlab = " Longitud (° ′ ″)",
  ylab = "Porcentaje",
  xlim = range(Li, Ls),
  ylim = c(0, 100),
  col = "red",
  type = "o",
  lwd = 2,
  # the default x axis is suppressed and redrawn below at the class limits
  xaxt = "n"
)
# Ascending curve, plotted against the upper class limits
lines(Ls, Hi_asc * 100,
  col = "blue",
  type = "o",
  lwd = 3
)
axis(1, at = unique(c(Li, Ls)))
