FECHA: 5/12/2025
# Load the data set. The file is parsed with ";" as field separator and text
# columns are kept as character vectors rather than factors.
datos <- read.csv("soil_pollution_diseases.csv", sep = ";", stringsAsFactors = FALSE)
# Extract the continuous quantitative variable analysed in the rest of the script.
Concentracion_contaminante <- datos$Pollutant_Concentration_mg_kg
# Fail early with a clear message: `$` returns NULL when the column is missing
# (for instance when the separator does not match the file), and a non-numeric
# column would only break later, in a much less obvious place.
if (!is.numeric(Concentracion_contaminante) ||
    length(Concentracion_contaminante) == 0) {
  stop("La columna 'Pollutant_Concentration_mg_kg' no existe o no es numérica.",
       call. = FALSE)
}
# Frequency distribution table, built by hand.
# The boundaries below cannot be computed from an empty vector or from one
# that contains missing values, so stop here with a direct error.
stopifnot(length(Concentracion_contaminante) > 0,
          !anyNA(Concentracion_contaminante))
# Minimum and maximum, stored under names that do not mask base min()/max().
valor_min <- min(Concentracion_contaminante)
valor_max <- max(Concentracion_contaminante)
# Range
R <- valor_max - valor_min
# Number of classes: 1 + 3.33 * log10(n), truncated to an integer
K <- floor(1 + 3.33 * log10(length(Concentracion_contaminante)))
# Class width
A <- R / K
# Unrounded class boundaries (K + 1 points). The last one is set to the
# observed maximum explicitly so floating-point error in valor_min + A * K
# can never leave the maximum outside the last class.
limites <- valor_min + A * (0:K)
limites[K + 1] <- valor_max
# Lower and upper class limits, rounded to 2 decimals for presentation only.
lim_inf <- round(limites[-(K + 1)], 2)
lim_sup <- round(limites[-1], 2)
# Class marks (midpoints of the displayed limits)
MC <- (lim_inf + lim_sup) / 2
# Absolute frequencies. Classes are [lower, upper), except the last one, which
# is closed on the right so the maximum is counted. Counting against the
# unrounded boundaries keeps observations next to a boundary from being
# dropped or moved to another class by the 2-decimal rounding.
ni <- tabulate(findInterval(Concentracion_contaminante, limites,
                            rightmost.closed = TRUE),
               nbins = K)
sum(ni)
## [1] 3000
# Relative frequencies, in percent
hi <- ni / sum(ni) * 100
sum(hi)
## [1] 100
# Cumulative absolute and relative frequencies, ascending and descending
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
# Assemble the frequency table (percentages rounded to two decimals) and
# attach the display headers in the same step.
TDF_Contaminante <- setNames(
  data.frame(lim_inf, lim_sup, MC, ni, round(hi, 2), Ni_asc, Ni_desc,
             round(Hi_asc, 2), round(Hi_desc, 2)),
  c("Lim inf", "Lim sup", "MC", "ni", "hi(%)",
    "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)")
)
# Totals row. It is a character vector because it mixes text placeholders
# with the two numeric totals; it is appended as the last row of the table.
totales <- c("TOTAL", "-", "-", sum(ni), sum(hi), "-", "-", "-", "-")
names(totales) <- c("lim_inf", "lim_sup", "MC", "ni", "hi",
                    "Ni_asc", "Ni_des", "Hi_asc", "Hi_des")
TDF_Contaminante_total <- rbind(TDF_Contaminante, totales)
# Table formatting
library(dplyr)
library(gt)
# Start from the plain gt table and apply each formatting step in turn.
tabla_1 <- gt(TDF_Contaminante_total)
# Title and subtitle (both rendered from Markdown)
tabla_1 <- tab_header(
  tabla_1,
  title = md("*Tabla Nro. 1*"),
  subtitle = md("**Distribución de frecuencias de concentración del contaminante (mg/kg)
en los países analizados sobre la contaminación del suelo**")
)
# Centre the text of the body cells and of the column labels
tabla_1 <- tab_style(
  tabla_1,
  style = cell_text(align = "center"),
  locations = cells_body()
)
tabla_1 <- tab_style(
  tabla_1,
  style = cell_text(align = "center"),
  locations = cells_column_labels()
)
# Source note below the table
tabla_1 <- tab_source_note(tabla_1, source_note = md("Autor: Grupo 3"))
# Borders, rule widths and row striping
tabla_1 <- tab_options(
  tabla_1,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)
# Display the finished table
tabla_1
| Tabla Nro. 1 |
| Distribución de frecuencias de concentración del contaminante (mg/kg)
en los países analizados sobre la contaminación del suelo |
| Lim inf |
Lim sup |
MC |
ni |
hi(%) |
Ni asc |
Ni desc |
Hi asc(%) |
Hi desc(%) |
| 61 |
1722.5 |
891.75 |
424 |
14.13 |
424 |
3000 |
14.13 |
100 |
| 1722.5 |
3384 |
2553.25 |
247 |
8.23 |
671 |
2576 |
22.37 |
85.87 |
| 3384 |
5045.5 |
4214.75 |
243 |
8.1 |
914 |
2329 |
30.47 |
77.63 |
| 5045.5 |
6707 |
5876.25 |
226 |
7.53 |
1140 |
2086 |
38 |
69.53 |
| 6707 |
8368.5 |
7537.75 |
238 |
7.93 |
1378 |
1860 |
45.93 |
62 |
| 8368.5 |
10030 |
9199.25 |
208 |
6.93 |
1586 |
1622 |
52.87 |
54.07 |
| 10030 |
11691.5 |
10860.75 |
253 |
8.43 |
1839 |
1414 |
61.3 |
47.13 |
| 11691.5 |
13353 |
12522.25 |
233 |
7.77 |
2072 |
1161 |
69.07 |
38.7 |
| 13353 |
15014.5 |
14183.75 |
231 |
7.7 |
2303 |
928 |
76.77 |
30.93 |
| 15014.5 |
16676 |
15845.25 |
247 |
8.23 |
2550 |
697 |
85 |
23.23 |
| 16676 |
18337.5 |
17506.75 |
207 |
6.9 |
2757 |
450 |
91.9 |
15 |
| 18337.5 |
19999 |
19168.25 |
243 |
8.1 |
3000 |
243 |
100 |
8.1 |
| TOTAL |
- |
- |
3000 |
100 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# HISTOGRAM
# Compute the histogram once without drawing it. `breaks = 10` is only a
# suggestion to hist(), so the break points and counts that are really used
# are read back from the returned object and reused below.
hist_grafica_1 <- hist(Concentracion_contaminante, breaks = 10, plot = FALSE)
hist(Concentracion_contaminante, breaks = hist_grafica_1$breaks,
     main = "Gráfica N°1:Distribución de la Concentración del Contaminante (mg/kg)
en la contaminación del suelo",
     xlab = "Concentración del Contaminante (mg/kg)",
     ylab = "Cantidad",
     # Scale the y axis to this histogram's own tallest bar. The `ni` of the
     # manual table uses different classes, so its maximum can be smaller
     # than a bar drawn here and would clip it.
     ylim = c(0, max(hist_grafica_1$counts)),
     col = "purple",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     # The default x axis is suppressed; it is drawn at the break points below.
     xaxt = "n")
# Put the x-axis ticks exactly on the break points of the plotted histogram.
axis(1, at = hist_grafica_1$breaks,
     labels = hist_grafica_1$breaks, las = 1,
     cex.axis = 0.9)

# Simplified version: derive the frequency table from the histogram object
# instead of computing the classes by hand. `plot = FALSE` is spelled out
# because `F` is an ordinary variable that can be reassigned.
Hist_Contaminante <- hist(Concentracion_contaminante, breaks = 10, plot = FALSE)
# NOTE(review): k is the number of break points, i.e. one more than the number
# of classes; it is not used anywhere else in the visible script.
k <- length(Hist_Contaminante$breaks)
# Lower limits: every break except the last; upper limits: every break except
# the first.
Li <- head(Hist_Contaminante$breaks, -1)
Ls <- tail(Hist_Contaminante$breaks, -1)
# Absolute frequencies
ni <- Hist_Contaminante$counts
sum(ni)
## [1] 3000
# Class marks
MC <- Hist_Contaminante$mids
# Relative frequencies as proportions (0-1); they are multiplied by 100 where
# percentages are needed.
hi <- ni / sum(ni)
sum(hi)
## [1] 1
# Cumulative absolute and relative frequencies, ascending and descending
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
# Assemble the simplified frequency table. Limits and class marks are rounded
# to two decimals; the proportions are converted to percentages first.
TDF_Contaminante <- data.frame(round(Li, 2),
                               round(Ls, 2),
                               round(MC, 2),
                               ni,
                               round(hi * 100, 2),
                               Ni_asc,
                               Ni_desc,
                               round(Hi_asc * 100, 2),
                               round(Hi_desc * 100, 2))
# Display headers
names(TDF_Contaminante) <- c("Lim inf", "Lim sup", "MC", "ni", "hi(%)",
                             "Ni asc", "Ni desc", "Hi asc(%)", "Hi desc(%)")
# Totals row. It is a character vector because it mixes text placeholders
# with the two numeric totals; it is appended as the last row of the table.
totales <- c("TOTAL", "-", "-", sum(ni), sum(hi * 100), "-", "-", "-", "-")
names(totales) <- c("lim_inf", "lim_sup", "MC", "ni", "hi",
                    "Ni_asc", "Ni_des", "Hi_asc", "Hi_des")
TDF_Contaminante_total <- rbind(TDF_Contaminante, totales)
# Table formatting
library(dplyr)
library(gt)
# Start from the plain gt table and apply each formatting step in turn.
tabla_2 <- gt(TDF_Contaminante_total)
# Title and subtitle (both rendered from Markdown)
tabla_2 <- tab_header(
  tabla_2,
  title = md("*Tabla N°2*"),
  subtitle = md("**Distribución simplificada de frecuencias de concentración del contaminante (mg/kg)
en los países analizados sobre la contaminación del suelo**")
)
# Centre the text of the body cells and of the column labels
tabla_2 <- tab_style(
  tabla_2,
  style = cell_text(align = "center"),
  locations = cells_body()
)
tabla_2 <- tab_style(
  tabla_2,
  style = cell_text(align = "center"),
  locations = cells_column_labels()
)
# Bold column labels
tabla_2 <- tab_style(
  tabla_2,
  style = cell_text(weight = "bold"),
  locations = cells_column_labels()
)
# Source note below the table
tabla_2 <- tab_source_note(tabla_2, source_note = md("Autor: Grupo 3"))
# Borders, rule widths, row striping and font size
tabla_2 <- tab_options(
  tabla_2,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black",
  table.font.size = px(13)
)
# Display the finished table
tabla_2
| Tabla N°2 |
| Distribución simplificada de frecuencias de concentración del contaminante (mg/kg)
en los países analizados sobre la contaminación del suelo |
| Lim inf |
Lim sup |
MC |
ni |
hi(%) |
Ni asc |
Ni desc |
Hi asc(%) |
Hi desc(%) |
| 0 |
2000 |
1000 |
501 |
16.7 |
501 |
3000 |
16.7 |
100 |
| 2000 |
4000 |
3000 |
263 |
8.77 |
764 |
2499 |
25.47 |
83.3 |
| 4000 |
6000 |
5000 |
288 |
9.6 |
1052 |
2236 |
35.07 |
74.53 |
| 6000 |
8000 |
7000 |
274 |
9.13 |
1326 |
1948 |
44.2 |
64.93 |
| 8000 |
10000 |
9000 |
260 |
8.67 |
1586 |
1674 |
52.87 |
55.8 |
| 10000 |
12000 |
11000 |
299 |
9.97 |
1885 |
1414 |
62.83 |
47.13 |
| 12000 |
14000 |
13000 |
269 |
8.97 |
2154 |
1115 |
71.8 |
37.17 |
| 14000 |
16000 |
15000 |
313 |
10.43 |
2467 |
846 |
82.23 |
28.2 |
| 16000 |
18000 |
17000 |
252 |
8.4 |
2719 |
533 |
90.63 |
17.77 |
| 18000 |
20000 |
19000 |
281 |
9.37 |
3000 |
281 |
100 |
9.37 |
| TOTAL |
- |
- |
3000 |
100 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# Global histogram
hist(Concentracion_contaminante,
     breaks = Hist_Contaminante$breaks, # same break points as the simplified table
     # The title carried the placeholder "N°X"; this figure sits between
     # Gráfica N°1 and Gráfica N°3.
     main = "Gráfica N°2: Distribución Global de la Concentración del Contaminante (mg/kg)",
     ylab = "Cantidad",
     xlab = "Concentración del Contaminante (mg/kg)",
     col = "blue",
     # Global view: the y axis runs up to the total number of observations, so
     # each bar is read against the whole sample (the "GLOBAL" percentage bar
     # chart uses the full 0-100 scale in the same way). Scaling to the
     # tallest bar would only reproduce the local histogram.
     ylim = c(0, sum(Hist_Contaminante$counts)),
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     # The default x axis is suppressed; it is drawn at the break points below.
     xaxt = "n")
# x-axis ticks on the class boundaries
axis(1,
     at = Hist_Contaminante$breaks,
     labels = round(Hist_Contaminante$breaks, 2),
     las = 1,
     cex.axis = 0.9)

# Relative frequency (%), local scale: the y axis ends 5 points above the
# tallest bar. Bars are drawn without gaps and labelled with the class marks.
barplot(height = TDF_Contaminante[["hi(%)"]],
        names.arg = TDF_Contaminante[["MC"]],
        space = 0,
        ylim = c(0, max(TDF_Contaminante[["hi(%)"]]) + 5),
        main = "Gráfica N°3: Porcentaje LOCAL de la Concentración del Contaminante (mg/kg)",
        xlab = "Marca de Clase (MC)",
        ylab = "Porcentaje (%)",
        col = "lightblue",
        cex.main = 0.9,
        cex.lab = 1,
        cex.axis = 0.9)

# Relative frequency (%), global scale: the y axis covers the full 0-100
# range. Bars are drawn without gaps and labelled with the class marks.
barplot(height = TDF_Contaminante[["hi(%)"]],
        names.arg = TDF_Contaminante[["MC"]],
        space = 0,
        ylim = c(0, 100),
        main = "Gráfica N°4: Porcentaje GLOBAL de la Concentración del Contaminante (mg/kg)",
        xlab = "Marca de Clase (MC)",
        ylab = "Porcentaje (%)",
        col = "skyblue",
        cex.main = 0.9,
        cex.lab = 1,
        cex.axis = 0.9)

# Box plot of the raw observations, drawn horizontally with outliers shown as
# open circles.
boxplot(x = Concentracion_contaminante,
        main = "Gráfica N°5: Distribución de la Concentración del Contaminante (mg/kg)
presente en el estudio contaminación del suelo",
        xlab = "Concentración del Contaminante (mg/kg)",
        horizontal = TRUE,
        outline = TRUE,
        pch = 1,
        col = "lemonchiffon")

# Ascending and descending ogive (absolute cumulative frequencies).
# The descending curve is drawn at the lower class limits (orange) and the
# ascending curve at the upper class limits (green).
plot(Li, Ni_desc,
     main = "Gráfica N°6: Ojiva ascendente y descendente de la distribución
del nivel de concentración del contaminante (mg/kg)",
     xlab = "Concentración del Contaminante (mg/kg)",
     ylab = "Cantidad",
     xlim = c(min(Li), max(Ls)),
     # Cover both curves: by default the y range would come from the
     # descending series alone and could cut off an ascending point that
     # falls below it.
     ylim = range(Ni_asc, Ni_desc),
     col = "orange",
     cex.axis = 0.8,
     type = "o",
     lwd = 3,
     las = 1,
     # The default x axis is suppressed; it is drawn at the class limits below.
     xaxt = "n")
lines(Ls, Ni_asc,
      col = "green",
      type = "o",
      lwd = 3)
# x-axis ticks on every class limit
axis(1, at = unique(round(c(Li, Ls), 2)))

# Ascending and descending ogive in percent. Hi_asc / Hi_desc are multiplied
# by 100 here to plot them as percentages.
# The descending curve is drawn at the lower class limits (red) and the
# ascending curve at the upper class limits (blue).
plot(Li, Hi_desc * 100,
     main = "Gráfica N°7: Ojiva ascendente y descendente porcentual de la
concentración del contaminante (mg/kg)",
     xlab = "Concentración del Contaminante (mg/kg)",
     ylab = "Porcentaje (%)",
     xlim = c(min(Li), max(Ls)),
     # Cover both curves: by default the y range would come from the
     # descending series alone and could cut off an ascending point that
     # falls below it.
     ylim = range(Hi_asc * 100, Hi_desc * 100),
     col = "red",
     type = "o",
     lwd = 2,
     # The default x axis is suppressed; it is drawn at the class limits below.
     xaxt = "n")
lines(Ls, Hi_asc * 100,
      col = "blue",
      type = "o",
      lwd = 3)
# x-axis ticks on every class limit
axis(1, at = unique(round(c(Li, Ls), 2)))
