FECHA: 7/12/2025
#Estadística Descriptiva
#6/12/2025
# Load the soil-pollution dataset (comma-separated, "." as decimal mark).
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)
# Extract the continuous quantitative variable analysed in this script.
Concentracion_contaminante <- datos$Pollutant_Concentration_mg_kg
# Manual frequency table: class limits, class marks and absolute frequencies.
# Number of classes by Sturges' rule, taken from the actual sample size
# instead of a hard-coded 3000.
n_obs <- length(Concentracion_contaminante)
k <- floor(1 + (3.3 * log10(n_obs)))
# NOTE: `min` and `max` shadow the base functions of the same name. The names
# are kept because the plotting code further down reads them as variables.
min <- base::min(Concentracion_contaminante)
max <- base::max(Concentracion_contaminante)
R <- max - min # range of the data
A <- R / k     # class width
# k + 1 equally spaced cut points whose first and last values are exactly
# `min` and `max` (no accumulated floating-point drift from repeated `by`).
cortes <- seq(from = min, to = max, length.out = k + 1)
Li <- round(cortes[-length(cortes)], 4) # lower class limits
Ls <- round(cortes[-1], 4)              # upper class limits
MC <- round((Li + Ls) / 2, 2)           # class marks
# Absolute frequencies: classes are [Li, Ls), except the last one, which is
# closed on the right so the maximum is counted. The outer edges use the
# unrounded `min`/`max`; interior edges use the rounded limits shown in the
# table.
ni <- tabulate(
  findInterval(
    Concentracion_contaminante,
    c(min, Li[-1], max),
    rightmost.closed = TRUE
  ),
  nbins = length(Li)
)
sum(ni)
## [1] 3000
# Relative frequency of each class, as a percentage.
hi <- (ni / sum(ni)) * 100
sum(hi)
## [1] 100
# Reverse cumulative sum: element i holds the total from i to the end.
acumular_desc <- function(v) rev(cumsum(rev(v)))
# Cumulative counts and cumulative percentages (percentages rounded to
# whole numbers).
Niasc <- cumsum(ni)
Nidsc <- acumular_desc(ni)
Hiasc <- round(cumsum(hi))
Hidsc <- round(acumular_desc(hi))
# Frequency distribution table, one row per class.
TDFConcentracion <- data.frame(
  Li = Li, Ls = Ls, MC = MC,
  ni = ni, hi = hi,
  Niasc = Niasc, Nidsc = Nidsc,
  Hiasc = Hiasc, Hidsc = Hidsc
)
total_ni <- sum(ni)
total_hi <- 100
# Totals row; the blank strings turn the non-total columns into character
# columns once the row is bound to the table.
fila_total <- data.frame(
  Li = " Total", Ls = " ", MC = " ",
  ni = total_ni, hi = total_hi,
  Niasc = " ", Nidsc = " ",
  Hiasc = " ", Hidsc = " "
)
TDFConcentracionCompleto <- rbind(TDFConcentracion, fila_total)
# Table formatting
library(gt)
library(dplyr)
# Optional rounding. This changes TDFConcentracion (the table without the
# totals row), not TDFConcentracionCompleto, which is what gets rendered
# below; the rendered `hi` column gets its two decimals from fmt_number().
TDFConcentracion$hi <- round(TDFConcentracion$hi, 2)
tabla_Concentracion <- TDFConcentracionCompleto %>%
  gt() %>%
  fmt_number(
    columns = hi,
    decimals = 2
  ) %>%
  tab_header(
    title = md("*Tabla Nº1*"),
    subtitle = md("**Tabla de distribucion de Frecuencias
de la Concentración del Contaminante (mg/kg)**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo3")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      # The totals row is labelled " Total" (with a leading space), so a plain
      # `Li == "Total"` never matches. Compare the trimmed label instead.
      rows = trimws(Li) == "Total"
    )
  )
tabla_Concentracion
| Tabla Nº1 |
| Tabla de distribucion de Frecuencias
de la Concentración del Contaminante (mg/kg) |
| Li |
Ls |
MC |
ni |
hi |
Niasc |
Nidsc |
Hiasc |
Hidsc |
| 5.03 |
21.2767 |
13.15 |
254 |
8.47 |
254 |
3000 |
8 |
100 |
| 21.2767 |
37.5233 |
29.4 |
234 |
7.80 |
488 |
2746 |
16 |
92 |
| 37.5233 |
53.77 |
45.65 |
250 |
8.33 |
738 |
2512 |
25 |
84 |
| 53.77 |
70.0167 |
61.89 |
251 |
8.37 |
989 |
2262 |
33 |
75 |
| 70.0167 |
86.2633 |
78.14 |
248 |
8.27 |
1237 |
2011 |
41 |
67 |
| 86.2633 |
102.51 |
94.39 |
235 |
7.83 |
1472 |
1763 |
49 |
59 |
| 102.51 |
118.7567 |
110.63 |
278 |
9.27 |
1750 |
1528 |
58 |
51 |
| 118.7567 |
135.0033 |
126.88 |
246 |
8.20 |
1996 |
1250 |
67 |
42 |
| 135.0033 |
151.25 |
143.13 |
249 |
8.30 |
2245 |
1004 |
75 |
33 |
| 151.25 |
167.4967 |
159.37 |
276 |
9.20 |
2521 |
755 |
84 |
25 |
| 167.4967 |
183.7433 |
175.62 |
228 |
7.60 |
2749 |
479 |
92 |
16 |
| 183.7433 |
199.99 |
191.87 |
251 |
8.37 |
3000 |
251 |
100 |
8 |
| Total |
|
|
3000 |
100.00 |
|
|
|
|
| Autor: Grupo3 |
# Histogram with hist()'s default class breaks. The returned object is stored
# so its breaks, midpoints and counts can be read afterwards.
# The stray trailing comma of the original call (an empty argument) is removed.
histoP <- hist(
  Concentracion_contaminante,
  main = "Grafica Nº1:Distribución de Concentración
del Contaminante (mg/kg)",
  xlab = "Concentración (mg/kg)",
  ylab = "Cantidad",
  col = "blue"
)

# Simplified table built from the classes chosen by the histogram.
Limites <- histoP$breaks
LimInf <- Limites[-length(Limites)] # lower limit of each class
LimSup <- Limites[-1]               # upper limit of each class
Mc <- histoP$mids
# NOTE: `ni` and `hi` are reassigned here and no longer hold the values of
# the manual table computed earlier in the script.
ni <- histoP$counts
sum(ni)
## [1] 3000
hi <- round(ni / sum(ni) * 100, 2)
sum(hi)
## [1] 100
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))
# Cumulative percentages come from the cumulative counts and are rounded
# once. Summing the already-rounded `hi` accumulates rounding error
# (e.g. 838/3000 is 27.93 %, not 27.94 %).
Hi_asc <- round(Ni_asc / sum(ni) * 100, 2)
Hi_dsc <- round(Ni_dsc / sum(ni) * 100, 2)
TDFC <- data.frame(
  LimInf, LimSup, Mc, ni, hi, Ni_asc, Ni_dsc, Hi_asc, Hi_dsc
)
totalni <- sum(ni)
totalhi <- 100
# Append the totals row; blank strings fill the columns without a total.
TDFCCompleto <- rbind(
  TDFC,
  data.frame(
    LimInf = "Total",
    LimSup = " ", Mc = " ", ni = totalni,
    hi = totalhi, Ni_asc = " ", Ni_dsc = " ",
    Hi_asc = " ", Hi_dsc = " "
  )
)
# Render the simplified frequency table with gt, one formatting step at a
# time: header, source note, border/striping options, bold totals row.
tablaConc <- gt(TDFCCompleto)
tablaConc <- tab_header(
  tablaConc,
  title = md("*Tabla Nº2*"),
  subtitle = md("**Tabla simplificada de distribucion de Frecuencias
de la Concentración del Contaminante (mg/kg)**")
)
tablaConc <- tab_source_note(
  tablaConc,
  source_note = md("Autor:Grupo 3")
)
tablaConc <- tab_options(
  tablaConc,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)
# Bold the row whose first column holds the "Total" label.
tablaConc <- tab_style(
  tablaConc,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = LimInf == "Total")
)
tablaConc
| Tabla Nº2 |
| Tabla simplificada de distribucion de Frecuencias
de la Concentración del Contaminante (mg/kg) |
| LimInf |
LimSup |
Mc |
ni |
hi |
Ni_asc |
Ni_dsc |
Hi_asc |
Hi_dsc |
| 0 |
20 |
10 |
233 |
7.77 |
233 |
3000 |
7.77 |
100 |
| 20 |
40 |
30 |
294 |
9.80 |
527 |
2767 |
17.57 |
92.23 |
| 40 |
60 |
50 |
311 |
10.37 |
838 |
2473 |
27.94 |
82.43 |
| 60 |
80 |
70 |
307 |
10.23 |
1145 |
2162 |
38.17 |
72.06 |
| 80 |
100 |
90 |
289 |
9.63 |
1434 |
1855 |
47.8 |
61.83 |
| 100 |
120 |
110 |
332 |
11.07 |
1766 |
1566 |
58.87 |
52.2 |
| 120 |
140 |
130 |
301 |
10.03 |
2067 |
1234 |
68.9 |
41.13 |
| 140 |
160 |
150 |
344 |
11.47 |
2411 |
933 |
80.37 |
31.1 |
| 160 |
180 |
170 |
285 |
9.50 |
2696 |
589 |
89.87 |
19.63 |
| 180 |
200 |
190 |
304 |
10.13 |
3000 |
304 |
100 |
10.13 |
| Total |
|
|
3000 |
100.00 |
|
|
|
|
| Autor:Grupo 3 |
# Charts
# Frequency histogram on the manual classes (local scale).
# `right = FALSE` makes the classes [a, b) with the last one closed on the
# right, the same convention used when the manual table's frequencies are
# counted. hist()'s default (a, b] can place boundary values in a different
# bar than the table does.
hist(
  Concentracion_contaminante,
  breaks = seq(min, max, length.out = k + 1),
  right = FALSE,
  main = "Gráfica Nº2: Frecuencia de la Concentración del contaminante (Local)",
  xlab = "Concentración (mg/kg)",
  ylab = "Frecuencia",
  col = "#4A90E2",
  cex.main = 1.1,
  cex.lab = 1.1
)

# Global scale: the y axis spans the whole sample size, taken from the data
# instead of a hard-coded 3000.
hist(
  Concentracion_contaminante,
  breaks = seq(min, max, length.out = k + 1),
  right = FALSE,
  main = "Gráfica Nº3: Frecuencia de la Concentración del contaminante (Global)",
  xlab = "Concentración (mg/kg)",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, length(Concentracion_contaminante)),
  cex.main = 1.1,
  cex.lab = 1.1
)

# Percentage bar chart of the manual classes, labelled by class mark
# (local scale: y axis chosen by barplot()).
barplot(
  height = TDFConcentracion$hi,
  names.arg = TDFConcentracion$MC,
  main = "Gráfica Nº4: Porcentaje de la Concentración del contaminante (Local)",
  xlab = "Concentración",
  ylab = "Porcentaje (%)",
  col = "skyblue",
  space = 0,
  cex.names = 0.9,
  cex.main = 1.1,
  cex.lab = 1.1
)

# Same chart on a global scale: y axis fixed to 0-100 %.
barplot(
  height = TDFConcentracion$hi,
  names.arg = TDFConcentracion$MC,
  main = "Gráfica Nº5: Porcentaje de la Concentración del contaminante(Global)",
  xlab = "Concentración",
  ylab = "Porcentaje (%)",
  col = "yellow",
  space = 0,
  ylim = c(0, 100),
  cex.names = 0.9,
  cex.main = 1.1,
  cex.lab = 1.1
)

# Horizontal box plot of the raw concentration values.
boxplot(
  x = Concentracion_contaminante,
  main = "Grafica Nº6: Distribución de la Concentración del Contaminante (mg/kg)",
  xlab = "Concentración (mg/kg)",
  col = "pink",
  horizontal = TRUE
)

# Frequency ogives (ascending and descending).
# The descending curve is drawn at the lower class limits and the ascending
# curve at the upper class limits.
plot(
  Li, Nidsc,
  main = "Gráfica Nº7: Distribución de Frecuencias Ascendente y Descendente
de Concentración del Contaminante (mg/kg)",
  xlab = "Concentración (mg/kg)",
  ylab = "Cantidad",
  xlim = c(min, max),
  # Size the y axis for both series so the ascending curve, added afterwards
  # with lines(), cannot fall outside the plotting region.
  ylim = range(Niasc, Nidsc),
  col = "red",
  cex.axis = 0.8,
  type = "o",
  lwd = 3,
  las = 1,
  xaxt = "n"
)
lines(
  Ls, Niasc,
  col = "green",
  type = "o",
  lwd = 3
)
# Custom x axis: ten evenly spaced ticks rounded to whole numbers.
axis(1, at = round(seq(min, max, length.out = 10), 0))
# Identify the two curves, which are otherwise told apart only by colour.
legend(
  "top",
  legend = c("Descendente", "Ascendente"),
  col = c("red", "green"),
  lty = 1,
  pch = 1,
  lwd = 3,
  bty = "n"
)

# Percentage ogives (ascending and descending).
# The chart number in the title is corrected from "Nº53" to "Nº8" so it
# follows Gráfica Nº7.
plot(
  Li, Hidsc,
  main = "Gráfica Nº8: Distribución Porcentual Ascendente y Descendente
de Concentración del Contaminante (mg/kg)",
  xlab = "Concentración (mg/kg)",
  ylab = "Porcentaje (%)",
  xlim = c(min, max),
  # Size the y axis for both series so the ascending curve, added afterwards
  # with lines(), cannot fall outside the plotting region.
  ylim = range(Hiasc, Hidsc),
  col = "red",
  type = "o",
  lwd = 2,
  xaxt = "n"
)
lines(
  Ls, Hiasc,
  col = "blue",
  type = "o",
  lwd = 3
)
# Custom x axis: ten evenly spaced ticks rounded to whole numbers.
axis(1, at = round(seq(min, max, length.out = 10), 0))
# Identify the two curves, which are otherwise told apart only by colour.
legend(
  "top",
  legend = c("Descendente", "Ascendente"),
  col = c("red", "blue"),
  lty = 1,
  pch = 1,
  lwd = c(2, 3),
  bty = "n"
)

# STATISTICAL INDICATORS
Concentracion <- datos$Pollutant_Concentration_mg_kg
# Measures of central tendency
# Arithmetic mean
media <- round(mean(Concentracion), 2)
media
## [1] 102.75
# Mode: the most frequent distinct value(s), from a frequency table
Tabla_Con <- as.data.frame(table(Concentracion))
max_frecuencia <- max(Tabla_Con$Freq)
# as.data.frame(table(...)) stores the values as a factor. Convert the labels
# back to numbers so `moda` is numeric rather than a factor that carries
# every distinct value as a level.
moda <- as.numeric(as.character(
  Tabla_Con$Concentracion[Tabla_Con$Freq == max_frecuencia]
))
moda
## [1] 86.08
# Median
mediana <- median(Concentracion)
mediana
## [1] 104.085
# MEASURES OF DISPERSION #
# Variance
varianza <- var(Concentracion)
varianza
## [1] 3125.634
# Standard deviation
# NOTE: the variable `sd` shadows the function stats::sd; the name is kept
# for backward compatibility.
sd <- stats::sd(Concentracion)
sd
## [1] 55.90737
# Coefficient of variation (%), computed from the rounded mean
cv <- round((sd / media) * 100, 2)
cv
## [1] 54.41
# MEASURES OF SHAPE #
library(e1071)
# Skewness
asimetria <- skewness(Concentracion, type = 2)
asimetria
## [1] -0.01561251
# Kurtosis
# NOTE(review): skewness is requested with type = 2 while kurtosis uses the
# function's default type - confirm that mixing estimators is intended.
curtosis <- kurtosis(Concentracion)
curtosis
## [1] -1.190312
# Outliers: counted with the box-plot rule of boxplot.stats() instead of
# hard-coding the conclusion.
n_atipicos <- length(boxplot.stats(Concentracion)$out)
texto_atipicos <- if (n_atipicos == 0) {
  "No hay presencia de valores atípicos"
} else {
  paste("Hay presencia de", n_atipicos, "valores atípicos")
}
# FINAL SUMMARY TABLE (a single row)
tabla_indicadores <- data.frame(
  Variable = "Concentración del Contaminante",
  Rango = paste0("[", min(Concentracion), " ; ", max(Concentracion), "]"),
  X = round(media, 2),
  Me = round(mediana, 2),
  Mo = paste(moda, collapse = ", "),
  V = round(varianza, 2),
  Sd = round(sd, 2),
  Cv = cv,
  As = round(asimetria, 2),
  K = round(curtosis, 2),
  Valores_Atipicos = texto_atipicos
)
# Summary table rendered with gt, built one formatting step at a time.
library(gt)
tabla_indicadores_gt <- gt(tabla_indicadores)
tabla_indicadores_gt <- tab_header(
  tabla_indicadores_gt,
  title = md("*Tabla N°2.1*"),
  subtitle = md("**Indicadores estadísticos de la Concentración del Contaminante (mg/kg)**")
)
tabla_indicadores_gt <- tab_source_note(
  tabla_indicadores_gt,
  source_note = md("Autor: Grupo 3")
)
tabla_indicadores_gt <- tab_options(
  tabla_indicadores_gt,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)
# Bold the row whose Variable column equals the label below.
tabla_indicadores_gt <- tab_style(
  tabla_indicadores_gt,
  style = cell_text(weight = "bold"),
  locations = cells_body(
    rows = Variable == "Concentración del Contaminante"
  )
)
tabla_indicadores_gt
| Tabla N°2.1 |
| Indicadores estadísticos de la Concentración del Contaminante (mg/kg) |
| Variable |
Rango |
X |
Me |
Mo |
V |
Sd |
Cv |
As |
K |
Valores_Atipicos |
| Concentración del Contaminante |
[5.03 ; 199.99] |
102.75 |
104.09 |
86.08 |
3125.63 |
55.91 |
54.41 |
-0.02 |
-1.19 |
No hay presencia de valores atípicos |
| Autor: Grupo 3 |