# FECHA: 7/12/2025
#Estadistica Descriptiva
#6/12/2025
# Load the data set from a comma-separated file with a header row and "." as
# the decimal mark.
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  sep = ",",
  dec = ".",
  header = TRUE
)
# Extract the continuous quantitative variable (the Soil_pH column)
Suelo_pH <- datos$Soil_pH
# Manual construction of the class intervals
# Number of observations actually available (missing values excluded), so the
# number of classes follows the data instead of a hard-coded sample size.
n_obs <- sum(!is.na(Suelo_pH))
# Sturges-style rule: k = 1 + 3.3 * log10(n), truncated to an integer
k <- floor(1 + (3.3 * log10(n_obs)))
# NOTE: `min` and `max` are kept as variable names because other sections of
# this script read them; calls such as min(x) still resolve to the base
# functions because R skips non-function objects when looking up a call.
min <- min(Suelo_pH, na.rm = TRUE)
max <- max(Suelo_pH, na.rm = TRUE)
R <- max - min # range of the variable
A <- R / k # class width
# k + 1 equally spaced cut points; `length.out` guarantees exactly k classes
# and keeps both end points exact.
cortes <- seq(from = min, to = max, length.out = k + 1)
Li <- round(cortes[-length(cortes)], 4) # lower class limits
Ls <- round(cortes[-1], 4) # upper class limits
MC <- round((Li + Ls) / 2, 2) # class marks (midpoints)
# Absolute frequency of each class, counted on the half-open interval [Li, Ls)
ni <- vapply(
  seq_along(Li),
  function(j) sum(Suelo_pH >= Li[j] & Suelo_pH < Ls[j], na.rm = TRUE),
  numeric(1)
)
# The last class is closed on the right so the maximum value is counted
ni[length(Li)] <- sum(
  Suelo_pH >= Li[length(Li)] & Suelo_pH <= max,
  na.rm = TRUE
)
sum(ni)
## [1] 3000
# Relative frequency of each class in percent (2 decimals only)
hi <- round(ni / sum(ni) * 100, 2)
sum(hi)
## [1] 100
# Cumulative absolute frequencies (ascending and descending)
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
# Cumulative percentages are derived from the cumulative counts and rounded
# once at the end. Summing the already-rounded `hi` values accumulates the
# rounding error of every class and can end above or below 100.
Hiasc <- round(Niasc / sum(ni) * 100, 2)
Hidsc <- round(Nidsc / sum(ni) * 100, 2)
# Frequency distribution table, one row per class
TDFSuelo_pH <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)
total_ni <- sum(ni)
total_hi <- 100
# Append the "Total" row. The blank strings turn the columns they are bound
# to into character columns; `ni` and `hi` stay numeric.
TDFSuelo_pHCompleto <- rbind(
  TDFSuelo_pH,
  data.frame(
    Li = "Total", Ls = " ", MC = " ",
    ni = total_ni, hi = total_hi,
    Niasc = " ", Nidsc = " ",
    Hiasc = " ", Hidsc = " "
  )
)
# Table formatting
library(gt)
library(dplyr)
TDFSuelo_pH$hi <- round(TDFSuelo_pH$hi, digits = 2)
# Build the gt table one step at a time, starting from the table that
# already carries the "Total" row.
tabla_Suelo_pH <- gt(TDFSuelo_pHCompleto)
# Show the percentage column with two decimals
tabla_Suelo_pH <- fmt_number(tabla_Suelo_pH, columns = hi, decimals = 2)
# Title and subtitle
tabla_Suelo_pH <- tab_header(
  tabla_Suelo_pH,
  title = md("*Tabla Nº1*"),
  subtitle = md("**Tabla de distribución del pH del Suelo**")
)
# Source note shown under the table
tabla_Suelo_pH <- tab_source_note(
  tabla_Suelo_pH,
  source_note = md("Autor: Grupo 3")
)
# Borders, rules and row striping
tabla_Suelo_pH <- tab_options(
  tabla_Suelo_pH,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)
# Bold text for the "Total" row
tabla_Suelo_pH <- tab_style(
  tabla_Suelo_pH,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Li == "Total")
)
tabla_Suelo_pH
| Tabla Nº1 |
| Tabla de distribución del pH del Suelo |
| Li |
Ls |
MC |
ni |
hi |
Niasc |
Nidsc |
Hiasc |
Hidsc |
| 4.5 |
4.8333 |
4.67 |
290 |
9.67 |
290 |
3000 |
9.67 |
100 |
| 4.8333 |
5.1667 |
5 |
248 |
8.27 |
538 |
2710 |
17.94 |
90.33 |
| 5.1667 |
5.5 |
5.33 |
249 |
8.30 |
787 |
2462 |
26.24 |
82.06 |
| 5.5 |
5.8333 |
5.67 |
232 |
7.73 |
1019 |
2213 |
33.97 |
73.76 |
| 5.8333 |
6.1667 |
6 |
265 |
8.83 |
1284 |
1981 |
42.8 |
66.03 |
| 6.1667 |
6.5 |
6.33 |
248 |
8.27 |
1532 |
1716 |
51.07 |
57.2 |
| 6.5 |
6.8333 |
6.67 |
261 |
8.70 |
1793 |
1468 |
59.77 |
48.93 |
| 6.8333 |
7.1667 |
7 |
238 |
7.93 |
2031 |
1207 |
67.7 |
40.23 |
| 7.1667 |
7.5 |
7.33 |
228 |
7.60 |
2259 |
969 |
75.3 |
32.3 |
| 7.5 |
7.8333 |
7.67 |
252 |
8.40 |
2511 |
741 |
83.7 |
24.7 |
| 7.8333 |
8.1667 |
8 |
251 |
8.37 |
2762 |
489 |
92.07 |
16.3 |
| 8.1667 |
8.5 |
8.33 |
238 |
7.93 |
3000 |
238 |
100 |
7.93 |
| Total |
|
|
3000 |
100.00 |
|
|
|
|
| Autor: Grupo 3 |
# Histogram with the default breaks chosen by hist(); the returned object is
# stored so its breaks, midpoints and counts can be reused.
histoP <- hist(
  Suelo_pH,
  col = "blue",
  xlab = "pH",
  ylab = "Cantidad",
  main = "Gráfica Nº1: Distribución del pH del Suelo"
)

# Simplified table built from the histogram object
Limites <- histoP$breaks
LimInf <- Limites[1:(length(Limites) - 1)] # lower limit of each bar
LimSup <- Limites[2:length(Limites)] # upper limit of each bar
Mc <- histoP$mids # class marks
# NOTE: `ni` and `hi` are reassigned here and replace any earlier objects
# with the same names.
ni <- histoP$counts
sum(ni)
## [1] 3000
hi <- round(ni / sum(ni) * 100, 2)
sum(hi)
## [1] 100.01
# Cumulative absolute frequencies (ascending and descending)
Ni_asc <- cumsum(ni)
Ni_dsc <- rev(cumsum(rev(ni)))
# Cumulative percentages come from the cumulative counts and are rounded once.
# Accumulating the rounded `hi` values carries their rounding error forward,
# which is why sum(hi) above is 100.01 rather than 100.
Hi_asc <- round(Ni_asc / sum(ni) * 100, 2)
Hi_dsc <- round(Ni_dsc / sum(ni) * 100, 2)
TDF_Histo_Suelo_pH <- data.frame(
  LimInf, LimSup, Mc, ni, hi, Ni_asc, Ni_dsc, Hi_asc, Hi_dsc
)
totalni <- sum(ni)
totalhi <- 100
# Append the "Total" row; blank strings fill the cells that have no total
TDF_Histo_Suelo_pH_completo <- rbind(
  TDF_Histo_Suelo_pH,
  data.frame(
    LimInf = "Total",
    LimSup = " ", Mc = " ", ni = totalni,
    hi = totalhi, Ni_asc = " ", Ni_dsc = " ",
    Hi_asc = " ", Hi_dsc = " "
  )
)
# gt version of the simplified table, built one step at a time
tabla_Histo <- gt(TDF_Histo_Suelo_pH_completo)
# Title and subtitle
tabla_Histo <- tab_header(
  tabla_Histo,
  title = md("*Tabla Nº2*"),
  subtitle = md("**Tabla simplificada de distribución del pH del Suelo**")
)
# Source note shown under the table
tabla_Histo <- tab_source_note(
  tabla_Histo,
  source_note = md("Autor: Grupo 3")
)
# Outer borders, header rule and row striping
tabla_Histo <- tab_options(
  tabla_Histo,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.bottom.style = "solid",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE
)
# Bold text for the "Total" row
tabla_Histo <- tab_style(
  tabla_Histo,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = LimInf == "Total")
)
tabla_Histo
| Tabla Nº2 |
| Tabla simplificada de distribución del pH del Suelo |
| LimInf |
LimSup |
Mc |
ni |
hi |
Ni_asc |
Ni_dsc |
Hi_asc |
Hi_dsc |
| 4.5 |
5 |
4.75 |
426 |
14.20 |
426 |
3000 |
14.2 |
100.01 |
| 5 |
5.5 |
5.25 |
368 |
12.27 |
794 |
2574 |
26.47 |
85.81 |
| 5.5 |
6 |
5.75 |
371 |
12.37 |
1165 |
2206 |
38.84 |
73.54 |
| 6 |
6.5 |
6.25 |
377 |
12.57 |
1542 |
1835 |
51.41 |
61.17 |
| 6.5 |
7 |
6.75 |
380 |
12.67 |
1922 |
1458 |
64.08 |
48.6 |
| 7 |
7.5 |
7.25 |
339 |
11.30 |
2261 |
1078 |
75.38 |
35.93 |
| 7.5 |
8 |
7.75 |
379 |
12.63 |
2640 |
739 |
88.01 |
24.63 |
| 8 |
8.5 |
8.25 |
360 |
12.00 |
3000 |
360 |
100.01 |
12 |
| Total |
|
|
3000 |
100.00 |
|
|
|
|
| Autor: Grupo 3 |
# Plots
# Cut points for the k manual classes. `length.out` makes the first and last
# break equal `min` and `max` exactly, so every observation falls inside the
# breaks; stepping by `A` relies on floating-point sums reaching `max`.
cortes_hist <- seq(min, max, length.out = k + 1)

# Histogram with the manual classes, y-axis scaled to the data
hist(
  Suelo_pH,
  breaks = cortes_hist,
  main = "Gráfica Nº2: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "#4A90E2"
)

# Same histogram with the y-axis running from 0 to the number of observations
# (taken from the data rather than hard-coded)
hist(
  Suelo_pH,
  breaks = cortes_hist,
  main = "Gráfica Nº3: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Frecuencia",
  col = "green",
  ylim = c(0, sum(!is.na(Suelo_pH)))
)

# Bar plot of the class percentages, labelled with the class marks;
# space = 0 draws the bars without gaps.
barplot(
  TDFSuelo_pH$hi,
  space = 0,
  col = "skyblue",
  main = "Gráfica Nº4: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Porcentaje (%)",
  names.arg = TDFSuelo_pH$MC,
  cex.names = 0.9
)

# Same bar plot on a fixed 0-100 % scale (title typo "Distribucoión" fixed)
barplot(
  TDFSuelo_pH$hi,
  space = 0,
  col = "yellow",
  main = "Gráfica Nº5: Distribución del pH del Suelo",
  xlab = "pH",
  ylab = "Porcentaje (%)",
  names.arg = TDFSuelo_pH$MC,
  ylim = c(0, 100)
)

# Boxplot
# `Suelo_pH_out` is not created anywhere in the visible script. Use it when it
# exists (for example, defined in another chunk); otherwise plot the full
# variable instead of stopping with "object not found".
# NOTE(review): confirm which vector is intended here.
datos_boxplot <- if (exists("Suelo_pH_out")) Suelo_pH_out else Suelo_pH
boxplot(
  datos_boxplot,
  horizontal = TRUE,
  col = "pink",
  main = "Gráfica Nº6: Distribución del pH del Suelo",
  xlab = "pH",
  outline = TRUE,
  pch = 19
)

# Frequency ogives (ascending and descending)
# Descending cumulative counts are drawn at the lower limits (red) and
# ascending cumulative counts at the upper limits (green).
# The y-axis covers both series; by default plot() sizes it from the first
# series only, which would clip ascending points outside that range.
plot(
  Li, Nidsc,
  main = "Gráfica Nº7: Distribución Ascendente y Descendente del pH del Suelo",
  xlab = "pH",
  ylab = "Cantidad",
  xlim = c(min, max),
  ylim = range(c(Nidsc, Niasc)),
  col = "red",
  type = "o",
  lwd = 3
)
lines(
  Ls, Niasc,
  col = "green",
  type = "o",
  lwd = 3
)

# Percentage ogives (ascending and descending)
# Descending cumulative percentages are drawn at the lower limits (red) and
# ascending cumulative percentages at the upper limits (blue).
# The y-axis covers both series so no ascending point is clipped.
plot(
  Li, Hidsc,
  main = "Gráfica Nº8: Distribución Ascendente y Descendente del pH del Suelo",
  xlab = "pH",
  ylab = "Porcentaje (%)",
  xlim = c(min, max),
  ylim = range(c(Hidsc, Hiasc)),
  col = "red",
  type = "o",
  lwd = 2
)
lines(
  Ls, Hiasc,
  col = "blue",
  type = "o",
  lwd = 3
)

# STATISTICAL INDICATORS
# Measures of central tendency
# Arithmetic mean, rounded to 2 decimals
media <- round(mean(Suelo_pH, na.rm = TRUE), 2)
media
## [1] 6.46
# Mode(s) taken from the frequency table of the observed values
Tabla_pH <- as.data.frame(table(Suelo_pH))
max_frecuencia <- max(Tabla_pH$Freq)
# as.data.frame(table()) stores the values as a factor; convert the selected
# levels back to numbers so `moda` holds only the modal values and does not
# carry every level of the variable.
moda <- as.numeric(
  as.character(Tabla_pH$Suelo_pH[Tabla_pH$Freq == max_frecuencia])
)
moda
## [1] 4.81 5.81
# Median
mediana <- median(Suelo_pH, na.rm = TRUE)
mediana
## [1] 6.45
# DISPERSION INDICATORS #
# Variance
varianza <- var(Suelo_pH, na.rm = TRUE)
varianza
## [1] 1.360267
# Standard deviation
# NOTE: the result is stored in a variable named `sd`; the call sd(...) still
# resolves to the function because R skips non-function objects in a call.
sd <- sd(Suelo_pH, na.rm = TRUE)
sd
## [1] 1.166305
# Coefficient of variation (%), computed from the unrounded mean so that
# rounding the mean to 2 decimals first does not shift the result.
cv <- round((sd / mean(Suelo_pH, na.rm = TRUE)) * 100, 2)
cv
## [1] 18.05
# SHAPE INDICATORS #
library(e1071)
# Skewness (e1071 estimator type 2)
asimetria <- skewness(Suelo_pH, type = 2, na.rm = TRUE)
asimetria
## [1] 0.03019663
# Kurtosis, using the same estimator type as the skewness so both shape
# indicators follow one convention (the package default is type 3).
# NOTE(review): confirm type 2 is the convention wanted for the report.
curtosis <- kurtosis(Suelo_pH, type = 2, na.rm = TRUE)
curtosis
## [1] -1.203665
# FINAL SUMMARY TABLE
# One-row data frame collecting every indicator computed above. The range is
# shown as "[min ; max]" and multiple modes are joined with ", ".
tabla_indicadores <- data.frame(
  Variable = "Suelo_pH",
  Rango = paste0(
    "[",
    paste(range(Suelo_pH, na.rm = TRUE), collapse = " ; "),
    "]"
  ),
  X = media,
  Me = round(mediana, 2),
  Mo = paste(moda, collapse = ", "),
  V = round(varianza, 2),
  Sd = round(sd, 2),
  Cv = cv,
  As = round(asimetria, 2),
  K = round(curtosis, 2),
  Valores_Atipicos = "--"
)
# TABLE IN GT FORMAT
library(gt)
# Build the gt table one step at a time from the summary data frame
tabla_indicadores_gt <- gt(tabla_indicadores)
# Title and subtitle
tabla_indicadores_gt <- tab_header(
  tabla_indicadores_gt,
  title = md("*Tabla N°4.1*"),
  subtitle = md("**Indicadores estadísticos del Suelo pH**")
)
# Source note shown under the table
tabla_indicadores_gt <- tab_source_note(
  tabla_indicadores_gt,
  source_note = md("Autor: Grupo 3")
)
# Borders, rules and row striping
tabla_indicadores_gt <- tab_options(
  tabla_indicadores_gt,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  table.border.top.style = "solid",
  table.border.bottom.style = "solid",
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)
# Bold text for the row of the variable
tabla_indicadores_gt <- tab_style(
  tabla_indicadores_gt,
  style = cell_text(weight = "bold"),
  locations = cells_body(rows = Variable == "Suelo_pH")
)
tabla_indicadores_gt
| Tabla N°4.1 |
| Indicadores estadísticos del Suelo pH |
| Variable |
Rango |
X |
Me |
Mo |
V |
Sd |
Cv |
As |
K |
Valores_Atipicos |
| Suelo_pH |
[4.5 ; 8.5] |
6.46 |
6.45 |
4.81, 5.81 |
1.36 |
1.17 |
18.05 |
0.03 |
-1.2 |
-- |
| Autor: Grupo 3 |