FECHA: 06/12/2025
#Estadistica Descriptiva
#6/12/2025
# Load the dataset: comma-separated file with a header row and "." as the
# decimal mark.
datos <- read.csv(
  file = "soil_pollution_diseases.csv",
  sep = ",",
  dec = ".",
  header = TRUE
)
# Extract the continuous quantitative variable analysed in this section.
Humedad <- datos$Humidity_.
# Manual frequency distribution table for Humedad.
# The number of classes K comes from the Sturges-type formula
# 1 + 3.33 * log10(n); A is the resulting class width.
limites_obs <- range(Humedad)
R <- limites_obs[2] - limites_obs[1]
K <- floor(1 + 3.33 * log10(length(Humedad)))
A <- R / K
# Exact class cut points (K + 1 values). They are built by multiplication
# instead of seq(by = A) so floating-point drift cannot change how many
# limits are produced; the last cut point is pinned to the observed maximum.
cortes <- limites_obs[1] + (0:K) * A
cortes[K + 1] <- limites_obs[2]
# Absolute frequencies counted on the exact cut points: every class is
# [Li, Ls) except the last one, which is closed on the right so the maximum
# observation is included.
ni <- tabulate(
  findInterval(Humedad, cortes, rightmost.closed = TRUE),
  nbins = K
)
# Limits are rounded only for display; class marks use the displayed limits.
Li <- round(cortes[-(K + 1)], 2)
Ls <- round(cortes[-1], 2)
Mc <- (Li + Ls) / 2
sum(ni)
## [1] 3000
# Relative frequency (%) and ascending/descending cumulative frequencies.
hi <- ni / sum(ni) * 100
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
TDF_Humedad <- data.frame(
  Li, Ls, Mc, ni, round(hi, 2), Ni_asc, Ni_desc,
  round(Hi_asc, 2), round(Hi_desc, 2)
)
colnames(TDF_Humedad) <- c(
  "Li", "Ls", "Mc", "ni", "hi", "Ni_asc", "Ni_desc",
  "Hi_asc(%)", "Hi_desc(%)"
)
# Totals row. It is a character vector, so appending it with rbind() turns
# every column of the table into character (display-only table).
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  hi = sum(hi),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Humedad <- rbind(TDF_Humedad, totales)
library(dplyr)
library(gt)
# Render the frequency table (Table 7) with gt: header, source note and
# black/gray border styling with striped body rows.
gt(TDF_Humedad) %>%
  tab_header(
    title = md("*Tabla Nro. 7*"),
    subtitle = md("**Tabla de distribución de la Humedad (%) presente en el Suelo**")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 3")) %>%
  tab_options(
    # Outer table borders
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label borders
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Body rows
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 7 |
| Tabla de distribución de la Humedad (%) presente en el Suelo |
| Li |
Ls |
Mc |
ni |
hi |
Ni_asc |
Ni_desc |
Hi_asc(%) |
Hi_desc(%) |
| 20 |
26.25 |
23.125 |
231 |
7.7 |
231 |
3000 |
7.7 |
100 |
| 26.25 |
32.5 |
29.375 |
220 |
7.33 |
451 |
2769 |
15.03 |
92.3 |
| 32.5 |
38.75 |
35.625 |
252 |
8.4 |
703 |
2549 |
23.43 |
84.97 |
| 38.75 |
45 |
41.875 |
239 |
7.97 |
942 |
2297 |
31.4 |
76.57 |
| 45 |
51.25 |
48.125 |
270 |
9 |
1212 |
2058 |
40.4 |
68.6 |
| 51.25 |
57.5 |
54.375 |
245 |
8.17 |
1457 |
1788 |
48.57 |
59.6 |
| 57.5 |
63.75 |
60.625 |
270 |
9 |
1727 |
1543 |
57.57 |
51.43 |
| 63.75 |
70 |
66.875 |
242 |
8.07 |
1969 |
1273 |
65.63 |
42.43 |
| 70 |
76.25 |
73.125 |
254 |
8.47 |
2223 |
1031 |
74.1 |
34.37 |
| 76.25 |
82.5 |
79.375 |
249 |
8.3 |
2472 |
777 |
82.4 |
25.9 |
| 82.5 |
88.75 |
85.625 |
293 |
9.77 |
2765 |
528 |
92.17 |
17.6 |
| 88.75 |
95 |
91.875 |
235 |
7.83 |
3000 |
235 |
100 |
7.83 |
| TOTAL |
- |
- |
3000 |
100 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# Histogram of Humedad with hist()'s default breaks; the returned histogram
# object is stored in histoT.
histoT <- hist(
  x = Humedad,
  col = "blue",
  main = "Gráfica Nº25: Distribución de la Humedad",
  xlab = "Humedad (%)",
  ylab = "Cantidad"
)

# Simplified frequency table: classes are taken from hist() instead of being
# computed by hand. plot = FALSE only builds the histogram object.
Hist_Humedad <- hist(Humedad, breaks = 8, plot = FALSE)
k <- length(Hist_Humedad$breaks)
# Lower limits are all breaks but the last; upper limits all but the first.
Li <- head(Hist_Humedad$breaks, -1)
Ls <- tail(Hist_Humedad$breaks, -1)
ni <- Hist_Humedad$counts
sum(ni)
## [1] 3000
Mc <- Hist_Humedad$mids
# hi is kept as a proportion here; it is converted to % only in the table.
hi <- ni / sum(ni)
sum(hi)
## [1] 1
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
TDF_Humedad <- data.frame(
  "Lim inf" = round(Li, 2),
  "Lim sup" = round(Ls, 2),
  "MC" = round(Mc, 2),
  "ni" = ni,
  "hi(%)" = round(hi * 100, 2),
  "Ni asc" = Ni_asc,
  "Ni desc" = Ni_desc,
  "Hi asc(%)" = round(Hi_asc * 100, 2),
  "Hi desc(%)" = round(Hi_desc * 100, 2),
  check.names = FALSE
)
# Totals row. It is a character vector, so appending it with rbind() turns
# every column of the table into character (display-only table).
totales <- c(
  Li = "TOTAL",
  Ls = "-",
  Mc = "-",
  ni = sum(ni),
  # Force 100% instead of summing the rounded percentages
  hi = 100,
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)
TDF_Humedad <- rbind(TDF_Humedad, totales)
library(dplyr)
library(gt)
# Render the simplified frequency table (Table 8) with gt: header, source
# note and black/gray border styling with striped body rows.
TDF_Humedad %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 8*"),
    # The bold markers must be balanced ("**...**"); an unbalanced closing
    # marker leaves a literal asterisk in the rendered subtitle.
    subtitle = md("**Tabla Simplificada de distribución de la Humedad (%) presente en el Suelo**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 3")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 8 |
| *Tabla Simplificada de distribución de la Humedad (%) presente en el Suelo |
| Lim inf |
Lim sup |
MC |
ni |
hi(%) |
Ni asc |
Ni desc |
Hi asc(%) |
Hi desc(%) |
| 20 |
30 |
25 |
369 |
12.3 |
369 |
3000 |
12.3 |
100 |
| 30 |
40 |
35 |
387 |
12.9 |
756 |
2631 |
25.2 |
87.7 |
| 40 |
50 |
45 |
392 |
13.07 |
1148 |
2244 |
38.27 |
74.8 |
| 50 |
60 |
55 |
416 |
13.87 |
1564 |
1852 |
52.13 |
61.73 |
| 60 |
70 |
65 |
411 |
13.7 |
1975 |
1436 |
65.83 |
47.87 |
| 70 |
80 |
75 |
398 |
13.27 |
2373 |
1025 |
79.1 |
34.17 |
| 80 |
90 |
85 |
441 |
14.7 |
2814 |
627 |
93.8 |
20.9 |
| 90 |
100 |
95 |
186 |
6.2 |
3000 |
186 |
100 |
6.2 |
| TOTAL |
- |
- |
3000 |
100 |
- |
- |
- |
- |
| Autor: Grupo 3 |
# Plots
# Histogram on a local scale: the y axis stops at the largest class count.
# The breaks and the y limit are taken from Hist_Humedad so the bars, the
# custom x-axis labels and the simplified table all describe the same classes.
hist(Humedad,
  breaks = Hist_Humedad$breaks,
  main = "Gráfica N°26 Distribución para la Humedad (%) presente en el suelo ",
  xlab = "Humedad(%)",
  ylab = "Cantidad",
  ylim = c(0, max(Hist_Humedad$counts)),
  col = "yellow",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n" # default x axis suppressed; drawn below at the class limits
)
axis(1,
  at = Hist_Humedad$breaks,
  labels = Hist_Humedad$breaks, las = 1,
  cex.axis = 0.9
)

# Histogram on a global scale: the y axis spans the full sample size.
# The breaks are taken from Hist_Humedad so the bars match the custom x-axis
# labels and the classes of the simplified table.
hist(Humedad,
  breaks = Hist_Humedad$breaks,
  main = "Gráfica N°27: Distribución de la Humedad (%) presente en el suelo",
  xlab = "Humedad (%)",
  ylab = "Cantidad",
  ylim = c(0, length(Humedad)),
  col = "green",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n" # default x axis suppressed; drawn below at the class limits
)
axis(1,
  at = Hist_Humedad$breaks,
  labels = Hist_Humedad$breaks, las = 1,
  cex.axis = 0.9
)

# Data for the percentage bar charts: keep only the class rows (the totals
# row carries "-" in the MC column).
datos_grafico <- subset(TDF_Humedad, !(MC %in% c("-", "TOTAL")))
# The percentage column is named "hi(%)" and is stored as character once the
# totals row has been appended. Read it by its exact name (no reliance on
# `$` partial matching) and convert it on the plotting copy only, so the
# display table does not gain an extra column.
datos_grafico$hi <- as.numeric(datos_grafico[["hi(%)"]])

# Local scale (0-20 %)
barplot(datos_grafico$hi,
  space = 0,
  col = "blue",
  main = "Gráfica N°28: Distribución porcentual de la Humedad (%) presente en el suelo",
  xlab = "Humedad (%)",
  ylab = "Porcentaje (%)",
  names.arg = datos_grafico$MC,
  ylim = c(0, 20)
)

# Global scale (0-100 %)
barplot(datos_grafico$hi,
  space = 0,
  col = "skyblue",
  main = "Gráfica N°29: Distribución porcentual de la Humedad (%) presente en el suelo",
  xlab = "Humedad(%)",
  ylab = "Porcentaje (%)",
  names.arg = datos_grafico$MC,
  ylim = c(0, 100)
)

# Horizontal box plot of Humedad; points beyond the whiskers are drawn
# (outline = TRUE) using open circles (pch = 1).
boxplot(
  x = Humedad,
  main = "Gráfica N°30 Distribución para la Humedad (%) presente en el suelo ",
  xlab = " Humedad (%)",
  horizontal = TRUE,
  outline = TRUE,
  pch = 1,
  col = "brown"
)

# Ascending and descending ogives of the absolute cumulative frequencies.
# The descending curve is drawn against the lower class limits and the
# ascending curve against the upper class limits.
plot(Li, Ni_desc,
  main = "Gráfica N°31: Distribución Ascendente y descendente\npara la Humedad (%) presente en el suelo",
  xlab = "Humedad(%)",
  ylab = "Cantidad",
  xlim = c(0, 100),
  col = "red",
  cex.axis = 0.8,
  type = "o",
  lwd = 3,
  las = 1,
  xaxt = "n" # default x axis suppressed; drawn below
)
lines(Ls, Ni_asc,
  col = "orange",
  type = "o",
  lwd = 3
)
# Tick marks restricted to the plotted range (xlim = 0-100), with the same
# spacing as the percentage ogive.
axis(1, at = seq(0, 100, by = 25))

# Ascending and descending ogives of the cumulative relative frequencies,
# expressed as percentages (Hi_* hold proportions, hence the * 100).
plot(
  x = Li,
  y = Hi_desc * 100,
  type = "o",
  col = "red",
  lwd = 2,
  xlim = c(0, 100),
  xaxt = "n",
  main = "Gráfica N°32: Distribución Ascendente y Descendente porcentual\npara la Humedad (%) presente en el suelo ",
  xlab = " Humedad(%)",
  ylab = "Porcentaje (%)"
)
# Ascending curve, drawn against the upper class limits.
lines(
  x = Ls,
  y = Hi_asc * 100,
  type = "o",
  col = "orange",
  lwd = 3
)
# Custom x axis with ticks every 25 units.
axis(side = 1, at = seq(0, 100, by = 25))

# STATISTICAL INDICATORS
# Measures of central tendency
# Arithmetic mean (rounded to the nearest integer)
media <- round(mean(Humedad), 0)
media
## [1] 58
# Mode: class mark(s) of the class(es) with the highest absolute frequency.
# It is taken from the numeric histogram object rather than from TDF_Humedad,
# whose columns are character and include the TOTAL row (max() on them would
# compare strings, not numbers).
moda <- Hist_Humedad$mids[
  Hist_Humedad$counts == max(Hist_Humedad$counts)
]
moda
## [1] 85
# Median
mediana <- median(Humedad)
mediana
## [1] 58.45
# MEASURES OF DISPERSION #
# Variance
varianza <- var(Humedad)
varianza
## [1] 456.1518
# Standard deviation
# NOTE: the variable name `sd` shadows stats::sd(); it is kept because the
# summary table reads it by this name.
sd <- sd(Humedad)
sd
## [1] 21.35771
# Coefficient of variation (%), computed from the unrounded mean so that
# rounding of `media` does not leak into the result.
cv <- round((sd / mean(Humedad)) * 100, 2)
cv
# Previously printed 36.82, obtained with the mean rounded to 58; re-run to
# refresh this value.
# MEASURES OF SHAPE #
library(e1071)
# Skewness coefficient
# NOTE(review): skewness is requested with type = 2 while kurtosis below is
# left at its default type; confirm that mixing estimators is intended.
asimetria <- skewness(x = Humedad, type = 2)
asimetria
## [1] -0.0417425
# Kurtosis
curtosis <- kurtosis(x = Humedad)
curtosis
## [1] -1.181146
# FINAL SUMMARY TABLE
# One-row data frame with the indicators computed above, rounded for display.
# check.names = FALSE keeps the header "Valores Atípicos" as written instead
# of letting data.frame() rewrite it as "Valores.Atípicos".
tabla_indicadores <- data.frame(
  "Variable" = "Humedad",
  "Rango" = paste0("[", min(Humedad), " ; ", max(Humedad), "]"),
  "X" = round(media, 0),
  "Me" = round(mediana, 0),
  # Several modal classes are shown as a comma-separated list
  "Mo" = paste(moda, collapse = ", "),
  "V" = round(varianza, 2),
  "Sd" = round(sd, 0),
  "Cv" = cv,
  "As" = round(asimetria, 2),
  "K" = round(curtosis, 2),
  # NOTE(review): left blank in the original; presumably filled in by hand
  "Valores Atípicos" = "",
  check.names = FALSE
)
library(gt)
# Render the indicator summary as a gt table and print it.
tabla_indicadores_gt <- tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("Tabla N°8.1"),
    subtitle = md("*Indicadores estadísticos de la variable Humedad*")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 3")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black",
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black"
  ) %>%
  # Bold the row of the variable described by this table. The condition must
  # match the value stored in the Variable column ("Humedad"); otherwise no
  # row is selected and the style is never applied.
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(
      rows = Variable == "Humedad"
    )
  )
tabla_indicadores_gt
| Tabla N°8.1 |
| Indicadores estadísticos de la variable Humedad |
| Variable |
Rango |
X |
Me |
Mo |
V |
Sd |
Cv |
As |
K |
Valores.Atípicos |
| Humedad |
[20 ; 95] |
58 |
58 |
85 |
456.15 |
21 |
36.82 |
-0.04 |
-1.18 |
|
| Autor: Grupo 3 |