# =====================================================
# ANÁLISIS ESTADÍSTICO DE ARCILLA - VERSIÓN LIMPIA
# Grupo 3
# =====================================================
# 1. Cargar datos
datos <- read.csv("ESTADISTICA/dataset_geologico_limpio_80.csv",
header = TRUE, sep = ",", dec = ".", stringsAsFactors = FALSE)
cat("✅ Columnas cargadas:", ncol(datos), "\n") # debe decir 58
## ✅ Columnas cargadas: 58
# 2. Variable Arcilla (columna correcta según tu archivo)
arcilla_raw <- as.numeric(gsub("[^0-9.-]", "", datos$CLAY_PCT))
arcilla <- na.omit(arcilla_raw)
arcilla <- arcilla[arcilla >= 0 & arcilla <= 100]
cat("Valores válidos de arcilla:", length(arcilla), "\n\n")
## Valores válidos de arcilla: 27240
# 3. Cálculos básicos
n <- length(arcilla)
minimo <- min(arcilla)
maximo <- max(arcilla)
R <- maximo - minimo
k <- max(1, floor(1 + 3.3 * log10(n)))
A <- R / k
Li <- round(seq(minimo, maximo - A + 1e-6, by = A), 2)
Ls <- round(Li + A, 2)
Ls[length(Ls)] <- maximo
MC <- round((Li + Ls)/2, 2)
# Frecuencias
ni <- numeric(length(Li))
for(i in 1:length(Li)){
if(i == length(Li)){
ni[i] <- sum(arcilla >= Li[i] & arcilla <= Ls[i])
} else {
ni[i] <- sum(arcilla >= Li[i] & arcilla < Ls[i])
}
}
hi <- round(ni / sum(ni) * 100, 3)
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- round(cumsum(hi), 2)
Hidsc <- round(rev(cumsum(rev(hi))), 2)
# 4. Tabla de Distribución (bonita)
library(gt)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
TDFarcilla <- data.frame(Li, Ls, MC, ni, hi, Niasc, Nidsc, Hiasc, Hidsc)
fila_total <- data.frame(Li = "TOTAL", Ls = "", MC = "",
ni = sum(ni), hi = round(sum(hi),2),
Niasc = "", Nidsc = "", Hiasc = "", Hidsc = "")
TDFarcilla_p <- rbind(TDFarcilla, fila_total)
tabla_arcilla_p <- TDFarcilla_p %>%
gt() %>%
tab_header(title = md("**Tabla Nº 1**"),
subtitle = md("Tabla de distribución de la arcilla de los sedimentos marinos")) %>%
tab_source_note(md("Autor: Grupo 3")) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
table_body.hlines.color = "gray"
)
tabla_arcilla_p
| Tabla Nº 1 |
| Tabla de distribución de la arcilla de los sedimentos marinos |
| Li |
Ls |
MC |
ni |
hi |
Niasc |
Nidsc |
Hiasc |
Hidsc |
| 0 |
6.31 |
3.15 |
14156 |
51.981 |
14156 |
27233 |
51.98 |
100 |
| 6.31 |
12.62 |
9.46 |
2706 |
9.936 |
16862 |
13077 |
61.92 |
48.02 |
| 12.63 |
18.94 |
15.79 |
2536 |
9.312 |
19398 |
10371 |
71.23 |
38.08 |
| 18.94 |
25.25 |
22.09 |
1725 |
6.334 |
21123 |
7835 |
77.56 |
28.77 |
| 25.26 |
31.57 |
28.41 |
1437 |
5.277 |
22560 |
6110 |
82.84 |
22.44 |
| 31.57 |
37.88 |
34.73 |
1325 |
4.865 |
23885 |
4673 |
87.7 |
17.16 |
| 37.88 |
44.19 |
41.03 |
1037 |
3.808 |
24922 |
3348 |
91.51 |
12.29 |
| 44.2 |
50.51 |
47.36 |
680 |
2.497 |
25602 |
2311 |
94.01 |
8.49 |
| 50.51 |
56.82 |
53.66 |
457 |
1.678 |
26059 |
1631 |
95.69 |
5.99 |
| 56.83 |
63.14 |
59.98 |
371 |
1.362 |
26430 |
1174 |
97.05 |
4.31 |
| 63.14 |
69.45 |
66.3 |
298 |
1.094 |
26728 |
803 |
98.14 |
2.95 |
| 69.45 |
75.76 |
72.61 |
250 |
0.918 |
26978 |
505 |
99.06 |
1.85 |
| 75.77 |
82.08 |
78.92 |
162 |
0.595 |
27140 |
255 |
99.66 |
0.94 |
| 82.08 |
88.39 |
85.24 |
74 |
0.272 |
27214 |
93 |
99.93 |
0.34 |
| 88.4 |
94.71 |
91.56 |
19 |
0.070 |
27233 |
19 |
100 |
0.07 |
| TOTAL |
|
|
27233 |
100.000 |
|
|
|
|
| Autor: Grupo 3 |
# 5. Gráficos
library(moments)
colores <- gray.colors(length(ni), start = 0.3, end = 0.9)
hist(arcilla, breaks = k, col = colores,
main = "Gráfica Nº2: Distribución de la arcilla",
xlab = "Arcilla (%)", ylab = "Frecuencia")

boxplot(arcilla, horizontal = TRUE, col = "lightblue",
main = "Gráfica Nº6: Boxplot de la arcilla", xlab = "Arcilla (%)")

# 6. Indicadores estadísticos
x <- mean(arcilla)
Me <- median(arcilla)
sd_val <- sd(arcilla)
CV <- round((sd_val / x) * 100, 2)
As <- round(skewness(arcilla), 2)
K <- round(kurtosis(arcilla), 2)
TablaIndicadores <- data.frame(
Variable = "Arcilla (%)",
Mínimo = round(minimo,2),
Máximo = round(maximo,2),
Media = round(x,2),
Mediana = round(Me,2),
`Desv. Est.` = round(sd_val,2),
`CV (%)` = CV,
Asimetría = As,
Curtosis = K
)
library(knitr)
library(kableExtra)
##
## Adjuntando el paquete: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
kable(TablaIndicadores, format = "markdown", align = "c",
caption = "Tabla N°3: Indicadores estadísticos de la variable arcilla") %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
Tabla N°3: Indicadores estadísticos de la variable arcilla
|
Variable
|
Mínimo
|
Máximo
|
Media
|
Mediana
|
Desv..Est.
|
CV….
|
Asimetría
|
Curtosis
|
|
Arcilla (%)
|
0
|
94.71
|
14.21
|
5.37
|
18.39
|
129.46
|
1.55
|
4.89
|
# 7. Outliers
outliers <- boxplot.stats(arcilla)$out
num_outliers <- length(outliers)
min_out <- ifelse(num_outliers > 0, round(min(outliers),2), NA)
max_out <- ifelse(num_outliers > 0, round(max(outliers),2), NA)
TablaOutliers <- data.frame(
"Cantidad de outliers" = num_outliers,
"Mínimo" = min_out,
"Máximo" = max_out
)
kable(TablaOutliers, format = "markdown", align = "c",
caption = "Tabla N°4: Outliers de la variable arcilla") %>%
kable_styling(bootstrap_options = c("striped", "hover"))
Tabla N°4: Outliers de la variable arcilla
|
Cantidad.de.outliers
|
Mínimo
|
Máximo
|
|
1275
|
55.43
|
94.71
|