# =====================================================
# ANÁLISIS ESTADÍSTICO DE GRAVA - VERSIÓN LIMPIA
# Grupo 3 - Estadística
# =====================================================

# 1. Load the data
# The file is comma-separated with a header row and "." as the decimal mark;
# text columns are kept as character vectors instead of factors.
ruta_datos <- "ESTADISTICA/dataset_geologico_limpio_80.csv"
datos <- read.csv(
  file = ruta_datos,
  header = TRUE,
  sep = ",",
  dec = ".",
  stringsAsFactors = FALSE
)

cat("✅ Columnas cargadas:", ncol(datos), "\n")   # expected to report 58
## ✅ Columnas cargadas: 58

# 2. GRAVEL variable
# Drop every character that is not a digit, "." or "-" before converting to
# numeric; whatever still cannot be parsed becomes NA.
grava_raw <- as.numeric(gsub("[^0-9.-]", "", datos$GRAVEL_PCT))

# Keep only non-missing values inside the closed range [0, 100].
es_valido <- !is.na(grava_raw) & grava_raw >= 0 & grava_raw <= 100
grava <- grava_raw[es_valido]

cat("Valores válidos de grava:", length(grava), "\n\n")
## Valores válidos de grava: 27286
# 3. Basic computations
# Builds the class intervals of the frequency distribution of `grava` and the
# absolute (ni), relative (hi, in %) and cumulative frequencies.
n <- length(grava)
if (n == 0) {
  stop("No hay valores válidos de grava para analizar.", call. = FALSE)
}
minimo <- min(grava)
maximo <- max(grava)
R <- maximo - minimo                      # range of the data
k <- max(1, floor(1 + 3.3 * log10(n)))    # number of classes (Sturges' rule)
if (R == 0) {
  k <- 1                                  # all values equal: one single class
}
A <- R / k                                # class width

# Exact (unrounded) class boundaries. Counting must use these: rounding the
# lower and upper limits independently makes neighbouring classes overlap
# (e.g. 13.33 vs 13.34), so some observations are counted twice and the table
# total exceeds n.
cortes <- seq(minimo, maximo, length.out = k + 1)
cortes[1] <- minimo
cortes[k + 1] <- maximo

# Rounded limits and class marks are for display only; each upper limit now
# equals the next lower limit.
Li <- round(cortes[-(k + 1)], 2)
Ls <- round(cortes[-1], 2)
Ls[length(Ls)] <- maximo
MC <- round((cortes[-(k + 1)] + cortes[-1]) / 2, 2)

# Frequencies
# Classes are left-closed [Li, Ls); only the last one also includes its upper
# limit so that the maximum is counted.
clase <- findInterval(grava, cortes, rightmost.closed = TRUE)
ni <- as.numeric(tabulate(clase, nbins = k))
stopifnot(sum(ni) == n)                   # every observation counted once

hi <- round(ni / n * 100, 3)

# Cumulative frequencies; the percentages are derived from the cumulative
# counts so rounding errors of `hi` do not accumulate.
Niasc <- cumsum(ni)
Nidsc <- rev(cumsum(rev(ni)))
Hiasc <- round(Niasc / n * 100, 2)
Hidsc <- round(Nidsc / n * 100, 2)

# 4. Frequency distribution table (formatted with gt)
library(gt)
library(dplyr)
# Attaching dplyr masks filter() and lag() from 'stats' and intersect(),
# setdiff(), setequal() and union() from 'base'.

# One row per class interval.
TDFgrava <- data.frame(
  Li = Li, Ls = Ls, MC = MC,
  ni = ni, hi = hi,
  Niasc = Niasc, Nidsc = Nidsc,
  Hiasc = Hiasc, Hidsc = Hidsc
)

# Totals row: only the ni and hi columns carry a value, the rest stay blank.
fila_total <- data.frame(
  Li = "TOTAL", Ls = "", MC = "",
  ni = sum(ni), hi = round(sum(hi), 2),
  Niasc = "", Nidsc = "", Hiasc = "", Hidsc = ""
)

TDFgrava_p <- rbind(TDFgrava, fila_total)

# Build the gt table step by step: header, source note, then styling.
tabla_base <- gt(TDFgrava_p)
tabla_titulada <- tab_header(
  tabla_base,
  title = md("**Tabla Nº 1**"),
  subtitle = md("Tabla de distribución de la grava de los sedimentos marinos")
)
tabla_con_nota <- tab_source_note(tabla_titulada, md("Autor: Grupo 3"))
tabla_grava_p <- tab_options(
  tabla_con_nota,
  table.border.top.color = "black",
  table.border.bottom.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray"
)

tabla_grava_p
## Tabla Nº 1
## Tabla de distribución de la grava de los sedimentos marinos
## Li Ls MC ni hi Niasc Nidsc Hiasc Hidsc
## 0 6.67 3.34 22538 82.587 22538 27290 82.59 100
## 6.67 13.34 10 1488 5.453 24026 4752 88.04 17.41
## 13.33 20 16.66 926 3.393 24952 3264 91.43 11.96
## 20 26.67 23.34 620 2.272 25572 2338 93.7 8.57
## 26.67 33.34 30.01 421 1.543 25993 1718 95.25 6.3
## 33.33 40 36.66 281 1.030 26274 1297 96.28 4.75
## 40 46.67 43.34 230 0.843 26504 1016 97.12 3.72
## 46.67 53.34 50.01 170 0.623 26674 786 97.74 2.88
## 53.33 60 56.66 146 0.535 26820 616 98.28 2.26
## 60 66.67 63.34 108 0.396 26928 470 98.67 1.72
## 66.67 73.34 70 85 0.311 27013 362 98.99 1.33
## 73.33 80 76.66 75 0.275 27088 277 99.26 1.02
## 80 86.67 83.34 49 0.180 27137 202 99.44 0.74
## 86.67 93.34 90 65 0.238 27202 153 99.68 0.56
## 93.33 100 96.66 88 0.322 27290 88 100 0.32
## TOTAL 27290 100.000
## Autor: Grupo 3
# 5. Plots
library(moments)  # provides skewness() and kurtosis() for the indicators

# One grey shade per class of the frequency table.
colores <- gray.colors(length(ni), start = 0.3, end = 0.9)

# Explicit break points: a single number passed to `breaks` is only a
# suggestion (hist() then picks "pretty" breakpoints), so the bars would not
# match the k classes of the frequency table. With constant data there is no
# valid break vector, so fall back to the class count.
cortes_hist <- if (maximo > minimo) {
  seq(minimo, maximo, length.out = k + 1)
} else {
  k
}

# right = FALSE gives left-closed classes [a, b); include.lowest = TRUE keeps
# the maximum inside the last class.
hist(grava, breaks = cortes_hist, right = FALSE, include.lowest = TRUE,
     col = colores,
     main = "Gráfica Nº2: Distribución de la grava",
     xlab = "Grava (%)", ylab = "Frecuencia")

boxplot(grava, horizontal = TRUE, col = "lightgreen",
        main = "Gráfica Nº6: Boxplot de la grava", xlab = "Grava (%)")

# 6. Statistical indicators
# Location, spread and shape measures of `grava`.
x <- mean(grava)
Me <- median(grava)
sd_val <- sd(grava)
CV <- round((sd_val / x) * 100, 2)   # coefficient of variation, in percent
As <- round(skewness(grava), 2)
K  <- round(kurtosis(grava), 2)

# check.names = FALSE keeps the headers exactly as written; otherwise
# data.frame() rewrites "Desv. Est." and "CV (%)" into syntactic names
# ("Desv..Est.", "CV....") and those mangled names show up in the table.
TablaIndicadores <- data.frame(
  Variable = "Grava (%)",
  Mínimo = round(minimo, 2),
  Máximo = round(maximo, 2),
  Media = round(x, 2),
  Mediana = round(Me, 2),
  `Desv. Est.` = round(sd_val, 2),
  `CV (%)` = CV,
  Asimetría = As,
  Curtosis = K,
  check.names = FALSE
)

library(knitr)
library(kableExtra)
# Attaching kableExtra masks group_rows() from dplyr.
kable(TablaIndicadores, format = "markdown", align = "c",
      caption = "Tabla N°3: Indicadores estadísticos de la variable grava") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"))
## Tabla N°3: Indicadores estadísticos de la variable grava
## Variable Mínimo Máximo Media Mediana Desv..Est. CV.... Asimetría Curtosis
## Grava (%) 0 100 5.38 0.03 13.62 252.94 3.86 20.01
# 7. Outliers
# Values flagged as outliers by the boxplot rule (boxplot.stats()$out).
outliers <- boxplot.stats(grava)$out
num_outliers <- length(outliers)

# Plain if/else because the condition is a single value; a numeric NA keeps
# the columns numeric when there are no outliers.
if (num_outliers > 0) {
  min_out <- round(min(outliers), 2)
  max_out <- round(max(outliers), 2)
} else {
  min_out <- NA_real_
  max_out <- NA_real_
}

# check.names = FALSE keeps the header "Cantidad de outliers" as written
# instead of turning it into "Cantidad.de.outliers".
TablaOutliers <- data.frame(
  "Cantidad de outliers" = num_outliers,
  "Mínimo" = min_out,
  "Máximo" = max_out,
  check.names = FALSE
)

kable(TablaOutliers, format = "markdown", align = "c",
      caption = "Tabla N°4: Outliers de la variable grava") %>%
  kable_styling(bootstrap_options = c("striped", "hover"))
## Tabla N°4: Outliers de la variable grava
## Cantidad.de.outliers Mínimo Máximo
## 4238 8.51 100
# Final status message printed once every step above has run.
cat("\n✅ ¡ANÁLISIS DE GRAVA COMPLETADO CORRECTAMENTE!\n")
## 
## ✅ ¡ANÁLISIS DE GRAVA COMPLETADO CORRECTAMENTE!