# ===================================================
# CÓDIGO FINAL SIN NINGÚN ERROR – CLASIFICACIÓN
# ===================================================
# Work from the project folder so that the relative paths used for the input
# CSV (and for any files written later) resolve against it.
setwd("/cloud/project")

# Load the sediment records: semicolon-separated fields, "." as decimal mark,
# first row holds the column names.
datos <- read.csv(
  file = "Sedimentos Marinos.csv",
  header = TRUE,
  sep = ";",
  dec = "."
)
# Install any missing packages (only needed the first time).
# requireNamespace() only checks that a package can be loaded; it does not
# attach it, so the attach order is decided solely by the library() calls below.
paquetes <- c("dplyr", "ggplot2", "forcats", "scales", "kableExtra")
instalados <- vapply(paquetes, requireNamespace, logical(1), quietly = TRUE)
if (!all(instalados)) {
  install.packages(paquetes[!instalados])
}

# Attach the packages. Unlike require(), library() stops with an error if a
# package is still unavailable, so a failed install is not silently ignored.
library(dplyr)
library(ggplot2)
library(forcats)
library(scales)
library(kableExtra)
# Frequency table: absolute count (ni) and percentage (hi) per classification,
# sorted from most to least frequent.
#
# The rendered table listed the same label (e.g. "SAND", "CLAYEY SILT") on
# several rows. count() cannot do that for identical strings, so the raw
# values presumably differ only in leading/trailing/repeated whitespace.
# NOTE(review): confirm against the CSV. Labels are normalised before counting
# so that each category is tallied once.
tabla <- datos %>%
  mutate(
    CLASSIFICATION = trimws(
      gsub("[[:space:]]+", " ", as.character(CLASSIFICATION))
    )
  ) %>%
  count(CLASSIFICATION, name = "ni") %>%
  mutate(
    hi = round(ni / sum(ni) * 100, 3),
    Clasificación = CLASSIFICATION
  ) %>%
  select(Clasificación, ni, hi) %>%
  arrange(desc(ni))

# Append the TOTAL row (sum of all counts, 100 %).
tabla <- bind_rows(
  tabla,
  data.frame(
    Clasificación = "TOTAL",
    ni = sum(tabla$ni),
    hi = 100
  )
)
# Styled table: the TOTAL row (always the last one) in green, and the most
# frequent categories highlighted in yellow.
# The highlight covers at most the first 6 rows and never reaches the TOTAL
# row, so a table with few categories neither errors nor repaints TOTAL.
n_destacadas <- min(6, nrow(tabla) - 1)

tabla_html <- kable(
  tabla,
  col.names = c("Clasificación", "ni", "hi (%)"),
  digits = 3
) %>%
  kable_styling(
    full_width = FALSE,
    font_size = 15,
    bootstrap_options = c("striped", "hover")
  ) %>%
  row_spec(nrow(tabla), bold = TRUE, background = "#2E7D32", color = "white")

if (n_destacadas > 0) {
  tabla_html <- tabla_html %>%
    row_spec(seq_len(n_destacadas), background = "#FFEB3B", color = "black")
}

# Display the table (auto-printed at top level / rendered by knitr).
tabla_html
|
Clasificación
|
ni
|
hi (%)
|
|
SAND
|
3059
|
21.535
|
|
SAND
|
2114
|
14.882
|
|
GRAVELLY SEDIMENT
|
1499
|
10.553
|
|
SAND
|
1150
|
8.096
|
|
CLAYEY SILT
|
1074
|
7.561
|
|
CLAYEY SILT
|
831
|
5.850
|
|
SAND SILT CLAY
|
698
|
4.914
|
|
SILTY SAND
|
601
|
4.231
|
|
GRAVEL
|
596
|
4.196
|
|
SILTY SAND
|
530
|
3.731
|
|
SILTY CLAY
|
373
|
2.626
|
|
SANDY SILT
|
325
|
2.288
|
|
SANDY SILT
|
242
|
1.704
|
|
SILTY CLAY
|
211
|
1.485
|
|
BOULDERS
|
143
|
1.007
|
|
SILTY SAND
|
142
|
1.000
|
|
SILT
|
116
|
0.817
|
|
SILT
|
114
|
0.803
|
|
CLAY
|
106
|
0.746
|
|
GRAVEL > 10%
|
66
|
0.465
|
|
CLAY
|
40
|
0.282
|
|
CLAYEY SAND
|
32
|
0.225
|
|
CLAYEY SAND
|
30
|
0.211
|
|
|
28
|
0.197
|
|
SAND
|
27
|
0.190
|
|
BEDROCK
|
21
|
0.148
|
|
SAND
|
14
|
0.099
|
|
SANDY CLAY
|
8
|
0.056
|
|
SANDY SILT
|
5
|
0.035
|
|
MUD
|
4
|
0.028
|
|
GRAVEL
|
3
|
0.021
|
|
MUDDY SAND
|
1
|
0.007
|
|
SANDY CLAY
|
1
|
0.007
|
|
SILT
|
1
|
0.007
|
|
TOTAL
|
14205
|
100.000
|
# Plot data: every category row of the frequency table, without the TOTAL row.
g <- tabla %>% filter(Clasificación != "TOTAL")

# 1. Horizontal bars (well suited to many categories) ----
p_barras <- ggplot(g, aes(x = fct_reorder(Clasificación, ni), y = ni)) +
  geom_col(fill = "#1976D2", width = 0.75) +
  geom_text(
    aes(label = paste(ni, "(", hi, "%)")),
    hjust = -0.1, size = 4.5, fontface = "bold"
  ) +
  # Extra room past the longest bar so its label is not clipped by the panel.
  scale_y_continuous(expand = expansion(mult = c(0, 0.2))) +
  coord_flip() +
  labs(
    title = "Distribución de la Clasificación de Sedimentos Marinos",
    subtitle = paste("Total de muestras =", sum(g$ni)),
    x = "Clasificación", y = "Frecuencia absoluta (ni)"
  ) +
  theme_minimal(base_size = 14) +
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5),
    plot.subtitle = element_text(hjust = 0.5)
  )
print(p_barras)

# 2. Clean pie chart (top 8 + "Otros") ----
# Split one ordered copy of the data so the two groups can never overlap or
# exceed 8 rows when counts are tied (slice_max/slice_min keep ties by default).
ordenado <- g %>% arrange(desc(ni))
top8 <- ordenado %>% slice_head(n = 8)
resto <- ordenado %>% slice_tail(n = max(nrow(ordenado) - 8, 0))

tarta <- top8
if (nrow(resto) > 0) {
  # Round the summed percentage so the label does not show floating-point noise.
  otros <- resto %>%
    summarise(ni = sum(ni), hi = round(sum(hi), 3), Clasificación = "Otros")
  tarta <- bind_rows(top8, otros)
}

p_tarta <- ggplot(tarta, aes(x = "", y = ni, fill = Clasificación)) +
  # `linewidth` (not `size`) sets the border width; `size` was being ignored.
  geom_col(width = 0.5, color = "white", linewidth = 1.2) +
  coord_polar("y") +
  geom_text(
    aes(label = paste0(hi, "%")),
    position = position_stack(vjust = 0.5),
    color = "white", size = 6, fontface = "bold"
  ) +
  labs(title = "Proporción de Clasificación (Top 8 + Otros)") +
  theme_void() +
  scale_fill_brewer(palette = "Set3") +
  theme(
    plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
    legend.position = "right"
  )
print(p_tarta)

# 3. Pareto chart ----
# Hi_acum is the cumulative percentage; it is drawn on the count axis by
# scaling with max(ni) / 100, and the secondary axis applies the inverse.
pareto <- g %>%
  arrange(desc(ni)) %>%
  mutate(Hi_acum = round(cumsum(hi), 3))

p_pareto <- ggplot(pareto, aes(x = reorder(Clasificación, -ni))) +
  geom_col(aes(y = ni), fill = "#8E24AA") +
  # Line width uses `linewidth`; `size` for lines is deprecated since
  # ggplot2 3.4.0.
  geom_line(
    aes(y = Hi_acum * max(ni) / 100, group = 1),
    color = "#E91E63", linewidth = 2
  ) +
  geom_point(aes(y = Hi_acum * max(ni) / 100), color = "#E91E63", size = 4) +
  geom_text(
    aes(y = Hi_acum * max(ni) / 100, label = paste0(Hi_acum, "%")),
    vjust = -1, color = "#E91E63", fontface = "bold"
  ) +
  scale_y_continuous(
    name = "Frecuencia absoluta",
    sec.axis = sec_axis(
      ~ . * 100 / max(pareto$ni),
      name = "Porcentaje acumulado (%)"
    )
  ) +
  labs(title = "Diagrama de Pareto – Clasificación", x = "") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1),
    axis.title.y.right = element_text(color = "#E91E63"),
    axis.text.y.right = element_text(color = "#E91E63")
  )
print(p_pareto)

# Save the images (optional).
# Each call names its plot explicitly: without `plot =`, ggsave() writes the
# last plot displayed, so all three files would contain the Pareto chart.
ggsave("01_barras_clasificacion.png", plot = p_barras,
       width = 14, height = 9, dpi = 300)
ggsave("02_tarta_clasificacion.png", plot = p_tarta,
       width = 10, height = 10, dpi = 300)
ggsave("03_pareto_clasificacion.png", plot = p_pareto,
       width = 14, height = 8, dpi = 300)