CARGA DE DATOS Y LIBRERÍAS

library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(gt)
library(magrittr)
# Read the sediment CSV into a data frame: the first row holds the column
# names, fields are comma-separated, "." is the decimal mark, and text
# columns stay character vectors instead of becoming factors.
Sedimentos_Marinos <- read.csv(
  file = "ESTADISTICA/dataset_geologico_limpio_80.csv",
  stringsAsFactors = FALSE,
  header = TRUE,
  dec = ".",
  sep = ","
)

# Clean the AREA text and derive the grouped Area variable.
#   Area_raw   - untouched copy of the AREA column.
#   AREA_clean - AREA with surrounding whitespace trimmed and upper-cased;
#                this is the value matched against the lists below.
#   Area       - regional group; NA when AREA_clean is in none of the lists.
Sedimentos_Marinos <- Sedimentos_Marinos %>%
  mutate(
    Area_raw = AREA,
    AREA_clean = toupper(trimws(AREA)),
    Area = case_when(
      AREA_clean %in% c(
        "STELLWAGEN BANK", "STELLWAGEN BASIN", "MASS BAY", "MASSBAY",
        "MASSACHUSETTS BAY", "BOSTON HARBOR", "BOSTON HARBOR ISLANDS",
        "MERRIMACK", "MERRIMACK EMBAYMENT", "COASTAL MASSACHUSETTS",
        "MARTHA'S VINEYARD", "BUZZARDS BAY", "PROVINCETOWN HARBOR",
        "IPSWICH BAY", "CAPE COD BAY", "COASTAL MA"
      ) ~ "New England Shelf",

      AREA_clean %in% c(
        "LONG ISLAND SOUND", "NEW YORK BIGHT", "BLOCK ISLAND SOUND",
        "NEW YORK", "FIRE ISLAND", "LIS"
      ) ~ "Long Island & New York Bight",

      AREA_clean %in% c(
        "NORTH CAROLINA", "CAPE HATTERAS", "BARNEGAT BAY",
        "HUDSON CANYON", "HUDSON SHELF VALLEY"
      ) ~ "Mid-Atlantic Coast",

      AREA_clean %in% c(
        "GULF OF MAINE", "MAINE", "BAY OF FUNDY", "NEW HAMPSHIRE"
      ) ~ "Gulf of Maine Region",

      AREA_clean %in% c(
        "RHODE ISLAND SOUND", "NANTUCKET SOUND", "NANTUCKET"
      ) ~ "Southern New England & Rhode Island",

      AREA_clean %in% c(
        "FLORIDA", "SOUTH CAROLINA", "APALACHICOLA BAY"
      ) ~ "Southeast USA",

      AREA_clean %in% c("GULF OF MEXICO") ~ "Gulf of Mexico",

      AREA_clean %in% c(
        "PUERTO RICO", "CARIBBEAN", "MONA CANYON"
      ) ~ "Caribbean Region",

      AREA_clean %in% c(
        "CALIFORNIA", "WASHINGTON", "ALASKA", "CASCADIA"
      ) ~ "Western USA",

      # Anything not listed above stays unassigned.
      TRUE ~ NA_character_
    )
  )

# Keep only the records that were assigned to one of the regional groups.
Sedimentos_Marinos_Area <- filter(Sedimentos_Marinos, !is.na(Area))

TABLA DE DISTRIBUCIÓN DE FRECUENCIA

# Frequency distribution table for Area.
#   ni - absolute frequency (number of records per area).
#   hi - relative frequency in percent, rounded to two decimals.
# A closing "Total" row is appended. Inside bind_rows(), `.` is the per-area
# table coming down the pipe (it is still passed as the first argument), so
# sum(.$ni) is the grand total. The total percentage is fixed at 100 rather
# than summed, because the rounded hi values need not add up to exactly 100.
# The former rename(Area = Area) step renamed the column to its own name and
# had no effect, so it was dropped.
TDF_Area_Final <- Sedimentos_Marinos_Area %>%
  count(Area, name = "ni") %>%
  mutate(hi = round(ni / sum(ni) * 100, 2)) %>%
  bind_rows(
    data.frame(
      Area = "Total",
      ni = sum(.$ni),
      hi = 100
    )
  )

# Build the gt presentation of the frequency table: bold numbered title,
# descriptive subtitle, author source note, and the "Total" row in bold.
tabla_area_gt <- gt(TDF_Area_Final) %>%
  tab_header(
    title = md("**Tabla N°1**"),
    subtitle = md(
      "Frecuencias Absolutas y Relativas del Área de Recolección de Sedimentos Marinos"
    )
  ) %>%
  tab_source_note(
    source_note = md("Autor: Leonel Padilla")
  ) %>%
  tab_style(
    style = cell_text(weight = "bold"),
    locations = cells_body(rows = Area == "Total")
  )

# Display the finished table.
tabla_area_gt
Tabla N°1
Frecuencias Absolutas y Relativas del Área de Recolección de Sedimentos Marinos
Area ni hi
Caribbean Region 925 7.27
Gulf of Maine Region 604 4.75
Gulf of Mexico 1133 8.90
Long Island & New York Bight 2545 19.99
Mid-Atlantic Coast 1685 13.24
New England Shelf 4982 39.14
Southeast USA 476 3.74
Southern New England & Rhode Island 108 0.85
Western USA 271 2.13
Total 12729 100.00
Autor: Leonel Padilla

GRÁFICAS DE DISTRIBUCIÓN DE FRECUENCIA

# Data for the charts: the frequency table without its closing "Total" row.
TDF_Area_Plot <- filter(TDF_Area_Final, Area != "Total")

# Four bar charts of the per-area frequencies:
#   1) ni on an automatic y scale      2) ni on a 0..n scale
#   3) hi on an automatic y scale      4) hi on a 0..100 scale
#
# The area names are drawn perpendicular to the axis (las = 2) and the
# longest one has 35 characters, so the default bottom margin clips them.
# Widen the bottom margin for these plots, use the same reduced label size in
# all four charts (previously only the first one shrank the names), and draw
# the x-axis title below the names instead of on top of them. The previous
# graphical parameters are restored afterwards.
par_barras <- par(mar = c(11, 4, 4, 2) + 0.1)

barplot(TDF_Area_Plot$ni,
        main = "Gráfica Nº1: Frecuencia Local por Área (ni)",
        xlab = "", ylab = "Cantidad",
        col = "yellow", names.arg = TDF_Area_Plot$Area,
        las = 2, cex.names = 0.65)
title(xlab = "Áreas", line = 10)

# Same counts, with the y axis running up to the total number of records.
barplot(TDF_Area_Plot$ni,
        main = "Gráfica Nº2: Frecuencia Global por Área (ni)",
        xlab = "", ylab = "Cantidad",
        col = "orange", names.arg = TDF_Area_Plot$Area,
        las = 2, cex.names = 0.65,
        ylim = c(0, sum(TDF_Area_Plot$ni)))
title(xlab = "Áreas", line = 10)

barplot(TDF_Area_Plot$hi,
        main = "Gráfica Nº3: Frecuencia Relativa Local (hi %)",
        xlab = "", ylab = "Porcentaje (%)",
        col = "cyan", names.arg = TDF_Area_Plot$Area,
        las = 2, cex.names = 0.65)
title(xlab = "Áreas", line = 10)

# Same percentages, with the y axis running up to 100 %.
barplot(TDF_Area_Plot$hi,
        main = "Gráfica Nº4: Frecuencia Relativa Global (hi %)",
        xlab = "", ylab = "Porcentaje (%)",
        col = "#86D0B9", names.arg = TDF_Area_Plot$Area,
        las = 2, cex.names = 0.65,
        ylim = c(0, 100))
title(xlab = "Áreas", line = 10)

par(par_barras)

# Pie chart (Gráfica Nº5) of the percentage share of each area.
#
# pie() and legend() recycle the colour vector, so with the 7-colour base
# palette every area beyond the seventh repeated the colour of an earlier
# slice and could not be told apart in the legend. Build a palette with
# exactly one colour per area: the base colours first, then as many extra
# colours as are still needed.
Colores_base <- c("#41B7C4","#FECEA3","#5182AF","#AADD9C","#EBC4E1","#D4C95F","#B8936B")
n_areas <- nrow(TDF_Area_Plot)
Colores <- if (n_areas <= length(Colores_base)) {
  Colores_base[seq_len(n_areas)]
} else {
  c(
    Colores_base,
    hcl.colors(n_areas - length(Colores_base), palette = "Dark 3")
  )
}

# Slice labels show the percentage rounded to a whole number.
etiquetas <- paste0(round(TDF_Area_Plot$hi), "%")

# Allow drawing outside the plot region so the legend can sit to the right of
# the pie (negative inset); the previous setting is restored afterwards
# instead of assuming it was FALSE.
par_pie <- par(xpd = TRUE)
pie(TDF_Area_Plot$hi, labels = etiquetas, col = Colores, radius = 0.8,
    main = "Gráfica Nº5: Distribución Porcentual por Área (%)")

legend("right", inset = -0.25, legend = TDF_Area_Plot$Area,
       fill = Colores, cex = 0.65, bty = "n")

par(par_pie)

INDICADORES ESTADÍSTICOS

# Summary indicators for the Area variable: sample size and mode.
tabla_freq_area <- table(Sedimentos_Marinos_Area$Area)

# which.max() reports only the first category when several share the highest
# count. Keep every tied category and join them into a single label, so a
# multimodal result is not silently reported as unimodal. With a single mode
# the value is the same as before.
moda_area <- paste(
  names(tabla_freq_area)[tabla_freq_area == max(tabla_freq_area)],
  collapse = ", "
)

tabla_indicadores_area <- data.frame(
  Indicador = c("Tamaño muestral (n)", "Moda"),
  # Both results share one column, so the count is stored as text; the
  # conversion is written out instead of relying on c() coercion.
  Resultado = c(as.character(sum(tabla_freq_area)), moda_area)
)

# Render the indicators table with gt. It is the second table of the report;
# the frequency table already carries "Tabla N°1", so this one is numbered 2
# (the title previously repeated N°1).
tabla_indicadores_area %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N°2**"),
    subtitle = md("Indicadores Estadísticos del Área de Recolección")
  )
Tabla N°1
Indicadores Estadísticos del Área de Recolección
Indicador Resultado
Tamaño muestral (n) 12729
Moda New England Shelf

CONCLUSIONES