library(gt)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)

# ===========================================
# LOAD THE EXCEL WORKBOOK
# (the file name must match the workbook on disk exactly; the relative
#  path is resolved against the current working directory)
# ===========================================
datos_nuevoartes <- read_excel(path = "datos_nuevoartes.xlsx")
# ============================================================
# FREQUENCY TABLE: landslide_size
# ============================================================
# One row per landslide_size category with its absolute frequency (ni),
# its rank by frequency (i, stored as text) and its percentage share (hi).

tabla_size <- datos_nuevoartes %>%
  # Rows without a landslide_size value are left out of every count.
  filter(!is.na(landslide_size)) %>%
  # Absolute frequency per category, most frequent category first.
  count(landslide_size, name = "ni", sort = TRUE) %>%
  mutate(
    # Rank kept as text so a non-numeric label can share the column later.
    i = as.character(row_number()),
    # Percentage of the non-missing total, rounded to two decimals.
    hi = round((ni / sum(ni)) * 100, 2)
  ) %>%
  # Final column order: rank, category, count, percentage.
  select(i, landslide_size, ni, hi)

# ====== TOTALS ROW ======
# Single closing row holding the total count and the total percentage.
# The percentage sum is rounded to a whole number, so any drift left by
# rounding the individual hi values is not shown.
fila_total_size <- tabla_size %>%
  summarise(
    i = "SUMATORIA",
    landslide_size = "TOTAL",
    ni = sum(ni),
    hi = round(sum(hi), 0)
  )

# Category rows first, totals row last.
tabla_size_final <- tabla_size %>%
  bind_rows(fila_total_size)

# ====== GT FORMATTING ======
# Presentation table built step by step: bold title and subtitle, an
# author source note, then border, separator and striping options.

tabla_size_gt <- gt(tabla_size_final)

# Heading (markdown bold).
tabla_size_gt <- tab_header(
  tabla_size_gt,
  title = md("**Tabla N° 3**"),
  subtitle = md("**Frecuencia y porcentaje: Landslide Size**")
)

# Source note shown under the table.
tabla_size_gt <- tab_source_note(
  tabla_size_gt,
  source_note = md("Autor: Alessandro")
)

tabla_size_gt <- tab_options(
  tabla_size_gt,
  # Outer frame of the table.
  table.border.top.style = "solid",
  table.border.top.color = "black",
  table.border.bottom.style = "solid",
  table.border.bottom.color = "black",
  # Rule under the title block.
  heading.border.bottom.color = "black",
  heading.border.bottom.width = px(2),
  # Rules around the column labels.
  column_labels.border.top.color = "black",
  column_labels.border.bottom.color = "black",
  column_labels.border.bottom.width = px(2),
  # Body: striped rows, gray separators, black closing rule.
  row.striping.include_table_body = TRUE,
  table_body.hlines.color = "gray",
  table_body.border.bottom.color = "black"
)

# Print the formatted table.
tabla_size_gt
## Tabla N° 3
## Frecuencia y porcentaje: Landslide Size
## i landslide_size ni hi
## 1 medium 6551 59.42
## 2 small 2767 25.10
## 3 unknown 851 7.72
## 4 large 750 6.80
## 5 very_large 102 0.93
## 6 catastrophic 3 0.03
## SUMATORIA TOTAL 11024 100.00
## Autor: Alessandro
# ========================================================================
# Bar chart of absolute frequencies: one bar per landslide_size category,
# counted by geom_bar() from the rows that have a non-missing value.
datos_nuevoartes %>%
  filter(!is.na(landslide_size)) %>%
  ggplot(mapping = aes(x = landslide_size)) +
  geom_bar(fill = "skyblue", color = "black") +
  labs(
    title = "Grafica N° 5 Distribución de landslide_size",
    x = "Categorías de landslide_size",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  # Tilt the category labels and right-align them under their bars.
  theme(axis.text.x = element_text(hjust = 1, angle = 45))

# ========================================================================
# Percentage (hi) of each landslide_size category among the rows with a
# non-missing value, sorted from the most to the least frequent category.
tabla_size_hi <- datos_nuevoartes %>%
  filter(!is.na(landslide_size)) %>%
  group_by(landslide_size) %>%
  summarise(ni = n(), .groups = "drop") %>%
  mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
  arrange(desc(ni))

# ggplot2 lays out a character x variable in alphabetical order regardless
# of the row order of the data, so sorting the table alone does not sort
# the bars. Reordering the categories on -ni carries the descending order
# into the plot.
ggplot(tabla_size_hi, aes(x = reorder(landslide_size, -ni), y = hi)) +
  geom_col(fill = "steelblue", color = "black") +
  # Percentage label just above each bar.
  geom_text(aes(label = paste0(hi, "%")),
            vjust = -0.5,
            size = 4) +
  labs(
    title = "Grafica N°6  Porcentaje (hi) por categoría de Landslide Size",
    x = "Categoría",
    y = "hi (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================

# ============================================================
# OVERALL BAR CHART (hi) – landslide_size
# ============================================================

# ============================
# hi TABLE
# ============================
# Recomputed here without sorting: rows follow the order produced by the
# grouped summary.

tabla_size_hi <- datos_nuevoartes %>%
  filter(!is.na(landslide_size)) %>%
  group_by(landslide_size) %>%
  summarise(ni = n(), .groups = "drop") %>%
  mutate(hi = round((ni / sum(ni)) * 100, 2))

# ============================
# OVERALL BAR CHART (hi)
# ============================

# barplot() returns the x midpoint of every bar; the midpoints are kept to
# position the rotated labels drawn afterwards. axisnames = FALSE is the
# documented switch for leaving the category axis unlabeled (names.arg is
# meant to receive a vector of names, not a logical flag).
pos <- barplot(
  tabla_size_hi$hi,
  main = "Gráfica N°7: Distribución de frecuencia relativa (hi)\nLandslide Size",
  xlab = "Landslide Size",
  ylab = "Porcentaje (%)",
  ylim = c(0, 100),
  col = "skyblue",
  axisnames = FALSE
)

# Category labels rotated 45 degrees and right-aligned at each bar midpoint.
# y = -3 lies below the plotted range (ylim starts at 0), so xpd = TRUE is
# needed to let the text be drawn outside the plot region.
text(
  x = pos,
  y = -3,
  labels = tabla_size_hi$landslide_size,
  srt = 45,
  adj = 1,
  xpd = TRUE,
  cex = 0.9
)