#TEMA: HISTOGRAMAS CULITATIVAS 
#AUTROR: JHOEL BUITRON 
# 1. Instalar y cargar librerías necesarias
install.packages("readxl")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("gt")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
# ===========================================
# CARGAR LIBRERÍAS


library(gt)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)

# ===========================================
# CARGAR EL ARCHIVO EXCEL
# (usa el nombre exacto de tu archivo)
# ===========================================
datos_nuevoartes <- read_excel("datos_nuevoartes.xlsx")
# ============================================================
# TABLA 1: landslide_category
# ============================================================

tabla_category <- datos_nuevoartes %>%
  
  # 1. Eliminar NA
  filter(!is.na(landslide_category)) %>%
  
  # 2. Calcular frecuencias
  group_by(landslide_category) %>%
  summarise(ni = n(), .groups = "drop") %>%
  
  # 3. Ordenar desde el mayor ni hasta el menor
  arrange(desc(ni)) %>%
  
  # 4. Asignar número (i)
  mutate(i = row_number()) %>%
  
  # 5. Calcular porcentaje hi
  mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
  
  # 6. Convertir i a character para poder unir con "SUMATORIA"
  mutate(i = as.character(i)) %>%
  
  # 7. Reordenar columnas
  select(i, landslide_category, ni, hi)


# ======= AGREGAR FILA DE SUMATORIA =======

fila_total <- tibble(
  i = "SUMATORIA",
  landslide_category = "TOTAL",
  ni = sum(tabla_category$ni),
  hi = round(sum(tabla_category$hi), 0)
)

# Unir tabla + fila final sin errores
tabla_category_final <- bind_rows(tabla_category, fila_total)


# ======= FORMATO GT =======

tabla_category_gt <- tabla_category_final %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N° 1**"),
    subtitle = md("**Tabla de equivalencias, frecuencia y porcentaje: Landslide Category**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Alessandro")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )

tabla_category_gt
Tabla N° 1
Tabla de equivalencias, frecuencia y porcentaje: Landslide Category
i landslide_category ni hi
1 landslide 7648 69.33
2 mudslide 2100 19.04
3 rock_fall 671 6.08
4 complex 232 2.10
5 debris_flow 194 1.76
6 other 68 0.62
7 unknown 38 0.34
8 riverbank_collapse 37 0.34
9 snow_avalanche 15 0.14
10 translational_slide 9 0.08
11 earth_flow 7 0.06
12 lahar 7 0.06
13 creep 5 0.05
14 topple 1 0.01
SUMATORIA TOTAL 11032 100.00
Autor: Alessandro
# ======================================================================
ggplot(
  datos_nuevoartes %>% filter(!is.na(landslide_category)),
  aes(x = landslide_category)
) +
  geom_bar(fill = "skyblue", color = "black") +
  labs(
    title = "Distribución de landslide_category ",
    x = "Categorías de landslide_category",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ============================================================
# ======================================================================
tabla_category_hi <- datos_nuevoartes %>%
  filter(!is.na(landslide_category)) %>%
  group_by(landslide_category) %>%
  summarise(ni = n(), .groups = "drop") %>%
  mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
  arrange(desc(ni))
ggplot(tabla_category_hi, aes(x = landslide_category, y = hi)) +
  geom_col(fill = "steelblue", color = "black") +
  geom_text(aes(label = paste0(hi, "%")),
            vjust = -0.5,
            size = 4) +
  labs(
    title = "Porcentaje (hi) por categoría de Landslide Category",
    x = "Categoría",
    y = "hi (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
# TABLA 2: landslide_size
# ============================================================

tabla_size <- datos_nuevoartes %>%
  
  # 1. Eliminar NA
  filter(!is.na(landslide_size)) %>%
  
  # 2. Calcular frecuencias
  group_by(landslide_size) %>%
  summarise(ni = n(), .groups = "drop") %>%
  
  # 3. Ordenar de mayor a menor ni
  arrange(desc(ni)) %>%
  
  # 4. Asignar número i
  mutate(i = row_number()) %>%
  
  # 5. Calcular porcentaje hi
  mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
  
  # 6. Convertir i a texto
  mutate(i = as.character(i)) %>%
  
  # 7. Reordenar columnas
  select(i, landslide_size, ni, hi)

# ====== SUMATORIA ======
fila_total_size <- tibble(
  i = "SUMATORIA",
  landslide_size = "TOTAL",
  ni = sum(tabla_size$ni),
  hi = round(sum(tabla_size$hi), 0)
)

tabla_size_final <- bind_rows(tabla_size, fila_total_size)

# ====== FORMATO GT ======

tabla_size_gt <- tabla_size_final %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N° 2**"),
    subtitle = md("**Frecuencia y porcentaje: Landslide Size**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Alessandro")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )

tabla_size_gt
Tabla N° 2
Frecuencia y porcentaje: Landslide Size
i landslide_size ni hi
1 medium 6551 59.42
2 small 2767 25.10
3 unknown 851 7.72
4 large 750 6.80
5 very_large 102 0.93
6 catastrophic 3 0.03
SUMATORIA TOTAL 11024 100.00
Autor: Alessandro
# ========================================================================
ggplot(
  datos_nuevoartes %>% filter(!is.na(landslide_size)),
  aes(x = landslide_size)
) +
  geom_bar(fill = "skyblue", color = "black") +
  labs(
    title = "Distribución de landslide_size ",
    x = "Categorías de landslide_size",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
tabla_size_hi <- datos_nuevoartes %>%
  filter(!is.na(landslide_size)) %>%
  group_by(landslide_size) %>%
  summarise(ni = n(), .groups = "drop") %>%
  mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
  arrange(desc(ni))
ggplot(tabla_size_hi, aes(x = landslide_size, y = hi)) +
  geom_col(fill = "steelblue", color = "black") +
  geom_text(aes(label = paste0(hi, "%")),
            vjust = -0.5,
            size = 4) +
  labs(
    title = "Porcentaje (hi) por categoría de Landslide Size",
    x = "Categoría",
    y = "hi (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
# TABLA 3: landslide_trigger
# ============================================================

tabla_trigger <- datos_nuevoartes %>%
  # 1. Eliminar NA
  filter(!is.na(landslide_trigger)) %>%
  # 2. Calcular frecuencias
  group_by(landslide_trigger) %>%
  summarise(ni = n(), .groups = "drop") %>%
  # 3. Ordenar de mayor a menor ni
  arrange(desc(ni)) %>%
  # 4. Asignar número i
  mutate(i = row_number()) %>%
  # 5. Calcular porcentaje hi
  mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
  # 6. Convertir i a texto
  mutate(i = as.character(i)) %>%
  # 7. Reordenar columnas
  select(i, landslide_trigger, ni, hi)

# ====== SUMATORIA ======
fila_total_trigger <- tibble(
  i = "SUMATORIA",
  landslide_trigger = "TOTAL",
  ni = sum(tabla_trigger$ni),
  hi = round(sum(tabla_trigger$hi), 0)
)
tabla_trigger_final <- bind_rows(tabla_trigger, fila_total_trigger)
tabla_trigger_gt <- tabla_trigger_final %>%
  gt() %>%
  tab_header(
    title = md("**Tabla N° 3**"),
    subtitle = md("**Frecuencia y porcentaje: Landslide Trigger**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Alessandro")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
tabla_trigger_gt
Tabla N° 3
Frecuencia y porcentaje: Landslide Trigger
i landslide_trigger ni hi
1 downpour 4680 42.51
2 rain 2592 23.54
3 unknown 1691 15.36
4 continuous_rain 748 6.79
5 tropical_cyclone 561 5.10
6 snowfall_snowmelt 135 1.23
7 monsoon 129 1.17
8 mining 93 0.84
9 earthquake 89 0.81
10 construction 82 0.74
11 flooding 75 0.68
12 no_apparent_trigger 44 0.40
13 freeze_thaw 41 0.37
14 other 26 0.24
15 dam_embankment_collapse 12 0.11
16 leaking_pipe 10 0.09
17 vibration 1 0.01
18 volcano 1 0.01
SUMATORIA TOTAL 11010 100.00
Autor: Alessandro
# ========================================================================

ggplot(
  datos_nuevoartes %>% filter(!is.na(landslide_trigger)),
  aes(x = landslide_trigger)
) +
  geom_bar(fill = "skyblue", color = "black") +
  labs(
    title = "Distribución de landslide_trigger ",
    x = "Categorías de landslide_trigger",
    y = "Frecuencia"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
tabla_trigger_hi <- datos_nuevoartes %>%
  filter(!is.na(landslide_trigger)) %>%
  group_by(landslide_trigger) %>%
  summarise(ni = n(), .groups = "drop") %>%
  mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
  arrange(desc(ni))
ggplot(tabla_trigger_hi, aes(x = landslide_trigger, y = hi)) +
  geom_col(fill = "steelblue", color = "black") +
  geom_text(aes(label = paste0(hi, "%")),
            vjust = -0.5,
            size = 4) +
  labs(
    title = "Porcentaje (hi) por categoría de Landslide Trigger",
    x = "Categoría",
    y = "hi (%)"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))