#TEMA: HISTOGRAMAS CULITATIVAS
#AUTROR: JHOEL BUITRON
# 1. Instalar y cargar librerías necesarias
install.packages("readxl")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("ggplot2")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("gt")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
install.packages("dplyr")
## Installing package into '/cloud/lib/x86_64-pc-linux-gnu-library/4.5'
## (as 'lib' is unspecified)
# ===========================================
# CARGAR LIBRERÍAS
library(gt)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readxl)
library(ggplot2)
# ===========================================
# CARGAR EL ARCHIVO EXCEL
# (usa el nombre exacto de tu archivo)
# ===========================================
datos_nuevoartes <- read_excel("datos_nuevoartes.xlsx")
# ============================================================
# TABLA 1: landslide_category
# ============================================================
tabla_category <- datos_nuevoartes %>%
# 1. Eliminar NA
filter(!is.na(landslide_category)) %>%
# 2. Calcular frecuencias
group_by(landslide_category) %>%
summarise(ni = n(), .groups = "drop") %>%
# 3. Ordenar desde el mayor ni hasta el menor
arrange(desc(ni)) %>%
# 4. Asignar número (i)
mutate(i = row_number()) %>%
# 5. Calcular porcentaje hi
mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
# 6. Convertir i a character para poder unir con "SUMATORIA"
mutate(i = as.character(i)) %>%
# 7. Reordenar columnas
select(i, landslide_category, ni, hi)
# ======= AGREGAR FILA DE SUMATORIA =======
fila_total <- tibble(
i = "SUMATORIA",
landslide_category = "TOTAL",
ni = sum(tabla_category$ni),
hi = round(sum(tabla_category$hi), 0)
)
# Unir tabla + fila final sin errores
tabla_category_final <- bind_rows(tabla_category, fila_total)
# ======= FORMATO GT =======
tabla_category_gt <- tabla_category_final %>%
gt() %>%
tab_header(
title = md("**Tabla N° 1**"),
subtitle = md("**Tabla de equivalencias, frecuencia y porcentaje: Landslide Category**")
) %>%
tab_source_note(
source_note = md("Autor: Alessandro")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
)
tabla_category_gt
| Tabla N° 1 |
| Tabla de equivalencias, frecuencia y porcentaje: Landslide Category |
| i |
landslide_category |
ni |
hi |
| 1 |
landslide |
7648 |
69.33 |
| 2 |
mudslide |
2100 |
19.04 |
| 3 |
rock_fall |
671 |
6.08 |
| 4 |
complex |
232 |
2.10 |
| 5 |
debris_flow |
194 |
1.76 |
| 6 |
other |
68 |
0.62 |
| 7 |
unknown |
38 |
0.34 |
| 8 |
riverbank_collapse |
37 |
0.34 |
| 9 |
snow_avalanche |
15 |
0.14 |
| 10 |
translational_slide |
9 |
0.08 |
| 11 |
earth_flow |
7 |
0.06 |
| 12 |
lahar |
7 |
0.06 |
| 13 |
creep |
5 |
0.05 |
| 14 |
topple |
1 |
0.01 |
| SUMATORIA |
TOTAL |
11032 |
100.00 |
| Autor: Alessandro |
# ======================================================================
ggplot(
datos_nuevoartes %>% filter(!is.na(landslide_category)),
aes(x = landslide_category)
) +
geom_bar(fill = "skyblue", color = "black") +
labs(
title = "Distribución de landslide_category ",
x = "Categorías de landslide_category",
y = "Frecuencia"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ============================================================
# ======================================================================
tabla_category_hi <- datos_nuevoartes %>%
filter(!is.na(landslide_category)) %>%
group_by(landslide_category) %>%
summarise(ni = n(), .groups = "drop") %>%
mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
arrange(desc(ni))
ggplot(tabla_category_hi, aes(x = landslide_category, y = hi)) +
geom_col(fill = "steelblue", color = "black") +
geom_text(aes(label = paste0(hi, "%")),
vjust = -0.5,
size = 4) +
labs(
title = "Porcentaje (hi) por categoría de Landslide Category",
x = "Categoría",
y = "hi (%)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
# TABLA 2: landslide_size
# ============================================================
tabla_size <- datos_nuevoartes %>%
# 1. Eliminar NA
filter(!is.na(landslide_size)) %>%
# 2. Calcular frecuencias
group_by(landslide_size) %>%
summarise(ni = n(), .groups = "drop") %>%
# 3. Ordenar de mayor a menor ni
arrange(desc(ni)) %>%
# 4. Asignar número i
mutate(i = row_number()) %>%
# 5. Calcular porcentaje hi
mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
# 6. Convertir i a texto
mutate(i = as.character(i)) %>%
# 7. Reordenar columnas
select(i, landslide_size, ni, hi)
# ====== SUMATORIA ======
fila_total_size <- tibble(
i = "SUMATORIA",
landslide_size = "TOTAL",
ni = sum(tabla_size$ni),
hi = round(sum(tabla_size$hi), 0)
)
tabla_size_final <- bind_rows(tabla_size, fila_total_size)
# ====== FORMATO GT ======
tabla_size_gt <- tabla_size_final %>%
gt() %>%
tab_header(
title = md("**Tabla N° 2**"),
subtitle = md("**Frecuencia y porcentaje: Landslide Size**")
) %>%
tab_source_note(
source_note = md("Autor: Alessandro")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
)
tabla_size_gt
| Tabla N° 2 |
| Frecuencia y porcentaje: Landslide Size |
| i |
landslide_size |
ni |
hi |
| 1 |
medium |
6551 |
59.42 |
| 2 |
small |
2767 |
25.10 |
| 3 |
unknown |
851 |
7.72 |
| 4 |
large |
750 |
6.80 |
| 5 |
very_large |
102 |
0.93 |
| 6 |
catastrophic |
3 |
0.03 |
| SUMATORIA |
TOTAL |
11024 |
100.00 |
| Autor: Alessandro |
# ========================================================================
ggplot(
datos_nuevoartes %>% filter(!is.na(landslide_size)),
aes(x = landslide_size)
) +
geom_bar(fill = "skyblue", color = "black") +
labs(
title = "Distribución de landslide_size ",
x = "Categorías de landslide_size",
y = "Frecuencia"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
tabla_size_hi <- datos_nuevoartes %>%
filter(!is.na(landslide_size)) %>%
group_by(landslide_size) %>%
summarise(ni = n(), .groups = "drop") %>%
mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
arrange(desc(ni))
ggplot(tabla_size_hi, aes(x = landslide_size, y = hi)) +
geom_col(fill = "steelblue", color = "black") +
geom_text(aes(label = paste0(hi, "%")),
vjust = -0.5,
size = 4) +
labs(
title = "Porcentaje (hi) por categoría de Landslide Size",
x = "Categoría",
y = "hi (%)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
# TABLA 3: landslide_trigger
# ============================================================
tabla_trigger <- datos_nuevoartes %>%
# 1. Eliminar NA
filter(!is.na(landslide_trigger)) %>%
# 2. Calcular frecuencias
group_by(landslide_trigger) %>%
summarise(ni = n(), .groups = "drop") %>%
# 3. Ordenar de mayor a menor ni
arrange(desc(ni)) %>%
# 4. Asignar número i
mutate(i = row_number()) %>%
# 5. Calcular porcentaje hi
mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
# 6. Convertir i a texto
mutate(i = as.character(i)) %>%
# 7. Reordenar columnas
select(i, landslide_trigger, ni, hi)
# ====== SUMATORIA ======
fila_total_trigger <- tibble(
i = "SUMATORIA",
landslide_trigger = "TOTAL",
ni = sum(tabla_trigger$ni),
hi = round(sum(tabla_trigger$hi), 0)
)
tabla_trigger_final <- bind_rows(tabla_trigger, fila_total_trigger)
tabla_trigger_gt <- tabla_trigger_final %>%
gt() %>%
tab_header(
title = md("**Tabla N° 3**"),
subtitle = md("**Frecuencia y porcentaje: Landslide Trigger**")
) %>%
tab_source_note(
source_note = md("Autor: Alessandro")
) %>%
tab_options(
table.border.top.color = "black",
table.border.bottom.color = "black",
table.border.top.style = "solid",
table.border.bottom.style = "solid",
column_labels.border.top.color = "black",
column_labels.border.bottom.color = "black",
column_labels.border.bottom.width = px(2),
row.striping.include_table_body = TRUE,
heading.border.bottom.color = "black",
heading.border.bottom.width = px(2),
table_body.hlines.color = "gray",
table_body.border.bottom.color = "black"
)
tabla_trigger_gt
| Tabla N° 3 |
| Frecuencia y porcentaje: Landslide Trigger |
| i |
landslide_trigger |
ni |
hi |
| 1 |
downpour |
4680 |
42.51 |
| 2 |
rain |
2592 |
23.54 |
| 3 |
unknown |
1691 |
15.36 |
| 4 |
continuous_rain |
748 |
6.79 |
| 5 |
tropical_cyclone |
561 |
5.10 |
| 6 |
snowfall_snowmelt |
135 |
1.23 |
| 7 |
monsoon |
129 |
1.17 |
| 8 |
mining |
93 |
0.84 |
| 9 |
earthquake |
89 |
0.81 |
| 10 |
construction |
82 |
0.74 |
| 11 |
flooding |
75 |
0.68 |
| 12 |
no_apparent_trigger |
44 |
0.40 |
| 13 |
freeze_thaw |
41 |
0.37 |
| 14 |
other |
26 |
0.24 |
| 15 |
dam_embankment_collapse |
12 |
0.11 |
| 16 |
leaking_pipe |
10 |
0.09 |
| 17 |
vibration |
1 |
0.01 |
| 18 |
volcano |
1 |
0.01 |
| SUMATORIA |
TOTAL |
11010 |
100.00 |
| Autor: Alessandro |
# ========================================================================
ggplot(
datos_nuevoartes %>% filter(!is.na(landslide_trigger)),
aes(x = landslide_trigger)
) +
geom_bar(fill = "skyblue", color = "black") +
labs(
title = "Distribución de landslide_trigger ",
x = "Categorías de landslide_trigger",
y = "Frecuencia"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))

# ========================================================================
tabla_trigger_hi <- datos_nuevoartes %>%
filter(!is.na(landslide_trigger)) %>%
group_by(landslide_trigger) %>%
summarise(ni = n(), .groups = "drop") %>%
mutate(hi = round((ni / sum(ni)) * 100, 2)) %>%
arrange(desc(ni))
ggplot(tabla_trigger_hi, aes(x = landslide_trigger, y = hi)) +
geom_col(fill = "steelblue", color = "black") +
geom_text(aes(label = paste0(hi, "%")),
vjust = -0.5,
size = 4) +
labs(
title = "Porcentaje (hi) por categoría de Landslide Trigger",
x = "Categoría",
y = "hi (%)"
) +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1))
