# Load the data
# The working directory is switched to a hard-coded local folder, and the CSV
# is then read relative to it.
setwd(dir = "C:/Users/lenovo/OneDrive/Escritorio/ESTADISTICA")
datos <- read.csv(file = "china_water_pollution_data.csv")
# Instalar si falta
# install.packages("gt")
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.2
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(gt)
## Warning: package 'gt' was built under R version 4.5.2
# ========= 1) Base table ==========
# One row per monitoring station; `ni` holds the number of rows that station
# has in `datos`.
TDF <- count(datos, Monitoring_Station, name = "ni")
# ========= 2) Intervals ==========
# Bin each station's record count into right-closed classes:
# (0, 5], (5, 10], (10, 20], (20, 50], (50, 100], (100, Inf].
TDF <- TDF %>%
  mutate(
    Intervalo = cut(
      ni,
      breaks = c(0, 5, 10, 20, 50, 100, Inf),
      labels = c("1–5 registros", "6–10", "11–20", "21–50", "51–100", "100+")
    )
  )
# ========= 3) Table by interval ==========
# For every interval: how many stations fall in it and how many records those
# stations add up to.
tabla_intervalos <- summarise(
  group_by(TDF, Intervalo),
  Num_Estaciones = n(),
  ni = sum(ni),
  .groups = "drop"
)
# Share of stations per interval, as a percentage rounded to two decimals.
tabla_intervalos <- mutate(
  tabla_intervalos,
  `hi(%)` = round(prop.table(Num_Estaciones) * 100, digits = 2)
)
# ========= 4) Convert every column to character ==========
# The total row appended below carries a text label in `Intervalo`, so all
# columns are turned into character to give both tables matching types.
tabla_intervalos <- tabla_intervalos %>%
  mutate(across(everything(), as.character))
# ========= 5) Grand-total row with exactly the same column names ==========
# The percentage total is derived from the station counts instead of adding
# the already-rounded `hi(%)` values, whose sum can drift to 99.99 or 100.01
# (e.g. three equal groups: 33.33 * 3 = 99.99).
total_general <- tibble(
  Intervalo = "TOTAL GENERAL",
  Num_Estaciones = sum(as.numeric(tabla_intervalos$Num_Estaciones)),
  ni = sum(as.numeric(tabla_intervalos$ni)),
  `hi(%)` = round(
    sum(prop.table(as.numeric(tabla_intervalos$Num_Estaciones))) * 100,
    2
  )
) %>%
  mutate(across(everything(), as.character)) # must match the types set in step 4
# ========= 6) Append the total row ==========
tabla_final <- bind_rows(tabla_intervalos, total_general)
# ========= 7) Formatted gt table ==========
# Adds a title and subtitle, a source note, black outer/header borders with
# striped body rows, and bold text on the "TOTAL GENERAL" row.
tabla_final_gt <- gt(data = tabla_final) %>%
  tab_header(
    subtitle = md("**Distribución de Frecuencias de las Estaciones de Monitoreo por Intervalos (2015–2023)**"),
    title = md("**Tabla N° 1**")
  ) %>%
  tab_source_note(source_note = md("Autor: Grupo 1")) %>%
  tab_options(
    row.striping.include_table_body = TRUE,
    column_labels.border.bottom.color = "black",
    table.border.bottom.color = "black",
    table.border.top.color = "black"
  ) %>%
  tab_style(
    locations = cells_body(rows = Intervalo == "TOTAL GENERAL"),
    style = cell_text(weight = "bold")
  )
# Display the finished table.
tabla_final_gt
**Tabla N° 1**

**Distribución de Frecuencias de las Estaciones de Monitoreo por Intervalos (2015–2023)**

| Intervalo | Num_Estaciones | ni | hi(%) |
|---|---|---|---|
| 1–5 registros | 1 | 5 | 0.56 |
| 6–10 | 17 | 158 | 9.44 |
| 11–20 | 136 | 2086 | 75.56 |
| 21–50 | 26 | 751 | 14.44 |
| **TOTAL GENERAL** | **180** | **3000** | **100** |

Autor: Grupo 1