# FECHA: 22/11/2025
# ====================================================================
# ESTADÍSTICA DESCRIPTIVA: ANÁLISIS DE CO (µg/m³) EN INDIA (2015-2020)
# Adaptado por: Ariel Chiluisa
# Fecha: 10/12/2025
# ====================================================================
library(gt)
library(dplyr)
# ============================================
# 1. CARGA Y LIMPIEZA DE DATOS (CO)
# ============================================
# Load the source CSV. The path below is absolute; when running elsewhere,
# point it at the local copy of 'Datos Cambiados.csv'.
datos <- read.csv(
  file = "C:/Users/JOSELYN/Desktop/kangle/Datos Cambiados.csv",
  header = TRUE,
  sep = ","
)
# Drop the entries equal to "-", coerce what is left to numeric, and discard
# anything that did not parse (NA).
CO_raw <- as.numeric(as.character(datos$CO[datos$CO != "-"]))
CO_clean <- CO_raw[!is.na(CO_raw)]
CO <- CO_clean # Main cleaned variable used by the rest of the script
# ====================================================================
# PARTE A: TABLA Y GRÁFICOS BASADOS EN REGLA DE STURGES (Tabla Nro. 1)
# ====================================================================
# --- STURGES CALCULATIONS ---
# Builds k equal-width classes over [min(CO), max(CO)], with k obtained from
# Sturges' rule, plus 4-decimal rounded copies used for counting and display.
n <- length(CO)
if (n == 0) {
  # min()/max() on an empty vector would return Inf/-Inf and everything
  # computed below would be meaningless, so fail early.
  stop("CO no contiene observaciones numéricas válidas.", call. = FALSE)
}
min_CO <- min(CO)
max_CO <- max(CO)
R <- max_CO - min_CO # Range
k_sturges <- 1 + 3.322 * log10(n)
k <- round(k_sturges) # Number of classes
A <- R / k # Class width
# All class limits come from a single vector, so the upper limit of class i is
# exactly the lower limit of class i + 1. Computing Ls as Li + A and rounding
# both separately can leave a gap after rounding (e.g. 65.9287 vs 65.9288),
# and a value falling in that gap would not be counted in any class.
breaks_intervals <- min_CO + (0:k) * A
breaks_intervals[k + 1] <- max_CO # Pin the last limit to the observed maximum
Li <- breaks_intervals[-(k + 1)] # Lower limits
Ls <- breaks_intervals[-1] # Upper limits
MC <- (Li + Ls) / 2 # Class marks (midpoints)
CO_calc <- round(CO, 4)
Li_R <- round(Li, 4)
Ls_R <- round(Ls, 4)
MC_R <- round(MC, 4)
# --- FREQUENCY COUNT ---
# Absolute frequency per class. Classes are left-closed and right-open,
# [Li, Ls), except the last one, which also includes its upper limit.
# seq_len() + vapply() replaces the 1:k index loop and guarantees a numeric
# result of length k.
ni <- vapply(
  seq_len(k),
  function(i) {
    bajo_limite_sup <- if (i < k) CO_calc < Ls_R[i] else CO_calc <= Ls_R[i]
    sum(CO_calc >= Li_R[i] & bajo_limite_sup)
  },
  numeric(1)
)
# --- DERIVED FREQUENCIES ---
# Relative frequency in percent, plus ascending and descending cumulative
# versions of both the absolute (ni) and relative (hi) frequencies.
acumular_desc <- function(x) rev(cumsum(rev(x))) # Cumulative sum from the end
hi <- prop.table(ni) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- acumular_desc(ni)
Hi_asc <- cumsum(hi)
Hi_desc <- acumular_desc(hi)
# --- BUILD TABLE NO. 1 ---
# Interval labels: "[Li - Ls)" for every class, "[Li - Ls]" for the last one.
cierre <- c(rep(")", k - 1), "]")
Intervalo_txt <- paste0("[", Li_R, " - ", Ls_R, cierre)
TDF_CO <- data.frame(
  Intervalo = Intervalo_txt,
  MC = MC_R,
  ni = ni,
  hi = round(hi, 2),
  Ni_ascendente = Ni_asc,
  Ni_descendente = Ni_desc,
  Hi_ascendente = round(Hi_asc, 2),
  Hi_descendente = round(Hi_desc, 2)
)
TDF_CO_Sturges <- TDF_CO # Copy without the totals row, kept for the plots
# Totals row: only ni and hi are totalled; every other column shows "-".
totales <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(ni),
  hi = sum(hi),
  Ni_ascendente = "-",
  Ni_descendente = "-",
  Hi_ascendente = "-",
  Hi_descendente = "-"
)
TDF_Final <- rbind(TDF_CO, totales)
# DISPLAY TABLE NO. 1 (styling intentionally left unchanged)
TDF_Final %>%
  gt() %>%
  tab_header(
    title = md("**Tabla Nro. 1**"),
    subtitle = md(paste0("*Distribución de frecuencia de CO (k=", k, " según Sturges)*"))
  ) %>%
  tab_source_note(
    source_note = md("Fuente: Elaboración propia a partir de Datos Cambiados.csv")
  ) %>%
  # 2px solid black left and right borders on body cells and column labels
  tab_style(
    style = cell_borders(
      sides = c("left", "right"),
      color = "black",
      weight = px(2),
      style = "solid"
    ),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.color = "black",
    table.border.bottom.style = "solid",
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
# Salida renderizada de la tabla anterior:
#
# Tabla Nro. 1
# Distribución de frecuencia de CO (k=16 según Sturges)
#
# | Intervalo             | MC       | ni    | hi     | Ni_ascendente | Ni_descendente | Hi_ascendente | Hi_descendente |
# |-----------------------|----------|-------|--------|---------------|----------------|---------------|----------------|
# | [0 - 10.9881)         | 5.4941   | 26414 | 96.15  | 26414         | 27472          | 96.15         | 100            |
# | [10.9881 - 21.9763)   | 16.4822  | 485   | 1.77   | 26899         | 1058           | 97.91         | 3.85           |
# | [21.9763 - 32.9644)   | 27.4703  | 270   | 0.98   | 27169         | 573            | 98.9          | 2.09           |
# | [32.9644 - 43.9525)   | 38.4584  | 134   | 0.49   | 27303         | 303            | 99.38         | 1.1            |
# | [43.9525 - 54.9406)   | 49.4466  | 68    | 0.25   | 27371         | 169            | 99.63         | 0.62           |
# | [54.9406 - 65.9287)   | 60.4347  | 37    | 0.13   | 27408         | 101            | 99.77         | 0.37           |
# | [65.9288 - 76.9169)   | 71.4228  | 17    | 0.06   | 27425         | 64             | 99.83         | 0.23           |
# | [76.9169 - 87.905)    | 82.4109  | 15    | 0.05   | 27440         | 47             | 99.88         | 0.17           |
# | [87.905 - 98.8931)    | 93.3991  | 13    | 0.05   | 27453         | 32             | 99.93         | 0.12           |
# | [98.8931 - 109.8812)  | 104.3872 | 6     | 0.02   | 27459         | 19             | 99.95         | 0.07           |
# | [109.8812 - 120.8694) | 115.3753 | 7     | 0.03   | 27466         | 13             | 99.98         | 0.05           |
# | [120.8694 - 131.8575) | 126.3634 | 1     | 0.00   | 27467         | 6              | 99.98         | 0.02           |
# | [131.8575 - 142.8456) | 137.3516 | 3     | 0.01   | 27470         | 5              | 99.99         | 0.02           |
# | [142.8456 - 153.8338) | 148.3397 | 1     | 0.00   | 27471         | 2              | 100           | 0.01           |
# | [153.8338 - 164.8219) | 159.3278 | 0     | 0.00   | 27471         | 1              | 100           | 0              |
# | [164.8219 - 175.81]   | 170.3159 | 1     | 0.00   | 27472         | 1              | 100           | 0              |
# | Totales               | -        | 27472 | 100.00 | -             | -              | -             | -              |
#
# Fuente: Elaboración propia a partir de Datos Cambiados.csv
# ====================================================================
# PARTE B: TABLA Y GRÁFICOS BASADOS EN K=12 (Tabla Nro. 2)
# ====================================================================
# --- CALCULATIONS FOR K = 12 ---
# Same construction as Part A but with a fixed number of classes. Reuses
# min_CO, max_CO and R computed in Part A and overwrites k, A and CO_calc.
k <- 12 # Fixed number of classes
A <- R / k # Class width
# Build all k + 1 class limits in one vector and slice it. This guarantees
# exactly k classes (seq(..., to = max_CO - A, by = A) leaves the count to
# floating-point behaviour) and makes each upper limit identical to the next
# lower limit, so rounding cannot open a gap between adjacent classes.
limites_k12 <- min_CO + (0:k) * A
limites_k12[k + 1] <- max_CO # Pin the last limit to the observed maximum
Lis <- limites_k12[-(k + 1)] # Lower limits
Lss <- limites_k12[-1] # Upper limits
MCs <- (Lis + Lss) / 2 # Class marks (midpoints)
CO_calc <- round(CO, 3)
Lis_calc <- round(Lis, 3)
Lss_calc <- round(Lss, 3)
# --- FREQUENCY COUNT ---
# Absolute frequency per class. Classes are left-closed and right-open,
# [Lis, Lss), except the last one, which also includes its upper limit.
# seq_len() avoids the 1:length(x) pitfall (1:0 iterates over c(1, 0) when x
# is empty) and vapply() guarantees a numeric result with one entry per class.
n_clases <- length(Lis)
ni <- vapply(
  seq_len(n_clases),
  function(i) {
    bajo_limite_sup <- if (i < n_clases) {
      CO_calc < Lss_calc[i]
    } else {
      CO_calc <= Lss_calc[i]
    }
    sum(CO_calc >= Lis_calc[i] & bajo_limite_sup)
  },
  numeric(1)
)
# --- FORCED ADJUSTMENT (hi and Hi) ---
# Relative frequencies are rounded to 3 decimals; whatever the rounding leaves
# short of (or over) 100 is added to the last class so the column sums to 100.
hi <- round(prop.table(ni) * 100, 3)
ajuste_hi <- 100 - sum(hi)
ultimo <- length(hi)
hi[ultimo] <- hi[ultimo] + ajuste_hi
hi <- round(hi, 3)
# Cumulative absolute frequencies (ascending and descending).
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
# Cumulative relative frequencies, with the end points forced to exactly 100.
Hi_asc <- replace(cumsum(hi), ultimo, 100)
Hi_desc <- replace(rev(cumsum(rev(hi))), 1, 100)
# --- BUILD TABLE NO. 2 ---
# Interval labels use limits rounded to 2 decimals: "[Li - Ls)" for every
# class, "[Li - Ls]" for the last one.
cierre <- c(rep(")", k - 1), "]")
Intervalo_txt <- paste0("[", round(Lis, 2), " - ", round(Lss, 2), cierre)
# Display names shared by the body of the table and its totals row.
nombres_tabla <- c(
  "Intervalo", "MC", "ni", "hi(%)",
  "Ni_asc", "Hi_asc (%)", "Ni_desc", "Hi_desc (%)"
)
TDF_CO_Simplificada <- setNames(
  data.frame(
    Intervalo = Intervalo_txt,
    MC = round(MCs, 3),
    ni = ni,
    hi = hi,
    Ni_ascendente = Ni_asc,
    Hi_ascendente = Hi_asc,
    Ni_descendente = Ni_desc,
    Hi_descendente = Hi_desc
  ),
  nombres_tabla
)
# Totals row: only ni and hi are totalled; every other column shows "-".
totales <- setNames(
  data.frame(
    Intervalo = "Totales",
    MC = "-",
    ni = sum(ni),
    hi. = 100,
    Ni_asc = "-",
    Hi_asc. = "-",
    Ni_desc = "-",
    Hi_desc. = "-"
  ),
  nombres_tabla
)
TDF_Final_Simplificada <- rbind(TDF_CO_Simplificada, totales)
# DISPLAY TABLE NO. 2 (styling intentionally left unchanged)
TDF_Final_Simplificada %>%
  gt() %>%
  tab_header(
    title = md("**Tabla Nro. 2**"),
    subtitle = md(paste0("*Distribución de frecuencia simplificada de CO (k=", k, ")*"))
  ) %>%
  tab_source_note(
    source_note = md("Fuente: Elaboración propia a partir de Datos Cambiados.csv")
  ) %>%
  # 2px solid black left and right borders on body cells and column labels
  tab_style(
    style = cell_borders(
      sides = c("left", "right"),
      color = "black",
      weight = px(2),
      style = "solid"
    ),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.color = "black",
    table.border.bottom.style = "solid",
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
# Salida renderizada de la tabla anterior:
#
# Tabla Nro. 2
# Distribución de frecuencia simplificada de CO (k=12)
#
# | Intervalo         | MC      | ni    | hi(%)   | Ni_asc | Hi_asc (%) | Ni_desc | Hi_desc (%) |
# |-------------------|---------|-------|---------|--------|------------|---------|-------------|
# | [0 - 14.65)       | 7.325   | 26628 | 96.928  | 26628  | 96.928     | 27472   | 100         |
# | [14.65 - 29.3)    | 21.976  | 465   | 1.693   | 27093  | 98.621     | 844     | 3.072       |
# | [29.3 - 43.95)    | 36.627  | 210   | 0.764   | 27303  | 99.385     | 379     | 1.379       |
# | [43.95 - 58.6)    | 51.278  | 81    | 0.295   | 27384  | 99.68      | 169     | 0.615       |
# | [58.6 - 73.25)    | 65.929  | 38    | 0.138   | 27422  | 99.818     | 88      | 0.32        |
# | [73.25 - 87.9)    | 80.58   | 18    | 0.066   | 27440  | 99.884     | 50      | 0.182       |
# | [87.9 - 102.56)   | 95.23   | 15    | 0.055   | 27455  | 99.939     | 32      | 0.116       |
# | [102.56 - 117.21) | 109.881 | 7     | 0.025   | 27462  | 99.964     | 17      | 0.061       |
# | [117.21 - 131.86) | 124.532 | 5     | 0.018   | 27467  | 99.982     | 10      | 0.036       |
# | [131.86 - 146.51) | 139.183 | 4     | 0.015   | 27471  | 99.997     | 5       | 0.018       |
# | [146.51 - 161.16) | 153.834 | 0     | 0.000   | 27471  | 99.997     | 1       | 0.003       |
# | [161.16 - 175.81] | 168.485 | 1     | 0.003   | 27472  | 100        | 1       | 0.003       |
# | Totales           | -       | 27472 | 100.000 | -      | -          | -       | -           |
#
# Fuente: Elaboración propia a partir de Datos Cambiados.csv
# --------------------------------------------------------------------
# PLOT 1: HISTOGRAM OF CO (Absolute frequency - Sturges)
# --------------------------------------------------------------------
# hist() bins are right-closed (a, b] by default, while Table 1 counts with
# left-closed classes [a, b). right = FALSE makes the bars use the same
# convention; include.lowest = TRUE then closes the last bin on the right so
# the maximum is still counted.
histo_CO_Sturges <- hist(CO_clean, breaks = breaks_intervals,
                         right = FALSE, include.lowest = TRUE, plot = FALSE)
hist(CO_clean, breaks = breaks_intervals,
     right = FALSE, include.lowest = TRUE,
     main = "Gráfica N°1: Histograma de Frecuencia Absoluta (Sturges)",
     xlab = "Concentración de CO (µg/m³)",
     ylab = "Frecuencia (ni)",
     # 10% headroom above the tallest class of the Sturges table
     ylim = c(0, max(TDF_CO_Sturges$ni) * 1.1),
     col = "lightskyblue", cex.main = 0.9, cex.lab = 1, cex.axis = 0.9,
     xaxt = "n", border = "black")
# Custom x axis: one tick per class limit, labelled with integers.
axis(1, at = histo_CO_Sturges$breaks,
     labels = round(histo_CO_Sturges$breaks, 0),
     las = 2, cex.axis = 0.8)

# --------------------------------------------------------------------
# PLOT 2: HISTOGRAM OF ABSOLUTE FREQUENCY (Sturges)
# Y AXIS SCALED FROM 0 TO THE TOTAL NUMBER OF OBSERVATIONS
# --------------------------------------------------------------------
# The class limits were already computed as "breaks_intervals".
# hist() bins are right-closed (a, b] by default, while Table 1 counts with
# left-closed classes [a, b). right = FALSE makes the bars use the same
# convention; include.lowest = TRUE then closes the last bin on the right.
histo_CO_Sturges <- hist(CO_clean, breaks = breaks_intervals,
                         right = FALSE, include.lowest = TRUE, plot = FALSE)
hist(CO_clean, breaks = breaks_intervals,
     right = FALSE, include.lowest = TRUE,
     main = "Gráfica N°2: Histograma de Frecuencia Absoluta (Sturges)",
     xlab = "Concentración de CO (µg/m³)",
     ylab = "Frecuencia (ni)",
     # Upper limit is the sample size (27472 for the current data set),
     # derived from the data instead of hard-coded so it follows the input.
     ylim = c(0, length(CO_clean)),
     col = "lightskyblue",
     cex.main = 0.9, cex.lab = 1, cex.axis = 0.9,
     xaxt = "n", border = "black")
# Custom x axis: one tick per class limit, labelled with integers.
axis(1, at = histo_CO_Sturges$breaks,
     labels = round(histo_CO_Sturges$breaks, 0),
     las = 2, cex.axis = 0.8)

# --------------------------------------------------------------------
# PLOT 3: HISTOGRAM OF CO (Absolute frequency - k=12)
# --------------------------------------------------------------------
breaks_simplificado <- c(Lis[1], Lss)
# hist() bins are right-closed (a, b] by default, while Table 2 counts with
# left-closed classes [a, b). right = FALSE makes the bars use the same
# convention; include.lowest = TRUE then closes the last bin on the right.
histo_CO_simplificado <- hist(CO, breaks = breaks_simplificado,
                              right = FALSE, include.lowest = TRUE,
                              plot = FALSE)
hist(CO, breaks = breaks_simplificado,
     right = FALSE, include.lowest = TRUE,
     main = "Gráfica N°3: Histograma de Frecuencia Absoluta (k=12)",
     xlab = "CO (µg/m³)",
     ylab = "Frecuencia (ni)",
     # 10% headroom above the tallest bar. Taken from the histogram's own
     # counts so the limit cannot drift if the global `ni` is reassigned.
     ylim = c(0, max(histo_CO_simplificado$counts) * 1.1),
     col = "lightskyblue", cex.main = 0.9, cex.lab = 1, cex.axis = 0.9,
     xaxt = "n", border = "black")
# Custom x axis: one tick per class limit, labelled with integers.
axis(1, at = histo_CO_simplificado$breaks,
     labels = round(histo_CO_simplificado$breaks, 0),
     las = 2, cex.axis = 0.8)

# --------------------------------------------------------------------
# PLOT 4: HISTOGRAM OF CO (Absolute frequency - k=12)
# Y AXIS SCALED FROM 0 TO THE TOTAL NUMBER OF OBSERVATIONS
# --------------------------------------------------------------------
breaks_simplificado <- c(Lis[1], Lss)
# hist() bins are right-closed (a, b] by default, while Table 2 counts with
# left-closed classes [a, b). right = FALSE makes the bars use the same
# convention; include.lowest = TRUE then closes the last bin on the right.
histo_CO_simplificado <- hist(CO, breaks = breaks_simplificado,
                              right = FALSE, include.lowest = TRUE,
                              plot = FALSE)
hist(CO, breaks = breaks_simplificado,
     right = FALSE, include.lowest = TRUE,
     main = "Gráfica N°4: Histograma de Frecuencia Absoluta (k=12)",
     xlab = "CO (µg/m³)",
     ylab = "Frecuencia (ni)",
     # Upper limit is the sample size (27472 for the current data set),
     # derived from the data instead of hard-coded so it follows the input.
     ylim = c(0, length(CO)),
     col = "lightskyblue",
     cex.main = 0.9, cex.lab = 1, cex.axis = 0.9,
     xaxt = "n", border = "black")
# Custom x axis: one tick per class limit, labelled with integers.
axis(1, at = histo_CO_simplificado$breaks,
     labels = round(histo_CO_simplificado$breaks, 0),
     las = 2, cex.axis = 0.8)

# --------------------------------------------------------------------
# PLOT 5: BOXPLOT
# --------------------------------------------------------------------
# Horizontal box-and-whisker plot of the cleaned CO values.
boxplot(
  CO,
  main = "Gráfica N°5: Diagrama de Caja de la Concentración de CO",
  xlab = "CO (µg/m³)",
  col = "lightgreen",
  border = "black",
  horizontal = TRUE
)

# --------------------------------------------------------------------
# PLOT 6: OGIVE (Absolute frequency - k=12)
# --------------------------------------------------------------------
# Uses the k=12 variables: Lis, Lss, Ni_asc, Ni_desc.
# x: every class limit. The ascending curve starts at 0 on the first limit;
# the descending curve ends at 0 on the last limit.
X_coordenadas <- c(Lis[1], Lss)
Y_asc <- c(0, Ni_asc)
Y_desc <- c(Ni_desc, 0)
plot(
  x = X_coordenadas,
  y = Y_asc,
  type = "b",
  pch = 19,
  col = "darkblue",
  ylim = c(0, max(Ni_asc)),
  xaxt = "n",
  main = "Gráfica N°6: Ojiva Ascendente y Descendente",
  xlab = "CO (µg/m³)",
  ylab = "Frecuencia Absoluta Acumulada (Ni)"
)
lines(x = X_coordenadas, y = Y_desc, type = "b", pch = 19, col = "red")
# Custom x axis: one tick per class limit, labelled with integers.
axis(
  side = 1,
  at = X_coordenadas,
  labels = round(X_coordenadas, 0),
  las = 2,
  cex.axis = 0.8
)
legend(
  "topright",
  legend = c("Ojiva Ascendente (Ni)", "Ojiva Descendente (Ni)"),
  col = c("darkblue", "red"),
  lty = 1,
  pch = 19,
  cex = 0.8
)

# --------------------------------------------------------------------
# PLOT 7: OGIVE (Relative frequency in percent - k=12)
# --------------------------------------------------------------------
# Uses the k=12 variables: Lis, Lss, Hi_asc, Hi_desc, and the x coordinates
# (X_coordenadas) built for the absolute-frequency ogive.
# The ascending curve starts at 0 and its last point is forced to 100; the
# descending curve has its first point forced to 100 and ends at 0.
Y_asc_pct <- replace(c(0, Hi_asc), length(Hi_asc) + 1, 100)
Y_desc_pct <- replace(c(Hi_desc, 0), 1, 100)
plot(
  x = X_coordenadas,
  y = Y_asc_pct,
  type = "b",
  pch = 19,
  col = "darkgreen",
  ylim = c(0, 100),
  xaxt = "n",
  main = "Gráfica N°7: Ojiva Ascendente y Descendente (Porcentaje)",
  xlab = "CO (µg/m³)",
  ylab = "Frecuencia Relativa Acumulada (%)"
)
lines(x = X_coordenadas, y = Y_desc_pct, type = "b", pch = 19, col = "red")
# Custom x axis: one tick per class limit, labelled with integers.
axis(
  side = 1,
  at = X_coordenadas,
  labels = round(X_coordenadas, 0),
  las = 2,
  cex.axis = 0.8
)
legend(
  "topright",
  legend = c("Ojiva Ascendente (Hi%)", "Ojiva Descendente (Hi%)"),
  col = c("darkgreen", "red"),
  lty = 1,
  pch = 19,
  cex = 0.8
)
