# FECHA: 22/11/2025
# ==============================================================================
# 1. CARGA DE PAQUETES Y DATOS
# ==============================================================================
# Cargar librerías necesarias
# Instala si no las tienes: install.packages(c("gt", "dplyr"))
library(gt)
library(dplyr)
# Data loading: read the comma-separated source file into `datos`.
# NOTE(review): the absolute path is specific to one machine; adjust it before
# running the script elsewhere.
datos <- read.csv(
  file = "C:/Users/JOSELYN/Desktop/kangle/Datos Cambiados.csv",
  sep = ",",
  dec = ".",
  header = TRUE
)
# ==============================================================================
# 2. PREPARACIÓN Y LIMPIEZA DE DATOS DE DIÓXIDO DE NITRÓGENO (NO2)
# ==============================================================================
# Extract the NO2 readings, discarding placeholders for missing measurements.
# "-" is the marker used in this data set; NA and blank cells are discarded as
# well. `%in%` never returns NA, so (unlike `!=`) no NA entries leak into the
# subset, and blanks cannot turn into NA during the numeric conversion below.
no2_crudo <- datos$NO2
no2_texto <- trimws(as.character(no2_crudo))
no2 <- no2_crudo[!is.na(no2_crudo) & !(no2_texto %in% c("-", ""))]
# Sample size after cleaning
cat("Tamaño muestral del Dióxido de Nitrógeno (sin '-'):", length(no2), "\n")
# Recorded output of the original run: sample size 25946
# Convert to numeric
no2 <- as.numeric(no2)
# ==============================================================================
# 3. CÁLCULOS PARA LA DISTRIBUCIÓN DE FRECUENCIAS DETALLADA (34 CLASES)
# ==============================================================================
# Detailed frequency distribution of NO2 over k equal-width classes.
# Produces the class limits (Li, Ls), class marks (MC), absolute and relative
# frequencies (ni, hi), their cumulative versions and the final table
# TDF_no2_completa (classes plus a totals row).
N <- length(no2)
min_no2 <- min(no2)
max_no2 <- max(no2)
R <- max_no2 - min_no2
# Number of classes (k): kept at 34 to replicate the original structure
k_detallado <- 34
# Class width (A)
A <- R / k_detallado
# Build all k + 1 cut points in one call. Taking Li and Ls from the same vector
# guarantees that both have exactly k elements and that every upper limit is
# identical to the next lower limit, which two independent seq(by = A) calls
# do not guarantee.
limites <- seq(from = min_no2, to = max_no2, length.out = k_detallado + 1)
# Round data and limits to 3 decimals before comparing values against limits
no2 <- round(no2, 3)
limites <- round(limites, 3)
Li <- limites[-length(limites)]
Ls <- limites[-1]
# Class marks (MC)
MC <- (Li + Ls) / 2
# Absolute frequencies (ni). Classes are right-open, [Li, Ls), except the last
# one, which is closed, [Li, Ls], so that the maximum is counted
# (rightmost.closed = TRUE). tabulate() ignores indices outside 1..k.
ni <- tabulate(
  findInterval(no2, limites, rightmost.closed = TRUE),
  nbins = k_detallado
)
# Relative (percentage) and cumulative frequencies
hi <- (ni / N) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))
# Interval labels: ")" for every class except the last, which is closed ("]")
cierre <- c(rep(")", k_detallado - 1), "]")
Intervalo <- paste0("[", round(Li, 2), " - ", round(Ls, 2), cierre)
# Frequency distribution table (TDF)
TDF_no2 <- data.frame(
  Intervalo = Intervalo,
  MC = round(MC, 2),
  ni = ni,
  hi = round(hi, 2),
  Ni_ascendente = Ni_asc,
  Ni_descendente = Ni_desc,
  Hi_ascendente = round(Hi_asc, 2),
  Hi_descendente = round(Hi_desc, 2)
)
# Totals row; "-" marks the columns where a total is not meaningful
totales <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(ni),
  hi = sum(hi),
  Ni_ascendente = "-",
  Ni_descendente = "-",
  Hi_ascendente = "-",
  Hi_descendente = "-"
)
TDF_no2_completa <- rbind(TDF_no2, totales)
# ==============================================================================
# 4. TABLA DE FRECUENCIAS DETALLADA (Tabla Nro. 1)
# ==============================================================================
# Render Table 1 (detailed frequency distribution) with gt: styling options
# first, then the heading and the source note.
TDF_no2_completa %>%
  gt() %>%
  tab_options(
    # Outer frame of the table
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label separators
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Table body: striped rows, gray inner lines, black closing line
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  tab_header(
    subtitle = "Distribución de frecuencia de concentración de Dióxido de Nitrógeno (NO2), estudio calidad del aire en India entre 2015-2020",
    title = "Tabla Nro. 1"
  ) %>%
  tab_source_note(
    source_note = "Autor: Grupo 2\n Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india"
  )
| Tabla Nro. 1 |
| Distribución de frecuencia de concentración de Dióxido de Nitrógeno (NO2), estudio calidad del aire en India entre 2015-2020 |
| Intervalo |
MC |
ni |
hi |
Ni_ascendente |
Ni_descendente |
Hi_ascendente |
Hi_descendente |
| [0.01 - 10.66) |
5.34 |
5484 |
21.14 |
5484 |
25946 |
21.14 |
100 |
| [10.66 - 21.32) |
15.99 |
7277 |
28.05 |
12761 |
20462 |
49.18 |
78.86 |
| [21.32 - 31.97) |
26.64 |
4901 |
18.89 |
17662 |
13185 |
68.07 |
50.82 |
| [31.97 - 42.62) |
37.3 |
3096 |
11.93 |
20758 |
8284 |
80 |
31.93 |
| [42.62 - 53.27) |
47.95 |
1963 |
7.57 |
22721 |
5188 |
87.57 |
20 |
| [53.27 - 63.93) |
58.6 |
1227 |
4.73 |
23948 |
3225 |
92.3 |
12.43 |
| [63.93 - 74.58) |
69.25 |
721 |
2.78 |
24669 |
1998 |
95.08 |
7.7 |
| [74.58 - 85.23) |
79.91 |
469 |
1.81 |
25138 |
1277 |
96.89 |
4.92 |
| [85.23 - 95.89) |
90.56 |
253 |
0.98 |
25391 |
808 |
97.86 |
3.11 |
| [95.89 - 106.54) |
101.21 |
165 |
0.64 |
25556 |
555 |
98.5 |
2.14 |
| [106.54 - 117.19) |
111.87 |
132 |
0.51 |
25688 |
390 |
99.01 |
1.5 |
| [117.19 - 127.84) |
122.52 |
62 |
0.24 |
25750 |
258 |
99.24 |
0.99 |
| [127.84 - 138.5) |
133.17 |
52 |
0.20 |
25802 |
196 |
99.45 |
0.76 |
| [138.5 - 149.15) |
143.82 |
36 |
0.14 |
25838 |
144 |
99.58 |
0.55 |
| [149.15 - 159.8) |
154.48 |
26 |
0.10 |
25864 |
108 |
99.68 |
0.42 |
| [159.8 - 170.46) |
165.13 |
22 |
0.08 |
25886 |
82 |
99.77 |
0.32 |
| [170.46 - 181.11) |
175.78 |
20 |
0.08 |
25906 |
60 |
99.85 |
0.23 |
| [181.11 - 191.76) |
186.44 |
7 |
0.03 |
25913 |
40 |
99.87 |
0.15 |
| [191.76 - 202.42) |
197.09 |
6 |
0.02 |
25919 |
33 |
99.9 |
0.13 |
| [202.42 - 213.07) |
207.74 |
8 |
0.03 |
25927 |
27 |
99.93 |
0.1 |
| [213.07 - 223.72) |
218.4 |
5 |
0.02 |
25932 |
19 |
99.95 |
0.07 |
| [223.72 - 234.38) |
229.05 |
3 |
0.01 |
25935 |
14 |
99.96 |
0.05 |
| [234.38 - 245.03) |
239.7 |
5 |
0.02 |
25940 |
11 |
99.98 |
0.04 |
| [245.03 - 255.68) |
250.35 |
1 |
0.00 |
25941 |
6 |
99.98 |
0.02 |
| [255.68 - 266.33) |
261.01 |
0 |
0.00 |
25941 |
5 |
99.98 |
0.02 |
| [266.33 - 276.99) |
271.66 |
2 |
0.01 |
25943 |
5 |
99.99 |
0.02 |
| [276.99 - 287.64) |
282.31 |
1 |
0.00 |
25944 |
3 |
99.99 |
0.01 |
| [287.64 - 298.29) |
292.97 |
1 |
0.00 |
25945 |
2 |
100 |
0.01 |
| [298.29 - 308.94) |
303.62 |
0 |
0.00 |
25945 |
1 |
100 |
0 |
| [308.94 - 319.6) |
314.27 |
0 |
0.00 |
25945 |
1 |
100 |
0 |
| [319.6 - 330.25) |
324.92 |
0 |
0.00 |
25945 |
1 |
100 |
0 |
| [330.25 - 340.9) |
335.58 |
0 |
0.00 |
25945 |
1 |
100 |
0 |
| [340.9 - 351.56) |
346.23 |
0 |
0.00 |
25945 |
1 |
100 |
0 |
| [351.56 - 362.21] |
356.88 |
1 |
0.00 |
25946 |
1 |
100 |
0 |
| Totales |
- |
25946 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 2
Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
# ==============================================================================
# 5. CÁLCULOS PARA LA DISTRIBUCIÓN SIMPLIFICADA (breaks = 13 SUGERIDO; hist() GENERA 19 CLASES)
# ==============================================================================
# Simplified frequency distribution taken from hist().
# `breaks = 13` is only a suggestion: hist() chooses "pretty" cut points, so
# the resulting number of classes can differ from 13.
Histograma_no2 <- hist(no2, breaks = 13, plot = FALSE) # compute only, no plot
# Lower limits, upper limits, class marks and counts of the simplified classes
cortes <- Histograma_no2$breaks
Lis <- cortes[-length(cortes)]
Lss <- cortes[-1]
MCs <- (Lis + Lss) / 2
nis <- Histograma_no2$counts
# Relative (percentage) and cumulative frequencies
his <- (nis / N) * 100
Nis_asc <- cumsum(nis)
His_asc <- cumsum(his)
Nis_desc <- rev(cumsum(rev(nis)))
His_desc <- rev(cumsum(rev(his)))
# hist() defaults to right = TRUE and include.lowest = TRUE, so the counts in
# `nis` correspond to right-closed classes (a, b], with the first class closed
# on both sides. The labels are built to match what was actually counted.
apertura <- c("[", rep("(", length(Lis) - 1))
# check.names = FALSE keeps the final column names (with "%", spaces and
# parentheses) as written, so no renaming step is needed afterwards.
TDF_no2simplificado <- data.frame(
  Intervalo = paste0(apertura, round(Lis, 2), " - ", round(Lss, 2), "]"),
  MC = round(MCs, 2),
  ni = nis,
  `hi(%)` = round(his, 2),
  Ni_asc = Nis_asc,
  `Hi_asc (%)` = round(His_asc, 2),
  Ni_desc = Nis_desc,
  `Hi_desc (%)` = round(His_desc, 2),
  check.names = FALSE
)
# Totals row; "-" marks the columns where a total is not meaningful
totaless <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(nis),
  `hi(%)` = sum(his),
  Ni_asc = "-",
  `Hi_asc (%)` = "-",
  Ni_desc = "-",
  `Hi_desc (%)` = "-",
  check.names = FALSE
)
# Append the totals row to the class rows
TDF_no2simplificado_completa <- rbind(TDF_no2simplificado, totaless)
# ==============================================================================
# 6. TABLA DE FRECUENCIAS SIMPLIFICADA (Tabla Nro. 2)
# ==============================================================================
# Render Table 2 (simplified frequency distribution) with gt: styling options
# first, then the heading and the source note.
TDF_no2simplificado_completa %>%
  gt() %>%
  tab_options(
    # Outer frame of the table
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column-label separators
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Table body: striped rows, gray inner lines, black closing line
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  ) %>%
  tab_header(
    subtitle = "Distribución de frecuencia simplificada de concentración de Dióxido de Nitrógeno (NO2), estudio calidad del aire en India entre 2015-2020",
    title = "Tabla Nro. 2"
  ) %>%
  tab_source_note(
    source_note = "Autor: Grupo 2\n Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india"
  )
| Tabla Nro. 2 |
| Distribución de frecuencia simplificada de concentración de Dióxido de Nitrógeno (NO2), estudio calidad del aire en India entre 2015-2020 |
| Intervalo |
MC |
ni |
hi(%) |
Ni_asc |
Hi_asc (%) |
Ni_desc |
Hi_desc (%) |
| [0 - 20) |
10 |
12018 |
46.32 |
12018 |
46.32 |
25946 |
100 |
| [20 - 40) |
30 |
8096 |
31.20 |
20114 |
77.52 |
13928 |
53.68 |
| [40 - 60) |
50 |
3436 |
13.24 |
23550 |
90.77 |
5832 |
22.48 |
| [60 - 80) |
70 |
1393 |
5.37 |
24943 |
96.13 |
2396 |
9.23 |
| [80 - 100) |
90 |
511 |
1.97 |
25454 |
98.1 |
1003 |
3.87 |
| [100 - 120) |
110 |
250 |
0.96 |
25704 |
99.07 |
492 |
1.9 |
| [120 - 140) |
130 |
101 |
0.39 |
25805 |
99.46 |
242 |
0.93 |
| [140 - 160) |
150 |
59 |
0.23 |
25864 |
99.68 |
141 |
0.54 |
| [160 - 180) |
170 |
41 |
0.16 |
25905 |
99.84 |
82 |
0.32 |
| [180 - 200) |
190 |
12 |
0.05 |
25917 |
99.89 |
41 |
0.16 |
| [200 - 220) |
210 |
13 |
0.05 |
25930 |
99.94 |
29 |
0.11 |
| [220 - 240) |
230 |
9 |
0.03 |
25939 |
99.97 |
16 |
0.06 |
| [240 - 260) |
250 |
2 |
0.01 |
25941 |
99.98 |
7 |
0.03 |
| [260 - 280) |
270 |
3 |
0.01 |
25944 |
99.99 |
5 |
0.02 |
| [280 - 300) |
290 |
1 |
0.00 |
25945 |
100 |
2 |
0.01 |
| [300 - 320) |
310 |
0 |
0.00 |
25945 |
100 |
1 |
0 |
| [320 - 340) |
330 |
0 |
0.00 |
25945 |
100 |
1 |
0 |
| [340 - 360) |
350 |
0 |
0.00 |
25945 |
100 |
1 |
0 |
| [360 - 380) |
370 |
1 |
0.00 |
25946 |
100 |
1 |
0 |
| Totales |
- |
25946 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 2
Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
# ==============================================================================
# 7. GENERACIÓN DE GRÁFICOS
# ==============================================================================
# ==============================================================================
# 7. GRÁFICA N°1: Histograma Local
# ==============================================================================
# NOTE: the escape codes for special characters (\u00E1, \u00B0, \u00B5) are
# kept so the titles render the same across different R environments.
# Graph 1 ("local" histogram): the y axis stops at the tallest class.
# The stored Histograma_no2 object is plotted instead of calling hist() on the
# data again, so the bars always match the counts (`nis`) and the breaks used
# for the y limit and the custom x axis below.
plot(Histograma_no2,
  main = "Gr\u00E1fica N\u00B01: Distribuci\u00F3n de la Concentraci\u00F3n de Di\u00F3xido de Nitr\u00F3geno (NO2)\npresente en el estudio sobre calidad del aire en India entre 2015-2020",
  xlab = "NO2 (\u00B5g/m\u00B3)",
  ylab = "Cantidad",
  # max(nis) is the maximum height for the "local" histogram
  ylim = c(0, max(nis)),
  col = "lightsalmon",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  # Default x axis suppressed; it is drawn at the class limits below
  xaxt = "n"
)
axis(1,
  at = Histograma_no2$breaks,
  labels = round(Histograma_no2$breaks, 0),
  las = 1,
  cex.axis = 0.9
)

#
# ----------------------------------
# Gráfica N°2: Histograma Global (Frecuencia Absoluta)
# ----------------------------------
# Graph 2 ("global" histogram): same classes as Graph 1, but the y axis spans
# the whole sample size so each bar is seen relative to the total.
# The stored Histograma_no2 object is plotted instead of calling hist() on the
# data again, so the bars always match the breaks used for the x axis below.
plot(Histograma_no2,
  main = "Gr\u00E1fica N\u00B02: Distribuci\u00F3n de la Concentraci\u00F3n de Di\u00F3xido de Nitr\u00F3geno (NO2)\npresente en el estudio sobre calidad del aire en India entre 2015-2020",
  xlab = "NO2 (\u00B5g/m\u00B3)",
  ylab = "Cantidad",
  # length(no2) is the maximum height (replicating the original usage)
  ylim = c(0, length(no2)),
  col = "lightsalmon",
  cex.main = 1,
  cex.lab = 1,
  cex.axis = 0.9,
  # Default x axis suppressed; it is drawn at the class limits below
  xaxt = "n"
)
axis(1,
  at = Histograma_no2$breaks,
  labels = round(Histograma_no2$breaks, 0),
  las = 1,
  cex.axis = 0.9
)

# ==============================================================================
# 7. GRÁFICA N°3/4: Histograma Porcentual (Gráfico de Barras)
# ==============================================================================
# Graph 3/4: percentage bar chart of the simplified distribution.
# The last row of TDF_no2simplificado_completa is the "Totales" row, so it is
# dropped with head(x, -1). Unlike 1:(n - 1), this cannot produce a reversed
# index when the table has a single row.
hi_clases <- head(TDF_no2simplificado_completa$`hi(%)`, -1)
mc_clases <- head(TDF_no2simplificado_completa$MC, -1)
barplot(hi_clases,
  space = 0,
  col = "darksalmon",
  main = "Gr\u00E1fica N\u00B03/4: Distribuci\u00F3n Porcentual de la Concentraci\u00F3n de NO2, estudio\ncalidad del aire en India, 2015-2020",
  xlab = "NO2 (\u00B5g/m\u00B3)",
  ylab = "Porcentaje (%)",
  # Bars are labelled with the class marks
  names.arg = mc_clases,
  # 10% headroom above the tallest bar
  ylim = c(0, max(hi_clases) * 1.1)
)

# ==============================================================================
# 7. GRÁFICA N°5: Box Plot
# ==============================================================================
# Graph 5: horizontal box plot of the NO2 concentrations.
boxplot(
  x = no2,
  main = "Gr\u00E1fica N\u00B0 5: Distribuci\u00F3n de la concentraci\u00F3n de Di\u00F3xido de Nitr\u00F3geno (NO2),\nestudio calidad del aire en India desde 2015-2020",
  xlab = "NO2 (\u00B5g/m\u00B3)",
  border = "black",
  col = "indianred",
  horizontal = TRUE
)

#
# ==============================================================================
# 7. GRÁFICA N°6: Ojivas de Frecuencia Absoluta
# ==============================================================================
# Graph 6: ascending and descending ogives (absolute cumulative frequency).
# The ascending ogive is drawn at the upper class limits (Lss) and the
# descending one at the lower limits (Lis). xlim covers both sets of limits;
# without it the x range comes from Lss only and the first point of the
# descending ogive (at Lis[1]) falls outside the plot region and is clipped.
plot(Lss, Nis_asc,
  type = "b",
  main = "Gr\u00E1fica N\u00B06: Ojiva ascendente y descendente (Absoluta) de la\ndistribuci\u00F3n de la concentraci\u00F3n de Di\u00F3xido de Nitr\u00F3geno (NO2)",
  xlab = "NO2 (\u00B5g/m\u00B3)",
  ylab = "Cantidad",
  pch = 19,
  col = "darkred",
  xlim = range(Lis, Lss),
  ylim = c(0, N * 1.05)
)
# Descending ogive
lines(Lis, Nis_desc, type = "b", col = "black", pch = 19)

# ==============================================================================
# 7. GRÁFICA N°7: Ojivas de Frecuencia Relativa
# ==============================================================================
# Graph 7: ascending and descending ogives (relative cumulative frequency, %).
# The ascending ogive is drawn at the upper class limits (Lss) and the
# descending one at the lower limits (Lis). xlim covers both sets of limits;
# without it the x range comes from Lss only and the first point of the
# descending ogive (at Lis[1]) falls outside the plot region and is clipped.
plot(Lss, His_asc,
  type = "b",
  main = "Gr\u00E1fica N\u00B07: Ojiva ascendente y descendente (Relativa) de la distribuci\u00F3n\nde la concentraci\u00F3n de Di\u00F3xido de Nitr\u00F3geno (NO2)",
  xlab = "NO2 (\u00B5g/m\u00B3)",
  ylab = "Porcentaje %",
  col = "blue4",
  pch = 19,
  xlim = range(Lis, Lss),
  ylim = c(0, 105)
)
# Descending ogive
lines(Lis, His_desc,
  type = "b",
  col = "red4",
  pch = 19
)

#