Universidad Central del Ecuador
FIGEMPA-Ingeniería Ambiental
# Descriptive statistics
# 02/12/2025
# Lorien Arcentales

# Load packages
library(gt)
library(dplyr)

# Load the data. The file is read a single time; it used to be read twice
# with identical arguments, which only repeated the work.
datos <- read.csv(
  "city_day.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)

# Build the ozone vector. Records marked "-" carry no measurement and are
# removed; on the analysed file this changes the sample size from 29531 to
# 25509.
# NA entries are excluded explicitly: `NA != "-"` evaluates to NA, and an NA
# index would otherwise inject NA elements into the result.
o3_crudo <- datos$O3
ozono <- o3_crudo[!is.na(o3_crudo) & o3_crudo != "-"]
length(ozono)
## [1] 25509
ozono <- as.numeric(ozono)
# Drop anything that could not be parsed as a number (e.g. empty strings) so
# that min(), max() and the class limits computed later are never NA.
ozono <- ozono[!is.na(ozono)]
# Range of the data.
# The variable names `min` and `max` are kept for compatibility with the rest
# of the script; calls such as max(x) still resolve to the base functions.
min <- min(ozono)
max <- max(ozono)
R <- max - min

# Number of classes.
# NOTE(review): log() is the natural logarithm, whereas Sturges' rule is
# usually written with log10 (k = 1 + 3.3 * log10(n)). The formula is left
# unchanged because the recorded results (34 classes) depend on it -- confirm
# which base was intended.
k <- 1 + 3.3 * log(length(ozono))
k <- floor(k)

# Class width.
A <- R / k

# Interval generation.
# Lower limits are derived from the class index, so there are always exactly
# k of them; each upper limit is the next lower limit, which keeps adjacent
# classes contiguous. The last upper limit is the maximum of the data.
Li <- min + A * (seq_len(k) - 1)
Ls <- c(Li[-1], max)

# Class marks (midpoints).
MC <- (Li + Ls) / 2
# Absolute frequencies (ni).
# Data and limits are rounded to 3 decimals first so that comparisons at the
# class boundaries are not affected by floating-point noise.
ozono <- round(ozono, 3)
Li <- round(Li, 3)
Ls <- round(Ls, 3)

# Count the observations in each class. Every class is left-closed and
# right-open, [Li, Ls), except the last one, which is closed on both sides so
# that the maximum is counted.
# seq_along() is used instead of 1:length(Li) so that an empty set of limits
# produces an empty result rather than iterating over c(1, 0).
ni <- vapply(
  seq_along(Li),
  function(i) {
    if (i < length(Li)) {
      sum(ozono >= Li[i] & ozono < Ls[i])
    } else {
      sum(ozono >= Li[i] & ozono <= Ls[i])
    }
  },
  numeric(1)
)
# Helper: cumulative sum accumulated from the last element to the first.
acumular_desc <- function(x) rev(cumsum(rev(x)))

# Number of classified observations and relative frequency (%) per class.
N <- sum(ni)
hi <- 100 * (ni / N)

# Cumulative absolute and relative frequencies, ascending and descending.
Ni_asc <- cumsum(ni)
Ni_desc <- acumular_desc(ni)
Hi_asc <- cumsum(hi)
Hi_desc <- acumular_desc(hi)

# Text label of each class, with limits shown to 2 decimals. All classes are
# printed as "[a - b)"; the final one is rewritten as "[a - b]".
lim_inf <- round(Li, 2)
lim_sup <- round(Ls, 2)
Intervalo <- paste0("[", lim_inf, " - ", lim_sup, ")")
ultimo <- length(Intervalo)
Intervalo[ultimo] <- paste0(
  "[", lim_inf[length(Li)], " - ", lim_sup[length(Ls)], "]"
)
# Frequency distribution table: one row per class, values rounded to
# 2 decimals for display.
TDF_ozono <- data.frame(
  Intervalo,
  MC = round(MC, 2),
  ni,
  hi = round(hi, 2),
  Ni_ascendente = Ni_asc,
  Ni_descendente = Ni_desc,
  Hi_ascendente = round(Hi_asc, 2),
  Hi_descendente = round(Hi_desc, 2)
)

# Totals row: only ni and hi are summed; every other column shows a dash.
sin_valor <- "-"
totales <- data.frame(
  Intervalo = "Totales",
  MC = sin_valor,
  ni = sum(ni),
  hi = sum(hi),
  Ni_ascendente = sin_valor,
  Ni_descendente = sin_valor,
  Hi_ascendente = sin_valor,
  Hi_descendente = sin_valor
)

# Append the totals as the last row of the table.
TDF_ozono <- rbind(TDF_ozono, totales)
# Sanity checks on the frequency table. Lines starting with "##" are the
# output recorded when the script was run.
# Number of lower and upper class limits (both should match).
length(Li)
## [1] 34
length(Ls)
## [1] 34
# Largest observation versus the last upper limit.
max(ozono)
## [1] 257.73
max(Ls)
## [1] 257.73
# Overall range and summary statistics of the ozone data.
range(ozono)
## [1] 0.01 257.73
summary(ozono)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01 18.86 30.84 34.49 45.57 257.73
# Limits and absolute frequency of every class, side by side.
cbind(Li, Ls, ni)
## Li Ls ni
## [1,] 0.01 7.59 1437
## [2,] 7.59 15.17 2952
## [3,] 15.17 22.75 4019
## [4,] 22.75 30.33 4112
## [5,] 30.33 37.91 3614
## [6,] 37.91 45.49 2970
## [7,] 45.49 53.07 2147
## [8,] 53.07 60.65 1518
## [9,] 60.65 68.23 953
## [10,] 68.23 75.81 613
## [11,] 75.81 83.39 374
## [12,] 83.39 90.97 255
## [13,] 90.97 98.55 183
## [14,] 98.55 106.13 113
## [15,] 106.13 113.71 86
## [16,] 113.71 121.29 53
## [17,] 121.29 128.87 33
## [18,] 128.87 136.45 26
## [19,] 136.45 144.03 18
## [20,] 144.03 151.61 9
## [21,] 151.61 159.19 9
## [22,] 159.19 166.77 6
## [23,] 166.77 174.35 3
## [24,] 174.35 181.93 2
## [25,] 181.93 189.51 1
## [26,] 189.51 197.09 1
## [27,] 197.09 204.67 1
## [28,] 204.67 212.25 0
## [29,] 212.25 219.83 0
## [30,] 219.83 227.41 0
## [31,] 227.41 234.99 0
## [32,] 234.99 242.57 0
## [33,] 242.57 250.15 0
## [34,] 250.15 257.73 1
library(gt)
library(dplyr)
# Render Table 1 (frequency distribution of ozone concentration) with gt.
TDF_ozono %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 1*"),
    # No space before the closing "**": with a space, Markdown does not treat
    # it as a closing delimiter and the asterisks are printed literally.
    subtitle = md("**Distribucion de frecuencia de concentración de ozono,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    # Two trailing spaces before the newline form a Markdown hard line break;
    # a bare "\n" is rendered as a space, leaving author and source on a
    # single line.
    source_note = md("Autor: Grupo 2  \nFuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  # Black outer and heading borders, grey row separators, striped body rows.
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 1 | |||||||
| **Distribucion de frecuencia de concentración de ozono,estudio calidad del aire en India entre 2015-2020 ** | |||||||
| Intervalo | MC | ni | hi | Ni_ascendente | Ni_descendente | Hi_ascendente | Hi_descendente |
|---|---|---|---|---|---|---|---|
| [0.01 - 7.59) | 3.8 | 1437 | 5.63 | 1437 | 25509 | 5.63 | 100 |
| [7.59 - 15.17) | 11.38 | 2952 | 11.57 | 4389 | 24072 | 17.21 | 94.37 |
| [15.17 - 22.75) | 18.96 | 4019 | 15.76 | 8408 | 21120 | 32.96 | 82.79 |
| [22.75 - 30.33) | 26.54 | 4112 | 16.12 | 12520 | 17101 | 49.08 | 67.04 |
| [30.33 - 37.91) | 34.12 | 3614 | 14.17 | 16134 | 12989 | 63.25 | 50.92 |
| [37.91 - 45.49) | 41.7 | 2970 | 11.64 | 19104 | 9375 | 74.89 | 36.75 |
| [45.49 - 53.07) | 49.28 | 2147 | 8.42 | 21251 | 6405 | 83.31 | 25.11 |
| [53.07 - 60.65) | 56.86 | 1518 | 5.95 | 22769 | 4258 | 89.26 | 16.69 |
| [60.65 - 68.23) | 64.44 | 953 | 3.74 | 23722 | 2740 | 92.99 | 10.74 |
| [68.23 - 75.81) | 72.02 | 613 | 2.40 | 24335 | 1787 | 95.4 | 7.01 |
| [75.81 - 83.39) | 79.6 | 374 | 1.47 | 24709 | 1174 | 96.86 | 4.6 |
| [83.39 - 90.97) | 87.18 | 255 | 1.00 | 24964 | 800 | 97.86 | 3.14 |
| [90.97 - 98.55) | 94.76 | 183 | 0.72 | 25147 | 545 | 98.58 | 2.14 |
| [98.55 - 106.13) | 102.34 | 113 | 0.44 | 25260 | 362 | 99.02 | 1.42 |
| [106.13 - 113.71) | 109.92 | 86 | 0.34 | 25346 | 249 | 99.36 | 0.98 |
| [113.71 - 121.29) | 117.5 | 53 | 0.21 | 25399 | 163 | 99.57 | 0.64 |
| [121.29 - 128.87) | 125.08 | 33 | 0.13 | 25432 | 110 | 99.7 | 0.43 |
| [128.87 - 136.45) | 132.66 | 26 | 0.10 | 25458 | 77 | 99.8 | 0.3 |
| [136.45 - 144.03) | 140.24 | 18 | 0.07 | 25476 | 51 | 99.87 | 0.2 |
| [144.03 - 151.61) | 147.82 | 9 | 0.04 | 25485 | 33 | 99.91 | 0.13 |
| [151.61 - 159.19) | 155.4 | 9 | 0.04 | 25494 | 24 | 99.94 | 0.09 |
| [159.19 - 166.77) | 162.98 | 6 | 0.02 | 25500 | 15 | 99.96 | 0.06 |
| [166.77 - 174.35) | 170.56 | 3 | 0.01 | 25503 | 9 | 99.98 | 0.04 |
| [174.35 - 181.93) | 178.14 | 2 | 0.01 | 25505 | 6 | 99.98 | 0.02 |
| [181.93 - 189.51) | 185.72 | 1 | 0.00 | 25506 | 4 | 99.99 | 0.02 |
| [189.51 - 197.09) | 193.3 | 1 | 0.00 | 25507 | 3 | 99.99 | 0.01 |
| [197.09 - 204.67) | 200.88 | 1 | 0.00 | 25508 | 2 | 100 | 0.01 |
| [204.67 - 212.25) | 208.46 | 0 | 0.00 | 25508 | 1 | 100 | 0 |
| [212.25 - 219.83) | 216.04 | 0 | 0.00 | 25508 | 1 | 100 | 0 |
| [219.83 - 227.41) | 223.62 | 0 | 0.00 | 25508 | 1 | 100 | 0 |
| [227.41 - 234.99) | 231.2 | 0 | 0.00 | 25508 | 1 | 100 | 0 |
| [234.99 - 242.57) | 238.78 | 0 | 0.00 | 25508 | 1 | 100 | 0 |
| [242.57 - 250.15) | 246.36 | 0 | 0.00 | 25508 | 1 | 100 | 0 |
| [250.15 - 257.73] | 253.94 | 1 | 0.00 | 25509 | 1 | 100 | 0 |
| Totales | - | 25509 | 100.00 | - | - | - | - |
| Autor: Grupo 2 Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india | |||||||
# SIMPLIFICATION PROCESS
# Create the histogram object the simplified table is derived from. It was
# referenced below without being created in this script; plot = FALSE
# computes breaks and counts without drawing anything.
# NOTE(review): hist() counts right-closed classes (a, b] by default, while
# the simplified table labels its classes as [a, b) -- confirm which
# convention is intended.
Histograma_ozono <- hist(ozono, plot = FALSE)

# Simplified elements.
# Lower limits are all breaks but the last; upper limits are all breaks but
# the first. Indexing relative to the number of breaks avoids hard-coding 13
# classes.
n_cortes <- length(Histograma_ozono$breaks)
Lis <- Histograma_ozono$breaks[-n_cortes]
Lss <- Histograma_ozono$breaks[-1]
# Class marks are the midpoints of each class. The previous expression
# averaged the lower limit with itself, so MC repeated the lower limit.
MCs <- (Lis + Lss) / 2
nis <- Histograma_ozono$counts
# Relative frequency (%) using N, the total computed for the first table.
his <- (nis / N) * 100
# Ascending and descending cumulative frequencies.
Nis_asc <- cumsum(nis)
His_asc <- cumsum(his)
Nis_desc <- rev(cumsum(rev(nis)))
His_desc <- rev(cumsum(rev(his)))
# Simplified frequency table. The display column names are given directly;
# check.names = FALSE keeps names such as "hi(%)" exactly as written.
# NOTE(review): every class, including the last, is labelled "[a - b)" --
# confirm this matches how Histograma_ozono counted the observations.
TDF_ozonosimplificado <- data.frame(
  Intervalo = paste0("[", round(Lis, 2), " - ", round(Lss, 2), ")"),
  MC = round(MCs, 2),
  ni = nis,
  `hi(%)` = round(his, 2),
  Ni_asc = Nis_asc,
  `Hi_asc (%)` = round(His_asc, 2),
  Ni_desc = Nis_desc,
  `Hi_desc (%)` = round(His_desc, 2),
  check.names = FALSE
)
# Totals row for the simplified table, built with the same display column
# names as the table itself. Only ni and hi(%) are summed; the cumulative
# columns and MC show a dash.
totaless <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(nis),
  `hi(%)` = sum(his),
  Ni_asc = "-",
  `Hi_asc (%)` = "-",
  Ni_desc = "-",
  `Hi_desc (%)` = "-",
  check.names = FALSE
)

# Append the totals as the last row of the table.
TDF_ozonosimplificado <- rbind(TDF_ozonosimplificado, totaless)
# Render Table 2 (simplified frequency distribution of ozone) with gt.
TDF_ozonosimplificado %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 2*"),
    # No space before the closing "**": with a space, Markdown does not treat
    # it as a closing delimiter and the asterisks are printed literally.
    subtitle = md("**Distribucion de frecuencia simplificado de concentración de ozono,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    # Two trailing spaces before the newline form a Markdown hard line break;
    # a bare "\n" is rendered as a space, leaving author and source on a
    # single line.
    source_note = md("Autor: Grupo 2  \nFuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  # Black outer and heading borders, grey row separators, striped body rows.
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 2 | |||||||
| **Distribucion de frecuencia simplificado de concentración de ozono,estudio calidad del aire en India entre 2015-2020 ** | |||||||
| Intervalo | MC | ni | hi(%) | Ni_asc | Hi_asc (%) | Ni_desc | Hi_desc (%) |
|---|---|---|---|---|---|---|---|
| [0 - 20) | 0 | 7022 | 27.53 | 7022 | 27.53 | 25509 | 100 |
| [20 - 40) | 20 | 10006 | 39.23 | 17028 | 66.75 | 18487 | 72.47 |
| [40 - 60) | 40 | 5647 | 22.14 | 22675 | 88.89 | 8481 | 33.25 |
| [60 - 80) | 60 | 1896 | 7.43 | 24571 | 96.32 | 2834 | 11.11 |
| [80 - 100) | 80 | 600 | 2.35 | 25171 | 98.67 | 938 | 3.68 |
| [100 - 120) | 100 | 224 | 0.88 | 25395 | 99.55 | 338 | 1.33 |
| [120 - 140) | 120 | 75 | 0.29 | 25470 | 99.85 | 114 | 0.45 |
| [140 - 160) | 140 | 24 | 0.09 | 25494 | 99.94 | 39 | 0.15 |
| [160 - 180) | 160 | 11 | 0.04 | 25505 | 99.98 | 15 | 0.06 |
| [180 - 200) | 180 | 2 | 0.01 | 25507 | 99.99 | 4 | 0.02 |
| [200 - 220) | 200 | 1 | 0.00 | 25508 | 100 | 2 | 0.01 |
| [220 - 240) | 220 | 0 | 0.00 | 25508 | 100 | 1 | 0 |
| [240 - 260) | 240 | 1 | 0.00 | 25509 | 100 | 1 | 0 |
| Totales | - | 25509 | 100.00 | - | - | - | - |
| Autor: Grupo 2 Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india | |||||||
# CHARTS
# Local histogram: the count axis stops at the largest class frequency.
titulo_hist_local <- paste0(
  "Gráfica N°1: Distribución de la Concentración de Ozono\n",
  "presente en el estudio sobre calidad del aire en India entre 2015-2020 "
)
hist(
  ozono,
  breaks = 11,
  col = "lightskyblue",
  main = titulo_hist_local,
  xlab = " Ozono (µg/m3)",
  ylab = "Cantidad",
  ylim = c(0, max(nis)),
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n" # default x axis suppressed; drawn manually below
)
# Draw the x axis with one tick per break of the stored histogram object.
cortes_local <- Histograma_ozono$breaks
axis(
  side = 1,
  at = cortes_local,
  labels = cortes_local,
  las = 1,
  cex.axis = 0.9
)
# Global histogram: the count axis spans the whole sample size.
titulo_hist_global <- paste0(
  "Gráfica N°2:Distribución de la Concentración de Ozono\n",
  "presente en el estudio sobre calidad del aire en India entre 2015-2020"
)
hist(
  ozono,
  breaks = 11,
  col = "lightskyblue",
  main = titulo_hist_global,
  xlab = "Ozono (µg/m3)",
  ylab = "Cantidad",
  ylim = c(0, length(ozono)),
  cex.main = 1,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n" # default x axis suppressed; drawn manually below
)
# Draw the x axis with one tick per break of the stored histogram object.
cortes_global <- Histograma_ozono$breaks
axis(
  side = 1,
  at = cortes_global,
  labels = cortes_global,
  las = 1,
  cex.axis = 0.9
)
# Global percentage histogram: the vertical axis spans the full 0-100 % scale.
# Make sure the percentage column is numeric. The assignment now targets the
# existing `hi(%)` column; it previously wrote to `hi (%)` (with a space),
# which silently added a new, unused column to the table.
TDF_ozonosimplificado$`hi(%)` <- as.numeric(TDF_ozonosimplificado$`hi(%)`)
# Rows holding classes: everything except the last row (the totals).
# seq_len() yields an empty index when only the totals row is present.
filas_clase <- seq_len(nrow(TDF_ozonosimplificado) - 1)
barplot(
  TDF_ozonosimplificado$`hi(%)`[filas_clase],
  space = 0, # adjacent bars, histogram-like
  col = "skyblue",
  main = "Gráfica N°3:Distribución de la Concentración de Ozono, estudio
calidad del aire en India, 2015-2020",
  xlab = "Ozono (µg/m3)",
  ylab = "Porcentaje (%)",
  names.arg = TDF_ozonosimplificado$MC[filas_clase],
  ylim = c(0, 100)
)
# Local percentage histogram: the vertical axis is scaled to the data.
n <- as.numeric(nrow(TDF_ozonosimplificado))
# Rows holding classes: everything except the last row (the totals).
# seq_len() is used instead of 1:(n - 1), which would yield c(1, 0) if the
# table held only the totals row.
filas_clase_local <- seq_len(n - 1)
barplot(
  TDF_ozonosimplificado$`hi(%)`[filas_clase_local],
  space = 0, # adjacent bars, histogram-like
  main = "Gráfica No. 4:Distribución concentración de Ozono en el estudio
calidad del aire en India, 2015-2020",
  ylab = "Porcentaje (%)",
  xlab = "Ozono (µg/m3)",
  names.arg = TDF_ozonosimplificado$MC[filas_clase_local],
  col = "skyblue"
)
# Box plot of the ozone concentration, drawn horizontally. The statistics
# returned by boxplot() are kept in Cajaozono.
# TRUE is spelled out: the shorthand T is an ordinary variable that can be
# reassigned.
Cajaozono <- boxplot(
  ozono,
  horizontal = TRUE,
  col = "turquoise",
  border = "black",
  main = "Gráfica No. 5: Distribución de la concentración de ozono,
estudio calidad del aire en India desde 2015-2020",
  xlab = "Ozono (µg/m3)"
)
# Ascending ogive (cumulative counts plotted against the upper limits).
# The axis limits cover both curves: by default plot() scales the axes to the
# ascending series only, which cut off the descending points lying below the
# smallest ascending count or left of the first upper limit.
plot(
  Lss, Nis_asc,
  type = "b",
  main = "Gráfica N°6:Ojiva ascendente y descendente de la
distribución local del concentración de ozono",
  xlab = "Ozono (µg/m3)",
  ylab = "Cantidad",
  pch = 19,
  col = "turquoise",
  xlim = range(Lis, Lss),
  ylim = range(0, Nis_asc, Nis_desc)
)
# Descending ogive (plotted against the lower limits), added in black.
lines(Lis, Nis_desc, type = "b", col = "black", pch = 19)
# Ascending ogive (cumulative percentages plotted against the upper limits).
# The axis limits cover both curves: by default plot() scales the axes to the
# ascending series only, which cut off the descending points lying below the
# smallest ascending percentage or left of the first upper limit.
plot(
  Lss, His_asc,
  type = "b",
  main = " Gráfica N°7:Ojiva ascendete y descendete de la distribución
de la concentración de Ozono",
  xlab = "Ozono(µg/m3)",
  ylab = "Porcentaje %",
  col = "blue",
  pch = 19,
  xlim = range(Lis, Lss),
  ylim = c(0, 100)
)
# Descending ogive (plotted against the lower limits), added in red.
lines(
  Lis, His_desc,
  type = "b",
  col = "red",
  pch = 19
)