Universidad Central del Ecuador
FIGEMPA-Ingeniería Ambiental
# Read the air-quality dataset: comma-separated, "." as decimal mark,
# first row holds the column names.
datos <- read.csv("city_day.csv", header = TRUE, sep = ",", dec = ".")
# Build the xylene vector: drop the entries recorded as "-" and convert
# the remaining values to numeric.
xyleno <- as.numeric(datos$Xylene[datos$Xylene != "-"])
#Estadística descriptiva
#5/12/2025
#Lorien Arcentales
#Carga de paquetes
library(gt)
library(dplyr)
# Read city_day.csv (comma-separated, "." as decimal mark, with header).
datos <- read.csv("city_day.csv", sep = ",", dec = ".", header = TRUE)
# Entries recorded as "-" in the Xylene column are non-existent values;
# removing them changes the sample size from 29531 to 11422.
xyleno <- datos[["Xylene"]]
xyleno <- xyleno[xyleno != "-"]
length(xyleno)
## [1] 11422
xyleno <- as.numeric(xyleno)
# Extremes and range of the data.
# NOTE: the variables `min` and `max` share their names with the base
# functions; calls such as min(x) keep working because R skips
# non-function objects when it looks up a function.
min <- min(xyleno)
max <- max(xyleno)
R <- max - min
# Number of classes with the Sturges-type rule k = 1 + 3.3 * log10(n).
# R's log() is the natural logarithm, so log10() has to be requested
# explicitly (for n = 11422: 14 classes with log10, 31 with log()).
k <- floor(1 + 3.3 * log10(length(xyleno)))
# Class width.
A <- R / k
# Interval generation.
# Lower limits: k equally spaced values starting at min with step A.
# Building them from k (instead of two independent seq() calls bounded by
# max - A) guarantees that Li and Ls always have the same length and also
# works when k is 1.
Li <- min + A * (seq_len(k) - 1)
# Upper limits: each class ends where the next one starts; the last class
# ends exactly at max.
Ls <- c(Li[-1], max)
# Class marks (midpoints).
MC <- (Li + Ls) / 2
# Absolute frequencies (ni).
# Round the xylene data and both sets of limits to 3 decimals before
# counting.
xyleno <- round(xyleno, 3)
Li <- round(Li, 3)
Ls <- round(Ls, 3)
# Count the observations in each class: [Li, Ls) for every class except
# the last one, which is closed on the right so the maximum is included.
# seq_along() avoids the 1:length() pitfall and vapply() fixes the result
# type to one numeric count per class.
ni <- vapply(
  seq_along(Li),
  function(j) {
    bajo_limite_superior <- if (j < length(Li)) {
      xyleno < Ls[j]
    } else {
      xyleno <= Ls[j]
    }
    sum(xyleno >= Li[j] & bajo_limite_superior)
  },
  numeric(1)
)
# Sample size as the sum of the class counts.
N <- sum(ni)
# Relative frequency of each class, in percent.
hi <- 100 * (ni / N)
# Cumulative frequencies: ascending (from the first class) and descending
# (from the last class), for counts and for percentages.
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
# Class labels "[lower - upper)"; only the last class closes with "]".
Intervalo <- paste0(
  "[", round(Li, 2), " - ", round(Ls, 2),
  rep(c(")", "]"), times = c(length(Li) - 1, 1))
)
# Totals row: sums for ni and hi, "-" where a total does not apply.
totales <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(ni),
  hi = sum(hi),
  Ni_ascendente = "-",
  Ni_descendente = "-",
  Hi_ascendente = "-",
  Hi_descendente = "-"
)
# Frequency distribution table: one row per class (values rounded to two
# decimals for display) with the totals row appended at the end.
TDF_xyleno <- rbind(
  data.frame(
    Intervalo = Intervalo,
    MC = round(MC, 2),
    ni = ni,
    hi = round(hi, 2),
    Ni_ascendente = Ni_asc,
    Ni_descendente = Ni_desc,
    Hi_ascendente = round(Hi_asc, 2),
    Hi_descendente = round(Hi_desc, 2)
  ),
  totales
)
library(gt)
library(dplyr)
# Render Table 1 with gt: Markdown header, source note and border styling.
TDF_xyleno %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 1*"),
    # The closing ** must touch the last word: with a space in front of it
    # Markdown does not parse the text as bold and prints the asterisks.
    subtitle = md("**Distribucion de frecuencia de concentración de xyleno,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 2\n Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 1 |
| **Distribucion de frecuencia de concentración de xyleno,estudio calidad del aire en India entre 2015-2020 ** |
| Intervalo |
MC |
ni |
hi |
Ni_ascendente |
Ni_descendente |
Hi_ascendente |
Hi_descendente |
| [0 - 5.5) |
2.75 |
9523 |
83.37 |
9523 |
11422 |
83.37 |
100 |
| [5.5 - 10.99) |
8.24 |
1178 |
10.31 |
10701 |
1899 |
93.69 |
16.63 |
| [10.99 - 16.49) |
13.74 |
367 |
3.21 |
11068 |
721 |
96.9 |
6.31 |
| [16.49 - 21.98) |
19.24 |
167 |
1.46 |
11235 |
354 |
98.36 |
3.1 |
| [21.98 - 27.48) |
24.73 |
84 |
0.74 |
11319 |
187 |
99.1 |
1.64 |
| [27.48 - 32.98) |
30.23 |
37 |
0.32 |
11356 |
103 |
99.42 |
0.9 |
| [32.98 - 38.47) |
35.72 |
11 |
0.10 |
11367 |
66 |
99.52 |
0.58 |
| [38.47 - 43.97) |
41.22 |
18 |
0.16 |
11385 |
55 |
99.68 |
0.48 |
| [43.97 - 49.46) |
46.71 |
9 |
0.08 |
11394 |
37 |
99.75 |
0.32 |
| [49.46 - 54.96) |
52.21 |
9 |
0.08 |
11403 |
28 |
99.83 |
0.25 |
| [54.96 - 60.45) |
57.71 |
5 |
0.04 |
11408 |
19 |
99.88 |
0.17 |
| [60.45 - 65.95) |
63.2 |
3 |
0.03 |
11411 |
14 |
99.9 |
0.12 |
| [65.95 - 71.44) |
68.7 |
0 |
0.00 |
11411 |
11 |
99.9 |
0.1 |
| [71.44 - 76.94) |
74.19 |
1 |
0.01 |
11412 |
11 |
99.91 |
0.1 |
| [76.94 - 82.44) |
79.69 |
1 |
0.01 |
11413 |
10 |
99.92 |
0.09 |
| [82.44 - 87.93) |
85.18 |
1 |
0.01 |
11414 |
9 |
99.93 |
0.08 |
| [87.93 - 93.43) |
90.68 |
1 |
0.01 |
11415 |
8 |
99.94 |
0.07 |
| [93.43 - 98.92) |
96.18 |
1 |
0.01 |
11416 |
7 |
99.95 |
0.06 |
| [98.92 - 104.42) |
101.67 |
0 |
0.00 |
11416 |
6 |
99.95 |
0.05 |
| [104.42 - 109.92) |
107.17 |
2 |
0.02 |
11418 |
6 |
99.96 |
0.05 |
| [109.92 - 115.41) |
112.66 |
0 |
0.00 |
11418 |
4 |
99.96 |
0.04 |
| [115.41 - 120.91) |
118.16 |
1 |
0.01 |
11419 |
4 |
99.97 |
0.04 |
| [120.91 - 126.4) |
123.66 |
1 |
0.01 |
11420 |
3 |
99.98 |
0.03 |
| [126.4 - 131.9) |
129.15 |
0 |
0.00 |
11420 |
2 |
99.98 |
0.02 |
| [131.9 - 137.4) |
134.65 |
0 |
0.00 |
11420 |
2 |
99.98 |
0.02 |
| [137.4 - 142.89) |
140.14 |
1 |
0.01 |
11421 |
2 |
99.99 |
0.02 |
| [142.89 - 148.39) |
145.64 |
0 |
0.00 |
11421 |
1 |
99.99 |
0.01 |
| [148.39 - 153.88) |
151.13 |
0 |
0.00 |
11421 |
1 |
99.99 |
0.01 |
| [153.88 - 159.38) |
156.63 |
0 |
0.00 |
11421 |
1 |
99.99 |
0.01 |
| [159.38 - 164.87) |
162.13 |
0 |
0.00 |
11421 |
1 |
99.99 |
0.01 |
| [164.87 - 170.37] |
167.62 |
1 |
0.01 |
11422 |
1 |
100 |
0.01 |
| Totales |
- |
11422 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 2
Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
# SIMPLIFICATION PROCESS
# Compute the histogram classes without drawing anything. The resulting
# object supplies the `breaks` and `counts` used by the simplified table
# and by the plot axes further down; without this assignment every use of
# Histograma_xyleno fails with "object not found".
# NOTE(review): hist() closes classes on the right by default
# (right = TRUE), while the labels built below read "[a - b)" — confirm
# which convention is wanted.
Histograma_xyleno <- hist(xyleno, plot = FALSE)
# Simplified elements: lower limits, upper limits and class marks.
breaks <- Histograma_xyleno$breaks
Lis <- breaks[-length(breaks)]
Lss <- breaks[-1]
MCs <- (Lis + Lss) / 2
# Absolute and relative (percent) frequencies.
nis <- Histograma_xyleno$counts
N <- length(xyleno)
his <- (nis / N) * 100
# Ascending and descending cumulative frequencies.
Nis_asc <- cumsum(nis)
His_asc <- cumsum(his)
Nis_desc <- rev(cumsum(rev(nis)))
His_desc <- rev(cumsum(rev(his)))
# Class labels; the last class is written as closed on the right.
Intervalos <- paste0("[", round(Lis, 2), " - ", round(Lss, 2), ")")
Intervalos[length(Intervalos)] <- paste0(
  "[", round(Lis[length(Lis)], 2), " - ", round(Lss[length(Lss)], 2), "]"
)
# Simplified frequency table. Body and totals row are created directly
# with the final column names (check.names = FALSE keeps names such as
# "hi(%)" untouched) and in the same column order, so no positional
# renaming is needed and rbind() lines the columns up by name.
TDF_xylenosimplificado <- data.frame(
  Intervalo = Intervalos,
  MC = round(MCs, 2),
  ni = nis,
  `hi(%)` = round(his, 2),
  Ni_asc = Nis_asc,
  `Hi_asc (%)` = round(His_asc, 2),
  Ni_desc = Nis_desc,
  `Hi_desc (%)` = round(His_desc, 2),
  check.names = FALSE
)
# Totals row: sums of ni and hi (%), "-" where a total does not apply.
totaless <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(nis),
  `hi(%)` = sum(his),
  Ni_asc = "-",
  `Hi_asc (%)` = "-",
  Ni_desc = "-",
  `Hi_desc (%)` = "-",
  check.names = FALSE
)
# Append the totals at the end of the table.
TDF_xylenosimplificado <- rbind(TDF_xylenosimplificado, totaless)
# Table 2: render the simplified frequency table with gt (Markdown
# header, source note and border styling).
TDF_xylenosimplificado %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 2*"),
    # The closing ** must touch the last word: with a space in front of it
    # Markdown does not parse the text as bold and prints the asterisks.
    subtitle = md("**Distribucion de frecuencia simplificado de concentración de Xyleno,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 2\n Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 2 |
| **Distribucion de frecuencia simplificado de concentración de Xyleno,estudio calidad del aire en India entre 2015-2020 ** |
| Intervalo |
MC |
ni |
hi(%) |
Ni_asc |
Hi_asc (%) |
Ni_desc |
Hi_desc (%) |
| [0 - 10) |
5 |
10588 |
92.70 |
10588 |
92.7 |
11422 |
100 |
| [10 - 20) |
15 |
610 |
5.34 |
11198 |
98.04 |
834 |
7.3 |
| [20 - 30) |
25 |
143 |
1.25 |
11341 |
99.29 |
224 |
1.96 |
| [30 - 40) |
35 |
34 |
0.30 |
11375 |
99.59 |
81 |
0.71 |
| [40 - 50) |
45 |
21 |
0.18 |
11396 |
99.77 |
47 |
0.41 |
| [50 - 60) |
55 |
11 |
0.10 |
11407 |
99.87 |
26 |
0.23 |
| [60 - 70) |
65 |
4 |
0.04 |
11411 |
99.9 |
15 |
0.13 |
| [70 - 80) |
75 |
1 |
0.01 |
11412 |
99.91 |
11 |
0.1 |
| [80 - 90) |
85 |
3 |
0.03 |
11415 |
99.94 |
10 |
0.09 |
| [90 - 100) |
95 |
1 |
0.01 |
11416 |
99.95 |
7 |
0.06 |
| [100 - 110) |
105 |
2 |
0.02 |
11418 |
99.96 |
6 |
0.05 |
| [110 - 120) |
115 |
1 |
0.01 |
11419 |
99.97 |
4 |
0.04 |
| [120 - 130) |
125 |
1 |
0.01 |
11420 |
99.98 |
3 |
0.03 |
| [130 - 140) |
135 |
1 |
0.01 |
11421 |
99.99 |
2 |
0.02 |
| [140 - 150) |
145 |
0 |
0.00 |
11421 |
99.99 |
1 |
0.01 |
| [150 - 160) |
155 |
0 |
0.00 |
11421 |
99.99 |
1 |
0.01 |
| [160 - 170) |
165 |
0 |
0.00 |
11421 |
99.99 |
1 |
0.01 |
| [170 - 180] |
175 |
1 |
0.01 |
11422 |
100 |
1 |
0.01 |
| Totales |
- |
11422 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 2
Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
# PLOTS
# Local histogram: absolute frequencies with the y axis limited to the
# largest class count. The default x axis is suppressed and redrawn so
# that the ticks sit on the class breaks.
hist(
  xyleno,
  breaks = 19,
  col = "orange",
  main = "Gráfica N°1: Distribución de la Concentración de Xyleno
presente en el estudio sobre calidad del aire en India entre 2015-2020 ",
  xlab = " Xyleno (µg/m3)",
  ylab = "Cantidad",
  ylim = c(0, max(nis)),
  xaxt = "n",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9
)
axis(
  side = 1,
  at = Histograma_xyleno$breaks,
  labels = Histograma_xyleno$breaks,
  las = 1,
  cex.axis = 0.9
)

# Global histogram: same data and breaks request, default y-axis limits.
# The x axis is again redrawn with ticks on the class breaks.
hist(
  xyleno,
  breaks = 19,
  col = "orange",
  main = "Gráfica N°2:Distribución de la Concentración de Xyleno
presente en el estudio sobre calidad del aire en India entre 2015-2020",
  xlab = "Xyleno (µg/m3)",
  ylab = "Cantidad",
  xaxt = "n",
  cex.main = 1,
  cex.lab = 1,
  cex.axis = 0.9
)
axis(
  side = 1,
  at = Histograma_xyleno$breaks,
  labels = Histograma_xyleno$breaks,
  las = 1,
  cex.axis = 0.9
)

# Global percentage histogram (bar plot on a 0-100 % scale).
# Convert the existing `hi(%)` column in place. The column name must match
# exactly: writing it with a space ("hi (%)") would add a new, unused
# column instead of converting the one that is plotted.
TDF_xylenosimplificado$`hi(%)` <- as.numeric(TDF_xylenosimplificado$`hi(%)`)
# head(x, -1) drops the last row of the table, i.e. the "Totales" row.
post <- barplot(
  head(TDF_xylenosimplificado$`hi(%)`, -1),
  space = 0,
  col = "orange",
  main = "Gráfica N°3:Distribución de la Concentración de xyleno, estudio
calidad del aire en India, 2015-2020",
  xlab = "Xyleno (µg/m3)",
  ylab = "Porcentaje (%)",
  names.arg = head(TDF_xylenosimplificado$MC, -1),
  ylim = c(0, 100),
  xaxt = "n"
)
# Draw the x axis by hand: one tick per bar, labelled with the class mark.
axis(
  side = 1,
  at = post,
  labels = head(TDF_xylenosimplificado$MC, -1),
  tck = -0.02
)

# Local percentage histogram (bar plot with default y-axis limits).
# n is the number of table rows; row n is the "Totales" row, so only
# rows 1 to n - 1 are plotted.
n <- as.numeric(nrow(TDF_xylenosimplificado))
pos <- barplot(
  TDF_xylenosimplificado$`hi(%)`[1:(n - 1)],
  names.arg = TDF_xylenosimplificado$MC[1:(n - 1)],
  space = 0,
  col = "orange",
  main = "Gráfica No. 4:Distribución concentración de Xyleno en el estudio
calidad del aire en India, 2015-2020",
  xlab = "Xyleno (µg/m3)",
  ylab = "Porcentaje (%)"
)
# Add an x axis with one tick per bar, labelled with the class mark.
axis(
  1,
  at = pos,
  labels = TDF_xylenosimplificado$MC[1:(nrow(TDF_xylenosimplificado) - 1)],
  tck = -0.04, # controls the tick length
  las = 1 # label orientation (1 = horizontal)
)

# Box plot of the xylene concentrations, drawn horizontally. The value
# returned by boxplot() is kept in Cajaxyleno.
# TRUE is spelled out because T is an ordinary variable that can be
# reassigned.
Cajaxyleno <- boxplot(
  xyleno,
  horizontal = TRUE,
  col = "pink",
  border = "black",
  main = "Gráfica No. 5: Distribución de la concentración de xyleno,
estudio calidad del aire en India desde 2015-2020",
  xlab = "Xyleno (µg/m3)"
)

# Local ogives (cumulative counts).
# Both curves share one x axis: the upper class limits.
# NOTE(review): Nis_desc accumulates from each class to the last one, so
# it may belong at the lower limits (Lis) rather than Lss — confirm.
x <- Lss
# Ascending ogive; the y axis starts at 0 and reaches the largest
# cumulative count of either curve.
plot(
  x, Nis_asc,
  type = "b",
  pch = 19,
  col = "red",
  ylim = c(0, max(Nis_asc, Nis_desc)),
  main = "Gráfica N°6:Distribución de concentración de Xyleno en el
estudio calidad del aire en India 2015-2020",
  xlab = "Xyleno (µg/m3)",
  ylab = "Cantidad"
)
# Descending ogive, added to the same plot.
lines(x, Nis_desc, type = "b", pch = 19, col = "blue")

# Percentage ogives (cumulative relative frequencies on a 0-100 scale).
# Both curves share one x axis: the upper class limits.
x <- Lss
# Ascending percentage ogive.
plot(
  x, His_asc,
  type = "b",
  pch = 19,
  col = "blue",
  ylim = c(0, 100),
  main ="Gráfica N°7:Distribución de concentración de Xyleno en el
estudio calidad del aire en India 2015-2020",
  xlab = "Xyleno (µg/m3)",
  ylab = "Porcentaje (%)"
)
# Descending percentage ogive, added to the same plot.
lines(x, His_desc, type = "b", pch = 19, col = "red")

# INDICATORS
# Measures of central tendency
# Median
Me <- median(xyleno)
Me
## [1] 0.98
# Mean
X <- mean(xyleno)
X
## [1] 3.070128
# Mode: reported as the modal class, i.e. the simplified class with the
# largest count, taken from nis / Lis / Lss instead of a typed-in label.
Mo <- paste0("[", Lis[which.max(nis)], ",", Lss[which.max(nis)], "]")
Mo
## [1] "[0,10]"
# Measures of dispersion
# Variance
var(xyleno)
## [1] 39.98346
# Standard deviation (rounded to two decimals)
desv <- round(sd(xyleno), 2)
# Coefficient of variation, in percent
CV <- (sd(xyleno) / X) * 100
CV
## [1] 205.9604
# Measures of shape
# Skewness coefficient (skewness() and kurtosis() come from e1071)
library(e1071)
As <- skewness(xyleno)
As
## [1] 7.889443
# Kurtosis
K <- kurtosis(xyleno)
K
## [1] 119.9056
Variable <- "Xyleno"
# Range label built from the observed extremes, rounded to two decimals.
Rango <- paste0(
  "[", round(min(xyleno), 2), ",", round(max(xyleno), 2), "]"
)
# One-row summary table with the indicators rounded for display.
Tabla_indicadores <- data.frame(
  Variable = Variable,
  Rango = Rango,
  X = round(X, 3),
  Me = Me,
  Mo = Mo,
  sd = round(desv, 2),
  CV = round(CV, 2),
  As = round(As, 2),
  K = round(K, 2)
)
#Paquetes la tabla
library(gt)
library(dplyr)
# Render Table 3 (statistical indicators) with gt: Markdown header,
# source note and border styling.
Tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 3*"),
    # The closing ** must touch the last word: with a space in front of it
    # Markdown does not parse the text as bold and prints the asterisks.
    subtitle = md("**Indicadores Estadísticos de concentración de Xyleno,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 2\n Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )