Universidad Central del Ecuador
FIGEMPA-Ingeniería Ambiental
# Load the air-quality dataset from the working directory: comma-separated
# fields, "." as decimal mark, first row used as column names.
datos <- read.csv("city_day.csv", header = TRUE, sep = ",", dec = ".")
# Build the xylene vector: keep the Xylene entries that are not the "-"
# placeholder and convert them to numeric in one step.
xyleno <- as.numeric(datos$Xylene[datos$Xylene != "-"])
#Estadística descriptiva
#5/12/2025
#Lorien Arcentales
#Carga de paquetes
library(gt)
library(dplyr)
# Read the dataset (comma-separated, "." as decimal mark, header row).
datos <- read.csv(file = "city_day.csv", header = TRUE, sep = ",", dec = ".")
# Entries recorded as "-" in the Xylene column stand for missing measurements
# and are dropped before the analysis; per the recorded output below this
# reduces the sample size from 29531 to 11422.
xyleno <- datos[["Xylene"]]
xyleno <- xyleno[xyleno != "-"]
length(xyleno)
## [1] 11422
# The column was read as text because of the "-" entries; make it numeric.
xyleno <- as.numeric(xyleno)
# Class limits for the xylene frequency table.
# An empty sample has no range, so fail early with a clear message.
stopifnot(length(xyleno) > 0)
# Sample extremes and range. They are named x_min / x_max so that the base
# functions min() and max() are not masked by numeric variables.
x_min <- min(xyleno)
x_max <- max(xyleno)
R <- x_max - x_min
# Number of classes by Sturges' rule, k = 1 + 3.3 * log10(n), truncated to an
# integer. The rule is defined with the base-10 logarithm; log() is the
# natural logarithm and would inflate the class count.
k <- floor(1 + 3.3 * log10(length(xyleno)))
# Common class width.
A <- R / k
# Interval generation: k lower limits starting at x_min and k upper limits
# whose last element is pinned to x_max. The limits are obtained by
# multiplication instead of two independent seq(..., by = A) calls, so Li and
# Ls always have exactly k elements.
Li <- x_min + A * (seq_len(k) - 1)
Ls <- c(x_min + A * seq_len(k - 1), x_max)
# Class marks (interval midpoints).
MC <- (Li + Ls) / 2
# Absolute frequency (ni) of every class.
# Data and class limits are rounded to 3 decimals first, so that the
# comparisons below are made at the same precision on both sides.
xyleno <- round(xyleno, 3)
Li <- round(Li, 3)
Ls <- round(Ls, 3)
# Every class is [Li, Ls) except the last one, which is closed on the right
# so that the maximum is counted. seq_along() keeps this safe when Li is
# empty, where 1:length(Li) would iterate over c(1, 0).
ni <- vapply(
  seq_along(Li),
  function(i) {
    if (i < length(Li)) {
      sum(xyleno >= Li[i] & xyleno < Ls[i])
    } else {
      sum(xyleno >= Li[i] & xyleno <= Ls[i])
    }
  },
  numeric(1)
)
# Frequency columns derived from the absolute counts ni.
N <- sum(ni)
# Relative frequency in percent.
hi <- ni / N * 100
# Cumulative frequencies: ascending (running total from the first class) and
# descending (running total from the last class).
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))
# Interval labels: "[a - b)" for every class, "[a - b]" for the last one.
Intervalo <- paste0(
  "[", round(Li, 2), " - ", round(Ls, 2),
  rep(c(")", "]"), times = c(length(Li) - 1, 1))
)
# Frequency distribution table, values rounded to 2 decimals for display.
TDF_xyleno <- data.frame(
  Intervalo      = Intervalo,
  MC             = round(MC, 2),
  ni             = ni,
  hi             = round(hi, 2),
  Ni_ascendente  = Ni_asc,
  Ni_descendente = Ni_desc,
  Hi_ascendente  = round(Hi_asc, 2),
  Hi_descendente = round(Hi_desc, 2)
)
# Totals row: only ni and hi have a meaningful total, the remaining cells
# carry a "-" placeholder.
totales <- data.frame(
  Intervalo      = "Totales",
  MC             = "-",
  ni             = sum(ni),
  hi             = sum(hi),
  Ni_ascendente  = "-",
  Ni_descendente = "-",
  Hi_ascendente  = "-",
  Hi_descendente = "-"
)
# Append the totals row at the bottom of the table.
TDF_xyleno <- rbind(TDF_xyleno, totales)
library(gt)
library(dplyr)
# Render the frequency distribution table with gt: header, source note and
# black/gray border styling.
TDF_xyleno %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 1*"),
    # The closing "**" must not be preceded by a space, otherwise markdown
    # does not treat it as a closing delimiter and the asterisks are printed
    # literally instead of producing bold text.
    subtitle = md("**Distribucion de frecuencia de concentración de xyleno,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    # Two trailing spaces before the newline make a markdown hard line break;
    # a bare "\n" is rendered as a plain space.
    source_note = md("Autor: Grupo 2  \nFuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 1 | |||||||
| **Distribución de frecuencia de concentración de xyleno, estudio calidad del aire en India entre 2015-2020** | |||||||
| Intervalo | MC | ni | hi | Ni_ascendente | Ni_descendente | Hi_ascendente | Hi_descendente |
|---|---|---|---|---|---|---|---|
| [0 - 5.5) | 2.75 | 9523 | 83.37 | 9523 | 11422 | 83.37 | 100 |
| [5.5 - 10.99) | 8.24 | 1178 | 10.31 | 10701 | 1899 | 93.69 | 16.63 |
| [10.99 - 16.49) | 13.74 | 367 | 3.21 | 11068 | 721 | 96.9 | 6.31 |
| [16.49 - 21.98) | 19.24 | 167 | 1.46 | 11235 | 354 | 98.36 | 3.1 |
| [21.98 - 27.48) | 24.73 | 84 | 0.74 | 11319 | 187 | 99.1 | 1.64 |
| [27.48 - 32.98) | 30.23 | 37 | 0.32 | 11356 | 103 | 99.42 | 0.9 |
| [32.98 - 38.47) | 35.72 | 11 | 0.10 | 11367 | 66 | 99.52 | 0.58 |
| [38.47 - 43.97) | 41.22 | 18 | 0.16 | 11385 | 55 | 99.68 | 0.48 |
| [43.97 - 49.46) | 46.71 | 9 | 0.08 | 11394 | 37 | 99.75 | 0.32 |
| [49.46 - 54.96) | 52.21 | 9 | 0.08 | 11403 | 28 | 99.83 | 0.25 |
| [54.96 - 60.45) | 57.71 | 5 | 0.04 | 11408 | 19 | 99.88 | 0.17 |
| [60.45 - 65.95) | 63.2 | 3 | 0.03 | 11411 | 14 | 99.9 | 0.12 |
| [65.95 - 71.44) | 68.7 | 0 | 0.00 | 11411 | 11 | 99.9 | 0.1 |
| [71.44 - 76.94) | 74.19 | 1 | 0.01 | 11412 | 11 | 99.91 | 0.1 |
| [76.94 - 82.44) | 79.69 | 1 | 0.01 | 11413 | 10 | 99.92 | 0.09 |
| [82.44 - 87.93) | 85.18 | 1 | 0.01 | 11414 | 9 | 99.93 | 0.08 |
| [87.93 - 93.43) | 90.68 | 1 | 0.01 | 11415 | 8 | 99.94 | 0.07 |
| [93.43 - 98.92) | 96.18 | 1 | 0.01 | 11416 | 7 | 99.95 | 0.06 |
| [98.92 - 104.42) | 101.67 | 0 | 0.00 | 11416 | 6 | 99.95 | 0.05 |
| [104.42 - 109.92) | 107.17 | 2 | 0.02 | 11418 | 6 | 99.96 | 0.05 |
| [109.92 - 115.41) | 112.66 | 0 | 0.00 | 11418 | 4 | 99.96 | 0.04 |
| [115.41 - 120.91) | 118.16 | 1 | 0.01 | 11419 | 4 | 99.97 | 0.04 |
| [120.91 - 126.4) | 123.66 | 1 | 0.01 | 11420 | 3 | 99.98 | 0.03 |
| [126.4 - 131.9) | 129.15 | 0 | 0.00 | 11420 | 2 | 99.98 | 0.02 |
| [131.9 - 137.4) | 134.65 | 0 | 0.00 | 11420 | 2 | 99.98 | 0.02 |
| [137.4 - 142.89) | 140.14 | 1 | 0.01 | 11421 | 2 | 99.99 | 0.02 |
| [142.89 - 148.39) | 145.64 | 0 | 0.00 | 11421 | 1 | 99.99 | 0.01 |
| [148.39 - 153.88) | 151.13 | 0 | 0.00 | 11421 | 1 | 99.99 | 0.01 |
| [153.88 - 159.38) | 156.63 | 0 | 0.00 | 11421 | 1 | 99.99 | 0.01 |
| [159.38 - 164.87) | 162.13 | 0 | 0.00 | 11421 | 1 | 99.99 | 0.01 |
| [164.87 - 170.37] | 167.62 | 1 | 0.01 | 11422 | 1 | 100 | 0.01 |
| Totales | - | 11422 | 100.00 | - | - | - | - |
| Autor: Grupo 2 Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india | |||||||
# SIMPLIFICATION PROCESS
# The simplified classes are taken from the breaks and counts of a base-R
# histogram object. The script never creates Histograma_xyleno, so it is
# built here (without drawing) when it does not exist yet.
# NOTE(review): hist() counts right-closed cells (a, b] by default, while the
# labels below are written as [a - b); confirm which convention is intended.
if (!exists("Histograma_xyleno")) {
  Histograma_xyleno <- hist(xyleno, plot = FALSE)
}
# Simplified elements.
# Lower limits are all breaks but the last, upper limits all but the first,
# so the code works for any number of classes.
Lis <- Histograma_xyleno$breaks[-length(Histograma_xyleno$breaks)]
Lss <- Histograma_xyleno$breaks[-1]
# Class mark is the midpoint between the lower and the upper limit.
MCs <- (Lis + Lss) / 2
nis <- Histograma_xyleno$counts
# Percentages are relative to the histogram's own total, so they add up to
# 100 regardless of any other table.
his <- (nis / sum(nis)) * 100
# Ascending and descending cumulative frequencies.
Nis_asc <- cumsum(nis)
His_asc <- cumsum(his)
Nis_desc <- rev(cumsum(rev(nis)))
His_desc <- rev(cumsum(rev(his)))
# Interval labels: "[a - b)" for every class, "[a - b]" for the last one.
Intervalos <- paste0("[", round(Lis, 2), " - ", round(Lss, 2), ")")
Intervalos[length(Intervalos)] <- paste0(
  "[", round(Lis[length(Lis)], 2), " - ", round(Lss[length(Lss)], 2), "]"
)
# Simplified frequency distribution table. The display headers are assigned
# directly; check.names = FALSE keeps headers such as "hi(%)" untouched.
TDF_xylenosimplificado <- data.frame(
  Intervalo     = Intervalos,
  MC            = round(MCs, 2),
  ni            = nis,
  `hi(%)`       = round(his, 2),
  Ni_asc        = Nis_asc,
  `Hi_asc (%)`  = round(His_asc, 2),
  Ni_desc       = Nis_desc,
  `Hi_desc (%)` = round(His_desc, 2),
  check.names = FALSE
)
# Totals row: the sum of ni and of hi (%), "-" placeholders elsewhere.
totaless <- data.frame(
  Intervalo     = "Totales",
  MC            = "-",
  ni            = sum(nis),
  `hi(%)`       = sum(his),
  Ni_asc        = "-",
  `Hi_asc (%)`  = "-",
  Ni_desc       = "-",
  `Hi_desc (%)` = "-",
  check.names = FALSE
)
# Append the totals row at the bottom of the table.
TDF_xylenosimplificado <- rbind(TDF_xylenosimplificado, totaless)
# Table 2: render the simplified frequency distribution table with gt, using
# the same header, source note and border styling as Table 1.
TDF_xylenosimplificado %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 2*"),
    # The closing "**" must not be preceded by a space, otherwise markdown
    # does not treat it as a closing delimiter and the asterisks are printed
    # literally instead of producing bold text.
    subtitle = md("**Distribucion de frecuencia simplificado de concentración de xyleno, estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    # Two trailing spaces before the newline make a markdown hard line break;
    # a bare "\n" is rendered as a plain space.
    source_note = md("Autor: Grupo 2  \nFuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 2 | |||||||
| **Distribución de frecuencia simplificada de concentración de xyleno, estudio calidad del aire en India entre 2015-2020** | |||||||
| Intervalo | MC | ni | hi(%) | Ni_asc | Hi_asc (%) | Ni_desc | Hi_desc (%) |
|---|---|---|---|---|---|---|---|
| [0 - 10) | 0 | 10588 | 92.70 | 10588 | 92.7 | 11422 | 100 |
| [10 - 20) | 10 | 610 | 5.34 | 11198 | 98.04 | 834 | 7.3 |
| [20 - 30) | 20 | 143 | 1.25 | 11341 | 99.29 | 224 | 1.96 |
| [30 - 40) | 30 | 34 | 0.30 | 11375 | 99.59 | 81 | 0.71 |
| [40 - 50) | 40 | 21 | 0.18 | 11396 | 99.77 | 47 | 0.41 |
| [50 - 60) | 50 | 11 | 0.10 | 11407 | 99.87 | 26 | 0.23 |
| [60 - 70) | 60 | 4 | 0.04 | 11411 | 99.9 | 15 | 0.13 |
| [70 - 80) | 70 | 1 | 0.01 | 11412 | 99.91 | 11 | 0.1 |
| [80 - 90) | 80 | 3 | 0.03 | 11415 | 99.94 | 10 | 0.09 |
| [90 - 100) | 90 | 1 | 0.01 | 11416 | 99.95 | 7 | 0.06 |
| [100 - 110) | 100 | 2 | 0.02 | 11418 | 99.96 | 6 | 0.05 |
| [110 - 120) | 110 | 1 | 0.01 | 11419 | 99.97 | 4 | 0.04 |
| [120 - 130) | 120 | 1 | 0.01 | 11420 | 99.98 | 3 | 0.03 |
| [130 - 140) | 130 | 1 | 0.01 | 11421 | 99.99 | 2 | 0.02 |
| [140 - 150) | 140 | 0 | 0.00 | 11421 | 99.99 | 1 | 0.01 |
| [150 - 160) | 150 | 0 | 0.00 | 11421 | 99.99 | 1 | 0.01 |
| [160 - 170) | 160 | 0 | 0.00 | 11421 | 99.99 | 1 | 0.01 |
| [170 - 180] | 170 | 1 | 0.01 | 11422 | 100 | 1 | 0.01 |
| Totales | - | 11422 | 100.00 | - | - | - | - |
| Autor: Grupo 2 Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india | |||||||
# PLOTS
# Local-scale histogram: the y axis stops at the largest class count.
# The bars use the breaks of Histograma_xyleno, the same object that supplies
# the y limit (through nis) and the x-axis ticks below. A bare number such as
# breaks = 19 is only a suggestion to hist(), so the bars are not guaranteed
# to line up with those ticks.
hist(
  xyleno,
  breaks = Histograma_xyleno$breaks,
  main = paste0(
    "Gráfica N°1: Distribución de la Concentración de Xyleno\n",
    "presente en el estudio sobre calidad del aire en India entre 2015-2020 "
  ),
  xlab = " Xyleno (µg/m3)",
  ylab = "Cantidad",
  ylim = c(0, max(nis)),
  col = "orange",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n"
)
# Custom x axis with one tick per class boundary (the default axis was
# suppressed with xaxt = "n").
axis(
  1,
  at = Histograma_xyleno$breaks,
  labels = Histograma_xyleno$breaks,
  las = 1,
  cex.axis = 0.9
)
# Global-scale histogram: the y axis spans the whole sample size.
# The bars use the breaks of Histograma_xyleno so that they line up with the
# x-axis ticks drawn below; a bare number such as breaks = 19 is only a
# suggestion to hist().
hist(
  xyleno,
  breaks = Histograma_xyleno$breaks,
  main = paste0(
    "Gráfica N°2:Distribución de la Concentración de Xyleno\n",
    "presente en el estudio sobre calidad del aire en India entre 2015-2020"
  ),
  xlab = "Xyleno (µg/m3)",
  ylab = "Cantidad",
  ylim = c(0, length(xyleno)),
  col = "orange",
  cex.main = 1,
  cex.lab = 1,
  cex.axis = 0.9,
  xaxt = "n"
)
# Custom x axis with one tick per class boundary (the default axis was
# suppressed with xaxt = "n").
axis(
  1,
  at = Histograma_xyleno$breaks,
  labels = Histograma_xyleno$breaks,
  las = 1,
  cex.axis = 0.9
)
# Global-scale percentage histogram (y axis from 0 to 100 %).
# The last row of the table is the "Totales" row and is left out with
# head(x, -1). The percentages are converted to numeric right where they are
# used; assigning to a column named "hi (%)" (with a space) would only add a
# stray column to the table instead of converting "hi(%)".
post <- barplot(
  as.numeric(head(TDF_xylenosimplificado$`hi(%)`, -1)),
  space = 0,
  col = "orange",
  main = paste0(
    "Gráfica N°3:Distribución de la Concentración de xyleno, estudio\n",
    "calidad del aire en India, 2015-2020"
  ),
  xlab = "Xyleno (µg/m3)",
  ylab = "Porcentaje (%)",
  names.arg = head(TDF_xylenosimplificado$MC, -1),
  ylim = c(0, 100),
  xaxt = "n"
)
# x axis with one tick per bar, labelled with the class mark. barplot()
# returns the bar midpoints, which are used as tick positions.
axis(
  side = 1,
  at = post,
  labels = head(TDF_xylenosimplificado$MC, -1),
  tck = -0.02
)
# Local-scale percentage histogram: the y axis is left to adapt to the data.
# n is the number of table rows; the last one is the "Totales" row, so only
# rows 1 to n - 1 are plotted.
n <- as.numeric(nrow(TDF_xylenosimplificado))
pos <- barplot(
  TDF_xylenosimplificado$`hi(%)`[1:(n - 1)],
  names.arg = TDF_xylenosimplificado$MC[1:(n - 1)],
  space = 0,
  col = "orange",
  main = paste0(
    "Gráfica No. 4:Distribución concentración de Xyleno en el estudio\n",
    "calidad del aire en India, 2015-2020"
  ),
  xlab = "Xyleno (µg/m3)",
  ylab = "Porcentaje (%)"
)
# x axis ticks at the bar midpoints returned by barplot(), labelled with the
# class marks. tck sets the tick length; las = 1 keeps the labels horizontal.
axis(
  side = 1,
  at = pos,
  labels = TDF_xylenosimplificado$MC[1:(nrow(TDF_xylenosimplificado) - 1)],
  tck = -0.04,
  las = 1
)
# Horizontal box plot of the xylene concentrations. The statistics returned
# by boxplot() are kept in Cajaxyleno.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
Cajaxyleno <- boxplot(
  xyleno,
  horizontal = TRUE,
  col = "pink",
  border = "black",
  main = paste0(
    "Gráfica No. 5: Distribución de la concentración de xyleno,\n",
    "estudio calidad del aire en India desde 2015-2020"
  ),
  xlab = "Xyleno (µg/m3)"
)
# Ogives of the cumulative absolute frequency.
# Ascending ogive (turquoise), plotted against the upper class limits.
# xlim / ylim are set from both series: plot() otherwise sizes the axes from
# the ascending series alone, and the descending ogive added afterwards would
# fall largely outside the plotting region.
plot(
  Lss, Nis_asc,
  type = "b",
  main = paste0(
    "Gráfica N°6:Ojiva ascendente y descendente de la\n",
    "distribución local del concentración de xyleno"
  ),
  xlab = "Xyleno (µg/m3)",
  ylab = "Cantidad",
  pch = 19,
  col = "turquoise",
  xlim = range(Lis, Lss),
  ylim = range(Nis_asc, Nis_desc)
)
# Descending ogive (black), plotted against the lower class limits and added
# to the same plot.
lines(Lis, Nis_desc, type = "b", col = "black", pch = 19)
# Ogives of the cumulative relative frequency (percent).
# Ascending ogive (blue), plotted against the upper class limits.
# xlim / ylim are set from both series: plot() otherwise sizes the axes from
# the ascending series alone, and the descending ogive added afterwards would
# fall largely outside the plotting region.
plot(
  Lss, His_asc,
  type = "b",
  main = paste0(
    " Gráfica N°7:Ojiva ascendete y descendete de la distribución\n",
    "de la concentración de xyleno"
  ),
  xlab = "Xyleno(µg/m3)",
  ylab = "Porcentaje %",
  col = "blue",
  pch = 19,
  xlim = range(Lis, Lss),
  ylim = range(His_asc, His_desc)
)
# Descending ogive (red), plotted against the lower class limits and added to
# the same plot.
lines(
  Lis, His_desc,
  type = "b",
  col = "red",
  pch = 19
)