# Load the air-quality data set (comma-separated, "." as decimal mark).
datos <- read.csv(
  "city_day.csv",
  header = TRUE,
  sep = ",",
  dec = "."
)
# Build the ozone vector without the "-" placeholders.
# The explicit NA check matters: `x[x != "-"]` on its own returns an NA element
# for every NA in the column, which would then poison min(), max(), mean(), ...
ozono <- datos$O3[!is.na(datos$O3) & datos$O3 != "-"]
ozono <- as.numeric(ozono)
#Estadistica descriptiva
#02/12/2025
#Lorien Arcentales
#carga de datos
# Load the data set (comma-separated, "." as decimal mark).
datos <- read.csv("city_day.csv", header = TRUE, sep = ",", dec = ".")
# Remove the "-" entries of the ozone variable: they stand for non-existent
# values, so dropping them gives a cleaner analysis (the sample size goes from
# 29531 to 25509).
# The explicit NA check keeps the filter safe: `x[x != "-"]` on its own returns
# an NA element for every NA in the column.
ozono <- datos$O3[!is.na(datos$O3) & datos$O3 != "-"]
length(ozono)
## [1] 25509
ozono <- as.numeric(ozono)
# Range of the data. The extremes get their own names so that the base
# functions min() and max() are not masked by variables.
ozono_min <- min(ozono)
ozono_max <- max(ozono)
R <- ozono_max - ozono_min
# Number of classes k (truncated to an integer) and class width A.
# NOTE(review): Sturges' rule is normally written with log10 (or log2). log()
# here is the natural logarithm and yields k = 34 for the 25509 readings. The
# console output recorded further down was produced with this formula, so
# confirm the intended base before changing it.
k <- floor(1 + 3.3 * log(length(ozono)))
A <- R / k
# Class limits: k contiguous classes of width A. Each upper limit is taken from
# the next lower limit and the last one is pinned to the maximum, so both
# vectors always have exactly k entries and neighbouring classes share a limit.
Li <- ozono_min + A * (seq_len(k) - 1)
Ls <- c(Li[-1], ozono_max)
# Class marks (midpoints), computed from the unrounded limits.
MC <- (Li + Ls) / 2
# Absolute frequencies. Data and limits are rounded to 3 decimals before
# counting.
ozono <- round(ozono, 3)
Li <- round(Li, 3)
Ls <- round(Ls, 3)
# Classes are [Li, Ls), except the last one, which is closed on the right so
# that the maximum is counted.
ni <- vapply(
  seq_along(Li),
  function(i) {
    if (i < length(Li)) {
      sum(ozono >= Li[i] & ozono < Ls[i])
    } else {
      sum(ozono >= Li[i] & ozono <= Ls[i])
    }
  },
  numeric(1)
)
# Relative frequencies (%) and ascending / descending cumulative frequencies.
N <- sum(ni)
hi <- (ni / N) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))
# Class labels: every class is written "[lower - upper)", except the last one,
# which is closed on the right and written "[lower - upper]".
Intervalo <- paste0(
  "[", round(Li, 2), " - ", round(Ls, 2),
  ifelse(seq_along(Li) < length(Li), ")", "]")
)
# Frequency distribution table, one row per class (values rounded to 2
# decimals for display).
TDF_ozono <- data.frame(
  Intervalo      = Intervalo,
  MC             = round(MC, 2),
  ni             = ni,
  hi             = round(hi, 2),
  Ni_ascendente  = Ni_asc,
  Ni_descendente = Ni_desc,
  Hi_ascendente  = round(Hi_asc, 2),
  Hi_descendente = round(Hi_desc, 2)
)
# Totals row: only ni and hi are summed; every other column shows "-".
totales <- data.frame(
  Intervalo      = "Totales",
  MC             = "-",
  ni             = sum(ni),
  hi             = sum(hi),
  Ni_ascendente  = "-",
  Ni_descendente = "-",
  Hi_ascendente  = "-",
  Hi_descendente = "-"
)
# Append the totals row at the bottom of the table.
TDF_ozono <- rbind(TDF_ozono, totales)
# Checks on the class limits. Lines starting with "## " are the console output
# recorded when the report was rendered.
# Number of lower and upper limits.
length(Li)
## [1] 34
length(Ls)
## [1] 34
# Largest observation next to the largest upper limit.
max(ozono)
## [1] 257.73
max(Ls)
## [1] 257.73
# Range and summary statistics of the ozone readings.
range(ozono)
## [1] 0.01 257.73
summary(ozono)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.01 18.86 30.84 34.49 45.57 257.73
# Lower limit, upper limit and absolute frequency of every class side by side.
cbind(Li, Ls, ni)
## Li Ls ni
## [1,] 0.01 7.59 1437
## [2,] 7.59 15.17 2952
## [3,] 15.17 22.75 4019
## [4,] 22.75 30.33 4112
## [5,] 30.33 37.91 3614
## [6,] 37.91 45.49 2970
## [7,] 45.49 53.07 2147
## [8,] 53.07 60.65 1518
## [9,] 60.65 68.23 953
## [10,] 68.23 75.81 613
## [11,] 75.81 83.39 374
## [12,] 83.39 90.97 255
## [13,] 90.97 98.55 183
## [14,] 98.55 106.13 113
## [15,] 106.13 113.71 86
## [16,] 113.71 121.29 53
## [17,] 121.29 128.87 33
## [18,] 128.87 136.45 26
## [19,] 136.45 144.03 18
## [20,] 144.03 151.61 9
## [21,] 151.61 159.19 9
## [22,] 159.19 166.77 6
## [23,] 166.77 174.35 3
## [24,] 174.35 181.93 2
## [25,] 181.93 189.51 1
## [26,] 189.51 197.09 1
## [27,] 197.09 204.67 1
## [28,] 204.67 212.25 0
## [29,] 212.25 219.83 0
## [30,] 219.83 227.41 0
## [31,] 227.41 234.99 0
## [32,] 234.99 242.57 0
## [33,] 242.57 250.15 0
## [34,] 250.15 257.73 1
library(gt)
library(dplyr)
# Render the frequency table (Table 1) with gt: header, source note, black
# outer / heading borders and grey row separators.
# The closing "**" of the subtitle has to touch the last word: with a space in
# front of it Markdown does not treat it as a closing delimiter and the
# asterisks are printed literally instead of producing bold text.
TDF_ozono %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 1*"),
    subtitle = md("**Distribucion de frecuencia de concentración de ozono,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 2\n Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 1 |
| **Distribucion de frecuencia de concentración de ozono,estudio calidad del aire en India entre 2015-2020 ** |
| Intervalo |
MC |
ni |
hi |
Ni_ascendente |
Ni_descendente |
Hi_ascendente |
Hi_descendente |
| [0.01 - 7.59) |
3.8 |
1437 |
5.63 |
1437 |
25509 |
5.63 |
100 |
| [7.59 - 15.17) |
11.38 |
2952 |
11.57 |
4389 |
24072 |
17.21 |
94.37 |
| [15.17 - 22.75) |
18.96 |
4019 |
15.76 |
8408 |
21120 |
32.96 |
82.79 |
| [22.75 - 30.33) |
26.54 |
4112 |
16.12 |
12520 |
17101 |
49.08 |
67.04 |
| [30.33 - 37.91) |
34.12 |
3614 |
14.17 |
16134 |
12989 |
63.25 |
50.92 |
| [37.91 - 45.49) |
41.7 |
2970 |
11.64 |
19104 |
9375 |
74.89 |
36.75 |
| [45.49 - 53.07) |
49.28 |
2147 |
8.42 |
21251 |
6405 |
83.31 |
25.11 |
| [53.07 - 60.65) |
56.86 |
1518 |
5.95 |
22769 |
4258 |
89.26 |
16.69 |
| [60.65 - 68.23) |
64.44 |
953 |
3.74 |
23722 |
2740 |
92.99 |
10.74 |
| [68.23 - 75.81) |
72.02 |
613 |
2.40 |
24335 |
1787 |
95.4 |
7.01 |
| [75.81 - 83.39) |
79.6 |
374 |
1.47 |
24709 |
1174 |
96.86 |
4.6 |
| [83.39 - 90.97) |
87.18 |
255 |
1.00 |
24964 |
800 |
97.86 |
3.14 |
| [90.97 - 98.55) |
94.76 |
183 |
0.72 |
25147 |
545 |
98.58 |
2.14 |
| [98.55 - 106.13) |
102.34 |
113 |
0.44 |
25260 |
362 |
99.02 |
1.42 |
| [106.13 - 113.71) |
109.92 |
86 |
0.34 |
25346 |
249 |
99.36 |
0.98 |
| [113.71 - 121.29) |
117.5 |
53 |
0.21 |
25399 |
163 |
99.57 |
0.64 |
| [121.29 - 128.87) |
125.08 |
33 |
0.13 |
25432 |
110 |
99.7 |
0.43 |
| [128.87 - 136.45) |
132.66 |
26 |
0.10 |
25458 |
77 |
99.8 |
0.3 |
| [136.45 - 144.03) |
140.24 |
18 |
0.07 |
25476 |
51 |
99.87 |
0.2 |
| [144.03 - 151.61) |
147.82 |
9 |
0.04 |
25485 |
33 |
99.91 |
0.13 |
| [151.61 - 159.19) |
155.4 |
9 |
0.04 |
25494 |
24 |
99.94 |
0.09 |
| [159.19 - 166.77) |
162.98 |
6 |
0.02 |
25500 |
15 |
99.96 |
0.06 |
| [166.77 - 174.35) |
170.56 |
3 |
0.01 |
25503 |
9 |
99.98 |
0.04 |
| [174.35 - 181.93) |
178.14 |
2 |
0.01 |
25505 |
6 |
99.98 |
0.02 |
| [181.93 - 189.51) |
185.72 |
1 |
0.00 |
25506 |
4 |
99.99 |
0.02 |
| [189.51 - 197.09) |
193.3 |
1 |
0.00 |
25507 |
3 |
99.99 |
0.01 |
| [197.09 - 204.67) |
200.88 |
1 |
0.00 |
25508 |
2 |
100 |
0.01 |
| [204.67 - 212.25) |
208.46 |
0 |
0.00 |
25508 |
1 |
100 |
0 |
| [212.25 - 219.83) |
216.04 |
0 |
0.00 |
25508 |
1 |
100 |
0 |
| [219.83 - 227.41) |
223.62 |
0 |
0.00 |
25508 |
1 |
100 |
0 |
| [227.41 - 234.99) |
231.2 |
0 |
0.00 |
25508 |
1 |
100 |
0 |
| [234.99 - 242.57) |
238.78 |
0 |
0.00 |
25508 |
1 |
100 |
0 |
| [242.57 - 250.15) |
246.36 |
0 |
0.00 |
25508 |
1 |
100 |
0 |
| [250.15 - 257.73] |
253.94 |
1 |
0.00 |
25509 |
1 |
100 |
0 |
| Totales |
- |
25509 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 2
Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
# SIMPLIFICATION PROCESS
# Histogram object behind the simplified table. It was referenced here without
# ever being created; it is now computed without drawing (plot = FALSE), using
# the same `breaks = 11` request as the hist() calls in the plotting section so
# that table and plots share the same classes.
Histograma_ozono <- hist(ozono, breaks = 11, plot = FALSE)
# Simplified elements: lower / upper limits, class marks and frequencies.
breaks <- Histograma_ozono$breaks
Lis <- head(breaks, -1)
Lss <- breaks[-1]
MCs <- (Lis + Lss) / 2
nis <- Histograma_ozono$counts
N <- length(ozono)
# Relative frequencies (%) and ascending / descending cumulative frequencies.
his <- (nis / N) * 100
Nis_asc <- cumsum(nis)
His_asc <- cumsum(his)
Nis_desc <- rev(cumsum(rev(nis)))
His_desc <- rev(cumsum(rev(his)))
# Simplified frequency table, one row per class. The display names are set
# directly (check.names = FALSE keeps "hi(%)" and friends untouched) so the
# table and its totals row are matched by name rather than by position.
# NOTE(review): the labels are written as "[a - b)". hist() builds right-closed
# classes (a, b] by default; confirm how Histograma_ozono was created and
# adjust the brackets if needed.
TDF_ozonosimplificado <- data.frame(
  Intervalo = paste0("[", round(Lis, 2), " - ", round(Lss, 2), ")"),
  MC = round(MCs, 2),
  ni = nis,
  `hi(%)` = round(his, 2),
  Ni_asc = Nis_asc,
  `Hi_asc (%)` = round(His_asc, 2),
  Ni_desc = Nis_desc,
  `Hi_desc (%)` = round(His_desc, 2),
  check.names = FALSE
)
# Totals row, declared in the same column order as the table: only ni and
# hi(%) are summed, every other column shows "-".
totaless <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(nis),
  `hi(%)` = sum(his),
  Ni_asc = "-",
  `Hi_asc (%)` = "-",
  Ni_desc = "-",
  `Hi_desc (%)` = "-",
  check.names = FALSE
)
# Append the totals row at the bottom of the table.
TDF_ozonosimplificado <- rbind(TDF_ozonosimplificado, totaless)
# Render the simplified frequency table (Table 2) with gt: header, source note,
# black outer / heading borders and grey row separators.
# The closing "**" of the subtitle has to touch the last word: with a space in
# front of it Markdown does not treat it as a closing delimiter and the
# asterisks are printed literally instead of producing bold text.
TDF_ozonosimplificado %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 2*"),
    subtitle = md("**Distribucion de frecuencia simplificado de concentración de ozono,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 2\n Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 2 |
| **Distribucion de frecuencia simplificado de concentración de ozono,estudio calidad del aire en India entre 2015-2020 ** |
| Intervalo |
MC |
ni |
hi(%) |
Ni_asc |
Hi_asc (%) |
Ni_desc |
Hi_desc (%) |
| [0 - 20) |
10 |
7022 |
27.53 |
7022 |
27.53 |
25509 |
100 |
| [20 - 40) |
30 |
10006 |
39.23 |
17028 |
66.75 |
18487 |
72.47 |
| [40 - 60) |
50 |
5647 |
22.14 |
22675 |
88.89 |
8481 |
33.25 |
| [60 - 80) |
70 |
1896 |
7.43 |
24571 |
96.32 |
2834 |
11.11 |
| [80 - 100) |
90 |
600 |
2.35 |
25171 |
98.67 |
938 |
3.68 |
| [100 - 120) |
110 |
224 |
0.88 |
25395 |
99.55 |
338 |
1.33 |
| [120 - 140) |
130 |
75 |
0.29 |
25470 |
99.85 |
114 |
0.45 |
| [140 - 160) |
150 |
24 |
0.09 |
25494 |
99.94 |
39 |
0.15 |
| [160 - 180) |
170 |
11 |
0.04 |
25505 |
99.98 |
15 |
0.06 |
| [180 - 200) |
190 |
2 |
0.01 |
25507 |
99.99 |
4 |
0.02 |
| [200 - 220) |
210 |
1 |
0.00 |
25508 |
100 |
2 |
0.01 |
| [220 - 240) |
230 |
0 |
0.00 |
25508 |
100 |
1 |
0 |
| [240 - 260) |
250 |
1 |
0.00 |
25509 |
100 |
1 |
0 |
| Totales |
- |
25509 |
100.00 |
- |
- |
- |
- |
| Autor: Grupo 2
Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
# PLOTS
# Local histogram: the count axis stops at the largest class frequency. The
# default x axis is suppressed (xaxt = "n") and redrawn below with one tick per
# class limit.
hist(
  x = ozono,
  breaks = 11,
  col = "lightskyblue",
  main = paste0(
    "Gráfica N°1: Distribución de la Concentración de Ozono\n",
    "presente en el estudio sobre calidad del aire en India entre 2015-2020 "
  ),
  xlab = " Ozono (µg/m3)",
  ylab = "Cantidad",
  ylim = c(0, max(nis)),
  xaxt = "n",
  cex.main = 0.9,
  cex.lab = 1,
  cex.axis = 0.9
)
axis(
  side = 1,
  at = Histograma_ozono$breaks,
  labels = Histograma_ozono$breaks,
  las = 1,
  cex.axis = 0.9
)

# Global histogram: same bars, but the count axis runs up to the full sample
# size. The default x axis is suppressed (xaxt = "n") and redrawn below with
# one tick per class limit.
hist(
  x = ozono,
  breaks = 11,
  col = "lightskyblue",
  main = paste0(
    "Gráfica N°2:Distribución de la Concentración de Ozono\n",
    "presente en el estudio sobre calidad del aire en India entre 2015-2020"
  ),
  xlab = "Ozono (µg/m3)",
  ylab = "Cantidad",
  ylim = c(0, length(ozono)),
  xaxt = "n",
  cex.main = 1,
  cex.lab = 1,
  cex.axis = 0.9
)
axis(
  side = 1,
  at = Histograma_ozono$breaks,
  labels = Histograma_ozono$breaks,
  las = 1,
  cex.axis = 0.9
)

# Global percentage histogram: relative frequencies on a fixed 0-100 % axis.
# Make sure the percentage column is numeric. The result is written back to the
# existing `hi(%)` column; the previous target `hi (%)` (with a space) did not
# exist, so the assignment only added a duplicate column to the table.
TDF_ozonosimplificado$`hi(%)` <- as.numeric(TDF_ozonosimplificado$`hi(%)`)
# head(x, -1) drops the trailing "Totales" row and, unlike 1:(n - 1), cannot
# count backwards when there is nothing left to select.
post <- barplot(
  head(TDF_ozonosimplificado$`hi(%)`, -1),
  space = 0,
  col = "skyblue",
  main = paste0(
    "Gráfica N°3:Distribución de la Concentración de Ozono, estudio\n",
    "calidad del aire en India, 2015-2020"
  ),
  xlab = "Ozono (µg/m3)",
  ylab = "Porcentaje (%)",
  names.arg = head(TDF_ozonosimplificado$MC, -1),
  ylim = c(0, 100)
)
# Tick marks at the bar midpoints returned by barplot(), labelled with the
# class marks.
axis(
  side = 1,
  at = post,
  labels = head(TDF_ozonosimplificado$MC, -1),
  tck = -0.02
)

# Local percentage histogram: relative frequencies with the y axis scaled to
# the data (no fixed 0-100 % range).
n <- as.numeric(nrow(TDF_ozonosimplificado))
# head(x, -1) drops the trailing "Totales" row and, unlike 1:(n - 1), cannot
# count backwards when there is nothing left to select.
pos <- barplot(
  head(TDF_ozonosimplificado$`hi(%)`, -1),
  space = 0,
  main = paste0(
    "Gráfica No. 4:Distribución concentración de Ozono en el estudio\n",
    "calidad del aire en India, 2015-2020"
  ),
  ylab = "Porcentaje (%)",
  xlab = "Ozono (µg/m3)",
  names.arg = head(TDF_ozonosimplificado$MC, -1),
  col = "skyblue"
)
# Tick marks at the bar midpoints returned by barplot(), labelled with the
# class marks.
axis(
  side = 1,
  at = pos,
  labels = head(TDF_ozonosimplificado$MC, -1),
  tck = -0.04, # length of the tick marks
  las = 1      # keep the labels horizontal
)

# Box plot of the ozone readings, drawn horizontally. The boxplot() result is
# kept in Cajaozono. TRUE is spelled out because `T` is an ordinary variable
# that can be reassigned.
Cajaozono <- boxplot(
  ozono,
  horizontal = TRUE,
  col = "turquoise",
  border = "black",
  main = paste0(
    "Gráfica No. 5: Distribución de la concentración de ozono,\n",
    "estudio calidad del aire en India desde 2015-2020"
  ),
  xlab = "Ozono (µg/m3)"
)

# Ascending ogive: cumulative counts against the upper class limits.
# xlim covers both sets of limits; without it the axis is scaled from Lss only
# and the first point of the descending ogive (at Lis[1]) falls outside the
# plotting region.
plot(
  Lss, Nis_asc,
  type = "b",
  main = paste0(
    "Gráfica N°6:Ojiva ascendente y descendente de la\n",
    "distribución local del concentración de ozono"
  ),
  xlab = "Ozono (µg/m3)",
  ylab = "Cantidad",
  pch = 19,
  col = "turquoise",
  xlim = range(c(Lis, Lss)),
  ylim = c(0, max(c(Nis_asc, Nis_desc)))
)
# Descending ogive: cumulative counts against the lower class limits, added to
# the same plot in black.
lines(Lis, Nis_desc, type = "b", col = "black", pch = 19)

# Ascending ogive: cumulative percentages against the upper class limits.
# Both axis ranges are set explicitly. Left to plot(), they are derived from
# the ascending series alone, which clips the descending ogive: its first point
# lies at Lis[1] and most of its values fall below min(His_asc).
plot(
  Lss, His_asc,
  type = "b",
  main = paste0(
    " Gráfica N°7:Ojiva ascendete y descendete de la distribución\n",
    "de la concentración de Ozono"
  ),
  xlab = "Ozono(µg/m3)",
  ylab = "Porcentaje %",
  col = "blue",
  pch = 19,
  xlim = range(c(Lis, Lss)),
  ylim = c(0, max(c(His_asc, His_desc)))
)
# Descending ogive: cumulative percentages against the lower class limits,
# added to the same plot in red.
lines(
  Lis, His_desc,
  type = "b",
  col = "red",
  pch = 19
)

# INDICATORS
# Lines starting with "## " are the console output recorded when the report
# was rendered.
# Measures of central tendency
# Median
Me <- median(ozono)
Me
## [1] 30.84
# Mean
X <- mean(ozono)
X
## [1] 34.49143
# Mode: modal class of the simplified table, i.e. the class with the largest
# absolute frequency, derived from the data instead of being typed in by hand.
clase_modal <- which.max(nis)
Mo <- paste0("[", Lis[clase_modal], ",", Lss[clase_modal], "]")
Mo
## [1] "[20,40]"
# Measures of dispersion
# Variance
var(ozono)
## [1] 470.6699
# Standard deviation, rounded to 2 decimals for the summary table
desv <- round(sd(ozono), 2)
# Coefficient of variation (%), computed from the unrounded standard deviation
CV <- (sd(ozono) / X) * 100
CV
## [1] 62.89947
# Measures of shape
# Skewness coefficient
library(e1071)
As <- skewness(ozono)
As
## [1] 1.329963
# Kurtosis
K <- kurtosis(ozono)
K
## [1] 3.428053
Variable <- "Ozono"
# Observed range, built from the data rather than hard-coded.
Rango <- paste0("[", min(ozono), ",", max(ozono), "]")
# One-row summary table. Columns are named on creation; desv is already rounded
# above, so it is used as is.
Tabla_indicadores <- data.frame(
  Variable = Variable,
  Rango = Rango,
  X = round(X, 3),
  Me = Me,
  Mo = Mo,
  sd = desv,
  CV = round(CV, 2),
  As = round(As, 2),
  K = round(K, 2)
)
library(gt)
library(dplyr)
# Render the indicator summary (Table 3) with gt: header, source note, black
# outer / heading borders and grey row separators.
# The closing "**" of the subtitle has to touch the last word: with a space in
# front of it Markdown does not treat it as a closing delimiter and the
# asterisks are printed literally instead of producing bold text.
Tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 3*"),
    subtitle = md("**Indicadores Estadísticos de concentración de Ozono,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  tab_source_note(
    source_note = md("Autor: Grupo 2\n Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )