UNIVERSIDAD CENTRAL DEL ECUADOR

ANÁLISIS ESTADÍSTICO SOBRE LA CALIDAD DEL AIRE EN LA INDIA

FECHA: 05/12/2025

##Estadística Descriptiva
#Variable Continua SO2
#Llumitasig Daniela 


#Cargar librerias
library(gt)
library(dplyr)

## 
## Adjuntando el paquete: 'dplyr'

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

library(e1071)

# Load data ----
# The dataset is read once, from the working directory.
city_day_2_ <- read.csv("city_day (2).csv")
# `datos` used to be a second read of "city_day (2).csv" from a
# machine-specific home-directory path. It is kept as an untouched copy of the
# raw table so the name still exists without depending on that path.
# NOTE(review): assumes both paths pointed at the same file - confirm.
datos <- city_day_2_

# Clean SO2 ----
# "-" marks a missing measurement, so it is turned into NA before the numeric
# conversion. Dropping the missing values changes the sample size from 29531
# to 25677.
city_day_2_$SO2[city_day_2_$SO2 %in% "-"] <- NA

city_day_2_$SO2 <- as.numeric(city_day_2_$SO2)

# Keep only observed (non-NA), non-negative concentrations.
SO2 <- city_day_2_$SO2
SO2 <- SO2[!is.na(SO2) & SO2 >= 0]

# Extremes of SO2
max_SO2 <- max(SO2)
min_SO2 <- min(SO2)

# Range of the data
R <- diff(c(min_SO2, max_SO2))

# Number of classes: 1 + 3.33 * log10(n), rounded down
K <- floor(1 + 3.33 * log10(length(SO2)))

# Class width
A <- R / K

# Upper limit of each class (rounded to 2 decimals)
Ls <- round(seq(from = min_SO2 + A, to = max_SO2, by = A), 2)

# Lower limit of each class (rounded to 2 decimals)
Li <- round(seq(from = min_SO2, to = max_SO2 - A, by = A), 2)

# Class mark: midpoint between the lower and upper limit
Mc <- (Ls + Li) / 2

# Absolute frequency (ni) of every class. Classes are [Li, Ls), except the
# last one, which is closed on both sides, [Li, Ls], so that the maximum value
# is counted.
ni <- vapply(
  seq_len(K),
  function(clase) {
    desde_li <- SO2 >= Li[clase]
    hasta_ls <- if (clase < K) SO2 < Ls[clase] else SO2 <= Ls[clase]
    sum(desde_li & hasta_ls, na.rm = TRUE)
  },
  integer(1)
)

# Total count and relative frequency in percent
N <- sum(ni)
hi <- ni / N * 100

# Cumulative frequencies, ascending and descending (counts and percentages)
Ni_asc <- cumsum(ni)
Hi_asc <- cumsum(hi)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_desc <- rev(cumsum(rev(hi)))

# Interval labels: ")" closes every class except the last one, closed by "]"
Intervalo <- paste0(
  "[", round(Li, 2), " - ", round(Ls, 2),
  ifelse(seq_along(Li) == length(Li), "]", ")")
)



# Totals row for SO2: only ni and hi are summed; every other column shows "-"
totales <- data.frame(
  Intervalo = "Totales",
  Mc = "-",
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Ni_asc = "-",
  Ni_desc = "-",
  Hi_asc = "-",
  Hi_desc = "-"
)

# Frequency distribution table of SO2 (one row per class) followed by the
# totals row. Columns passed as bare names keep those names.
TDF_SO2 <- rbind(
  data.frame(
    Intervalo,
    Mc = round(Mc, 2),
    ni,
    hi = round(hi, 2),
    Ni_asc,
    Ni_desc,
    Hi_asc = round(Hi_asc, 2),
    Hi_desc = round(Hi_desc, 2)
  ),
  totales
)

# Number of classes actually built
length(Li)

## [1] 15

# Table 1 ----
# Renders TDF_SO2 with gt: header, source note and black/gray border styling.

library(gt)
library(dplyr)

local({
  tabla <- gt(TDF_SO2)
  tabla <- tab_header(
    tabla,
    title = md("*Tabla Nro. 1*"),
    subtitle = md("*Distribución de frecuencias de la concentración de SO2 en el estudio de calidad del aire en la India*")
  )
  tabla <- tab_source_note(
    tabla,
    source_note = md("Fuente:Datos procesados por el autor a partir del archivo *city_day_(2).csv*")
  )
  # The value of the last call is returned visibly, so the table is printed.
  tab_options(
    tabla,
    table.border.top.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.color = "black",
    table.border.bottom.style = "solid",
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
})

Intervalo	Mc	ni	hi	Ni_asc	Ni_desc	Hi_asc	Hi_desc
Tabla Nro. 1
Distribución de frecuencias de la concentración de SO2 en el estudio de calidad del aire en la India
[0.01 - 12.93)	6.47	17536	68.29	17536	25677	68.29	100
[12.93 - 25.86)	19.4	5066	19.73	22602	8141	88.02	31.71
[25.86 - 38.78)	32.32	1270	4.95	23872	3075	92.97	11.98
[38.78 - 51.7)	45.24	818	3.19	24690	1805	96.16	7.03
[51.7 - 64.63)	58.16	328	1.28	25018	987	97.43	3.84
[64.63 - 77.55)	71.09	175	0.68	25193	659	98.12	2.57
[77.55 - 90.47)	84.01	123	0.48	25316	484	98.59	1.88
[90.47 - 103.4)	96.94	110	0.43	25426	361	99.02	1.41
[103.4 - 116.32)	109.86	73	0.28	25499	251	99.31	0.98
[116.32 - 129.24)	122.78	63	0.25	25562	178	99.55	0.69
[129.24 - 142.17)	135.7	45	0.18	25607	115	99.73	0.45
[142.17 - 155.09)	148.63	26	0.10	25633	70	99.83	0.27
[155.09 - 168.01)	161.55	19	0.07	25652	44	99.9	0.17
[168.01 - 180.94)	174.48	21	0.08	25673	25	99.98	0.1
[180.94 - 193.86]	187.4	4	0.02	25677	4	100	0.02
Totales	-	25677	100.00	-	-	-	-
Fuente:Datos procesados por el autor a partir del archivo city_day_(2).csv

# Simplification process for SO2 ----
# Histogram object with R's default class limits (nothing is drawn).
# right = FALSE makes the classes left-closed, [a, b), with the last class
# closed on both sides. That is the convention the interval labels below state.
histo_SO2 <- hist(SO2, plot = FALSE, right = FALSE)

# Simplified elements. The limits are taken from the breaks whatever their
# number is, instead of assuming exactly 20 classes.
Lis <- head(histo_SO2$breaks, -1) # lower limit of each class
Lss <- tail(histo_SO2$breaks, -1) # upper limit of each class
MCs <- (Lis + Lss) / 2            # class mark: midpoint of both limits
nis <- histo_SO2$counts           # absolute frequency
his <- (nis / sum(nis)) * 100     # relative frequency in percent

# Cumulative frequencies, ascending and descending
Nis_asc <- cumsum(nis)
His_asc <- cumsum(his)
Nis_desc <- rev(cumsum(rev(nis)))
His_desc <- rev(cumsum(rev(his)))

# Interval labels: "[a - b)" for every class, "[a - b]" for the last one
Intervalos <- paste0("[", round(Lis, 2), " - ", round(Lss, 2), ")")
Intervalos[length(Intervalos)] <- paste0(
  "[", round(Lis[length(Lis)], 2), " - ", round(Lss[length(Lss)], 2), "]"
)
# Simplified frequency distribution table of SO2, one row per class.
TDF_SO2simplificado <- data.frame(
  Intervalo = Intervalos,
  MC = round(MCs, 2),
  ni = nis,
  hi = round(his, 2),
  Ni_ascendente = Nis_asc,
  Hi_ascendente = round(His_asc, 2),
  Ni_descendente = Nis_desc,
  Hi_descendente = round(His_desc, 2)
)
# Display names; they are not syntactic R names, so they are set afterwards.
colnames(TDF_SO2simplificado) <- c(
  "Intervalo",
  "MC",
  "ni",
  "hi(%)",
  "Ni_asc",
  "Hi_asc (%)",
  "Ni_desc",
  "Hi_desc (%)"
)

# Totals row. Its columns are declared in the same order as the table above,
# so each placeholder sits under the column it is relabelled to.
totaless <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(nis),            # total of ni
  hi = round(sum(his), 2),  # total of hi (%), rounded like the other rows
  Ni_ascendente = "-",
  Hi_ascendente = "-",
  Ni_descendente = "-",
  Hi_descendente = "-"
)
colnames(totaless) <- colnames(TDF_SO2simplificado)

# Append the totals row at the end of the table
TDF_SO2simplificado <- rbind(TDF_SO2simplificado, totaless)


# Table 2 ----
# Renders TDF_SO2simplificado with gt: header, source note and border styling.


local({
  tabla <- gt(TDF_SO2simplificado)
  tabla <- tab_header(
    tabla,
    title = md("*Tabla Nro. 2*"),
    subtitle = md("*Distribución de frecuencia simplificada de concentración de SO2, en el estudio calidad del aire en India*")
  )
  tabla <- tab_source_note(
    tabla,
    source_note = md("Fuente: Datos procesados por el autor a partir del archivo *city_day (2)*")
  )
  # The value of the last call is returned visibly, so the table is printed.
  tab_options(
    tabla,
    table.border.top.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.color = "black",
    table.border.bottom.style = "solid",
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
})

Intervalo	MC	ni	hi(%)	Ni_asc	Hi_asc (%)	Ni_desc	Hi_desc (%)
Tabla Nro. 2
Distribución de frecuencia simplificada de concentración de SO2, en el estudio calidad del aire en India
[0 - 10)	0	14075	54.82	14075	54.82	25677	100
[10 - 20)	10	7169	27.92	21244	82.74	11602	45.18
[20 - 30)	20	1900	7.40	23144	90.14	4433	17.26
[30 - 40)	30	835	3.25	23979	93.39	2533	9.86
[40 - 50)	40	613	2.39	24592	95.77	1698	6.61
[50 - 60)	50	340	1.32	24932	97.1	1085	4.23
[60 - 70)	60	178	0.69	25110	97.79	745	2.9
[70 - 80)	70	109	0.42	25219	98.22	567	2.21
[80 - 90)	80	94	0.37	25313	98.58	458	1.78
[90 - 100)	90	80	0.31	25393	98.89	364	1.42
[100 - 110)	100	70	0.27	25463	99.17	284	1.11
[110 - 120)	110	58	0.23	25521	99.39	214	0.83
[120 - 130)	120	44	0.17	25565	99.56	156	0.61
[130 - 140)	130	39	0.15	25604	99.72	112	0.44
[140 - 150)	140	22	0.09	25626	99.8	73	0.28
[150 - 160)	150	18	0.07	25644	99.87	51	0.2
[160 - 170)	160	17	0.07	25661	99.94	33	0.13
[170 - 180)	170	11	0.04	25672	99.98	16	0.06
[180 - 190)	180	4	0.02	25676	100	5	0.02
[190 - 200]	190	1	0.00	25677	100	1	0
Totales	-	25677	100.00	-	-	-	-
Fuente: Datos procesados por el autor a partir del archivo city_day (2)

# Charts ----

# Local histogram: the y-axis goes up to the largest class count.
# The precomputed histo_SO2 object is drawn directly, so the bars, the y-limit
# and the custom x-axis ticks below all use the same class limits and counts.
# (A numeric `breaks` passed to hist() is only a suggestion.)
plot(histo_SO2,
     main = "Gráfica N°1: Distribución de la Concentración de SO2
     presente en el estudio sobre calidad del aire en India",
     xlab = " SO2 (µg/m3)",
     ylab = "Cantidad",
     ylim = c(0, max(histo_SO2$counts)),
     col = "yellow",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # default x-axis suppressed; drawn at the class limits below
axis(1, at = histo_SO2$breaks,
     labels = histo_SO2$breaks, las = 1,
     cex.axis = 0.9)

# Global histogram: the y-axis goes up to the total number of observations.
# The precomputed histo_SO2 object is drawn directly, so the bars and the
# custom x-axis ticks below use the same class limits. (A numeric `breaks`
# passed to hist() is only a suggestion.)
plot(histo_SO2,
     main = "Gráfica N°2:Distribución de la Concentración de SO2
     presente en el estudio sobre calidad del aire en India",
     xlab = "SO2 (µg/m3)",
     ylab = "Cantidad",
     ylim = c(0, length(SO2)),
     col = "yellow",
     cex.main = 1,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n")  # default x-axis suppressed; drawn at the class limits below
axis(1, at = histo_SO2$breaks,
     labels = histo_SO2$breaks, las = 1,
     cex.axis = 0.9)

# Local percentage histogram ----

# hi(%) of every class, taken from the simplified table without its last row
# (the totals row)
hi_loc <- head(TDF_SO2simplificado[["hi(%)"]], -1)

# Same class limits as the simplified table
breaks_SO2_simplificado <- c(Lis[1], Lss)
histo_SO2_simplificado <- hist(
  SO2,
  breaks = breaks_SO2_simplificado,
  plot = FALSE
)

# Empty canvas with the y-axis in percent, no box and no default x-axis
plot(
  NA,
  main = "Gráfica N°3: Histograma porcentual local de SO2",
  xlab = "SO2 (µg/m³)",
  ylab = "Porcentaje (%)",
  xlim = range(breaks_SO2_simplificado),
  ylim = c(0, max(hi_loc) * 1.1),
  xaxt = "n",
  bty = "n"
)

# One bar per class, as tall as its hi(%); rect() draws them all in one call
barras <- seq_along(hi_loc)
rect(
  xleft = breaks_SO2_simplificado[barras],
  ybottom = 0,
  xright = breaks_SO2_simplificado[barras + 1],
  ytop = hi_loc,
  col = "yellow",
  border = "black"
)

# X-axis labelled at every second class limit
idx <- seq(1, length(histo_SO2_simplificado$breaks), by = 2)
axis(
  1,
  at = histo_SO2_simplificado$breaks[idx],
  labels = round(histo_SO2_simplificado$breaks[idx], 0),
  las = 1,
  cex.axis = 0.9
)

# Global percentage histogram ----

# 1. Histogram object (not drawn) that provides the class limits and counts
histo_SO2_global <- hist(SO2, breaks = 19, plot = FALSE)

# 2. Absolute frequencies
ni_global <- histo_SO2_global$counts

# 3. Percentage of each class over the total
hi_global <- ni_global / sum(ni_global) * 100

# 4. Empty canvas with the y-axis from 0 to 100 %, no box. The axis label and
#    the title name SO2, the variable that is actually plotted.
plot(NA,
     xlim = range(histo_SO2_global$breaks),
     ylim = c(0, 100),
     xlab = "SO2 (µg/m³)",
     ylab = "Porcentaje (%)",
     main = "Gráfica N°4: Histograma porcentual global de SO2",
     xaxt = "n",
     bty = "n"
)

# 5. One bar per class, as tall as its percentage; rect() is vectorised
barras <- seq_along(hi_global)
rect(
  xleft   = histo_SO2_global$breaks[barras],
  ybottom = 0,
  xright  = histo_SO2_global$breaks[barras + 1],
  ytop    = hi_global,
  col     = "yellow",
  border  = "black"
)

# 6. X-axis labelled at every second class limit
idx <- seq(1, length(histo_SO2_global$breaks), by = 2)

axis(1,
     at     = histo_SO2_global$breaks[idx],
     labels = round(histo_SO2_global$breaks[idx], 0),
     las    = 1,
     cex.axis = 0.9)

# Box plot ----
# Horizontal box plot of SO2; the value returned by boxplot() is kept in
# CajaSO2. TRUE is spelled out because `T` is an ordinary variable that can be
# reassigned.
CajaSO2 <- boxplot(
  SO2,
  horizontal = TRUE,
  col = "blue",
  border = "black",
  main = "Gráfica No. 5: Distribución de la concentración de SO2,
                   estudio calidad del aire en India",
  xlab = "SO2 (µg/m3)"
)

# Ogives (counts) ----

# Ascending ogive: Nis_asc[i] accumulates the classes up to class i, so it is
# plotted at the upper class limits.
x <- Lss

plot(
  x, Nis_asc,
  type = "b",
  col = "purple",
  pch = 19,
  main = "Gráfica N°6:Distribución de Frecuencias Ascendente y Descendente
          de la Concentración de SO2",
  xlab = "SO2 (µg/m3)",
  ylab = "Cantidad",
  xlim = range(c(Lis, Lss)),  # room for the first lower limit as well
  ylim = c(0, max(c(Nis_asc, Nis_desc)))
)

# Descending ogive: Nis_desc[i] accumulates the classes from class i onwards,
# so it is plotted at the lower class limits.
lines(
  Lis, Nis_desc,
  type = "b",
  col = "red",
  pch = 19
)

# Identify both curves
legend(
  "right",
  legend = c("Ascendente", "Descendente"),
  col = c("purple", "red"),
  pch = 19,
  lty = 1,
  bty = "n"
)

# Ogives (percentages) ----

# Ascending ogive: His_asc[i] accumulates the classes up to class i, so it is
# plotted at the upper class limits.
x <- Lss

plot(
  x, His_asc,
  type = "b",
  main = "Gráfica N°7:Distribución de Frecuencias Ascendente y Descendente 
  de la Concentración de SO2",
  xlab = "SO2 (µg/m3)",
  ylab = "Porcentaje (%)",
  col = "purple",
  pch = 19,
  xlim = range(c(Lis, Lss)),  # room for the first lower limit as well
  ylim = c(0, 100)
)

# Descending ogive: His_desc[i] accumulates the classes from class i onwards,
# so it is plotted at the lower class limits.
lines(
  Lis, His_desc,
  type = "b",
  col = "red",
  pch = 19
)

# Identify both curves
legend(
  "right",
  legend = c("Ascendente", "Descendente"),
  col = c("purple", "red"),
  pch = 19,
  lty = 1,
  bty = "n"
)

# Position indicators ----
# Arithmetic mean
X_SO2 <- sum(SO2) / length(SO2)
X_SO2

## [1] 14.53198

# Median
Me_SO2 <- median(SO2)
Me_SO2

## [1] 9.16

# Mode: reported as the modal class of the simplified distribution, i.e. the
# class with the largest absolute frequency. It is derived from the data
# instead of being typed by hand.
Mo <- paste0("[", Lis[which.max(nis)], ",", Lss[which.max(nis)], "]")
Mo

## [1] "[0,10]"

# Dispersion indicators ----
# Sample variance
varianza_SO2 <- var(SO2)
varianza_SO2

## [1] 328.8338

# Standard deviation: square root of the sample variance
sd_SO2 <- sqrt(varianza_SO2)
sd_SO2

## [1] 18.13377

# Coefficient of variation (%): standard deviation relative to the mean
CV_SO2 <- sd_SO2 / X_SO2 * 100
CV_SO2

## [1] 124.7853

# The dispersion of SO2 is very high relative to its mean

# Shape indicators ----
# skewness() and kurtosis() come from e1071. The package is only attached
# here; installing packages is a one-time setup step and does not belong in
# the analysis script.
library(e1071)

# Coefficient of skewness
As_SO2 <- skewness(SO2)
As_SO2

## [1] 4.083182

# Kurtosis
Cu_SO2 <- kurtosis(SO2)
Cu_SO2

## [1] 22.06062

# Labels for the indicators table ----
Variable <- "SO2"

# Observed range of SO2, built from the data so it always agrees with the
# minimum and maximum of the sample.
Rango <- paste0("[", round(min(SO2), 2), ",", round(max(SO2), 2), "]")

# Outliers ----
# Values flagged by the box plot statistics; nothing is drawn.
cajaBigotes <- boxplot(SO2, plot = FALSE)

outliers <- cajaBigotes$out
min(outliers)

## [1] 29.56

max(outliers)

## [1] 193.86

length(outliers)

## [1] 2578

# Outlier summary text: "<count> (<smallest> – <largest>)"
VA <- paste0(
  length(outliers),
  " (",
  round(min(outliers), 2),
  " – ",
  round(max(outliers), 2),
  ")"
)


# One-row summary table with every indicator of SO2. The columns are gathered
# in a named list and handed to data.frame() in a single call.
Tabla_indicadores <- do.call(
  data.frame,
  list(
    Variable = Variable,
    Rango = Rango,
    X_SO2 = round(X_SO2, 3),
    Me_SO2 = Me_SO2,
    Mo = Mo,
    sd_SO2 = round(sd_SO2, 2),
    CV_SO2 = round(CV_SO2, 2),
    As_SO2 = round(As_SO2, 2),
    Cu_SO2 = round(Cu_SO2, 2),
    VA = VA
  )
)

# Table 3 ----
# Renders Tabla_indicadores with gt: header, source note and border styling.
# gt is only attached here; installing packages is a one-time setup step and
# does not belong in the analysis script.
library(gt)
library(dplyr)
Tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("Tabla Nro. 3"),
    subtitle = md("*Indicadores Estadísticos de concentración de SO2*")
  ) %>%
  tab_source_note(
    source_note = md("Fuente: Datos procesados por el autor a partir del archivo *city_day (2)*")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )

Variable	Rango	X_SO2	Me_SO2	Mo	sd_SO2	CV_SO2	As_SO2	Cu_SO2	VA
Tabla Nro. 3
Indicadores Estadísticos de concentración de SO2
SO2	[0,193.5]	14.532	9.16	[0,10]	18.13	124.79	4.08	22.06	2578 (29.56 – 193.86)
Fuente: Datos procesados por el autor a partir del archivo city_day (2)

VARIABLE CUANTITATIVA CONTINUA

UNIVERSIDAD CENTRAL DEL ECUADOR

ANÁLISIS ESTADÍSTICO SOBRE LA CALIDAD DEL AIRE EN LA INDIA

FECHA: 05/12/2025