# Estadística descriptiva
# 02/12/2025
# Lorien Arcentales
# Carga de datos
# Build the frequency distribution table (TDF_ozono) for the O3 column of
# city_day.csv: class limits, class marks, absolute frequencies (ni),
# relative frequencies in percent (hi) and their cumulative versions.

# Load the data set.
datos <- read.csv("city_day.csv", header = TRUE, dec = ".", sep = ",")

# Keep only usable O3 readings: drop the "-" placeholder, convert to numeric
# and discard anything missing or not parseable as a number, so that min(),
# max() and the class counts below can never turn into NA.
ozono <- datos$O3[datos$O3 != "-"]
ozono <- as.numeric(ozono)
ozono <- ozono[!is.na(ozono)]
stopifnot(length(ozono) > 0)

# Range, number of classes and class width.
# NOTE(review): `min` and `max` shadow the base functions as variables (calls
# such as min(x) still resolve to the functions). The names are kept in case
# code outside this section reads them; rename them if nothing does.
min <- min(ozono)
max <- max(ozono)
R <- max - min
# Sturges' rule is k = 1 + log2(n), i.e. 1 + 3.3 * log10(n). R's log() is the
# natural logarithm and inflates the class count (34 instead of 15 classes
# for 25512 observations), so log10() is required here. Any output recorded
# with the old class count no longer matches.
k <- floor(1 + 3.3 * log10(length(ozono)))
A <- R / k

# Class limits. All k + 1 break points come from a single vector, so every
# upper limit is exactly the next lower limit and Li/Ls always have length k.
# Two independent seq(by = A) calls can drift apart through rounding.
limites <- seq(from = min, to = max, length.out = k + 1)
Li <- limites[-(k + 1)]
Ls <- limites[-1]
MC <- (Li + Ls) / 2

# Absolute frequencies: every class is [Li, Ls) except the last one, which is
# closed on the right so that the maximum value is counted.
clase <- findInterval(ozono, limites, rightmost.closed = TRUE)
ni <- tabulate(clase, nbins = k)

# Relative (percent) and cumulative frequencies, ascending and descending.
N <- sum(ni)
hi <- (ni / N) * 100
Ni_asc <- cumsum(ni)
Ni_desc <- rev(cumsum(rev(ni)))
Hi_asc <- cumsum(hi)
Hi_desc <- rev(cumsum(rev(hi)))

# Closing bracket per class: ")" for half-open classes, "]" for the last one,
# so the label matches how the observations were actually counted.
cierre <- rep(c(")", "]"), times = c(k - 1, 1))

TDF_ozono <- data.frame(
  Intervalo = paste0("[", round(Li, 2), " - ", round(Ls, 2), cierre),
  MC = round(MC, 2),
  ni = ni,
  hi = round(hi, 2),
  Ni_ascendente = Ni_asc,
  Ni_descendente = Ni_desc,
  Hi_ascendente = round(Hi_asc, 2),
  Hi_descendente = round(Hi_desc, 2)
)
# Sanity checks on the class limits. The `##` lines are console output
# recorded from an earlier run of this script, not live results; they change
# whenever the data or the number of classes changes.
# Both limit vectors must have the same length (one entry per class).
length(Li)
## [1] 34
length(Ls)
## [1] 34
# The largest upper limit should coincide with the sample maximum, so that
# the last class reaches the top of the data.
max(ozono)
## [1] 257.73
max(Ls)
## [1] 257.73
# Overall spread and summary statistics of the ozone values.
range(ozono)
## [1]   0.01 257.73
summary(ozono)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.01   18.86   30.84   34.49   45.57  257.73
# Lower limit, upper limit and absolute frequency of every class, side by side.
cbind(Li, Ls, ni)
##           Li     Ls   ni
##  [1,]   0.01   7.59 1441
##  [2,]   7.59  15.17 2953
##  [3,]  15.17  22.75 4018
##  [4,]  22.75  30.33 4114
##  [5,]  30.33  37.91 3611
##  [6,]  37.91  45.49 2971
##  [7,]  45.49  53.07 2149
##  [8,]  53.07  60.65 1519
##  [9,]  60.65  68.23  950
## [10,]  68.23  75.81  612
## [11,]  75.81  83.39  374
## [12,]  83.39  90.97  255
## [13,]  90.97  98.55  183
## [14,]  98.55 106.13  113
## [15,] 106.13 113.71   86
## [16,] 113.71 121.29   53
## [17,] 121.29 128.87   33
## [18,] 128.87 136.45   26
## [19,] 136.45 144.03   18
## [20,] 144.03 151.61    9
## [21,] 151.61 159.19    9
## [22,] 159.19 166.77    6
## [23,] 166.77 174.35    3
## [24,] 174.35 181.93    2
## [25,] 181.93 189.51    1
## [26,] 189.51 197.09    1
## [27,] 197.09 204.67    1
## [28,] 204.67 212.25    0
## [29,] 212.25 219.83    0
## [30,] 219.83 227.41    0
## [31,] 227.41 234.99    0
## [32,] 234.99 242.57    0
## [33,] 242.57 250.15    0
## [34,] 250.15 257.73    1
library(gt)
library(dplyr)
# Render the frequency table with gt: italic table number as title, bold
# subtitle, author/source notes, and black/gray rule styling.
TDF_ozono %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 1*"),
    # The closing `**` must touch the last character of the text: with a
    # space in front of it Markdown does not treat it as a closing delimiter
    # and the asterisks are printed literally instead of producing bold.
    subtitle = md("**Distribucion de frecuencia de concentración de ozono,estudio calidad del aire en India entre 2015-2020**")
  ) %>%
  # One note per call so author and source appear on separate lines; a bare
  # "\n" inside md() is only a soft break and renders as a space.
  tab_source_note(
    source_note = md("Autor: Grupo 2")
  ) %>%
  tab_source_note(
    source_note = md("Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india")
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
## Rendered table (output recorded from an earlier run):
## Tabla Nro. 1
## **Distribucion de frecuencia de concentración de ozono,estudio calidad del aire en India entre 2015-2020 **
## Intervalo MC ni hi Ni_ascendente Ni_descendente Hi_ascendente Hi_descendente
## [0.01 - 7.59) 3.80 1441 5.65 1441 25512 5.65 100.00
## [7.59 - 15.17) 11.38 2953 11.57 4394 24071 17.22 94.35
## [15.17 - 22.75) 18.96 4018 15.75 8412 21118 32.97 82.78
## [22.75 - 30.33) 26.54 4114 16.13 12526 17100 49.10 67.03
## [30.33 - 37.91) 34.12 3611 14.15 16137 12986 63.25 50.90
## [37.91 - 45.49) 41.70 2971 11.65 19108 9375 74.90 36.75
## [45.49 - 53.07) 49.28 2149 8.42 21257 6404 83.32 25.10
## [53.07 - 60.65) 56.86 1519 5.95 22776 4255 89.28 16.68
## [60.65 - 68.23) 64.44 950 3.72 23726 2736 93.00 10.72
## [68.23 - 75.81) 72.02 612 2.40 24338 1786 95.40 7.00
## [75.81 - 83.39) 79.60 374 1.47 24712 1174 96.86 4.60
## [83.39 - 90.97) 87.18 255 1.00 24967 800 97.86 3.14
## [90.97 - 98.55) 94.76 183 0.72 25150 545 98.58 2.14
## [98.55 - 106.13) 102.34 113 0.44 25263 362 99.02 1.42
## [106.13 - 113.71) 109.92 86 0.34 25349 249 99.36 0.98
## [113.71 - 121.29) 117.50 53 0.21 25402 163 99.57 0.64
## [121.29 - 128.87) 125.08 33 0.13 25435 110 99.70 0.43
## [128.87 - 136.45) 132.66 26 0.10 25461 77 99.80 0.30
## [136.45 - 144.03) 140.24 18 0.07 25479 51 99.87 0.20
## [144.03 - 151.61) 147.82 9 0.04 25488 33 99.91 0.13
## [151.61 - 159.19) 155.40 9 0.04 25497 24 99.94 0.09
## [159.19 - 166.77) 162.98 6 0.02 25503 15 99.96 0.06
## [166.77 - 174.35) 170.56 3 0.01 25506 9 99.98 0.04
## [174.35 - 181.93) 178.14 2 0.01 25508 6 99.98 0.02
## [181.93 - 189.51) 185.72 1 0.00 25509 4 99.99 0.02
## [189.51 - 197.09) 193.30 1 0.00 25510 3 99.99 0.01
## [197.09 - 204.67) 200.88 1 0.00 25511 2 100.00 0.01
## [204.67 - 212.25) 208.46 0 0.00 25511 1 100.00 0.00
## [212.25 - 219.83) 216.04 0 0.00 25511 1 100.00 0.00
## [219.83 - 227.41) 223.62 0 0.00 25511 1 100.00 0.00
## [227.41 - 234.99) 231.20 0 0.00 25511 1 100.00 0.00
## [234.99 - 242.57) 238.78 0 0.00 25511 1 100.00 0.00
## [242.57 - 250.15) 246.36 0 0.00 25511 1 100.00 0.00
## [250.15 - 257.73) 253.94 1 0.00 25512 1 100.00 0.00
## Autor: Grupo 2 Fuente:https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india