1. Librerías

library(readr)
library(knitr)
library(kableExtra)
library(dplyr)
## 
## Adjuntando el paquete: 'dplyr'
## The following object is masked from 'package:kableExtra':
## 
##     group_rows
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

2. Leer Datos

# Load the Global Weather Repository CSV into a tibble.
# NOTE(review): the path is absolute and machine-specific; the script only
# runs as-is on a machine where this exact file location exists.
variables <- read_csv(
  file = "C:/Users/WAN/Downloads/GlobalWeatherRepository.csv"
)
## Rows: 141703 Columns: 41
## -- Column specification --------------------------------------------------------
## Delimiter: ","
## chr   (8): country, location_name, timezone, condition_text, wind_direction,...
## dbl  (30): latitude, longitude, last_updated_epoch, temperature_celsius, tem...
## dttm  (1): last_updated
## time  (2): sunrise, sunset
## 
## i Use `spec()` to retrieve the full column specification for this data.
## i Specify the column types or set `show_col_types = FALSE` to quiet this message.

3. Extraer Variables

# Pull the GB DEFRA air-quality index column and drop missing values in a
# single step.
DEFRA <- na.omit(variables$`air_quality_gb-defra-index`)

4. Agrupar Datos

# Bin the index into four ordered bands. Intervals are closed on the right
# (the default, spelled out here), and include.lowest = TRUE also keeps the
# lower bound 0 in the first band: [0, 3], (3, 6], (6, 9], (9, 10].
DEFRA_agrupada <- cut(
  x = DEFRA,
  breaks = c(0, 3, 6, 9, 10),
  labels = c("Bajo", "Moderado", "Alto", "Muy Alto"),
  include.lowest = TRUE,
  right = TRUE
)

5. Tabla Base

# Count observations per band; table() reports every factor level.
tabla_base <- table(DEFRA_agrupada)

# Category labels, in level order (identical to the names of the table,
# since the table is built from this factor).
categorias <- levels(DEFRA_agrupada)

6. Frecuencia absoluta

# Absolute frequency (ni): number of observations in each category.
# Counts produced by table() are never negative, so the former per-element
# guard (with its `else 0` branch) was dead code. as.numeric() copies the
# counts in one vectorized step, yields the same unnamed double vector, and
# does not fail on an empty table the way a `1:length(x)` loop does.
ni <- as.numeric(tabla_base)

7. Frecuencia Relativa

# Relative frequency (hi): each category's share of the total, expressed as
# a percentage rounded to two decimals. Vectorized so sum(ni) is computed
# once and an empty `ni` yields an empty result instead of a spurious NA
# from iterating over 1:0.
hi <- round((ni / sum(ni)) * 100, 2)

8. Tabla Final

# Frequency distribution table: one row per category with its absolute (ni)
# and relative (hi, in percent) frequency.
TDF_DEFRA <- data.frame(DEFRA = categorias, ni = ni, hi = hi)

9. Fila Total

# Summary row: total number of observations; the percentage column is fixed
# at 100 rather than summing the rounded hi values.
sumatoria <- data.frame(DEFRA = "TOTAL", ni = sum(ni), hi = 100)

# Append the summary row below the per-category rows.
TDF_DEFRA_suma <- rbind(TDF_DEFRA, sumatoria)

10. Cambiar Nombres

# Display headers for the final table; the percentage column gets an
# explicit unit.
names(TDF_DEFRA_suma) <- c("DEFRA", "ni", "hi (%)")

11. Tabla de Frecuencias Agrupadas

# Render the frequency table (including its TOTAL row) as a styled kable
# with a caption and a source footnote.
kable(
  x = TDF_DEFRA_suma,
  align = "c",
  caption = paste(
    "Tabla N°2:",
    "Distribución de frecuencia",
    "del índice AQI según DEFRA"
  )
) |>
  kable_styling(
    full_width = TRUE,
    position = "center",
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  ) |>
  # Row 0 addresses the header row in kableExtra: dark background, white
  # bold text.
  row_spec(row = 0, bold = TRUE, color = "white", background = "#2C3E50") |>
  # Last row is the TOTAL row: emphasize it with bold text on light grey.
  row_spec(
    row = nrow(TDF_DEFRA_suma),
    bold = TRUE,
    background = "#EAEDED"
  ) |>
  footnote(
    general = paste(
      "Elaborado por Grupo 2.",
      "Fuente: Global Weather Repository.",
      "https://www.kaggle.com/datasets/nelgiriyewithana/global-weather-repository"
    ),
    general_title = "Nota: ",
    footnote_as_chunk = TRUE,
    title_format = c("italic", "bold")
  )
Tabla N°2: Distribución de frecuencia del índice AQI según DEFRA
DEFRA ni hi (%)
Bajo 117166 82.68
Moderado 11354 8.01
Alto 4986 3.52
Muy Alto 8197 5.78
TOTAL 141703 100.00
Nota: Elaborado por Grupo 2. Fuente: Global Weather Repository. https://www.kaggle.com/datasets/nelgiriyewithana/global-weather-repository

12. Diagrama de Barras

# Bar chart of absolute frequencies, one bar per category, colored from
# green (first category) to red (last category).
barplot(
  height = ni,
  names.arg = categorias,
  col = c("green", "yellow", "orange", "red"),
  main = "Gráfica N°1: Índice AQI según DEFRA",
  xlab = "Categorías DEFRA",
  ylab = "Frecuencia",
  # Leave a little headroom above the tallest bar.
  ylim = c(0, max(ni) + 100),
  las = 1,
  cex.names = 0.9
)

# Explanatory note written in the bottom margin, below the x-axis title.
mtext(
  text = "Indica la calidad de aire ",
  side = 1,
  line = 4,
  cex = 0.8
)

13. Sector Circular

# Slice labels: the relative frequency of each category with a percent sign.
etiquetas <- paste0(hi, "%")

# One color per category, from green (first category) to red (last).
colores <- c("green", "yellow", "orange", "red")

# Pie chart of the percentage distribution.
pie(
  hi,
  labels = etiquetas,
  col = colores,
  main = "Gráfica N°2: Distribución porcentual AQI DEFRA"
)

# Legend mapping each color to its category name.
legend(
  "topright",
  legend = categorias,
  fill = colores,
  title = "Categorías",
  cex = 0.8
)

# Footnote in the bottom margin. The percentage is taken from the first
# category's label instead of being hard-coded, so the note cannot go stale
# when the underlying data changes.
mtext(
  paste0(
    "Nota: El ",
    etiquetas[1],
    " indica la calidad de aire buena según la escala mostrada"
  ),
  side = 1,
  line = 4,
  cex = 0.8
)