#Estadística Descriptiva
#Autor: Ariana Viteri
#Fecha:24/05/2025
#Carga de Librerias
library(gt)
## Warning: package 'gt' was built under R version 4.5.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(RColorBrewer)
# Load the data set. The file is read as semicolon-separated text with a
# header row and "." as the decimal mark.
datos <- read.csv(
  file = "~/ariana tercer semestre/Estadistica/city_day.csv",
  header = TRUE,
  sep = ";",
  dec = "."
)
# ORDINAL VARIABLE: AQI_Bucket
# Cleaning and ordering happen in one step: factor() keeps only the six
# labels listed in `levels`, so any other value (for example the "-"
# placeholder) becomes NA. A separate filter on "-" used to be assigned to
# `AQI_Bucket` here, but that variable was overwritten before it was read,
# so the filter had no effect and has been removed.
niveles_aqi <- c(
  "Good", "Satisfactory", "Moderate",
  "Poor", "Very Poor", "Severe"
)
datos$AQI_Bucket <- factor(
  datos$AQI_Bucket,
  levels = niveles_aqi,
  ordered = TRUE
)

# Frequency table (TDF). table() leaves NA out of the counts
# (useNA = "no"), which is what excludes the unclassified records.
AQI_Bucket <- datos$AQI_Bucket
TDF_AQI_Bucket <- data.frame(table(AQI_Bucket, useNA = "no"))

# ni: absolute frequency; hi: relative frequency in percent (2 decimals).
ni <- TDF_AQI_Bucket$Freq
hi <- round((ni / sum(ni)) * 100, 2)
AQI_Bucket <- TDF_AQI_Bucket$AQI_Bucket
TDF_AQI_Bucket <- data.frame(AQI_Bucket, ni, hi)

# Copy of the table with a TOTAL row appended, used only for display. The
# total percentage is fixed at 100 rather than summed from the rounded hi.
Summary <- data.frame(AQI_Bucket = "TOTAL", ni = sum(ni), hi = 100)
TDF_AQI_Bucket_suma <- rbind(TDF_AQI_Bucket, Summary)
colnames(TDF_AQI_Bucket_suma) <- c("AQI_Nivel", "ni", "hi(%)")
# TABLE
# Render the frequency table (with its TOTAL row) using gt. Black 2px
# borders are drawn on the left and right of every body cell and column
# label.
gt(TDF_AQI_Bucket_suma) %>%
  tab_header(
    title = md("Tabla Nro. 1"),
    subtitle = md("Tabla de frecuencias de AQI_Bucket en el estudio calidad del aire en India de 2015-2020")
  ) %>%
  tab_source_note(
    source_note = md("Grupo: 1 <br> Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india ")
  ) %>%
  # Left border on body cells and column labels.
  tab_style(
    style = cell_borders(sides = "left", color = "black", weight = px(2)),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  # Right border on body cells and column labels.
  tab_style(
    style = cell_borders(sides = "right", color = "black", weight = px(2)),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  tab_options(
    # Outer table frame
    table.border.top.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.color = "black",
    table.border.bottom.style = "solid",
    # Heading and column labels
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Body
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
## | Tabla Nro. 1 | ||
## | Tabla de frecuencias de AQI_Bucket en el estudio calidad del aire en India de 2015-2020 | ||
## | AQI_Nivel | ni | hi(%) |
## |---|---|---|
## | Good | 1341 | 5.40 |
## | Satisfactory | 8224 | 33.09 |
## | Moderate | 8829 | 35.53 |
## | Poor | 2781 | 11.19 |
## | Very Poor | 2337 | 9.40 |
## | Severe | 1338 | 5.38 |
## | TOTAL | 24850 | 100.00 |
## | Grupo: 1 Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
## ||
# CHARTS
# ===========================
# Chart 1: bar chart of absolute frequencies (ni), default y-axis range.
# RColorBrewer is already attached at the top of the script.

# "Set2" palette; the requested size is the number of levels, kept
# between 3 and 8.
n <- nrow(TDF_AQI_Bucket)
colores <- brewer.pal(n = min(max(n, 3), 8), name = "Set2")

barplot(
  height = TDF_AQI_Bucket$ni,
  names.arg = TDF_AQI_Bucket$AQI_Bucket,
  main = "Gráfica Nro. 1\nDistribución de AQI_Bucket en el estudio\ncalidad del aire en India (2015-2020)",
  ylab = "Frecuencia",
  # Bar appearance and geometry
  col = colores,
  border = "white",
  width = 0.7,
  space = 0.3,
  # Text: las = 2 draws the axis labels perpendicular to the axis
  las = 2,
  cex.names = 0.9,
  cex.main = 1,
  cex.lab = 1
)

# The x-axis title is placed on margin line 4, below the rotated labels.
title(xlab = "AQI_Bucket", line = 4)

# Horizontal reference lines at the y-axis ticks only (nx = NA).
# NOTE(review): grid() runs after barplot(), so the lines are drawn on top
# of the bars rather than behind them.
grid(nx = NA, ny = NULL, col = "gray80", lty = 1)
# ================================
# Chart 2: bar chart of percentages (hi); the y axis ends 5 points above
# the largest percentage.
# RColorBrewer is already attached at the top of the script.

# "Set2" palette; the requested size is the number of levels, kept
# between 3 and 8.
n <- nrow(TDF_AQI_Bucket)
colores <- brewer.pal(n = min(max(n, 3), 8), name = "Set2")

barplot(
  height = TDF_AQI_Bucket$hi,
  names.arg = TDF_AQI_Bucket$AQI_Bucket,
  main = "Gráfica Nro. 2\nDistribución porcentual de AQI_Bucket en el estudio\ncalidad del aire en India (2015-2020)",
  ylab = "Porcentaje (%)",
  ylim = c(0, max(TDF_AQI_Bucket$hi) + 5),
  # Bar appearance and geometry
  col = colores,
  border = "white",
  width = 0.7,
  space = 0.3,
  # Text: las = 2 draws the axis labels perpendicular to the axis
  las = 2,
  cex.names = 0.9,
  cex.main = 1,
  cex.lab = 1
)

# The x-axis title is placed on margin line 4, below the rotated labels.
title(xlab = "AQI_Bucket", line = 4)

# Horizontal reference lines at the y-axis ticks only (nx = NA).
# NOTE(review): grid() runs after barplot(), so the lines are drawn on top
# of the bars rather than behind them.
grid(nx = NA, ny = NULL, col = "gray80", lty = 1)
# ================================
# Chart 3: bar chart of absolute frequencies (ni); the y axis ends 500
# units above the largest count.
# RColorBrewer is already attached at the top of the script.

# "Set2" palette; the requested size is the number of levels, kept
# between 3 and 8.
n <- nrow(TDF_AQI_Bucket)
colores <- brewer.pal(n = min(max(n, 3), 8), name = "Set2")

# Upper limit of the y axis.
max_y_global <- max(TDF_AQI_Bucket$ni) + 500

barplot(
  height = TDF_AQI_Bucket$ni,
  names.arg = TDF_AQI_Bucket$AQI_Bucket,
  main = "Gráfica Nro. 3\nDistribución de AQI_Bucket en el estudio\ncalidad del aire en India (2015-2020)",
  ylab = "Cantidad",
  ylim = c(0, max_y_global),
  # Bar appearance and geometry
  col = colores,
  border = "white",
  width = 0.7,
  space = 0.3,
  # Text: las = 2 draws the axis labels perpendicular to the axis
  las = 2,
  cex.names = 0.9,
  cex.main = 1,
  cex.lab = 1
)

# The x-axis title is placed on margin line 4, below the rotated labels.
title(xlab = "AQI_Bucket", line = 4)

# Horizontal reference lines at the y-axis ticks only (nx = NA).
# NOTE(review): grid() runs after barplot(), so the lines are drawn on top
# of the bars rather than behind them.
grid(nx = NA, ny = NULL, col = "gray80", lty = 1)
# ================================
# Chart 4: bar chart of percentages (hi) on a fixed 0-100 y axis.
# RColorBrewer is already attached at the top of the script.

# "Set2" palette; the requested size is the number of levels, kept
# between 3 and 8.
n <- nrow(TDF_AQI_Bucket)
colores <- brewer.pal(n = min(max(n, 3), 8), name = "Set2")

barplot(
  height = TDF_AQI_Bucket$hi,
  names.arg = TDF_AQI_Bucket$AQI_Bucket,
  main = "Gráfica Nro. 4\nDistribución porcentual de AQI_Bucket en el estudio\ncalidad del aire en India (2015-2020)",
  ylab = "Porcentaje (%)",
  ylim = c(0, 100),
  # Bar appearance and geometry
  col = colores,
  border = "white",
  width = 0.7,
  space = 0.3,
  # Text: las = 2 draws the axis labels perpendicular to the axis
  las = 2,
  cex.names = 0.9,
  cex.main = 1,
  cex.lab = 1
)

# The x-axis title is placed on margin line 4, below the rotated labels.
title(xlab = "AQI_Bucket", line = 4)

# Horizontal reference lines at the y-axis ticks only (nx = NA).
# NOTE(review): grid() runs after barplot(), so the lines are drawn on top
# of the bars rather than behind them.
grid(nx = NA, ny = NULL, col = "gray80", lty = 1)
# ================================
# Chart 5: pie chart of the percentage (hi) of each AQI level.

# Hand-picked pastel palette with six entries, assigned to the slices in
# row order of TDF_AQI_Bucket.
colores <- c(
  "#FBB4AE", # pastel pink
  "#FFF2AE", # pastel yellow
  "#CDEAC0", # pastel green
  "#B5D8EB", # pastel light blue
  "#E4C1F9", # pastel lilac
  "#FFD6A5"  # pastel orange
)
color <- adjustcolor(colores, alpha.f = 0.95)

# Slice labels: the percentage followed by " %".
etiqueta <- paste0(TDF_AQI_Bucket$hi, " %")

pie(
  TDF_AQI_Bucket$hi,
  labels = etiqueta,
  radius = 0.9,
  col = color,
  border = "black",
  # NOTE(review): pie() forwards extra graphical parameters only to the
  # labels and the main title, so lwd does not thicken the slice borders.
  lwd = 2,
  main = "Gráfica Nro. 5\nDistribución de AQI_Bucket en el estudio\ncalidad del aire en India (2015-2020)",
  cex.main = 1,
  cex = 0.9
)

# Legend mapping each colour to its AQI level. A stray positional `5`
# that followed `cex = 0.7` (a typo, "0.7,5") has been removed: it was
# bound to legend()'s `y` argument, which is not used when the position is
# given as a keyword, so the drawn legend is unchanged.
legend(
  "topright",
  legend = TDF_AQI_Bucket$AQI_Bucket,
  title = "Leyenda",
  fill = color,
  border = "black",
  box.lwd = 1,
  box.col = "black",
  cex = 0.7,
  bty = "o"
)
# =========================================================
# STATISTICAL INDICATORS for AQI_Bucket (ordinal qualitative variable).
# Only the mode, median and range are computed; the remaining indicators
# are filled with "-".

# 1. MODE (Mo): the level with the highest absolute frequency (ni).
# which.max() returns the first maximum if several levels tie.
Moda_row <- TDF_AQI_Bucket[which.max(TDF_AQI_Bucket$ni), ]
Mo_calc <- as.character(Moda_row$AQI_Bucket[1])

# 2. MEDIAN (Me): the first level whose cumulative frequency reaches N / 2.
Ni_calc <- cumsum(TDF_AQI_Bucket$ni)
N_total <- sum(TDF_AQI_Bucket$ni)
Posicion_Me <- N_total / 2
Mediana_row_index <- which(Ni_calc >= Posicion_Me)[1]
Me_calc <- as.character(TDF_AQI_Bucket$AQI_Bucket[Mediana_row_index])

# 3. RANGE: from the lowest to the highest level that actually occurs.
# The upper end used to be taken from position `length(...) - 1`, which
# skipped the last level and reported "Very Poor" even though "Severe" has
# observations. Rows of TDF_AQI_Bucket follow the order of the factor
# levels, so the first and last rows with ni > 0 are the extremes.
niveles_observados <- as.character(
  TDF_AQI_Bucket$AQI_Bucket[TDF_AQI_Bucket$ni > 0]
)
Rango_calc <- paste0(
  niveles_observados[1],
  " a ",
  niveles_observados[length(niveles_observados)]
)

# Values for the single-row summary table.
Variable <- "AQI_Bucket (Nivel de Calidad del Aire)"
Rango <- Rango_calc
Me <- Me_calc # median
X <- "-"      # mean
Mo <- Mo_calc # mode
# Dispersion indicators
var <- "-"  # variance
desv <- "-" # standard deviation
CV <- "-"   # coefficient of variation
# Shape indicators
As <- "-" # skewness
K <- "-"  # kurtosis

Tabla_indicadores <- data.frame(
  Variable, Rango, X, Me, Mo, var, desv, CV, As, K
)
colnames(Tabla_indicadores) <- c(
  "Variable", "Rango", "X", "Me", "Mo", "Var", "sd", "CV", "As", "K"
)

# Render the indicators table with gt (gt and dplyr are already attached at
# the top of the script).
# NOTE(review): this table is numbered 3 and labelled "Grupo: 2", while the
# frequency table is numbered 1 and labelled "Grupo: 1" -- confirm which
# values are intended.
Tabla_indicadores %>%
  gt() %>%
  tab_header(
    title = md("*Tabla Nro. 3*"),
    subtitle = md("**Indicadores Estadísticos del Nivel AQI_Bucket (Calidad del Aire) en India entre 2015-2020**")
  ) %>%
  # The source note now names the file as it is loaded (city_day.csv).
  tab_source_note(
    source_note = md("Grupo: 2 <br> Fuente: Datos procesados por el autor a partir de archivo city_day.csv")
  ) %>%
  # Vertical borders on every body cell and column label.
  tab_style(
    style = cell_borders(sides = "left", color = "black", weight = px(2)),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  tab_style(
    style = cell_borders(sides = "right", color = "black", weight = px(2)),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.top.style = "solid",
    table.border.bottom.style = "solid",
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    row.striping.include_table_body = TRUE,
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
# Rendered output, kept as comments so the script parses; it reflects the
# corrected range and source note.
## | Tabla Nro. 3 | |||||||||
## | Indicadores Estadísticos del Nivel AQI_Bucket (Calidad del Aire) en India entre 2015-2020 | |||||||||
## | Variable | Rango | X | Me | Mo | Var | sd | CV | As | K |
## |---|---|---|---|---|---|---|---|---|---|
## | AQI_Bucket (Nivel de Calidad del Aire) | Good a Severe | - | Moderate | Moderate | - | - | - | - | - |
## | Grupo: 2 Fuente: Datos procesados por el autor a partir de archivo city_day.csv |
## |||||||||