#Variable Cuantitativa Continua
# Longitud
#Autor: Ariana Viteri
#Fecha:31/05/2026
library(gt)
## Warning: package 'gt' was built under R version 4.5.3
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.5.3
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# ===== CARGA DE DATOS =====
# Load the data set: comma-separated, "." as decimal mark, first row is header.
datos <- read.csv("~/semestre 3 y 4/Estadistica/Datos Cambiados.csv",
                  header = TRUE, dec = ".", sep = ",")
#----------------------------------------
# Variable selection
# NOTE: the column is assumed to be named 'Longitud'.
longitud_cruda <- datos$Longitud
# Drop the "-" placeholder together with missing entries. Comparing NA with "-"
# yields NA, and indexing with NA keeps an NA element, so NA is excluded
# explicitly here.
longitud_cruda <- longitud_cruda[!is.na(longitud_cruda) & longitud_cruda != "-"]
# Conversion to numeric. Entries that cannot be parsed become NA (as.numeric
# warns about them) and are removed, so that length(longitud) equals the number
# of usable observations and later sums/comparisons never see NA.
longitud <- as.numeric(longitud_cruda)
longitud <- longitud[!is.na(longitud)]
if (length(longitud) == 0) {
  stop("La columna 'Longitud' no contiene valores numéricos.", call. = FALSE)
}
# =========================================================
# FRECUENCIAS PARA longitud
# Tamaño de muestra
# Sample size
n <- length(longitud)
# Smallest and largest observed value (missing values ignored)
min_longitud <- min(longitud, na.rm = TRUE)
max_longitud <- max(longitud, na.rm = TRUE)
# Range of the variable: largest minus smallest value
R <- diff(c(min_longitud, max_longitud))
# Number of classes by Sturges' rule, rounded up to a whole number
k_detallado <- ceiling(3.322 * log10(n) + 1)
# Class width
A <- R / k_detallado
# Report the number of classes
cat("Número de intervalos (k):", k_detallado, "\n")
## Número de intervalos (k): 16
# Generación de límites de intervalos
# Class limits.
# The k + 1 break points are generated with length.out rather than by stepping
# with `by = A` up to `max - A`: with a fractional step, floating-point error
# can make seq(..., by = A) gain or lose its last element, which would leave
# Li and Ls with different lengths.
limites <- seq(from = min_longitud, to = max_longitud,
               length.out = k_detallado + 1)
Li <- limites[-length(limites)]  # lower limit of each class
Ls <- limites[-1]                # upper limit of each class
# Rounding: data and limits are put on the same 3-decimal grid so that the
# comparisons against the limits are consistent.
longitud <- round(longitud, 3)
Li <- round(Li, 3)
Ls <- round(Ls, 3)
# Marcas de clase
# Class marks (midpoint of each interval)
MC <- (Li + Ls) / 2
# Absolute frequencies.
# Every class is [Li, Ls) except the last one, which is [Li, Ls] so that the
# maximum value is counted. seq_along() is used instead of 1:length(Li) so
# that an empty set of limits yields an empty result instead of iterating
# over c(1, 0).
ultimo <- length(Li)
ni <- vapply(seq_along(Li), function(i) {
  en_clase <- if (i < ultimo) {
    longitud >= Li[i] & longitud < Ls[i]
  } else {
    longitud >= Li[i] & longitud <= Ls[i]
  }
  sum(en_clase)
}, numeric(1))
# Frecuencias relativas y acumuladas
# Relative frequency of each class, as a percentage of n
hi <- 100 * (ni / n)
# Descending accumulation: running total taken from the last class backwards
acumular_desc <- function(x) rev(cumsum(rev(x)))
# Cumulative absolute frequencies (ascending / descending)
Ni_asc <- cumsum(ni)
Ni_desc <- acumular_desc(ni)
# Cumulative relative frequencies (ascending / descending)
Hi_asc <- cumsum(hi)
Hi_desc <- acumular_desc(hi)
# Intervalos
# Interval labels: "[Li - Ls)" for every class, "[Li - Ls]" for the last one
# (the last class is closed on the right).
cierre <- rep(")", length(Li))
cierre[length(cierre)] <- "]"
Intervalo <- paste0("[", round(Li, 2), " - ", round(Ls, 2), cierre)
# Tabla de distribución de frecuencias
# Frequency distribution table: one row per class, percentages and class
# marks rounded to 2 decimals for display.
TDF_longitud <- data.frame(
  Intervalo,
  MC = round(MC, 2),
  ni,
  hi = round(hi, 2),
  Ni_ascendente = Ni_asc,
  Ni_descendente = Ni_desc,
  Hi_ascendente = round(Hi_asc, 2),
  Hi_descendente = round(Hi_desc, 2)
)
# Totals row: only ni and hi have a meaningful total; the other columns get a
# "-" placeholder (which turns those columns into character after rbind).
sin_total <- "-"
totales <- data.frame(
  Intervalo = "Totales",
  MC = sin_total,
  ni = sum(ni),
  hi = round(sum(hi), 2),
  Ni_ascendente = sin_total,
  Ni_descendente = sin_total,
  Hi_ascendente = sin_total,
  Hi_descendente = sin_total
)
# Complete table: class rows followed by the totals row
TDF_longitud_completa <- rbind(TDF_longitud, totales)
#TABLA DE FRECUENCIAS DETALLADA
# Table 1: detailed frequency table rendered with gt.
TDF_longitud_completa %>%
  gt() %>%
  tab_header(
    title = "Tabla Nro. 1",
    subtitle = "Distribución de frecuencia de longitud del aire en India entre 2015-2020"
  ) %>%
  tab_source_note(
    source_note = md("Grupo: 1 <br> Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india ")
  ) %>%
  # Black 2px vertical borders on both sides of every body cell and every
  # column label.
  tab_style(
    style = cell_borders(
      sides = c("left", "right"),
      color = "black",
      weight = px(2)
    ),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  tab_options(
    # Outer table border
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column labels
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Body
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 1 | |||||||
| Distribución de frecuencia de longitud del aire en India entre 2015-2020 | |||||||
| Intervalo | MC | ni | hi | Ni_ascendente | Ni_descendente | Hi_ascendente | Hi_descendente |
|---|---|---|---|---|---|---|---|
| [73.01 - 74.18) | 73.59 | 2009 | 6.80 | 2009 | 29531 | 6.8 | 100 |
| [74.18 - 75.35) | 74.76 | 1863 | 6.31 | 3872 | 27522 | 13.11 | 93.2 |
| [75.35 - 76.52) | 75.93 | 2552 | 8.64 | 6424 | 25659 | 21.75 | 86.89 |
| [76.52 - 77.68) | 77.1 | 10483 | 35.50 | 16907 | 23107 | 57.25 | 78.25 |
| [77.68 - 78.85) | 78.27 | 2298 | 7.78 | 19205 | 12624 | 65.03 | 42.75 |
| [78.85 - 80.02) | 79.44 | 3121 | 10.57 | 22326 | 10326 | 75.6 | 34.97 |
| [80.02 - 81.19) | 80.6 | 2009 | 6.80 | 24335 | 7205 | 82.4 | 24.4 |
| [81.19 - 82.36) | 81.77 | 951 | 3.22 | 25286 | 5196 | 85.63 | 17.6 |
| [82.36 - 83.52) | 82.94 | 1462 | 4.95 | 26748 | 4245 | 90.58 | 14.37 |
| [83.52 - 84.69) | 84.11 | 310 | 1.05 | 27058 | 2783 | 91.63 | 9.42 |
| [84.69 - 85.86) | 85.28 | 1971 | 6.67 | 29029 | 2473 | 98.3 | 8.37 |
| [85.86 - 87.03) | 86.44 | 0 | 0.00 | 29029 | 502 | 98.3 | 1.7 |
| [87.03 - 88.19) | 87.61 | 0 | 0.00 | 29029 | 502 | 98.3 | 1.7 |
| [88.19 - 89.36) | 88.78 | 0 | 0.00 | 29029 | 502 | 98.3 | 1.7 |
| [89.36 - 90.53) | 89.95 | 0 | 0.00 | 29029 | 502 | 98.3 | 1.7 |
| [90.53 - 91.7] | 91.12 | 502 | 1.70 | 29531 | 502 | 100 | 1.7 |
| Totales | - | 29531 | 100.00 | - | - | - | - |
| Grupo: 1 Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
|||||||
# ==============================================================================
# Debido a la gran cantidad de intervalos, se realizará una reducción de filas:
# la tabla Nro. 2 se construye con solo 10 intervalos
# ==============================================================================
# TABLA 2
# Number of classes for the simplified table
k_tabla2 <- 10
# New class width
A2 <- R / k_tabla2
# New class limits.
# The k + 1 break points are generated with length.out rather than by stepping
# with `by = A2` up to `max - A2`: with a fractional step, floating-point
# error can make seq(..., by = A2) gain or lose its last element, which would
# leave Li2 and Ls2 with different lengths.
limites2 <- seq(from = min_longitud, to = max_longitud,
                length.out = k_tabla2 + 1)
Li2 <- limites2[-length(limites2)]  # lower limit of each class
Ls2 <- limites2[-1]                 # upper limit of each class
# Rounding to 3 decimals
Li2 <- round(Li2, 3)
Ls2 <- round(Ls2, 3)
# Marcas de clase
# Class marks (midpoint of each interval)
MC2 <- (Li2 + Ls2) / 2
# Absolute frequencies.
# Every class is [Li2, Ls2) except the last one, which is [Li2, Ls2] so that
# the maximum value is counted. seq_along() is used instead of
# 1:length(Li2) so that an empty set of limits yields an empty result instead
# of iterating over c(1, 0).
ultimo2 <- length(Li2)
ni2 <- vapply(seq_along(Li2), function(i) {
  en_clase <- if (i < ultimo2) {
    longitud >= Li2[i] & longitud < Ls2[i]
  } else {
    longitud >= Li2[i] & longitud <= Ls2[i]
  }
  sum(en_clase)
}, numeric(1))
# Frecuencias relativas y acumuladas
# Relative frequency of each class, as a percentage of n
hi2 <- 100 * (ni2 / n)
# Descending accumulation: running total taken from the last class backwards
acumular_desc <- function(x) rev(cumsum(rev(x)))
# Cumulative absolute frequencies (ascending / descending)
Ni2_asc <- cumsum(ni2)
Ni2_desc <- acumular_desc(ni2)
# Cumulative relative frequencies (ascending / descending)
Hi2_asc <- cumsum(hi2)
Hi2_desc <- acumular_desc(hi2)
# Intervalos
# Interval labels: "[Li - Ls)" for every class, "[Li - Ls]" for the last one
# (the last class is closed on the right).
cierre2 <- rep(")", length(Li2))
cierre2[length(cierre2)] <- "]"
Intervalo2 <- paste0("[", round(Li2, 2), " - ", round(Ls2, 2), cierre2)
# Tabla 2
# Table 2: frequency distribution with 10 classes, one row per class.
TDF_longitud_10 <- data.frame(
  Intervalo = Intervalo2,
  MC = round(MC2, 2),
  ni = ni2,
  hi = round(hi2, 2),
  Ni_ascendente = Ni2_asc,
  Ni_descendente = Ni2_desc,
  Hi_ascendente = round(Hi2_asc, 2),
  Hi_descendente = round(Hi2_desc, 2)
)
# Totals row. The percentage total is rounded to 2 decimals, like the class
# rows and like the totals row of Table 1; an unrounded floating-point sum can
# differ from 100 in its last digits.
totales2 <- data.frame(
  Intervalo = "Totales",
  MC = "-",
  ni = sum(ni2),
  hi = round(sum(hi2), 2),
  Ni_ascendente = "-",
  Ni_descendente = "-",
  Hi_ascendente = "-",
  Hi_descendente = "-"
)
# Complete table: class rows followed by the totals row
TDF_longitud_10_completa <- rbind(TDF_longitud_10, totales2)
# TABLA 2: Distribución de frecuencias de longitud con 10 intervalos
# Table 2: 10-class frequency table rendered with gt.
TDF_longitud_10_completa %>%
  gt() %>%
  tab_header(
    title = "Tabla Nro. 2",
    subtitle = "Distribución de frecuencia de longitud, del estudio calidad del aire en India entre 2015-2020"
  ) %>%
  tab_source_note(
    source_note = md("Grupo: 1 <br> Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india ")
  ) %>%
  # Black 2px vertical borders on both sides of every body cell and every
  # column label.
  tab_style(
    style = cell_borders(
      sides = c("left", "right"),
      color = "black",
      weight = px(2)
    ),
    locations = list(cells_body(), cells_column_labels())
  ) %>%
  tab_options(
    # Outer table border
    table.border.top.style = "solid",
    table.border.top.color = "black",
    table.border.bottom.style = "solid",
    table.border.bottom.color = "black",
    # Heading and column labels
    heading.border.bottom.color = "black",
    heading.border.bottom.width = px(2),
    column_labels.border.top.color = "black",
    column_labels.border.bottom.color = "black",
    column_labels.border.bottom.width = px(2),
    # Body
    row.striping.include_table_body = TRUE,
    table_body.hlines.color = "gray",
    table_body.border.bottom.color = "black"
  )
| Tabla Nro. 2 | |||||||
| Distribución de frecuencia de longitud, del estudio calidad del aire en India entre 2015-2020 | |||||||
| Intervalo | MC | ni | hi | Ni_ascendente | Ni_descendente | Hi_ascendente | Hi_descendente |
|---|---|---|---|---|---|---|---|
| [73.01 - 74.88) | 73.94 | 2947 | 9.98 | 2947 | 29531 | 9.98 | 100 |
| [74.88 - 76.75) | 75.81 | 3477 | 11.77 | 6424 | 26584 | 21.75 | 90.02 |
| [76.75 - 78.62) | 77.68 | 12781 | 43.28 | 19205 | 23107 | 65.03 | 78.25 |
| [78.62 - 80.49) | 79.55 | 3121 | 10.57 | 22326 | 10326 | 75.6 | 34.97 |
| [80.49 - 82.36) | 81.42 | 2960 | 10.02 | 25286 | 7205 | 85.63 | 24.4 |
| [82.36 - 84.22) | 83.29 | 1772 | 6.00 | 27058 | 4245 | 91.63 | 14.37 |
| [84.22 - 86.09) | 85.16 | 1971 | 6.67 | 29029 | 2473 | 98.3 | 8.37 |
| [86.09 - 87.96) | 87.03 | 0 | 0.00 | 29029 | 502 | 98.3 | 1.7 |
| [87.96 - 89.83) | 88.9 | 0 | 0.00 | 29029 | 502 | 98.3 | 1.7 |
| [89.83 - 91.7] | 90.77 | 502 | 1.70 | 29531 | 502 | 100 | 1.7 |
| Totales | - | 29531 | 100.00 | - | - | - | - |
| Grupo: 1 Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india |
|||||||
#===========================
# Histograma de R studio
# Primero: Crea el objeto sin graficar
# Step 1: compute the histogram without drawing it, to obtain the bin counts
# and break points used below.
Histograma_longitud <- hist(longitud, breaks = 13, plot = FALSE)
cortes <- Histograma_longitud$breaks
conteo_max <- max(Histograma_longitud$counts)
# Step 2: draw it. The y axis stops at the tallest bar; the default x axis is
# suppressed so that ticks can be placed on the break points.
hist(longitud,
     breaks = 13,
     col = "darkseagreen3",
     main = "Grafica Nro.1 de distribución de frecuencias de longitud \nen el estudio calidad del aire en India de 2015-2020",
     xlab = "Longitud",
     ylab = "Cantidad",
     ylim = c(0, conteo_max),
     xaxt = "n",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9)
# X axis: one tick per break, labelled with the break rounded to an integer
axis(side = 1,
     at = cortes,
     labels = round(cortes, 0),
     las = 1,
     cex.axis = 0.9)
grid()
#================================
# Histograma con relación a la totalidad de los datos
# Crear objeto histograma
# Histogram object (counts and breaks), computed without drawing
Histograma_longitud <- hist(longitud, breaks = 13, plot = FALSE)
# Move the axis titles further from the axes to leave room for the wide y tick
# labels. The previous setting is saved and restored at the end of this block
# so that the change does not leak into the plots drawn afterwards.
par_previo <- par(mgp = c(3.2, 1, 0))
# The y axis runs from 0 to the sample size, so each bar is read against the
# total number of observations (instead of a hard-coded upper bound).
marcas_y <- pretty(c(0, n))
marcas_y <- marcas_y[marcas_y <= n]
hist(longitud, breaks = 13,
     main = "Grafica Nro.2 Distribución de frecuencias de longitud\nen el estudio calidad del aire en India de 2015-2020",
     xlab = "Longitud",
     ylab = "Cantidad",
     ylim = c(0, n),
     col = "darkseagreen3",
     cex.main = 0.9,
     cex.lab = 1,
     cex.axis = 0.9,
     xaxt = "n",
     yaxt = "n")
# X axis: one tick per break, labelled with the break rounded to an integer
axis(1,
     at = Histograma_longitud$breaks,
     labels = round(Histograma_longitud$breaks, 0),
     las = 1,
     cex.axis = 0.9)
# Y axis: labels written in fixed notation (no scientific format)
axis(2,
     at = marcas_y,
     labels = format(marcas_y, scientific = FALSE, trim = TRUE),
     las = 1,
     cex.axis = 0.9)
grid()
# Restore the graphical parameters changed above
par(par_previo)
#------------------------------------------------
# Histograma porcentual de Longitud
# Percentage histogram of Longitud (10-class table): one gap-free bar per class
bp <- barplot(hi2,
              space = 0,
              names.arg = FALSE,
              xaxt = "n",
              yaxt = "n",
              main = "Grafica Nro.3 de distribución porcentual de longitud\nen el estudio calidad del aire en India de 2015-2020",
              xlab = "Longitud",
              ylab = "Porcentaje (%)",
              col = "darkseagreen3",
              border = "black",
              ylim = c(0, 45),
              cex.main = 0.8)
# X axis.
# bp holds the bar midpoints; bars have width 1 and no gaps, so the bar edges
# are at bp - 0.5 plus the right edge of the last bar. Each edge corresponds to
# a class limit, so ticks are labelled with the actual limits (every second
# edge) instead of hard-coded values that do not match the classes.
bordes <- c(bp - 0.5, max(bp) + 0.5)
limites_clase <- c(Li2, Ls2[length(Ls2)])
mostrar <- seq(1, length(bordes), by = 2)
axis(1,
     at = bordes[mostrar],
     labels = round(limites_clase[mostrar], 1),
     las = 1)
# Y axis
axis(2,
     at = seq(0, 45, by = 10),
     labels = seq(0, 45, by = 10),
     las = 1)
grid()
#-------------------------------------------------------
# Histograma porcentual de Longitud
# Percentage histogram of Longitud (detailed table), on a 0-100 % scale
bp <- barplot(hi,
              space = 0,
              names.arg = FALSE,
              xaxt = "n",
              yaxt = "n",
              main = "Grafica Nro.4 de distribución porcentual de Longitud\nen el estudio calidad del aire en India de 2015-2020",
              xlab = "Longitud",
              ylab = "Porcentaje (%)",
              col = "darkseagreen3",
              border = "black",
              ylim = c(0, 100),
              cex.main = 0.8)
# X axis.
# bp holds the bar midpoints; bars have width 1 and no gaps, so the bar edges
# are at bp - 0.5 plus the right edge of the last bar. Each edge corresponds to
# a class limit, so ticks are labelled with the actual limits (every fourth
# edge). This replaces hard-coded labels that did not match the classes and a
# hard-coded bp[12] that assumed at least 12 classes.
bordes <- c(bp - 0.5, max(bp) + 0.5)
limites_clase <- c(Li, Ls[length(Ls)])
mostrar <- seq(1, length(bordes), by = 4)
axis(1,
     at = bordes[mostrar],
     labels = round(limites_clase[mostrar], 1),
     las = 1)
# Y axis
axis(2,
     at = seq(0, 100, by = 20),
     labels = seq(0, 100, by = 20),
     las = 1)
# Light reference lines: one per bar edge and one per y tick
abline(v = bp - 0.5, col = "lightgray", lty = 3)
abline(h = seq(0, 100, by = 20), col = "lightgray", lty = 3)
# Diagrama de caja y bigotes de Longitud
# Horizontal box-and-whisker plot of Longitud. Both default axes are
# suppressed; the value axis is drawn by hand below.
boxplot(longitud,
        horizontal = TRUE,
        col = "turquoise3",
        border = "black",
        main = "Gráfica Nro.5: Diagrama de caja de Longitud\nen el estudio calidad del aire en India de 2015-2020",
        xlab = "Longitud",
        xaxt = "n",
        yaxt = "n",
        cex.main = 0.9,
        cex.lab = 1,
        cex.axis = 0.9)
# Custom X axis: ticks every 2 units from 72 to 92
marcas_x <- seq(72, 92, by = 2)
axis(side = 1, at = marcas_x, labels = marcas_x, las = 1)
grid()
# ==============================================================================
# OJIVA ASCENDENTE Y DESCENDENTE
# Ascending ogive: Ni2_asc[i] is the number of observations below the upper
# limit of class i, so it is plotted against the upper limits Ls2.
plot(Ls2, Ni2_asc,
     type = "b",
     pch = 16,
     col = "turquoise3",
     lwd = 1,
     ylim = c(0, n),
     xlim = c(72, 92),
     xaxt = "n",
     xlab = "Longitud",
     ylab = "Cantidad",
     main = "Gráfica N°6: Ojiva ascendente y descendente de la\nLongitud en el estudio calidad del aire en India de 2015-2020",
     cex.main = 1)
# X axis every 4 units
axis(1, at = seq(72, 92, by = 4))
# Descending ogive: Ni2_desc[i] is the number of observations at or above the
# lower limit of class i (Ni2_desc[1] is the whole sample), so it must be
# plotted against the lower limits Li2, not the upper limits.
lines(Li2, Ni2_desc,
      type = "b",
      pch = 16,
      col = "black",
      lwd = 1)
grid()
box()
# ==============================================================================
# OJIVA PORCENTUAL ASCENDENTE Y DESCENDENTE
# Ascending percentage ogive: Hi2_asc[i] is the percentage of observations
# below the upper limit of class i, so it is plotted against the upper limits.
plot(Ls2, Hi2_asc,
     type = "b",
     pch = 16,
     col = "turquoise3",
     lwd = 1,
     ylim = c(0, 100),
     xlim = c(72, 92),
     xaxt = "n",
     xlab = "Longitud",
     ylab = "Porcentaje (%)",
     main = "Gráfica N°7: Ojiva porcentual ascendente y descendente de la\nLongitud en el estudio calidad del aire en India de 2015-2020",
     cex.main = 1)
# X axis every 4 units
axis(1, at = seq(72, 92, by = 4))
# Descending percentage ogive: Hi2_desc[i] is the percentage of observations at
# or above the lower limit of class i (Hi2_desc[1] is 100 %), so it must be
# plotted against the lower limits Li2, not the upper limits.
lines(Li2, Hi2_desc,
      type = "b",
      pch = 16,
      col = "black",
      lwd = 1)
grid()
box()
# Calculo previo de indicadores
# =========================================================
X <- mean(longitud, na.rm = TRUE)    # Mean
Me <- median(longitud, na.rm = TRUE) # Median
# Modal class (Mo): label of the most frequent interval of the 10-class table.
# No mode function is involved; the class rows are scanned with which.max().
# The last row of the table is the totals row and is excluded; seq_len() is
# used so that a table with no class rows gives an empty index instead of
# c(1, 0).
filas_clase <- seq_len(nrow(TDF_longitud_10_completa) - 1)
moda_index <- which.max(TDF_longitud_10_completa$ni[filas_clase])
Mo <- TDF_longitud_10_completa$Intervalo[moda_index]
desv <- sd(longitud, na.rm = TRUE)   # Standard deviation
CV <- (desv / X) * 100               # Coefficient of variation, in percent
# Libreria para Asimetria y Curtosis
library(e1071)
## Warning: package 'e1071' was built under R version 4.5.3
# Shape indicators from e1071 (missing values ignored).
# NOTE(review): e1071::kurtosis() appears to report excess kurtosis by default
# (normal distribution = 0) — confirm this is the convention wanted in Tabla 3.
As <- e1071::skewness(longitud, na.rm = TRUE)
K <- e1071::kurtosis(longitud, na.rm = TRUE)
# Creacion del data frame
# One-row data frame with the summary indicators of Longitud.
# Rango is a text label "[min - max]"; min/max ignore missing values, like
# every other statistic in this table (without na.rm a single NA would turn
# the label into "[NA - NA]").
Tabla_indicadores <- data.frame(
  Variable = "Longitud",
  Rango = paste0(
    "[", round(min(longitud, na.rm = TRUE), 2),
    " - ", round(max(longitud, na.rm = TRUE), 2), "]"
  ),
  Media = X,
  Mediana = Me,
  Moda = Mo,        # modal class label (character), not a number
  DesvEst = desv,
  CV = CV,
  Asimetria = As,
  Curtosis = K
)
# Visualizacion de la tabla
library(gt)
# Table 3: summary indicators rendered with gt, grouped under three spanners
# (central tendency, dispersion, shape).
Tabla_indicadores %>%
  gt() %>%
  cols_label(
    Variable = "Variable",
    Rango = "Rango",
    Media = "Media (X)",
    Mediana = "Mediana (Me)",
    Moda = "Moda (Mo)",
    DesvEst = "Desv. Est. (sd)",
    CV = "CV (%)",
    Asimetria = "Asimetria (As)",
    Curtosis = "Curtosis (K)"
  ) %>%
  tab_header(
    title = "Tabla Nro. 3",
    subtitle = "Indicadores Estadisticos de la Longitud, estudio calidad del aire en India entre 2015-2020"
  ) %>%
  tab_source_note(
    source_note = "Autor: Grupo 1 | Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india"
  ) %>%
  tab_spanner(
    label = "Tendencia Central",
    columns = c(Media, Mediana, Moda)
  ) %>%
  tab_spanner(
    label = "Dispersion",
    columns = c(DesvEst, CV)
  ) %>%
  tab_spanner(
    label = "Forma",
    columns = c(Asimetria, Curtosis)
  ) %>%
  # Two decimals for the numeric indicators only. Moda is left out on purpose:
  # it holds the modal class label (text), which number formatting cannot
  # apply to.
  fmt_number(
    columns = c(Media, Mediana, DesvEst, CV, Asimetria, Curtosis),
    decimals = 2
  ) %>%
  # Thin black border around every body cell, column label and spanner
  tab_style(
    style = cell_borders(
      sides = c("left", "right", "top", "bottom"),
      color = "black",
      weight = px(1)
    ),
    locations = list(
      cells_body(columns = everything(), rows = everything()),
      cells_column_labels(columns = everything()),
      cells_column_spanners(spanners = everything())
    )
  ) %>%
  tab_options(
    table.border.top.color = "black",
    table.border.bottom.color = "black",
    table.border.left.color = "black",
    table.border.right.color = "black",
    table_body.hlines.color = "black",
    table_body.vlines.color = "black",
    column_labels.border.bottom.width = px(2)
  )
| Tabla Nro. 3 | ||||||||
| Indicadores Estadisticos de la Longitud, estudio calidad del aire en India entre 2015-2020 | ||||||||
| Variable | Rango | Tendencia Central | | | Dispersion | | Forma | |
|---|---|---|---|---|---|---|---|---|
| | | Media (X) | Mediana (Me) | Moda (Mo) | Desv. Est. (sd) | CV (%) | Asimetria (As) | Curtosis (K) |
| Longitud | [73.01 - 91.7] | 78.45 | 77.38 | [76.75 - 78.62) | 3.52 | 4.48 | 1.21 | 2.08 |
| Autor: Grupo 1 | Fuente: https://www.kaggle.com/datasets/rohanrao/air-quality-data-in-india | ||||||||