# Tema: Estadística Multivariable
# Autor: Camila Zambrano
# Fecha: 22/06/2026
library(gt)
library(dplyr)
##
## Adjuntando el paquete: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(e1071)
library(htmltools)
# Load the air-quality data and build the (NO, AQI) value pairs for the model.
# The CSV is addressed through an explicit path rather than setwd(), so the
# session's working directory is not modified as a side effect of reading it.
ruta_datos <- file.path("~/CAMILA", "Datos Cambiados..csv")
if (!file.exists(ruta_datos)) {
  stop("No se encontró el archivo de datos: ", ruta_datos, call. = FALSE)
}
datos <- read.csv(
  ruta_datos,
  header = TRUE,
  sep = ",",
  dec = ".",
  na.strings = "-"  # a lone dash in the file marks a missing value
)
# Fail early, with a clear message, if the two columns used below are absent.
columnas_requeridas <- c("NO", "AQI")
columnas_faltantes <- columnas_requeridas[!columnas_requeridas %in% names(datos)]
if (length(columnas_faltantes) > 0) {
  stop(
    "Faltan columnas requeridas en los datos: ",
    paste(columnas_faltantes, collapse = ", "),
    call. = FALSE
  )
}
# Drop every row that has a missing value in any column.
datos <- na.omit(datos)
datos <- datos[order(datos$AQI, datos$NO), ]
# Average NO within each distinct AQI value (one row per AQI value).
datos_prom <- aggregate(NO ~ AQI, data = datos, mean)
# Independent variable (X): mean NO per AQI value
X <- datos_prom$NO
# Dependent variable (Y): air quality index
Y <- datos_prom$AQI
n_modelo <- nrow(datos_prom)
# Terminate the message with a newline so later console output starts cleanly.
cat("Tamaño muestral del modelo = ", n_modelo, "\n", sep = "")
## Tamaño muestral del modelo = 448
TVP_NO_AQI <- data.frame(X, Y)
# Table of (X, Y) value pairs, built step by step with gt:
# centred columns, two decimals, header, and a source note with the row count.
tabla <- gt(TVP_NO_AQI)
tabla <- cols_align(tabla, align = "center", columns = everything())
tabla <- fmt_number(tabla, columns = everything(), decimals = 2)
tabla <- tab_header(
  tabla,
  title = md("*Tabla N°1*"),
  subtitle = md("*Pares de valores de Óxido de Nitrógeno y AQI, Calidad del aire en India 2015-2020*")
)
tabla <- tab_source_note(
  tabla,
  source_note = md(paste0(
    "**Nota:** Tamaño muestral n = ", nrow(TVP_NO_AQI),
    " observaciones utilizadas en el análisis."
  ))
)
# Wrap the table in a fixed-height container with vertical scrolling.
div(
  tabla,
  style = "height:400px; overflow-y:auto;"
)
| Tabla N°1 | |
| Pares de valores de Óxido de Nitrógeno y AQI, Calidad del aire en India 2015-2020 | |
| X | Y |
|---|---|
| 1.09 | 23.00 |
| 1.57 | 26.00 |
| 5.05 | 29.00 |
| 3.87 | 30.00 |
| 5.14 | 31.00 |
| 3.38 | 32.00 |
| 4.81 | 33.00 |
| 4.77 | 34.00 |
| 4.36 | 35.00 |
| 4.35 | 36.00 |
| 5.39 | 37.00 |
| 3.97 | 38.00 |
| 4.34 | 39.00 |
| 4.16 | 40.00 |
| 4.27 | 41.00 |
| 5.27 | 42.00 |
| 6.42 | 43.00 |
| 5.22 | 44.00 |
| 4.00 | 45.00 |
| 5.11 | 46.00 |
| 5.77 | 47.00 |
| 7.33 | 48.00 |
| 5.53 | 49.00 |
| 5.03 | 50.00 |
| 6.83 | 51.00 |
| 7.39 | 52.00 |
| 8.92 | 53.00 |
| 6.81 | 54.00 |
| 7.41 | 55.00 |
| 8.02 | 56.00 |
| 5.75 | 57.00 |
| 6.68 | 58.00 |
| 7.42 | 59.00 |
| 7.40 | 60.00 |
| 6.22 | 61.00 |
| 9.63 | 62.00 |
| 9.59 | 63.00 |
| 8.74 | 64.00 |
| 6.64 | 65.00 |
| 8.79 | 66.00 |
| 7.57 | 67.00 |
| 9.72 | 68.00 |
| 6.08 | 69.00 |
| 8.68 | 70.00 |
| 8.46 | 71.00 |
| 6.34 | 72.00 |
| 7.69 | 73.00 |
| 9.63 | 74.00 |
| 7.51 | 75.00 |
| 7.49 | 76.00 |
| 9.79 | 77.00 |
| 10.91 | 78.00 |
| 8.49 | 79.00 |
| 9.69 | 80.00 |
| 10.92 | 81.00 |
| 11.47 | 82.00 |
| 11.52 | 83.00 |
| 8.16 | 84.00 |
| 9.45 | 85.00 |
| 9.95 | 86.00 |
| 8.00 | 87.00 |
| 12.55 | 88.00 |
| 11.77 | 89.00 |
| 11.51 | 90.00 |
| 11.20 | 91.00 |
| 7.78 | 92.00 |
| 11.76 | 93.00 |
| 8.82 | 94.00 |
| 10.41 | 95.00 |
| 9.69 | 96.00 |
| 13.44 | 97.00 |
| 13.11 | 98.00 |
| 11.39 | 99.00 |
| 12.24 | 100.00 |
| 8.51 | 101.00 |
| 11.14 | 102.00 |
| 9.11 | 103.00 |
| 12.65 | 104.00 |
| 12.50 | 105.00 |
| 14.74 | 106.00 |
| 9.47 | 107.00 |
| 15.03 | 108.00 |
| 13.72 | 109.00 |
| 12.45 | 110.00 |
| 14.07 | 111.00 |
| 12.53 | 112.00 |
| 10.28 | 113.00 |
| 10.86 | 114.00 |
| 10.71 | 115.00 |
| 14.75 | 116.00 |
| 12.39 | 117.00 |
| 15.70 | 118.00 |
| 14.23 | 119.00 |
| 10.31 | 120.00 |
| 7.74 | 121.00 |
| 11.74 | 122.00 |
| 10.57 | 123.00 |
| 14.19 | 124.00 |
| 13.25 | 125.00 |
| 11.67 | 126.00 |
| 10.85 | 127.00 |
| 9.58 | 128.00 |
| 16.47 | 129.00 |
| 12.04 | 130.00 |
| 14.31 | 131.00 |
| 13.56 | 132.00 |
| 15.41 | 133.00 |
| 13.09 | 134.00 |
| 14.59 | 135.00 |
| 13.52 | 136.00 |
| 13.76 | 137.00 |
| 15.00 | 138.00 |
| 17.65 | 139.00 |
| 18.45 | 140.00 |
| 13.47 | 141.00 |
| 14.34 | 142.00 |
| 15.37 | 143.00 |
| 14.34 | 144.00 |
| 13.64 | 145.00 |
| 16.73 | 146.00 |
| 14.16 | 147.00 |
| 15.17 | 148.00 |
| 23.59 | 149.00 |
| 17.02 | 150.00 |
| 17.45 | 151.00 |
| 11.18 | 152.00 |
| 10.14 | 153.00 |
| 10.86 | 154.00 |
| 14.91 | 155.00 |
| 19.72 | 156.00 |
| 13.40 | 157.00 |
| 21.06 | 158.00 |
| 14.31 | 159.00 |
| 13.08 | 160.00 |
| 8.85 | 161.00 |
| 18.52 | 162.00 |
| 27.67 | 163.00 |
| 13.67 | 164.00 |
| 13.78 | 165.00 |
| 13.54 | 166.00 |
| 24.81 | 167.00 |
| 26.40 | 168.00 |
| 17.11 | 169.00 |
| 15.79 | 170.00 |
| 24.01 | 171.00 |
| 23.20 | 172.00 |
| 21.63 | 173.00 |
| 16.08 | 174.00 |
| 18.94 | 175.00 |
| 21.89 | 176.00 |
| 27.26 | 177.00 |
| 24.42 | 178.00 |
| 16.37 | 179.00 |
| 15.89 | 180.00 |
| 16.43 | 181.00 |
| 34.91 | 182.00 |
| 20.92 | 183.00 |
| 28.17 | 184.00 |
| 18.20 | 185.00 |
| 31.51 | 186.00 |
| 26.50 | 187.00 |
| 14.10 | 188.00 |
| 21.99 | 189.00 |
| 22.18 | 190.00 |
| 30.86 | 191.00 |
| 27.79 | 192.00 |
| 15.89 | 193.00 |
| 39.13 | 194.00 |
| 24.81 | 195.00 |
| 27.49 | 196.00 |
| 37.52 | 197.00 |
| 22.55 | 198.00 |
| 24.65 | 199.00 |
| 37.23 | 200.00 |
| 39.48 | 201.00 |
| 20.95 | 202.00 |
| 23.74 | 203.00 |
| 28.85 | 204.00 |
| 35.57 | 205.00 |
| 30.99 | 206.00 |
| 35.17 | 207.00 |
| 28.11 | 208.00 |
| 49.00 | 209.00 |
| 25.38 | 210.00 |
| 32.39 | 211.00 |
| 9.30 | 212.00 |
| 16.16 | 213.00 |
| 22.67 | 214.00 |
| 18.82 | 215.00 |
| 20.63 | 216.00 |
| 28.82 | 217.00 |
| 31.61 | 218.00 |
| 35.60 | 219.00 |
| 26.24 | 220.00 |
| 26.56 | 221.00 |
| 30.11 | 222.00 |
| 29.32 | 223.00 |
| 37.43 | 224.00 |
| 44.24 | 225.00 |
| 39.35 | 226.00 |
| 21.83 | 227.00 |
| 28.83 | 228.00 |
| 40.45 | 229.00 |
| 40.36 | 230.00 |
| 31.83 | 231.00 |
| 31.94 | 232.00 |
| 36.31 | 233.00 |
| 33.17 | 234.00 |
| 45.44 | 235.00 |
| 17.68 | 236.00 |
| 52.46 | 237.00 |
| 15.96 | 238.00 |
| 42.77 | 239.00 |
| 39.53 | 240.00 |
| 49.55 | 241.00 |
| 48.87 | 242.00 |
| 30.92 | 243.00 |
| 43.00 | 244.00 |
| 33.96 | 245.00 |
| 33.19 | 246.00 |
| 50.35 | 247.00 |
| 34.63 | 248.00 |
| 27.52 | 249.00 |
| 39.27 | 251.00 |
| 24.34 | 252.00 |
| 25.99 | 253.00 |
| 41.49 | 254.00 |
| 26.53 | 255.00 |
| 28.35 | 256.00 |
| 35.62 | 257.00 |
| 26.56 | 258.00 |
| 12.66 | 259.00 |
| 37.44 | 260.00 |
| 31.14 | 261.00 |
| 27.69 | 262.00 |
| 34.54 | 263.00 |
| 42.75 | 264.00 |
| 41.25 | 265.00 |
| 25.74 | 266.00 |
| 43.46 | 267.00 |
| 32.46 | 268.00 |
| 30.83 | 269.00 |
| 24.26 | 270.00 |
| 21.28 | 271.00 |
| 23.09 | 272.00 |
| 30.54 | 273.00 |
| 34.13 | 274.00 |
| 18.19 | 275.00 |
| 31.62 | 276.00 |
| 47.55 | 277.00 |
| 40.92 | 278.00 |
| 34.48 | 279.00 |
| 26.49 | 280.00 |
| 23.84 | 281.00 |
| 36.25 | 282.00 |
| 24.34 | 283.00 |
| 30.77 | 284.00 |
| 27.23 | 285.00 |
| 64.32 | 286.00 |
| 35.14 | 287.00 |
| 17.11 | 288.00 |
| 38.02 | 289.00 |
| 25.28 | 290.00 |
| 38.36 | 291.00 |
| 25.37 | 292.00 |
| 29.03 | 293.00 |
| 14.00 | 294.00 |
| 40.96 | 295.00 |
| 39.30 | 296.00 |
| 26.31 | 297.00 |
| 38.82 | 298.00 |
| 26.76 | 299.00 |
| 39.68 | 300.00 |
| 38.38 | 301.00 |
| 27.55 | 302.00 |
| 45.46 | 303.00 |
| 52.45 | 304.00 |
| 31.46 | 305.00 |
| 47.94 | 306.00 |
| 31.89 | 307.00 |
| 11.64 | 308.00 |
| 50.44 | 309.00 |
| 45.45 | 310.00 |
| 67.57 | 311.00 |
| 48.56 | 312.00 |
| 40.82 | 313.00 |
| 31.68 | 314.00 |
| 36.41 | 315.00 |
| 23.96 | 316.00 |
| 43.54 | 317.00 |
| 24.09 | 318.00 |
| 41.30 | 319.00 |
| 40.55 | 320.00 |
| 39.36 | 321.00 |
| 12.81 | 322.00 |
| 29.25 | 323.00 |
| 15.77 | 324.00 |
| 31.59 | 325.00 |
| 37.86 | 326.00 |
| 45.42 | 327.00 |
| 50.42 | 328.00 |
| 50.66 | 329.00 |
| 53.98 | 330.00 |
| 42.92 | 331.00 |
| 50.05 | 332.00 |
| 30.14 | 333.00 |
| 33.14 | 334.00 |
| 55.58 | 335.00 |
| 56.63 | 336.00 |
| 61.41 | 338.00 |
| 30.88 | 339.00 |
| 25.82 | 340.00 |
| 32.53 | 341.00 |
| 45.75 | 342.00 |
| 43.58 | 343.00 |
| 36.24 | 344.00 |
| 30.35 | 345.00 |
| 57.88 | 346.00 |
| 69.83 | 347.00 |
| 37.50 | 348.00 |
| 59.38 | 349.00 |
| 43.09 | 350.00 |
| 30.09 | 351.00 |
| 39.98 | 352.00 |
| 23.00 | 353.00 |
| 53.46 | 354.00 |
| 76.42 | 356.00 |
| 56.53 | 357.00 |
| 35.33 | 358.00 |
| 86.69 | 359.00 |
| 42.39 | 360.00 |
| 41.75 | 361.00 |
| 35.85 | 362.00 |
| 120.37 | 363.00 |
| 45.86 | 364.00 |
| 58.20 | 365.00 |
| 63.12 | 366.00 |
| 41.85 | 367.00 |
| 51.37 | 368.00 |
| 68.13 | 369.00 |
| 53.22 | 370.00 |
| 57.95 | 371.00 |
| 46.71 | 372.00 |
| 62.59 | 373.00 |
| 77.35 | 374.00 |
| 51.39 | 375.00 |
| 41.17 | 376.00 |
| 75.36 | 377.00 |
| 55.55 | 378.00 |
| 40.85 | 379.00 |
| 39.10 | 380.00 |
| 61.38 | 381.00 |
| 62.46 | 382.00 |
| 61.86 | 383.00 |
| 41.66 | 384.00 |
| 84.39 | 385.00 |
| 18.38 | 387.00 |
| 47.38 | 388.00 |
| 99.47 | 389.00 |
| 34.13 | 390.00 |
| 41.70 | 391.00 |
| 60.31 | 392.00 |
| 96.78 | 394.00 |
| 109.87 | 396.00 |
| 78.30 | 397.00 |
| 75.91 | 398.00 |
| 70.60 | 399.00 |
| 56.71 | 400.00 |
| 118.55 | 401.00 |
| 73.19 | 403.00 |
| 71.04 | 404.00 |
| 48.83 | 405.00 |
| 65.02 | 406.00 |
| 51.06 | 407.00 |
| 61.05 | 411.00 |
| 48.77 | 413.00 |
| 67.53 | 415.00 |
| 39.80 | 417.00 |
| 58.36 | 419.00 |
| 70.71 | 420.00 |
| 49.89 | 421.00 |
| 87.44 | 422.00 |
| 93.98 | 423.00 |
| 26.22 | 424.00 |
| 88.34 | 425.00 |
| 111.09 | 426.00 |
| 91.29 | 428.00 |
| 102.83 | 430.00 |
| 79.33 | 431.00 |
| 47.83 | 432.00 |
| 96.17 | 433.00 |
| 66.45 | 437.00 |
| 23.38 | 438.00 |
| 59.66 | 439.00 |
| 90.10 | 440.00 |
| 37.13 | 441.00 |
| 16.96 | 442.00 |
| 56.21 | 443.00 |
| 21.88 | 444.00 |
| 8.62 | 449.00 |
| 60.95 | 450.00 |
| 52.54 | 451.00 |
| 72.46 | 452.00 |
| 11.58 | 453.00 |
| 74.55 | 454.00 |
| 86.35 | 455.00 |
| 98.50 | 456.00 |
| 110.91 | 458.00 |
| 99.58 | 460.00 |
| 62.76 | 462.00 |
| 86.62 | 463.00 |
| 79.28 | 464.00 |
| 32.30 | 467.00 |
| 85.55 | 468.00 |
| 62.41 | 471.00 |
| 69.16 | 472.00 |
| 34.23 | 473.00 |
| 62.96 | 475.00 |
| 101.94 | 478.00 |
| 158.63 | 480.00 |
| 117.23 | 482.00 |
| 42.74 | 483.00 |
| 127.84 | 484.00 |
| 120.76 | 485.00 |
| 46.24 | 487.00 |
| 109.53 | 492.00 |
| 56.28 | 497.00 |
| 101.08 | 501.00 |
| 3.12 | 502.00 |
| 54.97 | 506.00 |
| 105.28 | 509.00 |
| 8.53 | 510.00 |
| 108.96 | 515.00 |
| 11.75 | 522.00 |
| 10.15 | 531.00 |
| 48.05 | 532.00 |
| 77.69 | 537.00 |
| 112.95 | 557.00 |
| 65.36 | 561.00 |
| 123.73 | 577.00 |
| 62.60 | 591.00 |
| 12.05 | 593.00 |
| 44.06 | 595.00 |
| 55.07 | 597.00 |
| 66.78 | 613.00 |
| 28.98 | 659.00 |
| 55.83 | 675.00 |
| 54.71 | 677.00 |
| Nota: Tamaño muestral n = 448 observaciones utilizadas en el análisis. | |
# Coerce both variables to plain numeric vectors
X <- as.numeric(as.character(X))
Y <- as.numeric(as.character(Y))
# Upper axis limits: 5 % headroom above the largest non-missing value
x_max <- 1.05 * max(X, na.rm = TRUE)
y_max <- 1.05 * max(Y, na.rm = TRUE)
# Empty plotting frame (type = "n"); the points are added after the grid
plot(
  x = X,
  y = Y,
  type = "n",
  xlim = c(0, x_max),
  ylim = c(0, y_max),
  xlab = expression("Óxido de Nitrógeno ("*mu*"g/m³)"),
  ylab = "AQI (Índice)",
  main = "Gráfica N°1\nDiagrama de dispersión entre Óxido de Nitrógeno y AQI\nen el estudio de la calidad del aire en India 2015-2020",
  cex.axis = 0.9,
  cex.lab = 1.1,
  cex.main = 1.1
)
# Background grid
grid(nx = NULL, ny = NULL, lty = 1, col = "gray85")
# Points; rows with a missing X or Y are removed before drawing
points(
  na.omit(data.frame(X, Y)),
  pch = 16,
  cex = 1.2,
  col = "deepskyblue3"
)
box(lwd = 1.5)
Conjetura.-
La distribución de los puntos en el gráfico muestra una curva
ascendente, lo que sugiere un modelo polinómico. El AQI aumenta de forma
acelerada a medida que se incrementa la concentración de Óxido de
Nitrógeno, indicando una relación no lineal y un impacto creciente del
contaminante sobre la calidad del aire.
Modelo polinómico general:
\[ Y = a+bX+cX^2 \]
Modelo polinómico aplicado al estudio:
\[ AQI = a+b(NO)+c(NO)^2 \]
# Fit Y = a + b*X + c*X^2 by least squares. raw = TRUE uses the plain powers
# X and X^2, so the coefficients are a, b and c of the written polynomial.
modelo_pol <- lm(Y ~ poly(X, 2, raw = TRUE), data = TVP_NO_AQI)
param <- coef(modelo_pol)
# a: intercept
a_est <- param[1L]
print(a_est)
## (Intercept)
## 51.23435
# b: coefficient of X
b_est <- param[2L]
print(b_est)
## poly(X, 2, raw = TRUE)1
## 8.227434
# c: coefficient of X^2
c_est <- param[3L]
print(c_est)
## poly(X, 2, raw = TRUE)2
## -0.04014406
Justificación del uso de la regresión lineal (lm)
Aunque el modelo planteado presenta una relación polinómica, se puede
utilizar la función lm() debido a que el modelo mantiene
una estructura lineal respecto a sus parámetros desconocidos. La
variable independiente se incorpora mediante términos de primer y
segundo grado, permitiendo ajustar una curva a los datos.
Modelo polinómico:
\[ Y = a+bX+cX^2 \]
Para aplicar regresión lineal se define cada término del polinomio como una variable explicativa:
\[ X_1=X \]
\[ X_2=X^2 \]
Por lo tanto, el modelo queda expresado como:
\[
Y=a+bX_1+cX_2
\]
Definimos:
\[ Y=AQI \]
\[ X_1=NO \]
\[ X_2=NO^2 \]
Obtenemos un modelo lineal equivalente:
\[ AQI=\beta_0+\beta_1(NO)+\beta_2(NO^2) \]
Esta ecuación corresponde a una regresión lineal múltiple, por lo que puede ser estimada utilizando:
\[ lm(AQI \sim NO + I(NO^2)) \]
Finalmente, los coeficientes obtenidos permiten reconstruir el modelo polinómico:
\[ AQI=a+b(NO)+c(NO)^2 \]
# Upper axis limits with 5 % headroom
x_max <- 1.05 * max(X)
y_max <- 1.05 * max(Y)
# Empty plotting frame
plot(
  x = X,
  y = Y,
  type = "n",
  xlim = c(0, x_max),
  ylim = c(0, y_max),
  xlab = expression("Óxido de Nitrógeno ("*mu*"g/m³)"),
  ylab = "AQI (Índice)",
  main = "Gráfica N°2\nModelo polinómico entre Óxido de Nitrógeno y AQI\nen el estudio de la calidad del aire en India 2015-2020",
  cex.axis = 0.9,
  cex.lab = 1.1,
  cex.main = 1.1
)
# Background grid
grid(nx = NULL, ny = NULL, lty = 1, col = "gray85")
# Observed points
points(X, Y, pch = 16, cex = 1.2, col = "deepskyblue3")
# Fitted quadratic curve a + b*x + c*x^2, drawn from 0 to the x-axis limit
curve(
  a_est + b_est * x + c_est * x^2,
  from = 0,
  to = x_max,
  add = TRUE,
  lwd = 3,
  col = "red"
)
box(lwd = 1.5)
# Legend: point symbol for the data, line for the model
legend(
  "topleft",
  legend = c("Datos reales", "Modelo polinómico"),
  pch = c(16, NA),
  lwd = c(NA, 3),
  col = c("deepskyblue3", "red"),
  bg = "white",
  bty = "o",
  cex = 0.8,
  x.intersp = 0.6,
  y.intersp = 0.8
)
## Pearson correlation coefficient between X and Y, expressed as a percentage
r <- 100 * with(TVP_NO_AQI, cor(X, Y))
print(r)
## [1] 75.01084
## Coefficient of determination (r^2), expressed as a percentage
# r is already a percentage, so r^2 is divided by 100 to stay in percent.
# NOTE(review): r is the linear correlation of X and Y, so r2 describes a
# straight-line fit; the R^2 of the fitted quadratic would come from
# summary(modelo_pol)$r.squared. Confirm which one the report intends.
r2 <- r^2 / 100
print(r2)
## [1] 56.26626
# Domain [x]: D = {x | x in R+ including 0}
# Domain [y]: D = {x | x in N and 0 <= x <= 2934}
El modelo polinómico no presenta restricciones. Aunque el modelo puede producir resultados decimales, estos pueden interpretarse como aproximaciones y redondearse al entero más cercano, manteniéndose dentro del dominio práctico del AQI. Al tener un coeficiente cuadrático negativo (c ≈ −0.04), el modelo es creciente hasta NO ≈ 102.5 µg/m³ (vértice de la parábola, −b/(2c)); en ese tramo, el AQI aumenta a medida que se incrementa la concentración de Óxido de Nitrógeno (NO).
# Estimate the AQI at the median NO value and show the result as a text panel.
NO_objetivo <- median(X, na.rm = TRUE)
# A negative concentration is not a valid input for the estimate.
if (NO_objetivo < 0) {
  stop("Error: La concentración de Óxido de Nitrógeno debe ser válida.")
}
# Evaluate the fitted quadratic a + b*x + c*x^2 at the target value
aqi_est <- a_est + b_est * NO_objetivo + c_est * (NO_objetivo^2)
# Question-and-answer message shown in the panel
mensaje <- paste(
  "¿Cuál es el AQI esperado\n",
  "cuando el Óxido de Nitrógeno es",
  round(NO_objetivo, 2), "µg/m³?\n\n",
  "Resultado estimado (AQI):",
  round(aqi_est, 0)
)
# Blank canvas without axes or labels, used only as a text holder
plot(1, type = "n", axes = FALSE, xlab = "", ylab = "")
text(1, 1, labels = mensaje, font = 2, cex = 1.2, col = "deepskyblue3")
Entre la concentración de Óxido de Nitrógeno (NO) y el Índice de Calidad del Aire (AQI) existe una relación de tipo polinómica, representada por el modelo y = 51.23 + 8.23(x) - 0.04(x)^2, siendo “y = Índice de Calidad del Aire (AQI)” y “x = Óxido de Nitrógeno (NO)”. El modelo no presenta restricciones e indica que el AQI aumenta con el incremento de la concentración de Óxido de Nitrógeno (NO), aunque a un ritmo cada vez menor debido al coeficiente cuadrático negativo.