Queremos predecir y: el tiempo de respuesta de un servidor (en milisegundos), en función de varias variables independientes: x1: número de usuarios concurrentes, x2: uso de la CPU (%), x3: cantidad de memoria disponible (MB), x4: ancho de banda (Mbps) y x5: latencia de red (ms).
y (170, 165, 168, 175, 180, 190, 185, 220, 220, 240)
x1 (13, 12, 11, 16, 15, 20, 19, 24, 25, 28)
x2 (40, 35, 32, 40, 60, 55, 55, 65, 72, 75)
x3 (1648, 1900, 1740, 1710, 1600, 1500, 1360, 1300, 1200, 1100)
x4 (80, 95, 85, 82, 80, 75, 70, 65, 60, 55)
x5 (18, 25, 28, 30, 40, 45, 50, 55, 60, 65)
Realizar el análisis de regresión lineal múltiple (ajuste del modelo lineal múltiple, hipótesis para los parámetros del modelo, ANOVA, verificación de los supuestos, aplicar el método backward o forward para la elección de las variables significativas) y publicar en RPubs.
# Data ----
# Response: server response time (ms)
y <- c(170, 165, 168, 175, 180, 190, 185, 220, 220, 240)
# Predictors: concurrent users, CPU usage (%), available memory (MB),
# bandwidth (Mbps) and network latency (ms)
x1 <- c(13, 12, 11, 16, 15, 20, 19, 24, 25, 28)
x2 <- c(40, 35, 32, 40, 60, 55, 55, 65, 72, 75)
x3 <- c(1648, 1900, 1740, 1710, 1600, 1500, 1360, 1300, 1200, 1100)
x4 <- c(80, 95, 85, 82, 80, 75, 70, 65, 60, 55)
x5 <- c(18, 25, 28, 30, 40, 45, 50, 55, 60, 65)
# Collect the response and the five predictors in one data frame
data <- data.frame(
  y = y,
  x1 = x1,
  x2 = x2,
  x3 = x3,
  x4 = x4,
  x5 = x5
)
# Full model ----
# Multiple linear regression of y on all five predictors
modelo_multiple <- lm(
  formula = y ~ x1 + x2 + x3 + x4 + x5,
  data = data
)
# Coefficient table, residual standard error, R-squared and overall F test
summary(object = modelo_multiple)
##
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5, data = data)
##
## Residuals:
## 1 2 3 4 5 6 7 8 9 10
## 1.0267 5.6131 5.7701 -7.8724 -0.7862 -6.4893 -6.7745 4.9594 -1.8551 6.4082
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.26435 88.78967 1.411 0.231
## x1 2.93885 2.04789 1.435 0.225
## x2 0.23891 0.58176 0.411 0.702
## x3 0.09949 0.14408 0.690 0.528
## x4 -2.16073 2.76459 -0.782 0.478
## x5 0.26966 0.77829 0.346 0.746
##
## Residual standard error: 8.449 on 4 degrees of freedom
## Multiple R-squared: 0.9535, Adjusted R-squared: 0.8954
## F-statistic: 16.41 on 5 and 4 DF, p-value: 0.009021
# ANOVA table of the full model. Terms are tested sequentially, in the
# order they appear in the formula, so each row is conditional on the
# terms listed above it.
anova(object = modelo_multiple)
## Analysis of Variance Table
##
## Response: y
## Df Sum Sq Mean Sq F value Pr(>F)
## x1 1 5797.2 5797.2 81.2057 0.0008397 ***
## x2 1 10.9 10.9 0.1530 0.7156091
## x3 1 3.2 3.2 0.0445 0.8431616
## x4 1 36.7 36.7 0.5142 0.5129565
## x5 1 8.6 8.6 0.1200 0.7464419
## Residuals 4 285.6 71.4
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals of the full model (reused by the residual scatter plot below)
residuos <- residuals(modelo_multiple)
# Standard lm diagnostic plots, drawn in a 2x2 grid.
# par() returns the previous settings; they are restored right after the
# diagnostics so that later plots use the whole device instead of being
# squeezed into one cell of the 2x2 layout.
par_previo <- par(mfrow = c(2, 2))
plot(modelo_multiple)
par(par_previo)
## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs
## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs
# Scatter plot of the residuals against the observed response time
# NOTE(review): residuals are usually plotted against fitted values;
# confirm that plotting against the observed y is intended.
plot(
  x = data$y,
  y = residuos,
  main = "Residuos vs Tiempo de respuesta",
  xlab = "Tiempo de respuesta (ms)",
  ylab = "Residuos",
  pch = 19
)
# Horizontal reference line at zero
abline(h = 0, col = "red")
# Backward elimination ----
# Start from the full model and let step() drop one term at a time
# while doing so lowers the AIC.
modelo_backward <- step(
  object = modelo_multiple,
  direction = "backward"
)
## Start: AIC=45.52
## y ~ x1 + x2 + x3 + x4 + x5
##
## Df Sum of Sq RSS AIC
## - x5 1 8.570 294.12 43.814
## - x2 1 12.039 297.59 43.931
## - x3 1 34.037 319.59 44.645
## - x4 1 43.608 329.16 44.940
## <none> 285.55 45.518
## - x1 1 147.018 432.57 47.672
##
## Step: AIC=43.81
## y ~ x1 + x2 + x3 + x4
##
## Df Sum of Sq RSS AIC
## - x2 1 18.574 312.70 42.427
## - x3 1 26.232 320.36 42.668
## - x4 1 36.710 330.84 42.990
## <none> 294.12 43.814
## - x1 1 289.324 583.45 48.664
##
## Step: AIC=42.43
## y ~ x1 + x3 + x4
##
## Df Sum of Sq RSS AIC
## - x3 1 12.14 324.84 40.808
## - x4 1 23.14 335.84 41.140
## <none> 312.70 42.427
## - x1 1 435.74 748.44 49.154
##
## Step: AIC=40.81
## y ~ x1 + x4
##
## Df Sum of Sq RSS AIC
## - x4 1 20.10 344.94 39.408
## <none> 324.84 40.808
## - x1 1 425.26 750.10 47.176
##
## Step: AIC=39.41
## y ~ x1
##
## Df Sum of Sq RSS AIC
## <none> 344.9 39.408
## - x1 1 5797.2 6142.1 66.203
# Summary of the model kept by backward elimination
summary(object = modelo_backward)
##
## Call:
## lm(formula = y ~ x1, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.317 -4.835 1.197 3.831 8.162
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 112.4300 7.1118 15.81 2.56e-07 ***
## x1 4.3098 0.3717 11.60 2.78e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.566 on 8 degrees of freedom
## Multiple R-squared: 0.9438, Adjusted R-squared: 0.9368
## F-statistic: 134.5 on 1 and 8 DF, p-value: 2.783e-06
# Forward selection ----
# Start from the intercept-only model; the scope formula lists the
# candidate predictors that step() may add.
modelo_forward <- step(
  object = lm(formula = y ~ 1, data = data),
  scope = ~ x1 + x2 + x3 + x4 + x5,
  direction = "forward"
)
## Start: AIC=66.2
## y ~ 1
##
## Df Sum of Sq RSS AIC
## + x1 1 5797.2 344.9 39.408
## + x4 1 5392.0 750.1 47.176
## + x3 1 5345.0 797.1 47.784
## + x5 1 5194.2 947.9 49.517
## + x2 1 5124.1 1018.0 50.230
## <none> 6142.1 66.203
##
## Step: AIC=39.41
## y ~ x1
##
## Df Sum of Sq RSS AIC
## <none> 344.94 39.408
## + x4 1 20.0960 324.84 40.808
## + x2 1 10.9239 334.01 41.086
## + x3 1 9.1019 335.84 41.140
## + x5 1 0.7420 344.20 41.386
# Summary of the model kept by forward selection
summary(object = modelo_forward)
##
## Call:
## lm(formula = y ~ x1, data = data)
##
## Residuals:
## Min 1Q Median 3Q Max
## -9.317 -4.835 1.197 3.831 8.162
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 112.4300 7.1118 15.81 2.56e-07 ***
## x1 4.3098 0.3717 11.60 2.78e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 6.566 on 8 degrees of freedom
## Multiple R-squared: 0.9438, Adjusted R-squared: 0.9368
## F-statistic: 134.5 on 1 and 8 DF, p-value: 2.783e-06