Queremos predecir y, el tiempo de respuesta de un servidor (en milisegundos), en función de varias variables independientes: x1, número de usuarios concurrentes; x2, uso de la CPU (%); x3, cantidad de memoria disponible (MB); x4, ancho de banda (Mbps); y x5, latencia de red (ms).

y (170, 165, 168, 175, 180, 190, 185, 220, 220, 240)

x1 (13, 12, 11, 16, 15, 20, 19, 24, 25, 28)

x2 (40, 35, 32, 40, 60, 55, 55, 65, 72, 75)

x3 (1648, 1900, 1740, 1710, 1600, 1500, 1360, 1300, 1200, 1100)

x4 (80, 95, 85, 82, 80, 75, 70, 65, 60, 55)

x5 (18, 25, 28, 30, 40, 45, 50, 55, 60, 65)

Realizar el análisis de regresión lineal múltiple (ajuste del modelo lineal múltiple, hipótesis para los parámetros del modelo, ANOVA, verificación de los supuestos, aplicación del método backward o forward para la elección de las variables significativas) y publicarlo en RPubs.

# Observed data: ten measurements of the server response time and of the
# five candidate predictors described in the problem statement.
y  <- c(170, 165, 168, 175, 180, 190, 185, 220, 220, 240)           # response time (ms)
x1 <- c(13, 12, 11, 16, 15, 20, 19, 24, 25, 28)                     # concurrent users
x2 <- c(40, 35, 32, 40, 60, 55, 55, 65, 72, 75)                     # CPU usage (%)
x3 <- c(1648, 1900, 1740, 1710, 1600, 1500, 1360, 1300, 1200, 1100) # available memory (MB)
x4 <- c(80, 95, 85, 82, 80, 75, 70, 65, 60, 55)                     # bandwidth (Mbps)
x5 <- c(18, 25, 28, 30, 40, 45, 50, 55, 60, 65)                     # network latency (ms)

# Gather every variable in a single data frame, one column per vector,
# with each column named after its vector.
data <- data.frame(
  y = y,
  x1 = x1,
  x2 = x2,
  x3 = x3,
  x4 = x4,
  x5 = x5
)

# Full multiple linear regression: response time on all five predictors.
modelo_multiple <- lm(y ~ x1 + x2 + x3 + x4 + x5, data = data)

# Coefficient table with t tests, residual standard error, R-squared and
# the overall F test of the full model.
summary(modelo_multiple)
## 
## Call:
## lm(formula = y ~ x1 + x2 + x3 + x4 + x5, data = data)
## 
## Residuals:
##       1       2       3       4       5       6       7       8       9      10 
##  1.0267  5.6131  5.7701 -7.8724 -0.7862 -6.4893 -6.7745  4.9594 -1.8551  6.4082 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)
## (Intercept) 125.26435   88.78967   1.411    0.231
## x1            2.93885    2.04789   1.435    0.225
## x2            0.23891    0.58176   0.411    0.702
## x3            0.09949    0.14408   0.690    0.528
## x4           -2.16073    2.76459  -0.782    0.478
## x5            0.26966    0.77829   0.346    0.746
## 
## Residual standard error: 8.449 on 4 degrees of freedom
## Multiple R-squared:  0.9535, Adjusted R-squared:  0.8954 
## F-statistic: 16.41 on 5 and 4 DF,  p-value: 0.009021
# Analysis of variance table for the full model. For an lm fit the sums of
# squares are sequential: each row measures the term added after the terms
# listed above it, so the table depends on the order of the predictors.
anova(object = modelo_multiple)
## Analysis of Variance Table
## 
## Response: y
##           Df Sum Sq Mean Sq F value    Pr(>F)    
## x1         1 5797.2  5797.2 81.2057 0.0008397 ***
## x2         1   10.9    10.9  0.1530 0.7156091    
## x3         1    3.2     3.2  0.0445 0.8431616    
## x4         1   36.7    36.7  0.5142 0.5129565    
## x5         1    8.6     8.6  0.1200 0.7464419    
## Residuals  4  285.6    71.4                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Residuals of the full model, kept for the residual plot drawn later.
residuos <- residuals(modelo_multiple)

# Diagnostic plots of the fitted model on a 2 x 2 grid.
# par() returns the settings it replaces; they are restored right after the
# diagnostics so that later plots are not squeezed into one panel of the grid.
par_previo <- par(mfrow = c(2, 2))
plot(modelo_multiple)
par(par_previo)
## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs
## Warning in sqrt(crit * p * (1 - hh)/hh): Se han producido NaNs

# Scatter plot of the residuals against the observed response time, with a
# red horizontal reference line at zero.
# NOTE(review): residuals are correlated with the observed response by
# construction, so a trend here is not by itself evidence of misfit; the
# residuals-vs-fitted diagnostic is the usual check. Confirm this is intended.
plot(
  x = data[["y"]],
  y = residuos,
  pch = 19,
  main = "Residuos vs Tiempo de respuesta",
  xlab = "Tiempo de respuesta (ms)",
  ylab = "Residuos"
)
abline(h = 0, col = "red")

# Backward elimination: starting from the full model, step() removes one
# predictor at a time for as long as the removal lowers the AIC, printing
# the candidate table of every step.
modelo_backward <- step(object = modelo_multiple, direction = "backward")
## Start:  AIC=45.52
## y ~ x1 + x2 + x3 + x4 + x5
## 
##        Df Sum of Sq    RSS    AIC
## - x5    1     8.570 294.12 43.814
## - x2    1    12.039 297.59 43.931
## - x3    1    34.037 319.59 44.645
## - x4    1    43.608 329.16 44.940
## <none>              285.55 45.518
## - x1    1   147.018 432.57 47.672
## 
## Step:  AIC=43.81
## y ~ x1 + x2 + x3 + x4
## 
##        Df Sum of Sq    RSS    AIC
## - x2    1    18.574 312.70 42.427
## - x3    1    26.232 320.36 42.668
## - x4    1    36.710 330.84 42.990
## <none>              294.12 43.814
## - x1    1   289.324 583.45 48.664
## 
## Step:  AIC=42.43
## y ~ x1 + x3 + x4
## 
##        Df Sum of Sq    RSS    AIC
## - x3    1     12.14 324.84 40.808
## - x4    1     23.14 335.84 41.140
## <none>              312.70 42.427
## - x1    1    435.74 748.44 49.154
## 
## Step:  AIC=40.81
## y ~ x1 + x4
## 
##        Df Sum of Sq    RSS    AIC
## - x4    1     20.10 344.94 39.408
## <none>              324.84 40.808
## - x1    1    425.26 750.10 47.176
## 
## Step:  AIC=39.41
## y ~ x1
## 
##        Df Sum of Sq    RSS    AIC
## <none>               344.9 39.408
## - x1    1    5797.2 6142.1 66.203
# Coefficients and fit statistics of the model kept by backward elimination.
summary(object = modelo_backward)
## 
## Call:
## lm(formula = y ~ x1, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.317 -4.835  1.197  3.831  8.162 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 112.4300     7.1118   15.81 2.56e-07 ***
## x1            4.3098     0.3717   11.60 2.78e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.566 on 8 degrees of freedom
## Multiple R-squared:  0.9438, Adjusted R-squared:  0.9368 
## F-statistic: 134.5 on 1 and 8 DF,  p-value: 2.783e-06
# Forward selection: starting from the intercept-only model, step() adds the
# predictor that lowers the AIC the most and stops when no addition helps.
# The upper scope lists the candidate predictors that may enter the model.
modelo_forward <- step(
  object = lm(y ~ 1, data = data),
  scope = list(upper = ~ x1 + x2 + x3 + x4 + x5),
  direction = "forward"
)
## Start:  AIC=66.2
## y ~ 1
## 
##        Df Sum of Sq    RSS    AIC
## + x1    1    5797.2  344.9 39.408
## + x4    1    5392.0  750.1 47.176
## + x3    1    5345.0  797.1 47.784
## + x5    1    5194.2  947.9 49.517
## + x2    1    5124.1 1018.0 50.230
## <none>              6142.1 66.203
## 
## Step:  AIC=39.41
## y ~ x1
## 
##        Df Sum of Sq    RSS    AIC
## <none>              344.94 39.408
## + x4    1   20.0960 324.84 40.808
## + x2    1   10.9239 334.01 41.086
## + x3    1    9.1019 335.84 41.140
## + x5    1    0.7420 344.20 41.386
# Coefficients and fit statistics of the model reached by forward selection.
summary(object = modelo_forward)
## 
## Call:
## lm(formula = y ~ x1, data = data)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -9.317 -4.835  1.197  3.831  8.162 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 112.4300     7.1118   15.81 2.56e-07 ***
## x1            4.3098     0.3717   11.60 2.78e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.566 on 8 degrees of freedom
## Multiple R-squared:  0.9438, Adjusted R-squared:  0.9368 
## F-statistic: 134.5 on 1 and 8 DF,  p-value: 2.783e-06