Regresión lineal múltiple (vinos)

David Márquez Domínguez - 30000071320

Punto 2:

1. Carga y exploración inicial de los datos

library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.2.1     ✔ readr     2.2.0
## ✔ forcats   1.0.1     ✔ stringr   1.6.0
## ✔ ggplot2   4.0.2     ✔ tibble    3.3.1
## ✔ lubridate 1.9.5     ✔ tidyr     1.3.2
## ✔ purrr     1.2.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(agriutilities)
library(corrplot)
## corrplot 0.95 loaded
# Read the red-wine dataset from the Excel workbook (the path is relative to
# the current working directory)
data_vinos <- read_excel(path = "reg_lineal/red_wine_data.xlsx")

# List the column names of the loaded data frame
colnames(data_vinos)
##  [1] "fixed.acidity"        "volatile.acidity"     "citric.acid"         
##  [4] "residual.sugar"       "chlorides"            "free.sulfur.dioxide" 
##  [7] "total.sulfur.dioxide" "density"              "pH"                  
## [10] "sulphates"            "alcohol"              "quality"
# Attach the data frame to R's search path so that its columns can be
# referenced by bare name (e.g. `pH` instead of `data_vinos$pH`).
# NOTE(review): attach() is generally discouraged because attached columns can
# be silently masked by same-named objects in the global environment; prefer
# `with(data_vinos, ...)` or `data_vinos$col` — confirm no code depends on the
# attached names before removing this call.
attach(data_vinos)

# Carga librerías adicionales para los gráficos de correlación (chart.Correlation y pairs.panels)
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## ######################### Warning from 'xts' package ##########################
## #                                                                             #
## # The dplyr lag() function breaks how base R's lag() function is supposed to  #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or       #
## # source() into this session won't work correctly.                            #
## #                                                                             #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop           #
## # dplyr from breaking base R's lag() function.                                #
## #                                                                             #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning.  #
## #                                                                             #
## ###############################################################################
## 
## Attaching package: 'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## Attaching package: 'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
library(psych)
## 
## Attaching package: 'psych'
## 
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha

2. Identificación de correlaciones entre variables

# Correlation matrix of all columns (cor() defaults to Pearson), drawn as an
# upper-triangle correlogram with variables ordered by hierarchical clustering
# and black, 45-degree-rotated text labels
corrplot(
  cor(data_vinos),
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

# Scatterplot matrix of every pair of columns in the data frame
pairs(data_vinos)

# Correlation chart of all columns with histograms enabled; points are drawn
# with plotting symbol 19 (solid circle)
chart.Correlation(
  data_vinos,
  histogram = TRUE,
  pch = 19
)

# Pairwise panels using Pearson correlation, with density curves and smoothers
# enabled and correlation ellipses disabled.
# TRUE is spelled out instead of T: T is an ordinary variable that can be
# reassigned, whereas TRUE is a reserved word.
pairs.panels(
  data_vinos,
  method = "pearson",
  density = TRUE,
  ellipses = FALSE,
  smooth = TRUE
)

3. Test de normalidad

# Shapiro-Wilk normality test on pH.
# The column is resolved explicitly inside data_vinos via with() instead of
# through the attached search path, so a same-named global object cannot mask
# it; the reported data name is still "pH".
with(data_vinos, shapiro.test(pH))
## 
##  Shapiro-Wilk normality test
## 
## data:  pH
## W = 0.99349, p-value = 1.712e-06
# Histogram of pH, with the column looked up inside data_vinos rather than on
# the attached search path (the default plot title is unchanged)
with(data_vinos, hist(pH))

# Shapiro-Wilk normality test on alcohol, with the column looked up inside
# data_vinos rather than on the attached search path
with(data_vinos, shapiro.test(alcohol))
## 
##  Shapiro-Wilk normality test
## 
## data:  alcohol
## W = 0.92884, p-value < 2.2e-16
# Histogram of alcohol, with the column looked up inside data_vinos rather
# than on the attached search path
with(data_vinos, hist(alcohol))

# Shapiro-Wilk normality test on quality, with the column looked up inside
# data_vinos rather than on the attached search path
with(data_vinos, shapiro.test(quality))
## 
##  Shapiro-Wilk normality test
## 
## data:  quality
## W = 0.85759, p-value < 2.2e-16
# Histogram of quality, with the column looked up inside data_vinos rather
# than on the attached search path
with(data_vinos, hist(quality))

# Lilliefors (Kolmogorov-Smirnov) normality test, provided by nortest
library(nortest)
# The column is resolved inside data_vinos via with() instead of through the
# attached search path, so a same-named global object cannot mask it
with(data_vinos, lillie.test(pH))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  pH
## D = 0.040368, p-value = 2.244e-06
# Lilliefors normality test on alcohol, with the column looked up inside
# data_vinos rather than on the attached search path
with(data_vinos, lillie.test(alcohol))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  alcohol
## D = 0.12145, p-value < 2.2e-16
# Lilliefors normality test on quality, with the column looked up inside
# data_vinos rather than on the attached search path
with(data_vinos, lillie.test(quality))
## 
##  Lilliefors (Kolmogorov-Smirnov) normality test
## 
## data:  quality
## D = 0.24982, p-value < 2.2e-16

4. Modelos de regresión

# Multiple regression models
# Empty (intercept-only) model for quality
RLM_vacio <- lm(formula = quality ~ 1, data = data_vinos)
summary(RLM_vacio)
## 
## Call:
## lm(formula = quality ~ 1, data = data_vinos)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -2.636 -0.636  0.364  0.364  2.364 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   5.6360     0.0202   279.1   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.8076 on 1598 degrees of freedom
# Full model: quality regressed on every other column of data_vinos
RLM_Completo <- lm(formula = quality ~ ., data = data_vinos)
summary(RLM_Completo)
## 
## Call:
## lm(formula = quality ~ ., data = data_vinos)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68911 -0.36652 -0.04699  0.45202  2.02498 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.197e+01  2.119e+01   1.036   0.3002    
## fixed.acidity         2.499e-02  2.595e-02   0.963   0.3357    
## volatile.acidity     -1.084e+00  1.211e-01  -8.948  < 2e-16 ***
## citric.acid          -1.826e-01  1.472e-01  -1.240   0.2150    
## residual.sugar        1.633e-02  1.500e-02   1.089   0.2765    
## chlorides            -1.874e+00  4.193e-01  -4.470 8.37e-06 ***
## free.sulfur.dioxide   4.361e-03  2.171e-03   2.009   0.0447 *  
## total.sulfur.dioxide -3.265e-03  7.287e-04  -4.480 8.00e-06 ***
## density              -1.788e+01  2.163e+01  -0.827   0.4086    
## pH                   -4.137e-01  1.916e-01  -2.159   0.0310 *  
## sulphates             9.163e-01  1.143e-01   8.014 2.13e-15 ***
## alcohol               2.762e-01  2.648e-02  10.429  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.648 on 1587 degrees of freedom
## Multiple R-squared:  0.3606, Adjusted R-squared:  0.3561 
## F-statistic: 81.35 on 11 and 1587 DF,  p-value: < 2.2e-16
# Akaike information criterion of the full model
AIC(RLM_Completo)
## [1] 3164.277
# Forward selection: start from the empty model and add terms one at a time,
# searching between the empty model (lower) and the full model (upper)
RLM_Forward <- step(
  object = RLM_vacio,
  scope = list(lower = RLM_vacio, upper = RLM_Completo),
  direction = "forward"
)
## Start:  AIC=-682.5
## quality ~ 1
## 
##                        Df Sum of Sq     RSS      AIC
## + alcohol               1   236.295  805.87 -1091.65
## + volatile.acidity      1   158.967  883.20  -945.14
## + sulphates             1    65.865  976.30  -784.89
## + citric.acid           1    53.405  988.76  -764.61
## + total.sulfur.dioxide  1    35.707 1006.46  -736.24
## + density               1    31.887 1010.28  -730.19
## + chlorides             1    17.318 1024.85  -707.29
## + fixed.acidity         1    16.038 1026.13  -705.29
## + pH                    1     3.473 1038.69  -685.84
## + free.sulfur.dioxide   1     2.674 1039.49  -684.61
## <none>                              1042.17  -682.50
## + residual.sugar        1     0.197 1041.97  -680.80
## 
## Step:  AIC=-1091.65
## quality ~ alcohol
## 
##                        Df Sum of Sq    RSS     AIC
## + volatile.acidity      1    94.074 711.80 -1288.1
## + sulphates             1    44.977 760.89 -1181.5
## + citric.acid           1    31.953 773.92 -1154.3
## + pH                    1    26.362 779.51 -1142.8
## + fixed.acidity         1    24.623 781.25 -1139.3
## + total.sulfur.dioxide  1     8.270 797.60 -1106.2
## + density               1     5.203 800.67 -1100.0
## <none>                              805.87 -1091.7
## + chlorides             1     0.611 805.26 -1090.9
## + free.sulfur.dioxide   1     0.325 805.55 -1090.3
## + residual.sugar        1     0.041 805.83 -1089.7
## 
## Step:  AIC=-1288.14
## quality ~ alcohol + volatile.acidity
## 
##                        Df Sum of Sq    RSS     AIC
## + sulphates             1   19.6916 692.10 -1331.0
## + total.sulfur.dioxide  1    6.3730 705.42 -1300.5
## + pH                    1    5.9515 705.84 -1299.6
## + fixed.acidity         1    5.7061 706.09 -1299.0
## + density               1    1.9410 709.86 -1290.5
## <none>                              711.80 -1288.1
## + free.sulfur.dioxide   1    0.6621 711.13 -1287.6
## + chlorides             1    0.3762 711.42 -1287.0
## + citric.acid           1    0.1936 711.60 -1286.6
## + residual.sugar        1    0.0101 711.79 -1286.2
## 
## Step:  AIC=-1331
## quality ~ alcohol + volatile.acidity + sulphates
## 
##                        Df Sum of Sq    RSS     AIC
## + total.sulfur.dioxide  1    8.2176 683.89 -1348.1
## + chlorides             1    7.4925 684.61 -1346.4
## + fixed.acidity         1    3.3282 688.78 -1336.7
## + pH                    1    3.0454 689.06 -1336.0
## + free.sulfur.dioxide   1    1.1129 690.99 -1331.6
## <none>                              692.10 -1331.0
## + citric.acid           1    0.2522 691.85 -1329.6
## + density               1    0.2222 691.88 -1329.5
## + residual.sugar        1    0.0143 692.09 -1329.0
## 
## Step:  AIC=-1348.1
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide
## 
##                       Df Sum of Sq    RSS     AIC
## + chlorides            1    8.0370 675.85 -1365.0
## + pH                   1    3.3094 680.58 -1353.8
## + fixed.acidity        1    2.1037 681.78 -1351.0
## + free.sulfur.dioxide  1    1.3557 682.53 -1349.3
## <none>                             683.89 -1348.1
## + residual.sugar       1    0.2634 683.62 -1346.7
## + density              1    0.1077 683.78 -1346.3
## + citric.acid          1    0.0730 683.81 -1346.3
## 
## Step:  AIC=-1365
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide + 
##     chlorides
## 
##                       Df Sum of Sq    RSS     AIC
## + pH                   1    5.9189 669.93 -1377.1
## + fixed.acidity        1    2.4065 673.44 -1368.7
## + free.sulfur.dioxide  1    1.2403 674.61 -1365.9
## <none>                             675.85 -1365.0
## + residual.sugar       1    0.5531 675.30 -1364.3
## + citric.acid          1    0.1615 675.69 -1363.4
## + density              1    0.1526 675.70 -1363.4
## 
## Step:  AIC=-1377.06
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide + 
##     chlorides + pH
## 
##                       Df Sum of Sq    RSS     AIC
## + free.sulfur.dioxide  1   2.39413 667.54 -1380.8
## <none>                             669.93 -1377.1
## + citric.acid          1   0.80525 669.13 -1377.0
## + residual.sugar       1   0.28390 669.65 -1375.7
## + density              1   0.04468 669.89 -1375.2
## + fixed.acidity        1   0.01040 669.92 -1375.1
## 
## Step:  AIC=-1380.79
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide + 
##     chlorides + pH + free.sulfur.dioxide
## 
##                  Df Sum of Sq    RSS     AIC
## <none>                        667.54 -1380.8
## + citric.acid     1   0.47480 667.06 -1379.9
## + residual.sugar  1   0.16673 667.37 -1379.2
## + density         1   0.03079 667.51 -1378.9
## + fixed.acidity   1   0.00663 667.53 -1378.8
# Coefficient table and fit statistics of the forward-selected model
summary(RLM_Forward)
## 
## Call:
## lm(formula = quality ~ alcohol + volatile.acidity + sulphates + 
##     total.sulfur.dioxide + chlorides + pH + free.sulfur.dioxide, 
##     data = data_vinos)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68918 -0.36757 -0.04653  0.46081  2.02954 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.4300987  0.4029168  10.995  < 2e-16 ***
## alcohol               0.2893028  0.0167958  17.225  < 2e-16 ***
## volatile.acidity     -1.0127527  0.1008429 -10.043  < 2e-16 ***
## sulphates             0.8826651  0.1099084   8.031 1.86e-15 ***
## total.sulfur.dioxide -0.0034822  0.0006868  -5.070 4.43e-07 ***
## chlorides            -2.0178138  0.3975417  -5.076 4.31e-07 ***
## pH                   -0.4826614  0.1175581  -4.106 4.23e-05 ***
## free.sulfur.dioxide   0.0050774  0.0021255   2.389    0.017 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6477 on 1591 degrees of freedom
## Multiple R-squared:  0.3595, Adjusted R-squared:  0.3567 
## F-statistic: 127.6 on 7 and 1591 DF,  p-value: < 2.2e-16
# AIC of the forward-selected model.
# Note: this value is on a different scale from the AIC shown in the step()
# trace, because step() uses extractAIC(), which drops constant terms of the
# log-likelihood; only compare values produced by the same function.
AIC(RLM_Forward)
## [1] 3158.977
# Backward elimination: start from the full model and drop terms one at a
# time, searching between the empty model (lower) and the full model (upper)
RLM_Backward <- step(
  object = RLM_Completo,
  scope = list(lower = RLM_vacio, upper = RLM_Completo),
  direction = "backward"
)
## Start:  AIC=-1375.49
## quality ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     density + pH + sulphates + alcohol
## 
##                        Df Sum of Sq    RSS     AIC
## - density               1     0.287 666.70 -1376.8
## - fixed.acidity         1     0.389 666.80 -1376.5
## - residual.sugar        1     0.498 666.91 -1376.3
## - citric.acid           1     0.646 667.06 -1375.9
## <none>                              666.41 -1375.5
## - free.sulfur.dioxide   1     1.694 668.10 -1373.4
## - pH                    1     1.957 668.37 -1372.8
## - chlorides             1     8.391 674.80 -1357.5
## - total.sulfur.dioxide  1     8.427 674.84 -1357.4
## - sulphates             1    26.971 693.38 -1314.0
## - volatile.acidity      1    33.620 700.03 -1298.8
## - alcohol               1    45.672 712.08 -1271.5
## 
## Step:  AIC=-1376.8
## quality ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar + 
##     chlorides + free.sulfur.dioxide + total.sulfur.dioxide + 
##     pH + sulphates + alcohol
## 
##                        Df Sum of Sq    RSS     AIC
## - fixed.acidity         1     0.108 666.81 -1378.5
## - residual.sugar        1     0.231 666.93 -1378.2
## - citric.acid           1     0.654 667.35 -1377.2
## <none>                              666.70 -1376.8
## - free.sulfur.dioxide   1     1.829 668.53 -1374.4
## - pH                    1     4.325 671.02 -1368.5
## - total.sulfur.dioxide  1     8.728 675.43 -1358.0
## - chlorides             1     8.761 675.46 -1357.9
## - sulphates             1    27.287 693.98 -1314.7
## - volatile.acidity      1    35.000 701.70 -1297.0
## - alcohol               1   119.669 786.37 -1114.8
## 
## Step:  AIC=-1378.54
## quality ~ volatile.acidity + citric.acid + residual.sugar + chlorides + 
##     free.sulfur.dioxide + total.sulfur.dioxide + pH + sulphates + 
##     alcohol
## 
##                        Df Sum of Sq    RSS     AIC
## - residual.sugar        1     0.257 667.06 -1379.9
## - citric.acid           1     0.565 667.37 -1379.2
## <none>                              666.81 -1378.5
## - free.sulfur.dioxide   1     1.901 668.71 -1376.0
## - pH                    1     7.065 673.87 -1363.7
## - chlorides             1     9.940 676.75 -1356.9
## - total.sulfur.dioxide  1    10.031 676.84 -1356.7
## - sulphates             1    27.673 694.48 -1315.5
## - volatile.acidity      1    36.234 703.04 -1295.9
## - alcohol               1   120.633 787.44 -1114.7
## 
## Step:  AIC=-1379.93
## quality ~ volatile.acidity + citric.acid + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Sum of Sq    RSS     AIC
## - citric.acid           1     0.475 667.54 -1380.8
## <none>                              667.06 -1379.9
## - free.sulfur.dioxide   1     2.064 669.13 -1377.0
## - pH                    1     7.138 674.20 -1364.9
## - total.sulfur.dioxide  1     9.828 676.89 -1358.5
## - chlorides             1     9.832 676.89 -1358.5
## - sulphates             1    27.446 694.51 -1317.5
## - volatile.acidity      1    35.977 703.04 -1297.9
## - alcohol               1   122.667 789.73 -1112.0
## 
## Step:  AIC=-1380.79
## quality ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol
## 
##                        Df Sum of Sq    RSS     AIC
## <none>                              667.54 -1380.8
## - free.sulfur.dioxide   1     2.394 669.93 -1377.1
## - pH                    1     7.073 674.61 -1365.9
## - total.sulfur.dioxide  1    10.787 678.32 -1357.2
## - chlorides             1    10.809 678.35 -1357.1
## - sulphates             1    27.060 694.60 -1319.2
## - volatile.acidity      1    42.318 709.85 -1284.5
## - alcohol               1   124.483 792.02 -1109.4
# Coefficient table and fit statistics of the backward-selected model
summary(RLM_Backward)
## 
## Call:
## lm(formula = quality ~ volatile.acidity + chlorides + free.sulfur.dioxide + 
##     total.sulfur.dioxide + pH + sulphates + alcohol, data = data_vinos)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68918 -0.36757 -0.04653  0.46081  2.02954 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.4300987  0.4029168  10.995  < 2e-16 ***
## volatile.acidity     -1.0127527  0.1008429 -10.043  < 2e-16 ***
## chlorides            -2.0178138  0.3975417  -5.076 4.31e-07 ***
## free.sulfur.dioxide   0.0050774  0.0021255   2.389    0.017 *  
## total.sulfur.dioxide -0.0034822  0.0006868  -5.070 4.43e-07 ***
## pH                   -0.4826614  0.1175581  -4.106 4.23e-05 ***
## sulphates             0.8826651  0.1099084   8.031 1.86e-15 ***
## alcohol               0.2893028  0.0167958  17.225  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6477 on 1591 degrees of freedom
## Multiple R-squared:  0.3595, Adjusted R-squared:  0.3567 
## F-statistic: 127.6 on 7 and 1591 DF,  p-value: < 2.2e-16
# AIC of the backward-selected model
AIC(RLM_Backward)
## [1] 3158.977
# Mixed (bidirectional) stepwise selection: start from the empty model and
# allow both adding and dropping terms at each step, searching between the
# empty model (lower) and the full model (upper)
RLM_Stepwise <- step(
  object = RLM_vacio,
  scope = list(lower = RLM_vacio, upper = RLM_Completo),
  direction = "both"
)
## Start:  AIC=-682.5
## quality ~ 1
## 
##                        Df Sum of Sq     RSS      AIC
## + alcohol               1   236.295  805.87 -1091.65
## + volatile.acidity      1   158.967  883.20  -945.14
## + sulphates             1    65.865  976.30  -784.89
## + citric.acid           1    53.405  988.76  -764.61
## + total.sulfur.dioxide  1    35.707 1006.46  -736.24
## + density               1    31.887 1010.28  -730.19
## + chlorides             1    17.318 1024.85  -707.29
## + fixed.acidity         1    16.038 1026.13  -705.29
## + pH                    1     3.473 1038.69  -685.84
## + free.sulfur.dioxide   1     2.674 1039.49  -684.61
## <none>                              1042.17  -682.50
## + residual.sugar        1     0.197 1041.97  -680.80
## 
## Step:  AIC=-1091.65
## quality ~ alcohol
## 
##                        Df Sum of Sq     RSS     AIC
## + volatile.acidity      1    94.074  711.80 -1288.1
## + sulphates             1    44.977  760.89 -1181.5
## + citric.acid           1    31.953  773.92 -1154.3
## + pH                    1    26.362  779.51 -1142.8
## + fixed.acidity         1    24.623  781.25 -1139.3
## + total.sulfur.dioxide  1     8.270  797.60 -1106.2
## + density               1     5.203  800.67 -1100.0
## <none>                               805.87 -1091.7
## + chlorides             1     0.611  805.26 -1090.9
## + free.sulfur.dioxide   1     0.325  805.55 -1090.3
## + residual.sugar        1     0.041  805.83 -1089.7
## - alcohol               1   236.295 1042.17  -682.5
## 
## Step:  AIC=-1288.14
## quality ~ alcohol + volatile.acidity
## 
##                        Df Sum of Sq    RSS      AIC
## + sulphates             1    19.692 692.10 -1331.00
## + total.sulfur.dioxide  1     6.373 705.42 -1300.52
## + pH                    1     5.952 705.84 -1299.56
## + fixed.acidity         1     5.706 706.09 -1299.01
## + density               1     1.941 709.86 -1290.50
## <none>                              711.80 -1288.14
## + free.sulfur.dioxide   1     0.662 711.13 -1287.63
## + chlorides             1     0.376 711.42 -1286.98
## + citric.acid           1     0.194 711.60 -1286.57
## + residual.sugar        1     0.010 711.79 -1286.16
## - volatile.acidity      1    94.074 805.87 -1091.65
## - alcohol               1   171.402 883.20  -945.14
## 
## Step:  AIC=-1331
## quality ~ alcohol + volatile.acidity + sulphates
## 
##                        Df Sum of Sq    RSS      AIC
## + total.sulfur.dioxide  1     8.218 683.89 -1348.10
## + chlorides             1     7.493 684.61 -1346.40
## + fixed.acidity         1     3.328 688.78 -1336.70
## + pH                    1     3.045 689.06 -1336.05
## + free.sulfur.dioxide   1     1.113 690.99 -1331.57
## <none>                              692.10 -1331.00
## + citric.acid           1     0.252 691.85 -1329.58
## + density               1     0.222 691.88 -1329.51
## + residual.sugar        1     0.014 692.09 -1329.03
## - sulphates             1    19.692 711.80 -1288.14
## - volatile.acidity      1    68.789 760.89 -1181.48
## - alcohol               1   166.109 858.21  -989.03
## 
## Step:  AIC=-1348.1
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide
## 
##                        Df Sum of Sq    RSS     AIC
## + chlorides             1     8.037 675.85 -1365.0
## + pH                    1     3.309 680.58 -1353.8
## + fixed.acidity         1     2.104 681.78 -1351.0
## + free.sulfur.dioxide   1     1.356 682.53 -1349.3
## <none>                              683.89 -1348.1
## + residual.sugar        1     0.263 683.62 -1346.7
## + density               1     0.108 683.78 -1346.3
## + citric.acid           1     0.073 683.81 -1346.3
## - total.sulfur.dioxide  1     8.218 692.10 -1331.0
## - sulphates             1    21.536 705.42 -1300.5
## - volatile.acidity      1    66.047 749.93 -1202.7
## - alcohol               1   145.552 829.44 -1041.6
## 
## Step:  AIC=-1365
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide + 
##     chlorides
## 
##                        Df Sum of Sq    RSS     AIC
## + pH                    1     5.919 669.93 -1377.1
## + fixed.acidity         1     2.407 673.44 -1368.7
## + free.sulfur.dioxide   1     1.240 674.61 -1365.9
## <none>                              675.85 -1365.0
## + residual.sugar        1     0.553 675.30 -1364.3
## + citric.acid           1     0.162 675.69 -1363.4
## + density               1     0.153 675.70 -1363.4
## - chlorides             1     8.037 683.89 -1348.1
## - total.sulfur.dioxide  1     8.762 684.61 -1346.4
## - sulphates             1    29.201 705.05 -1299.4
## - volatile.acidity      1    58.869 734.72 -1233.5
## - alcohol               1   119.894 795.74 -1105.9
## 
## Step:  AIC=-1377.06
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide + 
##     chlorides + pH
## 
##                        Df Sum of Sq    RSS     AIC
## + free.sulfur.dioxide   1     2.394 667.54 -1380.8
## <none>                              669.93 -1377.1
## + citric.acid           1     0.805 669.13 -1377.0
## + residual.sugar        1     0.284 669.65 -1375.7
## + density               1     0.045 669.89 -1375.2
## + fixed.acidity         1     0.010 669.92 -1375.1
## - pH                    1     5.919 675.85 -1365.0
## - total.sulfur.dioxide  1     9.233 679.16 -1357.2
## - chlorides             1    10.647 680.58 -1353.8
## - sulphates             1    27.445 697.38 -1314.9
## - volatile.acidity      1    44.972 714.90 -1275.2
## - alcohol               1   125.812 795.74 -1103.9
## 
## Step:  AIC=-1380.79
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide + 
##     chlorides + pH + free.sulfur.dioxide
## 
##                        Df Sum of Sq    RSS     AIC
## <none>                              667.54 -1380.8
## + citric.acid           1     0.475 667.06 -1379.9
## + residual.sugar        1     0.167 667.37 -1379.2
## + density               1     0.031 667.51 -1378.9
## + fixed.acidity         1     0.007 667.53 -1378.8
## - free.sulfur.dioxide   1     2.394 669.93 -1377.1
## - pH                    1     7.073 674.61 -1365.9
## - total.sulfur.dioxide  1    10.787 678.32 -1357.2
## - chlorides             1    10.809 678.35 -1357.1
## - sulphates             1    27.060 694.60 -1319.2
## - volatile.acidity      1    42.318 709.85 -1284.5
## - alcohol               1   124.483 792.02 -1109.4
# Coefficient table and fit statistics of the bidirectional stepwise model
summary(RLM_Stepwise)
## 
## Call:
## lm(formula = quality ~ alcohol + volatile.acidity + sulphates + 
##     total.sulfur.dioxide + chlorides + pH + free.sulfur.dioxide, 
##     data = data_vinos)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68918 -0.36757 -0.04653  0.46081  2.02954 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           4.4300987  0.4029168  10.995  < 2e-16 ***
## alcohol               0.2893028  0.0167958  17.225  < 2e-16 ***
## volatile.acidity     -1.0127527  0.1008429 -10.043  < 2e-16 ***
## sulphates             0.8826651  0.1099084   8.031 1.86e-15 ***
## total.sulfur.dioxide -0.0034822  0.0006868  -5.070 4.43e-07 ***
## chlorides            -2.0178138  0.3975417  -5.076 4.31e-07 ***
## pH                   -0.4826614  0.1175581  -4.106 4.23e-05 ***
## free.sulfur.dioxide   0.0050774  0.0021255   2.389    0.017 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6477 on 1591 degrees of freedom
## Multiple R-squared:  0.3595, Adjusted R-squared:  0.3567 
## F-statistic: 127.6 on 7 and 1591 DF,  p-value: < 2.2e-16
# AIC of the bidirectional stepwise model
AIC(RLM_Stepwise)
## [1] 3158.977
# Hand-specified model with three predictors: alcohol, sulphates, citric.acid
RLM_Propio <- lm(
  formula = quality ~ alcohol + sulphates + citric.acid,
  data = data_vinos
)
summary(RLM_Propio)
## 
## Call:
## lm(formula = quality ~ alcohol + sulphates + citric.acid, data = data_vinos)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7565 -0.3535 -0.1007  0.5067  2.2125 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  1.43392    0.17615   8.140 7.86e-16 ***
## alcohol      0.33841    0.01619  20.903  < 2e-16 ***
## sulphates    0.81403    0.10651   7.643 3.65e-14 ***
## citric.acid  0.51345    0.09284   5.531 3.72e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6842 on 1595 degrees of freedom
## Multiple R-squared:  0.2836, Adjusted R-squared:  0.2823 
## F-statistic: 210.5 on 3 and 1595 DF,  p-value: < 2.2e-16
# AIC of the hand-specified model
AIC(RLM_Propio)
## [1] 3329.91
# Model comparison: one row per model with its degrees of freedom and AIC
AIC(
  RLM_Completo,
  RLM_Forward,
  RLM_Backward,
  RLM_Stepwise,
  RLM_Propio
)
##              df      AIC
## RLM_Completo 13 3164.277
## RLM_Forward   9 3158.977
## RLM_Backward  9 3158.977
## RLM_Stepwise  9 3158.977
## RLM_Propio    5 3329.910