1. Carga y exploración inicial de los datos
library(readxl)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.2.1 ✔ readr 2.2.0
## ✔ forcats 1.0.1 ✔ stringr 1.6.0
## ✔ ggplot2 4.0.2 ✔ tibble 3.3.1
## ✔ lubridate 1.9.5 ✔ tidyr 1.3.2
## ✔ purrr 1.2.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(agriutilities)
library(corrplot)
## corrplot 0.95 loaded
# Read the red-wine dataset from the Excel workbook; the path is relative to
# the current working directory.
data_vinos <- readxl::read_excel(path = "reg_lineal/red_wine_data.xlsx")
# Print the column names of the loaded data frame.
colnames(data_vinos)
## [1] "fixed.acidity" "volatile.acidity" "citric.acid"
## [4] "residual.sugar" "chlorides" "free.sulfur.dioxide"
## [7] "total.sulfur.dioxide" "density" "pH"
## [10] "sulphates" "alcohol" "quality"
# Put the columns of data_vinos on R's search path so they can be referenced
# by bare name. Any code that uses bare column names depends on this call.
# NOTE(review): attach() is generally discouraged -- objects in the global
# environment with the same name take precedence over attached columns, and
# the attached copy does not follow later changes to data_vinos. Prefer
# data_vinos$col or with(data_vinos, ...) in new code.
attach(data_vinos)
# Carga librerías adicionales para los gráficos de correlación
library(PerformanceAnalytics)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
##
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
##
##
## ######################### Warning from 'xts' package ##########################
## # #
## # The dplyr lag() function breaks how base R's lag() function is supposed to #
## # work, which breaks lag(my_xts). Calls to lag(my_xts) that you type or #
## # source() into this session won't work correctly. #
## # #
## # Use stats::lag() to make sure you're not using dplyr::lag(), or you can add #
## # conflictRules('dplyr', exclude = 'lag') to your .Rprofile to stop #
## # dplyr from breaking base R's lag() function. #
## # #
## # Code in packages is not affected. It's protected by R's namespace mechanism #
## # Set `options(xts.warn_dplyr_breaks_lag = FALSE)` to suppress this warning. #
## # #
## ###############################################################################
##
## Attaching package: 'xts'
##
## The following objects are masked from 'package:dplyr':
##
## first, last
##
##
## Attaching package: 'PerformanceAnalytics'
##
## The following object is masked from 'package:graphics':
##
## legend
library(psych)
##
## Attaching package: 'psych'
##
## The following objects are masked from 'package:ggplot2':
##
## %+%, alpha
2. Identificación de correlaciones entre variables
# Correlation matrix of all columns of data_vinos, drawn with corrplot:
# upper triangle only, variables ordered by hierarchical clustering, text
# labels in black and rotated 45 degrees.
corrplot(
  cor(data_vinos),
  type = "upper",
  order = "hclust",
  tl.col = "black",
  tl.srt = 45
)

# Scatterplot matrix over every pair of columns.
pairs(data_vinos)

# Correlation chart that also shows a histogram for each variable
# (histogram = TRUE); pch = 19 selects the plotting symbol.
chart.Correlation(
  data_vinos,
  histogram = TRUE,
  pch = 19
)

# Pairwise panel plot using Pearson correlations. The density and smooth
# panel options are switched on and the correlation ellipses are switched
# off.
# Logical flags are written as TRUE/FALSE: T and F are ordinary variables
# that can be reassigned, so they are not safe shorthands.
pairs.panels(
  data_vinos,
  method = "pearson",
  density = TRUE,
  ellipses = FALSE,
  smooth = TRUE
)

3. Test de normalidad
# Shapiro-Wilk normality test for pH.
# The column is resolved inside data_vinos through with() instead of the
# search path, so the call does not depend on a prior attach() and cannot
# pick up an unrelated workspace object named pH. The reported data name
# stays "pH".
with(data_vinos, shapiro.test(pH))
##
## Shapiro-Wilk normality test
##
## data: pH
## W = 0.99349, p-value = 1.712e-06
# Histogram of pH; with() looks the column up in data_vinos explicitly, so
# no prior attach() is required and the plot title is unchanged.
with(data_vinos, hist(pH))

# Shapiro-Wilk normality test for alcohol, resolved inside data_vinos.
with(data_vinos, shapiro.test(alcohol))
##
## Shapiro-Wilk normality test
##
## data: alcohol
## W = 0.92884, p-value < 2.2e-16
# Histogram of alcohol; with() looks the column up in data_vinos explicitly,
# so no prior attach() is required and the plot title is unchanged.
with(data_vinos, hist(alcohol))

# Shapiro-Wilk normality test for quality, resolved inside data_vinos.
with(data_vinos, shapiro.test(quality))
##
## Shapiro-Wilk normality test
##
## data: quality
## W = 0.85759, p-value < 2.2e-16
# Histogram of quality; with() looks the column up in data_vinos explicitly,
# so no prior attach() is required and the plot title is unchanged.
with(data_vinos, hist(quality))

# Lilliefors (Kolmogorov-Smirnov) normality tests from the nortest package.
library(nortest)
# Test pH, resolving the column inside data_vinos rather than through the
# search path; the reported data name stays "pH".
with(data_vinos, lillie.test(pH))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: pH
## D = 0.040368, p-value = 2.244e-06
# Lilliefors test for alcohol; the column is resolved inside data_vinos via
# with(), so the call does not depend on a prior attach().
with(data_vinos, lillie.test(alcohol))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: alcohol
## D = 0.12145, p-value < 2.2e-16
# Lilliefors test for quality; the column is resolved inside data_vinos via
# with(), so the call does not depend on a prior attach().
with(data_vinos, lillie.test(quality))
##
## Lilliefors (Kolmogorov-Smirnov) normality test
##
## data: quality
## D = 0.24982, p-value < 2.2e-16
4. Modelos de regresión
# Multiple linear regression models.
# Empty model: quality explained by an intercept only.
RLM_vacio <- lm(
  formula = quality ~ 1,
  data = data_vinos
)
summary(RLM_vacio)
##
## Call:
## lm(formula = quality ~ 1, data = data_vinos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.636 -0.636 0.364 0.364 2.364
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.6360 0.0202 279.1 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8076 on 1598 degrees of freedom
# Full model: quality regressed on every other column of data_vinos.
RLM_Completo <- lm(
  formula = quality ~ .,
  data = data_vinos
)
summary(RLM_Completo)
##
## Call:
## lm(formula = quality ~ ., data = data_vinos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.68911 -0.36652 -0.04699 0.45202 2.02498
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.197e+01 2.119e+01 1.036 0.3002
## fixed.acidity 2.499e-02 2.595e-02 0.963 0.3357
## volatile.acidity -1.084e+00 1.211e-01 -8.948 < 2e-16 ***
## citric.acid -1.826e-01 1.472e-01 -1.240 0.2150
## residual.sugar 1.633e-02 1.500e-02 1.089 0.2765
## chlorides -1.874e+00 4.193e-01 -4.470 8.37e-06 ***
## free.sulfur.dioxide 4.361e-03 2.171e-03 2.009 0.0447 *
## total.sulfur.dioxide -3.265e-03 7.287e-04 -4.480 8.00e-06 ***
## density -1.788e+01 2.163e+01 -0.827 0.4086
## pH -4.137e-01 1.916e-01 -2.159 0.0310 *
## sulphates 9.163e-01 1.143e-01 8.014 2.13e-15 ***
## alcohol 2.762e-01 2.648e-02 10.429 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.648 on 1587 degrees of freedom
## Multiple R-squared: 0.3606, Adjusted R-squared: 0.3561
## F-statistic: 81.35 on 11 and 1587 DF, p-value: < 2.2e-16
# AIC of the full model.
AIC(RLM_Completo)
# Forward selection by AIC: start from the empty model and add terms, with
# the search bounded below by the empty model's formula and above by the
# full model's formula.
# Note: the AIC values printed in step()'s trace are on a different scale
# from AIC(); the two differ by a constant that is the same for every model
# fitted to these data, so model rankings agree.
RLM_Forward <- step(
  RLM_vacio,
  scope = list(
    lower = formula(RLM_vacio),
    upper = formula(RLM_Completo)
  ),
  direction = "forward"
)
## Start: AIC=-682.5
## quality ~ 1
##
## Df Sum of Sq RSS AIC
## + alcohol 1 236.295 805.87 -1091.65
## + volatile.acidity 1 158.967 883.20 -945.14
## + sulphates 1 65.865 976.30 -784.89
## + citric.acid 1 53.405 988.76 -764.61
## + total.sulfur.dioxide 1 35.707 1006.46 -736.24
## + density 1 31.887 1010.28 -730.19
## + chlorides 1 17.318 1024.85 -707.29
## + fixed.acidity 1 16.038 1026.13 -705.29
## + pH 1 3.473 1038.69 -685.84
## + free.sulfur.dioxide 1 2.674 1039.49 -684.61
## <none> 1042.17 -682.50
## + residual.sugar 1 0.197 1041.97 -680.80
##
## Step: AIC=-1091.65
## quality ~ alcohol
##
## Df Sum of Sq RSS AIC
## + volatile.acidity 1 94.074 711.80 -1288.1
## + sulphates 1 44.977 760.89 -1181.5
## + citric.acid 1 31.953 773.92 -1154.3
## + pH 1 26.362 779.51 -1142.8
## + fixed.acidity 1 24.623 781.25 -1139.3
## + total.sulfur.dioxide 1 8.270 797.60 -1106.2
## + density 1 5.203 800.67 -1100.0
## <none> 805.87 -1091.7
## + chlorides 1 0.611 805.26 -1090.9
## + free.sulfur.dioxide 1 0.325 805.55 -1090.3
## + residual.sugar 1 0.041 805.83 -1089.7
##
## Step: AIC=-1288.14
## quality ~ alcohol + volatile.acidity
##
## Df Sum of Sq RSS AIC
## + sulphates 1 19.6916 692.10 -1331.0
## + total.sulfur.dioxide 1 6.3730 705.42 -1300.5
## + pH 1 5.9515 705.84 -1299.6
## + fixed.acidity 1 5.7061 706.09 -1299.0
## + density 1 1.9410 709.86 -1290.5
## <none> 711.80 -1288.1
## + free.sulfur.dioxide 1 0.6621 711.13 -1287.6
## + chlorides 1 0.3762 711.42 -1287.0
## + citric.acid 1 0.1936 711.60 -1286.6
## + residual.sugar 1 0.0101 711.79 -1286.2
##
## Step: AIC=-1331
## quality ~ alcohol + volatile.acidity + sulphates
##
## Df Sum of Sq RSS AIC
## + total.sulfur.dioxide 1 8.2176 683.89 -1348.1
## + chlorides 1 7.4925 684.61 -1346.4
## + fixed.acidity 1 3.3282 688.78 -1336.7
## + pH 1 3.0454 689.06 -1336.0
## + free.sulfur.dioxide 1 1.1129 690.99 -1331.6
## <none> 692.10 -1331.0
## + citric.acid 1 0.2522 691.85 -1329.6
## + density 1 0.2222 691.88 -1329.5
## + residual.sugar 1 0.0143 692.09 -1329.0
##
## Step: AIC=-1348.1
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide
##
## Df Sum of Sq RSS AIC
## + chlorides 1 8.0370 675.85 -1365.0
## + pH 1 3.3094 680.58 -1353.8
## + fixed.acidity 1 2.1037 681.78 -1351.0
## + free.sulfur.dioxide 1 1.3557 682.53 -1349.3
## <none> 683.89 -1348.1
## + residual.sugar 1 0.2634 683.62 -1346.7
## + density 1 0.1077 683.78 -1346.3
## + citric.acid 1 0.0730 683.81 -1346.3
##
## Step: AIC=-1365
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide +
## chlorides
##
## Df Sum of Sq RSS AIC
## + pH 1 5.9189 669.93 -1377.1
## + fixed.acidity 1 2.4065 673.44 -1368.7
## + free.sulfur.dioxide 1 1.2403 674.61 -1365.9
## <none> 675.85 -1365.0
## + residual.sugar 1 0.5531 675.30 -1364.3
## + citric.acid 1 0.1615 675.69 -1363.4
## + density 1 0.1526 675.70 -1363.4
##
## Step: AIC=-1377.06
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide +
## chlorides + pH
##
## Df Sum of Sq RSS AIC
## + free.sulfur.dioxide 1 2.39413 667.54 -1380.8
## <none> 669.93 -1377.1
## + citric.acid 1 0.80525 669.13 -1377.0
## + residual.sugar 1 0.28390 669.65 -1375.7
## + density 1 0.04468 669.89 -1375.2
## + fixed.acidity 1 0.01040 669.92 -1375.1
##
## Step: AIC=-1380.79
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide +
## chlorides + pH + free.sulfur.dioxide
##
## Df Sum of Sq RSS AIC
## <none> 667.54 -1380.8
## + citric.acid 1 0.47480 667.06 -1379.9
## + residual.sugar 1 0.16673 667.37 -1379.2
## + density 1 0.03079 667.51 -1378.9
## + fixed.acidity 1 0.00663 667.53 -1378.8
# Coefficient table and fit statistics of the forward-selected model.
summary(RLM_Forward)
##
## Call:
## lm(formula = quality ~ alcohol + volatile.acidity + sulphates +
## total.sulfur.dioxide + chlorides + pH + free.sulfur.dioxide,
## data = data_vinos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.68918 -0.36757 -0.04653 0.46081 2.02954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4300987 0.4029168 10.995 < 2e-16 ***
## alcohol 0.2893028 0.0167958 17.225 < 2e-16 ***
## volatile.acidity -1.0127527 0.1008429 -10.043 < 2e-16 ***
## sulphates 0.8826651 0.1099084 8.031 1.86e-15 ***
## total.sulfur.dioxide -0.0034822 0.0006868 -5.070 4.43e-07 ***
## chlorides -2.0178138 0.3975417 -5.076 4.31e-07 ***
## pH -0.4826614 0.1175581 -4.106 4.23e-05 ***
## free.sulfur.dioxide 0.0050774 0.0021255 2.389 0.017 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6477 on 1591 degrees of freedom
## Multiple R-squared: 0.3595, Adjusted R-squared: 0.3567
## F-statistic: 127.6 on 7 and 1591 DF, p-value: < 2.2e-16
# AIC of the forward-selected model.
AIC(RLM_Forward)
# Backward elimination by AIC: start from the full model and drop terms,
# never going below the empty model's formula.
RLM_Backward <- step(
  RLM_Completo,
  scope = list(
    lower = formula(RLM_vacio),
    upper = formula(RLM_Completo)
  ),
  direction = "backward"
)
## Start: AIC=-1375.49
## quality ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol
##
## Df Sum of Sq RSS AIC
## - density 1 0.287 666.70 -1376.8
## - fixed.acidity 1 0.389 666.80 -1376.5
## - residual.sugar 1 0.498 666.91 -1376.3
## - citric.acid 1 0.646 667.06 -1375.9
## <none> 666.41 -1375.5
## - free.sulfur.dioxide 1 1.694 668.10 -1373.4
## - pH 1 1.957 668.37 -1372.8
## - chlorides 1 8.391 674.80 -1357.5
## - total.sulfur.dioxide 1 8.427 674.84 -1357.4
## - sulphates 1 26.971 693.38 -1314.0
## - volatile.acidity 1 33.620 700.03 -1298.8
## - alcohol 1 45.672 712.08 -1271.5
##
## Step: AIC=-1376.8
## quality ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## pH + sulphates + alcohol
##
## Df Sum of Sq RSS AIC
## - fixed.acidity 1 0.108 666.81 -1378.5
## - residual.sugar 1 0.231 666.93 -1378.2
## - citric.acid 1 0.654 667.35 -1377.2
## <none> 666.70 -1376.8
## - free.sulfur.dioxide 1 1.829 668.53 -1374.4
## - pH 1 4.325 671.02 -1368.5
## - total.sulfur.dioxide 1 8.728 675.43 -1358.0
## - chlorides 1 8.761 675.46 -1357.9
## - sulphates 1 27.287 693.98 -1314.7
## - volatile.acidity 1 35.000 701.70 -1297.0
## - alcohol 1 119.669 786.37 -1114.8
##
## Step: AIC=-1378.54
## quality ~ volatile.acidity + citric.acid + residual.sugar + chlorides +
## free.sulfur.dioxide + total.sulfur.dioxide + pH + sulphates +
## alcohol
##
## Df Sum of Sq RSS AIC
## - residual.sugar 1 0.257 667.06 -1379.9
## - citric.acid 1 0.565 667.37 -1379.2
## <none> 666.81 -1378.5
## - free.sulfur.dioxide 1 1.901 668.71 -1376.0
## - pH 1 7.065 673.87 -1363.7
## - chlorides 1 9.940 676.75 -1356.9
## - total.sulfur.dioxide 1 10.031 676.84 -1356.7
## - sulphates 1 27.673 694.48 -1315.5
## - volatile.acidity 1 36.234 703.04 -1295.9
## - alcohol 1 120.633 787.44 -1114.7
##
## Step: AIC=-1379.93
## quality ~ volatile.acidity + citric.acid + chlorides + free.sulfur.dioxide +
## total.sulfur.dioxide + pH + sulphates + alcohol
##
## Df Sum of Sq RSS AIC
## - citric.acid 1 0.475 667.54 -1380.8
## <none> 667.06 -1379.9
## - free.sulfur.dioxide 1 2.064 669.13 -1377.0
## - pH 1 7.138 674.20 -1364.9
## - total.sulfur.dioxide 1 9.828 676.89 -1358.5
## - chlorides 1 9.832 676.89 -1358.5
## - sulphates 1 27.446 694.51 -1317.5
## - volatile.acidity 1 35.977 703.04 -1297.9
## - alcohol 1 122.667 789.73 -1112.0
##
## Step: AIC=-1380.79
## quality ~ volatile.acidity + chlorides + free.sulfur.dioxide +
## total.sulfur.dioxide + pH + sulphates + alcohol
##
## Df Sum of Sq RSS AIC
## <none> 667.54 -1380.8
## - free.sulfur.dioxide 1 2.394 669.93 -1377.1
## - pH 1 7.073 674.61 -1365.9
## - total.sulfur.dioxide 1 10.787 678.32 -1357.2
## - chlorides 1 10.809 678.35 -1357.1
## - sulphates 1 27.060 694.60 -1319.2
## - volatile.acidity 1 42.318 709.85 -1284.5
## - alcohol 1 124.483 792.02 -1109.4
# Coefficient table and fit statistics of the backward-selected model.
summary(RLM_Backward)
##
## Call:
## lm(formula = quality ~ volatile.acidity + chlorides + free.sulfur.dioxide +
## total.sulfur.dioxide + pH + sulphates + alcohol, data = data_vinos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.68918 -0.36757 -0.04653 0.46081 2.02954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4300987 0.4029168 10.995 < 2e-16 ***
## volatile.acidity -1.0127527 0.1008429 -10.043 < 2e-16 ***
## chlorides -2.0178138 0.3975417 -5.076 4.31e-07 ***
## free.sulfur.dioxide 0.0050774 0.0021255 2.389 0.017 *
## total.sulfur.dioxide -0.0034822 0.0006868 -5.070 4.43e-07 ***
## pH -0.4826614 0.1175581 -4.106 4.23e-05 ***
## sulphates 0.8826651 0.1099084 8.031 1.86e-15 ***
## alcohol 0.2893028 0.0167958 17.225 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6477 on 1591 degrees of freedom
## Multiple R-squared: 0.3595, Adjusted R-squared: 0.3567
## F-statistic: 127.6 on 7 and 1591 DF, p-value: < 2.2e-16
# AIC of the backward-selected model.
AIC(RLM_Backward)
# Mixed (both-direction) stepwise selection by AIC: start from the empty
# model; at each step a term may be added or removed, within the bounds set
# by the empty and full model formulas.
RLM_Stepwise <- step(
  RLM_vacio,
  scope = list(
    lower = formula(RLM_vacio),
    upper = formula(RLM_Completo)
  ),
  direction = "both"
)
## Start: AIC=-682.5
## quality ~ 1
##
## Df Sum of Sq RSS AIC
## + alcohol 1 236.295 805.87 -1091.65
## + volatile.acidity 1 158.967 883.20 -945.14
## + sulphates 1 65.865 976.30 -784.89
## + citric.acid 1 53.405 988.76 -764.61
## + total.sulfur.dioxide 1 35.707 1006.46 -736.24
## + density 1 31.887 1010.28 -730.19
## + chlorides 1 17.318 1024.85 -707.29
## + fixed.acidity 1 16.038 1026.13 -705.29
## + pH 1 3.473 1038.69 -685.84
## + free.sulfur.dioxide 1 2.674 1039.49 -684.61
## <none> 1042.17 -682.50
## + residual.sugar 1 0.197 1041.97 -680.80
##
## Step: AIC=-1091.65
## quality ~ alcohol
##
## Df Sum of Sq RSS AIC
## + volatile.acidity 1 94.074 711.80 -1288.1
## + sulphates 1 44.977 760.89 -1181.5
## + citric.acid 1 31.953 773.92 -1154.3
## + pH 1 26.362 779.51 -1142.8
## + fixed.acidity 1 24.623 781.25 -1139.3
## + total.sulfur.dioxide 1 8.270 797.60 -1106.2
## + density 1 5.203 800.67 -1100.0
## <none> 805.87 -1091.7
## + chlorides 1 0.611 805.26 -1090.9
## + free.sulfur.dioxide 1 0.325 805.55 -1090.3
## + residual.sugar 1 0.041 805.83 -1089.7
## - alcohol 1 236.295 1042.17 -682.5
##
## Step: AIC=-1288.14
## quality ~ alcohol + volatile.acidity
##
## Df Sum of Sq RSS AIC
## + sulphates 1 19.692 692.10 -1331.00
## + total.sulfur.dioxide 1 6.373 705.42 -1300.52
## + pH 1 5.952 705.84 -1299.56
## + fixed.acidity 1 5.706 706.09 -1299.01
## + density 1 1.941 709.86 -1290.50
## <none> 711.80 -1288.14
## + free.sulfur.dioxide 1 0.662 711.13 -1287.63
## + chlorides 1 0.376 711.42 -1286.98
## + citric.acid 1 0.194 711.60 -1286.57
## + residual.sugar 1 0.010 711.79 -1286.16
## - volatile.acidity 1 94.074 805.87 -1091.65
## - alcohol 1 171.402 883.20 -945.14
##
## Step: AIC=-1331
## quality ~ alcohol + volatile.acidity + sulphates
##
## Df Sum of Sq RSS AIC
## + total.sulfur.dioxide 1 8.218 683.89 -1348.10
## + chlorides 1 7.493 684.61 -1346.40
## + fixed.acidity 1 3.328 688.78 -1336.70
## + pH 1 3.045 689.06 -1336.05
## + free.sulfur.dioxide 1 1.113 690.99 -1331.57
## <none> 692.10 -1331.00
## + citric.acid 1 0.252 691.85 -1329.58
## + density 1 0.222 691.88 -1329.51
## + residual.sugar 1 0.014 692.09 -1329.03
## - sulphates 1 19.692 711.80 -1288.14
## - volatile.acidity 1 68.789 760.89 -1181.48
## - alcohol 1 166.109 858.21 -989.03
##
## Step: AIC=-1348.1
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide
##
## Df Sum of Sq RSS AIC
## + chlorides 1 8.037 675.85 -1365.0
## + pH 1 3.309 680.58 -1353.8
## + fixed.acidity 1 2.104 681.78 -1351.0
## + free.sulfur.dioxide 1 1.356 682.53 -1349.3
## <none> 683.89 -1348.1
## + residual.sugar 1 0.263 683.62 -1346.7
## + density 1 0.108 683.78 -1346.3
## + citric.acid 1 0.073 683.81 -1346.3
## - total.sulfur.dioxide 1 8.218 692.10 -1331.0
## - sulphates 1 21.536 705.42 -1300.5
## - volatile.acidity 1 66.047 749.93 -1202.7
## - alcohol 1 145.552 829.44 -1041.6
##
## Step: AIC=-1365
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide +
## chlorides
##
## Df Sum of Sq RSS AIC
## + pH 1 5.919 669.93 -1377.1
## + fixed.acidity 1 2.407 673.44 -1368.7
## + free.sulfur.dioxide 1 1.240 674.61 -1365.9
## <none> 675.85 -1365.0
## + residual.sugar 1 0.553 675.30 -1364.3
## + citric.acid 1 0.162 675.69 -1363.4
## + density 1 0.153 675.70 -1363.4
## - chlorides 1 8.037 683.89 -1348.1
## - total.sulfur.dioxide 1 8.762 684.61 -1346.4
## - sulphates 1 29.201 705.05 -1299.4
## - volatile.acidity 1 58.869 734.72 -1233.5
## - alcohol 1 119.894 795.74 -1105.9
##
## Step: AIC=-1377.06
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide +
## chlorides + pH
##
## Df Sum of Sq RSS AIC
## + free.sulfur.dioxide 1 2.394 667.54 -1380.8
## <none> 669.93 -1377.1
## + citric.acid 1 0.805 669.13 -1377.0
## + residual.sugar 1 0.284 669.65 -1375.7
## + density 1 0.045 669.89 -1375.2
## + fixed.acidity 1 0.010 669.92 -1375.1
## - pH 1 5.919 675.85 -1365.0
## - total.sulfur.dioxide 1 9.233 679.16 -1357.2
## - chlorides 1 10.647 680.58 -1353.8
## - sulphates 1 27.445 697.38 -1314.9
## - volatile.acidity 1 44.972 714.90 -1275.2
## - alcohol 1 125.812 795.74 -1103.9
##
## Step: AIC=-1380.79
## quality ~ alcohol + volatile.acidity + sulphates + total.sulfur.dioxide +
## chlorides + pH + free.sulfur.dioxide
##
## Df Sum of Sq RSS AIC
## <none> 667.54 -1380.8
## + citric.acid 1 0.475 667.06 -1379.9
## + residual.sugar 1 0.167 667.37 -1379.2
## + density 1 0.031 667.51 -1378.9
## + fixed.acidity 1 0.007 667.53 -1378.8
## - free.sulfur.dioxide 1 2.394 669.93 -1377.1
## - pH 1 7.073 674.61 -1365.9
## - total.sulfur.dioxide 1 10.787 678.32 -1357.2
## - chlorides 1 10.809 678.35 -1357.1
## - sulphates 1 27.060 694.60 -1319.2
## - volatile.acidity 1 42.318 709.85 -1284.5
## - alcohol 1 124.483 792.02 -1109.4
# Coefficient table and fit statistics of the mixed-stepwise model.
summary(RLM_Stepwise)
##
## Call:
## lm(formula = quality ~ alcohol + volatile.acidity + sulphates +
## total.sulfur.dioxide + chlorides + pH + free.sulfur.dioxide,
## data = data_vinos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.68918 -0.36757 -0.04653 0.46081 2.02954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 4.4300987 0.4029168 10.995 < 2e-16 ***
## alcohol 0.2893028 0.0167958 17.225 < 2e-16 ***
## volatile.acidity -1.0127527 0.1008429 -10.043 < 2e-16 ***
## sulphates 0.8826651 0.1099084 8.031 1.86e-15 ***
## total.sulfur.dioxide -0.0034822 0.0006868 -5.070 4.43e-07 ***
## chlorides -2.0178138 0.3975417 -5.076 4.31e-07 ***
## pH -0.4826614 0.1175581 -4.106 4.23e-05 ***
## free.sulfur.dioxide 0.0050774 0.0021255 2.389 0.017 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6477 on 1591 degrees of freedom
## Multiple R-squared: 0.3595, Adjusted R-squared: 0.3567
## F-statistic: 127.6 on 7 and 1591 DF, p-value: < 2.2e-16
# AIC of the mixed-stepwise model.
AIC(RLM_Stepwise)
# Hand-picked model: quality explained by alcohol, sulphates and citric.acid.
RLM_Propio <- lm(
  formula = quality ~ alcohol + sulphates + citric.acid,
  data = data_vinos
)
summary(RLM_Propio)
##
## Call:
## lm(formula = quality ~ alcohol + sulphates + citric.acid, data = data_vinos)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7565 -0.3535 -0.1007 0.5067 2.2125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.43392 0.17615 8.140 7.86e-16 ***
## alcohol 0.33841 0.01619 20.903 < 2e-16 ***
## sulphates 0.81403 0.10651 7.643 3.65e-14 ***
## citric.acid 0.51345 0.09284 5.531 3.72e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6842 on 1595 degrees of freedom
## Multiple R-squared: 0.2836, Adjusted R-squared: 0.2823
## F-statistic: 210.5 on 3 and 1595 DF, p-value: < 2.2e-16
# AIC of the hand-picked model.
AIC(RLM_Propio)
# Model comparison: one table with the degrees of freedom and AIC of each
# candidate model (a lower AIC indicates the preferred model).
AIC(
  RLM_Completo,
  RLM_Forward,
  RLM_Backward,
  RLM_Stepwise,
  RLM_Propio
)
## df AIC
## RLM_Completo 13 3164.277
## RLM_Forward 9 3158.977
## RLM_Backward 9 3158.977
## RLM_Stepwise 9 3158.977
## RLM_Propio 5 3329.910