library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(caret) # Para particiones
## Warning: package 'caret' was built under R version 3.6.2
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.2
library(corrplot) # Para correlaciones visuales
## Warning: package 'corrplot' was built under R version 3.6.2
## corrplot 0.84 loaded
# Load the red-wine quality data set.
# The data directory is kept in a variable and joined to the file name with
# file.path() instead of calling setwd(), so the script does not change the
# session's working directory as a side effect.
# NOTE(review): this is a user-specific home-relative path; adjust per machine.
dir_datos <- "~/tabajos diplomado/modulo 5/Git/FundaMachineLearning/datos"
datos_red <- read.csv(file.path(dir_datos, "winequality-red.csv"))
# Preview the first rows of the imported data frame.
head(datos_red)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
## quality
## 1 5
## 2 5
## 3 5
## 4 6
## 5 5
## 6 5
# Show the last six rows of the full data set.
tail(datos_red, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1594 6.8 0.620 0.08 1.9 0.068
## 1595 6.2 0.600 0.08 2.0 0.090
## 1596 5.9 0.550 0.10 2.2 0.062
## 1597 6.3 0.510 0.13 2.3 0.076
## 1598 5.9 0.645 0.12 2.0 0.075
## 1599 6.0 0.310 0.47 3.6 0.067
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1594 28 38 0.99651 3.42 0.82 9.5
## 1595 32 44 0.99490 3.45 0.58 10.5
## 1596 39 51 0.99512 3.52 0.76 11.2
## 1597 29 40 0.99574 3.42 0.75 11.0
## 1598 32 44 0.99547 3.57 0.71 10.2
## 1599 18 42 0.99549 3.39 0.66 11.0
## quality
## 1594 6
## 1595 5
## 1596 6
## 1597 6
## 1598 5
## 1599 6
# Compact overview of each column: its type and first few values.
str(object = datos_red)
## 'data.frame': 1599 obs. of 12 variables:
## $ fixed.acidity : num 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
## $ volatile.acidity : num 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
## $ citric.acid : num 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
## $ residual.sugar : num 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
## $ chlorides : num 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
## $ free.sulfur.dioxide : num 11 25 15 17 11 13 15 15 9 17 ...
## $ total.sulfur.dioxide: num 34 67 54 60 34 40 59 21 18 102 ...
## $ density : num 0.998 0.997 0.997 0.998 0.998 ...
## $ pH : num 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
## $ sulphates : num 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
## $ alcohol : num 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
## $ quality : int 5 5 5 6 5 5 5 7 7 5 ...
# Per-column descriptive statistics (min, quartiles, mean, max).
summary(object = datos_red)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00 1st Qu.:0.9956
## Median :0.07900 Median :14.00 Median : 38.00 Median :0.9968
## Mean :0.08747 Mean :15.87 Mean : 46.47 Mean :0.9967
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00 3rd Qu.:0.9978
## Max. :0.61100 Max. :72.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol quality
## Min. :2.740 Min. :0.3300 Min. : 8.40 Min. :3.000
## 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50 1st Qu.:5.000
## Median :3.310 Median :0.6200 Median :10.20 Median :6.000
## Mean :3.311 Mean :0.6581 Mean :10.42 Mean :5.636
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10 3rd Qu.:6.000
## Max. :4.010 Max. :2.0000 Max. :14.90 Max. :8.000
# Pearson correlation matrix across all columns of the data set,
# including the response `quality`.
correlacion_red <- cor(x = datos_red, method = "pearson")
print(correlacion_red)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity 1.00000000 -0.256130895 0.67170343 0.114776724
## volatile.acidity -0.25613089 1.000000000 -0.55249568 0.001917882
## citric.acid 0.67170343 -0.552495685 1.00000000 0.143577162
## residual.sugar 0.11477672 0.001917882 0.14357716 1.000000000
## chlorides 0.09370519 0.061297772 0.20382291 0.055609535
## free.sulfur.dioxide -0.15379419 -0.010503827 -0.06097813 0.187048995
## total.sulfur.dioxide -0.11318144 0.076470005 0.03553302 0.203027882
## density 0.66804729 0.022026232 0.36494718 0.355283371
## pH -0.68297819 0.234937294 -0.54190414 -0.085652422
## sulphates 0.18300566 -0.260986685 0.31277004 0.005527121
## alcohol -0.06166827 -0.202288027 0.10990325 0.042075437
## quality 0.12405165 -0.390557780 0.22637251 0.013731637
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity 0.093705186 -0.153794193 -0.11318144
## volatile.acidity 0.061297772 -0.010503827 0.07647000
## citric.acid 0.203822914 -0.060978129 0.03553302
## residual.sugar 0.055609535 0.187048995 0.20302788
## chlorides 1.000000000 0.005562147 0.04740047
## free.sulfur.dioxide 0.005562147 1.000000000 0.66766645
## total.sulfur.dioxide 0.047400468 0.667666450 1.00000000
## density 0.200632327 -0.021945831 0.07126948
## pH -0.265026131 0.070377499 -0.06649456
## sulphates 0.371260481 0.051657572 0.04294684
## alcohol -0.221140545 -0.069408354 -0.20565394
## quality -0.128906560 -0.050656057 -0.18510029
## density pH sulphates alcohol
## fixed.acidity 0.66804729 -0.68297819 0.183005664 -0.06166827
## volatile.acidity 0.02202623 0.23493729 -0.260986685 -0.20228803
## citric.acid 0.36494718 -0.54190414 0.312770044 0.10990325
## residual.sugar 0.35528337 -0.08565242 0.005527121 0.04207544
## chlorides 0.20063233 -0.26502613 0.371260481 -0.22114054
## free.sulfur.dioxide -0.02194583 0.07037750 0.051657572 -0.06940835
## total.sulfur.dioxide 0.07126948 -0.06649456 0.042946836 -0.20565394
## density 1.00000000 -0.34169933 0.148506412 -0.49617977
## pH -0.34169933 1.00000000 -0.196647602 0.20563251
## sulphates 0.14850641 -0.19664760 1.000000000 0.09359475
## alcohol -0.49617977 0.20563251 0.093594750 1.00000000
## quality -0.17491923 -0.05773139 0.251397079 0.47616632
## quality
## fixed.acidity 0.12405165
## volatile.acidity -0.39055778
## citric.acid 0.22637251
## residual.sugar 0.01373164
## chlorides -0.12890656
## free.sulfur.dioxide -0.05065606
## total.sulfur.dioxide -0.18510029
## density -0.17491923
## pH -0.05773139
## sulphates 0.25139708
## alcohol 0.47616632
## quality 1.00000000
# Plot the correlation matrix, printing each coefficient as a number.
corrplot(corr = correlacion_red, method = "number")

# Fix the RNG seed so the partition drawn below is reproducible.
set.seed(seed = 2020)
# Row indices for the training split: 70% of the rows, drawn by caret using
# `quality` as the outcome. With list = FALSE the indices come back as a
# one-column matrix rather than a list.
entrena <- createDataPartition(
  y = datos_red[["quality"]],
  p = 0.7,
  list = FALSE
)
head(entrena, n = 6L)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 6
# Number of rows selected for training.
dim(entrena)[1L]
## [1] 1120
# First rows that were NOT selected (negative indexing drops the training rows).
head(datos_red[-entrena, ], n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 7 7.9 0.60 0.06 1.6 0.069
## 8 7.3 0.65 0.00 1.2 0.065
## 12 7.5 0.50 0.36 6.1 0.071
## 17 8.5 0.28 0.56 1.8 0.092
## 18 8.1 0.56 0.28 1.7 0.368
## 23 7.9 0.43 0.21 1.6 0.106
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 7 15 59 0.9964 3.30 0.46 9.4
## 8 15 21 0.9946 3.39 0.47 10.0
## 12 17 102 0.9978 3.35 0.80 10.5
## 17 35 103 0.9969 3.30 0.75 10.5
## 18 16 56 0.9968 3.11 1.28 9.3
## 23 10 37 0.9966 3.17 0.91 9.5
## quality
## 7 5
## 8 7
## 12 5
## 17 7
## 18 5
## 23 5
# Number of rows left out of the training split.
dim(datos_red[-entrena, ])[1L]
## [1] 479
# Training set: the rows whose indices were drawn into `entrena`.
datos_red.entrena <- datos_red[as.vector(entrena), ]
head(datos_red.entrena, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
## quality
## 1 5
## 2 5
## 3 5
## 4 6
## 5 5
## 6 5
# Last six rows of the training set.
tail(datos_red.entrena, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1590 6.6 0.725 0.20 7.8 0.073
## 1592 5.4 0.740 0.09 1.7 0.089
## 1593 6.3 0.510 0.13 2.3 0.076
## 1594 6.8 0.620 0.08 1.9 0.068
## 1596 5.9 0.550 0.10 2.2 0.062
## 1598 5.9 0.645 0.12 2.0 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1590 29 79 0.99770 3.29 0.54 9.2
## 1592 16 26 0.99402 3.67 0.56 11.6
## 1593 29 40 0.99574 3.42 0.75 11.0
## 1594 28 38 0.99651 3.42 0.82 9.5
## 1596 39 51 0.99512 3.52 0.76 11.2
## 1598 32 44 0.99547 3.57 0.71 10.2
## quality
## 1590 5
## 1592 6
## 1593 6
## 1594 6
## 1596 6
## 1598 5
# Descriptive statistics of the training set.
summary(object = datos_red.entrena)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.70 Min. :0.1200 Min. :0.0000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.4000 1st Qu.:0.0975 1st Qu.: 1.900
## Median : 7.90 Median :0.5300 Median :0.2500 Median : 2.200
## Mean : 8.34 Mean :0.5326 Mean :0.2691 Mean : 2.554
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.4300 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :0.7900 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.03400 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07100 1st Qu.: 8.00 1st Qu.: 22.00 1st Qu.:0.9956
## Median :0.08000 Median :14.00 Median : 38.00 Median :0.9968
## Mean :0.08693 Mean :16.13 Mean : 46.82 Mean :0.9968
## 3rd Qu.:0.09025 3rd Qu.:22.00 3rd Qu.: 62.00 3rd Qu.:0.9979
## Max. :0.46700 Max. :72.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol quality
## Min. :2.860 Min. :0.3700 Min. : 8.4 Min. :3.000
## 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.5 1st Qu.:5.000
## Median :3.310 Median :0.6200 Median :10.1 Median :6.000
## Mean :3.311 Mean :0.6588 Mean :10.4 Mean :5.635
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.1 3rd Qu.:6.000
## Max. :4.010 Max. :1.9800 Max. :14.9 Max. :8.000
# Validation set: every row that is not part of the training indices.
datos_red.valida <- datos_red[-as.vector(entrena), ]
head(datos_red.valida, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 7 7.9 0.60 0.06 1.6 0.069
## 8 7.3 0.65 0.00 1.2 0.065
## 12 7.5 0.50 0.36 6.1 0.071
## 17 8.5 0.28 0.56 1.8 0.092
## 18 8.1 0.56 0.28 1.7 0.368
## 23 7.9 0.43 0.21 1.6 0.106
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 7 15 59 0.9964 3.30 0.46 9.4
## 8 15 21 0.9946 3.39 0.47 10.0
## 12 17 102 0.9978 3.35 0.80 10.5
## 17 35 103 0.9969 3.30 0.75 10.5
## 18 16 56 0.9968 3.11 1.28 9.3
## 23 10 37 0.9966 3.17 0.91 9.5
## quality
## 7 5
## 8 7
## 12 5
## 17 7
## 18 5
## 23 5
# Last six rows of the validation set.
tail(datos_red.valida, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1588 5.8 0.61 0.11 1.8 0.066
## 1589 7.2 0.66 0.33 2.5 0.068
## 1591 6.3 0.55 0.15 1.8 0.077
## 1595 6.2 0.60 0.08 2.0 0.090
## 1597 6.3 0.51 0.13 2.3 0.076
## 1599 6.0 0.31 0.47 3.6 0.067
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1588 18 28 0.99483 3.55 0.66 10.9
## 1589 34 102 0.99414 3.27 0.78 12.8
## 1591 26 35 0.99314 3.32 0.82 11.6
## 1595 32 44 0.99490 3.45 0.58 10.5
## 1597 29 40 0.99574 3.42 0.75 11.0
## 1599 18 42 0.99549 3.39 0.66 11.0
## quality
## 1588 6
## 1589 6
## 1591 6
## 1595 5
## 1597 6
## 1599 6
# Descriptive statistics of the validation set.
summary(object = datos_red.valida)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.600 Min. :0.1200 Min. :0.0000 Min. :1.200
## 1st Qu.: 7.100 1st Qu.:0.3900 1st Qu.:0.0900 1st Qu.:1.900
## Median : 7.900 Median :0.5000 Median :0.2800 Median :2.200
## Mean : 8.272 Mean :0.5166 Mean :0.2753 Mean :2.503
## 3rd Qu.: 9.200 3rd Qu.:0.6300 3rd Qu.:0.4200 3rd Qu.:2.600
## Max. :15.600 Max. :1.1850 Max. :1.0000 Max. :9.000
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9902
## 1st Qu.:0.06800 1st Qu.: 7.00 1st Qu.: 21.50 1st Qu.:0.9956
## Median :0.07800 Median :12.00 Median : 36.00 Median :0.9967
## Mean :0.08872 Mean :15.29 Mean : 45.64 Mean :0.9966
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 63.00 3rd Qu.:0.9978
## Max. :0.61100 Max. :68.00 Max. :155.00 Max. :1.0031
## pH sulphates alcohol quality
## Min. :2.740 Min. :0.3300 Min. : 9.00 Min. :3.000
## 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50 1st Qu.:5.000
## Median :3.310 Median :0.6200 Median :10.30 Median :6.000
## Mean :3.312 Mean :0.6566 Mean :10.47 Mean :5.639
## 3rd Qu.:3.410 3rd Qu.:0.7300 3rd Qu.:11.10 3rd Qu.:6.000
## Max. :3.900 Max. :2.0000 Max. :14.00 Max. :8.000
# Multiple linear regression of `quality` on every other column,
# fitted on the training set only.
modelo_red <- lm(formula = quality ~ ., data = datos_red.entrena)
print(modelo_red)
##
## Call:
## lm(formula = quality ~ ., data = datos_red.entrena)
##
## Coefficients:
## (Intercept) fixed.acidity volatile.acidity
## 20.747534 0.008274 -0.922244
## citric.acid residual.sugar chlorides
## -0.019610 0.011741 -1.804597
## free.sulfur.dioxide total.sulfur.dioxide density
## 0.006650 -0.003765 -16.312818
## pH sulphates alcohol
## -0.573518 0.928338 0.294048
# Coefficient table, residual summary and fit statistics of the full model.
summary(object = modelo_red)
##
## Call:
## lm(formula = quality ~ ., data = datos_red.entrena)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.70643 -0.36046 -0.04914 0.45944 1.98343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.075e+01 2.562e+01 0.810 0.418266
## fixed.acidity 8.274e-03 3.111e-02 0.266 0.790320
## volatile.acidity -9.222e-01 1.446e-01 -6.378 2.63e-10 ***
## citric.acid -1.961e-02 1.793e-01 -0.109 0.912913
## residual.sugar 1.174e-02 1.716e-02 0.684 0.493948
## chlorides -1.805e+00 5.417e-01 -3.331 0.000893 ***
## free.sulfur.dioxide 6.650e-03 2.652e-03 2.508 0.012300 *
## total.sulfur.dioxide -3.765e-03 8.681e-04 -4.337 1.57e-05 ***
## density -1.631e+01 2.613e+01 -0.624 0.532544
## pH -5.735e-01 2.276e-01 -2.520 0.011878 *
## sulphates 9.283e-01 1.360e-01 6.824 1.46e-11 ***
## alcohol 2.940e-01 3.192e-02 9.212 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6611 on 1108 degrees of freedom
## Multiple R-squared: 0.3558, Adjusted R-squared: 0.3494
## F-statistic: 55.64 on 11 and 1108 DF, p-value: < 2.2e-16
# Reduced data set: the response plus the five predictors marked `***` in the
# summary of the full model. Columns are taken from the complete data set.
datos_red_nuevo <- datos_red %>%
  select(
    quality, volatile.acidity, chlorides,
    total.sulfur.dioxide, sulphates, alcohol
  )
# Recompute the correlation matrix on the reduced columns; this overwrites the
# full-matrix result stored under the same name.
correlacion_red <- cor(x = datos_red_nuevo, method = "pearson")
print(correlacion_red)
## quality volatile.acidity chlorides
## quality 1.0000000 -0.39055778 -0.12890656
## volatile.acidity -0.3905578 1.00000000 0.06129777
## chlorides -0.1289066 0.06129777 1.00000000
## total.sulfur.dioxide -0.1851003 0.07647000 0.04740047
## sulphates 0.2513971 -0.26098669 0.37126048
## alcohol 0.4761663 -0.20228803 -0.22114054
## total.sulfur.dioxide sulphates alcohol
## quality -0.18510029 0.25139708 0.47616632
## volatile.acidity 0.07647000 -0.26098669 -0.20228803
## chlorides 0.04740047 0.37126048 -0.22114054
## total.sulfur.dioxide 1.00000000 0.04294684 -0.20565394
## sulphates 0.04294684 1.00000000 0.09359475
## alcohol -0.20565394 0.09359475 1.00000000
# Plot the reduced correlation matrix with numeric coefficients.
corrplot(corr = correlacion_red, method = "number")

# Reset the RNG to the same seed before partitioning the reduced data set.
set.seed(seed = 2020)
# Training indices for the reduced data: 70% of the rows, with `quality` as
# the outcome; returned as a one-column matrix (list = FALSE).
# NOTE(review): same seed and same outcome column as the earlier partition,
# so this presumably reproduces the same indices; the printed output agrees.
entrena <- createDataPartition(
  y = datos_red_nuevo[["quality"]],
  p = 0.7,
  list = FALSE
)
head(entrena, n = 6L)
## Resample1
## [1,] 1
## [2,] 2
## [3,] 3
## [4,] 4
## [5,] 5
## [6,] 6
# Number of rows selected for training.
dim(entrena)[1L]
## [1] 1120
# First rows of the reduced data that were NOT selected for training.
head(datos_red_nuevo[-entrena, ], n = 6L)
## quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 7 5 0.60 0.069 59 0.46 9.4
## 8 7 0.65 0.065 21 0.47 10.0
## 12 5 0.50 0.071 102 0.80 10.5
## 17 7 0.28 0.092 103 0.75 10.5
## 18 5 0.56 0.368 56 1.28 9.3
## 23 5 0.43 0.106 37 0.91 9.5
# Number of reduced-data rows left out of the training split.
dim(datos_red_nuevo[-entrena, ])[1L]
## [1] 479
# Training subset of the reduced data.
datos_red_nuevo.entrena <- datos_red_nuevo[as.vector(entrena), ]
head(datos_red_nuevo.entrena, n = 6L)
## quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 1 5 0.70 0.076 34 0.56 9.4
## 2 5 0.88 0.098 67 0.68 9.8
## 3 5 0.76 0.092 54 0.65 9.8
## 4 6 0.28 0.075 60 0.58 9.8
## 5 5 0.70 0.076 34 0.56 9.4
## 6 5 0.66 0.075 40 0.56 9.4
# Last six rows of the reduced training set.
tail(datos_red_nuevo.entrena, n = 6L)
## quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 1590 5 0.725 0.073 79 0.54 9.2
## 1592 6 0.740 0.089 26 0.56 11.6
## 1593 6 0.510 0.076 40 0.75 11.0
## 1594 6 0.620 0.068 38 0.82 9.5
## 1596 6 0.550 0.062 51 0.76 11.2
## 1598 5 0.645 0.075 44 0.71 10.2
# Descriptive statistics of the reduced training set.
summary(object = datos_red_nuevo.entrena)
## quality volatile.acidity chlorides total.sulfur.dioxide
## Min. :3.000 Min. :0.1200 Min. :0.03400 Min. : 6.00
## 1st Qu.:5.000 1st Qu.:0.4000 1st Qu.:0.07100 1st Qu.: 22.00
## Median :6.000 Median :0.5300 Median :0.08000 Median : 38.00
## Mean :5.635 Mean :0.5326 Mean :0.08693 Mean : 46.82
## 3rd Qu.:6.000 3rd Qu.:0.6400 3rd Qu.:0.09025 3rd Qu.: 62.00
## Max. :8.000 Max. :1.5800 Max. :0.46700 Max. :289.00
## sulphates alcohol
## Min. :0.3700 Min. : 8.4
## 1st Qu.:0.5500 1st Qu.: 9.5
## Median :0.6200 Median :10.1
## Mean :0.6588 Mean :10.4
## 3rd Qu.:0.7300 3rd Qu.:11.1
## Max. :1.9800 Max. :14.9
# Validation subset of the reduced data: rows outside the training indices.
datos_red_nuevo.valida <- datos_red_nuevo[-as.vector(entrena), ]
head(datos_red_nuevo.valida, n = 6L)
## quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 7 5 0.60 0.069 59 0.46 9.4
## 8 7 0.65 0.065 21 0.47 10.0
## 12 5 0.50 0.071 102 0.80 10.5
## 17 7 0.28 0.092 103 0.75 10.5
## 18 5 0.56 0.368 56 1.28 9.3
## 23 5 0.43 0.106 37 0.91 9.5
# Last six rows of the reduced validation set.
tail(datos_red_nuevo.valida, n = 6L)
## quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 1588 6 0.61 0.066 28 0.66 10.9
## 1589 6 0.66 0.068 102 0.78 12.8
## 1591 6 0.55 0.077 35 0.82 11.6
## 1595 5 0.60 0.090 44 0.58 10.5
## 1597 6 0.51 0.076 40 0.75 11.0
## 1599 6 0.31 0.067 42 0.66 11.0
# Descriptive statistics of the reduced validation set.
summary(object = datos_red_nuevo.valida)
## quality volatile.acidity chlorides total.sulfur.dioxide
## Min. :3.000 Min. :0.1200 Min. :0.01200 Min. : 6.00
## 1st Qu.:5.000 1st Qu.:0.3900 1st Qu.:0.06800 1st Qu.: 21.50
## Median :6.000 Median :0.5000 Median :0.07800 Median : 36.00
## Mean :5.639 Mean :0.5166 Mean :0.08872 Mean : 45.64
## 3rd Qu.:6.000 3rd Qu.:0.6300 3rd Qu.:0.09000 3rd Qu.: 63.00
## Max. :8.000 Max. :1.1850 Max. :0.61100 Max. :155.00
## sulphates alcohol
## Min. :0.3300 Min. : 9.00
## 1st Qu.:0.5500 1st Qu.: 9.50
## Median :0.6200 Median :10.30
## Mean :0.6566 Mean :10.47
## 3rd Qu.:0.7300 3rd Qu.:11.10
## Max. :2.0000 Max. :14.00
# Linear regression of `quality` on the five retained predictors,
# fitted on the reduced training set.
modelo_red_nuevo <- lm(formula = quality ~ ., data = datos_red_nuevo.entrena)
print(modelo_red_nuevo)
##
## Call:
## lm(formula = quality ~ ., data = datos_red_nuevo.entrena)
##
## Coefficients:
## (Intercept) volatile.acidity chlorides
## 2.76400 -1.08935 -1.43685
## total.sulfur.dioxide sulphates alcohol
## -0.00213 0.94817 0.29334
# Coefficient table and fit statistics of the reduced model.
summary(object = modelo_red_nuevo)
##
## Call:
## lm(formula = quality ~ ., data = datos_red_nuevo.entrena)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.69357 -0.37628 -0.05968 0.46112 2.08671
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.7639974 0.2488583 11.107 < 2e-16 ***
## volatile.acidity -1.0893474 0.1164057 -9.358 < 2e-16 ***
## chlorides -1.4368514 0.5168694 -2.780 0.005529 **
## total.sulfur.dioxide -0.0021300 0.0006106 -3.488 0.000505 ***
## sulphates 0.9481661 0.1315576 7.207 1.05e-12 ***
## alcohol 0.2933379 0.0202158 14.510 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6659 on 1114 degrees of freedom
## Multiple R-squared: 0.3428, Adjusted R-squared: 0.3398
## F-statistic: 116.2 on 5 and 1114 DF, p-value: < 2.2e-16
# Print the full model's summary again, directly after the reduced model's.
summary(object = modelo_red)
##
## Call:
## lm(formula = quality ~ ., data = datos_red.entrena)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.70643 -0.36046 -0.04914 0.45944 1.98343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.075e+01 2.562e+01 0.810 0.418266
## fixed.acidity 8.274e-03 3.111e-02 0.266 0.790320
## volatile.acidity -9.222e-01 1.446e-01 -6.378 2.63e-10 ***
## citric.acid -1.961e-02 1.793e-01 -0.109 0.912913
## residual.sugar 1.174e-02 1.716e-02 0.684 0.493948
## chlorides -1.805e+00 5.417e-01 -3.331 0.000893 ***
## free.sulfur.dioxide 6.650e-03 2.652e-03 2.508 0.012300 *
## total.sulfur.dioxide -3.765e-03 8.681e-04 -4.337 1.57e-05 ***
## density -1.631e+01 2.613e+01 -0.624 0.532544
## pH -5.735e-01 2.276e-01 -2.520 0.011878 *
## sulphates 9.283e-01 1.360e-01 6.824 1.46e-11 ***
## alcohol 2.940e-01 3.192e-02 9.212 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6611 on 1108 degrees of freedom
## Multiple R-squared: 0.3558, Adjusted R-squared: 0.3494
## F-statistic: 55.64 on 11 and 1108 DF, p-value: < 2.2e-16
# Predicted quality for every validation row, using the full model
# (`modelo_red`, all eleven predictors).
predecir <- predict(object = modelo_red, newdata = datos_red.valida)
# First validation row, to compare its observed quality with the prediction.
head(datos_red.valida, n = 1L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 7 7.9 0.6 0.06 1.6 0.069
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 7 15 59 0.9964 3.3 0.46 9.4
## quality
## 7 5
# Last validation row, for the same observed-versus-predicted comparison.
tail(datos_red.valida, n = 1L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1599 6 0.31 0.47 3.6 0.067
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1599 18 42 0.99549 3.39 0.66 11
## quality
## 1599 6
# Prediction for the first validation row (named by its original row number).
head(predecir, n = 1L)
## 7
## 5.074639
# Prediction at the position of the last validation row.
predecir[dim(datos_red.valida)[1L]]
## 1599
## 6.048752
# One-row data frame describing a hypothetical wine to score with the fitted
# model. Column names must match the predictor names used when fitting.
# The values are passed as named arguments to data.frame() so that no
# throwaway scalars (e.g. one called `density`, like stats::density) are left
# behind in the global environment.
# NOTE(review): density = 0.95 lies below the minimum seen in the data
# (0.9901 in the summary output), so the prediction for this row is an
# extrapolation; confirm the value is intended.
nuevo.dato <- data.frame(
  fixed.acidity = 7.0,
  volatile.acidity = 0.68,
  citric.acid = 0.16,
  residual.sugar = 5,
  chlorides = 0.055,
  free.sulfur.dioxide = 15,
  total.sulfur.dioxide = 60,
  density = 0.95,
  pH = 4,
  sulphates = 0.6,
  alcohol = 8.5
)
nuevo.dato
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7 0.68 0.16 5 0.055
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 15 60 0.95 4 0.6 8.5
# Score the hypothetical wine with the full model. This reuses the name
# `predecir`, so the validation-set predictions stored earlier are replaced.
predecir <- predict(object = modelo_red, newdata = nuevo.dato)
print(predecir)
## 1
## 5.273644