library(readr)
library(dplyr)
## Warning: package 'dplyr' was built under R version 3.6.2
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(caret) # Para particiones
## Warning: package 'caret' was built under R version 3.6.2
## Loading required package: lattice
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 3.6.2
library(corrplot) # Para correlaciones visuales
## Warning: package 'corrplot' was built under R version 3.6.2
## corrplot 0.84 loaded
# Load the red-wine quality dataset.
# The data directory is kept in a variable and joined with file.path()
# instead of calling setwd(), so the script no longer changes the session's
# working directory as a side effect.
dir_datos <- "~/tabajos diplomado/modulo 5/Git/FundaMachineLearning/datos"
ruta_red <- file.path(dir_datos, "winequality-red.csv")
# Fail early with a clear message if the file is not where we expect it.
if (!file.exists(ruta_red)) {
  stop("Data file not found: ", ruta_red, call. = FALSE)
}
datos_red <- read.csv(ruta_red)

# Preview the first rows of the raw data (head() shows six by default).
head(datos_red)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5
# Preview the last rows of the raw data.
tail(datos_red)
##      fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1594           6.8            0.620        0.08            1.9     0.068
## 1595           6.2            0.600        0.08            2.0     0.090
## 1596           5.9            0.550        0.10            2.2     0.062
## 1597           6.3            0.510        0.13            2.3     0.076
## 1598           5.9            0.645        0.12            2.0     0.075
## 1599           6.0            0.310        0.47            3.6     0.067
##      free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1594                  28                   38 0.99651 3.42      0.82     9.5
## 1595                  32                   44 0.99490 3.45      0.58    10.5
## 1596                  39                   51 0.99512 3.52      0.76    11.2
## 1597                  29                   40 0.99574 3.42      0.75    11.0
## 1598                  32                   44 0.99547 3.57      0.71    10.2
## 1599                  18                   42 0.99549 3.39      0.66    11.0
##      quality
## 1594       6
## 1595       5
## 1596       6
## 1597       6
## 1598       5
## 1599       6
# Inspect the structure: dimensions, column names, types and sample values.
str(datos_red)
## 'data.frame':    1599 obs. of  12 variables:
##  $ fixed.acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile.acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric.acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual.sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free.sulfur.dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total.sulfur.dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : int  5 5 5 6 5 5 5 7 7 5 ...
# Per-column summary statistics (min, quartiles, mean, max).
summary(datos_red)
##  fixed.acidity   volatile.acidity  citric.acid    residual.sugar  
##  Min.   : 4.60   Min.   :0.1200   Min.   :0.000   Min.   : 0.900  
##  1st Qu.: 7.10   1st Qu.:0.3900   1st Qu.:0.090   1st Qu.: 1.900  
##  Median : 7.90   Median :0.5200   Median :0.260   Median : 2.200  
##  Mean   : 8.32   Mean   :0.5278   Mean   :0.271   Mean   : 2.539  
##  3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.420   3rd Qu.: 2.600  
##  Max.   :15.90   Max.   :1.5800   Max.   :1.000   Max.   :15.500  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.01200   Min.   : 1.00       Min.   :  6.00       Min.   :0.9901  
##  1st Qu.:0.07000   1st Qu.: 7.00       1st Qu.: 22.00       1st Qu.:0.9956  
##  Median :0.07900   Median :14.00       Median : 38.00       Median :0.9968  
##  Mean   :0.08747   Mean   :15.87       Mean   : 46.47       Mean   :0.9967  
##  3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 62.00       3rd Qu.:0.9978  
##  Max.   :0.61100   Max.   :72.00       Max.   :289.00       Max.   :1.0037  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.740   Min.   :0.3300   Min.   : 8.40   Min.   :3.000  
##  1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.310   Median :0.6200   Median :10.20   Median :6.000  
##  Mean   :3.311   Mean   :0.6581   Mean   :10.42   Mean   :5.636  
##  3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.10   3rd Qu.:6.000  
##  Max.   :4.010   Max.   :2.0000   Max.   :14.90   Max.   :8.000
# Pairwise Pearson correlations between every column of the dataset,
# stored under `correlacion_red` and printed for inspection.
correlacion_red <- cor(x = datos_red, method = "pearson")
print(correlacion_red)
##                      fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity           1.00000000     -0.256130895  0.67170343    0.114776724
## volatile.acidity       -0.25613089      1.000000000 -0.55249568    0.001917882
## citric.acid             0.67170343     -0.552495685  1.00000000    0.143577162
## residual.sugar          0.11477672      0.001917882  0.14357716    1.000000000
## chlorides               0.09370519      0.061297772  0.20382291    0.055609535
## free.sulfur.dioxide    -0.15379419     -0.010503827 -0.06097813    0.187048995
## total.sulfur.dioxide   -0.11318144      0.076470005  0.03553302    0.203027882
## density                 0.66804729      0.022026232  0.36494718    0.355283371
## pH                     -0.68297819      0.234937294 -0.54190414   -0.085652422
## sulphates               0.18300566     -0.260986685  0.31277004    0.005527121
## alcohol                -0.06166827     -0.202288027  0.10990325    0.042075437
## quality                 0.12405165     -0.390557780  0.22637251    0.013731637
##                         chlorides free.sulfur.dioxide total.sulfur.dioxide
## fixed.acidity         0.093705186        -0.153794193          -0.11318144
## volatile.acidity      0.061297772        -0.010503827           0.07647000
## citric.acid           0.203822914        -0.060978129           0.03553302
## residual.sugar        0.055609535         0.187048995           0.20302788
## chlorides             1.000000000         0.005562147           0.04740047
## free.sulfur.dioxide   0.005562147         1.000000000           0.66766645
## total.sulfur.dioxide  0.047400468         0.667666450           1.00000000
## density               0.200632327        -0.021945831           0.07126948
## pH                   -0.265026131         0.070377499          -0.06649456
## sulphates             0.371260481         0.051657572           0.04294684
## alcohol              -0.221140545        -0.069408354          -0.20565394
## quality              -0.128906560        -0.050656057          -0.18510029
##                          density          pH    sulphates     alcohol
## fixed.acidity         0.66804729 -0.68297819  0.183005664 -0.06166827
## volatile.acidity      0.02202623  0.23493729 -0.260986685 -0.20228803
## citric.acid           0.36494718 -0.54190414  0.312770044  0.10990325
## residual.sugar        0.35528337 -0.08565242  0.005527121  0.04207544
## chlorides             0.20063233 -0.26502613  0.371260481 -0.22114054
## free.sulfur.dioxide  -0.02194583  0.07037750  0.051657572 -0.06940835
## total.sulfur.dioxide  0.07126948 -0.06649456  0.042946836 -0.20565394
## density               1.00000000 -0.34169933  0.148506412 -0.49617977
## pH                   -0.34169933  1.00000000 -0.196647602  0.20563251
## sulphates             0.14850641 -0.19664760  1.000000000  0.09359475
## alcohol              -0.49617977  0.20563251  0.093594750  1.00000000
## quality              -0.17491923 -0.05773139  0.251397079  0.47616632
##                          quality
## fixed.acidity         0.12405165
## volatile.acidity     -0.39055778
## citric.acid           0.22637251
## residual.sugar        0.01373164
## chlorides            -0.12890656
## free.sulfur.dioxide  -0.05065606
## total.sulfur.dioxide -0.18510029
## density              -0.17491923
## pH                   -0.05773139
## sulphates             0.25139708
## alcohol               0.47616632
## quality               1.00000000
# Visualize the correlation matrix, drawing each coefficient as a number.
corrplot(correlacion_red, method = "number")

# Fix the RNG seed so the train/validation split is reproducible.
set.seed(2020)
# Row indices of the training partition: 70% of the rows, drawn by caret's
# createDataPartition() on `quality`. With list = FALSE the result is a
# one-column matrix of row numbers.
entrena <- createDataPartition(
  y = datos_red[["quality"]],
  p = 0.7,
  list = FALSE
)
head(entrena)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         5
## [6,]         6
# Number of rows selected for training.
nrow(entrena)
## [1] 1120
# First rows that were NOT selected for training.
head(datos_red[-entrena, ])
##    fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 7            7.9             0.60        0.06            1.6     0.069
## 8            7.3             0.65        0.00            1.2     0.065
## 12           7.5             0.50        0.36            6.1     0.071
## 17           8.5             0.28        0.56            1.8     0.092
## 18           8.1             0.56        0.28            1.7     0.368
## 23           7.9             0.43        0.21            1.6     0.106
##    free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 7                   15                   59  0.9964 3.30      0.46     9.4
## 8                   15                   21  0.9946 3.39      0.47    10.0
## 12                  17                  102  0.9978 3.35      0.80    10.5
## 17                  35                  103  0.9969 3.30      0.75    10.5
## 18                  16                   56  0.9968 3.11      1.28     9.3
## 23                  10                   37  0.9966 3.17      0.91     9.5
##    quality
## 7        5
## 8        7
## 12       5
## 17       7
## 18       5
## 23       5
# Number of rows held out of the training partition.
nrow(datos_red[-entrena, ])
## [1] 479
# Training set: the rows picked by the partition indices.
datos_red.entrena <- datos_red[entrena, , drop = FALSE]

# Peek at the first training rows.
head(datos_red.entrena)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5
# Last rows of the training set.
tail(datos_red.entrena)
##      fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1590           6.6            0.725        0.20            7.8     0.073
## 1592           5.4            0.740        0.09            1.7     0.089
## 1593           6.3            0.510        0.13            2.3     0.076
## 1594           6.8            0.620        0.08            1.9     0.068
## 1596           5.9            0.550        0.10            2.2     0.062
## 1598           5.9            0.645        0.12            2.0     0.075
##      free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1590                  29                   79 0.99770 3.29      0.54     9.2
## 1592                  16                   26 0.99402 3.67      0.56    11.6
## 1593                  29                   40 0.99574 3.42      0.75    11.0
## 1594                  28                   38 0.99651 3.42      0.82     9.5
## 1596                  39                   51 0.99512 3.52      0.76    11.2
## 1598                  32                   44 0.99547 3.57      0.71    10.2
##      quality
## 1590       5
## 1592       6
## 1593       6
## 1594       6
## 1596       6
## 1598       5
# Summary statistics of the training set, to compare against the full data.
summary(datos_red.entrena)
##  fixed.acidity   volatile.acidity  citric.acid     residual.sugar  
##  Min.   : 4.70   Min.   :0.1200   Min.   :0.0000   Min.   : 0.900  
##  1st Qu.: 7.10   1st Qu.:0.4000   1st Qu.:0.0975   1st Qu.: 1.900  
##  Median : 7.90   Median :0.5300   Median :0.2500   Median : 2.200  
##  Mean   : 8.34   Mean   :0.5326   Mean   :0.2691   Mean   : 2.554  
##  3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.4300   3rd Qu.: 2.600  
##  Max.   :15.90   Max.   :1.5800   Max.   :0.7900   Max.   :15.500  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.03400   Min.   : 1.00       Min.   :  6.00       Min.   :0.9901  
##  1st Qu.:0.07100   1st Qu.: 8.00       1st Qu.: 22.00       1st Qu.:0.9956  
##  Median :0.08000   Median :14.00       Median : 38.00       Median :0.9968  
##  Mean   :0.08693   Mean   :16.13       Mean   : 46.82       Mean   :0.9968  
##  3rd Qu.:0.09025   3rd Qu.:22.00       3rd Qu.: 62.00       3rd Qu.:0.9979  
##  Max.   :0.46700   Max.   :72.00       Max.   :289.00       Max.   :1.0037  
##        pH          sulphates         alcohol        quality     
##  Min.   :2.860   Min.   :0.3700   Min.   : 8.4   Min.   :3.000  
##  1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.5   1st Qu.:5.000  
##  Median :3.310   Median :0.6200   Median :10.1   Median :6.000  
##  Mean   :3.311   Mean   :0.6588   Mean   :10.4   Mean   :5.635  
##  3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.1   3rd Qu.:6.000  
##  Max.   :4.010   Max.   :1.9800   Max.   :14.9   Max.   :8.000
# Validation set: every row that was not selected for training.
datos_red.valida <- datos_red[-entrena, , drop = FALSE]

# Peek at the first validation rows.
head(datos_red.valida)
##    fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 7            7.9             0.60        0.06            1.6     0.069
## 8            7.3             0.65        0.00            1.2     0.065
## 12           7.5             0.50        0.36            6.1     0.071
## 17           8.5             0.28        0.56            1.8     0.092
## 18           8.1             0.56        0.28            1.7     0.368
## 23           7.9             0.43        0.21            1.6     0.106
##    free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 7                   15                   59  0.9964 3.30      0.46     9.4
## 8                   15                   21  0.9946 3.39      0.47    10.0
## 12                  17                  102  0.9978 3.35      0.80    10.5
## 17                  35                  103  0.9969 3.30      0.75    10.5
## 18                  16                   56  0.9968 3.11      1.28     9.3
## 23                  10                   37  0.9966 3.17      0.91     9.5
##    quality
## 7        5
## 8        7
## 12       5
## 17       7
## 18       5
## 23       5
# Last rows of the validation set.
tail(datos_red.valida)
##      fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1588           5.8             0.61        0.11            1.8     0.066
## 1589           7.2             0.66        0.33            2.5     0.068
## 1591           6.3             0.55        0.15            1.8     0.077
## 1595           6.2             0.60        0.08            2.0     0.090
## 1597           6.3             0.51        0.13            2.3     0.076
## 1599           6.0             0.31        0.47            3.6     0.067
##      free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1588                  18                   28 0.99483 3.55      0.66    10.9
## 1589                  34                  102 0.99414 3.27      0.78    12.8
## 1591                  26                   35 0.99314 3.32      0.82    11.6
## 1595                  32                   44 0.99490 3.45      0.58    10.5
## 1597                  29                   40 0.99574 3.42      0.75    11.0
## 1599                  18                   42 0.99549 3.39      0.66    11.0
##      quality
## 1588       6
## 1589       6
## 1591       6
## 1595       5
## 1597       6
## 1599       6
# Summary statistics of the validation set.
summary(datos_red.valida)
##  fixed.acidity    volatile.acidity  citric.acid     residual.sugar 
##  Min.   : 4.600   Min.   :0.1200   Min.   :0.0000   Min.   :1.200  
##  1st Qu.: 7.100   1st Qu.:0.3900   1st Qu.:0.0900   1st Qu.:1.900  
##  Median : 7.900   Median :0.5000   Median :0.2800   Median :2.200  
##  Mean   : 8.272   Mean   :0.5166   Mean   :0.2753   Mean   :2.503  
##  3rd Qu.: 9.200   3rd Qu.:0.6300   3rd Qu.:0.4200   3rd Qu.:2.600  
##  Max.   :15.600   Max.   :1.1850   Max.   :1.0000   Max.   :9.000  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.01200   Min.   : 1.00       Min.   :  6.00       Min.   :0.9902  
##  1st Qu.:0.06800   1st Qu.: 7.00       1st Qu.: 21.50       1st Qu.:0.9956  
##  Median :0.07800   Median :12.00       Median : 36.00       Median :0.9967  
##  Mean   :0.08872   Mean   :15.29       Mean   : 45.64       Mean   :0.9966  
##  3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 63.00       3rd Qu.:0.9978  
##  Max.   :0.61100   Max.   :68.00       Max.   :155.00       Max.   :1.0031  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.740   Min.   :0.3300   Min.   : 9.00   Min.   :3.000  
##  1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.310   Median :0.6200   Median :10.30   Median :6.000  
##  Mean   :3.312   Mean   :0.6566   Mean   :10.47   Mean   :5.639  
##  3rd Qu.:3.410   3rd Qu.:0.7300   3rd Qu.:11.10   3rd Qu.:6.000  
##  Max.   :3.900   Max.   :2.0000   Max.   :14.00   Max.   :8.000
# Fit a linear regression of `quality` on all remaining columns, using only
# the training rows, then print the fitted coefficients.
modelo_red <- lm(formula = quality ~ ., data = datos_red.entrena)
print(modelo_red)
## 
## Call:
## lm(formula = quality ~ ., data = datos_red.entrena)
## 
## Coefficients:
##          (Intercept)         fixed.acidity      volatile.acidity  
##            20.747534              0.008274             -0.922244  
##          citric.acid        residual.sugar             chlorides  
##            -0.019610              0.011741             -1.804597  
##  free.sulfur.dioxide  total.sulfur.dioxide               density  
##             0.006650             -0.003765            -16.312818  
##                   pH             sulphates               alcohol  
##            -0.573518              0.928338              0.294048
# Coefficient table, significance tests and fit statistics for the full model.
summary(modelo_red)
## 
## Call:
## lm(formula = quality ~ ., data = datos_red.entrena)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.70643 -0.36046 -0.04914  0.45944  1.98343 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.075e+01  2.562e+01   0.810 0.418266    
## fixed.acidity         8.274e-03  3.111e-02   0.266 0.790320    
## volatile.acidity     -9.222e-01  1.446e-01  -6.378 2.63e-10 ***
## citric.acid          -1.961e-02  1.793e-01  -0.109 0.912913    
## residual.sugar        1.174e-02  1.716e-02   0.684 0.493948    
## chlorides            -1.805e+00  5.417e-01  -3.331 0.000893 ***
## free.sulfur.dioxide   6.650e-03  2.652e-03   2.508 0.012300 *  
## total.sulfur.dioxide -3.765e-03  8.681e-04  -4.337 1.57e-05 ***
## density              -1.631e+01  2.613e+01  -0.624 0.532544    
## pH                   -5.735e-01  2.276e-01  -2.520 0.011878 *  
## sulphates             9.283e-01  1.360e-01   6.824 1.46e-11 ***
## alcohol               2.940e-01  3.192e-02   9.212  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6611 on 1108 degrees of freedom
## Multiple R-squared:  0.3558, Adjusted R-squared:  0.3494 
## F-statistic: 55.64 on 11 and 1108 DF,  p-value: < 2.2e-16
# Keep the response plus the five predictors flagged at the 0.001 level in
# the full-model summary.
datos_red_nuevo <- select(
  datos_red,
  quality,
  volatile.acidity,
  chlorides,
  total.sulfur.dioxide,
  sulphates,
  alcohol
)

# Recompute the correlation matrix on the reduced column set (this replaces
# the value previously held by `correlacion_red`) and print it.
correlacion_red <- cor(x = datos_red_nuevo, method = "pearson")
print(correlacion_red)
##                         quality volatile.acidity   chlorides
## quality               1.0000000      -0.39055778 -0.12890656
## volatile.acidity     -0.3905578       1.00000000  0.06129777
## chlorides            -0.1289066       0.06129777  1.00000000
## total.sulfur.dioxide -0.1851003       0.07647000  0.04740047
## sulphates             0.2513971      -0.26098669  0.37126048
## alcohol               0.4761663      -0.20228803 -0.22114054
##                      total.sulfur.dioxide   sulphates     alcohol
## quality                       -0.18510029  0.25139708  0.47616632
## volatile.acidity               0.07647000 -0.26098669 -0.20228803
## chlorides                      0.04740047  0.37126048 -0.22114054
## total.sulfur.dioxide           1.00000000  0.04294684 -0.20565394
## sulphates                      0.04294684  1.00000000  0.09359475
## alcohol                       -0.20565394  0.09359475  1.00000000
# Visualize the reduced correlation matrix, one number per cell.
corrplot(correlacion_red, method = "number")

# Reset the RNG to the same seed before partitioning the reduced data.
set.seed(2020)
# Training row indices for the reduced data: 70% of the rows, drawn by
# caret's createDataPartition() on `quality`, returned as a one-column matrix.
entrena <- createDataPartition(
  y = datos_red_nuevo[["quality"]],
  p = 0.7,
  list = FALSE
)
head(entrena)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         5
## [6,]         6
# Number of rows selected for training.
nrow(entrena)
## [1] 1120
# First rows of the reduced data that were NOT selected for training.
head(datos_red_nuevo[-entrena, ])
##    quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 7        5             0.60     0.069                   59      0.46     9.4
## 8        7             0.65     0.065                   21      0.47    10.0
## 12       5             0.50     0.071                  102      0.80    10.5
## 17       7             0.28     0.092                  103      0.75    10.5
## 18       5             0.56     0.368                   56      1.28     9.3
## 23       5             0.43     0.106                   37      0.91     9.5
# Number of reduced-data rows held out of the training partition.
nrow(datos_red_nuevo[-entrena, ])
## [1] 479
# Training set for the reduced model.
datos_red_nuevo.entrena <- datos_red_nuevo[entrena, , drop = FALSE]

# Peek at the first training rows.
head(datos_red_nuevo.entrena)
##   quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 1       5             0.70     0.076                   34      0.56     9.4
## 2       5             0.88     0.098                   67      0.68     9.8
## 3       5             0.76     0.092                   54      0.65     9.8
## 4       6             0.28     0.075                   60      0.58     9.8
## 5       5             0.70     0.076                   34      0.56     9.4
## 6       5             0.66     0.075                   40      0.56     9.4
# Last rows of the reduced training set.
tail(datos_red_nuevo.entrena)
##      quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 1590       5            0.725     0.073                   79      0.54     9.2
## 1592       6            0.740     0.089                   26      0.56    11.6
## 1593       6            0.510     0.076                   40      0.75    11.0
## 1594       6            0.620     0.068                   38      0.82     9.5
## 1596       6            0.550     0.062                   51      0.76    11.2
## 1598       5            0.645     0.075                   44      0.71    10.2
# Summary statistics of the reduced training set.
summary(datos_red_nuevo.entrena)
##     quality      volatile.acidity   chlorides       total.sulfur.dioxide
##  Min.   :3.000   Min.   :0.1200   Min.   :0.03400   Min.   :  6.00      
##  1st Qu.:5.000   1st Qu.:0.4000   1st Qu.:0.07100   1st Qu.: 22.00      
##  Median :6.000   Median :0.5300   Median :0.08000   Median : 38.00      
##  Mean   :5.635   Mean   :0.5326   Mean   :0.08693   Mean   : 46.82      
##  3rd Qu.:6.000   3rd Qu.:0.6400   3rd Qu.:0.09025   3rd Qu.: 62.00      
##  Max.   :8.000   Max.   :1.5800   Max.   :0.46700   Max.   :289.00      
##    sulphates         alcohol    
##  Min.   :0.3700   Min.   : 8.4  
##  1st Qu.:0.5500   1st Qu.: 9.5  
##  Median :0.6200   Median :10.1  
##  Mean   :0.6588   Mean   :10.4  
##  3rd Qu.:0.7300   3rd Qu.:11.1  
##  Max.   :1.9800   Max.   :14.9
# Validation set for the reduced model: rows not selected for training.
datos_red_nuevo.valida <- datos_red_nuevo[-entrena, , drop = FALSE]

# Peek at the first validation rows.
head(datos_red_nuevo.valida)
##    quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 7        5             0.60     0.069                   59      0.46     9.4
## 8        7             0.65     0.065                   21      0.47    10.0
## 12       5             0.50     0.071                  102      0.80    10.5
## 17       7             0.28     0.092                  103      0.75    10.5
## 18       5             0.56     0.368                   56      1.28     9.3
## 23       5             0.43     0.106                   37      0.91     9.5
# Last rows of the reduced validation set.
tail(datos_red_nuevo.valida)
##      quality volatile.acidity chlorides total.sulfur.dioxide sulphates alcohol
## 1588       6             0.61     0.066                   28      0.66    10.9
## 1589       6             0.66     0.068                  102      0.78    12.8
## 1591       6             0.55     0.077                   35      0.82    11.6
## 1595       5             0.60     0.090                   44      0.58    10.5
## 1597       6             0.51     0.076                   40      0.75    11.0
## 1599       6             0.31     0.067                   42      0.66    11.0
# Summary statistics of the reduced validation set.
summary(datos_red_nuevo.valida)
##     quality      volatile.acidity   chlorides       total.sulfur.dioxide
##  Min.   :3.000   Min.   :0.1200   Min.   :0.01200   Min.   :  6.00      
##  1st Qu.:5.000   1st Qu.:0.3900   1st Qu.:0.06800   1st Qu.: 21.50      
##  Median :6.000   Median :0.5000   Median :0.07800   Median : 36.00      
##  Mean   :5.639   Mean   :0.5166   Mean   :0.08872   Mean   : 45.64      
##  3rd Qu.:6.000   3rd Qu.:0.6300   3rd Qu.:0.09000   3rd Qu.: 63.00      
##  Max.   :8.000   Max.   :1.1850   Max.   :0.61100   Max.   :155.00      
##    sulphates         alcohol     
##  Min.   :0.3300   Min.   : 9.00  
##  1st Qu.:0.5500   1st Qu.: 9.50  
##  Median :0.6200   Median :10.30  
##  Mean   :0.6566   Mean   :10.47  
##  3rd Qu.:0.7300   3rd Qu.:11.10  
##  Max.   :2.0000   Max.   :14.00
# Fit the reduced linear model: `quality` against the five retained
# predictors, on the training rows only, then print its coefficients.
modelo_red_nuevo <- lm(formula = quality ~ ., data = datos_red_nuevo.entrena)
print(modelo_red_nuevo)
## 
## Call:
## lm(formula = quality ~ ., data = datos_red_nuevo.entrena)
## 
## Coefficients:
##          (Intercept)      volatile.acidity             chlorides  
##              2.76400              -1.08935              -1.43685  
## total.sulfur.dioxide             sulphates               alcohol  
##             -0.00213               0.94817               0.29334
# Coefficient table, significance tests and fit statistics for the reduced
# model.
summary(modelo_red_nuevo)
## 
## Call:
## lm(formula = quality ~ ., data = datos_red_nuevo.entrena)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.69357 -0.37628 -0.05968  0.46112  2.08671 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.7639974  0.2488583  11.107  < 2e-16 ***
## volatile.acidity     -1.0893474  0.1164057  -9.358  < 2e-16 ***
## chlorides            -1.4368514  0.5168694  -2.780 0.005529 ** 
## total.sulfur.dioxide -0.0021300  0.0006106  -3.488 0.000505 ***
## sulphates             0.9481661  0.1315576   7.207 1.05e-12 ***
## alcohol               0.2933379  0.0202158  14.510  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6659 on 1114 degrees of freedom
## Multiple R-squared:  0.3428, Adjusted R-squared:  0.3398 
## F-statistic: 116.2 on 5 and 1114 DF,  p-value: < 2.2e-16
# Show the full-model summary again, right after the reduced-model summary,
# so the two fits can be compared side by side.
summary(modelo_red)
## 
## Call:
## lm(formula = quality ~ ., data = datos_red.entrena)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.70643 -0.36046 -0.04914  0.45944  1.98343 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.075e+01  2.562e+01   0.810 0.418266    
## fixed.acidity         8.274e-03  3.111e-02   0.266 0.790320    
## volatile.acidity     -9.222e-01  1.446e-01  -6.378 2.63e-10 ***
## citric.acid          -1.961e-02  1.793e-01  -0.109 0.912913    
## residual.sugar        1.174e-02  1.716e-02   0.684 0.493948    
## chlorides            -1.805e+00  5.417e-01  -3.331 0.000893 ***
## free.sulfur.dioxide   6.650e-03  2.652e-03   2.508 0.012300 *  
## total.sulfur.dioxide -3.765e-03  8.681e-04  -4.337 1.57e-05 ***
## density              -1.631e+01  2.613e+01  -0.624 0.532544    
## pH                   -5.735e-01  2.276e-01  -2.520 0.011878 *  
## sulphates             9.283e-01  1.360e-01   6.824 1.46e-11 ***
## alcohol               2.940e-01  3.192e-02   9.212  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6611 on 1108 degrees of freedom
## Multiple R-squared:  0.3558, Adjusted R-squared:  0.3494 
## F-statistic: 55.64 on 11 and 1108 DF,  p-value: < 2.2e-16
# Predict quality for every validation row with the full model.
predecir <- predict(object = modelo_red, newdata = datos_red.valida)

# First validation row, to compare its observed quality with the prediction.
head(datos_red.valida, n = 1L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 7           7.9              0.6        0.06            1.6     0.069
##   free.sulfur.dioxide total.sulfur.dioxide density  pH sulphates alcohol
## 7                  15                   59  0.9964 3.3      0.46     9.4
##   quality
## 7       5
# Last validation row.
tail(datos_red.valida, n = 1L)
##      fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1599             6             0.31        0.47            3.6     0.067
##      free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1599                  18                   42 0.99549 3.39      0.66      11
##      quality
## 1599       6
# Predicted quality for the first validation row.
head(predecir, n = 1L)
##        7 
## 5.074639
# Predicted quality for the last validation row.
tail(predecir, n = 1L)
##     1599 
## 6.048752
# Build a single hypothetical wine to score with the model.
# The values go straight into data.frame() as named columns instead of first
# being assigned to eleven global variables; this keeps the workspace clean
# and avoids creating a global `density` that shadows stats::density.
# Column names must match the predictors the model was trained on.
# NOTE(review): density = 0.95 is below the smallest density in the data
# (0.9901), so the prediction for this row is an extrapolation.
nuevo.dato <- data.frame(
  fixed.acidity = 7.0,
  volatile.acidity = 0.68,
  citric.acid = 0.16,
  residual.sugar = 5,
  chlorides = 0.055,
  free.sulfur.dioxide = 15,
  total.sulfur.dioxide = 60,
  density = 0.95,
  pH = 4,
  sulphates = 0.6,
  alcohol = 8.5
)
nuevo.dato
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1             7             0.68        0.16              5     0.055
##   free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1                  15                   60    0.95  4       0.6     8.5
# Score the hypothetical wine with the full model. This reuses the name
# `predecir`, replacing whatever value it held before.
predecir <- predict(object = modelo_red, newdata = nuevo.dato)
print(predecir)
##        1 
## 5.273644