Objetivo

Desarrollar modelos de regresión lineal y árboles de regresión para predecir la calidad de vinos tintos

Descripción

Analizar y aplicar las técnicas de regresión lineal y de árbol de regresión en el conjunto de datos de vinos; interpretar ambos modelos para elaborar predicciones, realizar comparaciones y establecer resultados sobre la calidad de los vinos.

Cargar librerías adecuadas

library(rpart)      # Trees
library(rpart.plot) # Visualize and draw trees
library(caret)      # Partitioning data sets when needed
library(dplyr)      # select, filter, mutate, arrange, ...
library(readr)      # Reading data
library(ggplot2)    # Nicer-looking graphics
library(reshape)    # Renaming columns
library(corrplot)   # Visual correlations

Cargar datos

# Read the red-wine CSV from the project's datos/ folder into a tibble.
datos <- read_csv(file = "datos/winequality-red.csv")
## Parsed with column specification:
## cols(
##   fixed_acidity = col_double(),
##   volatile_acidity = col_double(),
##   citric_acid = col_double(),
##   residual_sugar = col_double(),
##   chlorides = col_double(),
##   free_sulfur_dioxide = col_double(),
##   total_sulfur_dioxide = col_double(),
##   density = col_double(),
##   pH = col_double(),
##   sulphates = col_double(),
##   alcohol = col_double(),
##   quality = col_double()
## )
# The white-wine file is not loaded; the call is kept here for reference only:
# datos_w <- read_csv2("datos/winequality-white.csv")
# Print the tibble to inspect its first rows and column types.
datos
## # A tibble: 1,599 x 12
##    fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##            <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
##  1           7.4            0.7          0               1.9     0.076
##  2           7.8            0.88         0               2.6     0.098
##  3           7.8            0.76         0.04            2.3     0.092
##  4          11.2            0.28         0.56            1.9     0.075
##  5           7.4            0.7          0               1.9     0.076
##  6           7.4            0.66         0               1.8     0.075
##  7           7.9            0.6          0.06            1.6     0.069
##  8           7.3            0.65         0               1.2     0.065
##  9           7.8            0.580        0.02            2       0.073
## 10           7.5            0.5          0.36            6.1     0.071
## # … with 1,589 more rows, and 7 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>, quality <dbl>

Explorar los datos indicando las características generales de los mismos

# Keep the same twelve columns but move quality to the first position.
datos <- datos %>%
  select(
    quality,
    fixed_acidity, volatile_acidity, citric_acid,
    residual_sugar, chlorides,
    free_sulfur_dioxide, total_sulfur_dioxide,
    density, pH, sulphates, alcohol
  )
# Preview the first six rows with the new column order.
head(datos, n = 6)
## # A tibble: 6 x 12
##   quality fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##     <dbl>         <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1       5           7.4             0.7         0               1.9     0.076
## 2       5           7.8             0.88        0               2.6     0.098
## 3       5           7.8             0.76        0.04            2.3     0.092
## 4       6          11.2             0.28        0.56            1.9     0.075
## 5       5           7.4             0.7         0               1.9     0.076
## 6       5           7.4             0.66        0               1.8     0.075
## # … with 6 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>
# Show the last six rows of the data set.
tail(datos, n = 6)
## # A tibble: 6 x 12
##   quality fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##     <dbl>         <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1       6           6.8            0.62         0.08            1.9     0.068
## 2       5           6.2            0.6          0.08            2       0.09 
## 3       6           5.9            0.55         0.1             2.2     0.062
## 4       6           6.3            0.51         0.13            2.3     0.076
## 5       5           5.9            0.645        0.12            2       0.075
## 6       6           6              0.31         0.47            3.6     0.067
## # … with 6 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>
# Per-column summary statistics (min, quartiles, mean, max).
datos %>% summary()
##     quality      fixed_acidity   volatile_acidity  citric_acid   
##  Min.   :3.000   Min.   : 4.60   Min.   :0.1200   Min.   :0.000  
##  1st Qu.:5.000   1st Qu.: 7.10   1st Qu.:0.3900   1st Qu.:0.090  
##  Median :6.000   Median : 7.90   Median :0.5200   Median :0.260  
##  Mean   :5.636   Mean   : 8.32   Mean   :0.5278   Mean   :0.271  
##  3rd Qu.:6.000   3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.420  
##  Max.   :8.000   Max.   :15.90   Max.   :1.5800   Max.   :1.000  
##  residual_sugar     chlorides       free_sulfur_dioxide total_sulfur_dioxide
##  Min.   : 0.900   Min.   :0.01200   Min.   : 1.00       Min.   :  6.00      
##  1st Qu.: 1.900   1st Qu.:0.07000   1st Qu.: 7.00       1st Qu.: 22.00      
##  Median : 2.200   Median :0.07900   Median :14.00       Median : 38.00      
##  Mean   : 2.539   Mean   :0.08747   Mean   :15.87       Mean   : 46.47      
##  3rd Qu.: 2.600   3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 62.00      
##  Max.   :15.500   Max.   :0.61100   Max.   :72.00       Max.   :289.00      
##     density             pH          sulphates         alcohol     
##  Min.   :0.9901   Min.   :2.740   Min.   :0.3300   Min.   : 8.40  
##  1st Qu.:0.9956   1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50  
##  Median :0.9968   Median :3.310   Median :0.6200   Median :10.20  
##  Mean   :0.9967   Mean   :3.311   Mean   :0.6581   Mean   :10.42  
##  3rd Qu.:0.9978   3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.10  
##  Max.   :1.0037   Max.   :4.010   Max.   :2.0000   Max.   :14.90
# Compact view of the object's classes, dimensions and column types.
str(object = datos)
## Classes 'spec_tbl_df', 'tbl_df', 'tbl' and 'data.frame': 1599 obs. of  12 variables:
##  $ quality             : num  5 5 5 6 5 5 5 7 7 5 ...
##  $ fixed_acidity       : num  7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile_acidity    : num  0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric_acid         : num  0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual_sugar      : num  1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num  0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free_sulfur_dioxide : num  11 25 15 17 11 13 15 15 9 17 ...
##  $ total_sulfur_dioxide: num  34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num  0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num  3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num  0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num  9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   fixed_acidity = col_double(),
##   ..   volatile_acidity = col_double(),
##   ..   citric_acid = col_double(),
##   ..   residual_sugar = col_double(),
##   ..   chlorides = col_double(),
##   ..   free_sulfur_dioxide = col_double(),
##   ..   total_sulfur_dioxide = col_double(),
##   ..   density = col_double(),
##   ..   pH = col_double(),
##   ..   sulphates = col_double(),
##   ..   alcohol = col_double(),
##   ..   quality = col_double()
##   .. )

Identificar el significado de cada variable e identificar la variable dependiente y las variables independientes

# Pairwise correlation matrix of all twelve numeric columns
# (cor() uses the Pearson coefficient by default).
correlaciones <- cor(x = datos)
# Display the full matrix.
print(correlaciones)
##                          quality fixed_acidity volatile_acidity citric_acid
## quality               1.00000000    0.12405165     -0.390557780  0.22637251
## fixed_acidity         0.12405165    1.00000000     -0.256130895  0.67170343
## volatile_acidity     -0.39055778   -0.25613089      1.000000000 -0.55249568
## citric_acid           0.22637251    0.67170343     -0.552495685  1.00000000
## residual_sugar        0.01373164    0.11477672      0.001917882  0.14357716
## chlorides            -0.12890656    0.09370519      0.061297772  0.20382291
## free_sulfur_dioxide  -0.05065606   -0.15379419     -0.010503827 -0.06097813
## total_sulfur_dioxide -0.18510029   -0.11318144      0.076470005  0.03553302
## density              -0.17491923    0.66804729      0.022026232  0.36494718
## pH                   -0.05773139   -0.68297819      0.234937294 -0.54190414
## sulphates             0.25139708    0.18300566     -0.260986685  0.31277004
## alcohol               0.47616632   -0.06166827     -0.202288027  0.10990325
##                      residual_sugar    chlorides free_sulfur_dioxide
## quality                 0.013731637 -0.128906560        -0.050656057
## fixed_acidity           0.114776724  0.093705186        -0.153794193
## volatile_acidity        0.001917882  0.061297772        -0.010503827
## citric_acid             0.143577162  0.203822914        -0.060978129
## residual_sugar          1.000000000  0.055609535         0.187048995
## chlorides               0.055609535  1.000000000         0.005562147
## free_sulfur_dioxide     0.187048995  0.005562147         1.000000000
## total_sulfur_dioxide    0.203027882  0.047400468         0.667666450
## density                 0.355283371  0.200632327        -0.021945831
## pH                     -0.085652422 -0.265026131         0.070377499
## sulphates               0.005527121  0.371260481         0.051657572
## alcohol                 0.042075437 -0.221140545        -0.069408354
##                      total_sulfur_dioxide     density          pH    sulphates
## quality                       -0.18510029 -0.17491923 -0.05773139  0.251397079
## fixed_acidity                 -0.11318144  0.66804729 -0.68297819  0.183005664
## volatile_acidity               0.07647000  0.02202623  0.23493729 -0.260986685
## citric_acid                    0.03553302  0.36494718 -0.54190414  0.312770044
## residual_sugar                 0.20302788  0.35528337 -0.08565242  0.005527121
## chlorides                      0.04740047  0.20063233 -0.26502613  0.371260481
## free_sulfur_dioxide            0.66766645 -0.02194583  0.07037750  0.051657572
## total_sulfur_dioxide           1.00000000  0.07126948 -0.06649456  0.042946836
## density                        0.07126948  1.00000000 -0.34169933  0.148506412
## pH                            -0.06649456 -0.34169933  1.00000000 -0.196647602
## sulphates                      0.04294684  0.14850641 -0.19664760  1.000000000
## alcohol                       -0.20565394 -0.49617977  0.20563251  0.093594750
##                          alcohol
## quality               0.47616632
## fixed_acidity        -0.06166827
## volatile_acidity     -0.20228803
## citric_acid           0.10990325
## residual_sugar        0.04207544
## chlorides            -0.22114054
## free_sulfur_dioxide  -0.06940835
## total_sulfur_dioxide -0.20565394
## density              -0.49617977
## pH                    0.20563251
## sulphates             0.09359475
## alcohol               1.00000000

Encontrar los coeficientes de correlación e interpretar su significado

Gráfica de correlaciones:

Partir datos en datos de entrenamiento y datos de validación

# Fix the random seed so the train/validation split is reproducible.
set.seed(2020)
# Sample 70% of the row indices for training, using quality as the outcome;
# with list = FALSE the indices come back as a one-column matrix.
entrena <- createDataPartition(
  y = datos[["quality"]],
  p = 0.7,
  list = FALSE
)
# Preview the first selected row indices.
head(entrena, n = 6)
##      Resample1
## [1,]         1
## [2,]         2
## [3,]         3
## [4,]         4
## [5,]         5
## [6,]         6
# Number of rows selected for the training set.
dim(entrena)[1]
## [1] 1120
# Preview the rows left out of the training sample. entrena is a one-column
# matrix, and tibble (>= 3.0.0) warns when `[` receives a matrix as the row
# index, so its only column is extracted as a plain integer vector first.
head(datos[-entrena[, 1], ])
## # A tibble: 6 x 12
##   quality fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##     <dbl>         <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1       5           7.9             0.6         0.06            1.6     0.069
## 2       7           7.3             0.65        0               1.2     0.065
## 3       5           7.5             0.5         0.36            6.1     0.071
## 4       7           8.5             0.28        0.56            1.8     0.092
## 5       5           8.1             0.56        0.28            1.7     0.368
## 6       5           7.9             0.43        0.21            1.6     0.106
## # … with 6 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>
# First six rows of the complete data set, for comparison.
head(datos, n = 6)
## # A tibble: 6 x 12
##   quality fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##     <dbl>         <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1       5           7.4             0.7         0               1.9     0.076
## 2       5           7.8             0.88        0               2.6     0.098
## 3       5           7.8             0.76        0.04            2.3     0.092
## 4       6          11.2             0.28        0.56            1.9     0.075
## 5       5           7.4             0.7         0               1.9     0.076
## 6       5           7.4             0.66        0               1.8     0.075
## # … with 6 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>
# Training set: the rows whose indices are stored in entrena. entrena is a
# one-column matrix and tibble (>= 3.0.0) warns on matrix row indices, so the
# column is pulled out as an integer vector before subsetting.
datos.Entrena <- datos[entrena[, 1], ]
# Preview the first training rows.
head(datos.Entrena)
## # A tibble: 6 x 12
##   quality fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##     <dbl>         <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1       5           7.4             0.7         0               1.9     0.076
## 2       5           7.8             0.88        0               2.6     0.098
## 3       5           7.8             0.76        0.04            2.3     0.092
## 4       6          11.2             0.28        0.56            1.9     0.075
## 5       5           7.4             0.7         0               1.9     0.076
## 6       5           7.4             0.66        0               1.8     0.075
## # … with 6 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>
# Per-column summary statistics of the training set.
datos.Entrena %>% summary()
##     quality      fixed_acidity   volatile_acidity  citric_acid    
##  Min.   :3.000   Min.   : 4.70   Min.   :0.1200   Min.   :0.0000  
##  1st Qu.:5.000   1st Qu.: 7.10   1st Qu.:0.4000   1st Qu.:0.0975  
##  Median :6.000   Median : 7.90   Median :0.5300   Median :0.2500  
##  Mean   :5.635   Mean   : 8.34   Mean   :0.5326   Mean   :0.2691  
##  3rd Qu.:6.000   3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.4300  
##  Max.   :8.000   Max.   :15.90   Max.   :1.5800   Max.   :0.7900  
##  residual_sugar     chlorides       free_sulfur_dioxide total_sulfur_dioxide
##  Min.   : 0.900   Min.   :0.03400   Min.   : 1.00       Min.   :  6.00      
##  1st Qu.: 1.900   1st Qu.:0.07100   1st Qu.: 8.00       1st Qu.: 22.00      
##  Median : 2.200   Median :0.08000   Median :14.00       Median : 38.00      
##  Mean   : 2.554   Mean   :0.08693   Mean   :16.13       Mean   : 46.82      
##  3rd Qu.: 2.600   3rd Qu.:0.09025   3rd Qu.:22.00       3rd Qu.: 62.00      
##  Max.   :15.500   Max.   :0.46700   Max.   :72.00       Max.   :289.00      
##     density             pH          sulphates         alcohol    
##  Min.   :0.9901   Min.   :2.860   Min.   :0.3700   Min.   : 8.4  
##  1st Qu.:0.9956   1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.5  
##  Median :0.9968   Median :3.310   Median :0.6200   Median :10.1  
##  Mean   :0.9968   Mean   :3.311   Mean   :0.6588   Mean   :10.4  
##  3rd Qu.:0.9979   3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.1  
##  Max.   :1.0037   Max.   :4.010   Max.   :1.9800   Max.   :14.9
# Validation set: every row NOT listed in entrena. entrena is a one-column
# matrix and tibble (>= 3.0.0) warns on matrix row indices, so the column is
# pulled out as an integer vector before negating it.
datos.Valida <- datos[-entrena[, 1], ]
# Preview the first validation rows.
head(datos.Valida)
## # A tibble: 6 x 12
##   quality fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
##     <dbl>         <dbl>            <dbl>       <dbl>          <dbl>     <dbl>
## 1       5           7.9             0.6         0.06            1.6     0.069
## 2       7           7.3             0.65        0               1.2     0.065
## 3       5           7.5             0.5         0.36            6.1     0.071
## 4       7           8.5             0.28        0.56            1.8     0.092
## 5       5           8.1             0.56        0.28            1.7     0.368
## 6       5           7.9             0.43        0.21            1.6     0.106
## # … with 6 more variables: free_sulfur_dioxide <dbl>,
## #   total_sulfur_dioxide <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>

Crear modelo de regresión lineal múltiple indicando como variable dependiente la calidad del vino.

# Fit a multiple linear regression of quality on all remaining columns of the
# training set and keep the fitted model in `modelo`.
modelo <- lm(formula = quality ~ ., data = datos.Entrena)
# Show the call and the estimated coefficients.
print(modelo)
## 
## Call:
## lm(formula = quality ~ ., data = datos.Entrena)
## 
## Coefficients:
##          (Intercept)         fixed_acidity      volatile_acidity  
##            20.747534              0.008274             -0.922244  
##          citric_acid        residual_sugar             chlorides  
##            -0.019610              0.011741             -1.804597  
##  free_sulfur_dioxide  total_sulfur_dioxide               density  
##             0.006650             -0.003765            -16.312818  
##                   pH             sulphates               alcohol  
##            -0.573518              0.928338              0.294048
# Residual quantiles, coefficient table with significance, and fit statistics.
print(summary(modelo))
## 
## Call:
## lm(formula = quality ~ ., data = datos.Entrena)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.70643 -0.36046 -0.04914  0.45944  1.98343 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.075e+01  2.562e+01   0.810 0.418266    
## fixed_acidity         8.274e-03  3.111e-02   0.266 0.790320    
## volatile_acidity     -9.222e-01  1.446e-01  -6.378 2.63e-10 ***
## citric_acid          -1.961e-02  1.793e-01  -0.109 0.912913    
## residual_sugar        1.174e-02  1.716e-02   0.684 0.493948    
## chlorides            -1.805e+00  5.417e-01  -3.331 0.000893 ***
## free_sulfur_dioxide   6.650e-03  2.652e-03   2.508 0.012300 *  
## total_sulfur_dioxide -3.765e-03  8.681e-04  -4.337 1.57e-05 ***
## density              -1.631e+01  2.613e+01  -0.624 0.532544    
## pH                   -5.735e-01  2.276e-01  -2.520 0.011878 *  
## sulphates             9.283e-01  1.360e-01   6.824 1.46e-11 ***
## alcohol               2.940e-01  3.192e-02   9.212  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.6611 on 1108 degrees of freedom
## Multiple R-squared:  0.3558, Adjusted R-squared:  0.3494 
## F-statistic: 55.64 on 11 and 1108 DF,  p-value: < 2.2e-16

Con el modelo árbol

# Grow a tree for quality from every other column of the training set.
arbol <- rpart(
  formula = quality ~ .,
  data = datos.Entrena
)
# List the nodes of the fitted tree (split, n, deviance, fitted value).
print(arbol)
## n= 1120 
## 
## node), split, n, deviance, yval
##       * denotes terminal node
## 
##  1) root 1120 751.64200 5.634821  
##    2) alcohol< 10.525 693 314.90330 5.366522  
##      4) volatile_acidity>=0.345 621 253.08530 5.302738  
##        8) sulphates< 0.535 155  35.97419 5.012903 *
##        9) sulphates>=0.535 466 199.75970 5.399142  
##         18) alcohol< 9.85 297 100.24240 5.282828 *
##         19) alcohol>=9.85 169  88.43787 5.603550 *
##      5) volatile_acidity< 0.345 72  37.50000 5.916667 *
##    3) alcohol>=10.525 427 305.89230 6.070258  
##      6) sulphates< 0.625 157 101.73250 5.649682  
##       12) volatile_acidity>=1.015 8   4.87500 4.125000 *
##       13) volatile_acidity< 1.015 149  77.26174 5.731544  
##         26) alcohol< 11.65 94  47.48936 5.510638  
##           52) free_sulfur_dioxide< 8.5 36  15.55556 5.111111 *
##           53) free_sulfur_dioxide>=8.5 58  22.62069 5.758621 *
##         27) alcohol>=11.65 55  17.34545 6.109091 *
##      7) sulphates>=0.625 270 160.24070 6.314815  
##       14) alcohol< 11.55 157  84.36943 6.101911  
##         28) volatile_acidity>=0.395 84  32.03571 5.892857 *
##         29) volatile_acidity< 0.395 73  44.43836 6.342466  
##           58) pH>=3.25 45  24.80000 6.066667 *
##           59) pH< 3.25 28  10.71429 6.785714 *
##       15) alcohol>=11.55 113  58.86726 6.610619 *
# Draw the full tree with node numbers and all leaves aligned at the bottom.
prp(
  x = arbol,
  type = 2,
  nn = TRUE,
  fallen.leaves = TRUE,
  faclen = 4,
  varlen = 8,
  shadow.col = "gray"
)

# Complexity-parameter table: CP, number of splits, relative error and
# cross-validated error (xerror) with its standard error (xstd).
arbol[["cptable"]]
##            CP nsplit rel error    xerror       xstd
## 1  0.17408072      0 1.0000000 1.0005542 0.04657576
## 2  0.05843081      1 0.8259193 0.8570500 0.04479501
## 3  0.03235313      2 0.7674885 0.7920724 0.03916559
## 4  0.02607058      3 0.7351353 0.7756639 0.03890049
## 5  0.02308479      4 0.7090648 0.7551788 0.03815786
## 6  0.02262255      5 0.6859800 0.7440868 0.03767959
## 7  0.01653304      6 0.6633574 0.7111984 0.03645750
## 8  0.01474021      7 0.6468244 0.6966805 0.03590473
## 9  0.01239036      8 0.6320842 0.6877223 0.03508585
## 10 0.01118846      9 0.6196938 0.6926970 0.03504693
## 11 0.01000000     11 0.5973169 0.6787878 0.03469302

Gráfica del parámetro de complejidad (cp) del árbol

# Plot the cross-validated error against the complexity parameter.
plotcp(x = arbol)

### Recortar árbol

# Prune the tree at a fixed complexity parameter.
# NOTE(review): 0.01417645 matches no CP row printed by arbol$cptable (it falls
# between 0.01474021 and 0.01239036); confirm where this value comes from.
arbol.Recortado <- prune(tree = arbol, cp = 0.01417645)
# Draw the pruned tree with the same layout options as the full tree.
prp(
  x = arbol.Recortado,
  type = 2,
  nn = TRUE,
  fallen.leaves = TRUE,
  faclen = 4,
  varlen = 8,
  shadow.col = "gray"
)

# Per-column summary statistics of the validation set.
datos.Valida %>% summary()
##     quality      fixed_acidity    volatile_acidity  citric_acid    
##  Min.   :3.000   Min.   : 4.600   Min.   :0.1200   Min.   :0.0000  
##  1st Qu.:5.000   1st Qu.: 7.100   1st Qu.:0.3900   1st Qu.:0.0900  
##  Median :6.000   Median : 7.900   Median :0.5000   Median :0.2800  
##  Mean   :5.639   Mean   : 8.272   Mean   :0.5166   Mean   :0.2753  
##  3rd Qu.:6.000   3rd Qu.: 9.200   3rd Qu.:0.6300   3rd Qu.:0.4200  
##  Max.   :8.000   Max.   :15.600   Max.   :1.1850   Max.   :1.0000  
##  residual_sugar    chlorides       free_sulfur_dioxide total_sulfur_dioxide
##  Min.   :1.200   Min.   :0.01200   Min.   : 1.00       Min.   :  6.00      
##  1st Qu.:1.900   1st Qu.:0.06800   1st Qu.: 7.00       1st Qu.: 21.50      
##  Median :2.200   Median :0.07800   Median :12.00       Median : 36.00      
##  Mean   :2.503   Mean   :0.08872   Mean   :15.29       Mean   : 45.64      
##  3rd Qu.:2.600   3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 63.00      
##  Max.   :9.000   Max.   :0.61100   Max.   :68.00       Max.   :155.00      
##     density             pH          sulphates         alcohol     
##  Min.   :0.9902   Min.   :2.740   Min.   :0.3300   Min.   : 9.00  
##  1st Qu.:0.9956   1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50  
##  Median :0.9967   Median :3.310   Median :0.6200   Median :10.30  
##  Mean   :0.9966   Mean   :3.312   Mean   :0.6566   Mean   :10.47  
##  3rd Qu.:0.9978   3rd Qu.:3.410   3rd Qu.:0.7300   3rd Qu.:11.10  
##  Max.   :1.0031   Max.   :3.900   Max.   :2.0000   Max.   :14.00

Predicción

# Predictor values for one hypothetical wine; the names match the predictor
# columns of datos.
fixed_acidity <- 8
volatile_acidity <- 0.6
citric_acid <- 0.1
residual_sugar <- 2
chlorides <- 0.09
free_sulfur_dioxide <- 12
total_sulfur_dioxide <- 50
density <- 0.996
pH <- 3.3
sulphates <- 0.55
alcohol <- 15

# Assemble the scalars into a one-row data frame; column names are taken from
# the variable names.
nuevo.Dato <- data.frame(
  fixed_acidity, volatile_acidity, citric_acid,
  residual_sugar, chlorides,
  free_sulfur_dioxide, total_sulfur_dioxide,
  density, pH, sulphates, alcohol
)

# Display the new observation.
print(nuevo.Dato)
##   fixed_acidity volatile_acidity citric_acid residual_sugar chlorides
## 1             8              0.6         0.1              2      0.09
##   free_sulfur_dioxide total_sulfur_dioxide density  pH sulphates alcohol
## 1                  12                   50   0.996 3.3      0.55      15
# Predict quality for the new observation with the tree.
# NOTE(review): this uses the unpruned `arbol`, not `arbol.Recortado`; confirm
# that is intended.
predecir <- predict(object = arbol, newdata = nuevo.Dato)
# Show the predicted value.
print(predecir)
##        1 
## 6.109091