## corrplot 0.95 loaded
# Load the wine data set. The file is semicolon-separated with a header row
# and "." as decimal mark; character columns are converted to factors.
datos <- read.table(
  file = "combinado.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  stringsAsFactors = TRUE
)

# Per-column overview (quartiles for numeric columns, counts for factors).
summary(datos)
##  fixed.acidity    volatile.acidity  citric.acid     residual.sugar  
##  Min.   : 3.800   Min.   :0.0800   Min.   :0.0000   Min.   : 0.600  
##  1st Qu.: 6.400   1st Qu.:0.2300   1st Qu.:0.2500   1st Qu.: 1.800  
##  Median : 7.000   Median :0.2900   Median :0.3100   Median : 3.000  
##  Mean   : 7.215   Mean   :0.3397   Mean   :0.3186   Mean   : 5.443  
##  3rd Qu.: 7.700   3rd Qu.:0.4000   3rd Qu.:0.3900   3rd Qu.: 8.100  
##  Max.   :15.900   Max.   :1.5800   Max.   :1.6600   Max.   :65.800  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide    density      
##  Min.   :0.00900   Min.   :  1.00      Min.   :  6.0        Min.   :0.9871  
##  1st Qu.:0.03800   1st Qu.: 17.00      1st Qu.: 77.0        1st Qu.:0.9923  
##  Median :0.04700   Median : 29.00      Median :118.0        Median :0.9949  
##  Mean   :0.05603   Mean   : 30.53      Mean   :115.7        Mean   :0.9947  
##  3rd Qu.:0.06500   3rd Qu.: 41.00      3rd Qu.:156.0        3rd Qu.:0.9970  
##  Max.   :0.61100   Max.   :289.00      Max.   :440.0        Max.   :1.0390  
##        pH          sulphates         alcohol         quality         color     
##  Min.   :2.720   Min.   :0.2200   Min.   : 8.00   Min.   :3.000   blanco:4898  
##  1st Qu.:3.110   1st Qu.:0.4300   1st Qu.: 9.50   1st Qu.:5.000   rojo  :1599  
##  Median :3.210   Median :0.5100   Median :10.30   Median :6.000                
##  Mean   :3.219   Mean   :0.5313   Mean   :10.49   Mean   :5.818                
##  3rd Qu.:3.320   3rd Qu.:0.6000   3rd Qu.:11.30   3rd Qu.:6.000                
##  Max.   :4.010   Max.   :2.0000   Max.   :14.90   Max.   :9.000
# Correlation matrix of the numeric wine variables.
#
# The columns are picked from `datos` by name rather than through
# attach(datos): attach() copies every column onto the search path, where it
# can be masked by (or mask) other objects and is never detached. Selecting by
# name produces the same data frame without that side effect.
# NOTE(review): any later code that relied on the attached bare column names
# must refer to them as datos$<column> instead.
newDatos <- datos[, c(
  "fixed.acidity", "volatile.acidity", "citric.acid", "residual.sugar",
  "chlorides", "free.sulfur.dioxide", "total.sulfur.dioxide", "density",
  "pH", "sulphates", "alcohol", "quality"
)]

# Rows containing any NA are dropped before computing pairwise correlations.
r <- cor(na.omit(newDatos))

# Show the matrix with the correlation coefficients printed as numbers.
corrplot(r, method = "number")

library(ggplot2)

# Scatter plot of quality against alcohol with a red fitted line from lm()
# (no confidence band), drawn on the minimal theme.
ggplot(data = datos, mapping = aes(x = alcohol, y = quality)) +
  theme_minimal() +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'

# Scatter plot of quality against density with a red fitted line from lm()
# (no confidence band), drawn on the minimal theme.
# ggplot2 is already attached by the earlier library(ggplot2) call in this
# script, so it is not loaded a second time here.
ggplot(data = datos, mapping = aes(x = density, y = quality)) +
  geom_point() +
  geom_smooth(method = "lm", se = FALSE, color = "red") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

Análisis

En el mapa de calor se observa que la valoración final de la calidad del vino está influenciada principalmente por el contenido de alcohol, con una correlación positiva moderada, y por la densidad, con una correlación negativa también moderada. Esto sugiere que los vinos con mayor grado alcohólico tienden a recibir mejores calificaciones, mientras que aquellos con mayor densidad presentan una calidad inferior.

# Reshape to long format: every column except `color` is stacked into a
# `name` (variable) / `value` (measurement) pair, one row per observation and
# variable. pivot_longer() is namespace-qualified because no library(tidyr)
# call is visible in this script; the output column names are spelled out
# because the plot below refers to them.
tus_datos_long <- tidyr::pivot_longer(
  datos,
  cols = -color,
  names_to = "name",
  values_to = "value"
)

# One boxplot panel per variable, comparing the wine colours. Each panel gets
# its own y scale because the variables are measured on different ranges.
ggplot(tus_datos_long, aes(x = color, y = value, fill = color)) +
  geom_boxplot() +
  facet_wrap(~ name, scales = "free_y") +
  theme_minimal() +
  labs(
    x = "Color del vino",
    y = "Valor",
    title = "Distribución de variables por tipo de vino"
  )