## corrplot 0.95 loaded
# Load the wine data from a semicolon-separated file that uses "." as the
# decimal mark and has a header row; text columns are converted to factors.
datos <- read.table(
  file = "combinado.csv",
  header = TRUE,
  sep = ";",
  dec = ".",
  stringsAsFactors = TRUE
)

# Per-column descriptive statistics (quantiles for numeric columns, level
# counts for factors).
summary(datos)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 3.800 Min. :0.0800 Min. :0.0000 Min. : 0.600
## 1st Qu.: 6.400 1st Qu.:0.2300 1st Qu.:0.2500 1st Qu.: 1.800
## Median : 7.000 Median :0.2900 Median :0.3100 Median : 3.000
## Mean : 7.215 Mean :0.3397 Mean :0.3186 Mean : 5.443
## 3rd Qu.: 7.700 3rd Qu.:0.4000 3rd Qu.:0.3900 3rd Qu.: 8.100
## Max. :15.900 Max. :1.5800 Max. :1.6600 Max. :65.800
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.00900 Min. : 1.00 Min. : 6.0 Min. :0.9871
## 1st Qu.:0.03800 1st Qu.: 17.00 1st Qu.: 77.0 1st Qu.:0.9923
## Median :0.04700 Median : 29.00 Median :118.0 Median :0.9949
## Mean :0.05603 Mean : 30.53 Mean :115.7 Mean :0.9947
## 3rd Qu.:0.06500 3rd Qu.: 41.00 3rd Qu.:156.0 3rd Qu.:0.9970
## Max. :0.61100 Max. :289.00 Max. :440.0 Max. :1.0390
## pH sulphates alcohol quality color
## Min. :2.720 Min. :0.2200 Min. : 8.00 Min. :3.000 blanco:4898
## 1st Qu.:3.110 1st Qu.:0.4300 1st Qu.: 9.50 1st Qu.:5.000 rojo :1599
## Median :3.210 Median :0.5100 Median :10.30 Median :6.000
## Mean :3.219 Mean :0.5313 Mean :10.49 Mean :5.818
## 3rd Qu.:3.320 3rd Qu.:0.6000 3rd Qu.:11.30 3rd Qu.:6.000
## Max. :4.010 Max. :2.0000 Max. :14.90 Max. :9.000
# Build the numeric-only subset used for the correlation analysis.
# Columns are taken from `datos` by name instead of calling attach(datos):
# attach() copies every column onto the search path, where the names can mask
# (or be masked by) other objects, and the copies go stale if `datos` changes.
# NOTE(review): if code outside this section relies on bare column names made
# available by attach(datos), switch it to datos$<column> or with(datos, ...).
newDatos <- datos[, c(
  "fixed.acidity",
  "volatile.acidity",
  "citric.acid",
  "residual.sugar",
  "chlorides",
  "free.sulfur.dioxide",
  "total.sulfur.dioxide",
  "density",
  "pH",
  "sulphates",
  "alcohol",
  "quality"
)]

# Pairwise Pearson correlations (cor()'s default method), computed only on
# rows without missing values.
r <- cor(na.omit(newDatos))

# Show the correlation matrix with the coefficients printed as numbers.
corrplot(r, method = "number")
library(ggplot2)

# Scatter plot of quality against alcohol with a red least-squares line
# (no confidence band) drawn on top of the points.
ggplot(data = datos, mapping = aes(x = alcohol, y = quality)) +
  theme_minimal() +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
# ggplot2 is loaded again here so this chunk can be run on its own.
library(ggplot2)

# Scatter plot of quality against density with a red least-squares line
# (no confidence band) drawn on top of the points.
ggplot(data = datos, mapping = aes(x = density, y = quality)) +
  theme_minimal() +
  geom_point() +
  geom_smooth(method = "lm", colour = "red", se = FALSE)
## `geom_smooth()` using formula = 'y ~ x'
En el mapa de calor se observa que la valoración final de la calidad del vino está influenciada principalmente por el contenido de alcohol, con una correlación positiva moderada, y por la densidad, con una correlación negativa también moderada. Esto sugiere que los vinos con mayor grado alcohólico tienden a recibir mejores calificaciones, mientras que aquellos con mayor densidad presentan una calidad inferior.
# Reshape to long format: every column except `color` is stacked into a
# `name` (variable) / `value` pair, so each variable can get its own facet.
# names_to / values_to are spelled out with their default values because the
# plot below refers to those column names.
tus_datos_long <- pivot_longer(
  data = datos,
  cols = -color,
  names_to = "name",
  values_to = "value"
)

# One boxplot panel per variable, comparing the wine colors; each panel has
# its own y scale because the variables are measured on different ranges.
ggplot(
  data = tus_datos_long,
  mapping = aes(x = color, y = value, fill = color)
) +
  theme_minimal() +
  labs(
    title = "Distribución de variables por tipo de vino",
    x = "Color del vino",
    y = "Valor"
  ) +
  facet_wrap(facets = ~ name, scales = "free_y") +
  geom_boxplot()