COMENTARIOS SOBRE EL ANÁLISIS: El análisis realizado sobre una base de datos de 1599 registros de vino y 12 variables fisicoquímicas permitió identificar patrones importantes relacionados con la composición y calidad del producto. No se encontraron valores faltantes, lo que garantiza la integridad de la información analizada.

Además, los resultados muestran que la calidad del vino está influenciada principalmente por el contenido de alcohol y la acidez volátil. El análisis de correlación mostró que el alcohol presenta la asociación positiva más alta con la calidad (r=0.476), mientras que la acidez volátil registra la relación negativa más fuerte (r=−0.391). Estos resultados sugieren que los vinos con mayor graduación alcohólica y menores niveles de acidez volátil tienden a obtener mejores calificaciones de calidad. Por eso, estas variables constituyen factores críticos para futuras estrategias de mejora y modelamiento predictivo.

library(ggplot2)
library(pastecs)
library(car)
## Cargando paquete requerido: carData
library(gridExtra)
library(corrplot)
## corrplot 0.95 loaded
# Se carga el archivo csv para realizar el análisis

library(readr)
dat <- read_csv("winequality-red.csv")
## Rows: 1599 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (12): fixed acidity, volatile acidity, citric acid, residual sugar, chlo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
View(dat)

head(dat, 6)
## # A tibble: 6 × 12
##   `fixed acidity` `volatile acidity` `citric acid` `residual sugar` chlorides
##             <dbl>              <dbl>         <dbl>            <dbl>     <dbl>
## 1             7.4               0.7           0                 1.9     0.076
## 2             7.8               0.88          0                 2.6     0.098
## 3             7.8               0.76          0.04              2.3     0.092
## 4            11.2               0.28          0.56              1.9     0.075
## 5             7.4               0.7           0                 1.9     0.076
## 6             7.4               0.66          0                 1.8     0.075
## # ℹ 7 more variables: `free sulfur dioxide` <dbl>,
## #   `total sulfur dioxide` <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>, quality <dbl>
tail(dat, 6)
## # A tibble: 6 × 12
##   `fixed acidity` `volatile acidity` `citric acid` `residual sugar` chlorides
##             <dbl>              <dbl>         <dbl>            <dbl>     <dbl>
## 1             6.8              0.62           0.08              1.9     0.068
## 2             6.2              0.6            0.08              2       0.09 
## 3             5.9              0.55           0.1               2.2     0.062
## 4             6.3              0.51           0.13              2.3     0.076
## 5             5.9              0.645          0.12              2       0.075
## 6             6                0.31           0.47              3.6     0.067
## # ℹ 7 more variables: `free sulfur dioxide` <dbl>,
## #   `total sulfur dioxide` <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>, quality <dbl>
str(dat)
## spc_tbl_ [1,599 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ fixed acidity       : num [1:1599] 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile acidity    : num [1:1599] 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric acid         : num [1:1599] 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual sugar      : num [1:1599] 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num [1:1599] 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free sulfur dioxide : num [1:1599] 11 25 15 17 11 13 15 15 9 17 ...
##  $ total sulfur dioxide: num [1:1599] 34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num [1:1599] 0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num [1:1599] 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num [1:1599] 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num [1:1599] 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : num [1:1599] 5 5 5 6 5 5 5 7 7 5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `fixed acidity` = col_double(),
##   ..   `volatile acidity` = col_double(),
##   ..   `citric acid` = col_double(),
##   ..   `residual sugar` = col_double(),
##   ..   chlorides = col_double(),
##   ..   `free sulfur dioxide` = col_double(),
##   ..   `total sulfur dioxide` = col_double(),
##   ..   density = col_double(),
##   ..   pH = col_double(),
##   ..   sulphates = col_double(),
##   ..   alcohol = col_double(),
##   ..   quality = col_double()
##   .. )
##  - attr(*, "problems")=<pointer: 0x00000243eb6ff8a0>
names(dat)
##  [1] "fixed acidity"        "volatile acidity"     "citric acid"         
##  [4] "residual sugar"       "chlorides"            "free sulfur dioxide" 
##  [7] "total sulfur dioxide" "density"              "pH"                  
## [10] "sulphates"            "alcohol"              "quality"
nrow(dat)
## [1] 1599
ncol(dat)
## [1] 12
dim(dat)
## [1] 1599   12
cat("Valores faltantes:", sum(is.na(dat)), "\n")
## Valores faltantes: 0
# Se crea una función para analizar cada variable

analizar_variable <- function(variable, nombre_var, unidad = "") {
  
  cat(rep("=", 60), "\n")
  cat("  VARIABLE:", nombre_var, "\n")
  cat(rep("=", 60), "\n\n")
  
  # Moda
  tab  <- table(variable)
  moda <- as.numeric(names(tab)[which.max(tab)])
  
  # Tendencia central
  cat("MEDIDAS DE TENDENCIA CENTRAL \n")
  cat("  Media:       ", round(mean(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Mediana:     ", round(median(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Moda:        ", moda, unidad, "\n\n")
  
  # Dispersión
  cat("MEDIDAS DE DISPERSIÓN \n")
  cat("  Mínimo:      ", round(min(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Máximo:      ", round(max(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Rango:       ", round(max(variable, na.rm = TRUE) - min(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Q1 (25%):    ", round(quantile(variable, 0.25, na.rm = TRUE), 4), unidad, "\n")
  cat("  Q3 (75%):    ", round(quantile(variable, 0.75, na.rm = TRUE), 4), unidad, "\n")
  cat("  IQR:         ", round(IQR(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Varianza:    ", round(var(variable, na.rm = TRUE), 4), "\n")
  cat("  Desv. Est.:  ", round(sd(variable, na.rm = TRUE), 4), unidad, "\n")
  cv <- sd(variable, na.rm = TRUE) / mean(variable, na.rm = TRUE) * 100
  cat("  Coef. Var.:  ", round(cv, 2), "%\n\n")
  
  # Forma distribución
  desc <- stat.desc(variable, norm = TRUE)
  cat("FORMA DE LA DISTRIBUCIÓN \n")
  cat("  Asimetría (skewness):", round(desc["skewness"], 3), "\n")
  cat("  Curtosis  (kurtosis):", round(desc["kurtosis"], 3), "\n\n")
  
  # Percentiles
  cat("PERCENTILES \n")
  percs <- quantile(variable, probs = c(0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95), na.rm = TRUE)
  print(round(percs, 4))
  cat("\n")
}

# Se crea una función para visualizar los gráficos para cada variable

graficar_variable <- function(variable, nombre_var, color_hex) {
  
  df_tmp <- data.frame(x = variable)
  
  p1 <- ggplot(df_tmp, aes(x = x)) +
    geom_histogram(bins = 30, fill = color_hex, color = "white", alpha = 0.85) +
    labs(title = paste("Histograma:", nombre_var),
         x = nombre_var, y = "Frecuencia") +
    theme_minimal()
  
  p2 <- ggplot(df_tmp, aes(y = x)) +
    geom_boxplot(fill = color_hex, alpha = 0.7, color = "gray20") +
    labs(title = paste("Boxplot:", nombre_var), y = nombre_var) +
    theme_minimal()
  
  p3 <- ggplot(df_tmp, aes(x = x)) +
    geom_density(fill = color_hex, alpha = 0.5) +
    labs(title = paste("Densidad:", nombre_var),
         x = nombre_var, y = "Densidad") +
    theme_minimal()
  
  p4 <- ggplot(df_tmp, aes(sample = x)) +
    stat_qq(color = color_hex) +
    stat_qq_line(color = "black") +
    labs(title = paste("QQ-Plot:", nombre_var)) +
    theme_minimal()
  
  print(grid.arrange(p1, p2, p3, p4, ncol = 2))
}

# Análisis de cada variable

# ── fixed acidity ─────────────────────────────────────────────────────────────
analizar_variable(dat$`fixed acidity`, "fixed acidity", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: fixed acidity 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        8.3196 g/L 
##   Mediana:      7.9 g/L 
##   Moda:         7.2 g/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       4.6 g/L 
##   Máximo:       15.9 g/L 
##   Rango:        11.3 g/L 
##   Q1 (25%):     7.1 g/L 
##   Q3 (75%):     9.2 g/L 
##   IQR:          2.1 g/L 
##   Varianza:     3.0314 
##   Desv. Est.:   1.7411 g/L 
##   Coef. Var.:   20.93 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 0.981 
##   Curtosis  (kurtosis): 1.12 
## 
## PERCENTILES 
##   5%  10%  25%  50%  75%  90%  95% 
##  6.1  6.5  7.1  7.9  9.2 10.7 11.8
graficar_variable(dat$`fixed acidity`, "fixed acidity", "#C0392B")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── volatile acidity ──────────────────────────────────────────────────────────
analizar_variable(dat$`volatile acidity`, "volatile acidity", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: volatile acidity 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        0.5278 g/L 
##   Mediana:      0.52 g/L 
##   Moda:         0.6 g/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       0.12 g/L 
##   Máximo:       1.58 g/L 
##   Rango:        1.46 g/L 
##   Q1 (25%):     0.39 g/L 
##   Q3 (75%):     0.64 g/L 
##   IQR:          0.25 g/L 
##   Varianza:     0.0321 
##   Desv. Est.:   0.1791 g/L 
##   Coef. Var.:   33.92 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 0.67 
##   Curtosis  (kurtosis): 1.213 
## 
## PERCENTILES 
##    5%   10%   25%   50%   75%   90%   95% 
## 0.270 0.310 0.390 0.520 0.640 0.745 0.840
graficar_variable(dat$`volatile acidity`, "volatile acidity", "#E67E22")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── citric acid ───────────────────────────────────────────────────────────────
analizar_variable(dat$`citric acid`, "citric acid", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: citric acid 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        0.271 g/L 
##   Mediana:      0.26 g/L 
##   Moda:         0 g/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       0 g/L 
##   Máximo:       1 g/L 
##   Rango:        1 g/L 
##   Q1 (25%):     0.09 g/L 
##   Q3 (75%):     0.42 g/L 
##   IQR:          0.33 g/L 
##   Varianza:     0.0379 
##   Desv. Est.:   0.1948 g/L 
##   Coef. Var.:   71.89 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 0.318 
##   Curtosis  (kurtosis): -0.793 
## 
## PERCENTILES 
##    5%   10%   25%   50%   75%   90%   95% 
## 0.000 0.010 0.090 0.260 0.420 0.522 0.600
graficar_variable(dat$`citric acid`, "citric acid", "#F1C40F")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── residual sugar ────────────────────────────────────────────────────────────
analizar_variable(dat$`residual sugar`, "residual sugar", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: residual sugar 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        2.5388 g/L 
##   Mediana:      2.2 g/L 
##   Moda:         2 g/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       0.9 g/L 
##   Máximo:       15.5 g/L 
##   Rango:        14.6 g/L 
##   Q1 (25%):     1.9 g/L 
##   Q3 (75%):     2.6 g/L 
##   IQR:          0.7 g/L 
##   Varianza:     1.9879 
##   Desv. Est.:   1.4099 g/L 
##   Coef. Var.:   55.54 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 4.532 
##   Curtosis  (kurtosis): 28.485 
## 
## PERCENTILES 
##   5%  10%  25%  50%  75%  90%  95% 
## 1.59 1.70 1.90 2.20 2.60 3.60 5.10
graficar_variable(dat$`residual sugar`, "residual sugar", "#27AE60")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── chlorides ─────────────────────────────────────────────────────────────────
analizar_variable(dat$chlorides, "chlorides", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: chlorides 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        0.0875 g/L 
##   Mediana:      0.079 g/L 
##   Moda:         0.08 g/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       0.012 g/L 
##   Máximo:       0.611 g/L 
##   Rango:        0.599 g/L 
##   Q1 (25%):     0.07 g/L 
##   Q3 (75%):     0.09 g/L 
##   IQR:          0.02 g/L 
##   Varianza:     0.0022 
##   Desv. Est.:   0.0471 g/L 
##   Coef. Var.:   53.81 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 5.67 
##   Curtosis  (kurtosis): 41.526 
## 
## PERCENTILES 
##     5%    10%    25%    50%    75%    90%    95% 
## 0.0540 0.0600 0.0700 0.0790 0.0900 0.1090 0.1261
graficar_variable(dat$chlorides, "chlorides", "#2980B9")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── free sulfur dioxide ───────────────────────────────────────────────────────
analizar_variable(dat$`free sulfur dioxide`, "free sulfur dioxide", "mg/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: free sulfur dioxide 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        15.8749 mg/L 
##   Mediana:      14 mg/L 
##   Moda:         6 mg/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       1 mg/L 
##   Máximo:       72 mg/L 
##   Rango:        71 mg/L 
##   Q1 (25%):     7 mg/L 
##   Q3 (75%):     21 mg/L 
##   IQR:          14 mg/L 
##   Varianza:     109.4149 
##   Desv. Est.:   10.4602 mg/L 
##   Coef. Var.:   65.89 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 1.248 
##   Curtosis  (kurtosis): 2.007 
## 
## PERCENTILES 
##  5% 10% 25% 50% 75% 90% 95% 
##   4   5   7  14  21  31  35
graficar_variable(dat$`free sulfur dioxide`, "free sulfur dioxide", "#8E44AD")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── total sulfur dioxide ──────────────────────────────────────────────────────
analizar_variable(dat$`total sulfur dioxide`, "total sulfur dioxide", "mg/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: total sulfur dioxide 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        46.4678 mg/L 
##   Mediana:      38 mg/L 
##   Moda:         28 mg/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       6 mg/L 
##   Máximo:       289 mg/L 
##   Rango:        283 mg/L 
##   Q1 (25%):     22 mg/L 
##   Q3 (75%):     62 mg/L 
##   IQR:          40 mg/L 
##   Varianza:     1082.102 
##   Desv. Est.:   32.8953 mg/L 
##   Coef. Var.:   70.79 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 1.513 
##   Curtosis  (kurtosis): 3.786 
## 
## PERCENTILES 
##    5%   10%   25%   50%   75%   90%   95% 
##  11.0  14.0  22.0  38.0  62.0  93.2 112.1
graficar_variable(dat$`total sulfur dioxide`, "total sulfur dioxide", "#D35400")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── density ───────────────────────────────────────────────────────────────────
analizar_variable(dat$density, "density", "g/cm3")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: density 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        0.9967 g/cm3 
##   Mediana:      0.9968 g/cm3 
##   Moda:         0.9972 g/cm3 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       0.9901 g/cm3 
##   Máximo:       1.0037 g/cm3 
##   Rango:        0.0136 g/cm3 
##   Q1 (25%):     0.9956 g/cm3 
##   Q3 (75%):     0.9978 g/cm3 
##   IQR:          0.0022 g/cm3 
##   Varianza:     0 
##   Desv. Est.:   0.0019 g/cm3 
##   Coef. Var.:   0.19 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 0.071 
##   Curtosis  (kurtosis): 0.923 
## 
## PERCENTILES 
##     5%    10%    25%    50%    75%    90%    95% 
## 0.9936 0.9946 0.9956 0.9968 0.9978 0.9991 1.0000
graficar_variable(dat$density, "density", "#1ABC9C")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── pH ────────────────────────────────────────────────────────────────────────
analizar_variable(dat$pH, "pH", "")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: pH 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        3.3111  
##   Mediana:      3.31  
##   Moda:         3.3  
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       2.74  
##   Máximo:       4.01  
##   Rango:        1.27  
##   Q1 (25%):     3.21  
##   Q3 (75%):     3.4  
##   IQR:          0.19  
##   Varianza:     0.0238 
##   Desv. Est.:   0.1544  
##   Coef. Var.:   4.66 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 0.193 
##   Curtosis  (kurtosis): 0.796 
## 
## PERCENTILES 
##   5%  10%  25%  50%  75%  90%  95% 
## 3.06 3.12 3.21 3.31 3.40 3.51 3.57
graficar_variable(dat$pH, "pH", "#2ECC71")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── sulphates ─────────────────────────────────────────────────────────────────
analizar_variable(dat$sulphates, "sulphates", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: sulphates 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        0.6581 g/L 
##   Mediana:      0.62 g/L 
##   Moda:         0.6 g/L 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       0.33 g/L 
##   Máximo:       2 g/L 
##   Rango:        1.67 g/L 
##   Q1 (25%):     0.55 g/L 
##   Q3 (75%):     0.73 g/L 
##   IQR:          0.18 g/L 
##   Varianza:     0.0287 
##   Desv. Est.:   0.1695 g/L 
##   Coef. Var.:   25.76 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 2.424 
##   Curtosis  (kurtosis): 11.662 
## 
## PERCENTILES 
##   5%  10%  25%  50%  75%  90%  95% 
## 0.47 0.50 0.55 0.62 0.73 0.85 0.93
graficar_variable(dat$sulphates, "sulphates", "#E74C3C")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── alcohol ───────────────────────────────────────────────────────────────────
analizar_variable(dat$alcohol, "alcohol", "% vol")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: alcohol 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## MEDIDAS DE TENDENCIA CENTRAL 
##   Media:        10.423 % vol 
##   Mediana:      10.2 % vol 
##   Moda:         9.5 % vol 
## 
## MEDIDAS DE DISPERSIÓN 
##   Mínimo:       8.4 % vol 
##   Máximo:       14.9 % vol 
##   Rango:        6.5 % vol 
##   Q1 (25%):     9.5 % vol 
##   Q3 (75%):     11.1 % vol 
##   IQR:          1.6 % vol 
##   Varianza:     1.1356 
##   Desv. Est.:   1.0657 % vol 
##   Coef. Var.:   10.22 %
## 
## FORMA DE LA DISTRIBUCIÓN 
##   Asimetría (skewness): 0.859 
##   Curtosis  (kurtosis): 0.192 
## 
## PERCENTILES 
##   5%  10%  25%  50%  75%  90%  95% 
##  9.2  9.3  9.5 10.2 11.1 12.0 12.5
graficar_variable(dat$alcohol, "alcohol", "#9B59B6")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── quality ───────────────────────────────────────────────────────────────────
cat(rep("=", 60), "\n")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
cat("  VARIABLE: quality\n")
##   VARIABLE: quality
cat(rep("=", 60), "\n\n")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
cat("Tabla de frecuencias:\n")
## Tabla de frecuencias:
print(table(dat$quality))
## 
##   3   4   5   6   7   8 
##  10  53 681 638 199  18
cat("\nProporciones (%):\n")
## 
## Proporciones (%):
print(round(prop.table(table(dat$quality)) * 100, 2))
## 
##     3     4     5     6     7     8 
##  0.63  3.31 42.59 39.90 12.45  1.13
dat$quality_factor <- factor(dat$quality)

p1 <- ggplot(dat, aes(x = quality_factor, fill = quality_factor)) +
  geom_bar(color = "white", alpha = 0.9) +
  scale_fill_brewer(palette = "RdYlGn") +
  labs(title = "Distribución de Calidad", x = "Calidad", y = "Frecuencia") +
  theme_minimal()

p2 <- ggplot(dat, aes(x = quality_factor,
                      y = after_stat(count) / sum(after_stat(count)),
                      fill = quality_factor)) +
  geom_bar(color = "white", alpha = 0.9) +
  scale_fill_brewer(palette = "RdYlGn") +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Proporción de Calidad", x = "Calidad", y = "Proporción") +
  theme_minimal()

print(grid.arrange(p1, p2, ncol = 2))

## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
# Resumen estadístico

summary(dat[, 1:12])
##  fixed acidity   volatile acidity  citric acid    residual sugar  
##  Min.   : 4.60   Min.   :0.1200   Min.   :0.000   Min.   : 0.900  
##  1st Qu.: 7.10   1st Qu.:0.3900   1st Qu.:0.090   1st Qu.: 1.900  
##  Median : 7.90   Median :0.5200   Median :0.260   Median : 2.200  
##  Mean   : 8.32   Mean   :0.5278   Mean   :0.271   Mean   : 2.539  
##  3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.420   3rd Qu.: 2.600  
##  Max.   :15.90   Max.   :1.5800   Max.   :1.000   Max.   :15.500  
##    chlorides       free sulfur dioxide total sulfur dioxide    density      
##  Min.   :0.01200   Min.   : 1.00       Min.   :  6.00       Min.   :0.9901  
##  1st Qu.:0.07000   1st Qu.: 7.00       1st Qu.: 22.00       1st Qu.:0.9956  
##  Median :0.07900   Median :14.00       Median : 38.00       Median :0.9968  
##  Mean   :0.08747   Mean   :15.87       Mean   : 46.47       Mean   :0.9967  
##  3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 62.00       3rd Qu.:0.9978  
##  Max.   :0.61100   Max.   :72.00       Max.   :289.00       Max.   :1.0037  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.740   Min.   :0.3300   Min.   : 8.40   Min.   :3.000  
##  1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.310   Median :0.6200   Median :10.20   Median :6.000  
##  Mean   :3.311   Mean   :0.6581   Mean   :10.42   Mean   :5.636  
##  3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.10   3rd Qu.:6.000  
##  Max.   :4.010   Max.   :2.0000   Max.   :14.90   Max.   :8.000
round(stat.desc(dat[, 1:11], norm = TRUE), 3)
##              fixed acidity volatile acidity citric acid residual sugar
## nbr.val           1599.000         1599.000    1599.000       1599.000
## nbr.null             0.000            0.000     132.000          0.000
## nbr.na               0.000            0.000       0.000          0.000
## min                  4.600            0.120       0.000          0.900
## max                 15.900            1.580       1.000         15.500
## range               11.300            1.460       1.000         14.600
## sum              13303.100          843.985     433.290       4059.550
## median               7.900            0.520       0.260          2.200
## mean                 8.320            0.528       0.271          2.539
## SE.mean              0.044            0.004       0.005          0.035
## CI.mean.0.95         0.085            0.009       0.010          0.069
## var                  3.031            0.032       0.038          1.988
## std.dev              1.741            0.179       0.195          1.410
## coef.var             0.209            0.339       0.719          0.555
## skewness             0.981            0.670       0.318          4.532
## skew.2SE             8.014            5.477       2.596         37.028
## kurtosis             1.120            1.213      -0.793         28.485
## kurt.2SE             4.577            4.957      -3.242        116.435
## normtest.W           0.942            0.974       0.955          0.566
## normtest.p           0.000            0.000       0.000          0.000
##              chlorides free sulfur dioxide total sulfur dioxide  density
## nbr.val       1599.000            1599.000             1599.000 1599.000
## nbr.null         0.000               0.000                0.000    0.000
## nbr.na           0.000               0.000                0.000    0.000
## min              0.012               1.000                6.000    0.990
## max              0.611              72.000              289.000    1.004
## range            0.599              71.000              283.000    0.014
## sum            139.859           25384.000            74302.000 1593.798
## median           0.079              14.000               38.000    0.997
## mean             0.087              15.875               46.468    0.997
## SE.mean          0.001               0.262                0.823    0.000
## CI.mean.0.95     0.002               0.513                1.614    0.000
## var              0.002             109.415             1082.102    0.000
## std.dev          0.047              10.460               32.895    0.002
## coef.var         0.538               0.659                0.708    0.002
## skewness         5.670               1.248                1.513    0.071
## skew.2SE        46.322              10.198               12.359    0.581
## kurtosis        41.526               2.007                3.786    0.923
## kurt.2SE       169.740               8.205               15.474    3.771
## normtest.W       0.484               0.902                0.873    0.991
## normtest.p       0.000               0.000                0.000    0.000
##                    pH sulphates   alcohol
## nbr.val      1599.000  1599.000  1599.000
## nbr.null        0.000     0.000     0.000
## nbr.na          0.000     0.000     0.000
## min             2.740     0.330     8.400
## max             4.010     2.000    14.900
## range           1.270     1.670     6.500
## sum          5294.470  1052.380 16666.350
## median          3.310     0.620    10.200
## mean            3.311     0.658    10.423
## SE.mean         0.004     0.004     0.027
## CI.mean.0.95    0.008     0.008     0.052
## var             0.024     0.029     1.136
## std.dev         0.154     0.170     1.066
## coef.var        0.047     0.258     0.102
## skewness        0.193     2.424     0.859
## skew.2SE        1.579    19.805     7.020
## kurtosis        0.796    11.662     0.192
## kurt.2SE        3.253    47.667     0.783
## normtest.W      0.993     0.833     0.929
## normtest.p      0.000     0.000     0.000
# Análisis multivariado

# ── Coeficientes de variación comparados ──────────────────────────────────────
cv_tabla <- data.frame(
  Variable = names(dat)[1:11],
  CV_pct   = sapply(dat[, 1:11], function(x) round(sd(x) / mean(x) * 100, 2))
)
cv_tabla <- cv_tabla[order(cv_tabla$CV_pct, decreasing = TRUE), ]
print(cv_tabla)
##                                  Variable CV_pct
## citric acid                   citric acid  71.89
## total sulfur dioxide total sulfur dioxide  70.79
## free sulfur dioxide   free sulfur dioxide  65.89
## residual sugar             residual sugar  55.54
## chlorides                       chlorides  53.81
## volatile acidity         volatile acidity  33.92
## sulphates                       sulphates  25.76
## fixed acidity               fixed acidity  20.93
## alcohol                           alcohol  10.22
## pH                                     pH   4.66
## density                           density   0.19
print(
  ggplot(cv_tabla, aes(x = reorder(Variable, CV_pct), y = CV_pct, fill = CV_pct)) +
    geom_col(show.legend = FALSE, alpha = 0.85) +
    geom_text(aes(label = paste0(CV_pct, "%")), hjust = -0.1, size = 3.5) +
    coord_flip() +
    scale_fill_gradient(low = "#3498DB", high = "#C0392B") +
    labs(title = "Coeficiente de Variación por Variable",
         x = "Variable", y = "CV (%)") +
    theme_minimal()
)

# ── Matriz de correlación ─────────────────────────────────────────────────────
matriz_cor <- cor(dat[, 1:12])
print(round(matriz_cor, 3))
##                      fixed acidity volatile acidity citric acid residual sugar
## fixed acidity                1.000           -0.256       0.672          0.115
## volatile acidity            -0.256            1.000      -0.552          0.002
## citric acid                  0.672           -0.552       1.000          0.144
## residual sugar               0.115            0.002       0.144          1.000
## chlorides                    0.094            0.061       0.204          0.056
## free sulfur dioxide         -0.154           -0.011      -0.061          0.187
## total sulfur dioxide        -0.113            0.076       0.036          0.203
## density                      0.668            0.022       0.365          0.355
## pH                          -0.683            0.235      -0.542         -0.086
## sulphates                    0.183           -0.261       0.313          0.006
## alcohol                     -0.062           -0.202       0.110          0.042
## quality                      0.124           -0.391       0.226          0.014
##                      chlorides free sulfur dioxide total sulfur dioxide density
## fixed acidity            0.094              -0.154               -0.113   0.668
## volatile acidity         0.061              -0.011                0.076   0.022
## citric acid              0.204              -0.061                0.036   0.365
## residual sugar           0.056               0.187                0.203   0.355
## chlorides                1.000               0.006                0.047   0.201
## free sulfur dioxide      0.006               1.000                0.668  -0.022
## total sulfur dioxide     0.047               0.668                1.000   0.071
## density                  0.201              -0.022                0.071   1.000
## pH                      -0.265               0.070               -0.066  -0.342
## sulphates                0.371               0.052                0.043   0.149
## alcohol                 -0.221              -0.069               -0.206  -0.496
## quality                 -0.129              -0.051               -0.185  -0.175
##                          pH sulphates alcohol quality
## fixed acidity        -0.683     0.183  -0.062   0.124
## volatile acidity      0.235    -0.261  -0.202  -0.391
## citric acid          -0.542     0.313   0.110   0.226
## residual sugar       -0.086     0.006   0.042   0.014
## chlorides            -0.265     0.371  -0.221  -0.129
## free sulfur dioxide   0.070     0.052  -0.069  -0.051
## total sulfur dioxide -0.066     0.043  -0.206  -0.185
## density              -0.342     0.149  -0.496  -0.175
## pH                    1.000    -0.197   0.206  -0.058
## sulphates            -0.197     1.000   0.094   0.251
## alcohol               0.206     0.094   1.000   0.476
## quality              -0.058     0.251   0.476   1.000
corrplot(matriz_cor,
         method      = "color",
         type        = "upper",
         tl.col      = "black",
         tl.srt      = 45,
         addCoef.col = "black",
         number.cex  = 0.60,
         col         = colorRampPalette(c("#3498DB", "white", "#C0392B"))(200),
         title       = "Matriz de Correlación - Vino Tinto",
         mar         = c(0, 0, 1, 0))

# ── Calidad vs variables clave ────────────────────────────────────────────────
vars_clave <- c("alcohol", "volatile acidity", "sulphates", "citric acid")

plots_cal <- lapply(vars_clave, function(v) {
  df_p <- data.frame(cal = dat$quality_factor, val = dat[[v]])
  ggplot(df_p, aes(x = cal, y = val, fill = cal)) +
    geom_boxplot(alpha = 0.8, show.legend = FALSE) +
    scale_fill_brewer(palette = "RdYlGn") +
    labs(title = v, x = "Calidad", y = v) +
    theme_minimal(base_size = 10)
})

print(grid.arrange(grobs = plots_cal, ncol = 2,
                   top = "Variables clave vs Calidad"))

## TableGrob (3 x 2) "arrange": 5 grobs
##   z     cells    name                 grob
## 1 1 (2-2,1-1) arrange       gtable[layout]
## 2 2 (2-2,2-2) arrange       gtable[layout]
## 3 3 (3-3,1-1) arrange       gtable[layout]
## 4 4 (3-3,2-2) arrange       gtable[layout]
## 5 5 (1-1,1-2) arrange text[GRID.text.2207]