library(ggplot2)
library(pastecs)
library(car)
## Cargando paquete requerido: carData
library(gridExtra)
library(corrplot)
## corrplot 0.95 loaded
# =============================================================================
# PASO 1: CARGAR Y EXPLORAR LOS DATOS
# =============================================================================
library(readr)
# Read the red-wine quality CSV from the working directory into a tibble.
# Column types are guessed by readr.
dat <- read_csv(file = "winequality-red.csv")
## Rows: 1599 Columns: 12
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## dbl (12): fixed acidity, volatile acidity, citric acid, residual sugar, chlo...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Preview the first six rows of the table.
head(dat, n = 6)
## # A tibble: 6 × 12
##   `fixed acidity` `volatile acidity` `citric acid` `residual sugar` chlorides
##             <dbl>              <dbl>         <dbl>            <dbl>     <dbl>
## 1             7.4               0.7           0                 1.9     0.076
## 2             7.8               0.88          0                 2.6     0.098
## 3             7.8               0.76          0.04              2.3     0.092
## 4            11.2               0.28          0.56              1.9     0.075
## 5             7.4               0.7           0                 1.9     0.076
## 6             7.4               0.66          0                 1.8     0.075
## # ℹ 7 more variables: `free sulfur dioxide` <dbl>,
## #   `total sulfur dioxide` <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>, quality <dbl>
# Preview the last six rows of the table.
tail(dat, n = 6)
## # A tibble: 6 × 12
##   `fixed acidity` `volatile acidity` `citric acid` `residual sugar` chlorides
##             <dbl>              <dbl>         <dbl>            <dbl>     <dbl>
## 1             6.8              0.62           0.08              1.9     0.068
## 2             6.2              0.6            0.08              2       0.09 
## 3             5.9              0.55           0.1               2.2     0.062
## 4             6.3              0.51           0.13              2.3     0.076
## 5             5.9              0.645          0.12              2       0.075
## 6             6                0.31           0.47              3.6     0.067
## # ℹ 7 more variables: `free sulfur dioxide` <dbl>,
## #   `total sulfur dioxide` <dbl>, density <dbl>, pH <dbl>, sulphates <dbl>,
## #   alcohol <dbl>, quality <dbl>
# Show the type and the first values of every column.
str(dat)
## spc_tbl_ [1,599 × 12] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
##  $ fixed acidity       : num [1:1599] 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
##  $ volatile acidity    : num [1:1599] 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
##  $ citric acid         : num [1:1599] 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
##  $ residual sugar      : num [1:1599] 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
##  $ chlorides           : num [1:1599] 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
##  $ free sulfur dioxide : num [1:1599] 11 25 15 17 11 13 15 15 9 17 ...
##  $ total sulfur dioxide: num [1:1599] 34 67 54 60 34 40 59 21 18 102 ...
##  $ density             : num [1:1599] 0.998 0.997 0.997 0.998 0.998 ...
##  $ pH                  : num [1:1599] 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
##  $ sulphates           : num [1:1599] 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
##  $ alcohol             : num [1:1599] 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
##  $ quality             : num [1:1599] 5 5 5 6 5 5 5 7 7 5 ...
##  - attr(*, "spec")=
##   .. cols(
##   ..   `fixed acidity` = col_double(),
##   ..   `volatile acidity` = col_double(),
##   ..   `citric acid` = col_double(),
##   ..   `residual sugar` = col_double(),
##   ..   chlorides = col_double(),
##   ..   `free sulfur dioxide` = col_double(),
##   ..   `total sulfur dioxide` = col_double(),
##   ..   density = col_double(),
##   ..   pH = col_double(),
##   ..   sulphates = col_double(),
##   ..   alcohol = col_double(),
##   ..   quality = col_double()
##   .. )
##  - attr(*, "problems")=<externalptr>
# List the column names; several contain spaces and therefore need backticks
# when accessed with `$`.
names(dat)
##  [1] "fixed acidity"        "volatile acidity"     "citric acid"         
##  [4] "residual sugar"       "chlorides"            "free sulfur dioxide" 
##  [7] "total sulfur dioxide" "density"              "pH"                  
## [10] "sulphates"            "alcohol"              "quality"
# Number of rows in the loaded table.
nrow(dat)
## [1] 1599
# Number of columns.
ncol(dat)
## [1] 12
# Both dimensions at once (rows, columns).
dim(dat)
## [1] 1599   12
# Total count of NA cells across all columns.
cat("Valores faltantes:", sum(is.na(dat)), "\n")
## Valores faltantes: 0
# =============================================================================
# PASO 2: FUNCIÓN PRINCIPAL - ESTADÍSTICAS POR VARIABLE
# =============================================================================
# Print a descriptive-statistics report for one numeric variable.
#
# Args:
#   variable:   Numeric vector to summarise. Missing values are dropped by
#               every statistic computed directly in this function.
#   nombre_var: Label printed in the report header.
#   unidad:     Unit suffix printed after values on the variable's own scale
#               (the default "" prints none). Variance, coefficient of
#               variation, skewness and kurtosis are printed without it.
#
# Returns:
#   NULL, invisibly. The function is called for the text it writes with
#   cat() and print().
analizar_variable <- function(variable, nombre_var, unidad = "") {

  # round(x, 4) collapses every non-zero value below 0.00005 to 0 (the
  # variance of a tightly clustered variable was reported as "0"), so values
  # smaller than 10^-digitos keep `digitos` significant digits instead.
  redondear <- function(x, digitos = 4) {
    if (!is.na(x) && x != 0 && abs(x) < 10^(-digitos)) {
      signif(x, digitos)
    } else {
      round(x, digitos)
    }
  }

  separador <- rep("=", 60)
  cat(separador, "\n")
  cat("  VARIABLE:", nombre_var, "\n")
  cat(separador, "\n\n")

  # Statistics reused in several lines of the report.
  media    <- mean(variable, na.rm = TRUE)
  minimo   <- min(variable, na.rm = TRUE)
  maximo   <- max(variable, na.rm = TRUE)
  desv_est <- sd(variable, na.rm = TRUE)

  # Mode: most frequent observed value. which.max() returns the first
  # maximum, so with ties the smallest of the tied values is reported.
  tab  <- table(variable)
  moda <- as.numeric(names(tab)[which.max(tab)])

  # -- Central tendency --
  cat("── MEDIDAS DE TENDENCIA CENTRAL ──\n")
  cat("  Media:       ", round(media, 4), unidad, "\n")
  cat("  Mediana:     ", round(median(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Moda:        ", moda, unidad, "\n\n")

  # -- Dispersion --
  cat("── MEDIDAS DE DISPERSIÓN ──\n")
  cat("  Mínimo:      ", round(minimo, 4), unidad, "\n")
  cat("  Máximo:      ", round(maximo, 4), unidad, "\n")
  cat("  Rango:       ", round(maximo - minimo, 4), unidad, "\n")
  cat("  Q1 (25%):    ", round(quantile(variable, 0.25, na.rm = TRUE), 4), unidad, "\n")
  cat("  Q3 (75%):    ", round(quantile(variable, 0.75, na.rm = TRUE), 4), unidad, "\n")
  cat("  IQR:         ", round(IQR(variable, na.rm = TRUE), 4), unidad, "\n")
  cat("  Varianza:    ", redondear(var(variable, na.rm = TRUE)), "\n")
  cat("  Desv. Est.:  ", redondear(desv_est), unidad, "\n")
  cv <- desv_est / media * 100
  cat("  Coef. Var.:  ", round(cv, 2), "%\n\n")

  # -- Shape of the distribution --
  # Only the skewness and kurtosis entries of stat.desc() are used.
  # NOTE(review): norm = TRUE presumably also runs a Shapiro-Wilk test, which
  # base R restricts to 3-5000 non-missing values -- confirm that the inputs
  # stay within that range.
  desc <- stat.desc(variable, norm = TRUE)
  cat("── FORMA DE LA DISTRIBUCIÓN ──\n")
  cat("  Asimetría (skewness):", round(desc["skewness"], 3), "\n")
  cat("  Curtosis  (kurtosis):", round(desc["kurtosis"], 3), "\n\n")

  # -- Percentiles --
  cat("── PERCENTILES ──\n")
  percs <- quantile(variable, probs = c(0.05, 0.10, 0.25, 0.50, 0.75, 0.90, 0.95), na.rm = TRUE)
  print(round(percs, 4))
  cat("\n")

  invisible(NULL)
}

# =============================================================================
# PASO 3: FUNCIÓN DE GRÁFICOS POR VARIABLE
# =============================================================================
# Draw a 2 x 2 panel of diagnostic plots for one variable: histogram,
# boxplot, density curve and normal QQ-plot.
#
# Args:
#   variable:   Vector of values to plot.
#   nombre_var: Label used in the plot titles and axis labels.
#   color_hex:  Colour used for fills and for the QQ points.
#
# Returns:
#   The arranged gtable, invisibly. The panel is drawn on the current
#   graphics device as a side effect.
graficar_variable <- function(variable, nombre_var, color_hex) {

  df_tmp <- data.frame(x = variable)

  # Histogram with a fixed 30 bins.
  p1 <- ggplot(df_tmp, aes(x = x)) +
    geom_histogram(bins = 30, fill = color_hex, color = "white", alpha = 0.85) +
    labs(title = paste("Histograma:", nombre_var),
         x = nombre_var, y = "Frecuencia") +
    theme_minimal()

  # Single vertical boxplot of the values.
  p2 <- ggplot(df_tmp, aes(y = x)) +
    geom_boxplot(fill = color_hex, alpha = 0.7, color = "gray20") +
    labs(title = paste("Boxplot:", nombre_var), y = nombre_var) +
    theme_minimal()

  # Kernel density estimate.
  p3 <- ggplot(df_tmp, aes(x = x)) +
    geom_density(fill = color_hex, alpha = 0.5) +
    labs(title = paste("Densidad:", nombre_var),
         x = nombre_var, y = "Densidad") +
    theme_minimal()

  # Sample quantiles against theoretical quantiles, with reference line.
  p4 <- ggplot(df_tmp, aes(sample = x)) +
    stat_qq(color = color_hex) +
    stat_qq_line(color = "black") +
    labs(title = paste("QQ-Plot:", nombre_var)) +
    theme_minimal()

  # grid.arrange() draws the panel itself. Wrapping it in print() only added
  # the "TableGrob (2 x 2) ..." text summary to the console after each figure.
  panel <- grid.arrange(p1, p2, p3, p4, ncol = 2)
  invisible(panel)
}

# =============================================================================
# PASO 4: ANÁLISIS DE CADA VARIABLE
# =============================================================================

# ── fixed acidity ─────────────────────────────────────────────────────────────
# Text report for fixed acidity, values labelled in g/L.
analizar_variable(dat$`fixed acidity`, "fixed acidity", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: fixed acidity 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        8.3196 g/L 
##   Mediana:      7.9 g/L 
##   Moda:         7.2 g/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       4.6 g/L 
##   Máximo:       15.9 g/L 
##   Rango:        11.3 g/L 
##   Q1 (25%):     7.1 g/L 
##   Q3 (75%):     9.2 g/L 
##   IQR:          2.1 g/L 
##   Varianza:     3.0314 
##   Desv. Est.:   1.7411 g/L 
##   Coef. Var.:   20.93 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 0.981 
##   Curtosis  (kurtosis): 1.12 
## 
## ── PERCENTILES ──
##   5%  10%  25%  50%  75%  90%  95% 
##  6.1  6.5  7.1  7.9  9.2 10.7 11.8
# Histogram, boxplot, density and QQ-plot panel for fixed acidity.
graficar_variable(dat$`fixed acidity`, "fixed acidity", "#C0392B")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── volatile acidity ──────────────────────────────────────────────────────────
# Text report for volatile acidity, values labelled in g/L.
analizar_variable(dat$`volatile acidity`, "volatile acidity", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: volatile acidity 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        0.5278 g/L 
##   Mediana:      0.52 g/L 
##   Moda:         0.6 g/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       0.12 g/L 
##   Máximo:       1.58 g/L 
##   Rango:        1.46 g/L 
##   Q1 (25%):     0.39 g/L 
##   Q3 (75%):     0.64 g/L 
##   IQR:          0.25 g/L 
##   Varianza:     0.0321 
##   Desv. Est.:   0.1791 g/L 
##   Coef. Var.:   33.92 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 0.67 
##   Curtosis  (kurtosis): 1.213 
## 
## ── PERCENTILES ──
##    5%   10%   25%   50%   75%   90%   95% 
## 0.270 0.310 0.390 0.520 0.640 0.745 0.840
# Histogram, boxplot, density and QQ-plot panel for volatile acidity.
graficar_variable(dat$`volatile acidity`, "volatile acidity", "#E67E22")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── citric acid ───────────────────────────────────────────────────────────────
# Text report for citric acid, values labelled in g/L.
analizar_variable(dat$`citric acid`, "citric acid", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: citric acid 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        0.271 g/L 
##   Mediana:      0.26 g/L 
##   Moda:         0 g/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       0 g/L 
##   Máximo:       1 g/L 
##   Rango:        1 g/L 
##   Q1 (25%):     0.09 g/L 
##   Q3 (75%):     0.42 g/L 
##   IQR:          0.33 g/L 
##   Varianza:     0.0379 
##   Desv. Est.:   0.1948 g/L 
##   Coef. Var.:   71.89 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 0.318 
##   Curtosis  (kurtosis): -0.793 
## 
## ── PERCENTILES ──
##    5%   10%   25%   50%   75%   90%   95% 
## 0.000 0.010 0.090 0.260 0.420 0.522 0.600
# Histogram, boxplot, density and QQ-plot panel for citric acid.
graficar_variable(dat$`citric acid`, "citric acid", "#F1C40F")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── residual sugar ────────────────────────────────────────────────────────────
# Text report for residual sugar, values labelled in g/L.
analizar_variable(dat$`residual sugar`, "residual sugar", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: residual sugar 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        2.5388 g/L 
##   Mediana:      2.2 g/L 
##   Moda:         2 g/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       0.9 g/L 
##   Máximo:       15.5 g/L 
##   Rango:        14.6 g/L 
##   Q1 (25%):     1.9 g/L 
##   Q3 (75%):     2.6 g/L 
##   IQR:          0.7 g/L 
##   Varianza:     1.9879 
##   Desv. Est.:   1.4099 g/L 
##   Coef. Var.:   55.54 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 4.532 
##   Curtosis  (kurtosis): 28.485 
## 
## ── PERCENTILES ──
##   5%  10%  25%  50%  75%  90%  95% 
## 1.59 1.70 1.90 2.20 2.60 3.60 5.10
# Histogram, boxplot, density and QQ-plot panel for residual sugar.
graficar_variable(dat$`residual sugar`, "residual sugar", "#27AE60")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── chlorides ─────────────────────────────────────────────────────────────────
# Text report for chlorides, values labelled in g/L.
analizar_variable(dat$chlorides, "chlorides", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: chlorides 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        0.0875 g/L 
##   Mediana:      0.079 g/L 
##   Moda:         0.08 g/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       0.012 g/L 
##   Máximo:       0.611 g/L 
##   Rango:        0.599 g/L 
##   Q1 (25%):     0.07 g/L 
##   Q3 (75%):     0.09 g/L 
##   IQR:          0.02 g/L 
##   Varianza:     0.0022 
##   Desv. Est.:   0.0471 g/L 
##   Coef. Var.:   53.81 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 5.67 
##   Curtosis  (kurtosis): 41.526 
## 
## ── PERCENTILES ──
##     5%    10%    25%    50%    75%    90%    95% 
## 0.0540 0.0600 0.0700 0.0790 0.0900 0.1090 0.1261
# Histogram, boxplot, density and QQ-plot panel for chlorides.
graficar_variable(dat$chlorides, "chlorides", "#2980B9")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── free sulfur dioxide ───────────────────────────────────────────────────────
# Text report for free sulfur dioxide, values labelled in mg/L.
analizar_variable(dat$`free sulfur dioxide`, "free sulfur dioxide", "mg/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: free sulfur dioxide 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        15.8749 mg/L 
##   Mediana:      14 mg/L 
##   Moda:         6 mg/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       1 mg/L 
##   Máximo:       72 mg/L 
##   Rango:        71 mg/L 
##   Q1 (25%):     7 mg/L 
##   Q3 (75%):     21 mg/L 
##   IQR:          14 mg/L 
##   Varianza:     109.4149 
##   Desv. Est.:   10.4602 mg/L 
##   Coef. Var.:   65.89 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 1.248 
##   Curtosis  (kurtosis): 2.007 
## 
## ── PERCENTILES ──
##  5% 10% 25% 50% 75% 90% 95% 
##   4   5   7  14  21  31  35
# Histogram, boxplot, density and QQ-plot panel for free sulfur dioxide.
graficar_variable(dat$`free sulfur dioxide`, "free sulfur dioxide", "#8E44AD")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── total sulfur dioxide ──────────────────────────────────────────────────────
# Text report for total sulfur dioxide, values labelled in mg/L.
analizar_variable(dat$`total sulfur dioxide`, "total sulfur dioxide", "mg/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: total sulfur dioxide 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        46.4678 mg/L 
##   Mediana:      38 mg/L 
##   Moda:         28 mg/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       6 mg/L 
##   Máximo:       289 mg/L 
##   Rango:        283 mg/L 
##   Q1 (25%):     22 mg/L 
##   Q3 (75%):     62 mg/L 
##   IQR:          40 mg/L 
##   Varianza:     1082.102 
##   Desv. Est.:   32.8953 mg/L 
##   Coef. Var.:   70.79 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 1.513 
##   Curtosis  (kurtosis): 3.786 
## 
## ── PERCENTILES ──
##    5%   10%   25%   50%   75%   90%   95% 
##  11.0  14.0  22.0  38.0  62.0  93.2 112.1
# Histogram, boxplot, density and QQ-plot panel for total sulfur dioxide.
graficar_variable(dat$`total sulfur dioxide`, "total sulfur dioxide", "#D35400")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── density ───────────────────────────────────────────────────────────────────
# Text report for density, values labelled in g/cm3.
analizar_variable(dat$density, "density", "g/cm3")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: density 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        0.9967 g/cm3 
##   Mediana:      0.9968 g/cm3 
##   Moda:         0.9972 g/cm3 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       0.9901 g/cm3 
##   Máximo:       1.0037 g/cm3 
##   Rango:        0.0136 g/cm3 
##   Q1 (25%):     0.9956 g/cm3 
##   Q3 (75%):     0.9978 g/cm3 
##   IQR:          0.0022 g/cm3 
##   Varianza:     0 
##   Desv. Est.:   0.0019 g/cm3 
##   Coef. Var.:   0.19 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 0.071 
##   Curtosis  (kurtosis): 0.923 
## 
## ── PERCENTILES ──
##     5%    10%    25%    50%    75%    90%    95% 
## 0.9936 0.9946 0.9956 0.9968 0.9978 0.9991 1.0000
# Histogram, boxplot, density and QQ-plot panel for density.
graficar_variable(dat$density, "density", "#1ABC9C")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── pH ────────────────────────────────────────────────────────────────────────
# Text report for pH; the empty unit string (same as the default) prints no
# unit suffix.
analizar_variable(dat$pH, "pH", "")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: pH 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        3.3111  
##   Mediana:      3.31  
##   Moda:         3.3  
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       2.74  
##   Máximo:       4.01  
##   Rango:        1.27  
##   Q1 (25%):     3.21  
##   Q3 (75%):     3.4  
##   IQR:          0.19  
##   Varianza:     0.0238 
##   Desv. Est.:   0.1544  
##   Coef. Var.:   4.66 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 0.193 
##   Curtosis  (kurtosis): 0.796 
## 
## ── PERCENTILES ──
##   5%  10%  25%  50%  75%  90%  95% 
## 3.06 3.12 3.21 3.31 3.40 3.51 3.57
# Histogram, boxplot, density and QQ-plot panel for pH.
graficar_variable(dat$pH, "pH", "#2ECC71")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── sulphates ─────────────────────────────────────────────────────────────────
# Text report for sulphates, values labelled in g/L.
analizar_variable(dat$sulphates, "sulphates", "g/L")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: sulphates 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        0.6581 g/L 
##   Mediana:      0.62 g/L 
##   Moda:         0.6 g/L 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       0.33 g/L 
##   Máximo:       2 g/L 
##   Rango:        1.67 g/L 
##   Q1 (25%):     0.55 g/L 
##   Q3 (75%):     0.73 g/L 
##   IQR:          0.18 g/L 
##   Varianza:     0.0287 
##   Desv. Est.:   0.1695 g/L 
##   Coef. Var.:   25.76 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 2.424 
##   Curtosis  (kurtosis): 11.662 
## 
## ── PERCENTILES ──
##   5%  10%  25%  50%  75%  90%  95% 
## 0.47 0.50 0.55 0.62 0.73 0.85 0.93
# Histogram, boxplot, density and QQ-plot panel for sulphates.
graficar_variable(dat$sulphates, "sulphates", "#E74C3C")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── alcohol ───────────────────────────────────────────────────────────────────
# Text report for alcohol, values labelled in % vol.
analizar_variable(dat$alcohol, "alcohol", "% vol")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
##   VARIABLE: alcohol 
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = 
## 
## ── MEDIDAS DE TENDENCIA CENTRAL ──
##   Media:        10.423 % vol 
##   Mediana:      10.2 % vol 
##   Moda:         9.5 % vol 
## 
## ── MEDIDAS DE DISPERSIÓN ──
##   Mínimo:       8.4 % vol 
##   Máximo:       14.9 % vol 
##   Rango:        6.5 % vol 
##   Q1 (25%):     9.5 % vol 
##   Q3 (75%):     11.1 % vol 
##   IQR:          1.6 % vol 
##   Varianza:     1.1356 
##   Desv. Est.:   1.0657 % vol 
##   Coef. Var.:   10.22 %
## 
## ── FORMA DE LA DISTRIBUCIÓN ──
##   Asimetría (skewness): 0.859 
##   Curtosis  (kurtosis): 0.192 
## 
## ── PERCENTILES ──
##   5%  10%  25%  50%  75%  90%  95% 
##  9.2  9.3  9.5 10.2 11.1 12.0 12.5
# Histogram, boxplot, density and QQ-plot panel for alcohol.
graficar_variable(dat$alcohol, "alcohol", "#9B59B6")

## TableGrob (2 x 2) "arrange": 4 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
## 3 3 (2-2,1-1) arrange gtable[layout]
## 4 4 (2-2,2-2) arrange gtable[layout]
# ── quality ───────────────────────────────────────────────────────────────────
# quality is summarised with frequency tables rather than with
# analizar_variable(); the header below repeats that function's banner format.
cat(rep("=", 60), "\n")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
cat("  VARIABLE: quality\n")
##   VARIABLE: quality
cat(rep("=", 60), "\n\n")
## = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = = =
cat("Tabla de frecuencias:\n")
## Tabla de frecuencias:
# Absolute count of wines per quality score.
print(table(dat$quality))
## 
##   3   4   5   6   7   8 
##  10  53 681 638 199  18
cat("\nProporciones (%):\n")
## 
## Proporciones (%):
# Share of each quality score as a percentage, rounded to two decimals.
print(round(prop.table(table(dat$quality)) * 100, 2))
## 
##     3     4     5     6     7     8 
##  0.63  3.31 42.59 39.90 12.45  1.13
# Categorical copy of quality so ggplot2 treats each score as a discrete bar
# and fill level. This appends a 13th column to dat; the later positional
# selections (1:11, 1:12) do not include it.
dat$quality_factor <- factor(dat$quality)

# Absolute frequency of each quality score.
p1 <- ggplot(dat, aes(x = quality_factor, fill = quality_factor)) +
  geom_bar(color = "white", alpha = 0.9) +
  scale_fill_brewer(palette = "RdYlGn") +
  labs(title = "Distribución de Calidad", x = "Calidad", y = "Frecuencia") +
  theme_minimal()

# Relative frequency: each bar's count divided by the total count, with the
# y axis formatted as percentages.
p2 <- ggplot(dat, aes(x = quality_factor,
                      y = after_stat(count) / sum(after_stat(count)),
                      fill = quality_factor)) +
  geom_bar(color = "white", alpha = 0.9) +
  scale_fill_brewer(palette = "RdYlGn") +
  scale_y_continuous(labels = scales::percent) +
  labs(title = "Proporción de Calidad", x = "Calidad", y = "Proporción") +
  theme_minimal()

# grid.arrange() draws the two charts side by side by itself. Wrapping it in
# print() only added the "TableGrob (1 x 2) ..." text summary to the output.
grid.arrange(p1, p2, ncol = 2)

## TableGrob (1 x 2) "arrange": 2 grobs
##   z     cells    name           grob
## 1 1 (1-1,1-1) arrange gtable[layout]
## 2 2 (1-1,2-2) arrange gtable[layout]
# =============================================================================
# PASO 5: RESUMEN ESTADÍSTICO GENERAL
# =============================================================================
# Six-number summary of the twelve original columns, selected by position.
summary(object = dat[, 1:12])
##  fixed acidity   volatile acidity  citric acid    residual sugar  
##  Min.   : 4.60   Min.   :0.1200   Min.   :0.000   Min.   : 0.900  
##  1st Qu.: 7.10   1st Qu.:0.3900   1st Qu.:0.090   1st Qu.: 1.900  
##  Median : 7.90   Median :0.5200   Median :0.260   Median : 2.200  
##  Mean   : 8.32   Mean   :0.5278   Mean   :0.271   Mean   : 2.539  
##  3rd Qu.: 9.20   3rd Qu.:0.6400   3rd Qu.:0.420   3rd Qu.: 2.600  
##  Max.   :15.90   Max.   :1.5800   Max.   :1.000   Max.   :15.500  
##    chlorides       free sulfur dioxide total sulfur dioxide    density      
##  Min.   :0.01200   Min.   : 1.00       Min.   :  6.00       Min.   :0.9901  
##  1st Qu.:0.07000   1st Qu.: 7.00       1st Qu.: 22.00       1st Qu.:0.9956  
##  Median :0.07900   Median :14.00       Median : 38.00       Median :0.9968  
##  Mean   :0.08747   Mean   :15.87       Mean   : 46.47       Mean   :0.9967  
##  3rd Qu.:0.09000   3rd Qu.:21.00       3rd Qu.: 62.00       3rd Qu.:0.9978  
##  Max.   :0.61100   Max.   :72.00       Max.   :289.00       Max.   :1.0037  
##        pH          sulphates         alcohol         quality     
##  Min.   :2.740   Min.   :0.3300   Min.   : 8.40   Min.   :3.000  
##  1st Qu.:3.210   1st Qu.:0.5500   1st Qu.: 9.50   1st Qu.:5.000  
##  Median :3.310   Median :0.6200   Median :10.20   Median :6.000  
##  Mean   :3.311   Mean   :0.6581   Mean   :10.42   Mean   :5.636  
##  3rd Qu.:3.400   3rd Qu.:0.7300   3rd Qu.:11.10   3rd Qu.:6.000  
##  Max.   :4.010   Max.   :2.0000   Max.   :14.90   Max.   :8.000
# Full pastecs descriptive table for the first eleven columns (quality is
# left out), including the normality statistics, rounded to three decimals.
round(stat.desc(dat[, 1:11], norm = TRUE), digits = 3)
##              fixed acidity volatile acidity citric acid residual sugar
## nbr.val           1599.000         1599.000    1599.000       1599.000
## nbr.null             0.000            0.000     132.000          0.000
## nbr.na               0.000            0.000       0.000          0.000
## min                  4.600            0.120       0.000          0.900
## max                 15.900            1.580       1.000         15.500
## range               11.300            1.460       1.000         14.600
## sum              13303.100          843.985     433.290       4059.550
## median               7.900            0.520       0.260          2.200
## mean                 8.320            0.528       0.271          2.539
## SE.mean              0.044            0.004       0.005          0.035
## CI.mean.0.95         0.085            0.009       0.010          0.069
## var                  3.031            0.032       0.038          1.988
## std.dev              1.741            0.179       0.195          1.410
## coef.var             0.209            0.339       0.719          0.555
## skewness             0.981            0.670       0.318          4.532
## skew.2SE             8.014            5.477       2.596         37.028
## kurtosis             1.120            1.213      -0.793         28.485
## kurt.2SE             4.577            4.957      -3.242        116.435
## normtest.W           0.942            0.974       0.955          0.566
## normtest.p           0.000            0.000       0.000          0.000
##              chlorides free sulfur dioxide total sulfur dioxide  density
## nbr.val       1599.000            1599.000             1599.000 1599.000
## nbr.null         0.000               0.000                0.000    0.000
## nbr.na           0.000               0.000                0.000    0.000
## min              0.012               1.000                6.000    0.990
## max              0.611              72.000              289.000    1.004
## range            0.599              71.000              283.000    0.014
## sum            139.859           25384.000            74302.000 1593.798
## median           0.079              14.000               38.000    0.997
## mean             0.087              15.875               46.468    0.997
## SE.mean          0.001               0.262                0.823    0.000
## CI.mean.0.95     0.002               0.513                1.614    0.000
## var              0.002             109.415             1082.102    0.000
## std.dev          0.047              10.460               32.895    0.002
## coef.var         0.538               0.659                0.708    0.002
## skewness         5.670               1.248                1.513    0.071
## skew.2SE        46.322              10.198               12.359    0.581
## kurtosis        41.526               2.007                3.786    0.923
## kurt.2SE       169.740               8.205               15.474    3.771
## normtest.W       0.484               0.902                0.873    0.991
## normtest.p       0.000               0.000                0.000    0.000
##                    pH sulphates   alcohol
## nbr.val      1599.000  1599.000  1599.000
## nbr.null        0.000     0.000     0.000
## nbr.na          0.000     0.000     0.000
## min             2.740     0.330     8.400
## max             4.010     2.000    14.900
## range           1.270     1.670     6.500
## sum          5294.470  1052.380 16666.350
## median          3.310     0.620    10.200
## mean            3.311     0.658    10.423
## SE.mean         0.004     0.004     0.027
## CI.mean.0.95    0.008     0.008     0.052
## var             0.024     0.029     1.136
## std.dev         0.154     0.170     1.066
## coef.var        0.047     0.258     0.102
## skewness        0.193     2.424     0.859
## skew.2SE        1.579    19.805     7.020
## kurtosis        0.796    11.662     0.192
## kurt.2SE        3.253    47.667     0.783
## normtest.W      0.993     0.833     0.929
## normtest.p      0.000     0.000     0.000
# =============================================================================
# PASO 6: ANÁLISIS MULTIVARIADO
# =============================================================================

# ── Coeficientes de variación comparados ──────────────────────────────────────
# Coefficient of variation (sd / mean, in percent, two decimals) for the
# first eleven columns, sorted from most to least variable.
cv_tabla <- data.frame(
  Variable = names(dat)[1:11],
  # vapply() guarantees one numeric value per column, whereas sapply() may
  # silently change its return type. na.rm = TRUE matches the NA handling
  # used in analizar_variable().
  CV_pct   = vapply(
    dat[, 1:11],
    function(x) round(sd(x, na.rm = TRUE) / mean(x, na.rm = TRUE) * 100, 2),
    numeric(1)
  )
)
cv_tabla <- cv_tabla[order(cv_tabla$CV_pct, decreasing = TRUE), ]
print(cv_tabla)
##                                  Variable CV_pct
## citric acid                   citric acid  71.89
## total sulfur dioxide total sulfur dioxide  70.79
## free sulfur dioxide   free sulfur dioxide  65.89
## residual sugar             residual sugar  55.54
## chlorides                       chlorides  53.81
## volatile acidity         volatile acidity  33.92
## sulphates                       sulphates  25.76
## fixed acidity               fixed acidity  20.93
## alcohol                           alcohol  10.22
## pH                                     pH   4.66
## density                           density   0.19
# Horizontal bar chart of the coefficients of variation in cv_tabla: bars are
# ordered by CV, labelled with their percentage and shaded from blue (low)
# to red (high).
print(
  ggplot(
    data = cv_tabla,
    mapping = aes(x = reorder(Variable, CV_pct), y = CV_pct, fill = CV_pct)
  ) +
    geom_col(alpha = 0.85, show.legend = FALSE) +
    geom_text(mapping = aes(label = paste0(CV_pct, "%")), size = 3.5, hjust = -0.1) +
    scale_fill_gradient(low = "#3498DB", high = "#C0392B") +
    coord_flip() +
    ggtitle("Coeficiente de Variación por Variable") +
    xlab("Variable") +
    ylab("CV (%)") +
    theme_minimal()
)

# ── Correlation matrix ────────────────────────────────────────────────────────
# Pairwise Pearson correlations among the first 12 columns of `dat`. Only the
# console printout is rounded to 3 decimals; the stored matrix keeps full
# precision.
matriz_cor <- cor(dat[, seq_len(12)], method = "pearson")
print(round(matriz_cor, digits = 3))
##                      fixed acidity volatile acidity citric acid residual sugar
## fixed acidity                1.000           -0.256       0.672          0.115
## volatile acidity            -0.256            1.000      -0.552          0.002
## citric acid                  0.672           -0.552       1.000          0.144
## residual sugar               0.115            0.002       0.144          1.000
## chlorides                    0.094            0.061       0.204          0.056
## free sulfur dioxide         -0.154           -0.011      -0.061          0.187
## total sulfur dioxide        -0.113            0.076       0.036          0.203
## density                      0.668            0.022       0.365          0.355
## pH                          -0.683            0.235      -0.542         -0.086
## sulphates                    0.183           -0.261       0.313          0.006
## alcohol                     -0.062           -0.202       0.110          0.042
## quality                      0.124           -0.391       0.226          0.014
##                      chlorides free sulfur dioxide total sulfur dioxide density
## fixed acidity            0.094              -0.154               -0.113   0.668
## volatile acidity         0.061              -0.011                0.076   0.022
## citric acid              0.204              -0.061                0.036   0.365
## residual sugar           0.056               0.187                0.203   0.355
## chlorides                1.000               0.006                0.047   0.201
## free sulfur dioxide      0.006               1.000                0.668  -0.022
## total sulfur dioxide     0.047               0.668                1.000   0.071
## density                  0.201              -0.022                0.071   1.000
## pH                      -0.265               0.070               -0.066  -0.342
## sulphates                0.371               0.052                0.043   0.149
## alcohol                 -0.221              -0.069               -0.206  -0.496
## quality                 -0.129              -0.051               -0.185  -0.175
##                          pH sulphates alcohol quality
## fixed acidity        -0.683     0.183  -0.062   0.124
## volatile acidity      0.235    -0.261  -0.202  -0.391
## citric acid          -0.542     0.313   0.110   0.226
## residual sugar       -0.086     0.006   0.042   0.014
## chlorides            -0.265     0.371  -0.221  -0.129
## free sulfur dioxide   0.070     0.052  -0.069  -0.051
## total sulfur dioxide -0.066     0.043  -0.206  -0.185
## density              -0.342     0.149  -0.496  -0.175
## pH                    1.000    -0.197   0.206  -0.058
## sulphates            -0.197     1.000   0.094   0.251
## alcohol               0.206     0.094   1.000   0.476
## quality              -0.058     0.251   0.476   1.000
# Colour-tile plot of the upper triangle of `matriz_cor` with the coefficients
# overlaid, using a 200-step palette interpolated from blue through white to
# red. Axis labels are black and rotated 45 degrees.
corrplot(
  corr        = matriz_cor,
  method      = "color",
  type        = "upper",
  col         = colorRampPalette(
    colors = c("#3498DB", "white", "#C0392B")
  )(200),
  addCoef.col = "black",
  number.cex  = 0.60,
  tl.col      = "black",
  tl.srt      = 45,
  mar         = c(0, 0, 1, 0),
  title       = "Matriz de Correlación - Vino Tinto"
)

# ── Quality vs key variables ──────────────────────────────────────────────────
# One box plot per key variable, with the observations grouped by
# `dat$quality_factor` (assumed to be created earlier in the script).
vars_clave <- c("alcohol", "volatile acidity", "sulphates", "citric acid")

plots_cal <- lapply(vars_clave, function(v) {
  # Copy the two columns into a small frame with plain names so aes() does not
  # need to reference a column whose name contains a space.
  df_p <- data.frame(cal = dat$quality_factor, val = dat[[v]])
  ggplot(df_p, aes(x = cal, y = val, fill = cal)) +
    geom_boxplot(alpha = 0.8, show.legend = FALSE) +
    scale_fill_brewer(palette = "RdYlGn") +
    labs(title = v, x = "Calidad", y = v) +
    theme_minimal(base_size = 10)
})

# grid.arrange() draws the 2-column grid on the current device by itself and
# returns the layout invisibly. It is deliberately not wrapped in print():
# printing that return value only dumps the TableGrob structure to the console.
grid.arrange(grobs = plots_cal, ncol = 2,
             top = "Variables clave vs Calidad")