# Package dependencies. Each call sits on its own line: R cannot parse
# several calls on a single line unless they are separated by `;`.
library(tidyverse)
library(ggplot2)
library(corrplot)
library(gridExtra)
library(knitr)
library(kableExtra)
library(scales)
library(RColorBrewer)
library(GGally)



# Read the comma-separated file (first row is the header) and, in the same
# step, replace the header names with twelve snake_case column names.
wine <- setNames(
  read.csv(file = "winequality-red.csv", header = TRUE, sep = ","),
  c(
    "fixed_acidity", "volatile_acidity", "citric_acid",
    "residual_sugar", "chlorides", "free_sulfur_dioxide",
    "total_sulfur_dioxide", "density", "pH",
    "sulphates", "alcohol", "quality"
  )
)


# Print the number of rows and columns of the data set to the console.
cat("Dimensiones del dataset:", dim(wine)[1L], "filas x", dim(wine)[2L],
    "columnas\n")

# Show the first eight observations as a styled table, rounded to three
# decimal places.
wine %>%
  head(8) %>%
  kable(caption = "Primeras 8 observaciones del dataset", digits = 3) %>%
  kable_styling(
    full_width = TRUE,
    font_size = 12,
    bootstrap_options = c("striped", "hover", "condensed")
  )

# Data dictionary: one row per column of `wine`, with its storage class and a
# human-readable description (the descriptions are listed in column order).
var_desc <- data.frame(
  Variable = colnames(wine),
  # vapply() guarantees exactly one character value per column; the first
  # class is used if a column ever carries more than one.
  Tipo = vapply(wine, function(col) class(col)[1L], character(1)),
  Descripción = c(
    "Acidez fija (ácido tartárico, g/dm³)",
    "Acidez volátil (ácido acético, g/dm³)",
    "Ácido cítrico (g/dm³)",
    "Azúcar residual (g/dm³)",
    "Cloruros (NaCl, g/dm³)",
    "Dióxido de azufre libre (mg/dm³)",
    "Dióxido de azufre total (mg/dm³)",
    "Densidad (g/cm³)",
    "pH",
    "Sulfatos (g/dm³)",
    "Alcohol (% vol.)",
    "Calidad (puntuación 0–10)"
  ),
  # `Tipo` is a named vector; without this, data.frame() would adopt those
  # names as row names and kable() would print them as an extra column that
  # duplicates `Variable`.
  row.names = NULL
)

# Render the data dictionary as a styled table.
kable(var_desc, caption = "Descripción de variables del dataset") %>%
  kable_styling(bootstrap_options = c("striped", "hover"),
                full_width = TRUE, font_size = 12)

# Descriptive statistics for every column of `wine`.
#
# summarise(across(...)) yields one wide row with columns named
# "<variable>_<statistic>". pivot_longer() splits each name at its LAST
# underscore (variable names contain underscores themselves, statistic names
# do not), and pivot_wider() then spreads the statistics into columns, giving
# one row per variable.
desc_stats <- wine %>%
  summarise(across(everything(), list(
    N = ~n(),
    Media = ~round(mean(.), 3),
    Mediana = ~round(median(.), 3),
    DE = ~round(sd(.), 3),
    Min = ~round(min(.), 3),
    # names = FALSE drops the "25%" / "75%" label so a plain number is stored.
    Q1 = ~round(quantile(., 0.25, names = FALSE), 3),
    Q3 = ~round(quantile(., 0.75, names = FALSE), 3),
    Max = ~round(max(.), 3)
  ))) %>%
  pivot_longer(
    everything(),
    names_to = c("Variable", "Estadístico"),
    names_sep = "_(?=[^_]+$)"
  ) %>%
  pivot_wider(names_from = "Estadístico", values_from = "value")

# Render the statistics as a styled table.
kable(desc_stats, caption = "Estadísticas descriptivas por variable") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = TRUE,
    font_size = 11
  )

# Missing-value summary: for each column of `wine`, the number of NA entries
# and the share of NA entries as a percentage string rounded to two decimals.
missing_df <- data.frame(
  Variable = colnames(wine),
  Valores_NA = vapply(wine, function(col) sum(is.na(col)), integer(1)),
  Porcentaje = paste0(
    round(vapply(wine, function(col) mean(is.na(col)), numeric(1)) * 100, 2),
    "%"
  ),
  # `Valores_NA` is a named vector; without this, data.frame() would adopt
  # those names as row names and kable() would print them as an extra column
  # that duplicates `Variable`.
  row.names = NULL
)

# Render the summary, highlighting every row that has at least one NA.
kable(missing_df, caption = "Conteo de valores faltantes por variable") %>%
  kable_styling(
    bootstrap_options = c("striped", "hover"),
    full_width = FALSE
  ) %>%
  row_spec(which(missing_df$Valores_NA > 0), background = "#FFEB3B")