Importamos la base de datos y la almacenamos en WINE_DB:
# Read the red-wine quality CSV once and bind the resulting data frame to
# both names: winequality.red (the import name) and WINE_DB (the working name).
# NOTE(review): the path is absolute and machine-specific; confirm it exists
# on the machine running this script.
WINE_DB <- winequality.red <- read.csv(
  file = "C:/Users/erick/OneDrive/Desktop/2-StatisticsProgramingForBusinessAnalytics/Clases/winequality-red.csv"
)
El análisis exploratorio de la calidad del vino se hará con base en las 12 variables que se mencionan a continuación:
## [1] "fixed.acidity" "volatile.acidity" "citric.acid"
## [4] "residual.sugar" "chlorides" "free.sulfur.dioxide"
## [7] "total.sulfur.dioxide" "density" "pH"
## [10] "sulphates" "alcohol" "quality"
El siguiente código genera un histograma para cada variable numérica y un diagrama de pastel (pie) para cada variable de tipo carácter.
# Draw one plot per column of WINE_DB, two plots per row of the device:
# a histogram for numeric columns and a pie chart of value counts otherwise.
# INDICE_NUM / INDICE_CAR collect the positions of the columns of each kind.
INDICE_NUM <- NULL
INDICE_CAR <- NULL
COLUMNA_W <- dim(WINE_DB)[2]
par(mfrow = c(1, 2))
# seq_len() gives an empty loop for a zero-column data frame; 1:0 would
# iterate over c(1, 0) instead.
for (i in seq_len(COLUMNA_W)) {
  if (is.numeric(WINE_DB[, i])) {
    # Missing values are left out when sizing the bins.
    DIMENSION <- sum(!is.na(WINE_DB[, i]))
    # Bin count: square root of the number of observations, rounded up
    # (the .999 offset keeps an exact square from gaining an extra bin).
    N_INTERVALOS <- trunc(DIMENSION^0.5 + .999)
    LIMITES <- range(WINE_DB[, i], na.rm = TRUE)
    RANGO <- diff(LIMITES)
    texto1 <- paste("HIST", colnames(WINE_DB)[i])
    if (isTRUE(RANGO > 0)) {
      # Equal-width break points from the column minimum to its maximum.
      ANCHO <- RANGO / N_INTERVALOS
      PuntosDeCorte <- LIMITES[1] + (0:N_INTERVALOS) * ANCHO
      # Pin the last break to the maximum so rounding in the multiplication
      # above cannot leave the largest value outside the last bin.
      PuntosDeCorte[N_INTERVALOS + 1] <- LIMITES[2]
      hist(
        WINE_DB[, i],
        breaks = PuntosDeCorte,
        col = i,
        main = texto1,
        xlab = colnames(WINE_DB)[i]
      )
    } else {
      # A constant column has zero range, so equal-width breaks would all
      # coincide; fall back to hist()'s default break selection.
      hist(
        WINE_DB[, i],
        col = i,
        main = texto1,
        xlab = colnames(WINE_DB)[i]
      )
    }
    INDICE_NUM <- c(INDICE_NUM, i)
  } else {
    texto2 <- paste("PIE", colnames(WINE_DB)[i])
    pie(table(WINE_DB[, i]), main = texto2)
    INDICE_CAR <- c(INDICE_CAR, i)
  }
}
# Show which column positions were treated as numeric and as non-numeric.
INDICE_NUM
## [1] 1 2 3 4 5 6 7 8 9 10 11 12
INDICE_CAR
## NULL
NOTA: Si bien se observa que no hay ninguna variable de tipo carácter, el código queda preparado para manejar variables de ese tipo en próximos análisis.
Se evaluará la data WINE_DB en relación con la calidad y el grado de alcohol; para explorarla, cada una de las variables restantes se analizará individualmente frente a estas dos.
# Scatter plots of each remaining variable against alcohol, one panel per
# quality score, with points coloured by alcohol.
# NOTE(review): assumes ggplot2 is attached before this point; the library()
# call is not visible in this section.
VARIABLES_Y <- c(
  "fixed.acidity",
  "volatile.acidity",
  "citric.acid",
  "residual.sugar",
  "chlorides",
  "free.sulfur.dioxide",
  "total.sulfur.dioxide",
  "density",
  "pH",
  "sulphates"
)
for (VARIABLE_Y in VARIABLES_Y) {
  # .data[[name]] looks the column up by its string name inside aes();
  # labs() keeps the plain column name as the y-axis title.
  GRAFICO <- ggplot(
    WINE_DB,
    aes(x = alcohol, y = .data[[VARIABLE_Y]], color = alcohol)
  ) +
    geom_point() +
    facet_wrap(~quality) +
    labs(y = VARIABLE_Y)
  # ggplot objects are not auto-printed inside a for loop, so print explicitly.
  print(GRAFICO)
}
—-FIN