# Libraries
library(ggplot2)
The red wine quality dataset is a collection of data related to red variants of Portuguese “Vinho Verde” wine. It provides valuable insights into the physicochemical attributes of these wines and their corresponding sensory quality ratings.
Source: Kaggle (www.kaggle.com)
It consists of variables such as:
Fixed Acidity: The concentration of non-volatile acids in the wine.
Volatile Acidity: The concentration of acetic acid in the wine.
Citric Acid: The amount of citric acid in the wine.
Residual Sugar: The remaining sugar content in the wine after fermentation.
Chlorides: The concentration of salts (chlorides) in the wine.
Free Sulfur Dioxide: The level of free sulfur dioxide, which serves as an antimicrobial and antioxidant agent.
Total Sulfur Dioxide: The total amount of sulfur dioxide, including both free and bound forms.
Density: The density of the wine, which is related to its alcohol content and sweetness.
pH: A measure of the wine’s acidity or alkalinity.
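Sulphates: The concentration of sulphates (e.g. potassium sulphate), an additive that can contribute to sulfur dioxide levels.
Alcohol: The alcohol content of the wine (% by volume).
Quality: The sensory quality rating of the wine, recorded as an integer score.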
# Import database
# The data directory is kept in a variable and joined to the file name with
# file.path(), so the script no longer has to change the session's working
# directory with setwd() just to resolve a bare file name.
# Adjust `data_dir` to wherever the Kaggle archive was extracted.
data_dir <- "/Users/olix/Desktop/Advanced Visualization/archive"
wines <- read.csv(
  file.path(data_dir, "winequality-red.csv"),
  sep = ",",
  dec = "."
)

# Preview the first rows to confirm the import worked.
head(wines)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
## quality
## 1 5
## 2 5
## 3 5
## 4 6
## 5 5
## 6 5
# Show the data frame's dimensions and the type of each column.
str(wines)
## 'data.frame': 1599 obs. of 12 variables:
## $ fixed.acidity : num 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
## $ volatile.acidity : num 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
## $ citric.acid : num 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
## $ residual.sugar : num 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
## $ chlorides : num 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
## $ free.sulfur.dioxide : num 11 25 15 17 11 13 15 15 9 17 ...
## $ total.sulfur.dioxide: num 34 67 54 60 34 40 59 21 18 102 ...
## $ density : num 0.998 0.997 0.997 0.998 0.998 ...
## $ pH : num 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
## $ sulphates : num 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
## $ alcohol : num 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
## $ quality : int 5 5 5 6 5 5 5 7 7 5 ...
# Scatter plot of quality against alcohol. Points are shaded by quality on a
# light-to-dark green gradient.
p <- ggplot(data = wines, aes(x = alcohol, y = quality)) +
  geom_point(aes(color = quality), size = 3, alpha = 0.6) +
  labs(
    x = "Alcohol by volume",
    y = "Quality",
    title = "Quality vs Alcohol by Volume"
  ) +
  scale_color_gradient(low = "lightgreen", high = "darkgreen")

# customize the appearance of the plot
# Line widths use `linewidth`; the `size` argument of element_line() is
# deprecated as of ggplot2 3.4.0.
p1 <- p +
  theme_minimal() +
  theme(
    plot.title = element_text(color = "darkgreen", size = 15, face = "bold"),
    axis.title = element_text(face = "bold", color = "darkgreen"),
    axis.text = element_text(size = 12),
    axis.line = element_line(color = "black", linewidth = 0.5),
    panel.grid.major = element_line(linewidth = 0.5, colour = "lightgreen"),
    # Listed after panel.grid.major so only the horizontal grid lines remain.
    panel.grid.major.x = element_blank(),
    panel.background = element_rect(fill = "white", colour = "white"),
    plot.background = element_rect(fill = "aliceblue")
  )

# remove legend for the color scale
# guides() rejects FALSE as of ggplot2 3.3.4; "none" is the supported value.
p1 <- p1 + guides(color = "none")
p1
# Scatter plot of pH against fixed acidity. Points are shaded by fixed acidity
# on a light-to-dark green gradient.
p <- ggplot(data = wines, aes(x = fixed.acidity, y = pH)) +
  geom_point(aes(color = fixed.acidity), size = 3, alpha = 0.6) +
  theme_minimal() +
  labs(
    x = "Fixed Acidity",
    y = "pH",
    title = "Fixed Acidity vs pH"
  ) +
  scale_color_gradient(low = "lightgreen", high = "darkgreen")

# the appearance of the plot
# Line widths use `linewidth`; the `size` argument of element_line() is
# deprecated as of ggplot2 3.4.0.
p1 <- p +
  theme(
    plot.title = element_text(color = "darkgreen", size = 18, face = "bold"),
    axis.title = element_text(face = "bold", color = "darkgreen"),
    axis.text = element_text(size = 12),
    axis.line = element_line(color = "black", linewidth = 0.5),
    panel.grid.minor.y = element_line(linewidth = 0.5, colour = "gray80"),
    panel.background = element_rect(fill = "white", colour = "white"),
    plot.background = element_rect(fill = "aliceblue")
  )

# remove legend for the color scale
# guides() rejects FALSE as of ggplot2 3.3.4; "none" is the supported value.
p1 <- p1 + guides(color = "none")
p1
# Scatter plot of residual sugar against alcohol. Points are shaded by
# residual sugar on a light-to-dark green gradient.
p <- ggplot(data = wines, aes(x = alcohol, y = residual.sugar)) +
  geom_point(aes(color = residual.sugar), size = 3, alpha = 0.6) +
  labs(
    x = "Alcohol by volume",
    y = "Residual Sugar",
    title = "Relationship between Alcohol and Residual Sugar"
  ) +
  scale_color_gradient(low = "lightgreen", high = "darkgreen")

# customize the appearance of the plot
# Line widths use `linewidth`; the `size` argument of element_line() is
# deprecated as of ggplot2 3.4.0.
p1 <- p +
  theme_minimal() +
  theme(
    plot.title = element_text(color = "darkgreen", size = 18, face = "bold"),
    axis.title = element_text(face = "bold", color = "darkgreen"),
    axis.text = element_text(size = 12),
    axis.line = element_line(color = "black", linewidth = 0.5),
    # Add grid lines
    panel.grid.major = element_line(linewidth = 0.5, colour = "lightgreen"),
    # Remove vertical grid lines
    panel.grid.major.x = element_blank(),
    panel.background = element_rect(fill = "white", colour = "white"),
    plot.background = element_rect(fill = "aliceblue")
  )

# remove legend for the color scale
# guides() rejects FALSE as of ggplot2 3.3.4; "none" is the supported value.
p1 <- p1 + guides(color = "none")
p1