In the following code chunk, I import my data.
# Read the Excel file located at `rute` and store the result as a tibble in `dat`.
# NOTE(review): `rute` must be defined before this line; it is not set in this section.
dat <- tibble(read_excel(rute))
# Data from:
# S. Aeberhard, D. Coomans and O. de Vel, Comparison of Classifiers in High Dimensional Settings, Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of Mathematics and Statistics, James Cook University of North Queensland.
In the first visualization, I will create an interactive boxplot for the alcohol data of each wine type (wine type on the x-axis and alcohol value on the y-axis).
# Keep only the wine type and alcohol columns for the boxplot, then show them.
fig_dat1 <- select(dat, Wine, Alcohol)
fig_dat1
## # A tibble: 178 × 2
## Wine Alcohol
## <chr> <dbl>
## 1 Wine A 14.2
## 2 Wine A 13.2
## 3 Wine A 13.2
## 4 Wine A 14.4
## 5 Wine A 13.2
## 6 Wine A 14.2
## 7 Wine A 14.4
## 8 Wine A 14.1
## 9 Wine A 14.8
## 10 Wine A 13.9
## # ℹ 168 more rows
# Boxplot of alcohol content per wine type, with one fill colour per wine.
cajas_bigotes <- ggplot(fig_dat1, aes(x = Wine, y = Alcohol, fill = Wine)) +
  geom_boxplot(color = "black") +
  scale_fill_manual(
    name = "Wine type",
    values = c("#FFB6C1", "#98FB98", "#ADD8E6")
  ) +
  labs(title = "Alcohol Percentage for Each Type of Wine", x = "Wine type") +
  theme_classic() +
  # Centre the title above the panel.
  theme(plot.title = element_text(hjust = 0.5, size = 14))

# Render the ggplot object through ggplotly() to get the interactive version.
ggplotly(cajas_bigotes)
In the second visualization, overlaid bar charts show the mean content of flavonoids and of non-flavonoid phenols for each wine type (wine types on the x-axis and the corresponding mean values on the y-axis).
# Keep the wine type plus the two phenol-related columns for the bar chart.
fig_dat2 <- select(dat, Wine, Flavanoids, Nonflavanoid.phenols)
fig_dat2
## # A tibble: 178 × 3
## Wine Flavanoids Nonflavanoid.phenols
## <chr> <dbl> <dbl>
## 1 Wine A 3.06 0.28
## 2 Wine A 2.76 0.26
## 3 Wine A 3.24 0.3
## 4 Wine A 3.49 0.24
## 5 Wine A 2.69 0.39
## 6 Wine A 3.39 0.34
## 7 Wine A 2.52 0.3
## 8 Wine A 2.51 0.31
## 9 Wine A 2.98 0.29
## 10 Wine A 3.15 0.22
## # ℹ 168 more rows
# Overlaid bar chart of the mean flavonoid and non-flavonoid phenol content
# for each wine type.
#
# Each layer uses stat = "summary" with fun = "mean", so a bar's height is the
# per-wine mean of its column. With stat = "identity" on the raw rows every
# observation would be stacked, and the bar would show the per-wine sum (which
# also depends on how many samples each wine has).
Flavonoides_y_noflavonoides <- ggplot(fig_dat2, aes(x = Wine)) +
  geom_bar(
    aes(y = Flavanoids, fill = "Flavonoids"),
    stat = "summary", fun = "mean"
  ) +
  # Drawn second so the (smaller) non-flavonoid bars sit on top of the first layer.
  geom_bar(
    aes(y = Nonflavanoid.phenols, fill = "Non flavonoids"),
    stat = "summary", fun = "mean"
  ) +
  # The fill values are constant labels, mapped here to fixed colours.
  scale_fill_manual(
    values = c("Flavonoids" = "#FFA550", "Non flavonoids" = "#FF0200"),
    name = "Content of..."
  ) +
  labs(
    title = "Content of flavonoids and non flavonoids in the wine",
    x = "Wine type",
    y = "Mean content of flavonoids and non flavonoids"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 14))
Flavonoides_y_noflavonoides
In the third graph, I will create a scatter plot to assess the relationship between hue (x-axis) and color intensity (y-axis).
# Keep the wine type, colour intensity and hue columns for the scatter plot.
fig_dat3 <- select(dat, Wine, Color.int, Hue)
fig_dat3
## # A tibble: 178 × 3
## Wine Color.int Hue
## <chr> <dbl> <dbl>
## 1 Wine A 5.64 1.04
## 2 Wine A 4.38 1.05
## 3 Wine A 5.68 1.03
## 4 Wine A 7.8 0.86
## 5 Wine A 4.32 1.04
## 6 Wine A 6.75 1.05
## 7 Wine A 5.25 1.02
## 8 Wine A 5.05 1.06
## 9 Wine A 5.2 1.08
## 10 Wine A 7.22 1.01
## # ℹ 168 more rows
# Scatter plot of colour intensity against hue, coloured by wine type, with a
# smoothed trend (and confidence band) per wine.
Hueandcolor <- ggplot(
  fig_dat3,
  aes(x = Hue, y = Color.int, color = Wine, fill = Wine)
) +
  geom_point() +
  # Apply the same palette and legend title to both colour (points) and fill
  # (smoother bands). Customising only the fill scale leaves the points on the
  # default palette and produces two separate legends ("Wine" and "Wine type").
  scale_fill_manual(
    values = c("#FFB6C1", "#98FB98", "#ADD8E6"),
    name = "Wine type",
    aesthetics = c("colour", "fill")
  ) +
  # Trend lines are forced to black; the band keeps the per-wine fill.
  stat_smooth(color = "black") +
  labs(
    title = "Color intensity and Hue of wine",
    x = "Hue",
    y = "Color intensity"
  ) +
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 14))
Hueandcolor
In the fourth graph, I want to create a ridgeline plot of the Mg data for each type of wine (ordered on the x-axis from minimum to maximum Mg).
# Keep the wine type and Mg columns for the ridgeline plot.
fig_dat4 <- select(dat, Wine, Mg)
fig_dat4
## # A tibble: 178 × 2
## Wine Mg
## <chr> <dbl>
## 1 Wine A 127
## 2 Wine A 100
## 3 Wine A 101
## 4 Wine A 113
## 5 Wine A 118
## 6 Wine A 112
## 7 Wine A 96
## 8 Wine A 121
## 9 Wine A 97
## 10 Wine A 98
## # ℹ 168 more rows
# Fix the order in which the wines appear on the y-axis of the ridgeline plot.
orden_deseado <- c("Wine A", "Wine C", "Wine B")
fig_dat4$Wine <- factor(fig_dat4$Wine, levels = orden_deseado)

# Ridgeline (density) plot of Mg for each wine type, with the fill following
# the x value. after_stat(x) replaces the deprecated `..x..` notation.
Mg <- ggplot(fig_dat4, aes(x = Mg, y = Wine, fill = after_stat(x))) +
  geom_density_ridges_gradient(scale = 1.5, rel_min_height = 0.01) +
  scale_fill_viridis(name = "Mg", option = "H") +
  labs(title = "Mg of each wine type") +
  theme_classic() +
  theme(
    # The gradient repeats the x-axis, so the fill legend is hidden.
    legend.position = "none",
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8),
    plot.title = element_text(hjust = 0.5, size = 14)
  )
Mg
The fifth figure I want to create is a scatter plot where the x-axis represents the phenol content, the y-axis represents the Acl content, the points are colored based on the type of wine, and their size is a measure of malic acid.
# Keep the wine type, phenols, Acl and malic acid columns for the bubble plot.
fig_dat5 <- select(dat, Wine, Phenols, Acl, Malic.acid)
fig_dat5
## # A tibble: 178 × 4
## Wine Phenols Acl Malic.acid
## <chr> <dbl> <dbl> <dbl>
## 1 Wine A 2.8 15.6 1.71
## 2 Wine A 2.65 11.2 1.78
## 3 Wine A 2.8 18.6 2.36
## 4 Wine A 3.85 16.8 1.95
## 5 Wine A 2.8 21 2.59
## 6 Wine A 3.27 15.2 1.76
## 7 Wine A 2.5 14.6 1.87
## 8 Wine A 2.6 17.6 2.15
## 9 Wine A 2.8 14 1.64
## 10 Wine A 2.98 16 1.35
## # ℹ 168 more rows
# Scatter plot of Acl against phenols; point colour encodes the wine type and
# point size encodes the malic acid value.
phenols_Acl_Malic.acid <- ggplot(
  fig_dat5,
  aes(x = Phenols, y = Acl, color = Wine, size = Malic.acid)
) +
  geom_point() +
  # Wine is mapped to colour (no fill aesthetic is used), so the palette and
  # legend title must be set on the colour scale to have any effect.
  scale_color_manual(
    values = c("#FFB6C1", "#98FB98", "#ADD8E6"),
    name = "Wine type"
  ) +
  labs(
    title = "Acl, Phenols, Malic acid content, and Wine type study",
    x = "Phenols",
    y = "Acl"
  ) +
  # No legend.position override here: a theme() call placed before the
  # complete theme_classic() is discarded by it, and the legends are needed to
  # read the colour and size encodings.
  theme_classic() +
  theme(plot.title = element_text(hjust = 0.5, size = 14))
phenols_Acl_Malic.acid
In the sixth plot, I want to create a violin plot for each type of wine, representing the Ash composition. I want the individual data to be displayed, and the mean to be represented by a yellow diamond.
# Keep the wine type and ash columns for the violin plot.
fig_dat6 <- select(dat, Wine, Ash)
fig_dat6
## # A tibble: 178 × 2
## Wine Ash
## <chr> <dbl>
## 1 Wine A 2.43
## 2 Wine A 2.14
## 3 Wine A 2.67
## 4 Wine A 2.5
## 5 Wine A 2.87
## 6 Wine A 2.45
## 7 Wine A 2.45
## 8 Wine A 2.61
## 9 Wine A 2.17
## 10 Wine A 2.27
## # ℹ 168 more rows
# Violin plot of ash content per wine type, with the individual observations
# jittered on top and the per-wine mean marked by a yellow diamond.
Ashes <- ggplot(fig_dat6, aes(x = Wine, y = Ash, fill = Wine)) +
  geom_violin() +
  # Individual observations, spread horizontally so they do not overlap.
  geom_jitter(shape = 16, position = position_jitter(0.2), color = "black") +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", shape = 23, size = 4, fill = "yellow") +
  scale_fill_manual(
    values = c("#FFB6C1", "#98FB98", "#ADD8E6"),
    name = "Wine type"
  ) +
  theme_classic() +
  labs(
    title = "Ash content distribution of each type of Wine",
    x = "Wine type",
    y = "Ash"
  ) +
  theme(plot.title = element_text(hjust = 0.5, size = 14))
Ashes
In the seventh plot, I want to create a correlation map.
# Data for the correlation map: drop the character column `Wine` (a
# correlation needs numeric input, and passing it along only makes ggcorr
# discard it with a warning) and shorten two long names so the axis labels fit.
dat_corr <- dat %>%
  select(-Wine) %>%
  rename(Color = Color.int, Non.flavonoids = Nonflavanoid.phenols)

# Pearson correlation of every pair of variables, drawn as circles.
Correlation <- ggcorr(
  dat_corr,
  method = c("everything", "pearson"),
  geom = "circle",
  hjust = 0.7,
  size = 3,
  layout.exp = 1
) +
  # Title matches the geom actually used (circles, not squares).
  labs(title = "Correlation of each pair of variables as a circle") +
  theme(plot.title = element_text(hjust = 0.5, size = 14))
Correlation
In the eighth plot, I want to perform a PCA on all the data, grouping them by wine type.
# PCA on columns 2 to 14 of `dat` (graph = TRUE also draws PCA()'s own plots).
my_pca <- PCA(dat[2:14], graph = TRUE)

# Grouping variable and confidence level used for the ellipses.
Caso <- dat$Wine
confidence <- 0.95

# Individuals shown as points coloured by wine type, with one ellipse per
# group; the variable arrows are hidden.
biplot_Wine <- fviz_pca_biplot(
  my_pca,
  geom = "point",
  col.ind = Caso,
  invisible = "var",
  addEllipses = TRUE,
  ellipse.level = confidence
) +
  theme(plot.title = element_text(hjust = 0.5, size = 14, face = "bold")) +
  ggtitle("PCA for all wine data")
biplot_Wine