Import Data

In the following code chunk, I import my data.

# Load the spreadsheet located at `rute`; read_excel() already returns a tibble.
dat <- read_excel(rute)

# Data from: 
# S. Aeberhard, D. Coomans and O. de Vel, Comparison of Classifiers in High Dimensional Settings, Tech. Rep. no. 92-02, (1992), Dept. of Computer Science and Dept. of Mathematics and Statistics, James Cook University of North Queensland.

Part 1

In the first visualization, I will create an interactive boxplot for the alcohol data of each wine type (wine type on the x-axis and alcohol value on the y-axis).

# Keep only the wine type and alcohol columns for the boxplot, then print them.
fig_dat1 <- select(dat, Wine, Alcohol)
fig_dat1
## # A tibble: 178 × 2
##    Wine   Alcohol
##    <chr>    <dbl>
##  1 Wine A    14.2
##  2 Wine A    13.2
##  3 Wine A    13.2
##  4 Wine A    14.4
##  5 Wine A    13.2
##  6 Wine A    14.2
##  7 Wine A    14.4
##  8 Wine A    14.1
##  9 Wine A    14.8
## 10 Wine A    13.9
## # ℹ 168 more rows
# Boxplot of alcohol by wine type, filled with one manual colour per type.
cajas_bigotes <- ggplot(
  data = fig_dat1,
  mapping = aes(x = Wine, y = Alcohol, fill = Wine)
) +
  geom_boxplot(color = "black") +
  scale_fill_manual(
    name = "Wine type",
    values = c("#FFB6C1", "#98FB98", "#ADD8E6")
  ) +
  labs(
    title = "Alcohol Percentage for Each Type of Wine",
    x = "Wine type"
  ) +
  theme_classic() +
  # Centre the title; placed after theme_classic() so it is not reset.
  theme(plot.title = element_text(hjust = 0.5, size = 14))

# Render the static ggplot as an interactive plotly widget.
ggplotly(cajas_bigotes)

Part 2

In the second visualization, an overlaid bar chart, I will compare the mean flavonoid and non-flavonoid phenol content of each wine type (wine type on the x-axis and the corresponding mean values on the y-axis).

# Keep the wine type plus the two phenol columns used by the bar chart.
fig_dat2 <- select(dat, Wine, Flavanoids, Nonflavanoid.phenols)
fig_dat2
## # A tibble: 178 × 3
##    Wine   Flavanoids Nonflavanoid.phenols
##    <chr>       <dbl>                <dbl>
##  1 Wine A       3.06                 0.28
##  2 Wine A       2.76                 0.26
##  3 Wine A       3.24                 0.3 
##  4 Wine A       3.49                 0.24
##  5 Wine A       2.69                 0.39
##  6 Wine A       3.39                 0.34
##  7 Wine A       2.52                 0.3 
##  8 Wine A       2.51                 0.31
##  9 Wine A       2.98                 0.29
## 10 Wine A       3.15                 0.22
## # ℹ 168 more rows
# Overlaid bar chart of the mean flavonoid and non-flavonoid phenol content
# of each wine type.
# stat = "summary" with fun = "mean" collapses the rows of each wine type into
# a single bar at their mean. With stat = "identity" every row was stacked, so
# the bars showed per-type sums rather than means.
Flavonoides_y_noflavonoides <- ggplot(fig_dat2, aes(x = Wine)) +
  geom_bar(
    aes(y = Flavanoids, fill = "Flavonoids"),
    stat = "summary", fun = "mean"
  ) +
  # Drawn second so the (smaller) non-flavonoid bars sit on top of the first
  # layer instead of being hidden behind it.
  geom_bar(
    aes(y = Nonflavanoid.phenols, fill = "Non flavonoids"),
    stat = "summary", fun = "mean"
  ) +
  scale_fill_manual(
    values = c("Flavonoids" = "#FFA550", "Non flavonoids" = "#FF0200"),
    name = "Content of..."
  ) +
  labs(
    title = "Content of flavonoids and non flavonoids in the wine",
    x = "Wine type",
    y = "Mean content of flavonoids and non flavonoids"
  ) +
  theme_classic() +
  # Centre the title; placed after theme_classic() so it is not reset.
  theme(plot.title = element_text(hjust = 0.5, size = 14))


Flavonoides_y_noflavonoides

Part 3

In the third graph, I will create a scatter plot to assess the relationship between hue (x-axis) and color intensity (y-axis).

# Keep the wine type, colour intensity and hue columns for the scatter plot.
fig_dat3 <- select(dat, Wine, Color.int, Hue)
fig_dat3
## # A tibble: 178 × 3
##    Wine   Color.int   Hue
##    <chr>      <dbl> <dbl>
##  1 Wine A      5.64  1.04
##  2 Wine A      4.38  1.05
##  3 Wine A      5.68  1.03
##  4 Wine A      7.8   0.86
##  5 Wine A      4.32  1.04
##  6 Wine A      6.75  1.05
##  7 Wine A      5.25  1.02
##  8 Wine A      5.05  1.06
##  9 Wine A      5.2   1.08
## 10 Wine A      7.22  1.01
## # ℹ 168 more rows
# Scatter plot of colour intensity against hue, with one smoothed trend (and
# its confidence ribbon) per wine type.
Hueandcolor <- ggplot(
  fig_dat3,
  aes(x = Hue, y = Color.int, color = Wine, fill = Wine)
) +
  geom_point() +
  # Apply the same palette and legend title to both colour (points) and fill
  # (smoother ribbons). Scaling only `fill` left the points on the default
  # palette and produced two separate legends ("Wine" and "Wine type").
  scale_fill_manual(
    values = c("#FFB6C1", "#98FB98", "#ADD8E6"),
    name = "Wine type",
    aesthetics = c("colour", "fill")
  ) +
  # Trend lines are forced to black; the ribbons keep the per-wine fill.
  stat_smooth(color = "black") +
  labs(title = "Color intensity and Hue of wine") +
  theme_classic() +
  xlab("Hue") + ylab("Color intensity") +
  # Centre the title; placed after theme_classic() so it is not reset.
  theme(plot.title = element_text(hjust = 0.5, size = 14))

Hueandcolor

Part 4

In the fourth graph, I want to create a ridgeline plot of the Mg data for each type of wine (Mg on the x-axis, with the wine types ordered from minimum to maximum Mg).

# Keep only the wine type and Mg columns for the ridgeline plot.
fig_dat4 <- select(dat, Wine, Mg)
fig_dat4
## # A tibble: 178 × 2
##    Wine      Mg
##    <chr>  <dbl>
##  1 Wine A   127
##  2 Wine A   100
##  3 Wine A   101
##  4 Wine A   113
##  5 Wine A   118
##  6 Wine A   112
##  7 Wine A    96
##  8 Wine A   121
##  9 Wine A    97
## 10 Wine A    98
## # ℹ 168 more rows
# Fix the order in which the wine types are stacked on the y-axis.
orden_deseado <- c("Wine A", "Wine C", "Wine B")
fig_dat4$Wine <- factor(fig_dat4$Wine, levels = orden_deseado)

# Ridgeline plot of Mg per wine type, with each ridge filled by a gradient
# that follows the x (Mg) value.
# after_stat(x) replaces the deprecated `..x..` notation for referring to a
# variable computed by the stat.
Mg <- ggplot(fig_dat4, aes(x = Mg, y = Wine, fill = after_stat(x))) +
  geom_density_ridges_gradient(scale = 1.5, rel_min_height = 0.01) +
  scale_fill_viridis(name = "Mg", option = "H") +
  labs(title = "Mg of each wine type") +
  theme_classic() +
  # Theme tweaks come after theme_classic() so they are not reset by it.
  theme(
    legend.position = "none",
    panel.spacing = unit(0.1, "lines"),
    strip.text.x = element_text(size = 8),
    plot.title = element_text(hjust = 0.5, size = 14)
  )

Mg

Part 5

The fifth figure I want to create is a scatter plot where the x-axis represents the phenol content, the y-axis represents the Acl content, the points are colored based on the type of wine, and their size is a measure of malic acid.

# Keep the wine type plus the phenol, Acl and malic acid columns.
fig_dat5 <- select(dat, Wine, Phenols, Acl, Malic.acid)
fig_dat5
## # A tibble: 178 × 4
##    Wine   Phenols   Acl Malic.acid
##    <chr>    <dbl> <dbl>      <dbl>
##  1 Wine A    2.8   15.6       1.71
##  2 Wine A    2.65  11.2       1.78
##  3 Wine A    2.8   18.6       2.36
##  4 Wine A    3.85  16.8       1.95
##  5 Wine A    2.8   21         2.59
##  6 Wine A    3.27  15.2       1.76
##  7 Wine A    2.5   14.6       1.87
##  8 Wine A    2.6   17.6       2.15
##  9 Wine A    2.8   14         1.64
## 10 Wine A    2.98  16         1.35
## # ℹ 168 more rows
# Bubble chart: phenols (x) against Acl (y), coloured by wine type and sized
# by malic acid.
phenols_Acl_Malic.acid <- ggplot(
  fig_dat5,
  aes(x = Phenols, y = Acl, color = Wine, size = Malic.acid)
) +
  geom_point() +
  # The points map `colour`, not `fill`, so the palette has to be set on the
  # colour scale; a fill scale is never used by this plot.
  scale_color_manual(
    values = c("#FFB6C1", "#98FB98", "#ADD8E6"),
    name = "Wine type"
  ) +
  labs(title = "Acl, Phenols, Malic acid content, and Wine type study") +
  # The legends stay visible, as they did before: an earlier
  # legend.position = "none" was always reset by this complete theme.
  theme_classic() +
  xlab("Phenols") + ylab("Acl") +
  # Centre the title; placed after theme_classic() so it is not reset.
  theme(plot.title = element_text(hjust = 0.5, size = 14))

phenols_Acl_Malic.acid

Part 6

In the sixth plot, I want to create a violin plot for each type of wine, representing the Ash composition. I want the individual data to be displayed, and the mean to be represented by a yellow diamond.

# Keep only the wine type and ash columns for the violin plot.
fig_dat6 <- select(dat, Wine, Ash)
fig_dat6
## # A tibble: 178 × 2
##    Wine     Ash
##    <chr>  <dbl>
##  1 Wine A  2.43
##  2 Wine A  2.14
##  3 Wine A  2.67
##  4 Wine A  2.5 
##  5 Wine A  2.87
##  6 Wine A  2.45
##  7 Wine A  2.45
##  8 Wine A  2.61
##  9 Wine A  2.17
## 10 Wine A  2.27
## # ℹ 168 more rows
# Violin plot of ash content per wine type, with the individual observations
# overlaid and each group's mean marked by a yellow diamond.
Ashes <- ggplot(fig_dat6, aes(x = Wine, y = Ash, fill = Wine)) +
  geom_violin() +
  # Jitter horizontally only (height = 0) so every point stays at its true
  # Ash value; the default also adds a small vertical offset.
  geom_jitter(
    shape = 16,
    position = position_jitter(width = 0.2, height = 0),
    color = "black"
  ) +
  # `fun` replaces the deprecated `fun.y` argument of stat_summary().
  stat_summary(fun = mean, geom = "point", shape = 23, size = 4,
               fill = "yellow") +
  scale_fill_manual(
    values = c("#FFB6C1", "#98FB98", "#ADD8E6"),
    name = "Wine type"
  ) +
  theme_classic() +
  xlab("Wine type") + ylab("Ash") +
  labs(title = "Ash content distribution of each type of Wine") +
  # Centre the title; placed after theme_classic() so it is not reset.
  theme(plot.title = element_text(hjust = 0.5, size = 14))

Ashes

Part 7

In the seventh plot, I want to create a correlation map of the variables.

# Shorten two column names so their labels fit on the correlation map.
dat_corr <- dat %>%
  rename(Color = Color.int, Non.flavonoids = Nonflavanoid.phenols)

# Pearson correlation map drawn with circles.
# NOTE(review): dat_corr still contains the character column Wine; ggcorr()
# is expected to ignore non-numeric columns (with a warning) - confirm.
Correlation <- ggcorr(
  dat_corr,
  method = c("everything", "pearson"),
  geom = "circle",
  hjust = 0.7,
  size = 3,
  layout.exp = 1
) +
  # The title now matches geom = "circle" (it previously said "square").
  labs(title = "Correlation of each pair of variables as a circle") +
  theme(plot.title = element_text(hjust = 0.5, size = 14))

Correlation

Part 8

In the eighth plot, I want to perform a PCA on all the data, grouping them by wine type.

# PCA on columns 2 to 14 of `dat`; graph = TRUE asks PCA() to draw its own
# default plots as well.
my_pca <- PCA(dat[2:14], graph = TRUE)

# Grouping variable (wine type) used to colour individuals and draw ellipses.
Caso <- dat$Wine

# Level passed to the group ellipses.
confidence <- 0.95

# Plot of the individuals only (variables hidden), coloured by wine type,
# with one ellipse per group.
biplot_Wine <- fviz_pca_biplot(
  my_pca,
  geom = "point",
  col.ind = Caso,
  invisible = "var",
  addEllipses = TRUE,
  ellipse.level = confidence
) +
  ggtitle("PCA for all wine data") +
  theme(plot.title = element_text(hjust = 0.5, size = 14, face = "bold"))

biplot_Wine