Installing Packages: install.packages(c("package A", "package X"))

##Use 'install.packages(c("library A", "library X"))' to install any of these libraries that are not installed yet

library(tidyverse)
Registered S3 method overwritten by 'dplyr':
  method           from
  print.rowwise_df     
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages --------------------------------------- tidyverse 1.3.0 --
v ggplot2 3.3.0     v purrr   0.3.3
v tibble  2.1.3     v dplyr   0.8.5
v tidyr   1.0.2     v stringr 1.4.0
v readr   1.3.1     v forcats 0.5.0
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
library(ggplot2)
library(corrplot)
corrplot 0.84 loaded
## Directory that holds the two wine-quality CSV files. Change this one line
## to point at your own copy of the data.
data_dir <- "C:/Users/HP/Documents/EDA/R/Wine"

## Note the 'sep' argument: the files are delimited by ';', not by a comma.
red_wine <- read.csv(file.path(data_dir, "winequality-red.csv"), sep = ";")
white_wine <- read.csv(file.path(data_dir, "winequality-white.csv"), sep = ";")

## First rows, column types and per-column summary statistics of the red wines
head(red_wine)

glimpse(red_wine)

summary(red_wine)

Assignment: Find the last 10 entries, and the respective descriptions for white_wine.

Before we go into Data Visualisation, we can merge our two datasets into one, which saves us from duplicating every task.

## Tag every row with its wine style before combining the two data sets
red_wine$style <- "red"
white_wine$style <- "white"

## Stack the two data sets row-wise (red rows first, then white). Both have
## the same columns, so bind_rows() appends them directly. A join on all
## columns only gives this result because no red row can match a white row
## once 'style' differs.
wine <- bind_rows(red_wine, white_wine)
## Missing values: total number of NA cells in the combined data
wine %>%
  is.na() %>%
  sum()
[1] 0
## Column types and first values of the combined data
wine %>% glimpse()
Observations: 6,497
Variables: 13
$ fixed.acidity        <dbl> 7.4, 7.8, 7.8, 11.2, 7.4, 7.4, 7.9, 7.3, 7.8, 7.5, 6.7, 7.5, 5.6, 7....
$ volatile.acidity     <dbl> 0.700, 0.880, 0.760, 0.280, 0.700, 0.660, 0.600, 0.650, 0.580, 0.500...
$ citric.acid          <dbl> 0.00, 0.00, 0.04, 0.56, 0.00, 0.00, 0.06, 0.00, 0.02, 0.36, 0.08, 0....
$ residual.sugar       <dbl> 1.9, 2.6, 2.3, 1.9, 1.9, 1.8, 1.6, 1.2, 2.0, 6.1, 1.8, 6.1, 1.6, 1.6...
$ chlorides            <dbl> 0.076, 0.098, 0.092, 0.075, 0.076, 0.075, 0.069, 0.065, 0.073, 0.071...
$ free.sulfur.dioxide  <dbl> 11, 25, 15, 17, 11, 13, 15, 15, 9, 17, 15, 17, 16, 9, 52, 51, 35, 16...
$ total.sulfur.dioxide <dbl> 34, 67, 54, 60, 34, 40, 59, 21, 18, 102, 65, 102, 59, 29, 145, 148, ...
$ density              <dbl> 0.9978, 0.9968, 0.9970, 0.9980, 0.9978, 0.9978, 0.9964, 0.9946, 0.99...
$ pH                   <dbl> 3.51, 3.20, 3.26, 3.16, 3.51, 3.51, 3.30, 3.39, 3.36, 3.35, 3.28, 3....
$ sulphates            <dbl> 0.56, 0.68, 0.65, 0.58, 0.56, 0.56, 0.46, 0.47, 0.57, 0.80, 0.54, 0....
$ alcohol              <dbl> 9.4, 9.8, 9.8, 9.8, 9.4, 9.4, 9.4, 10.0, 9.5, 10.5, 9.2, 10.5, 9.9, ...
$ quality              <int> 5, 5, 5, 6, 5, 5, 5, 7, 7, 5, 5, 5, 5, 5, 5, 5, 7, 5, 4, 6, 6, 5, 5,...
$ style                <chr> "red", "red", "red", "red", "red", "red", "red", "red", "red", "red"...

We see that wine$quality is an integer value which, according to the documentation, rates the ‘quality’ of a wine on a scale of 1-10. Let's transform it into three categories: ‘low’, ‘medium’ and ‘high’.

## Bucket the integer quality score into three categories:
##   [1, 4] -> "low", (4, 6] -> "Medium", (6, 10] -> "High"
## include.lowest = TRUE closes the first interval on the left, so a score of
## exactly 1 lands in "low" instead of silently becoming NA.
wine$qualityBucket <- cut(wine$quality,                        ##Cut this Variable
                          breaks = c(1, 4, 6, 10),             ##At these break points
                          labels = c("low", "Medium", "High"), ##Into these respective buckets
                          include.lowest = TRUE)
## Bar chart: number of wines in each quality bucket, one panel per wine style
wine %>%
  ggplot(aes(x = qualityBucket)) +
  geom_bar() +
  facet_wrap(vars(style), ncol = 1)

Let's look at the alcohol distribution for each style of wine.

## Histogram of alcohol content in bins 0.1 wide, one panel per wine style
ggplot(data = wine, mapping = aes(x = alcohol)) +
  geom_histogram(binwidth = 0.1) +
  facet_wrap(vars(style), ncol = 1)

##BiVariate Relationship: We are primarily assessing the relationship with wine quality, but you can adapt this for any other feature.

## Red wines only: summary statistics of alcohol within each quality bucket
wine %>%
  subset(style == "red") %>%
  with(by(alcohol, qualityBucket, summary))
qualityBucket: low
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   8.40    9.60   10.00   10.22   11.00   13.10 
---------------------------------------------------------------------------- 
qualityBucket: Medium
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   8.40    9.50   10.00   10.25   10.90   14.90 
---------------------------------------------------------------------------- 
qualityBucket: High
   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
   9.20   10.80   11.60   11.52   12.20   14.00 

Assignment: Run ‘?cat’ and read up on this function. What does it do?

## White wines only: summary statistics of alcohol within each quality bucket
## (the same group-by summary that is computed for the red wines)
with(subset(wine, style == 'white'), by(alcohol, qualityBucket, summary))

## Scatter plot of quality against alcohol with a fitted linear trend line,
## one panel per wine style. The plot is drawn once; the smoothing formula is
## spelled out so the fit does not depend on the default.
wine %>%
  ggplot(aes(x = alcohol, y = quality)) +
  geom_point(alpha = 1 / 4) +
  stat_smooth(method = "lm", formula = y ~ x) +
  facet_wrap(~style, ncol = 1)
## Pearson correlation matrix of the measurement columns for one wine style.
##
## data:       data frame with a 'style' column and a 'qualityBucket' column;
##             every other column must be numeric.
## wine_style: value of 'style' whose rows are kept, e.g. "red".
## Returns the matrix produced by cor() on the kept rows, with 'style' and
## 'qualityBucket' left out because they are not numeric.
style_correlations <- function(data, wine_style) {
  keep_rows <- data$style %in% wine_style
  measure_cols <- setdiff(names(data), c("qualityBucket", "style"))
  cor(data[keep_rows, measure_cols, drop = FALSE], method = "pearson")
}

## Red wines: plot the absolute correlations (strength only, sign dropped),
## then print the signed matrix
cor_Red <- style_correlations(wine, "red")

corrplot.mixed(abs(cor_Red))
cor_Red

## White wines: same plot and printout
cor_White <- style_correlations(wine, "white")

corrplot.mixed(abs(cor_White))

cor_White
                     fixed.acidity volatile.acidity  citric.acid residual.sugar   chlorides
fixed.acidity           1.00000000      -0.02269729  0.289180698     0.08902070  0.02308564
volatile.acidity       -0.02269729       1.00000000 -0.149471811     0.06428606  0.07051157
citric.acid             0.28918070      -0.14947181  1.000000000     0.09421162  0.11436445
residual.sugar          0.08902070       0.06428606  0.094211624     1.00000000  0.08868454
chlorides               0.02308564       0.07051157  0.114364448     0.08868454  1.00000000
free.sulfur.dioxide    -0.04939586      -0.09701194  0.094077221     0.29909835  0.10139235
total.sulfur.dioxide    0.09106976       0.08926050  0.121130798     0.40143931  0.19891030
density                 0.26533101       0.02711385  0.149502571     0.83896645  0.25721132
pH                     -0.42585829      -0.03191537 -0.163748211    -0.19413345 -0.09043946
sulphates              -0.01714299      -0.03572815  0.062330940    -0.02666437  0.01676288
alcohol                -0.12088112       0.06771794 -0.075728730    -0.45063122 -0.36018871
quality                -0.11366283      -0.19472297 -0.009209091    -0.09757683 -0.20993441
                     free.sulfur.dioxide total.sulfur.dioxide     density            pH   sulphates
fixed.acidity              -0.0493958591          0.091069756  0.26533101 -0.4258582910 -0.01714299
volatile.acidity           -0.0970119393          0.089260504  0.02711385 -0.0319153683 -0.03572815
citric.acid                 0.0940772210          0.121130798  0.14950257 -0.1637482114  0.06233094
residual.sugar              0.2990983537          0.401439311  0.83896645 -0.1941334540 -0.02666437
chlorides                   0.1013923521          0.198910300  0.25721132 -0.0904394560  0.01676288
free.sulfur.dioxide         1.0000000000          0.615500965  0.29421041 -0.0006177961  0.05921725
total.sulfur.dioxide        0.6155009650          1.000000000  0.52988132  0.0023209718  0.13456237
density                     0.2942104109          0.529881324  1.00000000 -0.0935914935  0.07449315
pH                         -0.0006177961          0.002320972 -0.09359149  1.0000000000  0.15595150
sulphates                   0.0592172458          0.134562367  0.07449315  0.1559514973  1.00000000
alcohol                    -0.2501039415         -0.448892102 -0.78013762  0.1214320987 -0.01743277
quality                     0.0081580671         -0.174737218 -0.30712331  0.0994272457  0.05367788
                         alcohol      quality
fixed.acidity        -0.12088112 -0.113662831
volatile.acidity      0.06771794 -0.194722969
citric.acid          -0.07572873 -0.009209091
residual.sugar       -0.45063122 -0.097576829
chlorides            -0.36018871 -0.209934411
free.sulfur.dioxide  -0.25010394  0.008158067
total.sulfur.dioxide -0.44889210 -0.174737218
density              -0.78013762 -0.307123313
pH                    0.12143210  0.099427246
sulphates            -0.01743277  0.053677877
alcohol               1.00000000  0.435574715
quality               0.43557472  1.000000000
LS0tDQp0aXRsZTogIldpbmUgRXhwbG9yYXRvcnkgRGF0YSBBbmFseXNpcyINCm91dHB1dDogaHRtbF9ub3RlYm9vaw0KLS0tDQoNCioqSW5zdGFsbGluZyBQYWNrYWdlcyoqDQppbnN0YWxsLnBhY2thZ2VzKGMoInBhY2thZ2UgQSIsICJwYWNrYWdlIFgiKSkNCg0KDQpgYGB7ciBMb2FkIExpYnJhcmllc30NCiMjVXNlICdpbnN0YWxsLnBhY2thZ2VzKGMoImxpYnJhcnkgQSIsICJsaWJyYXJ5IFgiKSknIHRvIG5ld2x5IGluc3RhbGwgdGhlc2UgbGlicmFyaWVzDQoNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShnZ3Bsb3QyKQ0KbGlicmFyeShjb3JycGxvdCkNCg0KIyNJZ25vcmUgdGhlIEhpZ2hsaWdodGVkIG1lc3NhZ2UsIHRoZSBjb25mbGljdCBpcyB3aXRoaW4gdGhlIGxvYWRlZCBkZXBlbmRlbmNpZXMgYW5kIG5vdCBhbiBpc3N1ZS4NCmBgYA0KDQoNCg0KYGBge3IgTG9hZCBEYXRhfQ0KcmVkX3dpbmUgPXJlYWQuY3N2KCdDOi9Vc2Vycy9IUC9Eb2N1bWVudHMvRURBL1IvV2luZS93aW5lcXVhbGl0eS1yZWQuY3N2Jywgc2VwPSI7IikNCndoaXRlX3dpbmUgPSByZWFkLmNzdignQzovVXNlcnMvSFAvRG9jdW1lbnRzL0VEQS9SL1dpbmUvd2luZXF1YWxpdHktd2hpdGUuY3N2Jywgc2VwID0gIjsiKQ0KDQojI05vdGUgdGhlICdzZXAnIGFyZ3VtZW50IHRvIHN0YXRlIHRoYXQgdGhlIGRhdGEgaXMgZGVsaW1pdGVkIG5vdCBieSBhIGNvbW1hLCBidXQgYnkgYSc7Jw0KDQoNCiMjTm90ZTogdGhlIGFib3ZlIHdpbGwgZm9sbG93IHlvdXIgb3duIHBhdGggdG8gdGhlIGRhdGEsIGluIGRvdWJ0IGp1c3QgcmlnaHQtY2xpY2sgYW5kIGNvcHkgdGhlIHBhdGgsIHRoZW4gY2hhbmdlIHRvIGZvbGxvdyBzbGFzaCBhZnRlciB5b3UndmUgcGFzdGVkLg0KYGBgDQoNCmBgYHtyIERlc2NyaXB0aW9uc18gUkVEIFdJTkU6IERhdGEgU3VtbWFyeSBhbmQgRGltZW5zaW9uc30NCmhlYWQocmVkX3dpbmUpDQoNCmdsaW1wc2UocmVkX3dpbmUpDQoNCnN1bW1hcnkocmVkX3dpbmUpDQpgYGANCiMjIEFzc2lnbm1lbnQ6IEZpbmQgdGhlIGxhc3QgMTAgZW50cmllcywgYW5kIHRoZSByZXNwZWN0aXZlIGRlc2NyaXB0aW9ucyBmb3IgYHdoaXRlX3dpbmVgLg0KDQoNCkJlZm9yZSB3ZSBnbyBpbnRvIERhdGEgVmlzdWFsaXNhdGlvbiB3ZSBjYW4gYWN0dWFsbHkgYG1lcmdlYCBvdXIgZGF0YXNldHMgaW50byBvbmUsIHRvIHNhdmUgdXMgdGhlIER1cGxpY2l0eSBvZiB0YXNrcy4NCmBgYHtyIFRpZHlpbmc6IE1lcmdpbmcgRGF0YXNldHMgYW5kIEV4YW1pbmF0aW9ufQ0KcmVkX3dpbmUkc3R5bGUgPC0gInJlZCINCndoaXRlX3dpbmUkc3R5bGUgPC0gIndoaXRlIg0KDQp3aW5lIDwtIGZ1bGxfam9pbihyZWRfd2luZSwgd2hpdGVfd2luZSkNCg0KIyNNaXNzaW5nIFZhbHVlDQpzdW0oaXMubmEod2luZSkpDQoNCmdsaW1wc2Uod2luZSkNCmBgYA0KV2Ugc2VlIHRoYXQgd2luZSRxdWFsaXR5IGlzIGFuIEludGVnZXIgVmFsdWUgYW5kIGZyb20gdGhlIGRvY3VtZW50YXRp
b24gZGVzY3JpYmVzIHRoZSAncXVhbGl0eScgb2Ygd2luZSBmcm9tIGEgc2NhbGUgb2YgMS0xMCwgd2VsbCBsZXRzIHRyYW5zZm9ybSB0aGlzIGludG8gdGhyZWUgQ2F0ZWdvcmllcyBvZiAnbG93JywgJ21lZGl1bScgYW5kICdoaWdoJy4NCg0KYGBge3IgVGlkeWluZzogRGF0YSBUcmFuc2Zvcm1hdGlvbn0NCndpbmUkcXVhbGl0eUJ1Y2tldCA8LSBjdXQod2luZSRxdWFsaXR5LCAjI0N1dCB0aGlzIFZhcmlhYmxlDQogICAgICAgICAgICAgICAgICAgICAgICAgIGMoMSwgNCwgNiwgMTApLCAjI0Zyb20gdGhlc2UgcHJvdmlkZWQgcmFuZ2VzDQogICAgICAgICAgICAgICAgICAgICAgICAgIGxhYmVscyA9IGMoImxvdyIsICJNZWRpdW0iLCAiSGlnaCIpKSAjI0ludG8gdGhlc2UgcmVzcGVjdGl2ZSByYW5nZXMNCmBgYA0KDQoNCmBgYHtyIERhdGEgVmlzdWFsaXphdGlvbn0NCmdncGxvdChkYXRhID0gd2luZSkgKw0KIGdlb21fYmFyKG1hcHBpbmcgPSBhZXMoeCA9IHF1YWxpdHlCdWNrZXQpKSsgIyNObyBpbnN0ZWFkIG9mICdxdWFsaXR5JyBpbnB1dCBhbnkgb3RoZXIgZmVhdHVyZSBpbiBoZXJlLg0KICBmYWNldF93cmFwKH5zdHlsZSwgbmNvbCA9IDEpDQpgYGANCg0KTGV0IHNlZSBBbGNvaG9sIGRpc3RyaWJ1dGlvbiBieSB3aW5lLg0KYGBge3IgQWxjb2hvbCBEaXN0cmlidXRpb259DQp3aW5lICU+JSBnZ3Bsb3QoYWVzKHggPSBhbGNvaG9sKSkgKw0KICBnZW9tX2hpc3RvZ3JhbShiaW53aWR0aCA9IC4xKSArDQogIGZhY2V0X3dyYXAofnN0eWxlLCBuY29sID0gMSkNCmBgYA0KDQoNCg0KIyNCaVZhcmlhdGUgUmVsYXRpb25zaGlwDQpXZSBhcmUgcHJpbWFyaWx5IGFzc2Vzc2luZyB0aGUgcmVsYXRpb25zaGlwIHdpdGggd2luZSBxdWFsaXR5LCBidXQgeW91IGNhbiBhZG9wdCB0aGlzIGZvciBhbnkgb3RoZXIgZmVhdHVyZSANCmBgYHtyIFJlZCBXaW5lOiBCSSBSZWxhdGlvbnNoaXB9DQpjYXQoIlJlZCBXaW5lOiBcbiIpIA0Kd2l0aChzdWJzZXQod2luZSwgc3R5bGUgPT0gJ3JlZCcpLCBieShhbGNvaG9sLCBxdWFsaXR5QnVja2V0LCBzdW1tYXJ5KSkgDQojI0dyb3VwQnkgRnVuY3Rpb24gcmV0dXJuaW5nIHN1bW1hcnkgc3RhdGlzdGljcw0KYGBgDQojIyBBc3NpZ25tZW50OiBSdW4gJz9jYXQnIGFuZCByZWFkIHVwIG9uIHRoaXMgYXJndW1lbnQsIHdoYXQgZG9lcyBpdCBkbz8NCg0KDQpgYGB7ciBXaGl0ZSBXaW5lOiBCSSBSZWxhdGlvbnNoaXB9DQpjYXQoIldoaXRlIFdpbmU6IFxuIikgDQp3aXRoKHN1YnNldCh3aW5lLCBzdHlsZSA9PSAnd2hpdGUnKSwgYnkoYWxjb2hvbCwgcXVhbGl0eUJ1Y2tldCwgc3VtbWFyeSkpDQpgYGANCg0KDQpgYGB7ciBWaXN1YWxpemluZyBCSSBSZWxhdGlvbnNoaXB9DQp3aW5lICU+JQ0KICBnZ3Bsb3QoYWVzKHggPSBhbGNvaG9sLCB5ID1xdWFsaXR5KSkgKw0KICBnZW9tX3BvaW50KGFscGhhID0xLzQpICsNCiAgc3RhdF9zbW9vdGgobWV0aG9kID0gImxtIikrDQogIGZhY2V0X3dyYXAofnN0eWxlLCBu
Y29sID0gMSkNCmBgYA0KDQoNCmBgYHtyIFJlZCBXaW5lOiBDb3JyZWxhdGlvbnN9DQpjb3JfUmVkIDwtIGNvcihzdWJzZXQod2luZSwNCiAgICAgICAgICAgICAgICAgICAgICBzdHlsZSA9PSAicmVkIiwNCiAgICAgICAgICAgICAgICAgICAgICBzZWxlY3QgPSAoLWMocXVhbGl0eUJ1Y2tldCwgc3R5bGUpKSksDQogICAgICAgICAgICAgICBtZXRob2QgPSAicGVhcnNvbiIpDQoNCmNvcnJwbG90Lm1peGVkKGFicyhjb3JfUmVkKSkNCmNvcl9SZWQNCmBgYA0KDQoNCmBgYHtyIFdoaXRlIFdpbmU6IENvcnJlbGF0aW9uc30NCmNvcl9XaGl0ZSA8LSBjb3Ioc3Vic2V0KHdpbmUsDQogICAgICAgICAgICAgICAgICAgICAgc3R5bGUgPT0gIndoaXRlIiwNCiAgICAgICAgICAgICAgICAgICAgICBzZWxlY3QgPSAoLWMocXVhbGl0eUJ1Y2tldCwgc3R5bGUpKSksDQogICAgICAgICAgICAgICBtZXRob2QgPSAicGVhcnNvbiIpDQoNCmNvcnJwbG90Lm1peGVkKGFicyhjb3JfV2hpdGUpKQ0KY29yX1doaXRlDQpgYGANCg0K