#Project Title: Project: Data Analytics with Managerial Application Internship
#NAME: Nimit Dhalia
#EMAIL: lucky.dhalia8@gmail.com
#COLLEGE : IIT Roorkee
# My project investigates which balance of physicochemical properties makes a good-quality wine. I propose to investigate the impact of the input variables (based on physicochemical tests) on the output variable, quality (based on sensory data), for two different wines: white and red.
# Load the red wine and white wine datasets (semicolon-separated files).
redWine <- read.csv(file = "winequality-red.csv", sep = ";")
whiteWine <- read.csv(file = "winequality-white.csv", sep = ";")
dim(redWine) # rows and columns of the red wine data
## [1] 1599 12
dim(whiteWine) # rows and columns of the white wine data
## [1] 4898 12
# Label every row with its wine colour so the two sets stay distinguishable
# once they are stacked.
redWine[["type"]] <- "red"
whiteWine[["type"]] <- "white"
# wine.df stacks both datasets: its row count is the sum of the rows of
# redWine and whiteWine, and it has one more column than the files as read
# because of the added 'type' label.
wine.df <- rbind(redWine, whiteWine)
dim(wine.df) # rows and columns of the combined data
## [1] 6497 13
# Number of columns in the red wine dataset (the added 'type' column included):
dim(redWine)[2L]
## [1] 13
# Number of columns in the white wine dataset (the added 'type' column included):
dim(whiteWine)[2L]
## [1] 13
# Preview the first six rows of the red wine data.
head(x = redWine, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.4 0.70 0.00 1.9 0.076
## 2 7.8 0.88 0.00 2.6 0.098
## 3 7.8 0.76 0.04 2.3 0.092
## 4 11.2 0.28 0.56 1.9 0.075
## 5 7.4 0.70 0.00 1.9 0.076
## 6 7.4 0.66 0.00 1.8 0.075
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 11 34 0.9978 3.51 0.56 9.4
## 2 25 67 0.9968 3.20 0.68 9.8
## 3 15 54 0.9970 3.26 0.65 9.8
## 4 17 60 0.9980 3.16 0.58 9.8
## 5 11 34 0.9978 3.51 0.56 9.4
## 6 13 40 0.9978 3.51 0.56 9.4
## quality type
## 1 5 red
## 2 5 red
## 3 5 red
## 4 6 red
## 5 5 red
## 6 5 red
# Preview the first six rows of the white wine data.
head(x = whiteWine, n = 6L)
## fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1 7.0 0.27 0.36 20.7 0.045
## 2 6.3 0.30 0.34 1.6 0.049
## 3 8.1 0.28 0.40 6.9 0.050
## 4 7.2 0.23 0.32 8.5 0.058
## 5 7.2 0.23 0.32 8.5 0.058
## 6 8.1 0.28 0.40 6.9 0.050
## free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
## 1 45 170 1.0010 3.00 0.45 8.8
## 2 14 132 0.9940 3.30 0.49 9.5
## 3 30 97 0.9951 3.26 0.44 10.1
## 4 47 186 0.9956 3.19 0.40 9.9
## 5 47 186 0.9956 3.19 0.40 9.9
## 6 30 97 0.9951 3.26 0.44 10.1
## quality type
## 1 6 white
## 2 6 white
## 3 6 white
## 4 6 white
## 5 6 white
## 6 6 white
# Compact structure listing of the red wine data frame (columns, types, leading values).
str(object = redWine)
## 'data.frame': 1599 obs. of 13 variables:
## $ fixed.acidity : num 7.4 7.8 7.8 11.2 7.4 7.4 7.9 7.3 7.8 7.5 ...
## $ volatile.acidity : num 0.7 0.88 0.76 0.28 0.7 0.66 0.6 0.65 0.58 0.5 ...
## $ citric.acid : num 0 0 0.04 0.56 0 0 0.06 0 0.02 0.36 ...
## $ residual.sugar : num 1.9 2.6 2.3 1.9 1.9 1.8 1.6 1.2 2 6.1 ...
## $ chlorides : num 0.076 0.098 0.092 0.075 0.076 0.075 0.069 0.065 0.073 0.071 ...
## $ free.sulfur.dioxide : num 11 25 15 17 11 13 15 15 9 17 ...
## $ total.sulfur.dioxide: num 34 67 54 60 34 40 59 21 18 102 ...
## $ density : num 0.998 0.997 0.997 0.998 0.998 ...
## $ pH : num 3.51 3.2 3.26 3.16 3.51 3.51 3.3 3.39 3.36 3.35 ...
## $ sulphates : num 0.56 0.68 0.65 0.58 0.56 0.56 0.46 0.47 0.57 0.8 ...
## $ alcohol : num 9.4 9.8 9.8 9.8 9.4 9.4 9.4 10 9.5 10.5 ...
## $ quality : int 5 5 5 6 5 5 5 7 7 5 ...
## $ type : chr "red" "red" "red" "red" ...
# Compact structure listing of the white wine data frame (columns, types, leading values).
str(object = whiteWine)
## 'data.frame': 4898 obs. of 13 variables:
## $ fixed.acidity : num 7 6.3 8.1 7.2 7.2 8.1 6.2 7 6.3 8.1 ...
## $ volatile.acidity : num 0.27 0.3 0.28 0.23 0.23 0.28 0.32 0.27 0.3 0.22 ...
## $ citric.acid : num 0.36 0.34 0.4 0.32 0.32 0.4 0.16 0.36 0.34 0.43 ...
## $ residual.sugar : num 20.7 1.6 6.9 8.5 8.5 6.9 7 20.7 1.6 1.5 ...
## $ chlorides : num 0.045 0.049 0.05 0.058 0.058 0.05 0.045 0.045 0.049 0.044 ...
## $ free.sulfur.dioxide : num 45 14 30 47 47 30 30 45 14 28 ...
## $ total.sulfur.dioxide: num 170 132 97 186 186 97 136 170 132 129 ...
## $ density : num 1.001 0.994 0.995 0.996 0.996 ...
## $ pH : num 3 3.3 3.26 3.19 3.19 3.26 3.18 3 3.3 3.22 ...
## $ sulphates : num 0.45 0.49 0.44 0.4 0.4 0.44 0.47 0.45 0.49 0.45 ...
## $ alcohol : num 8.8 9.5 10.1 9.9 9.9 10.1 9.6 8.8 9.5 11 ...
## $ quality : int 6 6 6 6 6 6 6 6 6 6 ...
## $ type : chr "white" "white" "white" "white" ...
# Turn the character 'type' label into a factor in all three data frames.
redWine$type <- as.factor(redWine$type)
whiteWine$type <- as.factor(whiteWine$type)
wine.df$type <- as.factor(wine.df$type)
# Verify the conversion: each single-colour set has one level, the combined set has two.
str(object = redWine$type)
## Factor w/ 1 level "red": 1 1 1 1 1 1 1 1 1 1 ...
str(object = whiteWine$type)
## Factor w/ 1 level "white": 1 1 1 1 1 1 1 1 1 1 ...
str(object = wine.df$type)
## Factor w/ 2 levels "red","white": 1 1 1 1 1 1 1 1 1 1 ...
library(psych)
# Descriptive statistics for columns 1-12 of the red wine data; the 'type'
# label in column 13 is left out.
describe(redWine[, 1:12])
## vars n mean sd median trimmed mad min
## fixed.acidity 1 1599 8.32 1.74 7.90 8.15 1.48 4.60
## volatile.acidity 2 1599 0.53 0.18 0.52 0.52 0.18 0.12
## citric.acid 3 1599 0.27 0.19 0.26 0.26 0.25 0.00
## residual.sugar 4 1599 2.54 1.41 2.20 2.26 0.44 0.90
## chlorides 5 1599 0.09 0.05 0.08 0.08 0.01 0.01
## free.sulfur.dioxide 6 1599 15.87 10.46 14.00 14.58 10.38 1.00
## total.sulfur.dioxide 7 1599 46.47 32.90 38.00 41.84 26.69 6.00
## density 8 1599 1.00 0.00 1.00 1.00 0.00 0.99
## pH 9 1599 3.31 0.15 3.31 3.31 0.15 2.74
## sulphates 10 1599 0.66 0.17 0.62 0.64 0.12 0.33
## alcohol 11 1599 10.42 1.07 10.20 10.31 1.04 8.40
## quality 12 1599 5.64 0.81 6.00 5.59 1.48 3.00
## max range skew kurtosis se
## fixed.acidity 15.90 11.30 0.98 1.12 0.04
## volatile.acidity 1.58 1.46 0.67 1.21 0.00
## citric.acid 1.00 1.00 0.32 -0.79 0.00
## residual.sugar 15.50 14.60 4.53 28.49 0.04
## chlorides 0.61 0.60 5.67 41.53 0.00
## free.sulfur.dioxide 72.00 71.00 1.25 2.01 0.26
## total.sulfur.dioxide 289.00 283.00 1.51 3.79 0.82
## density 1.00 0.01 0.07 0.92 0.00
## pH 4.01 1.27 0.19 0.80 0.00
## sulphates 2.00 1.67 2.42 11.66 0.00
## alcohol 14.90 6.50 0.86 0.19 0.03
## quality 8.00 5.00 0.22 0.29 0.02
# Descriptive statistics for columns 1-12 of the white wine data; the 'type'
# label in column 13 is left out.
describe(whiteWine[, 1:12])
## vars n mean sd median trimmed mad min
## fixed.acidity 1 4898 6.85 0.84 6.80 6.82 0.74 3.80
## volatile.acidity 2 4898 0.28 0.10 0.26 0.27 0.09 0.08
## citric.acid 3 4898 0.33 0.12 0.32 0.33 0.09 0.00
## residual.sugar 4 4898 6.39 5.07 5.20 5.80 5.34 0.60
## chlorides 5 4898 0.05 0.02 0.04 0.04 0.01 0.01
## free.sulfur.dioxide 6 4898 35.31 17.01 34.00 34.36 16.31 2.00
## total.sulfur.dioxide 7 4898 138.36 42.50 134.00 136.96 43.00 9.00
## density 8 4898 0.99 0.00 0.99 0.99 0.00 0.99
## pH 9 4898 3.19 0.15 3.18 3.18 0.15 2.72
## sulphates 10 4898 0.49 0.11 0.47 0.48 0.10 0.22
## alcohol 11 4898 10.51 1.23 10.40 10.43 1.48 8.00
## quality 12 4898 5.88 0.89 6.00 5.85 1.48 3.00
## max range skew kurtosis se
## fixed.acidity 14.20 10.40 0.65 2.17 0.01
## volatile.acidity 1.10 1.02 1.58 5.08 0.00
## citric.acid 1.66 1.66 1.28 6.16 0.00
## residual.sugar 65.80 65.20 1.08 3.46 0.07
## chlorides 0.35 0.34 5.02 37.51 0.00
## free.sulfur.dioxide 289.00 287.00 1.41 11.45 0.24
## total.sulfur.dioxide 440.00 431.00 0.39 0.57 0.61
## density 1.04 0.05 0.98 9.78 0.00
## pH 3.82 1.10 0.46 0.53 0.00
## sulphates 1.08 0.86 0.98 1.59 0.00
## alcohol 14.20 6.20 0.49 -0.70 0.02
## quality 9.00 6.00 0.16 0.21 0.01
# One-way table: how many wines of each type the combined data contains.
wineTypeTable <- table(type = wine.df$type)
wineTypeTable # counts per wine type
## type
## red white
## 1599 4898
round(100 * prop.table(wineTypeTable), digits = 2) # share of each type, in percent
## type
## red white
## 24.61 75.39
# Two-way contingency table of wine type against quality score.
typeAndQuality <- xtabs(~ type + quality, data = wine.df)
typeAndQuality # counts per type and quality score
## quality
## type 3 4 5 6 7 8 9
## red 10 53 681 638 199 18 0
## white 20 163 1457 2198 880 175 5
round(100 * prop.table(typeAndQuality, margin = 1), digits = 2) # percent within each wine type (rows)
## quality
## type 3 4 5 6 7 8 9
## red 0.63 3.31 42.59 39.90 12.45 1.13 0.00
## white 0.41 3.33 29.75 44.88 17.97 3.57 0.10
round(100 * prop.table(typeAndQuality, margin = 2), digits = 2) # percent within each quality score (columns)
## quality
## type 3 4 5 6 7 8 9
## red 33.33 24.54 31.85 22.50 18.44 9.33 0.00
## white 66.67 75.46 68.15 77.50 81.56 90.67 100.00
library(gmodels)
# Detailed cross-tabulation of wine type (rows) against quality score (columns).
# The argument expressions are kept as wine.df$... because they become the
# row/column headings of the printed table.
CrossTable(x = wine.df$type, y = wine.df$quality)
##
##
## Cell Contents
## |-------------------------|
## | N |
## | Chi-square contribution |
## | N / Row Total |
## | N / Col Total |
## | N / Table Total |
## |-------------------------|
##
##
## Total Observations in Table: 6497
##
##
## | wine.df$quality
## wine.df$type | 3 | 4 | 5 | 6 | 7 | 8 | 9 | Row Total |
## -------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## red | 10 | 53 | 681 | 638 | 199 | 18 | 0 | 1599 |
## | 0.927 | 0.000 | 45.546 | 5.154 | 16.681 | 18.321 | 1.231 | |
## | 0.006 | 0.033 | 0.426 | 0.399 | 0.124 | 0.011 | 0.000 | 0.246 |
## | 0.333 | 0.245 | 0.319 | 0.225 | 0.184 | 0.093 | 0.000 | |
## | 0.002 | 0.008 | 0.105 | 0.098 | 0.031 | 0.003 | 0.000 | |
## -------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## white | 20 | 163 | 1457 | 2198 | 880 | 175 | 5 | 4898 |
## | 0.303 | 0.000 | 14.869 | 1.683 | 5.446 | 5.981 | 0.402 | |
## | 0.004 | 0.033 | 0.297 | 0.449 | 0.180 | 0.036 | 0.001 | 0.754 |
## | 0.667 | 0.755 | 0.681 | 0.775 | 0.816 | 0.907 | 1.000 | |
## | 0.003 | 0.025 | 0.224 | 0.338 | 0.135 | 0.027 | 0.001 | |
## -------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
## Column Total | 30 | 216 | 2138 | 2836 | 1079 | 193 | 5 | 6497 |
## | 0.005 | 0.033 | 0.329 | 0.437 | 0.166 | 0.030 | 0.001 | |
## -------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|
##
##
# Pearson chi-squared test of independence between wine type and quality score.
# NOTE(review): the "approximation may be incorrect" warning reported for this
# call is probably due to the sparsely populated quality-9 column; consider
# simulate.p.value = TRUE -- confirm before changing.
chisq.test(x = typeAndQuality)
## Warning in chisq.test(typeAndQuality): Chi-squared approximation may be
## incorrect
##
## Pearson's Chi-squared test
##
## data: typeAndQuality
## X-squared = 116.54, df = 6, p-value < 2.2e-16
# Mean quality score per wine type, followed by a two-sample t-test of the
# difference between red and white.
mean(redWine[["quality"]])
## [1] 5.636023
mean(whiteWine[["quality"]])
## [1] 5.877909
t.test(x = redWine$quality, y = whiteWine$quality)
##
## Welch Two Sample t-test
##
## data: redWine$quality and whiteWine$quality
## t = -10.149, df = 2950.8, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.2886173 -0.1951564
## sample estimates:
## mean of x mean of y
## 5.636023 5.877909
# Mean alcohol content per wine type, followed by a two-sample t-test of the
# difference between red and white.
mean(redWine[["alcohol"]])
## [1] 10.42298
mean(whiteWine[["alcohol"]])
## [1] 10.51427
t.test(x = redWine$alcohol, y = whiteWine$alcohol)
##
## Welch Two Sample t-test
##
## data: redWine$alcohol and whiteWine$alcohol
## t = -2.859, df = 3100.5, p-value = 0.004278
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -0.15388669 -0.02868117
## sample estimates:
## mean of x mean of y
## 10.42298 10.51427
# Mean residual sugar per wine type, followed by a two-sample t-test of the
# difference between red and white.
mean(redWine[["residual.sugar"]])
## [1] 2.538806
mean(whiteWine[["residual.sugar"]])
## [1] 6.391415
t.test(x = redWine$residual.sugar, y = whiteWine$residual.sugar)
##
## Welch Two Sample t-test
##
## data: redWine$residual.sugar and whiteWine$residual.sugar
## t = -47.802, df = 6392.1, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## -4.010602 -3.694617
## sample estimates:
## mean of x mean of y
## 2.538806 6.391415
# Mean density per wine type, followed by a two-sample t-test of the
# difference between red and white.
mean(redWine[["density"]])
## [1] 0.9967467
mean(whiteWine[["density"]])
## [1] 0.9940274
t.test(x = redWine$density, y = whiteWine$density)
##
## Welch Two Sample t-test
##
## data: redWine$density and whiteWine$density
## t = 42.709, df = 4340.4, p-value < 2.2e-16
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
## 0.002594475 0.002844131
## sample estimates:
## mean of x mean of y
## 0.9967467 0.9940274
# Red-versus-white boxplots, one per physicochemical measurement.
# Each entry below is c(column name, plot title, y-axis label); the plots are
# drawn in the order listed. Wrapping the work in invisible(lapply(...)) keeps
# the return values from being printed and adds no new global variables.
invisible(lapply(
  list(
    c("fixed.acidity", "Acidity (tartaric)", "tartaric acid, g / dm3"),
    c("volatile.acidity", "Acidity (acetic)", "acetic acid, g / dm3"),
    c("citric.acid", "Citric acid", "Citric acid, g / dm3"),
    c("residual.sugar", "Residual sugar", "g / dm3"),
    c("chlorides", "Sodium chloride", "g / dm3"),
    c("free.sulfur.dioxide", "Free sulfur dioxide", "mg / dm3"),
    c("total.sulfur.dioxide", "Total sulfur dioxide", "mg / dm3"),
    c("density", "Density", "g / cm3"),
    c("pH", "pH", "pH"),
    c("sulphates", "Potassium sulphates", "g / dm3"),
    c("alcohol", "Alcohol", "vol %")
  ),
  function(spec) {
    boxplot(
      redWine[[spec[1]]], whiteWine[[spec[1]]],
      col = c("red3", "seashell1"),
      horizontal = FALSE,
      main = spec[2],
      ylab = spec[3],
      names = c("Red wine", "White wine")
    )
  }
))
library(lattice)
# Histograms of the quality score for red and white wine, shown side by side.
# lattice draws with grid graphics, so par(mfrow = ...) does not arrange its
# plots; the 1 x 2 layout is requested through print()'s split argument
# instead (split = c(column, row, n columns, n rows); more = TRUE keeps the
# page open for the second plot). The explicit print() also renders the plots
# when the script is run through source().
print(
  histogram(redWine$quality, xlab = "Sensory score", main = "Red wine ", col = "red3"),
  split = c(1, 1, 2, 1), more = TRUE
)
print(
  histogram(whiteWine$quality, xlab = "Sensory score", main = "White wine", col = "seashell1"),
  split = c(2, 1, 2, 1)
)
# 1. Corrplot (red wine)
library(corrplot)
## corrplot 0.84 loaded
# Correlations among columns 1-5 and quality (column 12).
N <- cor(redWine[, c(1:5, 12)])
corrplot(corr = N, method = "circle") # first five variables with quality
# Correlations among columns 6-11 and quality (column 12).
N1 <- cor(redWine[, 6:12])
corrplot(corr = N1, method = "circle") # last six variables with quality
# 2. Correlation matrix (red wine), rounded to two decimals
round(cor(redWine[, c(1:5, 12)]), digits = 2) # first five variables with quality
## fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity 1.00 -0.26 0.67 0.11
## volatile.acidity -0.26 1.00 -0.55 0.00
## citric.acid 0.67 -0.55 1.00 0.14
## residual.sugar 0.11 0.00 0.14 1.00
## chlorides 0.09 0.06 0.20 0.06
## quality 0.12 -0.39 0.23 0.01
## chlorides quality
## fixed.acidity 0.09 0.12
## volatile.acidity 0.06 -0.39
## citric.acid 0.20 0.23
## residual.sugar 0.06 0.01
## chlorides 1.00 -0.13
## quality -0.13 1.00
round(cor(redWine[, 6:12]), digits = 2) # last six variables with quality
## free.sulfur.dioxide total.sulfur.dioxide density
## free.sulfur.dioxide 1.00 0.67 -0.02
## total.sulfur.dioxide 0.67 1.00 0.07
## density -0.02 0.07 1.00
## pH 0.07 -0.07 -0.34
## sulphates 0.05 0.04 0.15
## alcohol -0.07 -0.21 -0.50
## quality -0.05 -0.19 -0.17
## pH sulphates alcohol quality
## free.sulfur.dioxide 0.07 0.05 -0.07 -0.05
## total.sulfur.dioxide -0.07 0.04 -0.21 -0.19
## density -0.34 0.15 -0.50 -0.17
## pH 1.00 -0.20 0.21 -0.06
## sulphates -0.20 1.00 0.09 0.25
## alcohol 0.21 0.09 1.00 0.48
## quality -0.06 0.25 0.48 1.00
# 3. Correlation matrix drawn as a corrgram (red wine)
library(corrgram)
## Warning: replacing previous import by 'magrittr::%>%' when loading
## 'dendextend'
# Panel functions: panel.shade below the diagonal, panel.pie above it,
# panel.minmax and panel.txt on the diagonal.
corrgram(
  redWine[, c(1:5, 12)],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Red wine intercorrelations I"
) # first five variables with quality
corrgram(
  redWine[, 6:12],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Red wine intercorrelations II"
) # last six variables with quality
# 4. Scatter plot matrix (red wine)
library(psych)
# One matrix per group of predictor columns, each drawn together with quality
# (column 12): columns 1-4, then 5-8, then 8-11. Density (column 8) appears in
# both of the last two groups.
invisible(lapply(list(1:4, 5:8, 8:11), function(predictorCols) {
  pairs.panels(
    redWine[, c(predictorCols, 12)],
    method = "pearson", # correlation method
    hist.col = "#00AFBB",
    density = TRUE, # show density plots
    lm = TRUE
  )
}))
# 1. Corrplot (white wine)
library(corrplot)
# Correlations among columns 1-5 and quality (column 12).
M <- cor(whiteWine[, c(1:5, 12)])
corrplot(corr = M, method = "circle") # first five variables with quality
# Correlations among columns 6-11 and quality (column 12).
M1 <- cor(whiteWine[, 6:12])
corrplot(corr = M1, method = "circle") # last six variables with quality
# 2. Correlation matrix (white wine), rounded to two decimals
round(cor(whiteWine[, c(1:5, 12)]), digits = 2) # first five variables with quality
## fixed.acidity volatile.acidity citric.acid residual.sugar
## fixed.acidity 1.00 -0.02 0.29 0.09
## volatile.acidity -0.02 1.00 -0.15 0.06
## citric.acid 0.29 -0.15 1.00 0.09
## residual.sugar 0.09 0.06 0.09 1.00
## chlorides 0.02 0.07 0.11 0.09
## quality -0.11 -0.19 -0.01 -0.10
## chlorides quality
## fixed.acidity 0.02 -0.11
## volatile.acidity 0.07 -0.19
## citric.acid 0.11 -0.01
## residual.sugar 0.09 -0.10
## chlorides 1.00 -0.21
## quality -0.21 1.00
round(cor(whiteWine[, 6:12]), digits = 2) # last six variables with quality
## free.sulfur.dioxide total.sulfur.dioxide density
## free.sulfur.dioxide 1.00 0.62 0.29
## total.sulfur.dioxide 0.62 1.00 0.53
## density 0.29 0.53 1.00
## pH 0.00 0.00 -0.09
## sulphates 0.06 0.13 0.07
## alcohol -0.25 -0.45 -0.78
## quality 0.01 -0.17 -0.31
## pH sulphates alcohol quality
## free.sulfur.dioxide 0.00 0.06 -0.25 0.01
## total.sulfur.dioxide 0.00 0.13 -0.45 -0.17
## density -0.09 0.07 -0.78 -0.31
## pH 1.00 0.16 0.12 0.10
## sulphates 0.16 1.00 -0.02 0.05
## alcohol 0.12 -0.02 1.00 0.44
## quality 0.10 0.05 0.44 1.00
# 3. Correlation matrix drawn as a corrgram (white wine)
library(corrgram)
# Panel functions: panel.shade below the diagonal, panel.pie above it,
# panel.minmax and panel.txt on the diagonal.
corrgram(whiteWine[, c(1:5, 12)],
  lower.panel = panel.shade,
  upper.panel = panel.pie, diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of White wine intercorrelations I"
) # first five variables with quality
corrgram(whiteWine[, c(6:11, 12)],
  lower.panel = panel.shade,
  upper.panel = panel.pie, diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of White wine intercorrelations II"
) # last six variables with quality
# 4. Scatter plot matrix (white wine)
library(psych)
# One matrix per group of predictor columns, each drawn together with quality
# (column 12): columns 1-4, then 5-8, then 8-11. Density (column 8) appears in
# both of the last two groups.
invisible(lapply(list(1:4, 5:8, 8:11), function(predictorCols) {
  pairs.panels(
    whiteWine[, c(predictorCols, 12)],
    method = "pearson", # correlation method
    hist.col = "lightpink",
    density = TRUE, # show density plots
    lm = TRUE
  )
}))
# Red wine: mean of each acidity measure per quality score.
acidityRedWine <- aggregate(
  cbind(citric.acid, volatile.acidity, fixed.acidity) ~ quality,
  data = redWine, FUN = mean
)
library(car)
##
## Attaching package: 'car'
## The following object is masked from 'package:psych':
##
## logit
# Plot each per-quality mean against the quality score.
with(acidityRedWine, scatterplot(quality, citric.acid, xlab = "Quality", ylab = "Citric Acid", main = "1. Citric Acid and Quality scatterplot"))
with(acidityRedWine, scatterplot(quality, volatile.acidity, xlab = "Quality", ylab = "Volatile Acidity", main = "2. Volatile Acidity and Quality scatterplot"))
with(acidityRedWine, scatterplot(quality, fixed.acidity, xlab = "Quality", ylab = "Fixed Acidity", main = "3. Fixed Acidity and Quality scatterplot"))
# White wine: same per-quality means and the same three plots.
acidityWhiteWine <- aggregate(
  cbind(citric.acid, volatile.acidity, fixed.acidity) ~ quality,
  data = whiteWine, FUN = mean
)
with(acidityWhiteWine, scatterplot(quality, citric.acid, xlab = "Quality", ylab = "Citric Acid", main = "1. Citric Acid and Quality scatterplot"))
with(acidityWhiteWine, scatterplot(quality, volatile.acidity, xlab = "Quality", ylab = "Volatile Acidity", main = "2. Volatile Acidity and Quality scatterplot"))
with(acidityWhiteWine, scatterplot(quality, fixed.acidity, xlab = "Quality", ylab = "Fixed Acidity", main = "3. Fixed Acidity and Quality scatterplot"))
# Correlation between fixed acidity and quality in red wine
with(redWine, cor(fixed.acidity, quality))
## [1] 0.1240516
# Correlation between fixed acidity and quality in white wine
with(whiteWine, cor(fixed.acidity, quality))
## [1] -0.1136628
# Red wine: mean residual sugar, chlorides and free sulfur dioxide per quality
# score, each plotted against the quality score.
impactRedWine <- aggregate(cbind(residual.sugar, chlorides, free.sulfur.dioxide) ~ quality,
  data = redWine, FUN = mean
)
scatterplot(impactRedWine$quality, impactRedWine$residual.sugar, xlab = "Quality", ylab = "Residual sugar", main = "4. Residual Sugar and Quality scatterplot")
scatterplot(impactRedWine$quality, impactRedWine$chlorides, xlab = "Quality", ylab = "Chlorides", main = "5. Chlorides and Quality scatterplot")
## Warning in smoother(.x, .y, col = col[2], log.x = logged("x"), log.y =
## logged("y"), : could not fit positive part of the spread
scatterplot(impactRedWine$quality, impactRedWine$free.sulfur.dioxide, xlab = "Quality", ylab = "Free Sulfur Dioxide", main = "6. Free Sulfur Dioxide and Quality scatterplot")
# White wine: same per-quality means and the same three plots. The plot
# number belongs in the title, matching the red wine plot 6.
impactWhiteWine <- aggregate(cbind(residual.sugar, chlorides, free.sulfur.dioxide) ~ quality,
  data = whiteWine, FUN = mean
)
scatterplot(impactWhiteWine$quality, impactWhiteWine$residual.sugar, xlab = "Quality", ylab = "Residual sugar", main = "4. Residual Sugar and Quality scatterplot")
scatterplot(impactWhiteWine$quality, impactWhiteWine$chlorides, xlab = "Quality", ylab = "Chlorides", main = "5. Chlorides and Quality scatterplot")
scatterplot(impactWhiteWine$quality, impactWhiteWine$free.sulfur.dioxide, xlab = "Quality", ylab = "Free Sulfur Dioxide", main = "6. Free Sulfur Dioxide and Quality scatterplot")
# Red wine: mean total sulfur dioxide, density, pH, sulphates and alcohol per
# quality score, each plotted against the quality score.
impact2RedWine <- aggregate(cbind(total.sulfur.dioxide, density, pH, sulphates, alcohol) ~ quality,
  data = redWine, FUN = mean
)
scatterplot(impact2RedWine$quality, impact2RedWine$total.sulfur.dioxide, xlab = "Quality", ylab = "Total Sulfur Dioxide", main = "7. Total Sulfur Dioxide and Quality scatterplot")
## Warning in smoother(.x, .y, col = col[2], log.x = logged("x"), log.y =
## logged("y"), : could not fit positive part of the spread
scatterplot(impact2RedWine$quality, impact2RedWine$density, xlab = "Quality", ylab = "Density", main = "8. Density and Quality scatterplot")
scatterplot(impact2RedWine$quality, impact2RedWine$pH, xlab = "Quality", ylab = "pH", main = "9. pH and Quality scatterplot")
scatterplot(impact2RedWine$quality, impact2RedWine$sulphates, xlab = "Quality", ylab = "Sulphates", main = "10. Sulphates and Quality scatterplot")
scatterplot(impact2RedWine$quality, impact2RedWine$alcohol, xlab = "Quality", ylab = "Alcohol", main = "11. Alcohol and Quality scatterplot")
# White wine: mean total sulfur dioxide, density, pH, sulphates and alcohol
# per quality score, each plotted against the quality score.
impact2WhiteWine <- aggregate(cbind(total.sulfur.dioxide, density, pH, sulphates, alcohol) ~ quality,
  data = whiteWine, FUN = mean
)
scatterplot(impact2WhiteWine$quality, impact2WhiteWine$total.sulfur.dioxide, xlab = "Quality", ylab = "Total Sulfur Dioxide", main = "7. Total Sulfur Dioxide and Quality scatterplot")
## Warning in smoother(.x, .y, col = col[2], log.x = logged("x"), log.y =
## logged("y"), : could not fit positive part of the spread
scatterplot(impact2WhiteWine$quality, impact2WhiteWine$density, xlab = "Quality", ylab = "Density", main = "8. Density and Quality scatterplot")
scatterplot(impact2WhiteWine$quality, impact2WhiteWine$pH, xlab = "Quality", ylab = "pH", main = "9. pH and Quality scatterplot")
## Warning in smoother(.x, .y, col = col[2], log.x = logged("x"), log.y =
## logged("y"), : could not fit positive part of the spread
scatterplot(impact2WhiteWine$quality, impact2WhiteWine$sulphates, xlab = "Quality", ylab = "Sulphates", main = "10. Sulphates and Quality scatterplot")
scatterplot(impact2WhiteWine$quality, impact2WhiteWine$alcohol, xlab = "Quality", ylab = "Alcohol", main = "11. Alcohol and Quality scatterplot")
## Warning in smoother(.x, .y, col = col[2], log.x = logged("x"), log.y =
## logged("y"), : could not fit positive part of the spread
# We divide the wines into 'best' and 'normal': red wines with quality greater than 5 are 'best red wine', and all others are 'normal red wine'.
# Split the red wines by quality score: above 5 is "best", 5 or below is "normal".
bestRedWine <- redWine[which(redWine$quality > 5), ] # bestRedWine
normalRedWine <- redWine[which(redWine$quality <= 5), ] # normalRedWine
library(lattice)
# Quality histograms of the two groups, shown side by side.
# lattice draws with grid graphics, so par(mfrow = ...) does not arrange its
# plots; the 1 x 2 layout is requested through print()'s split argument
# instead (split = c(column, row, n columns, n rows); more = TRUE keeps the
# page open for the second plot). The explicit print() also renders the plots
# when the script is run through source().
print(
  histogram(bestRedWine$quality, xlab = "Sensory score", main = "Best Red wine(Quality > 5)", col = "red3"),
  split = c(1, 1, 2, 1), more = TRUE
)
print(
  histogram(normalRedWine$quality, xlab = "Sensory score", main = "Normal Red wine(Quality <= 5)", col = "red3"),
  split = c(2, 1, 2, 1)
)
library(corrgram)
# Corrgrams for the 'best red wine' subset.
corrgram(
  bestRedWine[, c(1:5, 12)],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Best Red wine intercorrelations I"
) # first five variables with quality of 'best red wine'
corrgram(
  bestRedWine[, 6:12],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Best Red wine intercorrelations II"
) # last six variables with quality of 'best red wine'
library(corrgram)
# Corrgrams for the 'normal red wine' subset.
corrgram(
  normalRedWine[, c(1:5, 12)],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Normal Red wine intercorrelations I"
) # first five variables with quality of 'normal red wine'
corrgram(
  normalRedWine[, 6:12],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Normal Red wine intercorrelations II"
) # last six variables with quality of 'normal red wine'
# Split the white wines by quality score: above 5 is "best", 5 or below is "normal".
bestWhiteWine <- whiteWine[which(whiteWine$quality > 5), ]
normalWhiteWine <- whiteWine[which(whiteWine$quality <= 5), ]
library(lattice)
# Quality histograms of the two groups, shown side by side.
# lattice draws with grid graphics, so par(mfrow = ...) does not arrange its
# plots; the 1 x 2 layout is requested through print()'s split argument
# instead (split = c(column, row, n columns, n rows); more = TRUE keeps the
# page open for the second plot). The explicit print() also renders the plots
# when the script is run through source().
print(
  histogram(bestWhiteWine$quality, xlab = "Sensory score", main = "Best White wine(Quality > 5)", col = "seashell1"),
  split = c(1, 1, 2, 1), more = TRUE
)
print(
  histogram(normalWhiteWine$quality, xlab = "Sensory score", main = "Normal White wine(Quality <= 5)", col = "seashell1"),
  split = c(2, 1, 2, 1)
)
library(corrgram)
# Corrgrams for the 'best white wine' subset.
corrgram(
  bestWhiteWine[, c(1:5, 12)],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Best White wine intercorrelations I"
) # first five variables with quality of 'best white wine'
corrgram(
  bestWhiteWine[, 6:12],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Best White wine intercorrelations II"
) # last six variables with quality of 'best white wine'
library(corrgram)
# Corrgrams for the 'normal white wine' subset.
corrgram(
  normalWhiteWine[, c(1:5, 12)],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Normal White wine intercorrelations I"
) # first five variables with quality of 'normal white wine'
corrgram(
  normalWhiteWine[, 6:12],
  lower.panel = panel.shade, upper.panel = panel.pie,
  diag.panel = panel.minmax, text.panel = panel.txt,
  main = "Corrgram of Normal White wine intercorrelations II"
) # last six variables with quality of 'normal white wine'