# Exploratory analysis of the red-wine quality data: summary statistics, a
# correlogram, scatter plots of quality against selected predictors, and a
# first linear model (Model1).
library(psych)     # describe()
library(corrgram)  # corrgram()

# NOTE(review): machine-specific absolute path; adjust when running elsewhere.
winequality <- read.csv("C:/Users/Jaswanth/Desktop/winequality-red.csv")

# Human-readable labels, keyed by the column names produced by read.csv().
# Columns not listed here (if any) keep their original names.
display_labels <- c(
  fixed.acidity        = "Fixed acidity",
  volatile.acidity     = "Volatile acidity",
  citric.acid          = "Citric Acid",
  residual.sugar       = "Residual Sugar",
  free.sulfur.dioxide  = "Free Sulfur Dioxide",
  total.sulfur.dioxide = "Total Sulfur Dioxide",
  chlorides            = "Chlorides",
  density              = "Density",
  sulphates            = "Sulphates",
  alcohol              = "Alcohol",
  quality              = "Quality"
)

# The labels go on a display-only copy. `winequality` keeps its syntactic
# column names so that formulas using `data = winequality` find their
# variables in the data frame itself; no attach() is needed, and nothing is
# silently read from a stale copy on the search path.
wine_labelled <- winequality
has_label <- colnames(wine_labelled) %in% names(display_labels)
colnames(wine_labelled)[has_label] <-
  unname(display_labels[colnames(wine_labelled)[has_label]])

# Per-column summary statistics and a correlogram of the pairwise correlations.
describe(wine_labelled)
corrgram(x = cor(wine_labelled))

# Scatter plots of quality (x-axis) against individual predictors (y-axis).
plot(winequality$quality, winequality$fixed.acidity,
     xlab = "Quality", ylab = "Fixed acidity")
### Here we can see that the higher quality wines lie on the higher fixed acidity spectrum of the data.
plot(winequality$quality, winequality$citric.acid,
     xlab = "Quality", ylab = "Citric Acid")
### We cannot immediately conclude anything regarding the quality and Citric Acid content.
plot(winequality$quality, winequality$residual.sugar,
     xlab = "Quality", ylab = "Residual Sugar")
### Once again, there is no conclusive evidence right here about their interdependencies.
plot(winequality$quality, winequality$sulphates,
     xlab = "Quality", ylab = "Sulphates")
### Here we can see that as we traverse along the X-axis, the Y-coordinate value (Sulphates) tends to shift upwards.
plot(winequality$quality, winequality$alcohol,
     xlab = "Quality", ylab = "Alcohol content")
### Here the dependency is more pronounced due to higher correlation. We see a trend of wine quality increasing with alcohol content.

# First model: quality regressed on alcohol, sulphates and citric acid.
Model1 <- lm(quality ~ alcohol + sulphates + citric.acid, data = winequality)
summary(Model1)
##
## Call:
## lm(formula = quality ~ alcohol + sulphates + citric.acid, data = winequality)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7565 -0.3535 -0.1007 0.5067 2.2125
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.43392 0.17615 8.140 7.86e-16 ***
## alcohol 0.33841 0.01619 20.903 < 2e-16 ***
## sulphates 0.81403 0.10651 7.643 3.65e-14 ***
## citric.acid 0.51345 0.09284 5.531 3.72e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6842 on 1595 degrees of freedom
## Multiple R-squared: 0.2836, Adjusted R-squared: 0.2823
## F-statistic: 210.5 on 3 and 1595 DF, p-value: < 2.2e-16
# Second model: Model1's predictors plus fixed acidity and residual sugar.
# summary() prints the coefficient table and overall fit statistics.
Model2 <- lm(
  formula = quality ~ fixed.acidity + residual.sugar + alcohol +
    citric.acid + sulphates,
  data = winequality
)
summary(Model2)
##
## Call:
## lm(formula = quality ~ fixed.acidity + residual.sugar + alcohol +
## citric.acid + sulphates, data = winequality)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7521 -0.3533 -0.0909 0.5133 2.1554
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.16472 0.21552 5.404 7.50e-08 ***
## fixed.acidity 0.03298 0.01349 2.445 0.01458 *
## residual.sugar -0.01483 0.01227 -1.209 0.22701
## alcohol 0.34631 0.01645 21.055 < 2e-16 ***
## citric.acid 0.32568 0.12528 2.600 0.00942 **
## sulphates 0.81556 0.10647 7.660 3.21e-14 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.683 on 1593 degrees of freedom
## Multiple R-squared: 0.2869, Adjusted R-squared: 0.2846
## F-statistic: 128.2 on 5 and 1593 DF, p-value: < 2.2e-16
### \[ \text{Wine Quality} = \alpha_0 + \alpha_1 \,\text{Fixed Acidity} + \alpha_2 \,\text{Residual Sugar} + \alpha_3 \,\text{Alcohol Content} + \alpha_4 \,\text{Citric Acid} + \alpha_5 \,\text{Sulphates} \]
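### \[ \text{Wine Quality} = 1.16472 + 0.03298 \times \text{Fixed Acidity} - 0.01483 \times \text{Residual Sugar} + 0.34631 \times \text{Alcohol Content} + 0.32568 \times \text{Citric Acid} + 0.81556 \times \text{Sulphates} \]
### We have thus obtained an equation for predicting the wine quality from the selected attributes of the data. Note that all of them are statistically significant in this fit except residual sugar (p = 0.227).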