##Loading the dataset
# Read the red-wine quality CSV through an explicit path rather than
# calling setwd(): changing the working directory is a global side effect
# that silently alters every later relative path in the session.
Wine.Dataset <- read.csv(
  file.path("C:/Users/Hariharan/Documents", "winequality-red.csv")
)
##dimensions of wine dataset
# Size of the data frame as c(rows, columns)
c(nrow(Wine.Dataset), ncol(Wine.Dataset))
## [1] 1599 12
# Per-column summary statistics of the wine data
summary(object = Wine.Dataset)
## fixed.acidity volatile.acidity citric.acid residual.sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free.sulfur.dioxide total.sulfur.dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00 1st Qu.:0.9956
## Median :0.07900 Median :14.00 Median : 38.00 Median :0.9968
## Mean :0.08747 Mean :15.87 Mean : 46.47 Mean :0.9967
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00 3rd Qu.:0.9978
## Max. :0.61100 Max. :72.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol quality
## Min. :2.740 Min. :0.3300 Min. : 8.40 Min. :3.000
## 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50 1st Qu.:5.000
## Median :3.310 Median :0.6200 Median :10.20 Median :6.000
## Mean :3.311 Mean :0.6581 Mean :10.42 Mean :5.636
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10 3rd Qu.:6.000
## Max. :4.010 Max. :2.0000 Max. :14.90 Max. :8.000
## Base-graphics index plot: each residual.sugar value is drawn against its
## row position in Wine.Dataset. The default y-axis label is taken from the
## argument expression as written, so the call is left untouched.
plot(Wine.Dataset$residual.sugar)
### Bar & Stacked Bar Charts
library(ggplot2)
# Number of rows at each alcohol value, drawn as red bars and flipped to
# run horizontally. The former theme_gray() call was removed: theme_bw()
# is a complete theme that replaced it entirely, so it never had an effect.
ggplot(Wine.Dataset, aes(alcohol)) +
  geom_bar(fill = "red") +
  scale_x_continuous("alcohol", breaks = seq(8, 15)) +
  scale_y_continuous("amount of adding", breaks = seq(5, 100, 10)) +
  coord_flip() +
  labs(title = "Bar Chart") +
  theme_bw()
# Bars positioned at each chlorides value whose heights come directly from
# sulphates (identity stat, no counting); rows sharing an x are stacked.
ggplot(data = Wine.Dataset, mapping = aes(x = chlorides, y = sulphates)) +
  geom_col() +
  labs(title = "chlorides VS sulphates") +
  theme_bw()
# Volatile acidity drawn as bar heights (identity stat) over fixed acidity,
# with the x-axis tick labels rotated and coloured navy.
ggplot(
  data = Wine.Dataset,
  mapping = aes(x = fixed.acidity, y = volatile.acidity)
) +
  geom_col() +
  labs(
    title = "Fixed VS Volatile Acidity",
    x = "Fixed Acidity",
    y = "Volatile Acidity"
  ) +
  theme(axis.text.x = element_text(angle = 70, vjust = 0.5, color = "navy"))
###Stacked Bar chart:
# Row counts per alcohol value, stacked by quality score.
# quality is read as a number, and a continuous fill creates no groups, so
# the bars were never actually split. Converting it to a factor gives one
# stacked segment per quality level; the legend title is set explicitly so
# it does not read "factor(quality)".
ggplot(Wine.Dataset, aes(alcohol, fill = factor(quality))) +
  geom_bar() +
  labs(
    title = "Stacked Bar Chart",
    x = "Alcohol Level",
    y = "Amount",
    fill = "Quality"
  )
###scatter plots
# Quality score against pH, one point per row, with fixed axis breaks.
ggplot(data = Wine.Dataset, mapping = aes(x = pH, y = quality)) +
  geom_point() +
  scale_x_continuous(
    name = "Wine pH Level",
    breaks = seq(from = 0, to = 5, by = 0.5)
  ) +
  scale_y_continuous(
    name = "Item Quality",
    breaks = seq(from = 0, to = 10, by = 1)
  ) +
  theme_bw()
###scatter plots coloured by alcohol (plain, then faceted)
# Quality against pH again, this time with each point coloured by the
# row's alcohol value.
ggplot(data = Wine.Dataset, mapping = aes(x = pH, y = quality)) +
  geom_point(mapping = aes(color = alcohol)) +
  scale_x_continuous(
    name = "Wine pH Level",
    breaks = seq(from = 0, to = 5, by = 0.5)
  ) +
  scale_y_continuous(
    name = "Item Quality",
    breaks = seq(from = 0, to = 10, by = 1)
  ) +
  theme_bw() +
  ggtitle("Scatterplot")
# Same coloured scatter, split into one panel per distinct alcohol value.
# NOTE(review): alcohol is numeric, so this yields a panel for every unique
# value -- confirm that is intended rather than faceting on binned alcohol.
ggplot(data = Wine.Dataset, mapping = aes(x = pH, y = quality)) +
  geom_point(mapping = aes(color = alcohol)) +
  scale_x_continuous(
    name = "Wine pH Level",
    breaks = seq(from = 0, to = 5, by = 0.5)
  ) +
  scale_y_continuous(
    name = "Item Quality",
    breaks = seq(from = 0, to = 10, by = 1)
  ) +
  theme_bw() +
  ggtitle("Scatterplot") +
  facet_wrap(vars(alcohol))
##box plot
# Distribution of fixed acidity within 1-unit-wide alcohol bins.
# A boxplot needs a grouping on x; with the raw continuous alcohol column
# ggplot2 drew a single box and warned "Continuous x aesthetic -- did you
# forget aes(group=...)?". cut_width() supplies that grouping. The first
# ggtitle("Box Plot") was also dropped because the later title replaced it.
ggplot(Wine.Dataset, aes(alcohol, fixed.acidity)) +
  geom_boxplot(aes(group = cut_width(alcohol, 1))) +
  theme(axis.text.x = element_text(angle = 50, vjust = 0.5, color = "red")) +
  labs(title = "Alcohol VS Fixed Acidity", x = "Alcohol", y = "Fixed Acidity")
###histogram
# Histogram of fixed acidity using bins one unit wide.
ggplot(data = Wine.Dataset, mapping = aes(x = fixed.acidity)) +
  geom_histogram(binwidth = 1) +
  scale_x_continuous(
    name = "Fixed Acidity",
    breaks = seq(from = 4, to = 16, by = 4)
  ) +
  scale_y_continuous(
    name = "Count",
    breaks = seq(from = 0, to = 1600, by = 100)
  ) +
  ggtitle("Histogram")
###Heat Map
# Heat map of quality over the chlorides/sulphates plane.
# geom_raster() expects points on a regular grid; the measurements are
# scattered, so it shifted pixels and warned about uneven intervals.
# stat_summary_2d() first bins the plane into a regular 30 x 30 grid and
# fills each cell with the mean quality of the rows that fall inside it.
ggplot(Wine.Dataset, aes(chlorides, sulphates)) +
  stat_summary_2d(aes(z = quality), fun = mean, bins = 30) +
  labs(
    title = "Heat Map",
    x = "Amount of Chlorides",
    y = "Amount of Sulphates"
  ) +
  scale_fill_continuous(name = "Item Quality")
##corrgram
library(corrgram)
# Correlogram of the columns of Wine.Dataset: shaded cells in the panels
# and variable names in the text panel, with no reordering requested.
corrgram(
  Wine.Dataset,
  order = NULL,
  panel = panel.shade,
  text.panel = panel.txt,
  main = "Correlogram"
)