# Questions:
# Observations:
# Load the red-wine quality data set. `data.table = FALSE` makes fread()
# return a plain data.frame rather than a data.table.
RedWine <- data.table::fread(
  file = "/Users/uraj/Desktop/StatMethods/DataFile/winequality-red.csv",
  data.table = FALSE
)

# attach() copies the columns into a new entry on the search path, so running
# this script again would stack a second (and stale first) copy and print
# masking messages. Remove any earlier copy before attaching the fresh one.
while ("RedWine" %in% search()) {
  detach("RedWine", character.only = TRUE)
}
# NOTE(review): attach() is kept only in case later code refers to columns by
# bare name; the plotting and summary calls nearby all use RedWine$<column>.
# Prefer deleting it once that is confirmed.
attach(RedWine)
# Question a
# Size of the data set: dim() reports the number of rows, then columns.
dim(RedWine)
## [1] 1599 12
# Per-column summary statistics (min, quartiles, median, mean, max); the
# recorded console output follows the call.
summary(RedWine)
## fixed_acidity volatile_acidity citric_acid residual_sugar
## Min. : 4.60 Min. :0.1200 Min. :0.000 Min. : 0.900
## 1st Qu.: 7.10 1st Qu.:0.3900 1st Qu.:0.090 1st Qu.: 1.900
## Median : 7.90 Median :0.5200 Median :0.260 Median : 2.200
## Mean : 8.32 Mean :0.5278 Mean :0.271 Mean : 2.539
## 3rd Qu.: 9.20 3rd Qu.:0.6400 3rd Qu.:0.420 3rd Qu.: 2.600
## Max. :15.90 Max. :1.5800 Max. :1.000 Max. :15.500
## chlorides free_sulfur_dioxide total_sulfur_dioxide density
## Min. :0.01200 Min. : 1.00 Min. : 6.00 Min. :0.9901
## 1st Qu.:0.07000 1st Qu.: 7.00 1st Qu.: 22.00 1st Qu.:0.9956
## Median :0.07900 Median :14.00 Median : 38.00 Median :0.9968
## Mean :0.08747 Mean :15.87 Mean : 46.47 Mean :0.9967
## 3rd Qu.:0.09000 3rd Qu.:21.00 3rd Qu.: 62.00 3rd Qu.:0.9978
## Max. :0.61100 Max. :72.00 Max. :289.00 Max. :1.0037
## pH sulphates alcohol quality
## Min. :2.740 Min. :0.3300 Min. : 8.40 Min. :3.000
## 1st Qu.:3.210 1st Qu.:0.5500 1st Qu.: 9.50 1st Qu.:5.000
## Median :3.310 Median :0.6200 Median :10.20 Median :6.000
## Mean :3.311 Mean :0.6581 Mean :10.42 Mean :5.636
## 3rd Qu.:3.400 3rd Qu.:0.7300 3rd Qu.:11.10 3rd Qu.:6.000
## Max. :4.010 Max. :2.0000 Max. :14.90 Max. :8.000
# Question d: How can you visualize the distribution of each variable?
# One box plot per variable.
# Column name -> plot title, kept in a single table so the titles are easy to
# proofread and stay consistent with each other.
boxplot_titles <- c(
  fixed_acidity        = "Fixed Acidity",
  volatile_acidity     = "Volatile Acidity",
  citric_acid          = "Citric Acid",
  residual_sugar       = "Residual Sugar Content",
  chlorides            = "Chloride Content",
  free_sulfur_dioxide  = "Free Sulfur Dioxide",
  total_sulfur_dioxide = "Total Sulfur Dioxide",
  density              = "Density",
  pH                   = "pH",
  sulphates            = "Sulphate Content",
  alcohol              = "Alcohol Content",
  quality              = "Quality"
)

# `[[` looks the column up by its exact name (no partial matching, unlike `$`).
for (wine_column in names(boxplot_titles)) {
  boxplot(
    RedWine[[wine_column]],
    col = "slategray2",
    main = boxplot_titles[[wine_column]]
  )
}
# One histogram per variable, with the count printed above each bar
# (`labels = TRUE`).
hist_columns <- c(
  "fixed_acidity", "volatile_acidity", "citric_acid", "residual_sugar",
  "chlorides", "free_sulfur_dioxide", "total_sulfur_dioxide", "density",
  "pH", "sulphates", "alcohol", "quality"
)

for (wine_column in hist_columns) {
  # hist() builds its default title and x-axis label from the expression it
  # was called with. Spell that text out so every plot is still labelled
  # "RedWine$<column>" even though the column is now looked up by name.
  expr_label <- paste0("RedWine$", wine_column)
  hist(
    RedWine[[wine_column]],
    col = "lightblue",
    labels = TRUE,
    main = paste("Histogram of", expr_label),
    xlab = expr_label
  )
}