## Rows: 6,497
## Columns: 13
## $ type <chr> "white", "white", "white", "white", "white", "whi…
## $ fixed.acidity <dbl> 7.0, 6.3, 8.1, 7.2, 7.2, 8.1, 6.2, 7.0, 6.3, 8.1,…
## $ volatile.acidity <dbl> 0.27, 0.30, 0.28, 0.23, 0.23, 0.28, 0.32, 0.27, 0…
## $ citric.acid <dbl> 0.36, 0.34, 0.40, 0.32, 0.32, 0.40, 0.16, 0.36, 0…
## $ residual.sugar <dbl> 20.70, 1.60, 6.90, 8.50, 8.50, 6.90, 7.00, 20.70,…
## $ chlorides <dbl> 0.045, 0.049, 0.050, 0.058, 0.058, 0.050, 0.045, …
## $ free.sulfur.dioxide <dbl> 45, 14, 30, 47, 47, 30, 30, 45, 14, 28, 11, 17, 1…
## $ total.sulfur.dioxide <dbl> 170, 132, 97, 186, 186, 97, 136, 170, 132, 129, 6…
## $ density <dbl> 1.0010, 0.9940, 0.9951, 0.9956, 0.9956, 0.9951, 0…
## $ pH <dbl> 3.00, 3.30, 3.26, 3.19, 3.19, 3.26, 3.18, 3.00, 3…
## $ sulphates <dbl> 0.45, 0.49, 0.44, 0.40, 0.40, 0.44, 0.47, 0.45, 0…
## $ alcohol <dbl> 8.8, 9.5, 10.1, 9.9, 9.9, 10.1, 9.6, 8.8, 9.5, 11…
## $ quality <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 7, 5, 7, 6…
## # A tibble: 6 × 13
## type fixed.ac…¹ volat…² citri…³ resid…⁴ chlor…⁵ free.…⁶ total…⁷ density pH
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 white 7 0.27 0.36 20.7 0.045 45 170 1.00 3
## 2 white 6.3 0.3 0.34 1.6 0.049 14 132 0.994 3.3
## 3 white 8.1 0.28 0.4 6.9 0.05 30 97 0.995 3.26
## 4 white 7.2 0.23 0.32 8.5 0.058 47 186 0.996 3.19
## 5 white 7.2 0.23 0.32 8.5 0.058 47 186 0.996 3.19
## 6 white 8.1 0.28 0.4 6.9 0.05 30 97 0.995 3.26
## # … with 3 more variables: sulphates <dbl>, alcohol <dbl>, quality <dbl>, and
## # abbreviated variable names ¹fixed.acidity, ²volatile.acidity, ³citric.acid,
## # ⁴residual.sugar, ⁵chlorides, ⁶free.sulfur.dioxide, ⁷total.sulfur.dioxide
## Rows: 6,463
## Columns: 13
## $ type <chr> "white", "white", "white", "white", "white", "whi…
## $ fixed.acidity <dbl> 7.0, 6.3, 8.1, 7.2, 7.2, 8.1, 6.2, 7.0, 6.3, 8.1,…
## $ volatile.acidity <dbl> 0.27, 0.30, 0.28, 0.23, 0.23, 0.28, 0.32, 0.27, 0…
## $ citric.acid <dbl> 0.36, 0.34, 0.40, 0.32, 0.32, 0.40, 0.16, 0.36, 0…
## $ residual.sugar <dbl> 20.70, 1.60, 6.90, 8.50, 8.50, 6.90, 7.00, 20.70,…
## $ chlorides <dbl> 0.045, 0.049, 0.050, 0.058, 0.058, 0.050, 0.045, …
## $ free.sulfur.dioxide <dbl> 45, 14, 30, 47, 47, 30, 30, 45, 14, 28, 11, 17, 1…
## $ total.sulfur.dioxide <dbl> 170, 132, 97, 186, 186, 97, 136, 170, 132, 129, 6…
## $ density <dbl> 1.0010, 0.9940, 0.9951, 0.9956, 0.9956, 0.9951, 0…
## $ pH <dbl> 3.00, 3.30, 3.26, 3.19, 3.19, 3.26, 3.18, 3.00, 3…
## $ sulphates <dbl> 0.45, 0.49, 0.44, 0.40, 0.40, 0.44, 0.47, 0.45, 0…
## $ alcohol <dbl> 8.8, 9.5, 10.1, 9.9, 9.9, 10.1, 9.6, 8.8, 9.5, 11…
## $ quality <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 7, 5, 7, 6…
## .
## 3 4 5 6 7 8 9
## 30 214 2128 2820 1074 192 5
## .
## 3 4 5 6 7 8 9
## 0.005 0.033 0.329 0.436 0.166 0.030 0.001
# Correlate every column except `quality` with the quality score; the result
# is a one-column matrix with one row per remaining column.
quality_cor <- cor(
  x = wine[, names(wine) != "quality"],
  y = wine[["quality"]]
)
quality_cor
## [,1]
## type -0.11918495
## fixed.acidity -0.07617378
## volatile.acidity -0.26667748
## citric.acid 0.08492614
## residual.sugar -0.03465378
## chlorides -0.20055317
## free.sulfur.dioxide 0.05492413
## total.sulfur.dioxide -0.04159801
## density -0.30444677
## pH 0.01840276
## sulphates 0.03905364
## alcohol 0.44463687
## [1] "fixed.acidity"
## [1] "volatile.acidity"
## [1] "citric.acid"
## [1] "residual.sugar"
## [1] "chlorides"
## [1] "free.sulfur.dioxide"
## [1] "total.sulfur.dioxide"
## [1] "density"
## [1] "pH"
## [1] "sulphates"
## [1] "alcohol"
## [1] "quality"
# Draw a random 80% of the row indices of `wine` (without replacement).
# Passing a single number to sample() permutes 1:floor(that number), so the
# previous `sample(nrow(wine) * 0.8)` always selected the first 80% of rows.
# The population size and the sample size are therefore given separately.
train <- sample(nrow(wine), size = floor(nrow(wine) * 0.8))
##
## Call:
## lm(formula = quality ~ . - type, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.82263 -0.46705 -0.01595 0.47189 2.21454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.797e+01 1.658e+01 5.307 1.16e-07 ***
## fixed.acidity 7.352e-02 2.010e-02 3.657 0.000258 ***
## volatile.acidity -1.575e+00 1.007e-01 -15.633 < 2e-16 ***
## citric.acid -1.193e-01 9.589e-02 -1.244 0.213679
## residual.sugar 5.616e-02 6.589e-03 8.523 < 2e-16 ***
## chlorides 8.420e-01 7.658e-01 1.099 0.271641
## free.sulfur.dioxide 7.011e-03 8.277e-04 8.470 < 2e-16 ***
## total.sulfur.dioxide -2.238e-03 3.062e-04 -7.308 3.12e-13 ***
## density -8.768e+01 1.686e+01 -5.200 2.07e-07 ***
## pH 6.018e-01 9.934e-02 6.058 1.47e-09 ***
## sulphates 7.701e-01 9.388e-02 8.203 2.94e-16 ***
## alcohol 2.281e-01 2.210e-02 10.325 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7036 on 5158 degrees of freedom
## Multiple R-squared: 0.2937, Adjusted R-squared: 0.2922
## F-statistic: 195 on 11 and 5158 DF, p-value: < 2.2e-16
## fixed.acidity volatile.acidity citric.acid
## 3.210615 1.382746 1.264477
## residual.sugar chlorides free.sulfur.dioxide
## 10.666093 1.794805 1.893137
## total.sulfur.dioxide density pH
## 2.063821 25.473683 2.390585
## sulphates alcohol
## 1.296809 7.589321
##
## Call:
## lm(formula = quality ~ . - type - density, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8244 -0.4625 -0.0147 0.4741 2.1760
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7953023 0.3130057 5.736 1.03e-08 ***
## fixed.acidity -0.0061888 0.0130377 -0.475 0.635033
## volatile.acidity -1.7116528 0.0974729 -17.560 < 2e-16 ***
## citric.acid -0.1380128 0.0960665 -1.437 0.150881
## residual.sugar 0.0243940 0.0024759 9.853 < 2e-16 ***
## chlorides -0.6291021 0.7134665 -0.882 0.377951
## free.sulfur.dioxide 0.0072712 0.0008283 8.778 < 2e-16 ***
## total.sulfur.dioxide -0.0021759 0.0003067 -7.094 1.48e-12 ***
## pH 0.2536861 0.0735765 3.448 0.000569 ***
## sulphates 0.5842631 0.0870327 6.713 2.11e-11 ***
## alcohol 0.3289874 0.0106208 30.976 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7053 on 5159 degrees of freedom
## Multiple R-squared: 0.29, Adjusted R-squared: 0.2887
## F-statistic: 210.8 on 10 and 5159 DF, p-value: < 2.2e-16
## fixed.acidity volatile.acidity citric.acid
## 1.343780 1.288256 1.262688
## residual.sugar chlorides free.sulfur.dioxide
## 1.498266 1.549887 1.886203
## total.sulfur.dioxide pH sulphates
## 2.060714 1.304795 1.108922
## alcohol
## 1.744393
##
## Call:
## lm(formula = quality ~ . - type - density - residual.sugar, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.71967 -0.46329 -0.02265 0.49174 2.07290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6826048 0.3025477 8.867 < 2e-16 ***
## fixed.acidity -0.0120800 0.0131447 -0.919 0.3581
## volatile.acidity -1.6333069 0.0980482 -16.658 < 2e-16 ***
## citric.acid -0.0987875 0.0968734 -1.020 0.3079
## chlorides -1.5600749 0.7137347 -2.186 0.0289 *
## free.sulfur.dioxide 0.0082906 0.0008294 9.995 < 2e-16 ***
## total.sulfur.dioxide -0.0016461 0.0003048 -5.401 6.92e-08 ***
## pH 0.1293863 0.0731585 1.769 0.0770 .
## sulphates 0.5437645 0.0877411 6.197 6.19e-10 ***
## alcohol 0.2929845 0.0100648 29.110 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7119 on 5160 degrees of freedom
## Multiple R-squared: 0.2767, Adjusted R-squared: 0.2754
## F-statistic: 219.3 on 9 and 5160 DF, p-value: < 2.2e-16
## fixed.acidity volatile.acidity citric.acid
## 1.340954 1.279682 1.260520
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## 1.522702 1.856773 1.997376
## pH sulphates alcohol
## 1.266432 1.106449 1.537899
### We can see that the VIF values for density and residual sugar are higher than 10, while the value for alcohol comes closest to, but does not exceed, 10.
### Now we will fit a regression model
##
## Call:
## lm(formula = quality ~ fixed.acidity + volatile.acidity + citric.acid +
## residual.sugar + chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.82263 -0.46705 -0.01595 0.47189 2.21454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.797e+01 1.658e+01 5.307 1.16e-07 ***
## fixed.acidity 7.352e-02 2.010e-02 3.657 0.000258 ***
## volatile.acidity -1.575e+00 1.007e-01 -15.633 < 2e-16 ***
## citric.acid -1.193e-01 9.589e-02 -1.244 0.213679
## residual.sugar 5.616e-02 6.589e-03 8.523 < 2e-16 ***
## chlorides 8.420e-01 7.658e-01 1.099 0.271641
## free.sulfur.dioxide 7.011e-03 8.277e-04 8.470 < 2e-16 ***
## total.sulfur.dioxide -2.238e-03 3.062e-04 -7.308 3.12e-13 ***
## density -8.768e+01 1.686e+01 -5.200 2.07e-07 ***
## pH 6.018e-01 9.934e-02 6.058 1.47e-09 ***
## sulphates 7.701e-01 9.388e-02 8.203 2.94e-16 ***
## alcohol 2.281e-01 2.210e-02 10.325 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7036 on 5158 degrees of freedom
## Multiple R-squared: 0.2937, Adjusted R-squared: 0.2922
## F-statistic: 195 on 11 and 5158 DF, p-value: < 2.2e-16
## 1 2 3 4 5 6
## 6.397178 5.132746 5.981751 6.117660 5.351733 6.435004
## 1 2 3 4 5 6
## 7 6 6 7 6 7
## actual
## predicted 3.5 4 5 6 7 7.5
## 5 2 24 94 26 1 0
## 6 11 136 1350 1337 252 46
## 7 7 17 177 910 640 135
## 8 0 0 0 2 3 0
## [1] 0.2451663
## fixed.acidity volatile.acidity citric.acid
## 1.864147e+02 2.364436e+06 1.574669e+05
## residual.sugar chlorides free.sulfur.dioxide
## 1.701527e+00 8.862709e+06 2.358498e+00
## total.sulfur.dioxide density pH
## 2.920313e+00 6.314801e+06 5.529830e+04
## sulphates alcohol
## 4.263106e+05 4.426875e+02
## Call:
## polr(formula = quality2 ~ . - type - quality, data = wine.train,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## fixed.acidity 0.062229 0.0361248 1.7226
## volatile.acidity -3.910251 0.2652338 -14.7427
## citric.acid -0.060066 0.2697742 -0.2227
## residual.sugar 0.083250 0.0078619 10.5890
## chlorides -3.223162 0.0265041 -121.6101
## free.sulfur.dioxide 0.019672 0.0025386 7.7490
## total.sulfur.dioxide -0.007446 0.0008519 -8.7413
## density -54.537856 0.4666007 -116.8834
## pH 0.825945 0.2248289 3.6737
## sulphates 2.341744 0.2487452 9.4142
## alcohol 0.848126 0.0302951 27.9954
##
## Intercepts:
## Value Std. Error t value
## 3.5|4 -48.4024 0.4766 -101.5664
## 4|5 -46.2365 0.4763 -97.0719
## 5|6 -43.0830 0.4822 -89.3482
## 6|7 -40.4748 0.4923 -82.2216
## 7|7.5 -38.1278 0.5036 -75.7077
##
## Residual Deviance: 9835.458
## AIC: 9867.458
## Start: AIC=9867.46
## quality2 ~ (type + fixed.acidity + volatile.acidity + citric.acid +
## residual.sugar + chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol + quality) - type - quality
##
## Df AIC
## - citric.acid 1 9865.5
## - fixed.acidity 1 9866.7
## - density 1 9867.3
## <none> 9867.5
## - chlorides 1 9867.6
## - pH 1 9874.8
## - residual.sugar 1 9889.6
## - free.sulfur.dioxide 1 9925.6
## - total.sulfur.dioxide 1 9935.4
## - sulphates 1 9939.6
## - volatile.acidity 1 10043.0
## - alcohol 1 10110.0
##
## Step: AIC=9865.51
## quality2 ~ fixed.acidity + volatile.acidity + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol
##
## Df AIC
## - fixed.acidity 1 9864.8
## - density 1 9865.4
## <none> 9865.5
## - chlorides 1 9865.7
## - pH 1 9873.4
## - residual.sugar 1 9887.9
## - free.sulfur.dioxide 1 9927.5
## - total.sulfur.dioxide 1 9935.8
## - sulphates 1 9937.6
## - volatile.acidity 1 10074.0
## - alcohol 1 10112.1
##
## Step: AIC=9864.76
## quality2 ~ volatile.acidity + residual.sugar + chlorides + free.sulfur.dioxide +
## total.sulfur.dioxide + density + pH + sulphates + alcohol
##
## Df AIC
## - density 1 9863.4
## <none> 9864.8
## - chlorides 1 9866.5
## - pH 1 9871.9
## - residual.sugar 1 9897.0
## - free.sulfur.dioxide 1 9922.5
## - total.sulfur.dioxide 1 9934.4
## - sulphates 1 9935.8
## - volatile.acidity 1 10087.8
## - alcohol 1 10299.9
##
## Step: AIC=9863.38
## quality2 ~ volatile.acidity + residual.sugar + chlorides + free.sulfur.dioxide +
## total.sulfur.dioxide + pH + sulphates + alcohol
##
## Df AIC
## <none> 9863.4
## - chlorides 1 9866.8
## - pH 1 9870.0
## - free.sulfur.dioxide 1 9922.1
## - residual.sugar 1 9926.4
## - total.sulfur.dioxide 1 9933.0
## - sulphates 1 9942.0
## - volatile.acidity 1 10100.7
## - alcohol 1 10686.4
## 3.5 4 5 6 7 7.5
## 1 0.0034958339 0.026215106 0.38870771 0.4884762 0.08336730 0.009737883
## 2 0.0024425598 0.018482504 0.31336385 0.5374827 0.11433514 0.013893214
## 3 0.0056867196 0.041860549 0.49224197 0.4009690 0.05324613 0.005995643
## 4 0.0054847109 0.040442226 0.48481471 0.4079567 0.05508533 0.006216353
## 5 0.0015331478 0.011692345 0.22626316 0.5705221 0.16801640 0.021972862
## 6 0.0005624501 0.004325692 0.09858348 0.5062952 0.33247412 0.057759013
## [1] 6 6 5 5 6 6
## Levels: 3.5 4 5 6 7 7.5
## predicted
## Actual 3.5 4 5 6 7 7.5
## 3.5 0 0 13 8 0 0
## 4 0 0 87 62 1 0
## 5 0 0 859 623 8 0
## 6 0 0 420 1422 130 2
## 7 0 0 55 526 171 0
## 7.5 0 0 10 81 47 0
## [1] 0.5418785
## actual
## predicted 3.5 4 5 6 7 7.5
## 3.5 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 4 42 389 175 26 5
## 6 5 21 246 615 220 37
## 7 0 1 3 56 76 17
## 7.5 0 0 0 0 0 0
## [1] 0.238674
## Start: AIC=4658.44
## category ~ (type + fixed.acidity + volatile.acidity + citric.acid +
## residual.sugar + chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol + quality + quality2) -
## type - quality - quality2
##
## Df Deviance AIC
## - fixed.acidity 1 4634.4 4656.4
## - density 1 4634.5 4656.5
## - pH 1 4634.6 4656.6
## - chlorides 1 4634.8 4656.8
## <none> 4634.4 4658.4
## - citric.acid 1 4637.9 4659.9
## - residual.sugar 1 4647.9 4669.9
## - free.sulfur.dioxide 1 4673.2 4695.2
## - total.sulfur.dioxide 1 4686.0 4708.0
## - sulphates 1 4699.7 4721.7
## - volatile.acidity 1 4818.9 4840.9
## - alcohol 1 4827.3 4849.3
##
## Step: AIC=4656.45
## category ~ volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol
##
## Df Deviance AIC
## - density 1 4634.6 4654.6
## - pH 1 4634.7 4654.7
## - chlorides 1 4634.9 4654.9
## <none> 4634.4 4656.4
## - citric.acid 1 4638.0 4658.0
## - residual.sugar 1 4659.0 4679.0
## - free.sulfur.dioxide 1 4673.2 4693.2
## - total.sulfur.dioxide 1 4686.0 4706.0
## - sulphates 1 4701.3 4721.3
## - volatile.acidity 1 4821.8 4841.8
## - alcohol 1 4932.1 4952.1
##
## Step: AIC=4654.56
## category ~ volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## pH + sulphates + alcohol
##
## Df Deviance AIC
## - pH 1 4634.7 4652.7
## - chlorides 1 4635.2 4653.2
## <none> 4634.6 4654.6
## - citric.acid 1 4639.0 4657.0
## - free.sulfur.dioxide 1 4673.7 4691.7
## - total.sulfur.dioxide 1 4687.7 4705.7
## - residual.sugar 1 4693.1 4711.1
## - sulphates 1 4710.0 4728.0
## - volatile.acidity 1 4851.3 4869.3
## - alcohol 1 5212.7 5230.7
##
## Step: AIC=4652.72
## category ~ volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## sulphates + alcohol
##
## Df Deviance AIC
## - chlorides 1 4635.4 4651.4
## <none> 4634.7 4652.7
## - citric.acid 1 4640.2 4656.2
## - free.sulfur.dioxide 1 4674.3 4690.3
## - total.sulfur.dioxide 1 4688.3 4704.3
## - residual.sugar 1 4693.6 4709.6
## - sulphates 1 4714.4 4730.4
## - volatile.acidity 1 4851.3 4867.3
## - alcohol 1 5218.6 5234.6
##
## Step: AIC=4651.39
## category ~ volatile.acidity + citric.acid + residual.sugar +
## free.sulfur.dioxide + total.sulfur.dioxide + sulphates +
## alcohol
##
## Df Deviance AIC
## <none> 4635.4 4651.4
## - citric.acid 1 4641.5 4655.5
## - free.sulfur.dioxide 1 4674.4 4688.4
## - total.sulfur.dioxide 1 4689.7 4703.7
## - residual.sugar 1 4697.3 4711.3
## - sulphates 1 4715.8 4729.8
## - volatile.acidity 1 4906.4 4920.4
## - alcohol 1 5408.9 5422.9
## 1 2 3 4 5 6
## 0.4819976 0.5904504 0.5904504 0.6415135 0.4819976 0.7518595
## 1 2 3 4 5 6
## -0.07204056 0.36582745 0.36582745 0.58193944 -0.07204056 1.10855417
## 1 2 3 4 5 6
## 0.4819976 0.5904504 0.5904504 0.6415135 0.4819976 0.7518595
## actual
## predicted 0 1
## Bad 964 462
## Good 696 2402
## [1] 0.7440318
## actual
## predicted 0 1
## Bad quality 386 177
## Good quality 326 1050
## [1] 0.7405879