## Rows: 6,497
## Columns: 13
## $ type <chr> "white", "white", "white", "white", "white", "whi…
## $ fixed.acidity <dbl> 7.0, 6.3, 8.1, 7.2, 7.2, 8.1, 6.2, 7.0, 6.3, 8.1,…
## $ volatile.acidity <dbl> 0.27, 0.30, 0.28, 0.23, 0.23, 0.28, 0.32, 0.27, 0…
## $ citric.acid <dbl> 0.36, 0.34, 0.40, 0.32, 0.32, 0.40, 0.16, 0.36, 0…
## $ residual.sugar <dbl> 20.70, 1.60, 6.90, 8.50, 8.50, 6.90, 7.00, 20.70,…
## $ chlorides <dbl> 0.045, 0.049, 0.050, 0.058, 0.058, 0.050, 0.045, …
## $ free.sulfur.dioxide <dbl> 45, 14, 30, 47, 47, 30, 30, 45, 14, 28, 11, 17, 1…
## $ total.sulfur.dioxide <dbl> 170, 132, 97, 186, 186, 97, 136, 170, 132, 129, 6…
## $ density <dbl> 1.0010, 0.9940, 0.9951, 0.9956, 0.9956, 0.9951, 0…
## $ pH <dbl> 3.00, 3.30, 3.26, 3.19, 3.19, 3.26, 3.18, 3.00, 3…
## $ sulphates <dbl> 0.45, 0.49, 0.44, 0.40, 0.40, 0.44, 0.47, 0.45, 0…
## $ alcohol <dbl> 8.8, 9.5, 10.1, 9.9, 9.9, 10.1, 9.6, 8.8, 9.5, 11…
## $ quality <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 7, 5, 7, 6…
## # A tibble: 6 × 13
## type fixed.ac…¹ volat…² citri…³ resid…⁴ chlor…⁵ free.…⁶ total…⁷ density pH
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 white 7 0.27 0.36 20.7 0.045 45 170 1.00 3
## 2 white 6.3 0.3 0.34 1.6 0.049 14 132 0.994 3.3
## 3 white 8.1 0.28 0.4 6.9 0.05 30 97 0.995 3.26
## 4 white 7.2 0.23 0.32 8.5 0.058 47 186 0.996 3.19
## 5 white 7.2 0.23 0.32 8.5 0.058 47 186 0.996 3.19
## 6 white 8.1 0.28 0.4 6.9 0.05 30 97 0.995 3.26
## # … with 3 more variables: sulphates <dbl>, alcohol <dbl>, quality <dbl>, and
## # abbreviated variable names ¹fixed.acidity, ²volatile.acidity, ³citric.acid,
## # ⁴residual.sugar, ⁵chlorides, ⁶free.sulfur.dioxide, ⁷total.sulfur.dioxide
## Rows: 6,463
## Columns: 13
## $ type <chr> "white", "white", "white", "white", "white", "whi…
## $ fixed.acidity <dbl> 7.0, 6.3, 8.1, 7.2, 7.2, 8.1, 6.2, 7.0, 6.3, 8.1,…
## $ volatile.acidity <dbl> 0.27, 0.30, 0.28, 0.23, 0.23, 0.28, 0.32, 0.27, 0…
## $ citric.acid <dbl> 0.36, 0.34, 0.40, 0.32, 0.32, 0.40, 0.16, 0.36, 0…
## $ residual.sugar <dbl> 20.70, 1.60, 6.90, 8.50, 8.50, 6.90, 7.00, 20.70,…
## $ chlorides <dbl> 0.045, 0.049, 0.050, 0.058, 0.058, 0.050, 0.045, …
## $ free.sulfur.dioxide <dbl> 45, 14, 30, 47, 47, 30, 30, 45, 14, 28, 11, 17, 1…
## $ total.sulfur.dioxide <dbl> 170, 132, 97, 186, 186, 97, 136, 170, 132, 129, 6…
## $ density <dbl> 1.0010, 0.9940, 0.9951, 0.9956, 0.9956, 0.9951, 0…
## $ pH <dbl> 3.00, 3.30, 3.26, 3.19, 3.19, 3.26, 3.18, 3.00, 3…
## $ sulphates <dbl> 0.45, 0.49, 0.44, 0.40, 0.40, 0.44, 0.47, 0.45, 0…
## $ alcohol <dbl> 8.8, 9.5, 10.1, 9.9, 9.9, 10.1, 9.6, 8.8, 9.5, 11…
## $ quality <dbl> 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 5, 5, 5, 7, 5, 7, 6…
## .
## 3 4 5 6 7 8 9
## 30 214 2128 2820 1074 192 5
## .
## 3 4 5 6 7 8 9
## 0.005 0.033 0.329 0.436 0.166 0.030 0.001
# Correlate every column of `wine` except `quality` with the quality score
# (cor() default method), giving a one-column matrix with one row per column.
quality_cor <- cor(
  x = wine[, colnames(wine) != "quality"],
  y = wine[["quality"]]
)
quality_cor
## [,1]
## type -0.11918495
## fixed.acidity -0.07617378
## volatile.acidity -0.26667748
## citric.acid 0.08492614
## residual.sugar -0.03465378
## chlorides -0.20055317
## free.sulfur.dioxide 0.05492413
## total.sulfur.dioxide -0.04159801
## density -0.30444677
## pH 0.01840276
## sulphates 0.03905364
## alcohol 0.44463687
## [1] "fixed.acidity"
## [1] "volatile.acidity"
## [1] "citric.acid"
## [1] "residual.sugar"
## [1] "chlorides"
## [1] "free.sulfur.dioxide"
## [1] "total.sulfur.dioxide"
## [1] "density"
## [1] "pH"
## [1] "sulphates"
## [1] "alcohol"
## [1] "quality"
# Row indices for the training split: a random 80% of ALL rows of `wine`.
# The previous form, sample(nrow(wine) * 0.8), handed sample() a single
# number, which only permutes 1..floor(0.8 * n); the last 20% of rows could
# then never be drawn for training and the hold-out set was not random.
# sample.int(n, size) draws `size` distinct indices from 1..n instead.
# NOTE(review): call set.seed() before this line if the split must be
# reproducible -- no seed is visible in this part of the file.
train <- sample.int(nrow(wine), size = floor(0.8 * nrow(wine)))
##
## Call:
## lm(formula = quality ~ . - type, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.82263 -0.46705 -0.01595 0.47189 2.21454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.797e+01 1.658e+01 5.307 1.16e-07 ***
## fixed.acidity 7.352e-02 2.010e-02 3.657 0.000258 ***
## volatile.acidity -1.575e+00 1.007e-01 -15.633 < 2e-16 ***
## citric.acid -1.193e-01 9.589e-02 -1.244 0.213679
## residual.sugar 5.616e-02 6.589e-03 8.523 < 2e-16 ***
## chlorides 8.420e-01 7.658e-01 1.099 0.271641
## free.sulfur.dioxide 7.011e-03 8.277e-04 8.470 < 2e-16 ***
## total.sulfur.dioxide -2.238e-03 3.062e-04 -7.308 3.12e-13 ***
## density -8.768e+01 1.686e+01 -5.200 2.07e-07 ***
## pH 6.018e-01 9.934e-02 6.058 1.47e-09 ***
## sulphates 7.701e-01 9.388e-02 8.203 2.94e-16 ***
## alcohol 2.281e-01 2.210e-02 10.325 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7036 on 5158 degrees of freedom
## Multiple R-squared: 0.2937, Adjusted R-squared: 0.2922
## F-statistic: 195 on 11 and 5158 DF, p-value: < 2.2e-16
## fixed.acidity volatile.acidity citric.acid
## 3.210615 1.382746 1.264477
## residual.sugar chlorides free.sulfur.dioxide
## 10.666093 1.794805 1.893137
## total.sulfur.dioxide density pH
## 2.063821 25.473683 2.390585
## sulphates alcohol
## 1.296809 7.589321
##
## Call:
## lm(formula = quality ~ . - type - density, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.8244 -0.4625 -0.0147 0.4741 2.1760
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.7953023 0.3130057 5.736 1.03e-08 ***
## fixed.acidity -0.0061888 0.0130377 -0.475 0.635033
## volatile.acidity -1.7116528 0.0974729 -17.560 < 2e-16 ***
## citric.acid -0.1380128 0.0960665 -1.437 0.150881
## residual.sugar 0.0243940 0.0024759 9.853 < 2e-16 ***
## chlorides -0.6291021 0.7134665 -0.882 0.377951
## free.sulfur.dioxide 0.0072712 0.0008283 8.778 < 2e-16 ***
## total.sulfur.dioxide -0.0021759 0.0003067 -7.094 1.48e-12 ***
## pH 0.2536861 0.0735765 3.448 0.000569 ***
## sulphates 0.5842631 0.0870327 6.713 2.11e-11 ***
## alcohol 0.3289874 0.0106208 30.976 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7053 on 5159 degrees of freedom
## Multiple R-squared: 0.29, Adjusted R-squared: 0.2887
## F-statistic: 210.8 on 10 and 5159 DF, p-value: < 2.2e-16
## fixed.acidity volatile.acidity citric.acid
## 1.343780 1.288256 1.262688
## residual.sugar chlorides free.sulfur.dioxide
## 1.498266 1.549887 1.886203
## total.sulfur.dioxide pH sulphates
## 2.060714 1.304795 1.108922
## alcohol
## 1.744393
##
## Call:
## lm(formula = quality ~ . - type - density - residual.sugar, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.71967 -0.46329 -0.02265 0.49174 2.07290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 2.6826048 0.3025477 8.867 < 2e-16 ***
## fixed.acidity -0.0120800 0.0131447 -0.919 0.3581
## volatile.acidity -1.6333069 0.0980482 -16.658 < 2e-16 ***
## citric.acid -0.0987875 0.0968734 -1.020 0.3079
## chlorides -1.5600749 0.7137347 -2.186 0.0289 *
## free.sulfur.dioxide 0.0082906 0.0008294 9.995 < 2e-16 ***
## total.sulfur.dioxide -0.0016461 0.0003048 -5.401 6.92e-08 ***
## pH 0.1293863 0.0731585 1.769 0.0770 .
## sulphates 0.5437645 0.0877411 6.197 6.19e-10 ***
## alcohol 0.2929845 0.0100648 29.110 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7119 on 5160 degrees of freedom
## Multiple R-squared: 0.2767, Adjusted R-squared: 0.2754
## F-statistic: 219.3 on 9 and 5160 DF, p-value: < 2.2e-16
## fixed.acidity volatile.acidity citric.acid
## 1.340954 1.279682 1.260520
## chlorides free.sulfur.dioxide total.sulfur.dioxide
## 1.522702 1.856773 1.997376
## pH sulphates alcohol
## 1.266432 1.106449 1.537899
# We can see that the VIF values of density and residual sugar are higher
# than 10, while the value for alcohol is close to, but not more than, 10.
# Now we will fit a regression model.
##
## Call:
## lm(formula = quality ~ fixed.acidity + volatile.acidity + citric.acid +
## residual.sugar + chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol, data = wine.train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.82263 -0.46705 -0.01595 0.47189 2.21454
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 8.797e+01 1.658e+01 5.307 1.16e-07 ***
## fixed.acidity 7.352e-02 2.010e-02 3.657 0.000258 ***
## volatile.acidity -1.575e+00 1.007e-01 -15.633 < 2e-16 ***
## citric.acid -1.193e-01 9.589e-02 -1.244 0.213679
## residual.sugar 5.616e-02 6.589e-03 8.523 < 2e-16 ***
## chlorides 8.420e-01 7.658e-01 1.099 0.271641
## free.sulfur.dioxide 7.011e-03 8.277e-04 8.470 < 2e-16 ***
## total.sulfur.dioxide -2.238e-03 3.062e-04 -7.308 3.12e-13 ***
## density -8.768e+01 1.686e+01 -5.200 2.07e-07 ***
## pH 6.018e-01 9.934e-02 6.058 1.47e-09 ***
## sulphates 7.701e-01 9.388e-02 8.203 2.94e-16 ***
## alcohol 2.281e-01 2.210e-02 10.325 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7036 on 5158 degrees of freedom
## Multiple R-squared: 0.2937, Adjusted R-squared: 0.2922
## F-statistic: 195 on 11 and 5158 DF, p-value: < 2.2e-16
## 1 2 3 4 5 6
## 5.257577 6.209973 5.575768 5.600188 6.371360 5.701538
## 1 2 3 4 5 6
## 6 7 6 6 7 6
## actual
## predicted 3.5 4 5 6 7 7.5
## 5 2 24 94 26 1 0
## 6 11 136 1350 1337 252 46
## 7 7 17 177 910 640 135
## 8 0 0 0 2 3 0
## [1] 0.2451663
## fixed.acidity volatile.acidity citric.acid
## 3.699649e+02 4.315336e+06 3.637632e+05
## residual.sugar chlorides free.sulfur.dioxide
## 1.781843e+00 1.258004e+07 2.488821e+00
## total.sulfur.dioxide density pH
## 3.030931e+00 8.164594e+06 1.328452e+05
## sulphates alcohol
## 7.983110e+05 7.732264e+02
## Call:
## polr(formula = quality2 ~ . - type - quality, data = wine.train,
## Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## fixed.acidity 0.115503 0.0354109 3.2618
## volatile.acidity -4.017865 0.2668232 -15.0582
## citric.acid -0.200291 0.2723497 -0.7354
## residual.sugar 0.107129 0.0079197 13.5268
## chlorides -1.421921 0.0231987 -61.2932
## free.sulfur.dioxide 0.017796 0.0025574 6.9585
## total.sulfur.dioxide -0.007241 0.0008556 -8.4634
## density -95.080212 0.4645592 -204.6676
## pH 0.888241 0.2226090 3.9901
## sulphates 2.530635 0.2497945 10.1309
## alcohol 0.788355 0.0301011 26.1902
##
## Intercepts:
## Value Std. Error t value
## 3.5|4 -88.5476 0.4746 -186.5789
## 4|5 -86.3822 0.4744 -182.0719
## 5|6 -83.2245 0.4803 -173.2795
## 6|7 -80.6447 0.4897 -164.6653
## 7|7.5 -78.3108 0.5008 -156.3613
##
## Residual Deviance: 9852.361
## AIC: 9884.361
## Start: AIC=9884.36
## quality2 ~ (type + fixed.acidity + volatile.acidity + citric.acid +
## residual.sugar + chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol + quality) - type - quality
##
## Df AIC
## - chlorides 1 9880.1
## - citric.acid 1 9882.4
## <none> 9884.4
## - fixed.acidity 1 9890.8
## - density 1 9894.9
## - pH 1 9896.4
## - free.sulfur.dioxide 1 9929.4
## - residual.sugar 1 9939.2
## - total.sulfur.dioxide 1 9953.7
## - sulphates 1 9979.4
## - volatile.acidity 1 10052.1
## - alcohol 1 10069.5
##
## Step: AIC=9880.08
## quality2 ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar +
## free.sulfur.dioxide + total.sulfur.dioxide + density + pH +
## sulphates + alcohol
##
## Df AIC
## <none> 9880.1
## - citric.acid 1 9881.7
## - fixed.acidity 1 9890.4
## - pH 1 9897.5
## - density 1 9898.1
## - free.sulfur.dioxide 1 9927.4
## - total.sulfur.dioxide 1 9952.1
## - residual.sugar 1 9952.7
## - sulphates 1 9977.6
## - volatile.acidity 1 10059.7
## - alcohol 1 10067.7
## 3.5 4 5 6 7 7.5
## 1 0.006436306 0.04705570 0.5182732 0.3754516 0.04743908 0.005344208
## 2 0.010769140 0.07596757 0.6049799 0.2761884 0.02890800 0.003186998
## 3 0.001937993 0.01472001 0.2691641 0.5574151 0.13915356 0.017609258
## 4 0.003301711 0.02478661 0.3776510 0.4959972 0.08786584 0.010397691
## 5 0.001937993 0.01472001 0.2691641 0.5574151 0.13915356 0.017609258
## 6 0.010769140 0.07596757 0.6049799 0.2761884 0.02890800 0.003186998
## [1] 5 5 6 6 6 5
## Levels: 3.5 4 5 6 7 7.5
## predicted
## Actual 3.5 4 5 6 7 7.5
## 3.5 0 0 12 9 0 0
## 4 0 0 83 66 1 0
## 5 0 0 873 608 9 0
## 6 0 0 412 1435 127 0
## 7 0 0 55 530 167 0
## 7.5 0 0 11 88 39 0
## [1] 0.5469613
## actual
## predicted 3.5 4 5 6 7 7.5
## 3.5 0 0 0 0 0 0
## 4 0 0 0 0 0 0
## 5 4 42 371 195 20 4
## 6 5 21 265 596 232 34
## 7 0 1 2 55 70 21
## 7.5 0 0 0 0 0 0
## [1] 0.2291713
## Start: AIC=4586.89
## category ~ (type + fixed.acidity + volatile.acidity + citric.acid +
## residual.sugar + chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + pH + sulphates + alcohol + quality + quality2) -
## type - quality - quality2
##
## Df Deviance AIC
## - pH 1 4563.1 4585.1
## - fixed.acidity 1 4564.2 4586.2
## - density 1 4564.2 4586.2
## <none> 4562.9 4586.9
## - chlorides 1 4565.7 4587.7
## - citric.acid 1 4568.0 4590.0
## - residual.sugar 1 4579.4 4601.4
## - free.sulfur.dioxide 1 4609.2 4631.2
## - sulphates 1 4636.6 4658.6
## - total.sulfur.dioxide 1 4645.5 4667.5
## - alcohol 1 4735.5 4757.5
## - volatile.acidity 1 4779.2 4801.2
##
## Step: AIC=4585.12
## category ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## density + sulphates + alcohol
##
## Df Deviance AIC
## - density 1 4564.3 4584.3
## - fixed.acidity 1 4564.3 4584.3
## <none> 4563.1 4585.1
## - chlorides 1 4566.3 4586.3
## - citric.acid 1 4568.8 4588.8
## - residual.sugar 1 4584.8 4604.8
## - free.sulfur.dioxide 1 4609.6 4629.6
## - sulphates 1 4636.6 4656.6
## - total.sulfur.dioxide 1 4645.6 4665.6
## - volatile.acidity 1 4783.4 4803.4
## - alcohol 1 4802.0 4822.0
##
## Step: AIC=4584.25
## category ~ fixed.acidity + volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## sulphates + alcohol
##
## Df Deviance AIC
## - fixed.acidity 1 4564.6 4582.6
## <none> 4564.3 4584.3
## - chlorides 1 4569.6 4587.6
## - citric.acid 1 4569.9 4587.9
## - residual.sugar 1 4610.3 4628.3
## - free.sulfur.dioxide 1 4610.3 4628.3
## - sulphates 1 4645.9 4663.9
## - total.sulfur.dioxide 1 4646.3 4664.3
## - volatile.acidity 1 4817.4 4835.4
## - alcohol 1 5102.4 5120.4
##
## Step: AIC=4582.56
## category ~ volatile.acidity + citric.acid + residual.sugar +
## chlorides + free.sulfur.dioxide + total.sulfur.dioxide +
## sulphates + alcohol
##
## Df Deviance AIC
## <none> 4564.6 4582.6
## - chlorides 1 4569.6 4585.6
## - citric.acid 1 4570.2 4586.2
## - free.sulfur.dioxide 1 4610.3 4626.3
## - residual.sugar 1 4610.8 4626.8
## - sulphates 1 4648.1 4664.1
## - total.sulfur.dioxide 1 4648.9 4664.9
## - volatile.acidity 1 4820.4 4836.4
## - alcohol 1 5104.5 5120.5
## 1 2 3 4 5 6
## 0.4684139 0.3042316 0.6601849 0.5577841 0.6601849 0.4741672
## 1 2 3 4 5 6
## -0.1265130 -0.8272277 0.6641182 0.2321737 0.6641182 -0.1034232
## 1 2 3 4 5 6
## 0.4684139 0.3042316 0.6601849 0.5577841 0.6601849 0.4741672
## actual
## predicted 0 1
## Bad 975 451
## Good 685 2413
## [1] 0.7488948
## actual
## predicted 0 1
## Bad quality 386 211
## Good quality 326 1016
## [1] 0.7230531