What is wine?

# Load the red and white wine-quality data sets (semicolon-delimited CSV files
# with a header row). The directory is named once so it only has to be edited
# in one place when the files move.
wine_dir <- "\\Users\\Thomas\\Desktop\\StatisticalLearning\\Final\\Wine+Wuality"
# fsep = "\\" keeps the resulting paths identical to the original literals.
redwine <- read.csv(
  file.path(wine_dir, "winequality-red.csv", fsep = "\\"),
  sep = ";", header = TRUE
)
whitewine <- read.csv(
  file.path(wine_dir, "winequality-white.csv", fsep = "\\"),
  sep = ";", header = TRUE
)
# Preview the first rows of the red-wine data.
head(redwine)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.4             0.70        0.00            1.9     0.076
## 2           7.8             0.88        0.00            2.6     0.098
## 3           7.8             0.76        0.04            2.3     0.092
## 4          11.2             0.28        0.56            1.9     0.075
## 5           7.4             0.70        0.00            1.9     0.076
## 6           7.4             0.66        0.00            1.8     0.075
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  11                   34  0.9978 3.51      0.56     9.4
## 2                  25                   67  0.9968 3.20      0.68     9.8
## 3                  15                   54  0.9970 3.26      0.65     9.8
## 4                  17                   60  0.9980 3.16      0.58     9.8
## 5                  11                   34  0.9978 3.51      0.56     9.4
## 6                  13                   40  0.9978 3.51      0.56     9.4
##   quality
## 1       5
## 2       5
## 3       5
## 4       6
## 5       5
## 6       5
# Preview the first six rows of the white-wine data (six is head()'s default).
head(whitewine, n = 6L)
##   fixed.acidity volatile.acidity citric.acid residual.sugar chlorides
## 1           7.0             0.27        0.36           20.7     0.045
## 2           6.3             0.30        0.34            1.6     0.049
## 3           8.1             0.28        0.40            6.9     0.050
## 4           7.2             0.23        0.32            8.5     0.058
## 5           7.2             0.23        0.32            8.5     0.058
## 6           8.1             0.28        0.40            6.9     0.050
##   free.sulfur.dioxide total.sulfur.dioxide density   pH sulphates alcohol
## 1                  45                  170  1.0010 3.00      0.45     8.8
## 2                  14                  132  0.9940 3.30      0.49     9.5
## 3                  30                   97  0.9951 3.26      0.44    10.1
## 4                  47                  186  0.9956 3.19      0.40     9.9
## 5                  47                  186  0.9956 3.19      0.40     9.9
## 6                  30                   97  0.9951 3.26      0.44    10.1
##   quality
## 1       6
## 2       6
## 3       6
## 4       6
## 5       6
## 6       6
# Column names of each data set.
rnames <- colnames(redwine)
wnames <- colnames(whitewine)

# Report the columns that appear in both data sets.
print("Common Names:")
## [1] "Common Names:"
print(intersect(wnames, rnames))
##  [1] "fixed.acidity"        "volatile.acidity"     "citric.acid"         
##  [4] "residual.sugar"       "chlorides"            "free.sulfur.dioxide" 
##  [7] "total.sulfur.dioxide" "density"              "pH"                  
## [10] "sulphates"            "alcohol"              "quality"
# For every predictor (every red-wine column except quality), draw boxplots of
# that predictor grouped by quality score: red wine in the left panel, white
# wine in the right panel.
# The 1x2 layout is loop-invariant, so it is set once; par() returns the
# previous settings so they can be restored afterwards.
old_par <- par(mfrow = c(1, 2))
for (name in setdiff(rnames, "quality")) {
  formula <- as.formula(paste(name, "~ quality"))

  # Panel titles make it possible to tell the two data sets apart.
  boxplot(formula, data = redwine, main = "Red wine")
  boxplot(formula, data = whitewine, main = "White wine")
}
# Restore the layout so later plots are not squeezed into half a page.
par(old_par)

Just for fun: linear regression models of wine quality.

# Red wine: least-squares fit of quality on every other column, then print
# the coefficient table and fit statistics.
red_fit <- lm(quality ~ ., data = redwine)
summary(red_fit)
## 
## Call:
## lm(formula = quality ~ ., data = redwine)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.68911 -0.36652 -0.04699  0.45202  2.02498 
## 
## Coefficients:
##                        Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           2.197e+01  2.119e+01   1.036   0.3002    
## fixed.acidity         2.499e-02  2.595e-02   0.963   0.3357    
## volatile.acidity     -1.084e+00  1.211e-01  -8.948  < 2e-16 ***
## citric.acid          -1.826e-01  1.472e-01  -1.240   0.2150    
## residual.sugar        1.633e-02  1.500e-02   1.089   0.2765    
## chlorides            -1.874e+00  4.193e-01  -4.470 8.37e-06 ***
## free.sulfur.dioxide   4.361e-03  2.171e-03   2.009   0.0447 *  
## total.sulfur.dioxide -3.265e-03  7.287e-04  -4.480 8.00e-06 ***
## density              -1.788e+01  2.163e+01  -0.827   0.4086    
## pH                   -4.137e-01  1.916e-01  -2.159   0.0310 *  
## sulphates             9.163e-01  1.143e-01   8.014 2.13e-15 ***
## alcohol               2.762e-01  2.648e-02  10.429  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.648 on 1587 degrees of freedom
## Multiple R-squared:  0.3606, Adjusted R-squared:  0.3561 
## F-statistic: 81.35 on 11 and 1587 DF,  p-value: < 2.2e-16
# White wine: linear model of quality on a hand-picked subset of predictors
# (citric.acid, chlorides and total.sulfur.dioxide are left out).
# The formula is written inline so the printed Call shows it in full.
white_fit <- lm(
  quality ~ fixed.acidity + volatile.acidity + residual.sugar +
    free.sulfur.dioxide + density + pH + sulphates + alcohol,
  data = whitewine
)
summary(white_fit)
## 
## Call:
## lm(formula = quality ~ fixed.acidity + volatile.acidity + residual.sugar + 
##     free.sulfur.dioxide + density + pH + sulphates + alcohol, 
##     data = whitewine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.8246 -0.4938 -0.0396  0.4660  3.1208 
## 
## Coefficients:
##                       Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          1.541e+02  1.810e+01   8.514  < 2e-16 ***
## fixed.acidity        6.810e-02  2.043e-02   3.333 0.000864 ***
## volatile.acidity    -1.888e+00  1.095e-01 -17.242  < 2e-16 ***
## residual.sugar       8.285e-02  7.287e-03  11.370  < 2e-16 ***
## free.sulfur.dioxide  3.349e-03  6.766e-04   4.950 7.67e-07 ***
## density             -1.543e+02  1.834e+01  -8.411  < 2e-16 ***
## pH                   6.942e-01  1.034e-01   6.717 2.07e-11 ***
## sulphates            6.285e-01  9.997e-02   6.287 3.52e-10 ***
## alcohol              1.932e-01  2.408e-02   8.021 1.31e-15 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7512 on 4889 degrees of freedom
## Multiple R-squared:  0.2818, Adjusted R-squared:  0.2806 
## F-statistic: 239.7 on 8 and 4889 DF,  p-value: < 2.2e-16
# White wine: linear model with poly() terms for fixed.acidity (degree 2),
# volatile.acidity (degree 3), density (degree 2) and alcohol (degree 2);
# the remaining predictors enter linearly.
w.model <- lm(
  quality ~ poly(fixed.acidity, 2) + poly(volatile.acidity, 3) +
    residual.sugar + free.sulfur.dioxide + poly(density, 2) + pH +
    sulphates + poly(alcohol, 2),
  data = whitewine
)
summary(w.model)
## 
## Call:
## lm(formula = quality ~ poly(fixed.acidity, 2) + poly(volatile.acidity, 
##     3) + residual.sugar + free.sulfur.dioxide + poly(density, 
##     2) + pH + sulphates + poly(alcohol, 2), data = whitewine)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.7496 -0.4940 -0.0328  0.4664  3.3045 
## 
## Coefficients:
##                              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                 1.715e+00  3.956e-01   4.336 1.48e-05 ***
## poly(fixed.acidity, 2)1     7.726e+00  1.317e+00   5.866 4.75e-09 ***
## poly(fixed.acidity, 2)2    -3.667e+00  7.615e-01  -4.816 1.51e-06 ***
## poly(volatile.acidity, 3)1 -1.374e+01  7.809e-01 -17.589  < 2e-16 ***
## poly(volatile.acidity, 3)2  2.448e+00  7.734e-01   3.165 0.001561 ** 
## poly(volatile.acidity, 3)3 -3.023e+00  7.664e-01  -3.945 8.10e-05 ***
## residual.sugar              9.886e-02  8.015e-03  12.334  < 2e-16 ***
## free.sulfur.dioxide         3.388e-03  6.727e-04   5.036 4.92e-07 ***
## poly(density, 2)1          -4.456e+01  4.413e+00 -10.097  < 2e-16 ***
## poly(density, 2)2           4.900e+00  9.440e-01   5.190 2.18e-07 ***
## pH                          9.638e-01  1.100e-01   8.760  < 2e-16 ***
## sulphates                   6.906e-01  9.993e-02   6.910 5.45e-12 ***
## poly(alcohol, 2)1           8.906e+00  2.482e+00   3.588 0.000336 ***
## poly(alcohol, 2)2           3.971e+00  8.078e-01   4.916 9.13e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.7436 on 4884 degrees of freedom
## Multiple R-squared:  0.2969, Adjusted R-squared:  0.295 
## F-statistic: 158.7 on 13 and 4884 DF,  p-value: < 2.2e-16
# Predictions of the polynomial white-wine model on the data it was fitted to.
pred <- predict(w.model, newdata = whitewine)
# Observed quality (y) against fitted values (x); the argument expressions are
# left as-is because plot() uses them as axis labels.
plot(x = fitted(w.model), y = whitewine$quality)

# Share of white wines whose fitted value, rounded to the nearest integer,
# equals the recorded quality score (measured on the training data).
correct <- sum(round(fitted(w.model)) == whitewine$quality)
correct / nrow(whitewine)
## [1] 0.5177624
# Error of the rounded prediction for each white wine (observed minus
# predicted). The name `diff` is kept because hist() derives the plot title
# and axis label from the expression it is given.
diff <- whitewine$quality - round(fitted(w.model))
hist(diff)

# Distribution of the recorded quality scores, white wine first, then red.
hist(whitewine$quality)

hist(redwine$quality)