Invoking required packages

library(readxl)
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 3.3.2
library(foreign)
library(reshape2) 
library(formatR)
library(UsingR)
## Warning: package 'UsingR' was built under R version 3.3.2
## Loading required package: MASS
## Warning: package 'MASS' was built under R version 3.3.2
## Loading required package: HistData
## Loading required package: Hmisc
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:base':
## 
##     format.pval, round.POSIXt, trunc.POSIXt, units
## 
## Attaching package: 'UsingR'
## The following object is masked from 'package:survival':
## 
##     cancer
library(psych)
## Warning: package 'psych' was built under R version 3.3.2
## 
## Attaching package: 'psych'
## The following objects are masked from 'package:UsingR':
## 
##     galton, headtail
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
library(rmarkdown)

Importing files- White wine csv files

# Structure and Summary for red wine
getwd()
## [1] "C:/Users/Amit/Desktop/MSDA/MSDA/IS6713"
wine_white <- read.csv("C:/Users/Amit/Desktop/MSDA/MSDA/IS6713/winequality-white.csv", sep= ";")
View(wine_white)
str(wine_white)
## 'data.frame':    4898 obs. of  12 variables:
##  $ fixed.acidity       : num  7 6.3 8.1 7.2 7.2 8.1 6.2 7 6.3 8.1 ...
##  $ volatile.acidity    : num  0.27 0.3 0.28 0.23 0.23 0.28 0.32 0.27 0.3 0.22 ...
##  $ citric.acid         : num  0.36 0.34 0.4 0.32 0.32 0.4 0.16 0.36 0.34 0.43 ...
##  $ residual.sugar      : num  20.7 1.6 6.9 8.5 8.5 6.9 7 20.7 1.6 1.5 ...
##  $ chlorides           : num  0.045 0.049 0.05 0.058 0.058 0.05 0.045 0.045 0.049 0.044 ...
##  $ free.sulfur.dioxide : num  45 14 30 47 47 30 30 45 14 28 ...
##  $ total.sulfur.dioxide: num  170 132 97 186 186 97 136 170 132 129 ...
##  $ density             : num  1.001 0.994 0.995 0.996 0.996 ...
##  $ pH                  : num  3 3.3 3.26 3.19 3.19 3.26 3.18 3 3.3 3.22 ...
##  $ sulphates           : num  0.45 0.49 0.44 0.4 0.4 0.44 0.47 0.45 0.49 0.45 ...
##  $ alcohol             : num  8.8 9.5 10.1 9.9 9.9 10.1 9.6 8.8 9.5 11 ...
##  $ quality             : int  6 6 6 6 6 6 6 6 6 6 ...
summary(wine_white)
##  fixed.acidity    volatile.acidity  citric.acid     residual.sugar  
##  Min.   : 3.800   Min.   :0.0800   Min.   :0.0000   Min.   : 0.600  
##  1st Qu.: 6.300   1st Qu.:0.2100   1st Qu.:0.2700   1st Qu.: 1.700  
##  Median : 6.800   Median :0.2600   Median :0.3200   Median : 5.200  
##  Mean   : 6.855   Mean   :0.2782   Mean   :0.3342   Mean   : 6.391  
##  3rd Qu.: 7.300   3rd Qu.:0.3200   3rd Qu.:0.3900   3rd Qu.: 9.900  
##  Max.   :14.200   Max.   :1.1000   Max.   :1.6600   Max.   :65.800  
##    chlorides       free.sulfur.dioxide total.sulfur.dioxide
##  Min.   :0.00900   Min.   :  2.00      Min.   :  9.0       
##  1st Qu.:0.03600   1st Qu.: 23.00      1st Qu.:108.0       
##  Median :0.04300   Median : 34.00      Median :134.0       
##  Mean   :0.04577   Mean   : 35.31      Mean   :138.4       
##  3rd Qu.:0.05000   3rd Qu.: 46.00      3rd Qu.:167.0       
##  Max.   :0.34600   Max.   :289.00      Max.   :440.0       
##     density             pH          sulphates         alcohol     
##  Min.   :0.9871   Min.   :2.720   Min.   :0.2200   Min.   : 8.00  
##  1st Qu.:0.9917   1st Qu.:3.090   1st Qu.:0.4100   1st Qu.: 9.50  
##  Median :0.9937   Median :3.180   Median :0.4700   Median :10.40  
##  Mean   :0.9940   Mean   :3.188   Mean   :0.4898   Mean   :10.51  
##  3rd Qu.:0.9961   3rd Qu.:3.280   3rd Qu.:0.5500   3rd Qu.:11.40  
##  Max.   :1.0390   Max.   :3.820   Max.   :1.0800   Max.   :14.20  
##     quality     
##  Min.   :3.000  
##  1st Qu.:5.000  
##  Median :6.000  
##  Mean   :5.878  
##  3rd Qu.:6.000  
##  Max.   :9.000

White wine data has 4899 observation and quality variable has levels from 3 to 9.

Basic exploratory data analysis to get a sense of data

# install.packages("corrplot")
library(corrplot)
## Warning: package 'corrplot' was built under R version 3.3.2
# Scatter plot matrix
plot(wine_white)

wine_corr <- cor(wine_white)
summary(wine_corr)
##  fixed.acidity       volatile.acidity     citric.acid      
##  Min.   :-0.425858   Min.   :-0.194723   Min.   :-0.16375  
##  1st Qu.:-0.065463   1st Qu.:-0.051049   1st Qu.:-0.02584  
##  Median : 0.002971   Median : 0.002208   Median : 0.09414  
##  Mean   : 0.084004   Mean   : 0.065612   Mean   : 0.12722  
##  3rd Qu.: 0.134635   3rd Qu.: 0.068416   3rd Qu.: 0.12822  
##  Max.   : 1.000000   Max.   : 1.000000   Max.   : 1.00000  
##  residual.sugar       chlorides        free.sulfur.dioxide
##  Min.   :-0.45063   Min.   :-0.36019   Min.   :-0.25010   
##  1st Qu.:-0.04439   1st Qu.:-0.01004   1st Qu.:-0.01281   
##  Median : 0.08885   Median : 0.07960   Median : 0.07665   
##  Mean   : 0.17556   Mean   : 0.10086   Mean   : 0.17288   
##  3rd Qu.: 0.32468   3rd Qu.: 0.13550   3rd Qu.: 0.29543   
##  Max.   : 1.00000   Max.   : 1.00000   Max.   : 1.00000   
##  total.sulfur.dioxide    density                pH          
##  Min.   :-0.44889     Min.   :-0.780138   Min.   :-0.42586  
##  1st Qu.: 0.06753     1st Qu.:-0.003063   1st Qu.:-0.11113  
##  Median : 0.12785     Median : 0.203357   Median :-0.01627  
##  Mean   : 0.21337     Mean   : 0.187988   Mean   : 0.03157  
##  3rd Qu.: 0.43355     3rd Qu.: 0.353128   3rd Qu.: 0.10493  
##  Max.   : 1.00000     Max.   : 1.000000   Max.   : 1.00000  
##    sulphates           alcohol            quality        
##  Min.   :-0.03573   Min.   :-0.78014   Min.   :-0.30712  
##  1st Qu.:-0.01722   1st Qu.:-0.38236   1st Qu.:-0.17973  
##  Median : 0.05645   Median :-0.09830   Median :-0.05339  
##  Mean   : 0.12167   Mean   :-0.07327   Mean   : 0.04082  
##  3rd Qu.: 0.08951   3rd Qu.: 0.08115   3rd Qu.: 0.06512  
##  Max.   : 1.00000   Max.   : 1.00000   Max.   : 1.00000
corrplot(wine_corr,method="number")

Correlation table indicates that density and residual sugar has high correlation (0.84) and there is a negative correlation (-.78) between density and alcohol. The correlation between quality and alcohol is 0.44

# Skewness and kurtosis
# install.packages("psych")
library("psych")
describe(wine_white)
##                      vars    n   mean    sd median trimmed   mad  min
## fixed.acidity           1 4898   6.85  0.84   6.80    6.82  0.74 3.80
## volatile.acidity        2 4898   0.28  0.10   0.26    0.27  0.09 0.08
## citric.acid             3 4898   0.33  0.12   0.32    0.33  0.09 0.00
## residual.sugar          4 4898   6.39  5.07   5.20    5.80  5.34 0.60
## chlorides               5 4898   0.05  0.02   0.04    0.04  0.01 0.01
## free.sulfur.dioxide     6 4898  35.31 17.01  34.00   34.36 16.31 2.00
## total.sulfur.dioxide    7 4898 138.36 42.50 134.00  136.96 43.00 9.00
## density                 8 4898   0.99  0.00   0.99    0.99  0.00 0.99
## pH                      9 4898   3.19  0.15   3.18    3.18  0.15 2.72
## sulphates              10 4898   0.49  0.11   0.47    0.48  0.10 0.22
## alcohol                11 4898  10.51  1.23  10.40   10.43  1.48 8.00
## quality                12 4898   5.88  0.89   6.00    5.85  1.48 3.00
##                         max  range skew kurtosis   se
## fixed.acidity         14.20  10.40 0.65     2.17 0.01
## volatile.acidity       1.10   1.02 1.58     5.08 0.00
## citric.acid            1.66   1.66 1.28     6.16 0.00
## residual.sugar        65.80  65.20 1.08     3.46 0.07
## chlorides              0.35   0.34 5.02    37.51 0.00
## free.sulfur.dioxide  289.00 287.00 1.41    11.45 0.24
## total.sulfur.dioxide 440.00 431.00 0.39     0.57 0.61
## density                1.04   0.05 0.98     9.78 0.00
## pH                     3.82   1.10 0.46     0.53 0.00
## sulphates              1.08   0.86 0.98     1.59 0.00
## alcohol               14.20   6.20 0.49    -0.70 0.02
## quality                9.00   6.00 0.16     0.21 0.01
# Histogram and table to get frquency of different qualities
#hist_1 <- hist(winequality.red$X.quality.)
#hist_1
#print(hist_1$breaks)
# Change number of breaks and add labels
hist_2 <- hist(wine_white$quality, breaks = 5, col ="lightblue", xlab = "Quality ", main= " Histogram of Quality rating for white wine ")

table(wine_white$quality)
## 
##    3    4    5    6    7    8    9 
##   20  163 1457 2198  880  175    5

. From describe function output, we observe that skewness coefficient for quality is 0.21 and kurtosis is 0.01.

Let’s load a package called FactoMineR in R to run principal component analysis.

if (!require("FactoMineR")) install.packages("FactoMineR")
## Loading required package: FactoMineR
## Warning: package 'FactoMineR' was built under R version 3.3.2
# Unscaled data
pc_white <- prcomp(wine_white)
pc_white
## Standard deviations:
##  [1] 43.949221756 12.979721330  4.643585542  1.147246458  0.828680478
##  [6]  0.707396079  0.135607404  0.118811827  0.106986464  0.090589602
## [11]  0.019895607  0.000559343
## 
## Rotation:
##                                PC1           PC2           PC3
## fixed.acidity        -1.544525e-03 -9.166733e-03 -1.292446e-02
## volatile.acidity     -1.690309e-04 -1.546248e-03 -9.343979e-04
## citric.acid          -3.386468e-04  1.403673e-04 -1.257927e-03
## residual.sugar       -4.732751e-02  1.493143e-02 -9.951321e-01
## chlorides            -9.757940e-05 -7.203906e-05 -7.999827e-05
## free.sulfur.dioxide  -2.618723e-01  9.646376e-01  2.628366e-02
## total.sulfur.dioxide -9.638533e-01 -2.626820e-01  4.285064e-02
## density              -3.597064e-05 -1.839769e-05 -4.470891e-04
## pH                   -3.361997e-06 -4.080579e-05  7.022487e-03
## sulphates            -3.408882e-04 -3.605330e-04  2.145496e-03
## alcohol               1.250436e-02  6.479656e-03  8.288867e-02
## quality               3.280412e-03  1.099334e-02  9.537000e-03
##                                PC4           PC5           PC6
## fixed.acidity         0.1244224095 -0.9845307068 -0.0849236772
## volatile.acidity     -0.0050464160  0.0038497157  0.0409544532
## citric.acid           0.0029386326 -0.0416636471 -0.0082748751
## residual.sugar       -0.0759758326  0.0005949154  0.0378225593
## chlorides             0.0058640653  0.0015129289 -0.0021355214
## free.sulfur.dioxide   0.0108349352 -0.0078862716  0.0049067954
## total.sulfur.dioxide -0.0119772833  0.0017391578  0.0010440067
## density               0.0009775556 -0.0003252625 -0.0005772027
## pH                   -0.0166565597  0.0754813438 -0.0001749859
## sulphates            -0.0050451785  0.0035612543 -0.0104262703
## alcohol              -0.8258312591 -0.1521893791  0.5357917699
## quality              -0.5441656711  0.0029863201 -0.8380931116
##                                PC7           PC8           PC9
## fixed.acidity        -0.0743389867 -0.0468562413 -4.969682e-03
## volatile.acidity      0.0719119916 -0.2832216699  1.624292e-01
## citric.acid           0.0403006772  0.8767959786 -3.520743e-01
## residual.sugar       -0.0062948503  0.0003229402 -1.398574e-04
## chlorides             0.0137426810  0.0142757191  1.509765e-03
## free.sulfur.dioxide  -0.0006309199 -0.0007834377  4.943424e-04
## total.sulfur.dioxide  0.0007473199 -0.0000595991 -3.552413e-04
## density              -0.0036914690  0.0003273388  1.303056e-04
## pH                   -0.9356299441 -0.1119938401 -3.122144e-01
## sulphates            -0.3343812305  0.3683952830  8.672667e-01
## alcohol               0.0002777270  0.0088509911 -1.928489e-03
## quality               0.0148623442 -0.0166769439 -4.762335e-05
##                               PC10          PC11          PC12
## fixed.acidity        -0.0008578167  2.247396e-03 -7.666015e-04
## volatile.acidity     -0.9412187532 -2.484141e-02 -4.697001e-04
## citric.acid          -0.3214800154 -2.200332e-02 -3.371246e-04
## residual.sugar        0.0024096653  5.940522e-04 -3.736920e-04
## chlorides            -0.0294761271  9.993366e-01 -4.616856e-03
## free.sulfur.dioxide  -0.0011675075 -1.036815e-05  6.485294e-06
## total.sulfur.dioxide  0.0006933234 -2.819662e-05 -3.812818e-06
## density              -0.0010104654  4.629403e-03  9.999811e-01
## pH                   -0.0916016835  1.220385e-02 -3.481757e-03
## sulphates             0.0128950512 -1.598781e-03 -1.446541e-03
## alcohol               0.0238683042  6.811356e-03  1.095668e-03
## quality              -0.0274365169  6.245904e-04  8.347362e-05
plot(pc_white, type ="barplot")

summary(pc_white)
## Importance of components:
##                            PC1      PC2     PC3     PC4     PC5     PC6
## Standard deviation     43.9492 12.97972 4.64359 1.14725 0.82868 0.70740
## Proportion of Variance  0.9093  0.07931 0.01015 0.00062 0.00032 0.00024
## Cumulative Proportion   0.9093  0.98865 0.99880 0.99942 0.99974 0.99998
##                            PC7     PC8     PC9    PC10   PC11      PC12
## Standard deviation     0.13561 0.11881 0.10699 0.09059 0.0199 0.0005593
## Proportion of Variance 0.00001 0.00001 0.00001 0.00000 0.0000 0.0000000
## Cumulative Proportion  0.99998 0.99999 1.00000 1.00000 1.0000 1.0000000
biplot(pc_white)
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped
## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

## Warning in arrows(0, 0, y[, 1L] * 0.8, y[, 2L] * 0.8, col = col[2L], length
## = arrow.len): zero-length arrow is of indeterminate angle and so skipped

# Scaled data
# PRINCIPAL COMPONENT ANALYSIS
pc_white_scaled <- prcomp(wine_white, scale = TRUE)
summary(pc_white_scaled)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6    PC7
## Standard deviation     1.8294 1.2594 1.1710 1.04157 0.98756 0.96890 0.8771
## Proportion of Variance 0.2789 0.1322 0.1143 0.09041 0.08127 0.07823 0.0641
## Cumulative Proportion  0.2789 0.4111 0.5253 0.61573 0.69701 0.77524 0.8393
##                            PC8     PC9    PC10    PC11    PC12
## Standard deviation     0.85082 0.74599 0.58561 0.53302 0.14307
## Proportion of Variance 0.06032 0.04638 0.02858 0.02368 0.00171
## Cumulative Proportion  0.89967 0.94604 0.97462 0.99829 1.00000
names(pc_white_scaled)
## [1] "sdev"     "rotation" "center"   "scale"    "x"
#outputs the mean of variables
pc_white_scaled$center
##        fixed.acidity     volatile.acidity          citric.acid 
##           6.85478767           0.27824112           0.33419151 
##       residual.sugar            chlorides  free.sulfur.dioxide 
##           6.39141486           0.04577236          35.30808493 
## total.sulfur.dioxide              density                   pH 
##         138.36065741           0.99402738           3.18826664 
##            sulphates              alcohol              quality 
##           0.48984688          10.51426705           5.87790935
# output the standrad deviation of variables
pc_white_scaled$scale
##        fixed.acidity     volatile.acidity          citric.acid 
##          0.843868228          0.100794548          0.121019804 
##       residual.sugar            chlorides  free.sulfur.dioxide 
##          5.072057784          0.021847968         17.007137325 
## total.sulfur.dioxide              density                   pH 
##         42.498064554          0.002990907          0.151000600 
##            sulphates              alcohol              quality 
##          0.114125834          1.230620568          0.885638575
# rotation
pc_white_scaled$rotation
##                              PC1         PC2         PC3         PC4
## fixed.acidity        -0.15690447  0.56066866 -0.20738436  0.03373494
## volatile.acidity     -0.02428722  0.01606694  0.52491466 -0.13119747
## citric.acid          -0.13294430  0.28938115 -0.44635554  0.32953335
## residual.sugar       -0.40605288 -0.03882402 -0.03384313 -0.41615630
## chlorides            -0.21754400  0.03691144  0.21471269  0.50961203
## free.sulfur.dioxide  -0.27471931 -0.34554881 -0.31297088 -0.14892788
## total.sulfur.dioxide -0.39044148 -0.27232605 -0.12479447 -0.02161841
## density              -0.50129557 -0.01773344  0.03196758 -0.10386393
## pH                    0.13003701 -0.56714503  0.06848384  0.20410995
## sulphates            -0.03364168 -0.24826266 -0.22699505  0.51924489
## alcohol               0.44279498  0.01698188 -0.15887556 -0.13438871
## quality               0.22713722 -0.14603134 -0.48884718 -0.27820033
##                              PC5          PC6         PC7         PC8
## fixed.acidity        -0.24413933  0.105856235  0.22355921  0.13041311
## volatile.acidity     -0.70298193 -0.123704688 -0.22363601 -0.22960669
## citric.acid          -0.06510579 -0.131958661 -0.12037133 -0.69141866
## residual.sugar        0.01610213  0.289918546 -0.33860858 -0.11329401
## chlorides             0.17829248 -0.409317266 -0.55225504  0.21139734
## free.sulfur.dioxide  -0.11117214 -0.488085145  0.22407108  0.12883115
## total.sulfur.dioxide -0.27144774 -0.272493820  0.20375343  0.01290262
## density               0.07834373  0.326008106 -0.12313568 -0.08667076
## pH                    0.11270171  0.192688838  0.07704001 -0.47796137
## sulphates            -0.45623099  0.479811894 -0.04462167  0.33642752
## alcohol              -0.30855451 -0.135443327 -0.09801169 -0.08899029
## quality              -0.04112191 -0.005524396 -0.58434519  0.14444197
##                              PC9        PC10        PC11         PC12
## fixed.acidity        -0.63145048  0.20087123 -0.10411772  0.170792295
## volatile.acidity     -0.03159628 -0.14175876 -0.27002270  0.013376718
## citric.acid           0.24949503 -0.10632912 -0.05395597  0.009648802
## residual.sugar        0.17730336  0.37427490  0.17987291  0.493565139
## chlorides            -0.17916182  0.23552782  0.09108849  0.025168952
## free.sulfur.dioxide   0.10184710  0.32733415 -0.49921348 -0.029475198
## total.sulfur.dioxide -0.17800832 -0.34735757  0.64355326  0.035060193
## density              -0.12538636  0.04349161 -0.06686042 -0.761184485
## pH                   -0.52031593  0.18375599 -0.07911267  0.141842640
## sulphates             0.23662489  0.05519364 -0.04102077  0.042787387
## alcohol               0.01278298  0.57530003  0.41895440 -0.350156811
## quality              -0.29970621 -0.36771605 -0.14620225 -0.016069252
dim(pc_white_scaled$x)
## [1] 4898   12
wine_white.quality <- wine_white[, 12]
# Scree plot and eigen vector
plot(pc_white_scaled, type ="line")

pc_white_scaled
## Standard deviations:
##  [1] 1.8293903 1.2594008 1.1709706 1.0415668 0.9875644 0.9688978 0.8770680
##  [8] 0.8508195 0.7459900 0.5856051 0.5330248 0.1430703
## 
## Rotation:
##                              PC1         PC2         PC3         PC4
## fixed.acidity        -0.15690447  0.56066866 -0.20738436  0.03373494
## volatile.acidity     -0.02428722  0.01606694  0.52491466 -0.13119747
## citric.acid          -0.13294430  0.28938115 -0.44635554  0.32953335
## residual.sugar       -0.40605288 -0.03882402 -0.03384313 -0.41615630
## chlorides            -0.21754400  0.03691144  0.21471269  0.50961203
## free.sulfur.dioxide  -0.27471931 -0.34554881 -0.31297088 -0.14892788
## total.sulfur.dioxide -0.39044148 -0.27232605 -0.12479447 -0.02161841
## density              -0.50129557 -0.01773344  0.03196758 -0.10386393
## pH                    0.13003701 -0.56714503  0.06848384  0.20410995
## sulphates            -0.03364168 -0.24826266 -0.22699505  0.51924489
## alcohol               0.44279498  0.01698188 -0.15887556 -0.13438871
## quality               0.22713722 -0.14603134 -0.48884718 -0.27820033
##                              PC5          PC6         PC7         PC8
## fixed.acidity        -0.24413933  0.105856235  0.22355921  0.13041311
## volatile.acidity     -0.70298193 -0.123704688 -0.22363601 -0.22960669
## citric.acid          -0.06510579 -0.131958661 -0.12037133 -0.69141866
## residual.sugar        0.01610213  0.289918546 -0.33860858 -0.11329401
## chlorides             0.17829248 -0.409317266 -0.55225504  0.21139734
## free.sulfur.dioxide  -0.11117214 -0.488085145  0.22407108  0.12883115
## total.sulfur.dioxide -0.27144774 -0.272493820  0.20375343  0.01290262
## density               0.07834373  0.326008106 -0.12313568 -0.08667076
## pH                    0.11270171  0.192688838  0.07704001 -0.47796137
## sulphates            -0.45623099  0.479811894 -0.04462167  0.33642752
## alcohol              -0.30855451 -0.135443327 -0.09801169 -0.08899029
## quality              -0.04112191 -0.005524396 -0.58434519  0.14444197
##                              PC9        PC10        PC11         PC12
## fixed.acidity        -0.63145048  0.20087123 -0.10411772  0.170792295
## volatile.acidity     -0.03159628 -0.14175876 -0.27002270  0.013376718
## citric.acid           0.24949503 -0.10632912 -0.05395597  0.009648802
## residual.sugar        0.17730336  0.37427490  0.17987291  0.493565139
## chlorides            -0.17916182  0.23552782  0.09108849  0.025168952
## free.sulfur.dioxide   0.10184710  0.32733415 -0.49921348 -0.029475198
## total.sulfur.dioxide -0.17800832 -0.34735757  0.64355326  0.035060193
## density              -0.12538636  0.04349161 -0.06686042 -0.761184485
## pH                   -0.52031593  0.18375599 -0.07911267  0.141842640
## sulphates             0.23662489  0.05519364 -0.04102077  0.042787387
## alcohol               0.01278298  0.57530003  0.41895440 -0.350156811
## quality              -0.29970621 -0.36771605 -0.14620225 -0.016069252
#compute standard deviation of each principal component
std_dev <- pc_white_scaled$sdev

#compute variance
pr_var <- std_dev^2

#proportion of variance explained
prop_varex <- pr_var/sum(pr_var)

#scree plot
plot(prop_varex, xlab = "Principal Component",
             ylab = "Proportion of Variance Explained",
             type = "b")

#cumulative scree plot
plot(cumsum(prop_varex), xlab = "Principal Component",
              ylab = "Cumulative Proportion of Variance Explained",
              type = "b")

biplot(pc_white_scaled, scale = 0)

# pREDICTION 
pc_white_scaled1 <- prcomp(wine_white[,1:11], scale = TRUE)
summary(pc_white_scaled1)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6
## Standard deviation     1.7951 1.2551 1.1053 1.00922 0.98658 0.96889
## Proportion of Variance 0.2929 0.1432 0.1111 0.09259 0.08848 0.08534
## Cumulative Proportion  0.2929 0.4361 0.5472 0.63979 0.72827 0.81361
##                            PC7     PC8     PC9    PC10    PC11
## Standard deviation     0.85241 0.77418 0.64354 0.53804 0.14370
## Proportion of Variance 0.06605 0.05449 0.03765 0.02632 0.00188
## Cumulative Proportion  0.87967 0.93416 0.97181 0.99812 1.00000
wine_white_pca <- predict(pc_white_scaled1,newdata = wine_white)
wine_white_pca <- as.data.frame(wine_white_pca)
head(wine_white_pca)
##          PC1        PC2         PC3        PC4       PC5        PC6
## 1  3.6765681 -0.5451776 0.930326898  1.1352584 0.2928249 -0.8915294
## 2 -0.6445220  0.4307260 0.356294322 -0.9990146 0.7140048 -0.4836882
## 3  0.1552747 -1.1896785 0.017529733 -0.2701919 0.3676683 -0.5006305
## 4  1.4552255  0.0996700 0.001956186  0.4229793 0.4699326  0.7691822
## 5  1.4552255  0.0996700 0.001956186  0.4229793 0.4699326  0.7691822
## 6  0.1552747 -1.1896785 0.017529733 -0.2701919 0.3676683 -0.5006305
##          PC7        PC8        PC9        PC10        PC11
## 1 -0.2046431 -1.0250789 -0.1505937  0.07589739  0.04218543
## 2 -0.4369532  0.5848705  1.0634409  0.25389286  0.12789128
## 3 -0.5542620  0.9748027 -0.5495982 -0.85548280 -0.18427651
## 4  0.1976817  0.6015911 -0.2932142  0.40365834 -0.04526931
## 5  0.1976817  0.6015911 -0.2932142  0.40365834 -0.04526931
## 6 -0.5542620  0.9748027 -0.5495982 -0.85548280 -0.18427651
wine_white_pca[ , 12] <- as.data.frame(wine_white$quality)
colnames(wine_white_pca)[12] <- "quality"
# Code to get graph -biplot and eigen values
pca_white1<- PCA(scale(wine_white))

pca_white1
## **Results for the Principal Component Analysis (PCA)**
## The analysis was performed on 4898 individuals, described by 12 variables
## *The results are available in the following objects:
## 
##    name               description                          
## 1  "$eig"             "eigenvalues"                        
## 2  "$var"             "results for the variables"          
## 3  "$var$coord"       "coord. for the variables"           
## 4  "$var$cor"         "correlations variables - dimensions"
## 5  "$var$cos2"        "cos2 for the variables"             
## 6  "$var$contrib"     "contributions of the variables"     
## 7  "$ind"             "results for the individuals"        
## 8  "$ind$coord"       "coord. for the individuals"         
## 9  "$ind$cos2"        "cos2 for the individuals"           
## 10 "$ind$contrib"     "contributions of the individuals"   
## 11 "$call"            "summary statistics"                 
## 12 "$call$centre"     "mean of the variables"              
## 13 "$call$ecart.type" "standard error of the variables"    
## 14 "$call$row.w"      "weights for the individuals"        
## 15 "$call$col.w"      "weights for the variables"
pca_white1$eig
##         eigenvalue percentage of variance
## comp 1  3.34666886             27.8889072
## comp 2  1.58609030             13.2174191
## comp 3  1.37117206             11.4264338
## comp 4  1.08486150              9.0405125
## comp 5  0.97528337              8.1273614
## comp 6  0.93876295              7.8230246
## comp 7  0.76924825              6.4104021
## comp 8  0.72389378              6.0324481
## comp 9  0.55650103              4.6375086
## comp 10 0.34293339              2.8577782
## comp 11 0.28411542              2.3676285
## comp 12 0.02046911              0.1705759
##         cumulative percentage of variance
## comp 1                           27.88891
## comp 2                           41.10633
## comp 3                           52.53276
## comp 4                           61.57327
## comp 5                           69.70063
## comp 6                           77.52366
## comp 7                           83.93406
## comp 8                           89.96651
## comp 9                           94.60402
## comp 10                          97.46180
## comp 11                          99.82942
## comp 12                         100.00000
# Predict PCs-white
predict(pc_white_scaled,newdata=tail(wine_white, 2))
##           PC1        PC2          PC3        PC4       PC5        PC6
## 4897 3.622627 -0.9396813  0.002755897 -0.8458506 0.1861173 -0.4915531
## 4898 2.794760  0.1840536 -0.079200365 -0.5201373 1.0988470 -0.5375748
##             PC7        PC8        PC9       PC10      PC11        PC12
## 4897 -0.3168015 -0.9089182 0.09385922 -0.3619748 0.5562302 -0.02555398
## 4898  0.6454322 -1.1429077 0.42697522 -0.2760451 0.2858102  0.04833857
summary(pc_white_scaled)
## Importance of components:
##                           PC1    PC2    PC3     PC4     PC5     PC6    PC7
## Standard deviation     1.8294 1.2594 1.1710 1.04157 0.98756 0.96890 0.8771
## Proportion of Variance 0.2789 0.1322 0.1143 0.09041 0.08127 0.07823 0.0641
## Cumulative Proportion  0.2789 0.4111 0.5253 0.61573 0.69701 0.77524 0.8393
##                            PC8     PC9    PC10    PC11    PC12
## Standard deviation     0.85082 0.74599 0.58561 0.53302 0.14307
## Proportion of Variance 0.06032 0.04638 0.02858 0.02368 0.00171
## Cumulative Proportion  0.89967 0.94604 0.97462 0.99829 1.00000
library(devtools)
## Warning: package 'devtools' was built under R version 3.3.2
# install_github("ggbiplot", "vqv/ggbiplot")

library(ggbiplot)
## Loading required package: plyr
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:Hmisc':
## 
##     is.discrete, summarize
## Loading required package: scales
## Warning: package 'scales' was built under R version 3.3.2
## 
## Attaching package: 'scales'
## The following objects are masked from 'package:psych':
## 
##     alpha, rescale
## Loading required package: grid
g <- ggbiplot(pc_white_scaled, obs.scale = 2, var.scale = 2, groups= wine_white.quality, ellipse = TRUE, 
              circle = FALSE)

g <- g + scale_color_continuous(name = '')
g <- g + theme(legend.direction = 'horizontal', 
               legend.position = 'top')
print(g)

require(ggplot2)

theta <- seq(0,2*pi,length.out = 100)
circle <- data.frame(x = cos(theta), y = sin(theta))
p <- ggplot(circle,aes(x,y)) + geom_path()

# loadings <- data.frame(ir.pca$rotation, names = row.names(ir.pca$rotation))
# p + geom_text(data=loadings, 
#              mapping=aes(x = PC1, y = PC2, label = .names, colour = .names)) +
#  coord_fixed(ratio=2) +
#  labs(x = "PC1", y = "PC2")

print(g)

pc_white$eig
## NULL
Correlation_Matrix<-as.data.frame(round(cor(wine_white,pca_white1$ind$coord)^2*100,0))
 
Correlation_Matrix
##                      Dim.1 Dim.2 Dim.3 Dim.4 Dim.5
## fixed.acidity            8    50     6     0     6
## volatile.acidity         0     0    38     2    48
## citric.acid              6    13    27    12     0
## residual.sugar          55     0     0    19     0
## chlorides               16     0     6    28     3
## free.sulfur.dioxide     25    19    13     2     1
## total.sulfur.dioxide    51    12     2     0     7
## density                 84     0     0     1     1
## pH                       6    51     1     5     1
## sulphates                0    10     7    29    20
## alcohol                 66     0     3     2     9
## quality                 17     3    33     8     0

Next, I use multinomial logistic regression model using “quality” as the target variable.to compare result using PCA predictors and actual predictors

library(nnet)
wine_white$quality <- as.factor(wine_white$quality)
mlogit_model <- multinom(quality ~. ,data =wine_white, maxit = 1000) 
## # weights:  91 (72 variable)
## initial  value 9531.067910 
## iter  10 value 6414.647224
## iter  20 value 6052.157273
## iter  30 value 5815.209566
## iter  40 value 5382.001911
## iter  50 value 5347.161928
## iter  60 value 5339.165595
## iter  70 value 5336.314586
## iter  80 value 5334.784154
## iter  90 value 5332.550025
## iter 100 value 5330.291963
## iter 110 value 5320.982596
## iter 120 value 5315.854261
## iter 130 value 5313.649597
## iter 140 value 5311.812395
## final  value 5311.740067 
## converged
mlogit_model
## Call:
## multinom(formula = quality ~ ., data = wine_white, maxit = 1000)
## 
## Coefficients:
##   (Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar
## 4  -146.17833    -0.9011549         1.909546   0.7646530   -0.120460944
## 5  -160.02570    -1.2470523        -1.962609   1.2057458   -0.075605321
## 6  -107.89694    -1.3270421        -7.658482   1.3035760    0.004296053
## 7   339.28622    -0.8738655        -9.623866   0.4033815    0.202491343
## 8   129.37097    -1.0690515        -8.865040   0.8106784    0.177462841
## 9   -27.16097     1.1956160        -9.445852   2.0895194    0.256108776
##     chlorides free.sulfur.dioxide total.sulfur.dioxide    density
## 4   -9.807789        -0.073995463        -4.514643e-03  171.16764
## 5  -11.087749        -0.037352677         7.114124e-05  193.19748
## 6   -9.826164        -0.029373926        -2.116957e-03  132.70043
## 7  -25.229760        -0.024135389        -2.239523e-03 -331.31340
## 8  -16.299088        -0.007545143        -5.604673e-03 -125.95295
## 9 -441.548528        -0.020825203        -7.039806e-04  -44.57506
##           pH sulphates    alcohol
## 4 -2.5851632 2.0532646 -0.4634899
## 5 -3.7521896 1.4230186 -0.4813939
## 6 -3.4541101 2.7440208  0.3430976
## 7 -0.8773179 4.5087587  0.4053614
## 8 -1.0427423 3.3377507  0.9456756
## 9 17.3613016 0.9662957  1.6300509
## 
## Residual Deviance: 10623.48 
## AIC: 10767.48
mlogit_output <- summary(mlogit_model)
mlogit_output
## Call:
## multinom(formula = quality ~ ., data = wine_white, maxit = 1000)
## 
## Coefficients:
##   (Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar
## 4  -146.17833    -0.9011549         1.909546   0.7646530   -0.120460944
## 5  -160.02570    -1.2470523        -1.962609   1.2057458   -0.075605321
## 6  -107.89694    -1.3270421        -7.658482   1.3035760    0.004296053
## 7   339.28622    -0.8738655        -9.623866   0.4033815    0.202491343
## 8   129.37097    -1.0690515        -8.865040   0.8106784    0.177462841
## 9   -27.16097     1.1956160        -9.445852   2.0895194    0.256108776
##     chlorides free.sulfur.dioxide total.sulfur.dioxide    density
## 4   -9.807789        -0.073995463        -4.514643e-03  171.16764
## 5  -11.087749        -0.037352677         7.114124e-05  193.19748
## 6   -9.826164        -0.029373926        -2.116957e-03  132.70043
## 7  -25.229760        -0.024135389        -2.239523e-03 -331.31340
## 8  -16.299088        -0.007545143        -5.604673e-03 -125.95295
## 9 -441.548528        -0.020825203        -7.039806e-04  -44.57506
##           pH sulphates    alcohol
## 4 -2.5851632 2.0532646 -0.4634899
## 5 -3.7521896 1.4230186 -0.4813939
## 6 -3.4541101 2.7440208  0.3430976
## 7 -0.8773179 4.5087587  0.4053614
## 8 -1.0427423 3.3377507  0.9456756
## 9 17.3613016 0.9662957  1.6300509
## 
## Std. Errors:
##   (Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar
## 4  0.07503227     0.1988060      0.563383375   0.5853718     0.05361692
## 5  0.42451826     0.1883643      0.368259938   0.3063269     0.04997279
## 6  0.35728005     0.1879169      0.347243801   0.2810207     0.04991745
## 7  0.46128442     0.1910399      0.463207285   0.3867977     0.05059846
## 8  0.05252902     0.2040204      0.778156873   0.6489123     0.05308092
## 9  0.01192238     0.2769569      0.004118554   0.0199239     0.15058624
##      chlorides free.sulfur.dioxide total.sulfur.dioxide    density
## 4 0.0614734959          0.01327570          0.006966082 0.07414261
## 5 0.8346830953          0.01080784          0.006568198 0.41689063
## 6 0.8271379501          0.01067142          0.006559115 0.35095023
## 7 0.0456715981          0.01085444          0.006656021 0.45478016
## 8 0.0096554725          0.01135180          0.007167097 0.05226580
## 9 0.0004420978          0.04115254          0.019924978 0.01174476
##           pH   sulphates   alcohol
## 4 0.23808757 0.668467746 0.1448381
## 5 0.21881415 0.330642547 0.1284929
## 6 0.18164790 0.272048777 0.1260438
## 7 0.22468046 0.318407242 0.1288072
## 8 0.24114375 0.530465586 0.1398779
## 9 0.04752021 0.006232217 0.1818881
## 
## Residual Deviance: 10623.48 
## AIC: 10767.48

calculate coefficient and standard error

mlogit_output$coefficients
##   (Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar
## 4  -146.17833    -0.9011549         1.909546   0.7646530   -0.120460944
## 5  -160.02570    -1.2470523        -1.962609   1.2057458   -0.075605321
## 6  -107.89694    -1.3270421        -7.658482   1.3035760    0.004296053
## 7   339.28622    -0.8738655        -9.623866   0.4033815    0.202491343
## 8   129.37097    -1.0690515        -8.865040   0.8106784    0.177462841
## 9   -27.16097     1.1956160        -9.445852   2.0895194    0.256108776
##     chlorides free.sulfur.dioxide total.sulfur.dioxide    density
## 4   -9.807789        -0.073995463        -4.514643e-03  171.16764
## 5  -11.087749        -0.037352677         7.114124e-05  193.19748
## 6   -9.826164        -0.029373926        -2.116957e-03  132.70043
## 7  -25.229760        -0.024135389        -2.239523e-03 -331.31340
## 8  -16.299088        -0.007545143        -5.604673e-03 -125.95295
## 9 -441.548528        -0.020825203        -7.039806e-04  -44.57506
##           pH sulphates    alcohol
## 4 -2.5851632 2.0532646 -0.4634899
## 5 -3.7521896 1.4230186 -0.4813939
## 6 -3.4541101 2.7440208  0.3430976
## 7 -0.8773179 4.5087587  0.4053614
## 8 -1.0427423 3.3377507  0.9456756
## 9 17.3613016 0.9662957  1.6300509
mlogit_output$standard.errors
##   (Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar
## 4  0.07503227     0.1988060      0.563383375   0.5853718     0.05361692
## 5  0.42451826     0.1883643      0.368259938   0.3063269     0.04997279
## 6  0.35728005     0.1879169      0.347243801   0.2810207     0.04991745
## 7  0.46128442     0.1910399      0.463207285   0.3867977     0.05059846
## 8  0.05252902     0.2040204      0.778156873   0.6489123     0.05308092
## 9  0.01192238     0.2769569      0.004118554   0.0199239     0.15058624
##      chlorides free.sulfur.dioxide total.sulfur.dioxide    density
## 4 0.0614734959          0.01327570          0.006966082 0.07414261
## 5 0.8346830953          0.01080784          0.006568198 0.41689063
## 6 0.8271379501          0.01067142          0.006559115 0.35095023
## 7 0.0456715981          0.01085444          0.006656021 0.45478016
## 8 0.0096554725          0.01135180          0.007167097 0.05226580
## 9 0.0004420978          0.04115254          0.019924978 0.01174476
##           pH   sulphates   alcohol
## 4 0.23808757 0.668467746 0.1448381
## 5 0.21881415 0.330642547 0.1284929
## 6 0.18164790 0.272048777 0.1260438
## 7 0.22468046 0.318407242 0.1288072
## 8 0.24114375 0.530465586 0.1398779
## 9 0.04752021 0.006232217 0.1818881

calculate Z score and p-Value for the variables in the model

z <- mlogit_output$coefficients/mlogit_output$standard.errors
p <- (1-pnorm(abs(z),0,1))*2 # I am using two-tailed z test
print(z, digits =2)
##   (Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar
## 4       -1948          -4.5              3.4         1.3         -2.247
## 5        -377          -6.6             -5.3         3.9         -1.513
## 6        -302          -7.1            -22.1         4.6          0.086
## 7         736          -4.6            -20.8         1.0          4.002
## 8        2463          -5.2            -11.4         1.2          3.343
## 9       -2278           4.3          -2293.5       104.9          1.701
##   chlorides free.sulfur.dioxide total.sulfur.dioxide density    pH
## 4      -160               -5.57               -0.648    2309 -10.9
## 5       -13               -3.46                0.011     463 -17.1
## 6       -12               -2.75               -0.323     378 -19.0
## 7      -552               -2.22               -0.336    -729  -3.9
## 8     -1688               -0.66               -0.782   -2410  -4.3
## 9   -998758               -0.51               -0.035   -3795 365.3
##   sulphates alcohol
## 4       3.1    -3.2
## 5       4.3    -3.7
## 6      10.1     2.7
## 7      14.2     3.1
## 8       6.3     6.8
## 9     155.0     9.0
print(p, digits =2)
##   (Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar
## 4           0       5.8e-06          7.0e-04     1.9e-01        2.5e-02
## 5           0       3.6e-11          9.9e-08     8.3e-05        1.3e-01
## 6           0       1.6e-12          0.0e+00     3.5e-06        9.3e-01
## 7           0       4.8e-06          0.0e+00     3.0e-01        6.3e-05
## 8           0       1.6e-07          0.0e+00     2.1e-01        8.3e-04
## 9           0       1.6e-05          0.0e+00     0.0e+00        8.9e-02
##   chlorides free.sulfur.dioxide total.sulfur.dioxide density      pH
## 4         0             2.5e-08                 0.52       0 0.0e+00
## 5         0             5.5e-04                 0.99       0 0.0e+00
## 6         0             5.9e-03                 0.75       0 0.0e+00
## 7         0             2.6e-02                 0.74       0 9.4e-05
## 8         0             5.1e-01                 0.43       0 1.5e-05
## 9         0             6.1e-01                 0.97       0 0.0e+00
##   sulphates alcohol
## 4   2.1e-03 1.4e-03
## 5   1.7e-05 1.8e-04
## 6   0.0e+00 6.5e-03
## 7   0.0e+00 1.6e-03
## 8   3.1e-10 1.4e-11
## 9   0.0e+00 0.0e+00

Now I’ll explore the entire data set, and analyze if we can remove any variables which do not add to model performance.

Pquality5 <- rbind(mlogit_output$coefficients[2, ],mlogit_output$standard.errors[2, ],z[2, ],p[2, ])
Pquality5
##       (Intercept) fixed.acidity volatile.acidity  citric.acid
## [1,] -160.0256980 -1.247052e+00    -1.962609e+00 1.205746e+00
## [2,]    0.4245183  1.883643e-01     3.682599e-01 3.063269e-01
## [3,] -376.9583410 -6.620428e+00    -5.329413e+00 3.936141e+00
## [4,]    0.0000000  3.581602e-11     9.853093e-08 8.280224e-05
##      residual.sugar   chlorides free.sulfur.dioxide total.sulfur.dioxide
## [1,]    -0.07560532 -11.0877492       -0.0373526768         7.114124e-05
## [2,]     0.04997279   0.8346831        0.0108078450         6.568198e-03
## [3,]    -1.51292978 -13.2837831       -3.4560707499         1.083117e-02
## [4,]     0.13029750   0.0000000        0.0005481114         9.913581e-01
##          density          pH    sulphates      alcohol
## [1,] 193.1974798  -3.7521896 1.4230186397 -0.481393883
## [2,]   0.4168906   0.2188142 0.3306425474  0.128492922
## [3,] 463.4248602 -17.1478377 4.3037977141 -3.746462259
## [4,]   0.0000000   0.0000000 0.0000167895  0.000179346
rownames(Pquality5) <- c("Coefficient","Std. Errors","z stat","p value")
knitr::kable(Pquality5)
(Intercept) fixed.acidity volatile.acidity citric.acid residual.sugar chlorides free.sulfur.dioxide total.sulfur.dioxide density pH sulphates alcohol
Coefficient -160.0256980 -1.2470523 -1.9626091 1.2057458 -0.0756053 -11.0877492 -0.0373527 0.0000711 193.1974798 -3.7521896 1.4230186 -0.4813939
Std. Errors 0.4245183 0.1883643 0.3682599 0.3063269 0.0499728 0.8346831 0.0108078 0.0065682 0.4168906 0.2188142 0.3306425 0.1284929
z stat -376.9583410 -6.6204282 -5.3294126 3.9361413 -1.5129298 -13.2837831 -3.4560707 0.0108312 463.4248602 -17.1478377 4.3037977 -3.7464623
p value 0.0000000 0.0000000 0.0000001 0.0000828 0.1302975 0.0000000 0.0005481 0.9913581 0.0000000 0.0000000 0.0000168 0.0001793

Lean Multinomial Model

I used coefficients magnitude, p-value and accuracy to select key variables for lean model

mlogit_model1 <- multinom(quality ~ density + chlorides + volatile.acidity + alcohol, data = wine_white, maxit = 1000) 
## # weights:  42 (30 variable)
## initial  value 9531.067910 
## iter  10 value 6042.447019
## iter  20 value 5522.543872
## iter  30 value 5512.500391
## iter  40 value 5504.072357
## iter  50 value 5498.013204
## iter  60 value 5495.236496
## iter  70 value 5493.446161
## iter  80 value 5491.784822
## iter  90 value 5490.802324
## iter 100 value 5488.899222
## iter 110 value 5487.839488
## iter 120 value 5484.593955
## final  value 5484.564810 
## converged
mlogit_model1
## Call:
## multinom(formula = quality ~ density + chlorides + volatile.acidity + 
##     alcohol, data = wine_white, maxit = 1000)
## 
## Coefficients:
##   (Intercept)    density   chlorides volatile.acidity    alcohol
## 4  182.942656 -176.24297   -9.137097         2.902623 -0.5992752
## 5   76.853209  -65.21239   -8.314696        -1.381076 -0.6769956
## 6  -37.614256   41.91572   -7.818278        -7.126923  0.2964201
## 7  -69.837795   68.52409  -27.209577        -9.050082  0.8595494
## 8 -122.536927  115.88794  -17.240567        -8.442490  1.1672243
## 9   -8.966465    3.78357 -153.639503        -8.386114  1.0319386
## 
## Residual Deviance: 10969.13 
## AIC: 11029.13
mlogit_output1 <- summary(mlogit_model1)
mlogit_output1 
## Call:
## multinom(formula = quality ~ density + chlorides + volatile.acidity + 
##     alcohol, data = wine_white, maxit = 1000)
## 
## Coefficients:
##   (Intercept)    density   chlorides volatile.acidity    alcohol
## 4  182.942656 -176.24297   -9.137097         2.902623 -0.5992752
## 5   76.853209  -65.21239   -8.314696        -1.381076 -0.6769956
## 6  -37.614256   41.91572   -7.818278        -7.126923  0.2964201
## 7  -69.837795   68.52409  -27.209577        -9.050082  0.8595494
## 8 -122.536927  115.88794  -17.240567        -8.442490  1.1672243
## 9   -8.966465    3.78357 -153.639503        -8.386114  1.0319386
## 
## Std. Errors:
##   (Intercept)  density chlorides volatile.acidity   alcohol
## 4    1.142151 1.158530 6.1280463         1.782877 0.2133311
## 5    1.101058 1.075444 5.4474523         1.728997 0.2020842
## 6    1.005195 1.223813 5.5163101         1.739039 0.2006937
## 7    1.133330 1.078595 6.7392197         1.780452 0.2034684
## 8    1.242276 1.136474 8.8620454         1.918194 0.2144794
## 9    3.180607 3.213563 0.3377814         5.098028 0.5693638
## 
## Residual Deviance: 10969.13 
## AIC: 11029.13
mlogit_output1$coefficients
##   (Intercept)    density   chlorides volatile.acidity    alcohol
## 4  182.942656 -176.24297   -9.137097         2.902623 -0.5992752
## 5   76.853209  -65.21239   -8.314696        -1.381076 -0.6769956
## 6  -37.614256   41.91572   -7.818278        -7.126923  0.2964201
## 7  -69.837795   68.52409  -27.209577        -9.050082  0.8595494
## 8 -122.536927  115.88794  -17.240567        -8.442490  1.1672243
## 9   -8.966465    3.78357 -153.639503        -8.386114  1.0319386
mlogit_output1$standard.errors
##   (Intercept)  density chlorides volatile.acidity   alcohol
## 4    1.142151 1.158530 6.1280463         1.782877 0.2133311
## 5    1.101058 1.075444 5.4474523         1.728997 0.2020842
## 6    1.005195 1.223813 5.5163101         1.739039 0.2006937
## 7    1.133330 1.078595 6.7392197         1.780452 0.2034684
## 8    1.242276 1.136474 8.8620454         1.918194 0.2144794
## 9    3.180607 3.213563 0.3377814         5.098028 0.5693638
z <- mlogit_output1$coefficients/mlogit_output1$standard.errors
p <- (1-pnorm(abs(z),0,1))*2

Table of Coefficient,standard error, z statand, p value for Probability of Quality 5 ( Quality 5 has the highest probability))

Pquality5 <- rbind(mlogit_output1$coefficients[2, ],mlogit_output1$standard.errors[2, ],z[2, ],p[2, ])
Pquality5
##      (Intercept)    density  chlorides volatile.acidity       alcohol
## [1,]   76.853209 -65.212387 -8.3146964       -1.3810757 -0.6769956119
## [2,]    1.101058   1.075444  5.4474523        1.7289969  0.2020842416
## [3,]   69.799444 -60.637651 -1.5263459       -0.7987728 -3.3500663222
## [4,]    0.000000   0.000000  0.1269237        0.4244222  0.0008079221
rownames(Pquality5) <- c("Coefficient","Std. Errors","z stat","p value")
knitr::kable(Pquality5)
(Intercept) density chlorides volatile.acidity alcohol
Coefficient 76.853208 -65.212387 -8.3146964 -1.3810757 -0.6769956
Std. Errors 1.101058 1.075444 5.4474523 1.7289969 0.2020842
z stat 69.799444 -60.637651 -1.5263459 -0.7987728 -3.3500663
p value 0.000000 0.000000 0.1269237 0.4244222 0.0008079

Accuracy of MULTINOMIAL MODEL USING density + chlorides + volatile.acidity + alcohol

predictedML1 <- predict(mlogit_model1,wine_white,na.action =na.pass, type="probs")
predicted_classML1 <- predict(mlogit_model1,wine_white)
cm_ols <- table(predicted_classML1, wine_white$quality)
print(cm_ols)
##                   
## predicted_classML1    3    4    5    6    7    8    9
##                  3    0    0    0    0    0    0    0
##                  4    0    3    2    1    0    0    0
##                  5    7   92  800  450   38   11    0
##                  6   12   65  647 1622  688  122    2
##                  7    1    3    8  124  154   42    3
##                  8    0    0    0    1    0    0    0
##                  9    0    0    0    0    0    0    0
Accuracy <- sum(diag(cm_ols))/sum(cm_ols)
Accuracy
## [1] 0.5265414

USING pca

library(nnet)
wine_white$quality <- as.factor(wine_white_pca$quality)
mlogit_model_pca <- multinom(quality ~. ,data =wine_white_pca, maxit = 1000) 
## # weights:  91 (72 variable)
## initial  value 9531.067910 
## iter  10 value 6036.731016
## iter  20 value 5694.114383
## iter  30 value 5472.950913
## iter  40 value 5389.586477
## iter  50 value 5315.063391
## iter  60 value 5302.126745
## iter  70 value 5301.452356
## iter  80 value 5301.029649
## final  value 5300.956377 
## converged
mlogit_model_pca
## Call:
## multinom(formula = quality ~ ., data = wine_white_pca, maxit = 1000)
## 
## Coefficients:
##   (Intercept)         PC1        PC2        PC3        PC4        PC5
## 4    2.027223 -0.28901446 -0.1348783  0.4341662 -0.5896798  0.3894355
## 5    4.739629  0.09133208  0.1104327  0.1367819 -0.1555566  0.5366869
## 6    5.446409 -0.27475013  0.1601901 -0.3398061  0.1960996  0.5177786
## 7    4.099425 -0.63715392  0.2594263 -0.6238430  0.6511807  0.2471959
## 8    2.242588 -0.63211224  0.3220157 -0.5947537  0.6968820  0.1836012
## 9   -6.256670 -1.83248810  0.4835077 -1.4652448  3.9704391 -1.5844819
##          PC6          PC7        PC8        PC9        PC10       PC11
## 4 -0.7095140 -0.100504703 -0.2867423  1.2781645  0.36473454  0.6561609
## 5 -0.4356058 -0.020956662 -0.6535577  1.1822973  0.32982542 -0.1822998
## 6 -0.4398309 -0.037024060 -1.1331343  0.2926639  0.63792229 -0.3598283
## 7 -0.5900704 -0.006554536 -1.2252729 -0.1996221  0.73051233 -2.4871566
## 8 -0.3458034 -0.166084128 -1.4341226 -0.6708020  0.64443425 -3.0548513
## 9 -2.5368759 -1.400295780  0.8337027 -0.3685297 -0.04790321 -0.2212499
## 
## Residual Deviance: 10601.91 
## AIC: 10745.91
mlogit_output_pca <- summary(mlogit_model_pca)
mlogit_output_pca
## Call:
## multinom(formula = quality ~ ., data = wine_white_pca, maxit = 1000)
## 
## Coefficients:
##   (Intercept)         PC1        PC2        PC3        PC4        PC5
## 4    2.027223 -0.28901446 -0.1348783  0.4341662 -0.5896798  0.3894355
## 5    4.739629  0.09133208  0.1104327  0.1367819 -0.1555566  0.5366869
## 6    5.446409 -0.27475013  0.1601901 -0.3398061  0.1960996  0.5177786
## 7    4.099425 -0.63715392  0.2594263 -0.6238430  0.6511807  0.2471959
## 8    2.242588 -0.63211224  0.3220157 -0.5947537  0.6968820  0.1836012
## 9   -6.256670 -1.83248810  0.4835077 -1.4652448  3.9704391 -1.5844819
##          PC6          PC7        PC8        PC9        PC10       PC11
## 4 -0.7095140 -0.100504703 -0.2867423  1.2781645  0.36473454  0.6561609
## 5 -0.4356058 -0.020956662 -0.6535577  1.1822973  0.32982542 -0.1822998
## 6 -0.4398309 -0.037024060 -1.1331343  0.2926639  0.63792229 -0.3598283
## 7 -0.5900704 -0.006554536 -1.2252729 -0.1996221  0.73051233 -2.4871566
## 8 -0.3458034 -0.166084128 -1.4341226 -0.6708020  0.64443425 -3.0548513
## 9 -2.5368759 -1.400295780  0.8337027 -0.3685297 -0.04790321 -0.2212499
## 
## Std. Errors:
##   (Intercept)       PC1       PC2       PC3       PC4       PC5       PC6
## 4   0.3533404 0.1503791 0.1633764 0.2060539 0.1864103 0.2040837 0.2136086
## 5   0.3316006 0.1401905 0.1532896 0.1960060 0.1723007 0.1937683 0.1926941
## 6   0.3305255 0.1398330 0.1529030 0.1962249 0.1724303 0.1935652 0.1921264
## 7   0.3333543 0.1416353 0.1547544 0.1990413 0.1844522 0.1972320 0.1983140
## 8   0.3517707 0.1485140 0.1631778 0.2098844 0.2095402 0.2094765 0.2126973
## 9   2.2690103 0.5442530 0.3550829 0.5853719 1.3480274 0.8587051 1.0117091
##         PC7       PC8       PC9      PC10      PC11
## 4 0.2796000 0.2989028 0.3419873 0.3941081 1.0080647
## 5 0.2652672 0.2835056 0.3220298 0.3671447 0.9782499
## 6 0.2649037 0.2833340 0.3216162 0.3667061 0.9858760
## 7 0.2679956 0.2864775 0.3285265 0.3719938 1.0253194
## 8 0.2799308 0.3033375 0.3528157 0.3957018 1.2347147
## 9 0.5725602 0.8965439 1.1226002 0.9963558 3.0200993
## 
## Residual Deviance: 10601.91 
## AIC: 10745.91

calculate coefficient and standard error

mlogit_output_pca$coefficients
##   (Intercept)         PC1        PC2        PC3        PC4        PC5
## 4    2.027223 -0.28901446 -0.1348783  0.4341662 -0.5896798  0.3894355
## 5    4.739629  0.09133208  0.1104327  0.1367819 -0.1555566  0.5366869
## 6    5.446409 -0.27475013  0.1601901 -0.3398061  0.1960996  0.5177786
## 7    4.099425 -0.63715392  0.2594263 -0.6238430  0.6511807  0.2471959
## 8    2.242588 -0.63211224  0.3220157 -0.5947537  0.6968820  0.1836012
## 9   -6.256670 -1.83248810  0.4835077 -1.4652448  3.9704391 -1.5844819
##          PC6          PC7        PC8        PC9        PC10       PC11
## 4 -0.7095140 -0.100504703 -0.2867423  1.2781645  0.36473454  0.6561609
## 5 -0.4356058 -0.020956662 -0.6535577  1.1822973  0.32982542 -0.1822998
## 6 -0.4398309 -0.037024060 -1.1331343  0.2926639  0.63792229 -0.3598283
## 7 -0.5900704 -0.006554536 -1.2252729 -0.1996221  0.73051233 -2.4871566
## 8 -0.3458034 -0.166084128 -1.4341226 -0.6708020  0.64443425 -3.0548513
## 9 -2.5368759 -1.400295780  0.8337027 -0.3685297 -0.04790321 -0.2212499
mlogit_output_pca$standard.errors
##   (Intercept)       PC1       PC2       PC3       PC4       PC5       PC6
## 4   0.3533404 0.1503791 0.1633764 0.2060539 0.1864103 0.2040837 0.2136086
## 5   0.3316006 0.1401905 0.1532896 0.1960060 0.1723007 0.1937683 0.1926941
## 6   0.3305255 0.1398330 0.1529030 0.1962249 0.1724303 0.1935652 0.1921264
## 7   0.3333543 0.1416353 0.1547544 0.1990413 0.1844522 0.1972320 0.1983140
## 8   0.3517707 0.1485140 0.1631778 0.2098844 0.2095402 0.2094765 0.2126973
## 9   2.2690103 0.5442530 0.3550829 0.5853719 1.3480274 0.8587051 1.0117091
##         PC7       PC8       PC9      PC10      PC11
## 4 0.2796000 0.2989028 0.3419873 0.3941081 1.0080647
## 5 0.2652672 0.2835056 0.3220298 0.3671447 0.9782499
## 6 0.2649037 0.2833340 0.3216162 0.3667061 0.9858760
## 7 0.2679956 0.2864775 0.3285265 0.3719938 1.0253194
## 8 0.2799308 0.3033375 0.3528157 0.3957018 1.2347147
## 9 0.5725602 0.8965439 1.1226002 0.9963558 3.0200993

calculate Z score and p-Value for the variables in the model

z <- mlogit_output_pca$coefficients/mlogit_output_pca$standard.errors
p <- (1-pnorm(abs(z),0,1))*2 # I am using two-tailed z test
print(z, digits =2)
##   (Intercept)   PC1   PC2  PC3  PC4   PC5  PC6    PC7   PC8   PC9   PC10
## 4         5.7 -1.92 -0.83  2.1 -3.2  1.91 -3.3 -0.359 -0.96  3.74  0.925
## 5        14.3  0.65  0.72  0.7 -0.9  2.77 -2.3 -0.079 -2.31  3.67  0.898
## 6        16.5 -1.96  1.05 -1.7  1.1  2.67 -2.3 -0.140 -4.00  0.91  1.740
## 7        12.3 -4.50  1.68 -3.1  3.5  1.25 -3.0 -0.024 -4.28 -0.61  1.964
## 8         6.4 -4.26  1.97 -2.8  3.3  0.88 -1.6 -0.593 -4.73 -1.90  1.629
## 9        -2.8 -3.37  1.36 -2.5  2.9 -1.85 -2.5 -2.446  0.93 -0.33 -0.048
##     PC11
## 4  0.651
## 5 -0.186
## 6 -0.365
## 7 -2.426
## 8 -2.474
## 9 -0.073
print(p, digits =2)
##   (Intercept)     PC1   PC2    PC3     PC4    PC5    PC6   PC7     PC8
## 4     9.6e-09 5.5e-02 0.409 0.0351 0.00156 0.0564 0.0009 0.719 3.4e-01
## 5     0.0e+00 5.1e-01 0.471 0.4853 0.36662 0.0056 0.0238 0.937 2.1e-02
## 6     0.0e+00 4.9e-02 0.295 0.0833 0.25543 0.0075 0.0221 0.889 6.4e-05
## 7     0.0e+00 6.8e-06 0.094 0.0017 0.00042 0.2101 0.0029 0.980 1.9e-05
## 8     1.8e-10 2.1e-05 0.048 0.0046 0.00088 0.3808 0.1040 0.553 2.3e-06
## 9     5.8e-03 7.6e-04 0.173 0.0123 0.00323 0.0650 0.0122 0.014 3.5e-01
##       PC9  PC10  PC11
## 4 0.00019 0.355 0.515
## 5 0.00024 0.369 0.852
## 6 0.36283 0.082 0.715
## 7 0.54343 0.050 0.015
## 8 0.05727 0.103 0.013
## 9 0.74270 0.962 0.942

Now I’ll explore the entire data set, and analyze if we can remove any variables which do not add to model performance.

Pquality5_pca <- rbind(mlogit_output_pca$coefficients[2, ],mlogit_output_pca$standard.errors[2, ],z[2, ],p[2, ])
Pquality5_pca
##      (Intercept)        PC1       PC2       PC3        PC4         PC5
## [1,]   4.7396291 0.09133208 0.1104327 0.1367819 -0.1555566 0.536686929
## [2,]   0.3316006 0.14019051 0.1532896 0.1960060  0.1723007 0.193768274
## [3,]  14.2931876 0.65148549 0.7204187 0.6978452 -0.9028200 2.769735824
## [4,]   0.0000000 0.51473314 0.4712672 0.4852740  0.3666214 0.005610178
##              PC6         PC7        PC8          PC9      PC10       PC11
## [1,] -0.43560578 -0.02095666 -0.6535577 1.1822973001 0.3298254 -0.1822998
## [2,]  0.19269406  0.26526720  0.2835056 0.3220297895 0.3671447  0.9782499
## [3,] -2.26060820 -0.07900209 -2.3052729 3.6713910909 0.8983526 -0.1863530
## [4,]  0.02378353  0.93703096  0.0211513 0.0002412339 0.3689976  0.8521679
rownames(Pquality5_pca) <- c("Coefficient","Std. Errors","z stat","p value")
knitr::kable(Pquality5_pca)
(Intercept) PC1 PC2 PC3 PC4 PC5 PC6 PC7 PC8 PC9 PC10 PC11
Coefficient 4.7396291 0.0913321 0.1104327 0.1367819 -0.1555566 0.5366869 -0.4356058 -0.0209567 -0.6535577 1.1822973 0.3298254 -0.1822998
Std. Errors 0.3316006 0.1401905 0.1532896 0.1960060 0.1723007 0.1937683 0.1926941 0.2652672 0.2835056 0.3220298 0.3671447 0.9782499
z stat 14.2931876 0.6514855 0.7204187 0.6978452 -0.9028200 2.7697358 -2.2606082 -0.0790021 -2.3052729 3.6713911 0.8983526 -0.1863530
p value 0.0000000 0.5147331 0.4712672 0.4852740 0.3666214 0.0056102 0.0237835 0.9370310 0.0211513 0.0002412 0.3689976 0.8521679

Lean Multinomial Model

I used PC1 to PC4 for lean model

mlogit_model_pca1 <- multinom(quality ~ ., data = wine_white_pca, maxit = 1000)
## # weights:  91 (72 variable)
## initial  value 9531.067910 
## iter  10 value 6036.731016
## iter  20 value 5694.114383
## iter  30 value 5472.950913
## iter  40 value 5389.586477
## iter  50 value 5315.063391
## iter  60 value 5302.126745
## iter  70 value 5301.452356
## iter  80 value 5301.029649
## final  value 5300.956377 
## converged
mlogit_model_pca1
## Call:
## multinom(formula = quality ~ ., data = wine_white_pca, maxit = 1000)
## 
## Coefficients:
##   (Intercept)         PC1        PC2        PC3        PC4        PC5
## 4    2.027223 -0.28901446 -0.1348783  0.4341662 -0.5896798  0.3894355
## 5    4.739629  0.09133208  0.1104327  0.1367819 -0.1555566  0.5366869
## 6    5.446409 -0.27475013  0.1601901 -0.3398061  0.1960996  0.5177786
## 7    4.099425 -0.63715392  0.2594263 -0.6238430  0.6511807  0.2471959
## 8    2.242588 -0.63211224  0.3220157 -0.5947537  0.6968820  0.1836012
## 9   -6.256670 -1.83248810  0.4835077 -1.4652448  3.9704391 -1.5844819
##          PC6          PC7        PC8        PC9        PC10       PC11
## 4 -0.7095140 -0.100504703 -0.2867423  1.2781645  0.36473454  0.6561609
## 5 -0.4356058 -0.020956662 -0.6535577  1.1822973  0.32982542 -0.1822998
## 6 -0.4398309 -0.037024060 -1.1331343  0.2926639  0.63792229 -0.3598283
## 7 -0.5900704 -0.006554536 -1.2252729 -0.1996221  0.73051233 -2.4871566
## 8 -0.3458034 -0.166084128 -1.4341226 -0.6708020  0.64443425 -3.0548513
## 9 -2.5368759 -1.400295780  0.8337027 -0.3685297 -0.04790321 -0.2212499
## 
## Residual Deviance: 10601.91 
## AIC: 10745.91
mlogit_output_pca1 <- summary(mlogit_model_pca1)
mlogit_output_pca1
## Call:
## multinom(formula = quality ~ ., data = wine_white_pca, maxit = 1000)
## 
## Coefficients:
##   (Intercept)         PC1        PC2        PC3        PC4        PC5
## 4    2.027223 -0.28901446 -0.1348783  0.4341662 -0.5896798  0.3894355
## 5    4.739629  0.09133208  0.1104327  0.1367819 -0.1555566  0.5366869
## 6    5.446409 -0.27475013  0.1601901 -0.3398061  0.1960996  0.5177786
## 7    4.099425 -0.63715392  0.2594263 -0.6238430  0.6511807  0.2471959
## 8    2.242588 -0.63211224  0.3220157 -0.5947537  0.6968820  0.1836012
## 9   -6.256670 -1.83248810  0.4835077 -1.4652448  3.9704391 -1.5844819
##          PC6          PC7        PC8        PC9        PC10       PC11
## 4 -0.7095140 -0.100504703 -0.2867423  1.2781645  0.36473454  0.6561609
## 5 -0.4356058 -0.020956662 -0.6535577  1.1822973  0.32982542 -0.1822998
## 6 -0.4398309 -0.037024060 -1.1331343  0.2926639  0.63792229 -0.3598283
## 7 -0.5900704 -0.006554536 -1.2252729 -0.1996221  0.73051233 -2.4871566
## 8 -0.3458034 -0.166084128 -1.4341226 -0.6708020  0.64443425 -3.0548513
## 9 -2.5368759 -1.400295780  0.8337027 -0.3685297 -0.04790321 -0.2212499
## 
## Std. Errors:
##   (Intercept)       PC1       PC2       PC3       PC4       PC5       PC6
## 4   0.3533404 0.1503791 0.1633764 0.2060539 0.1864103 0.2040837 0.2136086
## 5   0.3316006 0.1401905 0.1532896 0.1960060 0.1723007 0.1937683 0.1926941
## 6   0.3305255 0.1398330 0.1529030 0.1962249 0.1724303 0.1935652 0.1921264
## 7   0.3333543 0.1416353 0.1547544 0.1990413 0.1844522 0.1972320 0.1983140
## 8   0.3517707 0.1485140 0.1631778 0.2098844 0.2095402 0.2094765 0.2126973
## 9   2.2690103 0.5442530 0.3550829 0.5853719 1.3480274 0.8587051 1.0117091
##         PC7       PC8       PC9      PC10      PC11
## 4 0.2796000 0.2989028 0.3419873 0.3941081 1.0080647
## 5 0.2652672 0.2835056 0.3220298 0.3671447 0.9782499
## 6 0.2649037 0.2833340 0.3216162 0.3667061 0.9858760
## 7 0.2679956 0.2864775 0.3285265 0.3719938 1.0253194
## 8 0.2799308 0.3033375 0.3528157 0.3957018 1.2347147
## 9 0.5725602 0.8965439 1.1226002 0.9963558 3.0200993
## 
## Residual Deviance: 10601.91 
## AIC: 10745.91
mlogit_output_pca1$coefficients
##   (Intercept)         PC1        PC2        PC3        PC4        PC5
## 4    2.027223 -0.28901446 -0.1348783  0.4341662 -0.5896798  0.3894355
## 5    4.739629  0.09133208  0.1104327  0.1367819 -0.1555566  0.5366869
## 6    5.446409 -0.27475013  0.1601901 -0.3398061  0.1960996  0.5177786
## 7    4.099425 -0.63715392  0.2594263 -0.6238430  0.6511807  0.2471959
## 8    2.242588 -0.63211224  0.3220157 -0.5947537  0.6968820  0.1836012
## 9   -6.256670 -1.83248810  0.4835077 -1.4652448  3.9704391 -1.5844819
##          PC6          PC7        PC8        PC9        PC10       PC11
## 4 -0.7095140 -0.100504703 -0.2867423  1.2781645  0.36473454  0.6561609
## 5 -0.4356058 -0.020956662 -0.6535577  1.1822973  0.32982542 -0.1822998
## 6 -0.4398309 -0.037024060 -1.1331343  0.2926639  0.63792229 -0.3598283
## 7 -0.5900704 -0.006554536 -1.2252729 -0.1996221  0.73051233 -2.4871566
## 8 -0.3458034 -0.166084128 -1.4341226 -0.6708020  0.64443425 -3.0548513
## 9 -2.5368759 -1.400295780  0.8337027 -0.3685297 -0.04790321 -0.2212499
mlogit_output_pca1$standard.errors
##   (Intercept)       PC1       PC2       PC3       PC4       PC5       PC6
## 4   0.3533404 0.1503791 0.1633764 0.2060539 0.1864103 0.2040837 0.2136086
## 5   0.3316006 0.1401905 0.1532896 0.1960060 0.1723007 0.1937683 0.1926941
## 6   0.3305255 0.1398330 0.1529030 0.1962249 0.1724303 0.1935652 0.1921264
## 7   0.3333543 0.1416353 0.1547544 0.1990413 0.1844522 0.1972320 0.1983140
## 8   0.3517707 0.1485140 0.1631778 0.2098844 0.2095402 0.2094765 0.2126973
## 9   2.2690103 0.5442530 0.3550829 0.5853719 1.3480274 0.8587051 1.0117091
##         PC7       PC8       PC9      PC10      PC11
## 4 0.2796000 0.2989028 0.3419873 0.3941081 1.0080647
## 5 0.2652672 0.2835056 0.3220298 0.3671447 0.9782499
## 6 0.2649037 0.2833340 0.3216162 0.3667061 0.9858760
## 7 0.2679956 0.2864775 0.3285265 0.3719938 1.0253194
## 8 0.2799308 0.3033375 0.3528157 0.3957018 1.2347147
## 9 0.5725602 0.8965439 1.1226002 0.9963558 3.0200993
z <- mlogit_output_pca1$coefficients/mlogit_output_pca1$standard.errors
p <- (1-pnorm(abs(z),0,1))*2

Accuracy of MULTINOMIAL MODEL USING Principal component

predictedML1 <- predict(mlogit_model_pca1,wine_white_pca,na.action =na.pass, type="probs")
predicted_class_pca_ML1 <- predict(mlogit_model_pca1,wine_white_pca)
cm_ols_pca <- table(predicted_class_pca_ML1, wine_white_pca$quality)
print(cm_ols_pca)
##                        
## predicted_class_pca_ML1    3    4    5    6    7    8    9
##                       3    2    0    0    0    0    0    0
##                       4    0   12    2    3    0    0    0
##                       5    8   88  772  406   41   10    0
##                       6    9   62  675 1644  626  117    1
##                       7    1    1    8  144  213   48    4
##                       8    0    0    0    0    0    0    0
##                       9    0    0    0    1    0    0    0
Accuracy_pca <- sum(diag(cm_ols_pca))/sum(cm_ols_pca)
Accuracy_pca
## [1] 0.539608