Regresi Logistik Multinomial Data IRIS

Package

library(haven)
library(nnet)
library(readxl)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(arsenal)

1. Input Data

data <- iris
data
##     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
## 1            5.1         3.5          1.4         0.2     setosa
## 2            4.9         3.0          1.4         0.2     setosa
## 3            4.7         3.2          1.3         0.2     setosa
## 4            4.6         3.1          1.5         0.2     setosa
## 5            5.0         3.6          1.4         0.2     setosa
## 6            5.4         3.9          1.7         0.4     setosa
## 7            4.6         3.4          1.4         0.3     setosa
## 8            5.0         3.4          1.5         0.2     setosa
## 9            4.4         2.9          1.4         0.2     setosa
## 10           4.9         3.1          1.5         0.1     setosa
## 11           5.4         3.7          1.5         0.2     setosa
## 12           4.8         3.4          1.6         0.2     setosa
## 13           4.8         3.0          1.4         0.1     setosa
## 14           4.3         3.0          1.1         0.1     setosa
## 15           5.8         4.0          1.2         0.2     setosa
## 16           5.7         4.4          1.5         0.4     setosa
## 17           5.4         3.9          1.3         0.4     setosa
## 18           5.1         3.5          1.4         0.3     setosa
## 19           5.7         3.8          1.7         0.3     setosa
## 20           5.1         3.8          1.5         0.3     setosa
## 21           5.4         3.4          1.7         0.2     setosa
## 22           5.1         3.7          1.5         0.4     setosa
## 23           4.6         3.6          1.0         0.2     setosa
## 24           5.1         3.3          1.7         0.5     setosa
## 25           4.8         3.4          1.9         0.2     setosa
## 26           5.0         3.0          1.6         0.2     setosa
## 27           5.0         3.4          1.6         0.4     setosa
## 28           5.2         3.5          1.5         0.2     setosa
## 29           5.2         3.4          1.4         0.2     setosa
## 30           4.7         3.2          1.6         0.2     setosa
## 31           4.8         3.1          1.6         0.2     setosa
## 32           5.4         3.4          1.5         0.4     setosa
## 33           5.2         4.1          1.5         0.1     setosa
## 34           5.5         4.2          1.4         0.2     setosa
## 35           4.9         3.1          1.5         0.2     setosa
## 36           5.0         3.2          1.2         0.2     setosa
## 37           5.5         3.5          1.3         0.2     setosa
## 38           4.9         3.6          1.4         0.1     setosa
## 39           4.4         3.0          1.3         0.2     setosa
## 40           5.1         3.4          1.5         0.2     setosa
## 41           5.0         3.5          1.3         0.3     setosa
## 42           4.5         2.3          1.3         0.3     setosa
## 43           4.4         3.2          1.3         0.2     setosa
## 44           5.0         3.5          1.6         0.6     setosa
## 45           5.1         3.8          1.9         0.4     setosa
## 46           4.8         3.0          1.4         0.3     setosa
## 47           5.1         3.8          1.6         0.2     setosa
## 48           4.6         3.2          1.4         0.2     setosa
## 49           5.3         3.7          1.5         0.2     setosa
## 50           5.0         3.3          1.4         0.2     setosa
## 51           7.0         3.2          4.7         1.4 versicolor
## 52           6.4         3.2          4.5         1.5 versicolor
## 53           6.9         3.1          4.9         1.5 versicolor
## 54           5.5         2.3          4.0         1.3 versicolor
## 55           6.5         2.8          4.6         1.5 versicolor
## 56           5.7         2.8          4.5         1.3 versicolor
## 57           6.3         3.3          4.7         1.6 versicolor
## 58           4.9         2.4          3.3         1.0 versicolor
## 59           6.6         2.9          4.6         1.3 versicolor
## 60           5.2         2.7          3.9         1.4 versicolor
## 61           5.0         2.0          3.5         1.0 versicolor
## 62           5.9         3.0          4.2         1.5 versicolor
## 63           6.0         2.2          4.0         1.0 versicolor
## 64           6.1         2.9          4.7         1.4 versicolor
## 65           5.6         2.9          3.6         1.3 versicolor
## 66           6.7         3.1          4.4         1.4 versicolor
## 67           5.6         3.0          4.5         1.5 versicolor
## 68           5.8         2.7          4.1         1.0 versicolor
## 69           6.2         2.2          4.5         1.5 versicolor
## 70           5.6         2.5          3.9         1.1 versicolor
## 71           5.9         3.2          4.8         1.8 versicolor
## 72           6.1         2.8          4.0         1.3 versicolor
## 73           6.3         2.5          4.9         1.5 versicolor
## 74           6.1         2.8          4.7         1.2 versicolor
## 75           6.4         2.9          4.3         1.3 versicolor
## 76           6.6         3.0          4.4         1.4 versicolor
## 77           6.8         2.8          4.8         1.4 versicolor
## 78           6.7         3.0          5.0         1.7 versicolor
## 79           6.0         2.9          4.5         1.5 versicolor
## 80           5.7         2.6          3.5         1.0 versicolor
## 81           5.5         2.4          3.8         1.1 versicolor
## 82           5.5         2.4          3.7         1.0 versicolor
## 83           5.8         2.7          3.9         1.2 versicolor
## 84           6.0         2.7          5.1         1.6 versicolor
## 85           5.4         3.0          4.5         1.5 versicolor
## 86           6.0         3.4          4.5         1.6 versicolor
## 87           6.7         3.1          4.7         1.5 versicolor
## 88           6.3         2.3          4.4         1.3 versicolor
## 89           5.6         3.0          4.1         1.3 versicolor
## 90           5.5         2.5          4.0         1.3 versicolor
## 91           5.5         2.6          4.4         1.2 versicolor
## 92           6.1         3.0          4.6         1.4 versicolor
## 93           5.8         2.6          4.0         1.2 versicolor
## 94           5.0         2.3          3.3         1.0 versicolor
## 95           5.6         2.7          4.2         1.3 versicolor
## 96           5.7         3.0          4.2         1.2 versicolor
## 97           5.7         2.9          4.2         1.3 versicolor
## 98           6.2         2.9          4.3         1.3 versicolor
## 99           5.1         2.5          3.0         1.1 versicolor
## 100          5.7         2.8          4.1         1.3 versicolor
## 101          6.3         3.3          6.0         2.5  virginica
## 102          5.8         2.7          5.1         1.9  virginica
## 103          7.1         3.0          5.9         2.1  virginica
## 104          6.3         2.9          5.6         1.8  virginica
## 105          6.5         3.0          5.8         2.2  virginica
## 106          7.6         3.0          6.6         2.1  virginica
## 107          4.9         2.5          4.5         1.7  virginica
## 108          7.3         2.9          6.3         1.8  virginica
## 109          6.7         2.5          5.8         1.8  virginica
## 110          7.2         3.6          6.1         2.5  virginica
## 111          6.5         3.2          5.1         2.0  virginica
## 112          6.4         2.7          5.3         1.9  virginica
## 113          6.8         3.0          5.5         2.1  virginica
## 114          5.7         2.5          5.0         2.0  virginica
## 115          5.8         2.8          5.1         2.4  virginica
## 116          6.4         3.2          5.3         2.3  virginica
## 117          6.5         3.0          5.5         1.8  virginica
## 118          7.7         3.8          6.7         2.2  virginica
## 119          7.7         2.6          6.9         2.3  virginica
## 120          6.0         2.2          5.0         1.5  virginica
## 121          6.9         3.2          5.7         2.3  virginica
## 122          5.6         2.8          4.9         2.0  virginica
## 123          7.7         2.8          6.7         2.0  virginica
## 124          6.3         2.7          4.9         1.8  virginica
## 125          6.7         3.3          5.7         2.1  virginica
## 126          7.2         3.2          6.0         1.8  virginica
## 127          6.2         2.8          4.8         1.8  virginica
## 128          6.1         3.0          4.9         1.8  virginica
## 129          6.4         2.8          5.6         2.1  virginica
## 130          7.2         3.0          5.8         1.6  virginica
## 131          7.4         2.8          6.1         1.9  virginica
## 132          7.9         3.8          6.4         2.0  virginica
## 133          6.4         2.8          5.6         2.2  virginica
## 134          6.3         2.8          5.1         1.5  virginica
## 135          6.1         2.6          5.6         1.4  virginica
## 136          7.7         3.0          6.1         2.3  virginica
## 137          6.3         3.4          5.6         2.4  virginica
## 138          6.4         3.1          5.5         1.8  virginica
## 139          6.0         3.0          4.8         1.8  virginica
## 140          6.9         3.1          5.4         2.1  virginica
## 141          6.7         3.1          5.6         2.4  virginica
## 142          6.9         3.1          5.1         2.3  virginica
## 143          5.8         2.7          5.1         1.9  virginica
## 144          6.8         3.2          5.9         2.3  virginica
## 145          6.7         3.3          5.7         2.5  virginica
## 146          6.7         3.0          5.2         2.3  virginica
## 147          6.3         2.5          5.0         1.9  virginica
## 148          6.5         3.0          5.2         2.0  virginica
## 149          6.2         3.4          5.4         2.3  virginica
## 150          5.9         3.0          5.1         1.8  virginica
summary(data)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.000   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.600   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.350   Median :1.300  
##  Mean   :5.843   Mean   :3.057   Mean   :3.758   Mean   :1.199  
##  3rd Qu.:6.400   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :50  
##  versicolor:50  
##  virginica :50  
##                 
##                 
## 

1.1. Struktur data

str(data)
## 'data.frame':    150 obs. of  5 variables:
##  $ Sepal.Length: num  5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##  $ Sepal.Width : num  3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal.Length: num  1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##  $ Petal.Width : num  0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##  $ Species     : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(arsenal)
tab<-tableby(Species~ .,data = data)
summary(tab,text=TRUE)
## 
## 
## |             | setosa (N=50) | versicolor (N=50) | virginica (N=50) | Total (N=150) | p value|
## |:------------|:-------------:|:-----------------:|:----------------:|:-------------:|-------:|
## |Sepal.Length |               |                   |                  |               | < 0.001|
## |-  Mean (SD) | 5.006 (0.352) |   5.936 (0.516)   |  6.588 (0.636)   | 5.843 (0.828) |        |
## |-  Range     | 4.300 - 5.800 |   4.900 - 7.000   |  4.900 - 7.900   | 4.300 - 7.900 |        |
## |Sepal.Width  |               |                   |                  |               | < 0.001|
## |-  Mean (SD) | 3.428 (0.379) |   2.770 (0.314)   |  2.974 (0.322)   | 3.057 (0.436) |        |
## |-  Range     | 2.300 - 4.400 |   2.000 - 3.400   |  2.200 - 3.800   | 2.000 - 4.400 |        |
## |Petal.Length |               |                   |                  |               | < 0.001|
## |-  Mean (SD) | 1.462 (0.174) |   4.260 (0.470)   |  5.552 (0.552)   | 3.758 (1.765) |        |
## |-  Range     | 1.000 - 1.900 |   3.000 - 5.100   |  4.500 - 6.900   | 1.000 - 6.900 |        |
## |Petal.Width  |               |                   |                  |               | < 0.001|
## |-  Mean (SD) | 0.246 (0.105) |   1.326 (0.198)   |  2.026 (0.275)   | 1.199 (0.762) |        |
## |-  Range     | 0.100 - 0.600 |   1.000 - 1.800   |  1.400 - 2.500   | 0.100 - 2.500 |        |

2. EDA

2.1. Box plot

boxplot(Petal.Width~Species, data)

boxplot(Sepal.Width~Species, data)

boxplot(Petal.Length~Species, data)

boxplot(Sepal.Length~Species, data)

library(ggplot2)
ggplot(data = data) +
  aes(x = Species, y = Sepal.Length, color = Species) +
  geom_boxplot()

ggplot(data,aes(x=Petal.Length,y=Sepal.Length))+
  geom_point(aes(color=Species))

ggplot(data,aes(x=Petal.Width,y=Sepal.Width))+
  geom_point(aes(color=Species))

2.2. Corr Plot

library(corrplot)
datacor <- data[,1:4]
corrplot(cor(datacor), method = "number")

3. Partition data

set.seed(1881)

acak <- createDataPartition(data$Species, p=0.8, list=FALSE)
data_train <- data[acak,]
data_test <- data[-acak,]
summary(data_train)
##   Sepal.Length    Sepal.Width     Petal.Length    Petal.Width   
##  Min.   :4.300   Min.   :2.200   Min.   :1.000   Min.   :0.100  
##  1st Qu.:5.100   1st Qu.:2.800   1st Qu.:1.500   1st Qu.:0.300  
##  Median :5.800   Median :3.000   Median :4.400   Median :1.300  
##  Mean   :5.857   Mean   :3.056   Mean   :3.763   Mean   :1.199  
##  3rd Qu.:6.500   3rd Qu.:3.300   3rd Qu.:5.100   3rd Qu.:1.800  
##  Max.   :7.900   Max.   :4.400   Max.   :6.900   Max.   :2.500  
##        Species  
##  setosa    :40  
##  versicolor:40  
##  virginica :40  
##                 
##                 
## 
summary(data_test)
##   Sepal.Length    Sepal.Width     Petal.Length   Petal.Width         Species  
##  Min.   :4.600   Min.   :2.000   Min.   :1.40   Min.   :0.20   setosa    :10  
##  1st Qu.:5.100   1st Qu.:2.900   1st Qu.:1.60   1st Qu.:0.25   versicolor:10  
##  Median :5.800   Median :3.000   Median :4.30   Median :1.30   virginica :10  
##  Mean   :5.790   Mean   :3.063   Mean   :3.74   Mean   :1.20                  
##  3rd Qu.:6.275   3rd Qu.:3.375   3rd Qu.:5.10   3rd Qu.:1.80                  
##  Max.   :7.700   Max.   :4.200   Max.   :6.30   Max.   :2.40

4. Create Model

model <- multinom(Species~., data = data_train)
## # weights:  18 (10 variable)
## initial  value 131.833475 
## iter  10 value 12.628120
## iter  20 value 4.200728
## iter  30 value 3.458753
## iter  40 value 3.150455
## iter  50 value 2.811466
## iter  60 value 2.753638
## iter  70 value 2.688681
## iter  80 value 2.622174
## iter  90 value 2.493981
## iter 100 value 2.460290
## final  value 2.460290 
## stopped after 100 iterations
summary(model)
## Call:
## multinom(formula = Species ~ ., data = data_train)
## 
## Coefficients:
##            (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor    32.66810    -10.33274   -10.46871     22.18676   -8.427008
## virginica    -45.21229    -14.60786   -22.88522     36.65793   31.876629
## 
## Std. Errors:
##            (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor    49.10111     80.90139    76.99591     210.7449    115.1398
## virginica     53.16903     81.15352    77.80899     211.3832    117.0232
## 
## Residual Deviance: 4.920579 
## AIC: 24.92058

4.1. Uji multicol

library(car)
## Loading required package: carData
vif(model)
## Warning in vif.default(model): No intercept: vifs may not be sensible.
##  Sepal.Length   Sepal.Width  Petal.Length   Petal.Width 
##  9.501761e+13  3.213028e+13 -3.459481e+15 -2.927140e+15

4.2. Uji simultan

library(lmtest)
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
lrtest(model)
## # weights:  6 (2 variable)
## initial  value 131.833475 
## final  value 131.833475 
## converged
## Likelihood ratio test
## 
## Model 1: Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
## Model 2: Species ~ 1
##   #Df  LogLik Df  Chisq Pr(>Chisq)    
## 1  10   -2.46                         
## 2   2 -131.83 -8 258.75  < 2.2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

4.3. Hitung p-value menggunakan z test

z <- summary(model)$coefficients/summary(model)$standard.errors
p <- (1-pnorm(abs(z), 0, 1))*2
data.frame(p)
##            X.Intercept. Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor    0.5058439    0.8983704   0.8918494    0.9161554   0.9416554
## virginica     0.3951304    0.8571504   0.7686658    0.8623218   0.7853177

5. Prediction of Data Test

predict_prob = predict(model, data_test, type = "prob")
head(predict_prob,10)
##       setosa   versicolor    virginica
## 1  1.0000000 1.401770e-12 1.908013e-62
## 25 0.9999942 5.825789e-06 1.373640e-51
## 27 1.0000000 1.758853e-10 7.273125e-55
## 30 0.9999998 1.708937e-07 9.637643e-54
## 32 1.0000000 3.066832e-13 5.395637e-59
## 34 1.0000000 1.476198e-17 6.105041e-72
## 47 1.0000000 5.126772e-12 3.040681e-62
## 48 1.0000000 5.680066e-09 2.718409e-56
## 49 1.0000000 2.011153e-13 4.130546e-64
## 50 1.0000000 3.196852e-11 7.994246e-60
data.frame(predict_prob, data_test$Species)
##           setosa   versicolor    virginica data_test.Species
## 1   1.000000e+00 1.401770e-12 1.908013e-62            setosa
## 25  9.999942e-01 5.825789e-06 1.373640e-51            setosa
## 27  1.000000e+00 1.758853e-10 7.273125e-55            setosa
## 30  9.999998e-01 1.708937e-07 9.637643e-54            setosa
## 32  1.000000e+00 3.066832e-13 5.395637e-59            setosa
## 34  1.000000e+00 1.476198e-17 6.105041e-72            setosa
## 47  1.000000e+00 5.126772e-12 3.040681e-62            setosa
## 48  1.000000e+00 5.680066e-09 2.718409e-56            setosa
## 49  1.000000e+00 2.011153e-13 4.130546e-64            setosa
## 50  1.000000e+00 3.196852e-11 7.994246e-60            setosa
## 61  1.896396e-13 1.000000e+00 4.065294e-15        versicolor
## 64  1.611357e-14 9.999998e-01 1.806094e-07        versicolor
## 65  1.572655e-06 9.999984e-01 3.322639e-15        versicolor
## 69  5.828223e-15 9.978211e-01 2.178906e-03        versicolor
## 71  1.039009e-13 6.959929e-01 3.040071e-01        versicolor
## 75  1.100735e-09 1.000000e+00 2.725705e-12        versicolor
## 79  1.126016e-12 9.999991e-01 8.626235e-07        versicolor
## 94  3.706787e-10 1.000000e+00 5.425197e-18        versicolor
## 96  8.968000e-12 1.000000e+00 6.562346e-14        versicolor
## 98  1.393774e-10 1.000000e+00 6.409303e-12        versicolor
## 102 5.879981e-25 6.951111e-07 9.999993e-01         virginica
## 105 7.844102e-33 1.285365e-13 1.000000e+00         virginica
## 108 3.575274e-31 8.207242e-09 1.000000e+00         virginica
## 115 6.939798e-31 4.260626e-15 1.000000e+00         virginica
## 124 3.215796e-17 5.958040e-03 9.940420e-01         virginica
## 134 1.444308e-17 9.951382e-01 4.861807e-03         virginica
## 136 2.223268e-31 5.026083e-15 1.000000e+00         virginica
## 138 3.694589e-22 2.235571e-04 9.997764e-01         virginica
## 144 6.443826e-32 2.320971e-14 1.000000e+00         virginica
## 149 8.925453e-26 2.968465e-11 1.000000e+00         virginica

5.1. Confusion Matrix

prediksi.test <- predict(model, data_test,type = "class")
data_test$Species<-as.factor(data_test$Species)
confusionMatrix(as.factor(prediksi.test), 
                data_test$Species)
## Confusion Matrix and Statistics
## 
##             Reference
## Prediction   setosa versicolor virginica
##   setosa         10          0         0
##   versicolor      0         10         1
##   virginica       0          0         9
## 
## Overall Statistics
##                                           
##                Accuracy : 0.9667          
##                  95% CI : (0.8278, 0.9992)
##     No Information Rate : 0.3333          
##     P-Value [Acc > NIR] : 2.963e-13       
##                                           
##                   Kappa : 0.95            
##                                           
##  Mcnemar's Test P-Value : NA              
## 
## Statistics by Class:
## 
##                      Class: setosa Class: versicolor Class: virginica
## Sensitivity                 1.0000            1.0000           0.9000
## Specificity                 1.0000            0.9500           1.0000
## Pos Pred Value              1.0000            0.9091           1.0000
## Neg Pred Value              1.0000            1.0000           0.9524
## Prevalence                  0.3333            0.3333           0.3333
## Detection Rate              0.3333            0.3333           0.3000
## Detection Prevalence        0.3333            0.3667           0.3000
## Balanced Accuracy           1.0000            0.9750           0.9500

6. Interpretasi koefisien regresi

data.frame(summary(model)$coefficients)
##            X.Intercept. Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor     32.66810    -10.33274   -10.46871     22.18676   -8.427008
## virginica     -45.21229    -14.60786   -22.88522     36.65793   31.876629
# odds = exponensial parameter model
odds = data.frame(exp(summary(model)$coefficients))

round(odds, digits = 6)
##            X.Intercept. Sepal.Length Sepal.Width Petal.Length  Petal.Width
## versicolor 1.540189e+14      3.3e-05     2.8e-05 4.321051e+09 2.190000e-04
## virginica  0.000000e+00      0.0e+00     0.0e+00 8.324087e+15 6.979815e+13

7. Predict new data

Input Nilai
Sepal.Length 4.2
Sepal.Width 2.1
Petal.Length 5.7
Petal.Width 1.8
# Prediksi data baru
databaru = data.frame(Sepal.Length = 1.2, Sepal.Width = 2.1, Petal.Length = 5.7, Petal.Width = 1.8)
predict(model, newdata = databaru, "probs")
##       setosa   versicolor    virginica 
## 2.852920e-68 1.110403e-20 1.000000e+00