Regresi Logistik Multinomial Data
IRIS
Package
library(haven)
library(nnet)
library(readxl)
library(caret)## Loading required package: ggplot2
## Loading required package: lattice
library(arsenal)1. Input Data
data <- iris
data## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## 1 5.1 3.5 1.4 0.2 setosa
## 2 4.9 3.0 1.4 0.2 setosa
## 3 4.7 3.2 1.3 0.2 setosa
## 4 4.6 3.1 1.5 0.2 setosa
## 5 5.0 3.6 1.4 0.2 setosa
## 6 5.4 3.9 1.7 0.4 setosa
## 7 4.6 3.4 1.4 0.3 setosa
## 8 5.0 3.4 1.5 0.2 setosa
## 9 4.4 2.9 1.4 0.2 setosa
## 10 4.9 3.1 1.5 0.1 setosa
## 11 5.4 3.7 1.5 0.2 setosa
## 12 4.8 3.4 1.6 0.2 setosa
## 13 4.8 3.0 1.4 0.1 setosa
## 14 4.3 3.0 1.1 0.1 setosa
## 15 5.8 4.0 1.2 0.2 setosa
## 16 5.7 4.4 1.5 0.4 setosa
## 17 5.4 3.9 1.3 0.4 setosa
## 18 5.1 3.5 1.4 0.3 setosa
## 19 5.7 3.8 1.7 0.3 setosa
## 20 5.1 3.8 1.5 0.3 setosa
## 21 5.4 3.4 1.7 0.2 setosa
## 22 5.1 3.7 1.5 0.4 setosa
## 23 4.6 3.6 1.0 0.2 setosa
## 24 5.1 3.3 1.7 0.5 setosa
## 25 4.8 3.4 1.9 0.2 setosa
## 26 5.0 3.0 1.6 0.2 setosa
## 27 5.0 3.4 1.6 0.4 setosa
## 28 5.2 3.5 1.5 0.2 setosa
## 29 5.2 3.4 1.4 0.2 setosa
## 30 4.7 3.2 1.6 0.2 setosa
## 31 4.8 3.1 1.6 0.2 setosa
## 32 5.4 3.4 1.5 0.4 setosa
## 33 5.2 4.1 1.5 0.1 setosa
## 34 5.5 4.2 1.4 0.2 setosa
## 35 4.9 3.1 1.5 0.2 setosa
## 36 5.0 3.2 1.2 0.2 setosa
## 37 5.5 3.5 1.3 0.2 setosa
## 38 4.9 3.6 1.4 0.1 setosa
## 39 4.4 3.0 1.3 0.2 setosa
## 40 5.1 3.4 1.5 0.2 setosa
## 41 5.0 3.5 1.3 0.3 setosa
## 42 4.5 2.3 1.3 0.3 setosa
## 43 4.4 3.2 1.3 0.2 setosa
## 44 5.0 3.5 1.6 0.6 setosa
## 45 5.1 3.8 1.9 0.4 setosa
## 46 4.8 3.0 1.4 0.3 setosa
## 47 5.1 3.8 1.6 0.2 setosa
## 48 4.6 3.2 1.4 0.2 setosa
## 49 5.3 3.7 1.5 0.2 setosa
## 50 5.0 3.3 1.4 0.2 setosa
## 51 7.0 3.2 4.7 1.4 versicolor
## 52 6.4 3.2 4.5 1.5 versicolor
## 53 6.9 3.1 4.9 1.5 versicolor
## 54 5.5 2.3 4.0 1.3 versicolor
## 55 6.5 2.8 4.6 1.5 versicolor
## 56 5.7 2.8 4.5 1.3 versicolor
## 57 6.3 3.3 4.7 1.6 versicolor
## 58 4.9 2.4 3.3 1.0 versicolor
## 59 6.6 2.9 4.6 1.3 versicolor
## 60 5.2 2.7 3.9 1.4 versicolor
## 61 5.0 2.0 3.5 1.0 versicolor
## 62 5.9 3.0 4.2 1.5 versicolor
## 63 6.0 2.2 4.0 1.0 versicolor
## 64 6.1 2.9 4.7 1.4 versicolor
## 65 5.6 2.9 3.6 1.3 versicolor
## 66 6.7 3.1 4.4 1.4 versicolor
## 67 5.6 3.0 4.5 1.5 versicolor
## 68 5.8 2.7 4.1 1.0 versicolor
## 69 6.2 2.2 4.5 1.5 versicolor
## 70 5.6 2.5 3.9 1.1 versicolor
## 71 5.9 3.2 4.8 1.8 versicolor
## 72 6.1 2.8 4.0 1.3 versicolor
## 73 6.3 2.5 4.9 1.5 versicolor
## 74 6.1 2.8 4.7 1.2 versicolor
## 75 6.4 2.9 4.3 1.3 versicolor
## 76 6.6 3.0 4.4 1.4 versicolor
## 77 6.8 2.8 4.8 1.4 versicolor
## 78 6.7 3.0 5.0 1.7 versicolor
## 79 6.0 2.9 4.5 1.5 versicolor
## 80 5.7 2.6 3.5 1.0 versicolor
## 81 5.5 2.4 3.8 1.1 versicolor
## 82 5.5 2.4 3.7 1.0 versicolor
## 83 5.8 2.7 3.9 1.2 versicolor
## 84 6.0 2.7 5.1 1.6 versicolor
## 85 5.4 3.0 4.5 1.5 versicolor
## 86 6.0 3.4 4.5 1.6 versicolor
## 87 6.7 3.1 4.7 1.5 versicolor
## 88 6.3 2.3 4.4 1.3 versicolor
## 89 5.6 3.0 4.1 1.3 versicolor
## 90 5.5 2.5 4.0 1.3 versicolor
## 91 5.5 2.6 4.4 1.2 versicolor
## 92 6.1 3.0 4.6 1.4 versicolor
## 93 5.8 2.6 4.0 1.2 versicolor
## 94 5.0 2.3 3.3 1.0 versicolor
## 95 5.6 2.7 4.2 1.3 versicolor
## 96 5.7 3.0 4.2 1.2 versicolor
## 97 5.7 2.9 4.2 1.3 versicolor
## 98 6.2 2.9 4.3 1.3 versicolor
## 99 5.1 2.5 3.0 1.1 versicolor
## 100 5.7 2.8 4.1 1.3 versicolor
## 101 6.3 3.3 6.0 2.5 virginica
## 102 5.8 2.7 5.1 1.9 virginica
## 103 7.1 3.0 5.9 2.1 virginica
## 104 6.3 2.9 5.6 1.8 virginica
## 105 6.5 3.0 5.8 2.2 virginica
## 106 7.6 3.0 6.6 2.1 virginica
## 107 4.9 2.5 4.5 1.7 virginica
## 108 7.3 2.9 6.3 1.8 virginica
## 109 6.7 2.5 5.8 1.8 virginica
## 110 7.2 3.6 6.1 2.5 virginica
## 111 6.5 3.2 5.1 2.0 virginica
## 112 6.4 2.7 5.3 1.9 virginica
## 113 6.8 3.0 5.5 2.1 virginica
## 114 5.7 2.5 5.0 2.0 virginica
## 115 5.8 2.8 5.1 2.4 virginica
## 116 6.4 3.2 5.3 2.3 virginica
## 117 6.5 3.0 5.5 1.8 virginica
## 118 7.7 3.8 6.7 2.2 virginica
## 119 7.7 2.6 6.9 2.3 virginica
## 120 6.0 2.2 5.0 1.5 virginica
## 121 6.9 3.2 5.7 2.3 virginica
## 122 5.6 2.8 4.9 2.0 virginica
## 123 7.7 2.8 6.7 2.0 virginica
## 124 6.3 2.7 4.9 1.8 virginica
## 125 6.7 3.3 5.7 2.1 virginica
## 126 7.2 3.2 6.0 1.8 virginica
## 127 6.2 2.8 4.8 1.8 virginica
## 128 6.1 3.0 4.9 1.8 virginica
## 129 6.4 2.8 5.6 2.1 virginica
## 130 7.2 3.0 5.8 1.6 virginica
## 131 7.4 2.8 6.1 1.9 virginica
## 132 7.9 3.8 6.4 2.0 virginica
## 133 6.4 2.8 5.6 2.2 virginica
## 134 6.3 2.8 5.1 1.5 virginica
## 135 6.1 2.6 5.6 1.4 virginica
## 136 7.7 3.0 6.1 2.3 virginica
## 137 6.3 3.4 5.6 2.4 virginica
## 138 6.4 3.1 5.5 1.8 virginica
## 139 6.0 3.0 4.8 1.8 virginica
## 140 6.9 3.1 5.4 2.1 virginica
## 141 6.7 3.1 5.6 2.4 virginica
## 142 6.9 3.1 5.1 2.3 virginica
## 143 5.8 2.7 5.1 1.9 virginica
## 144 6.8 3.2 5.9 2.3 virginica
## 145 6.7 3.3 5.7 2.5 virginica
## 146 6.7 3.0 5.2 2.3 virginica
## 147 6.3 2.5 5.0 1.9 virginica
## 148 6.5 3.0 5.2 2.0 virginica
## 149 6.2 3.4 5.4 2.3 virginica
## 150 5.9 3.0 5.1 1.8 virginica
summary(data)## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.350 Median :1.300
## Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
## 3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :50
## versicolor:50
## virginica :50
##
##
##
1.1. Struktur data
str(data)## 'data.frame': 150 obs. of 5 variables:
## $ Sepal.Length: num 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
## $ Sepal.Width : num 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
## $ Petal.Length: num 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
## $ Petal.Width : num 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
## $ Species : Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
library(arsenal)
tab<-tableby(Species~ .,data = data)
summary(tab,text=TRUE)##
##
## | | setosa (N=50) | versicolor (N=50) | virginica (N=50) | Total (N=150) | p value|
## |:------------|:-------------:|:-----------------:|:----------------:|:-------------:|-------:|
## |Sepal.Length | | | | | < 0.001|
## |- Mean (SD) | 5.006 (0.352) | 5.936 (0.516) | 6.588 (0.636) | 5.843 (0.828) | |
## |- Range | 4.300 - 5.800 | 4.900 - 7.000 | 4.900 - 7.900 | 4.300 - 7.900 | |
## |Sepal.Width | | | | | < 0.001|
## |- Mean (SD) | 3.428 (0.379) | 2.770 (0.314) | 2.974 (0.322) | 3.057 (0.436) | |
## |- Range | 2.300 - 4.400 | 2.000 - 3.400 | 2.200 - 3.800 | 2.000 - 4.400 | |
## |Petal.Length | | | | | < 0.001|
## |- Mean (SD) | 1.462 (0.174) | 4.260 (0.470) | 5.552 (0.552) | 3.758 (1.765) | |
## |- Range | 1.000 - 1.900 | 3.000 - 5.100 | 4.500 - 6.900 | 1.000 - 6.900 | |
## |Petal.Width | | | | | < 0.001|
## |- Mean (SD) | 0.246 (0.105) | 1.326 (0.198) | 2.026 (0.275) | 1.199 (0.762) | |
## |- Range | 0.100 - 0.600 | 1.000 - 1.800 | 1.400 - 2.500 | 0.100 - 2.500 | |
2. EDA
2.1. Box plot
boxplot(Petal.Width~Species, data)boxplot(Sepal.Width~Species, data)boxplot(Petal.Length~Species, data)boxplot(Sepal.Length~Species, data)library(ggplot2)
ggplot(data = data) +
aes(x = Species, y = Sepal.Length, color = Species) +
geom_boxplot()ggplot(data,aes(x=Petal.Length,y=Sepal.Length))+
geom_point(aes(color=Species))ggplot(data,aes(x=Petal.Width,y=Sepal.Width))+
geom_point(aes(color=Species))2.2. Corr Plot
library(corrplot)
datacor <- data[,1:4]
corrplot(cor(datacor), method = "number")3. Partition data
set.seed(1881)
acak <- createDataPartition(data$Species, p=0.8, list=FALSE)
data_train <- data[acak,]
data_test <- data[-acak,]summary(data_train)## Sepal.Length Sepal.Width Petal.Length Petal.Width
## Min. :4.300 Min. :2.200 Min. :1.000 Min. :0.100
## 1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.500 1st Qu.:0.300
## Median :5.800 Median :3.000 Median :4.400 Median :1.300
## Mean :5.857 Mean :3.056 Mean :3.763 Mean :1.199
## 3rd Qu.:6.500 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
## Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
## Species
## setosa :40
## versicolor:40
## virginica :40
##
##
##
summary(data_test)## Sepal.Length Sepal.Width Petal.Length Petal.Width Species
## Min. :4.600 Min. :2.000 Min. :1.40 Min. :0.20 setosa :10
## 1st Qu.:5.100 1st Qu.:2.900 1st Qu.:1.60 1st Qu.:0.25 versicolor:10
## Median :5.800 Median :3.000 Median :4.30 Median :1.30 virginica :10
## Mean :5.790 Mean :3.063 Mean :3.74 Mean :1.20
## 3rd Qu.:6.275 3rd Qu.:3.375 3rd Qu.:5.10 3rd Qu.:1.80
## Max. :7.700 Max. :4.200 Max. :6.30 Max. :2.40
4. Create Model
model <- multinom(Species~., data = data_train)## # weights: 18 (10 variable)
## initial value 131.833475
## iter 10 value 12.628120
## iter 20 value 4.200728
## iter 30 value 3.458753
## iter 40 value 3.150455
## iter 50 value 2.811466
## iter 60 value 2.753638
## iter 70 value 2.688681
## iter 80 value 2.622174
## iter 90 value 2.493981
## iter 100 value 2.460290
## final value 2.460290
## stopped after 100 iterations
summary(model)## Call:
## multinom(formula = Species ~ ., data = data_train)
##
## Coefficients:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 32.66810 -10.33274 -10.46871 22.18676 -8.427008
## virginica -45.21229 -14.60786 -22.88522 36.65793 31.876629
##
## Std. Errors:
## (Intercept) Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 49.10111 80.90139 76.99591 210.7449 115.1398
## virginica 53.16903 81.15352 77.80899 211.3832 117.0232
##
## Residual Deviance: 4.920579
## AIC: 24.92058
4.1. Uji multicol
library(car)## Loading required package: carData
vif(model)## Warning in vif.default(model): No intercept: vifs may not be sensible.
## Sepal.Length Sepal.Width Petal.Length Petal.Width
## 9.501761e+13 3.213028e+13 -3.459481e+15 -2.927140e+15
4.2. Uji simultan
library(lmtest)## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
lrtest(model)## # weights: 6 (2 variable)
## initial value 131.833475
## final value 131.833475
## converged
## Likelihood ratio test
##
## Model 1: Species ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width
## Model 2: Species ~ 1
## #Df LogLik Df Chisq Pr(>Chisq)
## 1 10 -2.46
## 2 2 -131.83 -8 258.75 < 2.2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
4.3. Hitung p-value menggunakan z test
z <- summary(model)$coefficients/summary(model)$standard.errors
p <- (1-pnorm(abs(z), 0, 1))*2
data.frame(p)## X.Intercept. Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 0.5058439 0.8983704 0.8918494 0.9161554 0.9416554
## virginica 0.3951304 0.8571504 0.7686658 0.8623218 0.7853177
5. Prediction of Data Test
predict_prob = predict(model, data_test, type = "prob")
head(predict_prob,10)## setosa versicolor virginica
## 1 1.0000000 1.401770e-12 1.908013e-62
## 25 0.9999942 5.825789e-06 1.373640e-51
## 27 1.0000000 1.758853e-10 7.273125e-55
## 30 0.9999998 1.708937e-07 9.637643e-54
## 32 1.0000000 3.066832e-13 5.395637e-59
## 34 1.0000000 1.476198e-17 6.105041e-72
## 47 1.0000000 5.126772e-12 3.040681e-62
## 48 1.0000000 5.680066e-09 2.718409e-56
## 49 1.0000000 2.011153e-13 4.130546e-64
## 50 1.0000000 3.196852e-11 7.994246e-60
data.frame(predict_prob, data_test$Species)## setosa versicolor virginica data_test.Species
## 1 1.000000e+00 1.401770e-12 1.908013e-62 setosa
## 25 9.999942e-01 5.825789e-06 1.373640e-51 setosa
## 27 1.000000e+00 1.758853e-10 7.273125e-55 setosa
## 30 9.999998e-01 1.708937e-07 9.637643e-54 setosa
## 32 1.000000e+00 3.066832e-13 5.395637e-59 setosa
## 34 1.000000e+00 1.476198e-17 6.105041e-72 setosa
## 47 1.000000e+00 5.126772e-12 3.040681e-62 setosa
## 48 1.000000e+00 5.680066e-09 2.718409e-56 setosa
## 49 1.000000e+00 2.011153e-13 4.130546e-64 setosa
## 50 1.000000e+00 3.196852e-11 7.994246e-60 setosa
## 61 1.896396e-13 1.000000e+00 4.065294e-15 versicolor
## 64 1.611357e-14 9.999998e-01 1.806094e-07 versicolor
## 65 1.572655e-06 9.999984e-01 3.322639e-15 versicolor
## 69 5.828223e-15 9.978211e-01 2.178906e-03 versicolor
## 71 1.039009e-13 6.959929e-01 3.040071e-01 versicolor
## 75 1.100735e-09 1.000000e+00 2.725705e-12 versicolor
## 79 1.126016e-12 9.999991e-01 8.626235e-07 versicolor
## 94 3.706787e-10 1.000000e+00 5.425197e-18 versicolor
## 96 8.968000e-12 1.000000e+00 6.562346e-14 versicolor
## 98 1.393774e-10 1.000000e+00 6.409303e-12 versicolor
## 102 5.879981e-25 6.951111e-07 9.999993e-01 virginica
## 105 7.844102e-33 1.285365e-13 1.000000e+00 virginica
## 108 3.575274e-31 8.207242e-09 1.000000e+00 virginica
## 115 6.939798e-31 4.260626e-15 1.000000e+00 virginica
## 124 3.215796e-17 5.958040e-03 9.940420e-01 virginica
## 134 1.444308e-17 9.951382e-01 4.861807e-03 virginica
## 136 2.223268e-31 5.026083e-15 1.000000e+00 virginica
## 138 3.694589e-22 2.235571e-04 9.997764e-01 virginica
## 144 6.443826e-32 2.320971e-14 1.000000e+00 virginica
## 149 8.925453e-26 2.968465e-11 1.000000e+00 virginica
5.1. Confusion Matrix
prediksi.test <- predict(model, data_test,type = "class")
data_test$Species<-as.factor(data_test$Species)
confusionMatrix(as.factor(prediksi.test),
data_test$Species)## Confusion Matrix and Statistics
##
## Reference
## Prediction setosa versicolor virginica
## setosa 10 0 0
## versicolor 0 10 1
## virginica 0 0 9
##
## Overall Statistics
##
## Accuracy : 0.9667
## 95% CI : (0.8278, 0.9992)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : 2.963e-13
##
## Kappa : 0.95
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: setosa Class: versicolor Class: virginica
## Sensitivity 1.0000 1.0000 0.9000
## Specificity 1.0000 0.9500 1.0000
## Pos Pred Value 1.0000 0.9091 1.0000
## Neg Pred Value 1.0000 1.0000 0.9524
## Prevalence 0.3333 0.3333 0.3333
## Detection Rate 0.3333 0.3333 0.3000
## Detection Prevalence 0.3333 0.3667 0.3000
## Balanced Accuracy 1.0000 0.9750 0.9500
6. Interpretasi koefisien regresi
data.frame(summary(model)$coefficients)## X.Intercept. Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 32.66810 -10.33274 -10.46871 22.18676 -8.427008
## virginica -45.21229 -14.60786 -22.88522 36.65793 31.876629
# odds = exponensial parameter model
odds = data.frame(exp(summary(model)$coefficients))
round(odds, digits = 6)## X.Intercept. Sepal.Length Sepal.Width Petal.Length Petal.Width
## versicolor 1.540189e+14 3.3e-05 2.8e-05 4.321051e+09 2.190000e-04
## virginica 0.000000e+00 0.0e+00 0.0e+00 8.324087e+15 6.979815e+13
7. Predict new data
| Input | Nilai |
|---|---|
| Sepal.Length | 4.2 |
| Sepal.Width | 2.1 |
| Petal.Length | 5.7 |
| Petal.Width | 1.8 |
# Prediksi data baru
databaru = data.frame(Sepal.Length = 1.2, Sepal.Width = 2.1, Petal.Length = 5.7, Petal.Width = 1.8)
predict(model, newdata = databaru, "probs")## setosa versicolor virginica
## 2.852920e-68 1.110403e-20 1.000000e+00