library(nnet)
library(MASS)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(dplyr)
##
## Attaching package: 'dplyr'
## The following object is masked from 'package:MASS':
##
## select
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(readxl)
wine <- read_xlsx("winequality-red.xlsx")
## Warning: Expecting numeric in K1361 / R1361C11: got '100.333.333.333.333'
## Warning: Expecting numeric in K1364 / R1364C11: got '100.333.333.333.333'
## Warning: Expecting numeric in K1441 / R1441C11: got '11.066.666.666.666.600'
## Warning: Expecting numeric in K1443 / R1443C11: got '956.666.666.666.667'
## Warning: Expecting numeric in K1477 / R1477C11: got '13.566.666.666.666.600'
## Warning: Expecting numeric in K1516 / R1516C11: got '923.333.333.333.333'
iris <- read_xlsx("Iris.xlsx")
sum(is.na(wine))
## [1] 6
wine$quality <- factor(wine$quality, ordered = TRUE)
wine$density <- as.numeric(wine$density)
## Warning: NAs introduced by coercion
wine <- na.omit(wine)
sum(is.na(iris))
## [1] 0
iris$Species <- factor(iris$Species)
iris <- na.omit(iris)
ggplot(wine, aes(x = quality)) +
geom_bar(fill = "skyblue") +
ggtitle("Distribusi Kualitas Wine")
ggplot(iris, aes(x = Species)) +
geom_bar(fill = "lightgreen") +
ggtitle("Distribusi Jumlah Data per Spesies") +
xlab("Spesies") +
ylab("Jumlah")
wine_numeric <- wine[, sapply(wine, is.numeric)]
wine_numeric <- na.omit(wine_numeric)
cor_matrix_wine <- cor(wine_numeric)
corrplot(cor_matrix_wine, method = "color", tl.cex = 0.7)
iris_numeric <- iris[, 2:5]
cor_matrix_iris <- cor(iris_numeric)
corrplot(cor_matrix_iris, method = "color", tl.cex = 0.8, tl.col = "black", addCoef.col = "black")
set.seed(123)
index <- createDataPartition(wine$quality, p = 0.8, list = FALSE)
train_data_wine <- wine[index, ]
test_data_wine <- wine[-index, ]
split <- createDataPartition(iris$Species, p = 0.7, list = FALSE)
train_data_iris <- iris[split, ]
test_data_iris <- iris[-split, ]
model_ordinal <- polr(quality ~ ., data = train_data_wine, Hess = TRUE)
summary(model_ordinal)
## Call:
## polr(formula = quality ~ ., data = train_data_wine, Hess = TRUE)
##
## Coefficients:
## Value Std. Error t value
## `fixed acidity` 0.07490 0.058984 1.270
## `volatile acidity` -3.85628 0.451272 -8.545
## `citric acid` -1.13212 0.519172 -2.181
## `residual sugar` 0.10278 0.048032 2.140
## chlorides -5.19991 1.579973 -3.291
## `free sulfur dioxide` 0.01235 0.007533 1.639
## `total sulfur dioxide` -0.01048 0.002699 -3.882
## density -0.01355 0.006897 -1.965
## pH -1.17872 0.565987 -2.083
## sulphates 2.80313 0.408678 6.859
## alcohol 0.93767 0.067853 13.819
##
## Intercepts:
## Value Std. Error t value
## 3|4 -0.5242 2.2549 -0.2325
## 4|5 1.4312 2.2276 0.6425
## 5|6 5.1941 2.2264 2.3330
## 6|7 8.0863 2.2380 3.6132
## 7|8 11.0880 2.2561 4.9147
##
## Residual Deviance: 2429.834
## AIC: 2461.834
pred_ordinal <- predict(model_ordinal, newdata = test_data_wine)
confusionMatrix(pred_ordinal, test_data_wine$quality)
## Confusion Matrix and Statistics
##
## Reference
## Prediction 3 4 5 6 7 8
## 3 0 0 0 0 0 0
## 4 0 0 1 0 0 0
## 5 1 7 101 40 2 0
## 6 1 3 33 79 28 1
## 7 0 0 1 7 9 2
## 8 0 0 0 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.5981
## 95% CI : (0.5418, 0.6526)
## No Information Rate : 0.4304
## P-Value [Acc > NIR] : 1.528e-09
##
## Kappa : 0.3344
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: 3 Class: 4 Class: 5 Class: 6 Class: 7 Class: 8
## Sensitivity 0.000000 0.000000 0.7426 0.6270 0.23077 0.000000
## Specificity 1.000000 0.996732 0.7222 0.6526 0.96390 1.000000
## Pos Pred Value NaN 0.000000 0.6689 0.5448 0.47368 NaN
## Neg Pred Value 0.993671 0.968254 0.7879 0.7251 0.89899 0.990506
## Prevalence 0.006329 0.031646 0.4304 0.3987 0.12342 0.009494
## Detection Rate 0.000000 0.000000 0.3196 0.2500 0.02848 0.000000
## Detection Prevalence 0.000000 0.003165 0.4778 0.4589 0.06013 0.000000
## Balanced Accuracy 0.500000 0.498366 0.7324 0.6398 0.59733 0.500000
model_multinom <- multinom(Species ~ SepalLengthCm + SepalWidthCm + PetalLengthCm + PetalWidthCm, data = train_data_iris)
## # weights: 18 (10 variable)
## initial value 115.354290
## iter 10 value 11.228287
## iter 20 value 2.406765
## iter 30 value 1.425637
## iter 40 value 1.278527
## iter 50 value 1.139458
## iter 60 value 1.046589
## iter 70 value 0.953613
## iter 80 value 0.932730
## iter 90 value 0.603091
## iter 100 value 0.536076
## final value 0.536076
## stopped after 100 iterations
summary(model_multinom)
## Call:
## multinom(formula = Species ~ SepalLengthCm + SepalWidthCm + PetalLengthCm +
## PetalWidthCm, data = train_data_iris)
##
## Coefficients:
## (Intercept) SepalLengthCm SepalWidthCm PetalLengthCm
## Iris-versicolor 64.26165 -17.40947 -18.33765 33.28642
## Iris-virginica -91.22158 -42.50208 -47.81531 95.41040
## PetalWidthCm
## Iris-versicolor -7.735199
## Iris-virginica 46.372820
##
## Std. Errors:
## (Intercept) SepalLengthCm SepalWidthCm PetalLengthCm
## Iris-versicolor 198.4690 334.9440 178.1277 167.3427
## Iris-virginica 181.1602 366.6241 200.9404 143.3183
## PetalWidthCm
## Iris-versicolor 92.37670
## Iris-virginica 85.53182
##
## Residual Deviance: 1.072151
## AIC: 21.07215
pred_multinom <- predict(model_multinom, newdata = test_data_iris)
confusionMatrix(pred_multinom, test_data_iris$Species)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Iris-setosa Iris-versicolor Iris-virginica
## Iris-setosa 15 0 0
## Iris-versicolor 0 14 0
## Iris-virginica 0 1 15
##
## Overall Statistics
##
## Accuracy : 0.9778
## 95% CI : (0.8823, 0.9994)
## No Information Rate : 0.3333
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 0.9667
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Iris-setosa Class: Iris-versicolor
## Sensitivity 1.0000 0.9333
## Specificity 1.0000 1.0000
## Pos Pred Value 1.0000 1.0000
## Neg Pred Value 1.0000 0.9677
## Prevalence 0.3333 0.3333
## Detection Rate 0.3333 0.3111
## Detection Prevalence 0.3333 0.3111
## Balanced Accuracy 1.0000 0.9667
## Class: Iris-virginica
## Sensitivity 1.0000
## Specificity 0.9667
## Pos Pred Value 0.9375
## Neg Pred Value 1.0000
## Prevalence 0.3333
## Detection Rate 0.3333
## Detection Prevalence 0.3556
## Balanced Accuracy 0.9833
Dataset IRIS
Memilki akurasi 96,67% dan Kappa 0.96 berarti memiliki sensitivitas tinggi terutama Iris-setosa diprediksi 100% benar dan Iris-versicolor dan Iris-virginica menunjukkan performa sangat baik dengan kesalahan minimal. Mengindikasikan kemampuan klasifikasi yang akurat terhadap semua jenis spesies, cocok digunakan untuk klasifikasi jenis bunga.
Dataset Wine Quality
Memiliki akurasi 59,81% dan Kappa 0.3344 berarti model cenderung bias terhadap kelas mayoritas seperti 5,6,7 dan memiliki keterbatasan dalam mendeteksi kelas minoritas seperti 3,4,8 karena memiliki sensitivity =0, performa yang tidak merata dan perlu perbaikan seperti balancing atau tunning model.
dengan evaluasi ini model multinomial lebih bagus dan stabil dalam membandungkan model ordinal dalam konteks klasifikasi pada kasus ini.