Pendahuluan
Dokumen ini membahas implementasi dan interpresentasi dari model
Multinomial Logistic Regression. Data yang digunakan adalah Data Wine
yang di dapat dari situs UCI Meshine Learning Repository. Data Wine
merupakan data dari hasil analisis kimia anggur yang ditanam di wilayah
yang sama di Italia tetapi berasal dari tiga kultivar yang berbeda.
Analisis tersebut menentukan jumlah 13 unsur yang ditemukan di
masing-masing dari tiga jenis anggur.
Anda dapat mengunduh dataset di sini.
Import Library yang di gunakan
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nnet)
library(broom)
library(kableExtra)
## Warning: package 'kableExtra' was built under R version 4.4.3
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
library(marginaleffects)
## Warning: package 'marginaleffects' was built under R version 4.4.3
library(ggplot2)
library(car)
## Warning: package 'car' was built under R version 4.4.3
## Loading required package: carData
## Warning: package 'carData' was built under R version 4.4.3
##
## Attaching package: 'car'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following object is masked from 'package:purrr':
##
## some
library(corrplot)
## Warning: package 'corrplot' was built under R version 4.4.3
## corrplot 0.95 loaded
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
Load Data
data_wine <- read.csv("C:/Users/lenovo/OneDrive/Documents/SEMESTER 4/Analisis Multivariat/P11/Terbaru/whine.csv", header = TRUE, check.names = TRUE)
head(data_wine)
## Alcohol Malicacid Ash Alcalinity_of_ash Magnesium Total_phenols Flavanoids
## 1 14.23 1.71 2.43 15.6 127 2.80 3.06
## 2 13.20 1.78 2.14 11.2 100 2.65 2.76
## 3 13.16 2.36 2.67 18.6 101 2.80 3.24
## 4 14.37 1.95 2.50 16.8 113 3.85 3.49
## 5 13.24 2.59 2.87 21.0 118 2.80 2.69
## 6 14.20 1.76 2.45 15.2 112 3.27 3.39
## Nonflavanoid_phenols Proanthocyanins Color_intensity Hue
## 1 0.28 2.29 5.64 1.04
## 2 0.26 1.28 4.38 1.05
## 3 0.30 2.81 5.68 1.03
## 4 0.24 2.18 7.80 0.86
## 5 0.39 1.82 4.32 1.04
## 6 0.34 1.97 6.75 1.05
## X0D280_0D315_of_diluted_wines Proline class
## 1 3.92 1065 1
## 2 3.40 1050 1
## 3 3.17 1185 1
## 4 3.45 1480 1
## 5 2.93 735 1
## 6 2.85 1450 1
=========== PREPROCESSING ============
Pastikan target menjadi faktor
data_wine$class <- factor(data_wine$class)
1. Hapus missing value kalau ada
data_wine <- na.omit(data_wine)
Cek struktur data
str(data_wine)
## 'data.frame': 178 obs. of 14 variables:
## $ Alcohol : num 14.2 13.2 13.2 14.4 13.2 ...
## $ Malicacid : num 1.71 1.78 2.36 1.95 2.59 1.76 1.87 2.15 1.64 1.35 ...
## $ Ash : num 2.43 2.14 2.67 2.5 2.87 2.45 2.45 2.61 2.17 2.27 ...
## $ Alcalinity_of_ash : num 15.6 11.2 18.6 16.8 21 15.2 14.6 17.6 14 16 ...
## $ Magnesium : int 127 100 101 113 118 112 96 121 97 98 ...
## $ Total_phenols : num 2.8 2.65 2.8 3.85 2.8 3.27 2.5 2.6 2.8 2.98 ...
## $ Flavanoids : num 3.06 2.76 3.24 3.49 2.69 3.39 2.52 2.51 2.98 3.15 ...
## $ Nonflavanoid_phenols : num 0.28 0.26 0.3 0.24 0.39 0.34 0.3 0.31 0.29 0.22 ...
## $ Proanthocyanins : num 2.29 1.28 2.81 2.18 1.82 1.97 1.98 1.25 1.98 1.85 ...
## $ Color_intensity : num 5.64 4.38 5.68 7.8 4.32 6.75 5.25 5.05 5.2 7.22 ...
## $ Hue : num 1.04 1.05 1.03 0.86 1.04 1.05 1.02 1.06 1.08 1.01 ...
## $ X0D280_0D315_of_diluted_wines: num 3.92 3.4 3.17 3.45 2.93 2.85 3.58 3.58 2.85 3.55 ...
## $ Proline : int 1065 1050 1185 1480 735 1450 1290 1295 1045 1045 ...
## $ class : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
levels(data_wine$class)
## [1] "1" "2" "3"
2. Visualisasi data linieritas
ggplot(data_wine, aes(x = Alcohol, y = Color_intensity, color = class)) +
geom_point(size = 3, alpha = 0.7) +
geom_smooth(method = "lm", se = FALSE) +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'

3. MENGHANDLE OUTLER
3.1 fungsi deteksi outlier
detect_outliers_zscore <- function(df, threshold = 3) {
df_numeric <- df %>% select(where(is.numeric))
if ("class_num" %in% colnames(df_numeric)) {
df_numeric <- df_numeric %>% select(-class_num)
}
z_scores <- scale(df_numeric)
outlier_flags <- abs(z_scores) > threshold
outlier_summary <- colSums(outlier_flags, na.rm = TRUE)
return(outlier_summary)
}
3.2 hitung/ketahui outlier sebelum penanganan
outliers_before <- detect_outliers_zscore(data_wine)
print(outliers_before)
## Alcohol Malicacid
## 0 1
## Ash Alcalinity_of_ash
## 3 1
## Magnesium Total_phenols
## 2 0
## Flavanoids Nonflavanoid_phenols
## 1 0
## Proanthocyanins Color_intensity
## 1 1
## Hue X0D280_0D315_of_diluted_wines
## 1 0
## Proline
## 0
3.3 clipping outlier
Fungsi clipping
clip_outliers <- function(x, lower_quantile = 0.01, upper_quantile = 0.99) {
lower <- quantile(x, lower_quantile)
upper <- quantile(x, upper_quantile)
x[x < lower] <- lower
x[x > upper] <- upper
return(x)
}
3.4 Fitur yang perlu di-clipping
features_to_clip <- c(
"Malicacid", "Ash", "Alcalinity_of_ash",
"Magnesium", "Flavanoids", "Proanthocyanins",
"Color_intensity", "Hue"
)
data_wine[features_to_clip] <- data_wine[features_to_clip] %>%
mutate(across(everything(), clip_outliers))
3.5. HITUNG OUTLIER SETELAH PENANGANAN
outliers_after <- detect_outliers_zscore(data_wine)
print(outliers_after)
## Alcohol Malicacid
## 0 0
## Ash Alcalinity_of_ash
## 0 0
## Magnesium Total_phenols
## 2 0
## Flavanoids Nonflavanoid_phenols
## 0 0
## Proanthocyanins Color_intensity
## 0 0
## Hue X0D280_0D315_of_diluted_wines
## 0 0
## Proline
## 0
3.6 TABEL PERBANDINGAN BEFORE vs AFTER
outliers_comparison <- data.frame(
Feature = names(outliers_before),
Outliers_Before = as.integer(outliers_before),
Outliers_After = as.integer(outliers_after[names(outliers_before)])
)
3.7 Tampilkan tabel
outliers_comparison %>%
arrange(desc(Outliers_Before)) %>%
kable("html") %>%
kable_styling(full_width = FALSE, bootstrap_options = c("striped", "hover", "condensed"))
|
Feature
|
Outliers_Before
|
Outliers_After
|
|
Ash
|
3
|
0
|
|
Magnesium
|
2
|
2
|
|
Malicacid
|
1
|
0
|
|
Alcalinity_of_ash
|
1
|
0
|
|
Flavanoids
|
1
|
0
|
|
Proanthocyanins
|
1
|
0
|
|
Color_intensity
|
1
|
0
|
|
Hue
|
1
|
0
|
|
Alcohol
|
0
|
0
|
|
Total_phenols
|
0
|
0
|
|
Nonflavanoid_phenols
|
0
|
0
|
|
X0D280_0D315_of_diluted_wines
|
0
|
0
|
|
Proline
|
0
|
0
|
3.8 VISUALISASI BOXPLOT SEBELUM DAN SESUDAH
Data untuk visualisasi sebelum
data_wine_before <- read.csv("C:/Users/lenovo/OneDrive/Documents/SEMESTER 4/Analisis Multivariat/P11/Terbaru/whine.csv", header = TRUE, check.names = TRUE)
data_wine_before$class <- factor(data_wine_before$class)
data_wine_before <- na.omit(data_wine_before)
Buat 2 boxplot
par(mfrow = c(1, 2)) # 2 plot sejajar
Boxplot sebelum atasi outlier
data_wine_before %>%
select(-class) %>%
pivot_longer(cols = everything(), names_to = "Fitur", values_to = "Nilai") %>%
ggplot(aes(x = Fitur, y = Nilai)) +
geom_boxplot(outlier.color = "red", outlier.size = 1) +
coord_flip() +
theme_minimal() +
labs(title = "Boxplot Sebelum Clipping", x = "Fitur", y = "Nilai")

Boxplot sesudah atasi outlier
data_wine %>%
select(-class) %>%
pivot_longer(cols = everything(), names_to = "Fitur", values_to = "Nilai") %>%
ggplot(aes(x = Fitur, y = Nilai)) +
geom_boxplot(outlier.color = "red", outlier.size = 1) +
coord_flip() +
theme_minimal() +
labs(title = "Boxplot Setelah Clipping", x = "Fitur", y = "Nilai")

par(mfrow = c(1, 1)) # Reset plot
4. CEK KORELASI UNTUK MENGETAHUI KORELASI YANG TINGGI
cor_matrix <- cor(select(data_wine, -class))
corrplot::corrplot(cor_matrix, method = "color", type = "lower", tl.cex = 1.0)

high_cor_pairs <- which(abs(cor_matrix) > 0.80 & abs(cor_matrix) < 1, arr.ind = TRUE)
if (nrow(high_cor_pairs) > 0) {
apply(high_cor_pairs, 1, function(idx) {
cat(rownames(cor_matrix)[idx[1]], "-", colnames(cor_matrix)[idx[2]],
": ", cor_matrix[idx[1], idx[2]], "\n")
})
} else {
cat("Tidak ada korelasi antar fitur > 0.80\n")
}
## Flavanoids - Total_phenols : 0.8692106
## Total_phenols - Flavanoids : 0.8692106
## NULL
Setelah mengetahui korelasi yang tinggi, cek multikolinearitas pakai
vif
data_wine$class_num <- as.numeric(data_wine$class)
fit_lm <- lm(class_num ~ . -class, data = data_wine)
vif_values <- car::vif(fit_lm)
print(vif_values)
## Alcohol Malicacid
## 2.432460 1.658511
## Ash Alcalinity_of_ash
## 2.072191 2.212071
## Magnesium Total_phenols
## 1.431608 4.456763
## Flavanoids Nonflavanoid_phenols
## 7.870950 1.802629
## Proanthocyanins Color_intensity
## 2.024319 3.029858
## Hue X0D280_0D315_of_diluted_wines
## 2.633835 3.735419
## Proline
## 2.870371
selected_features <- names(vif_values)[vif_values < 5]
print(selected_features)
## [1] "Alcohol" "Malicacid"
## [3] "Ash" "Alcalinity_of_ash"
## [5] "Magnesium" "Total_phenols"
## [7] "Nonflavanoid_phenols" "Proanthocyanins"
## [9] "Color_intensity" "Hue"
## [11] "X0D280_0D315_of_diluted_wines" "Proline"
===MULTINOMIAL LOGISTIC REGRESSION ===
DATA FINAL UNTUK MODELING
data_wine_selected <- data_wine %>%
select(all_of(selected_features), class)
str(data_wine_selected)
## 'data.frame': 178 obs. of 13 variables:
## $ Alcohol : num 14.2 13.2 13.2 14.4 13.2 ...
## $ Malicacid : num 1.71 1.78 2.36 1.95 2.59 1.76 1.87 2.15 1.64 1.35 ...
## $ Ash : num 2.43 2.14 2.67 2.5 2.87 2.45 2.45 2.61 2.17 2.27 ...
## $ Alcalinity_of_ash : num 15.6 11.4 18.6 16.8 21 ...
## $ Magnesium : num 127 100 101 113 118 112 96 121 97 98 ...
## $ Total_phenols : num 2.8 2.65 2.8 3.85 2.8 3.27 2.5 2.6 2.8 2.98 ...
## $ Nonflavanoid_phenols : num 0.28 0.26 0.3 0.24 0.39 0.34 0.3 0.31 0.29 0.22 ...
## $ Proanthocyanins : num 2.29 1.28 2.81 2.18 1.82 1.97 1.98 1.25 1.98 1.85 ...
## $ Color_intensity : num 5.64 4.38 5.68 7.8 4.32 6.75 5.25 5.05 5.2 7.22 ...
## $ Hue : num 1.04 1.05 1.03 0.86 1.04 1.05 1.02 1.06 1.08 1.01 ...
## $ X0D280_0D315_of_diluted_wines: num 3.92 3.4 3.17 3.45 2.93 2.85 3.58 3.58 2.85 3.55 ...
## $ Proline : int 1065 1050 1185 1480 735 1450 1290 1295 1045 1045 ...
## $ class : Factor w/ 3 levels "1","2","3": 1 1 1 1 1 1 1 1 1 1 ...
FIT MULTINOMIAL LOGISTIC REGRESSION
fit_selected <- multinom(class ~ ., data = data_wine_selected)
## # weights: 42 (26 variable)
## initial value 195.552987
## iter 10 value 26.998609
## iter 20 value 5.072752
## iter 30 value 0.017587
## iter 40 value 0.000256
## final value 0.000000
## converged
INTERPRETASI Relative Log Odds
Summary log odds interpret
summary(fit_selected)
## Call:
## multinom(formula = class ~ ., data = data_wine_selected)
##
## Coefficients:
## (Intercept) Alcohol Malicacid Ash Alcalinity_of_ash Magnesium
## 2 680.19285 -10.68665 -2.608825 -239.9261 13.69161 -0.6113847
## 3 -89.07054 39.54851 15.671327 -66.6167 28.70761 -0.8301563
## Total_phenols Nonflavanoid_phenols Proanthocyanins Color_intensity Hue
## 2 35.03611 127.8892 28.31754 -36.424817 359.4479
## 3 138.77755 -279.5729 -70.62997 1.922861 -338.0214
## X0D280_0D315_of_diluted_wines Proline
## 2 -58.2278 -0.4928365
## 3 -191.2860 -0.1674691
##
## Std. Errors:
## (Intercept) Alcohol Malicacid Ash Alcalinity_of_ash Magnesium
## 2 153.10390 597.3898 236.55809 546.93910 8395.5789 19498.329
## 3 10.02764 150.1122 63.80025 35.43367 208.6994 848.367
## Total_phenols Nonflavanoid_phenols Proanthocyanins Color_intensity Hue
## 2 1288.04107 138.64895 261.53165 6.328517 43.023260
## 3 23.15804 13.59585 11.50667 74.163380 3.050656
## X0D280_0D315_of_diluted_wines Proline
## 2 819.895229 2233.9251
## 3 6.063528 157.9235
##
## Residual Deviance: 3.531451e-08
## AIC: 52
Atau bisa menggunakan keble biar lebih rapi
tidy(fit_selected, conf.int = TRUE) %>%
kable() %>%
kable_styling("basic", full_width = FALSE)
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
|
2
|
(Intercept)
|
680.1928517
|
153.103902
|
4.4426879
|
0.0000089
|
380.11472
|
980.270985
|
|
2
|
Alcohol
|
-10.6866491
|
597.389809
|
-0.0178889
|
0.9857275
|
-1181.54916
|
1160.175861
|
|
2
|
Malicacid
|
-2.6088247
|
236.558090
|
-0.0110283
|
0.9912009
|
-466.25416
|
461.036511
|
|
2
|
Ash
|
-239.9261364
|
546.939102
|
-0.4386707
|
0.6609002
|
-1311.90708
|
832.054805
|
|
2
|
Alcalinity_of_ash
|
13.6916107
|
8395.578858
|
0.0016308
|
0.9986988
|
-16441.34058
|
16468.723802
|
|
2
|
Magnesium
|
-0.6113847
|
19498.328716
|
-0.0000314
|
0.9999750
|
-38216.63343
|
38215.410657
|
|
2
|
Total_phenols
|
35.0361072
|
1288.041069
|
0.0272011
|
0.9782994
|
-2489.47800
|
2559.550212
|
|
2
|
Nonflavanoid_phenols
|
127.8892076
|
138.648949
|
0.9223958
|
0.3563222
|
-143.85774
|
399.636155
|
|
2
|
Proanthocyanins
|
28.3175355
|
261.531646
|
0.1082758
|
0.9137770
|
-484.27507
|
540.910142
|
|
2
|
Color_intensity
|
-36.4248167
|
6.328517
|
-5.7556639
|
0.0000000
|
-48.82848
|
-24.021151
|
|
2
|
Hue
|
359.4479054
|
43.023260
|
8.3547342
|
0.0000000
|
275.12386
|
443.771946
|
|
2
|
X0D280_0D315_of_diluted_wines
|
-58.2278044
|
819.895229
|
-0.0710186
|
0.9433830
|
-1665.19292
|
1548.737316
|
|
2
|
Proline
|
-0.4928365
|
2233.925103
|
-0.0002206
|
0.9998240
|
-4378.90558
|
4377.919909
|
|
3
|
(Intercept)
|
-89.0705387
|
10.027641
|
-8.8825020
|
0.0000000
|
-108.72435
|
-69.416724
|
|
3
|
Alcohol
|
39.5485128
|
150.112227
|
0.2634596
|
0.7921963
|
-254.66605
|
333.763071
|
|
3
|
Malicacid
|
15.6713267
|
63.800253
|
0.2456311
|
0.8059678
|
-109.37487
|
140.717524
|
|
3
|
Ash
|
-66.6166955
|
35.433673
|
-1.8800392
|
0.0601027
|
-136.06542
|
2.832028
|
|
3
|
Alcalinity_of_ash
|
28.7076097
|
208.699404
|
0.1375548
|
0.8905923
|
-380.33571
|
437.750926
|
|
3
|
Magnesium
|
-0.8301563
|
848.367025
|
-0.0009785
|
0.9992192
|
-1663.59897
|
1661.938659
|
|
3
|
Total_phenols
|
138.7775472
|
23.158038
|
5.9926298
|
0.0000000
|
93.38863
|
184.166467
|
|
3
|
Nonflavanoid_phenols
|
-279.5728528
|
13.595846
|
-20.5631086
|
0.0000000
|
-306.22022
|
-252.925485
|
|
3
|
Proanthocyanins
|
-70.6299744
|
11.506666
|
-6.1381792
|
0.0000000
|
-93.18262
|
-48.077325
|
|
3
|
Color_intensity
|
1.9228610
|
74.163380
|
0.0259274
|
0.9793153
|
-143.43469
|
147.280415
|
|
3
|
Hue
|
-338.0213883
|
3.050656
|
-110.8028671
|
0.0000000
|
-344.00056
|
-332.042213
|
|
3
|
X0D280_0D315_of_diluted_wines
|
-191.2859990
|
6.063528
|
-31.5469814
|
0.0000000
|
-203.17030
|
-179.401703
|
|
3
|
Proline
|
-0.1674691
|
157.923517
|
-0.0010604
|
0.9991539
|
-309.69187
|
309.356936
|
HASIL INTERPRETASI RELATIVE LOG ODDS
Relative Log Odds ini mengintrepresentasikan nilai estimate sama
p-value. Setiap kenaikan 1 unit pada fitur berikut memberikan perubahan
log odds terhadap pilihan kelas dibandingkan dengan Class 1
—perbandingan class 2 vs class 1—
Berdasarkan hasil estimasi model regresi logistik multinomial,
beberapa fitur seperti Alcalinity_of_ash, Magnesium, Total_phenols,
Nonflavanoid_phenols, Proanthocyanins, dan Hue memiliki nilai koefisien
(estimate) positif, yang mengindikasikan bahwa peningkatan nilai pada
fitur-fitur tersebut cenderung meningkatkan kemungkinan suatu observasi
diklasifikasikan ke dalam kelas 2 dibandingkan kelas 1. Sebaliknya,
fitur dengan nilai koefisien negatif menunjukkan kecenderungan untuk
menurunkan peluang masuk ke dalam kelas 2.
Di antara semua fitur tersebut, Hue dan Color_intensity merupakan
variabel yang signifikan secara statistik, ditunjukkan oleh nilai
p-value sebesar 0.000, yang jauh lebih kecil dari ambang signifikansi
0.05. Hal ini mengimplikasikan bahwa Hue memiliki pengaruh yang kuat dan
signifikan dalam membedakan antara kelas 2 dan kelas 1
—Perbandingan Class 3 vs Class 1—
Berdasarkan hasil estimasi model regresi logistik multinomial untuk
perbandingan antara kelas 3 dan kelas 1, ditemukan bahwa beberapa fitur
seperti Alkohol, Malicacid, Alcalinity_of_ash, Total_phenols,
Color_intensity memiliki koefisien positif. Ini menunjukkan bahwa
peningkatan nilai pada fitur-fitur tersebut cenderung meningkatkan
kemungkinan suatu observasi diklasifikasikan ke dalam kelas 3
dibandingkan kelas 1. Sebaliknya, fitur dengan nilai koefisien negatif,
yang mengindikasikan adanya penurunan kemungkinan masuk ke dalam kelas 3
seiring peningkatan nilai fitur tersebut.
———————————————-
INTERPRETAASI Relative Risk Ration (RRR)
exp(coef(fit_selected))
## (Intercept) Alcohol Malicacid Ash Alcalinity_of_ash
## 2 2.535141e+295 2.284795e-05 7.362102e-02 6.329988e-105 8.834683e+05
## 3 2.075662e-39 1.498652e+17 6.396917e+06 1.171485e-29 2.934651e+12
## Magnesium Total_phenols Nonflavanoid_phenols Proanthocyanins Color_intensity
## 2 0.5425990 1.644326e+15 3.479983e+55 1.986778e+12 1.516712e-16
## 3 0.4359811 1.863472e+60 3.828712e-122 2.117346e-31 6.840501e+00
## Hue X0D280_0D315_of_diluted_wines Proline
## 2 1.27715e+156 5.152119e-26 0.6108911
## 3 1.58189e-147 8.424540e-84 0.8458027
Atau bisa menggunakan kable untuk RRR
tidy(fit_selected, conf.int = TRUE, exponentiate = TRUE) %>%
kable() %>%
kable_styling("basic", full_width = FALSE)
|
y.level
|
term
|
estimate
|
std.error
|
statistic
|
p.value
|
conf.low
|
conf.high
|
|
2
|
(Intercept)
|
2.535141e+295
|
153.103902
|
4.4426879
|
0.0000089
|
1.207048e+165
|
Inf
|
|
2
|
Alcohol
|
2.280000e-05
|
597.389809
|
-0.0178889
|
0.9857275
|
0.000000e+00
|
Inf
|
|
2
|
Malicacid
|
7.362100e-02
|
236.558090
|
-0.0110283
|
0.9912009
|
0.000000e+00
|
1.681174e+200
|
|
2
|
Ash
|
0.000000e+00
|
546.939102
|
-0.4386707
|
0.6609002
|
0.000000e+00
|
Inf
|
|
2
|
Alcalinity_of_ash
|
8.834683e+05
|
8395.578858
|
0.0016308
|
0.9986988
|
0.000000e+00
|
Inf
|
|
2
|
Magnesium
|
5.425990e-01
|
19498.328716
|
-0.0000314
|
0.9999750
|
0.000000e+00
|
Inf
|
|
2
|
Total_phenols
|
1.644326e+15
|
1288.041069
|
0.0272011
|
0.9782994
|
0.000000e+00
|
Inf
|
|
2
|
Nonflavanoid_phenols
|
3.479983e+55
|
138.648949
|
0.9223958
|
0.3563222
|
0.000000e+00
|
3.628914e+173
|
|
2
|
Proanthocyanins
|
1.986778e+12
|
261.531646
|
0.1082758
|
0.9137770
|
0.000000e+00
|
8.208993e+234
|
|
2
|
Color_intensity
|
0.000000e+00
|
6.328517
|
-5.7556639
|
0.0000000
|
0.000000e+00
|
0.000000e+00
|
|
2
|
Hue
|
1.277150e+156
|
43.023260
|
8.3547342
|
0.0000000
|
3.053348e+119
|
5.342042e+192
|
|
2
|
X0D280_0D315_of_diluted_wines
|
0.000000e+00
|
819.895229
|
-0.0710186
|
0.9433830
|
0.000000e+00
|
Inf
|
|
2
|
Proline
|
6.108911e-01
|
2233.925103
|
-0.0002206
|
0.9998240
|
0.000000e+00
|
Inf
|
|
3
|
(Intercept)
|
0.000000e+00
|
10.027641
|
-8.8825020
|
0.0000000
|
0.000000e+00
|
0.000000e+00
|
|
3
|
Alcohol
|
1.498652e+17
|
150.112227
|
0.2634596
|
0.7921963
|
0.000000e+00
|
8.942522e+144
|
|
3
|
Malicacid
|
6.396917e+06
|
63.800253
|
0.2456311
|
0.8059678
|
0.000000e+00
|
1.296714e+61
|
|
3
|
Ash
|
0.000000e+00
|
35.433673
|
-1.8800392
|
0.0601027
|
0.000000e+00
|
1.697985e+01
|
|
3
|
Alcalinity_of_ash
|
2.934651e+12
|
208.699404
|
0.1375548
|
0.8905923
|
0.000000e+00
|
1.296616e+190
|
|
3
|
Magnesium
|
4.359811e-01
|
848.367025
|
-0.0009785
|
0.9992192
|
0.000000e+00
|
Inf
|
|
3
|
Total_phenols
|
1.863472e+60
|
23.158038
|
5.9926298
|
0.0000000
|
3.615476e+40
|
9.604627e+79
|
|
3
|
Nonflavanoid_phenols
|
0.000000e+00
|
13.595846
|
-20.5631086
|
0.0000000
|
0.000000e+00
|
0.000000e+00
|
|
3
|
Proanthocyanins
|
0.000000e+00
|
11.506666
|
-6.1381792
|
0.0000000
|
0.000000e+00
|
0.000000e+00
|
|
3
|
Color_intensity
|
6.840501e+00
|
74.163380
|
0.0259274
|
0.9793153
|
0.000000e+00
|
9.184841e+63
|
|
3
|
Hue
|
0.000000e+00
|
3.050656
|
-110.8028671
|
0.0000000
|
0.000000e+00
|
0.000000e+00
|
|
3
|
X0D280_0D315_of_diluted_wines
|
0.000000e+00
|
6.063528
|
-31.5469814
|
0.0000000
|
0.000000e+00
|
0.000000e+00
|
|
3
|
Proline
|
8.458027e-01
|
157.923517
|
-0.0010604
|
0.9991539
|
0.000000e+00
|
2.249106e+134
|
HASIL INTERPRETASI Relative Risk Ration (RRR)
Hasil eksponensial dari koefisien menunjukkan perubahan rasio
peluang memilih kelas dibandingkan dengan Class 1
—Perbandingan Class 2 vs Class 1—
Sebagian besar fitur tidak berpengaruh signifikan dalam membedakan
Class 2 dari Class 1 (p-value > 0.05). Namun, terdapat dua fitur yang
signifikan:
1. Hue: Secara signifikan meningkatkan risiko memilih Class 2 (p
< 0.001).
Fitur lainnya seperti Alcohol, Malicacid, Ash, Magnesium, dll. tidak
berkontribusi secara signifikan terhadap klasifikasi Class 2.
—Perbandingan Class 3 vs Class 1—
1. Total Phenols: Secara signifikan meningkatkan risiko memilih
Class 3.
Fitur Ash hampir signifikan (p = 0.060), sementara fitur lainnya
seperti Alcohol, Malicacid, Magnesium, dll. tidak signifikan memberikan
pengaruh yang berarti secara statistik.
—————————-
PLOT MARGINAL EFFECTS
plot_marginal_effect <- function(fit_model, data_selected, variable_name, n_points = 100) {
variable_range <- seq(
from = min(data_selected[[variable_name]]),
to = max(data_selected[[variable_name]]),
length.out = n_points
)
newdata <- data_selected %>%
select(-class) %>%
summarise(across(everything(), mean)) %>%
slice(rep(1, n_points))
newdata[[variable_name]] <- variable_range
pred_prob <- predict(fit_model, newdata = newdata, type = "probs")
plot_data <- cbind(Variable = variable_range, as.data.frame(pred_prob))
plot_data_long <- pivot_longer(plot_data, cols = -Variable, names_to = "Class", values_to = "Probability")
ggplot(plot_data_long, aes(x = Variable, y = Probability, color = Class)) +
geom_line(size = 1) +
theme_minimal() +
labs(
title = paste("Marginal Effect of", variable_name, "on Class Probabilities"),
x = variable_name,
y = "Predicted Probability",
color = "Class"
) +
theme(
plot.title = element_text(hjust = 0.5, face = "bold"),
axis.title = element_text(face = "bold")
)
}
plot_marginal_effect(fit_model = fit_selected, data_selected = data_wine_selected, variable_name = "Alcohol")
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

—Hasil Interpretasi Plot Marginal Effect—
effects Alcohol terhadap probabilitas masing-masing kelas:
Class 1 (Merah):
- Probabilitas memilih Class 1 meningkat sangat tajam mulai sekitar
Alcohol 11.5–12.5, dan hampir pasti (probabilitas mendekati 1) untuk
nilai Alcohol 12.5–14.
- Namun, setelah Alcohol > 14, probabilitas memilih Class 1 turun
sangat drastis hingga mendekati nol.
Class 2 (hijau)
- Probabilitas memilih Class 2 sangat tinggi saat Alcohol berada di
bawah 11.5.
Class 3 (biru)
- Probabilitas memilih Class 3 tetap hampir nol hingga Alcohol
sekitar 14.
- Setelah Alcohol > 14, probabilitas memilih Class 3 meningkat
sangat tajam, menjadi hampir pasti (probabilitas mendekati 1) untuk
nilai Alcohol lebih dari 14.2.
INTERPRETASI Average Marginal Effect
mfx_Alcohol <- avg_comparisons(fit_selected, variables = "Alcohol", type = "probs")
print(mfx_Alcohol)
##
## Group Estimate Std. Error z Pr(>|z|) S 2.5 % 97.5 %
## 1 -0.00551 19.5 -2.82e-04 1 0.0 -38.3 38.3
## 2 -0.01739 399.9 -4.35e-05 1 0.0 -783.8 783.8
## 3 0.02290 382.5 5.99e-05 1 0.0 -749.7 749.7
##
## Term: Alcohol
## Type: probs
## Comparison: +1
Hasil interpretasi dari output Average Marginal Effect (AME) untuk
variabel Alcohol pada model multinomial logistik:
pada class 3 di ketahu nilai estimasi 0.02290 dimana Sebaliknya,
kenaikan 1 unit Alcohol meningkatkan probabilitas memilih Class 3
sebesar 0.0229 (sekitar 2.29%).
PREDIKSI DAN AKURASI
pred_selected <- predict(fit_selected, newdata = data_wine_selected)
accuracy_selected <- mean(pred_selected == data_wine$class)
cat("Akurasi model:", accuracy_selected, "\n")
## Akurasi model: 1