1. Install dan Muat Library yang Dibutuhkan

# install.packages(c("tidyverse", "caret", "nnet", "MASS", "corrplot", "rsample", "ggplot2", "ggthemes", "rcompanion"))
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(nnet)
library(MASS)
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
library(corrplot)
## corrplot 0.95 loaded
library(rsample)
library(ggplot2)
library(ggthemes)
library(rcompanion)

2. Import Dataset

car <- read.csv("/Users/naphoyy/Downloads/car_evaluation.csv")
wine <- read.csv("/Users/naphoyy/Downloads/wine_quality.csv")

3. Preprocessing

# Car Evaluation: ubah semua kolom ke faktor
car <- car %>% mutate(across(everything(), as.factor))

# Wine Quality: quality jadi faktor ordinal, fitur lainnya distandarisasi
wine$quality <- factor(wine$quality, ordered = TRUE)
wine_scaled <- wine %>% mutate(across(-quality, scale))

4. EDA

# Distribusi kelas car
ggplot(car, aes(x = class)) +
  geom_bar(fill = "steelblue") +
  theme_minimal() +
  ggtitle("Distribusi Kelas - Car Evaluation")

# Distribusi kualitas wine
ggplot(wine, aes(x = quality)) +
  geom_bar(fill = "tomato") +
  theme_minimal() +
  ggtitle("Distribusi Kualitas Wine")

5. Regresi Logistik Multinomial (Car Evaluation)

# Multinomial logistic regression
model_multinom <- multinom(class ~ ., data = car)
## # weights:  68 (48 variable)
## initial  value 2395.516656 
## iter  10 value 597.315128
## iter  20 value 467.090136
## iter  30 value 328.810480
## iter  40 value 244.612984
## iter  50 value 225.206007
## iter  60 value 224.159121
## iter  70 value 224.097943
## iter  80 value 224.087947
## iter  90 value 224.087532
## final  value 224.087527 
## converged
summary(model_multinom)
## Call:
## multinom(formula = class ~ ., data = car)
## 
## Coefficients:
##       (Intercept) buyinglow buyingmed buyingvhigh  maintlow  maintmed
## good    -71.97681 46.032637  40.47446    7.708094 43.623291 38.823630
## unacc    69.42043 -5.024328  -3.90714    2.059543 -3.389293 -3.395287
## vgood   -82.70614 61.469227  54.04258   -8.976376 16.063184 11.340997
##       maintvhigh    doors3    doors4 doors5more   persons4 personsmore
## good  -15.067163  2.476621  3.569842   3.569867  -4.444182   -4.390051
## unacc   2.815285 -1.850907 -2.474985  -2.474984 -70.956144  -70.575800
## vgood -40.387721  4.276113  7.592576   7.592565  20.785542   22.158446
##       lug_bootmed lug_bootsmall safetylow  safetymed
## good    -3.071061     -9.435452 -19.37140  -7.970149
## unacc    1.512567      4.434120  53.06646   2.994148
## vgood   -6.413096    -31.842156 -19.50151 -36.878507
## 
## Std. Errors:
##       (Intercept) buyinglow buyingmed  buyingvhigh  maintlow  maintmed
## good    0.7059506 0.8635442 0.3687517 6.484118e-06 0.8420711 0.3745084
## unacc   0.3136007 0.5702226 0.4854423 3.744752e-01 0.4711107 0.4696681
## vgood   0.5647000 0.6400854 0.8437218 2.290431e-18 2.7186363 1.9931284
##         maintvhigh    doors3    doors4 doors5more  persons4 personsmore
## good  2.901445e-12 0.9585491 1.0693807  1.0693825 0.4565157   0.4706371
## unacc 4.147939e-01 0.4040386 0.4278606  0.4278605 0.2116375   0.2069215
## vgood 7.945125e-11 1.3525023 1.5889352  1.5889321 0.5366096   0.4981646
##       lug_bootmed lug_bootsmall    safetylow    safetymed
## good    0.9983801     1.9612606 1.831934e-12 1.601625e+00
## unacc   0.3755744     0.4759407 4.034039e-12 3.584070e-01
## vgood   1.3740735   645.3302953 4.196119e-12 1.535448e-05
## 
## Residual Deviance: 448.1751 
## AIC: 544.1751
# Prediksi dan evaluasi
pred_multinom <- predict(model_multinom, newdata = car)
conf_matrix_multinom <- table(Predicted = pred_multinom, Actual = car$class)
conf_matrix_multinom
##          Actual
## Predicted  acc good unacc vgood
##     acc    346    6    43     2
##     good     4   59     1     0
##     unacc   32    0  1166     0
##     vgood    2    4     0    63
accuracy_multinom <- sum(diag(conf_matrix_multinom)) / sum(conf_matrix_multinom)
cat("Akurasi model multinomial:", round(accuracy_multinom * 100, 2), "%\n")
## Akurasi model multinomial: 94.56 %

6. Regresi Logistik Ordinal (Wine Quality)

# Ordinal logistic regression
model_polr <- polr(quality ~ ., data = wine_scaled, Hess = TRUE)
summary(model_polr)
## Call:
## polr(formula = quality ~ ., data = wine_scaled, Hess = TRUE)
## 
## Coefficients:
##                          Value Std. Error t value
## fixed_acidity         0.341203    0.12161  2.8056
## volatile_acidity     -0.666290    0.06795 -9.8050
## citric_acid          -0.058626    0.07484 -0.7834
## residual_sugar        0.217412    0.08475  2.5654
## chlorides            -0.212876    0.05773 -3.6872
## free_sulfur_dioxide   0.162780    0.07643  2.1299
## total_sulfur_dioxide -0.300035    0.08938 -3.3567
## density              -0.463729    0.12521 -3.7036
## pH                    0.008586    0.08063  0.1065
## sulphates             0.462379    0.05798  7.9754
## alcohol               0.743014    0.07877  9.4324
## 
## Intercepts:
##     Value    Std. Error t value 
## 3|4  -5.8018   0.2837   -20.4539
## 4|5  -3.8882   0.1239   -31.3781
## 5|6  -0.3986   0.0532    -7.4865
## 6|7   2.3513   0.0798    29.4530
## 7|8   5.1140   0.1940    26.3658
## 
## Residual Deviance: 4030.577 
## AIC: 4062.577
# Prediksi dan evaluasi
pred_polr <- predict(model_polr, newdata = wine_scaled)
conf_matrix_polr <- table(Predicted = pred_polr, Actual = wine_scaled$quality)
conf_matrix_polr
##          Actual
## Predicted   3   4   5   6   7   8
##         3   0   0   0   0   0   0
##         4   0   0   0   0   0   0
##         5  10  44 565 287  15   0
##         6   3  23 240 489 183  19
##         7   0   1   4  38  65  12
##         8   0   0   1   0   0   0
accuracy_polr <- sum(diag(conf_matrix_polr)) / sum(conf_matrix_polr)
cat("Akurasi model ordinal:", round(accuracy_polr * 100, 2), "%\n")
## Akurasi model ordinal: 55.98 %

7. Kesimpulan