library(readr)
library(dplyr)
library(knitr)
library(kableExtra)
library(glmnet)
library(corrplot)
library(caret)
library(tidyr)
# Fix the RNG seed so the random steps of this script are reproducible.
set.seed(123)
# 1. Load data
# The CSV is semicolon-delimited; show_col_types = FALSE silences readr's
# column-specification message.
wine_csv <- "D:/Semester 2/Machine Learning/winequality-red.csv"
data_wine <- read_delim(file = wine_csv, delim = ";", show_col_types = FALSE)
# --- TABLE A: DATASET DIMENSIONS ---
# Two-row summary: number of observations (rows) and of variables (columns).
# dim() returns c(nrow, ncol) in that order.
dimensi_df <- data.frame(
  Metric = c("Jumlah Baris (Observasi)", "Jumlah Kolom (Variabel)"),
  Value = dim(data_wine)
)
# FALSE is spelled out: the shorthand F is an ordinary variable that can be
# reassigned, whereas FALSE is a reserved word.
kable(dimensi_df, caption = "Tabel A: Dimensi Dataset") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Tabel A: Dimensi Dataset

| Metric | Value |
|---|---|
| Jumlah Baris (Observasi) | 1469 |
| Jumlah Kolom (Variabel) | 12 |
# --- TABLE B: MISSING-VALUE CHECK ---
# Count NA entries per column. colSums() returns a named vector; unname()
# keeps those names from becoming data-frame row names, which kable() would
# otherwise print as an extra column duplicating `Variabel`.
missing_df <- data.frame(
  Variabel = names(data_wine),
  Jumlah_Missing = unname(colSums(is.na(data_wine)))
)
# FALSE is spelled out because the shorthand F can be reassigned.
kable(missing_df, caption = "Tabel B: Pengecekan Missing Value") %>%
  kable_styling(bootstrap_options = "condensed", full_width = FALSE)
Tabel B: Pengecekan Missing Value

|  | Variabel | Jumlah_Missing |
|---|---|---|
| fixed acidity | fixed acidity | 0 |
| volatile acidity | volatile acidity | 0 |
| citric acid | citric acid | 0 |
| residual sugar | residual sugar | 0 |
| chlorides | chlorides | 0 |
| free sulfur dioxide | free sulfur dioxide | 0 |
| total sulfur dioxide | total sulfur dioxide | 0 |
| density | density | 0 |
| pH | pH | 0 |
| sulphates | sulphates | 0 |
| alcohol | alcohol | 0 |
| quality | quality | 0 |
# --- TABLE C: DESCRIPTIVE SUMMARY STATISTICS ---
# For every column compute min, mean, max and standard deviation (mean and SD
# rounded to 2 decimals), then reshape to one row per variable.
summary_table <- data_wine %>%
  summarise(across(everything(), list(
    Min = ~min(.),
    Mean = ~round(mean(.), 2),
    Max = ~max(.),
    SD = ~round(sd(.), 2)
  ))) %>%
  # across() names its outputs "<column>_<statistic>". Split on the LAST
  # underscore only, so a column whose own name contains "_" still pivots
  # correctly (a plain names_sep = "_" would split such a name too early).
  pivot_longer(
    everything(),
    names_to = c("Variabel", ".value"),
    names_pattern = "^(.*)_([^_]+)$"
  )
# FALSE is spelled out because the shorthand F can be reassigned.
kable(summary_table, caption = "Tabel C: Statistik Deskriptif Fitur") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)
Tabel C: Statistik Deskriptif Fitur

| Variabel | Min | Mean | Max | SD |
|---|---|---|---|---|
| fixed acidity | 4.60000 | 8.44 | 15.90000 | 1.75 |
| volatile acidity | 0.12000 | 0.51 | 1.24000 | 0.17 |
| citric acid | 0.00000 | 0.29 | 1.00000 | 0.19 |
| residual sugar | 0.90000 | 2.57 | 15.50000 | 1.45 |
| chlorides | 0.01200 | 0.09 | 0.61100 | 0.05 |
| free sulfur dioxide | 1.00000 | 15.93 | 72.00000 | 10.43 |
| total sulfur dioxide | 6.00000 | 47.74 | 289.00000 | 33.57 |
| density | 0.99007 | 1.00 | 1.00369 | 0.00 |
| pH | 2.74000 | 3.30 | 3.90000 | 0.15 |
| sulphates | 0.33000 | 0.66 | 2.00000 | 0.17 |
| alcohol | 8.40000 | 10.41 | 14.90000 | 1.05 |
| quality | 3.00000 | 5.66 | 8.00000 | 0.80 |
# --- CORRELATION MATRIX VISUALISATION ---
# Pairwise correlations between all columns, drawn as an upper-triangle colour
# heatmap with hierarchical-clustering order and the coefficients printed in
# each cell; the diagonal is omitted.
M <- cor(data_wine)
corrplot(
  M,
  method = "color",
  type = "upper",
  order = "hclust",
  diag = FALSE,
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45,
  title = "\n Gambar 1: Heatmap Korelasi",
  mar = c(0, 0, 1, 0)
)

# 2. Model preparation (x and y)
# Fail early with a clear message if the response column is missing.
stopifnot("quality" %in% names(data_wine))
y <- data_wine$quality
# Select the predictors with a logical mask. Negating which() is fragile:
# when nothing matches, -integer(0) selects zero columns instead of all.
x <- as.matrix(data_wine[, names(data_wine) != "quality"])
# Split the data (80% train, 20% test)
trainIndex <- createDataPartition(y, p = 0.8, list = FALSE)
x_train <- x[trainIndex, ]
x_test <- x[-trainIndex, ]
y_train <- y[trainIndex]
y_test <- y[-trainIndex]
# 3. Model training with cross-validation
# alpha sets the penalty mix: 0 = ridge, 1 = lasso, 0.5 = elastic net.
# The three fits are run in this fixed order (ridge, lasso, elastic net).
cv_ridge <- cv.glmnet(x = x_train, y = y_train, alpha = 0)
cv_lasso <- cv.glmnet(x = x_train, y = y_train, alpha = 1)
cv_en <- cv.glmnet(x = x_train, y = y_train, alpha = 0.5)
# --- CROSS-VALIDATION PLOTS (Point 6) ---
cat("### Gambar 2: Plot Cross-Validation untuk Penentuan Lambda Terbaik")
## ### Gambar 2: Plot Cross-Validation untuk Penentuan Lambda Terbaik
# par() returns the previous settings. Keep them so the layout is restored to
# whatever was active before, instead of assuming it was a 1 x 1 grid.
old_par <- par(mfrow = c(1, 3))
plot(cv_ridge, main = "Ridge (Alpha 0)")
plot(cv_lasso, main = "Lasso (Alpha 1)")
plot(cv_en, main = "Elastic Net (0.5)")

par(old_par)
# 4. Evaluation function
# Predicts with `model` at penalty `best_lambda` on the predictors `x_baru`
# and scores the predictions against the observed values `y_asli`.
# Returns a one-row data frame with columns Model (the label `nama_model`),
# Lambda_Best (rounded to 5 decimals), RMSE and MAE (each rounded to 4).
hitung_evaluasi <- function(model, best_lambda, x_baru, y_asli, nama_model) {
  y_hat <- predict(model, s = best_lambda, newx = x_baru)
  skor_rmse <- round(RMSE(y_hat, y_asli), 4)
  skor_mae <- round(MAE(y_hat, y_asli), 4)
  data.frame(
    Model = nama_model,
    Lambda_Best = round(best_lambda, 5),
    RMSE = skor_rmse,
    MAE = skor_mae
  )
}
# --- TABLE D: COMPARISON OF EVALUATION METRICS (Points 7 & 8) ---
# One row per model, each scored on the held-out test set at the lambda that
# minimised its cross-validation error (lambda.min).
tabel_eval <- rbind(
  hitung_evaluasi(cv_ridge, cv_ridge$lambda.min, x_test, y_test, "Ridge"),
  hitung_evaluasi(cv_lasso, cv_lasso$lambda.min, x_test, y_test, "Lasso"),
  hitung_evaluasi(cv_en, cv_en$lambda.min, x_test, y_test, "Elastic Net")
)
# FALSE is spelled out because the shorthand F can be reassigned.
kable(tabel_eval, caption = "Tabel D: Perbandingan Metrik Evaluasi & Lambda") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"), full_width = FALSE)
Tabel D: Perbandingan Metrik Evaluasi & Lambda

| Model | Lambda_Best | RMSE | MAE |
|---|---|---|---|
| Ridge | 0.04027 | 0.6555 | 0.5092 |
| Lasso | 0.00558 | 0.6578 | 0.5107 |
| Elastic Net | 0.00844 | 0.6578 | 0.5107 |
# --- TABLE E: FINAL COEFFICIENTS OF EACH MODEL ---
# Coefficients (intercept first) of each fit at its lambda.min, rounded to 4
# decimals. The feature names are taken from the same lambda.min extraction
# as the values so labels and numbers come from one object.
koef_ridge <- coef(cv_ridge, s = "lambda.min")
tabel_koef <- data.frame(
  Fitur = row.names(koef_ridge),
  Ridge = round(as.vector(koef_ridge), 4),
  Lasso = round(as.vector(coef(cv_lasso, s = "lambda.min")), 4),
  ElasticNet = round(as.vector(coef(cv_en, s = "lambda.min")), 4)
)
# FALSE is spelled out because the shorthand F can be reassigned.
kable(tabel_koef, caption = "Tabel E: Perbandingan Koefisien Akhir") %>%
  kable_styling(bootstrap_options = "hover", full_width = FALSE)
Tabel E: Perbandingan Koefisien Akhir

| Fitur | Ridge | Lasso | ElasticNet |
|---|---|---|---|
| (Intercept) | 26.0095 | 3.9312 | 3.9908 |
| fixed acidity | 0.0240 | 0.0000 | 0.0005 |
| volatile acidity | -0.9747 | -0.9939 | -1.0012 |
| citric acid | -0.1356 | -0.0976 | -0.1178 |
| residual sugar | 0.0421 | 0.0308 | 0.0319 |
| chlorides | -1.7447 | -1.7223 | -1.7512 |
| free sulfur dioxide | 0.0029 | 0.0023 | 0.0026 |
| total sulfur dioxide | -0.0030 | -0.0028 | -0.0029 |
| density | -22.3933 | 0.0000 | 0.0000 |
| pH | -0.3129 | -0.4010 | -0.4174 |
| sulphates | 0.8763 | 0.8471 | 0.8541 |
| alcohol | 0.2784 | 0.3067 | 0.3063 |
# --- TABLE F: FEATURE SELECTION BY LASSO ---
# A feature counts as selected when its lasso coefficient at lambda.min is
# non-zero. The test is applied to the RAW coefficients and rounding happens
# afterwards; testing the rounded values would misreport a small but non-zero
# coefficient (|coef| < 0.00005) as dropped.
koef_lasso <- coef(cv_lasso, s = "lambda.min")
fitur_terpilih <- data.frame(
  Fitur = row.names(koef_lasso),
  Lasso = as.vector(koef_lasso)
) %>%
  filter(Lasso != 0 & Fitur != "(Intercept)") %>%
  mutate(Lasso = round(Lasso, 4))
# TRUE/FALSE are spelled out because the shorthands T and F can be reassigned.
kable(fitur_terpilih, caption = "Tabel F: Fitur yang Berhasil Diseleksi Lasso") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  column_spec(2, bold = TRUE, color = "white", background = "forestgreen")
Tabel F: Fitur yang Berhasil Diseleksi Lasso

| Fitur | Lasso |
|---|---|
| volatile acidity | -0.9939 |
| citric acid | -0.0976 |
| residual sugar | 0.0308 |
| chlorides | -1.7223 |
| free sulfur dioxide | 0.0023 |
| total sulfur dioxide | -0.0028 |
| pH | -0.4010 |
| sulphates | 0.8471 |
| alcohol | 0.3067 |