Tugas 2 Machine Learning: Regresi Regularisasi

library(readr)
library(dplyr)
library(knitr)
library(kableExtra)
library(glmnet)
library(corrplot)
library(caret)
library(tidyr)

# Fix the RNG seed so the random steps later in the script are reproducible.
set.seed(seed = 123)

# 1. Load data
# The file is semicolon-delimited; the column-type message is silenced.
data_wine <- readr::read_delim(
  file = "D:/Semester 2/Machine Learning/winequality-red.csv",
  delim = ";",
  show_col_types = FALSE
)

# --- TABLE A: DATA DIMENSIONS ---
# Two-row summary: number of observations (rows) and variables (columns).
# dim() returns c(nrow, ncol), which matches the order of the Metric labels.
dimensi_df <- data.frame(
  Metric = c("Jumlah Baris (Observasi)", "Jumlah Kolom (Variabel)"),
  Value = dim(data_wine)
)

# FALSE is spelled out: the shorthand F is an ordinary, reassignable variable.
kable(dimensi_df, caption = "Tabel A: Dimensi Dataset") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Tabel A: Dimensi Dataset
Metric	Value
Jumlah Baris (Observasi)	1469
Jumlah Kolom (Variabel)	12

# --- TABLE B: MISSING-VALUE CHECK ---
# Count NA entries per column. colSums() returns a *named* vector; left as-is,
# data.frame() promotes those names to row names and kable() prints them as an
# extra first column that merely duplicates Variabel. unname() prevents that.
missing_df <- data.frame(
  Variabel = names(data_wine),
  Jumlah_Missing = unname(colSums(is.na(data_wine)))
)

# FALSE is spelled out: the shorthand F is an ordinary, reassignable variable.
kable(missing_df, caption = "Tabel B: Pengecekan Missing Value") %>%
  kable_styling(bootstrap_options = "condensed", full_width = FALSE)

Tabel B: Pengecekan Missing Value
	Variabel	Jumlah_Missing
fixed acidity	fixed acidity	0
volatile acidity	volatile acidity	0
citric acid	citric acid	0
residual sugar	residual sugar	0
chlorides	chlorides	0
free sulfur dioxide	free sulfur dioxide	0
total sulfur dioxide	total sulfur dioxide	0
density	density	0
pH	pH	0
sulphates	sulphates	0
alcohol	alcohol	0
quality	quality	0

# --- TABLE C: DESCRIPTIVE STATISTICS SUMMARY ---
# Step 1: compute Min / Mean / Max / SD for every column. across() names the
#         results "<column>_<statistic>" (Mean and SD are rounded to 2 dp).
# Step 2: reshape to one row per variable with one column per statistic.
#         The name is split on its LAST underscore via names_pattern, so a
#         column whose own name contains "_" is still parsed correctly
#         (a plain names_sep = "_" would split such a name in the wrong place).
summary_table <- data_wine %>%
  summarise(across(everything(), list(
    Min = ~min(.),
    Mean = ~round(mean(.), 2),
    Max = ~max(.),
    SD = ~round(sd(.), 2)
  ))) %>%
  pivot_longer(
    everything(),
    names_to = c("Variabel", ".value"),
    names_pattern = "^(.*)_([^_]+)$"
  )

# FALSE is spelled out: the shorthand F is an ordinary, reassignable variable.
kable(summary_table, caption = "Tabel C: Statistik Deskriptif Fitur") %>%
  kable_styling(bootstrap_options = c("striped", "hover"), full_width = FALSE)

Tabel C: Statistik Deskriptif Fitur
Variabel	Min	Mean	Max	SD
fixed acidity	4.60000	8.44	15.90000	1.75
volatile acidity	0.12000	0.51	1.24000	0.17
citric acid	0.00000	0.29	1.00000	0.19
residual sugar	0.90000	2.57	15.50000	1.45
chlorides	0.01200	0.09	0.61100	0.05
free sulfur dioxide	1.00000	15.93	72.00000	10.43
total sulfur dioxide	6.00000	47.74	289.00000	33.57
density	0.99007	1.00	1.00369	0.00
pH	2.74000	3.30	3.90000	0.15
sulphates	0.33000	0.66	2.00000	0.17
alcohol	8.40000	10.41	14.90000	1.05
quality	3.00000	5.66	8.00000	0.80

# --- CORRELATION MATRIX VISUALISATION ---
# Pairwise correlations across every column of the dataset.
M <- data_wine %>% cor()

# Upper-triangle colour heatmap: variables ordered by hierarchical clustering,
# coefficients printed in black, diagonal omitted, labels rotated 45 degrees.
corrplot(
  M,
  method = "color",
  type = "upper",
  order = "hclust",
  addCoef.col = "black",
  tl.col = "black",
  tl.srt = 45,
  diag = FALSE,
  title = "\n Gambar 1: Heatmap Korelasi",
  mar = c(0, 0, 1, 0)
)

# 2. Model preparation (x and y)
# Response vector: the quality column.
y <- data_wine$quality
# Predictor matrix: every column except the response.
# A logical mask is used instead of -which(...): when no column matches,
# which() returns integer(0) and the negative index would silently select
# ZERO columns, whereas the mask simply keeps everything.
x <- as.matrix(data_wine[, names(data_wine) != "quality"])

# Split data (80% train, 20% test)
# list = FALSE makes createDataPartition() return the training row positions
# as a matrix rather than a list; negative indexing picks the remaining rows.
trainIndex <- createDataPartition(y = y, p = 0.8, list = FALSE)

# Responses
y_train <- y[trainIndex]
y_test  <- y[-trainIndex]

# Predictors
x_train <- x[trainIndex, ]
x_test  <- x[-trainIndex, ]

# 3. Model training & cross-validation
# Fit three cross-validated glmnet models on the training split. They differ
# only in alpha: 0 (ridge), 1 (lasso) and 0.5 (elastic net).
# NOTE(review): no foldid is supplied, so each call presumably draws its own
# random folds; keep the call order unchanged to reproduce the same results
# under the seed set earlier in the script.
cv_ridge <- cv.glmnet(x_train, y_train, alpha = 0)
cv_lasso <- cv.glmnet(x_train, y_train, alpha = 1)
cv_en    <- cv.glmnet(x_train, y_train, alpha = 0.5)

# --- CROSS-VALIDATION PLOT (Point 6) ---
# Writes the figure heading to the output as Markdown-style text ("###").
# cat() adds no trailing newline here.
cat("### Gambar 2: Plot Cross-Validation untuk Penentuan Lambda Terbaik")

## ### Gambar 2: Plot Cross-Validation untuk Penentuan Lambda Terbaik

# Draw the three cross-validation curves side by side (1 row x 3 columns).
# par() returns the settings it replaces; keep them so the previous layout is
# restored exactly afterwards instead of assuming it was 1 x 1.
old_par <- par(mfrow = c(1, 3))
plot(cv_ridge, main = "Ridge (Alpha 0)")
plot(cv_lasso, main = "Lasso (Alpha 1)")
plot(cv_en,    main = "Elastic Net (0.5)")

# Restore whatever plotting layout was active before this section.
par(old_par)

# 4. Evaluation function
# Score a fitted model on a data set at a given penalty strength.
#
# Args:
#   model:       fitted object whose predict() method accepts `s` and `newx`
#                (this script passes cv.glmnet fits).
#   best_lambda: numeric lambda at which to predict; reported rounded to 5 dp.
#   x_baru:      predictor matrix to predict on.
#   y_asli:      observed responses, one per row of x_baru.
#   nama_model:  label stored in the Model column.
#
# Returns:
#   A one-row data.frame with columns Model, Lambda_Best, RMSE and MAE
#   (RMSE and MAE rounded to 4 decimal places).
#
# Raises:
#   An error when x_baru and y_asli do not describe the same number of
#   observations.
hitung_evaluasi <- function(model, best_lambda, x_baru, y_asli, nama_model) {
  # Guard against silent recycling: a y_asli of the wrong length would be
  # recycled against the predictions and yield meaningless metrics.
  if (NROW(x_baru) != length(y_asli)) {
    stop(
      "x_baru and y_asli must have the same number of observations: ",
      NROW(x_baru), " vs ", length(y_asli),
      call. = FALSE
    )
  }

  # Flatten the predictions so the metric functions compare two plain vectors
  # (predict() hands back a matrix for matrix input such as `newx`).
  prediksi <- as.vector(predict(model, s = best_lambda, newx = x_baru))

  data.frame(
    Model = nama_model,
    Lambda_Best = round(best_lambda, 5),
    RMSE = round(RMSE(prediksi, y_asli), 4),
    MAE = round(MAE(prediksi, y_asli), 4)
  )
}

# --- TABLE D: EVALUATION METRIC COMPARISON (Points 7 & 8) ---
# One row per model, each scored on the test split at its own lambda.min
# as stored on the corresponding cv.glmnet object.
tabel_eval <- rbind(
  hitung_evaluasi(cv_ridge, cv_ridge$lambda.min, x_test, y_test, "Ridge"),
  hitung_evaluasi(cv_lasso, cv_lasso$lambda.min, x_test, y_test, "Lasso"),
  hitung_evaluasi(cv_en,    cv_en$lambda.min,    x_test, y_test, "Elastic Net")
)

# FALSE is spelled out: the shorthand F is an ordinary, reassignable variable.
kable(tabel_eval, caption = "Tabel D: Perbandingan Metrik Evaluasi & Lambda") %>%
  kable_styling(bootstrap_options = c("striped", "bordered"), full_width = FALSE)

Tabel D: Perbandingan Metrik Evaluasi & Lambda
Model	Lambda_Best	RMSE	MAE
Ridge	0.04027	0.6555	0.5092
Lasso	0.00558	0.6578	0.5107
Elastic Net	0.00844	0.6578	0.5107

# --- TABLE E: FINAL COEFFICIENTS OF EACH MODEL ---
# Coefficients (intercept included) of all three models at lambda.min,
# rounded to 4 decimal places. The feature names are taken from the same
# lambda.min extraction as the values, so names and values share one source.
tabel_koef <- data.frame(
  Fitur = rownames(coef(cv_ridge, s = "lambda.min")),
  Ridge = round(as.vector(coef(cv_ridge, s = "lambda.min")), 4),
  Lasso = round(as.vector(coef(cv_lasso, s = "lambda.min")), 4),
  ElasticNet = round(as.vector(coef(cv_en, s = "lambda.min")), 4)
)

# FALSE is spelled out: the shorthand F is an ordinary, reassignable variable.
kable(tabel_koef, caption = "Tabel E: Perbandingan Koefisien Akhir") %>%
  kable_styling(bootstrap_options = "hover", full_width = FALSE)

Tabel E: Perbandingan Koefisien Akhir
Fitur	Ridge	Lasso	ElasticNet
(Intercept)	26.0095	3.9312	3.9908
fixed acidity	0.0240	0.0000	0.0005
volatile acidity	-0.9747	-0.9939	-1.0012
citric acid	-0.1356	-0.0976	-0.1178
residual sugar	0.0421	0.0308	0.0319
chlorides	-1.7447	-1.7223	-1.7512
free sulfur dioxide	0.0029	0.0023	0.0026
total sulfur dioxide	-0.0030	-0.0028	-0.0029
density	-22.3933	0.0000	0.0000
pH	-0.3129	-0.4010	-0.4174
sulphates	0.8763	0.8471	0.8541
alcohol	0.2784	0.3067	0.3063

# --- TABLE F: FEATURE SELECTION BY LASSO ---
# Keep the predictors whose Lasso coefficient at lambda.min is non-zero.
# The test runs on the UNROUNDED coefficients: testing values already rounded
# to 4 decimal places would report a feature as dropped whenever its
# coefficient is small but non-zero. Rounding is applied only for display.
fitur_terpilih <- data.frame(
  Fitur = rownames(coef(cv_lasso, s = "lambda.min")),
  Lasso = as.vector(coef(cv_lasso, s = "lambda.min"))
) %>%
  filter(Lasso != 0, Fitur != "(Intercept)") %>%
  mutate(Lasso = round(Lasso, 4))

# TRUE/FALSE are spelled out: T and F are ordinary, reassignable variables.
kable(fitur_terpilih, caption = "Tabel F: Fitur yang Berhasil Diseleksi Lasso") %>%
  kable_styling(bootstrap_options = "striped", full_width = FALSE) %>%
  column_spec(2, bold = TRUE, color = "white", background = "forestgreen")

Tabel F: Fitur yang Berhasil Diseleksi Lasso
Fitur	Lasso
volatile acidity	-0.9939
citric acid	-0.0976
residual sugar	0.0308
chlorides	-1.7223
free sulfur dioxide	0.0023
total sulfur dioxide	-0.0028
pH	-0.4010
sulphates	0.8471
alcohol	0.3067

Tugas 2 Machine Learning: Regresi Regularisasi

Linda Apriliana

2026-03-24