# Load package
library(glmnet)
## Loading required package: Matrix
## Loaded glmnet 4.1-8
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice

Data yang digunakan adalah dataset mtcars bawaan R (package datasets), yang terdiri dari 32 mobil (pengamatan) dan 11 variabel. Variabel yang digunakan dalam analisis ini:

- mpg = efisiensi bahan bakar (mil per galon), tipe numerik -> y (respon)
- hp = tenaga mesin (horsepower), tipe numerik -> x
- wt = berat mobil (dalam 1000 pon), tipe numerik -> x
- disp = volume silinder mesin (displacement, dalam inci kubik), tipe numerik -> x
- drat = rasio gigi akhir (rear axle ratio), tipe numerik -> x
- qsec = waktu tempuh 1/4 mil (dalam detik), tipe numerik -> x

# Gunakan dataset mtcars
data(mtcars)

# Build the model inputs: predictors as a numeric matrix (the form passed to
# glmnet further down) and the response as a plain numeric vector.
predictor_names <- c("hp", "wt", "disp", "drat", "qsec")
X <- as.matrix(mtcars[predictor_names])  # 32 x 5 predictor matrix
y <- mtcars[["mpg"]]                     # response: miles per gallon

Membagi data

# Split the observations into a training set (about 70%) and a test set
# (the remainder). The seed is fixed so the random partition is reproducible.
set.seed(123)
trainIndex <- createDataPartition(y, p = 0.7, list = FALSE)

# Row numbers of each subset; the test rows are every row not drawn for
# training, kept in their original order.
train_rows <- as.vector(trainIndex)
test_rows <- setdiff(seq_along(y), train_rows)

X_train <- X[train_rows, ]
y_train <- y[train_rows]
X_test <- X[test_rows, ]
y_test <- y[test_rows]

Model OLS

# Ordinary least squares baseline.
# The model is fitted on the training split only. Fitting on all of mtcars
# would let the test rows influence the coefficients, which makes the test
# RMSE optimistic and not comparable with the Ridge and Lasso models (both
# of which are fitted on X_train / y_train).
train_df <- data.frame(mpg = y_train, X_train)
lm_model <- lm(mpg ~ hp + wt + disp + drat + qsec, data = train_df)

# Predict on the held-out test rows
y_pred_ols <- predict(lm_model, newdata = as.data.frame(X_test))

# Root mean squared error on the test set
rmse_ols <- sqrt(mean((y_test - y_pred_ols)^2))
rmse_ols
## (re-run to refresh this value: the previously recorded 2.411635 came from
## a model fitted on the full dataset, test rows included)

Ridge Regression

# Ridge regression (alpha = 0 selects the ridge penalty in glmnet).
# First fit: coefficient path over glmnet's default lambda sequence.
ridge_model <- glmnet(X_train, y_train, alpha = 0)

# Choose lambda by cross-validation; the seed is fixed so the random fold
# assignment is reproducible.
# grouped = FALSE is stated explicitly: with this small training split the
# folds hold fewer than 3 observations each, so cv.glmnet would enforce
# grouped = FALSE anyway and emit a warning. Setting it up front gives the
# same fit without the warning.
set.seed(123)
cv_ridge <- cv.glmnet(X_train, y_train, alpha = 0, grouped = FALSE)

# Refit at the lambda with the lowest cross-validated error.
# NOTE(review): the glmnet documentation advises against supplying a single
# lambda value; predict(cv_ridge, newx = X_test, s = "lambda.min") is the
# documented alternative. The refit is kept so the recorded result below
# stays valid.
ridge_final <- glmnet(X_train, y_train, alpha = 0, lambda = cv_ridge$lambda.min)

# Predict on the held-out test rows
y_pred_ridge <- predict(ridge_final, newx = X_test)

# Root mean squared error on the test set
rmse_ridge <- sqrt(mean((y_test - y_pred_ridge)^2))
rmse_ridge
## [1] 2.295107

Lasso Regression

# Lasso regression (alpha = 1 selects the lasso penalty in glmnet).
# First fit: coefficient path over glmnet's default lambda sequence.
lasso_model <- glmnet(X_train, y_train, alpha = 1)

# Choose lambda by cross-validation; the seed is fixed so the random fold
# assignment is reproducible.
# grouped = FALSE is stated explicitly: with this small training split the
# folds hold fewer than 3 observations each, so cv.glmnet would enforce
# grouped = FALSE anyway and emit a warning. Setting it up front gives the
# same fit without the warning.
set.seed(123)
cv_lasso <- cv.glmnet(X_train, y_train, alpha = 1, grouped = FALSE)

# Refit at the lambda with the lowest cross-validated error.
# NOTE(review): the glmnet documentation advises against supplying a single
# lambda value; predict(cv_lasso, newx = X_test, s = "lambda.min") is the
# documented alternative. The refit is kept so the recorded results stay
# valid.
lasso_final <- glmnet(X_train, y_train, alpha = 1, lambda = cv_lasso$lambda.min)

# Predict on the held-out test rows
y_pred_lasso <- predict(lasso_final, newx = X_test)

RMSE Lasso

# Root mean squared error of the Lasso predictions on the test set
lasso_resid <- y_test - y_pred_lasso
rmse_lasso <- sqrt(mean(lasso_resid^2))
rmse_lasso
## [1] 2.58821

Perbandingan RMSE

# Print the test-set RMSE of each model, one labelled line per model.
rmse_by_model <- list(
  "RMSE OLS:" = rmse_ols,
  "RMSE Ridge:" = rmse_ridge,
  "RMSE Lasso:" = rmse_lasso
)
for (label in names(rmse_by_model)) {
  cat(label, rmse_by_model[[label]], "\n")
}
## RMSE OLS: 2.411635
## RMSE Ridge: 2.295107
## RMSE Lasso: 2.58821

Berdasarkan nilai RMSE pada data uji, model Ridge Regression menghasilkan RMSE terkecil, sehingga dapat dikatakan sebagai model yang paling baik dibandingkan OLS dan Lasso.