1. Load Libraries and Data

library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(glmnet)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
## 
## Loaded glmnet 4.1-8
# Load dataset
data <- read.csv("D:/Machine Learning/boston_housing.csv")
str(data)
## 'data.frame':    506 obs. of  14 variables:
##  $ CRIM   : num  0.00632 0.02731 0.02729 0.03237 0.06905 ...
##  $ ZN     : num  18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
##  $ INDUS  : num  2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
##  $ CHAS   : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ NOX    : num  0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
##  $ RM     : num  6.58 6.42 7.18 7 7.15 ...
##  $ AGE    : num  65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
##  $ DIS    : num  4.09 4.97 4.97 6.06 6.06 ...
##  $ RAD    : num  1 2 2 3 3 3 5 5 5 5 ...
##  $ TAX    : num  296 242 242 222 222 222 311 311 311 311 ...
##  $ PTRATIO: num  15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
##  $ B      : num  397 397 393 395 397 ...
##  $ LSTAT  : num  4.98 9.14 4.03 2.94 5.33 ...
##  $ MEDV   : num  24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...

2. Preprocessing

# Design matrix X and response vector y
x <- model.matrix(MEDV ~ ., data = data)[,-1]
y <- data$MEDV
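
glmnet expects a numeric predictor matrix, which model.matrix() provides here (the intercept column is dropped), and it standardizes the predictors internally by default (standardize = TRUE), so no manual scaling is needed. As a quick sanity check before modeling, missing values and the matrix dimensions can be inspected; a minimal sketch (output not shown):

# Count missing values per column (none are expected in this dataset)
colSums(is.na(data))

# Confirm the dimensions of the design matrix
dim(x)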

3. Split Data

set.seed(123)
trainIndex <- createDataPartition(y, p = 0.7, list = FALSE)
x_train <- x[trainIndex, ]
x_test <- x[-trainIndex, ]
y_train <- y[trainIndex]
y_test <- y[-trainIndex]
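
createDataPartition() draws a stratified sample based on quantile groups of y, so the 70/30 split roughly preserves the distribution of house prices. A quick sketch to verify the resulting sizes (output not shown):

# Roughly 70% of rows should be in train, 30% in test
dim(x_train)
dim(x_test)
length(y_train)
length(y_test)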

4. Ridge Regression

ridge <- cv.glmnet(x_train, y_train, alpha = 0)
ridge_pred <- predict(ridge, s = ridge$lambda.min, newx = x_test)
ridge_rmse <- sqrt(mean((ridge_pred - y_test)^2))
ridge_mae <- mean(abs(ridge_pred - y_test))
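
cv.glmnet() chooses the penalty by 10-fold cross-validation (the default). To see which lambda was selected and how strongly the coefficients are shrunk, the fitted object can be inspected; a minimal sketch (output not shown):

# Cross-validation curve: estimated MSE versus log(lambda)
plot(ridge)

# Selected penalty and the coefficients at that value
ridge$lambda.min
coef(ridge, s = "lambda.min")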

5. Lasso Regression

lasso <- cv.glmnet(x_train, y_train, alpha = 1)
lasso_pred <- predict(lasso, s = lasso$lambda.min, newx = x_test)
lasso_rmse <- sqrt(mean((lasso_pred - y_test)^2))
lasso_mae <- mean(abs(lasso_pred - y_test))
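
Unlike Ridge, the Lasso penalty can set coefficients exactly to zero, which acts as built-in variable selection. Which terms survive at lambda.min can be listed as in the sketch below (the surviving set depends on the random CV folds):

# Coefficients at the CV-selected lambda (sparse one-column matrix)
lasso_coef <- coef(lasso, s = "lambda.min")

# Names of the terms with nonzero coefficients (the intercept is never penalized)
rownames(lasso_coef)[as.vector(lasso_coef) != 0]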

6. Elastic Net Regression

elastic_model <- train(
  x = x_train, y = y_train,
  method = "glmnet",
  trControl = trainControl("cv", number = 10),
  tuneLength = 10
)
elastic_pred <- predict(elastic_model, newdata = x_test)
elastic_rmse <- sqrt(mean((elastic_pred - y_test)^2))
elastic_mae <- mean(abs(elastic_pred - y_test))
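
caret's train() with method = "glmnet" tunes both alpha (the Ridge/Lasso mixing parameter) and lambda; tuneLength = 10 controls the size of the search grid. The combination selected by 10-fold cross-validation can be read off the fitted object (a sketch, output not shown):

# Best (alpha, lambda) pair found by cross-validation
elastic_model$bestTune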

7. Model Comparison

results <- data.frame(
  Model = c("Ridge", "Lasso", "Elastic Net"),
  RMSE = c(ridge_rmse, lasso_rmse, elastic_rmse),
  MAE = c(ridge_mae, lasso_mae, elastic_mae)
)
print(results)
##         Model     RMSE      MAE
## 1       Ridge 5.269754 3.343030
## 2       Lasso 5.120902 3.309685
## 3 Elastic Net 5.134644 3.308347

8. Conclusion

The model with the best performance (lowest RMSE and MAE) is the one to use for predicting house prices. In this run the three regularized models perform very similarly: Lasso gives the lowest RMSE (about 5.12) while Elastic Net has a marginally lower MAE (about 3.31). Lasso is a reasonable choice here, since it also simplifies the model by zeroing out weak predictors.
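
For completeness, the best row can be picked programmatically and that model reused for prediction. A minimal sketch, assuming the Lasso fit (lowest RMSE in this run) is the one carried forward; the winning model may differ with a different seed or split:

# Identify the model with the lowest RMSE in the comparison table
best_model <- results[which.min(results$RMSE), ]
best_model

# Example: predicted vs. actual prices (MEDV, in $1000s) for the first test houses
head(cbind(
  Predicted = as.vector(lasso_pred),
  Actual    = y_test
))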