library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(glmnet)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loaded glmnet 4.1-8
# Read the Boston housing CSV into a data frame and show its structure
# (column names, types, and the first few values of each column).
data <- read.csv(file = "D:/Machine Learning/boston_housing.csv")
str(object = data)
## 'data.frame': 506 obs. of 14 variables:
## $ CRIM : num 0.00632 0.02731 0.02729 0.03237 0.06905 ...
## $ ZN : num 18 0 0 0 0 0 12.5 12.5 12.5 12.5 ...
## $ INDUS : num 2.31 7.07 7.07 2.18 2.18 2.18 7.87 7.87 7.87 7.87 ...
## $ CHAS : num 0 0 0 0 0 0 0 0 0 0 ...
## $ NOX : num 0.538 0.469 0.469 0.458 0.458 0.458 0.524 0.524 0.524 0.524 ...
## $ RM : num 6.58 6.42 7.18 7 7.15 ...
## $ AGE : num 65.2 78.9 61.1 45.8 54.2 58.7 66.6 96.1 100 85.9 ...
## $ DIS : num 4.09 4.97 4.97 6.06 6.06 ...
## $ RAD : num 1 2 2 3 3 3 5 5 5 5 ...
## $ TAX : num 296 242 242 222 222 222 311 311 311 311 ...
## $ PTRATIO: num 15.3 17.8 17.8 18.7 18.7 18.7 15.2 15.2 15.2 15.2 ...
## $ B : num 397 397 393 395 397 ...
## $ LSTAT : num 4.98 9.14 4.03 2.94 5.33 ...
## $ MEDV : num 24 21.6 34.7 33.4 36.2 28.7 22.9 27.1 16.5 18.9 ...
# Predictor matrix x and response vector y.
# Both are derived from the same model frame: model.frame() applies the
# default na.action (dropping rows with missing values), so taking y from the
# frame instead of data$MEDV keeps x and y row-aligned even if the CSV has NAs.
housing_frame <- model.frame(MEDV ~ ., data = data)
# Drop the intercept column; drop = FALSE keeps x a matrix even when only a
# single predictor column remains.
x <- model.matrix(terms(housing_frame), data = housing_frame)[, -1, drop = FALSE]
# unname() so y is a plain numeric vector, as data$MEDV was.
y <- unname(model.response(housing_frame))

# Fixed seed so the train/test partition is reproducible across runs.
set.seed(123)
# 70/30 split; createDataPartition() samples within groups of y so both
# partitions cover the range of the response.
trainIndex <- createDataPartition(y, p = 0.7, list = FALSE)
x_train <- x[trainIndex, , drop = FALSE]
x_test <- x[-trainIndex, , drop = FALSE]
y_train <- y[trainIndex]
y_test <- y[-trainIndex]
# Ridge regression (alpha = 0): lambda is selected by cross-validation on the
# training split only.
ridge <- cv.glmnet(x = x_train, y = y_train, alpha = 0)
# Predict the held-out rows at the lambda with the lowest CV error.
ridge_pred <- predict(ridge, newx = x_test, s = "lambda.min")
# Test-set error: root mean squared error and mean absolute error.
ridge_rmse <- caret::RMSE(ridge_pred, y_test)
ridge_mae <- caret::MAE(ridge_pred, y_test)
# Lasso regression (alpha = 1): lambda is selected by cross-validation on the
# training split only.
lasso <- cv.glmnet(x = x_train, y = y_train, alpha = 1)
# Predict the held-out rows at the lambda with the lowest CV error.
lasso_pred <- predict(lasso, newx = x_test, s = "lambda.min")
# Test-set error: root mean squared error and mean absolute error.
lasso_rmse <- caret::RMSE(lasso_pred, y_test)
lasso_mae <- caret::MAE(lasso_pred, y_test)
# Elastic net: fit glmnet through caret, which searches a tuning grid sized by
# tuneLength and scores each candidate with 10-fold cross-validation.
elastic_model <- train(
  x = x_train,
  y = y_train,
  method = "glmnet",
  tuneLength = 10,
  trControl = trainControl(method = "cv", number = 10)
)
# Predict the held-out rows with the model caret selected.
elastic_pred <- predict(elastic_model, newdata = x_test)
# Test-set error: root mean squared error and mean absolute error.
elastic_rmse <- caret::RMSE(elastic_pred, y_test)
elastic_mae <- caret::MAE(elastic_pred, y_test)
# Collect the test-set metrics of the three models into one comparison table,
# one row per model, and print it.
results <- data.frame(Model = c("Ridge", "Lasso", "Elastic Net"))
results$RMSE <- c(ridge_rmse, lasso_rmse, elastic_rmse)
results$MAE <- c(ridge_mae, lasso_mae, elastic_mae)
print(results)
## Model RMSE MAE
## 1 Ridge 5.269754 3.343030
## 2 Lasso 5.120902 3.309685
## 3 Elastic Net 5.134644 3.308347
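# The model with the best performance (lowest RMSE and MAE) is used to predict
# house prices more accurately. In the table above, Lasso has the lowest RMSE
# (5.12) and Elastic Net the lowest MAE (3.31); the two are nearly tied, while
# Ridge has the highest error on both metrics.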