# --- 1. Library
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: ggplot2
##
## Attaching package: 'ggplot2'
## The following object is masked from 'package:randomForest':
##
## margin
## Loading required package: lattice
set.seed(42)
# --- 2. Build the simulated data set
# Columns are appended one at a time, in a fixed order, so the random-number
# stream is consumed in the same sequence on every run.
n <- 100
data <- data.frame(pH = round(runif(n, min = 5.5, max = 8.5), digits = 2))
data$TDS <- sample(50:500, size = n, replace = TRUE)
data$DO <- round(runif(n, min = 2, max = 10), digits = 2)
data$BOD <- round(runif(n, min = 1, max = 10), digits = 2)
data$COD <- sample(10:100, size = n, replace = TRUE)
data$ammonia <- round(runif(n, min = 0.1, max = 3.0), digits = 2)
data$nitrate <- round(runif(n, min = 0.1, max = 10.0), digits = 2)
# --- 3. Derive the kualitas_air label
# Every row starts as "Buruk"; rows with DO >= 3 are promoted to "Sedang";
# rows that satisfy every threshold are promoted to "Baik". The "Baik" rule is
# applied last so that it takes precedence over the DO-only rule.
data$kualitas_air <- "Buruk"
data$kualitas_air[data$DO >= 3] <- "Sedang"
data$kualitas_air[with(
  data,
  pH >= 6.5 & pH <= 8.5 & TDS < 300 & DO >= 5 & BOD < 3 & COD < 50 &
    ammonia < 1 & nitrate < 5
)] <- "Baik"
data$kualitas_air <- factor(data$kualitas_air)
# --- 4. Save as a CSV file
write.csv(data, file = "data_kualitas_air_dki.csv", row.names = FALSE)
# --- 5. Split the data
# Rows selected by createDataPartition() (p = 0.8) become the training set;
# every remaining row becomes the test set.
index <- createDataPartition(y = data$kualitas_air, p = 0.8, list = FALSE)
train_data <- data[index, ]
test_data <- data[-index, ]
# --- 6. Build the Random Forest model
# kualitas_air is the response; all other columns of train_data are predictors.
model_rf <- randomForest(
  kualitas_air ~ .,
  data = train_data,
  ntree = 100,
  mtry = 3,
  importance = TRUE
)
print(model_rf)
##
## Call:
## randomForest(formula = kualitas_air ~ ., data = train_data, ntree = 100, mtry = 3, importance = TRUE)
## Type of random forest: classification
## Number of trees: 100
## No. of variables tried at each split: 3
##
## OOB estimate of error rate: 0%
## Confusion matrix:
## Buruk Sedang class.error
## Buruk 12 0 0
## Sedang 0 68 0
# --- 7. Predict and evaluate
# Predict the held-out rows, then compare predictions with the true labels.
prediksi <- predict(model_rf, newdata = test_data)
confusionMatrix(data = prediksi, reference = test_data$kualitas_air)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Buruk Sedang
## Buruk 2 0
## Sedang 1 17
##
## Accuracy : 0.95
## 95% CI : (0.7513, 0.9987)
## No Information Rate : 0.85
## P-Value [Acc > NIR] : 0.1756
##
## Kappa : 0.7727
##
## Mcnemar's Test P-Value : 1.0000
##
## Sensitivity : 0.6667
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 0.9444
## Prevalence : 0.1500
## Detection Rate : 0.1000
## Detection Prevalence : 0.1000
## Balanced Accuracy : 0.8333
##
## 'Positive' Class : Buruk
##
# --- 8. Visualise variable importance
varImpPlot(model_rf)

# Load library
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.2.1
## ✔ purrr 1.0.4 ✔ tidyr 1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks randomForest::combine()
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ purrr::lift() masks caret::lift()
## ✖ ggplot2::margin() masks randomForest::margin()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(randomForest)
library(caret)
# Simulate water-quality data
set.seed(123)
n <- 300
# Each entry is the (min, max) of a uniform draw. lapply() walks the list in
# order, so the columns are generated in exactly the sequence written here.
air_data <- as.data.frame(lapply(
  list(
    pH = c(5.5, 8.5),
    TDS = c(100, 1000),
    DO = c(2, 10),
    BOD = c(1, 10),
    COD = c(10, 100),
    Amonia = c(0.1, 5),
    Nitrat = c(0.1, 10)
  ),
  function(bounds) runif(n, min = bounds[1], max = bounds[2])
))
# Assign the quality class from simple threshold rules (simulation only).
# Rows default to "Buruk"; the "Sedang" rule is applied first and the stricter
# "Baik" rule last, so "Baik" wins wherever both rules hold.
air_data$Kualitas <- with(air_data, {
  meets_good <- DO > 6 & BOD < 3 & COD < 40 & Amonia < 1 & Nitrat < 5
  meets_fair <- DO > 4 & BOD < 6 & COD < 60
  class_label <- rep("Buruk", length(DO))
  class_label[meets_fair] <- "Sedang"
  class_label[meets_good] <- "Baik"
  factor(class_label, levels = c("Baik", "Sedang", "Buruk"))
})
# Show a summary of the data
summary(air_data)
## pH TDS DO BOD
## Min. :5.502 Min. :100.4 Min. :2.010 Min. :1.022
## 1st Qu.:6.280 1st Qu.:339.6 1st Qu.:3.896 1st Qu.:3.423
## Median :6.939 Median :538.0 Median :6.029 Median :5.487
## Mean :6.999 Mean :549.0 Mean :5.927 Mean :5.434
## 3rd Qu.:7.699 3rd Qu.:784.5 3rd Qu.:7.950 3rd Qu.:7.506
## Max. :8.483 Max. :999.5 Max. :9.959 Max. :9.934
## COD Amonia Nitrat Kualitas
## Min. :10.10 Min. :0.1263 Min. :0.135 Baik : 0
## 1st Qu.:31.81 1st Qu.:1.4357 1st Qu.:2.430 Sedang: 74
## Median :55.38 Median :2.5458 Median :5.024 Buruk :226
## Mean :54.88 Mean :2.5952 Mean :4.974
## 3rd Qu.:77.30 3rd Qu.:3.9244 3rd Qu.:7.406
## Max. :99.96 Max. :4.9822 Max. :9.977
# Visualise the class distribution.
# Kualitas is a factor with three declared levels, but a level can have zero
# rows. scale_x_discrete(drop = FALSE) keeps every declared level on the x
# axis, so an empty class appears as an empty slot instead of silently
# disappearing from the chart.
ggplot(air_data, aes(x = Kualitas, fill = Kualitas)) +
  geom_bar() +
  scale_x_discrete(drop = FALSE) +
  theme_minimal() +
  labs(title = "Distribusi Kualitas Air", x = "Kelas", y = "Jumlah Sampel")

# Build a small hand-entered sample data set (10 observations).
# NOTE(review): nothing in this block draws random numbers, so the seed does
# not affect the values below; it is kept so that the RNG state seen by any
# code that runs afterwards is unchanged.
set.seed(123)
air_data <- data.frame(
  pH = c(6.8, 7.2, 6.5, 7.8, 8.0, 6.0, 5.8, 7.0, 6.3, 7.5),
  TDS = c(400, 550, 300, 800, 700, 950, 1000, 600, 850, 500),
  DO = c(7.2, 6.5, 5.8, 4.2, 3.5, 6.8, 2.5, 4.8, 5.0, 6.0),
  BOD = c(2.0, 3.5, 4.2, 6.0, 7.0, 2.5, 8.5, 5.0, 4.5, 3.0),
  COD = c(35, 45, 55, 70, 80, 30, 90, 60, 50, 40),
  Amonia = c(0.5, 0.8, 1.2, 2.5, 3.0, 0.3, 4.0, 2.0, 1.5, 0.7),
  Nitrat = c(2.0, 3.5, 6.0, 7.5, 8.0, 1.5, 9.5, 6.5, 5.5, 3.0)
)
# Add the quality label from simple threshold rules. All comparisons are
# strict, so a value sitting exactly on a threshold (e.g. COD == 60 or
# DO == 6) does not satisfy that rule.
air_data$Kualitas <- with(air_data, ifelse(
  DO > 6 & BOD < 3 & COD < 40 & Amonia < 1 & Nitrat < 5,
  "Baik",
  ifelse(DO > 4 & BOD < 6 & COD < 60, "Sedang", "Buruk")
))
# Convert to a factor with an explicit level order (Baik, Sedang, Buruk).
# as.factor() would sort the levels alphabetically (Baik, Buruk, Sedang),
# which disagrees with the order used for the simulated data set earlier in
# this script and would reorder axes and tables built from this column.
air_data$Kualitas <- factor(
  air_data$Kualitas,
  levels = c("Baik", "Sedang", "Buruk")
)
# Show the table
print(air_data)
## pH TDS DO BOD COD Amonia Nitrat Kualitas
## 1 6.8 400 7.2 2.0 35 0.5 2.0 Baik
## 2 7.2 550 6.5 3.5 45 0.8 3.5 Sedang
## 3 6.5 300 5.8 4.2 55 1.2 6.0 Sedang
## 4 7.8 800 4.2 6.0 70 2.5 7.5 Buruk
## 5 8.0 700 3.5 7.0 80 3.0 8.0 Buruk
## 6 6.0 950 6.8 2.5 30 0.3 1.5 Baik
## 7 5.8 1000 2.5 8.5 90 4.0 9.5 Buruk
## 8 7.0 600 4.8 5.0 60 2.0 6.5 Buruk
## 9 6.3 850 5.0 4.5 50 1.5 5.5 Sedang
## 10 7.5 500 6.0 3.0 40 0.7 3.0 Sedang