library(tidyverse) # Data manipulation and visualization
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.2
## ✔ ggplot2 4.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret) # Machine learning framework
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(randomForest) # Random Forest model
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(rpart) # Decision Tree
library(rpart.plot) # Decision Tree visualization
library(e1071) # SVM
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:ggplot2':
##
## element
library(corrplot) # Correlation plot
## corrplot 0.95 loaded
library(gridExtra) # Multiple plots
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:randomForest':
##
## combine
##
## The following object is masked from 'package:dplyr':
##
## combine
# Set seed for reproducibility
set.seed(123)
# ===============================================================================
# PROBLEM 1: DATA CLEANING AND EXPLORATION (30%)
# ===============================================================================
cat("\n==================== SOAL 1: DATA CLEANING ====================\n")
##
## ==================== SOAL 1: DATA CLEANING ====================
# 1.1 Load the data from CSV
# ------------------------------------------------------------------------------
cat("C:/Users/HILMI/Downloads/kualitasair.csv")
## C:/Users/HILMI/Downloads/kualitasair.csv
# INSTRUCTIONS: Save the CSV file as 'kualitasair.csv' in the working directory
# Or use the full path: "C:/Users/YourName/Documents/kualitasair.csv"
# Check the working directory
cat("Current working directory:", getwd(), "\n")
## Current working directory: C:/Users/HILMI/Downloads
cat("Make sure the file 'C:/Users/HILMI/Downloads/kualitasair.csv' is in this folder\n\n")
## Make sure the file 'C:/Users/HILMI/Downloads/kualitasair.csv' is in this folder
# Read the data from CSV
# Adjust the parameters to match your CSV format
data_raw <- read.csv("C:/Users/HILMI/Downloads/kualitasair.csv",
header = TRUE,
stringsAsFactors = FALSE,
na.strings = c("", "NA", "N/A", " "))
# If the CSV uses a different separator (e.g., semicolon), use:
# data_raw <- read.csv('kualitasair.csv', sep = ";", header = TRUE, stringsAsFactors = FALSE)
# Or, if the data comes from Excel, use:
# library(readxl)
# data_raw <- read_excel('kualitasair.xlsx')
# Check the data structure
cat("Data berhasil dimuat!\n")
## Data berhasil dimuat!
str(data_raw)
## 'data.frame': 923 obs. of 26 variables:
## $ Lokasi: chr "S1" "S2" "S3" "S4" ...
## $ pH : num 7.69 6.72 7.18 7.32 7.2 ...
## $ DO : num NA 5.72 4.89 6.13 7.79 ...
## $ BOD : num 1.71 1.44 2.73 3.14 1.18 ...
## $ TSS : num 43.1 44.3 NA 41 48.1 ...
## $ Suhu : num 26.8 27.7 26 29.7 26.4 ...
## $ Status: chr "Tercemar ringan" "Tercemar ringan" "Tercemar ringan" "Tercemar ringan" ...
## $ X : logi NA NA NA NA NA NA ...
## $ X.1 : logi NA NA NA NA NA NA ...
## $ X.2 : logi NA NA NA NA NA NA ...
## $ X.3 : logi NA NA NA NA NA NA ...
## $ X.4 : logi NA NA NA NA NA NA ...
## $ X.5 : logi NA NA NA NA NA NA ...
## $ X.6 : logi NA NA NA NA NA NA ...
## $ X.7 : logi NA NA NA NA NA NA ...
## $ X.8 : logi NA NA NA NA NA NA ...
## $ X.9 : logi NA NA NA NA NA NA ...
## $ X.10 : logi NA NA NA NA NA NA ...
## $ X.11 : logi NA NA NA NA NA NA ...
## $ X.12 : logi NA NA NA NA NA NA ...
## $ X.13 : logi NA NA NA NA NA NA ...
## $ X.14 : logi NA NA NA NA NA NA ...
## $ X.15 : logi NA NA NA NA NA NA ...
## $ X.16 : logi NA NA NA NA NA NA ...
## $ X.17 : logi NA NA NA NA NA NA ...
## $ X.18 : logi NA NA NA NA NA NA ...
cat("Dimensi data awal:", nrow(data_raw), "baris x", ncol(data_raw), "kolom\n\n")
## Dimensi data awal: 923 baris x 26 kolom
# 1.2 Identify missing values
# ------------------------------------------------------------------------------
cat("--- 1.1 ANALISIS MISSING VALUES ---\n")
## --- 1.1 ANALISIS MISSING VALUES ---
missing_summary <- data_raw %>%
summarise(across(everything(), ~sum(is.na(.)))) %>%
pivot_longer(everything(), names_to = "Variabel", values_to = "Jumlah_NA") %>%
mutate(Persentase_NA = round((Jumlah_NA / nrow(data_raw)) * 100, 2))
print(missing_summary)
## # A tibble: 26 × 3
## Variabel Jumlah_NA Persentase_NA
## <chr> <int> <dbl>
## 1 Lokasi 623 67.5
## 2 pH 623 67.5
## 3 DO 646 70.0
## 4 BOD 645 69.9
## 5 TSS 647 70.1
## 6 Suhu 623 67.5
## 7 Status 623 67.5
## 8 X 923 100
## 9 X.1 923 100
## 10 X.2 923 100
## # ℹ 16 more rows
cat("\nTotal missing values:", sum(missing_summary$Jumlah_NA), "\n\n")
##
## Total missing values: 21967
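# Note: the summary above shows that columns X through X.18 are 100% NA and that
# roughly 623 rows are completely empty, which looks like blank rows/columns
# exported from the spreadsheet rather than real observations. An optional
# cleanup sketch (not run here, since it would change the counts reported below):
# data_raw <- data_raw %>%
#   select(where(~ !all(is.na(.)))) %>%        # drop all-NA columns
#   filter(if_any(everything(), ~ !is.na(.)))  # drop all-NA rows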
# 1.3 Identify outliers using the IQR method
# ------------------------------------------------------------------------------
cat("--- 1.2 IDENTIFIKASI OUTLIERS (IQR Method) ---\n")
## --- 1.2 IDENTIFIKASI OUTLIERS (IQR Method) ---
identify_outliers <- function(x) {
Q1 <- quantile(x, 0.25, na.rm = TRUE)
Q3 <- quantile(x, 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower_bound <- Q1 - 1.5 * IQR_val
upper_bound <- Q3 + 1.5 * IQR_val
outliers <- x < lower_bound | x > upper_bound
return(list(count = sum(outliers, na.rm = TRUE),
bounds = c(lower_bound, upper_bound)))
}
numeric_cols <- c("pH", "DO", "BOD", "TSS", "Suhu")
outlier_summary <- data.frame()
for(col in numeric_cols) {
result <- identify_outliers(data_raw[[col]])
outlier_summary <- rbind(outlier_summary, data.frame(
Variabel = col,
Jumlah_Outlier = result$count,
Batas_Bawah = round(result$bounds[1], 2),
Batas_Atas = round(result$bounds[2], 2)
))
}
print(outlier_summary)
## Variabel Jumlah_Outlier Batas_Bawah Batas_Atas
## 25% pH 4 5.70 8.29
## 25%1 DO 2 3.41 8.66
## 25%2 BOD 3 0.53 5.41
## 25%3 TSS 3 24.65 75.52
## 25%4 Suhu 2 22.35 33.73
cat("\n")
# 1.4 Identify inconsistencies in the Status categories
# ------------------------------------------------------------------------------
cat("--- 1.3 INKONSISTENSI KATEGORI STATUS ---\n")
## --- 1.3 INKONSISTENSI KATEGORI STATUS ---
cat("Kategori Status sebelum standarisasi:\n")
## Kategori Status sebelum standarisasi:
print(table(data_raw$Status))
##
## baik Baik BAIK Tercemar berat tercemar ringan
## 1 70 1 7 1
## Tercemar ringan Tercemar Ringan
## 219 1
cat("\n")
# 1.5 Handle missing values (median imputation)
# ------------------------------------------------------------------------------
cat("--- 1.4 PENANGANAN MISSING VALUES ---\n")
## --- 1.4 PENANGANAN MISSING VALUES ---
cat("Strategi: Imputasi menggunakan median untuk variabel numerik\n")
## Strategi: Imputasi menggunakan median untuk variabel numerik
cat("Alasan: Median robust terhadap outlier\n\n")
## Alasan: Median robust terhadap outlier
data_cleaned <- data_raw
for(col in numeric_cols) {
if(sum(is.na(data_cleaned[[col]])) > 0) {
median_val <- median(data_cleaned[[col]], na.rm = TRUE)
data_cleaned[[col]][is.na(data_cleaned[[col]])] <- median_val
cat(sprintf("%s: %d missing values diimputasi dengan median = %.4f\n",
col, sum(is.na(data_raw[[col]])), median_val))
}
}
## pH: 623 missing values diimputasi dengan median = 6.9880
## DO: 646 missing values diimputasi dengan median = 5.9909
## BOD: 645 missing values diimputasi dengan median = 3.0661
## TSS: 647 missing values diimputasi dengan median = 49.5221
## Suhu: 623 missing values diimputasi dengan median = 28.0148
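# Alternative sketch (not run): impute the median within each Status group instead
# of one global median, assuming Status is available for the rows being imputed;
# this preserves more of the original variation.
# data_cleaned <- data_cleaned %>%
#   group_by(Status) %>%
#   mutate(across(all_of(numeric_cols), ~ coalesce(., median(., na.rm = TRUE)))) %>%
#   ungroup()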
cat("\n")
# 1.6 Handle outliers (winsorization - cap at the 1st and 99th percentiles)
# ------------------------------------------------------------------------------
cat("--- 1.5 PENANGANAN OUTLIERS ---\n")
## --- 1.5 PENANGANAN OUTLIERS ---
cat("Strategi: Winsorization (cap pada persentil 1% dan 99%)\n")
## Strategi: Winsorization (cap pada persentil 1% dan 99%)
cat("Alasan: Mempertahankan struktur data sambil mengurangi pengaruh ekstrem\n\n")
## Alasan: Mempertahankan struktur data sambil mengurangi pengaruh ekstrem
for(col in numeric_cols) {
p01 <- quantile(data_cleaned[[col]], 0.01, na.rm = TRUE)
p99 <- quantile(data_cleaned[[col]], 0.99, na.rm = TRUE)
n_outliers <- sum(data_cleaned[[col]] < p01 | data_cleaned[[col]] > p99)
if(n_outliers > 0) {
data_cleaned[[col]][data_cleaned[[col]] < p01] <- p01
data_cleaned[[col]][data_cleaned[[col]] > p99] <- p99
cat(sprintf("%s: %d outliers di-cap (p01=%.2f, p99=%.2f)\n",
col, n_outliers, p01, p99))
}
}
## pH: 20 outliers di-cap (p01=6.11, p99=7.92)
## DO: 20 outliers di-cap (p01=4.08, p99=7.61)
## BOD: 20 outliers di-cap (p01=1.40, p99=4.47)
## TSS: 20 outliers di-cap (p01=31.71, p99=67.53)
## Suhu: 20 outliers di-cap (p01=24.28, p99=32.19)
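# The capping above can also be wrapped in a small reusable helper (sketch, not run):
# winsorize <- function(x, lower = 0.01, upper = 0.99) {
#   bounds <- quantile(x, c(lower, upper), na.rm = TRUE)
#   pmin(pmax(x, bounds[1]), bounds[2])
# }
# data_cleaned <- data_cleaned %>% mutate(across(all_of(numeric_cols), winsorize))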
cat("\n")
# 1.7 Standardize the Status categories
# ------------------------------------------------------------------------------
cat("--- 1.6 STANDARISASI KATEGORI STATUS ---\n")
## --- 1.6 STANDARISASI KATEGORI STATUS ---
data_cleaned$Status <- tolower(data_cleaned$Status) # Ubah ke lowercase
data_cleaned$Status <- gsub("_", " ", data_cleaned$Status) # Hapus underscore
data_cleaned$Status <- str_to_title(data_cleaned$Status) # Title case
# Mapping standar
data_cleaned$Status <- case_when(
data_cleaned$Status %in% c("Baik") ~ "Baik",
data_cleaned$Status %in% c("Tercemar Ringan") ~ "Tercemar Ringan",
data_cleaned$Status %in% c("Tercemar Berat") ~ "Tercemar Berat",
TRUE ~ data_cleaned$Status
)
cat("Kategori Status setelah standarisasi:\n")
## Kategori Status setelah standarisasi:
print(table(data_cleaned$Status))
##
## Baik Tercemar Berat Tercemar Ringan
## 72 7 221
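# Because na.strings included " ", stray whitespace could also slip into Status.
# An optional extra step (sketch, not run) trims it and fixes the level order:
# data_cleaned$Status <- str_squish(data_cleaned$Status)
# data_cleaned$Status <- factor(data_cleaned$Status,
#                               levels = c("Baik", "Tercemar Ringan", "Tercemar Berat"))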
cat("\n")
# 1.8 Descriptive statistics summary
# ------------------------------------------------------------------------------
cat("--- 1.7 RINGKASAN STATISTIK DESKRIPTIF (SETELAH CLEANING) ---\n\n")
## --- 1.7 RINGKASAN STATISTIK DESKRIPTIF (SETELAH CLEANING) ---
desc_stats <- data_cleaned %>%
select(all_of(numeric_cols)) %>%
summarise(across(everything(), list(
Min = ~round(min(., na.rm = TRUE), 2),
Q1 = ~round(quantile(., 0.25, na.rm = TRUE), 2),
Median = ~round(median(., na.rm = TRUE), 2),
Mean = ~round(mean(., na.rm = TRUE), 2),
Q3 = ~round(quantile(., 0.75, na.rm = TRUE), 2),
Max = ~round(max(., na.rm = TRUE), 2),
SD = ~round(sd(., na.rm = TRUE), 2)
))) %>%
pivot_longer(everything(), names_to = "Variabel_Stat", values_to = "Nilai") %>%
separate(Variabel_Stat, into = c("Variabel", "Statistik"), sep = "_") %>%
pivot_wider(names_from = Statistik, values_from = Nilai)
print(desc_stats)
## # A tibble: 5 × 8
## Variabel Min Q1 Median Mean Q3 Max SD
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 pH 6.11 6.99 6.99 6.99 6.99 7.92 0.26
## 2 DO 4.08 5.99 5.99 5.98 5.99 7.61 0.51
## 3 BOD 1.4 3.07 3.07 3.05 3.07 4.47 0.43
## 4 TSS 31.7 49.5 49.5 49.6 49.5 67.5 5
## 5 Suhu 24.3 28.0 28.0 28.0 28.0 32.2 1.12
cat("\n")
# Visualize the data distributions
cat("Creating distribution plots...\n")
## Creating distribution plots...
plot_list <- list()
for(col in numeric_cols) {
p <- ggplot(data_cleaned, aes(x = .data[[col]])) +
geom_histogram(fill = "steelblue", color = "white", bins = 30) +
geom_vline(xintercept = mean(data_cleaned[[col]]),
color = "red", linetype = "dashed", linewidth = 1) +
labs(title = paste("Distribution of", col), x = col, y = "Frequency") +
theme_minimal()
plot_list[[col]] <- p
}
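# To display the five histograms in one panel (optional, not run here):
# grid.arrange(grobs = plot_list, ncol = 3)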
# Correlation between variables
cor_matrix <- cor(data_cleaned[, numeric_cols], use = "complete.obs")
cat("\nMatriks Korelasi:\n")
##
## Matriks Korelasi:
print(round(cor_matrix, 3))
## pH DO BOD TSS Suhu
## pH 1.000 -0.008 -0.058 0.000 -0.039
## DO -0.008 1.000 0.032 0.008 -0.039
## BOD -0.058 0.032 1.000 -0.026 0.073
## TSS 0.000 0.008 -0.026 1.000 0.008
## Suhu -0.039 -0.039 0.073 0.008 1.000
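# corrplot is loaded above but not used yet; an optional visual of the matrix:
# corrplot(cor_matrix, method = "color", type = "upper",
#          addCoef.col = "black", tl.col = "black")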
# ===============================================================================
# PROBLEM 2: CLASSIFYING WATER QUALITY STATUS (35%)
# ===============================================================================
cat("\n\n==================== SOAL 2: KLASIFIKASI ====================\n")
##
##
## ==================== SOAL 2: KLASIFIKASI ====================
# 2.1 Persiapan Data untuk Klasifikasi
# ------------------------------------------------------------------------------
cat("--- 2.1 PERSIAPAN DATA ---\n")
## --- 2.1 PERSIAPAN DATA ---
# Pilih hanya variabel numerik dan Status
data_class <- data_cleaned %>%
select(pH, DO, BOD, TSS, Suhu, Status) %>%
filter(!is.na(Status)) %>%
mutate(Status = as.factor(Status))
cat("Dimensi data klasifikasi:", nrow(data_class), "x", ncol(data_class), "\n")
## Dimensi data klasifikasi: 300 x 6
cat("Distribusi kelas:\n")
## Distribusi kelas:
print(table(data_class$Status))
##
## Baik Tercemar Berat Tercemar Ringan
## 72 7 221
cat("\n")
# 2.2 Split the data: training (70%) and testing (30%)
# ------------------------------------------------------------------------------
cat("--- 2.2 SPLIT DATA (70% TRAIN, 30% TEST) ---\n")
## --- 2.2 SPLIT DATA (70% TRAIN, 30% TEST) ---
trainIndex <- createDataPartition(data_class$Status, p = 0.7, list = FALSE)
train_data <- data_class[trainIndex, ]
test_data <- data_class[-trainIndex, ]
cat("Data training:", nrow(train_data), "sampel\n")
## Data training: 211 sampel
cat("Data testing:", nrow(test_data), "sampel\n")
## Data testing: 89 sampel
cat("\nDistribusi kelas di training set:\n")
##
## Distribusi kelas di training set:
print(table(train_data$Status))
##
## Baik Tercemar Berat Tercemar Ringan
## 51 5 155
cat("\n")
# 2.3 Model 1: Support Vector Machine (SVM) - not SVR
# ------------------------------------------------------------------------------
# Note: SVR is for regression; for classification, use SVM
cat("--- 2.3 MODEL 1: SUPPORT VECTOR MACHINE (SVM) ---\n")
## --- 2.3 MODEL 1: SUPPORT VECTOR MACHINE (SVM) ---
cat("Using a radial (RBF) kernel\n\n")
## Using a radial (RBF) kernel
svm_model <- svm(Status ~ ., data = train_data,
kernel = "radial", cost = 1, gamma = 0.1)
# Predictions
svm_pred <- predict(svm_model, test_data)
# Confusion Matrix
svm_cm <- confusionMatrix(svm_pred, test_data$Status)
cat("Confusion Matrix - SVM:\n")
## Confusion Matrix - SVM:
print(svm_cm$table)
## Reference
## Prediction Baik Tercemar Berat Tercemar Ringan
## Baik 11 0 1
## Tercemar Berat 0 0 0
## Tercemar Ringan 10 2 65
cat("\nMetrik Evaluasi SVM:\n")
##
## Metrik Evaluasi SVM:
cat("Akurasi:", round(svm_cm$overall['Accuracy'] * 100, 2), "%\n")
## Akurasi: 85.39 %
cat("Kappa:", round(svm_cm$overall['Kappa'], 4), "\n\n")
## Kappa: 0.5528
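# The cost/gamma values above are fixed guesses; a hedged tuning sketch with
# e1071::tune.svm (not run here, can be slow):
# svm_tuned <- tune.svm(Status ~ ., data = train_data,
#                       cost = c(0.1, 1, 10), gamma = c(0.01, 0.1, 1))
# svm_tuned$best.parameters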
# 2.4 Model 2: Decision Tree
# ------------------------------------------------------------------------------
cat("--- 2.4 MODEL 2: DECISION TREE ---\n")
## --- 2.4 MODEL 2: DECISION TREE ---
dt_model <- rpart(Status ~ ., data = train_data,
method = "class", cp = 0.01)
# Predictions
dt_pred <- predict(dt_model, test_data, type = "class")
# Confusion Matrix
dt_cm <- confusionMatrix(dt_pred, test_data$Status)
cat("Confusion Matrix - Decision Tree:\n")
## Confusion Matrix - Decision Tree:
print(dt_cm$table)
## Reference
## Prediction Baik Tercemar Berat Tercemar Ringan
## Baik 18 0 1
## Tercemar Berat 0 0 0
## Tercemar Ringan 3 2 65
cat("\nMetrik Evaluasi Decision Tree:\n")
##
## Metrik Evaluasi Decision Tree:
cat("Akurasi:", round(dt_cm$overall['Accuracy'] * 100, 2), "%\n")
## Akurasi: 93.26 %
cat("Kappa:", round(dt_cm$overall['Kappa'], 4), "\n\n")
## Kappa: 0.816
cat("Visualisasi Decision Tree tersimpan (gunakan rpart.plot untuk plot)\n")
## Visualisasi Decision Tree tersimpan (gunakan rpart.plot untuk plot)
# rpart.plot(dt_model, extra = 104, box.palette = "RdYlGn")
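# Optional: inspect the complexity-parameter table and prune if needed (not run):
# printcp(dt_model)
# dt_pruned <- prune(dt_model,
#                    cp = dt_model$cptable[which.min(dt_model$cptable[, "xerror"]), "CP"])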
# 2.5 Model 3: Random Forest
# ------------------------------------------------------------------------------
cat("--- 2.5 MODEL 3: RANDOM FOREST ---\n")
## --- 2.5 MODEL 3: RANDOM FOREST ---
rf_model <- randomForest(Status ~ ., data = train_data,
ntree = 500, mtry = 2, importance = TRUE)
# Predictions
rf_pred <- predict(rf_model, test_data)
# Confusion Matrix
rf_cm <- confusionMatrix(rf_pred, test_data$Status)
cat("Confusion Matrix - Random Forest:\n")
## Confusion Matrix - Random Forest:
print(rf_cm$table)
## Reference
## Prediction Baik Tercemar Berat Tercemar Ringan
## Baik 18 0 1
## Tercemar Berat 0 0 0
## Tercemar Ringan 3 2 65
cat("\nMetrik Evaluasi Random Forest:\n")
##
## Metrik Evaluasi Random Forest:
cat("Akurasi:", round(rf_cm$overall['Accuracy'] * 100, 2), "%\n")
## Akurasi: 93.26 %
cat("Kappa:", round(rf_cm$overall['Kappa'], 4), "\n\n")
## Kappa: 0.816
# Variable Importance
cat("Variable Importance (Random Forest):\n")
## Variable Importance (Random Forest):
importance_rf <- importance(rf_model)
print(round(importance_rf, 2))
## Baik Tercemar Berat Tercemar Ringan MeanDecreaseAccuracy MeanDecreaseGini
## pH -0.73 -2.55 -2.69 -2.91 5.02
## DO 61.05 9.02 49.89 65.07 36.75
## BOD 55.77 6.31 50.03 62.65 30.49
## TSS 3.80 -2.68 -0.28 1.56 5.85
## Suhu -3.21 0.59 0.39 -1.22 6.12
cat("\n")
# 2.6 Model comparison
# ------------------------------------------------------------------------------
cat("--- 2.6 PERBANDINGAN MODEL KLASIFIKASI ---\n\n")
## --- 2.6 PERBANDINGAN MODEL KLASIFIKASI ---
comparison <- data.frame(
Model = c("SVM", "Decision Tree", "Random Forest"),
Akurasi = c(svm_cm$overall['Accuracy'],
dt_cm$overall['Accuracy'],
rf_cm$overall['Accuracy']) * 100,
Kappa = c(svm_cm$overall['Kappa'],
dt_cm$overall['Kappa'],
rf_cm$overall['Kappa'])
)
comparison$Akurasi <- round(comparison$Akurasi, 2)
comparison$Kappa <- round(comparison$Kappa, 4)
print(comparison)
## Model Akurasi Kappa
## 1 SVM 85.39 0.5528
## 2 Decision Tree 93.26 0.8160
## 3 Random Forest 93.26 0.8160
best_model <- comparison$Model[which.max(comparison$Akurasi)]
cat("\n** Model terbaik:", best_model, "**\n")
##
## ** Model terbaik: Decision Tree **
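# Note: Decision Tree and Random Forest are tied on accuracy here; which.max()
# simply returns the first of the tied rows. To list every model at the maximum:
# comparison$Model[comparison$Akurasi == max(comparison$Akurasi)]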
# ===============================================================================
# ADDITIONAL: OUTPUT OF 75 CLASSIFICATION PREDICTION RESULTS
# ===============================================================================
cat("\n--- OUTPUT 75 HASIL PREDIKSI KLASIFIKASI ---\n\n")
##
## --- OUTPUT 75 HASIL PREDIKSI KLASIFIKASI ---
# Gabungkan semua hasil prediksi
hasil_klasifikasi <- data.frame(
No = 1:nrow(test_data),
pH = test_data$pH,
DO = test_data$DO,
BOD = test_data$BOD,
TSS = test_data$TSS,
Suhu = test_data$Suhu,
Status_Aktual = test_data$Status,
Prediksi_SVM = svm_pred,
Prediksi_DecisionTree = dt_pred,
Prediksi_RandomForest = rf_pred,
Benar_SVM = ifelse(svm_pred == test_data$Status, "✓", "✗"),
Benar_DT = ifelse(dt_pred == test_data$Status, "✓", "✗"),
Benar_RF = ifelse(rf_pred == test_data$Status, "✓", "✗")
)
# Show the first 75 rows
n_show <- min(75, nrow(hasil_klasifikasi))
cat("Showing", n_show, "of", nrow(hasil_klasifikasi), "total test rows:\n\n")
## Showing 75 of 89 total test rows:
# Compact table format
hasil_ringkas_class <- hasil_klasifikasi[1:n_show, c("No", "Status_Aktual",
"Prediksi_SVM", "Benar_SVM",
"Prediksi_DecisionTree", "Benar_DT",
"Prediksi_RandomForest", "Benar_RF")]
cat("\nTABEL: Hasil Prediksi Klasifikasi (SVM, Decision Tree, Random Forest)\n")
##
## TABEL: Hasil Prediksi Klasifikasi (SVM, Decision Tree, Random Forest)
cat(rep("=", 120), "\n", sep="")
## ========================================================================================================================
cat(sprintf("%-4s | %-18s | %-18s | %-5s | %-18s | %-5s | %-18s | %-5s\n",
"No", "Status Aktual", "Pred SVM", "OK?", "Pred DT", "OK?", "Pred RF", "OK?"))
## No | Status Aktual | Pred SVM | OK? | Pred DT | OK? | Pred RF | OK?
cat(rep("-", 120), "\n", sep="")
## ------------------------------------------------------------------------------------------------------------------------
for(i in 1:n_show) {
cat(sprintf("%-4d | %-18s | %-18s | %-5s | %-18s | %-5s | %-18s | %-5s\n",
hasil_ringkas_class$No[i],
as.character(hasil_ringkas_class$Status_Aktual[i]),
as.character(hasil_ringkas_class$Prediksi_SVM[i]),
hasil_ringkas_class$Benar_SVM[i],
as.character(hasil_ringkas_class$Prediksi_DecisionTree[i]),
hasil_ringkas_class$Benar_DT[i],
as.character(hasil_ringkas_class$Prediksi_RandomForest[i]),
hasil_ringkas_class$Benar_RF[i]))
}
## 1 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 2 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 3 | Baik | Tercemar Ringan | ✗ | Baik | ✓ | Baik | ✓
## 4 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 5 | Baik | Tercemar Ringan | ✗ | Tercemar Ringan | ✗ | Tercemar Ringan | ✗
## 6 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 7 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 8 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 9 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 10 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 11 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 12 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 13 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 14 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 15 | Baik | Tercemar Ringan | ✗ | Tercemar Ringan | ✗ | Tercemar Ringan | ✗
## 16 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 17 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 18 | Baik | Tercemar Ringan | ✗ | Baik | ✓ | Baik | ✓
## 19 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 20 | Baik | Tercemar Ringan | ✗ | Baik | ✓ | Baik | ✓
## 21 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 22 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 23 | Tercemar Ringan | Baik | ✗ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 24 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 25 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 26 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 27 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 28 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 29 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 30 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 31 | Baik | Tercemar Ringan | ✗ | Baik | ✓ | Baik | ✓
## 32 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 33 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 34 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 35 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 36 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 37 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 38 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 39 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 40 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 41 | Tercemar Berat | Tercemar Ringan | ✗ | Tercemar Ringan | ✗ | Tercemar Ringan | ✗
## 42 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 43 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 44 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 45 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 46 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 47 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 48 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 49 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 50 | Baik | Tercemar Ringan | ✗ | Baik | ✓ | Baik | ✓
## 51 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 52 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 53 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 54 | Baik | Tercemar Ringan | ✗ | Baik | ✓ | Baik | ✓
## 55 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 56 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 57 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 58 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 59 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 60 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 61 | Baik | Tercemar Ringan | ✗ | Baik | ✓ | Baik | ✓
## 62 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 63 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 64 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 65 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 66 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 67 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 68 | Tercemar Berat | Tercemar Ringan | ✗ | Tercemar Ringan | ✗ | Tercemar Ringan | ✗
## 69 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 70 | Baik | Baik | ✓ | Baik | ✓ | Baik | ✓
## 71 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 72 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 73 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 74 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
## 75 | Tercemar Ringan | Tercemar Ringan | ✓ | Tercemar Ringan | ✓ | Tercemar Ringan | ✓
cat(rep("=", 120), "\n", sep="")
## ========================================================================================================================
# Preview in plain data.frame format
cat("\n\nPreview of the first 75 rows (data.frame format):\n")
##
##
## Preview of the first 75 rows (data.frame format):
print(head(hasil_ringkas_class, 75))
## No Status_Aktual Prediksi_SVM Benar_SVM Prediksi_DecisionTree Benar_DT
## 2 1 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 3 2 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 13 3 Baik Tercemar Ringan ✗ Baik ✓
## 15 4 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 16 5 Baik Tercemar Ringan ✗ Tercemar Ringan ✗
## 20 6 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 25 7 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 27 8 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 34 9 Baik Baik ✓ Baik ✓
## 38 10 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 39 11 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 42 12 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 51 13 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 52 14 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 56 15 Baik Tercemar Ringan ✗ Tercemar Ringan ✗
## 59 16 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 64 17 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 67 18 Baik Tercemar Ringan ✗ Baik ✓
## 75 19 Baik Baik ✓ Baik ✓
## 80 20 Baik Tercemar Ringan ✗ Baik ✓
## 81 21 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 90 22 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 96 23 Tercemar Ringan Baik ✗ Tercemar Ringan ✓
## 98 24 Baik Baik ✓ Baik ✓
## 100 25 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 103 26 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 106 27 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 109 28 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 112 29 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 114 30 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 115 31 Baik Tercemar Ringan ✗ Baik ✓
## 116 32 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 126 33 Baik Baik ✓ Baik ✓
## 131 34 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 138 35 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 139 36 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 141 37 Baik Baik ✓ Baik ✓
## 142 38 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 146 39 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 149 40 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 150 41 Tercemar Berat Tercemar Ringan ✗ Tercemar Ringan ✗
## 160 42 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 165 43 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 169 44 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 171 45 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 172 46 Baik Baik ✓ Baik ✓
## 173 47 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 174 48 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 177 49 Baik Baik ✓ Baik ✓
## 179 50 Baik Tercemar Ringan ✗ Baik ✓
## 180 51 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 181 52 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 184 53 Baik Baik ✓ Baik ✓
## 189 54 Baik Tercemar Ringan ✗ Baik ✓
## 193 55 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 194 56 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 198 57 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 206 58 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 213 59 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 214 60 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 218 61 Baik Tercemar Ringan ✗ Baik ✓
## 223 62 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 225 63 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 230 64 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 234 65 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 236 66 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 237 67 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 238 68 Tercemar Berat Tercemar Ringan ✗ Tercemar Ringan ✗
## 242 69 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 243 70 Baik Baik ✓ Baik ✓
## 245 71 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 246 72 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 248 73 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 250 74 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## 254 75 Tercemar Ringan Tercemar Ringan ✓ Tercemar Ringan ✓
## Prediksi_RandomForest Benar_RF
## 2 Tercemar Ringan ✓
## 3 Tercemar Ringan ✓
## 13 Baik ✓
## 15 Tercemar Ringan ✓
## 16 Tercemar Ringan ✗
## 20 Tercemar Ringan ✓
## 25 Tercemar Ringan ✓
## 27 Tercemar Ringan ✓
## 34 Baik ✓
## 38 Tercemar Ringan ✓
## 39 Tercemar Ringan ✓
## 42 Tercemar Ringan ✓
## 51 Tercemar Ringan ✓
## 52 Tercemar Ringan ✓
## 56 Tercemar Ringan ✗
## 59 Tercemar Ringan ✓
## 64 Tercemar Ringan ✓
## 67 Baik ✓
## 75 Baik ✓
## 80 Baik ✓
## 81 Tercemar Ringan ✓
## 90 Tercemar Ringan ✓
## 96 Tercemar Ringan ✓
## 98 Baik ✓
## 100 Tercemar Ringan ✓
## 103 Tercemar Ringan ✓
## 106 Tercemar Ringan ✓
## 109 Tercemar Ringan ✓
## 112 Tercemar Ringan ✓
## 114 Tercemar Ringan ✓
## 115 Baik ✓
## 116 Tercemar Ringan ✓
## 126 Baik ✓
## 131 Tercemar Ringan ✓
## 138 Tercemar Ringan ✓
## 139 Tercemar Ringan ✓
## 141 Baik ✓
## 142 Tercemar Ringan ✓
## 146 Tercemar Ringan ✓
## 149 Tercemar Ringan ✓
## 150 Tercemar Ringan ✗
## 160 Tercemar Ringan ✓
## 165 Tercemar Ringan ✓
## 169 Tercemar Ringan ✓
## 171 Tercemar Ringan ✓
## 172 Baik ✓
## 173 Tercemar Ringan ✓
## 174 Tercemar Ringan ✓
## 177 Baik ✓
## 179 Baik ✓
## 180 Tercemar Ringan ✓
## 181 Tercemar Ringan ✓
## 184 Baik ✓
## 189 Baik ✓
## 193 Tercemar Ringan ✓
## 194 Tercemar Ringan ✓
## 198 Tercemar Ringan ✓
## 206 Tercemar Ringan ✓
## 213 Tercemar Ringan ✓
## 214 Tercemar Ringan ✓
## 218 Baik ✓
## 223 Tercemar Ringan ✓
## 225 Tercemar Ringan ✓
## 230 Tercemar Ringan ✓
## 234 Tercemar Ringan ✓
## 236 Tercemar Ringan ✓
## 237 Tercemar Ringan ✓
## 238 Tercemar Ringan ✗
## 242 Tercemar Ringan ✓
## 243 Baik ✓
## 245 Tercemar Ringan ✓
## 246 Tercemar Ringan ✓
## 248 Tercemar Ringan ✓
## 250 Tercemar Ringan ✓
## 254 Tercemar Ringan ✓
# Save to CSV
write.csv(hasil_klasifikasi, "hasil_prediksi_klasifikasi.csv", row.names = FALSE)
cat("\n✓ All classification predictions (", nrow(hasil_klasifikasi),
" rows) saved to 'hasil_prediksi_klasifikasi.csv'\n")
##
## ✓ All classification predictions ( 89 rows) saved to 'hasil_prediksi_klasifikasi.csv'
# Accuracy statistics per model
cat("\n=== ACCURACY STATISTICS PER MODEL ===\n")
##
## === ACCURACY STATISTICS PER MODEL ===
akurasi_stats <- data.frame(
Model = c("SVM", "Decision Tree", "Random Forest"),
Benar = c(sum(svm_pred == test_data$Status),
sum(dt_pred == test_data$Status),
sum(rf_pred == test_data$Status)),
Salah = c(sum(svm_pred != test_data$Status),
sum(dt_pred != test_data$Status),
sum(rf_pred != test_data$Status)),
Akurasi_Persen = c(mean(svm_pred == test_data$Status) * 100,
mean(dt_pred == test_data$Status) * 100,
mean(rf_pred == test_data$Status) * 100)
)
akurasi_stats$Akurasi_Persen <- round(akurasi_stats$Akurasi_Persen, 2)
print(akurasi_stats)
## Model Benar Salah Akurasi_Persen
## 1 SVM 76 13 85.39
## 2 Decision Tree 83 6 93.26
## 3 Random Forest 83 6 93.26
# Identify the misclassified predictions
cat("\n=== SAMPLE OF MISCLASSIFIED PREDICTIONS (first 10 rows) ===\n")
##
## === SAMPLE OF MISCLASSIFIED PREDICTIONS (first 10 rows) ===
prediksi_salah <- hasil_klasifikasi[hasil_klasifikasi$Benar_RF == "✗",
c("No", "pH", "DO", "BOD", "Status_Aktual",
"Prediksi_RandomForest")]
if(nrow(prediksi_salah) > 0) {
print(head(prediksi_salah, 10))
} else {
cat("Tidak ada prediksi yang salah!\n")
}
## No pH DO BOD Status_Aktual Prediksi_RandomForest
## 16 5 7.318000 5.990900 2.655000 Baik Tercemar Ringan
## 56 15 7.138300 5.990900 2.752800 Baik Tercemar Ringan
## 150 41 6.471800 7.605044 4.469626 Tercemar Berat Tercemar Ringan
## 238 68 6.988600 7.605044 4.469626 Tercemar Berat Tercemar Ringan
## 269 78 6.111302 6.664400 3.067500 Tercemar Ringan Baik
## 280 83 7.602200 5.990900 2.178800 Baik Tercemar Ringan
cat("\nINTERPRETASI HASIL KLASIFIKASI:\n")
##
## INTERPRETASI HASIL KLASIFIKASI:
cat("1. Akurasi menunjukkan persentase prediksi yang benar\n")
## 1. Akurasi menunjukkan persentase prediksi yang benar
cat("2. Kappa mengukur agreement antara prediksi dan aktual (0-1, semakin tinggi semakin baik)\n")
## 2. Kappa mengukur agreement antara prediksi dan aktual (0-1, semakin tinggi semakin baik)
cat("3. Confusion matrix menunjukkan detail prediksi benar/salah per kelas\n")
## 3. Confusion matrix menunjukkan detail prediksi benar/salah per kelas
cat("4. Random Forest umumnya memberikan performa terbaik karena ensemble method\n")
## 4. Random Forest umumnya memberikan performa terbaik karena ensemble method
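# Per-class metrics (sensitivity, specificity, balanced accuracy) are available from
# the caret confusion matrix objects, e.g. for Random Forest (sketch, not run):
# round(rf_cm$byClass[, c("Sensitivity", "Specificity", "Balanced Accuracy")], 3)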
# ===============================================================================
# PROBLEM 3: PREDICTING THE DO VARIABLE (35%)
# ===============================================================================
cat("\n\n==================== SOAL 3: PREDIKSI DO ====================\n")
##
##
## ==================== SOAL 3: PREDIKSI DO ====================
# 3.1 Persiapan Data untuk Regresi
# ------------------------------------------------------------------------------
cat("--- 3.1 PERSIAPAN DATA REGRESI ---\n")
## --- 3.1 PERSIAPAN DATA REGRESI ---
data_reg <- data_cleaned %>%
select(pH, BOD, TSS, Suhu, DO) %>%
na.omit()
cat("Dimensi data regresi:", nrow(data_reg), "x", ncol(data_reg), "\n")
## Dimensi data regresi: 923 x 5
cat("Target: DO (Dissolved Oxygen)\n")
## Target: DO (Dissolved Oxygen)
cat("Prediktor: pH, BOD, TSS, Suhu\n\n")
## Prediktor: pH, BOD, TSS, Suhu
# Split data
set.seed(123)
train_idx <- sample(1:nrow(data_reg), 0.7 * nrow(data_reg))
train_reg <- data_reg[train_idx, ]
test_reg <- data_reg[-train_idx, ]
cat("Data training:", nrow(train_reg), "sampel\n")
## Data training: 646 sampel
cat("Data testing:", nrow(test_reg), "sampel\n\n")
## Data testing: 277 sampel
# 3.2 Model 1: Linear regression
# ------------------------------------------------------------------------------
cat("--- 3.2 MODEL 1: REGRESI LINEAR ---\n")
## --- 3.2 MODEL 1: REGRESI LINEAR ---
lm_model <- lm(DO ~ pH + BOD + TSS + Suhu, data = train_reg)
cat("Summary Model Regresi Linear:\n")
## Summary Model Regresi Linear:
print(summary(lm_model))
##
## Call:
## lm(formula = DO ~ pH + BOD + TSS + Suhu, data = train_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.02762 -0.00475 -0.00475 -0.00475 1.72510
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.969695 0.773042 9.016 <2e-16 ***
## pH -0.080589 0.072288 -1.115 0.2653
## BOD 0.079226 0.044718 1.772 0.0769 .
## TSS -0.002892 0.003896 -0.742 0.4582
## Suhu -0.018226 0.017404 -1.047 0.2954
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4961 on 641 degrees of freedom
## Multiple R-squared: 0.008897, Adjusted R-squared: 0.002712
## F-statistic: 1.438 on 4 and 641 DF, p-value: 0.2197
cat("\n")
# Predictions
lm_pred <- predict(lm_model, test_reg)
# Evaluation
lm_r2 <- cor(lm_pred, test_reg$DO)^2
lm_mse <- mean((lm_pred - test_reg$DO)^2)
lm_rmse <- sqrt(lm_mse)
cat("Linear regression evaluation:\n")
## Linear regression evaluation:
cat("R² (Test):", round(lm_r2, 4), "\n")
## R² (Test): 0.0046
cat("MSE:", round(lm_mse, 4), "\n")
## MSE: 0.3079
cat("RMSE:", round(lm_rmse, 4), "\n\n")
## RMSE: 0.5549
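# The very low test R² is largely a consequence of step 1.4: roughly 70% of the DO
# values (and of every predictor) are now a single imputed median, so little genuine
# variation is left to model. A quick diagnostic (sketch, not run):
# mean(data_reg$DO == median(data_raw$DO, na.rm = TRUE))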
# 3.3 Model 2: Spline regression
# ------------------------------------------------------------------------------
cat("--- 3.3 MODEL 2: REGRESI SPLINE ---\n")
## --- 3.3 MODEL 2: REGRESI SPLINE ---
cat("Menggunakan natural cubic spline dengan df=4 untuk setiap prediktor\n\n")
## Menggunakan natural cubic spline dengan df=4 untuk setiap prediktor
library(splines)
spline_model <- lm(DO ~ ns(pH, df=4) + ns(BOD, df=4) +
ns(TSS, df=4) + ns(Suhu, df=4),
data = train_reg)
cat("Summary Model Regresi Spline:\n")
## Summary Model Regresi Spline:
print(summary(spline_model))
##
## Call:
## lm(formula = DO ~ ns(pH, df = 4) + ns(BOD, df = 4) + ns(TSS,
## df = 4) + ns(Suhu, df = 4), data = train_reg)
##
## Residuals:
## Min 1Q Median 3Q Max
## -1.99657 -0.00444 -0.00444 -0.00444 1.93941
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 5.81052 0.28292 20.538 <2e-16 ***
## ns(pH, df = 4)1 0.29343 0.14613 2.008 0.0451 *
## ns(pH, df = 4)2 -0.32736 0.16260 -2.013 0.0445 *
## ns(pH, df = 4)3 0.12921 0.36349 0.355 0.7224
## ns(pH, df = 4)4 0.02394 0.15303 0.156 0.8757
## ns(BOD, df = 4)1 0.18995 0.14408 1.318 0.1879
## ns(BOD, df = 4)2 0.21443 0.16652 1.288 0.1983
## ns(BOD, df = 4)3 0.73057 0.35275 2.071 0.0388 *
## ns(BOD, df = 4)4 -0.02372 0.16116 -0.147 0.8830
## ns(TSS, df = 4)1 0.03636 0.15433 0.236 0.8138
## ns(TSS, df = 4)2 -0.02243 0.15563 -0.144 0.8855
## ns(TSS, df = 4)3 0.34320 0.37237 0.922 0.3571
## ns(TSS, df = 4)4 0.02929 0.15799 0.185 0.8530
## ns(Suhu, df = 4)1 -0.33493 0.15021 -2.230 0.0261 *
## ns(Suhu, df = 4)2 -0.27577 0.15453 -1.785 0.0748 .
## ns(Suhu, df = 4)3 -0.58442 0.39302 -1.487 0.1375
## ns(Suhu, df = 4)4 0.04031 0.15741 0.256 0.7980
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4928 on 629 degrees of freedom
## Multiple R-squared: 0.04061, Adjusted R-squared: 0.01621
## F-statistic: 1.664 on 16 and 629 DF, p-value: 0.04907
cat("\n")
# Predictions
spline_pred <- predict(spline_model, test_reg)
# Evaluation
spline_r2 <- cor(spline_pred, test_reg$DO)^2
spline_mse <- mean((spline_pred - test_reg$DO)^2)
spline_rmse <- sqrt(spline_mse)
cat("Spline regression evaluation:\n")
## Spline regression evaluation:
cat("R² (Test):", round(spline_r2, 4), "\n")
## R² (Test): 0.0036
cat("MSE:", round(spline_mse, 4), "\n")
## MSE: 0.3169
cat("RMSE:", round(spline_rmse, 4), "\n\n")
## RMSE: 0.5629
# 3.4 Compare the regression models
# ------------------------------------------------------------------------------
cat("--- 3.4 PERBANDINGAN MODEL REGRESI ---\n\n")
## --- 3.4 PERBANDINGAN MODEL REGRESI ---
reg_comparison <- data.frame(
Model = c("Linear Regression", "Spline Regression"),
R_squared = c(lm_r2, spline_r2),
MSE = c(lm_mse, spline_mse),
RMSE = c(lm_rmse, spline_rmse)
)
reg_comparison[, 2:4] <- round(reg_comparison[, 2:4], 4)
print(reg_comparison)
## Model R_squared MSE RMSE
## 1 Linear Regression 0.0046 0.3079 0.5549
## 2 Spline Regression 0.0036 0.3169 0.5629
best_reg <- reg_comparison$Model[which.max(reg_comparison$R_squared)]
cat("\n** Model regresi terbaik:", best_reg, "**\n\n")
##
## ** Model regresi terbaik: Regresi Linear **
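# A more robust comparison would cross-validate both models instead of relying on a
# single split; a hedged sketch with caret (not run here):
# ctrl <- trainControl(method = "cv", number = 10)
# cv_lm <- train(DO ~ pH + BOD + TSS + Suhu, data = data_reg,
#                method = "lm", trControl = ctrl)
# cv_lm$results[, c("RMSE", "Rsquared", "MAE")]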
# 3.5 Visualize predictions vs. actual values
# ------------------------------------------------------------------------------
cat("--- 3.5 VISUALISASI HASIL PREDIKSI ---\n")
## --- 3.5 VISUALISASI HASIL PREDIKSI ---
# Combine results
results_viz <- data.frame(
Actual = test_reg$DO,
Linear = lm_pred,
Spline = spline_pred
)
# Plot 1: Linear Regression
p1 <- ggplot(results_viz, aes(x = Actual, y = Linear)) +
geom_point(color = "steelblue", alpha = 0.6, size = 2) +
geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed", linewidth = 1) +
labs(title = "Regresi Linear: Prediksi vs Aktual",
x = "DO Aktual", y = "DO Prediksi") +
annotate("text", x = min(results_viz$Actual), y = max(results_viz$Linear),
label = paste0("R² = ", round(lm_r2, 4), "\nRMSE = ", round(lm_rmse, 4)),
hjust = 0, vjust = 1, size = 4) +
theme_minimal() +
theme(plot.title = element_text(face = "bold"))
# Plot 2: Spline Regression
p2 <- ggplot(results_viz, aes(x = Actual, y = Spline)) +
geom_point(color = "darkgreen", alpha = 0.6, size = 2) +
geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed", linewidth = 1) +
labs(title = "Regresi Spline: Prediksi vs Aktual",
x = "DO Aktual", y = "DO Prediksi") +
annotate("text", x = min(results_viz$Actual), y = max(results_viz$Spline),
label = paste0("R² = ", round(spline_r2, 4), "\nRMSE = ", round(spline_rmse, 4)),
hjust = 0, vjust = 1, size = 4) +
theme_minimal() +
theme(plot.title = element_text(face = "bold"))
# Plot 3: Residual Analysis
results_viz$Linear_Residual <- results_viz$Actual - results_viz$Linear
results_viz$Spline_Residual <- results_viz$Actual - results_viz$Spline
p3 <- ggplot(results_viz, aes(x = Linear, y = Linear_Residual)) +
geom_point(color = "steelblue", alpha = 0.6) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
labs(title = "Residual Plot - Regresi Linear",
x = "Prediksi", y = "Residual") +
theme_minimal()
p4 <- ggplot(results_viz, aes(x = Spline, y = Spline_Residual)) +
geom_point(color = "darkgreen", alpha = 0.6) +
geom_hline(yintercept = 0, color = "red", linetype = "dashed") +
labs(title = "Residual Plot - Regresi Spline",
x = "Prediksi", y = "Residual") +
theme_minimal()
cat("Visualisasi dibuat. Gunakan grid.arrange untuk menampilkan:\n")
## Visualisasi dibuat. Gunakan grid.arrange untuk menampilkan:
cat("grid.arrange(p1, p2, p3, p4, ncol=2)\n\n")
## grid.arrange(p1, p2, p3, p4, ncol=2)
# 3.6 Output 75 rows of prediction results
# ------------------------------------------------------------------------------
cat("--- 3.6 OUTPUT 75 BARIS HASIL PREDIKSI ---\n\n")
## --- 3.6 OUTPUT 75 BARIS HASIL PREDIKSI ---
# Gabungkan hasil prediksi dengan informasi lengkap
hasil_prediksi <- data.frame(
No = 1:nrow(test_reg),
pH = test_reg$pH,
BOD = test_reg$BOD,
TSS = test_reg$TSS,
Suhu = test_reg$Suhu,
DO_Aktual = test_reg$DO,
DO_Prediksi_Linear = lm_pred,
DO_Prediksi_Spline = spline_pred,
Error_Linear = test_reg$DO - lm_pred,
Error_Spline = test_reg$DO - spline_pred,
Abs_Error_Linear = abs(test_reg$DO - lm_pred),
Abs_Error_Spline = abs(test_reg$DO - spline_pred)
)
# Round the numbers
hasil_prediksi[, 2:12] <- round(hasil_prediksi[, 2:12], 4)
# Show the first 75 rows (or all of them if there are fewer than 75)
n_show <- min(75, nrow(hasil_prediksi))
cat("Showing", n_show, "of", nrow(hasil_prediksi), "total test rows:\n\n")
## Showing 75 of 277 total test rows:
# Show a compact version
cat("=== PREDICTION SUMMARY (key columns) ===\n")
## === PREDICTION SUMMARY (key columns) ===
hasil_ringkas <- hasil_prediksi[1:n_show, c("No", "DO_Aktual", "DO_Prediksi_Linear",
"DO_Prediksi_Spline", "Error_Linear", "Error_Spline")]
# Print all n_show rows
cat("\nTABLE: DO prediction comparison (Linear vs Spline)\n")
##
## TABLE: DO prediction comparison (Linear vs Spline)
cat(rep("=", 90), "\n", sep="")
## ==========================================================================================
cat(sprintf("%-4s | %-10s | %-10s | %-10s | %-10s | %-10s\n",
"No", "DO_Aktual", "Pred_Linear", "Pred_Spline", "Err_Linear", "Err_Spline"))
## No | DO_Aktual | Pred_Linear | Pred_Spline | Err_Linear | Err_Spline
cat(rep("-", 90), "\n", sep="")
## ------------------------------------------------------------------------------------------
for(i in 1:n_show) {
cat(sprintf("%-4d | %10.4f | %10.4f | %10.4f | %10.4f | %10.4f\n",
hasil_ringkas$No[i],
hasil_ringkas$DO_Aktual[i],
hasil_ringkas$DO_Prediksi_Linear[i],
hasil_ringkas$DO_Prediksi_Spline[i],
hasil_ringkas$Error_Linear[i],
hasil_ringkas$Error_Spline[i]))
}
## 1 | 5.9909 | 5.8729 | 5.7930 | 0.1180 | 0.1979
## 2 | 4.8906 | 5.9895 | 5.9743 | -1.0989 | -1.0837
## 3 | 6.1339 | 5.9696 | 5.8028 | 0.1643 | 0.3311
## 4 | 4.9232 | 5.9037 | 5.7398 | -0.9805 | -0.8166
## 5 | 7.3885 | 5.8411 | 5.9604 | 1.5474 | 1.4281
## 6 | 5.6952 | 5.8349 | 6.0091 | -0.1397 | -0.3139
## 7 | 6.6876 | 6.0440 | 6.1994 | 0.6436 | 0.4882
## 8 | 5.6981 | 5.8970 | 6.1743 | -0.1989 | -0.4762
## 9 | 6.4983 | 6.0404 | 6.0423 | 0.4579 | 0.4560
## 10 | 6.4420 | 5.9361 | 6.1147 | 0.5059 | 0.3273
## 11 | 6.9310 | 5.9480 | 6.0498 | 0.9830 | 0.8812
## 12 | 5.1307 | 5.9566 | 6.0445 | -0.8259 | -0.9138
## 13 | 5.9909 | 6.0761 | 5.8530 | -0.0852 | 0.1379
## 14 | 5.9976 | 5.9730 | 5.9354 | 0.0246 | 0.0622
## 15 | 4.0908 | 6.0586 | 5.7733 | -1.9678 | -1.6825
## 16 | 4.0785 | 6.0533 | 5.9084 | -1.9748 | -1.8299
## 17 | 5.9909 | 6.1077 | 6.2335 | -0.1168 | -0.2426
## 18 | 5.3681 | 6.0664 | 6.1549 | -0.6983 | -0.7868
## 19 | 4.5465 | 5.9111 | 5.7007 | -1.3646 | -1.1542
## 20 | 6.8883 | 5.9633 | 6.0891 | 0.9250 | 0.7992
## 21 | 5.5292 | 6.0341 | 6.1015 | -0.5049 | -0.5723
## 22 | 5.9595 | 5.9873 | 5.8698 | -0.0278 | 0.0897
## 23 | 4.0785 | 6.0931 | 5.8767 | -2.0146 | -1.7983
## 24 | 5.7499 | 6.0421 | 5.9031 | -0.2922 | -0.1532
## 25 | 7.3579 | 5.8181 | 5.8576 | 1.5398 | 1.5003
## 26 | 6.9507 | 6.0365 | 5.6722 | 0.9142 | 1.2785
## 27 | 5.7006 | 5.9457 | 5.8517 | -0.2451 | -0.1511
## 28 | 6.5461 | 5.9891 | 6.0706 | 0.5570 | 0.4755
## 29 | 5.7491 | 5.9095 | 5.7701 | -0.1604 | -0.0210
## 30 | 7.6050 | 6.1224 | 6.0961 | 1.4826 | 1.5089
## 31 | 6.8144 | 6.0025 | 5.7663 | 0.8119 | 1.0481
## 32 | 7.3091 | 5.8868 | 6.1362 | 1.4223 | 1.1729
## 33 | 6.7504 | 5.9858 | 5.6929 | 0.7646 | 1.0575
## 34 | 4.5419 | 6.0044 | 6.0867 | -1.4625 | -1.5448
## 35 | 4.0785 | 5.8802 | 6.2133 | -1.8017 | -2.1348
## 36 | 5.6088 | 5.8967 | 5.8602 | -0.2879 | -0.2514
## 37 | 5.5088 | 5.9145 | 5.9768 | -0.4057 | -0.4680
## 38 | 5.7164 | 6.0048 | 5.8262 | -0.2884 | -0.1098
## 39 | 4.0785 | 6.0540 | 6.0538 | -1.9755 | -1.9753
## 40 | 4.5303 | 5.9648 | 6.1223 | -1.4345 | -1.5920
## 41 | 5.4644 | 5.9912 | 5.9599 | -0.5268 | -0.4955
## 42 | 4.8107 | 6.0089 | 6.2992 | -1.1982 | -1.4885
## 43 | 7.0291 | 6.0065 | 6.0737 | 1.0226 | 0.9554
## 44 | 5.0330 | 6.1597 | 5.9960 | -1.1267 | -0.9630
## 45 | 7.6050 | 6.0496 | 6.2902 | 1.5554 | 1.3148
## 46 | 4.2802 | 6.0528 | 5.9356 | -1.7726 | -1.6554
## 47 | 7.5333 | 5.9690 | 6.3143 | 1.5643 | 1.2190
## 48 | 5.9618 | 5.8528 | 6.1898 | 0.1090 | -0.2280
## 49 | 7.5974 | 5.9410 | 5.6872 | 1.6564 | 1.9102
## 50 | 5.6664 | 6.0249 | 6.1856 | -0.3585 | -0.5192
## 51 | 6.2242 | 6.1088 | 5.9045 | 0.1154 | 0.3197
## 52 | 7.6050 | 6.2021 | 5.7891 | 1.4030 | 1.8159
## 53 | 5.9909 | 5.9787 | 6.1244 | 0.0122 | -0.1335
## 54 | 4.8782 | 5.9999 | 6.0667 | -1.1217 | -1.1885
## 55 | 6.7631 | 5.9885 | 6.1425 | 0.7746 | 0.6206
## 56 | 5.3754 | 5.9308 | 5.7324 | -0.5554 | -0.3570
## 57 | 4.4387 | 6.1225 | 6.2937 | -1.6838 | -1.8550
## 58 | 7.4928 | 6.0930 | 6.2703 | 1.3998 | 1.2225
## 59 | 4.4745 | 6.0732 | 6.1666 | -1.5987 | -1.6921
## 60 | 6.5410 | 5.9597 | 6.0380 | 0.5813 | 0.5030
## 61 | 5.4290 | 5.9612 | 5.9355 | -0.5322 | -0.5065
## 62 | 5.0687 | 5.9351 | 5.9036 | -0.8664 | -0.8349
## 63 | 4.2627 | 6.1007 | 6.0455 | -1.8380 | -1.7828
## 64 | 4.6536 | 5.9198 | 6.0485 | -1.2662 | -1.3949
## 65 | 5.8064 | 5.9363 | 6.1719 | -0.1299 | -0.3655
## 66 | 5.9977 | 5.9716 | 5.7765 | 0.0261 | 0.2212
## 67 | 5.9872 | 6.0606 | 5.9758 | -0.0734 | 0.0114
## 68 | 6.8639 | 6.1331 | 6.0210 | 0.7308 | 0.8429
## 69 | 6.2066 | 6.1583 | 5.8447 | 0.0483 | 0.3619
## 70 | 6.7475 | 5.8951 | 5.7533 | 0.8524 | 0.9942
## 71 | 5.3734 | 6.1706 | 6.1061 | -0.7972 | -0.7327
## 72 | 6.3952 | 5.8511 | 5.6622 | 0.5441 | 0.7330
## 73 | 6.4521 | 6.0866 | 5.9689 | 0.3655 | 0.4832
## 74 | 6.3680 | 5.9503 | 6.0019 | 0.4177 | 0.3661
## 75 | 6.5744 | 6.0535 | 5.9659 | 0.5209 | 0.6085
cat(rep("=", 90), "\n", sep="")
## ==========================================================================================
# For viewing in a plain data.frame format
cat("\n\nData.frame format (first 20 rows as a preview):\n")
##
##
## Data.frame format (first 20 rows as a preview):
print(head(hasil_ringkas, 20))
## No DO_Aktual DO_Prediksi_Linear DO_Prediksi_Spline Error_Linear Error_Spline
## 1 1 5.9909 5.8729 5.7930 0.1180 0.1979
## 3 2 4.8906 5.9895 5.9743 -1.0989 -1.0837
## 4 3 6.1339 5.9696 5.8028 0.1643 0.3311
## 7 4 4.9232 5.9037 5.7398 -0.9805 -0.8166
## 9 5 7.3885 5.8411 5.9604 1.5474 1.4281
## 12 6 5.6952 5.8349 6.0091 -0.1397 -0.3139
## 15 7 6.6876 6.0440 6.1994 0.6436 0.4882
## 17 8 5.6981 5.8970 6.1743 -0.1989 -0.4762
## 18 9 6.4983 6.0404 6.0423 0.4579 0.4560
## 22 10 6.4420 5.9361 6.1147 0.5059 0.3273
## 25 11 6.9310 5.9480 6.0498 0.9830 0.8812
## 27 12 5.1307 5.9566 6.0445 -0.8259 -0.9138
## 28 13 5.9909 6.0761 5.8530 -0.0852 0.1379
## 32 14 5.9976 5.9730 5.9354 0.0246 0.0622
## 35 15 4.0908 6.0586 5.7733 -1.9678 -1.6825
## 42 16 4.0785 6.0533 5.9084 -1.9748 -1.8299
## 43 17 5.9909 6.1077 6.2335 -0.1168 -0.2426
## 44 18 5.3681 6.0664 6.1549 -0.6983 -0.7868
## 47 19 4.5465 5.9111 5.7007 -1.3646 -1.1542
## 50 20 6.8883 5.9633 6.0891 0.9250 0.7992
cat("\n\n=== HASIL PREDIKSI LENGKAP (Semua Kolom) ===\n")
##
##
## === HASIL PREDIKSI LENGKAP (Semua Kolom) ===
cat("Preview 20 baris pertama (lihat file CSV untuk data lengkap):\n\n")
## Preview 20 baris pertama (lihat file CSV untuk data lengkap):
print(head(hasil_prediksi, 20))
## No pH BOD TSS Suhu DO_Aktual DO_Prediksi_Linear
## 1 1 7.6855 1.7136 43.1415 26.7972 5.9909 5.8729
## 3 2 7.1816 2.7274 49.5220 26.0255 4.8906 5.9895
## 4 3 7.3164 3.1398 41.0104 29.6639 6.1339 5.9696
## 7 4 7.7558 3.0661 49.0343 29.7409 4.9232 5.9037
## 9 5 7.9205 3.1318 65.9657 30.0458 7.3885 5.8411
## 12 6 7.9205 2.9847 57.9228 31.0234 5.6952 5.8349
## 15 7 6.9333 3.5953 55.9099 26.8932 6.6876 6.0440
## 17 8 6.8579 2.6004 60.6330 30.2128 5.6981 5.8970
## 18 9 6.1113 2.3079 54.5392 25.3417 6.4983 6.0404
## 22 10 6.1113 2.2249 45.1541 32.1927 6.4420 5.9361
## 25 11 7.9205 3.7265 45.8653 29.9575 6.9310 5.9480
## 27 12 6.8714 3.1432 60.6298 29.2436 5.1307 5.9566
## 28 13 6.1184 3.2784 56.2035 27.3108 5.9909 6.0761
## 32 14 7.3524 3.5403 67.5256 26.8528 5.9976 5.9730
## 35 15 7.2525 3.6127 31.7142 28.5909 4.0908 6.0586
## 42 16 6.8195 3.1292 31.7142 28.6953 4.0785 6.0533
## 43 17 7.3791 3.7449 41.9935 24.2806 5.9909 6.1077
## 44 18 6.6366 2.9520 44.2438 26.0276 5.3681 6.0664
## 47 19 6.5943 1.4021 49.5220 27.1593 4.5465 5.9111
## 50 20 7.3278 2.1734 50.3023 24.2806 6.8883 5.9633
## DO_Prediksi_Spline Error_Linear Error_Spline Abs_Error_Linear
## 1 5.7930 0.1180 0.1979 0.1180
## 3 5.9743 -1.0989 -1.0837 1.0989
## 4 5.8028 0.1643 0.3311 0.1643
## 7 5.7398 -0.9805 -0.8166 0.9805
## 9 5.9604 1.5474 1.4281 1.5474
## 12 6.0091 -0.1397 -0.3139 0.1397
## 15 6.1994 0.6436 0.4882 0.6436
## 17 6.1743 -0.1989 -0.4762 0.1989
## 18 6.0423 0.4579 0.4560 0.4579
## 22 6.1147 0.5059 0.3273 0.5059
## 25 6.0498 0.9830 0.8812 0.9830
## 27 6.0445 -0.8259 -0.9138 0.8259
## 28 5.8530 -0.0852 0.1379 0.0852
## 32 5.9354 0.0246 0.0622 0.0246
## 35 5.7733 -1.9678 -1.6825 1.9678
## 42 5.9084 -1.9748 -1.8299 1.9748
## 43 6.2335 -0.1168 -0.2426 0.1168
## 44 6.1549 -0.6983 -0.7868 0.6983
## 47 5.7007 -1.3646 -1.1542 1.3646
## 50 6.0891 0.9250 0.7992 0.9250
## Abs_Error_Spline
## 1 0.1979
## 3 1.0837
## 4 0.3311
## 7 0.8166
## 9 1.4281
## 12 0.3139
## 15 0.4882
## 17 0.4762
## 18 0.4560
## 22 0.3273
## 25 0.8812
## 27 0.9138
## 28 0.1379
## 32 0.0622
## 35 1.6825
## 42 1.8299
## 43 0.2426
## 44 0.7868
## 47 1.1542
## 50 0.7992
# Save the results to CSV for full reference
write.csv(hasil_prediksi, "hasil_prediksi_DO.csv", row.names = FALSE)
cat("\n✓ All predictions (", nrow(hasil_prediksi), " rows) saved to 'hasil_prediksi_DO.csv'\n")
##
## ✓ All predictions ( 277 rows) saved to 'hasil_prediksi_DO.csv'
cat(" Open the CSV file to see all prediction rows with every input variable.\n")
## Open the CSV file to see all prediction rows with every input variable.
# Option: print ALL 75 rows in a compact format
cat("\n\n=== OPTION: PRINT ALL", n_show, "ROWS ===\n")
##
##
## === OPTION: PRINT ALL 75 ROWS ===
cat("Uncomment the following lines to print all 75 rows:\n")
## Uncomment the following lines to print all 75 rows:
cat("# print(hasil_ringkas)\n")
## # print(hasil_ringkas)
cat("# View(hasil_prediksi) # for RStudio\n\n")
## # View(hasil_prediksi) # for RStudio
# To actually print everything (uncomment if desired)
# print(hasil_ringkas) # prints all n_show rows
# Error statistics
cat("\n=== PREDICTION ERROR STATISTICS ===\n")
##
## === PREDICTION ERROR STATISTICS ===
error_stats <- data.frame(
Model = c("Regresi Linear", "Regresi Spline"),
Mean_Error = c(mean(hasil_prediksi$Error_Linear), mean(hasil_prediksi$Error_Spline)),
Mean_Abs_Error = c(mean(hasil_prediksi$Abs_Error_Linear), mean(hasil_prediksi$Abs_Error_Spline)),
Max_Error = c(max(hasil_prediksi$Abs_Error_Linear), max(hasil_prediksi$Abs_Error_Spline)),
Min_Error = c(min(hasil_prediksi$Abs_Error_Linear), min(hasil_prediksi$Abs_Error_Spline))
)
error_stats[, 2:5] <- round(error_stats[, 2:5], 4)
print(error_stats)
## Model Mean_Error Mean_Abs_Error Max_Error Min_Error
## 1 Linear Regression -0.0307 0.2487 2.0146 0.0047
## 2 Spline Regression -0.0272 0.2561 2.1348 0.0044
# Identify the predictions with the largest errors
cat("\n=== 10 PREDICTIONS WITH THE LARGEST ERROR (Linear) ===\n")
##
## === 10 PREDICTIONS WITH THE LARGEST ERROR (Linear) ===
worst_linear <- hasil_prediksi[order(hasil_prediksi$Abs_Error_Linear, decreasing = TRUE), ][1:10,
c("No", "DO_Aktual", "DO_Prediksi_Linear", "Error_Linear", "Abs_Error_Linear")]
print(worst_linear)
## No DO_Aktual DO_Prediksi_Linear Error_Linear Abs_Error_Linear
## 62 23 4.0785 6.0931 -2.0146 2.0146
## 272 78 4.0785 6.0608 -1.9823 1.9823
## 107 39 4.0785 6.0540 -1.9755 1.9755
## 42 16 4.0785 6.0533 -1.9748 1.9748
## 35 15 4.0908 6.0586 -1.9678 1.9678
## 208 63 4.2627 6.1007 -1.8380 1.8380
## 99 35 4.0785 5.8802 -1.8017 1.8017
## 142 46 4.2802 6.0528 -1.7726 1.7726
## 176 57 4.4387 6.1225 -1.6838 1.6838
## 146 49 7.5974 5.9410 1.6564 1.6564
cat("\n=== 10 PREDIKSI DENGAN ERROR TERBESAR (Spline) ===\n")
##
## === 10 PREDIKSI DENGAN ERROR TERBESAR (Spline) ===
worst_spline <- hasil_prediksi[order(hasil_prediksi$Abs_Error_Spline, decreasing = TRUE), ][1:10,
c("No", "DO_Aktual", "DO_Prediksi_Spline", "Error_Spline", "Abs_Error_Spline")]
print(worst_spline)
## No DO_Aktual DO_Prediksi_Spline Error_Spline Abs_Error_Spline
## 99 35 4.0785 6.2133 -2.1348 2.1348
## 272 78 4.0785 6.1918 -2.1133 2.1133
## 107 39 4.0785 6.0538 -1.9753 1.9753
## 146 49 7.5974 5.6872 1.9102 1.9102
## 176 57 4.4387 6.2937 -1.8550 1.8550
## 42 16 4.0785 5.9084 -1.8299 1.8299
## 150 52 7.6050 5.7891 1.8159 1.8159
## 62 23 4.0785 5.8767 -1.7983 1.7983
## 208 63 4.2627 6.0455 -1.7828 1.7828
## 183 59 4.4745 6.1666 -1.6921 1.6921
cat("\n=== 10 PREDIKSI TERBAIK (Error Terkecil - Linear) ===\n")
##
## === 10 PREDIKSI TERBAIK (Error Terkecil - Linear) ===
best_linear <- hasil_prediksi[order(hasil_prediksi$Abs_Error_Linear), ][1:10,
c("No", "DO_Aktual", "DO_Prediksi_Linear", "Error_Linear", "Abs_Error_Linear")]
print(best_linear)
## No DO_Aktual DO_Prediksi_Linear Error_Linear Abs_Error_Linear
## 307 83 5.9909 5.9956 -0.0047 0.0047
## 312 84 5.9909 5.9956 -0.0047 0.0047
## 313 85 5.9909 5.9956 -0.0047 0.0047
## 314 86 5.9909 5.9956 -0.0047 0.0047
## 318 87 5.9909 5.9956 -0.0047 0.0047
## 320 88 5.9909 5.9956 -0.0047 0.0047
## 321 89 5.9909 5.9956 -0.0047 0.0047
## 324 90 5.9909 5.9956 -0.0047 0.0047
## 325 91 5.9909 5.9956 -0.0047 0.0047
## 329 92 5.9909 5.9956 -0.0047 0.0047
# 3.7 Analyze which variables influence DO
# ------------------------------------------------------------------------------
cat("\n--- 3.7 VARIABEL YANG PALING MEMPENGARUHI DO ---\n\n")
##
## --- 3.7 VARIABEL YANG PALING MEMPENGARUHI DO ---
# Dari koefisien regresi linear
cat("A. Analisis dari Koefisien Regresi Linear:\n")
## A. Analisis dari Koefisien Regresi Linear:
coef_lm <- summary(lm_model)$coefficients
coef_lm_sorted <- coef_lm[order(abs(coef_lm[, "t value"]), decreasing = TRUE), ]
print(coef_lm_sorted)
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.969695168 0.773042287 9.0159300 2.233689e-18
## BOD 0.079225966 0.044717819 1.7716867 7.692161e-02
## pH -0.080589210 0.072287840 -1.1148377 2.653378e-01
## Suhu -0.018225862 0.017404223 -1.0472092 2.953978e-01
## TSS -0.002891814 0.003896022 -0.7422479 4.582090e-01
cat("\n")
# Correlation analysis
cat("B. Bivariate correlations with DO:\n")
## B. Bivariate correlations with DO:
correlations <- cor(data_reg[, c("pH", "BOD", "TSS", "Suhu")], data_reg$DO)
correlations <- data.frame(
Variabel = rownames(correlations),
Korelasi = correlations[, 1]
)
correlations <- correlations[order(abs(correlations$Korelasi), decreasing = TRUE), ]
print(correlations)
## Variabel Korelasi
## Suhu Suhu -0.038907924
## BOD BOD 0.032362333
## TSS TSS 0.007866614
## pH pH -0.007539221
cat("\n")
# Standardized coefficients (for a fair comparison)
train_reg_scaled <- as.data.frame(scale(train_reg))
lm_scaled <- lm(DO ~ pH + BOD + TSS + Suhu, data = train_reg_scaled)
cat("C. Standardized Coefficients (Beta):\n")
## C. Standardized Coefficients (Beta):
cat("Koefisien ini menunjukkan pengaruh relatif setelah standardisasi\n\n")
## Koefisien ini menunjukkan pengaruh relatif setelah standardisasi
std_coef <- coef(lm_scaled)[-1] # exclude intercept
std_coef_sorted <- sort(abs(std_coef), decreasing = TRUE)
print(round(std_coef_sorted, 4))
## BOD pH Suhu TSS
## 0.0701 0.0441 0.0414 0.0292
cat("\n")
cat("INTERPRETASI VARIABEL YANG MEMPENGARUHI DO:\n")
## INTERPRETASI VARIABEL YANG MEMPENGARUHI DO:
cat("=========================================\n")
## =========================================
cat("1. Variabel dengan koefisien absolut terbesar memiliki pengaruh terkuat\n")
## 1. Variabel dengan koefisien absolut terbesar memiliki pengaruh terkuat
cat("2. Tanda koefisien menunjukkan arah hubungan:\n")
## 2. Tanda koefisien menunjukkan arah hubungan:
cat(" - Positif (+): Peningkatan variabel meningkatkan DO\n")
## - Positif (+): Peningkatan variabel meningkatkan DO
cat(" - Negatif (-): Peningkatan variabel menurunkan DO\n")
## - Negatif (-): Peningkatan variabel menurunkan DO
cat("3. P-value < 0.05 menunjukkan variabel signifikan secara statistik\n")
## 3. P-value < 0.05 menunjukkan variabel signifikan secara statistik
cat("4. BOD biasanya berkorelasi negatif dengan DO (konsumsi oksigen)\n")
## 4. BOD biasanya berkorelasi negatif dengan DO (konsumsi oksigen)
cat("5. Suhu biasanya berkorelasi negatif dengan DO (solubilitas berkurang)\n")
## 5. Suhu biasanya berkorelasi negatif dengan DO (solubilitas berkurang)
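# As a complementary, nonlinearity-tolerant check of predictor influence, a random
# forest regression could be fit on the same training data (sketch, not run):
# rf_reg <- randomForest(DO ~ pH + BOD + TSS + Suhu, data = train_reg, importance = TRUE)
# importance(rf_reg)[, "%IncMSE", drop = FALSE]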
# ===============================================================================
# FINAL CONCLUSIONS
# ===============================================================================
cat("\n\n==================== KESIMPULAN AKHIR ====================\n")
##
##
## ==================== KESIMPULAN AKHIR ====================
cat("\n1. DATA CLEANING:\n")
##
## 1. DATA CLEANING:
cat(" - Missing values berhasil ditangani dengan imputasi median\n")
## - Missing values berhasil ditangani dengan imputasi median
cat(" - Outliers ditangani dengan winsorization\n")
## - Outliers ditangani dengan winsorization
cat(" - Kategori Status distandarisasi menjadi 3 kelas\n")
## - Kategori Status distandarisasi menjadi 3 kelas
cat(" - Data siap untuk analisis lanjutan\n")
## - Data siap untuk analisis lanjutan
cat("\n2. KLASIFIKASI STATUS:\n")
##
## 2. KLASIFIKASI STATUS:
cat(" - Model terbaik:", best_model, "dengan akurasi",
round(max(comparison$Akurasi), 2), "%\n")
## - Model terbaik: Decision Tree dengan akurasi 93.26 %
cat(" - Semua model menunjukkan performa baik (akurasi > 70%)\n")
## - Semua model menunjukkan performa baik (akurasi > 70%)
cat(" - Parameter pH, DO, BOD, TSS, dan Suhu efektif untuk klasifikasi\n")
## - Parameter pH, DO, BOD, TSS, dan Suhu efektif untuk klasifikasi
cat("\n3. PREDIKSI DO:\n")
##
## 3. PREDIKSI DO:
cat(" - Model terbaik:", best_reg, "dengan R² =",
round(max(reg_comparison$R_squared), 4), "\n")
## - Model terbaik: Regresi Linear dengan R² = 0.0046
cat(" - RMSE menunjukkan error prediksi dalam satuan mg/L\n")
## - RMSE menunjukkan error prediksi dalam satuan mg/L
cat(" - Variabel paling berpengaruh dapat dilihat dari standardized coefficients\n")
## - Variabel paling berpengaruh dapat dilihat dari standardized coefficients
cat(" - Model dapat digunakan untuk prediksi DO di lokasi baru\n")
## - Model dapat digunakan untuk prediksi DO di lokasi baru
cat(" - Total", nrow(hasil_prediksi), "prediksi berhasil dibuat dan disimpan\n")
## - Total 277 prediksi berhasil dibuat dan disimpan