library(readxl)
## Warning: package 'readxl' was built under R version 4.4.2
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
## Warning: package 'ggplot2' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.2
## Warning: package 'readr' was built under R version 4.4.2
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.2
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ lubridate 1.9.4 ✔ tibble 3.3.0
## ✔ purrr 1.1.0 ✔ tidyr 1.3.1
## ✔ readr 2.1.5
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(data.table)
## Warning: package 'data.table' was built under R version 4.4.2
##
## Attaching package: 'data.table'
##
## The following objects are masked from 'package:lubridate':
##
## hour, isoweek, mday, minute, month, quarter, second, wday, week,
## yday, year
##
## The following object is masked from 'package:purrr':
##
## transpose
##
## The following objects are masked from 'package:dplyr':
##
## between, first, last
library(e1071)
## Warning: package 'e1071' was built under R version 4.4.2
##
## Attaching package: 'e1071'
##
## The following object is masked from 'package:ggplot2':
##
## element
library(rpart)
library(randomForest)
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:ggplot2':
##
## margin
##
## The following object is masked from 'package:dplyr':
##
## combine
library(mgcv)
## Loading required package: nlme
##
## Attaching package: 'nlme'
##
## The following object is masked from 'package:dplyr':
##
## collapse
##
## This is mgcv 1.9-1. For overview type 'help("mgcv-package")'.
library(stringr)
# Seed the random number generator.
set.seed(42)
# Read the Excel file.
# NOTE(review): absolute, machine-specific path; the workbook must exist at
# exactly this location for the script to run.
data <- read_excel("C:/Users/Lenovo/Downloads/output_UTS/kualitasair.xlsx")
# Inspect the structure of the data (column names, types, first values).
glimpse(data)
## Rows: 300
## Columns: 7
## $ Lokasi <chr> "S1", "S2", "S3", "S4", "S5", "S6", "S7", "S8", "S9", "S10", "S…
## $ pH <dbl> 7.6855, 6.7177, 7.1816, 7.3164, 7.2021, 6.9469, 7.7558, 6.9527,…
## $ DO <dbl> NA, 5.7236, 4.8906, 6.1339, 7.7853, 8.4222, 4.9232, 6.4859, 7.3…
## $ BOD <dbl> 1.7136, 1.4402, 2.7274, 3.1398, 1.1778, 3.2324, NA, 4.0358, 3.1…
## $ TSS <dbl> 43.1415, 44.2963, NA, 41.0104, 48.0967, 48.5610, 49.0343, 51.81…
## $ Suhu <dbl> 26.7972, 27.7284, 26.0255, 29.6639, 26.4099, 28.6809, 29.7409, …
## $ Status <chr> "Tercemar ringan", "Tercemar ringan", "Tercemar ringan", "Terce…
# Per-column descriptive statistics; NA counts are reported for DO, BOD, TSS.
summary(data)
## Lokasi pH DO BOD
## Length:300 Min. :5.503 Min. :2.982 Min. :0.3026
## Class :character 1st Qu.:6.670 1st Qu.:5.375 1st Qu.:2.3573
## Mode :character Median :6.988 Median :5.991 Median :3.0661
## Mean :6.989 Mean :5.976 Mean :3.0005
## 3rd Qu.:7.318 3rd Qu.:6.688 3rd Qu.:3.5781
## Max. :8.351 Max. :9.229 Max. :5.7962
## NA's :23 NA's :22
## TSS Suhu Status
## Min. :24.65 Min. :22.77 Length:300
## 1st Qu.:43.73 1st Qu.:26.62 Class :character
## Median :49.52 Median :28.01 Mode :character
## Mean :49.70 Mean :28.31
## 3rd Qu.:56.44 3rd Qu.:29.46
## Max. :76.34 Max. :90.00
## NA's :24
# Check for missing values: number of NA entries in each column.
colSums(is.na(data))
## Lokasi pH DO BOD TSS Suhu Status
## 0 0 23 22 24 0 0
# Clean the raw table in one pass:
#  * DO, BOD and TSS: every NA is replaced by the median of the observed
#    values of that same column;
#  * Status: lower-cased, trimmed, then mapped onto the three canonical
#    labels (values that match none of the keys are left as they are).
data <- mutate(
  data,
  across(
    c(DO, BOD, TSS),
    function(v) replace(v, is.na(v), median(v, na.rm = TRUE))
  ),
  Status = recode(
    str_trim(str_to_lower(Status)),
    "baik" = "Baik",
    "tercemar ringan" = "Tercemar ringan",
    "tercemar berat" = "Tercemar berat"
  )
)
# Frequency of each status label after normalisation.
table(data[["Status"]])
##
## Baik Tercemar berat Tercemar ringan
## 72 7 221
#' Count IQR-rule outliers in a numeric vector
#'
#' A value is an outlier when it lies more than 1.5 interquartile ranges
#' below the first quartile or above the third quartile.
#'
#' @param x A numeric vector.
#' @param na.rm Should missing values be ignored when computing the quartiles
#'   and when counting? Defaults to `TRUE`; with `FALSE`, `quantile()` stops
#'   with an error as soon as `x` contains an `NA`.
#' @return A single integer: the number of elements of `x` outside the fences.
detect_outlier_iqr <- function(x, na.rm = TRUE) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = na.rm, names = FALSE)
  # Named `spread` rather than `IQR` so stats::IQR() is not shadowed.
  spread <- quartiles[2] - quartiles[1]
  lower_fence <- quartiles[1] - 1.5 * spread
  upper_fence <- quartiles[2] + 1.5 * spread
  sum(x < lower_fence | x > upper_fence, na.rm = na.rm)
}
# Count IQR-rule outliers in each of the five numeric measurement columns.
outliers <- vapply(
  select(data, pH, DO, BOD, TSS, Suhu),
  detect_outlier_iqr,
  FUN.VALUE = integer(1)
)
outliers
## pH DO BOD TSS Suhu
## 4 4 5 5 2
# Descriptive statistics of the same five columns.
summary(data %>% select(pH, DO, BOD, TSS, Suhu))
## pH DO BOD TSS
## Min. :5.503 Min. :2.982 Min. :0.3026 Min. :24.65
## 1st Qu.:6.670 1st Qu.:5.413 1st Qu.:2.4599 1st Qu.:44.28
## Median :6.988 Median :5.991 Median :3.0661 Median :49.52
## Mean :6.989 Mean :5.977 Mean :3.0053 Mean :49.68
## 3rd Qu.:7.318 3rd Qu.:6.611 3rd Qu.:3.5323 3rd Qu.:55.62
## Max. :8.351 Max. :9.229 Max. :5.7962 Max. :76.34
## Suhu
## Min. :22.77
## 1st Qu.:26.62
## Median :28.01
## Mean :28.31
## 3rd Qu.:29.46
## Max. :90.00
# Boxplot per variable to make the outliers visible: the table is reshaped to
# long form (one row per variable/value pair) and drawn with one box each.
ggplot(
  pivot_longer(
    data,
    cols = c(pH, DO, BOD, TSS, Suhu),
    names_to = "Variabel",
    values_to = "Nilai"
  ),
  aes(x = Variabel, y = Nilai, fill = Variabel)
) +
  geom_boxplot() +
  theme_minimal() +
  labs(title = "Boxplot Variabel Kualitas Air Setelah Cleaning")
# Seed the RNG before the partition below is drawn.
set.seed(123)
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
## The following object is masked from 'package:purrr':
##
## lift
library(e1071)
library(rpart)
library(randomForest)
# Split the data into training (70%) and testing (30%) sets.
# createDataPartition() is given the Status column and p = 0.7; because
# list = FALSE its result is used directly as a row index.
index <- createDataPartition(data$Status, p = 0.7, list = FALSE)
# Indexed rows form the training set, all remaining rows the test set.
train_data <- data[index, ]
test_data <- data[-index, ]
# Map free-text water-quality labels onto three canonical classes.
#
# The input is coerced to character, trimmed and lower-cased, then classified
# by substring with this precedence:
#   1. contains "baik"                   -> "Baik"
#   2. contains "berat"                  -> "Tercemar berat"
#   3. contains "ringan" or "tercemar"   -> "Tercemar ringan"
# Empty strings and missing values become NA; anything else is returned in
# its trimmed, lower-cased form.
standardize_status <- function(x) {
  cleaned <- tolower(trimws(as.character(x)))

  # All masks are derived from the untouched lower-cased text, so assigning
  # one label can never influence the match for another.
  is_good <- grepl("baik", cleaned)
  is_heavy <- !is_good & grepl("berat", cleaned)
  is_light <- !is_good & !is_heavy &
    (grepl("ringan", cleaned) | grepl("tercemar", cleaned))

  cleaned[is_good] <- "Baik"
  cleaned[is_heavy] <- "Tercemar berat"
  cleaned[is_light] <- "Tercemar ringan"
  # nzchar(NA) is TRUE, so existing NA entries are left as NA here.
  cleaned[!nzchar(cleaned)] <- NA
  cleaned
}
# `df` has to be a data frame for the cleaning steps that follow. Nothing
# assigns it before this point, so without the next line the name resolves to
# the F-distribution density function stats::df(): sapply() would then walk
# the function's formals and body (x, df1, df2, ncp, log), names(df) would be
# NULL, num_cols would be empty, and every loop over num_cols would silently
# do nothing. Start from the data set that has already been loaded.
df <- data
cat("Missing values per kolom:\n")
## Missing values per kolom:
# Number of NA entries in every column of df.
print(vapply(df, function(x) sum(is.na(x)), integer(1)))
# Numeric measurement columns that are actually present in df.
num_cols <- intersect(c("pH", "DO", "BOD", "TSS", "Suhu"), names(df))
# Fill the gaps of each numeric column with that column's median and report
# every column that needed it.
for (col in num_cols) {
  if (anyNA(df[[col]])) {
    med <- median(df[[col]], na.rm = TRUE)
    df[[col]][is.na(df[[col]])] <- med
    cat("Imputed median for", col, "=", med, "\n")
  }
}
# Clamp a numeric vector to its IQR fences.
#
# Values below Q1 - 1.5 * IQR are raised to that lower fence and values above
# Q3 + 1.5 * IQR are lowered to the upper fence. Quartiles are computed with
# missing values ignored; NA elements of `x` stay NA in the result.
cap_outliers <- function(x) {
  quartiles <- quantile(x, probs = c(0.25, 0.75), na.rm = TRUE)
  whisker <- 1.5 * (quartiles[[2]] - quartiles[[1]])
  floor_value <- quartiles[[1]] - whisker
  ceiling_value <- quartiles[[2]] + whisker
  # `x` goes first so the result carries the attributes of `x`.
  pmin(pmax(x, floor_value), ceiling_value)
}
# Clamp every column listed in num_cols to its IQR fences with cap_outliers(),
# overwriting the column in df. When num_cols is empty the loop does nothing.
for(col in num_cols){
df[[col]] <- cap_outliers(df[[col]])
}
# Print the header for a numeric summary.
# NOTE(review): only the header is printed; no summary() call follows it.
cat("\nSummary numeric (after cleaning):\n")
##
## Summary numeric (after cleaning):
library(readxl)
library(dplyr)
library(rpart)
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
library(caret)
# Adjust this path to wherever the workbook lives on your machine.
df <- read_excel("C:/Users/Lenovo/Downloads/kualitasair.xlsx")
# Confirm that a 'Status' (or 'Status_clean') column is present.
colnames(df)
## [1] "Lokasi" "pH" "DO" "BOD" "TSS" "Suhu" "Status"
# The source column is called "Status": derive Status_clean from it as a
# factor whose three levels are listed in a fixed order. Values of Status that
# match none of the levels become NA.
df_clean <- dplyr::mutate(
  df,
  Status_clean = factor(
    Status,
    levels = c("Baik", "Tercemar ringan", "Tercemar berat")
  )
)
# If the source column is already called "Status_clean",
# use this instead:
# df_clean <- df %>%
# dplyr::mutate(Status_clean = factor(Status_clean,
# levels = c("Baik", "Tercemar ringan", "Tercemar berat")))
# Reproducible random 70/30 split of df_clean into train_df / test_df.
set.seed(123)
n <- nrow(df_clean)
train_index <- sample(seq_len(n), size = 0.7 * n)
train_df <- df_clean[train_index, ]
test_df <- df_clean[-train_index, ]

# Classification tree for Status_clean on the five measurements.
model_tree <- rpart(
  formula = Status_clean ~ pH + DO + BOD + TSS + Suhu,
  data = train_df,
  method = "class",
  control = rpart.control(minsplit = 8, cp = 0.01)
)

# Predicted class labels for the held-out rows, compared with the truth.
pred_tree <- predict(model_tree, newdata = test_df, type = "class")
conf_tree <- confusionMatrix(
  data = as.factor(pred_tree),
  reference = as.factor(test_df$Status_clean)
)
print(conf_tree)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Baik Tercemar ringan Tercemar berat
## Baik 13 2 0
## Tercemar ringan 3 70 1
## Tercemar berat 0 0 1
##
## Overall Statistics
##
## Accuracy : 0.9333
## 95% CI : (0.8605, 0.9751)
## No Information Rate : 0.8
## P-Value [Acc > NIR] : 0.0003927
##
## Kappa : 0.7866
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Baik Class: Tercemar ringan Class: Tercemar berat
## Sensitivity 0.8125 0.9722 0.50000
## Specificity 0.9730 0.7778 1.00000
## Pos Pred Value 0.8667 0.9459 1.00000
## Neg Pred Value 0.9600 0.8750 0.98876
## Prevalence 0.1778 0.8000 0.02222
## Detection Rate 0.1444 0.7778 0.01111
## Detection Prevalence 0.1667 0.8222 0.01111
## Balanced Accuracy 0.8927 0.8750 0.75000
# Draw the fitted tree.
rpart.plot(
  model_tree,
  main = "Pohon Keputusan – Kualitas Air",
  type = 4,
  extra = 101
)
# Overall accuracy of the tree, kept (with its name) for the model comparison.
acc_tree <- conf_tree$overall["Accuracy"]
# Decision tree model (new version): fitted on train_data / test_data with
# information-gain splitting.
library(rpart)
library(rpart.plot)
set.seed(42)
dt_model <- rpart(
  formula = Status ~ pH + DO + BOD + TSS + Suhu,
  data = train_data,
  parms = list(split = "information"),
  control = rpart.control(minsplit = 10, cp = 0.01)
)
dt_pred <- predict(dt_model, newdata = test_data, type = "class")
# Evaluate the predictions against the true test labels.
hasil_tree <- confusionMatrix(
  data = as.factor(dt_pred),
  reference = as.factor(test_data$Status)
)
print(hasil_tree)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Baik Tercemar berat Tercemar ringan
## Baik 18 0 1
## Tercemar berat 0 2 0
## Tercemar ringan 3 0 65
##
## Overall Statistics
##
## Accuracy : 0.9551
## 95% CI : (0.8889, 0.9876)
## No Information Rate : 0.7416
## P-Value [Acc > NIR] : 1.148e-07
##
## Kappa : 0.8825
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Baik Class: Tercemar berat Class: Tercemar ringan
## Sensitivity 0.8571 1.00000 0.9848
## Specificity 0.9853 1.00000 0.8696
## Pos Pred Value 0.9474 1.00000 0.9559
## Neg Pred Value 0.9571 1.00000 0.9524
## Prevalence 0.2360 0.02247 0.7416
## Detection Rate 0.2022 0.02247 0.7303
## Detection Prevalence 0.2135 0.02247 0.7640
## Balanced Accuracy 0.9212 1.00000 0.9272
# Plot the fitted tree.
rpart.plot(
  dt_model,
  main = "Pohon Keputusan – Status Kualitas Air (Decision Tree)",
  type = 4,
  extra = 102,
  under = TRUE
)
library(randomForest)
library(caret)
# --- Keep only complete rows in both splits ---
# na.omit() leaves a table without NA unchanged, so one pass per data frame
# is sufficient.
train_df <- na.omit(train_df)
test_df <- na.omit(test_df)
# Seed the RNG immediately before the forest is grown.
set.seed(123)
# --- Fit the Random Forest: 200 trees, variable importance recorded ---
model_rf <- randomForest(
  Status_clean ~ pH + DO + BOD + TSS + Suhu,
  data = train_df,
  importance = TRUE,
  ntree = 200
)
# --- Predict the test rows ---
pred_rf <- predict(model_rf, newdata = test_df)
# --- Evaluate against the true labels ---
conf_rf <- confusionMatrix(
  data = as.factor(pred_rf),
  reference = as.factor(test_df$Status_clean)
)
print(conf_rf)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Baik Tercemar ringan Tercemar berat
## Baik 12 0 0
## Tercemar ringan 1 61 1
## Tercemar berat 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.9733
## 95% CI : (0.907, 0.9968)
## No Information Rate : 0.8133
## P-Value [Acc > NIR] : 3.062e-05
##
## Kappa : 0.9077
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Baik Class: Tercemar ringan Class: Tercemar berat
## Sensitivity 0.9231 1.0000 0.00000
## Specificity 1.0000 0.8571 1.00000
## Pos Pred Value 1.0000 0.9683 NaN
## Neg Pred Value 0.9841 1.0000 0.98667
## Prevalence 0.1733 0.8133 0.01333
## Detection Rate 0.1600 0.8133 0.00000
## Detection Prevalence 0.1600 0.8400 0.00000
## Balanced Accuracy 0.9615 0.9286 0.50000
# --- Plot the variable importance ---
varImpPlot(
  model_rf,
  main = "Pentingnya Variabel – Random Forest"
)
# Radial-kernel SVM on the same predictors, with the inputs scaled and fixed
# cost / gamma values.
set.seed(321)
model_svm <- e1071::svm(
  Status_clean ~ pH + DO + BOD + TSS + Suhu,
  data = train_df,
  scale = TRUE,
  kernel = "radial",
  gamma = 0.1,
  cost = 1
)
# Predict the test rows and tabulate predictions against the true labels.
pred_svm <- predict(model_svm, newdata = test_df)
conf_svm <- confusionMatrix(
  data = as.factor(pred_svm),
  reference = as.factor(test_df$Status_clean)
)
print(conf_svm)
## Confusion Matrix and Statistics
##
## Reference
## Prediction Baik Tercemar ringan Tercemar berat
## Baik 7 1 0
## Tercemar ringan 6 60 1
## Tercemar berat 0 0 0
##
## Overall Statistics
##
## Accuracy : 0.8933
## 95% CI : (0.8006, 0.9528)
## No Information Rate : 0.8133
## P-Value [Acc > NIR] : 0.04493
##
## Kappa : 0.5816
##
## Mcnemar's Test P-Value : NA
##
## Statistics by Class:
##
## Class: Baik Class: Tercemar ringan Class: Tercemar berat
## Sensitivity 0.53846 0.9836 0.00000
## Specificity 0.98387 0.5000 1.00000
## Pos Pred Value 0.87500 0.8955 NaN
## Neg Pred Value 0.91045 0.8750 0.98667
## Prevalence 0.17333 0.8133 0.01333
## Detection Rate 0.09333 0.8000 0.00000
## Detection Prevalence 0.10667 0.8933 0.00000
## Balanced Accuracy 0.76117 0.7418 0.50000
# Overall accuracy reported by each model's confusion matrix.
# NOTE(review): conf_tree was computed before test_df was reduced by
# na.omit(), while conf_rf and conf_svm were computed afterwards, so the three
# figures may not refer to the same test rows. Confirm before ranking models.
acc_svm <- conf_svm$overall["Accuracy"]
acc_rf <- conf_rf$overall["Accuracy"]
acc_tree <- conf_tree$overall["Accuracy"]
# One row per model.
hasil_akurasi <- data.frame(
  Model = c("Decision Tree", "Random Forest", "SVM"),
  Akurasi = c(acc_tree, acc_rf, acc_svm)
)
# Render the comparison, best accuracy first.
knitr::kable(
  arrange(hasil_akurasi, desc(Akurasi)),
  caption = "Perbandingan Akurasi Model Klasifikasi"
)
| Model | Akurasi |
|---|---|
| Random Forest | 0.9733333 |
| Decision Tree | 0.9333333 |
| SVM | 0.8933333 |
Interpretasi Hasil
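Berdasarkan akurasi pada tabel di atas, model terbaik untuk dataset ini adalah Random Forest (0,973), diikuti Decision Tree (0,933) dan SVM (0,893). Decision Tree tetap layak dipertimbangkan karena memberikan kombinasi interpretabilitas dan performa yang tinggi.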
library(tidyverse)
library(caret)
library(splines)
library(ggplot2)
# Build the regression data set from df: keep DO and the four predictors and
# drop every row that has an NA in any of them.
# NOTE(review): the last visible assignment to df is a plain read_excel()
# call, so the outlier capping defined earlier does not appear to have been
# applied to these columns. Confirm that df really is the cleaned data here.
set.seed(123)
fit_data <- drop_na(select(df, DO, pH, BOD, TSS, Suhu))
# Random 70:30 split into training and testing rows.
n <- nrow(fit_data)
train_idx <- sample(seq_len(n), size = 0.7 * n)
test_data <- fit_data[-train_idx, ]
train_data <- fit_data[train_idx, ]
# Descriptive statistics of the training rows.
summary(train_data)
## DO pH BOD TSS
## Min. :3.811 Min. :5.650 Min. :0.3026 Min. :24.65
## 1st Qu.:5.365 1st Qu.:6.713 1st Qu.:2.4115 1st Qu.:45.68
## Median :6.013 Median :6.998 Median :3.1320 Median :50.23
## Mean :5.980 Mean :6.996 Mean :3.0923 Mean :50.66
## 3rd Qu.:6.658 3rd Qu.:7.301 3rd Qu.:3.7033 3rd Qu.:56.81
## Max. :8.422 Max. :8.351 Max. :5.7962 Max. :76.34
## Suhu
## Min. :22.77
## 1st Qu.:26.81
## Median :28.05
## Mean :28.16
## 3rd Qu.:29.52
## Max. :35.17
# Fit a plain linear model of DO on the four predictors.
lm_model <- lm(
  formula = DO ~ pH + BOD + TSS + Suhu,
  data = train_data
)
# Predict the test rows and score them. r2_lm is the squared correlation
# between the observed and the predicted values.
pred_lm <- predict(lm_model, newdata = test_data)
mse_lm <- mean((test_data$DO - pred_lm)^2)
r2_lm <- cor(test_data$DO, pred_lm)^2
rmse_lm <- sqrt(mse_lm)
cat("📊 Regresi Linear:\n")
## 📊 Regresi Linear:
cat(" R² =", round(r2_lm, digits = 3), "\n")
## R² = 0
cat(" MSE =", round(mse_lm, digits = 3), "\n")
## MSE = 0.99
cat(" RMSE =", round(rmse_lm, digits = 3), "\n")
## RMSE = 0.995
# Predicted vs. actual DO; the dashed red line marks perfect prediction.
ggplot(
  data.frame(Aktual = test_data$DO, Prediksi = pred_lm),
  aes(x = Aktual, y = Prediksi)
) +
  geom_point(size = 2, color = "steelblue") +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  theme_minimal() +
  labs(
    title = "Prediksi vs Aktual (Regresi Linear)",
    x = "DO Aktual",
    y = "DO Prediksi"
  )
Grafik Prediksi vs Aktual (Regresi Linear) menggambarkan hubungan antara nilai aktual dan hasil prediksi variabel DO (Dissolved Oxygen) yang dihasilkan oleh model regresi linear. Titik-titik biru menunjukkan sebaran data, sedangkan garis merah putus-putus merupakan garis ideal di mana nilai prediksi sama dengan nilai aktual. Berdasarkan grafik, terlihat bahwa sebagian besar titik tidak berada di sekitar garis merah, melainkan tersebar cukup jauh. Hal ini sejalan dengan nilai R² yang mendekati 0 dan RMSE sekitar 0,995, yang menandakan bahwa model regresi linear belum mampu memprediksi nilai DO dengan akurasi yang baik. Dengan kata lain, pH, BOD, TSS, dan Suhu hampir tidak menjelaskan variasi DO secara linier, sehingga model menghasilkan penyimpangan atau error yang cukup besar. Oleh karena itu, perlu dicoba pendekatan model lain, misalnya model non-linier, untuk melihat apakah hasil prediksi dapat menjadi lebih akurat.
# Same regression, but each predictor enters through a B-spline basis with
# df = 4.
spline_model <- lm(
  formula = DO ~ bs(pH, df = 4) + bs(BOD, df = 4) + bs(TSS, df = 4) +
    bs(Suhu, df = 4),
  data = train_data
)
# Predict the test rows and score them exactly like the linear model.
pred_spline <- predict(spline_model, newdata = test_data)
## Warning in bs(Suhu, degree = 3L, knots = 28.0481, Boundary.knots = c(22.7727, :
## some 'x' values beyond boundary knots may cause ill-conditioned bases
mse_spline <- mean((test_data$DO - pred_spline)^2)
r2_spline <- cor(test_data$DO, pred_spline)^2
rmse_spline <- sqrt(mse_spline)
cat("\nRegresi Spline:\n")
##
## Regresi Spline:
cat(" R² =", round(r2_spline, digits = 3), "\n")
## R² = 0.001
cat(" MSE =", round(mse_spline, digits = 3), "\n")
## MSE = 24161.06
cat(" RMSE =", round(rmse_spline, digits = 3), "\n")
## RMSE = 155.438
# Predicted vs. actual DO; the dashed red line marks perfect prediction.
ggplot(
  data.frame(Aktual = test_data$DO, Prediksi = pred_spline),
  aes(x = Aktual, y = Prediksi)
) +
  geom_point(size = 2, color = "darkorange") +
  geom_abline(intercept = 0, slope = 1, color = "red", linetype = "dashed") +
  theme_minimal() +
  labs(
    title = "Prediksi vs Aktual (Regresi Spline)",
    x = "DO Aktual",
    y = "DO Prediksi"
  )
# Side-by-side metrics of the two regression models.
hasil_prediksi <- tibble(
  Model = c("Regresi Linear", "Regresi Spline"),
  R2 = c(r2_lm, r2_spline),
  MSE = c(mse_lm, mse_spline),
  RMSE = c(rmse_lm, rmse_spline)
)
knitr::kable(
  hasil_prediksi,
  caption = "Perbandingan Performa Model Prediksi DO"
)
| Model | R2 | MSE | RMSE |
|---|---|---|---|
| Regresi Linear | 0.0001824 | 9.903857e-01 | 0.9951812 |
| Regresi Spline | 0.0008509 | 2.416106e+04 | 155.4382907 |
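Interpretasi
Kedua model memiliki R² yang mendekati 0 (0,0002 untuk regresi linear dan 0,0009 untuk regresi spline), sehingga pH, BOD, TSS, dan Suhu praktis tidak menjelaskan variasi DO pada data uji. Regresi linear menghasilkan RMSE sekitar 0,995, sedangkan regresi spline menghasilkan RMSE sekitar 155,44. Galat spline yang sangat besar ini kemungkinan besar disebabkan oleh ekstrapolasi: nilai Suhu maksimum pada data latih adalah 35,17, sedangkan pada data lengkap terdapat nilai Suhu sebesar 90, dan R memberi peringatan bahwa ada nilai di luar boundary knots saat prediksi. Dengan demikian, regresi linear lebih layak dipakai daripada regresi spline pada data ini, dan pencilan Suhu sebaiknya ditangani sebelum pemodelan.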
# Muat library
library(readxl)
library(dplyr)
# Report the active working directory.
cat("Folder kerja aktif:", getwd(), "\n")
## Folder kerja aktif: C:/Users/Lenovo/Downloads/pRADYTHA/STATLING - RABU/UTS
# Make sure the output folder exists below the working directory.
output_dir <- file.path(getwd(), "output_UTS")
if (!dir.exists(output_dir)) {
  dir.create(output_dir)
  cat("Folder 'output_UTS' berhasil dibuat\n")
}
# ==== 1. Read the Excel file ====
input_file <- "C:/Users/Lenovo/Downloads/PALING FIX UTS MANTAP.xlsx"
data <- read_excel(input_file)
cat("Data berhasil dibaca dari:", input_file, "\n")
## Data berhasil dibaca dari: C:/Users/Lenovo/Downloads/PALING FIX UTS MANTAP.xlsx
# ==== 2. Process the data if needed ====
# All columns are kept; surrounding whitespace is stripped from text columns.
# NOTE(review): the recorded sample output of this export shows pH/DO/TSS/Suhu
# values in the hundreds of thousands or millions, which looks like lost
# decimal separators in the workbook. Verify the source file.
hasil_75 <- mutate(data, across(where(is.character), trimws))
# ==== 3. Save the result under a timestamped file name ====
timestamp <- format(Sys.time(), "%Y%m%d_%H%M%S")
output_file <- file.path(
  output_dir,
  paste0("hasil_prediksi_75_model_", timestamp, ".csv")
)
tryCatch(
  {
    write.csv(hasil_75, file = output_file, row.names = FALSE)
    cat("File hasil terbaru disimpan di:\n", output_file, "\n")
  },
  error = function(e) {
    # A failed write is reported and the script carries on.
    cat("Error saat menyimpan file:", e$message, "\n")
  }
)
## File hasil terbaru disimpan di:
## C:/Users/Lenovo/Downloads/pRADYTHA/STATLING - RABU/UTS/output_UTS/hasil_prediksi_75_model_20251015_183048.csv
# ==== 4. Check the file contents (first 5 rows) ====
if (!file.exists(output_file)) {
  cat("File tidak tersimpan. Periksa izin folder kerja!\n")
} else {
  cat("File tersimpan sukses! Menampilkan contoh isi:\n")
  print(head(read.csv(output_file), n = 5))
}
## File tersimpan sukses! Menampilkan contoh isi:
## pH DO BOD TSS Suhu Status_clean
## 1 2170789 789777 3.0913 471188 258608 Tercemar ringan
## 2 1437778 752887 3.0772 636502 27657 Tercemar ringan
## 3 1217142 1941386 1020156.0000 533784 32592 Tercemar ringan
## 4 2530554 2925349 942753.0000 571947 282945 Tercemar ringan
## 5 1339527 1488181 3.1364 518954 284291 Tercemar ringan
## Prediksi_DecisionTree Prediksi_RandomForest Prediksi_SVM
## 1 Tercemar ringan Tercemar ringan Tercemar ringan
## 2 Tercemar ringan Tercemar ringan Tercemar ringan
## 3 Tercemar ringan Tercemar ringan Tercemar ringan
## 4 Tercemar ringan Tercemar ringan Tercemar ringan
## 5 Tercemar ringan Tercemar ringan Tercemar ringan