library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.2 ✔ tibble 3.3.0
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.1.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
##
## Attaching package: 'caret'
##
## The following object is masked from 'package:purrr':
##
## lift
library(e1071) # SVM backend
## Warning: package 'e1071' was built under R version 4.4.3
library(rpart) # Decision Tree
library(randomForest) # Random Forest
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
##
## The following object is masked from 'package:dplyr':
##
## combine
##
## The following object is masked from 'package:ggplot2':
##
## margin
library(splines) # Basis spline
library(mice) # Imputasi multivariat (jika diperlukan)
## Warning: package 'mice' was built under R version 4.4.3
##
## Attaching package: 'mice'
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following objects are masked from 'package:base':
##
## cbind, rbind
library(broom) # Merapikan output model
## Warning: package 'broom' was built under R version 4.4.3
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.3
set.seed(10251547)
options(stringsAsFactors = FALSE)
# 1) Load data ---------------------------------------------------------------
file_path <- "kualitasair.xlsx"
train_raw <- read_excel("E:/z DRIVE E/KULIAH/SEMESTER 5/Statistika Lingkungan/Week UTS/kualitasair.xlsx", sheet = "Training")
test_raw <- read_excel("E:/z DRIVE E/KULIAH/SEMESTER 5/Statistika Lingkungan/Week UTS/kualitasair.xlsx", sheet = "Testing")
# Ringkasan
cat("Training: ", dim(train_raw)[1], " baris x ", dim(train_raw)[2], " kolom\n")
## Training: 300 baris x 7 kolom
1. Soal 1
train <- train_raw
test <- test_raw
# Kolom numerik yang relevan
num_vars <- c("pH", "DO", "BOD", "TSS", "Suhu")
# Missing Value
prop_missing <- sapply(train[num_vars], function(x) mean(is.na(x)))
prop_missing <- tibble(var = names(prop_missing), prop_missing = prop_missing)
kable(prop_missing, caption = "Proporsi Missing Value per Variabel")
Proporsi Missing Value per Variabel
| pH |
0.0000000 |
| DO |
0.0766667 |
| BOD |
0.0733333 |
| TSS |
0.0800000 |
| Suhu |
0.0000000 |
# Imputasi: aturan praktis
# Jika prop < 0.10 -> median; else -> mice (pmm)
small_miss <- prop_missing %>% filter(prop_missing < 0.10) %>% pull(var)
large_miss <- prop_missing %>% filter(prop_missing >= 0.10) %>% pull(var)
# Median imputasi untuk small_miss
for (v in small_miss) {
medv <- median(train[[v]], na.rm = TRUE)
train[[v]][is.na(train[[v]])] <- medv
}
# MICE untuk variabel dengan missing >= 10%
if (length(large_miss) > 0) {
impute_df <- train %>% select(all_of(num_vars), Status)
impute_df$Status <- as.factor(impute_df$Status)
mice_out <- mice(impute_df, m = 1, method = "pmm", printFlag = FALSE, seed = 10251547)
completed <- complete(mice_out, 1)
train[num_vars] <- completed[num_vars]
}
# Verifikasi tidak ada missing numeric
train %>% summarise(across(all_of(num_vars), ~ sum(is.na(.)))) %>%
kable(caption = "Jumlah Missing Value Setelah Imputasi")
Jumlah Missing Value Setelah Imputasi
| 0 |
0 |
0 |
0 |
0 |
# Outlier Detection & Treatment
# Gunakan metode IQR (Interquartile Range)
for (v in num_vars) {
Q1 <- quantile(train[[v]], 0.25, na.rm = TRUE)
Q3 <- quantile(train[[v]], 0.75, na.rm = TRUE)
IQR_val <- Q3 - Q1
lower <- Q1 - 1.5 * IQR_val
upper <- Q3 + 1.5 * IQR_val
outlier_count <- sum(train[[v]] < lower | train[[v]] > upper)
cat(v, ": Jumlah outlier =", outlier_count, "\n")
# Winsorizing: ganti outlier ekstrem dengan batas bawah/atas
train[[v]][train[[v]] < lower] <- lower
train[[v]][train[[v]] > upper] <- upper
}
## pH : Jumlah outlier = 4
## DO : Jumlah outlier = 4
## BOD : Jumlah outlier = 5
## TSS : Jumlah outlier = 5
## Suhu : Jumlah outlier = 2
# Inkonsistensi Kategori
# Cek level unik di kolom Status
unique(train$Status)
## [1] "Tercemar ringan" "baik" "BAIK" "Baik"
## [5] "tercemar ringan" "Tercemar Ringan" "Tercemar berat"
# Standarisasi penulisan kategori (huruf kecil dan tanpa typo)
train$Status <- tolower(train$Status)
train$Status <- trimws(train$Status)
# Perbaiki ejaan umum
train$Status <- recode(train$Status,
"baik" = "Baik",
"tercemar ringan" = "Tercemar ringan",
"tercemar berat" = "Tercemar berat",
"tercemar_berat" = "Tercemar berat",
"tercemar_ringan" = "Tercemar ringan"
)
# Konversi ke faktor dengan urutan logis
train$Status <- factor(train$Status, levels = c("Baik", "Tercemar ringan", "Tercemar berat"))
# Tabel frekuensi
train %>% count(Status) %>% mutate(prop = n / sum(n)) %>% kable()
| Baik |
72 |
0.2400000 |
| Tercemar ringan |
221 |
0.7366667 |
| Tercemar berat |
7 |
0.0233333 |
# Distribusi variabel (histogram)
train %>%
pivot_longer(cols = all_of(num_vars), names_to = "var", values_to = "val") %>%
ggplot(aes(x = val)) +
geom_histogram(bins = 30, alpha = 0.7) +
facet_wrap(~var, scales = "free") +
theme_minimal() +
labs(title = "Distribusi variabel numerik")

# Boxplots untuk deteksi outlier
train %>%
pivot_longer(cols = all_of(num_vars), names_to = "var", values_to = "val") %>%
ggplot(aes(x = var, y = val)) +
geom_boxplot(outlier.shape = 1) +
theme_minimal() +
labs(title = "Boxplot per variabel (cek outlier)")
### Soal 2
# Split internal
set.seed(20251015)
train_idx <- createDataPartition(train$Status, p = 0.8, list = FALSE)
train_int <- train[train_idx, ]
val_int <- train[-train_idx, ]
# Formula
formula_cls <- as.formula(paste("Status ~", paste(num_vars, collapse = " + ")))
# Helper predict & eval function
eval_cls <- function(true, pred) {
cm <- confusionMatrix(pred, true)
byClass <- as.data.frame(cm$byClass)
overall <- cm$overall
list(confusion = cm$table, overall = overall, byClass = byClass)
}
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
# Formula klasifikasi
formula_cls <- Status ~ pH + DO + BOD + TSS + Suhu
# Model SVM
svm_fit <- svm(formula_cls, data = train_int, probability = TRUE)
cat("Model SVM berhasil dibangun.\n")
## Model SVM berhasil dibangun.
print(svm_fit)
##
## Call:
## svm(formula = formula_cls, data = train_int, probability = TRUE)
##
##
## Parameters:
## SVM-Type: C-classification
## SVM-Kernel: radial
## cost: 1
##
## Number of Support Vectors: 134
ggplot(train_int, aes(x = DO, y = BOD, color = Status)) +
geom_point(size = 2, alpha = 0.8) +
stat_contour(data = train_int, aes(z = as.numeric(predict(svm_fit, train_int))),
bins = 2, color = "black", linetype = "dashed") +
theme_minimal() +
labs(title = "Visualisasi SVM Berdasarkan DO dan BOD", x = "DO", y = "BOD")
## Warning: Contour data has duplicated x, y coordinates.
## ℹ 1 duplicated row have been dropped.
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

# Model Decision Tree
tree_fit <- rpart(formula_cls, data = train_int, method = "class", control = rpart.control(cp = 0.01))
cat("\nModel Decision Tree berhasil dibangun.\n")
##
## Model Decision Tree berhasil dibangun.
rpart.plot(tree_fit, main = "Struktur Decision Tree")

# Model Random Forest
rf_fit <- randomForest(formula_cls, data = train_int, ntree = 500, importance = TRUE)
cat("\nModel Random Forest berhasil dibangun.\n")
##
## Model Random Forest berhasil dibangun.
print(rf_fit)
##
## Call:
## randomForest(formula = formula_cls, data = train_int, ntree = 500, importance = TRUE)
## Type of random forest: classification
## Number of trees: 500
## No. of variables tried at each split: 2
##
## OOB estimate of error rate: 7.05%
## Confusion matrix:
## Baik Tercemar ringan Tercemar berat class.error
## Baik 51 7 0 0.12068966
## Tercemar ringan 6 171 0 0.03389831
## Tercemar berat 0 4 2 0.66666667
# Perubahan OOB Error terhadap Jumlah Pohon (Random Forest)
plot(rf_fit, main = "Perubahan OOB Error terhadap Jumlah Pohon (Random Forest)")

# Prediksi untuk masing-masing model
svm_pred <- predict(svm_fit, val_int)
tree_pred <- predict(tree_fit, val_int, type = "class")
rf_pred <- predict(rf_fit, val_int)
# Evaluasi dengan confusion matrix
res_svm <- confusionMatrix(svm_pred, val_int$Status)
res_tree <- confusionMatrix(tree_pred, val_int$Status)
res_rf <- confusionMatrix(rf_pred, val_int$Status)
# Tampilkan hasil
cat("Confusion Matrix SVM\n")
## Confusion Matrix SVM
print(res_svm$table)
## Reference
## Prediction Baik Tercemar ringan Tercemar berat
## Baik 9 0 0
## Tercemar ringan 5 44 1
## Tercemar berat 0 0 0
cat("\nAkurasi SVM:", round(res_svm$overall["Accuracy"], 4), "\n\n")
##
## Akurasi SVM: 0.8983
cat("Confusion Matrix Decision Tree\n")
## Confusion Matrix Decision Tree
print(res_tree$table)
## Reference
## Prediction Baik Tercemar ringan Tercemar berat
## Baik 14 1 0
## Tercemar ringan 0 43 1
## Tercemar berat 0 0 0
cat("\nAkurasi Decision Tree:", round(res_tree$overall["Accuracy"], 4), "\n\n")
##
## Akurasi Decision Tree: 0.9661
cat("Confusion Matrix Random Forest\n")
## Confusion Matrix Random Forest
print(res_rf$table)
## Reference
## Prediction Baik Tercemar ringan Tercemar berat
## Baik 14 0 0
## Tercemar ringan 0 44 1
## Tercemar berat 0 0 0
cat("\nAkurasi Random Forest:", round(res_rf$overall["Accuracy"], 4), "\n\n")
##
## Akurasi Random Forest: 0.9831
# Tabel perbandingan akurasi
acc_table <- tibble(
Model = c("SVM", "Decision Tree", "Random Forest"),
Accuracy = c(
round(res_svm$overall["Accuracy"], 4),
round(res_tree$overall["Accuracy"], 4),
round(res_rf$overall["Accuracy"], 4)
)
)
# Visualisasi perbandingan akurasi
ggplot(acc_table, aes(x = Model, y = Accuracy, fill = Model)) +
geom_col(width = 0.6) +
geom_text(aes(label = round(Accuracy, 3)), vjust = -0.5, size = 4) +
ylim(0, 1) +
theme_minimal() +
labs(
title = "Perbandingan Akurasi Model Klasifikasi",
x = "Model",
y = "Akurasi"
) +
theme(legend.position = "none")

#Prediksi 75 baris hasil ketiga model
# Imputasi missing value pada data test
test_work <- test_raw
for (v in num_vars) {
if (any(is.na(test_work[[v]]))) {
test_work[[v]][is.na(test_work[[v]])] <- median(train[[v]], na.rm = TRUE)
}
}
# Gunakan formula & model yang sudah dilatih
pred_svm <- predict(svm_fit, test_work)
pred_tree <- predict(tree_fit, test_work, type = "class")
pred_rf <- predict(rf_fit, test_work)
# Gabungkan hasil prediksi semua model
pred_output <- tibble(
Lokasi = test_work$Lokasi,
Pred_SVM = as.character(pred_svm),
Pred_Tree = as.character(pred_tree),
Pred_RF = as.character(pred_rf)
)
# Tampilkan semua hasil (75 baris)
kable(pred_output, caption = "Hasil Prediksi 75 Lokasi untuk Semua Model")
Hasil Prediksi 75 Lokasi untuk Semua Model
| S301 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S302 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S303 |
Baik |
Baik |
Baik |
| S304 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S305 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S306 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S307 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S308 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S309 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S310 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S311 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S312 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S313 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S314 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S315 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S316 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S317 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S318 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S319 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S320 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S321 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S322 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S323 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S324 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S325 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S326 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S327 |
Baik |
Baik |
Baik |
| S328 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S329 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S330 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S331 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S332 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S333 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S334 |
Tercemar ringan |
Baik |
Baik |
| S335 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S336 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S337 |
Baik |
Baik |
Baik |
| S338 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S339 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S340 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S341 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S342 |
Tercemar ringan |
Baik |
Baik |
| S343 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S344 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S345 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S346 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S347 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S348 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S349 |
Baik |
Baik |
Baik |
| S350 |
Tercemar ringan |
Tercemar berat |
Tercemar ringan |
| S351 |
Baik |
Baik |
Baik |
| S352 |
Baik |
Baik |
Baik |
| S353 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S354 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S355 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S356 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S357 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S358 |
Tercemar ringan |
Baik |
Baik |
| S359 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S360 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S361 |
Baik |
Baik |
Baik |
| S362 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S363 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S364 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S365 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S366 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S367 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S368 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S369 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S370 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S371 |
Baik |
Baik |
Baik |
| S372 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S373 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S374 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
| S375 |
Tercemar ringan |
Tercemar ringan |
Tercemar ringan |
Soal 3
# Model 1: Regresi Linear
lm_fit <- lm(DO ~ pH + BOD + TSS + Suhu, data = train)
summary(lm_fit)
##
## Call:
## lm(formula = DO ~ pH + BOD + TSS + Suhu, data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.41303 -0.54131 0.01817 0.64138 2.42521
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.404415 1.159983 5.521 7.39e-08 ***
## pH -0.015239 0.112138 -0.136 0.892
## BOD 0.079449 0.069348 1.146 0.253
## TSS 0.000214 0.006015 0.036 0.972
## Suhu -0.020276 0.026703 -0.759 0.448
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9491 on 295 degrees of freedom
## Multiple R-squared: 0.006004, Adjusted R-squared: -0.007474
## F-statistic: 0.4454 on 4 and 295 DF, p-value: 0.7757
# Evaluasi performa model linear
pred_lm <- predict(lm_fit, train)
R2_lm <- cor(train$DO, pred_lm)^2
MSE_lm <- mean((train$DO - pred_lm)^2)
RMSE_lm <- sqrt(MSE_lm)
#Regresi Spline
spline_fit <- lm(DO ~ bs(pH, df = 4) + bs(BOD, df = 4) + bs(TSS, df = 4) + bs(Suhu, df = 4), data = train)
summary(spline_fit)
##
## Call:
## lm(formula = DO ~ bs(pH, df = 4) + bs(BOD, df = 4) + bs(TSS,
## df = 4) + bs(Suhu, df = 4), data = train)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.39628 -0.51707 0.03633 0.59513 2.43208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.21317 0.96478 6.440 5.11e-10 ***
## bs(pH, df = 4)1 -0.27182 0.70242 -0.387 0.69906
## bs(pH, df = 4)2 0.17304 0.53588 0.323 0.74700
## bs(pH, df = 4)3 -0.50827 0.66083 -0.769 0.44246
## bs(pH, df = 4)4 0.53918 0.66801 0.807 0.42026
## bs(BOD, df = 4)1 0.68815 0.71667 0.960 0.33777
## bs(BOD, df = 4)2 1.01856 0.51429 1.980 0.04862 *
## bs(BOD, df = 4)3 0.05936 0.64835 0.092 0.92711
## bs(BOD, df = 4)4 1.93658 0.60539 3.199 0.00154 **
## bs(TSS, df = 4)1 -0.50186 0.67594 -0.742 0.45843
## bs(TSS, df = 4)2 0.26001 0.46619 0.558 0.57747
## bs(TSS, df = 4)3 -0.43019 0.60998 -0.705 0.48123
## bs(TSS, df = 4)4 -0.19630 0.54912 -0.357 0.72100
## bs(Suhu, df = 4)1 -0.45345 0.85969 -0.527 0.59829
## bs(Suhu, df = 4)2 -0.91895 0.55450 -1.657 0.09858 .
## bs(Suhu, df = 4)3 -0.40864 0.78962 -0.518 0.60520
## bs(Suhu, df = 4)4 -0.72825 0.68524 -1.063 0.28880
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.9441 on 283 degrees of freedom
## Multiple R-squared: 0.05656, Adjusted R-squared: 0.003224
## F-statistic: 1.06 on 16 and 283 DF, p-value: 0.3933
# Evaluasi performa model spline
pred_spline <- predict(spline_fit, train)
R2_spl <- cor(train$DO, pred_spline)^2
MSE_spl <- mean((train$DO - pred_spline)^2)
RMSE_spl <- sqrt(MSE_spl)
# Ringkasan performa model
perf_table <- tibble(
Model = c("Regresi Linear", "Regresi Spline"),
R2 = c(R2_lm, R2_spl),
MSE = c(MSE_lm, MSE_spl),
RMSE = c(RMSE_lm, RMSE_spl)
)
kable(perf_table, caption = "Perbandingan Performa Model Regresi")
Perbandingan Performa Model Regresi
| Regresi Linear |
0.0060035 |
0.8858272 |
0.9411839 |
| Regresi Spline |
0.0565631 |
0.8407696 |
0.9169349 |
# Visualisasi hasil prediksi vs aktual
pred_df <- tibble(
Aktual = train$DO,
Pred_LM = pred_lm,
Pred_Spline = pred_spline
)
ggplot(pred_df, aes(x = Aktual)) +
geom_point(aes(y = Pred_LM), color = "#0052B1", alpha = 0.7) +
geom_point(aes(y = Pred_Spline), color = "#E45F00", alpha = 0.7) +
geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
theme_minimal() +
labs(
title = "Perbandingan Prediksi vs Aktual (DO)",
x = "Nilai Aktual DO",
y = "Nilai Prediksi DO",
caption = "Garis putus-putus = prediksi sempurna (y = x)"
)

# Ambil koefisien model tanpa intercept + interpretasi otomatis
coef_df <- summary(lm_fit)$coefficients[-1, , drop = FALSE] %>%
as.data.frame() %>%
rownames_to_column("Variabel") %>%
mutate(
Arah = ifelse(Estimate > 0, "Positif", "Negatif"),
Signifikan = ifelse(`Pr(>|t|)` < 0.05, "Signifikan", "Tidak signifikan"),
Pengaruh = abs(Estimate),
Interpretasi = case_when(
Arah == "Positif" & Signifikan == "Signifikan" ~ "Berpengaruh positif dan signifikan terhadap DO",
Arah == "Negatif" & Signifikan == "Signifikan" ~ "Berpengaruh negatif dan signifikan terhadap DO",
Arah == "Positif" & Signifikan == "Tidak signifikan" ~ "Cenderung meningkatkan DO, namun tidak signifikan",
Arah == "Negatif" & Signifikan == "Tidak signifikan" ~ "Cenderung menurunkan DO, namun tidak signifikan",
TRUE ~ "Tidak ada pengaruh yang berarti"
)
) %>%
arrange(desc(Pengaruh))
kable(coef_df, caption = "Urutan Pengaruh Variabel terhadap DO (berdasarkan nilai absolut koefisien dan interpretasi)")
Urutan Pengaruh Variabel terhadap DO (berdasarkan nilai absolut
koefisien dan interpretasi)
| BOD |
0.0794486 |
0.0693484 |
1.1456442 |
0.2528709 |
Positif |
Tidak signifikan |
0.0794486 |
Cenderung meningkatkan DO, namun tidak signifikan |
| Suhu |
-0.0202762 |
0.0267033 |
-0.7593158 |
0.4482700 |
Negatif |
Tidak signifikan |
0.0202762 |
Cenderung menurunkan DO, namun tidak signifikan |
| pH |
-0.0152392 |
0.1121379 |
-0.1358971 |
0.8919953 |
Negatif |
Tidak signifikan |
0.0152392 |
Cenderung menurunkan DO, namun tidak signifikan |
| TSS |
0.0002140 |
0.0060148 |
0.0355791 |
0.9716420 |
Positif |
Tidak signifikan |
0.0002140 |
Cenderung meningkatkan DO, namun tidak signifikan |