library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 4.4.3
## Warning: package 'ggplot2' was built under R version 4.4.3
## Warning: package 'tibble' was built under R version 4.4.3
## Warning: package 'tidyr' was built under R version 4.4.3
## Warning: package 'readr' was built under R version 4.4.3
## Warning: package 'purrr' was built under R version 4.4.3
## Warning: package 'dplyr' was built under R version 4.4.3
## Warning: package 'stringr' was built under R version 4.4.3
## Warning: package 'forcats' was built under R version 4.4.3
## Warning: package 'lubridate' was built under R version 4.4.3
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.2     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(caret)
## Warning: package 'caret' was built under R version 4.4.3
## Loading required package: lattice
## 
## Attaching package: 'caret'
## 
## The following object is masked from 'package:purrr':
## 
##     lift
library(e1071)        # SVM backend
## Warning: package 'e1071' was built under R version 4.4.3
library(rpart)        # Decision Tree
library(randomForest) # Random Forest
## Warning: package 'randomForest' was built under R version 4.4.3
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
## 
## The following object is masked from 'package:ggplot2':
## 
##     margin
library(splines)      # Basis spline
library(mice)         # Imputasi multivariat (jika diperlukan)
## Warning: package 'mice' was built under R version 4.4.3
## 
## Attaching package: 'mice'
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following objects are masked from 'package:base':
## 
##     cbind, rbind
library(broom)        # Merapikan output model
## Warning: package 'broom' was built under R version 4.4.3
library(knitr)
## Warning: package 'knitr' was built under R version 4.4.3
set.seed(10251547)
options(stringsAsFactors = FALSE)
# 1) Load data ---------------------------------------------------------------
file_path <- "kualitasair.xlsx"
train_raw <- read_excel("E:/z DRIVE E/KULIAH/SEMESTER 5/Statistika Lingkungan/Week UTS/kualitasair.xlsx", sheet = "Training")
test_raw  <- read_excel("E:/z DRIVE E/KULIAH/SEMESTER 5/Statistika Lingkungan/Week UTS/kualitasair.xlsx", sheet = "Testing")

# Ringkasan
cat("Training: ", dim(train_raw)[1], " baris x ", dim(train_raw)[2], " kolom\n")
## Training:  300  baris x  7  kolom

1. Soal 1

train <- train_raw
test  <- test_raw

# Kolom numerik yang relevan
num_vars <- c("pH", "DO", "BOD", "TSS", "Suhu")


# Missing Value
prop_missing <- sapply(train[num_vars], function(x) mean(is.na(x)))
prop_missing <- tibble(var = names(prop_missing), prop_missing = prop_missing)
kable(prop_missing, caption = "Proporsi Missing Value per Variabel")
Proporsi Missing Value per Variabel
var prop_missing
pH 0.0000000
DO 0.0766667
BOD 0.0733333
TSS 0.0800000
Suhu 0.0000000
# Imputasi: aturan praktis
# Jika prop < 0.10 -> median; else -> mice (pmm)
small_miss <- prop_missing %>% filter(prop_missing < 0.10) %>% pull(var)
large_miss <- prop_missing %>% filter(prop_missing >= 0.10) %>% pull(var)

# Median imputasi untuk small_miss
for (v in small_miss) {
  medv <- median(train[[v]], na.rm = TRUE)
  train[[v]][is.na(train[[v]])] <- medv
}

# MICE untuk variabel dengan missing >= 10%
if (length(large_miss) > 0) {
  impute_df <- train %>% select(all_of(num_vars), Status)
  impute_df$Status <- as.factor(impute_df$Status)
  mice_out <- mice(impute_df, m = 1, method = "pmm", printFlag = FALSE, seed = 10251547)
  completed <- complete(mice_out, 1)
  train[num_vars] <- completed[num_vars]
}

# Verifikasi tidak ada missing numeric
train %>% summarise(across(all_of(num_vars), ~ sum(is.na(.)))) %>%
  kable(caption = "Jumlah Missing Value Setelah Imputasi")
Jumlah Missing Value Setelah Imputasi
pH DO BOD TSS Suhu
0 0 0 0 0
# Outlier Detection & Treatment
# Gunakan metode IQR (Interquartile Range)
for (v in num_vars) {
  Q1 <- quantile(train[[v]], 0.25, na.rm = TRUE)
  Q3 <- quantile(train[[v]], 0.75, na.rm = TRUE)
  IQR_val <- Q3 - Q1
  lower <- Q1 - 1.5 * IQR_val
  upper <- Q3 + 1.5 * IQR_val
  outlier_count <- sum(train[[v]] < lower | train[[v]] > upper)
  cat(v, ": Jumlah outlier =", outlier_count, "\n")
  
  # Winsorizing: ganti outlier ekstrem dengan batas bawah/atas
  train[[v]][train[[v]] < lower] <- lower
  train[[v]][train[[v]] > upper] <- upper
}
## pH : Jumlah outlier = 4 
## DO : Jumlah outlier = 4 
## BOD : Jumlah outlier = 5 
## TSS : Jumlah outlier = 5 
## Suhu : Jumlah outlier = 2
# Inkonsistensi Kategori
# Cek level unik di kolom Status
unique(train$Status)
## [1] "Tercemar ringan" "baik"            "BAIK"            "Baik"           
## [5] "tercemar ringan" "Tercemar Ringan" "Tercemar berat"
# Standarisasi penulisan kategori (huruf kecil dan tanpa typo)
train$Status <- tolower(train$Status)
train$Status <- trimws(train$Status)

# Perbaiki ejaan umum
train$Status <- recode(train$Status,
  "baik" = "Baik",
  "tercemar ringan" = "Tercemar ringan",
  "tercemar berat" = "Tercemar berat",
  "tercemar_berat" = "Tercemar berat",
  "tercemar_ringan" = "Tercemar ringan"
)

# Konversi ke faktor dengan urutan logis
train$Status <- factor(train$Status, levels = c("Baik", "Tercemar ringan", "Tercemar berat"))

# Tabel frekuensi
train %>% count(Status) %>% mutate(prop = n / sum(n)) %>% kable()
Status n prop
Baik 72 0.2400000
Tercemar ringan 221 0.7366667
Tercemar berat 7 0.0233333
# Distribusi variabel (histogram)
train %>%
pivot_longer(cols = all_of(num_vars), names_to = "var", values_to = "val") %>%
ggplot(aes(x = val)) +
geom_histogram(bins = 30, alpha = 0.7) +
facet_wrap(~var, scales = "free") +
theme_minimal() +
labs(title = "Distribusi variabel numerik")

# Boxplots untuk deteksi outlier
train %>%
pivot_longer(cols = all_of(num_vars), names_to = "var", values_to = "val") %>%
ggplot(aes(x = var, y = val)) +
geom_boxplot(outlier.shape = 1) +
theme_minimal() +
labs(title = "Boxplot per variabel (cek outlier)")

### Soal 2

# Split internal
set.seed(20251015)
train_idx <- createDataPartition(train$Status, p = 0.8, list = FALSE)
train_int <- train[train_idx, ]
val_int   <- train[-train_idx, ]

# Formula
formula_cls <- as.formula(paste("Status ~", paste(num_vars, collapse = " + ")))

# Helper predict & eval function
eval_cls <- function(true, pred) {
cm <- confusionMatrix(pred, true)
byClass <- as.data.frame(cm$byClass)
overall <- cm$overall
list(confusion = cm$table, overall = overall, byClass = byClass)
}
library(rpart.plot)
## Warning: package 'rpart.plot' was built under R version 4.4.3
# Formula klasifikasi
formula_cls <- Status ~ pH + DO + BOD + TSS + Suhu

# Model SVM
svm_fit <- svm(formula_cls, data = train_int, probability = TRUE)
cat("Model SVM berhasil dibangun.\n")
## Model SVM berhasil dibangun.
print(svm_fit)
## 
## Call:
## svm(formula = formula_cls, data = train_int, probability = TRUE)
## 
## 
## Parameters:
##    SVM-Type:  C-classification 
##  SVM-Kernel:  radial 
##        cost:  1 
## 
## Number of Support Vectors:  134
ggplot(train_int, aes(x = DO, y = BOD, color = Status)) +
  geom_point(size = 2, alpha = 0.8) +
  stat_contour(data = train_int, aes(z = as.numeric(predict(svm_fit, train_int))), 
               bins = 2, color = "black", linetype = "dashed") +
  theme_minimal() +
  labs(title = "Visualisasi SVM Berdasarkan DO dan BOD", x = "DO", y = "BOD")
## Warning: Contour data has duplicated x, y coordinates.
## ℹ 1 duplicated row have been dropped.
## Warning: `stat_contour()`: Zero contours were generated
## Warning in min(x): no non-missing arguments to min; returning Inf
## Warning in max(x): no non-missing arguments to max; returning -Inf

# Model Decision Tree
tree_fit <- rpart(formula_cls, data = train_int, method = "class", control = rpart.control(cp = 0.01))
cat("\nModel Decision Tree berhasil dibangun.\n")
## 
## Model Decision Tree berhasil dibangun.
rpart.plot(tree_fit, main = "Struktur Decision Tree")

# Model Random Forest
rf_fit <- randomForest(formula_cls, data = train_int, ntree = 500, importance = TRUE)
cat("\nModel Random Forest berhasil dibangun.\n")
## 
## Model Random Forest berhasil dibangun.
print(rf_fit)
## 
## Call:
##  randomForest(formula = formula_cls, data = train_int, ntree = 500,      importance = TRUE) 
##                Type of random forest: classification
##                      Number of trees: 500
## No. of variables tried at each split: 2
## 
##         OOB estimate of  error rate: 7.05%
## Confusion matrix:
##                 Baik Tercemar ringan Tercemar berat class.error
## Baik              51               7              0  0.12068966
## Tercemar ringan    6             171              0  0.03389831
## Tercemar berat     0               4              2  0.66666667
# Perubahan OOB Error terhadap Jumlah Pohon (Random Forest)
plot(rf_fit, main = "Perubahan OOB Error terhadap Jumlah Pohon (Random Forest)")

# Prediksi untuk masing-masing model
svm_pred  <- predict(svm_fit, val_int)
tree_pred <- predict(tree_fit, val_int, type = "class")
rf_pred   <- predict(rf_fit, val_int)

# Evaluasi dengan confusion matrix
res_svm  <- confusionMatrix(svm_pred, val_int$Status)
res_tree <- confusionMatrix(tree_pred, val_int$Status)
res_rf   <- confusionMatrix(rf_pred, val_int$Status)

# Tampilkan hasil
cat("Confusion Matrix SVM\n")
## Confusion Matrix SVM
print(res_svm$table)
##                  Reference
## Prediction        Baik Tercemar ringan Tercemar berat
##   Baik               9               0              0
##   Tercemar ringan    5              44              1
##   Tercemar berat     0               0              0
cat("\nAkurasi SVM:", round(res_svm$overall["Accuracy"], 4), "\n\n")
## 
## Akurasi SVM: 0.8983
cat("Confusion Matrix Decision Tree\n")
## Confusion Matrix Decision Tree
print(res_tree$table)
##                  Reference
## Prediction        Baik Tercemar ringan Tercemar berat
##   Baik              14               1              0
##   Tercemar ringan    0              43              1
##   Tercemar berat     0               0              0
cat("\nAkurasi Decision Tree:", round(res_tree$overall["Accuracy"], 4), "\n\n")
## 
## Akurasi Decision Tree: 0.9661
cat("Confusion Matrix Random Forest\n")
## Confusion Matrix Random Forest
print(res_rf$table)
##                  Reference
## Prediction        Baik Tercemar ringan Tercemar berat
##   Baik              14               0              0
##   Tercemar ringan    0              44              1
##   Tercemar berat     0               0              0
cat("\nAkurasi Random Forest:", round(res_rf$overall["Accuracy"], 4), "\n\n")
## 
## Akurasi Random Forest: 0.9831
# Tabel perbandingan akurasi
acc_table <- tibble(
  Model = c("SVM", "Decision Tree", "Random Forest"),
  Accuracy = c(
    round(res_svm$overall["Accuracy"], 4),
    round(res_tree$overall["Accuracy"], 4),
    round(res_rf$overall["Accuracy"], 4)
  )
)

# Visualisasi perbandingan akurasi
ggplot(acc_table, aes(x = Model, y = Accuracy, fill = Model)) +
  geom_col(width = 0.6) +
  geom_text(aes(label = round(Accuracy, 3)), vjust = -0.5, size = 4) +
  ylim(0, 1) +
  theme_minimal() +
  labs(
    title = "Perbandingan Akurasi Model Klasifikasi",
    x = "Model",
    y = "Akurasi"
  ) +
  theme(legend.position = "none")

#Prediksi 75 baris hasil ketiga model
# Imputasi missing value pada data test
test_work <- test_raw
for (v in num_vars) {
  if (any(is.na(test_work[[v]]))) {
    test_work[[v]][is.na(test_work[[v]])] <- median(train[[v]], na.rm = TRUE)
  }
}

# Gunakan formula & model yang sudah dilatih
pred_svm  <- predict(svm_fit,  test_work)
pred_tree <- predict(tree_fit, test_work, type = "class")
pred_rf   <- predict(rf_fit,   test_work)

# Gabungkan hasil prediksi semua model
pred_output <- tibble(
  Lokasi    = test_work$Lokasi,
  Pred_SVM  = as.character(pred_svm),
  Pred_Tree = as.character(pred_tree),
  Pred_RF   = as.character(pred_rf)
)

# Tampilkan semua hasil (75 baris)
kable(pred_output, caption = "Hasil Prediksi 75 Lokasi untuk Semua Model")
Hasil Prediksi 75 Lokasi untuk Semua Model
Lokasi Pred_SVM Pred_Tree Pred_RF
S301 Tercemar ringan Tercemar ringan Tercemar ringan
S302 Tercemar ringan Tercemar ringan Tercemar ringan
S303 Baik Baik Baik
S304 Tercemar ringan Tercemar ringan Tercemar ringan
S305 Tercemar ringan Tercemar ringan Tercemar ringan
S306 Tercemar ringan Tercemar ringan Tercemar ringan
S307 Tercemar ringan Tercemar ringan Tercemar ringan
S308 Tercemar ringan Tercemar ringan Tercemar ringan
S309 Tercemar ringan Tercemar ringan Tercemar ringan
S310 Tercemar ringan Tercemar ringan Tercemar ringan
S311 Tercemar ringan Tercemar ringan Tercemar ringan
S312 Tercemar ringan Tercemar ringan Tercemar ringan
S313 Tercemar ringan Tercemar ringan Tercemar ringan
S314 Tercemar ringan Tercemar ringan Tercemar ringan
S315 Tercemar ringan Tercemar ringan Tercemar ringan
S316 Tercemar ringan Tercemar ringan Tercemar ringan
S317 Tercemar ringan Tercemar ringan Tercemar ringan
S318 Tercemar ringan Tercemar ringan Tercemar ringan
S319 Tercemar ringan Tercemar ringan Tercemar ringan
S320 Tercemar ringan Tercemar ringan Tercemar ringan
S321 Tercemar ringan Tercemar ringan Tercemar ringan
S322 Tercemar ringan Tercemar ringan Tercemar ringan
S323 Tercemar ringan Tercemar ringan Tercemar ringan
S324 Tercemar ringan Tercemar ringan Tercemar ringan
S325 Tercemar ringan Tercemar ringan Tercemar ringan
S326 Tercemar ringan Tercemar ringan Tercemar ringan
S327 Baik Baik Baik
S328 Tercemar ringan Tercemar ringan Tercemar ringan
S329 Tercemar ringan Tercemar ringan Tercemar ringan
S330 Tercemar ringan Tercemar ringan Tercemar ringan
S331 Tercemar ringan Tercemar ringan Tercemar ringan
S332 Tercemar ringan Tercemar ringan Tercemar ringan
S333 Tercemar ringan Tercemar ringan Tercemar ringan
S334 Tercemar ringan Baik Baik
S335 Tercemar ringan Tercemar ringan Tercemar ringan
S336 Tercemar ringan Tercemar ringan Tercemar ringan
S337 Baik Baik Baik
S338 Tercemar ringan Tercemar ringan Tercemar ringan
S339 Tercemar ringan Tercemar ringan Tercemar ringan
S340 Tercemar ringan Tercemar ringan Tercemar ringan
S341 Tercemar ringan Tercemar ringan Tercemar ringan
S342 Tercemar ringan Baik Baik
S343 Tercemar ringan Tercemar ringan Tercemar ringan
S344 Tercemar ringan Tercemar ringan Tercemar ringan
S345 Tercemar ringan Tercemar ringan Tercemar ringan
S346 Tercemar ringan Tercemar ringan Tercemar ringan
S347 Tercemar ringan Tercemar ringan Tercemar ringan
S348 Tercemar ringan Tercemar ringan Tercemar ringan
S349 Baik Baik Baik
S350 Tercemar ringan Tercemar berat Tercemar ringan
S351 Baik Baik Baik
S352 Baik Baik Baik
S353 Tercemar ringan Tercemar ringan Tercemar ringan
S354 Tercemar ringan Tercemar ringan Tercemar ringan
S355 Tercemar ringan Tercemar ringan Tercemar ringan
S356 Tercemar ringan Tercemar ringan Tercemar ringan
S357 Tercemar ringan Tercemar ringan Tercemar ringan
S358 Tercemar ringan Baik Baik
S359 Tercemar ringan Tercemar ringan Tercemar ringan
S360 Tercemar ringan Tercemar ringan Tercemar ringan
S361 Baik Baik Baik
S362 Tercemar ringan Tercemar ringan Tercemar ringan
S363 Tercemar ringan Tercemar ringan Tercemar ringan
S364 Tercemar ringan Tercemar ringan Tercemar ringan
S365 Tercemar ringan Tercemar ringan Tercemar ringan
S366 Tercemar ringan Tercemar ringan Tercemar ringan
S367 Tercemar ringan Tercemar ringan Tercemar ringan
S368 Tercemar ringan Tercemar ringan Tercemar ringan
S369 Tercemar ringan Tercemar ringan Tercemar ringan
S370 Tercemar ringan Tercemar ringan Tercemar ringan
S371 Baik Baik Baik
S372 Tercemar ringan Tercemar ringan Tercemar ringan
S373 Tercemar ringan Tercemar ringan Tercemar ringan
S374 Tercemar ringan Tercemar ringan Tercemar ringan
S375 Tercemar ringan Tercemar ringan Tercemar ringan

Soal 3

# Model 1: Regresi Linear
lm_fit <- lm(DO ~ pH + BOD + TSS + Suhu, data = train)
summary(lm_fit)
## 
## Call:
## lm(formula = DO ~ pH + BOD + TSS + Suhu, data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.41303 -0.54131  0.01817  0.64138  2.42521 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.404415   1.159983   5.521 7.39e-08 ***
## pH          -0.015239   0.112138  -0.136    0.892    
## BOD          0.079449   0.069348   1.146    0.253    
## TSS          0.000214   0.006015   0.036    0.972    
## Suhu        -0.020276   0.026703  -0.759    0.448    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9491 on 295 degrees of freedom
## Multiple R-squared:  0.006004,   Adjusted R-squared:  -0.007474 
## F-statistic: 0.4454 on 4 and 295 DF,  p-value: 0.7757
# Evaluasi performa model linear
pred_lm <- predict(lm_fit, train)
R2_lm   <- cor(train$DO, pred_lm)^2
MSE_lm  <- mean((train$DO - pred_lm)^2)
RMSE_lm <- sqrt(MSE_lm)
#Regresi Spline
spline_fit <- lm(DO ~ bs(pH, df = 4) + bs(BOD, df = 4) + bs(TSS, df = 4) + bs(Suhu, df = 4), data = train)
summary(spline_fit)
## 
## Call:
## lm(formula = DO ~ bs(pH, df = 4) + bs(BOD, df = 4) + bs(TSS, 
##     df = 4) + bs(Suhu, df = 4), data = train)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -2.39628 -0.51707  0.03633  0.59513  2.43208 
## 
## Coefficients:
##                   Estimate Std. Error t value Pr(>|t|)    
## (Intercept)        6.21317    0.96478   6.440 5.11e-10 ***
## bs(pH, df = 4)1   -0.27182    0.70242  -0.387  0.69906    
## bs(pH, df = 4)2    0.17304    0.53588   0.323  0.74700    
## bs(pH, df = 4)3   -0.50827    0.66083  -0.769  0.44246    
## bs(pH, df = 4)4    0.53918    0.66801   0.807  0.42026    
## bs(BOD, df = 4)1   0.68815    0.71667   0.960  0.33777    
## bs(BOD, df = 4)2   1.01856    0.51429   1.980  0.04862 *  
## bs(BOD, df = 4)3   0.05936    0.64835   0.092  0.92711    
## bs(BOD, df = 4)4   1.93658    0.60539   3.199  0.00154 ** 
## bs(TSS, df = 4)1  -0.50186    0.67594  -0.742  0.45843    
## bs(TSS, df = 4)2   0.26001    0.46619   0.558  0.57747    
## bs(TSS, df = 4)3  -0.43019    0.60998  -0.705  0.48123    
## bs(TSS, df = 4)4  -0.19630    0.54912  -0.357  0.72100    
## bs(Suhu, df = 4)1 -0.45345    0.85969  -0.527  0.59829    
## bs(Suhu, df = 4)2 -0.91895    0.55450  -1.657  0.09858 .  
## bs(Suhu, df = 4)3 -0.40864    0.78962  -0.518  0.60520    
## bs(Suhu, df = 4)4 -0.72825    0.68524  -1.063  0.28880    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.9441 on 283 degrees of freedom
## Multiple R-squared:  0.05656,    Adjusted R-squared:  0.003224 
## F-statistic:  1.06 on 16 and 283 DF,  p-value: 0.3933
# Evaluasi performa model spline
pred_spline <- predict(spline_fit, train)
R2_spl   <- cor(train$DO, pred_spline)^2
MSE_spl  <- mean((train$DO - pred_spline)^2)
RMSE_spl <- sqrt(MSE_spl)
# Ringkasan performa model
perf_table <- tibble(
  Model = c("Regresi Linear", "Regresi Spline"),
  R2    = c(R2_lm, R2_spl),
  MSE   = c(MSE_lm, MSE_spl),
  RMSE  = c(RMSE_lm, RMSE_spl)
)
kable(perf_table, caption = "Perbandingan Performa Model Regresi")
Perbandingan Performa Model Regresi
Model R2 MSE RMSE
Regresi Linear 0.0060035 0.8858272 0.9411839
Regresi Spline 0.0565631 0.8407696 0.9169349
# Visualisasi hasil prediksi vs aktual 
pred_df <- tibble(
  Aktual = train$DO,
  Pred_LM = pred_lm,
  Pred_Spline = pred_spline
)
ggplot(pred_df, aes(x = Aktual)) +
  geom_point(aes(y = Pred_LM), color = "#0052B1", alpha = 0.7) +
  geom_point(aes(y = Pred_Spline), color = "#E45F00", alpha = 0.7) +
  geom_abline(slope = 1, intercept = 0, linetype = "dashed") +
  theme_minimal() +
  labs(
    title = "Perbandingan Prediksi vs Aktual (DO)",
    x = "Nilai Aktual DO",
    y = "Nilai Prediksi DO",
    caption = "Garis putus-putus = prediksi sempurna (y = x)"
  )

# Ambil koefisien model tanpa intercept + interpretasi otomatis
coef_df <- summary(lm_fit)$coefficients[-1, , drop = FALSE] %>%
  as.data.frame() %>%
  rownames_to_column("Variabel") %>%
  mutate(
    Arah = ifelse(Estimate > 0, "Positif", "Negatif"),
    Signifikan = ifelse(`Pr(>|t|)` < 0.05, "Signifikan", "Tidak signifikan"),
    Pengaruh = abs(Estimate),
    Interpretasi = case_when(
      Arah == "Positif" & Signifikan == "Signifikan" ~ "Berpengaruh positif dan signifikan terhadap DO",
      Arah == "Negatif" & Signifikan == "Signifikan" ~ "Berpengaruh negatif dan signifikan terhadap DO",
      Arah == "Positif" & Signifikan == "Tidak signifikan" ~ "Cenderung meningkatkan DO, namun tidak signifikan",
      Arah == "Negatif" & Signifikan == "Tidak signifikan" ~ "Cenderung menurunkan DO, namun tidak signifikan",
      TRUE ~ "Tidak ada pengaruh yang berarti"
    )
  ) %>%
  arrange(desc(Pengaruh))

kable(coef_df, caption = "Urutan Pengaruh Variabel terhadap DO (berdasarkan nilai absolut koefisien dan interpretasi)")
Urutan Pengaruh Variabel terhadap DO (berdasarkan nilai absolut koefisien dan interpretasi)
Variabel Estimate Std. Error t value Pr(>|t|) Arah Signifikan Pengaruh Interpretasi
BOD 0.0794486 0.0693484 1.1456442 0.2528709 Positif Tidak signifikan 0.0794486 Cenderung meningkatkan DO, namun tidak signifikan
Suhu -0.0202762 0.0267033 -0.7593158 0.4482700 Negatif Tidak signifikan 0.0202762 Cenderung menurunkan DO, namun tidak signifikan
pH -0.0152392 0.1121379 -0.1358971 0.8919953 Negatif Tidak signifikan 0.0152392 Cenderung menurunkan DO, namun tidak signifikan
TSS 0.0002140 0.0060148 0.0355791 0.9716420 Positif Tidak signifikan 0.0002140 Cenderung meningkatkan DO, namun tidak signifikan