Classification

Grouped Models

# ===============================================
# REGULAR K-FOLD WITHOUT SMOTE + FULL METRICS
# ===============================================

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(recipes)
## 
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
## 
##     step
library(e1071)
library(rpart)
library(ranger)
library(caret)
## Loading required package: ggplot2
## Loading required package: lattice
library(yardstick)
## 
## Attaching package: 'yardstick'
## The following objects are masked from 'package:caret':
## 
##     precision, recall, sensitivity, specificity
library(readxl)

set.seed(42)

# 1. Load and label the data (4 levels collapsed to Rentan)
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)

# 2. Create regular (non-spatial) folds
k <- 5
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)

# 3. Model formula
form <- Y ~ . -lon -lat

# 4. K-fold loop
results_all <- list()

for (i in 1:k) {
  cat("\n=== K-FOLD BIASA TANPA SMOTE Fold", i, "===\n")

  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[test_idx, ]

  models <- list()
  preds  <- list()
  metrics <- list()

  # === MODEL 1: NAIVE BAYES
  models$nb <- naiveBayes(form, data = train_data)
  preds$nb <- predict(models$nb, test_data)

  # === MODEL 2: DECISION TREE
  models$dt <- rpart(form, data = train_data)
  preds$dt <- predict(models$dt, test_data, type = "class")

  # === MODEL 3: KNN
  models$knn <- train(form, data = train_data, method = "knn",
                      trControl = trainControl(method = "none"),
                      tuneGrid = data.frame(k = 5))
  preds$knn <- predict(models$knn, test_data)

  # === MODEL 4: SVM
  models$svm <- svm(form, data = train_data)
  preds$svm <- predict(models$svm, test_data)

  # === MODEL 5: RANDOM FOREST
  models$rf <- ranger(formula = form, data = train_data, num.trees = 500)
  preds$rf <- predict(models$rf, data = test_data)$predictions

  for (model_name in names(preds)) {
    pred <- preds[[model_name]]
    obs <- test_data$Y
    eval_df <- data.frame(obs = obs, pred = pred)

    cm <- confusionMatrix(pred, obs, positive = "Rentan")

    acc <- cm$overall["Accuracy"]
    kappa <- cm$overall["Kappa"]
    prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
    rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
    f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

    metrics[[model_name]] <- data.frame(
      Fold = i,
      Model = model_name,
      Accuracy = acc,
      Kappa = kappa,
      Precision = prec,
      Recall = rec,
      F1_Score = f1
    )
  }

  results_all[[i]] <- bind_rows(metrics)
}
## 
## === REGULAR K-FOLD WITHOUT SMOTE Fold 1 ===
## 
## === REGULAR K-FOLD WITHOUT SMOTE Fold 2 ===
## Warning: While computing binary `precision()`, no predicted events were detected (i.e.
## `true_positive + false_positive = 0`).
## Precision is undefined in this case, and `NA` will be returned.
## Note that 73 true event(s) actually occurred for the problematic event level,
## Rentan
## While computing binary `precision()`, no predicted events were detected (i.e.
## `true_positive + false_positive = 0`).
## Precision is undefined in this case, and `NA` will be returned.
## Note that 73 true event(s) actually occurred for the problematic event level,
## Rentan
## 
## === REGULAR K-FOLD WITHOUT SMOTE Fold 3 ===
## 
## === REGULAR K-FOLD WITHOUT SMOTE Fold 4 ===
## 
## === REGULAR K-FOLD WITHOUT SMOTE Fold 5 ===
# 5. Combine results
hasil_all <- bind_rows(results_all)

cat("\n=== METRIK PER FOLD (TANPA SMOTE) ===\n")
## 
## === METRIK PER FOLD (TANPA SMOTE) ===
print(hasil_all)
##               Fold Model  Accuracy        Kappa Precision     Recall   F1_Score
## Accuracy...1     1    nb 0.4788136  0.122172493 0.3579545 0.86301370 0.50602410
## Accuracy...2     1    dt 0.6440678  0.083833996 0.3877551 0.26027397 0.31147541
## Accuracy...3     1   knn 0.6610169  0.148705925 0.4363636 0.32876712 0.37500000
## Accuracy...4     1   svm 0.6864407 -0.008430535 0.0000000 0.00000000 0.00000000
## Accuracy...5     1    rf 0.6991525  0.215836765 0.5217391 0.32876712 0.40336134
## Accuracy...6     2    nb 0.4279661 -0.007335273 0.3062500 0.67123288 0.42060086
## Accuracy...7     2    dt 0.6779661  0.142556650 0.4634146 0.26027397 0.33333333
## Accuracy...8     2   knn 0.6525424  0.090516026 0.4000000 0.24657534 0.30508475
## Accuracy...9     2   svm 0.6906780  0.000000000        NA 0.00000000         NA
## Accuracy...10    2    rf 0.7161017  0.220623028 0.5882353 0.27397260 0.37383178
## Accuracy...11    3    nb 0.4661017  0.078182156 0.3413174 0.78082192 0.47500000
## Accuracy...12    3    dt 0.7033898  0.159629667 0.5555556 0.20547945 0.30000000
## Accuracy...13    3   knn 0.6355932  0.029735156 0.3414634 0.19178082 0.24561404
## Accuracy...14    3   svm 0.6906780  0.010340074 0.5000000 0.01369863 0.02666667
## Accuracy...15    3    rf 0.6906780  0.143226576 0.5000000 0.21917808 0.30476190
## Accuracy...16    4    nb 0.6949153  0.076421350 0.5384615 0.09589041 0.16279070
## Accuracy...17    4    dt 0.6652542  0.097055405 0.4210526 0.21917808 0.28828829
## Accuracy...18    4   knn 0.6779661  0.171087901 0.4693878 0.31506849 0.37704918
## Accuracy...19    4   svm 0.6949153  0.018824345 1.0000000 0.01369863 0.02702703
## Accuracy...20    4    rf 0.6652542  0.104858844 0.4250000 0.23287671 0.30088496
## Accuracy...21    5    nb 0.4703390  0.088042537 0.3452381 0.79452055 0.48132780
## Accuracy...22    5    dt 0.6779661  0.164134588 0.4680851 0.30136986 0.36666667
## Accuracy...23    5   knn 0.6567797  0.105391239 0.4130435 0.26027397 0.31932773
## Accuracy...24    5   svm 0.6864407 -0.008430535 0.0000000 0.00000000 0.00000000
## Accuracy...25    5    rf 0.7161017  0.240829652 0.5750000 0.31506849 0.40707965
cat("\n=== RATA-RATA METRIK PER MODEL ===\n")
## 
## === RATA-RATA METRIK PER MODEL ===
hasil_summary <- hasil_all %>%
  group_by(Model) %>%
  summarise(across(Accuracy:F1_Score, \(x) mean(x, na.rm = TRUE)), .groups = "drop")
print(hasil_summary)
## # A tibble: 5 × 6
##   Model Accuracy   Kappa Precision  Recall F1_Score
##   <chr>    <dbl>   <dbl>     <dbl>   <dbl>    <dbl>
## 1 dt       0.674 0.129       0.459 0.249     0.320 
## 2 knn      0.657 0.109       0.412 0.268     0.324 
## 3 nb       0.508 0.0715      0.378 0.641     0.409 
## 4 rf       0.697 0.185       0.522 0.274     0.358 
## 5 svm      0.690 0.00246     0.375 0.00548   0.0134
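
Note the `NA` precision for the fold-2 SVM above: the model predicted no "Rentan" cases at all, so precision is undefined (see the yardstick warning). Two small additions, not part of the original script, help here: a quick look at the predicted class distribution (for whichever fold's objects are still in memory), and a named copy of the summary, since `hasil_summary` is overwritten by the next section.

table(preds$svm)                    # an all-"Tahan" table signals undefined precision
summary_kfold_raw <- hasil_summary  # hypothetical name, reused in the comparison sketch at the end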
# ===============================================
# REGULAR K-FOLD + SMOTE + ALL METRICS
# ===============================================

library(dplyr)
library(themis)
library(recipes)
library(e1071)
library(rpart)
library(ranger)
library(caret)
library(yardstick)
library(readxl)

set.seed(42)

# 1. Load and relabel the data (4 Rentan levels, 2 Tahan levels)
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)

# 2. Create regular (non-spatial) folds
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)

# 3. Model formula
form <- Y ~ . -lon -lat  # no spatial_fold column here

# 4. K-fold loop
results_all <- list()

for (i in 1:k) {
  cat("\n=== K-FOLD BIASA Fold", i, "===\n")

  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[test_idx, ]

  # SMOTE on the training set
  rec <- recipe(Y ~ ., data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_data_smote <- bake(rec, new_data = NULL)

  models <- list()
  preds  <- list()
  metrics <- list()

  # === MODEL 1: NAIVE BAYES
  models$nb <- naiveBayes(form, data = train_data_smote)
  preds$nb <- predict(models$nb, test_data)

  # === MODEL 2: DECISION TREE
  models$dt <- rpart(form, data = train_data_smote)
  preds$dt <- predict(models$dt, test_data, type = "class")

  # === MODEL 3: KNN
  models$knn <- train(form, data = train_data_smote, method = "knn",
                      trControl = trainControl(method = "none"),
                      tuneGrid = data.frame(k = 10))
  preds$knn <- predict(models$knn, test_data)

  # === MODEL 4: SVM
  models$svm <- svm(form, data = train_data_smote)
  preds$svm <- predict(models$svm, test_data)

  # === MODEL 5: RANDOM FOREST
  models$rf <- ranger(formula = form, data = train_data_smote, num.trees = 500)
  preds$rf <- predict(models$rf, data = test_data)$predictions

  for (model_name in names(preds)) {
    pred <- preds[[model_name]]
    obs <- test_data$Y
    eval_df <- data.frame(obs = obs, pred = pred)

    cm <- confusionMatrix(pred, obs, positive = "Rentan")

    acc <- cm$overall["Accuracy"]
    kappa <- cm$overall["Kappa"]
    prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
    rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
    f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

    metrics[[model_name]] <- data.frame(
      Fold = i,
      Model = model_name,
      Accuracy = acc,
      Kappa = kappa,
      Precision = prec,
      Recall = rec,
      F1_Score = f1
    )
  }

  results_all[[i]] <- bind_rows(metrics)
}
## 
## === REGULAR K-FOLD Fold 1 ===
## 
## === REGULAR K-FOLD Fold 2 ===
## 
## === REGULAR K-FOLD Fold 3 ===
## 
## === REGULAR K-FOLD Fold 4 ===
## 
## === REGULAR K-FOLD Fold 5 ===
## 
## === REGULAR K-FOLD Fold 6 ===
## 
## === REGULAR K-FOLD Fold 7 ===
## 
## === REGULAR K-FOLD Fold 8 ===
## 
## === REGULAR K-FOLD Fold 9 ===
## 
## === REGULAR K-FOLD Fold 10 ===
# 5. Gabung & cetak hasil
hasil_all <- bind_rows(results_all)

cat("\n=== METRIK PER FOLD (K-FOLD BIASA) ===\n")
## 
## === METRIK PER FOLD (K-FOLD BIASA) ===
print(hasil_all)
##               Fold Model  Accuracy        Kappa Precision    Recall  F1_Score
## Accuracy...1     1    nb 0.3445378 -0.006942938 0.3084112 0.8918919 0.4583333
## Accuracy...2     1    dt 0.5378151  0.123006834 0.3676471 0.6756757 0.4761905
## Accuracy...3     1   knn 0.4957983  0.060279021 0.3380282 0.6486486 0.4444444
## Accuracy...4     1   svm 0.4033613  0.041302621 0.3265306 0.8648649 0.4740741
## Accuracy...5     1    rf 0.5966387  0.147208122 0.3921569 0.5405405 0.4545455
## Accuracy...6     2    nb 0.3305085 -0.022373327 0.3055556 0.8918919 0.4551724
## Accuracy...7     2    dt 0.6271186  0.290128521 0.4477612 0.8108108 0.5769231
## Accuracy...8     2   knn 0.4661017 -0.074588031 0.2758621 0.4324324 0.3368421
## Accuracy...9     2   svm 0.5423729  0.187452181 0.3924051 0.8378378 0.5344828
## Accuracy...10    2    rf 0.7288136  0.404979515 0.5555556 0.6756757 0.6097561
## Accuracy...11    3    nb 0.2649573 -0.109126984 0.2685185 0.8055556 0.4027778
## Accuracy...12    3    dt 0.6923077  0.374331551 0.5000000 0.7777778 0.6086957
## Accuracy...13    3   knn 0.6068376  0.178571429 0.4038462 0.5833333 0.4772727
## Accuracy...14    3   svm 0.5213675  0.141509434 0.3684211 0.7777778 0.5000000
## Accuracy...15    3    rf 0.6495726  0.208023774 0.4390244 0.5000000 0.4675325
## Accuracy...16    4    nb 0.3135593 -0.031958540 0.2935780 0.8888889 0.4413793
## Accuracy...17    4    dt 0.5254237  0.104121475 0.3529412 0.6666667 0.4615385
## Accuracy...18    4   knn 0.5932203  0.158645276 0.3888889 0.5833333 0.4666667
## Accuracy...19    4   svm 0.5762712  0.264339152 0.4146341 0.9444444 0.5762712
## Accuracy...20    4    rf 0.6440678  0.209821429 0.4318182 0.5277778 0.4750000
## Accuracy...21    5    nb 0.3361345 -0.005132043 0.3090909 0.9189189 0.4625850
## Accuracy...22    5    dt 0.6386555  0.162931458 0.4210526 0.4324324 0.4266667
## Accuracy...23    5   knn 0.5882353  0.135251372 0.3846154 0.5405405 0.4494382
## Accuracy...24    5   svm 0.4705882  0.063226290 0.3375000 0.7297297 0.4615385
## Accuracy...25    5    rf 0.6302521  0.137112722 0.4054054 0.4054054 0.4054054
## Accuracy...26    6    nb 0.3247863 -0.013820336 0.3027523 0.9166667 0.4551724
## Accuracy...27    6    dt 0.5982906  0.143057504 0.3877551 0.5277778 0.4470588
## Accuracy...28    6   knn 0.6153846  0.201909959 0.4150943 0.6111111 0.4943820
## Accuracy...29    6   svm 0.4615385  0.072480181 0.3373494 0.7777778 0.4705882
## Accuracy...30    6    rf 0.5897436  0.093023256 0.3636364 0.4444444 0.4000000
## Accuracy...31    7    nb 0.3220339 -0.055928412 0.2843137 0.8055556 0.4202899
## Accuracy...32    7    dt 0.6440678  0.209821429 0.4318182 0.5277778 0.4750000
## Accuracy...33    7   knn 0.6016949  0.134789392 0.3829787 0.5000000 0.4337349
## Accuracy...34    7   svm 0.4745763  0.055268595 0.3289474 0.6944444 0.4464286
## Accuracy...35    7    rf 0.6779661  0.240514905 0.4722222 0.4722222 0.4722222
## Accuracy...36    8    nb 0.3277311 -0.003372681 0.3097345 0.9459459 0.4666667
## Accuracy...37    8    dt 0.6470588  0.222464219 0.4444444 0.5405405 0.4878049
## Accuracy...38    8   knn 0.5882353  0.111534359 0.3750000 0.4864865 0.4235294
## Accuracy...39    8   svm 0.5294118  0.143224479 0.3733333 0.7567568 0.5000000
## Accuracy...40    8    rf 0.6890756  0.329117782 0.5000000 0.6486486 0.5647059
## Accuracy...41    9    nb 0.4529915  0.073496659 0.3372093 0.8055556 0.4754098
## Accuracy...42    9    dt 0.6410256  0.170212766 0.4210526 0.4444444 0.4324324
## Accuracy...43    9   knn 0.5042735 -0.035714286 0.2884615 0.4166667 0.3409091
## Accuracy...44    9   svm 0.4700855  0.060606061 0.3333333 0.7222222 0.4561404
## Accuracy...45    9    rf 0.6495726  0.171073095 0.4285714 0.4166667 0.4225352
## Accuracy...46   10    nb 0.4067797 -0.008054674 0.3103448 0.7297297 0.4354839
## Accuracy...47   10    dt 0.6271186  0.193036991 0.4255319 0.5405405 0.4761905
## Accuracy...48   10   knn 0.5762712  0.119140042 0.3773585 0.5405405 0.4444444
## Accuracy...49   10   svm 0.4745763  0.045656144 0.3333333 0.6756757 0.4464286
## Accuracy...50   10    rf 0.6271186  0.203925176 0.4285714 0.5675676 0.4883721
cat("\n=== RATA-RATA METRIK PER MODEL ===\n")
## 
## === RATA-RATA METRIK PER MODEL ===
hasil_summary <- hasil_all %>%
  group_by(Model) %>%
  summarise(across(Accuracy:F1_Score, \(x) mean(x, na.rm = TRUE)), .groups = "drop")
print(hasil_summary)
## # A tibble: 5 × 6
##   Model Accuracy   Kappa Precision Recall F1_Score
##   <chr>    <dbl>   <dbl>     <dbl>  <dbl>    <dbl>
## 1 dt       0.618  0.199      0.420  0.594    0.487
## 2 knn      0.564  0.0990     0.363  0.534    0.431
## 3 nb       0.342 -0.0183     0.303  0.860    0.447
## 4 rf       0.648  0.214      0.442  0.520    0.476
## 5 svm      0.492  0.108      0.355  0.778    0.487
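
As above, a named copy of this summary (a hypothetical addition) keeps it available for the comparison sketch at the end of the grouped experiments.

summary_kfold_smote <- hasil_summary  # hypothetical name for the final comparison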
# ===============================================
# REGULAR SPATIAL K-FOLD + SMOTE + FULL METRICS
# ===============================================

library(CAST)
library(caret)
library(e1071)
library(rpart)
library(ranger)
library(klaR)
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
## 
##     select
library(themis)
library(recipes)
library(dplyr)
library(yardstick)
library(readxl)

set.seed(42)

# 1. Load & siapkan data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan","Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)

# 2. Create spatial clusters
desa_df$spatial_cluster <- kmeans(desa_df[, c("lon", "lat")], centers = 10)$cluster

# 3. Regular spatial CV
spatial_folds <- CreateSpacetimeFolds(desa_df, spacevar = "spatial_cluster", k = 10)
folds_index <- spatial_folds$indexOut
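
# (Addition, not in the original script:) CreateSpacetimeFolds holds out one
# cluster group per fold, so test-set sizes vary with the k-means cluster
# sizes; a quick sanity check:
sapply(folds_index, length)  # held-out observations per spatial fold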

# 4. Formula
form <- Y ~ . -lon -lat -spatial_cluster

# 5. Fold loop
results_all <- list()

for (i in 1:10) {
  cat("\n=== Fold", i, "===\n")

  train_data <- desa_df[-folds_index[[i]], ]
  test_data  <- desa_df[ folds_index[[i]], ]

  # SMOTE
  rec <- recipe(Y ~ ., data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_data_smote <- bake(rec, new_data = NULL)

  models <- list()
  preds  <- list()
  metrics <- list()

  # MODELS:
  models$nb <- naiveBayes(form, data = train_data_smote)
  preds$nb <- predict(models$nb, test_data)

  models$dt <- rpart(form, data = train_data_smote)
  preds$dt <- predict(models$dt, test_data, type = "class")

  models$knn <- train(form, data = train_data_smote, method = "knn",
                      trControl = trainControl(method = "none"),
                      tuneGrid = data.frame(k = 10))
  preds$knn <- predict(models$knn, test_data)

  models$svm <- svm(form, data = train_data_smote)
  preds$svm <- predict(models$svm, test_data)

  models$rf <- ranger(formula = form, data = train_data_smote, num.trees = 500)
  preds$rf <- predict(models$rf, data = test_data)$predictions

  # Evaluate all models
  for (model_name in names(preds)) {
    pred <- preds[[model_name]]
    obs <- test_data$Y
    eval_df <- data.frame(obs = obs, pred = pred)

    cm <- confusionMatrix(pred, obs, positive = "Rentan")

    acc <- cm$overall["Accuracy"]
    kappa <- cm$overall["Kappa"]
    prec <- precision(eval_df, truth = obs, estimate = pred)$.estimate
    rec  <- recall(eval_df, truth = obs, estimate = pred)$.estimate
    f1   <- f_meas(eval_df, truth = obs, estimate = pred)$.estimate

    row <- data.frame(
      Fold = i,
      Model = model_name,
      Accuracy = acc,
      Kappa = kappa,
      Precision = prec,
      Recall = rec,
      F1_Score = f1
    )
    metrics[[model_name]] <- row
  }

  results_all[[i]] <- bind_rows(metrics)
}
## 
## === Fold 1 ===
## 
## === Fold 2 ===
## 
## === Fold 3 ===
## 
## === Fold 4 ===
## 
## === Fold 5 ===
## 
## === Fold 6 ===
## 
## === Fold 7 ===
## 
## === Fold 8 ===
## 
## === Fold 9 ===
## 
## === Fold 10 ===
# Combine
hasil_all <- do.call(rbind, results_all)

# Print per-fold results
print(hasil_all)
##               Fold Model  Accuracy        Kappa  Precision     Recall   F1_Score
## Accuracy...1     1    nb 0.1927083 -0.007993497 0.18947368 0.97297297 0.31718062
## Accuracy...2     1    dt 0.7083333  0.099648300 0.26829268 0.29729730 0.28205128
## Accuracy...3     1   knn 0.6562500  0.083731020 0.24561404 0.37837838 0.29787234
## Accuracy...4     1   svm 0.7187500  0.148628675 0.30232558 0.35135135 0.32500000
## Accuracy...5     1    rf 0.7968750  0.263861581 0.46153846 0.32432432 0.38095238
## Accuracy...11    2    nb 0.2598870 -0.011958277 0.18709677 0.85294118 0.30687831
## Accuracy...21    2    dt 0.6892655  0.174650276 0.30188679 0.47058824 0.36781609
## Accuracy...31    2   knn 0.5762712  0.003453194 0.19402985 0.38235294 0.25742574
## Accuracy...41    2   svm 0.4858757  0.054031832 0.21782178 0.64705882 0.32592593
## Accuracy...51    2    rf 0.6497175  0.220817949 0.31081081 0.67647059 0.42592593
## Accuracy...12    3    nb 0.1691176 -0.019368533 0.14615385 0.90476190 0.25165563
## Accuracy...22    3    dt 0.6470588  0.078746825 0.20000000 0.42857143 0.27272727
## Accuracy...32    3   knn 0.5588235 -0.066666667 0.11764706 0.28571429 0.16666667
## Accuracy...42    3   svm 0.3823529  0.060680809 0.18181818 0.85714286 0.30000000
## Accuracy...52    3    rf 0.6250000  0.082053997 0.20000000 0.47619048 0.28169014
## Accuracy...13    4    nb 0.6311475 -0.175588865 0.05263158 0.03571429 0.04255319
## Accuracy...23    4    dt 0.6393443  0.258972943 0.36206897 0.75000000 0.48837209
## Accuracy...33    4   knn 0.5737705  0.055390113 0.26000000 0.46428571 0.33333333
## Accuracy...43    4   svm 0.3852459  0.042285954 0.24731183 0.82142857 0.38016529
## Accuracy...53    4    rf 0.5000000  0.000000000 0.22950820 0.50000000 0.31460674
## Accuracy...14    5    nb 0.3863636  0.026229508 0.37209302 1.00000000 0.54237288
## Accuracy...24    5    dt 0.5000000  0.000000000 0.36363636 0.50000000 0.42105263
## Accuracy...34    5   knn 0.5227273 -0.017621145 0.35294118 0.37500000 0.36363636
## Accuracy...44    5   svm 0.4545455  0.070422535 0.38888889 0.87500000 0.53846154
## Accuracy...54    5    rf 0.5000000 -0.052173913 0.33333333 0.37500000 0.35294118
## Accuracy...15    6    nb 0.4878049 -0.108821636 0.52054795 0.84444444 0.64406780
## Accuracy...25    6    dt 0.7195122  0.421117250 0.70370370 0.84444444 0.76767677
## Accuracy...35    6   knn 0.6341463  0.261261261 0.66666667 0.66666667 0.66666667
## Accuracy...45    6   svm 0.5853659  0.097734628 0.57333333 0.95555556 0.71666667
## Accuracy...55    6    rf 0.7804878  0.565114909 0.86486486 0.71111111 0.78048780
## Accuracy...16    7    nb 0.6444444  0.037433155 0.76470588 0.76470588 0.76470588
## Accuracy...26    7    dt 0.5222222  0.105409154 0.82051282 0.47058824 0.59813084
## Accuracy...36    7   knn 0.5111111  0.148020654 0.87500000 0.41176471 0.56000000
## Accuracy...46    7   svm 0.6555556  0.260731320 0.86274510 0.64705882 0.73949580
## Accuracy...56    7    rf 0.5333333  0.202531646 0.93333333 0.41176471 0.57142857
## Accuracy...17    8    nb 0.1843972  0.008438819 0.16666667 1.00000000 0.28571429
## Accuracy...27    8    dt 0.4397163  0.055456627 0.18888889 0.73913043 0.30088496
## Accuracy...37    8   knn 0.3829787 -0.007722008 0.15957447 0.65217391 0.25641026
## Accuracy...47    8   svm 0.2482270 -0.001205788 0.16260163 0.86956522 0.27397260
## Accuracy...57    8    rf 0.5531915  0.151008315 0.23684211 0.78260870 0.36363636
## Accuracy...18    9    nb 0.2127660 -0.023543261 0.18181818 0.88888889 0.30188679
## Accuracy...28    9    dt 0.5319149 -0.001937984 0.19047619 0.44444444 0.26666667
## Accuracy...38    9   knn 0.4468085  0.114492754 0.24242424 0.88888889 0.38095238
## Accuracy...48    9   svm 0.2127660  0.010244735 0.19565217 1.00000000 0.32727273
## Accuracy...58    9    rf 0.5531915 -0.044444444 0.16666667 0.33333333 0.22222222
## Accuracy...19   10    nb 0.4563758 -0.168231536 0.51304348 0.70238095 0.59296482
## Accuracy...29   10    dt 0.5503356  0.096806297 0.61038961 0.55952381 0.58385093
## Accuracy...39   10   knn 0.5436242  0.065658429 0.59090909 0.61904762 0.60465116
## Accuracy...49   10   svm 0.5302013  0.010436433 0.56730769 0.70238095 0.62765957
## Accuracy...59   10    rf 0.5906040  0.210270223 0.71698113 0.45238095 0.55474453
# Average per model
cat("\n=== AVERAGE METRICS PER MODEL (REGULAR SPATIAL K-FOLD) ===\n")
## 
## === AVERAGE METRICS PER MODEL (REGULAR SPATIAL K-FOLD) ===
hasil_summary <- hasil_all %>%
  group_by(Model) %>%
  summarise(across(Accuracy:F1_Score, mean), .groups = "drop")
print(hasil_summary)
## # A tibble: 5 × 6
##   Model Accuracy   Kappa Precision Recall F1_Score
##   <chr>    <dbl>   <dbl>     <dbl>  <dbl>    <dbl>
## 1 dt       0.595  0.129      0.401  0.550    0.435
## 2 knn      0.541  0.0640     0.370  0.512    0.389
## 3 nb       0.363 -0.0443     0.309  0.797    0.405
## 4 rf       0.608  0.160      0.445  0.504    0.425
## 5 svm      0.466  0.0754     0.370  0.773    0.455
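
Again, a named copy (a hypothetical addition) for the final comparison:

summary_spatial_smote <- hasil_summary  # hypothetical name for the final comparison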
# ===============================================
# STRATIFIED SPATIAL K-FOLD + SMOTE + ALL METRICS
# ===============================================

library(dplyr)
library(purrr)
## 
## Attaching package: 'purrr'
## The following object is masked from 'package:caret':
## 
##     lift
library(themis)
library(recipes)
library(e1071)
library(rpart)
library(ranger)
library(caret)
library(yardstick)
library(readxl)

set.seed(42)

# 1. Load and prepare the data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan","Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)

# 2. Stratified spatial folds
cluster_per_kelas <- function(df_kelas, n_folds = 5) {
  coords <- df_kelas[, c("lon", "lat")]
  km <- kmeans(coords, centers = n_folds)
  df_kelas$spatial_fold <- km$cluster
  return(df_kelas)
}

desa_df_strat <- desa_df %>%
  split(.$Y) %>%
  map_dfr(~cluster_per_kelas(.x, 5))

folds_index <- lapply(1:5, function(k) {
  which(desa_df_strat$spatial_fold == k)
})
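
# (Addition:) k-means runs separately within each class, so every fold should
# contain both classes; check the balance anyway:
table(desa_df_strat$spatial_fold, desa_df_strat$Y)  # class counts per stratified fold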

form <- Y ~ . -lon -lat -spatial_fold

# 3. CV loop over all models
results_all <- list()

for (i in 1:5) {
  cat("\n=== STRATIFIED Fold", i, "===\n")
  
  train_data <- desa_df_strat[-folds_index[[i]], ]
  test_data  <- desa_df_strat[ folds_index[[i]], ]

  rec <- recipe(Y ~ ., data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_data_smote <- bake(rec, new_data = NULL)

  models <- list()
  preds  <- list()
  metrics <- list()

  # ============ MODEL 1: NAIVE BAYES
  models$nb <- naiveBayes(form, data = train_data_smote)
  preds$nb <- predict(models$nb, test_data)

  # ============ MODEL 2: DECISION TREE
  models$dt <- rpart(form, data = train_data_smote)
  preds$dt <- predict(models$dt, test_data, type = "class")

  # ============ MODEL 3: KNN
  models$knn <- train(form, data = train_data_smote, method = "knn",
                      trControl = trainControl(method = "none"),
                      tuneGrid = data.frame(k = 5))
  preds$knn <- predict(models$knn, test_data)

  # ============ MODEL 4: SVM
  models$svm <- svm(form, data = train_data_smote)
  preds$svm <- predict(models$svm, test_data)

  # ============ MODEL 5: RANDOM FOREST
  models$rf <- ranger(formula = form, data = train_data_smote, num.trees = 500)
  preds$rf <- predict(models$rf, data = test_data)$predictions

  # Compute metrics for all models
  for (model_name in names(preds)) {
    pred <- preds[[model_name]]
    obs <- test_data$Y
    eval_df <- data.frame(obs = obs, pred = pred)

    cm <- confusionMatrix(pred, obs, positive = "Rentan")

    acc <- cm$overall["Accuracy"]
    kappa <- cm$overall["Kappa"]
    prec <- precision(eval_df, truth = obs, estimate = pred)$.estimate
    rec  <- recall(eval_df, truth = obs, estimate = pred)$.estimate
    f1   <- f_meas(eval_df, truth = obs, estimate = pred)$.estimate

    row <- data.frame(
      Fold = i,
      Model = model_name,
      Accuracy = acc,
      Kappa = kappa,
      Precision = prec,
      Recall = rec,
      F1_Score = f1
    )
    metrics[[model_name]] <- row
  }

  results_all[[i]] <- bind_rows(metrics)
}
## 
## === STRATIFIED Fold 1 ===
## 
## === STRATIFIED Fold 2 ===
## 
## === STRATIFIED Fold 3 ===
## 
## === STRATIFIED Fold 4 ===
## 
## === STRATIFIED Fold 5 ===
# Combine results from all folds
hasil_all <- do.call(rbind, results_all)

# Per-model, per-fold results
print(hasil_all)
##               Fold Model  Accuracy        Kappa Precision    Recall  F1_Score
## Accuracy...1     1    nb 0.4230769  0.051383399 0.3917526 0.9743590 0.5588235
## Accuracy...2     1    dt 0.5096154  0.081081081 0.4090909 0.6923077 0.5142857
## Accuracy...3     1   knn 0.5192308  0.024390244 0.3877551 0.4871795 0.4318182
## Accuracy...4     1   svm 0.4807692  0.103734440 0.4117647 0.8974359 0.5645161
## Accuracy...5     1    rf 0.5000000 -0.004830918 0.3725490 0.4871795 0.4222222
## Accuracy...11    2    nb 0.2067308 -0.119373777 0.1573034 0.6511628 0.2533937
## Accuracy...21    2    dt 0.4182692  0.041876047 0.2253521 0.7441860 0.3459459
## Accuracy...31    2   knn 0.4182692 -0.004790802 0.2045455 0.6279070 0.3085714
## Accuracy...41    2   svm 0.2740385  0.014805521 0.2127660 0.9302326 0.3463203
## Accuracy...51    2    rf 0.5096154  0.013668061 0.2135922 0.5116279 0.3013699
## Accuracy...12    3    nb 0.8168498  0.145165331 0.3200000 0.1951220 0.2424242
## Accuracy...22    3    dt 0.5677656  0.017146693 0.1592920 0.4390244 0.2337662
## Accuracy...32    3   knn 0.5970696  0.094336208 0.2000000 0.5609756 0.2948718
## Accuracy...42    3   svm 0.6886447 -0.041937946 0.1206897 0.1707317 0.1414141
## Accuracy...52    3    rf 0.5494505  0.015653856 0.1583333 0.4634146 0.2360248
## Accuracy...13    4    nb 0.5384615  0.142087542 0.4609375 0.7972973 0.5841584
## Accuracy...23    4    dt 0.6538462  0.262257110 0.5901639 0.4864865 0.5333333
## Accuracy...33    4   knn 0.5769231  0.106135987 0.4769231 0.4189189 0.4460432
## Accuracy...43    4   svm 0.5219780  0.072516401 0.4386792 0.6283784 0.5166667
## Accuracy...53    4    rf 0.6153846  0.169383231 0.5363636 0.3986486 0.4573643
## Accuracy...14    5    nb 0.4069264 -0.027933868 0.3981043 0.8936170 0.5508197
## Accuracy...24    5    dt 0.6017316  0.177617831 0.5104167 0.5212766 0.5157895
## Accuracy...34    5   knn 0.5454545  0.081284800 0.4495413 0.5212766 0.4827586
## Accuracy...44    5   svm 0.4978355  0.065690377 0.4320988 0.7446809 0.5468750
## Accuracy...54    5    rf 0.5844156  0.124378109 0.4880952 0.4361702 0.4606742
# Average per model
cat("\n=== AVERAGE METRICS PER MODEL ===\n")
## 
## === AVERAGE METRICS PER MODEL ===
hasil_summary <- hasil_all %>%
  group_by(Model) %>%
  summarise(across(Accuracy:F1_Score, mean), .groups = "drop")
print(hasil_summary)
## # A tibble: 5 × 6
##   Model Accuracy  Kappa Precision Recall F1_Score
##   <chr>    <dbl>  <dbl>     <dbl>  <dbl>    <dbl>
## 1 dt       0.550 0.116      0.379  0.577    0.429
## 2 knn      0.531 0.0603     0.344  0.523    0.393
## 3 nb       0.478 0.0383     0.346  0.702    0.438
## 4 rf       0.552 0.0637     0.354  0.459    0.376
## 5 svm      0.493 0.0430     0.323  0.674    0.423
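
With the named copies suggested after each grouped experiment (all hypothetical additions), the four validation schemes can be compared side by side; a minimal sketch, assuming those copies were actually created:

summary_strat_spatial <- hasil_summary  # this section's summary

bind_rows(
  mutate(summary_kfold_raw,     Scheme = "K-fold, no SMOTE"),
  mutate(summary_kfold_smote,   Scheme = "K-fold + SMOTE"),
  mutate(summary_spatial_smote, Scheme = "Spatial k-fold + SMOTE"),
  mutate(summary_strat_spatial, Scheme = "Stratified spatial k-fold + SMOTE")
) %>%
  arrange(Model, desc(F1_Score)) %>%
  print(n = Inf)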

Random Forest

# =====================================================
# RANDOM FOREST - K-FOLD + SMOTE + ALL EVALUATION METRICS
# =====================================================

library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
## 
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
## 
##     margin
## The following object is masked from 'package:ranger':
## 
##     importance
## The following object is masked from 'package:dplyr':
## 
##     combine
library(yardstick)
library(pROC)
## Type 'citation("pROC")' for a citation.
## 
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
## 
##     cov, smooth, var
library(ggplot2)
library(tibble)

set.seed(42)

# === 1. Load and relabel the target
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)

# === 2. Setup K-Fold CV
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ . -lon -lat

# === 3. Initialize results
results_all <- list()
roc_plot <- TRUE  # set TRUE to draw the ROC curves
colors <- rainbow(k)

# === 4. Loop over folds
for (i in 1:k) {
  cat("\n=== Fold", i, "===\n")

  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[ test_idx, ]

  # === SMOTE
  rec <- recipe(Y ~ ., data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_balanced <- bake(rec, new_data = NULL)

  # === Train Random Forest
  rf_model <- randomForest(formula = form,
                           data = train_balanced,
                           ntree = 500,
                           importance = TRUE)

  # === Predict classes and probabilities
  rf_pred <- predict(rf_model, newdata = test_data)
  rf_prob <- predict(rf_model, newdata = test_data, type = "prob")
  eval_df <- data.frame(obs = test_data$Y, pred = rf_pred)

  # === Evaluation metrics
  acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
  kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
  prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
  rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
  f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

  # === ROC & AUC
  roc_obj <- roc(response = test_data$Y,
                 predictor = rf_prob[, "Rentan"],
                 levels = c("Tahan", "Rentan"))
  auc_val <- as.numeric(auc(roc_obj))

  # === ROC Curve plot
  if (roc_plot) {
    if (i == 1) {
      plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold")
    } else {
      plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
    }
  }

  # === Save the fold results to the list
  results_all[[i]] <- tibble(
    Fold = i,
    Model = "rf",
    Accuracy = acc,
    Kappa = kap,
    Precision = prec,
    Recall = rec,
    F1_Score = f1,
    AUC = auc_val
  )

  if (i == 1) {
    rf_model_first <- rf_model  # kept for varImpPlot later
  }
}
## 
## === Fold 1 ===
## Setting direction: controls < cases
## 
## === Fold 2 ===
## Setting direction: controls < cases
## 
## === Fold 3 ===
## Setting direction: controls < cases
## 
## === Fold 4 ===
## Setting direction: controls < cases
## 
## === Fold 5 ===
## Setting direction: controls < cases
## 
## === Fold 6 ===
## Setting direction: controls < cases
## 
## === Fold 7 ===
## Setting direction: controls < cases
## 
## === Fold 8 ===
## Setting direction: controls < cases
## 
## === Fold 9 ===
## Setting direction: controls < cases
## 
## === Fold 10 ===
## Setting direction: controls < cases

# === 5. Combine and display results from all folds
rf_kfold_smote <- bind_rows(results_all)

cat("\n=== HASIL PER FOLD (RF) ===\n")
## 
## === HASIL PER FOLD (RF) ===
print(rf_kfold_smote)
## # A tibble: 10 × 8
##     Fold Model Accuracy  Kappa Precision Recall F1_Score   AUC
##    <int> <chr>    <dbl>  <dbl>     <dbl>  <dbl>    <dbl> <dbl>
##  1     1 rf       0.605 0.181      0.407  0.595    0.484 0.613
##  2     2 rf       0.695 0.349      0.510  0.676    0.581 0.776
##  3     3 rf       0.607 0.118      0.381  0.444    0.410 0.616
##  4     4 rf       0.686 0.309      0.489  0.611    0.543 0.747
##  5     5 rf       0.605 0.0983     0.375  0.405    0.390 0.549
##  6     6 rf       0.590 0.119      0.375  0.5      0.429 0.600
##  7     7 rf       0.627 0.147      0.4    0.444    0.421 0.643
##  8     8 rf       0.630 0.174      0.419  0.486    0.45  0.689
##  9     9 rf       0.632 0.157      0.410  0.444    0.427 0.635
## 10    10 rf       0.593 0.143      0.392  0.541    0.455 0.644
cat("\n=== RATA-RATA METRIK (RF) ===\n")
## 
## === RATA-RATA METRIK (RF) ===
print(rf_kfold_smote %>%
  summarise(across(Accuracy:AUC, mean), .groups = "drop"))
## # A tibble: 1 × 6
##   Accuracy Kappa Precision Recall F1_Score   AUC
##      <dbl> <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1    0.627 0.180     0.416  0.515    0.459 0.651
# === 6. Plot RF variable importance
varImpPlot(rf_model_first, main = "Random Forest Variable Importance (Fold 1)")
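
Beyond the plot, the underlying scores can be ranked directly; a minimal sketch using randomForest::importance() on the saved fold-1 model (the column name assumes the default Gini measure is present):

imp <- importance(rf_model_first)
head(imp[order(imp[, "MeanDecreaseGini"], decreasing = TRUE), ], 10)  # top 10 predictors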

Decision Tree

# ===================================================
# DECISION TREE (rpart) - K-FOLD + SMOTE + VISUALIZATION
# ===================================================

library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(rpart)
library(rpart.plot)
library(yardstick)
library(pROC)
library(ggplot2)
library(tibble)

set.seed(42)

# 1. Load & relabel data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Renten","Rentan","Agak Rentan","Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan","Rentan"))

# 2. Set up the k-fold splits
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ . -lon -lat

results_all <- list()
colors <- rainbow(k)

# 3. Loop over each fold
for (i in seq_along(folds)) {
  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[test_idx, ]

  # 3.a Apply SMOTE
  rec <- recipe(Y ~ ., data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_balanced <- bake(rec, new_data = NULL)

  # 3.b Train Decision Tree
  dt_model <- rpart(formula = form,
                    data = train_balanced,
                    method = "class",
                    control = rpart.control(cp = 0.01))

  # 3.c Predict
  dt_pred <- predict(dt_model, newdata = test_data, type = "class")
  dt_prob <- predict(dt_model, newdata = test_data, type = "prob")[, "Rentan"]
  eval_df <- tibble(obs = test_data$Y, pred = dt_pred)

  # 3.d Metrics
  acc  <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
  kap  <- kap(eval_df, truth = obs, estimate = pred)[[3]]
  prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
  rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
  f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

  # 3.e ROC & AUC
  roc_obj <- roc(response = test_data$Y, predictor = dt_prob,
                 levels = c("Tahan","Rentan"))
  auc_val <- as.numeric(auc(roc_obj))
  plot(roc_obj, col = colors[i], lwd = 2,
       main = "ROC Curve per Fold (Decision Tree)",
       add = i != 1)

  # 3.f Save the results and keep the fold-1 model
  results_all[[i]] <- tibble(
    Fold = i,
    Model = "dt",
    Accuracy = acc,
    Kappa = kap,
    Precision = prec,
    Recall = rec,
    F1_Score = f1,
    AUC = auc_val
  )
  if (i == 1) {
    dt_first_model <- dt_model
  }
}
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases

# 4. Final results
dt_kfold_smote <- bind_rows(results_all)
cat("\n=== Decision Tree – Hasil tiap fold ===\n")
## 
## === Decision Tree – Hasil tiap fold ===
print(dt_kfold_smote)
## # A tibble: 10 × 8
##     Fold Model Accuracy  Kappa Precision Recall F1_Score   AUC
##    <int> <chr>    <dbl>  <dbl>     <dbl>  <dbl>    <dbl> <dbl>
##  1     1 dt       0.538 0.123      0.765  0.476    0.586 0.605
##  2     2 dt       0.661 0.295      0.815  0.654    0.726 0.702
##  3     3 dt       0.718 0.407      0.864  0.704    0.776 0.713
##  4     4 dt       0.542 0.114      0.759  0.5      0.603 0.565
##  5     5 dt       0.613 0.148      0.743  0.671    0.705 0.569
##  6     6 dt       0.564 0.120      0.75   0.556    0.638 0.579
##  7     7 dt       0.610 0.205      0.790  0.598    0.681 0.632
##  8     8 dt       0.681 0.325      0.824  0.683    0.747 0.691
##  9     9 dt       0.521 0.0396     0.712  0.519    0.6   0.576
## 10    10 dt       0.602 0.155      0.75   0.630    0.685 0.594
cat("\n=== Rata-rata metrik ===\n")
## 
## === Rata-rata metrik ===
print(dt_kfold_smote %>% summarise(across(Accuracy:AUC, mean)))
## # A tibble: 1 × 6
##   Accuracy Kappa Precision Recall F1_Score   AUC
##      <dbl> <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1    0.605 0.193     0.777  0.599    0.675 0.622
# 5. Plot variable importance
# Build the variable-importance data frame
varImp_dt <- data.frame(
  Importance = dt_first_model$variable.importance,
  Feature = names(dt_first_model$variable.importance)
)

ggplot(varImp_dt, aes(x = reorder(Feature, Importance), y = Importance)) +
  geom_col(fill = "coral") +
  coord_flip() +
  labs(
    title = "Decision Tree - Variable Importance",
    x = "Feature", y = "Importance"
  )

# 6. Display the tree
rpart.plot(dt_first_model, main = "Decision Tree (Fold 1)", type = 4, extra = 101)
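
The trees above are grown with cp = 0.01 and never pruned. A hedged pruning sketch, applied to the saved fold-1 model, picks the complexity parameter that minimizes rpart's cross-validated error:

cp_best <- dt_first_model$cptable[which.min(dt_first_model$cptable[, "xerror"]), "CP"]
dt_pruned <- prune(dt_first_model, cp = cp_best)  # prune at the error-minimizing cp
rpart.plot(dt_pruned, main = "Pruned Decision Tree (Fold 1)")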

K-Nearest Neighbor

# ====================================================
# KNN (caret) - 10-Fold Cross Validation + SMOTE
# Without the lon and lat columns
# ====================================================

library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(yardstick)
library(pROC)
library(ggplot2)
library(tibble)

set.seed(42)

# === 1. Load data & ubah target
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")

# Relabel target
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))

# Drop the lon and lat columns
desa_df <- desa_df %>% dplyr::select(-lon, -lat)

# === 2. Setup K-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ .

results_all <- list()
colors <- rainbow(k)

# === 3. Loop over each fold
for (i in seq_along(folds)) {
  cat("\n=== Fold", i, "===\n")

  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[ test_idx, ]

  # === 3a. SMOTE
  rec <- recipe(form, data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_balanced <- bake(rec, new_data = NULL)

  # === 3b. Train KNN (k = 5)
  knn_model <- train(
    form, data = train_balanced,
    method = "knn",
    trControl = trainControl(method = "none"),
    tuneGrid = data.frame(k = 5)
  )

  # === 3c. Predict
  knn_pred <- predict(knn_model, test_data)
  knn_prob <- predict(knn_model, test_data, type = "prob")[, "Rentan"]
  eval_df <- tibble(obs = test_data$Y, pred = knn_pred)

  # === 3d. Metrics
  acc  <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
  kap  <- kap(eval_df, truth = obs, estimate = pred)[[3]]
  prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
  rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
  f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

  # === 3e. ROC + AUC
  roc_obj <- roc(response = test_data$Y, predictor = knn_prob, levels = c("Tahan", "Rentan"))
  auc_val <- as.numeric(auc(roc_obj))

  # Plot ROC
  if (i == 1) {
    plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (KNN)")
  } else {
    plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
  }

  # Save results
  results_all[[i]] <- tibble(
    Fold = i,
    Model = "knn",
    Accuracy = acc,
    Kappa = kap,
    Precision = prec,
    Recall = rec,
    F1_Score = f1,
    AUC = auc_val
  )
}
## 
## === Fold 1 ===
## Setting direction: controls < cases
## 
## === Fold 2 ===
## Setting direction: controls < cases
## 
## === Fold 3 ===
## Setting direction: controls < cases
## 
## === Fold 4 ===
## Setting direction: controls < cases
## 
## === Fold 5 ===
## Setting direction: controls < cases
## 
## === Fold 6 ===
## Setting direction: controls < cases
## 
## === Fold 7 ===
## Setting direction: controls < cases
## 
## === Fold 8 ===
## Setting direction: controls < cases
## 
## === Fold 9 ===
## Setting direction: controls < cases
## 
## === Fold 10 ===
## Setting direction: controls < cases

# === 4. Combine and display the results
knn_kfold_smote <- bind_rows(results_all)

cat("\n=== Hasil per Fold (KNN) ===\n")
## 
## === Hasil per Fold (KNN) ===
print(knn_kfold_smote)
## # A tibble: 10 × 8
##     Fold Model Accuracy    Kappa Precision Recall F1_Score   AUC
##    <int> <chr>    <dbl>    <dbl>     <dbl>  <dbl>    <dbl> <dbl>
##  1     1 knn      0.555  0.123       0.754  0.524    0.619 0.578
##  2     2 knn      0.551  0.0843      0.726  0.556    0.629 0.551
##  3     3 knn      0.624  0.169       0.753  0.679    0.714 0.641
##  4     4 knn      0.627  0.229       0.797  0.622    0.699 0.636
##  5     5 knn      0.597  0.136       0.743  0.634    0.684 0.585
##  6     6 knn      0.556  0.0315      0.704  0.617    0.658 0.595
##  7     7 knn      0.551 -0.00450     0.693  0.634    0.662 0.495
##  8     8 knn      0.613  0.183       0.765  0.634    0.693 0.609
##  9     9 knn      0.564  0.0569      0.714  0.617    0.662 0.516
## 10    10 knn      0.5    0.0306      0.704  0.469    0.563 0.598
cat("\n=== Rata-rata Metrik KNN ===\n")
## 
## === Rata-rata Metrik KNN ===
print(knn_kfold_smote %>%
  summarise(across(Accuracy:AUC, mean), .groups = "drop"))
## # A tibble: 1 × 6
##   Accuracy Kappa Precision Recall F1_Score   AUC
##      <dbl> <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1    0.574 0.104     0.735  0.599    0.658 0.580
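
KNN is distance-based, and the recipe above applies SMOTE without standardizing the predictors, so variables on large scales can dominate the distance. A hedged variant of the fold recipe that normalizes first (step_normalize and its placement are additions, not part of the original pipeline):

rec_scaled <- recipe(Y ~ ., data = train_data) %>%
  step_normalize(all_numeric_predictors()) %>%  # common scale before SMOTE
  step_smote(Y) %>%
  prep()
train_balanced_scaled <- bake(rec_scaled, new_data = NULL)

The same prepped recipe would also need to be baked onto test_data so train and test share one scaling.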

SVM

# ====================================================
# SVM - 10-Fold Cross Validation + SMOTE
# ====================================================

library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(e1071)
library(yardstick)
library(pROC)
library(ggplot2)
library(tibble)

set.seed(42)

# === 1. Load data & ubah target
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")

# Relabel target
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))

# Drop the lon and lat columns
desa_df <- desa_df %>% dplyr::select(-lon, -lat)

# === 2. Setup 10-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ .

results_all <- list()
colors <- rainbow(k)

# === 3. Loop over each fold
for (i in seq_along(folds)) {
  cat("\n=== Fold", i, "===\n")

  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[test_idx, ]

  # SMOTE
  rec <- recipe(form, data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_balanced <- bake(rec, new_data = NULL)

  # Train SVM (default: radial kernel)
  svm_model <- svm(form, data = train_balanced, probability = TRUE)

  # Predict
  svm_pred <- predict(svm_model, test_data)
  svm_prob <- attr(predict(svm_model, test_data, probability = TRUE), "probabilities")[, "Rentan"]

  eval_df <- tibble(obs = test_data$Y, pred = svm_pred)

  # Metrics
  acc  <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
  kap  <- kap(eval_df, truth = obs, estimate = pred)[[3]]
  prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
  rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
  f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

  # ROC & AUC
  roc_obj <- roc(response = test_data$Y, predictor = svm_prob, levels = c("Tahan", "Rentan"))
  auc_val <- as.numeric(auc(roc_obj))

  # ROC plot
  if (i == 1) {
    plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (SVM)")
  } else {
    plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
  }

  # Save results
  results_all[[i]] <- tibble(
    Fold = i,
    Model = "svm",
    Accuracy = acc,
    Kappa = kap,
    Precision = prec,
    Recall = rec,
    F1_Score = f1,
    AUC = auc_val
  )
}
## 
## === Fold 1 ===
## Setting direction: controls < cases
## 
## === Fold 2 ===
## Setting direction: controls < cases
## 
## === Fold 3 ===
## Setting direction: controls < cases
## 
## === Fold 4 ===
## Setting direction: controls < cases
## 
## === Fold 5 ===
## Setting direction: controls < cases
## 
## === Fold 6 ===
## Setting direction: controls < cases
## 
## === Fold 7 ===
## Setting direction: controls < cases
## 
## === Fold 8 ===
## Setting direction: controls < cases
## 
## === Fold 9 ===
## Setting direction: controls < cases
## 
## === Fold 10 ===
## Setting direction: controls < cases

# === 4. Combine results
svm_kfold_smote <- bind_rows(results_all)

cat("\n=== Hasil per Fold (SVM) ===\n")
## 
## === Hasil per Fold (SVM) ===
print(svm_kfold_smote)
## # A tibble: 10 × 8
##     Fold Model Accuracy  Kappa Precision Recall F1_Score   AUC
##    <int> <chr>    <dbl>  <dbl>     <dbl>  <dbl>    <dbl> <dbl>
##  1     1 svm      0.403 0.0413     0.762  0.195    0.311 0.567
##  2     2 svm      0.534 0.177      0.842  0.395    0.538 0.687
##  3     3 svm      0.538 0.172      0.829  0.420    0.557 0.669
##  4     4 svm      0.576 0.264      0.944  0.415    0.576 0.711
##  5     5 svm      0.471 0.0632     0.744  0.354    0.479 0.603
##  6     6 svm      0.462 0.0725     0.765  0.321    0.452 0.603
##  7     7 svm      0.458 0.0362     0.725  0.354    0.475 0.586
##  8     8 svm      0.529 0.143      0.795  0.427    0.556 0.634
##  9     9 svm      0.462 0.0510     0.737  0.346    0.471 0.585
## 10    10 svm      0.466 0.0247     0.705  0.383    0.496 0.552
cat("\n=== Rata-rata Metrik SVM ===\n")
## 
## === Rata-rata Metrik SVM ===
print(svm_kfold_smote %>%
  summarise(across(Accuracy:AUC, mean), .groups = "drop"))
## # A tibble: 1 × 6
##   Accuracy Kappa Precision Recall F1_Score   AUC
##      <dbl> <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1    0.490 0.105     0.785  0.361    0.491 0.620
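
The SVM above relies on e1071 defaults (radial kernel, cost = 1). A hedged tuning sketch with e1071::tune.svm on one fold's balanced training set (the grid values are arbitrary choices):

tuned <- tune.svm(form, data = train_balanced, gamma = c(0.01, 0.1, 1), cost = c(1, 10, 100))
summary(tuned)  # cross-validated error over the grid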

Naive Bayes

# ==========================================================
# Naive Bayes (klaR::NaiveBayes) + Spatial K-Fold + SMOTE
# ==========================================================

library(readxl)
library(dplyr)
library(caret)
library(CAST)      # for CreateSpacetimeFolds
library(themis)
library(recipes)
library(klaR)      # NaiveBayes
library(yardstick)
library(pROC)
library(tibble)
library(ggplot2)

set.seed(42)

# === 1. Load & siapkan data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")

# Relabel the target as binary
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))

# === 2. Spatial clustering and k-fold CV
k <- 5
desa_df$spatial_cluster <- kmeans(desa_df[, c("lon", "lat")], centers = k)$cluster
spatial_folds <- CreateSpacetimeFolds(desa_df, spacevar = "spatial_cluster", k = k)
folds_index <- spatial_folds$indexOut

# === 3. Formula (the spatial columns are also removed in the recipe below)
form <- Y ~ . -lon -lat -spatial_cluster

results_all <- list()
colors <- rainbow(k)

# === 4. Loop over each fold
for (i in seq_along(folds_index)) {
  cat("\n=== Fold", i, "===\n")

  test_idx <- folds_index[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[ test_idx, ]

  # === SMOTE
  rec <- recipe(Y ~ ., data = train_data) %>%
    step_rm(lon, lat, spatial_cluster) %>%
    step_smote(Y) %>%
    prep()
  train_balanced <- bake(rec, new_data = NULL)
  test_data_clean <- test_data %>% dplyr::select(-lon, -lat, -spatial_cluster)

  # === Train Naive Bayes (klaR); predict once so the "Numerical 0 probability"
  # warnings below are not emitted twice
  nb_model <- NaiveBayes(Y ~ ., data = train_balanced)
  nb_out  <- predict(nb_model, test_data_clean)
  nb_pred <- nb_out$class
  nb_prob <- nb_out$posterior[, "Rentan"]

  eval_df <- tibble(obs = test_data_clean$Y, pred = nb_pred)

  # === Metrics
  acc  <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
  kap  <- kap(eval_df, truth = obs, estimate = pred)[[3]]
  prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
  rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
  f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

  # === ROC & AUC
  roc_obj <- roc(response = test_data_clean$Y, predictor = nb_prob, levels = c("Tahan", "Rentan"))
  auc_val <- as.numeric(auc(roc_obj))

  # === Plot ROC
  if (i == 1) {
    plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (Naive Bayes)")
  } else {
    plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
  }

  # === Save results
  results_all[[i]] <- tibble(
    Fold = i,
    Model = "nb",
    Accuracy = acc,
    Kappa = kap,
    Precision = prec,
    Recall = rec,
    F1_Score = f1,
    AUC = auc_val
  )
}
## 
## === Fold 1 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 41
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 51
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 152
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 173
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 174
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 175
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 176
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 243
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 339
## Setting direction: controls < cases
## 
## === Fold 2 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 176
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 177
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 179
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 224
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 230
## Setting direction: controls > cases
## 
## === Fold 3 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 44
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 61
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 76
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 102
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 106
## Setting direction: controls > cases
## 
## === Fold 4 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 121
## Setting direction: controls < cases
## 
## === Fold 5 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 34
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 35
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 36
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 48
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 83
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 89
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 90
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 129
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 83
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 89
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 90
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 129
## Setting direction: controls < cases

# === 5. Combine and display the results
nb_spatial_kfold_smote <- bind_rows(results_all)

cat("\n=== Hasil per Fold (Naive Bayes - Spatial K-Fold + SMOTE) ===\n")
## 
## === Hasil per Fold (Naive Bayes - Spatial K-Fold + SMOTE) ===
print(nb_spatial_kfold_smote)
## # A tibble: 5 × 8
##    Fold Model Accuracy    Kappa Precision Recall F1_Score   AUC
##   <int> <chr>    <dbl>    <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1     1 nb       0.232 -0.0200      0.588 0.0305   0.0580 0.594
## 2     2 nb       0.427 -0.0223      0.467 0.05     0.0903 0.541
## 3     3 nb       0.617 -0.163       0.692 0.846    0.761  0.597
## 4     4 nb       0.176 -0.00456     0.75  0.0197   0.0385 0.644
## 5     5 nb       0.456 -0.168       0.265 0.138    0.182  0.506
cat("\n=== Rata-rata Metrik Naive Bayes ===\n")
## 
## === Rata-rata Metrik Naive Bayes ===
print(nb_spatial_kfold_smote %>%
  summarise(across(Accuracy:AUC, mean), .groups = "drop"))
## # A tibble: 1 × 6
##   Accuracy   Kappa Precision Recall F1_Score   AUC
##      <dbl>   <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1    0.381 -0.0756     0.552  0.217    0.226 0.577
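
Given how much these metrics swing from fold to fold, the mean alone hides a lot; a minimal sketch (an addition, reusing the nb_spatial_kfold_smote tibble built above) that reports the spread as well:

# Sketch: mean and standard deviation of each metric across the five folds
nb_spatial_kfold_smote %>%
  summarise(across(Accuracy:AUC, list(mean = mean, sd = sd)))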
# ====================================================
# Naive Bayes - 10-Fold Cross Validation + SMOTE
# ====================================================

library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(klaR)
library(yardstick)
library(pROC)
library(tibble)
library(ggplot2)

set.seed(42)

# === 1. Load and prepare the data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")

# Relabel the target as binary
desa_df$Y <- factor(
  desa_df$Y,
  levels = 1:6,
  labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))

# Drop the lon and lat columns
desa_df <- desa_df %>% dplyr::select(-lon, -lat)

# === 2. Setup K-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ .

results_all <- list()
colors <- rainbow(k)

# === 3. Loop over each fold
for (i in seq_along(folds)) {
  cat("\n=== Fold", i, "===\n")

  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[ test_idx, ]

  # === 3a. SMOTE preprocessing
  rec <- recipe(form, data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_balanced <- bake(rec, new_data = NULL)

  # === 3b. Train Naive Bayes (predict once, then reuse class and posterior)
  nb_model <- NaiveBayes(Y ~ ., data = train_balanced)
  nb_out  <- predict(nb_model, test_data)
  nb_pred <- nb_out$class
  nb_prob <- nb_out$posterior[, "Rentan"]

  eval_df <- tibble(obs = test_data$Y, pred = nb_pred)

  # === 3c. Metrics
  acc  <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
  kap  <- kap(eval_df, truth = obs, estimate = pred)[[3]]
  prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
  rec  <- recall(eval_df, truth = obs, estimate = pred)[[3]]
  f1   <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]

  # === 3d. ROC & AUC
  roc_obj <- roc(response = test_data$Y, predictor = nb_prob, levels = c("Tahan", "Rentan"))
  auc_val <- as.numeric(auc(roc_obj))

  # === 3e. Plot ROC
  if (i == 1) {
    plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (Naive Bayes)")
  } else {
    plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
  }

  # === 3f. Store the results
  results_all[[i]] <- tibble(
    Fold = i,
    Model = "nb",
    Accuracy = acc,
    Kappa = kap,
    Precision = prec,
    Recall = rec,
    F1_Score = f1,
    AUC = auc_val
  )
}
## 
## [repeated output truncated: for each of Folds 1-10, klaR::NaiveBayes warned
## "Numerical 0 probability for all classes" for a few test observations, and
## pROC::roc printed a "Setting direction: ..." message]

# === 4. Combine all results
nb_kfold_smote <- bind_rows(results_all)

# === 5. Print the results
cat("\n=== Per-Fold Results (Naive Bayes - K-Fold + SMOTE) ===\n")
## 
## === Per-Fold Results (Naive Bayes - K-Fold + SMOTE) ===
print(nb_kfold_smote)
## # A tibble: 10 × 8
##     Fold Model Accuracy    Kappa Precision Recall F1_Score   AUC
##    <int> <chr>    <dbl>    <dbl>     <dbl>  <dbl>    <dbl> <dbl>
##  1     1 nb       0.328 -0.0228      0.6   0.0732   0.130  0.581
##  2     2 nb       0.339 -0.00458     0.667 0.0741   0.133  0.620
##  3     3 nb       0.265 -0.109       0.222 0.0247   0.0444 0.532
##  4     4 nb       0.305 -0.0294      0.5   0.0366   0.0682 0.612
##  5     5 nb       0.336 -0.00513     0.667 0.0732   0.132  0.499
##  6     6 nb       0.333  0.00393     0.714 0.0617   0.114  0.486
##  7     7 nb       0.322 -0.0452      0.571 0.0976   0.167  0.500
##  8     8 nb       0.319 -0.0111      0.6   0.0366   0.0690 0.600
##  9     9 nb       0.453  0.0631      0.758 0.309    0.439  0.487
## 10    10 nb       0.449  0.0274      0.711 0.333    0.454  0.489
cat("\n=== Rata-rata Metrik Naive Bayes ===\n")
## 
## === Rata-rata Metrik Naive Bayes ===
print(nb_kfold_smote %>%
  summarise(across(Accuracy:AUC, mean), .groups = "drop"))
## # A tibble: 1 × 6
##   Accuracy   Kappa Precision Recall F1_Score   AUC
##      <dbl>   <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1    0.345 -0.0133     0.601  0.112    0.175 0.541
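
The recurring "Numerical 0 probability for all classes" warnings appear when klaR's Gaussian class-conditional densities underflow to zero for outlying test points. A hedged sketch of two standard remedies (an addition, not run above): kernel density estimates for the numeric predictors via usekernel = TRUE, plus a Laplace correction fL (which only affects categorical predictors):

# Sketch (assumes the train_balanced and test_data objects from the loop above)
nb_model_k <- NaiveBayes(Y ~ ., data = train_balanced, usekernel = TRUE, fL = 1)
nb_out_k   <- predict(nb_model_k, test_data)  # kernel densities underflow less often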

XGBoost

# ====================================================
# XGBoost - 10-Fold Cross Validation + SMOTE
# ====================================================

library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(xgboost)
## 
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
## 
##     slice
library(yardstick)
library(pROC)
library(tibble)
library(ggplot2)

set.seed(42)

# === 1. Load data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(desa_df$Y, levels = 1:6,
                    labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan"))
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))
desa_df <- desa_df %>% dplyr::select(-lon, -lat)

# === 2. K-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k)
form <- Y ~ .

results_all <- list()
colors <- rainbow(k)

for (i in seq_along(folds)) {
  cat("\n=== Fold", i, "===\n")
  
  test_idx <- folds[[i]]
  train_data <- desa_df[-test_idx, ]
  test_data  <- desa_df[test_idx, ]

  rec <- recipe(form, data = train_data) %>%
    step_smote(Y) %>%
    prep()
  train_bal <- bake(rec, new_data = NULL)

  # One-hot encode for XGBoost
  dtrain <- model.matrix(Y ~ . - 1, data = train_bal)
  dtest  <- model.matrix(Y ~ . - 1, data = test_data)
  label_train <- as.numeric(train_bal$Y) - 1
  label_test  <- as.numeric(test_data$Y) - 1  # kept for reference; not used below

  xgb_model <- xgboost(
    data = dtrain, label = label_train,
    objective = "binary:logistic", nrounds = 100,
    verbose = 0
  )

  pred_prob <- predict(xgb_model, dtest)
  pred_class <- ifelse(pred_prob > 0.5, "Rentan", "Tahan")
  eval_df <- tibble(obs = test_data$Y, pred = factor(pred_class, levels = c("Tahan", "Rentan")))

  acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
  kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
  prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
  rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
  f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
  # ROC & AUC (build the roc object once; reuse it for both the AUC and the plot)
  roc_obj <- roc(test_data$Y, pred_prob, levels = c("Tahan", "Rentan"))
  auc_val <- auc(roc_obj)
  if (i == 1) {
    plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (XGBoost)")
  } else {
    plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
  }

  results_all[[i]] <- tibble(
    Fold = i, Model = "xgboost",
    Accuracy = acc, Kappa = kap, Precision = prec,
    Recall = rec, F1_Score = f1, AUC = as.numeric(auc_val)
  )
}
## 
## [repeated output truncated: for each of Folds 1-10, pROC::roc printed a
## "Setting direction: controls < cases" message]

xgb_kfold_smote <- bind_rows(results_all)

cat("\n=== RATA-RATA METRIK XGBoost ===\n")
## 
## === RATA-RATA METRIK XGBoost ===
print(xgb_kfold_smote %>% summarise(across(Accuracy:AUC, mean), .groups = "drop"))
## # A tibble: 1 × 6
##   Accuracy Kappa Precision Recall F1_Score   AUC
##      <dbl> <dbl>     <dbl>  <dbl>    <dbl> <dbl>
## 1    0.643 0.207     0.767  0.697    0.729 0.654
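
Since XGBoost clearly outperforms Naive Bayes here, it is natural to ask which predictors drive it. A minimal sketch (an addition) using the model and design matrix left over from the final fold of the loop above:

# Sketch: gain-based feature importance from the last fold's model
imp <- xgb.importance(feature_names = colnames(dtrain), model = xgb_model)
print(imp)
xgb.plot.importance(imp)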

Clustering

Group

# ===========================================
# 1. LIBRARY
# ===========================================
library(cluster)       # silhouette, agnes, diana
library(factoextra)    # fviz_nbclust, fviz_gap_stat
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(kernlab)       # spectral clustering
## 
## Attaching package: 'kernlab'
## The following object is masked from 'package:purrr':
## 
##     cross
## The following object is masked from 'package:ggplot2':
## 
##     alpha
library(NbClust)       # alternative k selection
library(dplyr)         # data wrangling
library(tibble)
library(ggplot2)
library(Rtsne)         # optional visualization
library(fpc)           # for pamk() (CLARA-based stand-in for CLARANS)

# ===========================================
# 2. DATA (numeric columns only)
# ===========================================
data_klaster <- desa_df %>% dplyr::select(starts_with("X")) %>% scale()

# ===========================================
# 3. FIND THE OPTIMAL K (for k-means)
# ===========================================
fviz_nbclust(data_klaster, kmeans, method = "wss", k.max = 10) +
  labs(title = "Elbow Method")

fviz_nbclust(data_klaster, kmeans, method = "silhouette", k.max = 10) +
  labs(title = "Silhouette Method")

set.seed(123)
gap_stat <- clusGap(data_klaster, FUN = kmeans, K.max = 10, B = 50)
fviz_gap_stat(gap_stat)

# ===========================================
# 4. USE THE OPTIMAL K
# ===========================================
k <- 2  # replace with the value suggested by the plots above

# ===========================================
# 5. KMEANS
# ===========================================
set.seed(42)
km_res <- kmeans(data_klaster, centers = k)
sil_km <- silhouette(km_res$cluster, dist(data_klaster))
avg_sil_km <- mean(sil_km[, 3])

# ===========================================
# 6. AGNES (Agglomerative Hierarchical)
# ===========================================
agnes_res <- agnes(data_klaster, method = "ward")
hc_agnes <- cutree(as.hclust(agnes_res), k = k)
sil_agnes <- silhouette(hc_agnes, dist(data_klaster))
avg_sil_agnes <- mean(sil_agnes[, 3])

# ===========================================
# 7. DIANA (Divisive)
# ===========================================
diana_res <- diana(data_klaster)
hc_diana <- cutree(as.hclust(diana_res), k = k)
sil_diana <- silhouette(hc_diana, dist(data_klaster))
avg_sil_diana <- mean(sil_diana[, 3])

# ===========================================
# 8. SPECTRAL CLUSTERING
# ===========================================
spec_res <- specc(as.matrix(data_klaster), centers = k)
sil_spec <- silhouette(as.integer(spec_res), dist(data_klaster))
avg_sil_spec <- mean(sil_spec[, 3])

# ===========================================
# 9. CLARANS (via pamk)
# ===========================================
clarans_res <- pamk(data_klaster, krange = k, usepam = FALSE)  # usepam = FALSE makes pamk use clara(), a sampling-based approximation
clarans_cluster <- clarans_res$pamobject$clustering
sil_clarans <- silhouette(clarans_cluster, dist(data_klaster))
avg_sil_clarans <- mean(sil_clarans[, 3])

# ===========================================
# 10. SUMMARIZE RESULTS
# ===========================================
hasil_klaster <- tibble(
  Metode = c("K-Means", "Agnes (Hierarchical)", "Diana", "Spectral", "CLARANS"),
  Silhouette_Score = round(c(avg_sil_km, avg_sil_agnes, avg_sil_diana, avg_sil_spec, avg_sil_clarans), 3)
)

print(hasil_klaster)
## # A tibble: 5 × 2
##   Metode               Silhouette_Score
##   <chr>                           <dbl>
## 1 K-Means                         0.543
## 2 Agnes (Hierarchical)            0.707
## 3 Diana                           0.939
## 4 Spectral                        0.712
## 5 CLARANS                         0.531
# ===========================================
# 11. VISUALIZATION
# ===========================================
ggplot(hasil_klaster, aes(x = reorder(Metode, Silhouette_Score), y = Silhouette_Score, fill = Metode)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(title = "Perbandingan Metode Klaster (Silhouette Score)", x = "Metode", y = "Silhouette Score") +
  theme_minimal(base_size = 14)
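
A high average silhouette can also be the artifact of a degenerate split (one huge cluster plus a few outliers), so the membership counts behind each score are worth checking; a small sketch (an addition) over the labelings computed in steps 5-9:

# Sketch: cluster sizes per method
lapply(list(kmeans = km_res$cluster, agnes = hc_agnes, diana = hc_diana,
            spectral = as.integer(spec_res), clarans = clarans_cluster),
       table)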

# ===========================================
# 1. LIBRARY
# ===========================================
library(cluster)       # pam, clara, agnes, diana
library(factoextra)    # fviz_nbclust
library(kernlab)       # spectral clustering
library(fpc)           # pamk (CLARA-based stand-in for CLARANS)
library(dbscan)        # dbscan
## 
## Attaching package: 'dbscan'
## The following object is masked from 'package:fpc':
## 
##     dbscan
## The following object is masked from 'package:stats':
## 
##     as.dendrogram
library(dplyr)         # wrangling
library(tibble)
library(ggplot2)

# ===========================================
# 2. DATA & PREP
# ===========================================
data_klaster <- desa_df %>% dplyr::select(starts_with("X")) %>% scale()
dist_mat <- dist(data_klaster)

# ===========================================
# 3. Silhouette helper
# ===========================================
get_avg_sil <- function(cluster_vec) {
  sil <- silhouette(cluster_vec, dist_mat)
  mean(sil[, 3])
}

# ===========================================
# 4. OPTIMAL K PER METHOD
# ===========================================

# --- KMEANS
sil_km_all <- sapply(2:10, function(k) {
  cl <- kmeans(data_klaster, centers = k)$cluster
  get_avg_sil(cl)
})
k_kmeans <- which.max(sil_km_all) + 1

# --- AGNES
sil_agnes_all <- sapply(2:10, function(k) {
  cl <- cutree(as.hclust(agnes(data_klaster, method = "ward")), k)
  get_avg_sil(cl)
})
k_agnes <- which.max(sil_agnes_all) + 1

# --- DIANA
sil_diana_all <- sapply(2:10, function(k) {
  cl <- cutree(as.hclust(diana(data_klaster)), k)
  get_avg_sil(cl)
})
k_diana <- which.max(sil_diana_all) + 1

# --- SPECTRAL
sil_spec_all <- sapply(2:10, function(k) {
  cl <- specc(as.matrix(data_klaster), centers = k)
  get_avg_sil(as.integer(cl))
})
k_spec <- which.max(sil_spec_all) + 1

# --- CLARANS (automatic k)
clarans_res <- pamk(data_klaster, krange = 2:10, usepam = FALSE)
k_clarans <- clarans_res$nc

# --- K-MEDOIDS (PAM)
sil_pam_all <- sapply(2:10, function(k) {
  cl <- pam(data_klaster, k)$clustering
  get_avg_sil(cl)
})
k_pam <- which.max(sil_pam_all) + 1

# --- CLARA
sil_clara_all <- sapply(2:10, function(k) {
  cl <- clara(data_klaster, k)$clustering
  get_avg_sil(cl)
})
k_clara <- which.max(sil_clara_all) + 1

# ===========================================
# 5. FINAL CLUSTERING WITH THE OPTIMAL K
# ===========================================

# --- KMeans
km_final <- kmeans(data_klaster, centers = k_kmeans)
sil_km <- get_avg_sil(km_final$cluster)

# --- Agnes
agnes_final <- agnes(data_klaster, method = "ward")
hc_agnes <- cutree(as.hclust(agnes_final), k = k_agnes)
sil_agnes <- get_avg_sil(hc_agnes)

# --- Diana
diana_final <- diana(data_klaster)
hc_diana <- cutree(as.hclust(diana_final), k = k_diana)
sil_diana <- get_avg_sil(hc_diana)

# --- Spectral
spec_final <- specc(as.matrix(data_klaster), centers = k_spec)
sil_spec <- get_avg_sil(as.integer(spec_final))

# --- CLARANS
clarans_cluster <- clarans_res$pamobject$clustering
sil_clarans <- get_avg_sil(clarans_cluster)

# --- K-Medoids
pam_final <- pam(data_klaster, k = k_pam)
sil_pam <- get_avg_sil(pam_final$clustering)

# --- CLARA
clara_final <- clara(data_klaster, k = k_clara)
sil_clara <- get_avg_sil(clara_final$clustering)



# ===========================================
# 6. SUMMARIZE RESULTS
# ===========================================
hasil_klaster <- tibble(
  Metode = c("KMeans", "Agnes", "Diana", "Spectral", "CLARANS", "K-Medoids", "CLARA"),
  K_Optimum = c(k_kmeans, k_agnes, k_diana, k_spec, k_clarans, k_pam, k_clara),
  Silhouette_Score = round(c(sil_km, sil_agnes, sil_diana, sil_spec, sil_clarans, sil_pam, sil_clara), 3)
)

print(hasil_klaster)
## # A tibble: 7 × 3
##   Metode    K_Optimum Silhouette_Score
##   <chr>         <dbl>            <dbl>
## 1 KMeans            4            0.676
## 2 Agnes            10            0.715
## 3 Diana             2            0.939
## 4 Spectral          2            0.852
## 5 CLARANS           4            0.678
## 6 K-Medoids         4            0.677
## 7 CLARA             4            0.678
# ===========================================
# 7. VISUALIZATION
# ===========================================
ggplot(hasil_klaster, aes(x = reorder(Metode, Silhouette_Score), y = Silhouette_Score, fill = Metode)) +
  geom_col(show.legend = FALSE) +
  coord_flip() +
  labs(title = "Perbandingan Metode Klaster (K Spesifik)", x = "Metode", y = "Silhouette Score") +
  theme_minimal(base_size = 14)

K-Means

# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster)
library(factoextra)
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
library(fpc)
library(clusterSim)
library(NbClust)
library(reshape2)
library(sf)
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
library(tibble)
library(tidyr)
## 
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
## 
##     smiths
# ======================================
# 2. READ DATA & STANDARDIZE
# ======================================
desa_klaster <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_klaster %>% dplyr::select(X1:X6))
dist_mat <- dist(data_scaled)

# ======================================
# 3. CORRELATION
# ======================================
corrplot(cor(data_scaled), method = "circle", type = "upper")

# ======================================
# 4. FIND THE OPTIMAL NUMBER OF CLUSTERS
# ======================================
silhouette_scores <- sapply(2:10, function(k) {
  km <- kmeans(data_scaled, centers = k, nstart = 25)
  sil <- silhouette(km$cluster, dist_mat)
  mean(sil[, 3])
})

wss_values <- sapply(2:10, function(k) {
  kmeans(data_scaled, centers = k, nstart = 25)$tot.withinss
})

set.seed(123)
gap_stat <- clusGap(data_scaled, FUN = kmeans, K.max = 10, B = 50)
gap_df <- as.data.frame(gap_stat$Tab)
gap_score <- gap_df$gap[2:10]

ch_index <- sapply(2:10, function(k) {
  km <- kmeans(data_scaled, centers = k, nstart = 25)
  calinhara(data_scaled, km$cluster)
})

db_index <- sapply(2:10, function(k) {
  km <- kmeans(data_scaled, centers = k, nstart = 25)
  index.DB(data_scaled, km$cluster)$DB
})

eval_df <- tibble(
  K = 2:10,
  Silhouette = round(silhouette_scores, 3),
  WSS = round(wss_values, 2),
  Gap = round(gap_score, 3),
  Calinski_Harabasz = round(ch_index, 2),
  Davies_Bouldin = round(db_index, 3)
)
print(eval_df)
## # A tibble: 9 × 6
##       K Silhouette   WSS   Gap Calinski_Harabasz Davies_Bouldin
##   <int>      <dbl> <dbl> <dbl>             <dbl>          <dbl>
## 1     2      0.718 5258.  2.37              407.          1.61 
## 2     3      0.658 4198.  2.76              403.          1.47 
## 3     4      0.676 3096   2.81              432.          1.38 
## 4     5      0.686 2409.  2.83              484.          0.928
## 5     6      0.706 1882.  2.94              684.          0.918
## 6     7      0.627 1708.  3.01              618.          0.886
## 7     8      0.631 1497.  2.98              586.          0.892
## 8     9      0.649 1228.  3.02              561.          0.929
## 9    10      0.661  938.  3.16              636.          0.875
# Visualize the evaluation metrics
eval_long <- eval_df %>%
  pivot_longer(-K, names_to = "Metrik", values_to = "Nilai")

ggplot(eval_long, aes(x = K, y = Nilai, color = Metrik)) +
  geom_line() + geom_point() +
  facet_wrap(~Metrik, scales = "free_y") +
  labs(title = "Evaluasi K-Means Berdasarkan Nilai K", x = "Jumlah Klaster (k)") +
  theme_minimal()

# Additional individual plots
fviz_nbclust(data_scaled, kmeans, method = "wss", k.max = 10) +
  geom_vline(xintercept = 4, linetype = 2)

fviz_nbclust(data_scaled, kmeans, method = "silhouette", k.max = 10) +
  geom_vline(xintercept = 4, linetype = 2)

fviz_gap_stat(gap_stat)
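
Rather than reading the optimum off the plots by eye, the k favored by each criterion can be pulled out of eval_df programmatically; a minimal sketch (an addition):

# Sketch: candidate k preferred by each internal validity criterion
tibble(
  Criterion = c("Silhouette (max)", "Calinski-Harabasz (max)", "Davies-Bouldin (min)"),
  Best_K = c(eval_df$K[which.max(eval_df$Silhouette)],
             eval_df$K[which.max(eval_df$Calinski_Harabasz)],
             eval_df$K[which.min(eval_df$Davies_Bouldin)])
)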

# ======================================
# 5. FIT KMEANS
# ======================================
k <- 4
set.seed(42)
kmeans_res <- kmeans(data_scaled, centers = k, nstart = 25)
desa_klaster$cluster <- factor(kmeans_res$cluster)

# ======================================
# 6. VARIABLE PROFILE PER CLUSTER
# ======================================
profil_klaster <- desa_klaster %>%
  group_by(cluster) %>%
  summarise(across(X1:X6, mean), .groups = "drop")
print(profil_klaster)
## # A tibble: 4 × 7
##   cluster     X1      X2      X3    X4       X5      X6
##   <fct>    <dbl>   <dbl>   <dbl> <dbl>    <dbl>   <dbl>
## 1 1       0.573   0.0230 0.00205 91.6  -0.00118 0.00663
## 2 2       0.315   2.22   0.270   13.3  -0.535   0.513  
## 3 3       0.0270 49.5    3.61    23.6  -0.504   1      
## 4 4       0.0916  0.0169 0.00155  7.34 -0.00254 0.00228
profil_long <- melt(profil_klaster, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
  geom_col(position = "dodge") +
  labs(title = "Profil Variabel per Klaster", x = "Variabel", y = "Rata-rata") +
  theme_minimal()

# ======================================
# 7. PCA CLUSTER VISUALIZATION
# ======================================
fviz_cluster(kmeans_res, data = data_scaled, ellipse.type = "norm", repel = FALSE) +
  labs(title = "Cluster Visualization (PCA)") +
  theme_minimal()
## Too few points to calculate an ellipse

# ======================================
# 8. READ THE MAP AND JOIN
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source 
##   `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS:  WGS 84
desa_shp$KDEPUM <- gsub("\\.", "", desa_shp$KDEPUM)
desa_shp$KDEPUM <- as.numeric(desa_shp$KDEPUM)
## Warning: NAs introduced by coercion
desa_klaster$KDEPUM <- as.numeric(desa_klaster$KDEPUM)

desa_map <- desa_shp %>%
  left_join(desa_klaster %>% dplyr::select(KDEPUM, cluster), by = "KDEPUM") %>%
  filter(!is.na(cluster))

# ======================================
# 9. CLUSTER MAP
# ======================================
ggplot(desa_map) +
  geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Peta Klaster K-Means - Maluku Utara", fill = "Klaster") +
  theme_minimal()

# ======================================
# 10. FINAL EVALUATION
# ======================================
sil <- silhouette(kmeans_res$cluster, dist_mat)
fviz_silhouette(sil)
##   cluster size ave.sil.width
## 1       1  323          0.53
## 2       2  103          0.16
## 3       3    2         -0.15
## 4       4  752          0.80

cat("Silhouette Average:", round(mean(sil[, 3]), 3), "\n")
## Silhouette Average: 0.667
btw_ratio <- kmeans_res$betweenss / kmeans_res$totss
cat("BetweenSS/TotalSS Ratio:", round(btw_ratio, 3), "\n")
## BetweenSS/TotalSS Ratio: 0.562
ch_index_final <- calinhara(data_scaled, kmeans_res$cluster)
cat("Calinski-Harabasz Index:", round(ch_index_final, 3), "\n")
## Calinski-Harabasz Index: 503.675
dbi <- index.DB(data_scaled, kmeans_res$cluster)$DB
cat("Davies-Bouldin Index:", round(dbi, 3), "\n")
## Davies-Bouldin Index: 1.097
# ======================================
# 11. CLUSTER SIZES
# ======================================
print(table(desa_klaster$cluster))
## 
##   1   2   3   4 
## 323 103   2 752
ggplot(desa_klaster, aes(x = cluster)) +
  geom_bar(fill = "steelblue") +
  labs(title = "Jumlah Desa per Klaster", x = "Klaster", y = "Jumlah") +
  theme_minimal()

# ======================================
# 12. SPATIAL CENTROID PER CLUSTER
# ======================================
library(sf)
desa_map <- st_make_valid(desa_map)

# Compute each village's centroid
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon_centroid <- coords[, 1]
desa_map$lat_centroid <- coords[, 2]

# Average coordinates per cluster
centroid_klaster <- desa_map %>%
  group_by(cluster) %>%
  summarise(
    lon = mean(lon_centroid, na.rm = TRUE),
    lat = mean(lat_centroid, na.rm = TRUE)
  )
print(centroid_klaster)
## Simple feature collection with 4 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS:  WGS 84
## # A tibble: 4 × 4
##   cluster   lon    lat                                                  geometry
##   <fct>   <dbl>  <dbl>                                        <MULTIPOLYGON [°]>
## 1 1        128.  0.844 (((125.4217 -1.767736, 125.4215 -1.767744, 125.4213 -1.7…
## 2 2        127. -0.179 (((128.321 0.901669, 128.3208 0.9019579, 128.3162 0.9134…
## 3 3        128.  0.339 (((127.8964 0.1746875, 127.8904 0.1728329, 127.8838 0.17…
## 4 4        128.  0.369 (((128.3602 1.480541, 128.3602 1.480656, 128.3602 1.4807…
# Display the map with centroids
ggplot() +
  geom_sf(data = desa_map, aes(fill = cluster), color = "white") +
  geom_point(data = centroid_klaster, aes(x = lon, y = lat), color = "red", size = 2) +
  labs(title = "Peta Klaster + Centroid K-Means") +
  theme_minimal()

K-Medoids

# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster)
library(factoextra)
library(ggplot2)
library(corrplot)
library(fpc)
library(clusterSim)
library(reshape2)
library(sf)
library(tibble)
library(tidyr)

# ======================================
# 2. BACA DATA & STANDARISASI
# ======================================
desa_medoid <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_medoid %>% dplyr::select(X1:X6))
dist_mat <- dist(data_scaled)  # PAM can be fit directly on a distance matrix

# ======================================
# 3. CORRELATION
# ======================================
corrplot(cor(data_scaled), method = "circle", type = "upper")

# ======================================
# 4. DETERMINE THE OPTIMAL K
# ======================================
sil_scores <- sapply(2:10, function(k) {
  pam_res <- pam(dist_mat, k = k)
  pam_res$silinfo$avg.width
})
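
For symmetry with the CLARANS and Spectral sections, the silhouette curve can be plotted before committing to a k; a minimal sketch (an addition):

# Sketch: average silhouette width per candidate k (PAM)
plot(2:10, sil_scores, type = "b", pch = 19, col = "blue",
     main = "Silhouette Score vs K (PAM)", xlab = "Number of clusters (K)",
     ylab = "Average silhouette width")
abline(v = which.max(sil_scores) + 1, lty = 2, col = "red")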

# ================================
# 5. FIT PAM with the chosen k
# ================================
k <- which.max(sil_scores) + 1
pam_res <- pam(data_scaled, k = k)  # fit on the data matrix so fviz_cluster works

desa_medoid$cluster <- factor(pam_res$clustering)

# ================================
# 6. Evaluasi & Visualisasi PAM
# ================================
# a. Silhouette Plot
fviz_silhouette(pam_res) +
  labs(title = paste("Silhouette Plot PAM – k =", k))
##   cluster size ave.sil.width
## 1       1  334          0.51
## 2       2  740          0.81
## 3       3   42         -0.15
## 4       4   64          0.58

# b. Variable profile plot
profil_medoid <- desa_medoid %>%
  group_by(cluster) %>%
  summarise(across(X1:X6, mean), .groups = "drop")
profil_long <- melt(profil_medoid, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
  geom_col(position = "dodge") +
  labs(title = "Profil Variabel per Klaster (PAM)") +
  theme_minimal()

# c. PCA Scatter
fviz_cluster(pam_res, data = data_scaled, ellipse.type = "norm", repel = FALSE) +
  labs(title = "PCA Plot of PAM Clusters") +
  theme_minimal()

# d. Cluster size distribution
ggplot(desa_medoid, aes(x = cluster)) +
  geom_bar(fill = "steelblue") +
  labs(title = "Jumlah Desa per Klaster (PAM)") +
  theme_minimal()

# ======================================
# 7. MAP THE CLUSTERS
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source 
##   `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS:  WGS 84
desa_shp$KDEPUM <- gsub("\\.", "", desa_shp$KDEPUM)
desa_shp$KDEPUM <- as.numeric(desa_shp$KDEPUM)
## Warning: NAs introduced by coercion
desa_medoid$KDEPUM <- as.numeric(desa_medoid$KDEPUM)

desa_map <- desa_shp %>%
  left_join(desa_medoid %>% dplyr::select(KDEPUM, cluster), by = "KDEPUM") %>%
  filter(!is.na(cluster))

ggplot(desa_map) +
  geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = paste("Peta Klaster PAM – k =", k), fill = "Cluster") +
  theme_minimal()

# ======================================
# 8. COMPUTE SPATIAL CENTROIDS
# ======================================
desa_map <- st_make_valid(desa_map)
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon <- coords[,1]; desa_map$lat <- coords[,2]

centroid_medoid <- desa_map %>%
  group_by(cluster) %>%
  summarise(lon = mean(lon), lat = mean(lat))
ggplot() +
  geom_sf(data = desa_map, aes(fill = cluster), color = "grey90") +
  geom_point(data = centroid_medoid, aes(x = lon, y = lat), color = "red", size = 2) +
  labs(title = "Peta Klaster PAM + Centroid") +
  theme_minimal()

# ======================================
# 9. EXTRA METRICS
# ======================================
cat("Rata-rata Silhouette PAM (k=", k, "): ", round(sil_scores[k-1], 3), "\n")
## Rata-rata Silhouette PAM (k= 4 ):  0.677
# NOTE: pam objects, unlike kmeans, have no betweenss/tot.withinss components,
# which is why the ratio originally printed here came out empty.
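
If a variance-ratio summary is still wanted for PAM, it can be recovered by hand from the scaled data; a hedged sketch (an addition) that decomposes the total sum of squares around the grand mean and the cluster means:

# Sketch: BetweenSS/TotalSS computed manually for the PAM partition
grand_mean <- colMeans(data_scaled)
total_ss   <- sum(sweep(data_scaled, 2, grand_mean)^2)
within_ss  <- sum(sapply(
  split(as.data.frame(data_scaled), pam_res$clustering),
  function(d) sum(sweep(as.matrix(d), 2, colMeans(d))^2)
))
cat("BetweenSS/TotalSS Ratio:", round((total_ss - within_ss) / total_ss, 3), "\n")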
ch <- calinhara(data_scaled, pam_res$clustering)
db <- index.DB(data_scaled, pam_res$clustering)$DB
cat("Calinski-Harabasz Index:", round(ch, 3), "\n")
## Calinski-Harabasz Index: 382.366
cat("Davies-Bouldin Index:", round(db, 3), "\n")
## Davies-Bouldin Index: 1.636

CLARANS

# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster)
library(factoextra)
library(ggplot2)
library(sf)
library(fpc)          # calinhara() (clara() itself comes from cluster)
library(clusterSim)
library(tibble)
library(tidyr)
library(reshape2)

# ======================================
# 2. READ DATA & STANDARDIZE
# ======================================
desa_clarans <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_clarans %>% dplyr::select(X1:X6))

# ======================================
# 3. FIND THE OPTIMAL K (silhouette)
# ======================================
sil_scores <- sapply(2:10, function(k) {
  clara_model <- clara(data_scaled, k = k, samples = 5)
  mean(silhouette(clara_model$clustering, dist(data_scaled))[, 3])
})

# Visualize the silhouette values
k_opt <- which.max(sil_scores) + 1
plot(2:10, sil_scores, type = "b", pch = 19, col = "blue",
     main = "Silhouette Score vs K", xlab = "Jumlah Klaster (K)", ylab = "Silhouette Avg")
abline(v = k_opt, lty = 2, col = "red")

# ======================================
# 4. FIT CLARANS/CLARA
# ======================================
set.seed(42)
clara_res <- clara(data_scaled, k = k_opt, samples = 5)
desa_clarans$cluster <- factor(clara_res$clustering)

# ======================================
# 5. VARIABLE PROFILE PER CLUSTER
# ======================================
profil <- desa_clarans %>%
  group_by(cluster) %>%
  summarise(across(X1:X6, mean), .groups = "drop")
print(profil)
## # A tibble: 4 × 7
##   cluster     X1     X2      X3    X4       X5      X6
##   <fct>    <dbl>  <dbl>   <dbl> <dbl>    <dbl>   <dbl>
## 1 1       0.549  0.0238 0.00203 90.8  -0.00119 0.00658
## 2 2       0.0969 0.0165 0.00125  6.73 -0.00213 0.00226
## 3 3       0.650  5.66   0.460   25.9  -0.536   1.31   
## 4 4       0.0813 1.41   0.249    5.78 -0.530   0
profil_long <- melt(profil, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
  geom_col(position = "dodge") +
  labs(title = "Profil Variabel per Klaster (CLARANS)", x = "Variabel", y = "Rata-rata") +
  theme_minimal()

# ======================================
# 6. PCA VISUALISASI
# ======================================
fviz_cluster(list(data = data_scaled, cluster = clara_res$clustering), 
             ellipse.type = "norm", geom = "point", show.clust.cent = TRUE) +
  labs(title = "PCA Klaster CLARANS") +
  theme_minimal()

# ======================================
# 7. MAP THE CLUSTERS
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source 
##   `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS:  WGS 84
desa_shp$KDEPUM <- gsub("\\.", "", desa_shp$KDEPUM)
desa_shp$KDEPUM <- as.numeric(desa_shp$KDEPUM)
## Warning: NAs introduced by coercion
desa_clarans$KDEPUM <- as.numeric(desa_clarans$KDEPUM)

desa_map <- desa_shp %>%
  left_join(desa_clarans %>% dplyr::select(KDEPUM,cluster), by = "KDEPUM") %>%
  filter(!is.na(cluster))

ggplot(desa_map) +
  geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = paste("Peta Klaster CLARANS – k =", k_opt), fill = "Cluster") +
  theme_minimal()

# ======================================
# 8. SPATIAL CENTROIDS
# ======================================
desa_map <- st_make_valid(desa_map)
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon <- coords[, 1]
desa_map$lat <- coords[, 2]

centroid_clarans <- desa_map %>%
  group_by(cluster) %>%
  summarise(
    lon = mean(lon),
    lat = mean(lat)
  )

ggplot() +
  geom_sf(data = desa_map, aes(fill = cluster), color = "grey90") +
  geom_point(data = centroid_clarans, aes(x = lon, y = lat), color = "red", size = 2) +
  labs(title = "Peta Klaster CLARANS + Titik Tengah") +
  theme_minimal()

# ======================================
# 9. EVALUATION METRICS
# ======================================
cat("Jumlah Klaster Optimal:", k_opt, "\n")
## Jumlah Klaster Optimal: 4
cat("Rata-rata Silhouette:", round(sil_scores[k_opt - 1], 3), "\n")
## Rata-rata Silhouette: 0.678
cat("Calinski-Harabasz Index:", round(calinhara(data_scaled, clara_res$clustering), 3), "\n")
## Calinski-Harabasz Index: 382.497
cat("Davies-Bouldin Index:", round(index.DB(data_scaled, clara_res$clustering)$DB, 3), "\n")
## Davies-Bouldin Index: 1.636
# Number of members in each cluster
print(table(desa_clarans$cluster))
## 
##   1   2   3   4 
## 331 743  42  64

Spectral

# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(ggplot2)
library(factoextra)
library(cluster)
library(corrplot)
library(kernlab)       # specc()
library(reshape2)
library(sf)
library(clusterSim)
library(tibble)

# ======================================
# 2. READ DATA & STANDARDIZE
# ======================================
desa_spec <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_spec %>% dplyr::select(X1:X6))

# ======================================
# 3. CORRELATION
# ======================================
corrplot(cor(data_scaled), method = "circle", type = "upper")

# ======================================
# 4. DETERMINE THE OPTIMAL K (silhouette)
# ======================================
silhouette_scores <- sapply(2:10, function(k) {
  spec_model <- specc(data_scaled, centers = k)
  clust <- as.integer(spec_model)
  sil <- silhouette(clust, dist(data_scaled))
  mean(sil[, 3])
})

# Visualize the silhouette values
k_opt <- which.max(silhouette_scores) + 1
plot(2:10, silhouette_scores, type = "b", pch = 19, col = "blue",
     main = "Silhouette Score vs K (Spectral)", xlab = "K", ylab = "Silhouette")
abline(v = k_opt, lty = 2, col = "red")
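
Note that specc() uses random initialization, so the curve above can shift between runs; a small sketch (an addition) that pins the seed before each fit for reproducibility:

# Sketch: reproducible version of the k search for spectral clustering
silhouette_scores <- sapply(2:10, function(k) {
  set.seed(42)  # fix the RNG so each specc() fit is repeatable
  clust <- as.integer(specc(data_scaled, centers = k))
  mean(silhouette(clust, dist(data_scaled))[, 3])
})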

# ======================================
# 5. FIT SPECTRAL CLUSTERING
# ======================================
set.seed(42)
spec_model <- specc(data_scaled, centers = k_opt)
desa_spec$cluster <- factor(spec_model@.Data)

# ======================================
# 6. VARIABLE PROFILE PER CLUSTER
# ======================================
profil <- desa_spec %>%
  group_by(cluster) %>%
  summarise(across(X1:X6, mean), .groups = "drop")

profil_long <- melt(profil, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
  geom_col(position = "dodge") +
  labs(title = "Profil Variabel per Klaster (Spectral)", x = "Variabel", y = "Rata-rata") +
  theme_minimal()

# ======================================
# 7. PCA VISUALIZATION
# ======================================
fviz_cluster(list(data = data_scaled, cluster = spec_model@.Data),
             geom = "point", ellipse.type = "norm", repel = FALSE) +
  labs(title = "PCA Plot of Spectral Clusters") +
  theme_minimal()

# ======================================
# 8. SPATIAL MAP
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source 
##   `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS:  WGS 84
desa_shp$KDEPUM <- gsub("\\.", "", desa_shp$KDEPUM)
desa_shp$KDEPUM <- as.numeric(desa_shp$KDEPUM)
## Warning: NAs introduced by coercion
desa_spec$KDEPUM <- as.numeric(desa_spec$KDEPUM)

desa_map <- desa_shp %>%
  left_join(desa_spec %>% dplyr::select(KDEPUM, cluster), by = "KDEPUM") %>%
  filter(!is.na(cluster))

ggplot(desa_map) +
  geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = paste("Peta Klaster Spectral – k =", k_opt), fill = "Cluster") +
  theme_minimal()

# ======================================
# 9. SPATIAL CENTROIDS
# ======================================
desa_map <- st_make_valid(desa_map)
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon <- coords[, 1]
desa_map$lat <- coords[, 2]

centroid_spec <- desa_map %>%
  group_by(cluster) %>%
  summarise(
    lon = mean(lon),
    lat = mean(lat)
  )

ggplot() +
  geom_sf(data = desa_map, aes(fill = cluster), color = "grey90") +
  geom_point(data = centroid_spec, aes(x = lon, y = lat), color = "red", size = 2) +
  labs(title = "Peta Klaster Spectral + Centroid") +
  theme_minimal()

# ======================================
# 10. EVALUATION
# ======================================
cat("Jumlah Klaster Optimal:", k_opt, "\n")
## Jumlah Klaster Optimal: 2
sil <- silhouette(as.integer(spec_model), dist(data_scaled))
cat("Rata-rata Silhouette:", round(mean(sil[, 3]), 3), "\n")
## Rata-rata Silhouette: 0.712
ch <- calinhara(data_scaled, as.integer(spec_model))
cat("Calinski-Harabasz Index:", round(ch, 3), "\n")
## Calinski-Harabasz Index: 402.064
db <- index.DB(data_scaled, as.integer(spec_model))$DB
cat("Davies-Bouldin Index:", round(db, 3), "\n")
## Davies-Bouldin Index: 1.625

DIANA

# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster)       # diana(), silhouette()
library(factoextra)    # fviz_cluster
library(ggplot2)
library(clusterSim)    # calinhara, index.DB
library(reshape2)
library(tibble)
library(sf)

# ======================================
# 2. READ DATA & STANDARDIZE
# ======================================
desa_diana <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_klaster <- scale(desa_diana %>% dplyr::select(X1:X6))

# ======================================
# 3. FIT DIANA & CARI K OPTIMAL
diana_res <- diana(data_klaster)
silhouette_scores <- sapply(2:10, function(k) {
  cl <- cutree(as.hclust(diana_res), k = k)
  sil <- silhouette(cl, dist(data_klaster))
  mean(sil[, 3])
})

# Visualize the optimal k
plot(2:10, silhouette_scores, type = "b", pch = 19, col = "darkgreen",
     xlab = "Jumlah Klaster", ylab = "Average Silhouette Width",
     main = "Pemilihan k Optimal - DIANA")
k_opt <- which.max(silhouette_scores) + 1
abline(v = k_opt, lty = 2, col = "red")

cat("K optimal (silhouette):", k_opt, "\n")
## K optimal (silhouette): 2
# ======================================
# 4. DENDROGRAM + K CUT LINES
hc_diana <- as.hclust(diana_res)
plot(hc_diana, main = "Dendrogram - DIANA", xlab = "", sub = "")
rect.hclust(hc_diana, k = k_opt, border = 2:6)

# ======================================
# 5. CLUSTER ASSIGNMENT
cluster_diana <- cutree(hc_diana, k = k_opt)
desa_diana$cluster <- factor(cluster_diana)

# ======================================
# 6. VARIABLE PROFILE PER CLUSTER
profil <- desa_diana %>%
  group_by(cluster) %>%
  summarise(across(X1:X6, mean), .groups = "drop")

profil_long <- melt(profil, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
  geom_col(position = "dodge") +
  labs(title = "Profil Variabel per Klaster - DIANA", x = "Variabel", y = "Rata-rata") +
  theme_minimal()

# ======================================
# 7. PCA VISUALIZATION
fviz_cluster(list(data = data_klaster, cluster = cluster_diana),
             ellipse.type = "norm", show.clust.cent = TRUE,
             repel = F, geom = "point") +
  labs(title = "PCA Visualisasi Klaster - DIANA") +
  theme_minimal()
## Too few points to calculate an ellipse

# ======================================
# 8. SPATIAL MAP
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source 
##   `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp' 
##   using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension:     XY
## Bounding box:  xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS:  WGS 84
desa_shp$KDEPUM <- gsub("\\.", "", desa_shp$KDEPUM) %>% as.numeric()
## Warning in gsub("\\.", "", desa_shp$KDEPUM) %>% as.numeric(): NAs introduced by
## coercion
desa_diana$KDEPUM <- as.numeric(desa_diana$KDEPUM)

# Join the cluster labels onto the shapefile
desa_map <- desa_shp %>%
  left_join(desa_diana %>% dplyr::select(KDEPUM, cluster), by = "KDEPUM") %>%
  filter(!is.na(cluster))

# Map
ggplot(desa_map) +
  geom_sf(aes(fill = cluster), color = "white", size = 0.1) +
  scale_fill_brewer(palette = "Set2") +
  labs(title = "Peta Klaster DIANA - Maluku Utara", fill = "Klaster") +
  theme_minimal()

# ======================================
# 9. CENTROID PER CLUSTER
desa_map <- st_make_valid(desa_map)
coords <- st_coordinates(st_centroid(desa_map$geometry))
desa_map$lon <- coords[, 1]
desa_map$lat <- coords[, 2]

centroid_df <- desa_map %>%
  group_by(cluster) %>%
  summarise(lon = mean(lon), lat = mean(lat))

ggplot() +
  geom_sf(data = desa_map, aes(fill = cluster), color = "white") +
  geom_point(data = centroid_df, aes(x = lon, y = lat), color = "red", size = 2) +
  labs(title = "Peta Klaster + Titik Tengah (DIANA)") +
  theme_minimal()

# ======================================
# 10. CLUSTER EVALUATION
sil <- silhouette(cluster_diana, dist(data_klaster))
cat("Silhouette Avg :", round(mean(sil[, 3]), 3), "\n")
## Silhouette Avg : 0.939
cat("Calinski-Harabasz Index:", round(calinhara(data_klaster, cluster_diana), 3), "\n")
## Calinski-Harabasz Index: 226.175
cat("Davies-Bouldin Index:", round(index.DB(data_klaster, cluster_diana)$DB, 3), "\n")
## Davies-Bouldin Index: 0.066
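
The near-perfect silhouette (0.939) combined with a Davies-Bouldin index of 0.066 is the typical signature of a split that isolates a handful of outliers, so the cluster sizes deserve a sanity check; a one-line sketch (an addition):

# Sketch: number of villages in each DIANA cluster
print(table(desa_diana$cluster))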