# ===============================================
# K-FOLD BIASA TANPA SMOTE + METRIK LENGKAP
# ===============================================
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
##
## Attaching package: 'recipes'
## The following object is masked from 'package:stats':
##
## step
## Loading required package: ggplot2
## Loading required package: lattice
##
## Attaching package: 'yardstick'
## The following objects are masked from 'package:caret':
##
## precision, recall, sensitivity, specificity
library(readxl)
set.seed(42)
# 1. Load dan label data (4 Rentan)
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)
# 2. Buat fold biasa (non-spasial)
k <- 5
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
# 3. Formula model
form <- Y ~ . -lon -lat
# 4. Loop K-Fold
results_all <- list()
for (i in 1:k) {
cat("\n=== K-FOLD BIASA TANPA SMOTE Fold", i, "===\n")
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[test_idx, ]
models <- list()
preds <- list()
metrics <- list()
# === MODEL 1: NAIVE BAYES
models$nb <- naiveBayes(form, data = train_data)
preds$nb <- predict(models$nb, test_data)
# === MODEL 2: DECISION TREE
models$dt <- rpart(form, data = train_data)
preds$dt <- predict(models$dt, test_data, type = "class")
# === MODEL 3: KNN
models$knn <- train(form, data = train_data, method = "knn",
trControl = trainControl(method = "none"),
tuneGrid = data.frame(k = 5))
preds$knn <- predict(models$knn, test_data)
# === MODEL 4: SVM
models$svm <- svm(form, data = train_data)
preds$svm <- predict(models$svm, test_data)
# === MODEL 5: RANDOM FOREST
models$rf <- ranger(formula = form, data = train_data, num.trees = 500)
preds$rf <- predict(models$rf, data = test_data)$predictions
for (model_name in names(preds)) {
pred <- preds[[model_name]]
obs <- test_data$Y
eval_df <- data.frame(obs = obs, pred = pred)
cm <- confusionMatrix(pred, obs, positive = "Rentan")
acc <- cm$overall["Accuracy"]
kappa <- cm$overall["Kappa"]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
metrics[[model_name]] <- data.frame(
Fold = i,
Model = model_name,
Accuracy = acc,
Kappa = kappa,
Precision = prec,
Recall = rec,
F1_Score = f1
)
}
results_all[[i]] <- bind_rows(metrics)
}
##
## === K-FOLD BIASA TANPA SMOTE Fold 1 ===
##
## === K-FOLD BIASA TANPA SMOTE Fold 2 ===
## Warning: While computing binary `precision()`, no predicted events were detected (i.e.
## `true_positive + false_positive = 0`).
## Precision is undefined in this case, and `NA` will be returned.
## Note that 73 true event(s) actually occurred for the problematic event level,
## Rentan
## While computing binary `precision()`, no predicted events were detected (i.e.
## `true_positive + false_positive = 0`).
## Precision is undefined in this case, and `NA` will be returned.
## Note that 73 true event(s) actually occurred for the problematic event level,
## Rentan
##
## === K-FOLD BIASA TANPA SMOTE Fold 3 ===
##
## === K-FOLD BIASA TANPA SMOTE Fold 4 ===
##
## === K-FOLD BIASA TANPA SMOTE Fold 5 ===
# 5. Gabung hasil
hasil_all <- bind_rows(results_all)
cat("\n=== METRIK PER FOLD (TANPA SMOTE) ===\n")
##
## === METRIK PER FOLD (TANPA SMOTE) ===
## Fold Model Accuracy Kappa Precision Recall F1_Score
## Accuracy...1 1 nb 0.4788136 0.122172493 0.3579545 0.86301370 0.50602410
## Accuracy...2 1 dt 0.6440678 0.083833996 0.3877551 0.26027397 0.31147541
## Accuracy...3 1 knn 0.6610169 0.148705925 0.4363636 0.32876712 0.37500000
## Accuracy...4 1 svm 0.6864407 -0.008430535 0.0000000 0.00000000 0.00000000
## Accuracy...5 1 rf 0.6991525 0.215836765 0.5217391 0.32876712 0.40336134
## Accuracy...6 2 nb 0.4279661 -0.007335273 0.3062500 0.67123288 0.42060086
## Accuracy...7 2 dt 0.6779661 0.142556650 0.4634146 0.26027397 0.33333333
## Accuracy...8 2 knn 0.6525424 0.090516026 0.4000000 0.24657534 0.30508475
## Accuracy...9 2 svm 0.6906780 0.000000000 NA 0.00000000 NA
## Accuracy...10 2 rf 0.7161017 0.220623028 0.5882353 0.27397260 0.37383178
## Accuracy...11 3 nb 0.4661017 0.078182156 0.3413174 0.78082192 0.47500000
## Accuracy...12 3 dt 0.7033898 0.159629667 0.5555556 0.20547945 0.30000000
## Accuracy...13 3 knn 0.6355932 0.029735156 0.3414634 0.19178082 0.24561404
## Accuracy...14 3 svm 0.6906780 0.010340074 0.5000000 0.01369863 0.02666667
## Accuracy...15 3 rf 0.6906780 0.143226576 0.5000000 0.21917808 0.30476190
## Accuracy...16 4 nb 0.6949153 0.076421350 0.5384615 0.09589041 0.16279070
## Accuracy...17 4 dt 0.6652542 0.097055405 0.4210526 0.21917808 0.28828829
## Accuracy...18 4 knn 0.6779661 0.171087901 0.4693878 0.31506849 0.37704918
## Accuracy...19 4 svm 0.6949153 0.018824345 1.0000000 0.01369863 0.02702703
## Accuracy...20 4 rf 0.6652542 0.104858844 0.4250000 0.23287671 0.30088496
## Accuracy...21 5 nb 0.4703390 0.088042537 0.3452381 0.79452055 0.48132780
## Accuracy...22 5 dt 0.6779661 0.164134588 0.4680851 0.30136986 0.36666667
## Accuracy...23 5 knn 0.6567797 0.105391239 0.4130435 0.26027397 0.31932773
## Accuracy...24 5 svm 0.6864407 -0.008430535 0.0000000 0.00000000 0.00000000
## Accuracy...25 5 rf 0.7161017 0.240829652 0.5750000 0.31506849 0.40707965
##
## === RATA-RATA METRIK PER MODEL ===
hasil_summary <- hasil_all %>%
group_by(Model) %>%
summarise(across(Accuracy:F1_Score, mean, na.rm = TRUE), .groups = "drop")
## Warning: There was 1 warning in `summarise()`.
## ℹ In argument: `across(Accuracy:F1_Score, mean, na.rm = TRUE)`.
## ℹ In group 1: `Model = "dt"`.
## Caused by warning:
## ! The `...` argument of `across()` is deprecated as of dplyr 1.1.0.
## Supply arguments directly to `.fns` through an anonymous function instead.
##
## # Previously
## across(a:b, mean, na.rm = TRUE)
##
## # Now
## across(a:b, \(x) mean(x, na.rm = TRUE))
## # A tibble: 5 × 6
## Model Accuracy Kappa Precision Recall F1_Score
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dt 0.674 0.129 0.459 0.249 0.320
## 2 knn 0.657 0.109 0.412 0.268 0.324
## 3 nb 0.508 0.0715 0.378 0.641 0.409
## 4 rf 0.697 0.185 0.522 0.274 0.358
## 5 svm 0.690 0.00246 0.375 0.00548 0.0134
# ===============================================
# K-FOLD BIASA + SMOTE + ALL METRICS
# ===============================================
library(dplyr)
library(themis)
library(recipes)
library(e1071)
library(rpart)
library(ranger)
library(caret)
library(yardstick)
library(readxl)
set.seed(42)
# 1. Load dan re-label data (4 Rentan, 2 Tahan)
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)
# 2. Buat fold biasa (non-spasial)
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
# 3. Formula model
form <- Y ~ . -lon -lat # tidak ada spatial_fold
# 4. Loop K-Fold
results_all <- list()
for (i in 1:k) {
cat("\n=== K-FOLD BIASA Fold", i, "===\n")
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[test_idx, ]
# SMOTE pada training set
rec <- recipe(Y ~ ., data = train_data) %>%
step_smote(Y) %>%
prep()
train_data_smote <- bake(rec, new_data = NULL)
models <- list()
preds <- list()
metrics <- list()
# === MODEL 1: NAIVE BAYES
models$nb <- naiveBayes(form, data = train_data_smote)
preds$nb <- predict(models$nb, test_data)
# === MODEL 2: DECISION TREE
models$dt <- rpart(form, data = train_data_smote)
preds$dt <- predict(models$dt, test_data, type = "class")
# === MODEL 3: KNN
models$knn <- train(form, data = train_data_smote, method = "knn",
trControl = trainControl(method = "none"),
tuneGrid = data.frame(k = 10))
preds$knn <- predict(models$knn, test_data)
# === MODEL 4: SVM
models$svm <- svm(form, data = train_data_smote)
preds$svm <- predict(models$svm, test_data)
# === MODEL 5: RANDOM FOREST
models$rf <- ranger(formula = form, data = train_data_smote, num.trees = 500)
preds$rf <- predict(models$rf, data = test_data)$predictions
for (model_name in names(preds)) {
pred <- preds[[model_name]]
obs <- test_data$Y
eval_df <- data.frame(obs = obs, pred = pred)
cm <- confusionMatrix(pred, obs, positive = "Rentan")
acc <- cm$overall["Accuracy"]
kappa <- cm$overall["Kappa"]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
metrics[[model_name]] <- data.frame(
Fold = i,
Model = model_name,
Accuracy = acc,
Kappa = kappa,
Precision = prec,
Recall = rec,
F1_Score = f1
)
}
results_all[[i]] <- bind_rows(metrics)
}
##
## === K-FOLD BIASA Fold 1 ===
##
## === K-FOLD BIASA Fold 2 ===
##
## === K-FOLD BIASA Fold 3 ===
##
## === K-FOLD BIASA Fold 4 ===
##
## === K-FOLD BIASA Fold 5 ===
##
## === K-FOLD BIASA Fold 6 ===
##
## === K-FOLD BIASA Fold 7 ===
##
## === K-FOLD BIASA Fold 8 ===
##
## === K-FOLD BIASA Fold 9 ===
##
## === K-FOLD BIASA Fold 10 ===
# 5. Gabung & cetak hasil
hasil_all <- bind_rows(results_all)
cat("\n=== METRIK PER FOLD (K-FOLD BIASA) ===\n")
##
## === METRIK PER FOLD (K-FOLD BIASA) ===
## Fold Model Accuracy Kappa Precision Recall F1_Score
## Accuracy...1 1 nb 0.3445378 -0.006942938 0.3084112 0.8918919 0.4583333
## Accuracy...2 1 dt 0.5378151 0.123006834 0.3676471 0.6756757 0.4761905
## Accuracy...3 1 knn 0.4957983 0.060279021 0.3380282 0.6486486 0.4444444
## Accuracy...4 1 svm 0.4033613 0.041302621 0.3265306 0.8648649 0.4740741
## Accuracy...5 1 rf 0.5966387 0.147208122 0.3921569 0.5405405 0.4545455
## Accuracy...6 2 nb 0.3305085 -0.022373327 0.3055556 0.8918919 0.4551724
## Accuracy...7 2 dt 0.6271186 0.290128521 0.4477612 0.8108108 0.5769231
## Accuracy...8 2 knn 0.4661017 -0.074588031 0.2758621 0.4324324 0.3368421
## Accuracy...9 2 svm 0.5423729 0.187452181 0.3924051 0.8378378 0.5344828
## Accuracy...10 2 rf 0.7288136 0.404979515 0.5555556 0.6756757 0.6097561
## Accuracy...11 3 nb 0.2649573 -0.109126984 0.2685185 0.8055556 0.4027778
## Accuracy...12 3 dt 0.6923077 0.374331551 0.5000000 0.7777778 0.6086957
## Accuracy...13 3 knn 0.6068376 0.178571429 0.4038462 0.5833333 0.4772727
## Accuracy...14 3 svm 0.5213675 0.141509434 0.3684211 0.7777778 0.5000000
## Accuracy...15 3 rf 0.6495726 0.208023774 0.4390244 0.5000000 0.4675325
## Accuracy...16 4 nb 0.3135593 -0.031958540 0.2935780 0.8888889 0.4413793
## Accuracy...17 4 dt 0.5254237 0.104121475 0.3529412 0.6666667 0.4615385
## Accuracy...18 4 knn 0.5932203 0.158645276 0.3888889 0.5833333 0.4666667
## Accuracy...19 4 svm 0.5762712 0.264339152 0.4146341 0.9444444 0.5762712
## Accuracy...20 4 rf 0.6440678 0.209821429 0.4318182 0.5277778 0.4750000
## Accuracy...21 5 nb 0.3361345 -0.005132043 0.3090909 0.9189189 0.4625850
## Accuracy...22 5 dt 0.6386555 0.162931458 0.4210526 0.4324324 0.4266667
## Accuracy...23 5 knn 0.5882353 0.135251372 0.3846154 0.5405405 0.4494382
## Accuracy...24 5 svm 0.4705882 0.063226290 0.3375000 0.7297297 0.4615385
## Accuracy...25 5 rf 0.6302521 0.137112722 0.4054054 0.4054054 0.4054054
## Accuracy...26 6 nb 0.3247863 -0.013820336 0.3027523 0.9166667 0.4551724
## Accuracy...27 6 dt 0.5982906 0.143057504 0.3877551 0.5277778 0.4470588
## Accuracy...28 6 knn 0.6153846 0.201909959 0.4150943 0.6111111 0.4943820
## Accuracy...29 6 svm 0.4615385 0.072480181 0.3373494 0.7777778 0.4705882
## Accuracy...30 6 rf 0.5897436 0.093023256 0.3636364 0.4444444 0.4000000
## Accuracy...31 7 nb 0.3220339 -0.055928412 0.2843137 0.8055556 0.4202899
## Accuracy...32 7 dt 0.6440678 0.209821429 0.4318182 0.5277778 0.4750000
## Accuracy...33 7 knn 0.6016949 0.134789392 0.3829787 0.5000000 0.4337349
## Accuracy...34 7 svm 0.4745763 0.055268595 0.3289474 0.6944444 0.4464286
## Accuracy...35 7 rf 0.6779661 0.240514905 0.4722222 0.4722222 0.4722222
## Accuracy...36 8 nb 0.3277311 -0.003372681 0.3097345 0.9459459 0.4666667
## Accuracy...37 8 dt 0.6470588 0.222464219 0.4444444 0.5405405 0.4878049
## Accuracy...38 8 knn 0.5882353 0.111534359 0.3750000 0.4864865 0.4235294
## Accuracy...39 8 svm 0.5294118 0.143224479 0.3733333 0.7567568 0.5000000
## Accuracy...40 8 rf 0.6890756 0.329117782 0.5000000 0.6486486 0.5647059
## Accuracy...41 9 nb 0.4529915 0.073496659 0.3372093 0.8055556 0.4754098
## Accuracy...42 9 dt 0.6410256 0.170212766 0.4210526 0.4444444 0.4324324
## Accuracy...43 9 knn 0.5042735 -0.035714286 0.2884615 0.4166667 0.3409091
## Accuracy...44 9 svm 0.4700855 0.060606061 0.3333333 0.7222222 0.4561404
## Accuracy...45 9 rf 0.6495726 0.171073095 0.4285714 0.4166667 0.4225352
## Accuracy...46 10 nb 0.4067797 -0.008054674 0.3103448 0.7297297 0.4354839
## Accuracy...47 10 dt 0.6271186 0.193036991 0.4255319 0.5405405 0.4761905
## Accuracy...48 10 knn 0.5762712 0.119140042 0.3773585 0.5405405 0.4444444
## Accuracy...49 10 svm 0.4745763 0.045656144 0.3333333 0.6756757 0.4464286
## Accuracy...50 10 rf 0.6271186 0.203925176 0.4285714 0.5675676 0.4883721
##
## === RATA-RATA METRIK PER MODEL ===
hasil_summary <- hasil_all %>%
group_by(Model) %>%
summarise(across(Accuracy:F1_Score, mean, na.rm = TRUE), .groups = "drop")
print(hasil_summary)
## # A tibble: 5 × 6
## Model Accuracy Kappa Precision Recall F1_Score
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dt 0.618 0.199 0.420 0.594 0.487
## 2 knn 0.564 0.0990 0.363 0.534 0.431
## 3 nb 0.342 -0.0183 0.303 0.860 0.447
## 4 rf 0.648 0.214 0.442 0.520 0.476
## 5 svm 0.492 0.108 0.355 0.778 0.487
# ===============================================
# SPATIAL K-FOLD BIASA + SMOTE + METRIK LENGKAP
# ===============================================
library(CAST)
library(caret)
library(e1071)
library(rpart)
library(ranger)
library(klaR)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
library(themis)
library(recipes)
library(dplyr)
library(yardstick)
library(readxl)
set.seed(42)
# 1. Load & siapkan data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan","Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)
# 2. Buat cluster spasial
desa_df$spatial_cluster <- kmeans(desa_df[, c("lon", "lat")], centers = 10)$cluster
# 3. Spatial CV biasa
spatial_folds <- CreateSpacetimeFolds(desa_df, spacevar = "spatial_cluster", k = 10)
folds_index <- spatial_folds$indexOut
# 4. Formula
form <- Y ~ . -lon -lat -spatial_cluster
# 5. Looping Fold
results_all <- list()
for (i in 1:10) {
cat("\n=== Fold", i, "===\n")
train_data <- desa_df[-folds_index[[i]], ]
test_data <- desa_df[ folds_index[[i]], ]
# SMOTE
rec <- recipe(Y ~ ., data = train_data) %>%
step_smote(Y) %>%
prep()
train_data_smote <- bake(rec, new_data = NULL)
models <- list()
preds <- list()
metrics <- list()
# MODELS:
models$nb <- naiveBayes(form, data = train_data_smote)
preds$nb <- predict(models$nb, test_data)
models$dt <- rpart(form, data = train_data_smote)
preds$dt <- predict(models$dt, test_data, type = "class")
models$knn <- train(form, data = train_data_smote, method = "knn",
trControl = trainControl(method = "none"),
tuneGrid = data.frame(k = 10))
preds$knn <- predict(models$knn, test_data)
models$svm <- svm(form, data = train_data_smote)
preds$svm <- predict(models$svm, test_data)
models$rf <- ranger(formula = form, data = train_data_smote, num.trees = 500)
preds$rf <- predict(models$rf, data = test_data)$predictions
# Evaluasi semua model
for (model_name in names(preds)) {
pred <- preds[[model_name]]
obs <- test_data$Y
eval_df <- data.frame(obs = obs, pred = pred)
cm <- confusionMatrix(pred, obs, positive = "Rentan")
acc <- cm$overall["Accuracy"]
kappa <- cm$overall["Kappa"]
prec <- precision(eval_df, truth = obs, estimate = pred)$.estimate
rec <- recall(eval_df, truth = obs, estimate = pred)$.estimate
f1 <- f_meas(eval_df, truth = obs, estimate = pred)$.estimate
row <- data.frame(
Fold = i,
Model = model_name,
Accuracy = acc,
Kappa = kappa,
Precision = prec,
Recall = rec,
F1_Score = f1
)
metrics[[model_name]] <- row
}
results_all[[i]] <- bind_rows(metrics)
}
##
## === Fold 1 ===
##
## === Fold 2 ===
##
## === Fold 3 ===
##
## === Fold 4 ===
##
## === Fold 5 ===
##
## === Fold 6 ===
##
## === Fold 7 ===
##
## === Fold 8 ===
##
## === Fold 9 ===
##
## === Fold 10 ===
## Fold Model Accuracy Kappa Precision Recall
## Accuracy...1 1 nb 0.1927083 -0.007993497 0.18947368 0.97297297
## Accuracy...2 1 dt 0.7083333 0.099648300 0.26829268 0.29729730
## Accuracy...3 1 knn 0.6562500 0.083731020 0.24561404 0.37837838
## Accuracy...4 1 svm 0.7187500 0.148628675 0.30232558 0.35135135
## Accuracy...5 1 rf 0.7968750 0.263861581 0.46153846 0.32432432
## Accuracy...11 2 nb 0.2598870 -0.011958277 0.18709677 0.85294118
## Accuracy...21 2 dt 0.6892655 0.174650276 0.30188679 0.47058824
## Accuracy...31 2 knn 0.5762712 0.003453194 0.19402985 0.38235294
## Accuracy...41 2 svm 0.4858757 0.054031832 0.21782178 0.64705882
## Accuracy...51 2 rf 0.6497175 0.220817949 0.31081081 0.67647059
## Accuracy...12 3 nb 0.1691176 -0.019368533 0.14615385 0.90476190
## Accuracy...22 3 dt 0.6470588 0.078746825 0.20000000 0.42857143
## Accuracy...32 3 knn 0.5588235 -0.066666667 0.11764706 0.28571429
## Accuracy...42 3 svm 0.3823529 0.060680809 0.18181818 0.85714286
## Accuracy...52 3 rf 0.6250000 0.082053997 0.20000000 0.47619048
## Accuracy...13 4 nb 0.6311475 -0.175588865 0.05263158 0.03571429
## Accuracy...23 4 dt 0.6393443 0.258972943 0.36206897 0.75000000
## Accuracy...33 4 knn 0.5737705 0.055390113 0.26000000 0.46428571
## Accuracy...43 4 svm 0.3852459 0.042285954 0.24731183 0.82142857
## Accuracy...53 4 rf 0.5000000 0.000000000 0.22950820 0.50000000
## Accuracy...14 5 nb 0.3863636 0.026229508 0.37209302 1.00000000
## Accuracy...24 5 dt 0.5000000 0.000000000 0.36363636 0.50000000
## Accuracy...34 5 knn 0.5227273 -0.017621145 0.35294118 0.37500000
## Accuracy...44 5 svm 0.4545455 0.070422535 0.38888889 0.87500000
## Accuracy...54 5 rf 0.5000000 -0.052173913 0.33333333 0.37500000
## Accuracy...15 6 nb 0.4878049 -0.108821636 0.52054795 0.84444444
## Accuracy...25 6 dt 0.7195122 0.421117250 0.70370370 0.84444444
## Accuracy...35 6 knn 0.6341463 0.261261261 0.66666667 0.66666667
## Accuracy...45 6 svm 0.5853659 0.097734628 0.57333333 0.95555556
## Accuracy...55 6 rf 0.7804878 0.565114909 0.86486486 0.71111111
## Accuracy...16 7 nb 0.6444444 0.037433155 0.76470588 0.76470588
## Accuracy...26 7 dt 0.5222222 0.105409154 0.82051282 0.47058824
## Accuracy...36 7 knn 0.5111111 0.148020654 0.87500000 0.41176471
## Accuracy...46 7 svm 0.6555556 0.260731320 0.86274510 0.64705882
## Accuracy...56 7 rf 0.5333333 0.202531646 0.93333333 0.41176471
## Accuracy...17 8 nb 0.1843972 0.008438819 0.16666667 1.00000000
## Accuracy...27 8 dt 0.4397163 0.055456627 0.18888889 0.73913043
## Accuracy...37 8 knn 0.3829787 -0.007722008 0.15957447 0.65217391
## Accuracy...47 8 svm 0.2482270 -0.001205788 0.16260163 0.86956522
## Accuracy...57 8 rf 0.5531915 0.151008315 0.23684211 0.78260870
## Accuracy...18 9 nb 0.2127660 -0.023543261 0.18181818 0.88888889
## Accuracy...28 9 dt 0.5319149 -0.001937984 0.19047619 0.44444444
## Accuracy...38 9 knn 0.4468085 0.114492754 0.24242424 0.88888889
## Accuracy...48 9 svm 0.2127660 0.010244735 0.19565217 1.00000000
## Accuracy...58 9 rf 0.5531915 -0.044444444 0.16666667 0.33333333
## Accuracy...19 10 nb 0.4563758 -0.168231536 0.51304348 0.70238095
## Accuracy...29 10 dt 0.5503356 0.096806297 0.61038961 0.55952381
## Accuracy...39 10 knn 0.5436242 0.065658429 0.59090909 0.61904762
## Accuracy...49 10 svm 0.5302013 0.010436433 0.56730769 0.70238095
## Accuracy...59 10 rf 0.5906040 0.210270223 0.71698113 0.45238095
## F1_Score
## Accuracy...1 0.31718062
## Accuracy...2 0.28205128
## Accuracy...3 0.29787234
## Accuracy...4 0.32500000
## Accuracy...5 0.38095238
## Accuracy...11 0.30687831
## Accuracy...21 0.36781609
## Accuracy...31 0.25742574
## Accuracy...41 0.32592593
## Accuracy...51 0.42592593
## Accuracy...12 0.25165563
## Accuracy...22 0.27272727
## Accuracy...32 0.16666667
## Accuracy...42 0.30000000
## Accuracy...52 0.28169014
## Accuracy...13 0.04255319
## Accuracy...23 0.48837209
## Accuracy...33 0.33333333
## Accuracy...43 0.38016529
## Accuracy...53 0.31460674
## Accuracy...14 0.54237288
## Accuracy...24 0.42105263
## Accuracy...34 0.36363636
## Accuracy...44 0.53846154
## Accuracy...54 0.35294118
## Accuracy...15 0.64406780
## Accuracy...25 0.76767677
## Accuracy...35 0.66666667
## Accuracy...45 0.71666667
## Accuracy...55 0.78048780
## Accuracy...16 0.76470588
## Accuracy...26 0.59813084
## Accuracy...36 0.56000000
## Accuracy...46 0.73949580
## Accuracy...56 0.57142857
## Accuracy...17 0.28571429
## Accuracy...27 0.30088496
## Accuracy...37 0.25641026
## Accuracy...47 0.27397260
## Accuracy...57 0.36363636
## Accuracy...18 0.30188679
## Accuracy...28 0.26666667
## Accuracy...38 0.38095238
## Accuracy...48 0.32727273
## Accuracy...58 0.22222222
## Accuracy...19 0.59296482
## Accuracy...29 0.58385093
## Accuracy...39 0.60465116
## Accuracy...49 0.62765957
## Accuracy...59 0.55474453
##
## === RATA-RATA METRIK PER MODEL (SPATIAL K-FOLD BIASA) ===
hasil_summary <- hasil_all %>%
group_by(Model) %>%
summarise(across(Accuracy:F1_Score, mean), .groups = "drop")
print(hasil_summary)
## # A tibble: 5 × 6
## Model Accuracy Kappa Precision Recall F1_Score
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dt 0.595 0.129 0.401 0.550 0.435
## 2 knn 0.541 0.0640 0.370 0.512 0.389
## 3 nb 0.363 -0.0443 0.309 0.797 0.405
## 4 rf 0.608 0.160 0.445 0.504 0.425
## 5 svm 0.466 0.0754 0.370 0.773 0.455
# ===============================================
# STRATIFIED SPATIAL K-FOLD + SMOTE + ALL METRICS
# ===============================================
library(dplyr)
library(purrr)
##
## Attaching package: 'purrr'
## The following object is masked from 'package:caret':
##
## lift
library(themis)
library(recipes)
library(e1071)
library(rpart)
library(ranger)
library(caret)
library(yardstick)
library(readxl)
set.seed(42)
# 1. Load dan siapkan data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan","Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)
# 2. Stratified spatial folds
cluster_per_kelas <- function(df_kelas, n_folds = 5) {
coords <- df_kelas[, c("lon", "lat")]
km <- kmeans(coords, centers = n_folds)
df_kelas$spatial_fold <- km$cluster
return(df_kelas)
}
desa_df_strat <- desa_df %>%
split(.$Y) %>%
map_dfr(~cluster_per_kelas(.x, 5))
folds_index <- lapply(1:5, function(k) {
which(desa_df_strat$spatial_fold == k)
})
form <- Y ~ . -lon -lat -spatial_fold
# 3. Loop CV semua model
results_all <- list()
for (i in 1:5) {
cat("\n=== STRATIFIED Fold", i, "===\n")
train_data <- desa_df_strat[-folds_index[[i]], ]
test_data <- desa_df_strat[ folds_index[[i]], ]
rec <- recipe(Y ~ ., data = train_data) %>%
step_smote(Y) %>%
prep()
train_data_smote <- bake(rec, new_data = NULL)
models <- list()
preds <- list()
metrics <- list()
# ============ MODEL 1: NAIVE BAYES
models$nb <- naiveBayes(form, data = train_data_smote)
preds$nb <- predict(models$nb, test_data)
# ============ MODEL 2: DECISION TREE
models$dt <- rpart(form, data = train_data_smote)
preds$dt <- predict(models$dt, test_data, type = "class")
# ============ MODEL 3: KNN
models$knn <- train(form, data = train_data_smote, method = "knn",
trControl = trainControl(method = "none"),
tuneGrid = data.frame(k = 5))
preds$knn <- predict(models$knn, test_data)
# ============ MODEL 4: SVM
models$svm <- svm(form, data = train_data_smote)
preds$svm <- predict(models$svm, test_data)
# ============ MODEL 5: RANDOM FOREST
models$rf <- ranger(formula = form, data = train_data_smote, num.trees = 500)
preds$rf <- predict(models$rf, data = test_data)$predictions
# Hitung metrik semua model
for (model_name in names(preds)) {
pred <- preds[[model_name]]
obs <- test_data$Y
eval_df <- data.frame(obs = obs, pred = pred)
cm <- confusionMatrix(pred, obs, positive = "Rentan")
acc <- cm$overall["Accuracy"]
kappa <- cm$overall["Kappa"]
prec <- precision(eval_df, truth = obs, estimate = pred)$.estimate
rec <- recall(eval_df, truth = obs, estimate = pred)$.estimate
f1 <- f_meas(eval_df, truth = obs, estimate = pred)$.estimate
row <- data.frame(
Fold = i,
Model = model_name,
Accuracy = acc,
Kappa = kappa,
Precision = prec,
Recall = rec,
F1_Score = f1
)
metrics[[model_name]] <- row
}
results_all[[i]] <- bind_rows(metrics)
}
##
## === STRATIFIED Fold 1 ===
##
## === STRATIFIED Fold 2 ===
##
## === STRATIFIED Fold 3 ===
##
## === STRATIFIED Fold 4 ===
##
## === STRATIFIED Fold 5 ===
# Gabungkan hasil semua fold
hasil_all <- do.call(rbind, results_all)
# Output hasil per model per fold
print(hasil_all)
## Fold Model Accuracy Kappa Precision Recall F1_Score
## Accuracy...1 1 nb 0.4230769 0.051383399 0.3917526 0.9743590 0.5588235
## Accuracy...2 1 dt 0.5096154 0.081081081 0.4090909 0.6923077 0.5142857
## Accuracy...3 1 knn 0.5192308 0.024390244 0.3877551 0.4871795 0.4318182
## Accuracy...4 1 svm 0.4807692 0.103734440 0.4117647 0.8974359 0.5645161
## Accuracy...5 1 rf 0.5000000 -0.004830918 0.3725490 0.4871795 0.4222222
## Accuracy...11 2 nb 0.2067308 -0.119373777 0.1573034 0.6511628 0.2533937
## Accuracy...21 2 dt 0.4182692 0.041876047 0.2253521 0.7441860 0.3459459
## Accuracy...31 2 knn 0.4182692 -0.004790802 0.2045455 0.6279070 0.3085714
## Accuracy...41 2 svm 0.2740385 0.014805521 0.2127660 0.9302326 0.3463203
## Accuracy...51 2 rf 0.5096154 0.013668061 0.2135922 0.5116279 0.3013699
## Accuracy...12 3 nb 0.8168498 0.145165331 0.3200000 0.1951220 0.2424242
## Accuracy...22 3 dt 0.5677656 0.017146693 0.1592920 0.4390244 0.2337662
## Accuracy...32 3 knn 0.5970696 0.094336208 0.2000000 0.5609756 0.2948718
## Accuracy...42 3 svm 0.6886447 -0.041937946 0.1206897 0.1707317 0.1414141
## Accuracy...52 3 rf 0.5494505 0.015653856 0.1583333 0.4634146 0.2360248
## Accuracy...13 4 nb 0.5384615 0.142087542 0.4609375 0.7972973 0.5841584
## Accuracy...23 4 dt 0.6538462 0.262257110 0.5901639 0.4864865 0.5333333
## Accuracy...33 4 knn 0.5769231 0.106135987 0.4769231 0.4189189 0.4460432
## Accuracy...43 4 svm 0.5219780 0.072516401 0.4386792 0.6283784 0.5166667
## Accuracy...53 4 rf 0.6153846 0.169383231 0.5363636 0.3986486 0.4573643
## Accuracy...14 5 nb 0.4069264 -0.027933868 0.3981043 0.8936170 0.5508197
## Accuracy...24 5 dt 0.6017316 0.177617831 0.5104167 0.5212766 0.5157895
## Accuracy...34 5 knn 0.5454545 0.081284800 0.4495413 0.5212766 0.4827586
## Accuracy...44 5 svm 0.4978355 0.065690377 0.4320988 0.7446809 0.5468750
## Accuracy...54 5 rf 0.5844156 0.124378109 0.4880952 0.4361702 0.4606742
##
## === RATA-RATA METRIK PER MODEL ===
hasil_summary <- hasil_all %>%
group_by(Model) %>%
summarise(across(Accuracy:F1_Score, mean), .groups = "drop")
print(hasil_summary)
## # A tibble: 5 × 6
## Model Accuracy Kappa Precision Recall F1_Score
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 dt 0.550 0.116 0.379 0.577 0.429
## 2 knn 0.531 0.0603 0.344 0.523 0.393
## 3 nb 0.478 0.0383 0.346 0.702 0.438
## 4 rf 0.552 0.0637 0.354 0.459 0.376
## 5 svm 0.493 0.0430 0.323 0.674 0.423
# =====================================================
# RANDOM FOREST - K-FOLD + SMOTE + ALL EVALUATION METRICS
# =====================================================
library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(randomForest)
## randomForest 4.7-1.2
## Type rfNews() to see new features/changes/bug fixes.
##
## Attaching package: 'randomForest'
## The following object is masked from 'package:ggplot2':
##
## margin
## The following object is masked from 'package:ranger':
##
## importance
## The following object is masked from 'package:dplyr':
##
## combine
## Type 'citation("pROC")' for a citation.
##
## Attaching package: 'pROC'
## The following objects are masked from 'package:stats':
##
## cov, smooth, var
library(ggplot2)
library(tibble)
set.seed(42)
# === 1. Load dan relabel target
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- as.factor(desa_df$Y)
# === 2. Setup K-Fold CV
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ . -lon -lat
# === 3. Inisialisasi hasil
results_all <- list()
roc_plot <- TRUE # Aktifkan untuk plot ROC
colors <- rainbow(k)
# === 4. Loop per Fold
for (i in 1:k) {
cat("\n=== Fold", i, "===\n")
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[ test_idx, ]
# === SMOTE
rec <- recipe(Y ~ ., data = train_data) %>%
step_smote(Y) %>%
prep()
train_balanced <- bake(rec, new_data = NULL)
# === Train Random Forest
rf_model <- randomForest(formula = form,
data = train_balanced,
ntree = 500,
importance = TRUE)
# === Predict kelas dan probabilitas
rf_pred <- predict(rf_model, newdata = test_data)
rf_prob <- predict(rf_model, newdata = test_data, type = "prob")
eval_df <- data.frame(obs = test_data$Y, pred = rf_pred)
# === Metrik Evaluasi
acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
# === ROC & AUC
roc_obj <- roc(response = test_data$Y,
predictor = rf_prob[, "Rentan"],
levels = c("Tahan", "Rentan"))
auc_val <- as.numeric(auc(roc_obj))
# === ROC Curve plot
if (roc_plot) {
if (i == 1) {
plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold")
} else {
plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
}
}
# === Simpan hasil fold ke list
results_all[[i]] <- tibble(
Fold = i,
Model = "rf",
Accuracy = acc,
Kappa = kap,
Precision = prec,
Recall = rec,
F1_Score = f1,
AUC = auc_val
)
if (i == 1) {
rf_model_first <- rf_model # untuk varImpPlot
}
}
##
## === Fold 1 ===
## Setting direction: controls < cases
##
## === Fold 2 ===
## Setting direction: controls < cases
##
## === Fold 3 ===
## Setting direction: controls < cases
##
## === Fold 4 ===
## Setting direction: controls < cases
##
## === Fold 5 ===
## Setting direction: controls < cases
##
## === Fold 6 ===
## Setting direction: controls < cases
##
## === Fold 7 ===
## Setting direction: controls < cases
##
## === Fold 8 ===
## Setting direction: controls < cases
##
## === Fold 9 ===
## Setting direction: controls < cases
##
## === Fold 10 ===
## Setting direction: controls < cases
# === 5. Gabungkan dan tampilkan hasil semua fold
rf_kfold_smote <- bind_rows(results_all)
cat("\n=== HASIL PER FOLD (RF) ===\n")
##
## === HASIL PER FOLD (RF) ===
## # A tibble: 10 × 8
## Fold Model Accuracy Kappa Precision Recall F1_Score AUC
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 rf 0.605 0.181 0.407 0.595 0.484 0.613
## 2 2 rf 0.695 0.349 0.510 0.676 0.581 0.776
## 3 3 rf 0.607 0.118 0.381 0.444 0.410 0.616
## 4 4 rf 0.686 0.309 0.489 0.611 0.543 0.747
## 5 5 rf 0.605 0.0983 0.375 0.405 0.390 0.549
## 6 6 rf 0.590 0.119 0.375 0.5 0.429 0.600
## 7 7 rf 0.627 0.147 0.4 0.444 0.421 0.643
## 8 8 rf 0.630 0.174 0.419 0.486 0.45 0.689
## 9 9 rf 0.632 0.157 0.410 0.444 0.427 0.635
## 10 10 rf 0.593 0.143 0.392 0.541 0.455 0.644
##
## === RATA-RATA METRIK (RF) ===
## # A tibble: 1 × 6
## Accuracy Kappa Precision Recall F1_Score AUC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.627 0.180 0.416 0.515 0.459 0.651
# === 6. Plot Variable Importance dari RF
varImpPlot(rf_model_first, main = "Random Forest Variable Importance (Fold 1)")
# ===================================================
# DECISION TREE (rpart) - K-FOLD + SMOTE + VISUALISASI
# ===================================================
library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(rpart)
library(rpart.plot)
library(yardstick)
library(pROC)
library(ggplot2)
library(tibble)
set.seed(42)
# 1. Load & relabel data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Renten","Rentan","Agak Rentan","Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan","Rentan"))
# 2. Siapkan K‑Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ . -lon -lat
results_all <- list()
colors <- rainbow(k)
# 3. Loop tiap fold
for (i in seq_along(folds)) {
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[test_idx, ]
# 3.a Apply SMOTE
rec <- recipe(Y ~ ., data = train_data) %>%
step_smote(Y) %>%
prep()
train_balanced <- bake(rec, new_data = NULL)
# 3.b Train Decision Tree
dt_model <- rpart(formula = form,
data = train_balanced,
method = "class",
control = rpart.control(cp = 0.01))
# 3.c Predict
dt_pred <- predict(dt_model, newdata = test_data, type = "class")
dt_prob <- predict(dt_model, newdata = test_data, type = "prob")[, "Rentan"]
eval_df <- tibble(obs = test_data$Y, pred = dt_pred)
# 3.d Metrics
acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
# 3.e ROC & AUC
roc_obj <- roc(response = test_data$Y, predictor = dt_prob,
levels = c("Tahan","Rentan"))
auc_val <- as.numeric(auc(roc_obj))
plot(roc_obj, col = colors[i], lwd = 2,
main = "ROC Curve per Fold (Decision Tree)",
add = i != 1)
# 3.f Simpan hasil dan model fold 1
results_all[[i]] <- tibble(
Fold = i,
Model = "dt",
Accuracy = acc,
Kappa = kap,
Precision = prec,
Recall = rec,
F1_Score = f1,
AUC = auc_val
)
if (i == 3) {
dt_first_model <- dt_model
}
}
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
## Setting direction: controls < cases
# 4. Hasil akhir
dt_kfold_smote <- bind_rows(results_all)
cat("\n=== Decision Tree – Hasil tiap fold ===\n")
##
## === Decision Tree – Hasil tiap fold ===
## # A tibble: 10 × 8
## Fold Model Accuracy Kappa Precision Recall F1_Score AUC
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 dt 0.538 0.123 0.765 0.476 0.586 0.605
## 2 2 dt 0.661 0.295 0.815 0.654 0.726 0.702
## 3 3 dt 0.718 0.407 0.864 0.704 0.776 0.713
## 4 4 dt 0.542 0.114 0.759 0.5 0.603 0.565
## 5 5 dt 0.613 0.148 0.743 0.671 0.705 0.569
## 6 6 dt 0.564 0.120 0.75 0.556 0.638 0.579
## 7 7 dt 0.610 0.205 0.790 0.598 0.681 0.632
## 8 8 dt 0.681 0.325 0.824 0.683 0.747 0.691
## 9 9 dt 0.521 0.0396 0.712 0.519 0.6 0.576
## 10 10 dt 0.602 0.155 0.75 0.630 0.685 0.594
##
## === Rata-rata metrik ===
## # A tibble: 1 × 6
## Accuracy Kappa Precision Recall F1_Score AUC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.605 0.193 0.777 0.599 0.675 0.622
# 5. Plot variable importance
# Fix variable importance data frame
varImp_dt <- data.frame(
Importance = dt_first_model$variable.importance,
Feature = names(dt_first_model$variable.importance)
)
ggplot(varImp_dt, aes(x = reorder(Feature, Importance), y = Importance)) +
geom_col(fill = "coral") +
coord_flip() +
labs(
title = "Decision Tree - Variable Importance",
x = "Feature", y = "Importance"
)
# 6. Tampilkan pohon
rpart.plot(dt_first_model, main = "Decision Tree (Fold 1)", type = 4, extra = 101)
# ====================================================
# KNN (caret) - 10-Fold Cross Validation + SMOTE
# Tanpa kolom lon dan lat
# ====================================================
library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(yardstick)
library(pROC)
library(ggplot2)
library(tibble)
set.seed(42)
# === 1. Load data & ubah target
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
# Relabel target
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))
# Hapus kolom lon dan lat
desa_df <- desa_df %>% dplyr::select(-lon, -lat)
# === 2. Setup K-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ .
results_all <- list()
colors <- rainbow(k)
# === 3. Loop setiap fold
for (i in seq_along(folds)) {
cat("\n=== Fold", i, "===\n")
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[ test_idx, ]
# === 3a. SMOTE
rec <- recipe(form, data = train_data) %>%
step_smote(Y) %>%
prep()
train_balanced <- bake(rec, new_data = NULL)
# === 3b. Train KNN (k = 5)
knn_model <- train(
form, data = train_balanced,
method = "knn",
trControl = trainControl(method = "none"),
tuneGrid = data.frame(k = 5)
)
# === 3c. Predict
knn_pred <- predict(knn_model, test_data)
knn_prob <- predict(knn_model, test_data, type = "prob")[, "Rentan"]
eval_df <- tibble(obs = test_data$Y, pred = knn_pred)
# === 3d. Metrics
acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
# === 3e. ROC + AUC
roc_obj <- roc(response = test_data$Y, predictor = knn_prob, levels = c("Tahan", "Rentan"))
auc_val <- as.numeric(auc(roc_obj))
# Plot ROC
if (i == 1) {
plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (KNN)")
} else {
plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
}
# Simpan hasil
results_all[[i]] <- tibble(
Fold = i,
Model = "knn",
Accuracy = acc,
Kappa = kap,
Precision = prec,
Recall = rec,
F1_Score = f1,
AUC = auc_val
)
}
##
## === Fold 1 ===
## Setting direction: controls < cases
##
## === Fold 2 ===
## Setting direction: controls < cases
##
## === Fold 3 ===
## Setting direction: controls < cases
##
## === Fold 4 ===
## Setting direction: controls < cases
##
## === Fold 5 ===
## Setting direction: controls < cases
##
## === Fold 6 ===
## Setting direction: controls < cases
##
## === Fold 7 ===
## Setting direction: controls < cases
##
## === Fold 8 ===
## Setting direction: controls < cases
##
## === Fold 9 ===
## Setting direction: controls < cases
##
## === Fold 10 ===
## Setting direction: controls < cases
# === 4. Gabungkan dan tampilkan hasil
knn_kfold_smote <- bind_rows(results_all)
cat("\n=== Hasil per Fold (KNN) ===\n")
##
## === Hasil per Fold (KNN) ===
## # A tibble: 10 × 8
## Fold Model Accuracy Kappa Precision Recall F1_Score AUC
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 knn 0.555 0.123 0.754 0.524 0.619 0.578
## 2 2 knn 0.551 0.0843 0.726 0.556 0.629 0.551
## 3 3 knn 0.624 0.169 0.753 0.679 0.714 0.641
## 4 4 knn 0.627 0.229 0.797 0.622 0.699 0.636
## 5 5 knn 0.597 0.136 0.743 0.634 0.684 0.585
## 6 6 knn 0.556 0.0315 0.704 0.617 0.658 0.595
## 7 7 knn 0.551 -0.00450 0.693 0.634 0.662 0.495
## 8 8 knn 0.613 0.183 0.765 0.634 0.693 0.609
## 9 9 knn 0.564 0.0569 0.714 0.617 0.662 0.516
## 10 10 knn 0.5 0.0306 0.704 0.469 0.563 0.598
##
## === Rata-rata Metrik KNN ===
## # A tibble: 1 × 6
## Accuracy Kappa Precision Recall F1_Score AUC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.574 0.104 0.735 0.599 0.658 0.580
# ====================================================
# SVM - 10-Fold Cross Validation + SMOTE
# ====================================================
library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(e1071)
library(yardstick)
library(pROC)
library(ggplot2)
library(tibble)
set.seed(42)
# === 1. Load data & ubah target
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
# Relabel target
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))
# Hapus kolom lon dan lat
desa_df <- desa_df %>% dplyr::select(-lon, -lat)
# === 2. Setup 10-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ .
results_all <- list()
colors <- rainbow(k)
# === 3. Loop tiap fold
for (i in seq_along(folds)) {
cat("\n=== Fold", i, "===\n")
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[test_idx, ]
# SMOTE
rec <- recipe(form, data = train_data) %>%
step_smote(Y) %>%
prep()
train_balanced <- bake(rec, new_data = NULL)
# Train SVM (default: radial kernel)
svm_model <- svm(form, data = train_balanced, probability = TRUE)
# Predict
svm_pred <- predict(svm_model, test_data)
svm_prob <- attr(predict(svm_model, test_data, probability = TRUE), "probabilities")[, "Rentan"]
eval_df <- tibble(obs = test_data$Y, pred = svm_pred)
# Metrics
acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
# ROC & AUC
roc_obj <- roc(response = test_data$Y, predictor = svm_prob, levels = c("Tahan", "Rentan"))
auc_val <- as.numeric(auc(roc_obj))
# ROC plot
if (i == 1) {
plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (SVM)")
} else {
plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
}
# Simpan hasil
results_all[[i]] <- tibble(
Fold = i,
Model = "svm",
Accuracy = acc,
Kappa = kap,
Precision = prec,
Recall = rec,
F1_Score = f1,
AUC = auc_val
)
}
##
## === Fold 1 ===
## Setting direction: controls < cases
##
## === Fold 2 ===
## Setting direction: controls < cases
##
## === Fold 3 ===
## Setting direction: controls < cases
##
## === Fold 4 ===
## Setting direction: controls < cases
##
## === Fold 5 ===
## Setting direction: controls < cases
##
## === Fold 6 ===
## Setting direction: controls < cases
##
## === Fold 7 ===
## Setting direction: controls < cases
##
## === Fold 8 ===
## Setting direction: controls < cases
##
## === Fold 9 ===
## Setting direction: controls < cases
##
## === Fold 10 ===
## Setting direction: controls < cases
# === 4. Gabungkan hasil
svm_kfold_smote <- bind_rows(results_all)
cat("\n=== Hasil per Fold (SVM) ===\n")
##
## === Hasil per Fold (SVM) ===
## # A tibble: 10 × 8
## Fold Model Accuracy Kappa Precision Recall F1_Score AUC
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 svm 0.403 0.0413 0.762 0.195 0.311 0.567
## 2 2 svm 0.534 0.177 0.842 0.395 0.538 0.687
## 3 3 svm 0.538 0.172 0.829 0.420 0.557 0.669
## 4 4 svm 0.576 0.264 0.944 0.415 0.576 0.711
## 5 5 svm 0.471 0.0632 0.744 0.354 0.479 0.603
## 6 6 svm 0.462 0.0725 0.765 0.321 0.452 0.603
## 7 7 svm 0.458 0.0362 0.725 0.354 0.475 0.586
## 8 8 svm 0.529 0.143 0.795 0.427 0.556 0.634
## 9 9 svm 0.462 0.0510 0.737 0.346 0.471 0.585
## 10 10 svm 0.466 0.0247 0.705 0.383 0.496 0.552
##
## === Rata-rata Metrik SVM ===
## # A tibble: 1 × 6
## Accuracy Kappa Precision Recall F1_Score AUC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.490 0.105 0.785 0.361 0.491 0.620
# ==========================================================
# Naive Bayes (klaR::NaiveBayes) + Spatial K-Fold + SMOTE
# ==========================================================
library(readxl)
library(dplyr)
library(caret)
library(CAST) # untuk CreateSpacetimeFolds
library(themis)
library(recipes)
library(klaR) # NaiveBayes
library(yardstick)
library(pROC)
library(tibble)
library(ggplot2)
set.seed(42)
# === 1. Load & siapkan data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
# Relabel target jadi biner
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))
# === 2. Spatial clustering dan k-fold CV
k <- 5
desa_df$spatial_cluster <- kmeans(desa_df[, c("lon", "lat")], centers = k)$cluster
spatial_folds <- CreateSpacetimeFolds(desa_df, spacevar = "spatial_cluster", k = k)
folds_index <- spatial_folds$indexOut
# === 3. Siapkan formula dan hapus kolom spatial
form <- Y ~ . -lon -lat -spatial_cluster
results_all <- list()
colors <- rainbow(k)
# === 4. Loop tiap fold
for (i in seq_along(folds_index)) {
cat("\n=== Fold", i, "===\n")
test_idx <- folds_index[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[ test_idx, ]
# === SMOTE
rec <- recipe(Y ~ ., data = train_data) %>%
step_rm(lon, lat, spatial_cluster) %>%
step_smote(Y) %>%
prep()
train_balanced <- bake(rec, new_data = NULL)
test_data_clean <- test_data %>% dplyr::select(-lon, -lat, -spatial_cluster)
# === Train Naive Bayes (klaR)
nb_model <- NaiveBayes(Y ~ ., data = train_balanced)
nb_pred <- predict(nb_model, test_data_clean)$class
nb_prob <- predict(nb_model, test_data_clean)$posterior[, "Rentan"]
eval_df <- tibble(obs = test_data_clean$Y, pred = nb_pred)
# === Metrik
acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
# === ROC & AUC
roc_obj <- roc(response = test_data_clean$Y, predictor = nb_prob, levels = c("Tahan", "Rentan"))
auc_val <- as.numeric(auc(roc_obj))
# === Plot ROC
if (i == 1) {
plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (Naive Bayes)")
} else {
plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
}
# === Simpan hasil
results_all[[i]] <- tibble(
Fold = i,
Model = "nb",
Accuracy = acc,
Kappa = kap,
Precision = prec,
Recall = rec,
F1_Score = f1,
AUC = auc_val
)
}
##
## === Fold 1 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 41
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 51
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 152
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 173
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 174
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 175
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 176
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 243
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 339
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 41
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 51
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 152
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 173
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 174
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 175
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 176
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 243
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 339
## Setting direction: controls < cases
##
## === Fold 2 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 176
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 177
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 179
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 224
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 230
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 9
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 176
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 177
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 179
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 224
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 230
## Setting direction: controls > cases
##
## === Fold 3 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 44
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 61
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 76
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 102
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 106
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 31
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 44
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 61
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 76
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 96
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 102
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 106
## Setting direction: controls > cases
##
## === Fold 4 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 121
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 121
## Setting direction: controls < cases
##
## === Fold 5 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 34
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 35
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 36
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 48
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 83
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 89
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 90
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 129
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 16
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 18
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 34
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 35
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 36
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 48
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 73
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 83
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 89
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 90
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 129
## Setting direction: controls < cases
# === 5. Gabungkan dan tampilkan hasil
nb_spatial_kfold_smote <- bind_rows(results_all)
cat("\n=== Hasil per Fold (Naive Bayes - Spatial K-Fold + SMOTE) ===\n")
##
## === Hasil per Fold (Naive Bayes - Spatial K-Fold + SMOTE) ===
## # A tibble: 5 × 8
## Fold Model Accuracy Kappa Precision Recall F1_Score AUC
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 nb 0.232 -0.0200 0.588 0.0305 0.0580 0.594
## 2 2 nb 0.427 -0.0223 0.467 0.05 0.0903 0.541
## 3 3 nb 0.617 -0.163 0.692 0.846 0.761 0.597
## 4 4 nb 0.176 -0.00456 0.75 0.0197 0.0385 0.644
## 5 5 nb 0.456 -0.168 0.265 0.138 0.182 0.506
##
## === Rata-rata Metrik Naive Bayes ===
## # A tibble: 1 × 6
## Accuracy Kappa Precision Recall F1_Score AUC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.381 -0.0756 0.552 0.217 0.226 0.577
# ====================================================
# Naive Bayes - 10-Fold Cross Validation + SMOTE
# ====================================================
library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(klaR)
library(yardstick)
library(pROC)
library(tibble)
library(ggplot2)
set.seed(42)
# === 1. Load dan siapkan data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
# Relabel target jadi biner
desa_df$Y <- factor(
desa_df$Y,
levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan")
)
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))
# Hapus kolom lon dan lat
desa_df <- desa_df %>% dplyr::select(-lon, -lat)
# === 2. Setup K-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k, returnTrain = FALSE)
form <- Y ~ .
results_all <- list()
colors <- rainbow(k)
# === 3. Loop setiap fold
for (i in seq_along(folds)) {
cat("\n=== Fold", i, "===\n")
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[ test_idx, ]
# === 3a. SMOTE preprocessing
rec <- recipe(form, data = train_data) %>%
step_smote(Y) %>%
prep()
train_balanced <- bake(rec, new_data = NULL)
# === 3b. Train Naive Bayes
nb_model <- NaiveBayes(Y ~ ., data = train_balanced)
nb_pred <- predict(nb_model, test_data)$class
nb_prob <- predict(nb_model, test_data)$posterior[, "Rentan"]
eval_df <- tibble(obs = test_data$Y, pred = nb_pred)
# === 3c. Metrik
acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
# === 3d. ROC & AUC
roc_obj <- roc(response = test_data$Y, predictor = nb_prob, levels = c("Tahan", "Rentan"))
auc_val <- as.numeric(auc(roc_obj))
# === 3e. Plot ROC
if (i == 1) {
plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (Naive Bayes)")
} else {
plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
}
# === 3f. Simpan hasil
results_all[[i]] <- tibble(
Fold = i,
Model = "nb",
Accuracy = acc,
Kappa = kap,
Precision = prec,
Recall = rec,
F1_Score = f1,
AUC = auc_val
)
}
##
## === Fold 1 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 74
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 24
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 74
## Setting direction: controls < cases
##
## === Fold 2 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 42
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 99
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 30
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 42
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 99
## Setting direction: controls < cases
##
## === Fold 3 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 72
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 74
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 75
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 82
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 99
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 102
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 72
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 74
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 75
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 82
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 99
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 102
## Setting direction: controls > cases
##
## === Fold 4 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 72
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 85
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 86
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 8
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 27
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 29
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 72
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 85
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 86
## Setting direction: controls < cases
##
## === Fold 5 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 84
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 4
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 84
## Setting direction: controls < cases
##
## === Fold 6 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 53
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 5
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 19
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 53
## Setting direction: controls > cases
##
## === Fold 7 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 40
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 49
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 87
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 89
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 93
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 40
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 49
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 87
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 89
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 93
## Setting direction: controls > cases
##
## === Fold 8 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 40
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 6
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 28
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 40
## Setting direction: controls < cases
##
## === Fold 9 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 55
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 80
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 93
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 22
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 32
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 55
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 80
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 93
## Setting direction: controls > cases
##
## === Fold 10 ===
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 88
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 106
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 14
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 45
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 88
## Warning in FUN(X[[i]], ...): Numerical 0 probability for all classes with
## observation 106
## Setting direction: controls < cases
# === 4. Gabungkan semua hasil
nb_kfold_smote <- bind_rows(results_all)
# === 5. Output hasil
cat("\n=== Hasil per Fold (Naive Bayes - K-Fold + SMOTE) ===\n")
##
## === Hasil per Fold (Naive Bayes - K-Fold + SMOTE) ===
## # A tibble: 10 × 8
## Fold Model Accuracy Kappa Precision Recall F1_Score AUC
## <int> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 nb 0.328 -0.0228 0.6 0.0732 0.130 0.581
## 2 2 nb 0.339 -0.00458 0.667 0.0741 0.133 0.620
## 3 3 nb 0.265 -0.109 0.222 0.0247 0.0444 0.532
## 4 4 nb 0.305 -0.0294 0.5 0.0366 0.0682 0.612
## 5 5 nb 0.336 -0.00513 0.667 0.0732 0.132 0.499
## 6 6 nb 0.333 0.00393 0.714 0.0617 0.114 0.486
## 7 7 nb 0.322 -0.0452 0.571 0.0976 0.167 0.500
## 8 8 nb 0.319 -0.0111 0.6 0.0366 0.0690 0.600
## 9 9 nb 0.453 0.0631 0.758 0.309 0.439 0.487
## 10 10 nb 0.449 0.0274 0.711 0.333 0.454 0.489
##
## === Rata-rata Metrik Naive Bayes ===
## # A tibble: 1 × 6
## Accuracy Kappa Precision Recall F1_Score AUC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.345 -0.0133 0.601 0.112 0.175 0.541
# ====================================================
# XGBoost - 10-Fold Cross Validation + SMOTE
# ====================================================
library(readxl)
library(dplyr)
library(caret)
library(themis)
library(recipes)
library(xgboost)
##
## Attaching package: 'xgboost'
## The following object is masked from 'package:dplyr':
##
## slice
library(yardstick)
library(pROC)
library(tibble)
library(ggplot2)
set.seed(42)
# === 1. Load data
desa_df <- read_excel("C:/Users/User/Downloads/Pangan.xlsx")
desa_df$Y <- factor(desa_df$Y, levels = 1:6,
labels = c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan", "Tahan", "Sangat Tahan"))
desa_df$Y <- ifelse(desa_df$Y %in% c("Sangat Rentan", "Rentan", "Agak Rentan", "Agak Tahan"), "Rentan", "Tahan")
desa_df$Y <- factor(desa_df$Y, levels = c("Tahan", "Rentan"))
desa_df <- desa_df %>% dplyr::select(-lon, -lat)
# === 2. K-Fold
k <- 10
folds <- createFolds(desa_df$Y, k = k)
form <- Y ~ .
results_all <- list()
colors <- rainbow(k)
for (i in seq_along(folds)) {
cat("\n=== Fold", i, "===\n")
test_idx <- folds[[i]]
train_data <- desa_df[-test_idx, ]
test_data <- desa_df[test_idx, ]
rec <- recipe(form, data = train_data) %>%
step_smote(Y) %>%
prep()
train_bal <- bake(rec, new_data = NULL)
# One-hot encode for XGBoost
dtrain <- model.matrix(Y ~ . - 1, data = train_bal)
dtest <- model.matrix(Y ~ . - 1, data = test_data)
label_train <- as.numeric(train_bal$Y) - 1
label_test <- as.numeric(test_data$Y) - 1
xgb_model <- xgboost(
data = dtrain, label = label_train,
objective = "binary:logistic", nrounds = 100,
verbose = 0
)
pred_prob <- predict(xgb_model, dtest)
pred_class <- ifelse(pred_prob > 0.5, "Rentan", "Tahan")
eval_df <- tibble(obs = test_data$Y, pred = factor(pred_class, levels = c("Tahan", "Rentan")))
acc <- accuracy(eval_df, truth = obs, estimate = pred)[[3]]
kap <- kap(eval_df, truth = obs, estimate = pred)[[3]]
prec <- precision(eval_df, truth = obs, estimate = pred)[[3]]
rec <- recall(eval_df, truth = obs, estimate = pred)[[3]]
f1 <- f_meas(eval_df, truth = obs, estimate = pred)[[3]]
auc_val <- auc(roc(test_data$Y, pred_prob, levels = c("Tahan", "Rentan")))
# ROC
roc_obj <- roc(test_data$Y, pred_prob, levels = c("Tahan", "Rentan"))
if (i == 1) {
plot(roc_obj, col = colors[i], lwd = 2, main = "ROC Curve per Fold (XGBoost)")
} else {
plot(roc_obj, col = colors[i], lwd = 2, add = TRUE)
}
results_all[[i]] <- tibble(
Fold = i, Model = "xgboost",
Accuracy = acc, Kappa = kap, Precision = prec,
Recall = rec, F1_Score = f1, AUC = as.numeric(auc_val)
)
}
##
## === Fold 1 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 2 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 3 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 4 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 5 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 6 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 7 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 8 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 9 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === Fold 10 ===
## Setting direction: controls < cases
## Setting direction: controls < cases
##
## === RATA-RATA METRIK XGBoost ===
## # A tibble: 1 × 6
## Accuracy Kappa Precision Recall F1_Score AUC
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 0.643 0.207 0.767 0.697 0.729 0.654
# ===========================================
# 1. LIBRARY
# ===========================================
library(cluster) # silhouette, agnes, diana
library(factoextra) # fviz_nbclust, fviz_gap_stat
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
##
## Attaching package: 'kernlab'
## The following object is masked from 'package:purrr':
##
## cross
## The following object is masked from 'package:ggplot2':
##
## alpha
library(NbClust) # alternatif pemilih k
library(dplyr) # data wrangling
library(tibble)
library(ggplot2)
library(Rtsne) # optional visualisasi
library(fpc) # untuk CLARANS (via pamk)
# ===========================================
# 2. DATA (gunakan data numerik saja)
# ===========================================
data_klaster <- desa_df %>% dplyr::select(starts_with("X")) %>% scale()
# ===========================================
# 3. CARI K OPTIMAL (untuk k-means)
# ===========================================
fviz_nbclust(data_klaster, kmeans, method = "wss", k.max = 10) +
labs(title = "Elbow Method")
fviz_nbclust(data_klaster, kmeans, method = "silhouette", k.max = 10) +
labs(title = "Silhouette Method")
set.seed(123)
gap_stat <- clusGap(data_klaster, FUN = kmeans, K.max = 10, B = 50)
fviz_gap_stat(gap_stat)
# ===========================================
# 4. GUNAKAN K OPTIMAL
# ===========================================
k <- 2 # ganti dengan hasil dari grafik di atas
# ===========================================
# 5. KMEANS
# ===========================================
set.seed(42)
km_res <- kmeans(data_klaster, centers = k)
sil_km <- silhouette(km_res$cluster, dist(data_klaster))
avg_sil_km <- mean(sil_km[, 3])
# ===========================================
# 6. AGNES (Hierarki Aglomeratif)
# ===========================================
agnes_res <- agnes(data_klaster, method = "ward")
hc_agnes <- cutree(as.hclust(agnes_res), k = k)
sil_agnes <- silhouette(hc_agnes, dist(data_klaster))
avg_sil_agnes <- mean(sil_agnes[, 3])
# ===========================================
# 7. DIANA (Divisive)
# ===========================================
diana_res <- diana(data_klaster)
hc_diana <- cutree(as.hclust(diana_res), k = k)
sil_diana <- silhouette(hc_diana, dist(data_klaster))
avg_sil_diana <- mean(sil_diana[, 3])
# ===========================================
# 8. SPECTRAL CLUSTERING
# ===========================================
spec_res <- specc(as.matrix(data_klaster), centers = k)
sil_spec <- silhouette(as.integer(spec_res), dist(data_klaster))
avg_sil_spec <- mean(sil_spec[, 3])
# ===========================================
# 9. CLARANS (via pamk)
# ===========================================
clarans_res <- pamk(data_klaster, krange = k, usepam = FALSE)
clarans_cluster <- clarans_res$pamobject$clustering
sil_clarans <- silhouette(clarans_cluster, dist(data_klaster))
avg_sil_clarans <- mean(sil_clarans[, 3])
# ===========================================
# 10. RANGKUM HASIL
# ===========================================
hasil_klaster <- tibble(
Metode = c("K-Means", "Agnes (Hierarki)", "Diana", "Spectral", "CLARANS"),
Silhouette_Score = round(c(avg_sil_km, avg_sil_agnes, avg_sil_diana, avg_sil_spec, avg_sil_clarans), 3)
)
print(hasil_klaster)
## # A tibble: 5 × 2
## Metode Silhouette_Score
## <chr> <dbl>
## 1 K-Means 0.543
## 2 Agnes (Hierarki) 0.707
## 3 Diana 0.939
## 4 Spectral 0.712
## 5 CLARANS 0.531
# ===========================================
# 11. VISUALISASI
# ===========================================
ggplot(hasil_klaster, aes(x = reorder(Metode, Silhouette_Score), y = Silhouette_Score, fill = Metode)) +
geom_col(show.legend = FALSE) +
coord_flip() +
labs(title = "Perbandingan Metode Klaster (Silhouette Score)", x = "Metode", y = "Silhouette Score") +
theme_minimal(base_size = 14)
# ===========================================
# 1. LIBRARY
# ===========================================
library(cluster) # pam, clara, agnes, diana
library(factoextra) # fviz_nbclust
library(kernlab) # spectral clustering
library(fpc) # pamk (clarans)
library(dbscan) # dbscan
##
## Attaching package: 'dbscan'
## The following object is masked from 'package:fpc':
##
## dbscan
## The following object is masked from 'package:stats':
##
## as.dendrogram
library(dplyr) # wrangling
library(tibble)
library(ggplot2)
# ===========================================
# 2. DATA & PREP
# ===========================================
data_klaster <- desa_df %>% dplyr::select(starts_with("X")) %>% scale()
dist_mat <- dist(data_klaster)
# ===========================================
# 3. Fungsi Silhouette
# ===========================================
get_avg_sil <- function(cluster_vec) {
sil <- silhouette(cluster_vec, dist_mat)
mean(sil[, 3])
}
# ===========================================
# 4. K OPTIMUM PER METODE
# ===========================================
# --- KMEANS
sil_km_all <- sapply(2:10, function(k) {
cl <- kmeans(data_klaster, centers = k)$cluster
get_avg_sil(cl)
})
k_kmeans <- which.max(sil_km_all) + 1
# --- AGNES
sil_agnes_all <- sapply(2:10, function(k) {
cl <- cutree(as.hclust(agnes(data_klaster, method = "ward")), k)
get_avg_sil(cl)
})
k_agnes <- which.max(sil_agnes_all) + 1
# --- DIANA
sil_diana_all <- sapply(2:10, function(k) {
cl <- cutree(as.hclust(diana(data_klaster)), k)
get_avg_sil(cl)
})
k_diana <- which.max(sil_diana_all) + 1
# --- SPECTRAL
sil_spec_all <- sapply(2:10, function(k) {
cl <- specc(as.matrix(data_klaster), centers = k)
get_avg_sil(as.integer(cl))
})
k_spec <- which.max(sil_spec_all) + 1
# --- CLARANS (otomatis)
clarans_res <- pamk(data_klaster, krange = 2:10, usepam = FALSE)
k_clarans <- clarans_res$nc
# --- K-MEDOIDS (PAM)
sil_pam_all <- sapply(2:10, function(k) {
cl <- pam(data_klaster, k)$clustering
get_avg_sil(cl)
})
k_pam <- which.max(sil_pam_all) + 1
# --- CLARA
sil_clara_all <- sapply(2:10, function(k) {
cl <- clara(data_klaster, k)$clustering
get_avg_sil(cl)
})
k_clara <- which.max(sil_clara_all) + 1
# ===========================================
# 5. KLASTERING FINAL DENGAN K OPTIMAL
# ===========================================
# --- KMeans
km_final <- kmeans(data_klaster, centers = k_kmeans)
sil_km <- get_avg_sil(km_final$cluster)
# --- Agnes
agnes_final <- agnes(data_klaster, method = "ward")
hc_agnes <- cutree(as.hclust(agnes_final), k = k_agnes)
sil_agnes <- get_avg_sil(hc_agnes)
# --- Diana
diana_final <- diana(data_klaster)
hc_diana <- cutree(as.hclust(diana_final), k = k_diana)
sil_diana <- get_avg_sil(hc_diana)
# --- Spectral
spec_final <- specc(as.matrix(data_klaster), centers = k_spec)
sil_spec <- get_avg_sil(as.integer(spec_final))
# --- CLARANS
clarans_cluster <- clarans_res$pamobject$clustering
sil_clarans <- get_avg_sil(clarans_cluster)
# --- K-Medoids
pam_final <- pam(data_klaster, k = k_pam)
sil_pam <- get_avg_sil(pam_final$clustering)
# --- CLARA
clara_final <- clara(data_klaster, k = k_clara)
sil_clara <- get_avg_sil(clara_final$clustering)
# ===========================================
# 6. RANGKUM HASIL
# ===========================================
hasil_klaster <- tibble(
Metode = c("KMeans", "Agnes", "Diana", "Spectral", "CLARANS", "K-Medoids", "CLARA"),
K_Optimum = c(k_kmeans, k_agnes, k_diana, k_spec, k_clarans, k_pam, k_clara),
Silhouette_Score = round(c(sil_km, sil_agnes, sil_diana, sil_spec, sil_clarans, sil_pam, sil_clara), 3)
)
print(hasil_klaster)
## # A tibble: 7 × 3
## Metode K_Optimum Silhouette_Score
## <chr> <dbl> <dbl>
## 1 KMeans 4 0.676
## 2 Agnes 10 0.715
## 3 Diana 2 0.939
## 4 Spectral 2 0.852
## 5 CLARANS 4 0.678
## 6 K-Medoids 4 0.677
## 7 CLARA 4 0.678
# ===========================================
# 7. VISUALISASI
# ===========================================
ggplot(hasil_klaster, aes(x = reorder(Metode, Silhouette_Score), y = Silhouette_Score, fill = Metode)) +
geom_col(show.legend = FALSE) +
coord_flip() +
labs(title = "Perbandingan Metode Klaster (K Spesifik)", x = "Metode", y = "Silhouette Score") +
theme_minimal(base_size = 14)
## K-Means
# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster)
library(factoextra)
library(ggplot2)
library(corrplot)
## corrplot 0.95 loaded
## Linking to GEOS 3.13.1, GDAL 3.11.0, PROJ 9.6.0; sf_use_s2() is TRUE
##
## Attaching package: 'tidyr'
## The following object is masked from 'package:reshape2':
##
## smiths
# ======================================
# 2. BACA DATA & STANDARISASI
# ======================================
desa_klaster <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_klaster %>% dplyr::select(X1:X6))
dist_mat <- dist(data_scaled)
# ======================================
# 3. KORELASI
# ======================================
corrplot(cor(data_scaled), method = "circle", type = "upper")
# ======================================
# 4. CARI JUMLAH K OPTIMAL
# ======================================
silhouette_scores <- sapply(2:10, function(k) {
km <- kmeans(data_scaled, centers = k, nstart = 25)
sil <- silhouette(km$cluster, dist_mat)
mean(sil[, 3])
})
wss_values <- sapply(2:10, function(k) {
kmeans(data_scaled, centers = k, nstart = 25)$tot.withinss
})
set.seed(123)
gap_stat <- clusGap(data_scaled, FUN = kmeans, K.max = 10, B = 50)
gap_df <- as.data.frame(gap_stat$Tab)
gap_score <- gap_df$gap[2:10]
ch_index <- sapply(2:10, function(k) {
km <- kmeans(data_scaled, centers = k, nstart = 25)
calinhara(data_scaled, km$cluster)
})
db_index <- sapply(2:10, function(k) {
km <- kmeans(data_scaled, centers = k, nstart = 25)
index.DB(data_scaled, km$cluster)$DB
})
eval_df <- tibble(
K = 2:10,
Silhouette = round(silhouette_scores, 3),
WSS = round(wss_values, 2),
Gap = round(gap_score, 3),
Calinski_Harabasz = round(ch_index, 2),
Davies_Bouldin = round(db_index, 3)
)
print(eval_df)
## # A tibble: 9 × 6
## K Silhouette WSS Gap Calinski_Harabasz Davies_Bouldin
## <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 0.718 5258. 2.37 407. 1.61
## 2 3 0.658 4198. 2.76 403. 1.47
## 3 4 0.676 3096 2.81 432. 1.38
## 4 5 0.686 2409. 2.83 484. 0.928
## 5 6 0.706 1882. 2.94 684. 0.918
## 6 7 0.627 1708. 3.01 618. 0.886
## 7 8 0.631 1497. 2.98 586. 0.892
## 8 9 0.649 1228. 3.02 561. 0.929
## 9 10 0.661 938. 3.16 636. 0.875
# Visualisasi evaluasi
eval_long <- eval_df %>%
pivot_longer(-K, names_to = "Metrik", values_to = "Nilai")
ggplot(eval_long, aes(x = K, y = Nilai, color = Metrik)) +
geom_line() + geom_point() +
facet_wrap(~Metrik, scales = "free_y") +
labs(title = "Evaluasi K-Means Berdasarkan Nilai K", x = "Jumlah Klaster (k)") +
theme_minimal()
# Tambahan visualisasi individual
fviz_nbclust(data_scaled, kmeans, method = "wss", k.max = 10) +
geom_vline(xintercept = 4, linetype = 2)
fviz_nbclust(data_scaled, kmeans, method = "silhouette", k.max = 10) +
geom_vline(xintercept = 4, linetype = 2)
# ======================================
# 5. FIT KMEANS
# ======================================
k <- 4
set.seed(42)
kmeans_res <- kmeans(data_scaled, centers = k, nstart = 25)
desa_klaster$cluster <- factor(kmeans_res$cluster)
# ======================================
# 6. PROFIL VARIABEL PER KLASTER
# ======================================
profil_klaster <- desa_klaster %>%
group_by(cluster) %>%
summarise(across(X1:X6, mean), .groups = "drop")
print(profil_klaster)
## # A tibble: 4 × 7
## cluster X1 X2 X3 X4 X5 X6
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.573 0.0230 0.00205 91.6 -0.00118 0.00663
## 2 2 0.315 2.22 0.270 13.3 -0.535 0.513
## 3 3 0.0270 49.5 3.61 23.6 -0.504 1
## 4 4 0.0916 0.0169 0.00155 7.34 -0.00254 0.00228
profil_long <- melt(profil_klaster, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
geom_col(position = "dodge") +
labs(title = "Profil Variabel per Klaster", x = "Variabel", y = "Rata-rata") +
theme_minimal()
# ======================================
# 7. PCA VISUALISASI KLASTER
# ======================================
fviz_cluster(kmeans_res, data = data_scaled, ellipse.type = "norm", repel = F) +
labs(title = "Visualisasi Klaster (PCA)") +
theme_minimal()
## Too few points to calculate an ellipse
# ======================================
# 8. BACA PETA DAN GABUNGKAN
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source
## `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS: WGS 84
## Warning: NAs introduced by coercion
desa_klaster$KDEPUM <- as.numeric(desa_klaster$KDEPUM)
desa_map <- desa_shp %>%
left_join(desa_klaster %>% dplyr::select(KDEPUM = KDEPUM, cluster), by = "KDEPUM") %>%
filter(!is.na(cluster))
# ======================================
# 9. PETA KLASTER
# ======================================
ggplot(desa_map) +
geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Peta Klaster K-Means - Maluku Utara", fill = "Klaster") +
theme_minimal()
# ======================================
# 10. EVALUASI AKHIR
# ======================================
sil <- silhouette(kmeans_res$cluster, dist_mat)
fviz_silhouette(sil)
## cluster size ave.sil.width
## 1 1 323 0.53
## 2 2 103 0.16
## 3 3 2 -0.15
## 4 4 752 0.80
## Silhouette Average: 0.667
btw_ratio <- kmeans_res$betweenss / kmeans_res$totss
cat("BetweenSS/TotalSS Ratio:", round(btw_ratio, 3), "\n")
## BetweenSS/TotalSS Ratio: 0.562
ch_index_final <- calinhara(data_scaled, kmeans_res$cluster)
cat("Calinski-Harabasz Index:", round(ch_index_final, 3), "\n")
## Calinski-Harabasz Index: 503.675
dbi <- index.DB(data_scaled, kmeans_res$cluster)$DB
cat("Davies-Bouldin Index:", round(dbi, 3), "\n")
## Davies-Bouldin Index: 1.097
# ======================================
# 11. UKURAN KLASTER
# ======================================
print(table(desa_klaster$cluster))
##
## 1 2 3 4
## 323 103 2 752
ggplot(desa_klaster, aes(x = cluster)) +
geom_bar(fill = "steelblue") +
labs(title = "Jumlah Desa per Klaster", x = "Klaster", y = "Jumlah") +
theme_minimal()
# ======================================
# 12. CENTROID SPASIAL PER KLASTER
# ======================================
library(sf)
desa_map <- st_make_valid(desa_map)
# Hitung centroid setiap desa
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon_centroid <- coords[, 1]
desa_map$lat_centroid <- coords[, 2]
# Rata-rata koordinat per klaster
centroid_klaster <- desa_map %>%
group_by(cluster) %>%
summarise(
lon = mean(lon_centroid, na.rm = TRUE),
lat = mean(lat_centroid, na.rm = TRUE)
)
print(centroid_klaster)
## Simple feature collection with 4 features and 3 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS: WGS 84
## # A tibble: 4 × 4
## cluster lon lat geometry
## <fct> <dbl> <dbl> <MULTIPOLYGON [°]>
## 1 1 128. 0.844 (((125.4217 -1.767736, 125.4215 -1.767744, 125.4213 -1.7…
## 2 2 127. -0.179 (((128.321 0.901669, 128.3208 0.9019579, 128.3162 0.9134…
## 3 3 128. 0.339 (((127.8964 0.1746875, 127.8904 0.1728329, 127.8838 0.17…
## 4 4 128. 0.369 (((128.3602 1.480541, 128.3602 1.480656, 128.3602 1.4807…
# Tampilkan peta dengan centroid
ggplot() +
geom_sf(data = desa_map, aes(fill = cluster), color = "white") +
geom_point(data = centroid_klaster, aes(x = lon, y = lat), color = "red", size = 2) +
labs(title = "Peta Klaster + Centroid K-Means") +
theme_minimal()
# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster)
library(factoextra)
library(ggplot2)
library(corrplot)
library(fpc)
library(clusterSim)
library(reshape2)
library(sf)
library(tibble)
library(tidyr)
# ======================================
# 2. BACA DATA & STANDARISASI
# ======================================
desa_medoid <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_medoid %>% dplyr::select(X1:X6))
dist_mat <- dist(data_scaled) # PAM butuh matriks jarak
# ======================================
# 3. KORELASI
# ======================================
corrplot(cor(data_scaled), method = "circle", type = "upper")
# ======================================
# 4. TENTUKAN K OPTIMAL
# ======================================
sil_scores <- sapply(2:10, function(k) {
pam_res <- pam(dist_mat, k = k)
pam_res$silinfo$avg.width
})
# ================================
# 5. FIT PAM dengan k yang dipilih
# ================================
k <- which.max(sil_scores) + 1
pam_res <- pam(data_scaled, k = k) # BENAR untuk fviz_cluster
desa_medoid$cluster <- factor(pam_res$clustering)
# ================================
# 6. Evaluasi & Visualisasi PAM
# ================================
# a. Silhouette Plot
fviz_silhouette(pam_res) +
labs(title = paste("Silhouette Plot PAM – k =", k))
## cluster size ave.sil.width
## 1 1 334 0.51
## 2 2 740 0.81
## 3 3 42 -0.15
## 4 4 64 0.58
# b. Plot Profil Variabel
profil_medoid <- desa_medoid %>%
group_by(cluster) %>%
summarise(across(X1:X6, mean), .groups = "drop")
profil_long <- melt(profil_medoid, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
geom_col(position = "dodge") +
labs(title = "Profil Variabel per Klaster (PAM)") +
theme_minimal()
# c. PCA Scatter
fviz_cluster(pam_res, data = data_scaled, ellipse.type = "norm", repel = F) +
labs(title = "PCA Plot Klaster PAM") +
theme_minimal()
# d. Distribusi Cluster
ggplot(desa_medoid, aes(x = cluster)) +
geom_bar(fill = "steelblue") +
labs(title = "Jumlah Desa per Klaster (PAM)") +
theme_minimal()
# ======================================
# 7. SPASIALISASI DI PETA
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source
## `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS: WGS 84
## Warning: NAs introduced by coercion
desa_medoid$KDEPUM <- as.numeric(desa_medoid$KDEPUM)
desa_map <- desa_shp %>%
left_join(desa_medoid %>% dplyr::select(KDEPUM, cluster), by = "KDEPUM") %>%
filter(!is.na(cluster))
ggplot(desa_map) +
geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
scale_fill_brewer(palette = "Set2") +
labs(title = paste("Peta Klaster PAM – k =", k), fill = "Cluster") +
theme_minimal()
# ======================================
# 8. HITUNG CENTROID SPASIAL
# ======================================
desa_map <- st_make_valid(desa_map)
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon <- coords[,1]; desa_map$lat <- coords[,2]
centroid_medoid <- desa_map %>%
group_by(cluster) %>%
summarise(lon = mean(lon), lat = mean(lat))
ggplot() +
geom_sf(data = desa_map, aes(fill = cluster), color = "grey90") +
geom_point(data = centroid_medoid, aes(x = lon, y = lat), color = "red", size = 2) +
labs(title = "Peta Klaster PAM + Centroid") +
theme_minimal()
# ======================================
# 9. METRIK EKSTRA
# ======================================
cat("Rata-rata Silhouette PAM (k=", k, "): ", round(sil_scores[k-1], 3), "\n")
## Rata-rata Silhouette PAM (k= 4 ): 0.677
btw_ratio <- pam_res$betweenss / pam_res$tot.withinss
cat("BetweenSS/WithinSS Ratio:", round(btw_ratio, 3), "\n")
## BetweenSS/WithinSS Ratio:
ch <- calinhara(data_scaled, pam_res$clustering)
db <- index.DB(data_scaled, pam_res$clustering)$DB
cat("Calinski-Harabasz Index:", round(ch, 3), "\n")
## Calinski-Harabasz Index: 382.366
## Davies-Bouldin Index: 1.636
# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster)
library(factoextra)
library(ggplot2)
library(sf)
library(fpc) # untuk clara()
library(clusterSim)
library(tibble)
library(tidyr)
library(reshape2)
# ======================================
# 2. BACA DATA & STANDARISASI
# ======================================
desa_clarans <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_clarans %>% dplyr::select(X1:X6))
# ======================================
# 3. CARI K OPTIMAL (Silhouette)
# ======================================
sil_scores <- sapply(2:10, function(k) {
clara_model <- clara(data_scaled, k = k, samples = 5)
mean(silhouette(clara_model$clustering, dist(data_scaled))[, 3])
})
# Visualisasi nilai silhouette
k_opt <- which.max(sil_scores) + 1
plot(2:10, sil_scores, type = "b", pch = 19, col = "blue",
main = "Silhouette Score vs K", xlab = "Jumlah Klaster (K)", ylab = "Silhouette Avg")
abline(v = k_opt, lty = 2, col = "red")
# ======================================
# 4. FIT CLARANS/CLARA
# ======================================
set.seed(42)
clara_res <- clara(data_scaled, k = k_opt, samples = 5)
desa_clarans$cluster <- factor(clara_res$clustering)
# ======================================
# 5. PROFIL VARIABEL PER KLASTER
# ======================================
profil <- desa_clarans %>%
group_by(cluster) %>%
summarise(across(X1:X6, mean), .groups = "drop")
print(profil)
## # A tibble: 4 × 7
## cluster X1 X2 X3 X4 X5 X6
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 1 0.549 0.0238 0.00203 90.8 -0.00119 0.00658
## 2 2 0.0969 0.0165 0.00125 6.73 -0.00213 0.00226
## 3 3 0.650 5.66 0.460 25.9 -0.536 1.31
## 4 4 0.0813 1.41 0.249 5.78 -0.530 0
profil_long <- melt(profil, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
geom_col(position = "dodge") +
labs(title = "Profil Variabel per Klaster (CLARANS)", x = "Variabel", y = "Rata-rata") +
theme_minimal()
# ======================================
# 6. PCA VISUALISASI
# ======================================
fviz_cluster(list(data = data_scaled, cluster = clara_res$clustering),
ellipse.type = "norm", geom = "point", show.clust.cent = TRUE) +
labs(title = "PCA Klaster CLARANS") +
theme_minimal()
# ======================================
# 7. SPASIALISASI DI PETA
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source
## `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS: WGS 84
## Warning: NAs introduced by coercion
desa_clarans$KDEPUM <- as.numeric(desa_clarans$KDEPUM)
desa_map <- desa_shp %>%
left_join(desa_clarans %>% dplyr::select(KDEPUM,cluster), by = "KDEPUM") %>%
filter(!is.na(cluster))
ggplot(desa_map) +
geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
scale_fill_brewer(palette = "Set2") +
labs(title = paste("Peta Klaster CLARANS – k =", k_opt), fill = "Cluster") +
theme_minimal()
# ======================================
# 8. CENTROID SPASIAL
# ======================================
desa_map <- st_make_valid(desa_map)
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon <- coords[, 1]
desa_map$lat <- coords[, 2]
centroid_clarans <- desa_map %>%
group_by(cluster) %>%
summarise(
lon = mean(lon),
lat = mean(lat)
)
ggplot() +
geom_sf(data = desa_map, aes(fill = cluster), color = "grey90") +
geom_point(data = centroid_clarans, aes(x = lon, y = lat), color = "red", size = 2) +
labs(title = "Peta Klaster CLARANS + Titik Tengah") +
theme_minimal()
# ======================================
# 9. METRIK EVALUASI
# ======================================
cat("Jumlah Klaster Optimal:", k_opt, "\n")
## Jumlah Klaster Optimal: 4
## Rata-rata Silhouette: 0.678
## Calinski-Harabasz Index: 382.497
## Davies-Bouldin Index: 1.636
##
## 1 2 3 4
## 331 743 42 64
# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(ggplot2)
library(factoextra)
library(cluster)
library(corrplot)
library(kernlab) # specc()
library(reshape2)
library(sf)
library(clusterSim)
library(tibble)
# ======================================
# 2. BACA DATA & STANDARISASI
# ======================================
desa_spec <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_scaled <- scale(desa_spec %>% dplyr::select(X1:X6))
# ======================================
# 3. KORELASI
# ======================================
corrplot(cor(data_scaled), method = "circle", type = "upper")
# ======================================
# 4. TENTUKAN K OPTIMAL (dengan silhouette)
# ======================================
silhouette_scores <- sapply(2:10, function(k) {
spec_model <- specc(data_scaled, centers = k)
clust <- as.integer(spec_model)
sil <- silhouette(clust, dist(data_scaled))
mean(sil[, 3])
})
# Visualisasi Silhouette
k_opt <- which.max(silhouette_scores) + 1
plot(2:10, silhouette_scores, type = "b", pch = 19, col = "blue",
main = "Silhouette Score vs K (Spectral)", xlab = "K", ylab = "Silhouette")
abline(v = k_opt, lty = 2, col = "red")
# ======================================
# 5. FIT SPECTRAL CLUSTERING
# ======================================
set.seed(42)
spec_model <- specc(data_scaled, centers = k_opt)
desa_spec$cluster <- factor(spec_model@.Data)
# ======================================
# 6. PROFIL VARIABEL PER KLASTER
# ======================================
profil <- desa_spec %>%
group_by(cluster) %>%
summarise(across(X1:X6, mean), .groups = "drop")
profil_long <- melt(profil, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
geom_col(position = "dodge") +
labs(title = "Profil Variabel per Klaster (Spectral)", x = "Variabel", y = "Rata-rata") +
theme_minimal()
# ======================================
# 7. VISUALISASI PCA
# ======================================
fviz_cluster(list(data = data_scaled, cluster = spec_model@.Data),
geom = "point", ellipse.type = "norm", repel = F) +
labs(title = "PCA Plot Klaster (Spectral)") +
theme_minimal()
# ======================================
# 8. PETA SPASIAL
# ======================================
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source
## `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS: WGS 84
## Warning: NAs introduced by coercion
desa_spec$KDEPUM <- as.numeric(desa_spec$KDEPUM)
desa_map <- desa_shp %>%
left_join(desa_clarans %>% dplyr::select(KDEPUM,cluster), by = "KDEPUM") %>%
filter(!is.na(cluster))
ggplot(desa_map) +
geom_sf(aes(fill = factor(cluster)), color = "white", size = 0.1) +
scale_fill_brewer(palette = "Set2") +
labs(title = paste("Peta Klaster Spectral – k =", k_opt), fill = "Cluster") +
theme_minimal()
# ======================================
# 9. CENTROID SPASIAL
# ======================================
desa_map <- st_make_valid(desa_map)
desa_map$centroid <- st_centroid(desa_map$geometry)
coords <- st_coordinates(desa_map$centroid)
desa_map$lon <- coords[, 1]
desa_map$lat <- coords[, 2]
centroid_spec <- desa_map %>%
group_by(cluster) %>%
summarise(
lon = mean(lon),
lat = mean(lat)
)
ggplot() +
geom_sf(data = desa_map, aes(fill = cluster), color = "grey90") +
geom_point(data = centroid_spec, aes(x = lon, y = lat), color = "red", size = 2) +
labs(title = "Peta Klaster Spectral + Centroid") +
theme_minimal()
# ======================================
# 10. EVALUASI
# ======================================
cat("Jumlah Klaster Optimal:", k_opt, "\n")
## Jumlah Klaster Optimal: 2
sil <- silhouette(as.integer(spec_model), dist(data_scaled))
cat("Rata-rata Silhouette:", round(mean(sil[, 3]), 3), "\n")
## Rata-rata Silhouette: 0.712
ch <- calinhara(data_scaled, as.integer(spec_model))
cat("Calinski-Harabasz Index:", round(ch, 3), "\n")
## Calinski-Harabasz Index: 402.064
db <- index.DB(data_scaled, as.integer(spec_model))$DB
cat("Davies-Bouldin Index:", round(db, 3), "\n")
## Davies-Bouldin Index: 1.625
# ======================================
# 1. LIBRARY
# ======================================
library(readxl)
library(dplyr)
library(cluster) # diana(), silhouette()
library(factoextra) # fviz_cluster
library(ggplot2)
library(clusterSim) # calinhara, index.DB
library(reshape2)
library(tibble)
library(sf)
# ======================================
# 2. BACA DATA & STANDARISASI
# ======================================
desa_diana <- read_excel("C:/Users/User/Downloads/Pangan1.xlsx")
data_klaster <- scale(desa_diana %>% dplyr::select(X1:X6))
# ======================================
# 3. FIT DIANA & CARI K OPTIMAL
diana_res <- diana(data_klaster)
silhouette_scores <- sapply(2:10, function(k) {
cl <- cutree(as.hclust(diana_res), k = k)
sil <- silhouette(cl, dist(data_klaster))
mean(sil[, 3])
})
# Visualisasi k optimal
plot(2:10, silhouette_scores, type = "b", pch = 19, col = "darkgreen",
xlab = "Jumlah Klaster", ylab = "Average Silhouette Width",
main = "Pemilihan k Optimal - DIANA")
k_opt <- which.max(silhouette_scores) + 1
abline(v = k_opt, lty = 2, col = "red")
## K optimal (silhouette): 2
# ======================================
# 4. DENDROGRAM + GARIS K
hc_diana <- as.hclust(diana_res)
plot(hc_diana, main = "Dendrogram - DIANA", xlab = "", sub = "")
rect.hclust(hc_diana, k = k_opt, border = 2:6)
# ======================================
# 5. KLASTERISASI
cluster_diana <- cutree(hc_diana, k = k_opt)
desa_diana$cluster <- factor(cluster_diana)
# ======================================
# 6. PROFIL VARIABEL PER KLASTER
profil <- desa_diana %>%
group_by(cluster) %>%
summarise(across(X1:X6, mean), .groups = "drop")
profil_long <- melt(profil, id.vars = "cluster")
ggplot(profil_long, aes(x = variable, y = value, fill = cluster)) +
geom_col(position = "dodge") +
labs(title = "Profil Variabel per Klaster - DIANA", x = "Variabel", y = "Rata-rata") +
theme_minimal()
# ======================================
# 7. PCA VISUALISASI
fviz_cluster(list(data = data_klaster, cluster = cluster_diana),
ellipse.type = "norm", show.clust.cent = TRUE,
repel = F, geom = "point") +
labs(title = "PCA Visualisasi Klaster - DIANA") +
theme_minimal()
## Too few points to calculate an ellipse
# ======================================
# 8. PETA SPASIAL
desa_shp <- st_read("C:/Users/User/Documents/AKA/Tingkat 3/Semester 6/DMBG/Projek/BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA/Fix/batas_desa_fix.shp")
## Reading layer `batas_desa_fix' from data source
## `C:\Users\User\Documents\AKA\Tingkat 3\Semester 6\DMBG\Projek\BATAS DESA DESEMBER 2019 DUKCAPIL MALUKU UTARA\Fix\batas_desa_fix.shp'
## using driver `ESRI Shapefile'
## Simple feature collection with 1184 features and 26 fields
## Geometry type: MULTIPOLYGON
## Dimension: XY
## Bounding box: xmin: 124.2831 ymin: -2.477203 xmax: 129.9236 ymax: 2.645245
## Geodetic CRS: WGS 84
## Warning in gsub("\\.", "", desa_shp$KDEPUM) %>% as.numeric(): NAs introduced by
## coercion
desa_diana$KDEPUM <- as.numeric(desa_diana$KDEPUM)
# Gabungkan data klaster ke shapefile
desa_map <- desa_shp %>%
left_join(desa_diana %>% dplyr::select(KDEPUM, cluster), by = "KDEPUM") %>%
filter(!is.na(cluster))
# Peta
ggplot(desa_map) +
geom_sf(aes(fill = cluster), color = "white", size = 0.1) +
scale_fill_brewer(palette = "Set2") +
labs(title = "Peta Klaster DIANA - Maluku Utara", fill = "Klaster") +
theme_minimal()
# ======================================
# 9. TITIK TENGAH PER KLASTER
desa_map <- st_make_valid(desa_map)
coords <- st_coordinates(st_centroid(desa_map$geometry))
desa_map$lon <- coords[, 1]
desa_map$lat <- coords[, 2]
centroid_df <- desa_map %>%
group_by(cluster) %>%
summarise(lon = mean(lon), lat = mean(lat))
ggplot() +
geom_sf(data = desa_map, aes(fill = cluster), color = "white") +
geom_point(data = centroid_df, aes(x = lon, y = lat), color = "red", size = 2) +
labs(title = "Peta Klaster + Titik Tengah (DIANA)") +
theme_minimal()
# ======================================
# 10. EVALUASI KLASTER
sil <- silhouette(cluster_diana, dist(data_klaster))
cat("Silhouette Avg :", round(mean(sil[, 3]), 3), "\n")
## Silhouette Avg : 0.939
## Calinski-Harabasz Index: 226.175
## Davies-Bouldin Index: 0.066