Sistem Deteksi Dini Gejolak Harga Pangan Multidimensi
1. Setup
packages <- c(
"tidyverse", "lubridate", "zoo", "caret",
"glmnet", "pROC", "PRROC", "janitor", "readxl",
"rpart", "rpart.plot"
)
for (pkg in packages) {
if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
install.packages(pkg, dependencies = TRUE)
library(pkg, character.only = TRUE)
}
}
select <- dplyr::select
filter <- dplyr::filter
arrange <- dplyr::arrange
theme_set(theme_minimal() + theme(
plot.title = element_text(face = "bold", size = 13, hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5, size = 10, color = "gray40"),
axis.text.x = element_text(angle = 45, hjust = 1)
))
2. Import Data
file_path <- "~/SEMESTER 6/Satria Data_Data SEC5.xlsx"
if (!file.exists(file_path)) stop("File tidak ditemukan.")
data_raw <- read_excel(
file_path,
sheet = "dataanalisis"
) %>%
clean_names()
cat("Dimensi awal:", dim(data_raw), "\n")
## Dimensi awal: 840 11
3. Deduplikasi
data_raw <- data_raw %>%
group_by(tahun, bulan, komoditas) %>%
summarise(
harga_eceran = median(harga_eceran, na.rm = TRUE),
inflasi_komoditas = median(inflasi_komoditas, na.rm = TRUE),
volume_impor_kg = median(volume_impor_kg, na.rm = TRUE),
nilai_impor_usd = median(nilai_impor_usd, na.rm = TRUE),
usd_to_idr = median(usd_to_idr, na.rm = TRUE),
index_enso = median(index_enso, na.rm = TRUE),
fao_value = median(fao_value, na.rm = TRUE),
.groups = "drop"
)
cat("Dimensi setelah deduplikasi:", dim(data_raw), "\n")
## Dimensi setelah deduplikasi: 840 10
4. Data Makro Bulanan
data_makro <- data_raw %>%
group_by(tahun, bulan) %>%
summarise(
usd_to_idr = first(usd_to_idr),
index_enso = first(index_enso),
fao_value = first(fao_value),
.groups = "drop"
)
cat("Dimensi data makro per bulan:", dim(data_makro), "\n")
## Dimensi data makro per bulan: 120 5
5. Pivot Data Komoditas
data_pivot <- data_raw %>%
select(
tahun, bulan, komoditas,
harga_eceran,
inflasi_komoditas,
volume_impor_kg,
nilai_impor_usd
) %>%
pivot_wider(
id_cols = c(tahun, bulan),
names_from = komoditas,
values_from = c(
harga_eceran,
inflasi_komoditas,
volume_impor_kg,
nilai_impor_usd
),
names_sep = "_"
)
cat("Dimensi data pivot per bulan:", dim(data_pivot), "\n")
## Dimensi data pivot per bulan: 120 30
6. Gabungkan Data
data_wide <- data_pivot %>%
left_join(data_makro, by = c("tahun", "bulan")) %>%
arrange(tahun, bulan) %>%
mutate(date = make_date(tahun, bulan, 1))
cat("Dimensi data wide FINAL:", dim(data_wide), "\n")
## Dimensi data wide FINAL: 120 34
7. Imputasi Missing Value
data_wide <- data_wide %>%
mutate(across(where(is.numeric),
~ zoo::na.locf(., na.rm = FALSE))) %>%
mutate(across(where(is.numeric),
~ ifelse(is.na(.),
median(., na.rm = TRUE),
.)))
8. Fitur Kalender dan Iklim
ramadan_months <- tibble(
tahun = c(2016,2016, 2017,2017,2017, 2018,2018, 2019,2019,
2020,2020,2020, 2021,2021,2021, 2022,2022,
2023,2023,2023, 2024,2024, 2025,2025),
bulan = c(6,7, 5,6,7, 5,6, 5,6, 4,5,6, 4,5,6, 4,5, 3,4,5, 3,4, 3,4),
ramadan_dummy = 1L
)
data_wide <- data_wide %>%
left_join(ramadan_months, by = c("tahun", "bulan")) %>%
mutate(ramadan_dummy = replace_na(ramadan_dummy, 0L),
el_nino_dummy = ifelse(index_enso > 0.5, 1L, 0L),
panen_padi_dummy = ifelse(bulan %in% c(3,4), 1L, 0L),
panen_bawang_dummy = ifelse(bulan %in% c(6,7), 1L, 0L),
bulan_sin = sin(2*pi*bulan/12),
bulan_cos = cos(2*pi*bulan/12))
9. Lag Harga dan Volume
harga_cols <- grep("^harga_eceran_", names(data_wide), value = TRUE)
vol_cols <- grep("^volume_impor_kg_", names(data_wide), value = TRUE)
for (col in c(harga_cols, vol_cols)) {
data_wide[[paste0("lag1_", col)]] <- lag(data_wide[[col]], 1)
}
10. Harga Impor dalam Rupiah
nilai_cols <- grep("^nilai_impor_usd_", names(data_wide), value = TRUE)
vol_cols_nm <- grep("^volume_impor_kg_", names(data_wide), value = TRUE)
for (i in seq_along(nilai_cols)) {
kom <- gsub("nilai_impor_usd_", "", nilai_cols[i])
data_wide[[paste0("harga_impor_idr_", kom)]] <-
(data_wide[[nilai_cols[i]]] * data_wide$usd_to_idr) / (data_wide[[vol_cols_nm[i]]] + 1e-9)
}
11. Definisi Target Gejolak
bobot_ihk <- c(beras=0.036, cabai=0.012, bawang_merah=0.008,
daging_ayam=0.020, daging_sapi=0.012, gula=0.010, telur=0.018)
bobot_ihk <- bobot_ihk / sum(bobot_ihk)
inflasi_cols <- grep("^inflasi_komoditas_", names(data_wide), value = TRUE)
inflasi_mat <- as.matrix(data_wide[, inflasi_cols])
komoditas_order <- gsub("inflasi_komoditas_", "", inflasi_cols)
bobot_ordered <- bobot_ihk[komoditas_order]
bobot_ordered[is.na(bobot_ordered)] <- mean(bobot_ihk)
bobot_ordered <- bobot_ordered / sum(bobot_ordered)
data_wide$inflasi_tertimbang <- as.numeric(inflasi_mat %*% bobot_ordered)
for (col in harga_cols) {
kom <- gsub("harga_eceran_", "", col)
data_wide[[paste0("harga_change_", kom)]] <-
(data_wide[[col]] - lag(data_wide[[col]])) / (lag(data_wide[[col]]) + 1e-9) * 100
}
spike_cols <- grep("^harga_change_", names(data_wide), value = TRUE)
data_wide$pct_spike <- rowMeans(data_wide[, spike_cols] > 5, na.rm = TRUE) * 100
harga_mat <- as.matrix(data_wide[, harga_cols])
data_wide$harga_tertimbang <- as.numeric(harga_mat %*% bobot_ordered)
threshold_ekstrem <- median(data_wide$harga_tertimbang, na.rm = TRUE) +
1.5 * IQR(data_wide$harga_tertimbang, na.rm = TRUE)
data_wide$status_gejolak <- ifelse(
data_wide$inflasi_tertimbang > 2 |
data_wide$pct_spike > 30 |
data_wide$harga_tertimbang > threshold_ekstrem, 1L, 0L)
data_wide$status_gejolak[1] <- 0L
proporsi_gejolak <- mean(data_wide$status_gejolak)*100
cat("\nProporsi gejolak:", round(proporsi_gejolak,1), "%\n")
##
## Proporsi gejolak: 29.2 %
12. Persiapan Data Model
data_model <- data_wide %>%
arrange(date) %>%
mutate(target = lead(status_gejolak, 1)) %>%
filter(!is.na(target)) %>%
select(-date, -tahun, -bulan, -status_gejolak,
-starts_with("harga_change_"),
-inflasi_tertimbang, -harga_tertimbang, -pct_spike) %>%
mutate(across(everything(), as.numeric))
# --- PERBAIKAN: Hapus fitur impor tidak relevan (telur, daging ayam) -------
fitur_tidak_relevan <- c(
"volume_impor_kg_Telur", "nilai_impor_usd_Telur",
"volume_impor_kg_Daging Ayam", "nilai_impor_usd_Daging Ayam"
)
data_model <- data_model[, !names(data_model) %in% fitur_tidak_relevan]
X <- data_model %>% select(-target)
y <- data_model$target
cat(sprintf("\nData model: %d obs x %d fitur\n", nrow(X), ncol(X)))
##
## Data model: 119 obs x 54 fitur
print(table(y))
## y
## 0 1
## 84 35
13. Fungsi Helper
hitung_prauc <- function(actual, prob) {
if (length(unique(actual)) < 2) return(NA)
pr <- PRROC::pr.curve(scores.class0 = prob[actual==1], scores.class1 = prob[actual==0], curve=FALSE)
pr$auc.integral
}
hitung_f2 <- function(actual, pred) {
tp <- sum(actual==1 & pred==1); fp <- sum(actual==0 & pred==1); fn <- sum(actual==1 & pred==0)
denom <- 5*tp + 4*fn + fp; if(denom==0) return(0); 5*tp/denom
}
evaluasi_threshold <- function(actual, prob, thresholds = seq(0.1,0.7,0.05)) {
map_dfr(thresholds, function(thr) {
pred <- ifelse(prob >= thr, 1L, 0L)
if(length(unique(pred))<2) return(tibble(threshold=thr, recall=NA, precision=NA, specificity=NA, f2=0, f1=NA))
cm <- caret::confusionMatrix(as.factor(pred), as.factor(actual), positive="1")
tibble(threshold=thr, recall=cm$byClass["Sensitivity"], precision=cm$byClass["Pos Pred Value"],
specificity=cm$byClass["Specificity"], f2=hitung_f2(actual, pred), f1=cm$byClass["F1"])
})
}
14. Baseline
cat("\n========== BASELINE ==========\n")
##
## ========== BASELINE ==========
prauc_persist <- hitung_prauc(y[-1], as.numeric(y[-length(y)]))
f2_persist <- hitung_f2(y[-1], y[-length(y)])
cat(sprintf("Persistence | PR-AUC: %.4f | F2: %.4f\n", prauc_persist, f2_persist))
## Persistence | PR-AUC: 0.4506 | F2: 0.5172
rule_pred <- as.numeric(data_wide$el_nino_dummy[-nrow(data_wide)]==1 & data_wide$inflasi_tertimbang[-nrow(data_wide)]>1)
prauc_rule <- hitung_prauc(y, rule_pred)
f2_rule <- hitung_f2(y, rule_pred)
cat(sprintf("Rule-based | PR-AUC: %.4f | F2: %.4f\n", prauc_rule, f2_rule))
## Rule-based | PR-AUC: 0.2926 | F2: 0.0680
15. Elastic Net
cat("\n========== ELASTIC NET ==========\n")
##
## ========== ELASTIC NET ==========
window_size <- 60
n <- nrow(X)
pred_en <- rep(NA_real_, n - window_size)
actual_en <- rep(NA_real_, n - window_size)
success <- 0
for (i in seq(window_size, n-1)) {
idx <- i - window_size + 1
X_train <- as.matrix(X[1:i, ])
y_train <- y[1:i]
X_test <- as.matrix(X[i+1, , drop=FALSE])
y_test <- y[i+1]
actual_en[idx] <- y_test
na_rows <- which(apply(X_train, 1, anyNA))
if (length(na_rows) > 0) {
X_train <- X_train[-na_rows, , drop=FALSE]
y_train <- y_train[-na_rows]
}
if (length(unique(y_train)) < 2) next
if (nrow(X_train) < 10) next
vars <- apply(X_train, 2, var, na.rm=TRUE)
keep <- which(vars > 0 & !is.na(vars))
if (length(keep) < 3) next
X_train <- X_train[, keep, drop=FALSE]
X_test <- X_test[, keep, drop=FALSE]
if (anyNA(X_test)) next
means <- colMeans(X_train)
sds <- apply(X_train, 2, sd); sds[sds==0] <- 1
X_tr_s <- scale(X_train, center=means, scale=sds)
X_te_s <- matrix((X_test - means)/sds, nrow=1)
set.seed(42)
en_cv <- tryCatch(
cv.glmnet(X_tr_s, y_train, family="binomial", alpha=0.5, nfolds=min(5, length(y_train)), type.measure="deviance"),
error=function(e) NULL
)
if (!is.null(en_cv)) {
prob <- predict(en_cv, newx=X_te_s, s="lambda.1se", type="response")[1,1]
pred_en[idx] <- prob; success <- success+1
} else {
en_fit <- tryCatch(glmnet(X_tr_s, y_train, family="binomial", alpha=0.5), error=function(e) NULL)
if (!is.null(en_fit)) {
lam <- en_fit$lambda[ceiling(length(en_fit$lambda)/2)]
prob <- predict(en_fit, newx=X_te_s, s=lam, type="response")[1,1]
pred_en[idx] <- prob; success <- success+1
}
}
if (idx %% 50 == 0) cat(sprintf("Iterasi %d/%d, sukses: %d\n", idx, n-window_size, success))
}
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Iterasi 50/59, sukses: 50
valid <- !is.na(pred_en) & !is.na(actual_en)
pred_en <- pred_en[valid]; actual_en <- actual_en[valid]
cat(sprintf("\nPrediksi Elastic Net berhasil: %d dari %d iterasi (%.1f%%)\n", length(pred_en), n-window_size, 100*length(pred_en)/(n-window_size)))
##
## Prediksi Elastic Net berhasil: 59 dari 59 iterasi (100.0%)
if (length(pred_en)==0) stop("Elastic Net gagal total.")
if (length(unique(actual_en))<2) stop("Hanya satu kelas pada actual.")
prauc_en <- hitung_prauc(actual_en, pred_en)
roc_en <- pROC::roc(actual_en, pred_en, levels=c(0,1), direction="<", quiet=TRUE)
auc_en <- as.numeric(pROC::auc(roc_en))
cat(sprintf("PR-AUC: %.4f | ROC-AUC: %.4f\n", prauc_en, auc_en))
## PR-AUC: 0.7286 | ROC-AUC: 0.7012
thr_eval_en <- evaluasi_threshold(actual_en, pred_en)
best_idx_en <- which.max(thr_eval_en$f2)
best_thr_en <- thr_eval_en$threshold[best_idx_en]
cat(sprintf("Threshold optimal (F2): %.2f | Recall: %.3f | Precision: %.3f | F2: %.4f\n",
best_thr_en, thr_eval_en$recall[best_idx_en], thr_eval_en$precision[best_idx_en], thr_eval_en$f2[best_idx_en]))
## Threshold optimal (F2): 0.15 | Recall: 1.000 | Precision: 0.414 | F2: 0.7792
16. Cost-Benefit Analysis
# Asumsi: biaya false negative (gejolak tidak terdeteksi) = 5x biaya false positive
biaya_FN <- 5
biaya_FP <- 1
cost_df <- thr_eval_en %>%
mutate(
FN_rate = 1 - recall,
FP_rate = 1 - specificity,
n_actual_1 = sum(actual_en == 1),
n_actual_0 = sum(actual_en == 0),
total_biaya = (FN_rate * n_actual_1 * biaya_FN) + (FP_rate * n_actual_0 * biaya_FP)
)
best_cost_idx <- which.min(cost_df$total_biaya)
best_cost_thr <- cost_df$threshold[best_cost_idx]
cat(sprintf("\nThreshold optimal berdasarkan cost (FN=5xFP): %.2f\n", best_cost_thr))
##
## Threshold optimal berdasarkan cost (FN=5xFP): 0.15
cat(sprintf(" Recall: %.3f | Precision: %.3f | F2: %.4f\n",
cost_df$recall[best_cost_idx], cost_df$precision[best_cost_idx], cost_df$f2[best_cost_idx]))
## Recall: 1.000 | Precision: 0.414 | F2: 0.7792
p_cost <- ggplot(cost_df, aes(x = threshold, y = total_biaya)) +
geom_line(color = "purple", linewidth = 1) +
geom_point(size = 2) +
geom_vline(xintercept = best_cost_thr, linetype = "dotted", color = "gray40") +
labs(title = "Total Biaya Kesalahan vs Threshold",
subtitle = sprintf("Minimum biaya pada threshold = %.2f", best_cost_thr),
x = "Threshold", y = "Estimasi Total Biaya") +
theme_minimal()
print(p_cost)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

17. Decision Tree
cat("\n========== DECISION TREE (rpart) ==========\n")
##
## ========== DECISION TREE (rpart) ==========
pred_dt <- rep(NA_real_, n - window_size)
actual_dt <- rep(NA_real_, n - window_size)
success_dt <- 0
for (i in seq(window_size, n-1)) {
idx <- i - window_size + 1
X_train <- as.data.frame(X[1:i, ])
y_train <- y[1:i]
X_test <- as.data.frame(X[i+1, , drop=FALSE])
y_test <- y[i+1]
actual_dt[idx] <- y_test
na_rows <- which(apply(X_train, 1, anyNA))
if (length(na_rows) > 0) {
X_train <- X_train[-na_rows, , drop=FALSE]
y_train <- y_train[-na_rows]
}
if (length(unique(y_train)) < 2) next
if (nrow(X_train) < 10) next
vars <- apply(X_train, 2, var, na.rm=TRUE)
keep <- which(vars > 0 & !is.na(vars))
if (length(keep) < 2) next
X_train <- X_train[, keep, drop=FALSE]
X_test <- X_test[, keep, drop=FALSE]
if (anyNA(X_test)) next
set.seed(42)
dt_model <- tryCatch(
rpart(y_train ~ ., data = cbind(y_train, X_train), method = "class",
control = rpart.control(maxdepth=3, minsplit=20, cp=0.01)),
error = function(e) NULL
)
if (!is.null(dt_model)) {
prob <- predict(dt_model, newdata = X_test, type = "prob")[, "1"]
if (length(prob) == 0) prob <- 0.5
pred_dt[idx] <- prob
success_dt <- success_dt + 1
}
if (idx %% 50 == 0) cat(sprintf("Iterasi %d/%d, sukses: %d\n", idx, n-window_size, success_dt))
}
## Iterasi 50/59, sukses: 50
valid_dt <- !is.na(pred_dt) & !is.na(actual_dt)
pred_dt <- pred_dt[valid_dt]; actual_dt <- actual_dt[valid_dt]
cat(sprintf("\nPrediksi Decision Tree berhasil: %d dari %d iterasi (%.1f%%)\n",
length(pred_dt), n-window_size, 100*length(pred_dt)/(n-window_size)))
##
## Prediksi Decision Tree berhasil: 59 dari 59 iterasi (100.0%)
if (length(pred_dt) > 0 && length(unique(actual_dt)) >= 2) {
prauc_dt <- hitung_prauc(actual_dt, pred_dt)
roc_dt <- pROC::roc(actual_dt, pred_dt, levels=c(0,1), direction="<", quiet=TRUE)
auc_dt <- as.numeric(pROC::auc(roc_dt))
cat(sprintf("PR-AUC: %.4f | ROC-AUC: %.4f\n", prauc_dt, auc_dt))
thr_eval_dt <- evaluasi_threshold(actual_dt, pred_dt)
best_idx_dt <- which.max(thr_eval_dt$f2)
best_thr_dt <- thr_eval_dt$threshold[best_idx_dt]
cat(sprintf("Threshold optimal (F2): %.2f | Recall: %.3f | Precision: %.3f | F2: %.4f\n",
best_thr_dt, thr_eval_dt$recall[best_idx_dt], thr_eval_dt$precision[best_idx_dt], thr_eval_dt$f2[best_idx_dt]))
}
## PR-AUC: 0.5986 | ROC-AUC: 0.5970
## Threshold optimal (F2): 0.10 | Recall: 0.875 | Precision: 0.420 | F2: 0.7192
18. Visualisasi
# Plot status gejolak
p_status <- ggplot(data_wide, aes(x = date, y = status_gejolak)) +
geom_line(color = "gray60", linewidth = 0.6) +
geom_point(aes(color = factor(status_gejolak)), size = 2) +
scale_color_manual(values = c("0" = "#22c55e", "1" = "#ef4444"),
labels = c("Aman", "Gejolak")) +
labs(title = "Status Gejolak Harga Pangan Nasional (2016-2025)",
x = "Tanggal", y = "Status", color = NULL) +
theme(legend.position = "bottom")
print(p_status)

# PR Curve perbandingan
if (exists("prauc_en") && exists("prauc_dt")) {
pr_en_curve <- PRROC::pr.curve(scores.class0 = pred_en[actual_en==1], scores.class1 = pred_en[actual_en==0], curve=TRUE)
pr_dt_curve <- PRROC::pr.curve(scores.class0 = pred_dt[actual_dt==1], scores.class1 = pred_dt[actual_dt==0], curve=TRUE)
pr_df_en <- as.data.frame(pr_en_curve$curve); names(pr_df_en) <- c("recall", "precision", "threshold")
pr_df_dt <- as.data.frame(pr_dt_curve$curve); names(pr_df_dt) <- c("recall", "precision", "threshold")
pr_df_en$model <- "Elastic Net"
pr_df_dt$model <- "Decision Tree"
pr_comb <- rbind(pr_df_en, pr_df_dt)
p_pr_comb <- ggplot(pr_comb, aes(x = recall, y = precision, color = model)) +
geom_line(linewidth = 1) +
geom_hline(yintercept = mean(y), linetype = "dashed", color = "gray50") +
scale_color_manual(values = c("Elastic Net" = "#3B82F6", "Decision Tree" = "#22C55E")) +
labs(title = "Perbandingan Precision-Recall Curve",
subtitle = sprintf("PR-AUC | EN: %.4f | DT: %.4f", prauc_en, prauc_dt),
x = "Recall", y = "Precision", color = NULL) +
theme(legend.position = "bottom")
print(p_pr_comb)
}

# Trade-off Elastic Net
p_trade_en <- thr_eval_en %>%
select(threshold, recall, precision, f2) %>%
pivot_longer(-threshold, names_to = "metrik", values_to = "nilai") %>%
ggplot(aes(x = threshold, y = nilai, color = metrik)) +
geom_line(linewidth = 1) +
geom_vline(xintercept = best_thr_en, linetype = "dotted", color = "gray40") +
scale_color_manual(values = c("recall" = "#3B82F6", "precision" = "#EF4444", "f2" = "#22C55E"),
labels = c("f2" = "F2-score", "precision" = "Precision", "recall" = "Recall")) +
labs(title = "Trade-off Recall vs Precision (Elastic Net)",
subtitle = sprintf("Threshold optimal (F2) = %.2f", best_thr_en),
x = "Threshold Probabilitas", y = "Nilai Metrik", color = NULL) +
theme(legend.position = "bottom")
print(p_trade_en)
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Trade-off Decision Tree
if (exists("thr_eval_dt")) {
p_trade_dt <- thr_eval_dt %>%
select(threshold, recall, precision, f2) %>%
pivot_longer(-threshold, names_to = "metrik", values_to = "nilai") %>%
ggplot(aes(x = threshold, y = nilai, color = metrik)) +
geom_line(linewidth = 1) +
geom_vline(xintercept = best_thr_dt, linetype = "dotted", color = "gray40") +
scale_color_manual(values = c("recall" = "#3B82F6", "precision" = "#EF4444", "f2" = "#22C55E"),
labels = c("f2" = "F2-score", "precision" = "Precision", "recall" = "Recall")) +
labs(title = "Trade-off Recall vs Precision (Decision Tree)",
subtitle = sprintf("Threshold optimal (F2) = %.2f", best_thr_dt),
x = "Threshold Probabilitas", y = "Nilai Metrik", color = NULL) +
theme(legend.position = "bottom")
print(p_trade_dt)
}

# ROC Curve perbandingan
if (exists("roc_dt")) {
roc_compare <- ggroc(list("Elastic Net" = roc_en, "Decision Tree" = roc_dt), size = 1) +
geom_abline(slope = 1, intercept = 1, linetype = "dashed", color = "gray50") +
scale_color_manual(values = c("Elastic Net" = "#3B82F6", "Decision Tree" = "#22C55E")) +
labs(title = "Kurva ROC: Elastic Net vs Decision Tree",
subtitle = sprintf("ROC-AUC | EN: %.3f | DT: %.3f", auc_en, auc_dt),
x = "1 - Specificity", y = "Sensitivity", color = NULL) +
theme(legend.position = "bottom")
print(roc_compare)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the pROC package.
## Please report the issue at <https://github.com/xrobin/pROC/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

19. Interpretasi Model
# a) Koefisien Elastic Net (model final)
X_full <- as.matrix(X); y_full <- y
na_rows_full <- which(apply(X_full, 1, anyNA))
if (length(na_rows_full) > 0) {
X_full <- X_full[-na_rows_full, , drop=FALSE]; y_full <- y_full[-na_rows_full]
}
vars_full <- apply(X_full, 2, var, na.rm=TRUE)
keep_full <- which(vars_full > 0 & !is.na(vars_full))
X_full <- X_full[, keep_full, drop=FALSE]
means_f <- colMeans(X_full); sds_f <- apply(X_full, 2, sd); sds_f[sds_f==0] <- 1
X_scaled <- scale(X_full, center=means_f, scale=sds_f)
set.seed(42)
final_cv <- cv.glmnet(X_scaled, y_full, family="binomial", alpha=0.5, nfolds=5)
coef_full <- coef(final_cv, s = "lambda.1se")
coef_df <- data.frame(fitur = rownames(coef_full), koef = as.numeric(coef_full)) %>%
filter(fitur != "(Intercept)", koef != 0) %>% arrange(desc(abs(koef)))
cat("\n========== KOEFISIEN ELASTIC NET (FINAL) ==========\n")
##
## ========== KOEFISIEN ELASTIC NET (FINAL) ==========
cat(sprintf("Fitur aktif (koef ≠ 0): %d dari %d\n", nrow(coef_df), ncol(X)))
## Fitur aktif (koef ≠ 0): 8 dari 54
print(head(coef_df, 15))
## fitur koef
## 1 harga_eceran_Beras 0.1788878658
## 2 lag1_harga_eceran_Beras 0.1556201540
## 3 lag1_volume_impor_kg_Bawang Merah 0.1005617838
## 4 bulan_cos 0.0918205975
## 5 harga_eceran_Gula 0.0822243240
## 6 lag1_harga_eceran_Gula 0.0389842422
## 7 harga_eceran_Daging Sapi 0.0180703149
## 8 harga_eceran_Daging Ayam 0.0005992081
# Plot koefisien harga eceran
coef_harga <- coef_df %>% filter(grepl("harga_eceran_", fitur)) %>%
mutate(komoditas = gsub("harga_eceran_", "", fitur),
arah = ifelse(koef > 0, "Positif", "Negatif"))
if (nrow(coef_harga) > 0) {
p_coef <- ggplot(coef_harga, aes(x = reorder(komoditas, abs(koef)), y = koef, fill = arah)) +
geom_bar(stat = "identity") + coord_flip() +
scale_fill_manual(values = c("Positif" = "#EF4444", "Negatif" = "#3B82F6")) +
labs(title = "Koefisien Elastic Net: Harga Eceran per Komoditas",
x = "Komoditas", y = "Koefisien", fill = NULL) +
theme(legend.position = "bottom")
print(p_coef)
}

# b) Variable importance Decision Tree
X_train_dt <- X[complete.cases(X), ]
y_train_dt <- y[complete.cases(X)]
dt_final <- rpart(y_train_dt ~ ., data = cbind(y_train_dt, X_train_dt), method = "class",
control = rpart.control(maxdepth=3, minsplit=20, cp=0.01))
cat("\n========== VARIABLE IMPORTANCE DECISION TREE ==========\n")
##
## ========== VARIABLE IMPORTANCE DECISION TREE ==========
print(dt_final$variable.importance)
## harga_eceran_Beras harga_eceran_Daging Ayam
## 10.8113622 10.8113622
## harga_eceran_Daging Sapi harga_eceran_Gula
## 10.8113622 10.8113622
## harga_eceran_Telur lag1_harga_eceran_Beras
## 10.8113622 9.7302260
## bulan_cos lag1_volume_impor_kg_Bawang Merah
## 8.1814815 3.6362140
## nilai_impor_usd_Gula nilai_impor_usd_Bawang Merah
## 2.7676802 1.3635802
## volume_impor_kg_Bawang Merah volume_impor_kg_Cabai
## 1.3635802 1.3635802
## lag1_volume_impor_kg_Daging Sapi volume_impor_kg_Beras
## 1.1861487 1.1861487
## inflasi_komoditas_Bawang Merah inflasi_komoditas_Gula
## 0.9090535 0.7907658
## nilai_impor_usd_Daging Sapi volume_impor_kg_Daging Sapi
## 0.7907658 0.7907658
rpart.plot(dt_final, main = "Decision Tree Final (maxdepth=3)", extra = 101, under = TRUE, fallen.leaves = TRUE)

20. Prediksi Bulan Depan
last_row <- as.matrix(X[nrow(X), keep_full, drop=FALSE])
last_scaled <- matrix((last_row - means_f) / sds_f, nrow=1)
prob_next <- predict(final_cv, newx=last_scaled, s="lambda.1se", type="response")[1,1]
bulan_depan <- tail(data_wide$date, 1) + months(1)
# Gunakan threshold berbasis cost untuk rekomendasi final
thresh_final <- best_cost_thr
thresh_siaga_final <- thresh_final * 0.6
status_final <- case_when(
prob_next >= thresh_final ~ "WASPADA",
prob_next >= thresh_siaga_final ~ "SIAGA",
TRUE ~ "AMAN"
)
cat(sprintf("\nPrediksi %s: prob=%.3f -> %s (threshold cost=%.2f)\n",
format(bulan_depan, "%B %Y"), prob_next, status_final, thresh_final))
##
## Prediksi Januari 2026: prob=0.663 -> WASPADA (threshold cost=0.15)
21. Ringkasan Eksekutif
cat("\n========== RINGKASAN EKSEKUTIF (DENGAN KOREKSI) ==========\n")
##
## ========== RINGKASAN EKSEKUTIF (DENGAN KOREKSI) ==========
cat(sprintf("Total data bulan: %d | Observasi model: %d\n", nrow(data_wide), nrow(X)))
## Total data bulan: 120 | Observasi model: 119
cat(sprintf("Proporsi gejolak dalam data: %.1f%%\n", proporsi_gejolak))
## Proporsi gejolak dalam data: 29.2%
cat(sprintf("Elastic Net | PR-AUC: %.4f | ROC-AUC: %.4f\n", prauc_en, auc_en))
## Elastic Net | PR-AUC: 0.7286 | ROC-AUC: 0.7012
cat(sprintf("Threshold optimal F2: %.2f (Recall=%.3f, Precision=%.3f)\n",
best_thr_en, thr_eval_en$recall[best_idx_en], thr_eval_en$precision[best_idx_en]))
## Threshold optimal F2: 0.15 (Recall=1.000, Precision=0.414)
cat(sprintf("Threshold optimal cost (FN=5xFP): %.2f (Recall=%.3f, Precision=%.3f)\n",
best_cost_thr, cost_df$recall[best_cost_idx], cost_df$precision[best_cost_idx]))
## Threshold optimal cost (FN=5xFP): 0.15 (Recall=1.000, Precision=0.414)
cat(sprintf("Prediksi %s: prob=%.3f -> %s\n", format(bulan_depan, "%B %Y"), prob_next, status_final))
## Prediksi Januari 2026: prob=0.663 -> WASPADA
cat("\nKeterbatasan yang diakui:\n")
##
## Keterbatasan yang diakui:
cat("- Ukuran sampel kecil (119 bulan) → recall 100% rapuh, perlu validasi lebih lanjut\n")
## - Ukuran sampel kecil (119 bulan) → recall 100% rapuh, perlu validasi lebih lanjut
cat("- Multikolinieritas harga komoditas menyebabkan Decision Tree tidak diskriminatif\n")
## - Multikolinieritas harga komoditas menyebabkan Decision Tree tidak diskriminatif
cat("- Threshold 0,10 tidak realistis secara ekonomi; gunakan threshold berbasis cost (%.2f)\n", best_cost_thr)
## - Threshold 0,10 tidak realistis secara ekonomi; gunakan threshold berbasis cost (%.2f)
## 0.15
cat("- Fitur impor telur dan daging ayam dihapus karena tidak relevan secara substansi\n")
## - Fitur impor telur dan daging ayam dihapus karena tidak relevan secara substansi
cat("=========================================\n")
## =========================================