Sistem Deteksi Dini Gejolak Harga Pangan Multidimensi

1. Setup

packages <- c(
  "tidyverse", "lubridate", "zoo", "caret",
  "glmnet", "pROC", "PRROC", "janitor", "readxl",
  "rpart", "rpart.plot"
)

for (pkg in packages) {
  if (!require(pkg, character.only = TRUE, quietly = TRUE)) {
    install.packages(pkg, dependencies = TRUE)
    library(pkg, character.only = TRUE)
  }
}

select  <- dplyr::select
filter  <- dplyr::filter
arrange <- dplyr::arrange

theme_set(theme_minimal() + theme(
  plot.title    = element_text(face = "bold", size = 13, hjust = 0.5),
  plot.subtitle = element_text(hjust = 0.5, size = 10, color = "gray40"),
  axis.text.x   = element_text(angle = 45, hjust = 1)
))

2. Import Data

file_path <- "~/SEMESTER 6/Satria Data_Data SEC5.xlsx"

if (!file.exists(file_path)) stop("File tidak ditemukan.")

data_raw <- read_excel(
  file_path,
  sheet = "dataanalisis"
) %>%
  clean_names()

cat("Dimensi awal:", dim(data_raw), "\n")
## Dimensi awal: 840 11

3. Deduplikasi

data_raw <- data_raw %>%
  group_by(tahun, bulan, komoditas) %>%
  summarise(
    harga_eceran = median(harga_eceran, na.rm = TRUE),
    inflasi_komoditas = median(inflasi_komoditas, na.rm = TRUE),
    volume_impor_kg = median(volume_impor_kg, na.rm = TRUE),
    nilai_impor_usd = median(nilai_impor_usd, na.rm = TRUE),
    usd_to_idr = median(usd_to_idr, na.rm = TRUE),
    index_enso = median(index_enso, na.rm = TRUE),
    fao_value = median(fao_value, na.rm = TRUE),
    .groups = "drop"
  )

cat("Dimensi setelah deduplikasi:", dim(data_raw), "\n")
## Dimensi setelah deduplikasi: 840 10

4. Data Makro Bulanan

data_makro <- data_raw %>%
  group_by(tahun, bulan) %>%
  summarise(
    usd_to_idr = first(usd_to_idr),
    index_enso = first(index_enso),
    fao_value = first(fao_value),
    .groups = "drop"
  )

cat("Dimensi data makro per bulan:", dim(data_makro), "\n")
## Dimensi data makro per bulan: 120 5

5. Pivot Data Komoditas

data_pivot <- data_raw %>%
  select(
    tahun, bulan, komoditas,
    harga_eceran,
    inflasi_komoditas,
    volume_impor_kg,
    nilai_impor_usd
  ) %>%
  pivot_wider(
    id_cols = c(tahun, bulan),
    names_from = komoditas,
    values_from = c(
      harga_eceran,
      inflasi_komoditas,
      volume_impor_kg,
      nilai_impor_usd
    ),
    names_sep = "_"
  )

cat("Dimensi data pivot per bulan:", dim(data_pivot), "\n")
## Dimensi data pivot per bulan: 120 30

6. Gabungkan Data

data_wide <- data_pivot %>%
  left_join(data_makro, by = c("tahun", "bulan")) %>%
  arrange(tahun, bulan) %>%
  mutate(date = make_date(tahun, bulan, 1))

cat("Dimensi data wide FINAL:", dim(data_wide), "\n")
## Dimensi data wide FINAL: 120 34

7. Imputasi Missing Value

data_wide <- data_wide %>%
  mutate(across(where(is.numeric),
                ~ zoo::na.locf(., na.rm = FALSE))) %>%
  mutate(across(where(is.numeric),
                ~ ifelse(is.na(.),
                         median(., na.rm = TRUE),
                         .)))

8. Fitur Kalender dan Iklim

ramadan_months <- tibble(
  tahun = c(2016,2016, 2017,2017,2017, 2018,2018, 2019,2019,
            2020,2020,2020, 2021,2021,2021, 2022,2022,
            2023,2023,2023, 2024,2024, 2025,2025),
  bulan = c(6,7, 5,6,7, 5,6, 5,6, 4,5,6, 4,5,6, 4,5, 3,4,5, 3,4, 3,4),
  ramadan_dummy = 1L
)
data_wide <- data_wide %>%
  left_join(ramadan_months, by = c("tahun", "bulan")) %>%
  mutate(ramadan_dummy = replace_na(ramadan_dummy, 0L),
         el_nino_dummy = ifelse(index_enso > 0.5, 1L, 0L),
         panen_padi_dummy = ifelse(bulan %in% c(3,4), 1L, 0L),
         panen_bawang_dummy = ifelse(bulan %in% c(6,7), 1L, 0L),
         bulan_sin = sin(2*pi*bulan/12),
         bulan_cos = cos(2*pi*bulan/12))

9. Lag Harga dan Volume

harga_cols <- grep("^harga_eceran_", names(data_wide), value = TRUE)
vol_cols   <- grep("^volume_impor_kg_", names(data_wide), value = TRUE)
for (col in c(harga_cols, vol_cols)) {
  data_wide[[paste0("lag1_", col)]] <- lag(data_wide[[col]], 1)
}

10. Harga Impor dalam Rupiah

nilai_cols <- grep("^nilai_impor_usd_", names(data_wide), value = TRUE)
vol_cols_nm <- grep("^volume_impor_kg_", names(data_wide), value = TRUE)
for (i in seq_along(nilai_cols)) {
  kom <- gsub("nilai_impor_usd_", "", nilai_cols[i])
  data_wide[[paste0("harga_impor_idr_", kom)]] <-
    (data_wide[[nilai_cols[i]]] * data_wide$usd_to_idr) / (data_wide[[vol_cols_nm[i]]] + 1e-9)
}

11. Definisi Target Gejolak

bobot_ihk <- c(beras=0.036, cabai=0.012, bawang_merah=0.008,
               daging_ayam=0.020, daging_sapi=0.012, gula=0.010, telur=0.018)
bobot_ihk <- bobot_ihk / sum(bobot_ihk)
inflasi_cols <- grep("^inflasi_komoditas_", names(data_wide), value = TRUE)
inflasi_mat <- as.matrix(data_wide[, inflasi_cols])
komoditas_order <- gsub("inflasi_komoditas_", "", inflasi_cols)
bobot_ordered <- bobot_ihk[komoditas_order]
bobot_ordered[is.na(bobot_ordered)] <- mean(bobot_ihk)
bobot_ordered <- bobot_ordered / sum(bobot_ordered)
data_wide$inflasi_tertimbang <- as.numeric(inflasi_mat %*% bobot_ordered)

for (col in harga_cols) {
  kom <- gsub("harga_eceran_", "", col)
  data_wide[[paste0("harga_change_", kom)]] <-
    (data_wide[[col]] - lag(data_wide[[col]])) / (lag(data_wide[[col]]) + 1e-9) * 100
}
spike_cols <- grep("^harga_change_", names(data_wide), value = TRUE)
data_wide$pct_spike <- rowMeans(data_wide[, spike_cols] > 5, na.rm = TRUE) * 100

harga_mat <- as.matrix(data_wide[, harga_cols])
data_wide$harga_tertimbang <- as.numeric(harga_mat %*% bobot_ordered)
threshold_ekstrem <- median(data_wide$harga_tertimbang, na.rm = TRUE) +
  1.5 * IQR(data_wide$harga_tertimbang, na.rm = TRUE)

data_wide$status_gejolak <- ifelse(
  data_wide$inflasi_tertimbang > 2 |
    data_wide$pct_spike > 30 |
    data_wide$harga_tertimbang > threshold_ekstrem, 1L, 0L)
data_wide$status_gejolak[1] <- 0L

proporsi_gejolak <- mean(data_wide$status_gejolak)*100
cat("\nProporsi gejolak:", round(proporsi_gejolak,1), "%\n")
## 
## Proporsi gejolak: 29.2 %

12. Persiapan Data Model

data_model <- data_wide %>%
  arrange(date) %>%
  mutate(target = lead(status_gejolak, 1)) %>%
  filter(!is.na(target)) %>%
  select(-date, -tahun, -bulan, -status_gejolak,
         -starts_with("harga_change_"),
         -inflasi_tertimbang, -harga_tertimbang, -pct_spike) %>%
  mutate(across(everything(), as.numeric))

# --- PERBAIKAN: Hapus fitur impor tidak relevan (telur, daging ayam) -------
fitur_tidak_relevan <- c(
  "volume_impor_kg_Telur", "nilai_impor_usd_Telur",
  "volume_impor_kg_Daging Ayam", "nilai_impor_usd_Daging Ayam"
)
data_model <- data_model[, !names(data_model) %in% fitur_tidak_relevan]

X <- data_model %>% select(-target)
y <- data_model$target
cat(sprintf("\nData model: %d obs x %d fitur\n", nrow(X), ncol(X)))
## 
## Data model: 119 obs x 54 fitur
print(table(y))
## y
##  0  1 
## 84 35

13. Fungsi Helper

hitung_prauc <- function(actual, prob) {
  if (length(unique(actual)) < 2) return(NA)
  pr <- PRROC::pr.curve(scores.class0 = prob[actual==1], scores.class1 = prob[actual==0], curve=FALSE)
  pr$auc.integral
}
hitung_f2 <- function(actual, pred) {
  tp <- sum(actual==1 & pred==1); fp <- sum(actual==0 & pred==1); fn <- sum(actual==1 & pred==0)
  denom <- 5*tp + 4*fn + fp; if(denom==0) return(0); 5*tp/denom
}
evaluasi_threshold <- function(actual, prob, thresholds = seq(0.1,0.7,0.05)) {
  map_dfr(thresholds, function(thr) {
    pred <- ifelse(prob >= thr, 1L, 0L)
    if(length(unique(pred))<2) return(tibble(threshold=thr, recall=NA, precision=NA, specificity=NA, f2=0, f1=NA))
    cm <- caret::confusionMatrix(as.factor(pred), as.factor(actual), positive="1")
    tibble(threshold=thr, recall=cm$byClass["Sensitivity"], precision=cm$byClass["Pos Pred Value"],
           specificity=cm$byClass["Specificity"], f2=hitung_f2(actual, pred), f1=cm$byClass["F1"])
  })
}

14. Baseline

cat("\n========== BASELINE ==========\n")
## 
## ========== BASELINE ==========
prauc_persist <- hitung_prauc(y[-1], as.numeric(y[-length(y)]))
f2_persist <- hitung_f2(y[-1], y[-length(y)])
cat(sprintf("Persistence | PR-AUC: %.4f | F2: %.4f\n", prauc_persist, f2_persist))
## Persistence | PR-AUC: 0.4506 | F2: 0.5172
rule_pred <- as.numeric(data_wide$el_nino_dummy[-nrow(data_wide)]==1 & data_wide$inflasi_tertimbang[-nrow(data_wide)]>1)
prauc_rule <- hitung_prauc(y, rule_pred)
f2_rule <- hitung_f2(y, rule_pred)
cat(sprintf("Rule-based  | PR-AUC: %.4f | F2: %.4f\n", prauc_rule, f2_rule))
## Rule-based  | PR-AUC: 0.2926 | F2: 0.0680

15. Elastic Net

cat("\n========== ELASTIC NET ==========\n")
## 
## ========== ELASTIC NET ==========
window_size <- 60
n <- nrow(X)
pred_en <- rep(NA_real_, n - window_size)
actual_en <- rep(NA_real_, n - window_size)
success <- 0

for (i in seq(window_size, n-1)) {
  idx <- i - window_size + 1
  X_train <- as.matrix(X[1:i, ])
  y_train <- y[1:i]
  X_test  <- as.matrix(X[i+1, , drop=FALSE])
  y_test  <- y[i+1]
  actual_en[idx] <- y_test
  
  na_rows <- which(apply(X_train, 1, anyNA))
  if (length(na_rows) > 0) {
    X_train <- X_train[-na_rows, , drop=FALSE]
    y_train <- y_train[-na_rows]
  }
  if (length(unique(y_train)) < 2) next
  if (nrow(X_train) < 10) next
  
  vars <- apply(X_train, 2, var, na.rm=TRUE)
  keep <- which(vars > 0 & !is.na(vars))
  if (length(keep) < 3) next
  X_train <- X_train[, keep, drop=FALSE]
  X_test  <- X_test[,  keep, drop=FALSE]
  if (anyNA(X_test)) next
  
  means <- colMeans(X_train)
  sds <- apply(X_train, 2, sd); sds[sds==0] <- 1
  X_tr_s <- scale(X_train, center=means, scale=sds)
  X_te_s <- matrix((X_test - means)/sds, nrow=1)
  
  set.seed(42)
  en_cv <- tryCatch(
    cv.glmnet(X_tr_s, y_train, family="binomial", alpha=0.5, nfolds=min(5, length(y_train)), type.measure="deviance"),
    error=function(e) NULL
  )
  if (!is.null(en_cv)) {
    prob <- predict(en_cv, newx=X_te_s, s="lambda.1se", type="response")[1,1]
    pred_en[idx] <- prob; success <- success+1
  } else {
    en_fit <- tryCatch(glmnet(X_tr_s, y_train, family="binomial", alpha=0.5), error=function(e) NULL)
    if (!is.null(en_fit)) {
      lam <- en_fit$lambda[ceiling(length(en_fit$lambda)/2)]
      prob <- predict(en_fit, newx=X_te_s, s=lam, type="response")[1,1]
      pred_en[idx] <- prob; success <- success+1
    }
  }
  if (idx %% 50 == 0) cat(sprintf("Iterasi %d/%d, sukses: %d\n", idx, n-window_size, success))
}
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Warning in lognet(x, is.sparse, y, weights, offset, alpha, nobs, nvars, : one
## multinomial or binomial class has fewer than 8 observations; dangerous ground
## Iterasi 50/59, sukses: 50
valid <- !is.na(pred_en) & !is.na(actual_en)
pred_en <- pred_en[valid]; actual_en <- actual_en[valid]
cat(sprintf("\nPrediksi Elastic Net berhasil: %d dari %d iterasi (%.1f%%)\n", length(pred_en), n-window_size, 100*length(pred_en)/(n-window_size)))
## 
## Prediksi Elastic Net berhasil: 59 dari 59 iterasi (100.0%)
if (length(pred_en)==0) stop("Elastic Net gagal total.")
if (length(unique(actual_en))<2) stop("Hanya satu kelas pada actual.")

prauc_en <- hitung_prauc(actual_en, pred_en)
roc_en <- pROC::roc(actual_en, pred_en, levels=c(0,1), direction="<", quiet=TRUE)
auc_en <- as.numeric(pROC::auc(roc_en))
cat(sprintf("PR-AUC: %.4f | ROC-AUC: %.4f\n", prauc_en, auc_en))
## PR-AUC: 0.7286 | ROC-AUC: 0.7012
thr_eval_en <- evaluasi_threshold(actual_en, pred_en)
best_idx_en <- which.max(thr_eval_en$f2)
best_thr_en <- thr_eval_en$threshold[best_idx_en]
cat(sprintf("Threshold optimal (F2): %.2f | Recall: %.3f | Precision: %.3f | F2: %.4f\n",
            best_thr_en, thr_eval_en$recall[best_idx_en], thr_eval_en$precision[best_idx_en], thr_eval_en$f2[best_idx_en]))
## Threshold optimal (F2): 0.15 | Recall: 1.000 | Precision: 0.414 | F2: 0.7792

16. Cost-Benefit Analysis

# Asumsi: biaya false negative (gejolak tidak terdeteksi) = 5x biaya false positive
biaya_FN <- 5
biaya_FP <- 1

cost_df <- thr_eval_en %>%
  mutate(
    FN_rate = 1 - recall,
    FP_rate = 1 - specificity,
    n_actual_1 = sum(actual_en == 1),
    n_actual_0 = sum(actual_en == 0),
    total_biaya = (FN_rate * n_actual_1 * biaya_FN) + (FP_rate * n_actual_0 * biaya_FP)
  )
best_cost_idx <- which.min(cost_df$total_biaya)
best_cost_thr <- cost_df$threshold[best_cost_idx]

cat(sprintf("\nThreshold optimal berdasarkan cost (FN=5xFP): %.2f\n", best_cost_thr))
## 
## Threshold optimal berdasarkan cost (FN=5xFP): 0.15
cat(sprintf("  Recall: %.3f | Precision: %.3f | F2: %.4f\n",
            cost_df$recall[best_cost_idx], cost_df$precision[best_cost_idx], cost_df$f2[best_cost_idx]))
##   Recall: 1.000 | Precision: 0.414 | F2: 0.7792
p_cost <- ggplot(cost_df, aes(x = threshold, y = total_biaya)) +
  geom_line(color = "purple", linewidth = 1) +
  geom_point(size = 2) +
  geom_vline(xintercept = best_cost_thr, linetype = "dotted", color = "gray40") +
  labs(title = "Total Biaya Kesalahan vs Threshold",
       subtitle = sprintf("Minimum biaya pada threshold = %.2f", best_cost_thr),
       x = "Threshold", y = "Estimasi Total Biaya") +
  theme_minimal()
print(p_cost)
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_line()`).
## Warning: Removed 5 rows containing missing values or values outside the scale range
## (`geom_point()`).

17. Decision Tree

cat("\n========== DECISION TREE (rpart) ==========\n")
## 
## ========== DECISION TREE (rpart) ==========
pred_dt <- rep(NA_real_, n - window_size)
actual_dt <- rep(NA_real_, n - window_size)
success_dt <- 0

for (i in seq(window_size, n-1)) {
  idx <- i - window_size + 1
  X_train <- as.data.frame(X[1:i, ])
  y_train <- y[1:i]
  X_test  <- as.data.frame(X[i+1, , drop=FALSE])
  y_test  <- y[i+1]
  actual_dt[idx] <- y_test
  
  na_rows <- which(apply(X_train, 1, anyNA))
  if (length(na_rows) > 0) {
    X_train <- X_train[-na_rows, , drop=FALSE]
    y_train <- y_train[-na_rows]
  }
  if (length(unique(y_train)) < 2) next
  if (nrow(X_train) < 10) next
  
  vars <- apply(X_train, 2, var, na.rm=TRUE)
  keep <- which(vars > 0 & !is.na(vars))
  if (length(keep) < 2) next
  X_train <- X_train[, keep, drop=FALSE]
  X_test  <- X_test[, keep, drop=FALSE]
  if (anyNA(X_test)) next
  
  set.seed(42)
  dt_model <- tryCatch(
    rpart(y_train ~ ., data = cbind(y_train, X_train), method = "class",
          control = rpart.control(maxdepth=3, minsplit=20, cp=0.01)),
    error = function(e) NULL
  )
  if (!is.null(dt_model)) {
    prob <- predict(dt_model, newdata = X_test, type = "prob")[, "1"]
    if (length(prob) == 0) prob <- 0.5
    pred_dt[idx] <- prob
    success_dt <- success_dt + 1
  }
  if (idx %% 50 == 0) cat(sprintf("Iterasi %d/%d, sukses: %d\n", idx, n-window_size, success_dt))
}
## Iterasi 50/59, sukses: 50
valid_dt <- !is.na(pred_dt) & !is.na(actual_dt)
pred_dt <- pred_dt[valid_dt]; actual_dt <- actual_dt[valid_dt]
cat(sprintf("\nPrediksi Decision Tree berhasil: %d dari %d iterasi (%.1f%%)\n", 
            length(pred_dt), n-window_size, 100*length(pred_dt)/(n-window_size)))
## 
## Prediksi Decision Tree berhasil: 59 dari 59 iterasi (100.0%)
if (length(pred_dt) > 0 && length(unique(actual_dt)) >= 2) {
  prauc_dt <- hitung_prauc(actual_dt, pred_dt)
  roc_dt <- pROC::roc(actual_dt, pred_dt, levels=c(0,1), direction="<", quiet=TRUE)
  auc_dt <- as.numeric(pROC::auc(roc_dt))
  cat(sprintf("PR-AUC: %.4f | ROC-AUC: %.4f\n", prauc_dt, auc_dt))
  thr_eval_dt <- evaluasi_threshold(actual_dt, pred_dt)
  best_idx_dt <- which.max(thr_eval_dt$f2)
  best_thr_dt <- thr_eval_dt$threshold[best_idx_dt]
  cat(sprintf("Threshold optimal (F2): %.2f | Recall: %.3f | Precision: %.3f | F2: %.4f\n",
              best_thr_dt, thr_eval_dt$recall[best_idx_dt], thr_eval_dt$precision[best_idx_dt], thr_eval_dt$f2[best_idx_dt]))
}
## PR-AUC: 0.5986 | ROC-AUC: 0.5970
## Threshold optimal (F2): 0.10 | Recall: 0.875 | Precision: 0.420 | F2: 0.7192

18. Visualisasi

# Plot status gejolak
p_status <- ggplot(data_wide, aes(x = date, y = status_gejolak)) +
  geom_line(color = "gray60", linewidth = 0.6) +
  geom_point(aes(color = factor(status_gejolak)), size = 2) +
  scale_color_manual(values = c("0" = "#22c55e", "1" = "#ef4444"),
                     labels = c("Aman", "Gejolak")) +
  labs(title = "Status Gejolak Harga Pangan Nasional (2016-2025)",
       x = "Tanggal", y = "Status", color = NULL) +
  theme(legend.position = "bottom")
print(p_status)

# PR Curve perbandingan
if (exists("prauc_en") && exists("prauc_dt")) {
  pr_en_curve <- PRROC::pr.curve(scores.class0 = pred_en[actual_en==1], scores.class1 = pred_en[actual_en==0], curve=TRUE)
  pr_dt_curve <- PRROC::pr.curve(scores.class0 = pred_dt[actual_dt==1], scores.class1 = pred_dt[actual_dt==0], curve=TRUE)
  pr_df_en <- as.data.frame(pr_en_curve$curve); names(pr_df_en) <- c("recall", "precision", "threshold")
  pr_df_dt <- as.data.frame(pr_dt_curve$curve); names(pr_df_dt) <- c("recall", "precision", "threshold")
  pr_df_en$model <- "Elastic Net"
  pr_df_dt$model <- "Decision Tree"
  pr_comb <- rbind(pr_df_en, pr_df_dt)
  p_pr_comb <- ggplot(pr_comb, aes(x = recall, y = precision, color = model)) +
    geom_line(linewidth = 1) +
    geom_hline(yintercept = mean(y), linetype = "dashed", color = "gray50") +
    scale_color_manual(values = c("Elastic Net" = "#3B82F6", "Decision Tree" = "#22C55E")) +
    labs(title = "Perbandingan Precision-Recall Curve",
         subtitle = sprintf("PR-AUC | EN: %.4f | DT: %.4f", prauc_en, prauc_dt),
         x = "Recall", y = "Precision", color = NULL) +
    theme(legend.position = "bottom")
  print(p_pr_comb)
}

# Trade-off Elastic Net
p_trade_en <- thr_eval_en %>%
  select(threshold, recall, precision, f2) %>%
  pivot_longer(-threshold, names_to = "metrik", values_to = "nilai") %>%
  ggplot(aes(x = threshold, y = nilai, color = metrik)) +
  geom_line(linewidth = 1) +
  geom_vline(xintercept = best_thr_en, linetype = "dotted", color = "gray40") +
  scale_color_manual(values = c("recall" = "#3B82F6", "precision" = "#EF4444", "f2" = "#22C55E"),
                     labels = c("f2" = "F2-score", "precision" = "Precision", "recall" = "Recall")) +
  labs(title = "Trade-off Recall vs Precision (Elastic Net)",
       subtitle = sprintf("Threshold optimal (F2) = %.2f", best_thr_en),
       x = "Threshold Probabilitas", y = "Nilai Metrik", color = NULL) +
  theme(legend.position = "bottom")
print(p_trade_en)
## Warning: Removed 10 rows containing missing values or values outside the scale range
## (`geom_line()`).

# Trade-off Decision Tree
if (exists("thr_eval_dt")) {
  p_trade_dt <- thr_eval_dt %>%
    select(threshold, recall, precision, f2) %>%
    pivot_longer(-threshold, names_to = "metrik", values_to = "nilai") %>%
    ggplot(aes(x = threshold, y = nilai, color = metrik)) +
    geom_line(linewidth = 1) +
    geom_vline(xintercept = best_thr_dt, linetype = "dotted", color = "gray40") +
    scale_color_manual(values = c("recall" = "#3B82F6", "precision" = "#EF4444", "f2" = "#22C55E"),
                       labels = c("f2" = "F2-score", "precision" = "Precision", "recall" = "Recall")) +
    labs(title = "Trade-off Recall vs Precision (Decision Tree)",
         subtitle = sprintf("Threshold optimal (F2) = %.2f", best_thr_dt),
         x = "Threshold Probabilitas", y = "Nilai Metrik", color = NULL) +
    theme(legend.position = "bottom")
  print(p_trade_dt)
}

# ROC Curve perbandingan
if (exists("roc_dt")) {
  roc_compare <- ggroc(list("Elastic Net" = roc_en, "Decision Tree" = roc_dt), size = 1) +
    geom_abline(slope = 1, intercept = 1, linetype = "dashed", color = "gray50") +
    scale_color_manual(values = c("Elastic Net" = "#3B82F6", "Decision Tree" = "#22C55E")) +
    labs(title = "Kurva ROC: Elastic Net vs Decision Tree",
         subtitle = sprintf("ROC-AUC | EN: %.3f | DT: %.3f", auc_en, auc_dt),
         x = "1 - Specificity", y = "Sensitivity", color = NULL) +
    theme(legend.position = "bottom")
  print(roc_compare)
}
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the pROC package.
##   Please report the issue at <https://github.com/xrobin/pROC/issues>.
## This warning is displayed once per session.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

19. Interpretasi Model

# a) Koefisien Elastic Net (model final)
X_full <- as.matrix(X); y_full <- y
na_rows_full <- which(apply(X_full, 1, anyNA))
if (length(na_rows_full) > 0) {
  X_full <- X_full[-na_rows_full, , drop=FALSE]; y_full <- y_full[-na_rows_full]
}
vars_full <- apply(X_full, 2, var, na.rm=TRUE)
keep_full <- which(vars_full > 0 & !is.na(vars_full))
X_full <- X_full[, keep_full, drop=FALSE]
means_f <- colMeans(X_full); sds_f <- apply(X_full, 2, sd); sds_f[sds_f==0] <- 1
X_scaled <- scale(X_full, center=means_f, scale=sds_f)
set.seed(42)
final_cv <- cv.glmnet(X_scaled, y_full, family="binomial", alpha=0.5, nfolds=5)
coef_full <- coef(final_cv, s = "lambda.1se")
coef_df <- data.frame(fitur = rownames(coef_full), koef = as.numeric(coef_full)) %>%
  filter(fitur != "(Intercept)", koef != 0) %>% arrange(desc(abs(koef)))
cat("\n========== KOEFISIEN ELASTIC NET (FINAL) ==========\n")
## 
## ========== KOEFISIEN ELASTIC NET (FINAL) ==========
cat(sprintf("Fitur aktif (koef ≠ 0): %d dari %d\n", nrow(coef_df), ncol(X)))
## Fitur aktif (koef ≠ 0): 8 dari 54
print(head(coef_df, 15))
##                               fitur         koef
## 1                harga_eceran_Beras 0.1788878658
## 2           lag1_harga_eceran_Beras 0.1556201540
## 3 lag1_volume_impor_kg_Bawang Merah 0.1005617838
## 4                         bulan_cos 0.0918205975
## 5                 harga_eceran_Gula 0.0822243240
## 6            lag1_harga_eceran_Gula 0.0389842422
## 7          harga_eceran_Daging Sapi 0.0180703149
## 8          harga_eceran_Daging Ayam 0.0005992081
# Plot koefisien harga eceran
coef_harga <- coef_df %>% filter(grepl("harga_eceran_", fitur)) %>%
  mutate(komoditas = gsub("harga_eceran_", "", fitur),
         arah = ifelse(koef > 0, "Positif", "Negatif"))
if (nrow(coef_harga) > 0) {
  p_coef <- ggplot(coef_harga, aes(x = reorder(komoditas, abs(koef)), y = koef, fill = arah)) +
    geom_bar(stat = "identity") + coord_flip() +
    scale_fill_manual(values = c("Positif" = "#EF4444", "Negatif" = "#3B82F6")) +
    labs(title = "Koefisien Elastic Net: Harga Eceran per Komoditas",
         x = "Komoditas", y = "Koefisien", fill = NULL) +
    theme(legend.position = "bottom")
  print(p_coef)
}

# b) Variable importance Decision Tree
X_train_dt <- X[complete.cases(X), ]
y_train_dt <- y[complete.cases(X)]
dt_final <- rpart(y_train_dt ~ ., data = cbind(y_train_dt, X_train_dt), method = "class",
                  control = rpart.control(maxdepth=3, minsplit=20, cp=0.01))
cat("\n========== VARIABLE IMPORTANCE DECISION TREE ==========\n")
## 
## ========== VARIABLE IMPORTANCE DECISION TREE ==========
print(dt_final$variable.importance)
##                harga_eceran_Beras          harga_eceran_Daging Ayam 
##                        10.8113622                        10.8113622 
##          harga_eceran_Daging Sapi                 harga_eceran_Gula 
##                        10.8113622                        10.8113622 
##                harga_eceran_Telur           lag1_harga_eceran_Beras 
##                        10.8113622                         9.7302260 
##                         bulan_cos lag1_volume_impor_kg_Bawang Merah 
##                         8.1814815                         3.6362140 
##              nilai_impor_usd_Gula      nilai_impor_usd_Bawang Merah 
##                         2.7676802                         1.3635802 
##      volume_impor_kg_Bawang Merah             volume_impor_kg_Cabai 
##                         1.3635802                         1.3635802 
##  lag1_volume_impor_kg_Daging Sapi             volume_impor_kg_Beras 
##                         1.1861487                         1.1861487 
##    inflasi_komoditas_Bawang Merah            inflasi_komoditas_Gula 
##                         0.9090535                         0.7907658 
##       nilai_impor_usd_Daging Sapi       volume_impor_kg_Daging Sapi 
##                         0.7907658                         0.7907658
rpart.plot(dt_final, main = "Decision Tree Final (maxdepth=3)", extra = 101, under = TRUE, fallen.leaves = TRUE)

20. Prediksi Bulan Depan

last_row <- as.matrix(X[nrow(X), keep_full, drop=FALSE])
last_scaled <- matrix((last_row - means_f) / sds_f, nrow=1)
prob_next <- predict(final_cv, newx=last_scaled, s="lambda.1se", type="response")[1,1]
bulan_depan <- tail(data_wide$date, 1) + months(1)

# Gunakan threshold berbasis cost untuk rekomendasi final
thresh_final <- best_cost_thr
thresh_siaga_final <- thresh_final * 0.6
status_final <- case_when(
  prob_next >= thresh_final ~ "WASPADA",
  prob_next >= thresh_siaga_final ~ "SIAGA",
  TRUE ~ "AMAN"
)
cat(sprintf("\nPrediksi %s: prob=%.3f -> %s (threshold cost=%.2f)\n", 
            format(bulan_depan, "%B %Y"), prob_next, status_final, thresh_final))
## 
## Prediksi Januari 2026: prob=0.663 -> WASPADA (threshold cost=0.15)

21. Ringkasan Eksekutif

cat("\n========== RINGKASAN EKSEKUTIF (DENGAN KOREKSI) ==========\n")
## 
## ========== RINGKASAN EKSEKUTIF (DENGAN KOREKSI) ==========
cat(sprintf("Total data bulan: %d | Observasi model: %d\n", nrow(data_wide), nrow(X)))
## Total data bulan: 120 | Observasi model: 119
cat(sprintf("Proporsi gejolak dalam data: %.1f%%\n", proporsi_gejolak))
## Proporsi gejolak dalam data: 29.2%
cat(sprintf("Elastic Net | PR-AUC: %.4f | ROC-AUC: %.4f\n", prauc_en, auc_en))
## Elastic Net | PR-AUC: 0.7286 | ROC-AUC: 0.7012
cat(sprintf("Threshold optimal F2: %.2f (Recall=%.3f, Precision=%.3f)\n", 
            best_thr_en, thr_eval_en$recall[best_idx_en], thr_eval_en$precision[best_idx_en]))
## Threshold optimal F2: 0.15 (Recall=1.000, Precision=0.414)
cat(sprintf("Threshold optimal cost (FN=5xFP): %.2f (Recall=%.3f, Precision=%.3f)\n",
            best_cost_thr, cost_df$recall[best_cost_idx], cost_df$precision[best_cost_idx]))
## Threshold optimal cost (FN=5xFP): 0.15 (Recall=1.000, Precision=0.414)
cat(sprintf("Prediksi %s: prob=%.3f -> %s\n", format(bulan_depan, "%B %Y"), prob_next, status_final))
## Prediksi Januari 2026: prob=0.663 -> WASPADA
cat("\nKeterbatasan yang diakui:\n")
## 
## Keterbatasan yang diakui:
cat("- Ukuran sampel kecil (119 bulan) → recall 100% rapuh, perlu validasi lebih lanjut\n")
## - Ukuran sampel kecil (119 bulan) → recall 100% rapuh, perlu validasi lebih lanjut
cat("- Multikolinieritas harga komoditas menyebabkan Decision Tree tidak diskriminatif\n")
## - Multikolinieritas harga komoditas menyebabkan Decision Tree tidak diskriminatif
cat("- Threshold 0,10 tidak realistis secara ekonomi; gunakan threshold berbasis cost (%.2f)\n", best_cost_thr)
## - Threshold 0,10 tidak realistis secara ekonomi; gunakan threshold berbasis cost (%.2f)
##  0.15
cat("- Fitur impor telur dan daging ayam dihapus karena tidak relevan secara substansi\n")
## - Fitur impor telur dan daging ayam dihapus karena tidak relevan secara substansi
cat("=========================================\n")
## =========================================