Optimasi Portofolio CCMV dengan MOCv-ABC serta Perhitungan VaR

## ===========================================
## 0) PACKAGES, THEME, SEED
## ===========================================

suppressPackageStartupMessages({
  pkgs <- c(
    "quantmod","dplyr","tidyr","moments","ggplot2","ggrepel","plotly",
    "heatmaply","gt","scales","RColorBrewer","zoo","xts","stats","openxlsx"
  )
  inst <- pkgs[!sapply(pkgs, requireNamespace, quietly = TRUE)]
  if (length(inst)) install.packages(inst)
  invisible(lapply(pkgs, library, character.only = TRUE))
})

## Warning: package 'quantmod' was built under R version 4.5.1

## Warning: package 'plotly' was built under R version 4.5.1

## Warning: package 'heatmaply' was built under R version 4.5.1

## Warning: package 'gt' was built under R version 4.5.1

set.seed(42)
theme_set(theme_minimal(base_size = 12))

## =============================================
## 1) KONFIGURASI DATA & PARAMETER GLOBAL
## =============================================

# Data universe & periode
trading_days <- 252
tickers <- c("AKRA","BBNI","BBRI","BMRI","EXCL","INTP",
             "ISAT","JSMR","MEDC","MTEL","PGAS","PGEO",
             "PTPP","SMGR","SSIA","TLKM","TOWR","UNTR")
yahoo_tickers <- paste0(tickers, ".JK")
from_date <- as.Date("2022-01-01")
to_date   <- as.Date("2025-09-30")

# Kendala CCMV (cardinality + bound)
alpha_w  <- 0.10      # bobot minimum tiap aset aktif
beta_w   <- 0.40      # bobot maksimum tiap aset aktif
d_min    <- 3         # kardinalitas minimum
d_max    <- 5         # kardinalitas maksimum
must_have_name <- "PGAS"  # kosongkan "" untuk menonaktifkan syarat wajib

# Parameter MOCv-ABC / NSGA-II
limit_factor <- 10
sigma_abc    <- 0.6
max_iter     <- 700
seed_list    <- c(101,123,321,111,222,333)  # dipakai di util run coverage
seed_opt     <- seed_list[1]    # dipakai untuk run subset manual
debug_opt    <- TRUE

# Risk-free rate (Sharpe)
rf <- 0.0475

# VaR Cornish–Fisher
cl    <- 0.95
alpha <- 1 - cl
V0    <- 1
hp    <- 1

## =============================================
## 2) AMBIL DATA SAHAM & BENTUK PANEL
## =============================================

# Prices (Adjusted Close)
prices_list <- lapply(yahoo_tickers, function(sym) {
  suppressWarnings(getSymbols(sym, from = from_date, to = to_date, auto.assign = FALSE))
})
adj_list   <- lapply(prices_list, Ad)
prices_xts <- do.call(merge, adj_list)
colnames(prices_xts) <- tickers
prices_xts <- na.omit(prices_xts)

# Log-returns harian
rets_xts <- na.omit(diff(log(prices_xts)))

# Drop aset yang all-NA (jaga konsistensi)
na_all <- sapply(rets_xts, function(x) all(is.na(x)))
if (any(na_all)) {
  keep <- which(!na_all)
  rets_xts <- rets_xts[, keep, drop = FALSE]
  tickers  <- tickers[keep]
  message("Menghapus aset tanpa data lengkap. Sisa aset: ", length(tickers),
          " -> ", paste(tickers, collapse = ", "))
}

# Long data untuk EDA
rets_df <- rets_xts |>
  as.data.frame() |> tibble::rownames_to_column("date") |>
  mutate(date = as.Date(date)) |>
  pivot_longer(-date, names_to = "ticker", values_to = "ret")

prices_df <- prices_xts |>
  as.data.frame() |> tibble::rownames_to_column("date") |>
  mutate(date = as.Date(date)) |>
  pivot_longer(-date, names_to = "ticker", values_to = "price")

## =======================================================
## 3) EDA INTERAKTIF (Harga, Distribusi, Deskriptif, Korelasi, Rolling Vol)
## =======================================================

# Helper facet per 9 saham
make_groups <- function(x, size = 9) {
  split(x, rep(seq_len(ceiling(length(x) / size)), each = size, length.out = length(x)))
}
groups <- make_groups(tickers, size = 9)

## 3a) Harga per saham (plotly facet)
plot_prices_by_group <- function(g_ids = 1:length(groups)) {
  pls <- vector("list", length(g_ids))
  for (ii in seq_along(g_ids)) {
    g <- groups[[ g_ids[ii] ]]
    df_sub <- prices_df |> dplyr::filter(ticker %in% g)

    p <- ggplot(df_sub, aes(date, price, color = ticker)) +
      geom_line(linewidth = 0.6, alpha = 0.9, show.legend = FALSE) +
      facet_wrap(~ ticker, ncol = 3, scales = "free_y") +
      geom_smooth(method = "loess", se = FALSE, linewidth = 0.2, color = "black") +
      labs(title = "Time Series Harga Saham", x = NULL, y = "Harga") +
      theme(axis.text.y = element_text(size = 7),
            axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
            plot.title = element_text(size = 16, face = "bold", hjust = 0.5))
    pls[[ii]] <- plotly::ggplotly(p)
  }
  pls
}
price_plots <- plot_prices_by_group(1:2)

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

price_plots[[1]]; price_plots[[2]]

## 3b) Distribusi return (density vs normal)
pal_colors <- c(AKRA="#0072B2",BBNI="#D55E00",BBRI="#009E73",BMRI="#6A0DAD",EXCL="#F0E442",
                INTP="#E69F00",ISAT="#4D4D4D",JSMR="#56B4E9",MEDC="#B2B200",MTEL="#CC79A7",
                PGAS="#800000",PGEO="#1F77B4",PTPP="#FF7F0E",SMGR="#2CA02C",SSIA="#9467BD",
                TLKM="#8C564B",TOWR="#17BECF",UNTR="#1B263B")
names(pal_colors) <- tickers

plot_density_by_group <- function(g_ids = 1:length(groups)) {
  pls <- vector("list", length(g_ids))

  summ_by_tkr_all <- rets_df |>
    group_by(ticker) |>
    summarise(
      mean_d = mean(ret, na.rm = TRUE),
      sd_d   = sd(ret,  na.rm = TRUE),
      x_min  = quantile(ret, 0.005, na.rm = TRUE),
      x_max  = quantile(ret, 0.995, na.rm = TRUE),
      .groups = "drop"
    )

  norm_df_all <- summ_by_tkr_all |>
    rowwise() |>
    mutate(x = list(seq(x_min, x_max, length.out = 400)),
           y = list(dnorm(x, mean = mean_d, sd = sd_d))) |>
    tidyr::unnest(c(x, y)) |> ungroup()

  for (ii in seq_along(g_ids)) {
    tick_sub <- groups[[ g_ids[ii] ]]
    rets_sub <- rets_df |> filter(ticker %in% tick_sub)
    summ_sub <- summ_by_tkr_all |> filter(ticker %in% tick_sub)
    norm_sub <- norm_df_all     |> filter(ticker %in% tick_sub)

    p <- ggplot() +
      geom_density(data = rets_sub, aes(ret, after_stat(density), color = ticker),
                   linewidth = 0.8, alpha = 0.9, show.legend = FALSE) +
      geom_line(data = norm_sub, aes(x, y), color = "black", linewidth = 0.4) +
      geom_vline(xintercept = 0, color = "grey40", linewidth = 0.4) +
      geom_vline(data = summ_sub, aes(xintercept = mean_d),
                 color = "black", linewidth = 0.5, linetype = "dashed") +
      facet_wrap(~ ticker, ncol = 3, scales = "free_y") +
      scale_color_manual(values = pal_colors[names(pal_colors) %in% tick_sub]) +
      labs(title = "Distribusi Return Harian", x = "Return", y = "Kepadatan") +
      theme(plot.title = element_text(size = 16, face = "bold", hjust = 0.5),
            strip.text = element_text(face = "bold"),
            panel.grid.minor = element_blank(),
            legend.position  = "none")
    pls[[ii]] <- plotly::ggplotly(p)
  }
  pls
}
density_plots <- plot_density_by_group(1:2)
density_plots[[1]]; density_plots[[2]]

## 3c) Statistik deskriptif & uji normalitas (JB)
jb_test <- function(x) {
  x <- x[is.finite(x)]
  n <- length(x); if (n < 8) return(NA_real_)
  s <- moments::skewness(x); k <- moments::kurtosis(x)
  JB <- n/6 * (s^2 + ((k-3)^2 / 4))
  1 - pchisq(JB, df = 2)
}
stats_df <- rets_df |>
  group_by(ticker) |>
  summarise(
    mean_d  = mean(ret, na.rm = TRUE),
    var_d   = var(ret,  na.rm = TRUE),
    sd_d    = sd(ret,   na.rm = TRUE),
    skew    = moments::skewness(ret, na.rm = TRUE),
    kurt    = moments::kurtosis(ret,  na.rm = TRUE),
    jb_p    = jb_test(ret),
    .groups = "drop"
  ) |>
  mutate(
    mean_ann = mean_d * trading_days,
    sd_ann   = sd_d   * sqrt(trading_days),
    var_ann  = sd_ann^2,
    rank_mean = rank(-mean_ann, ties.method = "min"),
    rank_sd   = rank(sd_ann,    ties.method = "min")
  ) |>
  arrange(rank_sd, rank_mean)

stats_gt <- stats_df |>
  select(ticker, mean_ann, sd_ann, skew, kurt, jb_p, rank_mean, rank_sd) |>
  gt(rowname_col = "ticker") |>
  fmt_number(columns = c(mean_ann, sd_ann), decimals = 4) |>
  fmt_number(columns = c(skew, kurt),      decimals = 2) |>
  fmt_number(columns = c(jb_p, rank_mean, rank_sd), decimals = 2) |>
  tab_header(title = md("**Statistik Deskriptif Return**")) |>
  cols_label(mean_ann = "Mean (ann.)", sd_ann="SD (ann.)", skew="Skew", kurt="Kurt",
             jb_p="JB p-val", rank_mean="Rank Mean", rank_sd="Rank SD") |>
  data_color(columns = c(rank_sd),
             colors  = scales::col_numeric(c("#2DC937","#E7B416","#CC3232"), domain = NULL)) |>
  tab_source_note(md("Warna hijau = lebih baik (SD rendah). JB p-val kecil → indikasi non-normal."))

## Warning: Since gt v0.9.0, the `colors` argument has been deprecated.
## • Please use the `fn` argument instead.
## This warning is displayed once every 8 hours.

stats_gt

	Mean (ann.)	SD (ann.)	Skew	Kurt	JB p-val	Rank Mean	Rank SD
Statistik Deskriptif Return
PGAS	0.1693	0.2869	0.26	6.47	0.00	4.00	1.00
TLKM	−0.0393	0.2875	0.06	4.50	0.00	14.00	2.00
BBRI	0.0044	0.3039	−0.08	6.13	0.00	12.00	3.00
MTEL	−0.0349	0.3048	−0.29	9.50	0.00	13.00	4.00
BBNI	0.0454	0.3089	0.08	5.36	0.00	8.00	5.00
BMRI	0.0229	0.3105	−0.23	6.29	0.00	11.00	6.00
EXCL	0.1049	0.3160	0.15	6.91	0.00	6.00	7.00
JSMR	0.0410	0.3184	0.36	5.06	0.00	10.00	8.00
UNTR	0.2004	0.3235	−0.01	14.70	0.00	3.00	9.00
INTP	−0.1773	0.3478	0.04	7.93	0.00	15.00	10.00
AKRA	0.0443	0.3707	0.72	14.13	0.00	9.00	11.00
TOWR	−0.2212	0.3810	0.80	8.20	0.00	17.00	12.00
SMGR	−0.3497	0.3897	0.10	9.21	0.00	18.00	13.00
ISAT	0.0728	0.4185	−0.71	9.10	0.00	7.00	14.00
MEDC	0.1312	0.4641	0.50	6.61	0.00	5.00	15.00
PGEO	0.2264	0.5015	1.08	6.88	0.00	2.00	16.00
PTPP	−0.2011	0.5725	1.57	10.74	0.00	16.00	17.00
SSIA	0.6058	0.5845	0.70	9.66	0.00	1.00	18.00
Warna hijau = lebih baik (SD rendah). JB p-val kecil → indikasi non-normal.

## 3c.1) Simpan Expected Return & Risiko (SD) tiap saham ke Excel
risk_return_df <- stats_df |>
  dplyr::select(ticker, mean_ann, sd_ann) |>
  dplyr::rename(Expected_Return = mean_ann, Risk_SD = sd_ann)
output_file <- "saham_risk_return.xlsx"
openxlsx::write.xlsx(risk_return_df, file = output_file,
                     sheetName = "RiskReturn", overwrite = TRUE)
cat("✅ File disimpan ke:", normalizePath(output_file), "\n")

## ✅ File disimpan ke: D:\KULIAH\BISMILLAH SKRIPSI\saham_risk_return.xlsx

## 3c.2) Top-5 Expected Return & Top-5 Risiko Terendah
top5_mean_ann <- stats_df |> slice_max(order_by = mean_ann, n = 5) |> arrange(mean_ann)
p_top_mean_ann <- ggplot(top5_mean_ann, aes(reorder(ticker, mean_ann), mean_ann)) +
  geom_col(fill = "#2E86DE") + coord_flip() +
  geom_text(aes(label = round(mean_ann, 3)), hjust = -0.1, size = 3.2) +
  expand_limits(y = max(top5_mean_ann$mean_ann) * 1.15) +
  labs(title = "Top-5 Expected Return Tertinggi", x = NULL, y = "Mean")
plotly::ggplotly(p_top_mean_ann)

top5_low_sd_ann <- stats_df |> slice_min(order_by = sd_ann, n = 5) |> arrange(sd_ann)
p_low_sd_ann <- ggplot(top5_low_sd_ann, aes(reorder(ticker, -sd_ann), sd_ann)) +
  geom_col(fill = "#27AE60") + coord_flip() +
  geom_text(aes(label = round(sd_ann, 4)), hjust = -0.1, size = 3.2) +
  expand_limits(y = max(top5_low_sd_ann$sd_ann) * 1.15) +
  labs(title = "Top-5 Risiko Terendah", x = NULL, y = "SD")
plotly::ggplotly(p_low_sd_ann)

## 3d) Korelasi + dendrogram (heatmaply)
pval_mat <- function(M) {
  k <- ncol(M)
  P <- matrix(NA_real_, k, k, dimnames = list(colnames(M), colnames(M)))
  for (i in 1:k) for (j in i:k) {
    ct <- suppressWarnings(cor.test(M[, i], M[, j], use = "pairwise.complete.obs"))
    P[i, j] <- P[j, i] <- ct$p.value
  }
  P
}
wide_ret <- rets_df |> tidyr::pivot_wider(names_from = ticker, values_from = ret) |> dplyr::select(-date)
cor_Pear <- cor(wide_ret, use = "pairwise.complete.obs", method = "pearson")
p_Pear   <- pval_mat(as.data.frame(wide_ret))
cor_sig  <- cor_Pear # (bisa masking p<=0.05 bila mau)

sector_lookup <- c(AKRA="Energi",BBNI="Keuangan",BBRI="Keuangan",BMRI="Keuangan",EXCL="Infrastruktur",
                   INTP="Barang Baku",ISAT="Infrastruktur",JSMR="Infrastruktur",MEDC="Energi",MTEL="Infrastruktur",
                   PGAS="Energi",PGEO="Infrastruktur",PTPP="Infrastruktur",SMGR="Barang Baku",SSIA="Infrastruktur",
                   TLKM="Infrastruktur",TOWR="Infrastruktur",UNTR="Perindustrian")
side_anno <- data.frame(Sector = sector_lookup[colnames(cor_sig)])
row.names(side_anno) <- colnames(cor_sig)

heatmaply(
  cor_sig, dendrogram = "row", seriate = "OLO", limits = c(-1, 1),
  colors = colorRampPalette(rev(RColorBrewer::brewer.pal(11, "RdBu")))(256),
  grid_color = "black", grid_width = 0.0001,
  label_names = c("Saham i", "Saham j", "Korelasi"),
  main = "Heatmap Korelasi", showticklabels = c(TRUE, TRUE), na.value = "grey90",
  row_side_colors = side_anno, column_side_colors = side_anno
)

## 3e) Rolling volatility 30 hari
roll_sd_xts <- zoo::rollapply(rets_xts, 30, sd, by.column = TRUE, align = "right")
roll_sd_df <- roll_sd_xts |>
  as.data.frame() |> tibble::rownames_to_column("date") |>
  mutate(date = as.Date(date)) |>
  pivot_longer(-date, names_to = "ticker", values_to = "roll_sd")

plot_rollvol_by_group <- function(g_ids = 1:length(groups)) {
  pls <- vector("list", length(g_ids))
  for (ii in seq_along(g_ids)) {
    tick_sub <- groups[[ g_ids[ii] ]]
    roll_sub <- roll_sd_df |> filter(ticker %in% tick_sub)

    p <- ggplot(roll_sub, aes(date, roll_sd, color = ticker)) +
      geom_line(linewidth = 0.4, alpha = 0.9, show.legend = FALSE) +
      facet_wrap(~ ticker, ncol = 3, scales = "free_y") +
      labs(title = "Rolling 30-hari Volatilitas", x = NULL, y = "SD") +
      theme(axis.text.y = element_text(size = 7),
            axis.text.x = element_text(angle = 45, hjust = 1, size = 8),
            plot.title = element_text(size = 16, face = "bold", hjust = 0.5))
    pls[[ii]] <- plotly::ggplotly(p)
  }
  pls
}
roll_plots <- plot_rollvol_by_group(1:2)
roll_plots[[1]]; roll_plots[[2]]

## =============================================
## 4) PARAMETER OPTIMASI (annualized μ & Σ)
## =============================================
mu <- colMeans(rets_xts, na.rm = TRUE) * trading_days
S  <- cov(rets_xts, use = "pairwise.complete.obs") * trading_days
stopifnot(is.numeric(mu), is.matrix(S), length(mu) == ncol(S), ncol(S) == nrow(S))
d <- length(mu)

## =============================================
## 5) UTILITAS CCMV + ABC + NSGA-II
## =============================================

# Proyeksi bobot ke simplex terbatas (sum=1, L<=w<=U)
project_box_simplex <- function(w, L, U, s = 1) {
  k <- length(w)
  L <- rep_len(L, k); U <- rep_len(U, k)
  w <- pmin(pmax(w, L), U)
  S <- sum(w); if (abs(S - s) < 1e-12) return(w)
  free <- rep(TRUE, k); iter <- 0
  while (abs(S - s) > 1e-12 && iter < 1000) {
    iter <- iter + 1
    idx <- which(free); if (!length(idx)) break
    delta <- (S - s) / length(idx)
    w[idx] <- w[idx] - delta
    hitL <- which(w < L + 1e-15); hitU <- which(w > U - 1e-15)
    if (length(hitL)) { w[hitL] <- L[hitL]; free[hitL] <- FALSE }
    if (length(hitU)) { w[hitU] <- U[hitU]; free[hitU] <- FALSE }
    S <- sum(w); if (!any(free)) break
  }
  if (abs(sum(w) - s) > 1e-8) {
    idx <- which((w > L + 1e-12) & (w < U - 1e-12))
    if (length(idx) > 0) {
      adj <- (sum(w) - s) / length(idx)
      w[idx] <- pmin(pmax(w[idx] - adj, L[idx]), U[idx])
    }
  }
  w
}

# Bersihkan solusi: tegakkan kardinalitas & aset wajib
clean_portfolio <- function(w, d, alpha, beta, d_min, d_max, must_have_idx_or_NULL) {
  stopifnot(length(w) == d)
  w[!is.finite(w)] <- 0; w[w < 1e-12] <- 0
  active <- which(w > 0)

  if (!is.null(must_have_idx_or_NULL)) {           # pastikan aset wajib aktif
    if (!(must_have_idx_or_NULL %in% active)) active <- c(must_have_idx_or_NULL, active)
  }

  nz <- length(active)                              # tegakkan kardinalitas
  if (nz < d_min) {
    cand <- setdiff(seq_len(d), active)
    add  <- if (length(cand) >= (d_min - nz)) sample(cand, d_min - nz) else cand
    active <- c(active, add)
  } else if (nz > d_max) {
    drop_cand <- if (is.null(must_have_idx_or_NULL)) active else setdiff(active, must_have_idx_or_NULL)
    if (length(drop_cand) > 0) {
      drop <- sample(drop_cand, nz - d_max)  # acak agar ragam pola meningkat
      active <- setdiff(active, drop)
    }
  }

  w[-active] <- 0
  if (length(active) > 0) {
    w_active <- w[active]; w_active[!is.finite(w_active)] <- 0
    if (sum(w_active) <= 0) w_active <- rep(1/length(active), length(active))
    w_active <- project_box_simplex(w_active, L = alpha, U = beta, s = 1)
    w[active] <- w_active
  }
  w[abs(w) < 1e-12] <- 0
  w
}

# Tujuan multi: f1=min SD, f2=min(-Return) → max Return
obj_vec <- function(w, mu, S) {
  risk_sd <- sqrt(max(0, as.numeric(t(w) %*% S %*% w)))
  ret  <- sum(w * mu)
  c(f1 = risk_sd, f2 = -ret)
}

# Pareto rules + sorting + crowding
dominates <- function(a, b) all(a <= b) && any(a < b)

fast_non_dominated_sort <- function(F) {
  F <- as.matrix(F); n <- nrow(F)
  Slist <- vector("list", n); n_dom <- integer(n); ranks <- integer(n)
  fronts <- list(); front1 <- integer(0)
  for (p in 1:n) {
    Slist[[p]] <- integer(0); n_dom[p] <- 0
    for (q in 1:n) if (p != q) {
      if (dominates(F[p,], F[q,])) Slist[[p]] <- c(Slist[[p]], q)
      else if (dominates(F[q,], F[p,])) n_dom[p] <- n_dom[p] + 1
    }
    if (n_dom[p] == 0) { ranks[p] <- 1; front1 <- c(front1, p) }
  }
  fronts[[1]] <- front1
  i <- 1
  while (length(fronts[[i]]) > 0) {
    Q <- integer(0)
    for (p in fronts[[i]]) for (q in Slist[[p]]) {
      n_dom[q] <- n_dom[q] - 1
      if (n_dom[q] == 0) { ranks[q] <- i + 1; Q <- c(Q, q) }
    }
    i <- i + 1; fronts[[i]] <- Q
  }
  fronts[length(fronts)] <- NULL
  list(fronts = fronts, rank = ranks)
}

crowding_distance <- function(Fvals, idxs) {
  if (length(idxs) == 0) return(numeric(0))
  if (length(idxs) == 1) return(Inf)
  Fvals <- as.matrix(Fvals); k <- length(idxs); m <- ncol(Fvals)
  dist <- rep(0, k)
  for (j in 1:m) {
    ord <- order(Fvals[idxs, j])
    dist[ord[1]] <- Inf; dist[ord[k]] <- Inf
    v <- Fvals[idxs[ord], j]
    denom <- max(v) - min(v); if (!is.finite(denom) || denom == 0) denom <- 1e-12
    for (t in 2:(k-1)) dist[ord[t]] <- dist[ord[t]] + (v[t+1] - v[t-1]) / denom
  }
  dist
}

.active_pattern <- function(W, eps = 1e-8)  apply(W, 1, function(w) paste(as.integer(w > eps), collapse = ""))
.pattern_div_score <- function(cands_W, chosen_patterns) {
  if (length(chosen_patterns) == 0) return(rep(1, nrow(cands_W)))
  pat_c <- .active_pattern(cands_W); tab <- table(chosen_patterns)
  sapply(pat_c, function(p) 1 / (1 + ifelse(is.na(tab[p]), 0, tab[p])))
}

# Seleksi NSGA-II + bonus diversitas di ruang keputusan
nsga2_select <- function(P, Fvals, target_size, lambda_dec = 0.25) {
  P <- as.matrix(P); Fvals <- as.matrix(Fvals)
  fs <- fast_non_dominated_sort(Fvals); fronts <- fs$fronts
  nextP <- matrix(numeric(0), 0, ncol(P)); nextF <- matrix(numeric(0), 0, ncol(Fvals))
  chosen_patterns <- character(0)

  for (f in fronts) {
    if (is.null(f) || length(f) == 0) next
    remain <- target_size - nrow(nextP); if (remain <= 0) break

    if (length(f) <= remain) {
      nextP <- rbind(nextP, P[f, , drop = FALSE]); nextF <- rbind(nextF, Fvals[f, , drop = FALSE])
      chosen_patterns <- c(chosen_patterns, .active_pattern(P[f, , drop = FALSE]))
    } else {
      dist_obj <- crowding_distance(Fvals, f)
      candW <- P[f, , drop = FALSE]; div_dec <- .pattern_div_score(candW, chosen_patterns)
      score <- dist_obj + lambda_dec * div_dec; ord <- order(score, decreasing = TRUE)

      take_idx <- integer(0); i <- 1
      while (length(take_idx) < remain && i <= length(ord)) {
        cand <- f[ord[i]]; pat_cand <- .active_pattern(P[cand, , drop = FALSE])
        if (!(pat_cand %in% chosen_patterns) || (remain - length(take_idx) > 1)) {
          take_idx <- c(take_idx, cand); chosen_patterns <- c(chosen_patterns, pat_cand)
        }
        i <- i + 1
      }
      if (length(take_idx) < remain) {
        rest <- setdiff(f[ord], take_idx)
        take_idx <- c(take_idx, head(rest, remain - length(take_idx)))
      }
      nextP <- rbind(nextP, P[take_idx, , drop = FALSE]); nextF <- rbind(nextF, Fvals[take_idx, , drop = FALSE])
      break
    }
  }
  if (nrow(nextP) > target_size) { keep <- seq_len(target_size); nextP <- nextP[keep, , drop = FALSE]; nextF <- nextF[keep, , drop = FALSE] }
  rownames(nextP) <- NULL; rownames(nextF) <- NULL
  list(P = nextP, F = nextF, fronts = fronts)
}

## =============================================
## 6) LOOP UTAMA MOCv-ABC + NSGA-II + UTIL COVERAGE
## =============================================
run_mcabc <- function(mu, S, tickers,
                      alpha = 0.1, beta = 0.4, d_min = 3, d_max = 5,
                      must_have_name = NULL, must_have_idx = NULL,
                      sigma = 0.6, limit_factor = 10, max_iter = 700, seed = seed_opt,
                      debug = FALSE) {

  set.seed(seed)
  d <- length(mu); stopifnot(ncol(S) == d, nrow(S) == d)

  # assert dim (membantu debug jika ukuran berubah di tengah jalan)
  assert_dims <- function(P, Fvals, stage, n_employed, d) {
    rP <- NROW(P); cP <- NCOL(P); rF <- NROW(Fvals); cF <- NCOL(Fvals)
    if (!is.matrix(P)) stop(sprintf("[%s] P bukan matrix (class=%s)", stage, paste(class(P), collapse = ",")))
    if (!(isTRUE(rP == n_employed) && isTRUE(cP == d)))
      stop(sprintf("[%s] dim(P)=%s, expected %d x %d", stage, paste(dim(P), collapse = "x"), n_employed, d))
    if (!is.matrix(Fvals)) stop(sprintf("[%s] Fvals bukan matrix (class=%s)", stage, paste(class(Fvals), collapse=",")))
    if (!(isTRUE(rF == n_employed) && isTRUE(cF == 2)))
      stop(sprintf("[%s] dim(Fvals)=%s, expected %d x 2", stage, paste(dim(Fvals), collapse = "x"), n_employed))
    invisible(TRUE)
  }

  # indeks aset wajib
  m_idx <- NULL
  if (!is.null(must_have_name) && nzchar(must_have_name)) {
    pos <- match(must_have_name, tickers)
    if (!is.na(pos)) m_idx <- pos else warning("Ticker wajib (", must_have_name, ") tidak ditemukan; syarat wajib DINONAKTIFKAN.")
  } else if (!is.null(must_have_idx)) {
    if (must_have_idx >= 1 && must_have_idx <= d) m_idx <- must_have_idx
  }

  # ukuran koloni
  hive_size  <- d * 10L
  n_employed <- as.integer(floor(hive_size / 2))
  n_onlooker <- as.integer(hive_size - n_employed)
  limit <- as.integer(limit_factor * d)

  if (debug) cat("[DEBUG] d =", d, " hive_size =", hive_size, " n_employed =", n_employed, " n_onlooker =", n_onlooker, "\n")

  # inisialisasi populasi (diproyeksikan ke kendala)
  init_population <- function(size) {
    P0 <- matrix(runif(size * d), nrow = size, ncol = d)
    Pclean <- matrix(NA_real_, nrow = size, ncol = d)
    for (i in seq_len(size)) Pclean[i, ] <- clean_portfolio(P0[i, ], d, alpha, beta, d_min, d_max, m_idx)
    storage.mode(Pclean) <- "double"; dimnames(Pclean) <- NULL; Pclean
  }

  P <- init_population(n_employed)
  Fvals <- t(apply(P, 1, function(w) obj_vec(w, mu, S)))

  if (debug) {
    cat("[DEBUG] dim(P) =", paste(dim(P), collapse=" x "), " dim(Fvals) =", paste(dim(Fvals), collapse=" x "), "\n")
    cat(sprintf("[DEBUG] Check(init): nrow(P)=%s | n_employed=%s\n", NROW(P), n_employed))
  }
  assert_dims(P, Fvals, stage = "INIT", n_employed = n_employed, d = d)

  stagn <- integer(n_employed); best_seen <- rowSums(Fvals)
  hist_min_risk <- numeric(max_iter); hist_max_ret <- numeric(max_iter)

  for (g in seq_len(max_iter)) {
    # === Employed Bees: neighbor + guided by BEST (F1) ===
    phi    <- runif(1, -1, 1)
    varphi <- runif(1,  0, 1)

    tmp <- fast_non_dominated_sort(Fvals); F1 <- tmp$fronts[[1]]; if (length(F1) == 0) F1 <- 1L
    cd_F1 <- crowding_distance(Fvals, F1)
    BEST  <- as.numeric(P[F1[order(cd_F1, decreasing = TRUE)][1], , drop = FALSE])

    P_emp <- P
    for (i in seq_len(n_employed)) {
      j  <- sample(setdiff(seq_len(n_employed), i), 1)
      dm <- sample.int(d, 1)
      Xi <- P[i, ]; Xj <- P[j, ]
      dfn <- Xi[dm] - Xj[dm]; dfb <- BEST[dm] - Xi[dm]
      Xi_new <- Xi; Xi_new[dm] <- Xi[dm] + phi * dfn + varphi * dfb
      Xi_new <- clean_portfolio(Xi_new, d, alpha, beta, d_min, d_max, m_idx)
      P_emp[i, ] <- Xi_new
    }

    # === Onlookers: sampling kovarians di sekitar front terbaik ===
    fronts <- tmp$fronts; alloc <- integer(0); left <- n_onlooker; f_idx <- 1L
    while (left > 0 && f_idx <= length(fronts) && length(fronts[[f_idx]]) > 0) {
      take <- ceiling(left/2); alloc <- c(alloc, rep(f_idx, take)); left <- left - take; f_idx <- f_idx + 1L
    }
    if (left > 0) alloc <- c(alloc, rep(1L, left)); alloc <- alloc[1:n_onlooker]

    P_onl <- matrix(NA_real_, nrow = n_onlooker, ncol = d)
    for (idx in seq_len(n_onlooker)) {
      grp <- alloc[idx]; ids <- fronts[[grp]]
      if (length(ids) <= 1) { m <- colMeans(P); C <- stats::cov(P) }
      else                  { m <- colMeans(P[ids, , drop = FALSE]); C <- stats::cov(P[ids, , drop = FALSE]) }
      eig <- eigen(C, symmetric = TRUE); B <- eig$vectors; Dm <- diag(sqrt(pmax(0, eig$values)))
      Z <- as.numeric(m + sigma * (B %*% Dm %*% runif(d)))
      P_onl[idx, ] <- clean_portfolio(Z, d, alpha, beta, d_min, d_max, m_idx)
    }

    # === Scout & evaluasi ===
    F_emp <- t(apply(P_emp, 1, function(w) obj_vec(w, mu, S)))
    sum_emp <- rowSums(F_emp); improved <- sum_emp < best_seen
    stagn[improved] <- 0L; stagn[!improved] <- stagn[!improved] + 1L; best_seen <- pmin(best_seen, sum_emp)
    idx_stag <- which(stagn >= limit)
    if (length(idx_stag)) {
      for (ii in idx_stag) P_emp[ii, ] <- clean_portfolio(runif(d), d, alpha, beta, d_min, d_max, m_idx)
      stagn[idx_stag] <- 0L
    }
    F_emp <- t(apply(P_emp, 1, function(w) obj_vec(w, mu, S)))

    # === Seleksi NSGA-II (gabung employed + onlooker) ===
    P_comb <- rbind(P, P_emp, P_onl)
    F_onl  <- t(apply(P_onl, 1, function(w) obj_vec(w, mu, S)))
    F_comb <- rbind(Fvals, F_emp, F_onl)
    sel <- nsga2_select(P_comb, F_comb, n_employed)
    P <- as.matrix(sel$P); Fvals <- as.matrix(sel$F)

    # === Anti duplikasi ringan di ruang keputusan ===
    dup_tol <- 6
    keys <- apply(round(P, dup_tol), 1, paste, collapse = "|"); dups <- duplicated(keys)
    if (any(dups)) {
      for (ii in which(dups)) {
        w_new <- P[ii, ] + rnorm(ncol(P), sd = 0.02)
        P[ii, ] <- clean_portfolio(w_new, d = ncol(P), alpha = alpha, beta = beta,
                                   d_min = d_min, d_max = d_max, must_have_idx_or_NULL = m_idx)
      }
      Fvals[dups, ] <- t(apply(P[dups, , drop = FALSE], 1, function(w) obj_vec(w, mu, S)))
    }

    if (debug && (g %% 10 == 0)) {
      cat(sprintf("[DEBUG] iter %d | dim(P)=%s | dim(Fvals)=%s\n",
                  g, paste(dim(P), collapse="x"), paste(dim(Fvals), collapse="x")))
    }
    assert_dims(P, Fvals, stage = paste0("ITER-", g), n_employed = n_employed, d = d)
    hist_min_risk[g] <- min(Fvals[,1]); hist_max_ret[g] <- max(-Fvals[,2])
  }

  list(P = P, Fvals = Fvals, tickers = tickers,
       history = data.frame(iter = seq_len(max_iter),
                            min_risk = hist_min_risk, max_return = hist_max_ret))
}

# Jalankan sampai mencakup k = 3,4,5 dalam satu run (pakai daftar seed)
run_mcabc_until_coverage <- function(mu, S, tickers,
                                     alpha, beta, d_min, d_max,
                                     must_have_name,
                                     sigma, limit_factor, max_iter,
                                     seeds = seed_list,
                                     debug = FALSE) {
  last <- NULL
  for (sd in seeds) {
    res <- run_mcabc(mu=mu, S=S, tickers=tickers,
                     alpha=alpha, beta=beta, d_min=d_min, d_max=d_max,
                     must_have_name=must_have_name,
                     sigma=sigma, limit_factor=limit_factor,
                     max_iter=max_iter, seed=sd, debug=debug)
    P <- res$P; k_count <- rowSums(P > 1e-8)
    if (all(c(3,4,5) %in% unique(k_count))) {
      message(sprintf("✅ Cakupan k=3,4,5 tercapai pada seed %d (max_iter=%d).", sd, max_iter))
      return(list(res = res, seed = sd, k_counts = table(k_count)))
    }
    last <- list(res = res, seed = sd, k_counts = table(k_count))
    message(sprintf("… seed %d belum lengkap. k_counts=%s",
                    sd, paste(names(table(k_count)), as.integer(table(k_count)), sep=":", collapse=", ")))
  }
  warning("Tidak ada seed yang menghasilkan cakupan lengkap k=3,4,5. Kembalikan hasil terakhir.")
  last
}

## =============================================
## 7) JALANKAN OPTIMASI & VISUALISASI FRONTIER
## =============================================
mcabc_cov <- run_mcabc_until_coverage(
  mu=mu, S=S, tickers=tickers,
  alpha=alpha_w, beta=beta_w, d_min=3, d_max=5,
  must_have_name=must_have_name,
  sigma=sigma_abc, limit_factor=limit_factor,
  max_iter=max_iter, seeds=seed_list, debug=FALSE
)

## … seed 101 belum lengkap. k_counts=4:9, 5:81

## … seed 123 belum lengkap. k_counts=4:10, 5:80

## ✅ Cakupan k=3,4,5 tercapai pada seed 321 (max_iter=700).

res <- mcabc_cov$res
seed_used <- mcabc_cov$seed
P <- res$P; Fvals <- res$Fvals

# Plot frontier (semua solusi, highlight Pareto)
fs  <- fast_non_dominated_sort(Fvals)
F1  <- fs$fronts[[1]]
cat("Jumlah solusi di Front-1:", length(F1), "\n")

## Jumlah solusi di Front-1: 72

dfF <- as.data.frame(Fvals); colnames(dfF) <- c("risk_sd","neg_ret")
dfF$return <- -dfF$neg_ret; dfF$pareto <- FALSE; dfF$pareto[F1] <- TRUE
p_front <- ggplot(dfF, aes(risk_sd, return, color = pareto)) +
  geom_point(alpha = .8) +
  scale_color_manual(values = c("#B0B0B0","#2E86DE")) +
  labs(title = "Risk-Return Frontier (SD vs Return)",
       x = "Risk (SD, annualized)", y = "Return (annualized)", color = NULL)
plotly::ggplotly(p_front)

# Frekuensi keikutsertaan aset di F1
P_F1 <- P[F1, , drop = FALSE]
freq <- colSums(P_F1 > 1e-8); pct <- 100 * freq / nrow(P_F1)
freq_df <- tibble::tibble(ticker = tickers, freq = freq, pct = pct) |> dplyr::arrange(desc(pct))
print(freq_df)

## # A tibble: 18 × 3
##    ticker  freq    pct
##    <chr>  <dbl>  <dbl>
##  1 PGAS      72 100   
##  2 UNTR      66  91.7 
##  3 EXCL      61  84.7 
##  4 SSIA      61  84.7 
##  5 PGEO      36  50   
##  6 BBNI      27  37.5 
##  7 MTEL      11  15.3 
##  8 TLKM      10  13.9 
##  9 JSMR       2   2.78
## 10 AKRA       0   0   
## 11 BBRI       0   0   
## 12 BMRI       0   0   
## 13 INTP       0   0   
## 14 ISAT       0   0   
## 15 MEDC       0   0   
## 16 PTPP       0   0   
## 17 SMGR       0   0   
## 18 TOWR       0   0

top5_freq <- head(freq_df, 5)
p_freq <- ggplot(top5_freq, aes(reorder(ticker, pct), pct)) +
  geom_col(fill = "#2E86DE") + coord_flip() +
  labs(title = "Top-5 Saham Paling Sering di Front-1", x = NULL, y = "Frekuensi (%)")
plotly::ggplotly(p_freq)

# Ringkasan jumlah solusi per-k (populasi & Pareto-1)
k_each <- rowSums(P > 1e-8)
count_pop <- as.data.frame(table(k = k_each)); names(count_pop) <- c("k","count_in_population"); count_pop$k <- as.integer(as.character(count_pop$k))
count_f1  <- as.data.frame(table(k = k_each[F1])); names(count_f1) <- c("k","count_in_F1"); count_f1$k <- as.integer(as.character(count_f1$k))
counts_summary <- dplyr::full_join(count_pop, count_f1, by = "k") |>
  dplyr::arrange(k) |>
  dplyr::mutate(dplyr::across(dplyr::starts_with("count"), ~replace(., is.na(.), 0L)))
print(counts_summary)

##   k count_in_population count_in_F1
## 1 3                   3           3
## 2 4                   8           8
## 3 5                  79          61

## =============================================
## 8) EMPAT PORTOFOLIO TERPILIH:
##    (1) Cluster manual + (2-4) MOCv-ABC k=3/4/5 (Sharpe tertinggi)
## =============================================

# Helper metrik & pemilihan berdasarkan Sharpe
portfolio_metrics <- function(W, mu, S, rf) {
  ER  <- as.numeric(W %*% mu)
  VAR <- apply(W, 1, function(w) as.numeric(t(w) %*% S %*% w))
  SD  <- sqrt(VAR)
  Sharpe <- (ER - rf) / pmax(SD, 1e-12)
  data.frame(ER = ER, SD = SD, Sharpe = Sharpe, row.names = NULL)
}
pick_best_by_k <- function(P, mu, S, rf, idx_rows) {
  if (length(idx_rows) == 0) return(NULL)
  Wk <- P[idx_rows, , drop = FALSE]; M <- portfolio_metrics(Wk, mu, S, rf)
  i_local <- which.max(M$Sharpe)
  list(w = Wk[i_local, ], ER = M$ER[i_local], SD = M$SD[i_local], Sharpe = M$Sharpe[i_local],
       idx_global = idx_rows[i_local])
}

# (A) Portofolio #1 dari hasil cluster/dendrogram (input manual saham)
cluster_manual <- c("PGAS", "AKRA", "TLKM", "BBRI")  # EDIT sesuai hasil dendrogrammu
stopifnot(all(cluster_manual %in% tickers))
sub_idx  <- match(cluster_manual, tickers)
mu_sub   <- mu[sub_idx]; S_sub <- S[sub_idx, sub_idx, drop = FALSE]
tkr_sub  <- tickers[sub_idx]; k_sub <- length(tkr_sub)
stopifnot(alpha_w * k_sub <= 1 + 1e-12, beta_w * k_sub >= 1 - 1e-12)

res_cluster <- run_mcabc(mu = mu_sub, S = S_sub, tickers = tkr_sub,
                         alpha = alpha_w, beta = beta_w,
                         d_min = k_sub, d_max = k_sub,
                         must_have_name = "", sigma = sigma_abc,
                         limit_factor = limit_factor, max_iter = max_iter,
                         seed = seed_opt, debug = FALSE)
P_cluster <- res_cluster$P
met_c     <- portfolio_metrics(P_cluster, mu_sub, S_sub, rf)
best_c_id <- which.max(met_c$Sharpe)
w_cluster <- numeric(length(tickers)); names(w_cluster) <- tickers
w_cluster[tkr_sub] <- P_cluster[best_c_id, ]  # expand ke universe penuh

# (B) Portofolio #2-#4 dari sekali run MOCv-ABC (k=3,4,5; Sharpe terbesar)
P_all <- res$P; k_count <- rowSums(P_all > 1e-8)
idx_k3 <- which(k_count == 3L); idx_k4 <- which(k_count == 4L); idx_k5 <- which(k_count == 5L)
best_k3 <- pick_best_by_k(P_all, mu, S, rf, idx_k3)
best_k4 <- pick_best_by_k(P_all, mu, S, rf, idx_k4)
best_k5 <- pick_best_by_k(P_all, mu, S, rf, idx_k5)

if (is.null(best_k3)) message("Tidak ada k=3 pada run ini—pertimbangkan menambah iterasi/seed.")
if (is.null(best_k4)) message("Tidak ada k=4 pada run ini—pertimbangkan menambah iterasi/seed.")
if (is.null(best_k5)) message("Tidak ada k=5 pada run ini—pertimbangkan menambah iterasi/seed.")

W_list <- list(
  `Cluster` = w_cluster,
  `ABC_k3`  = if (!is.null(best_k3)) best_k3$w else rep(NA_real_, length(tickers)),
  `ABC_k4`  = if (!is.null(best_k4)) best_k4$w else rep(NA_real_, length(tickers)),
  `ABC_k5`  = if (!is.null(best_k5)) best_k5$w else rep(NA_real_, length(tickers))
)
W4 <- do.call(rbind, W_list); colnames(W4) <- tickers

M4 <- portfolio_metrics(W4, mu, S, rf)
opt4_summary <- data.frame(Portfolio = rownames(W4), M4, row.names = NULL)
print(opt4_summary)

##   Portfolio         ER        SD    Sharpe
## 1   Cluster 0.07921994 0.1670981 0.1898282
## 2    ABC_k3 0.36556758 0.3427808 0.9279036
## 3    ABC_k4 0.35719188 0.2940212 1.0532978
## 4    ABC_k5 0.33945221 0.2774147 1.0524034

# Return harian 4 portofolio (untuk VaR & backtest)
common <- intersect(colnames(rets_xts), colnames(W4))
rets_use <- rets_xts[, common, drop = FALSE]
R4_mat   <- as.matrix(rets_use) %*% as.matrix(t(W4[, common, drop = FALSE]))
opt4_daily_log_returns <- xts::xts(R4_mat, order.by = index(rets_use))
colnames(opt4_daily_log_returns) <- rownames(W4)

# Scatterplot Risiko–Return (Plotly, label rapi)
text_positions <- c("top center","bottom center","top right","bottom right")
pos_vec <- rep(text_positions, length.out = nrow(opt4_summary))
y_offset <- (max(opt4_summary$ER) - min(opt4_summary$ER)) * 0.03
opt4_summary$label_y <- opt4_summary$ER + y_offset * c(1,-1,1,-1)
opt4_summary$label_hover <- paste0(
  "<b>", opt4_summary$Portfolio, "</b><br>",
  "Expected Return: ", sprintf("%.4f", opt4_summary$ER), "<br>",
  "Risk (SD): ", sprintf("%.4f", opt4_summary$SD), "<br>",
  "Sharpe Ratio: ", sprintf("%.4f", opt4_summary$Sharpe)
)

p_opt4_plotly_clean <- plot_ly(
  data = opt4_summary, x = ~SD, y = ~ER, type = "scatter", mode = "markers+text",
  text = ~Portfolio, textposition = pos_vec,
  textfont = list(size = 12, color = "black", family = "Arial"),
  marker = list(size = 14, color = ~Sharpe, colorscale = list(c(0,1), c("#d7191c","#1a9641")),
                line = list(color = "black", width = 1)),
  hovertemplate = opt4_summary$label_hover, showlegend = FALSE
) %>%
  layout(
    title = list(text = "Scatterplot Risiko–Return", x = 0.03, y = 0.97,
                 xanchor = "left", font = list(size = 18)),
    xaxis = list(title = "Risiko (SD, annualized)", tickformat = ".2f",
                 gridcolor = "rgba(200,200,200,0.3)", zeroline = TRUE),
    yaxis = list(title = "Expected Return (annualized)", tickformat = ".2f",
                 gridcolor = "rgba(200,200,200,0.3)", zeroline = TRUE),
    margin = list(t = 90, r = 40, b = 60, l = 70),
    paper_bgcolor = "white", plot_bgcolor = "white"
  )
p_opt4_plotly_clean

## =============================================
## 9) EFFICIENT FRONTIER — overlay 4 portofolio terpilih
##    Dua varian (A: dengan bound & wajib; B: hanya kardinalitas)
##    Tiga plot per k (k=3,4,5) → total 6 plot
## =============================================

# Util metrik & kardinalitas
metrics_from_W <- function(W, mu, S, rf) {
  ER  <- as.numeric(W %*% mu)
  VAR <- apply(W, 1, function(w) as.numeric(t(w) %*% S %*% w))
  SD  <- sqrt(VAR)
  Sharpe <- (ER - rf) / pmax(SD, 1e-12)
  data.frame(ER = ER, SD = SD, Sharpe = Sharpe)
}
k_card <- function(W, eps = 1e-8) rowSums(W > eps)

# Layer overlay untuk 4 portofolio (highlight k yang sama)
overlay_layer <- function(p, overlay_df, k_focus) {
  overlay_df$alpha_pt <- ifelse(overlay_df$k == k_focus, 1, 0.35)
  overlay_df$size_pt  <- ifelse(overlay_df$k == k_focus, 3.8, 3.0)
  overlay_df$stroke   <- ifelse(overlay_df$k == k_focus, 1.2, 0.6)
  p +
    geom_point(data = overlay_df, aes(SD, ER), shape = 21, fill = "skyblue", color = "black",
               size = overlay_df$size_pt, stroke = overlay_df$stroke, alpha = overlay_df$alpha_pt) +
    ggrepel::geom_text_repel(
      data = overlay_df, aes(SD, ER, label = Portfolio),
      size = 3.0, max.overlaps = 50, seed = 42, box.padding = 0.25, point.padding = 0.3
    )
}
pal_grad <- colorRampPalette(c("#d7191c","#fdae61","lightgreen","#1a9641"))(256)
k_overlay <- k_card(W4)
overlay_df <- cbind(opt4_summary, k = k_overlay) # kolom: Portfolio, ER, SD, Sharpe, k

# Varian A: batas bobot [alpha_w, beta_w], sum=1, PGAS wajib
set.seed(123)
d_all <- length(tickers)
m_idx <- match("PGAS", tickers); if (is.na(m_idx)) m_idx <- NULL

rand_weights_A <- function(k, n = 600L) {
  stopifnot(alpha_w * k <= 1 + 1e-12, beta_w * k >= 1 - 1e-12)
  W <- matrix(NA_real_, nrow = n, ncol = d_all); colnames(W) <- tickers
  for (i in 1:n) {
    w0 <- runif(d_all)
    W[i, ] <- clean_portfolio(w0, d_all, alpha_w, beta_w, d_min = k, d_max = k, must_have_idx_or_NULL = m_idx)
  }
  W
}
make_frontier_plot_A <- function(k) {
  W_rand <- rand_weights_A(k, n = 600)
  df_rand <- metrics_from_W(W_rand, mu, S, rf)
  p <- ggplot(df_rand, aes(SD, ER, color = Sharpe)) +
    geom_point(alpha = 0.85, size = 1.8) +
    scale_color_gradientn(colors = pal_grad) +
    labs(title = paste0("Efficient Frontier — Varian A (k = ", k, ")"),
         subtitle = "Random portfolios dengan batas bobot [α,β] & PGAS wajib",
         x = "Risiko (SD, annualized)", y = "Expected Return (annualized)", color = "Sharpe") +
    theme_minimal(base_size = 12)
  overlay_layer(p, overlay_df, k_focus = k) |> plotly::ggplotly()
}
pA_k3 <- make_frontier_plot_A(3); pA_k4 <- make_frontier_plot_A(4); pA_k5 <- make_frontier_plot_A(5)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

# Varian B: hanya kardinalitas = k (tanpa batas bobot & tanpa wajib)
set.seed(123)
rand_weights_B <- function(k, n = 600L) {
  W <- matrix(0, nrow = n, ncol = d_all); colnames(W) <- tickers
  for (i in 1:n) {
    idx <- sample.int(d_all, k, replace = FALSE)
    w   <- runif(k); w <- w / sum(w)
    W[i, idx] <- w
  }
  W
}
make_frontier_plot_B <- function(k) {
  W_rand <- rand_weights_B(k, n = 600)
  df_rand <- metrics_from_W(W_rand, mu, S, rf)
  p <- ggplot(df_rand, aes(SD, ER, color = Sharpe)) +
    geom_point(alpha = 0.85, size = 1.8) +
    scale_color_gradientn(colors = pal_grad) +
    labs(title = paste0("Efficient Frontier — Varian B (k = ", k, ")"),
         subtitle = "Random portfolios kardinalitas saja (tanpa batas bobot & wajib)",
         x = "Risiko (SD, annualized)", y = "Expected Return (annualized)", color = "Sharpe") +
    theme_minimal(base_size = 12)
  overlay_layer(p, overlay_df, k_focus = k) |> plotly::ggplotly()
}
pB_k3 <- make_frontier_plot_B(3); pB_k4 <- make_frontier_plot_B(4); pB_k5 <- make_frontier_plot_B(5)

## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues
## Warning in geom2trace.default(dots[[1L]][[1L]], dots[[2L]][[1L]], dots[[3L]][[1L]]): geom_GeomTextRepel() has yet to be implemented in plotly.
##   If you'd like to see this geom implemented,
##   Please open an issue with your example code at
##   https://github.com/ropensci/plotly/issues

# Tampilkan (opsional)
pA_k3; pA_k4; pA_k5

pB_k3; pB_k4; pB_k5

## =============================================
## 10) VAR CORNISH–FISHER (harian) UNTUK 4 PORTOFOLIO
## =============================================

# Statistik harian
Rport_daily <- as.matrix(opt4_daily_log_returns) # [T x 4]
port_names  <- colnames(Rport_daily)
mu_d   <- colMeans(Rport_daily, na.rm = TRUE)
sd_d   <- apply(Rport_daily, 2, sd, na.rm = TRUE)
sk_d   <- apply(Rport_daily, 2, function(x) moments::skewness(x, na.rm = TRUE))
kurt_d <- apply(Rport_daily, 2, function(x) moments::kurtosis(x, na.rm = TRUE))
ex_kurt_d <- kurt_d - 3

stats_top4_daily <- data.frame(
  Portfolio = port_names, mu_d = mu_d, sd_d = sd_d, skew = sk_d, kurt = kurt_d, ex_kurt = ex_kurt_d
)

# CF quantile & VaR
cf_transform <- function(z, skew, ex_kurt) {
  z + (1/6)*(z^2 - 1)*skew + (1/24)*(z^3 - 3*z)*ex_kurt + (1/36)*(2*z^3 - 5*z)*(skew^2)
}
cf_quantile <- function(alpha, skew, ex_kurt) {
  a <- qnorm(alpha)
  a + (skew/6)*(a^2 - 1) + (ex_kurt/24)*(a^3 - 3*a) - ((skew^2)/36)*(2*a^3 - 5*a)
}
var_cf_vector <- function(mu, sd, skew, ex_kurt, alpha, V0 = 1, hp = 1) {
  a_cf <- cf_quantile(alpha, skew, ex_kurt)
  VaR_CF <- V0 * (mu + a_cf * sd) * hp
  list(VaR_CF = VaR_CF, a_cf = a_cf)
}

res_list <- mapply(
  var_cf_vector, mu = mu_d, sd = sd_d, skew = sk_d, ex_kurt = ex_kurt_d,
  MoreArgs = list(alpha = alpha, V0 = V0, hp = hp), SIMPLIFY = FALSE
)
VaR_CF_vals <- sapply(res_list, function(x) x$VaR_CF)
a_cf_vals   <- sapply(res_list, function(x) x$a_cf)

VaR_CF_summary <- data.frame(
  Portfolio = port_names, CL = cl, alpha = alpha,
  mu_d = mu_d, sd_d = sd_d, skew = sk_d, kurt = kurt_d, excess_k = ex_kurt_d,
  a_cf = a_cf_vals, VaR_CF = VaR_CF_vals
)
VaR_CF_summary

##         Portfolio   CL alpha         mu_d       sd_d        skew      kurt
## Cluster   Cluster 0.95  0.05 0.0003143648 0.01052619  0.05596571 10.029615
## ABC_k3     ABC_k3 0.95  0.05 0.0014506650 0.02159316  0.38539345  5.709761
## ABC_k4     ABC_k4 0.95  0.05 0.0014174281 0.01852159 -0.14759380  8.743597
## ABC_k5     ABC_k5 0.95  0.05 0.0013470326 0.01747549 -0.17171038  9.062329
##         excess_k      a_cf      VaR_CF
## Cluster 7.029615 -1.487023 -0.01533833
## ABC_k3  2.709761 -1.477828 -0.03046032
## ABC_k4  5.743597 -1.570489 -0.02767053
## ABC_k5  6.062329 -1.570767 -0.02610289

## =============================================
# 11) BACKTEST VAR: KUPIEC + PLOT INTERAKTIF & TABEL GT
## =============================================

# Kupiec (unconditional coverage) — breach jika return < VaR (VaR negatif)
rets_mat   <- as.matrix(opt4_daily_log_returns)
port_names <- colnames(rets_mat)
VaR_compare_vec <- VaR_CF_summary$VaR_CF[seq_along(port_names)]

kupiec_test <- function(returns, VaR, alpha = 0.05) {
  x <- as.numeric(returns)
  v <- as.numeric(VaR); if (length(v) == 1) v <- rep(v, length(x))
  I <- as.integer(x < v)       # breach jika return < VaR (ingat VaR negatif)
  I <- I[is.finite(I)]
  n <- length(I); xB <- sum(I)
  if (n == 0) return(data.frame(n = 0, x = NA, prop = NA, LR_uc = NA, p_value = NA))
  p_hat <- pmax(pmin(xB / n, 1 - 1e-12), 1e-12)
  LR_uc <- -2 * ( (n - xB)*log(1 - alpha) + xB*log(alpha) - (n - xB)*log(1 - p_hat) - xB*log(p_hat) )
  p_uc  <- 1 - pchisq(LR_uc, df = 1)
  data.frame(n = n, x = xB, prop = xB/n, LR_uc = LR_uc, p_value = p_uc)
}

results_list <- lapply(seq_along(port_names), function(i) {
  ku <- kupiec_test(rets_mat[, i], VaR_compare_vec[i], alpha = alpha)
  data.frame(Portfolio = port_names[i], CL = 1 - alpha,
             Violations = ku$x, ViolationRate = ku$prop,
             Kupiec_LR = ku$LR_uc, Kupiec_p = ku$p_value)
})
VaR_backtest_table <- do.call(rbind, results_list)
VaR_backtest_table

##   Portfolio   CL Violations ViolationRate  Kupiec_LR  Kupiec_p
## 1   Cluster 0.95         31    0.05090312 0.01039807 0.9187798
## 2    ABC_k3 0.95         29    0.04761905 0.07380153 0.7858801
## 3    ABC_k4 0.95         25    0.04105090 1.09067064 0.2963227
## 4    ABC_k5 0.95         23    0.03776683 2.08830285 0.1484309

# Data untuk plot/gt (built from scratch agar konsisten dengan Kupiec)
if (exists("plot_df", inherits = FALSE)) rm(plot_df)

ret_df <- opt4_daily_log_returns |>
  as.data.frame() |> tibble::rownames_to_column("date") |>
  dplyr::mutate(date = as.Date(date)) |>
  tidyr::pivot_longer(-date, names_to = "Portfolio", values_to = "ret")

var_df <- VaR_CF_summary |> dplyr::transmute(Portfolio, VaR_line = VaR_CF)  # NEGATIF

plot_df <- dplyr::left_join(ret_df, var_df, by = "Portfolio") |>
  dplyr::mutate(breach = ret < VaR_line, breach_n = ave(breach, Portfolio, FUN = cumsum))

# validasi jumlah breach plot_df vs hasil kupiec
check_cnt <- plot_df |>
  dplyr::group_by(Portfolio) |>
  dplyr::summarise(breach = sum(breach), .groups = "drop")
print(check_cnt)

## # A tibble: 4 × 2
##   Portfolio breach
##   <chr>      <int>
## 1 ABC_k3        29
## 2 ABC_k4        25
## 3 ABC_k5        23
## 4 Cluster       31

# Plot pelanggaran per-portofolio (plotly)
plot_var_breach <- function(port_name) {
  dat <- plot_df |> filter(Portfolio == port_name) |> arrange(date)
  var_line <- unique(dat$VaR_line); stopifnot(length(var_line) == 1)
  p <- plot_ly()
  p <- add_bars(p, data = dat, x = ~date, y = ~ret, name = "Return",
                legendgroup = "g1",
                marker = list(line = list(width = 0)),
                hovertemplate = "<b>%{x}</b><br>Return: %{y:.4f}<extra>Return</extra>")
  p <- add_lines(p, data = dat, x = ~date, y = ~VaR_line, name = "VaR (ambang)",
                 legendgroup = "g2", line = list(width = 2), hoverinfo = "skip")
  p <- add_markers(p, data = subset(dat, breach), x = ~date, y = ~ret, name = "Pelanggaran",
                   legendgroup = "g3", marker = list(size = 8, symbol = "circle-open-dot", color = "red"),
                   customdata = ~breach_n,
                   hovertemplate = paste0("<b>%{x}</b><br>",
                                          "Return: %{y:.4f}<br>",
                                          "Ambang VaR: ", sprintf("%.4f", var_line), "<br>",
                                          "<b>PELANGGARAN</b> # %{customdata}<extra>Pelanggaran</extra>"))
  layout(p,
         title = list(text = paste0("Plot Pelanggaran VaR — ", port_name),
                      x = 0.02, y = 0.98, xanchor = "left", yanchor = "top"),
         margin = list(t = 90, r = 20, b = 40, l = 55),
         legend = list(orientation = "h", x = 0.02, y = 1.05, xanchor = "left", yanchor = "bottom",
                       bgcolor = "rgba(255,255,255,0.6)"),
         xaxis = list(title = NULL, automargin = TRUE, rangeslider = list(visible = TRUE)),
         yaxis = list(title = "Return harian", automargin = TRUE),
         barmode = "overlay")
}
plot_var_breach("Cluster")

plot_var_breach("ABC_k3")

plot_var_breach("ABC_k4")

plot_var_breach("ABC_k5")

# Tabel gt: daftar tanggal pelanggaran & ringkasannya
var_breach_table_gt <- function(port_name, digits = 4) {
  dat_port <- plot_df |> dplyr::filter(Portfolio == port_name)
  stopifnot(nrow(dat_port) > 0)
  n_total <- nrow(dat_port)
  n_breach <- sum(dat_port$breach, na.rm = TRUE)
  rate <- n_breach / n_total

  df_breach <- dat_port |>
    dplyr::filter(breach) |>
    arrange(date) |>
    transmute(
      Tanggal  = date,
      `Return` = ret,
      `VaR`    = VaR_line,
      `Selisih`= ret - VaR_line,
      `Ke-`    = breach_n
    )
  if (nrow(df_breach) == 0) {
    df_breach <- tibble::tibble(Tanggal = as.Date(character()),
                                `Return` = numeric(), `VaR` = numeric(),
                                `Selisih` = numeric(), `Ke-` = integer())
  }

  df_breach |>
    gt() |>
    fmt_number(columns = c(`Return`,`VaR`,`Selisih`), decimals = digits) |>
    data_color(columns = c(`Return`), colors = col_bin(palette = c("#CC3232","#2DC937"), bins = c(-Inf,0,Inf))) |>
    tab_header(
      title = md(paste0("**Pelanggaran VaR ", port_name, "**")),
      subtitle = md(paste0("Total observasi: **", n_total,
                           "** · Jumlah pelanggaran: **", n_breach,
                           "** · Violation rate: **", scales::percent(rate, accuracy = 0.01), "**"))
    ) |>
    cols_label(`Ke-` = "Pelanggaran ke-") |>
    tab_source_note(md("Catatan: Pelanggaran terjadi saat **return &lt; VaR** (ambang return minimum harian)."))
}
var_breach_table_gt("Cluster")

Tanggal	Return	VaR	Selisih	Pelanggaran ke-
Pelanggaran VaR Cluster
Total observasi: 609 · Jumlah pelanggaran: 31 · Violation rate: 5.09%
2023-03-14	−0.0215	−0.0153	−0.0062	1
2023-03-15	−0.0160	−0.0153	−0.0006	2
2023-03-16	−0.0234	−0.0153	−0.0081	3
2023-05-16	−0.0226	−0.0153	−0.0072	4
2023-09-07	−0.0158	−0.0153	−0.0005	5
2023-10-03	−0.0183	−0.0153	−0.0030	6
2023-10-30	−0.0182	−0.0153	−0.0029	7
2023-11-08	−0.0268	−0.0153	−0.0115	8
2023-12-07	−0.0181	−0.0153	−0.0028	9
2024-04-24	−0.0162	−0.0153	−0.0008	10
2024-04-26	−0.0390	−0.0153	−0.0236	11
2024-05-31	−0.0185	−0.0153	−0.0031	12
2024-07-09	−0.0280	−0.0153	−0.0127	13
2024-08-05	−0.0302	−0.0153	−0.0148	14
2024-08-27	−0.0244	−0.0153	−0.0091	15
2024-09-04	−0.0161	−0.0153	−0.0008	16
2024-10-15	−0.0160	−0.0153	−0.0007	17
2024-11-26	−0.0176	−0.0153	−0.0023	18
2024-12-10	−0.0173	−0.0153	−0.0019	19
2024-12-17	−0.0263	−0.0153	−0.0109	20
2024-12-19	−0.0163	−0.0153	−0.0009	21
2025-01-30	−0.0217	−0.0153	−0.0064	22
2025-02-06	−0.0205	−0.0153	−0.0051	23
2025-02-28	−0.0280	−0.0153	−0.0127	24
2025-03-13	−0.0168	−0.0153	−0.0015	25
2025-03-18	−0.0170	−0.0153	−0.0017	26
2025-04-08	−0.0703	−0.0153	−0.0550	27
2025-06-19	−0.0155	−0.0153	−0.0001	28
2025-06-20	−0.0259	−0.0153	−0.0106	29
2025-06-24	−0.0160	−0.0153	−0.0007	30
2025-06-25	−0.0263	−0.0153	−0.0109	31
Catatan: Pelanggaran terjadi saat return < VaR (ambang return minimum harian).

var_breach_table_gt("ABC_k3")

Tanggal	Return	VaR	Selisih	Pelanggaran ke-
Pelanggaran VaR ABC_k3
Total observasi: 609 · Jumlah pelanggaran: 29 · Violation rate: 4.76%
2023-03-13	−0.0315	−0.0305	−0.0011	1
2023-03-14	−0.0343	−0.0305	−0.0039	2
2023-03-27	−0.0318	−0.0305	−0.0013	3
2023-04-03	−0.0406	−0.0305	−0.0101	4
2023-05-16	−0.0308	−0.0305	−0.0003	5
2023-06-08	−0.0372	−0.0305	−0.0068	6
2023-09-07	−0.0308	−0.0305	−0.0003	7
2023-09-11	−0.0363	−0.0305	−0.0059	8
2023-10-12	−0.0391	−0.0305	−0.0087	9
2023-10-19	−0.0444	−0.0305	−0.0140	10
2023-11-08	−0.0469	−0.0305	−0.0164	11
2023-11-22	−0.0470	−0.0305	−0.0166	12
2024-01-12	−0.0369	−0.0305	−0.0065	13
2024-04-19	−0.0471	−0.0305	−0.0166	14
2024-05-20	−0.0545	−0.0305	−0.0240	15
2024-06-14	−0.0516	−0.0305	−0.0211	16
2024-08-05	−0.0719	−0.0305	−0.0415	17
2025-01-02	−0.0684	−0.0305	−0.0379	18
2025-02-11	−0.0316	−0.0305	−0.0012	19
2025-03-18	−0.0542	−0.0305	−0.0237	20
2025-04-08	−0.0978	−0.0305	−0.0674	21
2025-06-20	−0.0424	−0.0305	−0.0120	22
2025-07-16	−0.0387	−0.0305	−0.0082	23
2025-07-22	−0.0641	−0.0305	−0.0337	24
2025-07-30	−0.0344	−0.0305	−0.0039	25
2025-08-04	−0.0379	−0.0305	−0.0075	26
2025-09-01	−0.0335	−0.0305	−0.0030	27
2025-09-08	−0.0443	−0.0305	−0.0138	28
2025-09-24	−0.0366	−0.0305	−0.0061	29
Catatan: Pelanggaran terjadi saat return < VaR (ambang return minimum harian).

var_breach_table_gt("ABC_k4")

Tanggal	Return	VaR	Selisih	Pelanggaran ke-
Pelanggaran VaR ABC_k4
Total observasi: 609 · Jumlah pelanggaran: 25 · Violation rate: 4.11%
2023-03-14	−0.0392	−0.0277	−0.0115	1
2023-03-20	−0.0302	−0.0277	−0.0025	2
2023-04-03	−0.0278	−0.0277	−0.0001	3
2023-05-02	−0.0390	−0.0277	−0.0114	4
2023-05-16	−0.0318	−0.0277	−0.0041	5
2023-06-20	−0.0290	−0.0277	−0.0013	6
2023-10-19	−0.0336	−0.0277	−0.0059	7
2023-11-08	−0.0417	−0.0277	−0.0141	8
2024-01-12	−0.0308	−0.0277	−0.0031	9
2024-04-19	−0.0469	−0.0277	−0.0193	10
2024-05-20	−0.0414	−0.0277	−0.0138	11
2024-06-14	−0.0462	−0.0277	−0.0185	12
2024-08-05	−0.0745	−0.0277	−0.0469	13
2024-09-30	−0.0321	−0.0277	−0.0044	14
2024-11-11	−0.0301	−0.0277	−0.0024	15
2024-11-18	−0.0296	−0.0277	−0.0020	16
2025-01-02	−0.0784	−0.0277	−0.0507	17
2025-02-10	−0.0277	−0.0277	0.0000	18
2025-02-28	−0.0290	−0.0277	−0.0014	19
2025-03-18	−0.0517	−0.0277	−0.0240	20
2025-04-08	−0.1229	−0.0277	−0.0952	21
2025-06-20	−0.0281	−0.0277	−0.0004	22
2025-07-16	−0.0347	−0.0277	−0.0070	23
2025-07-22	−0.0500	−0.0277	−0.0223	24
2025-09-24	−0.0357	−0.0277	−0.0080	25
Catatan: Pelanggaran terjadi saat return < VaR (ambang return minimum harian).

var_breach_table_gt("ABC_k5")

Tanggal	Return	VaR	Selisih	Pelanggaran ke-
Pelanggaran VaR ABC_k5
Total observasi: 609 · Jumlah pelanggaran: 23 · Violation rate: 3.78%
2023-03-14	−0.0379	−0.0261	−0.0118	1
2023-03-20	−0.0300	−0.0261	−0.0039	2
2023-05-02	−0.0375	−0.0261	−0.0114	3
2023-05-16	−0.0287	−0.0261	−0.0026	4
2023-06-20	−0.0278	−0.0261	−0.0017	5
2023-10-19	−0.0330	−0.0261	−0.0069	6
2023-11-08	−0.0347	−0.0261	−0.0086	7
2024-01-12	−0.0329	−0.0261	−0.0068	8
2024-04-19	−0.0438	−0.0261	−0.0177	9
2024-05-20	−0.0389	−0.0261	−0.0128	10
2024-06-14	−0.0460	−0.0261	−0.0199	11
2024-08-05	−0.0731	−0.0261	−0.0470	12
2024-09-30	−0.0306	−0.0261	−0.0045	13
2024-11-11	−0.0306	−0.0261	−0.0045	14
2024-11-18	−0.0270	−0.0261	−0.0009	15
2025-01-02	−0.0745	−0.0261	−0.0484	16
2025-02-10	−0.0264	−0.0261	−0.0003	17
2025-02-28	−0.0275	−0.0261	−0.0014	18
2025-03-18	−0.0487	−0.0261	−0.0226	19
2025-04-08	−0.1167	−0.0261	−0.0906	20
2025-07-16	−0.0294	−0.0261	−0.0033	21
2025-07-22	−0.0474	−0.0261	−0.0213	22
2025-09-24	−0.0321	−0.0261	−0.0060	23
Catatan: Pelanggaran terjadi saat return < VaR (ambang return minimum harian).

Optimasi Portofolio CCMV dengan MOCv-ABC serta Perhitungan VaR

Musthofa Aruf

2025-11-07