# ============================================================
# 2. PEMBENTUKAN X MATRIX (PREDIKTOR UNTUK MIXTURE)
# ============================================================
# Prediktor: X1_s..X5_s, Z1_s, Z2_s → total R = 7
X_mat_mix <- as.matrix(
  bayes_dat %>% 
    dplyr::select(X1_s, X2_s, X3_s, X4_s, X5_s, Z1_s, Z2_s)
)

N <- nrow(bayes_dat)
R <- ncol(X_mat_mix)
K <- 2L

stan_data_mix <- list(
  N    = N,
  R    = R,
  K    = K,
  X    = X_mat_mix,
  Y    = bayes_dat$Y,
  comp = bayes_dat$comp
)

# ============================================================
# 3. RUN STAN: KNOWN-CLASS MIXTURE SKEW-NORMAL REGRESSION
# ============================================================
fit_mix_sn <- stan(
  file   = "skewnormal_knownmix.stan",   # model Stan tanpa beta0
  data   = stan_data_mix,
  chains = 4,
  warmup = 1000,
  iter   = 2000,
  seed   = 123,
  control = list(adapt_delta = 0.9)
)

# ============================================================
# 4. RINGKASAN PARAMETER MIXTURE + TABEL MIRIP TABLE 3
# ============================================================
print(
  fit_mix_sn,
  pars  = c("beta","sigma","alpha"),
  probs = c(0.025, 0.5, 0.975)
)
## Inference for Stan model: anon_model.
## 4 chains, each with iter=2000; warmup=1000; thin=1; 
## post-warmup draws per chain=1000, total post-warmup draws=4000.
## 
##             mean se_mean   sd   2.5%    50%  97.5% n_eff Rhat
## beta[1,1] -38.97    0.07 3.26 -45.33 -39.00 -32.47  1973    1
## beta[1,2]  10.74    0.03 1.66   7.56  10.75  14.03  2746    1
## beta[1,3]   6.68    0.02 1.12   4.39   6.75   8.71  3309    1
## beta[1,4]   2.04    0.02 1.40  -0.73   2.07   4.67  4476    1
## beta[1,5]  10.84    0.06 2.91   4.96  10.91  16.45  2713    1
## beta[1,6]   1.11    0.02 1.34  -1.58   1.10   3.70  3029    1
## beta[1,7]  -3.27    0.03 1.41  -6.09  -3.28  -0.46  2653    1
## beta[2,1]  -0.01    0.03 1.48  -2.98   0.01   2.91  3057    1
## beta[2,2]  -6.14    0.04 1.98  -9.88  -6.16  -2.14  2722    1
## beta[2,3]  -5.05    0.03 2.26  -9.36  -5.08  -0.44  4458    1
## beta[2,4]  -0.68    0.03 1.66  -4.15  -0.54   2.10  3930    1
## beta[2,5]  17.12    0.06 3.52  10.25  17.14  23.84  3323    1
## beta[2,6]  -6.05    0.04 2.48 -10.81  -6.10  -1.13  4112    1
## beta[2,7]   0.48    0.03 2.29  -4.07   0.52   4.87  4412    1
## sigma[1]   26.52    0.02 1.35  24.00  26.48  29.33  3339    1
## sigma[2]   35.32    0.03 1.89  31.88  35.23  39.12  4839    1
## alpha[1]    4.89    0.03 1.30   2.92   4.68   7.92  2327    1
## alpha[2]    5.87    0.03 1.62   3.19   5.71   9.42  2949    1
## 
## Samples were drawn using NUTS(diag_e) at Wed Dec 10 13:20:18 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).
# 4.1 Ringkasan parameter dari Stan
sum_mix <- summary(
  fit_mix_sn,
  pars  = c("beta","sigma","alpha"),
  probs = c(0.025, 0.5, 0.975)
)$summary

tab_par <- data.frame(
  node      = rownames(sum_mix),
  mean      = round(sum_mix[, "mean"],   4),
  sd        = round(sum_mix[, "sd"],     4),
  MC_error  = signif(sum_mix[, "se_mean"], 3),
  `2.5%`    = round(sum_mix[, "2.5%"],   4),
  median    = round(sum_mix[, "50%"],    4),
  `97.5%`   = round(sum_mix[, "97.5%"],  4),
  row.names = NULL
)

# Ubah nama 'beta[...,...]' menjadi 'b[...,...]' seperti di tabel contoh
tab_par$node <- gsub("^beta", "b", tab_par$node)

# 4.2 Hitung proporsi komponen → P[1], P[2]
p_hat <- prop.table(table(bayes_dat$comp))

tab_P <- data.frame(
  node      = paste0("P[", names(p_hat), "]"),
  mean      = round(as.numeric(p_hat), 4),
  sd        = NA,
  MC_error  = NA,
  `2.5%`    = NA,
  median    = NA,
  `97.5%`   = NA
)

# 4.3 Gabungkan jadi satu tabel seperti TABLE 3
tab_mix_final <- rbind(tab_P, tab_par)

knitr::kable(
  tab_mix_final,
  caption = "ESTIMATION PARAMETER MIXTURE REGRESSION MODEL"
)
ESTIMATION PARAMETER MIXTURE REGRESSION MODEL
node mean sd MC_error X2.5. median X97.5.
P[1] 0.6115 NA NA NA NA NA
P[2] 0.3885 NA NA NA NA NA
b[1,1] -38.9712 3.2646 0.0735 -45.3294 -39.0043 -32.4707
b[1,2] 10.7372 1.6584 0.0316 7.5585 10.7488 14.0302
b[1,3] 6.6841 1.1248 0.0196 4.3864 6.7469 8.7095
b[1,4] 2.0446 1.3963 0.0209 -0.7277 2.0699 4.6703
b[1,5] 10.8379 2.9051 0.0558 4.9590 10.9069 16.4474
b[1,6] 1.1077 1.3421 0.0244 -1.5799 1.1018 3.7045
b[1,7] -3.2739 1.4100 0.0274 -6.0859 -3.2752 -0.4570
b[2,1] -0.0101 1.4808 0.0268 -2.9813 0.0075 2.9102
b[2,2] -6.1352 1.9778 0.0379 -9.8811 -6.1632 -2.1421
b[2,3] -5.0471 2.2620 0.0339 -9.3556 -5.0797 -0.4387
b[2,4] -0.6779 1.6638 0.0265 -4.1499 -0.5432 2.1033
b[2,5] 17.1198 3.5162 0.0610 10.2472 17.1380 23.8402
b[2,6] -6.0539 2.4846 0.0387 -10.8087 -6.0999 -1.1275
b[2,7] 0.4847 2.2881 0.0344 -4.0709 0.5215 4.8681
sigma[1] 26.5164 1.3478 0.0233 23.9976 26.4799 29.3274
sigma[2] 35.3230 1.8901 0.0272 31.8822 35.2329 39.1163
alpha[1] 4.8922 1.3027 0.0270 2.9234 4.6813 7.9193
alpha[2] 5.8732 1.6204 0.0298 3.1918 5.7139 9.4228
# ============================================================
# 5. LOO, WAIC, AIC, BIC
# ============================================================
log_lik_mix <- extract_log_lik(fit_mix_sn)

waic_mix <- waic(log_lik_mix)
## Warning: 
## 5 (1.3%) p_waic estimates greater than 0.4. We recommend trying loo instead.
loo_mix  <- loo(log_lik_mix)

waic_mix
## 
## Computed from 4000 by 381 log-likelihood matrix.
## 
##           Estimate   SE
## elpd_waic  -1624.2  9.8
## p_waic        11.5  1.7
## waic        3248.4 19.7
## 
## 5 (1.3%) p_waic estimates greater than 0.4. We recommend trying loo instead.
loo_mix
## 
## Computed from 4000 by 381 log-likelihood matrix.
## 
##          Estimate   SE
## elpd_loo  -1624.7  9.9
## p_loo        12.0  1.8
## looic      3249.3 19.7
## ------
## MCSE of elpd_loo is 0.1.
## MCSE and ESS estimates assume independent draws (r_eff=1).
## 
## All Pareto k estimates are good (k < 0.7).
## See help('pareto-k-diagnostic') for details.
logLik_tot_mix <- sum(apply(log_lik_mix, 2, mean))
p_eff_mix      <- loo_mix$estimates["p_loo", "Estimate"]

AIC_mix <- -2 * logLik_tot_mix + 2 * p_eff_mix
BIC_mix <- -2 * logLik_tot_mix + p_eff_mix * log(N)

AIC_mix
## [1] 3259.604
BIC_mix
## [1] 3306.822
# ============================================================
# 6. TRACE PLOT PARAMETER MIXTURE
# ============================================================
mcmc_arr_mix <- as.array(fit_mix_sn)

# Trace plot untuk parameter inti
mcmc_trace(
  mcmc_arr_mix,
  pars = c("alpha[1]","alpha[2]",
           "sigma[1]","sigma[2]")
)

# Contoh trace beberapa beta komponen 2
mcmc_trace(
  mcmc_arr_mix,
  pars = c("beta[2,1]", "beta[2,3]", "beta[2,7]")
)