## =========================================
## 1. RAPIKAN NAMA KOLOM: Y, X1..X12
## =========================================
print(names(dat))
## [1] "No" "Kategori" "Y" "Xl" "X2" "X3"
## [7] "X4" "XS" "X6" "X7" "X8" "X9"
## [13] "X10" "X11" "X12"
# Pastikan kolom ke-3 = Y
if (ncol(dat) >= 3) {
colnames(dat)[3] <- "Y"
}
# Kalau format awal: No, Kategori, Y, Xl, X2, X3, X4, XS, X6, X7, X8, X9, X10, X11, X12
# maka kita ubah Xl -> X1 dan XS -> X5 dulu (jaga-jaga)
if ("Xl" %in% names(dat)) {
names(dat)[names(dat) == "Xl"] <- "X1"
}
if ("XS" %in% names(dat)) {
names(dat)[names(dat) == "XS"] <- "X5"
}
# Jika kolom ke-4 sampai ke-15 memang ada, paksa jadikan X1..X12 (kasus data leasing kamu)
if (ncol(dat) >= 15) {
colnames(dat)[4:15] <- paste0("X", 1:12)
}
print(names(dat))
## [1] "No" "Kategori" "Y" "X1" "X2" "X3"
## [7] "X4" "X5" "X6" "X7" "X8" "X9"
## [13] "X10" "X11" "X12"
# Harapannya: "No" "Kategori" "Y" "X1" "X2" ... "X12"
## =========================================
## 2. BUAT RESPON 0-BASED (Yg) & PILIH X1..X12
## =========================================
if (!("Y" %in% names(dat))) {
stop("Kolom 'Y' tidak ditemukan. Cek kembali nama kolom.")
}
dat$Yg <- as.integer(dat$Y - 1L) # geometric 0-based: 0,1,2,...
# Pastikan semua X1..X12 ada
all_X <- paste0("X", 1:12)
if (!all(all_X %in% names(dat))) {
stop("Tidak semua X1..X12 ditemukan di data. Cek kembali struktur kolom.")
}
vars <- c("Yg", all_X)
dat_sub <- dat[, vars]
# Hapus baris dengan NA
dat_clean <- dat_sub[complete.cases(dat_sub), ]
dat_clean$Yg <- as.integer(dat_clean$Yg)
## =========================================
## 3. BANGUN MATRIKS DESAIN X UNTUK X1..X12
## =========================================
X <- model.matrix(
~ X1 + X2 + X3 + X4 + X5 + X6 +
X7 + X8 + X9 + X10 + X11 + X12,
data = dat_clean
)
# Hilangkan kolom intercept (Stan sudah punya p0)
X <- X[, -1, drop = TRUE]
X <- as.matrix(X)
N <- nrow(dat_clean)
K <- ncol(X)
cat("N =", N, "observasi\n")
## N = 334 observasi
cat("K =", K, "kovariat (dummy) di X\n")
## K = 12 kovariat (dummy) di X
cat("Nama kolom X:\n")
## Nama kolom X:
print(colnames(X))
## [1] "X1" "X2" "X3" "X4" "X5" "X6" "X7" "X8" "X9" "X10" "X11" "X12"
stan_data <- list(
N = N,
K = K,
y = dat_clean$Yg,
X = X,
alpha0 = 1, # prior Beta(1,1) untuk p0 (uniform)
beta0 = 1
)
## =========================================
## 4. MODEL STAN: REGRESI GEOMETRIK BAYESIAN
## =========================================
library(rstan)
## Loading required package: StanHeaders
##
## rstan version 2.32.7 (Stan version 2.32.2)
## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)
## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file
rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())
stan_code <- "
data {
int<lower=1> N;
int<lower=1> K;
int<lower=0> y[N];
matrix[N, K] X;
real<lower=0> alpha0;
real<lower=0> beta0;
}
parameters {
real<lower=0, upper=1> p0;
vector[K] beta;
}
transformed parameters {
real beta0_logit;
vector[N] eta;
vector<lower=0, upper=1>[N] p;
beta0_logit = logit(p0);
eta = beta0_logit + X * beta;
for (n in 1:N) {
p[n] = inv_logit(eta[n]);
}
}
model {
// prior Beta untuk p0 (probabilitas dasar)
p0 ~ beta(alpha0, beta0);
// prior Normal(0,5) untuk koefisien regresi
beta ~ normal(0, 5);
// likelihood Geometrik 0-based: P(Y=y) = p * (1-p)^y
for (n in 1:N) {
target += y[n] * log1m(p[n]) + log(p[n]);
}
}
"
## =========================================
## 5. KOMPILE & SAMPLING MCMC
## =========================================
stan_model_geo <- stan_model(model_code = stan_code)
## WARNING: Rtools is required to build R packages, but is not currently installed.
##
## Please download and install the appropriate version of Rtools for 4.5.2 from
## https://cran.r-project.org/bin/windows/Rtools/.
## Trying to compile a simple C file
## Running "C:/PROGRA~1/R/R-45~1.2/bin/x64/Rcmd.exe" SHLIB foo.c
## using C compiler: 'gcc.exe (GCC) 14.3.0'
## gcc -I"C:/PROGRA~1/R/R-45~1.2/include" -DNDEBUG -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/Rcpp/include/" -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/" -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/unsupported" -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/BH/include" -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/src/" -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/" -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppParallel/include/" -DRCPP_PARALLEL_USE_TBB=1 -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/rstan/include" -DEIGEN_NO_DEBUG -DBOOST_DISABLE_ASSERTS -DBOOST_PENDING_INTEGER_LOG2_HPP -DSTAN_THREADS -DUSE_STANC3 -DSTRICT_R_HEADERS -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION -D_HAS_AUTO_PTR_ETC=0 -include "C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/stan/math/prim/fun/Eigen.hpp" -std=c++1y -I"C:/rtools45/x86_64-w64-mingw32.static.posix/include" -O2 -Wall -std=gnu2x -mfpmath=sse -msse2 -mstackrealign -c foo.c -o foo.o
## cc1.exe: warning: command-line option '-std=c++14' is valid for C++/ObjC++ but not for C
## In file included from C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/Eigen/Core:19,
## from C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/Eigen/Dense:1,
## from C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22,
## from <command-line>:
## C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: cmath: No such file or directory
## 679 | #include <cmath>
## | ^~~~~~~
## compilation terminated.
## make: *** [C:/PROGRA~1/R/R-45~1.2/etc/x64/Makeconf:289: foo.o] Error 1
## WARNING: Rtools is required to build R packages, but is not currently installed.
##
## Please download and install the appropriate version of Rtools for 4.5.2 from
## https://cran.r-project.org/bin/windows/Rtools/.
fit_geo_bayes <- sampling(
stan_model_geo,
data = stan_data,
chains = 4,
iter = 4000,
warmup = 1000,
seed = 123
)
print(fit_geo_bayes,
pars = c("p0", "beta"),
probs = c(0.025, 0.5, 0.975))
## Inference for Stan model: anon_model.
## 4 chains, each with iter=4000; warmup=1000; thin=1;
## post-warmup draws per chain=3000, total post-warmup draws=12000.
##
## mean se_mean sd 2.5% 50% 97.5% n_eff Rhat
## p0 0.10 0 0.04 0.05 0.09 0.18 7361 1
## beta[1] 0.00 0 0.01 -0.02 0.00 0.02 9161 1
## beta[2] 0.03 0 0.16 -0.30 0.03 0.34 11660 1
## beta[3] -0.11 0 0.17 -0.45 -0.11 0.23 10508 1
## beta[4] -0.09 0 0.13 -0.35 -0.09 0.17 10532 1
## beta[5] -0.24 0 0.13 -0.49 -0.24 0.01 9622 1
## beta[6] 0.02 0 0.06 -0.09 0.02 0.13 9912 1
## beta[7] -0.04 0 0.13 -0.29 -0.04 0.22 10415 1
## beta[8] -0.01 0 0.01 -0.02 0.00 0.01 13656 1
## beta[9] -0.01 0 0.01 -0.03 -0.01 0.01 11921 1
## beta[10] 0.04 0 0.06 -0.08 0.04 0.17 9189 1
## beta[11] 0.03 0 0.07 -0.11 0.03 0.16 7766 1
## beta[12] -0.01 0 0.04 -0.08 -0.01 0.06 10633 1
##
## Samples were drawn using NUTS(diag_e) at Mon Nov 17 20:05:28 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at
## convergence, Rhat=1).
## =========================================
## 6. HITUNG ODDS RATIO (SEMUA X1..X12 & DUMMY)
## =========================================
post <- rstan::extract(fit_geo_bayes)
beta_post <- post$beta # iter × K
OR_summary <- t(apply(beta_post, 2, function(b) {
OR <- exp(b)
c(
OR_median = median(OR),
OR_2.5 = quantile(OR, 0.025),
OR_97.5 = quantile(OR, 0.975)
)
}))
colnames(OR_summary) <- c("OR_median", "OR_2.5%", "OR_97.5%")
rownames(OR_summary) <- colnames(X)
OR_summary
##
## OR_median OR_2.5% OR_97.5%
## X1 1.0016628 0.9849816 1.018482
## X2 1.0320296 0.7434623 1.410005
## X3 0.8996393 0.6393161 1.260743
## X4 0.9177956 0.7055764 1.181610
## X5 0.7858124 0.6147623 1.010457
## X6 1.0196852 0.9094305 1.143637
## X7 0.9618144 0.7453801 1.249386
## X8 0.9950230 0.9833458 1.006367
## X9 0.9925076 0.9727482 1.012404
## X10 1.0414716 0.9195154 1.181492
## X11 1.0291618 0.8997627 1.177686
## X12 0.9898822 0.9193519 1.062905
## =========================================
## 7. TRACE PLOT UNTUK DIAGNOSTIK MCMC
## =========================================
# Trace plot p0
traceplot(fit_geo_bayes, pars = "p0")

# Trace plot beberapa beta (misal 6 pertama agar plot tidak terlalu penuh)
idx_plot <- 1:min(6, K)
pars_beta_plot <- paste0("beta[", idx_plot, "]")
traceplot(fit_geo_bayes, pars = pars_beta_plot)

# Kalau ingin semua beta (hati-hati, bisa sangat ramai):
# pars_all <- c("p0", paste0("beta[", 1:K, "]"))
# traceplot(fit_geo_bayes, pars = pars_all)