bayesgeoreg

## =========================================
## 1. RAPIKAN NAMA KOLOM: Y, X1..X12
## =========================================

print(names(dat))

##  [1] "No"       "Kategori" "Y"        "Xl"       "X2"       "X3"      
##  [7] "X4"       "XS"       "X6"       "X7"       "X8"       "X9"      
## [13] "X10"      "X11"      "X12"

# Pastikan kolom ke-3 = Y
if (ncol(dat) >= 3) {
  colnames(dat)[3] <- "Y"
}

# Kalau format awal: No, Kategori, Y, Xl, X2, X3, X4, XS, X6, X7, X8, X9, X10, X11, X12
# maka kita ubah Xl -> X1 dan XS -> X5 dulu (jaga-jaga)
if ("Xl" %in% names(dat)) {
  names(dat)[names(dat) == "Xl"] <- "X1"
}
if ("XS" %in% names(dat)) {
  names(dat)[names(dat) == "XS"] <- "X5"
}

# Jika kolom ke-4 sampai ke-15 memang ada, paksa jadikan X1..X12 (kasus data leasing kamu)
if (ncol(dat) >= 15) {
  colnames(dat)[4:15] <- paste0("X", 1:12)
}

print(names(dat))

##  [1] "No"       "Kategori" "Y"        "X1"       "X2"       "X3"      
##  [7] "X4"       "X5"       "X6"       "X7"       "X8"       "X9"      
## [13] "X10"      "X11"      "X12"

# Harapannya: "No" "Kategori" "Y" "X1" "X2" ... "X12"

## =========================================
## 2. BUAT RESPON 0-BASED (Yg) & PILIH X1..X12
## =========================================

if (!("Y" %in% names(dat))) {
  stop("Kolom 'Y' tidak ditemukan. Cek kembali nama kolom.")
}

dat$Yg <- as.integer(dat$Y - 1L)  # geometric 0-based: 0,1,2,...

# Pastikan semua X1..X12 ada
all_X <- paste0("X", 1:12)
if (!all(all_X %in% names(dat))) {
  stop("Tidak semua X1..X12 ditemukan di data. Cek kembali struktur kolom.")
}

vars    <- c("Yg", all_X)
dat_sub <- dat[, vars]

# Hapus baris dengan NA
dat_clean      <- dat_sub[complete.cases(dat_sub), ]
dat_clean$Yg   <- as.integer(dat_clean$Yg)

## =========================================
## 3. BANGUN MATRIKS DESAIN X UNTUK X1..X12
## =========================================

X <- model.matrix(
  ~ X1 + X2 + X3 + X4 + X5 + X6 +
    X7 + X8 + X9 + X10 + X11 + X12,
  data = dat_clean
)

# Hilangkan kolom intercept (Stan sudah punya p0)
X <- X[, -1, drop = TRUE]
X <- as.matrix(X)

N <- nrow(dat_clean)
K <- ncol(X)

cat("N =", N, "observasi\n")

## N = 334 observasi

cat("K =", K, "kovariat (dummy) di X\n")

## K = 12 kovariat (dummy) di X

cat("Nama kolom X:\n")

## Nama kolom X:

print(colnames(X))

##  [1] "X1"  "X2"  "X3"  "X4"  "X5"  "X6"  "X7"  "X8"  "X9"  "X10" "X11" "X12"

stan_data <- list(
  N      = N,
  K      = K,
  y      = dat_clean$Yg,
  X      = X,
  alpha0 = 1,   # prior Beta(1,1) untuk p0 (uniform)
  beta0  = 1
)

## =========================================
## 4. MODEL STAN: REGRESI GEOMETRIK BAYESIAN
## =========================================

library(rstan)

## Loading required package: StanHeaders

## 
## rstan version 2.32.7 (Stan version 2.32.2)

## For execution on a local, multicore CPU with excess RAM we recommend calling
## options(mc.cores = parallel::detectCores()).
## To avoid recompilation of unchanged Stan programs, we recommend calling
## rstan_options(auto_write = TRUE)
## For within-chain threading using `reduce_sum()` or `map_rect()` Stan functions,
## change `threads_per_chain` option:
## rstan_options(threads_per_chain = 1)

## Do not specify '-march=native' in 'LOCAL_CPPFLAGS' or a Makevars file

rstan_options(auto_write = TRUE)
options(mc.cores = parallel::detectCores())

stan_code <- "
data {
  int<lower=1> N;          
  int<lower=1> K;          
  int<lower=0> y[N];       
  matrix[N, K] X;          
  real<lower=0> alpha0;    
  real<lower=0> beta0;     
}

parameters {
  real<lower=0, upper=1> p0;  
  vector[K] beta;             
}

transformed parameters {
  real beta0_logit;
  vector[N] eta;
  vector<lower=0, upper=1>[N] p;

  beta0_logit = logit(p0);
  eta = beta0_logit + X * beta;

  for (n in 1:N) {
    p[n] = inv_logit(eta[n]);
  }
}

model {
  // prior Beta untuk p0 (probabilitas dasar)
  p0   ~ beta(alpha0, beta0);

  // prior Normal(0,5) untuk koefisien regresi
  beta ~ normal(0, 5);

  // likelihood Geometrik 0-based: P(Y=y) = p * (1-p)^y
  for (n in 1:N) {
    target += y[n] * log1m(p[n]) + log(p[n]);
  }
}
"

## =========================================
## 5. KOMPILE & SAMPLING MCMC
## =========================================

stan_model_geo <- stan_model(model_code = stan_code)

## WARNING: Rtools is required to build R packages, but is not currently installed.
## 
## Please download and install the appropriate version of Rtools for 4.5.2 from
## https://cran.r-project.org/bin/windows/Rtools/.

## Trying to compile a simple C file

## Running "C:/PROGRA~1/R/R-45~1.2/bin/x64/Rcmd.exe" SHLIB foo.c
## using C compiler: 'gcc.exe (GCC) 14.3.0'
## gcc  -I"C:/PROGRA~1/R/R-45~1.2/include" -DNDEBUG   -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/Rcpp/include/"  -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/"  -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/unsupported"  -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/BH/include" -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/src/"  -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/"  -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppParallel/include/" -DRCPP_PARALLEL_USE_TBB=1 -I"C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DUSE_STANC3 -DSTRICT_R_HEADERS  -DBOOST_PHOENIX_NO_VARIADIC_EXPRESSION  -D_HAS_AUTO_PTR_ETC=0  -include "C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/stan/math/prim/fun/Eigen.hpp"  -std=c++1y    -I"C:/rtools45/x86_64-w64-mingw32.static.posix/include"      -O2 -Wall -std=gnu2x  -mfpmath=sse -msse2 -mstackrealign   -c foo.c -o foo.o
## cc1.exe: warning: command-line option '-std=c++14' is valid for C++/ObjC++ but not for C
## In file included from C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/Eigen/Core:19,
##                  from C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/Eigen/Dense:1,
##                  from C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/StanHeaders/include/stan/math/prim/fun/Eigen.hpp:22,
##                  from <command-line>:
## C:/Users/Hani Khaulasari/AppData/Local/R/win-library/4.5/RcppEigen/include/Eigen/src/Core/util/Macros.h:679:10: fatal error: cmath: No such file or directory
##   679 | #include <cmath>
##       |          ^~~~~~~
## compilation terminated.
## make: *** [C:/PROGRA~1/R/R-45~1.2/etc/x64/Makeconf:289: foo.o] Error 1

## WARNING: Rtools is required to build R packages, but is not currently installed.
## 
## Please download and install the appropriate version of Rtools for 4.5.2 from
## https://cran.r-project.org/bin/windows/Rtools/.

fit_geo_bayes <- sampling(
  stan_model_geo,
  data   = stan_data,
  chains = 4,
  iter   = 4000,
  warmup = 1000,
  seed   = 123
)

print(fit_geo_bayes,
      pars  = c("p0", "beta"),
      probs = c(0.025, 0.5, 0.975))

## Inference for Stan model: anon_model.
## 4 chains, each with iter=4000; warmup=1000; thin=1; 
## post-warmup draws per chain=3000, total post-warmup draws=12000.
## 
##           mean se_mean   sd  2.5%   50% 97.5% n_eff Rhat
## p0        0.10       0 0.04  0.05  0.09  0.18  7361    1
## beta[1]   0.00       0 0.01 -0.02  0.00  0.02  9161    1
## beta[2]   0.03       0 0.16 -0.30  0.03  0.34 11660    1
## beta[3]  -0.11       0 0.17 -0.45 -0.11  0.23 10508    1
## beta[4]  -0.09       0 0.13 -0.35 -0.09  0.17 10532    1
## beta[5]  -0.24       0 0.13 -0.49 -0.24  0.01  9622    1
## beta[6]   0.02       0 0.06 -0.09  0.02  0.13  9912    1
## beta[7]  -0.04       0 0.13 -0.29 -0.04  0.22 10415    1
## beta[8]  -0.01       0 0.01 -0.02  0.00  0.01 13656    1
## beta[9]  -0.01       0 0.01 -0.03 -0.01  0.01 11921    1
## beta[10]  0.04       0 0.06 -0.08  0.04  0.17  9189    1
## beta[11]  0.03       0 0.07 -0.11  0.03  0.16  7766    1
## beta[12] -0.01       0 0.04 -0.08 -0.01  0.06 10633    1
## 
## Samples were drawn using NUTS(diag_e) at Mon Nov 17 20:05:28 2025.
## For each parameter, n_eff is a crude measure of effective sample size,
## and Rhat is the potential scale reduction factor on split chains (at 
## convergence, Rhat=1).

## =========================================
## 6. HITUNG ODDS RATIO (SEMUA X1..X12 & DUMMY)
## =========================================

post       <- rstan::extract(fit_geo_bayes)
beta_post  <- post$beta   # iter × K

OR_summary <- t(apply(beta_post, 2, function(b) {
  OR <- exp(b)
  c(
    OR_median = median(OR),
    OR_2.5    = quantile(OR, 0.025),
    OR_97.5   = quantile(OR, 0.975)
  )
}))

colnames(OR_summary) <- c("OR_median", "OR_2.5%", "OR_97.5%")
rownames(OR_summary) <- colnames(X)

OR_summary

##      
##       OR_median   OR_2.5% OR_97.5%
##   X1  1.0016628 0.9849816 1.018482
##   X2  1.0320296 0.7434623 1.410005
##   X3  0.8996393 0.6393161 1.260743
##   X4  0.9177956 0.7055764 1.181610
##   X5  0.7858124 0.6147623 1.010457
##   X6  1.0196852 0.9094305 1.143637
##   X7  0.9618144 0.7453801 1.249386
##   X8  0.9950230 0.9833458 1.006367
##   X9  0.9925076 0.9727482 1.012404
##   X10 1.0414716 0.9195154 1.181492
##   X11 1.0291618 0.8997627 1.177686
##   X12 0.9898822 0.9193519 1.062905

## =========================================
## 7. TRACE PLOT UNTUK DIAGNOSTIK MCMC
## =========================================

# Trace plot p0
traceplot(fit_geo_bayes, pars = "p0")

# Trace plot beberapa beta (misal 6 pertama agar plot tidak terlalu penuh)
idx_plot <- 1:min(6, K)
pars_beta_plot <- paste0("beta[", idx_plot, "]")

traceplot(fit_geo_bayes, pars = pars_beta_plot)

# Kalau ingin semua beta (hati-hati, bisa sangat ramai):
# pars_all <- c("p0", paste0("beta[", 1:K, "]"))
# traceplot(fit_geo_bayes, pars = pars_all)

bayesgeoreg

Hani Khaulasari

2025-11-17