## ============================================================
## 2. Design matrix - MODEL TERSTRATIFIKASI
## ============================================================
X <- model.matrix(~ Pemisah + X1 + X2 + X3 + X4 + X5, data = data)


## ============================================================
## 3. Log-likelihood Regresi Geometrik (Discrete-Time PH)
## ============================================================
loglik_geo <- function(beta, X, Y) {
  eta <- as.vector(X %*% beta)
  h   <- 1 / (1 + exp(-eta))              
  ll  <- sum(log(h) + (Y - 1) * log(1 - h))
  return(-ll)
}
## ============================================================
## 4. Estimasi Parameter (STRATIFIED MODEL)
## ============================================================
start <- rep(0, ncol(X))

fit <- optim(par = start,
             fn = loglik_geo,
             X = X,
             Y = Y,
             hessian = TRUE)

coef_geo <- fit$par
names(coef_geo) <- colnames(X)

## Variansi dan SE
vcov_geo <- solve(fit$hessian)
se_geo   <- sqrt(diag(vcov_geo))
names(se_geo) <- colnames(X)

## Wald z & p-value
z_geo <- coef_geo / se_geo
p_geo <- 2 * pnorm(-abs(z_geo))

## Hazard Ratio (HR) dan CI 95%
HR      <- exp(coef_geo)
lowerCI <- exp(coef_geo - 1.96 * se_geo)
upperCI <- exp(coef_geo + 1.96 * se_geo)

hasil <- data.frame(
  Parameter = names(coef_geo),
  Estimate  = coef_geo,
  Std.Error = se_geo,
  z.value   = z_geo,
  p.value   = p_geo,
  HR        = HR,
  CI.lower  = lowerCI,
  CI.upper  = upperCI
)

cat("\n============================\n")
## 
## ============================
cat("HASIL MODEL STRATIFIKASI (GABUNGAN)\n")
## HASIL MODEL STRATIFIKASI (GABUNGAN)
cat("============================\n")
## ============================
print(hasil, digits = 4)
##               Parameter Estimate Std.Error z.value   p.value      HR CI.lower
## (Intercept) (Intercept)  3.07667   0.70969  4.3353 1.456e-05 21.6861   5.3962
## Pemisah1       Pemisah1 -1.59261   0.35608 -4.4726 7.726e-06  0.2034   0.1012
## X1                   X1 -0.01748   0.01222 -1.4305 1.526e-01  0.9827   0.9594
## X2                   X2 -0.42453   0.08060 -5.2671 1.386e-07  0.6541   0.5585
## X31                 X31  0.07270   0.28416  0.2558 7.981e-01  1.0754   0.6162
## X41                 X41 -0.84949   0.24886 -3.4135 6.412e-04  0.4276   0.2626
## X51                 X51  0.26846   0.32382  0.8290 4.071e-01  1.3080   0.6933
##             CI.upper
## (Intercept)  87.1526
## Pemisah1      0.4087
## X1            1.0065
## X2            0.7660
## X31           1.8770
## X41           0.6965
## X51           2.4674
## ============================================================
## 5. UJI EFEK STRATIFIKASI (γ = Pemisah1)
## ============================================================

gamma    <- coef_geo["Pemisah1"]
se_gamma <- se_geo["Pemisah1"]
z_gamma  <- gamma / se_gamma
p_gamma  <- 2 * pnorm(-abs(z_gamma))

cat("\n======================================\n")
## 
## ======================================
cat("UJI EFEK STRATIFIKASI (γ = Pemisah1)\n")
## UJI EFEK STRATIFIKASI (γ = Pemisah1)
cat("======================================\n")
## ======================================
cat("Gamma estimate :", gamma, "\n")
## Gamma estimate : -1.592615
cat("SE            :", se_gamma, "\n")
## SE            : 0.3560793
cat("z-value       :", z_gamma, "\n")
## z-value       : -4.472641
cat("p-value       :", p_gamma, "\n")
## p-value       : 7.725948e-06
if(p_gamma < 0.05){
  cat("\nKESIMPULAN: Baseline hazard berbeda nyata.\n")
  cat(">>> Model Wajib Terstratifikasi.\n")
} else {
  cat("\nKESIMPULAN: Tidak ada perbedaan baseline hazard.\n")
  cat(">>> Model non-stratifikasi sudah cukup.\n")
}
## 
## KESIMPULAN: Baseline hazard berbeda nyata.
## >>> Model Wajib Terstratifikasi.
## ============================================================
## 6. Validasi Model / Kebaikan Model (LL, AIC, BIC)
## ============================================================

LL <- -fit$value
k  <- length(coef_geo)
n  <- nrow(data)

AIC <- -2*LL + 2*k
BIC <- -2*LL + k * log(n)

cat("\n======================================\n")
## 
## ======================================
cat("VALIDASI MODEL (GOF)\n")
## VALIDASI MODEL (GOF)
cat("======================================\n")
## ======================================
cat("Log-likelihood :", LL, "\n")
## Log-likelihood : -261.4985
cat("AIC            :", AIC, "\n")
## AIC            : 536.997
cat("BIC            :", BIC, "\n")
## BIC            : 560.0148
## ============================================================
## 7. Likelihood Ratio Test (Strata vs Non-strata)
## ============================================================

X0 <- model.matrix(~ X1 + X2 + X3 + X4 + X5, data = data)
start0 <- rep(0, ncol(X0))

fit0 <- optim(start0, loglik_geo, X=X0, Y=Y, hessian=TRUE)
LL0 <- -fit0$value

LRT   <- 2 * (LL - LL0)
df    <- 1
p_LRT <- 1 - pchisq(LRT, df)

cat("\n======================================\n")
## 
## ======================================
cat("LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)\n")
## LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)
cat("======================================\n")
## ======================================
cat("LRT statistic :", LRT, "\n")
## LRT statistic : 25.49427
cat("df           :", df, "\n")
## df           : 1
cat("p-value      :", p_LRT, "\n")
## p-value      : 4.436985e-07
if(p_LRT < 0.05){
  cat("\nKESIMPULAN: STRATIFIKASI SIGNIFIKAN.\n")
} else {
  cat("\nKESIMPULAN: STRATIFIKASI TIDAK SIGNIFIKAN.\n")
}
## 
## KESIMPULAN: STRATIFIKASI SIGNIFIKAN.
## ============================================================
## 8. Fungsi Survival per Profil
## ============================================================
surv_geo <- function(newdata, t.max = 10){
  newdata$X2      <- as.numeric(newdata$X2)
  newdata$X3      <- factor(newdata$X3,      levels = levels(data$X3))
  newdata$X4      <- factor(newdata$X4,      levels = levels(data$X4))
  newdata$X5      <- factor(newdata$X5,      levels = levels(data$X5))
  newdata$Pemisah <- factor(newdata$Pemisah, levels = levels(data$Pemisah))
  
  Xnew <- model.matrix(~ Pemisah + X1 + X2 + X3 + X4 + X5, newdata)
  
  eta  <- as.vector(Xnew %*% coef_geo)
  h    <- 1 / (1 + exp(-eta))
  
  t    <- 0:t.max
  S_t  <- (1 - h)^t
  
  data.frame(t = t, hazard = h, Surv = S_t)
}


## ============================================================
## 9. Contoh: Survival Model Rawat Jalan vs Rawat Inap
## ============================================================
profil_jalan <- data.frame(
  Pemisah = 0,
  X1 = 50,
  X2 = 3,
  X3 = 0,
  X4 = 0,
  X5 = 0
)

profil_inap <- profil_jalan
profil_inap$Pemisah <- 1

S_jalan <- surv_geo(profil_jalan, t.max = 10)
S_inap  <- surv_geo(profil_inap,  t.max = 10)

plot(S_jalan$t, S_jalan$Surv, type="b",
     ylim=c(0,1), xlab="Kunjungan ke-t",
     ylab="Probabilitas Bertahan S(t)",
     main="Survival Model: Rawat Jalan vs Rawat Inap")

lines(S_inap$t, S_inap$Surv, type="b", col="red", pch=2)

legend("topright",
       legend=c("Rawat Jalan (model)","Rawat Inap (model)"),
       col=c("black","red"),
       pch=c(1,2))

## ============================================================
## 10. KURVA KAPLAN–MEIER PER STRATA (Pemisah)
## ============================================================

## Jika tidak ada variabel status, anggap semua event terjadi:
status <- rep(1, nrow(data))

## Kalau Anda punya variabel status di data (misal kolom 'Status'),
## ganti baris di atas dengan:
## status <- data$Status

## Load package survival
if (!"survival" %in% rownames(installed.packages())) {
  install.packages("survival")
}
library(survival)

## Bentuk objek survival
surv_obj <- Surv(time = Y, event = status)

## Kaplan–Meier per strata Pemisah
fit_km <- survfit(surv_obj ~ Pemisah, data = data)

## Plot KM
plot(fit_km, col = c("blue","darkgreen"), lty = 1:2,
     xlab = "Waktu (jumlah pemeriksaan)",
     ylab = "Probabilitas Bertahan S(t)",
     main = "Kurva Kaplan–Meier: Rawat Jalan vs Rawat Inap")

legend("topright",
       legend = c("Rawat Jalan (KM)", "Rawat Inap (KM)"),
       col    = c("blue","darkgreen"),
       lty    = 1:2)

## ============================================================
## 5. UJI EFEK STRATIFIKASI
##    Apakah baseline rawat jalan ≠ rawat inap?
## ============================================================

gamma <- coef_geo["Pemisah1"]
se_gamma <- se_geo["Pemisah1"]
z_gamma <- gamma / se_gamma
p_gamma <- 2 * pnorm(-abs(z_gamma))

cat("\n======================================\n")
## 
## ======================================
cat("UJI EFEK STRATIFIKASI (γ = Pemisah1)\n")
## UJI EFEK STRATIFIKASI (γ = Pemisah1)
cat("======================================\n")
## ======================================
cat("Gamma estimate :", gamma, "\n")
## Gamma estimate : -1.592615
cat("SE            :", se_gamma, "\n")
## SE            : 0.3560793
cat("z-value       :", z_gamma, "\n")
## z-value       : -4.472641
cat("p-value       :", p_gamma, "\n")
## p-value       : 7.725948e-06
if(p_gamma < 0.05){
  cat("\nKESIMPULAN: Baseline hazard berbeda nyata.\n")
  cat(">>> Model Wajib Terstratifikasi.\n")
} else {
  cat("\nKESIMPULAN: Tidak ada perbedaan baseline hazard.\n")
  cat(">>> Model non-stratifikasi sudah cukup.\n")
}
## 
## KESIMPULAN: Baseline hazard berbeda nyata.
## >>> Model Wajib Terstratifikasi.
## ============================================================
## 6. Validasi Model / Kebaikan Model
## ============================================================

## (1) Log-likelihood
LL <- -fit$value

## (2) AIC dan BIC
k  <- length(coef_geo)  
n  <- nrow(data)

AIC <- -2*LL + 2*k
BIC <- -2*LL + k * log(n)

cat("\n======================================\n")
## 
## ======================================
cat("VALIDASI MODEL (GOF)\n")
## VALIDASI MODEL (GOF)
cat("======================================\n")
## ======================================
cat("Log-likelihood :", LL, "\n")
## Log-likelihood : -261.4985
cat("AIC            :", AIC, "\n")
## AIC            : 536.997
cat("BIC            :", BIC, "\n")
## BIC            : 560.0148
## ============================================================
## 7. Likelihood Ratio Test
##    Bandingkan model stratifikasi vs tanpa stratifikasi
## ============================================================

## Model tanpa pemisah (non-stratifikasi)
X0 <- model.matrix(~ X1 + X2 + X3 + X4 + X5, data = data)
start0 <- rep(0, ncol(X0))

fit0 <- optim(start0, loglik_geo, X=X0, Y=Y, hessian=TRUE)
LL0 <- -fit0$value

## LRT statistic
LRT <- 2*(LL - LL0)
df  <- 1        # hanya menguji γ
p_LRT <- 1 - pchisq(LRT, df)

cat("\n======================================\n")
## 
## ======================================
cat("LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)\n")
## LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)
cat("======================================\n")
## ======================================
cat("LRT statistic :", LRT, "\n")
## LRT statistic : 25.49427
cat("df           :", df, "\n")
## df           : 1
cat("p-value      :", p_LRT, "\n")
## p-value      : 4.436985e-07
if(p_LRT < 0.05){
  cat("\nKESIMPULAN: STRATIFIKASI SIGNIFIKAN.\n")
} else {
  cat("\nKESIMPULAN: STRATIFIKASI TIDAK SIGNIFIKAN.\n")
}
## 
## KESIMPULAN: STRATIFIKASI SIGNIFIKAN.
## ============================================================
## 8. Fungsi Survival per Profil
## ============================================================
surv_geo <- function(newdata, t.max = 10){
  
  newdata$X2      <- as.numeric(newdata$X2)
  newdata$X3      <- factor(newdata$X3,      levels = levels(data$X3))
  newdata$X4      <- factor(newdata$X4,      levels = levels(data$X4))
  newdata$X5      <- factor(newdata$X5,      levels = levels(data$X5))
  newdata$Pemisah <- factor(newdata$Pemisah, levels = levels(data$Pemisah))
  
  Xnew <- model.matrix(~ Pemisah + X1 + X2 + X3 + X4 + X5, newdata)
  
  eta  <- as.vector(Xnew %*% coef_geo)
  h    <- 1 / (1 + exp(-eta))
  
  t    <- 0:t.max
  S_t  <- (1 - h)^t
  
  return(data.frame(t = t, hazard = h, Surv = S_t))
}


## ============================================================
## 9. Contoh Survival untuk Dua Strata
## ============================================================
profil_jalan <- data.frame(
  Pemisah = 0,
  X1 = 50,
  X2 = 3,
  X3 = 0,
  X4 = 0,
  X5 = 0
)

profil_inap <- profil_jalan
profil_inap$Pemisah <- 1

S_jalan <- surv_geo(profil_jalan, t.max = 10)
S_inap  <- surv_geo(profil_inap,  t.max = 10)

plot(S_jalan$t, S_jalan$Surv, type="b",
     ylim=c(0,1), xlab="Kunjungan ke-t",
     ylab="Probabilitas Bertahan S(t)",
     main="Survival Strata: Rawat Jalan vs Rawat Inap")

lines(S_inap$t, S_inap$Surv, type="b", col="red", pch=2)

legend("topright",
       legend=c("Rawat Jalan","Rawat Inap"),
       col=c("black","red"),
       pch=c(1,2))