## ============================================================
## 2. Design matrix - MODEL TERSTRATIFIKASI
## ============================================================
X <- model.matrix(~ Pemisah + X1 + X2 + X3 + X4 + X5, data = data)
## ============================================================
## 3. Log-likelihood Regresi Geometrik (Discrete-Time PH)
## ============================================================
loglik_geo <- function(beta, X, Y) {
eta <- as.vector(X %*% beta)
h <- 1 / (1 + exp(-eta))
ll <- sum(log(h) + (Y - 1) * log(1 - h))
return(-ll)
}
## ============================================================
## 4. Estimasi Parameter (STRATIFIED MODEL)
## ============================================================
start <- rep(0, ncol(X))
fit <- optim(par = start,
fn = loglik_geo,
X = X,
Y = Y,
hessian = TRUE)
coef_geo <- fit$par
names(coef_geo) <- colnames(X)
## Variansi dan SE
vcov_geo <- solve(fit$hessian)
se_geo <- sqrt(diag(vcov_geo))
names(se_geo) <- colnames(X)
## Wald z & p-value
z_geo <- coef_geo / se_geo
p_geo <- 2 * pnorm(-abs(z_geo))
## Hazard Ratio (HR) dan CI 95%
HR <- exp(coef_geo)
lowerCI <- exp(coef_geo - 1.96 * se_geo)
upperCI <- exp(coef_geo + 1.96 * se_geo)
hasil <- data.frame(
Parameter = names(coef_geo),
Estimate = coef_geo,
Std.Error = se_geo,
z.value = z_geo,
p.value = p_geo,
HR = HR,
CI.lower = lowerCI,
CI.upper = upperCI
)
cat("\n============================\n")
##
## ============================
cat("HASIL MODEL STRATIFIKASI (GABUNGAN)\n")
## HASIL MODEL STRATIFIKASI (GABUNGAN)
cat("============================\n")
## ============================
print(hasil, digits = 4)
## Parameter Estimate Std.Error z.value p.value HR CI.lower
## (Intercept) (Intercept) 3.07667 0.70969 4.3353 1.456e-05 21.6861 5.3962
## Pemisah1 Pemisah1 -1.59261 0.35608 -4.4726 7.726e-06 0.2034 0.1012
## X1 X1 -0.01748 0.01222 -1.4305 1.526e-01 0.9827 0.9594
## X2 X2 -0.42453 0.08060 -5.2671 1.386e-07 0.6541 0.5585
## X31 X31 0.07270 0.28416 0.2558 7.981e-01 1.0754 0.6162
## X41 X41 -0.84949 0.24886 -3.4135 6.412e-04 0.4276 0.2626
## X51 X51 0.26846 0.32382 0.8290 4.071e-01 1.3080 0.6933
## CI.upper
## (Intercept) 87.1526
## Pemisah1 0.4087
## X1 1.0065
## X2 0.7660
## X31 1.8770
## X41 0.6965
## X51 2.4674
## ============================================================
## 5. UJI EFEK STRATIFIKASI (γ = Pemisah1)
## ============================================================
gamma <- coef_geo["Pemisah1"]
se_gamma <- se_geo["Pemisah1"]
z_gamma <- gamma / se_gamma
p_gamma <- 2 * pnorm(-abs(z_gamma))
cat("\n======================================\n")
##
## ======================================
cat("UJI EFEK STRATIFIKASI (γ = Pemisah1)\n")
## UJI EFEK STRATIFIKASI (γ = Pemisah1)
cat("======================================\n")
## ======================================
cat("Gamma estimate :", gamma, "\n")
## Gamma estimate : -1.592615
cat("SE :", se_gamma, "\n")
## SE : 0.3560793
cat("z-value :", z_gamma, "\n")
## z-value : -4.472641
cat("p-value :", p_gamma, "\n")
## p-value : 7.725948e-06
if(p_gamma < 0.05){
cat("\nKESIMPULAN: Baseline hazard berbeda nyata.\n")
cat(">>> Model Wajib Terstratifikasi.\n")
} else {
cat("\nKESIMPULAN: Tidak ada perbedaan baseline hazard.\n")
cat(">>> Model non-stratifikasi sudah cukup.\n")
}
##
## KESIMPULAN: Baseline hazard berbeda nyata.
## >>> Model Wajib Terstratifikasi.
## ============================================================
## 6. Validasi Model / Kebaikan Model (LL, AIC, BIC)
## ============================================================
LL <- -fit$value
k <- length(coef_geo)
n <- nrow(data)
AIC <- -2*LL + 2*k
BIC <- -2*LL + k * log(n)
cat("\n======================================\n")
##
## ======================================
cat("VALIDASI MODEL (GOF)\n")
## VALIDASI MODEL (GOF)
cat("======================================\n")
## ======================================
cat("Log-likelihood :", LL, "\n")
## Log-likelihood : -261.4985
cat("AIC :", AIC, "\n")
## AIC : 536.997
cat("BIC :", BIC, "\n")
## BIC : 560.0148
## ============================================================
## 7. Likelihood Ratio Test (Strata vs Non-strata)
## ============================================================
X0 <- model.matrix(~ X1 + X2 + X3 + X4 + X5, data = data)
start0 <- rep(0, ncol(X0))
fit0 <- optim(start0, loglik_geo, X=X0, Y=Y, hessian=TRUE)
LL0 <- -fit0$value
LRT <- 2 * (LL - LL0)
df <- 1
p_LRT <- 1 - pchisq(LRT, df)
cat("\n======================================\n")
##
## ======================================
cat("LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)\n")
## LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)
cat("======================================\n")
## ======================================
cat("LRT statistic :", LRT, "\n")
## LRT statistic : 25.49427
cat("df :", df, "\n")
## df : 1
cat("p-value :", p_LRT, "\n")
## p-value : 4.436985e-07
if(p_LRT < 0.05){
cat("\nKESIMPULAN: STRATIFIKASI SIGNIFIKAN.\n")
} else {
cat("\nKESIMPULAN: STRATIFIKASI TIDAK SIGNIFIKAN.\n")
}
##
## KESIMPULAN: STRATIFIKASI SIGNIFIKAN.
## ============================================================
## 8. Fungsi Survival per Profil
## ============================================================
surv_geo <- function(newdata, t.max = 10){
newdata$X2 <- as.numeric(newdata$X2)
newdata$X3 <- factor(newdata$X3, levels = levels(data$X3))
newdata$X4 <- factor(newdata$X4, levels = levels(data$X4))
newdata$X5 <- factor(newdata$X5, levels = levels(data$X5))
newdata$Pemisah <- factor(newdata$Pemisah, levels = levels(data$Pemisah))
Xnew <- model.matrix(~ Pemisah + X1 + X2 + X3 + X4 + X5, newdata)
eta <- as.vector(Xnew %*% coef_geo)
h <- 1 / (1 + exp(-eta))
t <- 0:t.max
S_t <- (1 - h)^t
data.frame(t = t, hazard = h, Surv = S_t)
}
## ============================================================
## 9. Contoh: Survival Model Rawat Jalan vs Rawat Inap
## ============================================================
profil_jalan <- data.frame(
Pemisah = 0,
X1 = 50,
X2 = 3,
X3 = 0,
X4 = 0,
X5 = 0
)
profil_inap <- profil_jalan
profil_inap$Pemisah <- 1
S_jalan <- surv_geo(profil_jalan, t.max = 10)
S_inap <- surv_geo(profil_inap, t.max = 10)
plot(S_jalan$t, S_jalan$Surv, type="b",
ylim=c(0,1), xlab="Kunjungan ke-t",
ylab="Probabilitas Bertahan S(t)",
main="Survival Model: Rawat Jalan vs Rawat Inap")
lines(S_inap$t, S_inap$Surv, type="b", col="red", pch=2)
legend("topright",
legend=c("Rawat Jalan (model)","Rawat Inap (model)"),
col=c("black","red"),
pch=c(1,2))

## ============================================================
## 10. KURVA KAPLAN–MEIER PER STRATA (Pemisah)
## ============================================================
## Jika tidak ada variabel status, anggap semua event terjadi:
status <- rep(1, nrow(data))
## Kalau Anda punya variabel status di data (misal kolom 'Status'),
## ganti baris di atas dengan:
## status <- data$Status
## Load package survival
if (!"survival" %in% rownames(installed.packages())) {
install.packages("survival")
}
library(survival)
## Bentuk objek survival
surv_obj <- Surv(time = Y, event = status)
## Kaplan–Meier per strata Pemisah
fit_km <- survfit(surv_obj ~ Pemisah, data = data)
## Plot KM
plot(fit_km, col = c("blue","darkgreen"), lty = 1:2,
xlab = "Waktu (jumlah pemeriksaan)",
ylab = "Probabilitas Bertahan S(t)",
main = "Kurva Kaplan–Meier: Rawat Jalan vs Rawat Inap")
legend("topright",
legend = c("Rawat Jalan (KM)", "Rawat Inap (KM)"),
col = c("blue","darkgreen"),
lty = 1:2)

## ============================================================
## 5. UJI EFEK STRATIFIKASI
## Apakah baseline rawat jalan ≠ rawat inap?
## ============================================================
gamma <- coef_geo["Pemisah1"]
se_gamma <- se_geo["Pemisah1"]
z_gamma <- gamma / se_gamma
p_gamma <- 2 * pnorm(-abs(z_gamma))
cat("\n======================================\n")
##
## ======================================
cat("UJI EFEK STRATIFIKASI (γ = Pemisah1)\n")
## UJI EFEK STRATIFIKASI (γ = Pemisah1)
cat("======================================\n")
## ======================================
cat("Gamma estimate :", gamma, "\n")
## Gamma estimate : -1.592615
cat("SE :", se_gamma, "\n")
## SE : 0.3560793
cat("z-value :", z_gamma, "\n")
## z-value : -4.472641
cat("p-value :", p_gamma, "\n")
## p-value : 7.725948e-06
if(p_gamma < 0.05){
cat("\nKESIMPULAN: Baseline hazard berbeda nyata.\n")
cat(">>> Model Wajib Terstratifikasi.\n")
} else {
cat("\nKESIMPULAN: Tidak ada perbedaan baseline hazard.\n")
cat(">>> Model non-stratifikasi sudah cukup.\n")
}
##
## KESIMPULAN: Baseline hazard berbeda nyata.
## >>> Model Wajib Terstratifikasi.
## ============================================================
## 6. Validasi Model / Kebaikan Model
## ============================================================
## (1) Log-likelihood
LL <- -fit$value
## (2) AIC dan BIC
k <- length(coef_geo)
n <- nrow(data)
AIC <- -2*LL + 2*k
BIC <- -2*LL + k * log(n)
cat("\n======================================\n")
##
## ======================================
cat("VALIDASI MODEL (GOF)\n")
## VALIDASI MODEL (GOF)
cat("======================================\n")
## ======================================
cat("Log-likelihood :", LL, "\n")
## Log-likelihood : -261.4985
cat("AIC :", AIC, "\n")
## AIC : 536.997
cat("BIC :", BIC, "\n")
## BIC : 560.0148
## ============================================================
## 7. Likelihood Ratio Test
## Bandingkan model stratifikasi vs tanpa stratifikasi
## ============================================================
## Model tanpa pemisah (non-stratifikasi)
X0 <- model.matrix(~ X1 + X2 + X3 + X4 + X5, data = data)
start0 <- rep(0, ncol(X0))
fit0 <- optim(start0, loglik_geo, X=X0, Y=Y, hessian=TRUE)
LL0 <- -fit0$value
## LRT statistic
LRT <- 2*(LL - LL0)
df <- 1 # hanya menguji γ
p_LRT <- 1 - pchisq(LRT, df)
cat("\n======================================\n")
##
## ======================================
cat("LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)\n")
## LIKELIHOOD RATIO TEST (STRATA vs NON-STRATA)
cat("======================================\n")
## ======================================
cat("LRT statistic :", LRT, "\n")
## LRT statistic : 25.49427
cat("df :", df, "\n")
## df : 1
cat("p-value :", p_LRT, "\n")
## p-value : 4.436985e-07
if(p_LRT < 0.05){
cat("\nKESIMPULAN: STRATIFIKASI SIGNIFIKAN.\n")
} else {
cat("\nKESIMPULAN: STRATIFIKASI TIDAK SIGNIFIKAN.\n")
}
##
## KESIMPULAN: STRATIFIKASI SIGNIFIKAN.
## ============================================================
## 8. Fungsi Survival per Profil
## ============================================================
surv_geo <- function(newdata, t.max = 10){
newdata$X2 <- as.numeric(newdata$X2)
newdata$X3 <- factor(newdata$X3, levels = levels(data$X3))
newdata$X4 <- factor(newdata$X4, levels = levels(data$X4))
newdata$X5 <- factor(newdata$X5, levels = levels(data$X5))
newdata$Pemisah <- factor(newdata$Pemisah, levels = levels(data$Pemisah))
Xnew <- model.matrix(~ Pemisah + X1 + X2 + X3 + X4 + X5, newdata)
eta <- as.vector(Xnew %*% coef_geo)
h <- 1 / (1 + exp(-eta))
t <- 0:t.max
S_t <- (1 - h)^t
return(data.frame(t = t, hazard = h, Surv = S_t))
}
## ============================================================
## 9. Contoh Survival untuk Dua Strata
## ============================================================
profil_jalan <- data.frame(
Pemisah = 0,
X1 = 50,
X2 = 3,
X3 = 0,
X4 = 0,
X5 = 0
)
profil_inap <- profil_jalan
profil_inap$Pemisah <- 1
S_jalan <- surv_geo(profil_jalan, t.max = 10)
S_inap <- surv_geo(profil_inap, t.max = 10)
plot(S_jalan$t, S_jalan$Surv, type="b",
ylim=c(0,1), xlab="Kunjungan ke-t",
ylab="Probabilitas Bertahan S(t)",
main="Survival Strata: Rawat Jalan vs Rawat Inap")
lines(S_inap$t, S_inap$Surv, type="b", col="red", pch=2)
legend("topright",
legend=c("Rawat Jalan","Rawat Inap"),
col=c("black","red"),
pch=c(1,2))
