data <- read_excel("C:/Users/asuso/Downloads/DataFix2.xlsx")
str(data)
## tibble [212 × 11] (S3: tbl_df/tbl/data.frame)
##  $ i    : num [1:212] 1 2 3 4 5 6 7 8 9 10 ...
##  $ t    : num [1:212] 303 353 37 327 170 348 53 172 101 156 ...
##  $ event: num [1:212] 0 0 1 0 0 0 0 0 0 0 ...
##  $ usia : num [1:212] 66 74 77 54 85 85 89 50 66 58 ...
##  $ dias : num [1:212] 89 90 70 89 72 91 91 77 65 87 ...
##  $ sist : num [1:212] 161 158 140 171 142 136 148 130 101 115 ...
##  $ jk   : num [1:212] 1 1 1 1 0 0 1 0 0 0 ...
##  $ komp : num [1:212] 1 1 0 0 1 1 1 1 0 1 ...
##  $ dm   : num [1:212] 1 0 0 0 1 0 0 0 0 0 ...
##  $ imt  : num [1:212] 26.6 24.3 31.2 NA NA ...
##  $ obat : num [1:212] 1 1 0 0 0 1 1 1 1 1 ...
summary(data)
##        i                t              event             usia      
##  Min.   :  1.00   Min.   :  1.00   Min.   :0.0000   Min.   :22.00  
##  1st Qu.: 53.75   1st Qu.: 71.25   1st Qu.:0.0000   1st Qu.:47.75  
##  Median :106.50   Median :152.00   Median :0.0000   Median :58.00  
##  Mean   :106.50   Mean   :163.96   Mean   :0.2453   Mean   :56.85  
##  3rd Qu.:159.25   3rd Qu.:240.00   3rd Qu.:0.0000   3rd Qu.:67.00  
##  Max.   :212.00   Max.   :363.00   Max.   :1.0000   Max.   :90.00  
##                                                                    
##       dias             sist             jk              komp       
##  Min.   : 50.00   Min.   :101.0   Min.   :0.0000   Min.   :0.0000  
##  1st Qu.: 76.75   1st Qu.:138.8   1st Qu.:0.0000   1st Qu.:0.0000  
##  Median : 87.00   Median :149.0   Median :1.0000   Median :1.0000  
##  Mean   : 85.24   Mean   :150.5   Mean   :0.6274   Mean   :0.6179  
##  3rd Qu.: 92.25   3rd Qu.:160.2   3rd Qu.:1.0000   3rd Qu.:1.0000  
##  Max.   :140.00   Max.   :236.0   Max.   :1.0000   Max.   :1.0000  
##                                                                    
##        dm              imt             obat       
##  Min.   :0.0000   Min.   :12.49   Min.   :0.0000  
##  1st Qu.:0.0000   1st Qu.:23.31   1st Qu.:0.0000  
##  Median :0.0000   Median :24.71   Median :0.0000  
##  Mean   :0.3066   Mean   :25.66   Mean   :0.3726  
##  3rd Qu.:1.0000   3rd Qu.:27.21   3rd Qu.:1.0000  
##  Max.   :1.0000   Max.   :45.27   Max.   :1.0000  
##                   NA's   :74
# Hapus kolom ID (tidak diperlukan)
data$i <- NULL

# Tangani missing value (contoh pada IMT)
data$imt[is.na(data$imt)] <- median(data$imt, na.rm = TRUE)
# Ubah variabel kategorik jadi faktor
data$jk   <- as.factor(data$jk)
data$komp <- as.factor(data$komp)
data$dm   <- as.factor(data$dm)
data$obat <- as.factor(data$obat)
# Lihat hasil akhir
head(data)
## # A tibble: 6 × 10
##       t event  usia  dias  sist jk    komp  dm      imt obat 
##   <dbl> <dbl> <dbl> <dbl> <dbl> <fct> <fct> <fct> <dbl> <fct>
## 1   303     0    66    89   161 1     1     1      26.6 1    
## 2   353     0    74    90   158 1     1     0      24.3 1    
## 3    37     1    77    70   140 1     0     0      31.2 0    
## 4   327     0    54    89   171 1     0     0      24.7 0    
## 5   170     0    85    72   142 0     1     1      24.7 0    
## 6   348     0    85    91   136 0     1     0      27.1 1
summary(data)
##        t              event             usia            dias       
##  Min.   :  1.00   Min.   :0.0000   Min.   :22.00   Min.   : 50.00  
##  1st Qu.: 71.25   1st Qu.:0.0000   1st Qu.:47.75   1st Qu.: 76.75  
##  Median :152.00   Median :0.0000   Median :58.00   Median : 87.00  
##  Mean   :163.96   Mean   :0.2453   Mean   :56.85   Mean   : 85.24  
##  3rd Qu.:240.00   3rd Qu.:0.0000   3rd Qu.:67.00   3rd Qu.: 92.25  
##  Max.   :363.00   Max.   :1.0000   Max.   :90.00   Max.   :140.00  
##       sist       jk      komp    dm           imt        obat   
##  Min.   :101.0   0: 79   0: 81   0:147   Min.   :12.49   0:133  
##  1st Qu.:138.8   1:133   1:131   1: 65   1st Qu.:23.48   1: 79  
##  Median :149.0                           Median :24.71          
##  Mean   :150.5                           Mean   :25.33          
##  3rd Qu.:160.2                           3rd Qu.:25.74          
##  Max.   :236.0                           Max.   :45.27
# =========================================================
# 📊 2️⃣ ANALISIS DESKRIPTIF DAN KAPLAN-MEIER
# =========================================================

# Buat objek survival
surv_object <- Surv(time = data$t, event = data$event)
# perbedaan survival berdasarkan jenis kelamin (jk)
fit_km <- survfit(surv_object ~ jk, data = data)

# Plot kurva Kaplan-Meier
ggsurvplot(
  fit_km,
  data = data,
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  legend.title = "Jenis Kelamin",
  legend.labs = c("Laki-laki", "Perempuan"),
  xlab = "Waktu (hari)",
  ylab = "Probabilitas Bertahan",
  title = "Kurva Kaplan-Meier Berdasarkan Jenis Kelamin"
)

# Uji log-rank test
survdiff(Surv(t, event) ~ jk, data = data)
## Call:
## survdiff(formula = Surv(t, event) ~ jk, data = data)
## 
##        N Observed Expected (O-E)^2/E (O-E)^2/V
## jk=0  79       16     19.6     0.663      1.07
## jk=1 133       36     32.4     0.401      1.07
## 
##  Chisq= 1.1  on 1 degrees of freedom, p= 0.3
# Model Kaplan-Meier untuk komplikasi
fit_komp <- survfit(Surv(t, event) ~ komp, data = data)
# Plot
ggsurvplot(
  fit_komp,
  data = data,
  pval = TRUE,  
  conf.int = TRUE,
  risk.table = TRUE,
  legend.labs = c("Tanpa Komplikasi", "Dengan Komplikasi"),
  legend.title = "Komplikasi",
  xlab = "Time",
  ylab = "Survival Probability"
)

# Uji log-rank test
survdiff(Surv(t, event) ~ komp, data = data)
## Call:
## survdiff(formula = Surv(t, event) ~ komp, data = data)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## komp=0  81        8     18.8      6.21      9.77
## komp=1 131       44     33.2      3.52      9.77
## 
##  Chisq= 9.8  on 1 degrees of freedom, p= 0.002
#Kaplan-Meier untuk Diabetes
fit_dm <- survfit(Surv(t, event) ~ dm, data = data)

ggsurvplot(
  fit_dm,
  data = data,
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  legend.labs = c("Non-Diabetes", "Diabetes"),
  legend.title = "Status DM",
  xlab = "Time",
  ylab = "Survival Probability"
)

# Uji log-rank test
survdiff(Surv(t, event) ~ dm, data = data)
## Call:
## survdiff(formula = Surv(t, event) ~ dm, data = data)
## 
##        N Observed Expected (O-E)^2/E (O-E)^2/V
## dm=0 147       22     36.2      5.56      18.3
## dm=1  65       30     15.8     12.71      18.3
## 
##  Chisq= 18.3  on 1 degrees of freedom, p= 2e-05
#Kaplan-Meier untuk Penggunaan Obat
fit_obat <- survfit(Surv(t, event) ~ obat, data = data)

ggsurvplot(
  fit_obat,
  data = data,
  pval = TRUE,
  conf.int = TRUE,
  risk.table = TRUE,
  legend.labs = c("Tidak Menggunakan Obat", "Menggunakan Obat"),
  legend.title = "Penggunaan Obat",
  xlab = "Time",
  ylab = "Survival Probability"
)

# Uji log-rank test
survdiff(Surv(t, event) ~ obat, data = data)
## Call:
## survdiff(formula = Surv(t, event) ~ obat, data = data)
## 
##          N Observed Expected (O-E)^2/E (O-E)^2/V
## obat=0 133       35     31.8     0.330     0.853
## obat=1  79       17     20.2     0.518     0.853
## 
##  Chisq= 0.9  on 1 degrees of freedom, p= 0.4
# =========================================================
# ⚙️ 3️⃣ MODEL COX PROPORTIONAL HAZARD
# =========================================================
cox_basic <- coxph(Surv(t, event) ~ usia + dias + sist + jk + komp + dm + imt + obat, data = data)
summary(cox_basic)
## Call:
## coxph(formula = Surv(t, event) ~ usia + dias + sist + jk + komp + 
##     dm + imt + obat, data = data)
## 
##   n= 212, number of events= 52 
## 
##            coef exp(coef)  se(coef)      z Pr(>|z|)   
## usia   0.007921  1.007952  0.012352  0.641  0.52134   
## dias   0.021223  1.021450  0.012589  1.686  0.09184 . 
## sist  -0.024230  0.976061  0.009265 -2.615  0.00892 **
## jk1    0.173311  1.189236  0.309690  0.560  0.57573   
## komp1  0.923620  2.518389  0.439976  2.099  0.03579 * 
## dm1    1.049177  2.855299  0.339480  3.091  0.00200 **
## imt    0.041824  1.042711  0.029874  1.400  0.16151   
## obat1 -0.881423  0.414193  0.317105 -2.780  0.00544 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## usia     1.0080     0.9921    0.9838    1.0327
## dias     1.0214     0.9790    0.9966    1.0470
## sist     0.9761     1.0245    0.9585    0.9939
## jk1      1.1892     0.8409    0.6481    2.1821
## komp1    2.5184     0.3971    1.0632    5.9653
## dm1      2.8553     0.3502    1.4679    5.5541
## imt      1.0427     0.9590    0.9834    1.1056
## obat1    0.4142     2.4143    0.2225    0.7711
## 
## Concordance= 0.729  (se = 0.034 )
## Likelihood ratio test= 36.91  on 8 df,   p=1e-05
## Wald test            = 35.7  on 8 df,   p=2e-05
## Score (logrank) test = 39.01  on 8 df,   p=5e-06
# =========================================================
# 🧩 4️⃣ UJI ASUMSI PROPORTIONAL HAZARDS (PH)
# =========================================================
ph_test <- cox.zph(cox_basic)
ph_test
##           chisq df     p
## usia    2.27313  1 0.132
## dias    0.01746  1 0.895
## sist    0.00207  1 0.964
## jk      5.14151  1 0.023
## komp    3.00828  1 0.083
## dm      0.02157  1 0.883
## imt     0.37163  1 0.542
## obat    0.00168  1 0.967
## GLOBAL 11.95934  8 0.153
plot(ph_test)

# Lihat variabel mana yang melanggar asumsi PH
violated <- rownames(ph_test$table)[ph_test$table[,3] < 0.05]
violated
## [1] "jk"
# =========================================================
# 🔁 5️⃣ MODEL REGRESI COX STRATIFIED
# =========================================================
# Ganti variabel di strata() dengan variabel yang melanggar PH
# jk melanggar PH

cox_strat <- coxph(Surv(t, event) ~ usia + dias + sist + komp + dm + imt + obat + strata(jk), data = data)
summary(cox_strat)
## Call:
## coxph(formula = Surv(t, event) ~ usia + dias + sist + komp + 
##     dm + imt + obat + strata(jk), data = data)
## 
##   n= 212, number of events= 52 
## 
##            coef exp(coef)  se(coef)      z Pr(>|z|)   
## usia   0.007253  1.007279  0.012360  0.587  0.55735   
## dias   0.021486  1.021719  0.012578  1.708  0.08759 . 
## sist  -0.024595  0.975705  0.009289 -2.648  0.00810 **
## komp1  0.951057  2.588445  0.439433  2.164  0.03044 * 
## dm1    1.039972  2.829137  0.338653  3.071  0.00213 **
## imt    0.043870  1.044846  0.030543  1.436  0.15092   
## obat1 -0.871220  0.418441  0.316586 -2.752  0.00592 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##       exp(coef) exp(-coef) lower .95 upper .95
## usia     1.0073     0.9928    0.9832    1.0320
## dias     1.0217     0.9787    0.9968    1.0472
## sist     0.9757     1.0249    0.9581    0.9936
## komp1    2.5884     0.3863    1.0939    6.1247
## dm1      2.8291     0.3535    1.4568    5.4943
## imt      1.0448     0.9571    0.9841    1.1093
## obat1    0.4184     2.3898    0.2250    0.7782
## 
## Concordance= 0.73  (se = 0.035 )
## Likelihood ratio test= 35.95  on 7 df,   p=7e-06
## Wald test            = 33.27  on 7 df,   p=2e-05
## Score (logrank) test = 36.84  on 7 df,   p=5e-06
# =========================================================
# 📈 6️⃣ VISUALISASI MODEL STRATIFIED
# =========================================================

fit_strat <- survfit(cox_strat)
ggsurvplot(
  fit_strat,
  data = data,
  conf.int = TRUE,
  risk.table = TRUE,
  xlab = "Waktu (hari)",
  ylab = "Probabilitas Bertahan",
  title = "Kurva Survival Model Cox Stratified"
)

# =========================================================
# 🧾 7️⃣ EVALUASI DAN PERBANDINGAN MODEL
# =========================================================

AIC(cox_basic, cox_strat)
##           df      AIC
## cox_basic  8 498.3922
## cox_strat  7 432.3170
# Model tanpa variabel tambahan
cox_strat_simple <- coxph(Surv(t, event) ~ usia + dias + sist + strata(jk), data = data)

# Model lengkap (nested)
cox_strat_full <- coxph(Surv(t, event) ~ usia + dias + sist + komp + dm + imt + obat + strata(jk), data = data)

# Sekarang baru bisa dibandingkan
anova(cox_strat_simple, cox_strat_full, test = "LRT")
## Analysis of Deviance Table
##  Cox model: response is  Surv(t, event)
##  Model 1: ~ usia + dias + sist + strata(jk)
##  Model 2: ~ usia + dias + sist + komp + dm + imt + obat + strata(jk)
##    loglik  Chisq Df Pr(>|Chi|)    
## 1 -223.02                         
## 2 -209.16 27.718  4  1.423e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Model dasar dengan strata tapi tanpa kovariat
cox_null_strat <- coxph(Surv(t, event) ~ strata(jk), data = data)

# Model lengkap (stratified)
cox_strat <- coxph(Surv(t, event) ~ usia + dias + sist + komp + dm + imt + obat + strata(jk), data = data)

# LRT valid karena dua-duanya punya strata yang sama
anova(cox_null_strat, cox_strat, test = "LRT")
## Analysis of Deviance Table
##  Cox model: response is  Surv(t, event)
##  Model 1: ~ strata(jk)
##  Model 2: ~ usia + dias + sist + komp + dm + imt + obat + strata(jk)
##    loglik  Chisq Df Pr(>|Chi|)    
## 1 -227.13                         
## 2 -209.16 35.949  7  7.411e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
logLik(cox_basic)
## 'log Lik.' -241.1961 (df=8)
logLik(cox_strat)
## 'log Lik.' -209.1585 (df=7)
LRT <- 2 * (logLik(cox_strat) - logLik(cox_basic))
pval <- pchisq(LRT, df = 1, lower.tail = FALSE)
pval
## 'log Lik.' 1.197583e-15 (df=7)