if (!require("survival")) install.packages("survival", dependencies = TRUE)
## Loading required package: survival
library(survival)
data_kidney <- read.csv("D:/KULIAH/Semester 7/AKH/kidrecurr.csv")
head(data_kidney)
##   patient time1 infect1 time2 infect2  age gender gn an pkd
## 1       1    16       1     8       1 28.0      0  0  0   0
## 2       2    13       0    23       1 48.0      1  1  0   0
## 3       3    22       1    28       1 32.0      0  0  0   0
## 4       4   318       1   447       1 31.5      1  0  0   0
## 5       5    30       1    12       1 10.0      0  0  0   0
## 6       6    24       1   245       1 16.5      1  0  0   0
# -----------------------------------------------------------------------------
# Langkah 2: Pra-Pemrosesan Data (Metode "Counting Process" dari file .docx)
# Mengubah dari format 'wide' ke 'long' dengan format (id, start, stop, event)
# -----------------------------------------------------------------------------
data_long <- data.frame()

for (i in 1:nrow(data_kidney)) {
  # Episode 1: Dari waktu 0 s/d time1 
  ep1 <- data.frame(
    id = data_kidney$patient[i],
    start = 0,
    stop = data_kidney$time1[i],
    event = data_kidney$infect1[i],
    episode = 1,
    age = data_kidney$age[i],
    gender = data_kidney$gender[i],
    gn = data_kidney$gn[i],
    an = data_kidney$an[i],
    pkd = data_kidney$pkd[i]
  )
  
  # Episode 2: Dari waktu time1 s/d time1 + time2 [cite: 12]
  ep2 <- data.frame(
    id = data_kidney$patient[i],
    start = data_kidney$time1[i],
    stop = data_kidney$time1[i] + data_kidney$time2[i],
    event = data_kidney$infect2[i],
    episode = 2,
    age = data_kidney$age[i],
    gender = data_kidney$gender[i],
    gn = data_kidney$gn[i],
    an = data_kidney$an[i],
    pkd = data_kidney$pkd[i]
  )
  
  # Gabungkan episode 1 dan 2 untuk pasien ini
  data_long <- rbind(data_long, ep1, ep2)
}

# Membersihkan data: hapus baris jika 'stop' time tidak valid
data_long <- data_long[!is.na(data_long$stop) & data_long$stop > data_long$start, ]

cat("Langkah 2: Data berhasil diubah ke format 'long' (Counting Process).\n")
## Langkah 2: Data berhasil diubah ke format 'long' (Counting Process).
print(head(data_long))
##   id start stop event episode age gender gn an pkd
## 1  1     0   16     1       1  28      0  0  0   0
## 2  1    16   24     1       2  28      0  0  0   0
## 3  2     0   13     0       1  48      1  1  0   0
## 4  2    13   36     1       2  48      1  1  0   0
## 5  3     0   22     1       1  32      0  0  0   0
## 6  3    22   50     1       2  32      0  0  0   0
# -----------------------------------------------------------------------------
# Langkah 3: Fitting 3 Model
# -----------------------------------------------------------------------------

# Model 1: Andersen-Gill (AG)
# Baseline hazard sama untuk semua kejadian. [cite: 16]
cat("\nMenjalankan Model 1: Andersen-Gill (AG)...\n")
## 
## Menjalankan Model 1: Andersen-Gill (AG)...
model_AG <- coxph(
  Surv(start, stop, event) ~ age + gender + gn + an + pkd + cluster(id),
  data = data_long
)

# Model 2: Prentice, Williams, Peterson (PWP)
# Baseline hazard berbeda antar episode (strata(episode)). [cite: 24]
cat("Menjalankan Model 2: PWP...\n")
## Menjalankan Model 2: PWP...
model_PWP <- coxph(
  Surv(start, stop, event) ~ age + gender + gn + an + pkd + strata(episode) + cluster(id),
  data = data_long
)

# Model 3: Frailty Model
# Menambahkan random effect untuk 'id' (pasien)
cat("Menjalankan Model 3: Frailty...\n")
## Menjalankan Model 3: Frailty...
model_Frailty <- coxph(
  Surv(start, stop, event) ~ age + gender + gn + an + pkd + frailty(id),
  data = data_long
)
## Warning in coxpenal.fit(X, Y, istrat, offset, init = init, control, weights =
## weights, : Inner loop failed to coverge for iterations 2
cat("\nLangkah 3: Ketiga model berhasil dijalankan.\n")
## 
## Langkah 3: Ketiga model berhasil dijalankan.
# -----------------------------------------------------------------------------
# Langkah 4: Menampilkan Hasil dan Perbandingan AIC
# -----------------------------------------------------------------------------

cat("\n\n--- HASIL MODEL 1: ANDERSEN-GILL (AG) ---\n")
## 
## 
## --- HASIL MODEL 1: ANDERSEN-GILL (AG) ---
print(summary(model_AG))
## Call:
## coxph(formula = Surv(start, stop, event) ~ age + gender + gn + 
##     an + pkd, data = data_long, cluster = id)
## 
##   n= 76, number of events= 58 
## 
##              coef  exp(coef)   se(coef)  robust se      z Pr(>|z|)    
## age     0.0009516  1.0009521  0.0112269  0.0071186  0.134    0.894    
## gender -1.4412639  0.2366285  0.3558093  0.3572753 -4.034 5.48e-05 ***
## gn      0.1086609  1.1147843  0.4148139  0.3191014  0.341    0.733    
## an      0.6475365  1.9108278  0.4142653  0.3360662  1.927    0.054 .  
## pkd    -1.1123114  0.3287981  0.6016784  0.8107260 -1.372    0.170    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## age       1.0010     0.9990   0.98708    1.0150
## gender    0.2366     4.2260   0.11748    0.4766
## gn        1.1148     0.8970   0.59645    2.0836
## an        1.9108     0.5233   0.98892    3.6922
## pkd       0.3288     3.0414   0.06712    1.6107
## 
## Concordance= 0.694  (se = 0.038 )
## Likelihood ratio test= 18.48  on 5 df,   p=0.002
## Wald test            = 24.32  on 5 df,   p=2e-04
## Score (logrank) test = 20.49  on 5 df,   p=0.001,   Robust = 12.93  p=0.02
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
cat("\n\n--- HASIL MODEL 2: PRENTICE-WILLIAMS-PETERSON (PWP) ---\n")
## 
## 
## --- HASIL MODEL 2: PRENTICE-WILLIAMS-PETERSON (PWP) ---
print(summary(model_PWP))
## Call:
## coxph(formula = Surv(start, stop, event) ~ age + gender + gn + 
##     an + pkd + strata(episode), data = data_long, cluster = id)
## 
##   n= 76, number of events= 58 
## 
##             coef exp(coef)  se(coef) robust se      z Pr(>|z|)    
## age     0.001107  1.001107  0.011810  0.007445  0.149    0.882    
## gender -1.313331  0.268923  0.364780  0.328402 -3.999 6.36e-05 ***
## gn      0.059059  1.060838  0.420246  0.301624  0.196    0.845    
## an      0.495384  1.641128  0.434559  0.329274  1.504    0.132    
## pkd    -0.996039  0.369340  0.645210  0.760696 -1.309    0.190    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## age       1.0011     0.9989   0.98660    1.0158
## gender    0.2689     3.7185   0.14128    0.5119
## gn        1.0608     0.9427   0.58736    1.9160
## an        1.6411     0.6093   0.86072    3.1291
## pkd       0.3693     2.7075   0.08316    1.6403
## 
## Concordance= 0.685  (se = 0.043 )
## Likelihood ratio test= 14.69  on 5 df,   p=0.01
## Wald test            = 21.07  on 5 df,   p=8e-04
## Score (logrank) test = 15.9  on 5 df,   p=0.007,   Robust = 13.83  p=0.02
## 
##   (Note: the likelihood ratio and score tests assume independence of
##      observations within a cluster, the Wald and robust score tests do not).
cat("\n\n--- HASIL MODEL 3: FRAILTY ---\n")
## 
## 
## --- HASIL MODEL 3: FRAILTY ---
print(summary(model_Frailty))
## Call:
## coxph(formula = Surv(start, stop, event) ~ age + gender + gn + 
##     an + pkd + frailty(id), data = data_long)
## 
##   n= 76, number of events= 58 
## 
##             coef       se(coef) se2     Chisq DF   p      
## age          0.0009555 0.01124  0.01122  0.01 1.00 9.3e-01
## gender      -1.4428515 0.35635  0.35578 16.39 1.00 5.1e-05
## gn           0.1091966 0.41524  0.41468  0.07 1.00 7.9e-01
## an           0.6481638 0.41476  0.41422  2.44 1.00 1.2e-01
## pkd         -1.1118130 0.60277  0.60161  3.40 1.00 6.5e-02
## frailty(id)                              0.09 0.06 5.0e-01
## 
##        exp(coef) exp(-coef) lower .95 upper .95
## age       1.0010     0.9990    0.9792     1.023
## gender    0.2363     4.2327    0.1175     0.475
## gn        1.1154     0.8966    0.4943     2.517
## an        1.9120     0.5230    0.8481     4.311
## pkd       0.3290     3.0399    0.1009     1.072
## 
## Iterations: 5 outer, 44 Newton-Raphson
##      Variance of random effect= 0.001521103   I-likelihood = -158.7 
## Degrees of freedom for terms= 1.0 1.0 1.0 1.0 1.0 0.1 
## Concordance= 0.7  (se = 0.037 )
## Likelihood ratio test= 18.65  on 5.05 df,   p=0.002
cat("\n\n--- PERBANDINGAN KEBAIKAN MODEL (AIC) ---\n")
## 
## 
## --- PERBANDINGAN KEBAIKAN MODEL (AIC) ---
# Menggunakan extractAIC untuk perbandingan yang konsisten
aic_ag <- extractAIC(model_AG)[2]
aic_pwp <- extractAIC(model_PWP)[2]
aic_frailty <- extractAIC(model_Frailty)[2]

aic_table <- data.frame(
  Model = c("Andersen-Gill", "PWP", "Frailty"),
  AIC = c(aic_ag, aic_pwp, aic_frailty)
)

# Urutkan dari AIC terkecil (Model Terbaik)
print(aic_table[order(aic_table$AIC), ])
##           Model      AIC
## 2           PWP 260.2368
## 3       Frailty 327.3603
## 1 Andersen-Gill 327.4444