library(survival)
library(car)
## Loading required package: carData
library(ggplot2)
data(ovarian)
## Warning in data(ovarian): data set 'ovarian' not found
summary(ovarian)
##      futime           fustat            age           resid.ds    
##  Min.   :  59.0   Min.   :0.0000   Min.   :38.89   Min.   :1.000  
##  1st Qu.: 368.0   1st Qu.:0.0000   1st Qu.:50.17   1st Qu.:1.000  
##  Median : 476.0   Median :0.0000   Median :56.85   Median :2.000  
##  Mean   : 599.5   Mean   :0.4615   Mean   :56.17   Mean   :1.577  
##  3rd Qu.: 794.8   3rd Qu.:1.0000   3rd Qu.:62.38   3rd Qu.:2.000  
##  Max.   :1227.0   Max.   :1.0000   Max.   :74.50   Max.   :2.000  
##        rx         ecog.ps     
##  Min.   :1.0   Min.   :1.000  
##  1st Qu.:1.0   1st Qu.:1.000  
##  Median :1.5   Median :1.000  
##  Mean   :1.5   Mean   :1.462  
##  3rd Qu.:2.0   3rd Qu.:2.000  
##  Max.   :2.0   Max.   :2.000

1. Recode variabel umur yang ada di dataset ovarian menjadi 4 kategori (A-D) berdasarkan informasi kuartil.

ovarian$cage <- cut(ovarian$age, breaks = c(0, 50.17, 56.85, 62.38, "inf"),
                    labels = c("A", "B", "C", "D"))
head(ovarian)
##   futime fustat     age resid.ds rx ecog.ps cage
## 1     59      1 72.3315        2  1       1    D
## 2    115      1 74.4932        2  1       1    D
## 3    156      1 66.4658        2  1       2    D
## 4    421      0 53.3644        2  2       1    B
## 5    431      1 50.3397        2  1       1    B
## 6    448      0 56.4301        1  1       2    B

2. Lakukan pengecekan asumsi PH pada semua variabel menggunakan metode grafis dengan log-log plot dan pendekatan uji Goodness of Fit.

# Fungsi Survival
ovarian$s_obj<-Surv(ovarian$futime,ovarian$fustat)
head(ovarian)
##   futime fustat     age resid.ds rx ecog.ps cage s_obj
## 1     59      1 72.3315        2  1       1    D    59
## 2    115      1 74.4932        2  1       1    D   115
## 3    156      1 66.4658        2  1       2    D   156
## 4    421      0 53.3644        2  2       1    B  421+
## 5    431      1 50.3397        2  1       1    B   431
## 6    448      0 56.4301        1  1       2    B  448+

a. Metode Grafik (Log-Log Plot)

# Variabel "age"
fit_age <- survfit(s_obj ~ cage, data = ovarian)
plot(fit_age,
     fun = "cloglog",
     main = "Plot Log-Log Group",
     xlab="Time",
     mark="+",
     ylab="log-log survival",
     lty=2,
     col=c("red","blue", "green", "black"))
legend(60, 0.5,
       c("A", "B", "C", "D"),
       lty=2,
       col=c("red","blue", "green", "black"))

# Variabel "resid.ds"
fit_residds <- survfit(s_obj ~ resid.ds, data = ovarian)
plot(fit_residds,
     fun = "cloglog",
     main = "Plot Log-Log Group",
     xlab="Time",
     mark="+",
     ylab="log-log survival",
     lty=2,
     col=c("red","blue"))
legend(60, -0.25,
       c("No", "Yes"),
       lty=2,
       col=c("red","blue"))

# Variabel "rx"
fit_rx <- survfit(s_obj ~ rx, data = ovarian)
plot(fit_rx,
     fun = "cloglog",
     main = "Plot Log-Log Group",
     xlab="Time",
     mark="+",
     ylab="log-log survival",
     lty=2,
     col=c("red","blue"))
legend(60, -0.25,
       c("Treatment 1", "Treatment 2"),
       lty=2,
       col=c("red","blue"))

# Variabel "ecog.ps"
fit_ecogps <- survfit(s_obj ~ ecog.ps, data = ovarian)
plot(fit_ecogps,
     fun = "cloglog",
     main = "Plot Log-Log Group",
     xlab="Time",
     mark="+",
     ylab="log-log survival",
     lty=2,
     col=c("red","blue"))
legend(60, -0.25,
       c("Lebih Baik", "Biasa"),
       lty=2,
       col=c("red","blue"))

b. Goodness of Fit Test

# Uji Goodness of Fit untuk semua variabel penjelas
fit1 <- coxph(s_obj ~ cage + resid.ds + rx + ecog.ps, data = ovarian)
print(cox.zph(fit1))
##          chisq df     p
## cage      6.13  3 0.105
## resid.ds  1.32  1 0.250
## rx        1.68  1 0.195
## ecog.ps   3.48  1 0.062
## GLOBAL   11.07  6 0.086

3. Interpretasikan hasilnya!