Dataset

Data terdiri dari beberapa variabel, termasuk variabel dependen (survival time dan status) dan variabel independen. Variabel dalam data memiliki deskripsi sebagai berikut.

  1. Laser: Jenis laser yang digunakan (Xenon atau Argon).
  2. Eye: Mata sebelah mana yang dilakukan perawatan (Kanan atau Kiri)
  3. Age: Umur saat di diagnosis diabetes
  4. Type: Tipe diabetes (Juvenile: diagnosis dibawah 20 tahun, Adult: setelah 20 tahun)
  5. Trt: 0 control eye, 1 treated eye.
  6. Risk: Skor risiko untuk mata

Load Libraries

library(survival)
## Warning: package 'survival' was built under R version 4.1.2
library(ggsurvfit) 
## Loading required package: ggplot2
library(tidycmprsk)
library(survminer)
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.1.2
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 4.1.2
## Loading required package: MASS
library(ggplot2)
library(flexsurv)
## Warning: package 'flexsurv' was built under R version 4.1.2
library(MASS)
library(RColorBrewer)

Load Dataset

retino = read.csv("/Users/adindaprilly/Downloads/retinopathy.csv")
head(retino)

Pre-processing

Handling Missing Values

na_indices<-which(is.na(retino))
length(na_indices)
## [1] 0

Tidak terdapat missing values pada dataset retinopathy.

Handling Duplicated Data

dup_indices<-retino[duplicated(retino), ] #cek mana baris duplicated
nrow(dup_indices) #banyak data duplikat
## [1] 0
#data<-retino[!duplicated(retino),]

Tidak terdapat data duplikat pada dataset retinopathy.

str(retino)
## 'data.frame':    394 obs. of  10 variables:
##  $ rownames: int  1 2 3 4 5 6 7 8 9 10 ...
##  $ id      : int  5 5 14 14 16 16 25 25 29 29 ...
##  $ laser   : chr  "argon" "argon" "argon" "argon" ...
##  $ eye     : chr  "left" "left" "right" "right" ...
##  $ age     : int  28 28 12 12 9 9 9 9 13 13 ...
##  $ type    : chr  "adult" "adult" "juvenile" "juvenile" ...
##  $ trt     : int  1 0 1 0 1 0 1 0 1 0 ...
##  $ futime  : num  46.2 46.2 42.5 31.3 42.3 ...
##  $ status  : int  0 0 0 1 0 0 0 0 0 1 ...
##  $ risk    : int  9 9 8 6 11 11 11 11 9 10 ...

Data character pelru diubah menjadi data kategori.

retino$laser <- as.factor(retino$laser)
retino$eye <- as.factor(retino$eye)
retino$type <- as.factor(retino$type)
retino$trt <- as.factor(retino$trt)

Mendefinisikan Respon

y=Surv(retino$futime,retino$status)

Deskripsi Data

kmfit1 = survfit(y~1)
kmfit1
## Call: survfit(formula = y ~ 1)
## 
##        n events median 0.95LCL 0.95UCL
## [1,] 394    155     NA    59.8      NA
par(mfrow=c(2,2), mar = c(2, 2, 2, 2), oma = c(1, 1, 1, 1))

frekuensi <- table(retino$laser)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Laser", col = colors)

frekuensi <- table(retino$eye)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Eye", col = colors)

frekuensi <- table(retino$type)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Type", col = colors)

frekuensi <- table(retino$trt)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Trt", col = colors)

par(mfrow=c(1,2))
hist(retino$age, main = "Histogram Variabel Age", xlab = "Value", ylab = "Frequency", col = "lightblue", border = "black")
mean_value <- mean(retino$age)
abline(v = mean(retino$age), col = "red", lwd = 2, lty = 2)
text(mean_value, max(hist(retino$age, plot = FALSE)$counts), labels = paste("Mean =", round(mean_value, 2)), pos = 4, col = "red")

barplot(table(retino$risk), main = "Bar Chart Variabel Risk", xlab = "Value", ylab = "Frequency", col = "lightblue", border = "black",ylim = c(0, max(table(retino$risk)) + 1))

KM Curves

plot(kmfit1, conf.int=TRUE, lty=1, xlab="Time (Month)", ylab="Survival Probability", main="Survival Curves with 95% Confidence Bands", mark.time=F)

kmfit2 = survfit(y ~ retino$trt)
kmfit3 = survfit(y ~ retino$laser)
kmfit4 = survfit(y ~ retino$eye)
kmfit5 = survfit(y ~ retino$type)
par(mfrow=c(2,2))
plot(kmfit2, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities") 
legend("topright", c("Control eye", "Treated eye"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)
plot(kmfit3, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities") 
legend("topright", c("Argon", "Xenon"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)
plot(kmfit4, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities") 
legend("topright", c("Left", "Right"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)
plot(kmfit5, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities") 
legend("topright", c("Adult", "Juvenile"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)

ggsurvplot(
  kmfit2, data = retino,# survfit model object
  conf.int = TRUE,            # Add 95% confidence interval
  risk.table = TRUE,          # Show risk table
  pval = TRUE,                # Show p-value of log-rank test
  xlab = "Months",             # X-axis label
  surv.median.line = "hv",
  ylab = "Overall survival probability", # Y-axis label
  title = "Survival Curves with 95% Confidence Bands" # Judul plot
)

ggsurvplot(
  kmfit3, data = retino,# survfit model object
  conf.int = TRUE,            # Add 95% confidence interval
  risk.table = TRUE,          # Show risk table
  pval = TRUE,                # Show p-value of log-rank test
  xlab = "Months",             # X-axis label
  surv.median.line = "hv",
  ylab = "Overall survival probability", # Y-axis label
  title = "Survival Curves with 95% Confidence Bands" # Judul plot
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.

ggsurvplot(
  kmfit4, data = retino,# survfit model object
  conf.int = TRUE,            # Add 95% confidence interval
  risk.table = TRUE,          # Show risk table
  pval = TRUE,                # Show p-value of log-rank test
  xlab = "Months",             # X-axis label
  surv.median.line = "hv",
  ylab = "Overall survival probability", # Y-axis label
  title = "Survival Curves with 95% Confidence Bands" # Judul plot
)

ggsurvplot(
  kmfit5, data = retino,# survfit model object
  conf.int = TRUE,            # Add 95% confidence interval
  risk.table = TRUE,          # Show risk table
  pval = TRUE,                # Show p-value of log-rank test
  xlab = "Months",             # X-axis label
  surv.median.line = "hv",
  ylab = "Overall survival probability", # Y-axis label
  title = "Survival Curves with 95% Confidence Bands" # Judul plot
)

Uji Log-rank

survdiff(y ~ retino$trt)
## Call:
## survdiff(formula = y ~ retino$trt)
## 
##                N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$trt=0 197      101     71.8      11.9      22.2
## retino$trt=1 197       54     83.2      10.3      22.2
## 
##  Chisq= 22.2  on 1 degrees of freedom, p= 2e-06

P-value < 0.05 artinya ada perbedaan signifikan antara survival probability untuk control eye dengan treated eye.

survdiff(y ~ retino$laser)
## Call:
## survdiff(formula = y ~ retino$laser)
## 
##                      N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$laser=argon 194       79     72.8     0.534      1.01
## retino$laser=xenon 200       76     82.2     0.473      1.01
## 
##  Chisq= 1  on 1 degrees of freedom, p= 0.3

P-value > 0.05 artinya tidak ada perbedaan signifikan antara survival probability untuk laser xenon dan argon.

survdiff(y ~ retino$eye)
## Call:
## survdiff(formula = y ~ retino$eye)
## 
##                    N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$eye=left  216       95     84.7      1.26      2.79
## retino$eye=right 178       60     70.3      1.52      2.79
## 
##  Chisq= 2.8  on 1 degrees of freedom, p= 0.09

P-value > 0.05 artinya tidak ada perbedaan signifikan antara survival probability untuk mata kanan dan maka kiri.

survdiff(y ~ retino$type)
## Call:
## survdiff(formula = y ~ retino$type)
## 
##                        N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$type=adult    166       68     67.2   0.00988    0.0175
## retino$type=juvenile 228       87     87.8   0.00756    0.0175
## 
##  Chisq= 0  on 1 degrees of freedom, p= 0.9

P-value > 0.05 artinya tidak ada perbedaan signifikan antara survival probability untuk laser xenon dan argon.

Uji Distribusi Variabel Waktu

Dalam langkah ini, ingin diketahui apakah variabel ‘futime’ mengikuti distribusi tertentu untuk penentuan model yang dipakai. Jika variabel ‘futime’ mengikuti suatu dsitribusi, analisis dapat dilanjut menggunakan model parametrik. Distribusi yang biasa digunakan untuk survival time adalah weibull, exponential, dan log-logistic.

fitwei <- fitdist(retino$futime, "weibull")
ks.test(retino$futime, "pweibull", shape = fitwei$estimate["shape"], scale = fitwei$estimate["scale"])
## Warning in ks.test(retino$futime, "pweibull", shape =
## fitwei$estimate["shape"], : ties should not be present for the
## Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  retino$futime
## D = 0.17097, p-value = 1.983e-10
## alternative hypothesis: two-sided

Survival time tidak mengikuti distribusi Weibull.

fit_exp <- fitdist(retino$futime, "exp")
ks.test(retino$futime, "pexp", rate = fit_exp$estimate["rate"])
## Warning in ks.test(retino$futime, "pexp", rate = fit_exp$estimate["rate"]): ties
## should not be present for the Kolmogorov-Smirnov test
## 
##  One-sample Kolmogorov-Smirnov test
## 
## data:  retino$futime
## D = 0.20775, p-value = 3.442e-15
## alternative hypothesis: two-sided

Assessing PH Assumption

Log-log KM Curve

retino$age_category <- cut(retino$age, 
                         breaks = c(-Inf, 14, 24, 58), 
                         labels = c("Anak", "Remaja", "Dewasa"), 
                         right = TRUE)
kmfit6 = survfit(y ~ retino$age_category)

retino$risk_category <- cut(retino$risk, 
                          breaks = c(-Inf, 9, Inf), 
                          labels = c("6-9", "10-12"), 
                          right = TRUE)
kmfit7 = survfit(y ~ retino$risk_category)
par(mfrow=c(1,3))
plot(kmfit2, fun = "cloglog",  col = c("purple", "black"), lty = c("solid", "dotted"),
     xlab = "Time in months using log scale", 
     ylab = "Log-log survival",
     main = "log–log KM curves by trt")
legend("topleft", legend = c("Control eye", "Treated eye"), col = c("purple", "black"), 
       lty = c("solid", "dotted"))

plot(kmfit3, fun = "cloglog",  col = c("purple", "black"), lty = c("solid", "dotted"),
     xlab = "Time in months using log scale", 
     ylab = "Log-log survival",
     main = "log–log KM curves by laser")
legend("topleft", legend = c("Argon", "Xenon"), col = c("purple", "black"), 
       lty = c("solid", "dotted"))

plot(kmfit4, fun = "cloglog",  col = c("purple", "black","red"), lty = c("solid", "dotted"),
     xlab = "Time in months using log scale", 
     ylab = "Log-log survival",
     main = "log-log KM curves by eye")
legend("topleft", legend = c("Left", "Right"), col = c("purple", "black","red"), 
       lty = c("solid", "dotted"))

par(mfrow=c(1,3))
plot(kmfit5, fun = "cloglog",  col = c("purple", "black"), lty = c("solid", "dotted"),
     xlab = "Time in months using log scale", 
     ylab = "Log-log survival",
     main = "log–log KM curves by type")
legend("topleft", legend = c("Adult", "Juvenile"), col = c("purple", "black"), 
       lty = c("solid", "dotted"))

plot(kmfit6, fun = "cloglog",  col = c("purple", "black"), lty = c("solid", "dotted","dashed"),
     xlab = "Time in months using log scale", 
     ylab = "Log-log survival",
     main = "log–log KM curves by age group")
legend("topleft", legend = c("Anak", "Remaja", "Dewasa"), col = c("purple", "black"), 
       lty = c("solid", "dotted","dashed"))

plot(kmfit7, fun = "cloglog",  col = c("purple", "black","red"), lty = c("solid", "dotted"),
     xlab = "Time in months using log scale", 
     ylab = "Log-log survival",
     main = "log-log KM curves by risk")
legend("topleft", legend = c("6-9", "10-12"), col = c("purple", "black","red"), 
       lty = c("solid", "dotted"))

Observed vs Expected

plot(kmfit2, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
     main = "Observed vs Expected Plot by trt") 
legend("topright", c("Control eye", "Treated eye"), lty=c("solid","dashed"), col="black")

## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ trt, data=retino)    # Fit Cox regression (PH assumed)
newdat  <- data.frame(trt = 0:1)         # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
      col = "red", lty = 1:2)
## Warning in model.frame.default(data = structure(list(trt = 0:1), class =
## "data.frame", row.names = c(NA, : variable 'trt' is not a factor

plot(kmfit3, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
     main = "Observed vs Expected Plot by laser") 
legend("topright", c("Argon", "Xenon"), lty=c("solid","dashed"), col="black")

## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(laser), data=retino)    # Fit Cox regression (PH assumed)
newdat  <- data.frame(laser = factor(c("argon","xenon"), levels = c("argon","xenon")))         # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
      col = "red", lty = 1:2)

plot(kmfit4, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
     main = "Observed vs Expected Plot by eye") 
legend("topright", c("Left", "Right"), lty=c("solid","dashed"), col="black")

## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(eye), data=retino)    # Fit Cox regression (PH assumed)
newdat  <- data.frame(eye = factor(c("left","right"), levels = c("left","right")))         # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
      col = "red", lty = 1:2)

plot(kmfit5, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
     main = "Observed vs Expected Plot by type") 
legend("topright", c("Adult", "Juvenile"), lty=c("solid","dashed"), col="black")

## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(type), data=retino)    # Fit Cox regression (PH assumed)
newdat  <- data.frame(type =factor(c("adult","juvenile"), levels = c("adult","juvenile")))         # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
      col = "red", lty = 1:2)

plot(kmfit6, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
     main = "Observed vs Expected Plot by age group") 
legend("topright", c("Anak", "Remaja", "Dewasa"), lty=c("solid","dashed"), col="black")

## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(age_category), data=retino)    # Fit Cox regression (PH assumed)
newdat  <- data.frame(age_category = factor(c("Anak", "Remaja", "Dewasa"), levels = c("Anak", "Remaja", "Dewasa")))         # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
      col = "red", lty = 1:3)

plot(kmfit7, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
     main = "Observed vs Expected Plot by risk") 
legend("topright", c("6-9", "10-12"), lty=c("solid","dashed"), col="black")

## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(risk_category), data=retino)    # Fit Cox regression (PH assumed)
newdat  <- data.frame(risk_category = factor(c("6-9", "10-12"), levels = c("6-9", "10-12")))         # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
      col = "red", lty = 1:2)

Pemodelan Cox Proportional Hazard

model = coxph(y ~ laser + eye + age + type + trt + risk, data=retino, method = "breslow")
summary(model)
## Call:
## coxph(formula = y ~ laser + eye + age + type + trt + risk, data = retino, 
##     method = "breslow")
## 
##   n= 394, number of events= 155 
## 
##                  coef exp(coef) se(coef)      z Pr(>|z|)    
## laserxenon   -0.16591   0.84712  0.16214 -1.023   0.3062    
## eyeright     -0.21711   0.80484  0.16756 -1.296   0.1951    
## age           0.01050   1.01055  0.00983  1.068   0.2856    
## typejuvenile  0.15721   1.17024  0.29354  0.536   0.5922    
## trt1         -0.78627   0.45554  0.16925 -4.646 3.39e-06 ***
## risk          0.14147   1.15197  0.05600  2.526   0.0115 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##              exp(coef) exp(-coef) lower .95 upper .95
## laserxenon      0.8471     1.1805    0.6165    1.1640
## eyeright        0.8048     1.2425    0.5795    1.1177
## age             1.0106     0.9896    0.9913    1.0302
## typejuvenile    1.1702     0.8545    0.6583    2.0803
## trt1            0.4555     2.1952    0.3269    0.6347
## risk            1.1520     0.8681    1.0322    1.2856
## 
## Concordance= 0.629  (se = 0.023 )
## Likelihood ratio test= 33.41  on 6 df,   p=9e-06
## Wald test            = 31.9  on 6 df,   p=2e-05
## Score (logrank) test = 33.05  on 6 df,   p=1e-05
cox.zph(model,transform=rank)
##        chisq df    p
## laser  0.794  1 0.37
## eye    0.811  1 0.37
## age    0.342  1 0.56
## type   0.806  1 0.37
## trt    0.965  1 0.33
## risk   0.918  1 0.34
## GLOBAL 5.003  6 0.54