Data terdiri dari beberapa variabel, termasuk variabel dependen (survival time dan status) dan variabel independen. Variabel dalam data memiliki deskripsi sebagai berikut.
library(survival)
## Warning: package 'survival' was built under R version 4.1.2
library(ggsurvfit)
## Loading required package: ggplot2
library(tidycmprsk)
library(survminer)
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.1.2
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(fitdistrplus)
## Warning: package 'fitdistrplus' was built under R version 4.1.2
## Loading required package: MASS
library(ggplot2)
library(flexsurv)
## Warning: package 'flexsurv' was built under R version 4.1.2
library(MASS)
library(RColorBrewer)
retino = read.csv("/Users/adindaprilly/Downloads/retinopathy.csv")
head(retino)
na_indices<-which(is.na(retino))
length(na_indices)
## [1] 0
Tidak terdapat missing values pada dataset retinopathy.
dup_indices<-retino[duplicated(retino), ] #cek mana baris duplicated
nrow(dup_indices) #banyak data duplikat
## [1] 0
#data<-retino[!duplicated(retino),]
Tidak terdapat data duplikat pada dataset retinopathy.
str(retino)
## 'data.frame': 394 obs. of 10 variables:
## $ rownames: int 1 2 3 4 5 6 7 8 9 10 ...
## $ id : int 5 5 14 14 16 16 25 25 29 29 ...
## $ laser : chr "argon" "argon" "argon" "argon" ...
## $ eye : chr "left" "left" "right" "right" ...
## $ age : int 28 28 12 12 9 9 9 9 13 13 ...
## $ type : chr "adult" "adult" "juvenile" "juvenile" ...
## $ trt : int 1 0 1 0 1 0 1 0 1 0 ...
## $ futime : num 46.2 46.2 42.5 31.3 42.3 ...
## $ status : int 0 0 0 1 0 0 0 0 0 1 ...
## $ risk : int 9 9 8 6 11 11 11 11 9 10 ...
Data character pelru diubah menjadi data kategori.
retino$laser <- as.factor(retino$laser)
retino$eye <- as.factor(retino$eye)
retino$type <- as.factor(retino$type)
retino$trt <- as.factor(retino$trt)
y=Surv(retino$futime,retino$status)
kmfit1 = survfit(y~1)
kmfit1
## Call: survfit(formula = y ~ 1)
##
## n events median 0.95LCL 0.95UCL
## [1,] 394 155 NA 59.8 NA
par(mfrow=c(2,2), mar = c(2, 2, 2, 2), oma = c(1, 1, 1, 1))
frekuensi <- table(retino$laser)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Laser", col = colors)
frekuensi <- table(retino$eye)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Eye", col = colors)
frekuensi <- table(retino$type)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Type", col = colors)
frekuensi <- table(retino$trt)
colors <- brewer.pal(n = 3, name = "Pastel1")[1:length(frekuensi)]
persentase <- round(100 * frekuensi / sum(frekuensi), 1)
labels_with_percentase <- paste(names(frekuensi), persentase, "%", sep = " ")
pie(frekuensi, labels = labels_with_percentase, main = "Pie Chart Variabel Trt", col = colors)
par(mfrow=c(1,2))
hist(retino$age, main = "Histogram Variabel Age", xlab = "Value", ylab = "Frequency", col = "lightblue", border = "black")
mean_value <- mean(retino$age)
abline(v = mean(retino$age), col = "red", lwd = 2, lty = 2)
text(mean_value, max(hist(retino$age, plot = FALSE)$counts), labels = paste("Mean =", round(mean_value, 2)), pos = 4, col = "red")
barplot(table(retino$risk), main = "Bar Chart Variabel Risk", xlab = "Value", ylab = "Frequency", col = "lightblue", border = "black",ylim = c(0, max(table(retino$risk)) + 1))
plot(kmfit1, conf.int=TRUE, lty=1, xlab="Time (Month)", ylab="Survival Probability", main="Survival Curves with 95% Confidence Bands", mark.time=F)
kmfit2 = survfit(y ~ retino$trt)
kmfit3 = survfit(y ~ retino$laser)
kmfit4 = survfit(y ~ retino$eye)
kmfit5 = survfit(y ~ retino$type)
par(mfrow=c(2,2))
plot(kmfit2, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities")
legend("topright", c("Control eye", "Treated eye"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)
plot(kmfit3, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities")
legend("topright", c("Argon", "Xenon"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)
plot(kmfit4, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities")
legend("topright", c("Left", "Right"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)
plot(kmfit5, lty=c("solid", "dashed"), col=c("black", "red"), xlab="survival time in months", ylab="survival probabilities")
legend("topright", c("Adult", "Juvenile"), lty=c("solid","dashed"), col=c("black", "red"), cex=1.5)
ggsurvplot(
kmfit2, data = retino,# survfit model object
conf.int = TRUE, # Add 95% confidence interval
risk.table = TRUE, # Show risk table
pval = TRUE, # Show p-value of log-rank test
xlab = "Months", # X-axis label
surv.median.line = "hv",
ylab = "Overall survival probability", # Y-axis label
title = "Survival Curves with 95% Confidence Bands" # Judul plot
)
ggsurvplot(
kmfit3, data = retino,# survfit model object
conf.int = TRUE, # Add 95% confidence interval
risk.table = TRUE, # Show risk table
pval = TRUE, # Show p-value of log-rank test
xlab = "Months", # X-axis label
surv.median.line = "hv",
ylab = "Overall survival probability", # Y-axis label
title = "Survival Curves with 95% Confidence Bands" # Judul plot
)
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, : Median
## survival not reached.
ggsurvplot(
kmfit4, data = retino,# survfit model object
conf.int = TRUE, # Add 95% confidence interval
risk.table = TRUE, # Show risk table
pval = TRUE, # Show p-value of log-rank test
xlab = "Months", # X-axis label
surv.median.line = "hv",
ylab = "Overall survival probability", # Y-axis label
title = "Survival Curves with 95% Confidence Bands" # Judul plot
)
ggsurvplot(
kmfit5, data = retino,# survfit model object
conf.int = TRUE, # Add 95% confidence interval
risk.table = TRUE, # Show risk table
pval = TRUE, # Show p-value of log-rank test
xlab = "Months", # X-axis label
surv.median.line = "hv",
ylab = "Overall survival probability", # Y-axis label
title = "Survival Curves with 95% Confidence Bands" # Judul plot
)
survdiff(y ~ retino$trt)
## Call:
## survdiff(formula = y ~ retino$trt)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$trt=0 197 101 71.8 11.9 22.2
## retino$trt=1 197 54 83.2 10.3 22.2
##
## Chisq= 22.2 on 1 degrees of freedom, p= 2e-06
P-value < 0.05 artinya ada perbedaan signifikan antara survival probability untuk control eye dengan treated eye.
survdiff(y ~ retino$laser)
## Call:
## survdiff(formula = y ~ retino$laser)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$laser=argon 194 79 72.8 0.534 1.01
## retino$laser=xenon 200 76 82.2 0.473 1.01
##
## Chisq= 1 on 1 degrees of freedom, p= 0.3
P-value > 0.05 artinya tidak ada perbedaan signifikan antara survival probability untuk laser xenon dan argon.
survdiff(y ~ retino$eye)
## Call:
## survdiff(formula = y ~ retino$eye)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$eye=left 216 95 84.7 1.26 2.79
## retino$eye=right 178 60 70.3 1.52 2.79
##
## Chisq= 2.8 on 1 degrees of freedom, p= 0.09
P-value > 0.05 artinya tidak ada perbedaan signifikan antara survival probability untuk mata kanan dan maka kiri.
survdiff(y ~ retino$type)
## Call:
## survdiff(formula = y ~ retino$type)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## retino$type=adult 166 68 67.2 0.00988 0.0175
## retino$type=juvenile 228 87 87.8 0.00756 0.0175
##
## Chisq= 0 on 1 degrees of freedom, p= 0.9
P-value > 0.05 artinya tidak ada perbedaan signifikan antara survival probability untuk laser xenon dan argon.
Dalam langkah ini, ingin diketahui apakah variabel ‘futime’ mengikuti distribusi tertentu untuk penentuan model yang dipakai. Jika variabel ‘futime’ mengikuti suatu dsitribusi, analisis dapat dilanjut menggunakan model parametrik. Distribusi yang biasa digunakan untuk survival time adalah weibull, exponential, dan log-logistic.
fitwei <- fitdist(retino$futime, "weibull")
ks.test(retino$futime, "pweibull", shape = fitwei$estimate["shape"], scale = fitwei$estimate["scale"])
## Warning in ks.test(retino$futime, "pweibull", shape =
## fitwei$estimate["shape"], : ties should not be present for the
## Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: retino$futime
## D = 0.17097, p-value = 1.983e-10
## alternative hypothesis: two-sided
Survival time tidak mengikuti distribusi Weibull.
fit_exp <- fitdist(retino$futime, "exp")
ks.test(retino$futime, "pexp", rate = fit_exp$estimate["rate"])
## Warning in ks.test(retino$futime, "pexp", rate = fit_exp$estimate["rate"]): ties
## should not be present for the Kolmogorov-Smirnov test
##
## One-sample Kolmogorov-Smirnov test
##
## data: retino$futime
## D = 0.20775, p-value = 3.442e-15
## alternative hypothesis: two-sided
retino$age_category <- cut(retino$age,
breaks = c(-Inf, 14, 24, 58),
labels = c("Anak", "Remaja", "Dewasa"),
right = TRUE)
kmfit6 = survfit(y ~ retino$age_category)
retino$risk_category <- cut(retino$risk,
breaks = c(-Inf, 9, Inf),
labels = c("6-9", "10-12"),
right = TRUE)
kmfit7 = survfit(y ~ retino$risk_category)
par(mfrow=c(1,3))
plot(kmfit2, fun = "cloglog", col = c("purple", "black"), lty = c("solid", "dotted"),
xlab = "Time in months using log scale",
ylab = "Log-log survival",
main = "log–log KM curves by trt")
legend("topleft", legend = c("Control eye", "Treated eye"), col = c("purple", "black"),
lty = c("solid", "dotted"))
plot(kmfit3, fun = "cloglog", col = c("purple", "black"), lty = c("solid", "dotted"),
xlab = "Time in months using log scale",
ylab = "Log-log survival",
main = "log–log KM curves by laser")
legend("topleft", legend = c("Argon", "Xenon"), col = c("purple", "black"),
lty = c("solid", "dotted"))
plot(kmfit4, fun = "cloglog", col = c("purple", "black","red"), lty = c("solid", "dotted"),
xlab = "Time in months using log scale",
ylab = "Log-log survival",
main = "log-log KM curves by eye")
legend("topleft", legend = c("Left", "Right"), col = c("purple", "black","red"),
lty = c("solid", "dotted"))
par(mfrow=c(1,3))
plot(kmfit5, fun = "cloglog", col = c("purple", "black"), lty = c("solid", "dotted"),
xlab = "Time in months using log scale",
ylab = "Log-log survival",
main = "log–log KM curves by type")
legend("topleft", legend = c("Adult", "Juvenile"), col = c("purple", "black"),
lty = c("solid", "dotted"))
plot(kmfit6, fun = "cloglog", col = c("purple", "black"), lty = c("solid", "dotted","dashed"),
xlab = "Time in months using log scale",
ylab = "Log-log survival",
main = "log–log KM curves by age group")
legend("topleft", legend = c("Anak", "Remaja", "Dewasa"), col = c("purple", "black"),
lty = c("solid", "dotted","dashed"))
plot(kmfit7, fun = "cloglog", col = c("purple", "black","red"), lty = c("solid", "dotted"),
xlab = "Time in months using log scale",
ylab = "Log-log survival",
main = "log-log KM curves by risk")
legend("topleft", legend = c("6-9", "10-12"), col = c("purple", "black","red"),
lty = c("solid", "dotted"))
plot(kmfit2, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
main = "Observed vs Expected Plot by trt")
legend("topright", c("Control eye", "Treated eye"), lty=c("solid","dashed"), col="black")
## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ trt, data=retino) # Fit Cox regression (PH assumed)
newdat <- data.frame(trt = 0:1) # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
col = "red", lty = 1:2)
## Warning in model.frame.default(data = structure(list(trt = 0:1), class =
## "data.frame", row.names = c(NA, : variable 'trt' is not a factor
plot(kmfit3, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
main = "Observed vs Expected Plot by laser")
legend("topright", c("Argon", "Xenon"), lty=c("solid","dashed"), col="black")
## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(laser), data=retino) # Fit Cox regression (PH assumed)
newdat <- data.frame(laser = factor(c("argon","xenon"), levels = c("argon","xenon"))) # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
col = "red", lty = 1:2)
plot(kmfit4, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
main = "Observed vs Expected Plot by eye")
legend("topright", c("Left", "Right"), lty=c("solid","dashed"), col="black")
## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(eye), data=retino) # Fit Cox regression (PH assumed)
newdat <- data.frame(eye = factor(c("left","right"), levels = c("left","right"))) # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
col = "red", lty = 1:2)
plot(kmfit5, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
main = "Observed vs Expected Plot by type")
legend("topright", c("Adult", "Juvenile"), lty=c("solid","dashed"), col="black")
## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(type), data=retino) # Fit Cox regression (PH assumed)
newdat <- data.frame(type =factor(c("adult","juvenile"), levels = c("adult","juvenile"))) # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
col = "red", lty = 1:2)
plot(kmfit6, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
main = "Observed vs Expected Plot by age group")
legend("topright", c("Anak", "Remaja", "Dewasa"), lty=c("solid","dashed"), col="black")
## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(age_category), data=retino) # Fit Cox regression (PH assumed)
newdat <- data.frame(age_category = factor(c("Anak", "Remaja", "Dewasa"), levels = c("Anak", "Remaja", "Dewasa"))) # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
col = "red", lty = 1:3)
plot(kmfit7, lty=c("solid", "dashed"), col="black", xlab="survival time in months", ylab="survival probabilities",
main = "Observed vs Expected Plot by risk")
legend("topright", c("6-9", "10-12"), lty=c("solid","dashed"), col="black")
## Expected survival plots, using Cox regression with PH assumption
cox.fit <- coxph(y ~ factor(risk_category), data=retino) # Fit Cox regression (PH assumed)
newdat <- data.frame(risk_category = factor(c("6-9", "10-12"), levels = c("6-9", "10-12"))) # newdata to get expected curves
lines(survfit(cox.fit, newdata = newdat),
col = "red", lty = 1:2)
model = coxph(y ~ laser + eye + age + type + trt + risk, data=retino, method = "breslow")
summary(model)
## Call:
## coxph(formula = y ~ laser + eye + age + type + trt + risk, data = retino,
## method = "breslow")
##
## n= 394, number of events= 155
##
## coef exp(coef) se(coef) z Pr(>|z|)
## laserxenon -0.16591 0.84712 0.16214 -1.023 0.3062
## eyeright -0.21711 0.80484 0.16756 -1.296 0.1951
## age 0.01050 1.01055 0.00983 1.068 0.2856
## typejuvenile 0.15721 1.17024 0.29354 0.536 0.5922
## trt1 -0.78627 0.45554 0.16925 -4.646 3.39e-06 ***
## risk 0.14147 1.15197 0.05600 2.526 0.0115 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## laserxenon 0.8471 1.1805 0.6165 1.1640
## eyeright 0.8048 1.2425 0.5795 1.1177
## age 1.0106 0.9896 0.9913 1.0302
## typejuvenile 1.1702 0.8545 0.6583 2.0803
## trt1 0.4555 2.1952 0.3269 0.6347
## risk 1.1520 0.8681 1.0322 1.2856
##
## Concordance= 0.629 (se = 0.023 )
## Likelihood ratio test= 33.41 on 6 df, p=9e-06
## Wald test = 31.9 on 6 df, p=2e-05
## Score (logrank) test = 33.05 on 6 df, p=1e-05
cox.zph(model,transform=rank)
## chisq df p
## laser 0.794 1 0.37
## eye 0.811 1 0.37
## age 0.342 1 0.56
## type 0.806 1 0.37
## trt 0.965 1 0.33
## risk 0.918 1 0.34
## GLOBAL 5.003 6 0.54