library(survival)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(survminer)
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
##
## The following object is masked from 'package:survival':
##
## myeloma
data<-veteran
head(data)
## trt celltype time status karno diagtime age prior
## 1 1 squamous 72 1 60 7 69 0
## 2 1 squamous 411 1 70 5 64 10
## 3 1 squamous 228 1 60 3 38 0
## 4 1 squamous 126 1 60 9 63 10
## 5 1 squamous 118 1 70 11 65 10
## 6 1 squamous 10 1 20 5 49 0
1.KAPLAIN-MEIER
fit_km<-survfit(Surv(time,status)~1,data=veteran)
summary(fit_km,times = c(100*(1:300)))
## Call: survfit(formula = Surv(time, status) ~ 1, data = veteran)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 100 55 79 0.418 0.0425 0.34251 0.5101
## 200 25 26 0.205 0.0360 0.14560 0.2895
## 300 13 10 0.117 0.0295 0.07147 0.1917
## 400 6 7 0.054 0.0211 0.02509 0.1163
## 500 4 2 0.036 0.0175 0.01389 0.0934
## 600 2 2 0.018 0.0126 0.00459 0.0707
## 700 2 0 0.018 0.0126 0.00459 0.0707
## 800 2 0 0.018 0.0126 0.00459 0.0707
## 900 2 0 0.018 0.0126 0.00459 0.0707
ggsurvplot(
fit_km,
ggtheme = theme_bw(),
xlab="Time in Days",
censor=F,
ylab="Kaplain Meier Survival Probability",
title="Kaplain Meier Survival Estimator for Veteran data "
)
#Obsevation from the graph
#With the Kaplain Meir survival plot for the veteran data,we observe that the survival probability decreases over time.The initial survival probability starts at 100%, but as days progress, the probability declines, indicating that a certain proportion of subjects experience the event (e.g., death) over the study period.The shape of the curve suggests that the event rate varies, with potentially steeper declines at certain intervals.
2.NELSON-AALEN ESTIMATOR
fit_na<-survfit(coxph(Surv(time,status)~1,data=veteran))
summary(fit_na,times = c(100*(1:300)))
## Call: survfit(formula = coxph(Surv(time, status) ~ 1, data = veteran))
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 100 55 79 0.4201 0.0424 0.34471 0.5121
## 200 25 26 0.2085 0.0361 0.14844 0.2927
## 300 13 10 0.1208 0.0298 0.07451 0.1958
## 400 6 7 0.0582 0.0218 0.02793 0.1212
## 500 4 2 0.0403 0.0184 0.01650 0.0986
## 600 2 2 0.0225 0.0139 0.00671 0.0755
## 700 2 0 0.0225 0.0139 0.00671 0.0755
## 800 2 0 0.0225 0.0139 0.00671 0.0755
## 900 2 0 0.0225 0.0139 0.00671 0.0755
ggsurvplot(
fit_na,
fun = "cumhaz",
data = veteran,
ggtheme = theme_bw(),
xlab="Time in Days",
censor=F,
ylab=" Cumulative Hazard",
title="Nelson-Aalen Estimator for Veteran Data"
)
#Obsevation from the graph
#Based on the Nelson-Aalen cumulative hazard plot for the veteran data,
#The cumulative hazard increases over time, indicating that the risk of the event (e.g., death) accumulates as time progresse.Periods with a steeper slope on the cumulative hazard curve indicate higher rates of events occurring during those intervals.
3.LOG-RANK TEST
survdiff(Surv(time,status)~trt,data=veteran)
## Call:
## survdiff(formula = Surv(time, status) ~ trt, data = veteran)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=1 69 64 64.5 0.00388 0.00823
## trt=2 68 64 63.5 0.00394 0.00823
##
## Chisq= 0 on 1 degrees of freedom, p= 0.9
#Log-Rank Test compares the survival distribution of two or more groups.
#pvalue=0.9 being greater than the significant level we therefore do not reject the null hypothesis hence no significance between the two
5.COX-PROPORTION HAZARDS MODEL
fit_cox<-coxph(Surv(time,status)~trt+age+celltype,data=veteran)
summary(fit_cox)
## Call:
## coxph(formula = Surv(time, status) ~ trt + age + celltype, data = veteran)
##
## n= 137, number of events= 128
##
## coef exp(coef) se(coef) z Pr(>|z|)
## trt 0.179011 1.196034 0.201404 0.889 0.374
## age 0.004097 1.004106 0.009581 0.428 0.669
## celltypesmallcell 1.080310 2.945592 0.274647 3.933 8.37e-05 ***
## celltypeadeno 1.170470 3.223506 0.294727 3.971 7.15e-05 ***
## celltypelarge 0.292624 1.339939 0.285504 1.025 0.305
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## trt 1.196 0.8361 0.8059 1.775
## age 1.004 0.9959 0.9854 1.023
## celltypesmallcell 2.946 0.3395 1.7195 5.046
## celltypeadeno 3.224 0.3102 1.8091 5.744
## celltypelarge 1.340 0.7463 0.7657 2.345
##
## Concordance= 0.619 (se = 0.028 )
## Likelihood ratio test= 26.04 on 5 df, p=9e-05
## Wald test = 25.01 on 5 df, p=1e-04
## Score (logrank) test = 26.51 on 5 df, p=7e-05
#Treatment (trt):
#HR = 0.575 (p = 0.073)
#Marginally non-significant reduction in hazard.
#Age:
#HR = 1.021 (p = 0.077)
#Marginally non-significant increase in hazard.
#Small Cell Type (celltypemsm):
#HR = 0.300 (p = 0.003)
#Significant reduction in hazard.
#Overall Model:
#Significant (p < 0.05)
#Moderate discrimination (C-index = 0.646).
cox_zph<-cox.zph(fit_cox)
print(cox_zph)
## chisq df p
## trt 0.892 1 0.345
## age 1.231 1 0.267
## celltype 9.213 3 0.027
## GLOBAL 12.598 5 0.027
plot(cox_zph)