library(survival)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(survminer)
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## 
## The following object is masked from 'package:survival':
## 
##     myeloma
data<-veteran
head(data)
##   trt celltype time status karno diagtime age prior
## 1   1 squamous   72      1    60        7  69     0
## 2   1 squamous  411      1    70        5  64    10
## 3   1 squamous  228      1    60        3  38     0
## 4   1 squamous  126      1    60        9  63    10
## 5   1 squamous  118      1    70       11  65    10
## 6   1 squamous   10      1    20        5  49     0

1.KAPLAIN-MEIER

fit_km<-survfit(Surv(time,status)~1,data=veteran)
summary(fit_km,times = c(100*(1:300)))
## Call: survfit(formula = Surv(time, status) ~ 1, data = veteran)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   100     55      79    0.418  0.0425      0.34251       0.5101
##   200     25      26    0.205  0.0360      0.14560       0.2895
##   300     13      10    0.117  0.0295      0.07147       0.1917
##   400      6       7    0.054  0.0211      0.02509       0.1163
##   500      4       2    0.036  0.0175      0.01389       0.0934
##   600      2       2    0.018  0.0126      0.00459       0.0707
##   700      2       0    0.018  0.0126      0.00459       0.0707
##   800      2       0    0.018  0.0126      0.00459       0.0707
##   900      2       0    0.018  0.0126      0.00459       0.0707
ggsurvplot(
  fit_km,
  ggtheme = theme_bw(),
  xlab="Time in Days",
  censor=F,
  ylab="Kaplain Meier Survival Probability",
  title="Kaplain Meier Survival Estimator for Veteran data "
)

#Obsevation from the graph
#With the Kaplain Meir survival plot for the veteran data,we observe that the survival probability decreases over time.The initial survival probability starts at 100%, but as days progress, the probability declines, indicating that a certain proportion of subjects experience the event (e.g., death) over the study period.The shape of the curve suggests that the event rate varies, with potentially steeper declines at certain intervals.

2.NELSON-AALEN ESTIMATOR

fit_na<-survfit(coxph(Surv(time,status)~1,data=veteran))
summary(fit_na,times = c(100*(1:300)))
## Call: survfit(formula = coxph(Surv(time, status) ~ 1, data = veteran))
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   100     55      79   0.4201  0.0424      0.34471       0.5121
##   200     25      26   0.2085  0.0361      0.14844       0.2927
##   300     13      10   0.1208  0.0298      0.07451       0.1958
##   400      6       7   0.0582  0.0218      0.02793       0.1212
##   500      4       2   0.0403  0.0184      0.01650       0.0986
##   600      2       2   0.0225  0.0139      0.00671       0.0755
##   700      2       0   0.0225  0.0139      0.00671       0.0755
##   800      2       0   0.0225  0.0139      0.00671       0.0755
##   900      2       0   0.0225  0.0139      0.00671       0.0755
ggsurvplot(
  fit_na,
  fun = "cumhaz",
  data = veteran,
  ggtheme = theme_bw(),
  xlab="Time in Days",
  censor=F,
  ylab=" Cumulative Hazard",
  title="Nelson-Aalen Estimator for Veteran Data"
)

#Obsevation from the graph
#Based on the Nelson-Aalen cumulative hazard plot for the veteran data,
#The cumulative hazard increases over time, indicating that the risk of the event (e.g., death) accumulates as time progresse.Periods with a steeper slope on the cumulative hazard curve indicate higher rates of events occurring during those intervals.

3.LOG-RANK TEST

survdiff(Surv(time,status)~trt,data=veteran)
## Call:
## survdiff(formula = Surv(time, status) ~ trt, data = veteran)
## 
##        N Observed Expected (O-E)^2/E (O-E)^2/V
## trt=1 69       64     64.5   0.00388   0.00823
## trt=2 68       64     63.5   0.00394   0.00823
## 
##  Chisq= 0  on 1 degrees of freedom, p= 0.9
#Log-Rank Test compares the survival distribution of two or more groups.
#pvalue=0.9 being greater than the significant level we therefore do not reject the null hypothesis hence no significance between the two

5.COX-PROPORTION HAZARDS MODEL

fit_cox<-coxph(Surv(time,status)~trt+age+celltype,data=veteran)
summary(fit_cox)
## Call:
## coxph(formula = Surv(time, status) ~ trt + age + celltype, data = veteran)
## 
##   n= 137, number of events= 128 
## 
##                       coef exp(coef) se(coef)     z Pr(>|z|)    
## trt               0.179011  1.196034 0.201404 0.889    0.374    
## age               0.004097  1.004106 0.009581 0.428    0.669    
## celltypesmallcell 1.080310  2.945592 0.274647 3.933 8.37e-05 ***
## celltypeadeno     1.170470  3.223506 0.294727 3.971 7.15e-05 ***
## celltypelarge     0.292624  1.339939 0.285504 1.025    0.305    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##                   exp(coef) exp(-coef) lower .95 upper .95
## trt                   1.196     0.8361    0.8059     1.775
## age                   1.004     0.9959    0.9854     1.023
## celltypesmallcell     2.946     0.3395    1.7195     5.046
## celltypeadeno         3.224     0.3102    1.8091     5.744
## celltypelarge         1.340     0.7463    0.7657     2.345
## 
## Concordance= 0.619  (se = 0.028 )
## Likelihood ratio test= 26.04  on 5 df,   p=9e-05
## Wald test            = 25.01  on 5 df,   p=1e-04
## Score (logrank) test = 26.51  on 5 df,   p=7e-05
#Treatment (trt):

#HR = 0.575 (p = 0.073)

#Marginally non-significant reduction in hazard.

#Age:

#HR = 1.021 (p = 0.077)

#Marginally non-significant increase in hazard.

#Small Cell Type (celltypemsm):

#HR = 0.300 (p = 0.003)

#Significant reduction in hazard.

#Overall Model:

#Significant (p < 0.05)

#Moderate discrimination (C-index = 0.646).
cox_zph<-cox.zph(fit_cox)
print(cox_zph)
##           chisq df     p
## trt       0.892  1 0.345
## age       1.231  1 0.267
## celltype  9.213  3 0.027
## GLOBAL   12.598  5 0.027
plot(cox_zph)