Load Necessary Libraries

if (!requireNamespace("survival", quietly = TRUE)) install.packages("survival")
if (!requireNamespace("survminer", quietly = TRUE)) install.packages("survminer")
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma

Load and Prepare Data

data("lung", package = "survival")
## Warning in data("lung", package = "survival"): data set 'lung' not found
lung <- na.omit(as.data.frame(lung))  # Convert to DataFrame and remove missing values
str(lung)
## 'data.frame':    167 obs. of  10 variables:
##  $ inst     : num  3 5 12 7 11 1 7 6 12 22 ...
##  $ time     : num  455 210 1022 310 361 ...
##  $ status   : num  2 2 1 2 2 2 2 2 2 2 ...
##  $ age      : num  68 57 74 68 71 53 61 57 57 70 ...
##  $ sex      : num  1 1 1 2 2 1 1 1 1 1 ...
##  $ ph.ecog  : num  0 1 1 2 2 1 2 1 1 1 ...
##  $ ph.karno : num  90 90 50 70 60 70 70 80 80 90 ...
##  $ pat.karno: num  90 60 80 60 80 80 70 80 70 100 ...
##  $ meal.cal : num  1225 1150 513 384 538 ...
##  $ wt.loss  : num  15 11 0 10 1 16 34 27 60 -5 ...
##  - attr(*, "na.action")= 'omit' Named int [1:61] 1 3 5 12 13 14 16 20 23 25 ...
##   ..- attr(*, "names")= chr [1:61] "1" "3" "5" "12" ...

Create Survival Object

surv_object <- Surv(time = lung$time, event = lung$status)

1. Kaplan-Meier Estimator

km_fit <- survfit(surv_object ~ 1, data = lung)
ggsurvplot(km_fit, data = lung, conf.int = TRUE, pval = TRUE, title = "Kaplan-Meier Curve")
## Warning in .pvalue(fit, data = data, method = method, pval = pval, pval.coord = pval.coord, : There are no survival curves to be compared. 
##  This is a null model.

2. Nelson-Aalen Estimator

cox_model_na <- coxph(surv_object ~ 1, data = lung)
na_fit <- basehaz(cox_model_na, centered = FALSE)
plot(na_fit$time, na_fit$hazard, type = "s", xlab = "Time", ylab = "Cumulative Hazard", main = "Nelson-Aalen Estimator")

3. Log-rank Test

km_fit_sex <- survfit(surv_object ~ sex, data = lung)
ggsurvplot(km_fit_sex, data = lung, pval = TRUE, title = "Kaplan-Meier by Sex")

survdiff(surv_object ~ sex, data = lung)  # Log-rank test
## Call:
## survdiff(formula = surv_object ~ sex, data = lung)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 103       82     68.7      2.57      6.05
## sex=2  64       38     51.3      3.44      6.05
## 
##  Chisq= 6  on 1 degrees of freedom, p= 0.01

4. Cox Proportional Hazards Model

cox_model <- coxph(surv_object ~ age + sex + ph.ecog, data = lung)
summary(cox_model)
## Call:
## coxph(formula = surv_object ~ age + sex + ph.ecog, data = lung)
## 
##   n= 167, number of events= 120 
## 
##              coef exp(coef)  se(coef)      z Pr(>|z|)    
## age      0.008034  1.008067  0.011086  0.725 0.468623    
## sex     -0.502180  0.605210  0.197336 -2.545 0.010934 *  
## ph.ecog  0.455257  1.576579  0.136857  3.327 0.000879 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##         exp(coef) exp(-coef) lower .95 upper .95
## age        1.0081     0.9920    0.9864     1.030
## sex        0.6052     1.6523    0.4111     0.891
## ph.ecog    1.5766     0.6343    1.2057     2.062
## 
## Concordance= 0.637  (se = 0.031 )
## Likelihood ratio test= 20.01  on 3 df,   p=2e-04
## Wald test            = 19.69  on 3 df,   p=2e-04
## Score (logrank) test = 20.02  on 3 df,   p=2e-04

Visualizing Cox Model

ggforest(cox_model, data = lung)

Conclusion

This report presents survival analysis using the lung dataset. The Kaplan-Meier estimator visualizes survival probability, the Nelson-Aalen estimator shows cumulative hazard, the log-rank test compares survival distributions, and the Cox model assesses covariate impacts.