# Load necessary libraries
library(survival)
## Warning: package 'survival' was built under R version 4.4.2
library(survminer)
## Warning: package 'survminer' was built under R version 4.4.2
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.2
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load built-in lung cancer dataset
# The dataset contains survival time, event status, and covariates
data(lung)
## Warning in data(lung): data set 'lung' not found
lung <- na.omit(lung) # Remove missing values
# Define survival object
surv_obj <- Surv(time = lung$time, event = lung$status)
# 1. Kaplan-Meier Estimator
km_fit <- survfit(surv_obj ~ 1,data = lung ) # Overall survival curve
ggplot_km <- ggsurvplot(km_fit,
data = lung, # Make sure this matches the dataset used in km_fit
conf.int = TRUE,
pval = TRUE,
risk.table = TRUE,
title = "Kaplan-Meier Survival Curve")
## Warning in .pvalue(fit, data = data, method = method, pval = pval, pval.coord = pval.coord, : There are no survival curves to be compared.
## This is a null model.
print(ggplot_km)

# 2. Nelson-Aalen Estimator
nelson_aalen_fit <- survfit(surv_obj ~ 1, type = "fh") # Fleming-Harrington estimator (Nelson-Aalen approximation)
plot(nelson_aalen_fit, fun = "cumhaz", xlab = "Time", ylab = "Cumulative Hazard", main = "Nelson-Aalen Estimator")

# 3. Log-Rank Test
km_fit_sex <- survfit(surv_obj ~ lung$sex)
log_rank_test <- survdiff(surv_obj ~ lung$sex)
print(log_rank_test)
## Call:
## survdiff(formula = surv_obj ~ lung$sex)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## lung$sex=1 103 82 68.7 2.57 6.05
## lung$sex=2 64 38 51.3 3.44 6.05
##
## Chisq= 6 on 1 degrees of freedom, p= 0.01
ggplot_lr <- ggsurvplot(km_fit_sex,
data = lung,
conf.int = TRUE,
pval = TRUE,
risk.table = TRUE,
title = "Kaplan-Meier Survival by Sex")
print(ggplot_lr)

# 4. Cox Proportional Hazards Model
cox_model <- coxph(surv_obj ~ age + sex + ph.ecog, data = lung)
summary(cox_model)
## Call:
## coxph(formula = surv_obj ~ age + sex + ph.ecog, data = lung)
##
## n= 167, number of events= 120
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.008034 1.008067 0.011086 0.725 0.468623
## sex -0.502180 0.605210 0.197336 -2.545 0.010934 *
## ph.ecog 0.455257 1.576579 0.136857 3.327 0.000879 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0081 0.9920 0.9864 1.030
## sex 0.6052 1.6523 0.4111 0.891
## ph.ecog 1.5766 0.6343 1.2057 2.062
##
## Concordance= 0.637 (se = 0.031 )
## Likelihood ratio test= 20.01 on 3 df, p=2e-04
## Wald test = 19.69 on 3 df, p=2e-04
## Score (logrank) test = 20.02 on 3 df, p=2e-04
# Interpretation:
# - The coefficients represent log-hazard ratios
# - The p-values indicate the significance of each covariate
# - A hazard ratio >1 means an increased risk, <1 means a decreased risk
# Plot Cox model adjusted survival curves
ggcoxzph <- ggcoxzph(cox.zph(cox_model))
print(ggcoxzph)
