title: “SURVIVAL ANALYSIS” author: “Teddy Wambua 21/05600” date: “2025-03-01” output: html_document
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(ggplot2)
# Load lung dataset
data(lung)
## Warning in data(lung): data set 'lung' not found
head(lung)
## inst time status age sex ph.ecog ph.karno pat.karno meal.cal wt.loss
## 1 3 306 2 74 1 1 90 100 1175 NA
## 2 3 455 2 68 1 0 90 90 1225 15
## 3 3 1010 1 56 1 0 90 90 NA 15
## 4 5 210 2 57 1 1 90 60 1150 11
## 5 1 883 2 60 1 0 100 90 NA 0
## 6 12 1022 1 74 1 1 50 80 513 0
lung <- na.omit(lung) # Remove missing values
# Convert survival time and event status
lung$SurvObj <- with(lung, Surv(time, status == 2))
# Check structure
str(lung)
## 'data.frame': 167 obs. of 11 variables:
## $ inst : num 3 5 12 7 11 1 7 6 12 22 ...
## $ time : num 455 210 1022 310 361 ...
## $ status : num 2 2 1 2 2 2 2 2 2 2 ...
## $ age : num 68 57 74 68 71 53 61 57 57 70 ...
## $ sex : num 1 1 1 2 2 1 1 1 1 1 ...
## $ ph.ecog : num 0 1 1 2 2 1 2 1 1 1 ...
## $ ph.karno : num 90 90 50 70 60 70 70 80 80 90 ...
## $ pat.karno: num 90 60 80 60 80 80 70 80 70 100 ...
## $ meal.cal : num 1225 1150 513 384 538 ...
## $ wt.loss : num 15 11 0 10 1 16 34 27 60 -5 ...
## $ SurvObj : 'Surv' num [1:167, 1:2] 455 210 1022+ 310 361 218 166 170 567 613 ...
## ..- attr(*, "dimnames")=List of 2
## .. ..$ : NULL
## .. ..$ : chr [1:2] "time" "status"
## ..- attr(*, "type")= chr "right"
## - attr(*, "na.action")= 'omit' Named int [1:61] 1 3 5 12 13 14 16 20 23 25 ...
## ..- attr(*, "names")= chr [1:61] "1" "3" "5" "12" ...
# Fit Kaplan-Meier survival curve
km_fit <- survfit(Surv(time, status == 2) ~ 1, data = lung)
# Plot Kaplan-Meier curve
ggsurvplot(km_fit, data = lung, conf.int = TRUE,
xlab = "Time in days", ylab = "Survival Probability",
ggtheme = theme_minimal(),
risk.table = TRUE) +
labs(title = "Kaplan-Meier Survival Curve")
# Fit Cox model
cox_model <- coxph(Surv(time, status == 2) ~ 1, data = lung)
# Compute cumulative hazard
na_fit <- basehaz(cox_model, centered = FALSE)
# Plot Nelson-Aalen cumulative hazard function
ggplot(na_fit, aes(x = time, y = hazard)) +
geom_line(color = "blue") +
labs(title = "Nelson-Aalen Cumulative Hazard Function",
x = "Time in days",
y = "Cumulative Hazard") +
theme_minimal()
# Compare survival between male and female patients
surv_diff <- survdiff(Surv(time, status == 2) ~ sex, data = lung)
print(surv_diff)
## Call:
## survdiff(formula = Surv(time, status == 2) ~ sex, data = lung)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 103 82 68.7 2.57 6.05
## sex=2 64 38 51.3 3.44 6.05
##
## Chisq= 6 on 1 degrees of freedom, p= 0.01
# Fit Cox model with age, sex, and ECOG score
cox_model <- coxph(Surv(time, status == 2) ~ age + sex + ph.ecog, data = lung)
# Display Cox model summary
summary(cox_model)
## Call:
## coxph(formula = Surv(time, status == 2) ~ age + sex + ph.ecog,
## data = lung)
##
## n= 167, number of events= 120
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.008034 1.008067 0.011086 0.725 0.468623
## sex -0.502180 0.605210 0.197336 -2.545 0.010934 *
## ph.ecog 0.455257 1.576579 0.136857 3.327 0.000879 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0081 0.9920 0.9864 1.030
## sex 0.6052 1.6523 0.4111 0.891
## ph.ecog 1.5766 0.6343 1.2057 2.062
##
## Concordance= 0.637 (se = 0.031 )
## Likelihood ratio test= 20.01 on 3 df, p=2e-04
## Wald test = 19.69 on 3 df, p=2e-04
## Score (logrank) test = 20.02 on 3 df, p=2e-04