#loading necessary packages
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
# Attach the survival-analysis packages (one call per line)
library(survival)
library(survminer)
# Take a working copy of the kidney dataset
data <- kidney
# Build the survival response: follow-up time paired with the event indicator
surv_object <- with(kidney, Surv(time = time, event = status))
# Preview the first six rows of the data
head(kidney, n = 6L)
## id time status age sex disease frail
## 1 1 8 1 28 1 Other 2.3
## 2 1 16 1 28 1 Other 2.3
## 3 2 23 1 48 2 GN 1.9
## 4 2 13 0 48 2 GN 1.9
## 5 3 22 1 32 1 Other 1.2
## 6 3 28 1 32 1 Other 1.2
# Summary statistics of the Surv object. As the printed output below shows,
# the time and status columns are summarised separately as plain numbers.
summary(surv_object)
## time status
## Min. : 2.0 Min. :0.0000
## 1st Qu.: 16.0 1st Qu.:1.0000
## Median : 39.5 Median :1.0000
## Mean :101.6 Mean :0.7632
## 3rd Qu.:149.8 3rd Qu.:1.0000
## Max. :562.0 Max. :1.0000
#interpretation The summary treats time and status as two plain numeric columns, so it ignores censoring. The median recorded follow-up time is 39.5 days, but because censored times are included this is not the median time to recurrence; that quantity must be read from the Kaplan-Meier fit. There is considerable variation (times range from 2 to 562 days), with some patients remaining recurrence-free for much longer. The mean of status shows that 76.32% of the observations ended in a recurrence, which suggests that recurrence is common in this population.
#structure of the data
# Compact display of the Surv object's internal structure
str(surv_object)
## 'Surv' num [1:76, 1:2] 8 16 23 13+ 22 28 447 318 30 12 ...
## - attr(*, "dimnames")=List of 2
## ..$ : NULL
## ..$ : chr [1:2] "time" "status"
## - attr(*, "type")= chr "right"
#interpretation The type attribute "right" shows that the object encodes right-censored data. Observations printed with a trailing + (e.g. 13+) are censored: the patient had not experienced the event by the last available follow-up, so the recorded time is only a lower bound on the time to recurrence.
# The Kaplan-Meier estimator: one overall (intercept-only) survival curve
km_fit <- survfit(surv_object ~ 1)
plot(
  km_fit,
  col = "blue",
  lwd = 2,
  xlab = "Time",
  ylab = "Survival Probability",
  main = "Kaplan-Meier Survival Curve"
)
grid()
#explanation It illustrates the probability of patients remaining
# recurrence-free over time. The solid blue line represents the estimated
# survival function, while the dashed lines indicate the 95% confidence
# interval. The curve starts at 1.0 (100% recurrence-free) and gradually
# declines, showing that the cumulative probability of kidney infection
# recurrence increases over time. The steep drops suggest periods where
# multiple patients experienced recurrence, while the flatter sections
# indicate times with fewer or no events. The long tail suggests that some
# patients remained recurrence-free for an extended period.
# The Nelson-Aalen estimator of the cumulative hazard.
# stype = 2, ctype = 1 is the current spelling of the deprecated
# type = "fleming-harrington": the cumulative hazard is the Nelson-Aalen
# estimate and the survival curve is derived from it as exp(-cumhaz).
na_fit <- survfit(surv_object ~ 1, stype = 2, ctype = 1)
# Plotting the cumulative hazard curve (Nelson-Aalen).
# Read the stored estimate directly instead of back-transforming -log(surv).
H_t <- na_fit$cumhaz
plot(
  na_fit$time, H_t,
  type = "s",
  col = "red",
  lwd = 2,
  main = "Nelson-Aalen Cumulative Hazard Function",
  xlab = "Time",
  ylab = "Cumulative Hazard"
)
grid()
#Explanation It shows how the risk of kidney infection recurrence
# accumulates over time. The y-axis (Cumulative Hazard) represents the
# total hazard experienced by patients up to a given time, while the
# x-axis (Time) indicates the study duration. The increasing red stepwise
# curve shows the hazard accumulating as time passes. The steeper sections
# indicate periods where multiple recurrences occurred, whereas flatter
# segments show times with fewer events. The final sharp increase suggests a
# higher risk of recurrence in later stages for remaining patients, although
# few patients are still at risk by then, so each late event produces a
# large jump and this part of the curve is estimated imprecisely. This
# function helps understand how recurrence risk evolves over time in the
# study population.
# Create a categorical variable for groups (e.g., sex)
# Recode sex as a labelled factor. In the kidney dataset sex is coded
# 1 = male, 2 = female (see ?survival::kidney), so the labels must be
# supplied in that order.
kidney$sex <- factor(kidney$sex, labels = c("Male", "Female"))
# Log-rank test comparing recurrence-free survival between the sexes
log_rank_test <- survdiff(Surv(time, status) ~ sex, data = kidney)
print(log_rank_test)
## Call:
## survdiff(formula = Surv(time, status) ~ sex, data = kidney)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=Male 20 18 10.2 5.99 8.31
## sex=Female 56 40 47.8 1.28 8.31
##
## Chisq= 8.3 on 1 degrees of freedom, p= 0.004
#Interpretation The log-rank test for the kidney dataset compares survival
# between male and female patients. Males (N = 20) had 18 observed
# recurrences, significantly higher than the expected 10.2, while females
# (N = 56) had 40 recurrences, lower than the expected 47.8. The Chi-square
# statistic (8.3, p = 0.004) indicates a statistically significant difference
# in survival, suggesting that males experience a higher risk of kidney
# infection recurrence than females. This finding highlights sex as a key
# factor influencing recurrence rates in the study population.
# Kaplan-Meier curves stratified by sex, with a number-at-risk table
km_fit_sex <- survfit(Surv(time, status) ~ sex, data = kidney)
ggsurvplot(
  km_fit_sex,
  data = kidney,
  title = "Kaplan-Meier Survival Curves by Sex for Kidney Patients",
  xlab = "Time (days)",
  ylab = "Survival Probability",
  risk.table = TRUE
)
#Interpretation Males experience a steeper decline in survival probability
# early on, indicating a higher recurrence rate of kidney infections compared
# to females. Females have a more gradual decline, suggesting better survival
# outcomes over time. By around 200 days, most males have experienced the
# event (recurrence), whereas a significant proportion of females remain
# event-free for a longer period. This supports the earlier log-rank test
# finding that males have a significantly higher risk of recurrence compared
# to females.
# Cox proportional hazards model with age and sex as covariates.
# NOTE(review): the head() output shows two rows per patient id, and this
# model treats all rows as independent; consider a cluster(id) term - confirm.
cox_fit <- coxph(Surv(time, status) ~ age + sex, data = kidney)
summary(cox_fit)
## Call:
## coxph(formula = Surv(time, status) ~ age + sex, data = kidney)
##
## n= 76, number of events= 58
##
## coef exp(coef) se(coef) z Pr(>|z|)
## age 0.002032 1.002034 0.009246 0.220 0.82607
## sexFemale -0.829314 0.436349 0.298955 -2.774 0.00554 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## age 1.0020 0.998 0.9840 1.020
## sexFemale 0.4363 2.292 0.2429 0.784
##
## Concordance= 0.662 (se = 0.045 )
## Likelihood ratio test= 7.12 on 2 df, p=0.03
## Wald test = 8.02 on 2 df, p=0.02
## Score (logrank) test = 8.45 on 2 df, p=0.01
#Interpreting output The Cox proportional hazards model examines the effect
# of age and sex on survival time. The age coefficient (HR = 1.002,
# p = 0.826) suggests that age has no significant impact on survival.
# However, sex is statistically significant (HR = 0.436, p = 0.005),
# indicating that females have a 56.4% lower hazard of recurrence compared to
# males. The concordance (C = 0.662) shows a moderate predictive ability of
# the model. The likelihood ratio, Wald, and score (log-rank) tests confirm
# that the model is statistically significant (p < 0.05), suggesting that sex
# plays a crucial role in kidney infection recurrence, with females having
# better survival outcomes.