library(readr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ stringr 1.5.1
## ✔ forcats 1.0.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.4 ✔ tidyr 1.3.1
## ✔ purrr 1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrr)
# Load the lung cancer dataset
Ldata <- na.omit(lung) # Remove rows with NA values
# Convert 'status' to a binary event indicator (1 = event occurred, 0 = censored)
Ldata$status <- ifelse(Ldata$status == 2, 1, 0)
# Convert categorical variables to factors
Ldata$sex <- as.factor(Ldata$sex) # Comparing survival by sex
# Kaplan-Meier Estimator
km_fit <- survfit(Surv(time, status) ~ sex, data = Ldata)
ggsurvplot(km_fit, data = Ldata, conf.int = TRUE, pval = TRUE, risk.table = TRUE,
ggtheme = theme_minimal(), legend.title = "Sex")
print(summary(km_fit)) # Display survival estimates
## Call: survfit(formula = Surv(time, status) ~ sex, data = Ldata)
##
## sex=1
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 11 103 1 0.9903 0.00966 0.9715 1.000
## 12 102 1 0.9806 0.01360 0.9543 1.000
## 13 101 1 0.9709 0.01657 0.9389 1.000
## 15 100 1 0.9612 0.01904 0.9246 0.999
## 26 99 1 0.9515 0.02118 0.9108 0.994
## 30 98 1 0.9417 0.02308 0.8976 0.988
## 31 97 1 0.9320 0.02480 0.8847 0.982
## 53 96 2 0.9126 0.02782 0.8597 0.969
## 54 94 1 0.9029 0.02917 0.8475 0.962
## 59 93 1 0.8932 0.03043 0.8355 0.955
## 60 92 1 0.8835 0.03161 0.8237 0.948
## 65 91 1 0.8738 0.03272 0.8119 0.940
## 88 90 1 0.8641 0.03377 0.8004 0.933
## 92 89 1 0.8544 0.03476 0.7889 0.925
## 93 88 1 0.8447 0.03569 0.7775 0.918
## 95 87 1 0.8350 0.03658 0.7663 0.910
## 110 86 1 0.8252 0.03742 0.7551 0.902
## 118 85 1 0.8155 0.03822 0.7440 0.894
## 135 84 1 0.8058 0.03898 0.7329 0.886
## 142 83 1 0.7961 0.03970 0.7220 0.878
## 147 82 1 0.7864 0.04038 0.7111 0.870
## 156 81 2 0.7670 0.04165 0.6895 0.853
## 163 79 3 0.7379 0.04333 0.6576 0.828
## 166 76 1 0.7282 0.04384 0.6471 0.819
## 170 75 1 0.7184 0.04432 0.6366 0.811
## 176 73 1 0.7086 0.04479 0.6260 0.802
## 179 72 1 0.6988 0.04523 0.6155 0.793
## 180 71 1 0.6889 0.04566 0.6050 0.784
## 181 70 2 0.6692 0.04642 0.5842 0.767
## 183 68 1 0.6594 0.04677 0.5738 0.758
## 197 64 1 0.6491 0.04716 0.5629 0.748
## 207 62 1 0.6386 0.04755 0.5519 0.739
## 210 61 1 0.6282 0.04791 0.5409 0.729
## 212 60 1 0.6177 0.04824 0.5300 0.720
## 218 59 1 0.6072 0.04855 0.5191 0.710
## 222 57 1 0.5966 0.04885 0.5081 0.700
## 223 55 1 0.5857 0.04915 0.4969 0.690
## 229 52 1 0.5745 0.04948 0.4852 0.680
## 230 51 1 0.5632 0.04977 0.4736 0.670
## 246 50 1 0.5519 0.05004 0.4621 0.659
## 267 48 1 0.5404 0.05030 0.4503 0.649
## 269 47 1 0.5289 0.05053 0.4386 0.638
## 270 46 1 0.5174 0.05072 0.4270 0.627
## 283 45 1 0.5059 0.05088 0.4154 0.616
## 284 44 1 0.4944 0.05101 0.4039 0.605
## 285 42 1 0.4827 0.05113 0.3922 0.594
## 286 41 1 0.4709 0.05122 0.3805 0.583
## 288 40 1 0.4591 0.05128 0.3689 0.571
## 291 39 1 0.4473 0.05129 0.3573 0.560
## 301 36 1 0.4349 0.05135 0.3451 0.548
## 303 34 1 0.4221 0.05141 0.3325 0.536
## 320 32 1 0.4089 0.05147 0.3195 0.523
## 337 31 1 0.3957 0.05147 0.3067 0.511
## 353 30 2 0.3694 0.05131 0.2813 0.485
## 363 28 1 0.3562 0.05114 0.2688 0.472
## 371 27 1 0.3430 0.05092 0.2564 0.459
## 390 26 1 0.3298 0.05064 0.2441 0.446
## 428 23 1 0.3154 0.05043 0.2306 0.432
## 429 22 1 0.3011 0.05014 0.2173 0.417
## 455 21 1 0.2868 0.04976 0.2041 0.403
## 457 20 1 0.2724 0.04929 0.1911 0.388
## 460 18 1 0.2573 0.04882 0.1774 0.373
## 477 17 1 0.2422 0.04824 0.1639 0.358
## 519 16 1 0.2270 0.04754 0.1506 0.342
## 524 15 1 0.2119 0.04672 0.1375 0.326
## 558 14 1 0.1968 0.04577 0.1247 0.310
## 567 13 1 0.1816 0.04468 0.1121 0.294
## 574 12 1 0.1665 0.04344 0.0998 0.278
## 583 11 1 0.1514 0.04205 0.0878 0.261
## 613 10 1 0.1362 0.04048 0.0761 0.244
## 643 9 1 0.1211 0.03870 0.0647 0.227
## 655 8 1 0.1059 0.03671 0.0537 0.209
## 689 7 1 0.0908 0.03444 0.0432 0.191
## 707 6 1 0.0757 0.03185 0.0332 0.173
## 791 5 1 0.0605 0.02886 0.0238 0.154
## 814 3 1 0.0404 0.02533 0.0118 0.138
##
## sex=2
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 5 64 1 0.984 0.0155 0.9545 1.000
## 60 63 1 0.969 0.0217 0.9270 1.000
## 61 62 1 0.953 0.0264 0.9027 1.000
## 62 61 1 0.938 0.0303 0.8800 0.999
## 79 60 1 0.922 0.0335 0.8584 0.990
## 81 59 1 0.906 0.0364 0.8376 0.981
## 95 58 1 0.891 0.0390 0.8174 0.970
## 107 56 1 0.875 0.0414 0.7972 0.960
## 145 55 1 0.859 0.0436 0.7774 0.949
## 153 54 1 0.843 0.0456 0.7581 0.937
## 167 53 1 0.827 0.0475 0.7390 0.925
## 199 50 1 0.810 0.0493 0.7194 0.913
## 201 49 1 0.794 0.0510 0.7000 0.900
## 226 45 1 0.776 0.0528 0.6794 0.887
## 239 43 1 0.758 0.0546 0.6584 0.873
## 245 40 1 0.739 0.0564 0.6366 0.859
## 268 37 1 0.719 0.0583 0.6136 0.843
## 285 34 1 0.698 0.0603 0.5894 0.827
## 293 32 1 0.676 0.0623 0.5647 0.810
## 305 30 1 0.654 0.0641 0.5394 0.792
## 310 29 1 0.631 0.0658 0.5146 0.774
## 345 27 1 0.608 0.0674 0.4892 0.755
## 348 26 1 0.584 0.0687 0.4642 0.736
## 351 25 1 0.561 0.0698 0.4397 0.716
## 361 24 1 0.538 0.0707 0.4155 0.696
## 363 23 1 0.514 0.0714 0.3918 0.675
## 426 19 1 0.487 0.0726 0.3639 0.653
## 433 18 1 0.460 0.0734 0.3366 0.629
## 444 17 1 0.433 0.0739 0.3100 0.605
## 450 16 1 0.406 0.0741 0.2839 0.581
## 473 15 1 0.379 0.0739 0.2585 0.556
## 520 13 1 0.350 0.0738 0.2314 0.529
## 550 11 1 0.318 0.0736 0.2020 0.501
## 641 8 1 0.278 0.0744 0.1648 0.470
## 687 7 1 0.239 0.0736 0.1303 0.437
## 705 6 1 0.199 0.0713 0.0984 0.401
## 731 5 1 0.159 0.0672 0.0695 0.364
## 765 3 1 0.106 0.0623 0.0335 0.335
The Kaplan-Meier plot shows survival probability over time for two different sex groups (sex = 1 in red, sex = 2 in blue).The blue group (sex = 2) appears to have better survival rates compared to the red group (sex = 1), as its curve remains higher for a longer time.
The p-value is 0.014, which is below 0.05, indicating a statistically significant difference in survival between the two groups.This suggests that sex may play a role in survival outcomes in this dataset.
The risk table below the graph shows the number of subjects still at risk at different time points. The number of individuals at risk decreases over time, which is expected.
# Nelson-Aalen Estimator
na_fit <- survfit(Surv(time, status) ~ sex, data = Ldata, type = "fh")
plot(na_fit, fun = "cumhaz", col = c("blue", "red"), lty = 1:2,
ylab = "Cumulative Hazard", xlab = "Time", main = "Nelson-Aalen Estimate by Sex")
legend("topright", legend = levels(Ldata$sex), col = c("blue", "red"), lty = 1:2)
The Nelson-Aalen provides an estimate of cummulative hazard over time
and it has reinforced what was shown in the KM results. That sex 1(blue)
has a lower survival probability then sex 2 (red)
The blue curve (sex 1) shows a higher cumulative hazard compared to the red curve (sex 2).Where a steeper slope indicates a higher hazard rate, meaning individuals in that group experience events at a faster rate.
You can also embed plots, for example:
# Log-Rank Test
logrank_test <- survdiff(Surv(time, status) ~ sex, data = Ldata)
print(logrank_test) # Display log-rank test results
## Call:
## survdiff(formula = Surv(time, status) ~ sex, data = Ldata)
##
## N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 103 82 68.7 2.57 6.05
## sex=2 64 38 51.3 3.44 6.05
##
## Chisq= 6 on 1 degrees of freedom, p= 0.01
The log-rank test assesses whether there is a significant difference in survival distributions between groups:
Chi-square = 6 with p-value = 0.01 suggests that the survival experience differs significantly between the two sexes. Since p < 0.05, we reject the null hypothesis, meaning that the difference in survival between males and females is statistically significant. This aligns with the Nelson-Aalen plot, where the cumulative hazard differs between the two groups.
# Cox Proportional Hazards Model
cox_fit <- coxph(Surv(time, status) ~ sex, data = Ldata)
summary_cox <- summary(cox_fit)
print(summary_cox) # Display model summary
## Call:
## coxph(formula = Surv(time, status) ~ sex, data = Ldata)
##
## n= 167, number of events= 120
##
## coef exp(coef) se(coef) z Pr(>|z|)
## sex2 -0.4792 0.6193 0.1966 -2.437 0.0148 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## exp(coef) exp(-coef) lower .95 upper .95
## sex2 0.6193 1.615 0.4212 0.9104
##
## Concordance= 0.567 (se = 0.025 )
## Likelihood ratio test= 6.25 on 1 df, p=0.01
## Wald test = 5.94 on 1 df, p=0.01
## Score (logrank) test = 6.05 on 1 df, p=0.01
The Cox model estimates the hazard ratio for sex. A hazard ratio greater than 1 means increased risk, while less than 1 suggests a protective effect.
The coefficient (coef) for sex2 is -0.4792, meaning females (sex2) have a lower hazard compared to males. The hazard ratio (exp(coef)) = 0.6193, meaning females have about 38% lower risk compared to males. The p-value (0.0148) indicates statistical significance, confirming that sex has a significant effect on survival.