library(readr)
library(readxl)
library(survival)
library(survminer)
## Loading required package: ggplot2
## Loading required package: ggpubr
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ stringr   1.5.1
## ✔ forcats   1.0.0     ✔ tibble    3.2.1
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.0.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(purrr)
# Load the lung cancer dataset
Ldata <- na.omit(lung)  # Remove rows with NA values

# Convert 'status' to a binary event indicator (1 = event occurred, 0 = censored)
Ldata$status <- ifelse(Ldata$status == 2, 1, 0)

# Convert categorical variables to factors
Ldata$sex <- as.factor(Ldata$sex)  # Comparing survival by sex
 # Kaplan-Meier Estimator
km_fit <- survfit(Surv(time, status) ~ sex, data = Ldata)
ggsurvplot(km_fit, data = Ldata, conf.int = TRUE, pval = TRUE, risk.table = TRUE, 
           ggtheme = theme_minimal(), legend.title = "Sex")

print(summary(km_fit))  # Display survival estimates
## Call: survfit(formula = Surv(time, status) ~ sex, data = Ldata)
## 
##                 sex=1 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##    11    103       1   0.9903 0.00966       0.9715        1.000
##    12    102       1   0.9806 0.01360       0.9543        1.000
##    13    101       1   0.9709 0.01657       0.9389        1.000
##    15    100       1   0.9612 0.01904       0.9246        0.999
##    26     99       1   0.9515 0.02118       0.9108        0.994
##    30     98       1   0.9417 0.02308       0.8976        0.988
##    31     97       1   0.9320 0.02480       0.8847        0.982
##    53     96       2   0.9126 0.02782       0.8597        0.969
##    54     94       1   0.9029 0.02917       0.8475        0.962
##    59     93       1   0.8932 0.03043       0.8355        0.955
##    60     92       1   0.8835 0.03161       0.8237        0.948
##    65     91       1   0.8738 0.03272       0.8119        0.940
##    88     90       1   0.8641 0.03377       0.8004        0.933
##    92     89       1   0.8544 0.03476       0.7889        0.925
##    93     88       1   0.8447 0.03569       0.7775        0.918
##    95     87       1   0.8350 0.03658       0.7663        0.910
##   110     86       1   0.8252 0.03742       0.7551        0.902
##   118     85       1   0.8155 0.03822       0.7440        0.894
##   135     84       1   0.8058 0.03898       0.7329        0.886
##   142     83       1   0.7961 0.03970       0.7220        0.878
##   147     82       1   0.7864 0.04038       0.7111        0.870
##   156     81       2   0.7670 0.04165       0.6895        0.853
##   163     79       3   0.7379 0.04333       0.6576        0.828
##   166     76       1   0.7282 0.04384       0.6471        0.819
##   170     75       1   0.7184 0.04432       0.6366        0.811
##   176     73       1   0.7086 0.04479       0.6260        0.802
##   179     72       1   0.6988 0.04523       0.6155        0.793
##   180     71       1   0.6889 0.04566       0.6050        0.784
##   181     70       2   0.6692 0.04642       0.5842        0.767
##   183     68       1   0.6594 0.04677       0.5738        0.758
##   197     64       1   0.6491 0.04716       0.5629        0.748
##   207     62       1   0.6386 0.04755       0.5519        0.739
##   210     61       1   0.6282 0.04791       0.5409        0.729
##   212     60       1   0.6177 0.04824       0.5300        0.720
##   218     59       1   0.6072 0.04855       0.5191        0.710
##   222     57       1   0.5966 0.04885       0.5081        0.700
##   223     55       1   0.5857 0.04915       0.4969        0.690
##   229     52       1   0.5745 0.04948       0.4852        0.680
##   230     51       1   0.5632 0.04977       0.4736        0.670
##   246     50       1   0.5519 0.05004       0.4621        0.659
##   267     48       1   0.5404 0.05030       0.4503        0.649
##   269     47       1   0.5289 0.05053       0.4386        0.638
##   270     46       1   0.5174 0.05072       0.4270        0.627
##   283     45       1   0.5059 0.05088       0.4154        0.616
##   284     44       1   0.4944 0.05101       0.4039        0.605
##   285     42       1   0.4827 0.05113       0.3922        0.594
##   286     41       1   0.4709 0.05122       0.3805        0.583
##   288     40       1   0.4591 0.05128       0.3689        0.571
##   291     39       1   0.4473 0.05129       0.3573        0.560
##   301     36       1   0.4349 0.05135       0.3451        0.548
##   303     34       1   0.4221 0.05141       0.3325        0.536
##   320     32       1   0.4089 0.05147       0.3195        0.523
##   337     31       1   0.3957 0.05147       0.3067        0.511
##   353     30       2   0.3694 0.05131       0.2813        0.485
##   363     28       1   0.3562 0.05114       0.2688        0.472
##   371     27       1   0.3430 0.05092       0.2564        0.459
##   390     26       1   0.3298 0.05064       0.2441        0.446
##   428     23       1   0.3154 0.05043       0.2306        0.432
##   429     22       1   0.3011 0.05014       0.2173        0.417
##   455     21       1   0.2868 0.04976       0.2041        0.403
##   457     20       1   0.2724 0.04929       0.1911        0.388
##   460     18       1   0.2573 0.04882       0.1774        0.373
##   477     17       1   0.2422 0.04824       0.1639        0.358
##   519     16       1   0.2270 0.04754       0.1506        0.342
##   524     15       1   0.2119 0.04672       0.1375        0.326
##   558     14       1   0.1968 0.04577       0.1247        0.310
##   567     13       1   0.1816 0.04468       0.1121        0.294
##   574     12       1   0.1665 0.04344       0.0998        0.278
##   583     11       1   0.1514 0.04205       0.0878        0.261
##   613     10       1   0.1362 0.04048       0.0761        0.244
##   643      9       1   0.1211 0.03870       0.0647        0.227
##   655      8       1   0.1059 0.03671       0.0537        0.209
##   689      7       1   0.0908 0.03444       0.0432        0.191
##   707      6       1   0.0757 0.03185       0.0332        0.173
##   791      5       1   0.0605 0.02886       0.0238        0.154
##   814      3       1   0.0404 0.02533       0.0118        0.138
## 
##                 sex=2 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     5     64       1    0.984  0.0155       0.9545        1.000
##    60     63       1    0.969  0.0217       0.9270        1.000
##    61     62       1    0.953  0.0264       0.9027        1.000
##    62     61       1    0.938  0.0303       0.8800        0.999
##    79     60       1    0.922  0.0335       0.8584        0.990
##    81     59       1    0.906  0.0364       0.8376        0.981
##    95     58       1    0.891  0.0390       0.8174        0.970
##   107     56       1    0.875  0.0414       0.7972        0.960
##   145     55       1    0.859  0.0436       0.7774        0.949
##   153     54       1    0.843  0.0456       0.7581        0.937
##   167     53       1    0.827  0.0475       0.7390        0.925
##   199     50       1    0.810  0.0493       0.7194        0.913
##   201     49       1    0.794  0.0510       0.7000        0.900
##   226     45       1    0.776  0.0528       0.6794        0.887
##   239     43       1    0.758  0.0546       0.6584        0.873
##   245     40       1    0.739  0.0564       0.6366        0.859
##   268     37       1    0.719  0.0583       0.6136        0.843
##   285     34       1    0.698  0.0603       0.5894        0.827
##   293     32       1    0.676  0.0623       0.5647        0.810
##   305     30       1    0.654  0.0641       0.5394        0.792
##   310     29       1    0.631  0.0658       0.5146        0.774
##   345     27       1    0.608  0.0674       0.4892        0.755
##   348     26       1    0.584  0.0687       0.4642        0.736
##   351     25       1    0.561  0.0698       0.4397        0.716
##   361     24       1    0.538  0.0707       0.4155        0.696
##   363     23       1    0.514  0.0714       0.3918        0.675
##   426     19       1    0.487  0.0726       0.3639        0.653
##   433     18       1    0.460  0.0734       0.3366        0.629
##   444     17       1    0.433  0.0739       0.3100        0.605
##   450     16       1    0.406  0.0741       0.2839        0.581
##   473     15       1    0.379  0.0739       0.2585        0.556
##   520     13       1    0.350  0.0738       0.2314        0.529
##   550     11       1    0.318  0.0736       0.2020        0.501
##   641      8       1    0.278  0.0744       0.1648        0.470
##   687      7       1    0.239  0.0736       0.1303        0.437
##   705      6       1    0.199  0.0713       0.0984        0.401
##   731      5       1    0.159  0.0672       0.0695        0.364
##   765      3       1    0.106  0.0623       0.0335        0.335

The Kaplan-Meier plot shows survival probability over time for two different sex groups (sex = 1 in red, sex = 2 in blue).The blue group (sex = 2) appears to have better survival rates compared to the red group (sex = 1), as its curve remains higher for a longer time.

The p-value is 0.014, which is below 0.05, indicating a statistically significant difference in survival between the two groups.This suggests that sex may play a role in survival outcomes in this dataset.

The risk table below the graph shows the number of subjects still at risk at different time points. The number of individuals at risk decreases over time, which is expected.

# Nelson-Aalen Estimator
na_fit <- survfit(Surv(time, status) ~ sex, data = Ldata, type = "fh")
plot(na_fit, fun = "cumhaz", col = c("blue", "red"), lty = 1:2, 
     ylab = "Cumulative Hazard", xlab = "Time", main = "Nelson-Aalen Estimate by Sex")
legend("topright", legend = levels(Ldata$sex), col = c("blue", "red"), lty = 1:2)

The Nelson-Aalen provides an estimate of cummulative hazard over time and it has reinforced what was shown in the KM results. That sex 1(blue) has a lower survival probability then sex 2 (red)

The blue curve (sex 1) shows a higher cumulative hazard compared to the red curve (sex 2).Where a steeper slope indicates a higher hazard rate, meaning individuals in that group experience events at a faster rate.

Including Plots

You can also embed plots, for example:

# Log-Rank Test
logrank_test <- survdiff(Surv(time, status) ~ sex, data = Ldata)
print(logrank_test)  # Display log-rank test results
## Call:
## survdiff(formula = Surv(time, status) ~ sex, data = Ldata)
## 
##         N Observed Expected (O-E)^2/E (O-E)^2/V
## sex=1 103       82     68.7      2.57      6.05
## sex=2  64       38     51.3      3.44      6.05
## 
##  Chisq= 6  on 1 degrees of freedom, p= 0.01

The log-rank test assesses whether there is a significant difference in survival distributions between groups:

Chi-square = 6 with p-value = 0.01 suggests that the survival experience differs significantly between the two sexes. Since p < 0.05, we reject the null hypothesis, meaning that the difference in survival between males and females is statistically significant. This aligns with the Nelson-Aalen plot, where the cumulative hazard differs between the two groups.

# Cox Proportional Hazards Model
cox_fit <- coxph(Surv(time, status) ~ sex, data = Ldata)
summary_cox <- summary(cox_fit)
print(summary_cox)  # Display model summary
## Call:
## coxph(formula = Surv(time, status) ~ sex, data = Ldata)
## 
##   n= 167, number of events= 120 
## 
##         coef exp(coef) se(coef)      z Pr(>|z|)  
## sex2 -0.4792    0.6193   0.1966 -2.437   0.0148 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
##      exp(coef) exp(-coef) lower .95 upper .95
## sex2    0.6193      1.615    0.4212    0.9104
## 
## Concordance= 0.567  (se = 0.025 )
## Likelihood ratio test= 6.25  on 1 df,   p=0.01
## Wald test            = 5.94  on 1 df,   p=0.01
## Score (logrank) test = 6.05  on 1 df,   p=0.01

The Cox model estimates the hazard ratio for sex. A hazard ratio greater than 1 means increased risk, while less than 1 suggests a protective effect.

The coefficient (coef) for sex2 is -0.4792, meaning females (sex2) have a lower hazard compared to males. The hazard ratio (exp(coef)) = 0.6193, meaning females have about 38% lower risk compared to males. The p-value (0.0148) indicates statistical significance, confirming that sex has a significant effect on survival.