Analisis Daya Tahan Pasien Non-Small Cell Lung Cancer dengan Metode Kaplan Meier

Analisis Daya Tahan Nonparametrik: Kaplan-Meier

Dataset: NSCLC Radiogenomics (TCIA). Sumber: https://www.cancerimagingarchive.net/collection/nsclc-radiogenomics/

# Memanggil package utama
library(readxl)
## Warning: package 'readxl' was built under R version 4.4.3
library(survival)
## Warning: package 'survival' was built under R version 4.4.3
library(survminer)
## Warning: package 'survminer' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.4.3
## 
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
## 
##     myeloma
library(dplyr)
## Warning: package 'dplyr' was built under R version 4.4.3
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

1. Memuat dan Menyiapkan Data

# Load the raw clinical sheet of the NSCLC Radiogenomics workbook.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this chunk on another computer.
data_raw <- read_excel(
  path  = "C:\\Users\\FAQIH\\Downloads\\Data adt.xlsx",
  sheet = "NSCLCR01Radiogenomic_RawData"
)

Eksplorasi singkat

# Preview the type and first values of the six columns used in the analysis.
data_raw %>%
  select(all_of(c(
    "Case ID", "Gender", "Survival Status",
    "Time to Death (days)", "Date of Last Known Alive", "CT Date"
  ))) %>%
  glimpse()
## Rows: 211
## Columns: 6
## $ `Case ID`                  <chr> "AMC-001", "AMC-002", "AMC-003", "AMC-004",…
## $ Gender                     <chr> "Male", "Female", "Female", "Female", "Male…
## $ `Survival Status`          <chr> "Dead", "Alive", "Alive", "Alive", "Alive",…
## $ `Time to Death (days)`     <chr> "872", "N/A", "N/A", "N/A", "N/A", "N/A", "…
## $ `Date of Last Known Alive` <chr> "1/7/1997", "3/20/1992", "6/19/1996", "12/1…
## $ `CT Date`                  <chr> "8/10/1994", "2/19/1992", "2/23/1995", "12/…

2. Pembersihan Data

# Build the analysis table: keep the relevant columns under shorter names,
# parse dates and death times, then derive the survival time and event flag.
data_km <- data_raw %>%
  select(
    case_id         = `Case ID`,
    gender          = `Gender`,
    survival_status = `Survival Status`,
    time_to_death   = `Time to Death (days)`,
    date_last_alive = `Date of Last Known Alive`,
    ct_date         = `CT Date`
  ) %>%
  mutate(
    # Dates are stored as month/day/year text.
    date_last_alive = as.Date(date_last_alive, format = "%m/%d/%Y"),
    ct_date         = as.Date(ct_date, format = "%m/%d/%Y"),
    # Living patients carry the literal "N/A". Map only that sentinel to NA
    # instead of silencing every coercion warning, so any other unparseable
    # value is still reported.
    time_to_death   = as.numeric(na_if(time_to_death, "N/A"))
  ) %>%
  mutate(
    # Survival time in days:
    #   Dead  -> taken directly from `Time to Death (days)`
    #   Alive -> days between the CT date and the date last known alive
    #   any other status -> NA (removed by the filter below)
    survival_time = case_when(
      survival_status == "Dead"  ~ time_to_death,
      survival_status == "Alive" ~ as.numeric(date_last_alive - ct_date),
      TRUE ~ NA_real_
    ),
    # Event indicator: 1 = death observed, 0 = right-censored (still alive).
    event = ifelse(survival_status == "Dead", 1, 0)
  ) %>%
  # Drop rows whose survival time is missing or negative.
  filter(!is.na(survival_time), survival_time >= 0)

# Show the first rows of the derived analysis columns.
data_km %>%
  select(case_id, gender, survival_status, survival_time, event) %>%
  head()
## # A tibble: 6 × 5
##   case_id gender survival_status survival_time event
##   <chr>   <chr>  <chr>                   <dbl> <dbl>
## 1 AMC-001 Male   Dead                      872     1
## 2 AMC-002 Female Alive                      30     0
## 3 AMC-003 Female Alive                     482     0
## 4 AMC-004 Female Alive                    1448     0
## 5 AMC-005 Male   Alive                     901     0
## 6 AMC-006 Female Alive                     739     0

3. Membuat Objek Survival

# Build the right-censored survival object. In the printout a trailing '+'
# marks a censored observation.
surv_obj <- with(data_km, Surv(time = survival_time, event = event))
head(surv_obj, n = 15)
##  [1]  872    30+  482+ 1448+  901+  739+ 1160+ 1270+   12   815+ 1548+ 1154+
## [13] 1315+ 1425+ 1254+

4. Fit Model Kaplan-Meier: Keseluruhan

# Kaplan-Meier estimate for the whole cohort: the intercept-only formula
# (~ 1) means no grouping, so every row of data_km contributes to one curve.
km_overall <- survfit(
  formula = Surv(survival_time, event) ~ 1,
  data    = data_km
)

# Compact summary: sample size, number of events, median survival and its
# 95% confidence limits.
km_overall
## Call: survfit(formula = Surv(survival_time, event) ~ 1, data = data_km)
## 
##        n events median 0.95LCL 0.95UCL
## [1,] 211     63     NA    2356      NA
# Kaplan-Meier life table: one row per observed event time with the number at
# risk, number of events, survival estimate, standard error and 95% CI.
# The printed table has no n.censor column, and times at which only censoring
# occurs are not listed by default.
summary(object = km_overall)
## Call: survfit(formula = Surv(survival_time, event) ~ 1, data = data_km)
## 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     6    211       1    0.995 0.00473        0.986        1.000
##    12    210       1    0.991 0.00667        0.978        1.000
##    14    209       1    0.986 0.00815        0.970        1.000
##    22    206       1    0.981 0.00941        0.963        1.000
##    28    204       1    0.976 0.01052        0.956        0.997
##    36    202       1    0.971 0.01153        0.949        0.994
##    41    200       1    0.966 0.01245        0.942        0.991
##    42    199       1    0.962 0.01330        0.936        0.988
##    47    198       1    0.957 0.01409        0.930        0.985
##   159    191       1    0.952 0.01488        0.923        0.981
##   201    190       1    0.947 0.01562        0.917        0.978
##   210    188       1    0.942 0.01633        0.910        0.974
##   225    186       1    0.937 0.01701        0.904        0.971
##   261    183       1    0.932 0.01767        0.898        0.967
##   276    182       1    0.926 0.01830        0.891        0.963
##   280    181       1    0.921 0.01890        0.885        0.959
##   286    180       1    0.916 0.01948        0.879        0.955
##   299    179       1    0.911 0.02003        0.873        0.951
##   328    178       1    0.906 0.02056        0.867        0.947
##   346    177       1    0.901 0.02107        0.860        0.943
##   360    175       1    0.896 0.02157        0.854        0.939
##   366    174       1    0.891 0.02205        0.848        0.935
##   417    172       1    0.885 0.02252        0.842        0.931
##   428    171       1    0.880 0.02298        0.836        0.926
##   430    170       1    0.875 0.02342        0.830        0.922
##   441    169       1    0.870 0.02385        0.824        0.918
##   446    168       1    0.865 0.02426        0.818        0.914
##   462    167       1    0.859 0.02466        0.812        0.909
##   474    166       1    0.854 0.02505        0.807        0.905
##   477    165       1    0.849 0.02543        0.801        0.900
##   514    163       1    0.844 0.02580        0.795        0.896
##   600    160       1    0.839 0.02617        0.789        0.892
##   623    158       1    0.833 0.02654        0.783        0.887
##   664    155       1    0.828 0.02691        0.777        0.882
##   667    153       1    0.823 0.02727        0.771        0.878
##   671    152       1    0.817 0.02762        0.765        0.873
##   776    144       1    0.811 0.02801        0.758        0.868
##   777    143       1    0.806 0.02838        0.752        0.863
##   792    140       1    0.800 0.02876        0.746        0.858
##   857    136       1    0.794 0.02914        0.739        0.853
##   861    135       1    0.788 0.02951        0.732        0.848
##   867    133       1    0.782 0.02988        0.726        0.843
##   872    132       1    0.776 0.03024        0.719        0.838
##   952    126       1    0.770 0.03062        0.713        0.833
##   985    121       1    0.764 0.03102        0.705        0.827
##  1011    118       1    0.757 0.03142        0.698        0.822
##  1048    115       1    0.751 0.03183        0.691        0.816
##  1083    113       1    0.744 0.03224        0.684        0.810
##  1123    111       1    0.737 0.03264        0.676        0.804
##  1133    110       1    0.731 0.03302        0.669        0.798
##  1165    104       1    0.724 0.03344        0.661        0.792
##  1176    101       1    0.717 0.03387        0.653        0.786
##  1258     95       1    0.709 0.03434        0.645        0.780
##  1322     89       1    0.701 0.03487        0.636        0.773
##  1344     88       1    0.693 0.03537        0.627        0.766
##  1352     86       1    0.685 0.03587        0.618        0.759
##  1425     82       1    0.677 0.03639        0.609        0.752
##  1456     78       1    0.668 0.03694        0.599        0.744
##  1491     72       1    0.659 0.03758        0.589        0.737
##  1798     58       1    0.647 0.03861        0.576        0.728
##  1890     49       1    0.634 0.04002        0.560        0.718
##  2041     34       1    0.616 0.04297        0.537        0.706
##  2356     15       1    0.574 0.05639        0.474        0.696

5. Fit Model Kaplan-Meier: Berdasarkan Jenis Kelamin

# Kaplan-Meier estimate stratified by gender (one curve per level of `gender`).
km_gender <- survfit(
  formula = Surv(survival_time, event) ~ gender,
  data    = data_km
)

# Per-stratum summary: sample size, number of events, median survival and its
# 95% confidence limits.
km_gender
## Call: survfit(formula = Surv(survival_time, event) ~ gender, data = data_km)
## 
##                 n events median 0.95LCL 0.95UCL
## gender=Female  76     10     NA      NA      NA
## gender=Male   135     53   2356    1798      NA
# Kaplan-Meier life table per gender stratum, one row per observed event time.
summary(object = km_gender)
## Call: survfit(formula = Surv(survival_time, event) ~ gender, data = data_km)
## 
##                 gender=Female 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##   276     69       1    0.986  0.0144        0.958        1.000
##   328     68       1    0.971  0.0202        0.932        1.000
##   477     67       1    0.957  0.0246        0.910        1.000
##   777     58       1    0.940  0.0291        0.885        0.999
##   867     53       1    0.922  0.0336        0.859        0.990
##   952     48       1    0.903  0.0380        0.832        0.981
##   985     45       1    0.883  0.0421        0.804        0.969
##  1133     41       1    0.861  0.0462        0.775        0.957
##  1258     32       1    0.835  0.0521        0.739        0.943
##  1322     26       1    0.802  0.0591        0.695        0.927
## 
##                 gender=Male 
##  time n.risk n.event survival std.err lower 95% CI upper 95% CI
##     6    135       1    0.993 0.00738        0.978        1.000
##    12    134       1    0.985 0.01040        0.965        1.000
##    14    133       1    0.978 0.01269        0.953        1.000
##    22    131       1    0.970 0.01462        0.942        0.999
##    28    129       1    0.963 0.01633        0.931        0.995
##    36    128       1    0.955 0.01785        0.921        0.991
##    41    126       1    0.948 0.01925        0.911        0.986
##    42    125       1    0.940 0.02054        0.901        0.981
##    47    124       1    0.933 0.02172        0.891        0.976
##   159    121       1    0.925 0.02287        0.881        0.971
##   201    120       1    0.917 0.02394        0.871        0.965
##   210    118       1    0.909 0.02497        0.862        0.960
##   225    117       1    0.902 0.02594        0.852        0.954
##   261    114       1    0.894 0.02689        0.842        0.948
##   280    113       1    0.886 0.02779        0.833        0.942
##   286    112       1    0.878 0.02865        0.823        0.936
##   299    111       1    0.870 0.02946        0.814        0.930
##   346    110       1    0.862 0.03023        0.805        0.923
##   360    108       1    0.854 0.03099        0.795        0.917
##   366    107       1    0.846 0.03171        0.786        0.911
##   417    105       1    0.838 0.03242        0.777        0.904
##   428    104       1    0.830 0.03309        0.768        0.897
##   430    103       1    0.822 0.03374        0.758        0.891
##   441    102       1    0.814 0.03435        0.749        0.884
##   446    101       1    0.806 0.03495        0.740        0.877
##   462    100       1    0.798 0.03551        0.731        0.870
##   474     99       1    0.790 0.03606        0.722        0.864
##   514     98       1    0.782 0.03658        0.713        0.857
##   600     96       1    0.773 0.03709        0.704        0.850
##   623     94       1    0.765 0.03760        0.695        0.843
##   664     92       1    0.757 0.03810        0.686        0.835
##   667     90       1    0.749 0.03859        0.677        0.828
##   671     89       1    0.740 0.03907        0.667        0.821
##   776     86       1    0.731 0.03955        0.658        0.813
##   792     85       1    0.723 0.04001        0.649        0.806
##   857     83       1    0.714 0.04046        0.639        0.798
##   861     82       1    0.705 0.04090        0.630        0.790
##   872     80       1    0.697 0.04132        0.620        0.783
##  1011     75       1    0.687 0.04180        0.610        0.774
##  1048     74       1    0.678 0.04226        0.600        0.766
##  1083     72       1    0.669 0.04271        0.590        0.758
##  1123     70       1    0.659 0.04315        0.580        0.749
##  1165     69       1    0.650 0.04357        0.570        0.741
##  1176     67       1    0.640 0.04399        0.559        0.732
##  1344     63       1    0.630 0.04445        0.548        0.723
##  1352     61       1    0.619 0.04490        0.537        0.714
##  1425     59       1    0.609 0.04535        0.526        0.705
##  1456     57       1    0.598 0.04580        0.515        0.695
##  1491     53       1    0.587 0.04630        0.503        0.685
##  1798     45       1    0.574 0.04707        0.489        0.674
##  1890     36       1    0.558 0.04839        0.471        0.661
##  2041     25       1    0.536 0.05134        0.444        0.646
##  2356      8       1    0.469 0.07707        0.340        0.647

6. Visualisasi Kurva Kaplan-Meier

# Plot 1: overall Kaplan-Meier curve with its confidence band.
# The median guide lines are requested only when the fitted curve actually
# reaches a median. print(km_overall) reports the median as NA, and asking for
# the guide in that case draws nothing and only raises the
# "Median survival not reached" warning.
ggsurvplot(
  km_overall,
  data          = data_km,
  conf.int      = TRUE,             # show the confidence interval band
  surv.median.line = if (is.na(summary(km_overall)$table[["median"]])) {
    "none"                          # median not reached: no guide to draw
  } else {
    "hv"                            # horizontal + vertical median guides
  },
  palette       = c("#2E75B6"),
  title         = "Kurva Kaplan-Meier: Kelangsungan Hidup Pasien NSCLC",
  xlab          = "Waktu (Hari)",
  ylab          = "Probabilitas Survival",
  legend        = "none"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
##   Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"

# Plot 2: Kaplan-Meier curves compared by gender.
ggsurvplot(
  fit  = km_gender,
  data = data_km,
  # Labelling
  title = "Kurva Kaplan-Meier: Kelangsungan Hidup Pasien NSCLC\nBerdasarkan Jenis Kelamin",
  xlab  = "Waktu (Hari)",
  ylab  = "Probabilitas Survival",
  # Strata appearance: labels and colours are given in the same order as the
  # strata printed by km_gender (Female, then Male).
  legend.labs = c("Female", "Male"),
  palette     = c("#E7B800", "#2E9FDF"),
  # Overlays: confidence bands and median-survival guide lines.
  conf.int         = TRUE,
  surv.median.line = "hv"
)