Analisis Daya Tahan Pasien Non-Small Cell Lung Cancer dengan Metode Kaplan-Meier
Analisis Daya Tahan Nonparametrik: Kaplan-Meier
Dataset: NSCLC Radiogenomics (TCIA). Sumber: https://www.cancerimagingarchive.net/collection/nsclc-radiogenomics/
## Warning: package 'readxl' was built under R version 4.4.3
## Warning: package 'survival' was built under R version 4.4.3
## Warning: package 'survminer' was built under R version 4.4.3
## Loading required package: ggplot2
## Warning: package 'ggplot2' was built under R version 4.4.3
## Loading required package: ggpubr
## Warning: package 'ggpubr' was built under R version 4.4.3
##
## Attaching package: 'survminer'
## The following object is masked from 'package:survival':
##
## myeloma
## Warning: package 'dplyr' was built under R version 4.4.3
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
1. Memuat dan Menyiapkan Data
Eksplorasi singkat
# Quick structural look at the six raw columns used by the survival analysis
data_raw %>%
  select(all_of(c(
    "Case ID", "Gender", "Survival Status",
    "Time to Death (days)", "Date of Last Known Alive",
    "CT Date"
  ))) %>%
  glimpse()
## Rows: 211
## Columns: 6
## $ `Case ID` <chr> "AMC-001", "AMC-002", "AMC-003", "AMC-004",…
## $ Gender <chr> "Male", "Female", "Female", "Female", "Male…
## $ `Survival Status` <chr> "Dead", "Alive", "Alive", "Alive", "Alive",…
## $ `Time to Death (days)` <chr> "872", "N/A", "N/A", "N/A", "N/A", "N/A", "…
## $ `Date of Last Known Alive` <chr> "1/7/1997", "3/20/1992", "6/19/1996", "12/1…
## $ `CT Date` <chr> "8/10/1994", "2/19/1992", "2/23/1995", "12/…
2. Pembersihan Data
# Build the analysis table: keep the relevant columns under shorter names,
# parse the dates, then derive the follow-up time and the event indicator.
data_km <- data_raw %>%
  select(
    case_id = `Case ID`,
    gender = `Gender`,
    survival_status = `Survival Status`,
    time_to_death = `Time to Death (days)`,
    date_last_alive = `Date of Last Known Alive`,
    ct_date = `CT Date`
  ) %>%
  mutate(
    # Dates are stored as month/day/year text
    date_last_alive = as.Date(date_last_alive, format = "%m/%d/%Y"),
    ct_date = as.Date(ct_date, format = "%m/%d/%Y"),
    # Living patients carry the literal "N/A" in this column. Map only that
    # marker to NA so any other unparseable value still raises a coercion
    # warning instead of being silently turned into NA.
    time_to_death = as.numeric(na_if(time_to_death, "N/A"))
  ) %>%
  mutate(
    # Follow-up time in days:
    #   Dead  -> taken directly from Time to Death (days)
    #   Alive -> Date of Last Known Alive minus CT Date
    # NOTE(review): this assumes Time to Death (days) is counted from the same
    # origin as the CT date - confirm against the dataset documentation.
    survival_time = case_when(
      survival_status == "Dead" ~ time_to_death,
      survival_status == "Alive" ~
        as.numeric(difftime(date_last_alive, ct_date, units = "days")),
      TRUE ~ NA_real_
    ),
    # Event indicator: 1 = death observed, 0 = right-censored (still alive)
    event = as.numeric(survival_status == "Dead")
  ) %>%
  # Drop rows whose follow-up time is missing or negative
  filter(!is.na(survival_time), survival_time >= 0)
# Preview the derived columns for the first six patients
data_km %>%
  select(case_id, gender, survival_status, survival_time, event) %>%
  head()
## # A tibble: 6 × 5
## case_id gender survival_status survival_time event
## <chr> <chr> <chr> <dbl> <dbl>
## 1 AMC-001 Male Dead 872 1
## 2 AMC-002 Female Alive 30 0
## 3 AMC-003 Female Alive 482 0
## 4 AMC-004 Female Alive 1448 0
## 5 AMC-005 Male Alive 901 0
## 6 AMC-006 Female Alive 739 0
3. Membuat Objek Survival
# A trailing '+' in the printed output marks a right-censored observation
surv_obj <- with(data_km, Surv(time = survival_time, event = event))
head(surv_obj, n = 15)
## [1] 872 30+ 482+ 1448+ 901+ 739+ 1160+ 1270+ 12 815+ 1548+ 1154+
## [13] 1315+ 1425+ 1254+
4. Fit Model Kaplan-Meier: Keseluruhan
# Overall Kaplan-Meier estimate: the intercept-only formula (~ 1) means no
# grouping, so every row of data_km contributes to a single curve
km_overall <- survfit(
  formula = Surv(survival_time, event) ~ 1,
  data = data_km
)
# Print the one-line summary (n, events, median and its 95% limits)
print(km_overall)
## Call: survfit(formula = Surv(survival_time, event) ~ 1, data = data_km)
##
## n events median 0.95LCL 0.95UCL
## [1,] 211 63 NA 2356 NA
## Call: survfit(formula = Surv(survival_time, event) ~ 1, data = data_km)
##
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6 211 1 0.995 0.00473 0.986 1.000
## 12 210 1 0.991 0.00667 0.978 1.000
## 14 209 1 0.986 0.00815 0.970 1.000
## 22 206 1 0.981 0.00941 0.963 1.000
## 28 204 1 0.976 0.01052 0.956 0.997
## 36 202 1 0.971 0.01153 0.949 0.994
## 41 200 1 0.966 0.01245 0.942 0.991
## 42 199 1 0.962 0.01330 0.936 0.988
## 47 198 1 0.957 0.01409 0.930 0.985
## 159 191 1 0.952 0.01488 0.923 0.981
## 201 190 1 0.947 0.01562 0.917 0.978
## 210 188 1 0.942 0.01633 0.910 0.974
## 225 186 1 0.937 0.01701 0.904 0.971
## 261 183 1 0.932 0.01767 0.898 0.967
## 276 182 1 0.926 0.01830 0.891 0.963
## 280 181 1 0.921 0.01890 0.885 0.959
## 286 180 1 0.916 0.01948 0.879 0.955
## 299 179 1 0.911 0.02003 0.873 0.951
## 328 178 1 0.906 0.02056 0.867 0.947
## 346 177 1 0.901 0.02107 0.860 0.943
## 360 175 1 0.896 0.02157 0.854 0.939
## 366 174 1 0.891 0.02205 0.848 0.935
## 417 172 1 0.885 0.02252 0.842 0.931
## 428 171 1 0.880 0.02298 0.836 0.926
## 430 170 1 0.875 0.02342 0.830 0.922
## 441 169 1 0.870 0.02385 0.824 0.918
## 446 168 1 0.865 0.02426 0.818 0.914
## 462 167 1 0.859 0.02466 0.812 0.909
## 474 166 1 0.854 0.02505 0.807 0.905
## 477 165 1 0.849 0.02543 0.801 0.900
## 514 163 1 0.844 0.02580 0.795 0.896
## 600 160 1 0.839 0.02617 0.789 0.892
## 623 158 1 0.833 0.02654 0.783 0.887
## 664 155 1 0.828 0.02691 0.777 0.882
## 667 153 1 0.823 0.02727 0.771 0.878
## 671 152 1 0.817 0.02762 0.765 0.873
## 776 144 1 0.811 0.02801 0.758 0.868
## 777 143 1 0.806 0.02838 0.752 0.863
## 792 140 1 0.800 0.02876 0.746 0.858
## 857 136 1 0.794 0.02914 0.739 0.853
## 861 135 1 0.788 0.02951 0.732 0.848
## 867 133 1 0.782 0.02988 0.726 0.843
## 872 132 1 0.776 0.03024 0.719 0.838
## 952 126 1 0.770 0.03062 0.713 0.833
## 985 121 1 0.764 0.03102 0.705 0.827
## 1011 118 1 0.757 0.03142 0.698 0.822
## 1048 115 1 0.751 0.03183 0.691 0.816
## 1083 113 1 0.744 0.03224 0.684 0.810
## 1123 111 1 0.737 0.03264 0.676 0.804
## 1133 110 1 0.731 0.03302 0.669 0.798
## 1165 104 1 0.724 0.03344 0.661 0.792
## 1176 101 1 0.717 0.03387 0.653 0.786
## 1258 95 1 0.709 0.03434 0.645 0.780
## 1322 89 1 0.701 0.03487 0.636 0.773
## 1344 88 1 0.693 0.03537 0.627 0.766
## 1352 86 1 0.685 0.03587 0.618 0.759
## 1425 82 1 0.677 0.03639 0.609 0.752
## 1456 78 1 0.668 0.03694 0.599 0.744
## 1491 72 1 0.659 0.03758 0.589 0.737
## 1798 58 1 0.647 0.03861 0.576 0.728
## 1890 49 1 0.634 0.04002 0.560 0.718
## 2041 34 1 0.616 0.04297 0.537 0.706
## 2356 15 1 0.574 0.05639 0.474 0.696
5. Fit Model Kaplan-Meier: Berdasarkan Jenis Kelamin
# Kaplan-Meier estimate stratified by gender: one curve per level of `gender`
km_gender <- survfit(
  formula = Surv(survival_time, event) ~ gender,
  data = data_km
)
print(km_gender)
## Call: survfit(formula = Surv(survival_time, event) ~ gender, data = data_km)
##
## n events median 0.95LCL 0.95UCL
## gender=Female 76 10 NA NA NA
## gender=Male 135 53 2356 1798 NA
## Call: survfit(formula = Surv(survival_time, event) ~ gender, data = data_km)
##
## gender=Female
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 276 69 1 0.986 0.0144 0.958 1.000
## 328 68 1 0.971 0.0202 0.932 1.000
## 477 67 1 0.957 0.0246 0.910 1.000
## 777 58 1 0.940 0.0291 0.885 0.999
## 867 53 1 0.922 0.0336 0.859 0.990
## 952 48 1 0.903 0.0380 0.832 0.981
## 985 45 1 0.883 0.0421 0.804 0.969
## 1133 41 1 0.861 0.0462 0.775 0.957
## 1258 32 1 0.835 0.0521 0.739 0.943
## 1322 26 1 0.802 0.0591 0.695 0.927
##
## gender=Male
## time n.risk n.event survival std.err lower 95% CI upper 95% CI
## 6 135 1 0.993 0.00738 0.978 1.000
## 12 134 1 0.985 0.01040 0.965 1.000
## 14 133 1 0.978 0.01269 0.953 1.000
## 22 131 1 0.970 0.01462 0.942 0.999
## 28 129 1 0.963 0.01633 0.931 0.995
## 36 128 1 0.955 0.01785 0.921 0.991
## 41 126 1 0.948 0.01925 0.911 0.986
## 42 125 1 0.940 0.02054 0.901 0.981
## 47 124 1 0.933 0.02172 0.891 0.976
## 159 121 1 0.925 0.02287 0.881 0.971
## 201 120 1 0.917 0.02394 0.871 0.965
## 210 118 1 0.909 0.02497 0.862 0.960
## 225 117 1 0.902 0.02594 0.852 0.954
## 261 114 1 0.894 0.02689 0.842 0.948
## 280 113 1 0.886 0.02779 0.833 0.942
## 286 112 1 0.878 0.02865 0.823 0.936
## 299 111 1 0.870 0.02946 0.814 0.930
## 346 110 1 0.862 0.03023 0.805 0.923
## 360 108 1 0.854 0.03099 0.795 0.917
## 366 107 1 0.846 0.03171 0.786 0.911
## 417 105 1 0.838 0.03242 0.777 0.904
## 428 104 1 0.830 0.03309 0.768 0.897
## 430 103 1 0.822 0.03374 0.758 0.891
## 441 102 1 0.814 0.03435 0.749 0.884
## 446 101 1 0.806 0.03495 0.740 0.877
## 462 100 1 0.798 0.03551 0.731 0.870
## 474 99 1 0.790 0.03606 0.722 0.864
## 514 98 1 0.782 0.03658 0.713 0.857
## 600 96 1 0.773 0.03709 0.704 0.850
## 623 94 1 0.765 0.03760 0.695 0.843
## 664 92 1 0.757 0.03810 0.686 0.835
## 667 90 1 0.749 0.03859 0.677 0.828
## 671 89 1 0.740 0.03907 0.667 0.821
## 776 86 1 0.731 0.03955 0.658 0.813
## 792 85 1 0.723 0.04001 0.649 0.806
## 857 83 1 0.714 0.04046 0.639 0.798
## 861 82 1 0.705 0.04090 0.630 0.790
## 872 80 1 0.697 0.04132 0.620 0.783
## 1011 75 1 0.687 0.04180 0.610 0.774
## 1048 74 1 0.678 0.04226 0.600 0.766
## 1083 72 1 0.669 0.04271 0.590 0.758
## 1123 70 1 0.659 0.04315 0.580 0.749
## 1165 69 1 0.650 0.04357 0.570 0.741
## 1176 67 1 0.640 0.04399 0.559 0.732
## 1344 63 1 0.630 0.04445 0.548 0.723
## 1352 61 1 0.619 0.04490 0.537 0.714
## 1425 59 1 0.609 0.04535 0.526 0.705
## 1456 57 1 0.598 0.04580 0.515 0.695
## 1491 53 1 0.587 0.04630 0.503 0.685
## 1798 45 1 0.574 0.04707 0.489 0.674
## 1890 36 1 0.558 0.04839 0.471 0.661
## 2041 25 1 0.536 0.05134 0.444 0.646
## 2356 8 1 0.469 0.07707 0.340 0.647
6. Visualisasi Kurva Kaplan-Meier
# Plot 1: overall Kaplan-Meier curve
# Request the median guide lines only when the median is estimable. Asking
# for them while the median is NA draws nothing and makes ggsurvplot() warn
# "Median survival not reached."
median_overall <- summary(km_overall)$table[["median"]]
ggsurvplot(
  km_overall,
  data = data_km,
  conf.int = TRUE, # show the confidence band
  surv.median.line = if (is.na(median_overall)) "none" else "hv",
  palette = "#2E75B6",
  title = "Kurva Kaplan-Meier: Kelangsungan Hidup Pasien NSCLC",
  xlab = "Waktu (Hari)",
  ylab = "Probabilitas Survival",
  legend = "none"
)
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## ℹ The deprecated feature was likely used in the ggpubr package.
## Please report the issue at <https://github.com/kassambara/ggpubr/issues>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning in .add_surv_median(p, fit, type = surv.median.line, fun = fun, :
## Median survival not reached.
## Ignoring unknown labels:
## • fill : "Strata"
## Ignoring unknown labels:
## • fill : "Strata"
# Plot 2: Kaplan-Meier curves compared by gender
# Legend labels are derived from the fitted strata ("gender=Female",
# "gender=Male") rather than typed by hand, so each label always follows the
# curve it belongs to even if the strata order or levels change.
ggsurvplot(
  km_gender,
  data = data_km,
  conf.int = TRUE, # show the confidence bands
  surv.median.line = "hv", # guide lines at the median survival time
  legend.labs = sub("^gender=", "", names(km_gender$strata)),
  palette = c("#E7B800", "#2E9FDF"),
  title = "Kurva Kaplan-Meier: Kelangsungan Hidup Pasien NSCLC\nBerdasarkan Jenis Kelamin",
  xlab = "Waktu (Hari)",
  ylab = "Probabilitas Survival"
)