# Preview the first six rows of lung12boost to inspect its columns.
head(lung12boost, n = 6L)
## sex agediag dep gor typeKC dead survtday myocardial_infection
## 1 1 76 1 H NSCLC 0 1252 0
## 2 1 81 3 H NSCLC 1 220 0
## 3 1 64 3 H NSCLC 1 146 0
## 4 2 73 3 H NSCLC 1 377 0
## 5 2 73 5 H NSCLC 1 47 0
## 6 2 81 2 H NSCLC 1 21 0
## congestive_heart_failure peripheral_vascular_disease cerebrovascular_disease
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## dementia COPD rheumatic_disease peptic_ulcer_disease mild_liver_disease
## 1 0 0 0 0 0
## 2 0 0 0 0 0
## 3 0 0 0 0 0
## 4 0 0 0 0 0
## 5 0 0 0 0 0
## 6 0 1 0 0 0
## diabetes_without_comp diabetes_with_comp hemiplegia_paraplegia renal_disease
## 1 0 0 0 0
## 2 0 0 0 0
## 3 0 0 0 0
## 4 0 0 0 0
## 5 0 0 0 0
## 6 0 0 0 0
## moderate_severe_liver_disease AIDS_HIV any_malignancy
## 1 0 0 0
## 2 0 0 0
## 3 0 0 0
## 4 0 0 0
## 5 0 0 0
## 6 0 0 0
## metastatic_solid_tumours obesity agecat stage PS CCI CCI_cat
## 1 0 0 74-79 early Good 3 CCI 3-4
## 2 0 0 >=80 advanced Poor 4 CCI 3-4
## 3 0 0 15-65 advanced Poor 2 CCI 1-2
## 4 0 0 66-73 advanced Good 3 CCI 3-4
## 5 0 0 66-73 advanced Poor 3 CCI 3-4
## 6 0 0 >=80 advanced Poor 10 CCI >5
# Histogram of the numeric comorbidity score (CCI column).
hist(
  lung12boost$CCI,
  main = "Distribution of the Charlson Comorbidity Index",
  xlab = "CCI"
)

# Number of patients in each CCI category.
# NOTE(review): the category labels seen in the data are "CCI 1-2",
# "CCI 3-4" and "CCI >5", which leaves CCI = 5 unnamed -- confirm where the
# upstream recoding places it (the label was presumably meant to be ">=5").
table(lung12boost$CCI_cat)
##
## CCI >5 CCI 1-2 CCI 3-4
## 9325 9045 15143
library(survival)
library(survminer)
# Kaplan-Meier survival curves stratified by CCI_cat, drawn with a risk table.
# The complete-case data are built once and handed to ggsurvplot() through
# `data =`, so the plot does not depend on re-evaluating the data expression
# stored in the fit's call (which would run na.omit() a second time).
# NOTE(review): na.omit() drops rows with an NA in *any* column of
# lung12boost, not only survtday, dead and CCI_cat -- confirm that this
# complete-case cohort is the intended one.
km_data <- na.omit(lung12boost)
km_fit <- survfit(Surv(survtday, dead) ~ CCI_cat, data = km_data)
ggsurvplot(km_fit, data = km_data, risk.table = TRUE)
library(randomForestSRC)
# Fit a random survival forest on the complete-case data, time the fit, and
# plot the variable importance.
# NOTE(review): na.omit() drops rows with an NA in *any* column of
# lung12boost, not only the model variables -- confirm that is intended.
rf_data <- na.omit(lung12boost)

# Convert the predictors to factors in the data itself rather than wrapping
# them in factor() inside the formula, so that their type is explicit and does
# not depend on how rfsrc() parses formula terms (dep is stored as a number).
rf_predictors <- c("agecat", "stage", "PS", "dep", "CCI_cat")
rf_data[rf_predictors] <- lapply(rf_data[rf_predictors], factor)

start_time <- Sys.time()
RF_lung <- rfsrc(
  Surv(survtday, dead) ~ agecat + stage + PS + dep + CCI_cat,
  data = rf_data,
  nodesize = 500,
  ntree = 100,
  importance = TRUE,
  membership = TRUE,
  # rfsrc() expects a negative integer seed; fixing it makes the forest and
  # its importance values reproducible between runs.
  seed = -12345
)
end_time <- Sys.time()
end_time - start_time
## Time difference of 3.604972 mins

# Horizontal bar chart of variable importance, smallest value first.
rf_importance <- RF_lung$importance
barplot(
  rf_importance[order(rf_importance)],
  main = "Variable Importance",
  horiz = TRUE,
  xlab = "Importance"
)