# 5250: Random Survival Forest

# Pin the CRAN mirror so installs work non-interactively, then install Hmisc
# only when it is missing (avoids re-downloading the package on every run).
options(repos = c(CRAN = "https://cloud.r-project.org"))
if (!requireNamespace("Hmisc", quietly = TRUE)) {
  install.packages("Hmisc")
}

## 
## The downloaded binary packages are in
##  /var/folders/yd/g4p50_293gs40n1_0xp8mz780000gn/T//Rtmpbx8twO/downloaded_packages

# Load the training data. The path has to be a quoted string handed to
# read.csv(); a bare, unquoted path is parsed as arithmetic on undefined
# objects and fails at run time.
data_train <- read.csv(
  "/Users/youxueren/Desktop/5250/data/SSC-FIRSTDAY-TRAIN.csv",
  header = TRUE, sep = ","
)

# Install the packages loaded below only when they are not already available;
# an unconditional install.packages() re-downloads them on every run.
for (pkg in c("randomForestSRC", "Hmisc")) {
  if (!requireNamespace(pkg, quietly = TRUE)) {
    install.packages(pkg)
  }
}

## 
## The downloaded binary packages are in
##  /var/folders/yd/g4p50_293gs40n1_0xp8mz780000gn/T//Rtmpbx8twO/downloaded_packages

library(randomForestSRC)

## 
##  randomForestSRC 3.2.3 
##  
##  Type rfsrc.news() to see new features, changes, and bug fixes. 
##

library(survival)
library(Hmisc)

## 
## Attaching package: 'Hmisc'

## The following object is masked from 'package:randomForestSRC':
## 
##     impute

## The following objects are masked from 'package:base':
## 
##     format.pval, units

# Read the training and test data sets from comma-separated files that carry
# a header row.
data_train <- read.csv(
  file = "/Users/youxueren/Desktop/5250/data/SSC-FIRSTDAY-TRAIN.csv",
  header = TRUE,
  sep = ","
)
data_test <- read.csv(
  file = "/Users/youxueren/Desktop/5250/data/SSC-FIRSTDAY.csv",
  header = TRUE,
  sep = ","
)

# Fix the type of the outcome flag: strip any square brackets from
# hospital_expire_flag and coerce what is left to integer, in both data sets.
# (The raw values presumably look like "[1]" -- confirm against the CSV.)
train_flag_text <- gsub("\\[|\\]", "", data_train[["hospital_expire_flag"]])
data_train[["hospital_expire_flag"]] <- as.integer(train_flag_text)

test_flag_text <- gsub("\\[|\\]", "", data_test[["hospital_expire_flag"]])
data_test[["hospital_expire_flag"]] <- as.integer(test_flag_text)
# Columns excluded from modelling: identifiers plus the LODS / OASIS / SOFA
# score components listed here.
features_to_remove <- c(
  "stay_id", "subject_id", "hadm_id",
  "LODS_neurologic", "LODS_cardiovascular", "LODS_renal", "LODS_pulmonary",
  "LODS_hematologic", "LODS_hepatic",
  "OASIS_gcs", "OASIS_heart_rate", "OASIS_mean_bp", "OASIS_resp_rate",
  "OASIS_mean_bp_1", "OASIS_temp", "OASIS_urine_output",
  "SOFA"
)

# Keep every column whose name is not in the exclusion list, for both the
# training and the test data.
keep_train <- !(colnames(data_train) %in% features_to_remove)
data_train <- data_train[, keep_train]

keep_test <- !(colnames(data_test) %in% features_to_remove)
data_test <- data_test[, keep_test]

# Treat admission_type as a categorical variable in both data sets.
# NOTE(review): the factor levels are derived independently for each data set;
# confirm that train and test contain the same admission types.
data_train[["admission_type"]] <- factor(data_train[["admission_type"]])
data_test[["admission_type"]] <- factor(data_test[["admission_type"]])

# Number of columns in the training data before sparse columns are dropped.
ncol(data_train)

## [1] 172

# Drop sparsely populated columns from the training data.
# A column is removed when it has more than `max_missing` missing values.
max_missing <- 50

# Number of missing values per column; vapply() guarantees an integer vector
# regardless of how many columns there are.
missing_counts <- vapply(data_train, function(x) sum(is.na(x)), integer(1))

# Names of the columns that exceed the missing-value threshold.
columns_to_drop <- names(missing_counts)[missing_counts > max_missing]

# Remove them; drop = FALSE keeps a data frame even if one column remains.
data_train <- data_train[, !(colnames(data_train) %in% columns_to_drop),
                         drop = FALSE]

# Report which columns were removed so the selection can be checked.
print(paste("Dropped columns:", paste(columns_to_drop, collapse = ", ")))

## [1] "Dropped columns: lactate_min, lactate_max, ph_min, ph_max, so2_min, so2_max, po2_min, po2_max, pco2_min, pco2_max, aado2_min, aado2_max, aado2_calc_min, aado2_calc_max, pao2fio2ratio_min, pao2fio2ratio_max, baseexcess_min, baseexcess_max, bicarbonate_min, bicarbonate_max, totalco2_min, totalco2_max, hematocrit_min, hematocrit_max, hemoglobin_min, hemoglobin_max, carboxyhemoglobin_min, carboxyhemoglobin_max, methemoglobin_min, methemoglobin_max, temperature_min_1, temperature_max_1, chloride_min, chloride_max, calcium_min, calcium_max, glucose_min_1, glucose_max_1, potassium_min, potassium_max, sodium_min, sodium_max, albumin_min, albumin_max, globulin_min, globulin_max, total_protein_min, total_protein_max, abs_basophils_min, abs_basophils_max, abs_eosinophils_min, abs_eosinophils_max, abs_lymphocytes_min, abs_lymphocytes_max, abs_monocytes_min, abs_monocytes_max, abs_neutrophils_min, abs_neutrophils_max, atyps_min, atyps_max, bands_min, bands_max, imm_granulocytes_min, imm_granulocytes_max, metas_min, metas_max, nrbc_min, nrbc_max, d_dimer_min, d_dimer_max, fibrinogen_min, fibrinogen_max, thrombin_min, thrombin_max, alt_min, alt_max, alp_min, alp_max, ast_min, ast_max, amylase_min, amylase_max, bilirubin_total_min, bilirubin_total_max, bilirubin_direct_min, bilirubin_direct_max, bilirubin_indirect_min, bilirubin_indirect_max, ck_cpk_min, ck_cpk_max, ck_mb_min, ck_mb_max, ggt_min, ggt_max, ld_ldh_min, ld_ldh_max, respiration, liver"

# Number of columns left in the training data after dropping sparse columns.
ncol(data_train)

## [1] 74

# Show how many missing values remain in each retained column.
print(colSums(is.na(data_train)))

##         ICU_length_of_stay       hospital_expire_flag 
##                          0                          0 
##                        age             admission_type 
##                          0                          0 
##        number_of_ICD_codes charlson_comorbidity_index 
##                          0                          0 
##          OASIS_pre_icu_los             OASIS_mechvent 
##                          0                          0 
##     OASIS_elective_surgery             heart_rate_min 
##                          0                          3 
##             heart_rate_max            heart_rate_mean 
##                          3                          3 
##                    sbp_min                    sbp_max 
##                          3                          3 
##                   sbp_mean                    dbp_min 
##                          3                          3 
##                    dbp_max                   dbp_mean 
##                          3                          3 
##                    mbp_min                    mbp_max 
##                          3                          3 
##                   mbp_mean              resp_rate_min 
##                          3                          3 
##              resp_rate_max             resp_rate_mean 
##                          3                          3 
##            temperature_min            temperature_max 
##                          6                          6 
##           temperature_mean                   spo2_min 
##                          6                          3 
##                   spo2_max                  spo2_mean 
##                          3                          3 
##                glucose_min                glucose_max 
##                          7                          7 
##               glucose_mean                    gcs_min 
##                          7                          6 
##                  gcs_motor                 gcs_verbal 
##                          7                          6 
##                   gcs_eyes                 gcs_unable 
##                          7                          6 
##           hematocrit_min_1           hematocrit_max_1 
##                          2                          2 
##           hemoglobin_min_1           hemoglobin_max_1 
##                          2                          2 
##              platelets_min              platelets_max 
##                          2                          2 
##                    wbc_min                    wbc_max 
##                          2                          2 
##               aniongap_min               aniongap_max 
##                          6                          6 
##          bicarbonate_min_1          bicarbonate_max_1 
##                          6                          6 
##                    bun_min                    bun_max 
##                          3                          3 
##              calcium_min_1              calcium_max_1 
##                         13                         13 
##             chloride_min_1             chloride_max_1 
##                          6                          6 
##             creatinine_min             creatinine_max 
##                          5                          5 
##              glucose_min_2              glucose_max_2 
##                          6                          6 
##               sodium_min_1               sodium_max_1 
##                          6                          6 
##            potassium_min_1            potassium_max_1 
##                          5                          5 
##                    inr_min                    inr_max 
##                         25                         25 
##                     pt_min                     pt_max 
##                         25                         25 
##                    ptt_min                    ptt_max 
##                         25                         25 
##                coagulation             cardiovascular 
##                          2                          3 
##                        cns                      renal 
##                          6                          0

# Keep only the rows that have no missing value in any column.
data_train <- na.omit(data_train)

# Inspect the cleaned data: number of rows and columns.
print(c(nrow(data_train), ncol(data_train)))

## [1] 156  74

library(randomForestSRC)

# Hyperparameter grid for the random survival forests fitted below:
#   ntree     - number of trees: 500, 1000, 2000
#   nodesize  - minimum number of cases in a terminal node: 1, 5, 10
#   splitrule - "logrank" or "random"
# All 18 combinations are fitted. The model numbering is kept unchanged
# because later code refers to the models by name.

# Fit one random survival forest on data_train, with ICU_length_of_stay as the
# time and hospital_expire_flag as the event indicator.
#   ntree, splitrule, nodesize: forwarded unchanged to rfsrc().
#   seed: negative integer forwarded to rfsrc() so that repeated runs grow the
#         same forest and the models stay comparable between runs.
# Returns the fitted rfsrc object (variable importance included).
fit_rsf <- function(ntree, splitrule, nodesize, seed = -5250L) {
  rfsrc(Surv(ICU_length_of_stay, hospital_expire_flag) ~ ., data = data_train,
        ntree = ntree, splitrule = splitrule, nodesize = nodesize,
        importance = TRUE, seed = seed)
}

model1 <- fit_rsf(ntree = 1000, splitrule = "logrank", nodesize = 1)
model2 <- fit_rsf(ntree = 500, splitrule = "random", nodesize = 1)

model3 <- fit_rsf(ntree = 1000, splitrule = "logrank", nodesize = 5)
model4 <- fit_rsf(ntree = 500, splitrule = "random", nodesize = 5)

model5 <- fit_rsf(ntree = 1000, splitrule = "logrank", nodesize = 10)
model6 <- fit_rsf(ntree = 500, splitrule = "random", nodesize = 10)

model7 <- fit_rsf(ntree = 2000, splitrule = "logrank", nodesize = 1)
model8 <- fit_rsf(ntree = 2000, splitrule = "random", nodesize = 1)

model9 <- fit_rsf(ntree = 2000, splitrule = "logrank", nodesize = 5)
model10 <- fit_rsf(ntree = 2000, splitrule = "random", nodesize = 5)

model11 <- fit_rsf(ntree = 2000, splitrule = "logrank", nodesize = 10)
model12 <- fit_rsf(ntree = 2000, splitrule = "random", nodesize = 10)

model13 <- fit_rsf(ntree = 500, splitrule = "logrank", nodesize = 1)
model14 <- fit_rsf(ntree = 500, splitrule = "logrank", nodesize = 5)
model15 <- fit_rsf(ntree = 500, splitrule = "logrank", nodesize = 10)

model16 <- fit_rsf(ntree = 1000, splitrule = "random", nodesize = 1)
model17 <- fit_rsf(ntree = 1000, splitrule = "random", nodesize = 5)
model18 <- fit_rsf(ntree = 1000, splitrule = "random", nodesize = 10)





# Print the summary of each fitted forest, starting with model1
# (ntree = 1000, splitrule = "logrank", nodesize = 1). Each summary reports
# the OOB CRPS and the OOB "Requested performance error".
print(model1)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 1000
##            Forest terminal node size: 1
##        Average no. of terminal nodes: 37.828
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.18267935
##    (OOB) Requested performance error: 0.14990421

# Summary of model2 (ntree = 500, splitrule = "random", nodesize = 1).
print(model2)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 500
##            Forest terminal node size: 1
##        Average no. of terminal nodes: 44.178
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.19131456
##    (OOB) Requested performance error: 0.12356322

# Summary of model3 (ntree = 1000, splitrule = "logrank", nodesize = 5).
print(model3)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 1000
##            Forest terminal node size: 5
##        Average no. of terminal nodes: 13.991
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.15662612
##    (OOB) Requested performance error: 0.15277778

# Summary of model4 (ntree = 500, splitrule = "random", nodesize = 5).
print(model4)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 500
##            Forest terminal node size: 5
##        Average no. of terminal nodes: 17.692
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.17544176
##    (OOB) Requested performance error: 0.15948276

# Summary of model5 (ntree = 1000, splitrule = "logrank", nodesize = 10).
print(model5)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 1000
##            Forest terminal node size: 10
##        Average no. of terminal nodes: 9.942
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.15720142
##    (OOB) Requested performance error: 0.16187739

# Summary of model6 (ntree = 500, splitrule = "random", nodesize = 10).
print(model6)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 500
##            Forest terminal node size: 10
##        Average no. of terminal nodes: 10.044
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.17745728
##    (OOB) Requested performance error: 0.15469349

# Summary of model7 (ntree = 2000, splitrule = "logrank", nodesize = 1).
print(model7)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 2000
##            Forest terminal node size: 1
##        Average no. of terminal nodes: 37.4185
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.18085924
##    (OOB) Requested performance error: 0.14319923

# Summary of model8 (ntree = 2000, splitrule = "random", nodesize = 1).
print(model8)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 2000
##            Forest terminal node size: 1
##        Average no. of terminal nodes: 43.6025
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.1948914
##    (OOB) Requested performance error: 0.13409962

# Summary of model9 (ntree = 2000, splitrule = "logrank", nodesize = 5).
print(model9)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 2000
##            Forest terminal node size: 5
##        Average no. of terminal nodes: 14.0305
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.15584295
##    (OOB) Requested performance error: 0.15708812

# Summary of model10 (ntree = 2000, splitrule = "random", nodesize = 5).
print(model10)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 2000
##            Forest terminal node size: 5
##        Average no. of terminal nodes: 17.5445
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.17758012
##    (OOB) Requested performance error: 0.15373563

# Summary of model11 (ntree = 2000, splitrule = "logrank", nodesize = 10).
print(model11)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 2000
##            Forest terminal node size: 10
##        Average no. of terminal nodes: 10
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.15740813
##    (OOB) Requested performance error: 0.16139847

# Summary of model12 (ntree = 2000, splitrule = "random", nodesize = 10).
print(model12)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 2000
##            Forest terminal node size: 10
##        Average no. of terminal nodes: 10.1655
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.17827986
##    (OOB) Requested performance error: 0.14846743

# Summary of model13 (ntree = 500, splitrule = "logrank", nodesize = 1).
print(model13)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 500
##            Forest terminal node size: 1
##        Average no. of terminal nodes: 37.384
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.18240845
##    (OOB) Requested performance error: 0.1532567

# Summary of model14 (ntree = 500, splitrule = "logrank", nodesize = 5).
print(model14)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 500
##            Forest terminal node size: 5
##        Average no. of terminal nodes: 13.96
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.15667571
##    (OOB) Requested performance error: 0.15948276

# Summary of model15 (ntree = 500, splitrule = "logrank", nodesize = 10).
print(model15)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 500
##            Forest terminal node size: 10
##        Average no. of terminal nodes: 10.086
## No. of variables tried at each split: 9
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: logrank *random*
##        Number of random split points: 10
##                           (OOB) CRPS: 0.1555463
##    (OOB) Requested performance error: 0.16475096

# Summary of model16 (ntree = 1000, splitrule = "random", nodesize = 1).
print(model16)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 1000
##            Forest terminal node size: 1
##        Average no. of terminal nodes: 43.129
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.19641767
##    (OOB) Requested performance error: 0.14750958

# Summary of model17 (ntree = 1000, splitrule = "random", nodesize = 5).
print(model17)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 1000
##            Forest terminal node size: 5
##        Average no. of terminal nodes: 17.539
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.17778849
##    (OOB) Requested performance error: 0.1585249

# Summary of model18 (ntree = 1000, splitrule = "random", nodesize = 10).
print(model18)

##                          Sample size: 156
##                     Number of deaths: 28
##                      Number of trees: 1000
##            Forest terminal node size: 10
##        Average no. of terminal nodes: 9.995
## No. of variables tried at each split: 1
##               Total no. of variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                       Splitting rule: random
##                           (OOB) CRPS: 0.17881111
##    (OOB) Requested performance error: 0.16810345

# Sanity check: number of out-of-bag predictions stored in model4.
length(model4[["predicted.oob"]])

## [1] 156

# Sanity check: number of observed times in the training data, to compare with
# the number of OOB predictions.
length(data_train[["ICU_length_of_stay"]])

## [1] 156

# Apply model2 to the test data and print the prediction summary, which
# includes the CRPS and performance error on the test set.
pred <- predict(model2, newdata = data_test)
print(pred)

##   Sample size of test (predict) data: 80
##                 Number of grow trees: 500
##   Average no. of grow terminal nodes: 44.178
##          Total no. of grow variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                                 CRPS: 0.27449873
##          Requested performance error: 0.19720768

# Compute variable importance (VIMP) for model2.
# Printing the result shows a forest summary (see the output below) rather than
# the per-variable importance values.
vi <- vimp(model2, cause = 1)  # Adjust 'cause' if you have competing risks
print(vi)

##   Sample size of test (predict) data: 156
##                 Number of grow trees: 500
##   Average no. of grow terminal nodes: 44.178
##          Total no. of grow variables: 72
##        Resampling used to grow trees: swor
##     Resample size used to grow trees: 99
##                             Analysis: RSF
##                               Family: surv
##                                 CRPS: 0.19131456
##          Requested performance error: 0.12356322

# Plot model2. The table printed below lists each variable's importance and its
# importance relative to the top-ranked variable.
plot(model2)

## 
##                              Importance   Relative Imp
## coagulation                      0.0331         1.0000
## potassium_max_1                  0.0240         0.7247
## gcs_motor                        0.0236         0.7135
## inr_min                          0.0231         0.6982
## creatinine_max                   0.0201         0.6063
## temperature_min                  0.0184         0.5553
## calcium_min_1                    0.0179         0.5405
## bicarbonate_max_1                0.0161         0.4863
## dbp_min                          0.0159         0.4790
## bicarbonate_min_1                0.0158         0.4782
## ptt_max                          0.0155         0.4669
## inr_max                          0.0153         0.4605
## sodium_max_1                     0.0147         0.4430
## OASIS_mechvent                   0.0146         0.4405
## aniongap_min                     0.0146         0.4397
## glucose_max_2                    0.0145         0.4375
## ptt_min                          0.0143         0.4320
## gcs_eyes                         0.0142         0.4295
## renal                            0.0136         0.4097
## charlson_comorbidity_index       0.0134         0.4046
## cardiovascular                   0.0132         0.3975
## pt_min                           0.0126         0.3812
## pt_max                           0.0126         0.3804
## heart_rate_max                   0.0125         0.3786
## cns                              0.0118         0.3567
## number_of_ICD_codes              0.0117         0.3529
## heart_rate_min                   0.0116         0.3495
## mbp_min                          0.0114         0.3452
## glucose_max                      0.0113         0.3424
## temperature_mean                 0.0112         0.3397
## gcs_verbal                       0.0108         0.3254
## admission_type                   0.0106         0.3208
## resp_rate_mean                   0.0101         0.3051
## hemoglobin_min_1                 0.0095         0.2867
## temperature_max                  0.0089         0.2679
## gcs_unable                       0.0089         0.2676
## dbp_max                          0.0081         0.2445
## hematocrit_min_1                 0.0081         0.2445
## resp_rate_min                    0.0078         0.2340
## glucose_min_2                    0.0077         0.2331
## sodium_min_1                     0.0077         0.2314
## hematocrit_max_1                 0.0074         0.2241
## hemoglobin_max_1                 0.0069         0.2073
## platelets_max                    0.0067         0.2027
## potassium_min_1                  0.0066         0.1993
## gcs_min                          0.0065         0.1954
## spo2_mean                        0.0064         0.1923
## chloride_max_1                   0.0063         0.1912
## sbp_mean                         0.0063         0.1897
## platelets_min                    0.0060         0.1810
## chloride_min_1                   0.0056         0.1698
## bun_min                          0.0055         0.1669
## spo2_max                         0.0055         0.1667
## creatinine_min                   0.0054         0.1618
## sbp_min                          0.0051         0.1551
## sbp_max                          0.0049         0.1473
## resp_rate_max                    0.0039         0.1163
## aniongap_max                     0.0038         0.1136
## calcium_max_1                    0.0031         0.0924
## mbp_max                          0.0029         0.0865
## glucose_mean                     0.0026         0.0775
## wbc_min                          0.0025         0.0760
## glucose_min                      0.0024         0.0724
## wbc_max                          0.0017         0.0519
## OASIS_elective_surgery           0.0015         0.0463
## heart_rate_mean                  0.0015         0.0455
## age                              0.0006         0.0175
## dbp_mean                         0.0002         0.0047
## OASIS_pre_icu_los               -0.0002        -0.0054
## bun_max                         -0.0009        -0.0283
## spo2_min                        -0.0019        -0.0570
## mbp_mean                        -0.0050        -0.1522

# Print the OOB C-index of model4.
# A fitted rfsrc object has no `c.index` element (`model4$c.index` is NULL).
# It stores the cumulative OOB error rate in `err.rate`, which for survival
# forests is 1 - Harrell's C-index, so the C-index is 1 minus the last
# available error value.
oob_error <- model4$err.rate[!is.na(model4$err.rate)]
print(1 - oob_error[length(oob_error)])

## NULL

# Concordance measure for model4's OOB predictions on the training data.
# The value printed below equals model4's OOB "Requested performance error",
# i.e. it is the concordance error (1 - C-index), not the C-index itself.
get.cindex(
  time = data_train[["ICU_length_of_stay"]],
  censoring = data_train[["hospital_expire_flag"]],
  predicted = model4[["predicted.oob"]]
)

## [1] 0.1594828

# Brier score over time; lower is better, with 0.25 used as the reference
# threshold. Censoring is handled with inverse probability of censoring
# weights (IPCW), with the censoring distribution estimated either by
# Kaplan-Meier ("km") or by a random survival forest ("rfsrc").
#
# Both curves must come from the SAME forest, otherwise the plot compares two
# different models rather than the two censoring estimators.
# NOTE(review): the KM curve was previously computed from model10 and the RSF
# curve from model1; model1 is used for both here -- confirm that model1 (and
# not model10) is the intended model.
bs.km <- get.brier.survival(model1, cens.mode = "km")$brier.score
bs.rsf <- get.brier.survival(model1, cens.mode = "rfsrc")$brier.score

# Plot both Brier score curves as step functions.
plot(bs.km, type = "s", col = 2)
lines(bs.rsf, type = "s", col = 4)
legend("bottomright", legend = c("cens.model = km", "cens.model = rfsrc"), fill = c(2, 4))

# Brier score of model2 under the two censoring-distribution estimators:
# Kaplan-Meier ("km") and random survival forest ("rfsrc").
bs.km <- get.brier.survival(model2, cens.mode = "km")[["brier.score"]]
bs.rsf <- get.brier.survival(model2, cens.mode = "rfsrc")[["brier.score"]]

# Draw the KM-based curve first, overlay the RSF-based curve, then add a legend.
plot(bs.km, type = "s", col = 2)
lines(bs.rsf, type = "s", col = 4)
legend(
  "bottomright",
  legend = c("cens.model = km", "cens.model = rfsrc"),
  fill = c(2, 4)
)

# Subsample model2 to obtain confidence intervals for variable importance.
oo <- subsample(model2, verbose = FALSE)

# Use the delete-d jackknife estimates as an example: one row per variable
# with lower bound, mean, upper bound and p-value.
vimpCI <- extract.subsample(oo)[["var.jk.sel.Z"]]
print(vimpCI)

##                                   lower        mean     upper      pvalue
## age                        -0.711158366  0.05785715 0.8268727 0.441385040
## admission_type             -0.224768417  1.06258652 2.3499415 0.052857166
## number_of_ICD_codes        -0.018565728  1.16887850 2.3563227 0.026845581
## charlson_comorbidity_index -0.130078532  1.34002666 2.8101319 0.037005809
## OASIS_pre_icu_los          -0.808145741 -0.01798453 0.7721767 0.517790882
## OASIS_mechvent              0.204930269  1.45902566 2.7131210 0.011296746
## OASIS_elective_surgery     -0.072330368  0.15342073 0.3791718 0.091431082
## heart_rate_min             -0.322409468  1.15762704 2.6376636 0.062636983
## heart_rate_max             -0.203138805  1.25396966 2.7110781 0.045828511
## heart_rate_mean            -0.585535221  0.15072359 0.8869824 0.344123554
## sbp_min                    -0.611082482  0.51373079 1.6385441 0.185349358
## sbp_max                    -0.445829818  0.48785888 1.4215476 0.152895227
## sbp_mean                   -0.409776474  0.62829603 1.6663685 0.117757214
## dbp_min                     0.026284747  1.58653414 3.1467835 0.023131680
## dbp_max                    -0.261119224  0.80994327 1.8810058 0.069152135
## dbp_mean                   -0.754606047  0.01556332 0.7857327 0.484203504
## mbp_min                    -0.078057590  1.14348754 2.3650327 0.033273526
## mbp_max                    -0.634169822  0.28643521 1.2070402 0.270990766
## mbp_mean                   -1.727919107 -0.50412035 0.7196784 0.790273166
## resp_rate_min              -0.194157412  0.77508857 1.7443346 0.058516694
## resp_rate_max              -0.404644962  0.38504532 1.1747356 0.169622165
## resp_rate_mean             -0.160092910  1.01048107 2.1810550 0.045331512
## temperature_min             0.531514810  1.83933876 3.1471627 0.002921037
## temperature_max            -0.235103172  0.88748334 2.0100699 0.060632114
## temperature_mean           -0.059310725  1.12498755 2.3092858 0.031315134
## spo2_min                   -1.140857564 -0.18891508 0.7630274 0.651346824
## spo2_max                   -0.266690153  0.55216617 1.3710225 0.093145547
## spo2_mean                  -0.284605865  0.63676575 1.5581374 0.087781522
## glucose_min                -0.686294679  0.23987069 1.1660361 0.305860748
## glucose_max                -0.198841522  1.13401022 2.4668620 0.047700886
## glucose_mean               -0.567342635  0.25685072 1.0810441 0.270665652
## gcs_min                    -0.121697265  0.64723286 1.4161630 0.049495626
## gcs_motor                   0.548969537  2.36330928 4.1776490 0.005340045
## gcs_verbal                  0.031789302  1.07777134 2.1237534 0.021716036
## gcs_eyes                   -0.030248312  1.42256222 2.8753728 0.027482268
## gcs_unable                 -0.167607663  0.88618464 1.9399769 0.049652947
## hematocrit_min_1           -0.249883844  0.80989425 1.8696723 0.067089186
## hematocrit_max_1           -0.378816330  0.74221222 1.8632408 0.097202816
## hemoglobin_min_1           -0.015117252  0.94969492 1.9145071 0.026849662
## hemoglobin_max_1           -0.234033875  0.68650078 1.6070354 0.071915992
## platelets_min              -0.536528336  0.59960964 1.7357476 0.150476212
## platelets_max              -0.285068205  0.67149365 1.6280555 0.084431168
## wbc_min                    -0.444254367  0.25183302 0.9479204 0.239136522
## wbc_max                    -0.622000487  0.17189129 0.9657831 0.335649428
## aniongap_min                0.021470668  1.45633539 2.8912001 0.023334491
## aniongap_max               -0.598614569  0.37626793 1.3511504 0.224683042
## bicarbonate_min_1           0.081499705  1.58371029 3.0859209 0.019400162
## bicarbonate_max_1          -0.006232367  1.61072428 3.2276809 0.025444801
## bun_min                    -0.174734931  0.55286420 1.2804633 0.068207715
## bun_max                    -0.970394065 -0.09383656 0.7827210 0.583094571
## calcium_min_1               0.164646464  1.79022285 3.4157992 0.015445309
## calcium_max_1              -0.585419927  0.30603275 1.1974854 0.250521636
## chloride_min_1             -0.350978488  0.56237396 1.4757264 0.113754805
## chloride_max_1             -0.349372442  0.63325732 1.6158871 0.103276277
## creatinine_min             -0.330205420  0.53591115 1.4020277 0.112616311
## creatinine_max             -0.198661111  2.00819836 4.2150578 0.037250168
## glucose_min_2              -0.276691374  0.77217635 1.8210441 0.074520749
## glucose_max_2              -0.116827107  1.44904426 3.0149156 0.034859323
## sodium_min_1               -0.453688874  0.76637606 1.9864410 0.109135448
## sodium_max_1                0.112407453  1.46742951 2.8224516 0.016895575
## potassium_min_1            -0.339423457  0.66000202 1.6594275 0.097776798
## potassium_max_1            -0.254299358  2.40041510 5.0551296 0.038179364
## inr_min                     0.351621372  2.31244785 4.2732743 0.010404539
## inr_max                     0.217346082  1.52519756 2.8330490 0.011136441
## pt_min                      0.022043915  1.26253253 2.5030211 0.023032718
## pt_max                      0.022753943  1.26004100 2.4973281 0.022966521
## ptt_min                    -0.085483093  1.43081891 2.9471209 0.032195061
## ptt_max                     0.056449812  1.54649490 3.0365400 0.020964886
## coagulation                 0.891196474  3.31215438 5.7331123 0.003665077
## cardiovascular             -0.077783164  1.31671692 2.7112170 0.032110675
## cns                         0.033600089  1.18129283 2.3289856 0.021829798
## renal                      -0.159118190  1.35693739 2.8729930 0.039693393
##                            signif
## age                         FALSE
## admission_type              FALSE
## number_of_ICD_codes         FALSE
## charlson_comorbidity_index  FALSE
## OASIS_pre_icu_los           FALSE
## OASIS_mechvent               TRUE
## OASIS_elective_surgery      FALSE
## heart_rate_min              FALSE
## heart_rate_max              FALSE
## heart_rate_mean             FALSE
## sbp_min                     FALSE
## sbp_max                     FALSE
## sbp_mean                    FALSE
## dbp_min                      TRUE
## dbp_max                     FALSE
## dbp_mean                    FALSE
## mbp_min                     FALSE
## mbp_max                     FALSE
## mbp_mean                    FALSE
## resp_rate_min               FALSE
## resp_rate_max               FALSE
## resp_rate_mean              FALSE
## temperature_min              TRUE
## temperature_max             FALSE
## temperature_mean            FALSE
## spo2_min                    FALSE
## spo2_max                    FALSE
## spo2_mean                   FALSE
## glucose_min                 FALSE
## glucose_max                 FALSE
## glucose_mean                FALSE
## gcs_min                     FALSE
## gcs_motor                    TRUE
## gcs_verbal                   TRUE
## gcs_eyes                    FALSE
## gcs_unable                  FALSE
## hematocrit_min_1            FALSE
## hematocrit_max_1            FALSE
## hemoglobin_min_1            FALSE
## hemoglobin_max_1            FALSE
## platelets_min               FALSE
## platelets_max               FALSE
## wbc_min                     FALSE
## wbc_max                     FALSE
## aniongap_min                 TRUE
## aniongap_max                FALSE
## bicarbonate_min_1            TRUE
## bicarbonate_max_1           FALSE
## bun_min                     FALSE
## bun_max                     FALSE
## calcium_min_1                TRUE
## calcium_max_1               FALSE
## chloride_min_1              FALSE
## chloride_max_1              FALSE
## creatinine_min              FALSE
## creatinine_max              FALSE
## glucose_min_2               FALSE
## glucose_max_2               FALSE
## sodium_min_1                FALSE
## sodium_max_1                 TRUE
## potassium_min_1             FALSE
## potassium_max_1             FALSE
## inr_min                      TRUE
## inr_max                      TRUE
## pt_min                       TRUE
## pt_max                       TRUE
## ptt_min                     FALSE
## ptt_max                      TRUE
## coagulation                  TRUE
## cardiovascular              FALSE
## cns                          TRUE
## renal                       FALSE

# Keep only the predictors flagged as significant in the VIMP table.
# `which()` drops NA flags, whereas comparing with `== TRUE` would return an
# all-NA row for every predictor whose `signif` value is missing.
significant_features <- vimpCI[which(vimpCI$signif), ]

# Print the significant features
print(significant_features)

##                        lower     mean    upper      pvalue signif
## OASIS_mechvent    0.20493027 1.459026 2.713121 0.011296746   TRUE
## dbp_min           0.02628475 1.586534 3.146784 0.023131680   TRUE
## temperature_min   0.53151481 1.839339 3.147163 0.002921037   TRUE
## gcs_motor         0.54896954 2.363309 4.177649 0.005340045   TRUE
## gcs_verbal        0.03178930 1.077771 2.123753 0.021716036   TRUE
## aniongap_min      0.02147067 1.456335 2.891200 0.023334491   TRUE
## bicarbonate_min_1 0.08149970 1.583710 3.085921 0.019400162   TRUE
## calcium_min_1     0.16464646 1.790223 3.415799 0.015445309   TRUE
## sodium_max_1      0.11240745 1.467430 2.822452 0.016895575   TRUE
## inr_min           0.35162137 2.312448 4.273274 0.010404539   TRUE
## inr_max           0.21734608 1.525198 2.833049 0.011136441   TRUE
## pt_min            0.02204392 1.262533 2.503021 0.023032718   TRUE
## pt_max            0.02275394 1.260041 2.497328 0.022966521   TRUE
## ptt_max           0.05644981 1.546495 3.036540 0.020964886   TRUE
## coagulation       0.89119647 3.312154 5.733112 0.003665077   TRUE
## cns               0.03360009 1.181293 2.328986 0.021829798   TRUE

# Confidence intervals for VIMP, drawn from the subsampling object `oo`.
plot.subsample(oo)

# Partial plots from model4: one figure per predictor, in the listed order.
# NOTE(review): `oo` was built from model2 while these plots use model4;
# confirm that the two different forests are intended.
partial_vars <- c(
  "sbp_min",
  "temperature_min",
  "temperature_mean",
  "gcs_motor",
  "aniongap_min",
  "bicarbonate_max_1",
  "inr_min",
  "pt_max",
  "inr_max"
)
for (xvar in partial_vars) {
  plot.variable(model4, xvar.names = xvar, partial = TRUE)
}

5250 random survival forest

2024-05-22