libraryの読み込み

pacman::p_load(reader, stringr, missForest, mice, ggplot2, sandwich, tidyverse, readxl, tableone, lubridate, skimr, summarytools, naniar, norm2, lmtest, car, ROCR, pROC, Hmisc, rms, glmnet, ggpubr, ggcorrplot)

データの読み込み

data<-read_excel("varix_prediction.xlsx")

データの確認

全体の把握

str(data)
## tibble [980 × 62] (S3: tbl_df/tbl/data.frame)
##  $ hosp_id       : num [1:980] 1001 1001 1001 1001 1001 ...
##  $ pt_id         : num [1:980] 1 2 3 4 5 6 7 8 9 10 ...
##  $ hosp_num      : num [1:980] 1 1 1 1 1 1 1 1 1 1 ...
##  $ year          : num [1:980] 2012 2011 2010 2011 2010 ...
##  $ age           : num [1:980] 50 80 59 44 67 47 65 49 73 69 ...
##  $ sex           : chr [1:980] "M" "M" "F" "M" ...
##  $ bmi           : num [1:980] NA 25.3 NA 14.5 NA ...
##  $ smoke         : num [1:980] 0 0 0 240 0 270 0 NA 1000 0 ...
##  $ barthel       : num [1:980] NA 2 0 1 NA NA NA NA 0 NA ...
##  $ child_num     : num [1:980] 11 6 NA 8 NA 11 NA NA 9 15 ...
##  $ child_score   : num [1:980] 2 0 NA 1 NA 2 NA NA 1 2 ...
##  $ gcs           : num [1:980] 15 15 15 15 15 15 15 15 15 6 ...
##  $ cci_num       : num [1:980] 4 4 3 4 4 4 3 4 4 4 ...
##  $ pad           : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ stroke        : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ dimentia      : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ ch_lung       : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ rheumati      : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ pept_ulcer    : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ dm            : num [1:980] 0 1 0 0 0 0 0 0 0 0 ...
##  $ dm_compli     : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ paralysis     : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ malignancy    : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ meta_tumor    : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ aids          : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ eGFR30        : num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
##  $ hd            : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ hcc           : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ alcohol       : num [1:980] 1 0 0 0 0 1 0 1 1 0 ...
##  $ past_rupture  : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ antiplate     : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ anticoag      : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ antithro      : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ nsaids        : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ steroid       : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ beta          : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ vaso          : num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
##  $ map           : num [1:980] 0 2 0 6 2 0 2 0 4 14 ...
##  $ ffp           : num [1:980] 0 1 0 0 1 0 1 0 1 1 ...
##  $ pc            : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
##  $ albner        : num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
##  $ bt            : num [1:980] 36.4 36.8 35.9 36 36.6 36.4 38.4 37 37 35.5 ...
##  $ sBP           : num [1:980] 78 88 100 69 66 102 84 132 90 52 ...
##  $ dBP           : num [1:980] 48 49 56 44 40 67 46 69 54 37 ...
##  $ hr            : num [1:980] 118 72 110 104 72 83 127 83 114 98 ...
##  $ shock         : num [1:980] 1 0 1 1 1 0 1 0 1 1 ...
##  $ bil           : num [1:980] 2.2 1.2 3.1 3.4 1.2 7.5 2.4 1.2 2.2 8.7 ...
##  $ ast           : num [1:980] 217 31 60 129 52 112 90 154 55 96 ...
##  $ alt           : num [1:980] 63 22 40 46 36 53 19 109 20 87 ...
##  $ wbc           : num [1:980] 7400 5000 7800 9100 3900 9800 8000 7900 11100 12800 ...
##  $ hb            : num [1:980] 6.9 10.8 9.7 10.7 6.3 12.2 9.8 13.5 5.6 6 ...
##  $ plt           : num [1:980] 115 77 74 162 63 69 93 132 124 168 ...
##  $ tp            : num [1:980] 6.3 5.6 6.4 6.1 5.1 5.7 7.2 7.6 4.9 5.3 ...
##  $ alb           : num [1:980] 2.2 3.2 2.8 2.9 2.8 2.6 3.2 4 2.3 1.2 ...
##  $ eGFR          : num [1:980] 58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num [1:980] 13.2 41.5 26.3 2.9 27.4 13.7 15.8 15.5 27 63.2 ...
##  $ cre           : num [1:980] 1.08 0.96 0.72 0.61 0.38 0.74 0.44 0.4 0.97 2.07 ...
##  $ crp           : num [1:980] 0.92 0.29 0.68 0.29 0.29 0.1 0.96 0.29 NA 2.75 ...
##  $ pt            : num [1:980] 37.8 55 46.7 37.8 74.6 28.5 45.9 49.7 45.9 37.2 ...
##  $ aptt          : num [1:980] 29.4 29 27.2 35.3 30.1 37.8 27.6 32.6 27.5 36 ...
##  $ hosp_mortality: num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
##  $ los           : num [1:980] 12 7 0 10 3 6 2 1 8 0 ...

列名の抽出

colnames(data)
##  [1] "hosp_id"        "pt_id"          "hosp_num"       "year"          
##  [5] "age"            "sex"            "bmi"            "smoke"         
##  [9] "barthel"        "child_num"      "child_score"    "gcs"           
## [13] "cci_num"        "pad"            "stroke"         "dimentia"      
## [17] "ch_lung"        "rheumati"       "pept_ulcer"     "dm"            
## [21] "dm_compli"      "paralysis"      "malignancy"     "meta_tumor"    
## [25] "aids"           "eGFR30"         "hd"             "hcc"           
## [29] "alcohol"        "past_rupture"   "antiplate"      "anticoag"      
## [33] "antithro"       "nsaids"         "steroid"        "beta"          
## [37] "vaso"           "map"            "ffp"            "pc"            
## [41] "albner"         "bt"             "sBP"            "dBP"           
## [45] "hr"             "shock"          "bil"            "ast"           
## [49] "alt"            "wbc"            "hb"             "plt"           
## [53] "tp"             "alb"            "eGFR"           "bun"           
## [57] "cre"            "crp"            "pt"             "aptt"          
## [61] "hosp_mortality" "los"

classの整理

Barthelは0自立、2寝たきり、1それ以外 にカテゴライズ

Child scoreは0:A,1:B,2:C

df <-
  data|>  #miceパッケージが順序ありの因子型でないと読めないためこれを指定。
  mutate(
        hosp_id=as.integer(hosp_id),
        pt_id=as.integer(pt_id),
        hosp_num=as.integer(hosp_num),
        year=as.integer(year),
        age=as.integer(age),
        sex= factor(sex, levels = c("M", "F")),
        smoke= as.integer(smoke),
        barthel= factor(barthel, levels = c("0", "1", "2")),
        child_num= as.integer(child_num),
        child_score=factor(child_score, levels = c("0", "1", "2")),
        gcs=as.integer(gcs),
        cci_num=as.integer(cci_num),
        pad=factor(pad),
        stroke=factor(stroke),
        dimentia=factor(dimentia),
        ch_lung=factor(ch_lung),
        rheumati=factor(rheumati),
        pept_ulcer=factor(pept_ulcer),
        dm=factor(dm),
        dm_compli=factor(dm_compli),
        paralysis=factor(paralysis),
        malignancy=factor(malignancy),
        meta_tumor=factor(meta_tumor),
        aids=factor(aids),
        eGFR30=factor(eGFR30),
        hd=factor(hd),
        hcc=factor(hcc),
        alcohol=factor(alcohol),
        past_rupture=factor(past_rupture),
        antiplate=factor(antiplate),
        anticoag=factor(anticoag),
        antithro=factor(antithro),
        nsaids=factor(nsaids),
        steroid=factor(steroid),
        beta=factor(beta),
        vaso=factor(vaso),
        map= as.integer(map),
        ffp=factor(ffp),
        pc=factor(pc),
        albner=factor(albner),
        sBP= as.integer(sBP),
        dBP= as.integer(dBP),
        hr=as.integer(hr),
        shock=factor(shock),
        los=as.integer(los)
         )

整理後の確認

str(df)
## tibble [980 × 62] (S3: tbl_df/tbl/data.frame)
##  $ hosp_id       : int [1:980] 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ pt_id         : int [1:980] 1 2 3 4 5 6 7 8 9 10 ...
##  $ hosp_num      : int [1:980] 1 1 1 1 1 1 1 1 1 1 ...
##  $ year          : int [1:980] 2012 2011 2010 2011 2010 2017 2010 2010 2010 2011 ...
##  $ age           : int [1:980] 50 80 59 44 67 47 65 49 73 69 ...
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 1 2 1 2 ...
##  $ bmi           : num [1:980] NA 25.3 NA 14.5 NA ...
##  $ smoke         : int [1:980] 0 0 0 240 0 270 0 NA 1000 0 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": NA 3 1 2 NA NA NA NA 1 NA ...
##  $ child_num     : int [1:980] 11 6 NA 8 NA 11 NA NA 9 15 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 NA 2 NA 3 NA NA 2 3 ...
##  $ gcs           : int [1:980] 15 15 15 15 15 15 15 15 15 6 ...
##  $ cci_num       : int [1:980] 4 4 3 4 4 4 3 4 4 4 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 2 1 2 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ map           : int [1:980] 0 2 0 6 2 0 2 0 4 14 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 2 1 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ bt            : num [1:980] 36.4 36.8 35.9 36 36.6 36.4 38.4 37 37 35.5 ...
##  $ sBP           : int [1:980] 78 88 100 69 66 102 84 132 90 52 ...
##  $ dBP           : int [1:980] 48 49 56 44 40 67 46 69 54 37 ...
##  $ hr            : int [1:980] 118 72 110 104 72 83 127 83 114 98 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 1 2 1 2 2 ...
##  $ bil           : num [1:980] 2.2 1.2 3.1 3.4 1.2 7.5 2.4 1.2 2.2 8.7 ...
##  $ ast           : num [1:980] 217 31 60 129 52 112 90 154 55 96 ...
##  $ alt           : num [1:980] 63 22 40 46 36 53 19 109 20 87 ...
##  $ wbc           : num [1:980] 7400 5000 7800 9100 3900 9800 8000 7900 11100 12800 ...
##  $ hb            : num [1:980] 6.9 10.8 9.7 10.7 6.3 12.2 9.8 13.5 5.6 6 ...
##  $ plt           : num [1:980] 115 77 74 162 63 69 93 132 124 168 ...
##  $ tp            : num [1:980] 6.3 5.6 6.4 6.1 5.1 5.7 7.2 7.6 4.9 5.3 ...
##  $ alb           : num [1:980] 2.2 3.2 2.8 2.9 2.8 2.6 3.2 4 2.3 1.2 ...
##  $ eGFR          : num [1:980] 58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num [1:980] 13.2 41.5 26.3 2.9 27.4 13.7 15.8 15.5 27 63.2 ...
##  $ cre           : num [1:980] 1.08 0.96 0.72 0.61 0.38 0.74 0.44 0.4 0.97 2.07 ...
##  $ crp           : num [1:980] 0.92 0.29 0.68 0.29 0.29 0.1 0.96 0.29 NA 2.75 ...
##  $ pt            : num [1:980] 37.8 55 46.7 37.8 74.6 28.5 45.9 49.7 45.9 37.2 ...
##  $ aptt          : num [1:980] 29.4 29 27.2 35.3 30.1 37.8 27.6 32.6 27.5 36 ...
##  $ hosp_mortality: num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
##  $ los           : int [1:980] 12 7 0 10 3 6 2 1 8 0 ...

tableone作成

col_continuous = c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")
col_factors = c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality")

# Create your table
df %>%  
  select(c(col_continuous, col_factors)) %>% 
  CreateTableOne(vars = c(col_continuous, col_factors), strata="hosp_mortality",factorVars = col_factors, addOverall = T) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_continuous)
## 
##   # Now:
##   data %>% select(all_of(col_continuous))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_factors)
## 
##   # Now:
##   data %>% select(all_of(col_factors))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE) 
##                         Stratified by hosp_mortality
##                          Overall           0                 1                 
##   n                          980               862                118          
##   age (mean (SD))          60.86 (13.01)     60.52 (13.02)      63.37 (12.71)  
##   bmi (mean (SD))          23.61 (11.49)     23.73 (12.06)      22.62 (3.75)   
##   smoke (mean (SD))       265.31 (483.85)   259.50 (381.11)    311.74 (975.09) 
##   child_num (mean (SD))     8.59 (2.12)       8.31 (1.95)       10.74 (2.14)   
##   gcs (mean (SD))          14.46 (1.87)      14.69 (1.26)       12.74 (3.75)   
##   cci_num (mean (SD))       4.47 (1.32)       4.44 (1.22)        4.68 (1.91)   
##   map (mean (SD))           3.37 (3.96)       3.00 (3.70)        6.03 (4.73)   
##   bt (mean (SD))           36.73 (0.72)      36.74 (0.61)       36.62 (1.27)   
##   sBP (mean (SD))          88.55 (16.32)     91.20 (14.67)      69.19 (14.68)  
##   dBP (mean (SD))          52.78 (12.72)     54.48 (12.03)      40.29 (10.54)  
##   hr (mean (SD))           86.90 (21.06)     84.87 (19.78)     101.98 (24.05)  
##   bil (mean (SD))           2.30 (2.19)       2.07 (1.83)        4.02 (3.49)   
##   ast (mean (SD))          83.87 (100.75)    76.25 (84.23)     138.98 (170.37) 
##   alt (mean (SD))          41.79 (46.84)     39.22 (41.36)      60.38 (73.04)  
##   wbc (mean (SD))        8676.10 (4747.62) 8492.40 (4679.66)  9999.74 (5037.08)
##   hb (mean (SD))            8.72 (2.49)       8.80 (2.51)        8.14 (2.22)   
##   plt (mean (SD))         113.92 (66.44)    113.46 (65.16)     117.21 (75.29)  
##   tp (mean (SD))            6.11 (0.92)       6.17 (0.88)        5.71 (1.14)   
##   alb (mean (SD))           2.85 (0.61)       2.93 (0.58)        2.30 (0.59)   
##   eGFR (mean (SD))         69.80 (31.04)     73.20 (30.71)      45.36 (20.79)  
##   bun (mean (SD))          28.52 (17.92)     27.67 (17.16)      34.68 (21.79)  
##   cre (mean (SD))           1.06 (0.90)       1.00 (0.90)        1.49 (0.84)   
##   crp (mean (SD))           0.84 (1.76)       0.72 (1.61)        1.68 (2.42)   
##   pt (mean (SD))           54.98 (17.67)     56.80 (16.85)      42.18 (18.11)  
##   aptt (mean (SD))         34.08 (17.48)     32.40 (12.89)      45.63 (33.51)  
##   los (mean (SD))          12.37 (14.69)     12.60 (14.15)      10.69 (18.12)  
##   sex = F (%)                246 ( 25.1)       214 ( 24.8)         32 ( 27.1)  
##   barthel (%)                                                                  
##      0                       313 ( 38.4)       303 ( 42.1)         10 ( 10.3)  
##      1                       249 ( 30.5)       228 ( 31.7)         21 ( 21.6)  
##      2                       254 ( 31.1)       188 ( 26.1)         66 ( 68.0)  
##   child_score (%)                                                              
##      0                       149 ( 16.9)       146 ( 18.9)          3 (  2.7)  
##      1                       437 ( 49.5)       410 ( 53.0)         27 ( 24.5)  
##      2                       297 ( 33.6)       217 ( 28.1)         80 ( 72.7)  
##   pad = 1 (%)                  2 (  0.2)         2 (  0.2)          0 (  0.0)  
##   stroke = 1 (%)              21 (  2.1)        18 (  2.1)          3 (  2.5)  
##   dimentia = 1 (%)            10 (  1.0)         9 (  1.0)          1 (  0.8)  
##   ch_lung = 1 (%)             15 (  1.5)        12 (  1.4)          3 (  2.5)  
##   rheumati = 1 (%)             3 (  0.3)         3 (  0.3)          0 (  0.0)  
##   pept_ulcer = 1 (%)         101 ( 10.3)        98 ( 11.4)          3 (  2.5)  
##   dm = 1 (%)                 210 ( 21.4)       194 ( 22.5)         16 ( 13.6)  
##   dm_compli = 1 (%)           13 (  1.3)        12 (  1.4)          1 (  0.8)  
##   paralysis = 0 (%)          980 (100.0)       862 (100.0)        118 (100.0)  
##   malignancy = 1 (%)         115 ( 11.7)        96 ( 11.1)         19 ( 16.1)  
##   meta_tumor = 1 (%)          17 (  1.7)         9 (  1.0)          8 (  6.8)  
##   aids = 0 (%)               980 (100.0)       862 (100.0)        118 (100.0)  
##   eGFR30 = 1 (%)              87 (  9.1)        56 (  6.7)         31 ( 26.5)  
##   hd = 1 (%)                  13 (  1.3)        10 (  1.2)          3 (  2.5)  
##   hcc = 1 (%)                175 ( 17.9)       151 ( 17.5)         24 ( 20.3)  
##   alcohol = 1 (%)            472 ( 48.2)       417 ( 48.4)         55 ( 46.6)  
##   past_rupture = 1 (%)       225 ( 23.0)       211 ( 24.5)         14 ( 11.9)  
##   antiplate = 1 (%)            7 (  0.7)         5 (  0.6)          2 (  1.7)  
##   anticoag = 1 (%)             4 (  0.4)         4 (  0.5)          0 (  0.0)  
##   antithro = 1 (%)            10 (  1.0)         8 (  0.9)          2 (  1.7)  
##   nsaids = 1 (%)               9 (  0.9)         9 (  1.0)          0 (  0.0)  
##   steroid = 1 (%)              5 (  0.5)         4 (  0.5)          1 (  0.8)  
##   beta = 1 (%)                58 (  5.9)        58 (  6.7)          0 (  0.0)  
##   vaso = 1 (%)                58 (  5.9)        29 (  3.4)         29 ( 24.6)  
##   ffp = 1 (%)                291 ( 29.7)       226 ( 26.2)         65 ( 55.1)  
##   pc = 1 (%)                  17 (  1.7)         9 (  1.0)          8 (  6.8)  
##   albner = 1 (%)              80 (  8.2)        58 (  6.7)         22 ( 18.6)  
##   shock = 1 (%)              409 ( 43.0)       308 ( 36.7)        101 ( 89.4)  
##   hosp_mortality = 1 (%)     118 ( 12.0)         0 (  0.0)        118 (100.0)  
##                         Stratified by hosp_mortality
##                          p      test SMD    Missing
##   n                                                
##   age (mean (SD))         0.025       0.222  0.0   
##   bmi (mean (SD))         0.390       0.124 11.7   
##   smoke (mean (SD))       0.321       0.071 12.9   
##   child_num (mean (SD))  <0.001       1.190 13.2   
##   gcs (mean (SD))        <0.001       0.699  0.0   
##   cci_num (mean (SD))     0.070       0.147  0.0   
##   map (mean (SD))        <0.001       0.712  0.0   
##   bt (mean (SD))          0.112       0.118  3.8   
##   sBP (mean (SD))        <0.001       1.500  2.3   
##   dBP (mean (SD))        <0.001       1.256  2.3   
##   hr (mean (SD))         <0.001       0.777  2.9   
##   bil (mean (SD))        <0.001       0.700  3.8   
##   ast (mean (SD))        <0.001       0.467  2.6   
##   alt (mean (SD))        <0.001       0.356  2.6   
##   wbc (mean (SD))         0.001       0.310  2.0   
##   hb (mean (SD))          0.007       0.279  2.0   
##   plt (mean (SD))         0.568       0.053  2.0   
##   tp (mean (SD))         <0.001       0.454  7.9   
##   alb (mean (SD))        <0.001       1.086  4.7   
##   eGFR (mean (SD))       <0.001       1.062  2.1   
##   bun (mean (SD))        <0.001       0.358  2.1   
##   cre (mean (SD))        <0.001       0.570  2.9   
##   crp (mean (SD))        <0.001       0.464  5.3   
##   pt (mean (SD))         <0.001       0.836  6.0   
##   aptt (mean (SD))       <0.001       0.521 11.4   
##   los (mean (SD))         0.188       0.117  0.0   
##   sex = F (%)             0.670       0.052  0.0   
##   barthel (%)            <0.001       0.998 16.7   
##      0                                             
##      1                                             
##      2                                             
##   child_score (%)        <0.001       1.036  9.9   
##      0                                             
##      1                                             
##      2                                             
##   pad = 1 (%)             1.000       0.068  0.0   
##   stroke = 1 (%)          1.000       0.030  0.0   
##   dimentia = 1 (%)        1.000       0.020  0.0   
##   ch_lung = 1 (%)         0.579       0.083  0.0   
##   rheumati = 1 (%)        1.000       0.084  0.0   
##   pept_ulcer = 1 (%)      0.005       0.352  0.0   
##   dm = 1 (%)              0.036       0.234  0.0   
##   dm_compli = 1 (%)       0.955       0.052  0.0   
##   paralysis = 0 (%)          NA      <0.001  0.0   
##   malignancy = 1 (%)      0.156       0.145  0.0   
##   meta_tumor = 1 (%)     <0.001       0.299  0.0   
##   aids = 0 (%)               NA      <0.001  0.0   
##   eGFR30 = 1 (%)         <0.001       0.554  2.1   
##   hd = 1 (%)              0.423       0.103  0.0   
##   hcc = 1 (%)             0.534       0.072  0.0   
##   alcohol = 1 (%)         0.793       0.035  0.0   
##   past_rupture = 1 (%)    0.003       0.332  0.0   
##   antiplate = 1 (%)       0.444       0.105  0.0   
##   anticoag = 1 (%)        1.000       0.097  0.0   
##   antithro = 1 (%)        0.773       0.067  0.0   
##   nsaids = 1 (%)          0.548       0.145  0.0   
##   steroid = 1 (%)         1.000       0.048  0.0   
##   beta = 1 (%)            0.007       0.380  0.0   
##   vaso = 1 (%)           <0.001       0.643  0.0   
##   ffp = 1 (%)            <0.001       0.615  0.0   
##   pc = 1 (%)             <0.001       0.299  0.0   
##   albner = 1 (%)         <0.001       0.364  0.0   
##   shock = 1 (%)          <0.001       1.302  2.9   
##   hosp_mortality = 1 (%) <0.001         NaN  0.0

連続量の可視化

df |>  #全体
  select(col_continuous) |> 
  pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 5) +
  theme_bw()+
  theme(text = element_text(size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1044 rows containing non-finite values (`stat_bin()`).

連続変数の相関確認

数値で

# 指定した変数だけで相関行列を作成します。
corresult <- df %>% 
  dplyr::select(all_of(col_continuous)) %>% 
  drop_na() %>% 
  cor(method = "pearson")

# 相関行列の数値を表示します。
print(corresult)
##                    age           bmi         smoke   child_num          gcs
## age        1.000000000 -0.0836856830 -0.1030432494 -0.11532157 -0.029765488
## bmi       -0.083685683  1.0000000000  0.0300070548 -0.06205996 -0.003933984
## smoke     -0.103043249  0.0300070548  1.0000000000  0.05570718  0.014443388
## child_num -0.115321574 -0.0620599629  0.0557071836  1.00000000 -0.398642612
## gcs       -0.029765488 -0.0039339840  0.0144433884 -0.39864261  1.000000000
## cci_num    0.189054010  0.0151040658  0.0007541775 -0.02107435  0.020195287
## map       -0.034080140 -0.1002045503  0.0167592044  0.31022161 -0.238511159
## bt        -0.159171182  0.0400759875 -0.0314451418  0.08796750  0.035710109
## sBP       -0.003553962  0.0994623895 -0.0249117512 -0.31560557  0.231281712
## dBP       -0.035371276  0.0769375514  0.0340903324 -0.27357860  0.226396654
## hr        -0.170417688  0.0005776778  0.0424078291  0.27096039 -0.052205044
## bil       -0.275277489 -0.0503555087  0.0951954651  0.62091885 -0.230616604
## ast       -0.239942463 -0.1226764279 -0.0144673264  0.21510256 -0.056066152
## alt       -0.073180367 -0.0224991340 -0.0343036850  0.04977977 -0.003007327
## wbc       -0.072822566  0.0155731107  0.0459858923  0.14416954 -0.087888163
## hb        -0.037084320  0.1561822336  0.0538305838 -0.23955322  0.059902667
## plt        0.098002934 -0.0030810777 -0.0693506283 -0.13074305 -0.002834339
## tp        -0.087020334  0.0526073942 -0.0248881145 -0.21198841  0.047297528
## alb       -0.132938753  0.0545768985  0.0079741569 -0.63647109  0.243137323
## eGFR      -0.412776549  0.0215964915  0.0095727137 -0.13198414  0.197706550
## bun        0.261555897 -0.0390854964 -0.0110387883 -0.04257567 -0.089929556
## cre        0.138821544 -0.0531175288  0.0122796667  0.09087990 -0.167038052
## crp        0.044771776 -0.0459572450 -0.0118945421  0.20708388 -0.037334570
## pt         0.181701617  0.0477928040 -0.0773375131 -0.70284859  0.194416323
## aptt       0.038816549 -0.0619956642 -0.0049792656  0.28669126 -0.112356676
## los        0.114263795 -0.1111178379 -0.0450300079  0.13624296 -0.040538560
##                 cci_num          map            bt          sBP          dBP
## age        0.1890540105 -0.034080140 -0.1591711824 -0.003553962 -0.035371276
## bmi        0.0151040658 -0.100204550  0.0400759875  0.099462389  0.076937551
## smoke      0.0007541775  0.016759204 -0.0314451418 -0.024911751  0.034090332
## child_num -0.0210743540  0.310221611  0.0879674980 -0.315605565 -0.273578595
## gcs        0.0201952874 -0.238511159  0.0357101093  0.231281712  0.226396654
## cci_num    1.0000000000 -0.013084758 -0.0370318828 -0.044976370 -0.083537092
## map       -0.0130847578  1.000000000  0.0057615199 -0.310568280 -0.295058357
## bt        -0.0370318828  0.005761520  1.0000000000  0.113951446  0.092868079
## sBP       -0.0449763703 -0.310568280  0.1139514459  1.000000000  0.676892618
## dBP       -0.0835370916 -0.295058357  0.0928680794  0.676892618  1.000000000
## hr        -0.0503131628  0.177325517  0.2737750179 -0.121261864 -0.032104375
## bil       -0.1525660253  0.117874076  0.1196263065 -0.125416270 -0.137379960
## ast       -0.0850293362  0.031412518  0.0857017384 -0.020936377  0.003905327
## alt        0.0724000799 -0.063328574 -0.0134775722 -0.009074588 -0.010225221
## wbc       -0.0170060378  0.136092233  0.0814465841 -0.187422764 -0.121956606
## hb        -0.0308465564 -0.457998042 -0.0002519479  0.234358309  0.260565799
## plt        0.0687420490 -0.046073792 -0.0859568732 -0.058442402 -0.014481801
## tp        -0.0309910195 -0.290917505 -0.0045907049  0.255393037  0.189640521
## alb       -0.0913911585 -0.339151516 -0.0303835483  0.329406942  0.290084418
## eGFR      -0.1316832935 -0.231260712  0.0619466271  0.220847282  0.222863015
## bun        0.1179889869  0.077631937 -0.0542283504 -0.134737843 -0.113796341
## cre        0.1643842038  0.135471299 -0.0241751059 -0.064381658 -0.100591803
## crp        0.1332803653  0.008689486  0.0023633207 -0.101310832 -0.061777674
## pt        -0.0028383704 -0.343047953 -0.0864311845  0.237336843  0.241271644
## aptt      -0.0342688162  0.199282167  0.0773348901 -0.181047012 -0.184841742
## los       -0.0076579357  0.021718031  0.0148842687 -0.160695920 -0.104468910
##                      hr          bil          ast          alt         wbc
## age       -0.1704176880 -0.275277489 -0.239942463 -0.073180367 -0.07282257
## bmi        0.0005776778 -0.050355509 -0.122676428 -0.022499134  0.01557311
## smoke      0.0424078291  0.095195465 -0.014467326 -0.034303685  0.04598589
## child_num  0.2709603884  0.620918853  0.215102562  0.049779766  0.14416954
## gcs       -0.0522050438 -0.230616604 -0.056066152 -0.003007327 -0.08788816
## cci_num   -0.0503131628 -0.152566025 -0.085029336  0.072400080 -0.01700604
## map        0.1773255167  0.117874076  0.031412518 -0.063328574  0.13609223
## bt         0.2737750179  0.119626307  0.085701738 -0.013477572  0.08144658
## sBP       -0.1212618644 -0.125416270 -0.020936377 -0.009074588 -0.18742276
## dBP       -0.0321043746 -0.137379960  0.003905327 -0.010225221 -0.12195661
## hr         1.0000000000  0.283697599  0.140895234  0.026762968  0.17602935
## bil        0.2836975992  1.000000000  0.302422661  0.090551980  0.13150758
## ast        0.1408952345  0.302422661  1.000000000  0.720155422  0.10128185
## alt        0.0267629682  0.090551980  0.720155422  1.000000000  0.04253651
## wbc        0.1760293491  0.131507582  0.101281849  0.042536508  1.00000000
## hb        -0.0806652198  0.050166038 -0.009587753  0.067143363 -0.06623161
## plt       -0.0766741681 -0.195063935 -0.073463078  0.004121659  0.49538955
## tp        -0.0874577000  0.034113681  0.073620250  0.046276833 -0.11628316
## alb       -0.1852456995 -0.191177646 -0.034876750  0.033688307 -0.06014093
## eGFR      -0.0241579739 -0.001960715  0.059988401  0.028581768 -0.17260179
## bun        0.0300234288 -0.059887023 -0.072740272  0.056148602  0.26431605
## cre        0.0610362422  0.079345427  0.001922576 -0.016173234  0.09887551
## crp        0.0782172666  0.177763017  0.132902240  0.128460605  0.17422558
## pt        -0.2339026905 -0.497721734 -0.208271322 -0.062425736 -0.19145959
## aptt       0.0965794827  0.214250749  0.013094804 -0.031131796  0.07282673
## los       -0.1332941242  0.032263183  0.077848295  0.023696953  0.01117657
##                      hb          plt           tp          alb         eGFR
## age       -0.0370843197  0.098002934 -0.087020334 -0.132938753 -0.412776549
## bmi        0.1561822336 -0.003081078  0.052607394  0.054576898  0.021596491
## smoke      0.0538305838 -0.069350628 -0.024888115  0.007974157  0.009572714
## child_num -0.2395532154 -0.130743052 -0.211988410 -0.636471086 -0.131984141
## gcs        0.0599026668 -0.002834339  0.047297528  0.243137323  0.197706550
## cci_num   -0.0308465564  0.068742049 -0.030991019 -0.091391158 -0.131683293
## map       -0.4579980423 -0.046073792 -0.290917505 -0.339151516 -0.231260712
## bt        -0.0002519479 -0.085956873 -0.004590705 -0.030383548  0.061946627
## sBP        0.2343583090 -0.058442402  0.255393037  0.329406942  0.220847282
## dBP        0.2605657986 -0.014481801  0.189640521  0.290084418  0.222863015
## hr        -0.0806652198 -0.076674168 -0.087457700 -0.185245699 -0.024157974
## bil        0.0501660383 -0.195063935  0.034113681 -0.191177646 -0.001960715
## ast       -0.0095877532 -0.073463078  0.073620250 -0.034876750  0.059988401
## alt        0.0671433628  0.004121659  0.046276833  0.033688307  0.028581768
## wbc       -0.0662316113  0.495389551 -0.116283164 -0.060140931 -0.172601795
## hb         1.0000000000 -0.031249885  0.410814845  0.455021639  0.267550502
## plt       -0.0312498847  1.000000000  0.011564275  0.063691267 -0.121022879
## tp         0.4108148445  0.011564275  1.000000000  0.469938593  0.193389634
## alb        0.4550216395  0.063691267  0.469938593  1.000000000  0.255246914
## eGFR       0.2675505020 -0.121022879  0.193389634  0.255246914  1.000000000
## bun       -0.2426806993  0.171230560 -0.161836978 -0.042886158 -0.488892798
## cre       -0.1669469476  0.027234662 -0.094345303 -0.153877541 -0.638631152
## crp       -0.0705158860  0.123529251 -0.051463581 -0.251078305 -0.115854620
## pt         0.2817458923  0.162259905  0.209186089  0.470931996  0.085698826
## aptt      -0.1075630819 -0.040467717 -0.255855561 -0.374059567 -0.130872759
## los       -0.0717282157  0.007980212 -0.011777560 -0.137322777 -0.055197075
##                   bun          cre          crp          pt         aptt
## age        0.26155590  0.138821544  0.044771776  0.18170162  0.038816549
## bmi       -0.03908550 -0.053117529 -0.045957245  0.04779280 -0.061995664
## smoke     -0.01103879  0.012279667 -0.011894542 -0.07733751 -0.004979266
## child_num -0.04257567  0.090879898  0.207083875 -0.70284859  0.286691263
## gcs       -0.08992956 -0.167038052 -0.037334570  0.19441632 -0.112356676
## cci_num    0.11798899  0.164384204  0.133280365 -0.00283837 -0.034268816
## map        0.07763194  0.135471299  0.008689486 -0.34304795  0.199282167
## bt        -0.05422835 -0.024175106  0.002363321 -0.08643118  0.077334890
## sBP       -0.13473784 -0.064381658 -0.101310832  0.23733684 -0.181047012
## dBP       -0.11379634 -0.100591803 -0.061777674  0.24127164 -0.184841742
## hr         0.03002343  0.061036242  0.078217267 -0.23390269  0.096579483
## bil       -0.05988702  0.079345427  0.177763017 -0.49772173  0.214250749
## ast       -0.07274027  0.001922576  0.132902240 -0.20827132  0.013094804
## alt        0.05614860 -0.016173234  0.128460605 -0.06242574 -0.031131796
## wbc        0.26431605  0.098875505  0.174225578 -0.19145959  0.072826728
## hb        -0.24268070 -0.166946948 -0.070515886  0.28174589 -0.107563082
## plt        0.17123056  0.027234662  0.123529251  0.16225991 -0.040467717
## tp        -0.16183698 -0.094345303 -0.051463581  0.20918609 -0.255855561
## alb       -0.04288616 -0.153877541 -0.251078305  0.47093200 -0.374059567
## eGFR      -0.48889280 -0.638631152 -0.115854620  0.08569883 -0.130872759
## bun        1.00000000  0.514910065  0.108784779  0.01932169  0.027465356
## cre        0.51491006  1.000000000  0.153689986 -0.07648649  0.103488023
## crp        0.10878478  0.153689986  1.000000000 -0.10429396  0.086830690
## pt         0.01932169 -0.076486494 -0.104293964  1.00000000 -0.341667669
## aptt       0.02746536  0.103488023  0.086830690 -0.34166767  1.000000000
## los        0.03640477 -0.010374943  0.044141321 -0.06794976  0.003187148
##                    los
## age        0.114263795
## bmi       -0.111117838
## smoke     -0.045030008
## child_num  0.136242957
## gcs       -0.040538560
## cci_num   -0.007657936
## map        0.021718031
## bt         0.014884269
## sBP       -0.160695920
## dBP       -0.104468910
## hr        -0.133294124
## bil        0.032263183
## ast        0.077848295
## alt        0.023696953
## wbc        0.011176572
## hb        -0.071728216
## plt        0.007980212
## tp        -0.011777560
## alb       -0.137322777
## eGFR      -0.055197075
## bun        0.036404771
## cre       -0.010374943
## crp        0.044141321
## pt        -0.067949764
## aptt       0.003187148
## los        1.000000000

相関プロット(見づらいから拡大推奨)

# 連続変数のリストを指定します。
col_continuous <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")

# 指定した連続変数だけで相関行列を作成します。
corresult <- df %>% 
  dplyr::select(all_of(col_continuous)) %>% 
  drop_na() %>% 
  cor(method = "pearson")

# 相関行列を基に相関プロットを作成します。
corrplot <- ggcorrplot(corr = corresult, hc.order = FALSE, method = "square", title = "cor plot",
                       colors = c("#4b61ba", "white", "red"), lab = TRUE)

# プロットを表示します。
corrplot

変数の線形評価

# 連続変数の変数名をまとめる
con_var <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")

# datadistを計算
ddist <- datadist(df)
options(datadist='ddist')

# プロット結果をまとめるリストを用意する
plot <- list()

for (x in con_var){
  # lrmに投入するformulaを文字列で、for文で順番に指定していく
  formula_tmp <- as.formula(paste("hosp_mortality ~ rcs(", x, ", 4)"))
  fit_tmp <- lrm(formula_tmp, data = df)
  
  # Predict関数の呼び出しを文字列として作成し、それをパースして評価する
  plot_cmd <- paste("plot(Predict(fit_tmp, ", x, "))")
  plot_tmp <- eval(parse(text = plot_cmd))
  plot[[x]] <- plot_tmp
}

# 結果をggarrangeでまとめて表示する
logOR_plot <- ggarrange(plotlist = plot, ncol = 2, nrow = 2)

logOR_plot
## $`1`

## 
## $`2`

## 
## $`3`

## 
## $`4`

## 
## $`5`

## 
## $`6`

## 
## $`7`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

・cci_numはスプライン関数にしてもよいか ・btはU字polへ ・hrもスプライン関数に ・hbもスプライン ・pltもスプライン ・cre/crp/apttはスプライン

欠測を可視化

その1

naplot_1 <- gg_miss_var(df, show_pct = TRUE)

naplot_1
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## ℹ The deprecated feature was likely used in the naniar package.
##   Please report the issue at <]8;;https://github.com/njtierney/naniar/issueshttps://github.com/njtierney/naniar/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

### その2

naplot_2 <- vis_miss(df)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
##   Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
naplot_2

多重代入

#imp <- mice(df, 
#             m=20, #mは代入の結果として作成するデータセットの数
#             maxit =20, #maxitは反復回数
#             method="pmm",
#             printFlag = FALSE,
#             seed = 10
#             )
#
#save(imp, file = "imp.Rdata")
load("imp.Rdata")

多重代入後の収束評価

plot_mice <- plot(imp)

plot_mice

densityplotの確認

densityplot_mice <- densityplot(imp)

densityplot_mice

内視鏡前終了までに集められそうなデータでモデル開発:この変数はChildの中にもBil/PT/alb/意識があったりするから適切ではなさそう。思い切ってChildは抜くことも選択肢。CCIも来院直後では予想できないであろう。

predictors <- c("age", "sex", "bmi", "smoke", "barthel", "child_score", "gcs", "cci_num", "eGFR", "hd", "hcc", "alcohol", "past_rupture", "antithro","nsaids", "steroid","beta", "bt", "shock","bil", "ast", "alt", "wbc", "hb", "plt", "tp", "alb", "crp", "pt", "aptt")

ここまで思い切ってカットChildは内視鏡直後じゃわからない可能性あり(腹水評価しない)CCIも抜く。

predictors1 <- c("age", "sex", "bmi", "smoke", "barthel", "gcs", "eGFR", "hd", "hcc", "alcohol", "past_rupture", "antithro","nsaids", "steroid","beta", "bt", "shock","bil", "ast", "alt", "wbc", "hb", "plt", "tp", "alb", "crp", "pt", "aptt")

Lassoの適用 1se ルールに基づいて少しでも変数を少なくしたい。

# 予測変数を定義します
predictors1 <- c("age", "sex", "bmi", "smoke", "barthel", "gcs", "eGFR", "hd", "hcc", "alcohol", "past_rupture", "antithro","nsaids", "steroid","beta", "bt", "shock","bil", "ast", "alt", "wbc", "hb", "plt", "tp", "alb", "crp", "pt", "aptt")

# rcsを適用したい変数を定義します
rcs_vars <- c("gcs",  "hb", "plt", "crp", "aptt")

# 二次項を適用したい変数を定義します
squared_vars <- "bt"

# 補完データで選択された変数を格納するリストを初期化します
selected_vars <- list()

set.seed(123)  # 再現性のために seed を設定

# 各補完データに対して実行します
for(i in 1:20){
  # i 番目の補完データを取得し、予測変数に相当する列だけを選択します
  data <- complete(imp, i) %>% select(all_of(predictors1), hosp_mortality)
  
  # rcs関数を適用する
  for(var in rcs_vars){
    data[[var]] <- rcs(data[[var]], 4)
  }
  
  # 二次項を適用する
  data[[squared_vars]] <- I(data[[squared_vars]]^2)
  
  # 応答変数と予測変数を準備します
  x <- model.matrix(hosp_mortality ~., data = data)
  y <- data$hosp_mortality
  
  # Lasso ロジスティック回帰を適用します
  cv_fit <- cv.glmnet(x, y, family = "binomial")
  
  # "1se" ルールに従って最適な lambda を選択し、モデルをフィットします
  best_lambda <- cv_fit$lambda.1se
  best_fit <- glmnet(x, y, family = "binomial", alpha = 1, lambda = best_lambda)
  
  # 選択された変数を取得します
  selected_vars[[i]] <- rownames(coef(best_fit))[which(coef(best_fit) != 0)]
}

# 各補完データで選択された変数のリストを表示します
selected_vars
## [[1]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[2]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "eGFR"        "shock1"     
##  [6] "bil"         "ast"         "alb"         "crpdata"     "pt"         
## [11] "apttdata"   
## 
## [[3]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## 
## [[4]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"         
## 
## [[5]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## 
## [[6]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[7]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "beta1"       "shock1"      "bil"         "ast"         "alb"        
## [11] "crpdata"     "pt"          "apttdata"   
## 
## [[8]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[9]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[10]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"         
## 
## [[11]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "apttdata"   
## 
## [[12]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "pt"         
## 
## [[13]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[14]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[15]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[16]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[17]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"         
## 
## [[18]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[19]]
##  [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
##  [6] "shock1"      "bil"         "ast"         "alb"         "crpdata"    
## [11] "pt"          "apttdata"   
## 
## [[20]]
## [1] "(Intercept)" "barthel2"    "gcsdata"     "gcsdata''"   "eGFR"       
## [6] "shock1"      "bil"         "ast"         "alb"

Lassoの結果選ばれた変数を選択 20データセットあるため10以上選ばれた変数を採用とする。

# selected_vars リストをフラットなベクトルに変換
flat_vars <- unlist(selected_vars)

# 各変数がいくつの補完データセットで選択されたかをカウント
table(flat_vars)
## flat_vars
## (Intercept)         alb    apttdata         ast    barthel2       beta1 
##          20          20          13          20          20           1 
##         bil     crpdata        eGFR     gcsdata   gcsdata''          pt 
##          20          18          20          20          19          16 
##      shock1 
##          20

1つのデータセットを対象にROC,calibration plot作成へ

1つのデータセットの抽出

imp1<-complete(imp,1)

最終的なモデルの作成へ

final_formula<-lrm(hosp_mortality ~ alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,data = imp1 )
summary(final_formula)
##              Effects              Response : hosp_mortality 
## 
##  Factor        Low    High   Diff.  Effect    S.E.     Lower 0.95 Upper 0.95
##  alb            2.500  3.200  0.700 -0.421690 0.209110 -0.8315400 -0.011847 
##   Odds Ratio    2.500  3.200  0.700  0.655940       NA  0.4353800  0.988220 
##  aptt          27.200 35.800  8.600 -0.075024 0.295770 -0.6547200  0.504680 
##   Odds Ratio   27.200 35.800  8.600  0.927720       NA  0.5195900  1.656400 
##  ast           33.000 93.000 60.000  0.126810 0.060268  0.0086893  0.244940 
##   Odds Ratio   33.000 93.000 60.000  1.135200       NA  1.0087000  1.277500 
##  bil            0.970  2.885  1.915  0.250670 0.112790  0.0296090  0.471720 
##   Odds Ratio    0.970  2.885  1.915  1.284900       NA  1.0301000  1.602800 
##  crp            0.110  0.790  0.680  0.386470 0.287160 -0.1763400  0.949290 
##   Odds Ratio    0.110  0.790  0.680  1.471800       NA  0.8383300  2.583900 
##  eGFR          48.898 89.236 40.338 -1.164400 0.227950 -1.6111000 -0.717590 
##   Odds Ratio   48.898 89.236 40.338  0.312120       NA  0.1996600  0.487930 
##  gcs            3.000 15.000 12.000 -3.431400 1.001700 -5.3947000 -1.468100 
##   Odds Ratio    3.000 15.000 12.000  0.032341       NA  0.0045405  0.230350 
##  pt            42.600 67.000 24.400 -0.297610 0.260740 -0.8086500  0.213430 
##   Odds Ratio   42.600 67.000 24.400  0.742590       NA  0.4454600  1.237900 
##  barthel - 1:0  1.000  2.000     NA  0.519640 0.423390 -0.3101800  1.349500 
##   Odds Ratio    1.000  2.000     NA  1.681400       NA  0.7333100  3.855300 
##  barthel - 2:0  1.000  3.000     NA  0.998850 0.384740  0.2447700  1.752900 
##   Odds Ratio    1.000  3.000     NA  2.715200       NA  1.2773000  5.771500 
##  shock - 1:0    1.000  2.000     NA  2.223200 0.383010  1.4725000  2.973900 
##   Odds Ratio    1.000  2.000     NA  9.236500       NA  4.3600000 19.567000
# モデルをフィットします
fit <- lrm(hosp_mortality ~ alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,data = imp1 )

# 回帰係数を取得します
coefficients <- coef(fit)

# オッズ比を計算します
odds_ratios <- exp(coefficients)

# 小数点以下3桁まで表示する
odds_ratios_formatted <- format(odds_ratios, digits = 3, scientific = FALSE)
print(odds_ratios_formatted)
##                                                                                                                                                                                        Intercept 
## "                                                                           49.1220137450622047481374465860426425933837890625000000000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                              alb 
## "                                                                            0.5474866929826283090676497522508725523948669433593750000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                             aptt 
## "                                                                            1.0131949801997497395689151744591072201728820800781250000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                            aptt' 
## "                                                                            0.8533078867507345455223344288242515176534652709960937500000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                           aptt'' 
## "                                                                            1.4940459130949692578838039480615407228469848632812500000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                              ast 
## "                                                                            1.0021157786623380303581143380142748355865478515625000000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                        barthel=1 
## "                                                                            1.6814203733248649363929416722385212779045104980468750000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                        barthel=2 
## "                                                                            2.7151684122092758499888986989390105009078979492187500000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                              bil 
## "                                                                            1.1398495357933264848782073386246338486671447753906250000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                              crp 
## "                                                                            0.1011075639571644146919027207331964746117591857910156250000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                             crp' 
## "12758080848842324716977539676677042458095203380089585308415726829881910099968.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                            crp'' 
## "                                                                            0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000366" 
##                                                                                                                                                                                             eGFR 
## "                                                                            0.9715474282845987241685747903829906135797500610351562500000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                              gcs 
## "                                                                            0.5232881925495114217028458369895815849304199218750000000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                             gcs' 
## "                                                                            1.6408426954412047571452148986281827092170715332031250000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                            gcs'' 
## "                                                                            0.0000000000271609360723319205908327334694866159528481297513735626125708222389221191406250000000000000000000000000" 
##                                                                                                                                                                                               pt 
## "                                                                            0.9878769662371085225061051460215821862220764160156250000000000000000000000000000000000000000000000000000000000000" 
##                                                                                                                                                                                          shock=1 
## "                                                                            9.2365247125562390806408075150102376937866210937500000000000000000000000000000000000000000000000000000000000000000"

Logit(P(hosp_mortality)) = Intercept + (-0.422) * alb + (-0.075) * aptt + 0.127 * ast + 0.251 * bil + 0.386 * crp - 1.164 * eGFR - 3.431 * gcs - 0.298 * pt + 0.520 * barthel(1:0) + 0.999 * barthel(2:0) + 2.223 * shock(1:0)

ROCとAUC

ROC

#install.packages("ggplot2") 
#install.packages("pROC") 
library(pROC) 
library(ggplot2) 
fit_k<- lrm(hosp_mortality ~alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,x=TRUE, y=TRUE, data = imp1) 
imp1$fitted <- predict(fit_k,type="fitted") 
ROC <- roc(imp1$hosp_mortality ~ imp1$fitted, ci=TRUE, direction="auto") 
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
ggroc(ROC) 

AUC

print(ROC)  
## 
## Call:
## roc.formula(formula = imp1$hosp_mortality ~ imp1$fitted, ci = TRUE,     direction = "auto")
## 
## Data: imp1$fitted in 862 controls (imp1$hosp_mortality 0) < 118 cases (imp1$hosp_mortality 1).
## Area under the curve: 0.9279
## 95% CI: 0.9081-0.9478 (DeLong)

calibration plot

val.prob(imp1$fitted, imp1$hosp_mortality, g=10, cex=.5)  

##           Dxy       C (ROC)            R2             D      D:Chi-sq 
##  8.559125e-01  9.279563e-01  5.384151e-01  3.279903e-01  3.224305e+02 
##           D:p             U      U:Chi-sq           U:p             Q 
##            NA -2.040816e-03  1.136868e-13  1.000000e+00  3.300311e-01 
##         Brier     Intercept         Slope          Emax           E90 
##  6.200715e-02 -3.690602e-10  1.000000e+00  5.400114e-02  2.507903e-02 
##          Eavg           S:z           S:p 
##  8.710764e-03  3.041256e-01  7.610322e-01

内的検証 Kフォールド

set.seed(10)
fit_k<- lrm(hosp_mortality ~alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,x=TRUE, y=TRUE, data = imp1) 
cv <-validate (fit_k, bw=FALSE,B=5,method="crossvalidation") 
cv 
##           index.orig training    test optimism index.corrected n
## Dxy           0.8559   0.8589  0.8232   0.0357          0.8202 5
## R2            0.5384   0.5448  0.4813   0.0635          0.4749 5
## Intercept     0.0000   0.0000 -0.1858   0.1858         -0.1858 5
## Slope         1.0000   1.0000  0.8614   0.1386          0.8614 5
## Emax          0.0000   0.0000  0.0685   0.0685          0.0685 5
## D             0.3280   0.3325  0.2837   0.0488          0.2792 5
## U            -0.0020  -0.0026  0.0068  -0.0094          0.0073 5
## Q             0.3300   0.3350  0.2769   0.0581          0.2719 5
## B             0.0620   0.0613  0.0697  -0.0084          0.0704 5
## g             2.6963   2.7544  2.3631   0.3913          2.3049 5
## gp            0.1800   0.1808  0.1712   0.0096          0.1704 5
print("cv_AUC") 
## [1] "cv_AUC"
print(cv[1,3]*0.5 + 0.5) 
## [1] 0.9115976
print("corrected_AUC") 
## [1] "corrected_AUC"
print(cv[1,5]*0.5 + 0.5) 
## [1] 0.9100847

内的検証 ブートストラップ

set.seed(10)
cv <-validate (fit_k, bw=FALSE,B=100,method="boot") 
## 
## Divergence or singularity in 2 samples
cv 
##           index.orig training    test optimism index.corrected  n
## Dxy           0.8559   0.8677  0.8425   0.0253          0.8306 98
## R2            0.5384   0.5628  0.5150   0.0477          0.4907 98
## Intercept     0.0000   0.0000 -0.1470   0.1470         -0.1470 98
## Slope         1.0000   1.0000  0.8811   0.1189          0.8811 98
## Emax          0.0000   0.0000  0.0556   0.0556          0.0556 98
## D             0.3280   0.3458  0.3112   0.0346          0.2934 98
## U            -0.0020  -0.0020  0.0025  -0.0045          0.0025 98
## Q             0.3300   0.3479  0.3088   0.0391          0.2909 98
## B             0.0620   0.0589  0.0651  -0.0062          0.0682 98
## g             2.6963   2.9010  2.5549   0.3461          2.3502 98
## gp            0.1800   0.1827  0.1765   0.0063          0.1737 98
print("corrected_AUC") 
## [1] "corrected_AUC"
print(cv[1,5]*0.5 + 0.5) 
## [1] 0.9153031