libraryの読み込み

pacman::p_load(reader, stringr, missForest, mice, ggplot2, sandwich, tidyverse, readxl, tableone, lubridate, skimr, summarytools, naniar, norm2, lmtest, car, ROCR, pROC, Hmisc, rms, glmnet, ggpubr, ggcorrplot,VIM,stringr,doParallel,gtsummary)

データの読み込み

data<-read_excel("varix_prediction.xlsx")

年度に基づいてchort列を作成し”develop” “validation”をいれる

# "cohort"という新しい列を作成します。これは、年度に基づいて "develop"または "validation"となります。
data_cohort <- data %>%
  mutate(cohort = ifelse(year %in% 2010:2016, "develop",
                         ifelse(year %in% 2017:2022, "validation", NA)))

classの整理

Barthelは0自立、2寝たきり、1それ以外 にカテゴライズ

Child scoreは0:A,1:B,2:C

df <-
  data_cohort|>  
  mutate(
        hosp_id=as.integer(hosp_id),
        pt_id=as.integer(pt_id),
        hosp_num=as.integer(hosp_num),
        year=as.integer(year),
        age=as.integer(age),
        sex= factor(sex, levels = c("M", "F")),
        smoke= as.integer(smoke),
        barthel= factor(barthel, levels = c("0", "1", "2")),
        child_num= as.integer(child_num),
        child_score=factor(child_score, levels = c("0", "1", "2")),
        gcs=as.integer(gcs),
        cci_num=as.integer(cci_num),
        pad=factor(pad),
        stroke=factor(stroke),
        dimentia=factor(dimentia),
        ch_lung=factor(ch_lung),
        rheumati=factor(rheumati),
        pept_ulcer=factor(pept_ulcer),
        dm=factor(dm),
        dm_compli=factor(dm_compli),
        paralysis=factor(paralysis),
        malignancy=factor(malignancy),
        meta_tumor=factor(meta_tumor),
        aids=factor(aids),
        eGFR30=factor(eGFR30),
        hd=factor(hd),
        hcc=factor(hcc),
        alcohol=factor(alcohol),
        past_rupture=factor(past_rupture),
        antiplate=factor(antiplate),
        anticoag=factor(anticoag),
        antithro=factor(antithro),
        nsaids=factor(nsaids),
        steroid=factor(steroid),
        beta=factor(beta),
        vaso=factor(vaso),
        map= as.integer(map),
        ffp=factor(ffp),
        pc=factor(pc),
        albner=factor(albner),
        sBP= as.integer(sBP),
        dBP= as.integer(dBP),
        hr=as.integer(hr),
        shock=factor(shock),
        los=as.integer(los),
        cohort=factor(cohort)
         )

開発コホート・検証コホートに分割

# 新しい列 "cohort" を使用してデータフレームを分割します
df_dev <- df %>%
  filter(cohort == "develop")

df_val <- df %>%
  filter(cohort == "validation")
str(df)
## tibble [980 × 63] (S3: tbl_df/tbl/data.frame)
##  $ hosp_id       : int [1:980] 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ pt_id         : int [1:980] 1 2 3 4 5 6 7 8 9 10 ...
##  $ hosp_num      : int [1:980] 1 1 1 1 1 1 1 1 1 1 ...
##  $ year          : int [1:980] 2012 2011 2010 2011 2010 2017 2010 2010 2010 2011 ...
##  $ age           : int [1:980] 50 80 59 44 67 47 65 49 73 69 ...
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 1 2 1 2 ...
##  $ bmi           : num [1:980] NA 25.3 NA 14.5 NA ...
##  $ smoke         : int [1:980] 0 0 0 240 0 270 0 NA 1000 0 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": NA 3 1 2 NA NA NA NA 1 NA ...
##  $ child_num     : int [1:980] 11 6 NA 8 NA 11 NA NA 9 15 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 NA 2 NA 3 NA NA 2 3 ...
##  $ gcs           : int [1:980] 15 15 15 15 15 15 15 15 15 6 ...
##  $ cci_num       : int [1:980] 4 4 3 4 4 4 3 4 4 4 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 2 1 2 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ map           : int [1:980] 0 2 0 6 2 0 2 0 4 14 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 2 1 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ bt            : num [1:980] 36.4 36.8 35.9 36 36.6 36.4 38.4 37 37 35.5 ...
##  $ sBP           : int [1:980] 78 88 100 69 66 102 84 132 90 52 ...
##  $ dBP           : int [1:980] 48 49 56 44 40 67 46 69 54 37 ...
##  $ hr            : int [1:980] 118 72 110 104 72 83 127 83 114 98 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 1 2 1 2 2 ...
##  $ bil           : num [1:980] 2.2 1.2 3.1 3.4 1.2 7.5 2.4 1.2 2.2 8.7 ...
##  $ ast           : num [1:980] 217 31 60 129 52 112 90 154 55 96 ...
##  $ alt           : num [1:980] 63 22 40 46 36 53 19 109 20 87 ...
##  $ wbc           : num [1:980] 7400 5000 7800 9100 3900 9800 8000 7900 11100 12800 ...
##  $ hb            : num [1:980] 6.9 10.8 9.7 10.7 6.3 12.2 9.8 13.5 5.6 6 ...
##  $ plt           : num [1:980] 115 77 74 162 63 69 93 132 124 168 ...
##  $ tp            : num [1:980] 6.3 5.6 6.4 6.1 5.1 5.7 7.2 7.6 4.9 5.3 ...
##  $ alb           : num [1:980] 2.2 3.2 2.8 2.9 2.8 2.6 3.2 4 2.3 1.2 ...
##  $ eGFR          : num [1:980] 58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num [1:980] 13.2 41.5 26.3 2.9 27.4 13.7 15.8 15.5 27 63.2 ...
##  $ cre           : num [1:980] 1.08 0.96 0.72 0.61 0.38 0.74 0.44 0.4 0.97 2.07 ...
##  $ crp           : num [1:980] 0.92 0.29 0.68 0.29 0.29 0.1 0.96 0.29 NA 2.75 ...
##  $ pt            : num [1:980] 37.8 55 46.7 37.8 74.6 28.5 45.9 49.7 45.9 37.2 ...
##  $ aptt          : num [1:980] 29.4 29 27.2 35.3 30.1 37.8 27.6 32.6 27.5 36 ...
##  $ hosp_mortality: num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
##  $ los           : int [1:980] 12 7 0 10 3 6 2 1 8 0 ...
##  $ cohort        : Factor w/ 2 levels "develop","validation": 1 1 1 1 1 2 1 1 1 1 ...

tableone作成

col_continuous = c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")
col_factors = c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality", "cohort")


# Create your table
df %>%  
  select(c(col_continuous, col_factors)) %>% 
  CreateTableOne(vars = c(col_continuous, col_factors), strata="cohort",factorVars = col_factors, addOverall = T) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_continuous)
## 
##   # Now:
##   data %>% select(all_of(col_continuous))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_factors)
## 
##   # Now:
##   data %>% select(all_of(col_factors))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE) 
##                          Stratified by cohort
##                           Overall           develop           validation       
##   n                           980               536               444          
##   age (mean (SD))           60.86 (13.01)     61.23 (13.06)     60.42 (12.96)  
##   bmi (mean (SD))           23.61 (11.49)     22.91 (3.73)      24.38 (16.12)  
##   smoke (mean (SD))        265.31 (483.85)   299.62 (571.82)   226.73 (356.99) 
##   child_num (mean (SD))      8.59 (2.12)       8.77 (2.13)       8.38 (2.09)   
##   gcs (mean (SD))           14.46 (1.87)      14.33 (2.20)      14.61 (1.34)   
##   cci_num (mean (SD))        4.47 (1.32)       4.44 (1.20)       4.50 (1.45)   
##   map (mean (SD))            3.37 (3.96)       3.49 (4.51)       3.21 (3.18)   
##   bt (mean (SD))            36.73 (0.72)      36.65 (0.76)      36.81 (0.65)   
##   sBP (mean (SD))           88.55 (16.32)     87.35 (15.48)     89.95 (17.16)  
##   dBP (mean (SD))           52.78 (12.72)     51.00 (11.90)     54.84 (13.33)  
##   hr (mean (SD))            86.90 (21.06)     85.58 (21.10)     88.43 (20.93)  
##   bil (mean (SD))            2.30 (2.19)       2.20 (2.08)       2.42 (2.32)   
##   ast (mean (SD))           83.87 (100.75)    85.41 (92.54)     82.05 (109.78) 
##   alt (mean (SD))           41.79 (46.84)     43.36 (50.96)     39.93 (41.41)  
##   wbc (mean (SD))         8676.10 (4747.62) 8348.30 (4401.79) 9068.42 (5108.48)
##   hb (mean (SD))             8.72 (2.49)       8.55 (2.46)       8.92 (2.51)   
##   plt (mean (SD))          113.92 (66.44)    110.70 (57.80)    117.76 (75.39)  
##   tp (mean (SD))             6.11 (0.92)       6.09 (0.93)       6.13 (0.92)   
##   alb (mean (SD))            2.85 (0.61)       2.82 (0.59)       2.90 (0.64)   
##   eGFR (mean (SD))          69.80 (31.04)     69.15 (30.82)     70.59 (31.31)  
##   bun (mean (SD))           28.52 (17.92)     28.41 (18.64)     28.64 (17.04)  
##   cre (mean (SD))            1.06 (0.90)       1.07 (0.92)       1.05 (0.89)   
##   crp (mean (SD))            0.84 (1.76)       0.81 (1.67)       0.87 (1.86)   
##   pt (mean (SD))            54.98 (17.67)     51.73 (16.99)     58.78 (17.72)  
##   aptt (mean (SD))          34.08 (17.48)     32.43 (17.30)     36.06 (17.52)  
##   los (mean (SD))           12.37 (14.69)     13.77 (15.28)     10.67 (13.77)  
##   sex = F (%)                 246 ( 25.1)       131 ( 24.4)       115 ( 25.9)  
##   barthel (%)                                                                  
##      0                        313 ( 38.4)       142 ( 35.9)       171 ( 40.7)  
##      1                        249 ( 30.5)       118 ( 29.8)       131 ( 31.2)  
##      2                        254 ( 31.1)       136 ( 34.3)       118 ( 28.1)  
##   child_score (%)                                                              
##      0                        149 ( 16.9)        66 ( 13.9)        83 ( 20.3)  
##      1                        437 ( 49.5)       231 ( 48.7)       206 ( 50.4)  
##      2                        297 ( 33.6)       177 ( 37.3)       120 ( 29.3)  
##   pad = 1 (%)                   2 (  0.2)         1 (  0.2)         1 (  0.2)  
##   stroke = 1 (%)               21 (  2.1)        10 (  1.9)        11 (  2.5)  
##   dimentia = 1 (%)             10 (  1.0)         4 (  0.7)         6 (  1.4)  
##   ch_lung = 1 (%)              15 (  1.5)        11 (  2.1)         4 (  0.9)  
##   rheumati = 1 (%)              3 (  0.3)         3 (  0.6)         0 (  0.0)  
##   pept_ulcer = 1 (%)          101 ( 10.3)        59 ( 11.0)        42 (  9.5)  
##   dm = 1 (%)                  210 ( 21.4)       106 ( 19.8)       104 ( 23.4)  
##   dm_compli = 1 (%)            13 (  1.3)         6 (  1.1)         7 (  1.6)  
##   paralysis = 0 (%)           980 (100.0)       536 (100.0)       444 (100.0)  
##   malignancy = 1 (%)          115 ( 11.7)        58 ( 10.8)        57 ( 12.8)  
##   meta_tumor = 1 (%)           17 (  1.7)         6 (  1.1)        11 (  2.5)  
##   aids = 0 (%)                980 (100.0)       536 (100.0)       444 (100.0)  
##   eGFR30 = 1 (%)               87 (  9.1)        51 (  9.8)        36 (  8.2)  
##   hd = 1 (%)                   13 (  1.3)         3 (  0.6)        10 (  2.3)  
##   hcc = 1 (%)                 175 ( 17.9)       111 ( 20.7)        64 ( 14.4)  
##   alcohol = 1 (%)             472 ( 48.2)       231 ( 43.1)       241 ( 54.3)  
##   past_rupture = 1 (%)        225 ( 23.0)       104 ( 19.4)       121 ( 27.3)  
##   antiplate = 1 (%)             7 (  0.7)         4 (  0.7)         3 (  0.7)  
##   anticoag = 1 (%)              4 (  0.4)         1 (  0.2)         3 (  0.7)  
##   antithro = 1 (%)             10 (  1.0)         4 (  0.7)         6 (  1.4)  
##   nsaids = 1 (%)                9 (  0.9)         5 (  0.9)         4 (  0.9)  
##   steroid = 1 (%)               5 (  0.5)         2 (  0.4)         3 (  0.7)  
##   beta = 1 (%)                 58 (  5.9)        18 (  3.4)        40 (  9.0)  
##   vaso = 1 (%)                 58 (  5.9)        31 (  5.8)        27 (  6.1)  
##   ffp = 1 (%)                 291 ( 29.7)       137 ( 25.6)       154 ( 34.7)  
##   pc = 1 (%)                   17 (  1.7)         6 (  1.1)        11 (  2.5)  
##   albner = 1 (%)               80 (  8.2)        47 (  8.8)        33 (  7.4)  
##   shock = 1 (%)               409 ( 43.0)       215 ( 42.0)       194 ( 44.1)  
##   hosp_mortality = 1 (%)      118 ( 12.0)        73 ( 13.6)        45 ( 10.1)  
##   cohort = validation (%)     444 ( 45.3)         0 (  0.0)       444 (100.0)  
##                          Stratified by cohort
##                           p      test SMD    Missing
##   n                                                 
##   age (mean (SD))          0.336       0.062  0.0   
##   bmi (mean (SD))          0.060       0.126 11.7   
##   smoke (mean (SD))        0.028       0.153 12.9   
##   child_num (mean (SD))    0.007       0.186 13.2   
##   gcs (mean (SD))          0.023       0.149  0.0   
##   cci_num (mean (SD))      0.476       0.045  0.0   
##   map (mean (SD))          0.273       0.071  0.0   
##   bt (mean (SD))           0.001       0.224  3.8   
##   sBP (mean (SD))          0.014       0.160  2.3   
##   dBP (mean (SD))         <0.001       0.304  2.3   
##   hr (mean (SD))           0.038       0.135  2.9   
##   bil (mean (SD))          0.135       0.097  3.8   
##   ast (mean (SD))          0.608       0.033  2.6   
##   alt (mean (SD))          0.261       0.074  2.6   
##   wbc (mean (SD))          0.019       0.151  2.0   
##   hb (mean (SD))           0.022       0.148  2.0   
##   plt (mean (SD))          0.101       0.105  2.0   
##   tp (mean (SD))           0.517       0.043  7.9   
##   alb (mean (SD))          0.042       0.133  4.7   
##   eGFR (mean (SD))         0.474       0.046  2.1   
##   bun (mean (SD))          0.844       0.013  2.1   
##   cre (mean (SD))          0.820       0.015  2.9   
##   crp (mean (SD))          0.632       0.031  5.3   
##   pt (mean (SD))          <0.001       0.407  6.0   
##   aptt (mean (SD))         0.002       0.208 11.4   
##   los (mean (SD))          0.001       0.213  0.0   
##   sex = F (%)              0.652       0.034  0.0   
##   barthel (%)              0.140       0.139 16.7   
##      0                                              
##      1                                              
##      2                                              
##   child_score (%)          0.008       0.210  9.9   
##      0                                              
##      1                                              
##      2                                              
##   pad = 1 (%)              1.000       0.009  0.0   
##   stroke = 1 (%)           0.662       0.042  0.0   
##   dimentia = 1 (%)         0.536       0.059  0.0   
##   ch_lung = 1 (%)          0.230       0.096  0.0   
##   rheumati = 1 (%)         0.318       0.106  0.0   
##   pept_ulcer = 1 (%)       0.492       0.051  0.0   
##   dm = 1 (%)               0.191       0.089  0.0   
##   dm_compli = 1 (%)        0.732       0.040  0.0   
##   paralysis = 0 (%)           NA      <0.001  0.0   
##   malignancy = 1 (%)       0.381       0.062  0.0   
##   meta_tumor = 1 (%)       0.169       0.102  0.0   
##   aids = 0 (%)                NA      <0.001  0.0   
##   eGFR30 = 1 (%)           0.478       0.054  2.1   
##   hd = 1 (%)               0.043       0.144  0.0   
##   hcc = 1 (%)              0.013       0.166  0.0   
##   alcohol = 1 (%)          0.001       0.225  0.0   
##   past_rupture = 1 (%)     0.005       0.186  0.0   
##   antiplate = 1 (%)        1.000       0.008  0.0   
##   anticoag = 1 (%)         0.489       0.075  0.0   
##   antithro = 1 (%)         0.536       0.059  0.0   
##   nsaids = 1 (%)           1.000       0.003  0.0   
##   steroid = 1 (%)          0.833       0.042  0.0   
##   beta = 1 (%)            <0.001       0.236  0.0   
##   vaso = 1 (%)             0.952       0.013  0.0   
##   ffp = 1 (%)              0.002       0.200  0.0   
##   pc = 1 (%)               0.169       0.102  0.0   
##   albner = 1 (%)           0.520       0.049  0.0   
##   shock = 1 (%)            0.558       0.042  2.9   
##   hosp_mortality = 1 (%)   0.116       0.108  0.0   
##   cohort = validation (%) <0.001         NaN  0.0

tbl_summaryでmedian+IQRも準備する

# specify your data and variables
tbl_summary(data = df, 
            by = "cohort",
            type = list(gcs ~ "continuous", year ~ "categorical"),
            statistic = all_continuous() ~ "{median} ({p25}, {p75})",
            digits = all_continuous() ~ c(0, 2))
Characteristic develop, N = 5361 validation, N = 4441
hosp_id 1,011 (1,004.00, 1,024) 1,022 (1,006.00, 1,024)
pt_id 372 (177.75, 589) 492 (244.75, 643)
hosp_num
    1 472 (88%) 365 (82%)
    2 48 (9.0%) 50 (11%)
    3 15 (2.8%) 13 (2.9%)
    4 1 (0.2%) 10 (2.3%)
    5 0 (0%) 4 (0.9%)
    6 0 (0%) 2 (0.5%)
year
    2010 76 (14%) 0 (0%)
    2011 79 (15%) 0 (0%)
    2012 88 (16%) 0 (0%)
    2013 70 (13%) 0 (0%)
    2014 77 (14%) 0 (0%)
    2015 72 (13%) 0 (0%)
    2016 74 (14%) 0 (0%)
    2017 0 (0%) 64 (14%)
    2018 0 (0%) 62 (14%)
    2019 0 (0%) 72 (16%)
    2020 0 (0%) 82 (18%)
    2021 0 (0%) 83 (19%)
    2022 0 (0%) 81 (18%)
age 62 (51.75, 70) 60 (50.00, 70)
sex
    M 405 (76%) 329 (74%)
    F 131 (24%) 115 (26%)
bmi 23 (20.32, 25) 23 (20.59, 26)
    Unknown 85 30
smoke 20 (0.00, 440) 0 (0.00, 360)
    Unknown 84 42
barthel
    0 142 (36%) 171 (41%)
    1 118 (30%) 131 (31%)
    2 136 (34%) 118 (28%)
    Unknown 140 24
child_num 8 (7.00, 10) 8 (7.00, 10)
    Unknown 81 48
child_score
    0 66 (14%) 83 (20%)
    1 231 (49%) 206 (50%)
    2 177 (37%) 120 (29%)
    Unknown 62 35
gcs 15 (15.00, 15) 15 (15.00, 15)
cci_num 4 (4.00, 5) 4 (4.00, 5)
pad
    0 535 (100%) 443 (100%)
    1 1 (0.2%) 1 (0.2%)
stroke
    0 526 (98%) 433 (98%)
    1 10 (1.9%) 11 (2.5%)
dimentia
    0 532 (99%) 438 (99%)
    1 4 (0.7%) 6 (1.4%)
ch_lung
    0 525 (98%) 440 (99%)
    1 11 (2.1%) 4 (0.9%)
rheumati
    0 533 (99%) 444 (100%)
    1 3 (0.6%) 0 (0%)
pept_ulcer
    0 477 (89%) 402 (91%)
    1 59 (11%) 42 (9.5%)
dm
    0 430 (80%) 340 (77%)
    1 106 (20%) 104 (23%)
dm_compli
    0 530 (99%) 437 (98%)
    1 6 (1.1%) 7 (1.6%)
paralysis
    0 536 (100%) 444 (100%)
malignancy
    0 478 (89%) 387 (87%)
    1 58 (11%) 57 (13%)
meta_tumor
    0 530 (99%) 433 (98%)
    1 6 (1.1%) 11 (2.5%)
aids
    0 536 (100%) 444 (100%)
eGFR30
    0 471 (90%) 401 (92%)
    1 51 (9.8%) 36 (8.2%)
    Unknown 14 7
hd
    0 533 (99%) 434 (98%)
    1 3 (0.6%) 10 (2.3%)
hcc
    0 425 (79%) 380 (86%)
    1 111 (21%) 64 (14%)
alcohol
    0 305 (57%) 203 (46%)
    1 231 (43%) 241 (54%)
past_rupture
    0 432 (81%) 323 (73%)
    1 104 (19%) 121 (27%)
antiplate
    0 532 (99%) 441 (99%)
    1 4 (0.7%) 3 (0.7%)
anticoag
    0 535 (100%) 441 (99%)
    1 1 (0.2%) 3 (0.7%)
antithro
    0 532 (99%) 438 (99%)
    1 4 (0.7%) 6 (1.4%)
nsaids
    0 531 (99%) 440 (99%)
    1 5 (0.9%) 4 (0.9%)
steroid
    0 534 (100%) 441 (99%)
    1 2 (0.4%) 3 (0.7%)
beta
    0 518 (97%) 404 (91%)
    1 18 (3.4%) 40 (9.0%)
vaso
    0 505 (94%) 417 (94%)
    1 31 (5.8%) 27 (6.1%)
map 4 (0.00, 4) 4 (0.00, 4)
ffp
    0 399 (74%) 290 (65%)
    1 137 (26%) 154 (35%)
pc
    0 530 (99%) 433 (98%)
    1 6 (1.1%) 11 (2.5%)
albner
    0 489 (91%) 411 (93%)
    1 47 (8.8%) 33 (7.4%)
bt 37 (36.30, 37) 37 (36.50, 37)
    Unknown 32 5
sBP 89 (79.00, 96) 91 (80.00, 100)
    Unknown 22 1
dBP 51 (44.00, 58) 54 (46.00, 63)
    Unknown 22 1
hr 82 (70.00, 98) 85 (73.00, 101)
    Unknown 24 4
shock
    0 297 (58%) 246 (56%)
    1 215 (42%) 194 (44%)
    Unknown 24 4
bil 1 (0.90, 3) 2 (1.00, 3)
    Unknown 28 9
ast 56 (33.00, 96) 49 (32.00, 89)
    Unknown 18 7
alt 29 (20.00, 47) 29 (19.00, 44)
    Unknown 18 7
wbc 7,330 (5,315.00, 10,395) 8,300 (5,800.00, 11,100)
    Unknown 13 7
hb 8 (6.90, 10) 9 (7.20, 10)
    Unknown 13 7
plt 97 (72.00, 135) 103 (75.00, 141)
    Unknown 13 7
tp 6 (5.50, 7) 6 (5.50, 7)
    Unknown 45 32
alb 3 (2.50, 3) 3 (2.50, 3)
    Unknown 34 12
eGFR 67 (48.25, 89) 68 (49.43, 89)
    Unknown 14 7
bun 23 (15.85, 36) 25 (16.60, 36)
    Unknown 14 7
cre 1 (0.67, 1) 1 (0.65, 1)
    Unknown 18 10
crp 0 (0.11, 1) 0 (0.12, 1)
    Unknown 37 15
pt 51 (40.00, 63) 59 (46.00, 72)
    Unknown 39 20
aptt 29 (25.90, 33) 32 (29.65, 37)
    Unknown 63 49
hosp_mortality 73 (14%) 45 (10%)
los 10 (6.00, 18) 7 (5.00, 13)
1 Median (IQR); n (%)

全体での連続量の可視化

df |>  #全体
  select(col_continuous) |> 
  pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 5) +
  theme_bw()+
  theme(text = element_text(size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1044 rows containing non-finite values (`stat_bin()`).

開発コホートtableone作成

col_cont = c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")
col_fact = c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality")

# Create your table
df_dev %>%  
  select(c(col_cont, col_fact)) %>% 
  CreateTableOne(vars = c(col_cont, col_fact), strata="hosp_mortality",factorVars = col_fact, addOverall = T) -> tableone_dev
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_cont)
## 
##   # Now:
##   data %>% select(all_of(col_cont))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_fact)
## 
##   # Now:
##   data %>% select(all_of(col_fact))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone_dev, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE) 
##                         Stratified by hosp_mortality
##                          Overall           0                 1                
##   n                          536               463                73          
##   age (mean (SD))          61.23 (13.06)     60.82 (13.02)     63.84 (13.09)  
##   bmi (mean (SD))          22.91 (3.73)      23.02 (3.76)      22.17 (3.41)   
##   smoke (mean (SD))       299.62 (571.82)   283.65 (394.78)   406.02 (1214.88)
##   child_num (mean (SD))     8.77 (2.13)       8.47 (1.95)      10.91 (2.13)   
##   gcs (mean (SD))          14.33 (2.20)      14.72 (1.27)      11.90 (4.33)   
##   cci_num (mean (SD))       4.44 (1.20)       4.43 (1.08)       4.53 (1.78)   
##   map (mean (SD))           3.49 (4.51)       3.10 (4.35)       5.96 (4.71)   
##   bt (mean (SD))           36.65 (0.76)      36.68 (0.67)      36.48 (1.24)   
##   sBP (mean (SD))          87.35 (15.48)     90.12 (13.62)     69.74 (15.05)  
##   dBP (mean (SD))          51.00 (11.90)     52.83 (11.00)     39.39 (10.83)  
##   hr (mean (SD))           85.58 (21.10)     83.17 (19.18)    100.84 (25.93)  
##   bil (mean (SD))           2.20 (2.08)       1.97 (1.80)       3.69 (2.92)   
##   ast (mean (SD))          85.41 (92.54)     79.65 (88.38)    121.08 (109.08) 
##   alt (mean (SD))          43.36 (50.96)     41.65 (50.23)     53.94 (54.43)  
##   wbc (mean (SD))        8348.30 (4401.79) 8193.93 (4211.45) 9299.86 (5366.73)
##   hb (mean (SD))            8.55 (2.46)       8.62 (2.47)       8.11 (2.38)   
##   plt (mean (SD))         110.70 (57.80)    109.10 (52.04)    120.60 (84.92)  
##   tp (mean (SD))            6.09 (0.93)       6.15 (0.86)       5.75 (1.21)   
##   alb (mean (SD))           2.82 (0.59)       2.90 (0.54)       2.30 (0.62)   
##   eGFR (mean (SD))         69.15 (30.82)     72.85 (30.55)     46.36 (21.32)  
##   bun (mean (SD))          28.41 (18.64)     27.53 (17.80)     33.94 (22.59)  
##   cre (mean (SD))           1.07 (0.92)       1.00 (0.92)       1.46 (0.82)   
##   crp (mean (SD))           0.81 (1.67)       0.71 (1.59)       1.46 (1.96)   
##   pt (mean (SD))           51.73 (16.99)     53.40 (16.04)     41.66 (19.06)  
##   aptt (mean (SD))         32.43 (17.30)     30.76 (12.33)     42.18 (32.59)  
##   los (mean (SD))          13.77 (15.28)     14.21 (14.24)     11.01 (20.59)  
##   sex = F (%)                131 ( 24.4)       111 ( 24.0)        20 ( 27.4)  
##   barthel (%)                                                                 
##      0                       142 ( 35.9)       136 ( 39.8)         6 ( 11.1)  
##      1                       118 ( 29.8)       108 ( 31.6)        10 ( 18.5)  
##      2                       136 ( 34.3)        98 ( 28.7)        38 ( 70.4)  
##   child_score (%)                                                             
##      0                        66 ( 13.9)        65 ( 15.9)         1 (  1.5)  
##      1                       231 ( 48.7)       217 ( 53.1)        14 ( 21.5)  
##      2                       177 ( 37.3)       127 ( 31.1)        50 ( 76.9)  
##   pad = 1 (%)                  1 (  0.2)         1 (  0.2)         0 (  0.0)  
##   stroke = 1 (%)              10 (  1.9)        10 (  2.2)         0 (  0.0)  
##   dimentia = 1 (%)             4 (  0.7)         4 (  0.9)         0 (  0.0)  
##   ch_lung = 1 (%)             11 (  2.1)         9 (  1.9)         2 (  2.7)  
##   rheumati = 1 (%)             3 (  0.6)         3 (  0.6)         0 (  0.0)  
##   pept_ulcer = 1 (%)          59 ( 11.0)        58 ( 12.5)         1 (  1.4)  
##   dm = 1 (%)                 106 ( 19.8)        98 ( 21.2)         8 ( 11.0)  
##   dm_compli = 1 (%)            6 (  1.1)         6 (  1.3)         0 (  0.0)  
##   paralysis = 0 (%)          536 (100.0)       463 (100.0)        73 (100.0)  
##   malignancy = 1 (%)          58 ( 10.8)        46 (  9.9)        12 ( 16.4)  
##   meta_tumor = 1 (%)           6 (  1.1)         2 (  0.4)         4 (  5.5)  
##   aids = 0 (%)               536 (100.0)       463 (100.0)        73 (100.0)  
##   eGFR30 = 1 (%)              51 (  9.8)        34 (  7.6)        17 ( 23.3)  
##   hd = 1 (%)                   3 (  0.6)         3 (  0.6)         0 (  0.0)  
##   hcc = 1 (%)                111 ( 20.7)        95 ( 20.5)        16 ( 21.9)  
##   alcohol = 1 (%)            231 ( 43.1)       205 ( 44.3)        26 ( 35.6)  
##   past_rupture = 1 (%)       104 ( 19.4)        96 ( 20.7)         8 ( 11.0)  
##   antiplate = 1 (%)            4 (  0.7)         3 (  0.6)         1 (  1.4)  
##   anticoag = 1 (%)             1 (  0.2)         1 (  0.2)         0 (  0.0)  
##   antithro = 1 (%)             4 (  0.7)         3 (  0.6)         1 (  1.4)  
##   nsaids = 1 (%)               5 (  0.9)         5 (  1.1)         0 (  0.0)  
##   steroid = 1 (%)              2 (  0.4)         1 (  0.2)         1 (  1.4)  
##   beta = 1 (%)                18 (  3.4)        18 (  3.9)         0 (  0.0)  
##   vaso = 1 (%)                31 (  5.8)        11 (  2.4)        20 ( 27.4)  
##   ffp = 1 (%)                137 ( 25.6)       100 ( 21.6)        37 ( 50.7)  
##   pc = 1 (%)                   6 (  1.1)         3 (  0.6)         3 (  4.1)  
##   albner = 1 (%)              47 (  8.8)        32 (  6.9)        15 ( 20.5)  
##   shock = 1 (%)              215 ( 42.0)       155 ( 35.1)        60 ( 85.7)  
##   hosp_mortality = 1 (%)      73 ( 13.6)         0 (  0.0)        73 (100.0)  
##                         Stratified by hosp_mortality
##                          p      test SMD    Missing
##   n                                                
##   age (mean (SD))         0.066       0.231  0.0   
##   bmi (mean (SD))         0.108       0.236 15.9   
##   smoke (mean (SD))       0.125       0.135 15.7   
##   child_num (mean (SD))  <0.001       1.191 15.1   
##   gcs (mean (SD))        <0.001       0.882  0.0   
##   cci_num (mean (SD))     0.490       0.071  0.0   
##   map (mean (SD))        <0.001       0.630  0.0   
##   bt (mean (SD))          0.056       0.197  6.0   
##   sBP (mean (SD))        <0.001       1.420  4.1   
##   dBP (mean (SD))        <0.001       1.231  4.1   
##   hr (mean (SD))         <0.001       0.775  4.5   
##   bil (mean (SD))        <0.001       0.708  5.2   
##   ast (mean (SD))        <0.001       0.417  3.4   
##   alt (mean (SD))         0.057       0.235  3.4   
##   wbc (mean (SD))         0.046       0.229  2.4   
##   hb (mean (SD))          0.099       0.211  2.4   
##   plt (mean (SD))         0.115       0.163  2.4   
##   tp (mean (SD))          0.001       0.381  8.4   
##   alb (mean (SD))        <0.001       1.019  6.3   
##   eGFR (mean (SD))       <0.001       1.006  2.6   
##   bun (mean (SD))         0.007       0.316  2.6   
##   cre (mean (SD))        <0.001       0.533  3.4   
##   crp (mean (SD))        <0.001       0.421  6.9   
##   pt (mean (SD))         <0.001       0.667  7.3   
##   aptt (mean (SD))       <0.001       0.463 11.8   
##   los (mean (SD))         0.097       0.180  0.0   
##   sex = F (%)             0.627       0.078  0.0   
##   barthel (%)            <0.001       0.955 26.1   
##      0                                             
##      1                                             
##      2                                             
##   child_score (%)        <0.001       1.072 11.6   
##      0                                             
##      1                                             
##      2                                             
##   pad = 1 (%)             1.000       0.066  0.0   
##   stroke = 1 (%)          0.422       0.210  0.0   
##   dimentia = 1 (%)        0.948       0.132  0.0   
##   ch_lung = 1 (%)         0.999       0.053  0.0   
##   rheumati = 1 (%)        1.000       0.114  0.0   
##   pept_ulcer = 1 (%)      0.009       0.450  0.0   
##   dm = 1 (%)              0.061       0.281  0.0   
##   dm_compli = 1 (%)       0.704       0.162  0.0   
##   paralysis = 0 (%)          NA      <0.001  0.0   
##   malignancy = 1 (%)      0.144       0.193  0.0   
##   meta_tumor = 1 (%)      0.001       0.301  0.0   
##   aids = 0 (%)               NA      <0.001  0.0   
##   eGFR30 = 1 (%)         <0.001       0.446  2.6   
##   hd = 1 (%)              1.000       0.114  0.0   
##   hcc = 1 (%)             0.905       0.034  0.0   
##   alcohol = 1 (%)         0.207       0.178  0.0   
##   past_rupture = 1 (%)    0.071       0.270  0.0   
##   antiplate = 1 (%)       1.000       0.072  0.0   
##   anticoag = 1 (%)        1.000       0.066  0.0   
##   antithro = 1 (%)        1.000       0.072  0.0   
##   nsaids = 1 (%)          0.813       0.148  0.0   
##   steroid = 1 (%)         0.638       0.130  0.0   
##   beta = 1 (%)            0.173       0.284  0.0   
##   vaso = 1 (%)           <0.001       0.751  0.0   
##   ffp = 1 (%)            <0.001       0.635  0.0   
##   pc = 1 (%)              0.044       0.229  0.0   
##   albner = 1 (%)         <0.001       0.404  0.0   
##   shock = 1 (%)          <0.001       1.210  4.5   
##   hosp_mortality = 1 (%) <0.001         NaN  0.0

開発データでもtbl_summaryでmedian+IQRも準備する

# specify your data and variables
tbl_summary(data = df_dev, 
            by = "hosp_mortality",
            type = list(gcs ~ "continuous", year ~ "categorical"),
            statistic = all_continuous() ~ "{median} ({p25}, {p75})",
            digits = all_continuous() ~ c(0, 2))
Characteristic 0, N = 4631 1, N = 731
hosp_id 1,010 (1,003.00, 1,024) 1,017 (1,005.00, 1,024)
pt_id 359 (149.50, 583) 426 (235.00, 599)
hosp_num
    1 404 (87%) 68 (93%)
    2 43 (9.3%) 5 (6.8%)
    3 15 (3.2%) 0 (0%)
    4 1 (0.2%) 0 (0%)
year
    2010 68 (15%) 8 (11%)
    2011 70 (15%) 9 (12%)
    2012 72 (16%) 16 (22%)
    2013 55 (12%) 15 (21%)
    2014 69 (15%) 8 (11%)
    2015 67 (14%) 5 (6.8%)
    2016 62 (13%) 12 (16%)
age 62 (51.00, 70) 66 (55.00, 74)
sex
    M 352 (76%) 53 (73%)
    F 111 (24%) 20 (27%)
bmi 23 (20.39, 25) 22 (20.15, 24)
    Unknown 69 16
smoke 60 (0.00, 440) 0 (0.00, 400)
    Unknown 70 14
barthel
    0 136 (40%) 6 (11%)
    1 108 (32%) 10 (19%)
    2 98 (29%) 38 (70%)
    Unknown 121 19
child_num 8 (7.00, 10) 11 (9.00, 12)
    Unknown 64 17
child_score
    0 65 (16%) 1 (1.5%)
    1 217 (53%) 14 (22%)
    2 127 (31%) 50 (77%)
    Unknown 54 8
gcs 15 (15.00, 15) 15 (9.00, 15)
cci_num 4 (4.00, 5) 4 (4.00, 5)
pad
    0 462 (100%) 73 (100%)
    1 1 (0.2%) 0 (0%)
stroke
    0 453 (98%) 73 (100%)
    1 10 (2.2%) 0 (0%)
dimentia
    0 459 (99%) 73 (100%)
    1 4 (0.9%) 0 (0%)
ch_lung
    0 454 (98%) 71 (97%)
    1 9 (1.9%) 2 (2.7%)
rheumati
    0 460 (99%) 73 (100%)
    1 3 (0.6%) 0 (0%)
pept_ulcer
    0 405 (87%) 72 (99%)
    1 58 (13%) 1 (1.4%)
dm
    0 365 (79%) 65 (89%)
    1 98 (21%) 8 (11%)
dm_compli
    0 457 (99%) 73 (100%)
    1 6 (1.3%) 0 (0%)
paralysis
    0 463 (100%) 73 (100%)
malignancy
    0 417 (90%) 61 (84%)
    1 46 (9.9%) 12 (16%)
meta_tumor
    0 461 (100%) 69 (95%)
    1 2 (0.4%) 4 (5.5%)
aids
    0 463 (100%) 73 (100%)
eGFR30
    0 415 (92%) 56 (77%)
    1 34 (7.6%) 17 (23%)
    Unknown 14 0
hd
    0 460 (99%) 73 (100%)
    1 3 (0.6%) 0 (0%)
hcc
    0 368 (79%) 57 (78%)
    1 95 (21%) 16 (22%)
alcohol
    0 258 (56%) 47 (64%)
    1 205 (44%) 26 (36%)
past_rupture
    0 367 (79%) 65 (89%)
    1 96 (21%) 8 (11%)
antiplate
    0 460 (99%) 72 (99%)
    1 3 (0.6%) 1 (1.4%)
anticoag
    0 462 (100%) 73 (100%)
    1 1 (0.2%) 0 (0%)
antithro
    0 460 (99%) 72 (99%)
    1 3 (0.6%) 1 (1.4%)
nsaids
    0 458 (99%) 73 (100%)
    1 5 (1.1%) 0 (0%)
steroid
    0 462 (100%) 72 (99%)
    1 1 (0.2%) 1 (1.4%)
beta
    0 445 (96%) 73 (100%)
    1 18 (3.9%) 0 (0%)
vaso
    0 452 (98%) 53 (73%)
    1 11 (2.4%) 20 (27%)
map 4 (0.00, 4) 6 (4.00, 8)
ffp
    0 363 (78%) 36 (49%)
    1 100 (22%) 37 (51%)
pc
    0 460 (99%) 70 (96%)
    1 3 (0.6%) 3 (4.1%)
albner
    0 431 (93%) 58 (79%)
    1 32 (6.9%) 15 (21%)
bt 37 (36.30, 37) 36 (36.00, 37)
    Unknown 22 10
sBP 90 (82.00, 98) 64 (59.00, 79)
    Unknown 19 3
dBP 52 (46.00, 60) 39 (33.00, 46)
    Unknown 19 3
hr 80 (70.00, 94) 101 (89.00, 112)
    Unknown 21 3
shock
    0 287 (65%) 10 (14%)
    1 155 (35%) 60 (86%)
    Unknown 21 3
bil 1 (0.90, 2) 3 (1.53, 5)
    Unknown 24 4
ast 53 (32.00, 92) 82 (48.75, 153)
    Unknown 17 1
alt 28 (20.00, 44) 33 (25.00, 63)
    Unknown 17 1
wbc 7,215 (5,300.00, 10,300) 7,990 (6,200.00, 10,800)
    Unknown 13 0
hb 8 (7.00, 10) 8 (6.60, 10)
    Unknown 13 0
plt 97 (72.00, 135) 98 (76.00, 130)
    Unknown 13 0
tp 6 (5.50, 7) 6 (5.10, 6)
    Unknown 37 8
alb 3 (2.58, 3) 2 (1.80, 3)
    Unknown 31 3
eGFR 71 (52.10, 92) 44 (30.40, 58)
    Unknown 14 0
bun 23 (15.25, 35) 28 (17.98, 45)
    Unknown 13 1
cre 1 (0.65, 1) 1 (0.99, 2)
    Unknown 18 0
crp 0 (0.10, 1) 1 (0.37, 2)
    Unknown 33 4
pt 53 (42.00, 64) 40 (26.90, 56)
    Unknown 37 2
aptt 28 (25.40, 33) 32 (29.10, 44)
    Unknown 59 4
los 10 (7.00, 18) 4 (1.00, 16)
cohort
    develop 463 (100%) 73 (100%)
    validation 0 (0%) 0 (0%)
1 Median (IQR); n (%)

開発コホート 連続量可視化

df_dev |>  #全体
  select(col_continuous) |> 
  pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 5) +
  theme_bw()+
  theme(text = element_text(size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 717 rows containing non-finite values (`stat_bin()`).

検証コホートtableone作成

# Create your table
df_val %>%  
  select(c(col_cont, col_fact)) %>% 
  CreateTableOne(vars = c(col_cont, col_fact), strata="hosp_mortality",factorVars = col_fact, addOverall = T) -> tableone_val


# Print your table
print(tableone_val, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE) 
##                         Stratified by hosp_mortality
##                          Overall           0                 1                 
##   n                          444               399                 45          
##   age (mean (SD))          60.42 (12.96)     60.18 (13.03)      62.62 (12.19)  
##   bmi (mean (SD))          24.38 (16.12)     24.46 (16.74)      23.43 (4.22)   
##   smoke (mean (SD))       226.73 (356.99)   233.57 (364.62)    157.22 (261.19) 
##   child_num (mean (SD))     8.38 (2.09)       8.13 (1.93)       10.52 (2.14)   
##   gcs (mean (SD))          14.61 (1.34)      14.66 (1.24)       14.09 (1.94)   
##   cci_num (mean (SD))       4.50 (1.45)       4.46 (1.36)        4.91 (2.11)   
##   map (mean (SD))           3.21 (3.18)       2.88 (2.76)        6.13 (4.81)   
##   bt (mean (SD))           36.81 (0.65)      36.81 (0.54)       36.84 (1.30)   
##   sBP (mean (SD))          89.95 (17.16)     92.40 (15.70)      68.33 (14.21)  
##   dBP (mean (SD))          54.84 (13.33)     56.33 (12.84)      41.69 (10.03)  
##   hr (mean (SD))           88.43 (20.93)     86.76 (20.28)     103.84 (20.79)  
##   bil (mean (SD))           2.42 (2.32)       2.18 (1.86)        4.54 (4.21)   
##   ast (mean (SD))          82.05 (109.78)    72.39 (79.19)     168.27 (237.83) 
##   alt (mean (SD))          39.93 (41.41)     36.47 (27.91)      70.91 (95.88)  
##   wbc (mean (SD))        9068.42 (5108.48) 8834.15 (5148.20) 11160.91 (4243.62)
##   hb (mean (SD))            8.92 (2.51)       9.00 (2.55)        8.18 (1.96)   
##   plt (mean (SD))         117.76 (75.39)    118.45 (77.27)     111.57 (56.24)  
##   tp (mean (SD))            6.13 (0.92)       6.19 (0.90)        5.64 (1.03)   
##   alb (mean (SD))           2.90 (0.64)       2.97 (0.61)        2.29 (0.54)   
##   eGFR (mean (SD))         70.59 (31.31)     73.60 (30.92)      43.71 (20.02)  
##   bun (mean (SD))          28.64 (17.04)     27.83 (16.43)      35.88 (20.60)  
##   cre (mean (SD))           1.05 (0.89)       1.00 (0.87)        1.54 (0.88)   
##   crp (mean (SD))           0.87 (1.86)       0.74 (1.64)        2.03 (3.01)   
##   pt (mean (SD))           58.78 (17.72)     60.61 (16.94)      43.01 (16.65)  
##   aptt (mean (SD))         36.06 (17.52)     34.27 (13.26)      51.43 (34.63)  
##   los (mean (SD))          10.67 (13.77)     10.72 (13.83)      10.18 (13.38)  
##   sex = F (%)                115 ( 25.9)       103 ( 25.8)         12 ( 26.7)  
##   barthel (%)                                                                  
##      0                       171 ( 40.7)       167 ( 44.3)          4 (  9.3)  
##      1                       131 ( 31.2)       120 ( 31.8)         11 ( 25.6)  
##      2                       118 ( 28.1)        90 ( 23.9)         28 ( 65.1)  
##   child_score (%)                                                              
##      0                        83 ( 20.3)        81 ( 22.3)          2 (  4.4)  
##      1                       206 ( 50.4)       193 ( 53.0)         13 ( 28.9)  
##      2                       120 ( 29.3)        90 ( 24.7)         30 ( 66.7)  
##   pad = 1 (%)                  1 (  0.2)         1 (  0.3)          0 (  0.0)  
##   stroke = 1 (%)              11 (  2.5)         8 (  2.0)          3 (  6.7)  
##   dimentia = 1 (%)             6 (  1.4)         5 (  1.3)          1 (  2.2)  
##   ch_lung = 1 (%)              4 (  0.9)         3 (  0.8)          1 (  2.2)  
##   rheumati = 0 (%)           444 (100.0)       399 (100.0)         45 (100.0)  
##   pept_ulcer = 1 (%)          42 (  9.5)        40 ( 10.0)          2 (  4.4)  
##   dm = 1 (%)                 104 ( 23.4)        96 ( 24.1)          8 ( 17.8)  
##   dm_compli = 1 (%)            7 (  1.6)         6 (  1.5)          1 (  2.2)  
##   paralysis = 0 (%)          444 (100.0)       399 (100.0)         45 (100.0)  
##   malignancy = 1 (%)          57 ( 12.8)        50 ( 12.5)          7 ( 15.6)  
##   meta_tumor = 1 (%)          11 (  2.5)         7 (  1.8)          4 (  8.9)  
##   aids = 0 (%)               444 (100.0)       399 (100.0)         45 (100.0)  
##   eGFR30 = 1 (%)              36 (  8.2)        22 (  5.6)         14 ( 31.8)  
##   hd = 1 (%)                  10 (  2.3)         7 (  1.8)          3 (  6.7)  
##   hcc = 1 (%)                 64 ( 14.4)        56 ( 14.0)          8 ( 17.8)  
##   alcohol = 1 (%)            241 ( 54.3)       212 ( 53.1)         29 ( 64.4)  
##   past_rupture = 1 (%)       121 ( 27.3)       115 ( 28.8)          6 ( 13.3)  
##   antiplate = 1 (%)            3 (  0.7)         2 (  0.5)          1 (  2.2)  
##   anticoag = 1 (%)             3 (  0.7)         3 (  0.8)          0 (  0.0)  
##   antithro = 1 (%)             6 (  1.4)         5 (  1.3)          1 (  2.2)  
##   nsaids = 1 (%)               4 (  0.9)         4 (  1.0)          0 (  0.0)  
##   steroid = 1 (%)              3 (  0.7)         3 (  0.8)          0 (  0.0)  
##   beta = 1 (%)                40 (  9.0)        40 ( 10.0)          0 (  0.0)  
##   vaso = 1 (%)                27 (  6.1)        18 (  4.5)          9 ( 20.0)  
##   ffp = 1 (%)                154 ( 34.7)       126 ( 31.6)         28 ( 62.2)  
##   pc = 1 (%)                  11 (  2.5)         6 (  1.5)          5 ( 11.1)  
##   albner = 1 (%)              33 (  7.4)        26 (  6.5)          7 ( 15.6)  
##   shock = 1 (%)              194 ( 44.1)       153 ( 38.5)         41 ( 95.3)  
##   hosp_mortality = 1 (%)      45 ( 10.1)         0 (  0.0)         45 (100.0)  
##                         Stratified by hosp_mortality
##                          p      test SMD    Missing
##   n                                                
##   age (mean (SD))         0.230       0.194  0.0   
##   bmi (mean (SD))         0.730       0.084  6.8   
##   smoke (mean (SD))       0.221       0.241  9.5   
##   child_num (mean (SD))  <0.001       1.174 10.8   
##   gcs (mean (SD))         0.006       0.353  0.0   
##   cci_num (mean (SD))     0.048       0.255  0.0   
##   map (mean (SD))        <0.001       0.828  0.0   
##   bt (mean (SD))          0.785       0.029  1.1   
##   sBP (mean (SD))        <0.001       1.607  0.2   
##   dBP (mean (SD))        <0.001       1.271  0.2   
##   hr (mean (SD))         <0.001       0.832  0.9   
##   bil (mean (SD))        <0.001       0.724  2.0   
##   ast (mean (SD))        <0.001       0.541  1.6   
##   alt (mean (SD))        <0.001       0.488  1.6   
##   wbc (mean (SD))         0.004       0.493  1.6   
##   hb (mean (SD))          0.040       0.361  1.6   
##   plt (mean (SD))         0.566       0.102  1.6   
##   tp (mean (SD))         <0.001       0.566  7.2   
##   alb (mean (SD))        <0.001       1.177  2.7   
##   eGFR (mean (SD))       <0.001       1.148  1.6   
##   bun (mean (SD))         0.003       0.432  1.6   
##   cre (mean (SD))        <0.001       0.623  2.3   
##   crp (mean (SD))        <0.001       0.532  3.4   
##   pt (mean (SD))         <0.001       1.048  4.5   
##   aptt (mean (SD))       <0.001       0.654 11.0   
##   los (mean (SD))         0.801       0.040  0.0   
##   sex = F (%)             1.000       0.019  0.0   
##   barthel (%)            <0.001       1.041  5.4   
##      0                                             
##      1                                             
##      2                                             
##   child_score (%)        <0.001       0.972  7.9   
##      0                                             
##      1                                             
##      2                                             
##   pad = 1 (%)             1.000       0.071  0.0   
##   stroke = 1 (%)          0.161       0.230  0.0   
##   dimentia = 1 (%)        1.000       0.074  0.0   
##   ch_lung = 1 (%)         0.875       0.122  0.0   
##   rheumati = 0 (%)           NA      <0.001  0.0   
##   pept_ulcer = 1 (%)      0.345       0.217  0.0   
##   dm = 1 (%)              0.449       0.155  0.0   
##   dm_compli = 1 (%)       1.000       0.053  0.0   
##   paralysis = 0 (%)          NA      <0.001  0.0   
##   malignancy = 1 (%)      0.734       0.087  0.0   
##   meta_tumor = 1 (%)      0.016       0.322  0.0   
##   aids = 0 (%)               NA      <0.001  0.0   
##   eGFR30 = 1 (%)         <0.001       0.714  1.6   
##   hd = 1 (%)              0.115       0.246  0.0   
##   hcc = 1 (%)             0.650       0.102  0.0   
##   alcohol = 1 (%)         0.198       0.231  0.0   
##   past_rupture = 1 (%)    0.042       0.387  0.0   
##   antiplate = 1 (%)       0.707       0.149  0.0   
##   anticoag = 1 (%)        1.000       0.123  0.0   
##   antithro = 1 (%)        1.000       0.074  0.0   
##   nsaids = 1 (%)          1.000       0.142  0.0   
##   steroid = 1 (%)         1.000       0.123  0.0   
##   beta = 1 (%)            0.051       0.472  0.0   
##   vaso = 1 (%)           <0.001       0.486  0.0   
##   ffp = 1 (%)            <0.001       0.645  0.0   
##   pc = 1 (%)              0.001       0.403  0.0   
##   albner = 1 (%)          0.059       0.292  0.0   
##   shock = 1 (%)          <0.001       1.515  0.9   
##   hosp_mortality = 1 (%) <0.001         NaN  0.0

検証データでもtbl_summaryでmedian+IQRも準備する

# specify your data and variables
tbl_summary(data = df_val, 
            by = "hosp_mortality",
            type = list(gcs ~ "continuous", year ~ "categorical"),
            statistic = all_continuous() ~ "{median} ({p25}, {p75})",
            digits = all_continuous() ~ c(0, 2))
Characteristic 0, N = 3991 1, N = 451
hosp_id 1,022 (1,005.00, 1,024) 1,022 (1,006.00, 1,062)
pt_id 488 (232.50, 640) 510 (299.00, 738)
hosp_num
    1 326 (82%) 39 (87%)
    2 46 (12%) 4 (8.9%)
    3 12 (3.0%) 1 (2.2%)
    4 10 (2.5%) 0 (0%)
    5 3 (0.8%) 1 (2.2%)
    6 2 (0.5%) 0 (0%)
year
    2017 57 (14%) 7 (16%)
    2018 53 (13%) 9 (20%)
    2019 63 (16%) 9 (20%)
    2020 74 (19%) 8 (18%)
    2021 76 (19%) 7 (16%)
    2022 76 (19%) 5 (11%)
age 60 (50.00, 70) 64 (52.00, 70)
sex
    M 296 (74%) 33 (73%)
    F 103 (26%) 12 (27%)
bmi 23 (20.59, 26) 23 (20.16, 26)
    Unknown 17 13
smoke 0 (0.00, 390) 0 (0.00, 312)
    Unknown 33 9
barthel
    0 167 (44%) 4 (9.3%)
    1 120 (32%) 11 (26%)
    2 90 (24%) 28 (65%)
    Unknown 22 2
child_num 8 (7.00, 9) 11 (9.00, 12)
    Unknown 45 3
child_score
    0 81 (22%) 2 (4.4%)
    1 193 (53%) 13 (29%)
    2 90 (25%) 30 (67%)
    Unknown 35 0
gcs 15 (15.00, 15) 15 (14.00, 15)
cci_num 4 (4.00, 5) 4 (4.00, 5)
pad
    0 398 (100%) 45 (100%)
    1 1 (0.3%) 0 (0%)
stroke
    0 391 (98%) 42 (93%)
    1 8 (2.0%) 3 (6.7%)
dimentia
    0 394 (99%) 44 (98%)
    1 5 (1.3%) 1 (2.2%)
ch_lung
    0 396 (99%) 44 (98%)
    1 3 (0.8%) 1 (2.2%)
rheumati
    0 399 (100%) 45 (100%)
    1 0 (0%) 0 (0%)
pept_ulcer
    0 359 (90%) 43 (96%)
    1 40 (10%) 2 (4.4%)
dm
    0 303 (76%) 37 (82%)
    1 96 (24%) 8 (18%)
dm_compli
    0 393 (98%) 44 (98%)
    1 6 (1.5%) 1 (2.2%)
paralysis
    0 399 (100%) 45 (100%)
malignancy
    0 349 (87%) 38 (84%)
    1 50 (13%) 7 (16%)
meta_tumor
    0 392 (98%) 41 (91%)
    1 7 (1.8%) 4 (8.9%)
aids
    0 399 (100%) 45 (100%)
eGFR30
    0 371 (94%) 30 (68%)
    1 22 (5.6%) 14 (32%)
    Unknown 6 1
hd
    0 392 (98%) 42 (93%)
    1 7 (1.8%) 3 (6.7%)
hcc
    0 343 (86%) 37 (82%)
    1 56 (14%) 8 (18%)
alcohol
    0 187 (47%) 16 (36%)
    1 212 (53%) 29 (64%)
past_rupture
    0 284 (71%) 39 (87%)
    1 115 (29%) 6 (13%)
antiplate
    0 397 (99%) 44 (98%)
    1 2 (0.5%) 1 (2.2%)
anticoag
    0 396 (99%) 45 (100%)
    1 3 (0.8%) 0 (0%)
antithro
    0 394 (99%) 44 (98%)
    1 5 (1.3%) 1 (2.2%)
nsaids
    0 395 (99%) 45 (100%)
    1 4 (1.0%) 0 (0%)
steroid
    0 396 (99%) 45 (100%)
    1 3 (0.8%) 0 (0%)
beta
    0 359 (90%) 45 (100%)
    1 40 (10%) 0 (0%)
vaso
    0 381 (95%) 36 (80%)
    1 18 (4.5%) 9 (20%)
map 2 (0.00, 4) 4 (4.00, 8)
ffp
    0 273 (68%) 17 (38%)
    1 126 (32%) 28 (62%)
pc
    0 393 (98%) 40 (89%)
    1 6 (1.5%) 5 (11%)
albner
    0 373 (93%) 38 (84%)
    1 26 (6.5%) 7 (16%)
bt 37 (36.50, 37) 37 (36.20, 38)
    Unknown 1 4
sBP 93 (82.25, 101) 68 (56.00, 79)
    Unknown 1 0
dBP 56 (48.00, 64) 43 (34.00, 49)
    Unknown 1 0
hr 83 (72.00, 100) 106 (90.50, 118)
    Unknown 2 2
shock
    0 244 (61%) 2 (4.7%)
    1 153 (39%) 41 (95%)
    Unknown 2 2
bil 2 (0.96, 3) 3 (1.63, 6)
    Unknown 8 1
ast 47 (31.00, 84) 81 (39.00, 167)
    Unknown 6 1
alt 29 (19.00, 42) 33 (18.75, 70)
    Unknown 6 1
wbc 8,050 (5,600.00, 10,800) 9,850 (8,842.50, 13,925)
    Unknown 6 1
hb 9 (7.30, 11) 8 (6.88, 9)
    Unknown 6 1
plt 102 (75.00, 141) 109 (55.75, 147)
    Unknown 6 1
tp 6 (5.60, 7) 6 (5.00, 6)
    Unknown 30 2
alb 3 (2.50, 3) 2 (1.80, 3)
    Unknown 11 1
eGFR 71 (53.87, 92) 41 (28.14, 58)
    Unknown 6 1
bun 25 (16.40, 36) 32 (19.82, 48)
    Unknown 6 1
cre 1 (0.64, 1) 1 (0.92, 2)
    Unknown 9 1
crp 0 (0.11, 1) 1 (0.19, 2)
    Unknown 13 2
pt 60 (48.22, 72) 41 (30.75, 54)
    Unknown 19 1
aptt 32 (29.33, 36) 40 (35.30, 56)
    Unknown 45 4
los 7 (5.00, 13) 6 (2.00, 12)
cohort
    develop 0 (0%) 0 (0%)
    validation 399 (100%) 45 (100%)
1 Median (IQR); n (%)

検証コホート 連続量可視化

df_val |>  #全体
  select(col_continuous) |> 
  pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 5) +
  theme_bw()+
  theme(text = element_text(size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 327 rows containing non-finite values (`stat_bin()`).

連続変数の相関確認

数値で

# 連続変数のリストを指定します。
col_continu <- c("age", "bmi","smoke","child_num","gcs","cci_num","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt")
# 指定した変数だけで相関行列を作成します。
corresult <- df_dev %>% 
  dplyr::select(all_of(col_continu)) %>% 
  drop_na() %>% 
  cor(method = "pearson")

# 相関行列の数値を表示します。
print(corresult)
##                    age           bmi       smoke   child_num          gcs
## age        1.000000000 -1.296100e-01 -0.13183344 -0.07904746 -0.056003460
## bmi       -0.129610011  1.000000e+00  0.05466316 -0.03675640 -0.002670049
## smoke     -0.131833435  5.466316e-02  1.00000000  0.11712280  0.012041939
## child_num -0.079047459 -3.675640e-02  0.11712280  1.00000000 -0.446313124
## gcs       -0.056003460 -2.670049e-03  0.01204194 -0.44631312  1.000000000
## cci_num    0.173806387  5.024066e-02  0.01130821  0.01885518  0.032132947
## bt        -0.105549686 -4.913514e-05 -0.02113526  0.05596907  0.055599410
## sBP       -0.050237673  2.058716e-02 -0.07168332 -0.33446966  0.249658005
## dBP       -0.065716671  2.963271e-03 -0.01202908 -0.26953193  0.248596331
## hr        -0.129842553 -8.735062e-02  0.15546953  0.28402598 -0.049489433
## bil       -0.231662689 -5.051886e-02  0.15359723  0.64601643 -0.255979204
## ast       -0.161693213 -1.337139e-01 -0.02897554  0.18680748 -0.050480785
## alt        0.003882512 -1.409287e-02 -0.04844208  0.06228414  0.014555916
## wbc       -0.193288481  3.516464e-02  0.11847396  0.22927340 -0.091775211
## hb        -0.058416009  1.827695e-01 -0.01758879 -0.22162265  0.065107613
## plt        0.092221345 -4.431667e-02 -0.08794923 -0.15424065  0.028328140
## tp        -0.085020719  3.998964e-02 -0.10721569 -0.20602561  0.028327021
## alb       -0.142467295  7.661253e-02 -0.09456042 -0.65673209  0.346804566
## eGFR      -0.421581160 -1.510401e-04 -0.02066158 -0.18092871  0.247110945
## bun        0.236651282 -4.360478e-02 -0.07590300  0.06459151 -0.137820967
## cre        0.093021049 -7.317137e-03  0.05553996  0.15426154 -0.243630164
## crp       -0.005293644 -6.675397e-02  0.02185618  0.19786931 -0.027971543
## pt         0.177417560  2.038415e-02 -0.15734504 -0.66164510  0.225617727
## aptt       0.032688987 -7.245584e-02  0.05439131  0.30180218 -0.163820383
##                cci_num            bt           sBP          dBP          hr
## age        0.173806387 -1.055497e-01 -0.0502376731 -0.065716671 -0.12984255
## bmi        0.050240657 -4.913514e-05  0.0205871583  0.002963271 -0.08735062
## smoke      0.011308208 -2.113526e-02 -0.0716833228 -0.012029085  0.15546953
## child_num  0.018855185  5.596907e-02 -0.3344696572 -0.269531928  0.28402598
## gcs        0.032132947  5.559941e-02  0.2496580047  0.248596331 -0.04948943
## cci_num    1.000000000 -1.281198e-01 -0.0811716905 -0.178497367 -0.13172033
## bt        -0.128119797  1.000000e+00  0.2368694297  0.164333733  0.25591612
## sBP       -0.081171691  2.368694e-01  1.0000000000  0.605357525 -0.15071133
## dBP       -0.178497367  1.643337e-01  0.6053575253  1.000000000 -0.10642854
## hr        -0.131720328  2.559161e-01 -0.1507113296 -0.106428543  1.00000000
## bil       -0.133648822  5.981601e-02 -0.1677211615 -0.221208944  0.31476430
## ast       -0.068234899  3.059386e-02 -0.0576587901 -0.036318866  0.15641394
## alt        0.132973609 -5.003688e-02 -0.0318011591 -0.047556489  0.02960149
## wbc       -0.090971027  8.325622e-02 -0.1625636450 -0.126387695  0.28203400
## hb        -0.006796172 -5.898742e-04  0.0920759203  0.113575537 -0.10008674
## plt        0.108837598 -1.072875e-01 -0.0617947620 -0.008897726 -0.05141042
## tp         0.027755188 -5.917779e-02  0.1792214811  0.077045024 -0.09569402
## alb       -0.043708852  1.711312e-02  0.3427230030  0.277426076 -0.17757371
## eGFR      -0.035233837  9.245822e-02  0.2033327260  0.158352541 -0.04424801
## bun        0.078270164 -3.851523e-02 -0.0528615394 -0.048348133  0.06432638
## cre        0.035292036 -4.830078e-02 -0.0377773508 -0.035203607  0.12673924
## crp       -0.021155411 -6.147842e-02 -0.0005477627 -0.023554640  0.02831096
## pt         0.010651701 -3.238337e-02  0.2486208414  0.237449213 -0.27288392
## aptt      -0.073689313  5.748142e-02 -0.1422777096 -0.179225336  0.13148526
##                    bil         ast          alt         wbc            hb
## age       -0.231662689 -0.16169321  0.003882512 -0.19328848 -0.0584160095
## bmi       -0.050518861 -0.13371394 -0.014092866  0.03516464  0.1827695297
## smoke      0.153597229 -0.02897554 -0.048442083  0.11847396 -0.0175887861
## child_num  0.646016426  0.18680748  0.062284144  0.22927340 -0.2216226477
## gcs       -0.255979204 -0.05048079  0.014555916 -0.09177521  0.0651076127
## cci_num   -0.133648822 -0.06823490  0.132973609 -0.09097103 -0.0067961722
## bt         0.059816013  0.03059386 -0.050036882  0.08325622 -0.0005898742
## sBP       -0.167721161 -0.05765879 -0.031801159 -0.16256365  0.0920759203
## dBP       -0.221208944 -0.03631887 -0.047556489 -0.12638769  0.1135755375
## hr         0.314764295  0.15641394  0.029601488  0.28203400 -0.1000867410
## bil        1.000000000  0.24515899  0.080686623  0.24509282  0.0052262177
## ast        0.245158987  1.00000000  0.713569279  0.23456177 -0.0865216624
## alt        0.080686623  0.71356928  1.000000000  0.11793436  0.0096813469
## wbc        0.245092818  0.23456177  0.117934355  1.00000000 -0.1655430038
## hb         0.005226218 -0.08652166  0.009681347 -0.16554300  1.0000000000
## plt       -0.174243770  0.05342028  0.082966951  0.30106793 -0.1054903948
## tp         0.012518151  0.04101746  0.023922312 -0.09972653  0.4019832235
## alb       -0.255644701 -0.08217463  0.015350205 -0.12925971  0.3884854516
## eGFR      -0.054887252 -0.02619620 -0.037626487 -0.14849069  0.2648169513
## bun        0.030376381  0.03514411  0.147371774  0.16964975 -0.2699004874
## cre        0.155929485  0.06726628  0.040052392  0.11988522 -0.1810436238
## crp        0.302861515  0.24460499  0.206884173  0.17776026 -0.0491261686
## pt        -0.503926564 -0.19129890 -0.098671705 -0.34108680  0.3188720028
## aptt       0.224658658  0.04295108 -0.001659784  0.08477517 -0.1081389027
##                    plt           tp         alb          eGFR         bun
## age        0.092221345 -0.085020719 -0.14246730 -0.4215811597  0.23665128
## bmi       -0.044316671  0.039989635  0.07661253 -0.0001510401 -0.04360478
## smoke     -0.087949228 -0.107215689 -0.09456042 -0.0206615776 -0.07590300
## child_num -0.154240647 -0.206025609 -0.65673209 -0.1809287110  0.06459151
## gcs        0.028328140  0.028327021  0.34680457  0.2471109454 -0.13782097
## cci_num    0.108837598  0.027755188 -0.04370885 -0.0352338370  0.07827016
## bt        -0.107287516 -0.059177786  0.01711312  0.0924582208 -0.03851523
## sBP       -0.061794762  0.179221481  0.34272300  0.2033327260 -0.05286154
## dBP       -0.008897726  0.077045024  0.27742608  0.1583525414 -0.04834813
## hr        -0.051410421 -0.095694019 -0.17757371 -0.0442480079  0.06432638
## bil       -0.174243770  0.012518151 -0.25564470 -0.0548872524  0.03037638
## ast        0.053420281  0.041017463 -0.08217463 -0.0261962029  0.03514411
## alt        0.082966951  0.023922312  0.01535021 -0.0376264873  0.14737177
## wbc        0.301067926 -0.099726529 -0.12925971 -0.1484906941  0.16964975
## hb        -0.105490395  0.401983223  0.38848545  0.2648169513 -0.26990049
## plt        1.000000000  0.089248410  0.06965940 -0.1080944525  0.13865202
## tp         0.089248410  1.000000000  0.45416125  0.1829208844 -0.15122064
## alb        0.069659398  0.454161248  1.00000000  0.2868053223 -0.07656109
## eGFR      -0.108094453  0.182920884  0.28680532  1.0000000000 -0.48614545
## bun        0.138652016 -0.151220644 -0.07656109 -0.4861454533  1.00000000
## cre        0.012580290 -0.075551213 -0.12332435 -0.6435014645  0.59022745
## crp        0.147401481  0.001746056 -0.13218785 -0.0529216750  0.09367348
## pt         0.138877637  0.225869452  0.48651726  0.1217545292 -0.09432596
## aptt      -0.109063601 -0.319854060 -0.42762353 -0.1874785739  0.11294775
##                    cre           crp          pt         aptt
## age        0.093021049 -0.0052936444  0.17741756  0.032688987
## bmi       -0.007317137 -0.0667539682  0.02038415 -0.072455842
## smoke      0.055539958  0.0218561798 -0.15734504  0.054391313
## child_num  0.154261541  0.1978693093 -0.66164510  0.301802176
## gcs       -0.243630164 -0.0279715426  0.22561773 -0.163820383
## cci_num    0.035292036 -0.0211554113  0.01065170 -0.073689313
## bt        -0.048300776 -0.0614784176 -0.03238337  0.057481422
## sBP       -0.037777351 -0.0005477627  0.24862084 -0.142277710
## dBP       -0.035203607 -0.0235546395  0.23744921 -0.179225336
## hr         0.126739240  0.0283109574 -0.27288392  0.131485257
## bil        0.155929485  0.3028615150 -0.50392656  0.224658658
## ast        0.067266283  0.2446049888 -0.19129890  0.042951078
## alt        0.040052392  0.2068841729 -0.09867171 -0.001659784
## wbc        0.119885215  0.1777602553 -0.34108680  0.084775171
## hb        -0.181043624 -0.0491261686  0.31887200 -0.108138903
## plt        0.012580290  0.1474014808  0.13887764 -0.109063601
## tp        -0.075551213  0.0017460561  0.22586945 -0.319854060
## alb       -0.123324350 -0.1321878521  0.48651726 -0.427623526
## eGFR      -0.643501465 -0.0529216750  0.12175453 -0.187478574
## bun        0.590227449  0.0936734761 -0.09432596  0.112947751
## cre        1.000000000  0.0716154896 -0.10466814  0.168143227
## crp        0.071615490  1.0000000000 -0.12359882  0.070965103
## pt        -0.104668145 -0.1235988181  1.00000000 -0.441098246
## aptt       0.168143227  0.0709651031 -0.44109825  1.000000000

相関プロット(拡大推奨)

# 指定した連続変数だけで相関行列を作成します。
corresult <- df_dev %>% 
  dplyr::select(all_of(col_continu)) %>% 
  drop_na() %>% 
  cor(method = "pearson")

# 相関行列を基に相関プロットを作成します。
corrplot <- ggcorrplot(corr = corresult, hc.order = FALSE, method = "square", title = "cor plot",
                       colors = c("#4b61ba", "white", "red"), lab = TRUE)

# プロットを表示します。
corrplot

bilとchild_numlが0.65と相関係数は高め。ただ他は許容できそう。

変数の線形評価

# 連続変数の変数名をまとめる
con_var <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt")

# datadistを計算
ddist <- datadist(df_dev)
options(datadist='ddist')

# プロット結果をまとめるリストを用意する
plot <- list()

for (x in con_var){
  # lrmに投入するformulaを文字列で、for文で順番に指定していく
  formula_tmp <- as.formula(paste("hosp_mortality ~ rcs(", x, ", 4)"))
  fit_tmp <- lrm(formula_tmp, data = df_dev)
  
  # Predict関数の呼び出しを文字列として作成し、それをパースして評価する
  plot_cmd <- paste("plot(Predict(fit_tmp, ", x, "))")
  plot_tmp <- eval(parse(text = plot_cmd))
  plot[[x]] <- plot_tmp
}

# 結果をggarrangeでまとめて表示する
logOR_plot <- ggarrange(plotlist = plot, ncol = 1, nrow = 1)

logOR_plot
## $`1`

## 
## $`2`

## 
## $`3`

## 
## $`4`

## 
## $`5`

## 
## $`6`

## 
## $`7`

## 
## $`8`

## 
## $`9`

## 
## $`10`

## 
## $`11`

## 
## $`12`

## 
## $`13`

## 
## $`14`

## 
## $`15`

## 
## $`16`

## 
## $`17`

## 
## $`18`

## 
## $`19`

## 
## $`20`

## 
## $`21`

## 
## $`22`

## 
## $`23`

## 
## $`24`

## 
## $`25`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

欠測を可視化

その1

naplot_1 <- gg_miss_var(df_dev, show_pct = TRUE)

naplot_1
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## ℹ The deprecated feature was likely used in the naniar package.
##   Please report the issue at <]8;;https://github.com/njtierney/naniar/issueshttps://github.com/njtierney/naniar/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

その2

naplot_2 <- vis_miss(df_dev)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
##   Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
naplot_2

vari_cat.f <-c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality")


vari_numeric <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")
# vari_cat.f (defined as above) in the data_original is changed to factor type and set into data_factor_for_imp
data_factor_for_imp <- as.data.frame(lapply(df_dev[vari_cat.f],as.factor))
#check all the variable is factor
str(data_factor_for_imp)
## 'data.frame':    536 obs. of  32 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": NA 3 1 2 NA NA NA 1 NA 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 NA 2 NA NA NA 2 3 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
#summary numeric type
data_numeric_for_imp <- as.data.frame(lapply(df_dev[vari_numeric],as.numeric))
# check all the variable is numeric
str(data_numeric_for_imp)
## 'data.frame':    536 obs. of  26 variables:
##  $ age      : num  50 80 59 44 67 65 49 73 69 62 ...
##  $ bmi      : num  NA 25.3 NA 14.5 NA ...
##  $ smoke    : num  0 0 0 240 0 0 NA 1000 0 0 ...
##  $ child_num: num  11 6 NA 8 NA NA NA 9 15 11 ...
##  $ gcs      : num  15 15 15 15 15 15 15 15 6 15 ...
##  $ cci_num  : num  4 4 3 4 4 3 4 4 4 4 ...
##  $ map      : num  0 2 0 6 2 2 0 4 14 6 ...
##  $ bt       : num  36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
##  $ sBP      : num  78 88 100 69 66 84 132 90 52 58 ...
##  $ dBP      : num  48 49 56 44 40 46 69 54 37 37 ...
##  $ hr       : num  118 72 110 104 72 127 83 114 98 106 ...
##  $ bil      : num  2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
##  $ ast      : num  217 31 60 129 52 90 154 55 96 121 ...
##  $ alt      : num  63 22 40 46 36 19 109 20 87 63 ...
##  $ wbc      : num  7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
##  $ hb       : num  6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
##  $ plt      : num  115 77 74 162 63 93 132 124 168 84 ...
##  $ tp       : num  6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
##  $ alb      : num  2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
##  $ eGFR     : num  58 57.7 63.7 112.4 123.6 ...
##  $ bun      : num  13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
##  $ cre      : num  1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
##  $ crp      : num  0.92 0.29 0.68 0.29 0.29 0.96 0.29 NA 2.75 0.12 ...
##  $ pt       : num  37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
##  $ aptt     : num  29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
##  $ los      : num  12 7 0 10 3 2 1 8 0 16 ...
# combine the factor type and numeric type
data_for_imp <- cbind(data_factor_for_imp, data_numeric_for_imp) 
#check all the variable type
str(data_for_imp)
## 'data.frame':    536 obs. of  58 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": NA 3 1 2 NA NA NA 1 NA 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 NA 2 NA NA NA 2 3 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
##  $ age           : num  50 80 59 44 67 65 49 73 69 62 ...
##  $ bmi           : num  NA 25.3 NA 14.5 NA ...
##  $ smoke         : num  0 0 0 240 0 0 NA 1000 0 0 ...
##  $ child_num     : num  11 6 NA 8 NA NA NA 9 15 11 ...
##  $ gcs           : num  15 15 15 15 15 15 15 15 6 15 ...
##  $ cci_num       : num  4 4 3 4 4 3 4 4 4 4 ...
##  $ map           : num  0 2 0 6 2 2 0 4 14 6 ...
##  $ bt            : num  36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
##  $ sBP           : num  78 88 100 69 66 84 132 90 52 58 ...
##  $ dBP           : num  48 49 56 44 40 46 69 54 37 37 ...
##  $ hr            : num  118 72 110 104 72 127 83 114 98 106 ...
##  $ bil           : num  2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
##  $ ast           : num  217 31 60 129 52 90 154 55 96 121 ...
##  $ alt           : num  63 22 40 46 36 19 109 20 87 63 ...
##  $ wbc           : num  7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
##  $ hb            : num  6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
##  $ plt           : num  115 77 74 162 63 93 132 124 168 84 ...
##  $ tp            : num  6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
##  $ alb           : num  2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
##  $ eGFR          : num  58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num  13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
##  $ cre           : num  1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
##  $ crp           : num  0.92 0.29 0.68 0.29 0.29 0.96 0.29 NA 2.75 0.12 ...
##  $ pt            : num  37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
##  $ aptt          : num  29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
##  $ los           : num  12 7 0 10 3 2 1 8 0 16 ...

missforestによる単一代入

cores <- detectCores(logical = FALSE) ###並列化処理
registerDoParallel(cores = cores) ###並列化処理
set.seed(2023)
md.pattern(data_for_imp) #see patern the missing

##     sex pad stroke dimentia ch_lung rheumati pept_ulcer dm dm_compli paralysis
## 216   1   1      1        1       1        1          1  1         1         1
## 72    1   1      1        1       1        1          1  1         1         1
## 16    1   1      1        1       1        1          1  1         1         1
## 5     1   1      1        1       1        1          1  1         1         1
## 25    1   1      1        1       1        1          1  1         1         1
## 10    1   1      1        1       1        1          1  1         1         1
## 9     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 9     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 9     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 14    1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 3     1   1      1        1       1        1          1  1         1         1
## 3     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 7     1   1      1        1       1        1          1  1         1         1
## 9     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 6     1   1      1        1       1        1          1  1         1         1
## 3     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 9     1   1      1        1       1        1          1  1         1         1
## 3     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 3     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 4     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 3     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 3     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 5     1   1      1        1       1        1          1  1         1         1
## 4     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
##       0   0      0        0       0        0          0  0         0         0
##     malignancy meta_tumor aids hd hcc alcohol past_rupture antiplate anticoag
## 216          1          1    1  1   1       1            1         1        1
## 72           1          1    1  1   1       1            1         1        1
## 16           1          1    1  1   1       1            1         1        1
## 5            1          1    1  1   1       1            1         1        1
## 25           1          1    1  1   1       1            1         1        1
## 10           1          1    1  1   1       1            1         1        1
## 9            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 9            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 9            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 14           1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 3            1          1    1  1   1       1            1         1        1
## 3            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 7            1          1    1  1   1       1            1         1        1
## 9            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 6            1          1    1  1   1       1            1         1        1
## 3            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 9            1          1    1  1   1       1            1         1        1
## 3            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 3            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 4            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 3            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 3            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 5            1          1    1  1   1       1            1         1        1
## 4            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
##              0          0    0  0   0       0            0         0        0
##     antithro nsaids steroid beta vaso ffp pc albner hosp_mortality age gcs
## 216        1      1       1    1    1   1  1      1              1   1   1
## 72         1      1       1    1    1   1  1      1              1   1   1
## 16         1      1       1    1    1   1  1      1              1   1   1
## 5          1      1       1    1    1   1  1      1              1   1   1
## 25         1      1       1    1    1   1  1      1              1   1   1
## 10         1      1       1    1    1   1  1      1              1   1   1
## 9          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 9          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 9          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 14         1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 3          1      1       1    1    1   1  1      1              1   1   1
## 3          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 7          1      1       1    1    1   1  1      1              1   1   1
## 9          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 6          1      1       1    1    1   1  1      1              1   1   1
## 3          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 9          1      1       1    1    1   1  1      1              1   1   1
## 3          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 3          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 4          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 3          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 3          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 5          1      1       1    1    1   1  1      1              1   1   1
## 4          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
##            0      0       0    0    0   0  0      0              0   0   0
##     cci_num map los wbc hb plt eGFR30 eGFR bun ast alt cre sBP dBP shock hr bil
## 216       1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 72        1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 16        1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 5         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 25        1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 10        1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 9         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 9         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 9         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 14        1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 3         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 3         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 7         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 9         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 6         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 3         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 9         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 3         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 3         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 4         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   0
## 3         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   0
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     1  1   0
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   1   1     0  0   1
## 3         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 2         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   1
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   0
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   0
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   0
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   0
## 1         1   1   1   1  1   1      1    1   1   1   1   1   0   0     0  0   0
## 1         1   1   1   1  1   1      1    1   1   1   1   0   1   1     1  1   1
## 2         1   1   1   1  1   1      1    1   1   1   1   0   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   1   1   0   1   1     1  1   1
## 1         1   1   1   1  1   1      1    1   1   0   0   1   1   1     1  1   0
## 1         1   1   1   1  1   1      1    1   1   0   0   1   1   1     1  1   0
## 1         1   1   1   1  1   1      1    1   1   0   0   1   1   1     1  1   0
## 1         1   1   1   1  1   1      1    1   1   0   0   1   1   1     1  1   0
## 1         1   1   1   1  1   1      1    1   0   0   0   1   1   1     1  1   0
## 1         1   1   1   1  1   1      0    0   1   1   1   0   1   1     1  1   1
## 1         1   1   1   1  1   1      0    0   0   0   0   0   1   1     1  1   0
## 1         1   1   1   0  0   0      1    1   1   1   1   1   1   1     1  1   1
## 5         1   1   1   0  0   0      0    0   0   0   0   0   1   1     1  1   0
## 4         1   1   1   0  0   0      0    0   0   0   0   0   1   1     1  1   0
## 2         1   1   1   0  0   0      0    0   0   0   0   0   0   0     0  0   0
## 1         1   1   1   0  0   0      0    0   0   0   0   0   0   0     0  0   0
##           0   0   0  13 13  13     14   14  14  18  18  18  22  22    24 24  28
##     bt alb crp pt tp child_score aptt child_num smoke bmi barthel    
## 216  1   1   1  1  1           1    1         1     1   1       1   0
## 72   1   1   1  1  1           1    1         1     1   1       0   1
## 16   1   1   1  1  1           1    1         1     1   0       1   1
## 5    1   1   1  1  1           1    1         1     1   0       0   2
## 25   1   1   1  1  1           1    1         1     0   1       1   1
## 10   1   1   1  1  1           1    1         1     0   1       0   2
## 9    1   1   1  1  1           1    1         1     0   0       1   2
## 1    1   1   1  1  1           1    1         1     0   0       0   3
## 9    1   1   1  1  1           1    1         0     1   1       1   1
## 2    1   1   1  1  1           1    1         0     1   1       0   2
## 1    1   1   1  1  1           1    1         0     1   0       1   2
## 1    1   1   1  1  1           1    1         0     1   0       0   3
## 2    1   1   1  1  1           1    1         0     0   0       1   3
## 1    1   1   1  1  1           1    1         0     0   0       0   4
## 9    1   1   1  1  1           1    0         1     1   1       1   1
## 1    1   1   1  1  1           1    0         1     1   1       0   2
## 2    1   1   1  1  1           1    0         1     0   1       1   2
## 1    1   1   1  1  1           1    0         1     0   1       0   3
## 2    1   1   1  1  1           1    0         1     0   0       1   3
## 14   1   1   1  1  1           0    1         0     1   1       1   2
## 2    1   1   1  1  1           0    1         0     1   1       0   3
## 3    1   1   1  1  1           0    1         0     1   0       1   3
## 3    1   1   1  1  1           0    1         0     1   0       0   4
## 1    1   1   1  1  1           0    1         0     0   1       1   3
## 7    1   1   1  1  1           0    1         0     0   0       1   4
## 9    1   1   1  1  1           0    1         0     0   0       0   5
## 1    1   1   1  1  1           0    0         0     1   1       0   4
## 1    1   1   1  1  1           0    0         0     0   0       1   5
## 6    1   1   1  1  0           1    1         1     1   1       1   1
## 3    1   1   1  1  0           1    1         1     1   1       0   2
## 2    1   1   1  1  0           1    1         1     1   0       1   2
## 2    1   1   1  1  0           1    0         1     1   1       1   2
## 9    1   1   1  0  1           1    0         1     1   1       1   2
## 3    1   1   1  0  1           1    0         1     1   1       0   3
## 2    1   1   1  0  1           0    0         0     1   1       1   4
## 2    1   1   0  1  1           1    1         1     1   1       1   1
## 2    1   1   0  1  1           1    1         1     1   1       0   2
## 1    1   1   0  1  1           1    1         1     1   0       1   2
## 1    1   1   0  1  1           1    1         1     0   1       1   2
## 1    1   1   0  1  1           1    1         1     0   0       1   3
## 1    1   1   0  1  1           1    1         1     0   0       0   4
## 1    1   1   0  1  1           1    0         1     0   0       1   4
## 1    1   1   0  1  1           0    1         0     1   0       0   5
## 1    1   1   0  1  1           0    0         0     0   0       0   7
## 1    1   1   0  1  0           1    1         1     1   1       1   2
## 2    1   1   0  0  1           1    0         1     1   1       1   3
## 1    1   0   1  1  1           1    1         1     1   1       0   2
## 1    1   0   1  1  1           1    1         1     0   1       0   3
## 3    1   0   1  1  0           1    1         1     1   1       1   2
## 1    1   0   1  0  1           1    0         1     1   1       0   4
## 1    1   0   1  0  1           0    0         0     1   1       1   5
## 1    1   0   1  0  0           1    0         1     1   1       1   4
## 1    1   0   0  0  0           1    0         1     1   1       1   5
## 4    0   1   1  1  1           1    1         1     1   1       0   2
## 1    0   1   1  1  1           1    1         1     0   1       1   2
## 1    0   1   1  1  1           1    1         1     0   1       0   3
## 1    0   1   1  1  1           1    1         0     1   1       1   2
## 1    0   1   1  1  1           1    1         0     0   0       0   5
## 1    0   1   0  1  1           0    1         0     1   1       0   5
## 1    0   0   1  1  1           1    1         1     0   0       1   4
## 1    1   1   1  1  1           1    1         1     1   0       1   2
## 3    1   1   1  1  1           0    1         0     1   1       1   3
## 1    1   0   0  1  0           1    1         1     0   0       1   6
## 1    1   1   1  1  1           1    1         1     1   1       0   3
## 1    1   1   1  1  1           0    1         0     1   1       1   4
## 3    0   1   1  1  1           1    1         1     1   1       0   6
## 1    0   1   1  1  1           0    1         0     0   0       1   9
## 1    0   1   1  1  0           1    1         1     1   0       1   7
## 1    0   1   0  1  1           1    1         1     1   1       1   6
## 1    0   1   0  1  0           1    1         1     1   1       1   7
## 1    0   0   1  1  0           1    1         1     1   1       1   7
## 1    0   0   1  1  0           1    1         1     1   1       0   8
## 2    0   0   1  1  0           1    1         1     1   0       1   8
## 1    0   0   1  1  0           1    1         1     1   0       0   9
## 1    0   0   1  1  0           0    1         0     1   0       1  10
## 1    0   0   1  0  0           1    0         1     1   1       1   9
## 1    0   1   1  1  1           1    1         0     1   1       1   7
## 1    0   1   1  1  1           1    0         1     1   1       1   7
## 1    0   1   1  1  1           0    1         0     1   1       1   8
## 1    0   1   1  0  1           1    0         1     1   0       1   9
## 1    0   1   0  0  1           1    0         1     1   1       1   9
## 1    1   1   1  1  1           1    1         1     1   1       1   1
## 2    1   1   1  1  1           1    1         1     1   0       1   2
## 1    1   1   1  1  1           0    1         0     1   1       1   3
## 1    1   1   1  0  1           1    0         1     1   1       0   6
## 1    1   1   0  0  1           1    0         1     1   0       0   8
## 1    1   0   0  1  0           0    0         0     0   0       0  12
## 1    1   0   0  0  0           1    0         1     1   1       1   8
## 1    1   0   0  1  0           0    0         0     1   1       1  10
## 1    1   1   0  1  1           1    1         1     1   0       1   5
## 1    1   0   1  0  0           1    0         1     1   1       0  12
## 1    1   1   1  1  1           1    1         1     1   1       1   3
## 5    1   0   0  0  0           1    0         1     1   1       1  15
## 4    1   0   0  0  0           0    0         0     1   1       1  17
## 2    0   0   0  0  0           1    0         1     1   1       1  20
## 1    0   0   0  0  0           0    0         0     1   1       1  22
##     32  34  37 39 45          62   63        81    84  85     140 957
imp.mf <- missForest(data_for_imp,
                     maxiter = 10,
                     ntree = 100,
                     mtry = floor(sqrt(ncol(data_for_imp))),
                     parallelize = "variables",
                     verbose = TRUE)
##   parallelizing over the variables of the input data matrix 'xmis'
##   missForest iteration 1 in progress...
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
## 
##  次のパッケージを付け加えます: 'randomForest'
##  以下のオブジェクトは 'package:dplyr' からマスクされています:
## 
##     combine
##  以下のオブジェクトは 'package:ggplot2' からマスクされています:
## 
##     margin
##  要求されたパッケージ rngtools をロード中です
## done!
##     estimated error(s): 0.3973894 0.02092398 
##     difference(s): 0.0005230651 0.00623834 
##     time: 1.132 seconds
## 
##   missForest iteration 2 in progress...done!
##     estimated error(s): 0.3955067 0.01939846 
##     difference(s): 0.0001509834 0.001399254 
##     time: 1.068 seconds
## 
##   missForest iteration 3 in progress...done!
##     estimated error(s): 0.3945409 0.01947556 
##     difference(s): 6.172943e-05 0.001690765 
##     time: 1.154 seconds
## 
##   missForest iteration 4 in progress...done!
##     estimated error(s): 0.402882 0.01925298 
##     difference(s): 7.942041e-05 0.001340951 
##     time: 1.102 seconds
## 
##   missForest iteration 5 in progress...done!
##     estimated error(s): 0.3981342 0.01953794 
##     difference(s): 7.829139e-05 0.002273787 
##     time: 1.071 seconds
## 
##   missForest iteration 6 in progress...done!
##     estimated error(s): 0.4042122 0.0200077 
##     difference(s): 8.352589e-05 0.00285681 
##     time: 1.14 seconds
summary(imp.mf$ximp)
##  sex     barthel child_score pad     stroke  dimentia ch_lung rheumati
##  M:405   0:188   0: 66       0:535   0:526   0:532    0:525   0:533   
##  F:131   1:167   1:289       1:  1   1: 10   1:  4    1: 11   1:  3   
##          2:181   2:181                                                
##                                                                       
##                                                                       
##                                                                       
##  pept_ulcer dm      dm_compli paralysis malignancy meta_tumor aids    eGFR30 
##  0:477      0:430   0:530     0:536     0:478      0:530      0:536   0:485  
##  1: 59      1:106   1:  6               1: 58      1:  6              1: 51  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  hd      hcc     alcohol past_rupture antiplate anticoag antithro nsaids 
##  0:533   0:425   0:305   0:432        0:532     0:535    0:532    0:531  
##  1:  3   1:111   1:231   1:104        1:  4     1:  1    1:  4    1:  5  
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##  steroid beta    vaso    ffp     pc      albner  shock   hosp_mortality
##  0:534   0:518   0:505   0:399   0:530   0:489   0:318   0:463         
##  1:  2   1: 18   1: 31   1:137   1:  6   1: 47   1:218   1: 73         
##                                                                        
##                                                                        
##                                                                        
##                                                                        
##       age             bmi            smoke          child_num     
##  Min.   :26.00   Min.   :13.72   Min.   :   0.0   Min.   : 5.000  
##  1st Qu.:51.75   1st Qu.:20.81   1st Qu.:   0.0   1st Qu.: 7.205  
##  Median :62.00   Median :22.64   Median : 200.0   Median : 8.265  
##  Mean   :61.23   Mean   :22.88   Mean   : 299.5   Mean   : 8.797  
##  3rd Qu.:70.00   3rd Qu.:24.37   3rd Qu.: 410.0   3rd Qu.:10.000  
##  Max.   :90.00   Max.   :33.95   Max.   :9000.0   Max.   :15.000  
##       gcs           cci_num            map               bt       
##  Min.   : 3.00   Min.   : 3.000   Min.   : 0.000   Min.   :32.80  
##  1st Qu.:15.00   1st Qu.: 4.000   1st Qu.: 0.000   1st Qu.:36.30  
##  Median :15.00   Median : 4.000   Median : 4.000   Median :36.70  
##  Mean   :14.33   Mean   : 4.444   Mean   : 3.493   Mean   :36.65  
##  3rd Qu.:15.00   3rd Qu.: 5.000   3rd Qu.: 4.000   3rd Qu.:37.00  
##  Max.   :15.00   Max.   :12.000   Max.   :68.000   Max.   :41.70  
##       sBP              dBP              hr              bil        
##  Min.   : 50.00   Min.   : 3.00   Min.   : 39.00   Min.   : 0.200  
##  1st Qu.: 79.75   1st Qu.:44.00   1st Qu.: 71.00   1st Qu.: 0.976  
##  Median : 90.00   Median :51.00   Median : 82.00   Median : 1.457  
##  Mean   : 87.55   Mean   :51.12   Mean   : 85.26   Mean   : 2.191  
##  3rd Qu.: 96.00   3rd Qu.:58.01   3rd Qu.: 97.25   3rd Qu.: 2.678  
##  Max.   :150.00   Max.   :93.00   Max.   :222.00   Max.   :15.800  
##       ast              alt              wbc              hb        
##  Min.   : 10.00   Min.   :  7.00   Min.   : 1300   Min.   : 2.800  
##  1st Qu.: 34.00   1st Qu.: 20.00   1st Qu.: 5400   1st Qu.: 7.000  
##  Median : 57.00   Median : 30.00   Median : 7270   Median : 8.400  
##  Mean   : 85.18   Mean   : 43.39   Mean   : 8314   Mean   : 8.588  
##  3rd Qu.: 96.00   3rd Qu.: 47.00   3rd Qu.:10300   3rd Qu.:10.100  
##  Max.   :984.00   Max.   :562.00   Max.   :37700   Max.   :16.500  
##       plt              tp            alb             eGFR        
##  Min.   : 21.0   Min.   :2.20   Min.   :1.100   Min.   :  4.274  
##  1st Qu.: 73.0   1st Qu.:5.50   1st Qu.:2.500   1st Qu.: 48.745  
##  Median : 98.0   Median :6.20   Median :2.881   Median : 67.634  
##  Mean   :110.7   Mean   :6.11   Mean   :2.831   Mean   : 69.269  
##  3rd Qu.:134.0   3rd Qu.:6.70   3rd Qu.:3.200   3rd Qu.: 88.284  
##  Max.   :482.0   Max.   :9.10   Max.   :4.400   Max.   :196.029  
##       bun              cre               crp                pt        
##  Min.   :  2.90   Min.   : 0.3300   Min.   : 0.0000   Min.   :  7.50  
##  1st Qu.: 16.10   1st Qu.: 0.6700   1st Qu.: 0.1200   1st Qu.: 40.58  
##  Median : 23.00   Median : 0.8413   Median : 0.3200   Median : 52.65  
##  Mean   : 28.28   Mean   : 1.0575   Mean   : 0.7937   Mean   : 52.25  
##  3rd Qu.: 35.62   3rd Qu.: 1.1400   3rd Qu.: 0.7865   3rd Qu.: 63.52  
##  Max.   :124.80   Max.   :10.9500   Max.   :18.6370   Max.   :107.90  
##       aptt             los        
##  Min.   : 17.60   Min.   :  0.00  
##  1st Qu.: 26.40   1st Qu.:  6.00  
##  Median : 28.80   Median : 10.00  
##  Mean   : 32.29   Mean   : 13.77  
##  3rd Qu.: 33.23   3rd Qu.: 18.00  
##  Max.   :240.00   Max.   :159.00
md.pattern(imp.mf$ximp)
##  /\     /\
## {  `---'  }
## {  O   O  }
## ==>  V <==  No need for mice. This data set is completely observed.
##  \  \|/  /
##   `-----'

##     sex barthel child_score pad stroke dimentia ch_lung rheumati pept_ulcer dm
## 536   1       1           1   1      1        1       1        1          1  1
##       0       0           0   0      0        0       0        0          0  0
##     dm_compli paralysis malignancy meta_tumor aids eGFR30 hd hcc alcohol
## 536         1         1          1          1    1      1  1   1       1
##             0         0          0          0    0      0  0   0       0
##     past_rupture antiplate anticoag antithro nsaids steroid beta vaso ffp pc
## 536            1         1        1        1      1       1    1    1   1  1
##                0         0        0        0      0       0    0    0   0  0
##     albner shock hosp_mortality age bmi smoke child_num gcs cci_num map bt sBP
## 536      1     1              1   1   1     1         1   1       1   1  1   1
##          0     0              0   0   0     0         0   0       0   0  0   0
##     dBP hr bil ast alt wbc hb plt tp alb eGFR bun cre crp pt aptt los  
## 536   1  1   1   1   1   1  1   1  1   1    1   1   1   1  1    1   1 0
##       0  0   0   0   0   0  0   0  0   0    0   0   0   0  0    0   0 0
#if exclude ID is necessary add the ID
data_imp <- imp.mf$ximp
data_imp$pt_id <- df_dev$pt_id
data_imp$hosp_num<- df_dev$hosp_num
data_imp$hosp_id<- df_dev$hosp_id
data_imp$year<- df_dev$year
#check
str(data_imp)
## 'data.frame':    536 obs. of  62 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 3 3 1 2 3 1 1 1 3 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 2 2 2 2 2 2 3 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
##  $ age           : num  50 80 59 44 67 65 49 73 69 62 ...
##  $ bmi           : num  23.1 25.3 23 14.5 22.5 ...
##  $ smoke         : num  0 0 0 240 0 ...
##  $ child_num     : num  11 6 8.64 8 7.83 ...
##  $ gcs           : num  15 15 15 15 15 15 15 15 6 15 ...
##  $ cci_num       : num  4 4 3 4 4 3 4 4 4 4 ...
##  $ map           : num  0 2 0 6 2 2 0 4 14 6 ...
##  $ bt            : num  36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
##  $ sBP           : num  78 88 100 69 66 84 132 90 52 58 ...
##  $ dBP           : num  48 49 56 44 40 46 69 54 37 37 ...
##  $ hr            : num  118 72 110 104 72 127 83 114 98 106 ...
##  $ bil           : num  2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
##  $ ast           : num  217 31 60 129 52 90 154 55 96 121 ...
##  $ alt           : num  63 22 40 46 36 19 109 20 87 63 ...
##  $ wbc           : num  7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
##  $ hb            : num  6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
##  $ plt           : num  115 77 74 162 63 93 132 124 168 84 ...
##  $ tp            : num  6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
##  $ alb           : num  2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
##  $ eGFR          : num  58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num  13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
##  $ cre           : num  1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
##  $ crp           : num  0.92 0.29 0.68 0.29 0.29 ...
##  $ pt            : num  37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
##  $ aptt          : num  29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
##  $ los           : num  12 7 0 10 3 2 1 8 0 16 ...
##  $ pt_id         : int  1 2 3 4 5 7 8 9 10 11 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ year          : int  2012 2011 2010 2011 2010 2010 2010 2010 2011 2012 ...
#write the csv
#write.csv(imp.mf$ximp, file = "data_after_imputation.csv")

データの確認

str(data_imp)
## 'data.frame':    536 obs. of  62 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 3 3 1 2 3 1 1 1 3 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 2 2 2 2 2 2 3 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
##  $ age           : num  50 80 59 44 67 65 49 73 69 62 ...
##  $ bmi           : num  23.1 25.3 23 14.5 22.5 ...
##  $ smoke         : num  0 0 0 240 0 ...
##  $ child_num     : num  11 6 8.64 8 7.83 ...
##  $ gcs           : num  15 15 15 15 15 15 15 15 6 15 ...
##  $ cci_num       : num  4 4 3 4 4 3 4 4 4 4 ...
##  $ map           : num  0 2 0 6 2 2 0 4 14 6 ...
##  $ bt            : num  36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
##  $ sBP           : num  78 88 100 69 66 84 132 90 52 58 ...
##  $ dBP           : num  48 49 56 44 40 46 69 54 37 37 ...
##  $ hr            : num  118 72 110 104 72 127 83 114 98 106 ...
##  $ bil           : num  2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
##  $ ast           : num  217 31 60 129 52 90 154 55 96 121 ...
##  $ alt           : num  63 22 40 46 36 19 109 20 87 63 ...
##  $ wbc           : num  7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
##  $ hb            : num  6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
##  $ plt           : num  115 77 74 162 63 93 132 124 168 84 ...
##  $ tp            : num  6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
##  $ alb           : num  2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
##  $ eGFR          : num  58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num  13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
##  $ cre           : num  1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
##  $ crp           : num  0.92 0.29 0.68 0.29 0.29 ...
##  $ pt            : num  37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
##  $ aptt          : num  29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
##  $ los           : num  12 7 0 10 3 2 1 8 0 16 ...
##  $ pt_id         : int  1 2 3 4 5 7 8 9 10 11 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ year          : int  2012 2011 2010 2011 2010 2010 2010 2010 2011 2012 ...

データclass調整

dev_imp <-
  data_imp|>  
  mutate(
        hosp_id=as.integer(hosp_id),
        pt_id=as.integer(pt_id),
        hosp_num=as.integer(hosp_num),
        year=as.integer(year),
        age=as.integer(age),
        sex= factor(sex, levels = c("M", "F")),
        smoke= as.integer(smoke),
        barthel= factor(barthel, levels = c("0", "1", "2")),
        child_num= as.integer(round(data_imp$child_num)),
        child_score=factor(child_score, levels = c("0", "1", "2")),
        gcs=as.integer(gcs),
        cci_num=as.integer(cci_num),
        pad=factor(pad),
        stroke=factor(stroke),
        dimentia=factor(dimentia),
        ch_lung=factor(ch_lung),
        rheumati=factor(rheumati),
        pept_ulcer=factor(pept_ulcer),
        dm=factor(dm),
        dm_compli=factor(dm_compli),
        paralysis=factor(paralysis),
        malignancy=factor(malignancy),
        meta_tumor=factor(meta_tumor),
        aids=factor(aids),
        eGFR30=factor(eGFR30),
        hd=factor(hd),
        hcc=factor(hcc),
        alcohol=factor(alcohol),
        past_rupture=factor(past_rupture),
        antiplate=factor(antiplate),
        anticoag=factor(anticoag),
        antithro=factor(antithro),
        nsaids=factor(nsaids),
        steroid=factor(steroid),
        beta=factor(beta),
        vaso=factor(vaso),
        map= as.integer(map),
        ffp=factor(ffp),
        pc=factor(pc),
        albner=factor(albner),
        sBP= as.integer(sBP),
        dBP= as.integer(dBP),
        hr=as.integer(hr),
        shock=factor(shock),
        los=as.integer(los),
          )

dev_imp$child_numが整数になっているかの確認

table(dev_imp$child_num)
## 
##   5   6   7   8   9  10  11  12  13  14  15 
##  20  46  78 129  83  67  48  38  20   3   4

作成データの確認

dfSummary(dev_imp) %>% view()
## Switching method to 'browser'
## Output file written: /var/folders/n9/tf_wmwpn3gl2t7l1cz4tqk000000gn/T//RtmpLMQ7kI/file12e86492e7d7.html
str(dev_imp)
## 'data.frame':    536 obs. of  62 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 3 3 1 2 3 1 1 1 3 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 2 2 2 2 2 2 3 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
##  $ age           : int  50 80 59 44 67 65 49 73 69 62 ...
##  $ bmi           : num  23.1 25.3 23 14.5 22.5 ...
##  $ smoke         : int  0 0 0 240 0 0 245 1000 0 0 ...
##  $ child_num     : int  11 6 9 8 8 9 7 9 15 11 ...
##  $ gcs           : int  15 15 15 15 15 15 15 15 6 15 ...
##  $ cci_num       : int  4 4 3 4 4 3 4 4 4 4 ...
##  $ map           : int  0 2 0 6 2 2 0 4 14 6 ...
##  $ bt            : num  36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
##  $ sBP           : int  78 88 100 69 66 84 132 90 52 58 ...
##  $ dBP           : int  48 49 56 44 40 46 69 54 37 37 ...
##  $ hr            : int  118 72 110 104 72 127 83 114 98 106 ...
##  $ bil           : num  2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
##  $ ast           : num  217 31 60 129 52 90 154 55 96 121 ...
##  $ alt           : num  63 22 40 46 36 19 109 20 87 63 ...
##  $ wbc           : num  7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
##  $ hb            : num  6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
##  $ plt           : num  115 77 74 162 63 93 132 124 168 84 ...
##  $ tp            : num  6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
##  $ alb           : num  2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
##  $ eGFR          : num  58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num  13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
##  $ cre           : num  1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
##  $ crp           : num  0.92 0.29 0.68 0.29 0.29 ...
##  $ pt            : num  37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
##  $ aptt          : num  29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
##  $ los           : int  12 7 0 10 3 2 1 8 0 16 ...
##  $ pt_id         : int  1 2 3 4 5 7 8 9 10 11 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ year          : int  2012 2011 2010 2011 2010 2010 2010 2010 2011 2012 ...

代入後、開発コホートtableone作成

# Create your table
dev_imp %>%  
  select(c(col_cont, col_fact)) %>% 
  CreateTableOne(vars = c(col_cont, col_fact), strata="hosp_mortality",factorVars = col_fact, addOverall = T) -> tableone_dev_imp


# Print your table
print(tableone_dev_imp, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE) 
##                         Stratified by hosp_mortality
##                          Overall           0                 1                
##   n                          536               463                73          
##   age (mean (SD))          61.23 (13.06)     60.82 (13.02)     63.84 (13.09)  
##   bmi (mean (SD))          22.88 (3.43)      22.97 (3.48)      22.29 (3.03)   
##   smoke (mean (SD))       299.43 (527.67)   285.29 (366.79)   389.15 (1093.71)
##   child_num (mean (SD))     8.80 (2.07)       8.46 (1.87)      10.93 (2.02)   
##   gcs (mean (SD))          14.33 (2.20)      14.72 (1.27)      11.90 (4.33)   
##   cci_num (mean (SD))       4.44 (1.20)       4.43 (1.08)       4.53 (1.78)   
##   map (mean (SD))           3.49 (4.51)       3.10 (4.35)       5.96 (4.71)   
##   bt (mean (SD))           36.65 (0.74)      36.68 (0.65)      36.48 (1.16)   
##   sBP (mean (SD))          87.53 (15.29)     90.31 (13.39)     69.89 (14.90)  
##   dBP (mean (SD))          51.10 (11.74)     52.94 (10.82)     39.40 (10.66)  
##   hr (mean (SD))           85.24 (20.74)     82.85 (18.81)    100.38 (25.61)  
##   bil (mean (SD))           2.19 (2.04)       1.95 (1.76)       3.71 (2.86)   
##   ast (mean (SD))          85.18 (91.14)     79.45 (86.87)    121.52 (108.39) 
##   alt (mean (SD))          43.39 (50.13)     41.70 (49.33)     54.11 (54.07)  
##   wbc (mean (SD))        8313.80 (4357.53) 8158.33 (4161.96) 9299.86 (5366.73)
##   hb (mean (SD))            8.59 (2.45)       8.66 (2.46)       8.11 (2.38)   
##   plt (mean (SD))         110.70 (57.12)    109.14 (51.34)    120.60 (84.92)  
##   tp (mean (SD))            6.11 (0.90)       6.16 (0.84)       5.77 (1.16)   
##   alb (mean (SD))           2.83 (0.58)       2.91 (0.53)       2.32 (0.62)   
##   eGFR (mean (SD))         69.27 (30.50)     72.88 (30.17)     46.36 (21.32)  
##   bun (mean (SD))          28.28 (18.46)     27.35 (17.59)     34.14 (22.50)  
##   cre (mean (SD))           1.06 (0.91)       0.99 (0.90)       1.46 (0.82)   
##   crp (mean (SD))           0.79 (1.61)       0.69 (1.54)       1.44 (1.90)   
##   pt (mean (SD))           52.25 (16.69)     53.92 (15.69)     41.61 (18.92)  
##   aptt (mean (SD))         32.29 (16.40)     30.68 (11.57)     42.51 (31.88)  
##   los (mean (SD))          13.77 (15.28)     14.21 (14.24)     11.01 (20.59)  
##   sex = F (%)                131 ( 24.4)       111 ( 24.0)        20 ( 27.4)  
##   barthel (%)                                                                 
##      0                       188 ( 35.1)       181 ( 39.1)         7 (  9.6)  
##      1                       167 ( 31.2)       155 ( 33.5)        12 ( 16.4)  
##      2                       181 ( 33.8)       127 ( 27.4)        54 ( 74.0)  
##   child_score (%)                                                             
##      0                        66 ( 12.3)        65 ( 14.0)         1 (  1.4)  
##      1                       289 ( 53.9)       270 ( 58.3)        19 ( 26.0)  
##      2                       181 ( 33.8)       128 ( 27.6)        53 ( 72.6)  
##   pad = 1 (%)                  1 (  0.2)         1 (  0.2)         0 (  0.0)  
##   stroke = 1 (%)              10 (  1.9)        10 (  2.2)         0 (  0.0)  
##   dimentia = 1 (%)             4 (  0.7)         4 (  0.9)         0 (  0.0)  
##   ch_lung = 1 (%)             11 (  2.1)         9 (  1.9)         2 (  2.7)  
##   rheumati = 1 (%)             3 (  0.6)         3 (  0.6)         0 (  0.0)  
##   pept_ulcer = 1 (%)          59 ( 11.0)        58 ( 12.5)         1 (  1.4)  
##   dm = 1 (%)                 106 ( 19.8)        98 ( 21.2)         8 ( 11.0)  
##   dm_compli = 1 (%)            6 (  1.1)         6 (  1.3)         0 (  0.0)  
##   paralysis = 0 (%)          536 (100.0)       463 (100.0)        73 (100.0)  
##   malignancy = 1 (%)          58 ( 10.8)        46 (  9.9)        12 ( 16.4)  
##   meta_tumor = 1 (%)           6 (  1.1)         2 (  0.4)         4 (  5.5)  
##   aids = 0 (%)               536 (100.0)       463 (100.0)        73 (100.0)  
##   eGFR30 = 1 (%)              51 (  9.5)        34 (  7.3)        17 ( 23.3)  
##   hd = 1 (%)                   3 (  0.6)         3 (  0.6)         0 (  0.0)  
##   hcc = 1 (%)                111 ( 20.7)        95 ( 20.5)        16 ( 21.9)  
##   alcohol = 1 (%)            231 ( 43.1)       205 ( 44.3)        26 ( 35.6)  
##   past_rupture = 1 (%)       104 ( 19.4)        96 ( 20.7)         8 ( 11.0)  
##   antiplate = 1 (%)            4 (  0.7)         3 (  0.6)         1 (  1.4)  
##   anticoag = 1 (%)             1 (  0.2)         1 (  0.2)         0 (  0.0)  
##   antithro = 1 (%)             4 (  0.7)         3 (  0.6)         1 (  1.4)  
##   nsaids = 1 (%)               5 (  0.9)         5 (  1.1)         0 (  0.0)  
##   steroid = 1 (%)              2 (  0.4)         1 (  0.2)         1 (  1.4)  
##   beta = 1 (%)                18 (  3.4)        18 (  3.9)         0 (  0.0)  
##   vaso = 1 (%)                31 (  5.8)        11 (  2.4)        20 ( 27.4)  
##   ffp = 1 (%)                137 ( 25.6)       100 ( 21.6)        37 ( 50.7)  
##   pc = 1 (%)                   6 (  1.1)         3 (  0.6)         3 (  4.1)  
##   albner = 1 (%)              47 (  8.8)        32 (  6.9)        15 ( 20.5)  
##   shock = 1 (%)              218 ( 40.7)       156 ( 33.7)        62 ( 84.9)  
##   hosp_mortality = 1 (%)      73 ( 13.6)         0 (  0.0)        73 (100.0)  
##                         Stratified by hosp_mortality
##                          p      test SMD    Missing
##   n                                                
##   age (mean (SD))         0.066       0.231 0.0    
##   bmi (mean (SD))         0.112       0.210 0.0    
##   smoke (mean (SD))       0.118       0.127 0.0    
##   child_num (mean (SD))  <0.001       1.269 0.0    
##   gcs (mean (SD))        <0.001       0.882 0.0    
##   cci_num (mean (SD))     0.490       0.071 0.0    
##   map (mean (SD))        <0.001       0.630 0.0    
##   bt (mean (SD))          0.033       0.212 0.0    
##   sBP (mean (SD))        <0.001       1.442 0.0    
##   dBP (mean (SD))        <0.001       1.261 0.0    
##   hr (mean (SD))         <0.001       0.780 0.0    
##   bil (mean (SD))        <0.001       0.741 0.0    
##   ast (mean (SD))        <0.001       0.428 0.0    
##   alt (mean (SD))         0.049       0.240 0.0    
##   wbc (mean (SD))         0.037       0.238 0.0    
##   hb (mean (SD))          0.073       0.229 0.0    
##   plt (mean (SD))         0.111       0.163 0.0    
##   tp (mean (SD))          0.001       0.387 0.0    
##   alb (mean (SD))        <0.001       1.028 0.0    
##   eGFR (mean (SD))       <0.001       1.015 0.0    
##   bun (mean (SD))         0.003       0.336 0.0    
##   cre (mean (SD))        <0.001       0.546 0.0    
##   crp (mean (SD))        <0.001       0.432 0.0    
##   pt (mean (SD))         <0.001       0.709 0.0    
##   aptt (mean (SD))       <0.001       0.494 0.0    
##   los (mean (SD))         0.097       0.180 0.0    
##   sex = F (%)             0.627       0.078 0.0    
##   barthel (%)            <0.001       1.079 0.0    
##      0                                             
##      1                                             
##      2                                             
##   child_score (%)        <0.001       1.046 0.0    
##      0                                             
##      1                                             
##      2                                             
##   pad = 1 (%)             1.000       0.066 0.0    
##   stroke = 1 (%)          0.422       0.210 0.0    
##   dimentia = 1 (%)        0.948       0.132 0.0    
##   ch_lung = 1 (%)         0.999       0.053 0.0    
##   rheumati = 1 (%)        1.000       0.114 0.0    
##   pept_ulcer = 1 (%)      0.009       0.450 0.0    
##   dm = 1 (%)              0.061       0.281 0.0    
##   dm_compli = 1 (%)       0.704       0.162 0.0    
##   paralysis = 0 (%)          NA      <0.001 0.0    
##   malignancy = 1 (%)      0.144       0.193 0.0    
##   meta_tumor = 1 (%)      0.001       0.301 0.0    
##   aids = 0 (%)               NA      <0.001 0.0    
##   eGFR30 = 1 (%)         <0.001       0.454 0.0    
##   hd = 1 (%)              1.000       0.114 0.0    
##   hcc = 1 (%)             0.905       0.034 0.0    
##   alcohol = 1 (%)         0.207       0.178 0.0    
##   past_rupture = 1 (%)    0.071       0.270 0.0    
##   antiplate = 1 (%)       1.000       0.072 0.0    
##   anticoag = 1 (%)        1.000       0.066 0.0    
##   antithro = 1 (%)        1.000       0.072 0.0    
##   nsaids = 1 (%)          0.813       0.148 0.0    
##   steroid = 1 (%)         0.638       0.130 0.0    
##   beta = 1 (%)            0.173       0.284 0.0    
##   vaso = 1 (%)           <0.001       0.751 0.0    
##   ffp = 1 (%)            <0.001       0.635 0.0    
##   pc = 1 (%)              0.044       0.229 0.0    
##   albner = 1 (%)         <0.001       0.404 0.0    
##   shock = 1 (%)          <0.001       1.222 0.0    
##   hosp_mortality = 1 (%) <0.001         NaN 0.0

代入後の開発データでもtbl_summaryでmedian+IQRも準備する

# specify your data and variables
tbl_summary(data = dev_imp, 
            by = "hosp_mortality",
            type = list(gcs ~ "continuous", year ~ "categorical"),
            statistic = all_continuous() ~ "{median} ({p25}, {p75})",
            digits = all_continuous() ~ c(0, 2))
Characteristic 0, N = 4631 1, N = 731
sex
    M 352 (76%) 53 (73%)
    F 111 (24%) 20 (27%)
barthel
    0 181 (39%) 7 (9.6%)
    1 155 (33%) 12 (16%)
    2 127 (27%) 54 (74%)
child_score
    0 65 (14%) 1 (1.4%)
    1 270 (58%) 19 (26%)
    2 128 (28%) 53 (73%)
pad
    0 462 (100%) 73 (100%)
    1 1 (0.2%) 0 (0%)
stroke
    0 453 (98%) 73 (100%)
    1 10 (2.2%) 0 (0%)
dimentia
    0 459 (99%) 73 (100%)
    1 4 (0.9%) 0 (0%)
ch_lung
    0 454 (98%) 71 (97%)
    1 9 (1.9%) 2 (2.7%)
rheumati
    0 460 (99%) 73 (100%)
    1 3 (0.6%) 0 (0%)
pept_ulcer
    0 405 (87%) 72 (99%)
    1 58 (13%) 1 (1.4%)
dm
    0 365 (79%) 65 (89%)
    1 98 (21%) 8 (11%)
dm_compli
    0 457 (99%) 73 (100%)
    1 6 (1.3%) 0 (0%)
paralysis
    0 463 (100%) 73 (100%)
malignancy
    0 417 (90%) 61 (84%)
    1 46 (9.9%) 12 (16%)
meta_tumor
    0 461 (100%) 69 (95%)
    1 2 (0.4%) 4 (5.5%)
aids
    0 463 (100%) 73 (100%)
eGFR30
    0 429 (93%) 56 (77%)
    1 34 (7.3%) 17 (23%)
hd
    0 460 (99%) 73 (100%)
    1 3 (0.6%) 0 (0%)
hcc
    0 368 (79%) 57 (78%)
    1 95 (21%) 16 (22%)
alcohol
    0 258 (56%) 47 (64%)
    1 205 (44%) 26 (36%)
past_rupture
    0 367 (79%) 65 (89%)
    1 96 (21%) 8 (11%)
antiplate
    0 460 (99%) 72 (99%)
    1 3 (0.6%) 1 (1.4%)
anticoag
    0 462 (100%) 73 (100%)
    1 1 (0.2%) 0 (0%)
antithro
    0 460 (99%) 72 (99%)
    1 3 (0.6%) 1 (1.4%)
nsaids
    0 458 (99%) 73 (100%)
    1 5 (1.1%) 0 (0%)
steroid
    0 462 (100%) 72 (99%)
    1 1 (0.2%) 1 (1.4%)
beta
    0 445 (96%) 73 (100%)
    1 18 (3.9%) 0 (0%)
vaso
    0 452 (98%) 53 (73%)
    1 11 (2.4%) 20 (27%)
ffp
    0 363 (78%) 36 (49%)
    1 100 (22%) 37 (51%)
pc
    0 460 (99%) 70 (96%)
    1 3 (0.6%) 3 (4.1%)
albner
    0 431 (93%) 58 (79%)
    1 32 (6.9%) 15 (21%)
shock
    0 307 (66%) 11 (15%)
    1 156 (34%) 62 (85%)
age 62 (51.00, 70) 66 (55.00, 74)
bmi 23 (20.81, 25) 23 (20.96, 24)
smoke 200 (0.00, 410) 180 (0.00, 398)
child_num 8 (7.00, 10) 11 (9.00, 12)
gcs 15 (15.00, 15) 15 (9.00, 15)
cci_num 4 (4.00, 5) 4 (4.00, 5)
map 4 (0.00, 4) 6 (4.00, 8)
bt 37 (36.30, 37) 36 (36.00, 37)
sBP 90 (82.00, 98) 65 (59.00, 79)
dBP 53 (46.00, 60) 39 (33.00, 46)
hr 80 (70.00, 93) 100 (89.00, 112)
bil 1 (0.90, 2) 3 (1.66, 5)
ast 54 (33.00, 91) 84 (49.00, 153)
alt 29 (20.00, 45) 33 (25.00, 64)
wbc 7,170 (5,315.00, 10,240) 7,990 (6,200.00, 10,800)
hb 8 (7.00, 10) 8 (6.60, 10)
plt 98 (73.00, 134) 98 (76.00, 130)
tp 6 (5.60, 7) 6 (5.20, 6)
alb 3 (2.60, 3) 2 (1.80, 3)
eGFR 71 (52.42, 92) 44 (30.40, 58)
bun 23 (15.50, 35) 28 (18.10, 46)
cre 1 (0.66, 1) 1 (0.99, 2)
crp 0 (0.11, 1) 1 (0.40, 2)
pt 54 (43.00, 64) 40 (26.80, 55)
aptt 28 (26.00, 32) 32 (29.10, 44)
los 10 (7.00, 18) 4 (1.00, 16)
pt_id 359 (149.50, 583) 426 (235.00, 599)
hosp_num
    1 404 (87%) 68 (93%)
    2 43 (9.3%) 5 (6.8%)
    3 15 (3.2%) 0 (0%)
    4 1 (0.2%) 0 (0%)
hosp_id 1,010 (1,003.00, 1,024) 1,017 (1,005.00, 1,024)
year
    2010 68 (15%) 8 (11%)
    2011 70 (15%) 9 (12%)
    2012 72 (16%) 16 (22%)
    2013 55 (12%) 15 (21%)
    2014 69 (15%) 8 (11%)
    2015 67 (14%) 5 (6.8%)
    2016 62 (13%) 12 (16%)
1 n (%); Median (IQR)

開発コホート 連続量可視化

dev_imp |>  #全体
  select(col_continuous) |> 
  pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 5) +
  theme_bw()+
  theme(text = element_text(size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# 連続変数の変数名をまとめる
con_var <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt")

# datadistを計算
ddist <- datadist(dev_imp)
options(datadist='ddist')

# プロット結果をまとめるリストを用意する
plot <- list()

for (x in con_var){
  # lrmに投入するformulaを文字列で、for文で順番に指定していく
  formula_tmp <- as.formula(paste("hosp_mortality ~ rcs(", x, ", 4)"))
  fit_tmp <- lrm(formula_tmp, data = dev_imp)
  
  # Predict関数の呼び出しを文字列として作成し、それをパースして評価する
  plot_cmd <- paste("plot(Predict(fit_tmp, ", x, "))")
  plot_tmp <- eval(parse(text = plot_cmd))
  plot[[x]] <- plot_tmp
}

# 結果をggarrangeでまとめて表示する
logOR_plot <- ggarrange(plotlist = plot, ncol = 1, nrow = 1)

logOR_plot
## $`1`

## 
## $`2`

## 
## $`3`

## 
## $`4`

## 
## $`5`

## 
## $`6`

## 
## $`7`

## 
## $`8`

## 
## $`9`

## 
## $`10`

## 
## $`11`

## 
## $`12`

## 
## $`13`

## 
## $`14`

## 
## $`15`

## 
## $`16`

## 
## $`17`

## 
## $`18`

## 
## $`19`

## 
## $`20`

## 
## $`21`

## 
## $`22`

## 
## $`23`

## 
## $`24`

## 
## $`25`

## 
## attr(,"class")
## [1] "list"      "ggarrange"

以下のCut offを設ける ・age:60以上・未満 60未満:0, 60以上:1 ・bmi:25以上・未満 25未満:0, 25以上:1 ・smoke:cut offなし →むしろ変数から削除 ・child_numl:cut offなし→むしろ変数から削除 ・gcs:12以上・未満 12未満:0, 12以上:1 ・cci:6点以上・未満 6未満:0, 6以上:1 ・bt:37度以上・未満 37度未満:0,37度以上:1 ・sBP:80以上・未満 80以上:0,80未満:1 ・dBP:50以上・未満 50以上:0,50未満:1 ・HR:100以上・未満 100未満:0,100以上:1 ・bil:5以上・未満: 5未満:0,5以上:1 ・ast:200以上・未満 200未満:0, 200以上:1 ・alt:100以上・未満 100未満:0, 100以上:1 ・wbc:12000以上・未満 12000未満:0, 12000以上:1  ・hb:8以上・未満 9以上:0, 8未満:1 ・plt:100以上・未満 100以上:0, 100未満:1  ・tp:6以上・未満 6以上:0, 6未満:1 ・alb:2.8以上・未満 2.8以上:0, 2.8未満:1 ・Cre:1.5以上・未満 1.5未満:0, Cre1.5以上:1 ・CRP:2以上・未満 2未満:0, CRP2以上:1 ・pt:50以上・未満 50以上:0, 50未満:1 ・aptt:50以上・未満 aptt50未満:0,50以上:1

上記を目安に新しく列を追加

#新規カテゴリ列を作成
dev_imp$age_cate <- ifelse(dev_imp$age >= 60, 1, 0)
dev_imp$bmi_cate <- ifelse(dev_imp$bmi >= 25, 1, 0) # bmiは25以上:1, 25未満:0
dev_imp$gcs_cate <- ifelse(dev_imp$gcs >= 12, 1, 0)
dev_imp$cci_cate <- ifelse(dev_imp$cci_num >= 6, 1, 0)
dev_imp$bt_cate <- ifelse(dev_imp$bt >= 37, 1, 0)
dev_imp$sBP_cate <- ifelse(dev_imp$sBP < 80, 1, 0)
dev_imp$dBP_cate <- ifelse(dev_imp$dBP < 50, 1, 0)
dev_imp$hr_cate <- ifelse(dev_imp$hr >= 100, 1, 0)
dev_imp$bil_cate <- ifelse(dev_imp$bil >= 5, 1, 0)
dev_imp$ast_cate <- ifelse(dev_imp$ast >= 200, 1, 0)
dev_imp$alt_cate <- ifelse(dev_imp$alt >= 100, 1, 0)
dev_imp$wbc_cate <- ifelse(dev_imp$wbc >= 12000, 1, 0)
dev_imp$hb_cate <- ifelse(dev_imp$hb < 8, 1, 0)
dev_imp$plt_cate <- ifelse(dev_imp$plt < 100, 1, 0) # pltは100未満:1、100以上:0
dev_imp$tp_cate <- ifelse(dev_imp$tp < 6, 1, 0)
dev_imp$alb_cate <- ifelse(dev_imp$alb < 2.8, 1, 0)
dev_imp$cre_cate <- ifelse(dev_imp$cre >= 1.5, 1, 0)
dev_imp$crp_cate <- ifelse(dev_imp$crp >= 2, 1, 0)
dev_imp$pt_cate <- ifelse(dev_imp$pt < 50, 1, 0)
dev_imp$aptt_cate <- ifelse(dev_imp$aptt >= 50, 1, 0) # apttは50以上:1, 50未満:0

カテゴリー変換後のtable1作成

Createtableone

col_fact_cate=c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality","age_cate","bmi_cate","gcs_cate","cci_cate","bt_cate","sBP_cate","dBP_cate","hr_cate","bil_cate","ast_cate","alt_cate","wbc_cate","hb_cate","plt_cate","tp_cate","alb_cate","cre_cate","crp_cate","pt_cate","aptt_cate")

# Create your table
dev_imp %>%  
  select(c(col_fact_cate)) %>% 
  CreateTableOne(vars = c(col_fact_cate), strata="hosp_mortality",factorVars = col_fact_cate, addOverall = T) -> tableone_dev_imp_cate
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_fact_cate)
## 
##   # Now:
##   data %>% select(all_of(col_fact_cate))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone_dev_imp_cate, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE) 
##                         Stratified by hosp_mortality
##                          Overall      0            1           p      test
##   n                      536          463          73                     
##   sex = F (%)            131 ( 24.4)  111 ( 24.0)  20 ( 27.4)   0.627     
##   barthel (%)                                                  <0.001     
##      0                   188 ( 35.1)  181 ( 39.1)   7 (  9.6)             
##      1                   167 ( 31.2)  155 ( 33.5)  12 ( 16.4)             
##      2                   181 ( 33.8)  127 ( 27.4)  54 ( 74.0)             
##   child_score (%)                                              <0.001     
##      0                    66 ( 12.3)   65 ( 14.0)   1 (  1.4)             
##      1                   289 ( 53.9)  270 ( 58.3)  19 ( 26.0)             
##      2                   181 ( 33.8)  128 ( 27.6)  53 ( 72.6)             
##   pad = 1 (%)              1 (  0.2)    1 (  0.2)   0 (  0.0)   1.000     
##   stroke = 1 (%)          10 (  1.9)   10 (  2.2)   0 (  0.0)   0.422     
##   dimentia = 1 (%)         4 (  0.7)    4 (  0.9)   0 (  0.0)   0.948     
##   ch_lung = 1 (%)         11 (  2.1)    9 (  1.9)   2 (  2.7)   0.999     
##   rheumati = 1 (%)         3 (  0.6)    3 (  0.6)   0 (  0.0)   1.000     
##   pept_ulcer = 1 (%)      59 ( 11.0)   58 ( 12.5)   1 (  1.4)   0.009     
##   dm = 1 (%)             106 ( 19.8)   98 ( 21.2)   8 ( 11.0)   0.061     
##   dm_compli = 1 (%)        6 (  1.1)    6 (  1.3)   0 (  0.0)   0.704     
##   paralysis = 0 (%)      536 (100.0)  463 (100.0)  73 (100.0)      NA     
##   malignancy = 1 (%)      58 ( 10.8)   46 (  9.9)  12 ( 16.4)   0.144     
##   meta_tumor = 1 (%)       6 (  1.1)    2 (  0.4)   4 (  5.5)   0.001     
##   aids = 0 (%)           536 (100.0)  463 (100.0)  73 (100.0)      NA     
##   eGFR30 = 1 (%)          51 (  9.5)   34 (  7.3)  17 ( 23.3)  <0.001     
##   hd = 1 (%)               3 (  0.6)    3 (  0.6)   0 (  0.0)   1.000     
##   hcc = 1 (%)            111 ( 20.7)   95 ( 20.5)  16 ( 21.9)   0.905     
##   alcohol = 1 (%)        231 ( 43.1)  205 ( 44.3)  26 ( 35.6)   0.207     
##   past_rupture = 1 (%)   104 ( 19.4)   96 ( 20.7)   8 ( 11.0)   0.071     
##   antiplate = 1 (%)        4 (  0.7)    3 (  0.6)   1 (  1.4)   1.000     
##   anticoag = 1 (%)         1 (  0.2)    1 (  0.2)   0 (  0.0)   1.000     
##   antithro = 1 (%)         4 (  0.7)    3 (  0.6)   1 (  1.4)   1.000     
##   nsaids = 1 (%)           5 (  0.9)    5 (  1.1)   0 (  0.0)   0.813     
##   steroid = 1 (%)          2 (  0.4)    1 (  0.2)   1 (  1.4)   0.638     
##   beta = 1 (%)            18 (  3.4)   18 (  3.9)   0 (  0.0)   0.173     
##   vaso = 1 (%)            31 (  5.8)   11 (  2.4)  20 ( 27.4)  <0.001     
##   ffp = 1 (%)            137 ( 25.6)  100 ( 21.6)  37 ( 50.7)  <0.001     
##   pc = 1 (%)               6 (  1.1)    3 (  0.6)   3 (  4.1)   0.044     
##   albner = 1 (%)          47 (  8.8)   32 (  6.9)  15 ( 20.5)  <0.001     
##   shock = 1 (%)          218 ( 40.7)  156 ( 33.7)  62 ( 84.9)  <0.001     
##   hosp_mortality = 1 (%)  73 ( 13.6)    0 (  0.0)  73 (100.0)  <0.001     
##   age_cate = 1 (%)       303 ( 56.5)  259 ( 55.9)  44 ( 60.3)   0.570     
##   bmi_cate = 1 (%)       111 ( 20.7)  101 ( 21.8)  10 ( 13.7)   0.151     
##   gcs_cate = 1 (%)       504 ( 94.0)  452 ( 97.6)  52 ( 71.2)  <0.001     
##   cci_cate = 1 (%)        78 ( 14.6)   65 ( 14.0)  13 ( 17.8)   0.503     
##   bt_cate = 1 (%)        147 ( 27.4)  131 ( 28.3)  16 ( 21.9)   0.320     
##   sBP_cate = 1 (%)       134 ( 25.0)   78 ( 16.8)  56 ( 76.7)  <0.001     
##   dBP_cate = 1 (%)       229 ( 42.7)  167 ( 36.1)  62 ( 84.9)  <0.001     
##   hr_cate = 1 (%)        120 ( 22.4)   81 ( 17.5)  39 ( 53.4)  <0.001     
##   bil_cate = 1 (%)        41 (  7.6)   23 (  5.0)  18 ( 24.7)  <0.001     
##   ast_cate = 1 (%)        40 (  7.5)   26 (  5.6)  14 ( 19.2)  <0.001     
##   alt_cate = 1 (%)        36 (  6.7)   26 (  5.6)  10 ( 13.7)   0.021     
##   wbc_cate = 1 (%)        89 ( 16.6)   73 ( 15.8)  16 ( 21.9)   0.253     
##   hb_cate = 1 (%)        230 ( 42.9)  193 ( 41.7)  37 ( 50.7)   0.188     
##   plt_cate = 1 (%)       273 ( 50.9)  235 ( 50.8)  38 ( 52.1)   0.936     
##   tp_cate = 1 (%)        213 ( 39.7)  176 ( 38.0)  37 ( 50.7)   0.054     
##   alb_cate = 1 (%)       230 ( 42.9)  173 ( 37.4)  57 ( 78.1)  <0.001     
##   cre_cate = 1 (%)        62 ( 11.6)   37 (  8.0)  25 ( 34.2)  <0.001     
##   crp_cate = 1 (%)        47 (  8.8)   30 (  6.5)  17 ( 23.3)  <0.001     
##   pt_cate = 1 (%)        248 ( 46.3)  201 ( 43.4)  47 ( 64.4)   0.001     
##   aptt_cate = 1 (%)       22 (  4.1)    9 (  1.9)  13 ( 17.8)  <0.001     
##                         Stratified by hosp_mortality
##                          SMD    Missing
##   n                                    
##   sex = F (%)             0.078 0.0    
##   barthel (%)             1.079 0.0    
##      0                                 
##      1                                 
##      2                                 
##   child_score (%)         1.046 0.0    
##      0                                 
##      1                                 
##      2                                 
##   pad = 1 (%)             0.066 0.0    
##   stroke = 1 (%)          0.210 0.0    
##   dimentia = 1 (%)        0.132 0.0    
##   ch_lung = 1 (%)         0.053 0.0    
##   rheumati = 1 (%)        0.114 0.0    
##   pept_ulcer = 1 (%)      0.450 0.0    
##   dm = 1 (%)              0.281 0.0    
##   dm_compli = 1 (%)       0.162 0.0    
##   paralysis = 0 (%)      <0.001 0.0    
##   malignancy = 1 (%)      0.193 0.0    
##   meta_tumor = 1 (%)      0.301 0.0    
##   aids = 0 (%)           <0.001 0.0    
##   eGFR30 = 1 (%)          0.454 0.0    
##   hd = 1 (%)              0.114 0.0    
##   hcc = 1 (%)             0.034 0.0    
##   alcohol = 1 (%)         0.178 0.0    
##   past_rupture = 1 (%)    0.270 0.0    
##   antiplate = 1 (%)       0.072 0.0    
##   anticoag = 1 (%)        0.066 0.0    
##   antithro = 1 (%)        0.072 0.0    
##   nsaids = 1 (%)          0.148 0.0    
##   steroid = 1 (%)         0.130 0.0    
##   beta = 1 (%)            0.284 0.0    
##   vaso = 1 (%)            0.751 0.0    
##   ffp = 1 (%)             0.635 0.0    
##   pc = 1 (%)              0.229 0.0    
##   albner = 1 (%)          0.404 0.0    
##   shock = 1 (%)           1.222 0.0    
##   hosp_mortality = 1 (%)    NaN 0.0    
##   age_cate = 1 (%)        0.088 0.0    
##   bmi_cate = 1 (%)        0.214 0.0    
##   gcs_cate = 1 (%)        0.781 0.0    
##   cci_cate = 1 (%)        0.103 0.0    
##   bt_cate = 1 (%)         0.147 0.0    
##   sBP_cate = 1 (%)        1.500 0.0    
##   dBP_cate = 1 (%)        1.154 0.0    
##   hr_cate = 1 (%)         0.810 0.0    
##   bil_cate = 1 (%)        0.577 0.0    
##   ast_cate = 1 (%)        0.421 0.0    
##   alt_cate = 1 (%)        0.276 0.0    
##   wbc_cate = 1 (%)        0.158 0.0    
##   hb_cate = 1 (%)         0.181 0.0    
##   plt_cate = 1 (%)        0.026 0.0    
##   tp_cate = 1 (%)         0.257 0.0    
##   alb_cate = 1 (%)        0.905 0.0    
##   cre_cate = 1 (%)        0.679 0.0    
##   crp_cate = 1 (%)        0.486 0.0    
##   pt_cate = 1 (%)         0.430 0.0    
##   aptt_cate = 1 (%)       0.552 0.0

tbl_summary

# specify your data and variables
tbl_summary(data = dev_imp, 
            by = "hosp_mortality",
            type = list(gcs ~ "continuous", year ~ "categorical"),
            statistic = all_continuous() ~ "{median} ({p25}, {p75})",
            digits = all_continuous() ~ c(0, 2))
Characteristic 0, N = 4631 1, N = 731
sex
    M 352 (76%) 53 (73%)
    F 111 (24%) 20 (27%)
barthel
    0 181 (39%) 7 (9.6%)
    1 155 (33%) 12 (16%)
    2 127 (27%) 54 (74%)
child_score
    0 65 (14%) 1 (1.4%)
    1 270 (58%) 19 (26%)
    2 128 (28%) 53 (73%)
pad
    0 462 (100%) 73 (100%)
    1 1 (0.2%) 0 (0%)
stroke
    0 453 (98%) 73 (100%)
    1 10 (2.2%) 0 (0%)
dimentia
    0 459 (99%) 73 (100%)
    1 4 (0.9%) 0 (0%)
ch_lung
    0 454 (98%) 71 (97%)
    1 9 (1.9%) 2 (2.7%)
rheumati
    0 460 (99%) 73 (100%)
    1 3 (0.6%) 0 (0%)
pept_ulcer
    0 405 (87%) 72 (99%)
    1 58 (13%) 1 (1.4%)
dm
    0 365 (79%) 65 (89%)
    1 98 (21%) 8 (11%)
dm_compli
    0 457 (99%) 73 (100%)
    1 6 (1.3%) 0 (0%)
paralysis
    0 463 (100%) 73 (100%)
malignancy
    0 417 (90%) 61 (84%)
    1 46 (9.9%) 12 (16%)
meta_tumor
    0 461 (100%) 69 (95%)
    1 2 (0.4%) 4 (5.5%)
aids
    0 463 (100%) 73 (100%)
eGFR30
    0 429 (93%) 56 (77%)
    1 34 (7.3%) 17 (23%)
hd
    0 460 (99%) 73 (100%)
    1 3 (0.6%) 0 (0%)
hcc
    0 368 (79%) 57 (78%)
    1 95 (21%) 16 (22%)
alcohol
    0 258 (56%) 47 (64%)
    1 205 (44%) 26 (36%)
past_rupture
    0 367 (79%) 65 (89%)
    1 96 (21%) 8 (11%)
antiplate
    0 460 (99%) 72 (99%)
    1 3 (0.6%) 1 (1.4%)
anticoag
    0 462 (100%) 73 (100%)
    1 1 (0.2%) 0 (0%)
antithro
    0 460 (99%) 72 (99%)
    1 3 (0.6%) 1 (1.4%)
nsaids
    0 458 (99%) 73 (100%)
    1 5 (1.1%) 0 (0%)
steroid
    0 462 (100%) 72 (99%)
    1 1 (0.2%) 1 (1.4%)
beta
    0 445 (96%) 73 (100%)
    1 18 (3.9%) 0 (0%)
vaso
    0 452 (98%) 53 (73%)
    1 11 (2.4%) 20 (27%)
ffp
    0 363 (78%) 36 (49%)
    1 100 (22%) 37 (51%)
pc
    0 460 (99%) 70 (96%)
    1 3 (0.6%) 3 (4.1%)
albner
    0 431 (93%) 58 (79%)
    1 32 (6.9%) 15 (21%)
shock
    0 307 (66%) 11 (15%)
    1 156 (34%) 62 (85%)
age 62 (51.00, 70) 66 (55.00, 74)
bmi 23 (20.81, 25) 23 (20.96, 24)
smoke 200 (0.00, 410) 180 (0.00, 398)
child_num 8 (7.00, 10) 11 (9.00, 12)
gcs 15 (15.00, 15) 15 (9.00, 15)
cci_num 4 (4.00, 5) 4 (4.00, 5)
map 4 (0.00, 4) 6 (4.00, 8)
bt 37 (36.30, 37) 36 (36.00, 37)
sBP 90 (82.00, 98) 65 (59.00, 79)
dBP 53 (46.00, 60) 39 (33.00, 46)
hr 80 (70.00, 93) 100 (89.00, 112)
bil 1 (0.90, 2) 3 (1.66, 5)
ast 54 (33.00, 91) 84 (49.00, 153)
alt 29 (20.00, 45) 33 (25.00, 64)
wbc 7,170 (5,315.00, 10,240) 7,990 (6,200.00, 10,800)
hb 8 (7.00, 10) 8 (6.60, 10)
plt 98 (73.00, 134) 98 (76.00, 130)
tp 6 (5.60, 7) 6 (5.20, 6)
alb 3 (2.60, 3) 2 (1.80, 3)
eGFR 71 (52.42, 92) 44 (30.40, 58)
bun 23 (15.50, 35) 28 (18.10, 46)
cre 1 (0.66, 1) 1 (0.99, 2)
crp 0 (0.11, 1) 1 (0.40, 2)
pt 54 (43.00, 64) 40 (26.80, 55)
aptt 28 (26.00, 32) 32 (29.10, 44)
los 10 (7.00, 18) 4 (1.00, 16)
pt_id 359 (149.50, 583) 426 (235.00, 599)
hosp_num
    1 404 (87%) 68 (93%)
    2 43 (9.3%) 5 (6.8%)
    3 15 (3.2%) 0 (0%)
    4 1 (0.2%) 0 (0%)
hosp_id 1,010 (1,003.00, 1,024) 1,017 (1,005.00, 1,024)
year
    2010 68 (15%) 8 (11%)
    2011 70 (15%) 9 (12%)
    2012 72 (16%) 16 (22%)
    2013 55 (12%) 15 (21%)
    2014 69 (15%) 8 (11%)
    2015 67 (14%) 5 (6.8%)
    2016 62 (13%) 12 (16%)
age_cate 259 (56%) 44 (60%)
bmi_cate 101 (22%) 10 (14%)
gcs_cate 452 (98%) 52 (71%)
cci_cate 65 (14%) 13 (18%)
bt_cate 131 (28%) 16 (22%)
sBP_cate 78 (17%) 56 (77%)
dBP_cate 167 (36%) 62 (85%)
hr_cate 81 (17%) 39 (53%)
bil_cate 23 (5.0%) 18 (25%)
ast_cate 26 (5.6%) 14 (19%)
alt_cate 26 (5.6%) 10 (14%)
wbc_cate 73 (16%) 16 (22%)
hb_cate 193 (42%) 37 (51%)
plt_cate 235 (51%) 38 (52%)
tp_cate 176 (38%) 37 (51%)
alb_cate 173 (37%) 57 (78%)
cre_cate 37 (8.0%) 25 (34%)
crp_cate 30 (6.5%) 17 (23%)
pt_cate 201 (43%) 47 (64%)
aptt_cate 9 (1.9%) 13 (18%)
1 n (%); Median (IQR)

カテゴリー変換後の変数のclass確認

# "initial_vars" vector containing the names of variables
initial_vars <- c("sex", "barthel", "pept_ulcer", "dm", "malignancy", "hcc", "alcohol", 
                  "past_rupture", "antithro", "steroid", "beta", "shock", 
                  "age_cate", "bmi_cate", "gcs_cate", "bt_cate", "bil_cate", "ast_cate", "alt_cate", 
                  "wbc_cate", "hb_cate", "plt_cate", "alb_cate", "cre_cate", 
                  "crp_cate", "aptt_cate","hosp_mortality")

# Loop through each variable in "initial_vars" and print its class
for (var in initial_vars) {
  print(paste(var, ": ", class(dev_imp[[var]]), sep=""))
}
## [1] "sex: factor"
## [1] "barthel: factor"
## [1] "pept_ulcer: factor"
## [1] "dm: factor"
## [1] "malignancy: factor"
## [1] "hcc: factor"
## [1] "alcohol: factor"
## [1] "past_rupture: factor"
## [1] "antithro: factor"
## [1] "steroid: factor"
## [1] "beta: factor"
## [1] "shock: factor"
## [1] "age_cate: numeric"
## [1] "bmi_cate: numeric"
## [1] "gcs_cate: numeric"
## [1] "bt_cate: numeric"
## [1] "bil_cate: numeric"
## [1] "ast_cate: numeric"
## [1] "alt_cate: numeric"
## [1] "wbc_cate: numeric"
## [1] "hb_cate: numeric"
## [1] "plt_cate: numeric"
## [1] "alb_cate: numeric"
## [1] "cre_cate: numeric"
## [1] "crp_cate: numeric"
## [1] "aptt_cate: numeric"
## [1] "hosp_mortality: factor"

shockありでの初期変数の決定(臨床的観点から)とstepwiseでの変数選択

set.seed(2023)
initial_vars <- c("sex", "barthel", "pept_ulcer", "dm", "malignancy", "hcc", "alcohol", 
                  "past_rupture", "antithro", "steroid", "beta", "shock", 
                  "age_cate", "bmi_cate", "gcs_cate", "bt_cate", "bil_cate", "ast_cate", "alt_cate", 
                  "wbc_cate", "hb_cate", "plt_cate", "alb_cate", "cre_cate", 
                  "crp_cate", "aptt_cate","hosp_mortality")

# fastbwによる変数選択
fit_full <- lrm(hosp_mortality ~ ., data = dev_imp[initial_vars])
fit_reduced <- fastbw(fit_full)

# 選択された変数の表示
print(fit_reduced)
## 
##  Deleted      Chi-Sq d.f. P      Residual d.f. P      AIC   
##  past_rupture 0.01   1    0.9147  0.01     1   0.9147  -1.99
##  antithro     0.05   1    0.8216  0.06     2   0.9693  -3.94
##  beta         0.07   1    0.7904  0.13     3   0.9876  -5.87
##  hb_cate      0.17   1    0.6837  0.30     4   0.9899  -7.70
##  aptt_cate    0.17   1    0.6819  0.47     5   0.9933  -9.53
##  sex          0.24   1    0.6224  0.71     6   0.9943 -11.29
##  age_cate     0.43   1    0.5112  1.14     7   0.9922 -12.86
##  alt_cate     0.56   1    0.4524  1.71     8   0.9887 -14.29
##  plt_cate     1.46   1    0.2264  3.17     9   0.9572 -14.83
##  alcohol      1.24   1    0.2662  4.41    10   0.9272 -15.59
##  bmi_cate     2.02   1    0.1548  6.43    11   0.8432 -15.57
##  bil_cate     1.32   1    0.2498  7.75    12   0.8041 -16.25
##  hcc          1.49   1    0.2226  9.24    13   0.7545 -16.76
##  bt_cate      2.81   1    0.0935 12.05    14   0.6020 -15.95
##  malignancy   3.68   1    0.0551 15.73    15   0.4000 -14.27
##  pept_ulcer   3.49   1    0.0617 19.22    16   0.2573 -12.78
##  alb_cate     3.66   1    0.0559 22.88    17   0.1532 -11.12
##  ast_cate     3.75   1    0.0529 26.63    18   0.0863  -9.37
##  wbc_cate     2.53   1    0.1114 29.16    19   0.0635  -8.84
##  dm           3.42   1    0.0645 32.58    20   0.0375  -7.42
##  cre_cate     5.29   1    0.0214 37.87    21   0.0133  -4.13
##  steroid      5.30   1    0.0213 43.17    22   0.0045  -0.83
## 
## Approximate Estimates after Deleting Factors
## 
##             Coef   S.E. Wald Z         P
## Intercept -1.846 0.7229 -2.553 1.068e-02
## barthel=1  0.628 0.5623  1.117 2.641e-01
## barthel=2  1.414 0.4816  2.937 3.312e-03
## shock=1    1.657 0.4181  3.964 7.372e-05
## gcs_cate  -1.838 0.5961 -3.084 2.041e-03
## crp_cate   1.135 0.4635  2.449 1.431e-02
## 
## Factors in Final Model
## 
## [1] barthel  shock    gcs_cate crp_cate

Goodness of fitを計算する

# hosp_mortalityを数値に変換
dev_imp$hosp_mortality <- as.numeric(as.character(dev_imp$hosp_mortality))

# 予測変数として選択された変数を使って新たにモデルを作成
fit_reduced_model <- lrm(hosp_mortality ~ barthel + shock + gcs_cate + crp_cate, data = dev_imp)

# 予測確率を計算
dev_imp$fitted <- predict(fit_reduced_model, type="fitted")

# Calibration in large
calibration <- mean(dev_imp$hosp_mortality) - mean(dev_imp$fitted)
fitstat <- data.frame(Name = "Calibration in the large", Value = calibration)

# Brier Score
dev_imp$diff2 <- (dev_imp$fitted - dev_imp$hosp_mortality)^2
brier_score <- mean(dev_imp$diff2)
fitstat <- rbind(fitstat, data.frame(Name = "Brier score", Value = brier_score))

# Pseudo R-square
pseudo_r2 <- fit_reduced_model$stats["R2"]
fitstat <- rbind(fitstat, data.frame(Name = "R2", Value = pseudo_r2))

# AIC
aic <- AIC(fit_reduced_model)
fitstat <- rbind(fitstat, data.frame(Name = "AIC", Value = aic))

# Change the display option
options(scipen = 999)

# Display the final table
fitstat
##                        Name                  Value
## 1  Calibration in the large  -0.000000000000334871
## 2               Brier score   0.080675354129527643
## R2                       R2   0.418586466203857921
## 11                      AIC 298.733845742880930629

ROCを記載

# Load necessary libraries
library(pROC)
library(ggplot2)

# Re-run the model using the selected variables from earlier
fit_reduced_model <- lrm(hosp_mortality ~ barthel + shock + gcs_cate + crp_cate, data = dev_imp)

# Calculate fitted probabilities
dev_imp$fitted <- predict(fit_reduced_model, type="fitted")

# Create ROC object, setting the response variable and predictor
roc_obj <- roc(dev_imp$hosp_mortality ~ dev_imp$fitted, ci=TRUE, direction="auto")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot ROC curve
roc_plot <- ggroc(roc_obj)

# Add the theme you used before. For example, if you used theme_minimal:
roc_plot <- roc_plot + theme_minimal()


# Calculate AUC and its confidence interval
auc_roc <- auc(roc_obj)
ci <- ci.auc(roc_obj)

# Print the ROC plot
print(roc_plot)

# Print the AUC and its confidence interval
cat("AUC for the development logistic model: ", auc_roc, "\n")
## AUC for the development logistic model:  0.8780289
cat("95% CI for AUC: (", ci[1], ",", ci[2], ")\n")
## 95% CI for AUC: ( 0.8400065 , 0.8780289 )
# Load necessary libraries
library(pROC)
library(ggplot2)

# Re-run the model using the selected variables from earlier
fit_reduced_model <- lrm(hosp_mortality ~ barthel + shock + gcs_cate + crp_cate, data = dev_imp)

# Calculate fitted probabilities
dev_imp$fitted <- predict(fit_reduced_model, type="fitted")

# Create ROC object, setting the response variable and predictor
roc_obj <- roc(dev_imp$hosp_mortality ~ dev_imp$fitted, ci=TRUE, direction="auto")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot ROC curve
plot(roc_obj, main="ROC curve for the development logistic model")

cat("AUC for the development logistic mode: ", auc(roc_obj), "\n")
## AUC for the development logistic mode:  0.8780289

calibration plotの作成

# Create calibration plot
val.prob(dev_imp$fitted, dev_imp$hosp_mortality, g=10, cex=.5)

##                     Dxy                 C (ROC)                      R2 
##   0.7560578715346607526   0.8780289357673304318   0.4185864662038579209 
##                       D                D:Chi-sq                     D:p 
##   0.2591709784207106848 139.9156444335009155111                      NA 
##                       U                U:Chi-sq                     U:p 
##  -0.0037313432835823016  -0.0000000000001136868   1.0000000000000000000 
##                       Q                   Brier               Intercept 
##   0.2629023217042930072   0.0806753541295276433  -0.0000000267358833469 
##                   Slope                    Emax                     E90 
##   0.9999999596919528777   0.0432344638645756896   0.0308623817664151617 
##                    Eavg                     S:z                     S:p 
##   0.0103251173109911668   0.0568988067149717586   0.9546258048783312633

内的検証 ブードストラップ B=1000

# Re-run the model using the selected variables from earlier
fit_reduced_model <- lrm(hosp_mortality ~ barthel + shock + gcs_cate + crp_cate, data = dev_imp, x = TRUE, y = TRUE)

# RMSの関数を使用してブートストラップによる内的検証を実行
cv <- validate(fit_reduced_model, bw = FALSE, B = 1000, method = "boot", seed = 2023)

# Corrected AUCの計算
corrected_AUC <- cv[1, 5] * 0.5 + 0.5

# 結果の表示
cv
##           index.orig training    test optimism index.corrected    n
## Dxy           0.7561   0.7597  0.7461   0.0136          0.7425 1000
## R2            0.4186   0.4306  0.4073   0.0233          0.3953 1000
## Intercept     0.0000   0.0000 -0.0501   0.0501         -0.0501 1000
## Slope         1.0000   1.0000  0.9474   0.0526          0.9474 1000
## Emax          0.0000   0.0000  0.0208   0.0208          0.0208 1000
## D             0.2592   0.2677  0.2512   0.0165          0.2426 1000
## U            -0.0037  -0.0037  0.0015  -0.0053          0.0015 1000
## Q             0.2629   0.2714  0.2496   0.0218          0.2411 1000
## B             0.0807   0.0789  0.0824  -0.0035          0.0841 1000
## g             1.8747   1.9700  1.8391   0.1309          1.7438 1000
## gp            0.1748   0.1756  0.1723   0.0033          0.1715 1000
print("corrected_AUC")
## [1] "corrected_AUC"
print(corrected_AUC)
## [1] 0.8712354
# Load necessary library
library(boot)
## 
##  次のパッケージを付け加えます: 'boot'
##  以下のオブジェクトは 'package:car' からマスクされています:
## 
##     logit
# Define a function to calculate the metrics
calc_metrics <- function(data, indices) {
  data_boot <- data[indices, ]
  fit_model <- lrm(hosp_mortality ~ barthel + shock + gcs_cate + crp_cate, data = data_boot, x = TRUE, y = TRUE)
  
  predicted <- predict(fit_model, type="fitted")
  
  calibration <- mean(data_boot$hosp_mortality) - mean(predicted)
  brier_score <- mean((predicted - data_boot$hosp_mortality)^2)
  pseudo_r2 <- fit_model$stats["R2"]
  aic <- AIC(fit_model)
  
  return(c(calibration, brier_score, pseudo_r2, aic))
}

# Bootstrap resampling
set.seed(2023)
results <- boot(data = dev_imp, statistic = calc_metrics, R = 1000)

# Display the results
print(results)
## 
## ORDINARY NONPARAMETRIC BOOTSTRAP
## 
## 
## Call:
## boot(data = dev_imp, statistic = calc_metrics, R = 1000)
## 
## 
## Bootstrap Statistics :
##                   original             bias         std. error
## t1*  -0.000000000000334871 -0.000000009407901  0.0000001446726
## t2*   0.080675354129527643 -0.001629802519338  0.0081454825462
## t3*   0.418586466203857921  0.011453923386884  0.0522775350164
## t4* 298.733845742880930629 -6.332406365650058 26.4061495701321

検証データに対するmissforestによる単一代入を含めた外的検証の準備

###準備

# vari_cat.f (defined as above) in the data_original is changed to factor type and set into data_factor_for_imp
data_factor_for_imp_val <- as.data.frame(lapply(df_val[vari_cat.f],as.factor))
#check all the variable is factor
str(data_factor_for_imp_val)
## 'data.frame':    444 obs. of  32 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": NA 3 2 1 3 1 2 1 1 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 1 NA 1 1 1 1 1 1 1 1 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
#summary numeric type
data_numeric_for_imp_val <- as.data.frame(lapply(df_val[vari_numeric],as.numeric))
# check all the variable is numeric
str(data_numeric_for_imp_val)
## 'data.frame':    444 obs. of  26 variables:
##  $ age      : num  47 62 82 57 69 47 50 68 75 53 ...
##  $ bmi      : num  21 20.8 26.9 23.2 23.6 ...
##  $ smoke    : num  270 0 0 0 0 135 NA 0 0 0 ...
##  $ child_num: num  11 9 8 8 8 6 7 7 8 14 ...
##  $ gcs      : num  15 13 15 15 15 15 15 15 15 3 ...
##  $ cci_num  : num  4 4 4 3 6 5 4 4 5 5 ...
##  $ map      : num  0 8 6 2 4 0 2 4 4 4 ...
##  $ bt       : num  36.4 NA 36.9 37.1 37.2 36.1 36.5 37 37 36 ...
##  $ sBP      : num  102 NA 95 111 125 116 102 96 97 71 ...
##  $ dBP      : num  67 NA 49 68 70 84 88 55 54 49 ...
##  $ hr       : num  83 NA 60 101 121 56 79 72 75 56 ...
##  $ bil      : num  7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
##  $ ast      : num  112 56 37 42 74 18 19 24 31 62 ...
##  $ alt      : num  53 30 19 35 43 13 18 16 25 44 ...
##  $ wbc      : num  9800 14600 3350 12030 6800 ...
##  $ hb       : num  12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
##  $ plt      : num  69 437 109 107 107 60 140 84 152 178 ...
##  $ tp       : num  5.7 5.6 5.8 5.4 5.9 NA 5.9 6.1 7 6.8 ...
##  $ alb      : num  2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
##  $ eGFR     : num  89.3 65.8 46.6 94.2 90.7 ...
##  $ bun      : num  13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
##  $ cre      : num  0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
##  $ crp      : num  0.1 0.17 0.22 0.04 0.111 ...
##  $ pt       : num  28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
##  $ aptt     : num  37.8 30.9 30.2 33.4 34.5 NA 29.1 30.2 33.1 30.2 ...
##  $ los      : num  6 2 14 5 12 4 1 6 9 16 ...
# combine the factor type and numeric type
data_for_imp_val <- cbind(data_factor_for_imp_val, data_numeric_for_imp_val) 
#check all the variable type
str(data_for_imp_val)
## 'data.frame':    444 obs. of  58 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": NA 3 2 1 3 1 2 1 1 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 1 NA 1 1 1 1 1 1 1 1 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age           : num  47 62 82 57 69 47 50 68 75 53 ...
##  $ bmi           : num  21 20.8 26.9 23.2 23.6 ...
##  $ smoke         : num  270 0 0 0 0 135 NA 0 0 0 ...
##  $ child_num     : num  11 9 8 8 8 6 7 7 8 14 ...
##  $ gcs           : num  15 13 15 15 15 15 15 15 15 3 ...
##  $ cci_num       : num  4 4 4 3 6 5 4 4 5 5 ...
##  $ map           : num  0 8 6 2 4 0 2 4 4 4 ...
##  $ bt            : num  36.4 NA 36.9 37.1 37.2 36.1 36.5 37 37 36 ...
##  $ sBP           : num  102 NA 95 111 125 116 102 96 97 71 ...
##  $ dBP           : num  67 NA 49 68 70 84 88 55 54 49 ...
##  $ hr            : num  83 NA 60 101 121 56 79 72 75 56 ...
##  $ bil           : num  7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
##  $ ast           : num  112 56 37 42 74 18 19 24 31 62 ...
##  $ alt           : num  53 30 19 35 43 13 18 16 25 44 ...
##  $ wbc           : num  9800 14600 3350 12030 6800 ...
##  $ hb            : num  12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
##  $ plt           : num  69 437 109 107 107 60 140 84 152 178 ...
##  $ tp            : num  5.7 5.6 5.8 5.4 5.9 NA 5.9 6.1 7 6.8 ...
##  $ alb           : num  2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
##  $ eGFR          : num  89.3 65.8 46.6 94.2 90.7 ...
##  $ bun           : num  13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
##  $ cre           : num  0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
##  $ crp           : num  0.1 0.17 0.22 0.04 0.111 ...
##  $ pt            : num  28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
##  $ aptt          : num  37.8 30.9 30.2 33.4 34.5 NA 29.1 30.2 33.1 30.2 ...
##  $ los           : num  6 2 14 5 12 4 1 6 9 16 ...

###実行

cores <- detectCores(logical = FALSE) ###並列化処理
registerDoParallel(cores = cores) ###並列化処理
set.seed(2023)
md.pattern(data_for_imp_val) #see patern the missing

##     sex pad stroke dimentia ch_lung rheumati pept_ulcer dm dm_compli paralysis
## 270   1   1      1        1       1        1          1  1         1         1
## 13    1   1      1        1       1        1          1  1         1         1
## 9     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 14    1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 20    1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 7     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 7     1   1      1        1       1        1          1  1         1         1
## 8     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 13    1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 12    1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 17    1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 8     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 5     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 6     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 2     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
## 1     1   1      1        1       1        1          1  1         1         1
##       0   0      0        0       0        0          0  0         0         0
##     malignancy meta_tumor aids hd hcc alcohol past_rupture antiplate anticoag
## 270          1          1    1  1   1       1            1         1        1
## 13           1          1    1  1   1       1            1         1        1
## 9            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 14           1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 20           1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 7            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 7            1          1    1  1   1       1            1         1        1
## 8            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 13           1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 12           1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 17           1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 8            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 5            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 6            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 2            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
## 1            1          1    1  1   1       1            1         1        1
##              0          0    0  0   0       0            0         0        0
##     antithro nsaids steroid beta vaso ffp pc albner hosp_mortality age gcs
## 270        1      1       1    1    1   1  1      1              1   1   1
## 13         1      1       1    1    1   1  1      1              1   1   1
## 9          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 14         1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 20         1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 7          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 7          1      1       1    1    1   1  1      1              1   1   1
## 8          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 13         1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 12         1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 17         1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 8          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 5          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 6          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 2          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
## 1          1      1       1    1    1   1  1      1              1   1   1
##            0      0       0    0    0   0  0      0              0   0   0
##     cci_num map los sBP dBP shock hr bt eGFR30 ast alt wbc hb plt eGFR bun bil
## 270       1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 13        1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 9         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 14        1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 20        1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 7         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 7         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 8         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 13        1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 12        1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 17        1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 2         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 8         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 2         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 5         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 2         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 2         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   0
## 1         1   1   1   1   1     1  1  1      1   1   1   1  1   1    1   1   0
## 6         1   1   1   1   1     1  1  1      0   0   0   0  0   0    0   0   0
## 1         1   1   1   1   1     1  1  1      0   0   0   0  0   0    0   0   0
## 2         1   1   1   1   1     1  1  0      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     0  0  1      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     0  0  0      1   1   1   1  1   1    1   1   1
## 1         1   1   1   1   1     0  0  0      1   1   1   1  1   1    1   1   1
## 1         1   1   1   0   0     0  0  0      1   1   1   1  1   1    1   1   1
##           0   0   0   1   1     4  4  5      7   7   7   7  7   7    7   7   9
##     cre alb crp pt barthel bmi tp child_score smoke child_num aptt    
## 270   1   1   1  1       1   1  1           1     1         1    1   0
## 13    1   1   1  1       1   1  1           1     1         1    0   1
## 9     1   1   1  1       1   1  1           1     1         0    1   1
## 1     1   1   1  1       1   1  1           1     1         0    0   2
## 14    1   1   1  1       1   1  1           1     0         1    1   1
## 1     1   1   1  1       1   1  1           1     0         1    0   2
## 1     1   1   1  1       1   1  1           1     0         0    1   2
## 20    1   1   1  1       1   1  1           0     1         0    1   2
## 1     1   1   1  1       1   1  1           0     1         0    0   3
## 7     1   1   1  1       1   1  1           0     0         0    1   3
## 1     1   1   1  1       1   1  1           0     0         0    0   4
## 7     1   1   1  1       1   1  0           1     1         1    1   1
## 8     1   1   1  1       1   1  0           1     1         1    0   2
## 1     1   1   1  1       1   1  0           1     1         0    1   2
## 13    1   1   1  1       1   0  1           1     1         1    1   1
## 1     1   1   1  1       1   0  1           1     1         1    0   2
## 12    1   1   1  1       1   0  1           1     0         1    1   2
## 1     1   1   1  1       1   0  0           1     1         1    0   3
## 17    1   1   1  1       0   1  1           1     1         1    1   1
## 2     1   1   1  1       0   1  1           1     1         1    0   2
## 1     1   1   1  1       0   1  1           1     0         1    1   2
## 1     1   1   1  1       0   1  1           1     0         0    1   3
## 1     1   1   1  1       0   1  0           1     1         1    1   2
## 1     1   1   1  1       0   0  1           1     1         1    1   2
## 8     1   1   1  0       1   1  1           1     1         1    0   2
## 2     1   1   1  0       1   1  1           0     1         0    0   4
## 1     1   1   1  0       1   1  1           0     0         0    0   5
## 1     1   1   1  0       1   1  0           1     1         1    0   3
## 1     1   1   1  0       1   1  0           1     0         1    0   4
## 5     1   1   0  1       1   1  1           1     1         1    1   1
## 1     1   0   1  1       1   1  1           0     1         0    1   3
## 2     1   0   1  1       1   1  0           1     1         1    1   2
## 1     1   0   0  1       1   1  0           1     1         1    1   3
## 2     0   1   1  1       1   1  1           1     1         1    1   1
## 1     0   1   0  1       1   1  1           0     1         0    1   4
## 1     1   1   0  1       1   1  1           1     0         1    1   3
## 1     1   0   1  1       1   1  0           1     1         1    1   3
## 6     0   0   0  0       1   1  0           1     1         1    0  15
## 1     0   0   0  0       1   1  0           0     1         0    0  17
## 2     1   1   1  1       1   0  1           1     1         1    1   2
## 1     1   1   1  1       0   1  1           1     1         1    1   3
## 1     1   1   1  1       1   1  1           1     0         1    1   4
## 1     1   1   1  1       1   1  0           1     1         1    1   4
## 1     1   1   1  1       1   1  1           1     1         1    1   5
##      10  12  15 20      24  30 32          35    42        48   49 397
imp.mf_val <- missForest(data_for_imp_val,
                     maxiter = 10,
                     ntree = 100,
                     mtry = floor(sqrt(ncol(data_for_imp_val))),
                     parallelize = "variables",
                     verbose = TRUE)
##   parallelizing over the variables of the input data matrix 'xmis'
##   missForest iteration 1 in progress...done!
##     estimated error(s): 0.4496393 0.01998259 
##     difference(s): 0.0001284863 0.001548423 
##     time: 0.853 seconds
## 
##   missForest iteration 2 in progress...done!
##     estimated error(s): 0.4480937 0.01951802 
##     difference(s): 0.0001457033 0.0004926802 
##     time: 0.857 seconds
## 
##   missForest iteration 3 in progress...done!
##     estimated error(s): 0.4520882 0.01976926 
##     difference(s): 0.0000430826 0.0005630631 
##     time: 0.871 seconds
## 
##   missForest iteration 4 in progress...done!
##     estimated error(s): 0.4460195 0.01977595 
##     difference(s): 0.00001554856 0.0006334459 
##     time: 0.858 seconds
## 
##   missForest iteration 5 in progress...done!
##     estimated error(s): 0.443625 0.0182599 
##     difference(s): 0.00007279451 0.0005630631 
##     time: 0.856 seconds
## 
##   missForest iteration 6 in progress...done!
##     estimated error(s): 0.4465679 0.01905619 
##     difference(s): 0.00001796719 0.0005630631 
##     time: 0.864 seconds
## 
##   missForest iteration 7 in progress...done!
##     estimated error(s): 0.4505507 0.02074618 
##     difference(s): 0.00003858134 0.0005630631 
##     time: 0.868 seconds
summary(imp.mf_val$ximp)
##  sex     barthel child_score pad     stroke  dimentia ch_lung rheumati
##  M:329   0:178   0: 84       0:443   0:433   0:438    0:440   0:444   
##  F:115   1:138   1:239       1:  1   1: 11   1:  6    1:  4   1:  0   
##          2:128   2:121                                                
##                                                                       
##                                                                       
##                                                                       
##  pept_ulcer dm      dm_compli paralysis malignancy meta_tumor aids    eGFR30 
##  0:402      0:340   0:437     0:444     0:387      0:433      0:444   0:408  
##  1: 42      1:104   1:  7               1: 57      1: 11              1: 36  
##                                                                              
##                                                                              
##                                                                              
##                                                                              
##  hd      hcc     alcohol past_rupture antiplate anticoag antithro nsaids 
##  0:434   0:380   0:203   0:323        0:441     0:441    0:438    0:440  
##  1: 10   1: 64   1:241   1:121        1:  3     1:  3    1:  6    1:  4  
##                                                                          
##                                                                          
##                                                                          
##                                                                          
##  steroid beta    vaso    ffp     pc      albner  shock   hosp_mortality
##  0:441   0:404   0:417   0:290   0:433   0:411   0:248   0:399         
##  1:  3   1: 40   1: 27   1:154   1: 11   1: 33   1:196   1: 45         
##                                                                        
##                                                                        
##                                                                        
##                                                                        
##       age             bmi             smoke        child_num     
##  Min.   :24.00   Min.   : 15.28   Min.   :   0   Min.   : 5.000  
##  1st Qu.:50.00   1st Qu.: 20.76   1st Qu.:   0   1st Qu.: 7.000  
##  Median :60.00   Median : 23.17   Median :   0   Median : 8.000  
##  Mean   :60.42   Mean   : 24.37   Mean   : 229   Mean   : 8.419  
##  3rd Qu.:70.00   3rd Qu.: 26.01   3rd Qu.: 320   3rd Qu.:10.000  
##  Max.   :93.00   Max.   :339.76   Max.   :2100   Max.   :14.000  
##       gcs           cci_num            map               bt       
##  Min.   : 3.00   Min.   : 3.000   Min.   : 0.000   Min.   :33.90  
##  1st Qu.:15.00   1st Qu.: 4.000   1st Qu.: 0.000   1st Qu.:36.50  
##  Median :15.00   Median : 4.000   Median : 4.000   Median :36.80  
##  Mean   :14.61   Mean   : 4.505   Mean   : 3.214   Mean   :36.81  
##  3rd Qu.:15.00   3rd Qu.: 5.000   3rd Qu.: 4.000   3rd Qu.:37.10  
##  Max.   :15.00   Max.   :13.000   Max.   :20.000   Max.   :40.20  
##       sBP              dBP              hr              bil        
##  Min.   : 50.00   Min.   :22.00   Min.   : 41.00   Min.   : 0.200  
##  1st Qu.: 80.00   1st Qu.:46.00   1st Qu.: 73.00   1st Qu.: 1.000  
##  Median : 91.00   Median :54.00   Median : 85.00   Median : 1.672  
##  Mean   : 89.95   Mean   :54.83   Mean   : 88.45   Mean   : 2.417  
##  3rd Qu.:100.00   3rd Qu.:63.00   3rd Qu.:101.00   3rd Qu.: 2.993  
##  Max.   :149.00   Max.   :92.00   Max.   :160.00   Max.   :16.180  
##       ast               alt              wbc              hb        
##  Min.   :  13.00   Min.   :  7.00   Min.   : 2100   Min.   : 2.100  
##  1st Qu.:  32.00   1st Qu.: 19.00   1st Qu.: 5800   1st Qu.: 7.200  
##  Median :  50.00   Median : 29.00   Median : 8215   Median : 8.800  
##  Mean   :  82.01   Mean   : 39.96   Mean   : 9048   Mean   : 8.953  
##  3rd Qu.:  88.25   3rd Qu.: 44.00   3rd Qu.:11070   3rd Qu.:10.500  
##  Max.   :1122.00   Max.   :462.00   Max.   :58400   Max.   :16.500  
##       plt                tp             alb             eGFR        
##  Min.   :  16.00   Min.   :3.000   Min.   :1.400   Min.   :  5.057  
##  1st Qu.:  75.75   1st Qu.:5.600   1st Qu.:2.500   1st Qu.: 49.568  
##  Median : 102.50   Median :6.100   Median :2.900   Median : 67.731  
##  Mean   : 117.53   Mean   :6.149   Mean   :2.912   Mean   : 70.803  
##  3rd Qu.: 140.25   3rd Qu.:6.700   3rd Qu.:3.400   3rd Qu.: 90.404  
##  Max.   :1073.00   Max.   :9.000   Max.   :4.900   Max.   :186.229  
##       bun              cre             crp                pt        
##  Min.   :  3.50   Min.   :0.300   Min.   : 0.0000   Min.   :  9.99  
##  1st Qu.: 16.50   1st Qu.:0.650   1st Qu.: 0.1195   1st Qu.: 46.20  
##  Median : 24.90   Median :0.825   Median : 0.2875   Median : 59.70  
##  Mean   : 28.51   Mean   :1.046   Mean   : 0.8577   Mean   : 59.15  
##  3rd Qu.: 36.40   3rd Qu.:1.110   3rd Qu.: 0.7485   3rd Qu.: 72.00  
##  Max.   :107.80   Max.   :9.080   Max.   :18.9370   Max.   :100.20  
##       aptt             los        
##  Min.   : 18.90   Min.   :  0.00  
##  1st Qu.: 29.88   1st Qu.:  5.00  
##  Median : 32.40   Median :  7.00  
##  Mean   : 35.85   Mean   : 10.67  
##  3rd Qu.: 37.20   3rd Qu.: 13.00  
##  Max.   :200.01   Max.   :217.00
md.pattern(imp.mf_val$ximp)
##  /\     /\
## {  `---'  }
## {  O   O  }
## ==>  V <==  No need for mice. This data set is completely observed.
##  \  \|/  /
##   `-----'

##     sex barthel child_score pad stroke dimentia ch_lung rheumati pept_ulcer dm
## 444   1       1           1   1      1        1       1        1          1  1
##       0       0           0   0      0        0       0        0          0  0
##     dm_compli paralysis malignancy meta_tumor aids eGFR30 hd hcc alcohol
## 444         1         1          1          1    1      1  1   1       1
##             0         0          0          0    0      0  0   0       0
##     past_rupture antiplate anticoag antithro nsaids steroid beta vaso ffp pc
## 444            1         1        1        1      1       1    1    1   1  1
##                0         0        0        0      0       0    0    0   0  0
##     albner shock hosp_mortality age bmi smoke child_num gcs cci_num map bt sBP
## 444      1     1              1   1   1     1         1   1       1   1  1   1
##          0     0              0   0   0     0         0   0       0   0  0   0
##     dBP hr bil ast alt wbc hb plt tp alb eGFR bun cre crp pt aptt los  
## 444   1  1   1   1   1   1  1   1  1   1    1   1   1   1  1    1   1 0
##       0  0   0   0   0   0  0   0  0   0    0   0   0   0  0    0   0 0
#if exclude ID is necessary add the ID
data_imp_val <- imp.mf_val$ximp
data_imp_val$pt_id <- df_val$pt_id
data_imp_val$hosp_num<- df_val$hosp_num
data_imp_val$hosp_id<- df_val$hosp_id
data_imp_val$year<- df_val$year
#check
str(data_imp_val)
## 'data.frame':    444 obs. of  62 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 2 3 2 1 3 1 2 1 1 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age           : num  47 62 82 57 69 47 50 68 75 53 ...
##  $ bmi           : num  21 20.8 26.9 23.2 23.6 ...
##  $ smoke         : num  270 0 0 0 0 ...
##  $ child_num     : num  11 9 8 8 8 6 7 7 8 14 ...
##  $ gcs           : num  15 13 15 15 15 15 15 15 15 3 ...
##  $ cci_num       : num  4 4 4 3 6 5 4 4 5 5 ...
##  $ map           : num  0 8 6 2 4 0 2 4 4 4 ...
##  $ bt            : num  36.4 36.8 36.9 37.1 37.2 ...
##  $ sBP           : num  102 88.9 95 111 125 ...
##  $ dBP           : num  67 51.5 49 68 70 ...
##  $ hr            : num  83 78.4 60 101 121 ...
##  $ bil           : num  7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
##  $ ast           : num  112 56 37 42 74 18 19 24 31 62 ...
##  $ alt           : num  53 30 19 35 43 13 18 16 25 44 ...
##  $ wbc           : num  9800 14600 3350 12030 6800 ...
##  $ hb            : num  12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
##  $ plt           : num  69 437 109 107 107 60 140 84 152 178 ...
##  $ tp            : num  5.7 5.6 5.8 5.4 5.9 ...
##  $ alb           : num  2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
##  $ eGFR          : num  89.3 65.8 46.6 94.2 90.7 ...
##  $ bun           : num  13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
##  $ cre           : num  0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
##  $ crp           : num  0.1 0.17 0.22 0.04 0.111 ...
##  $ pt            : num  28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
##  $ aptt          : num  37.8 30.9 30.2 33.4 34.5 ...
##  $ los           : num  6 2 14 5 12 4 1 6 9 16 ...
##  $ pt_id         : int  6 17 24 32 33 37 38 40 42 44 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1002 1002 1003 1003 1003 1003 1003 1003 ...
##  $ year          : int  2017 2019 2022 2022 2022 2017 2022 2020 2021 2022 ...
#write the csv
#write.csv(imp.mf$ximp, file = "data_after_imputation.csv")

データの確認

str(data_imp_val)
## 'data.frame':    444 obs. of  62 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 2 3 2 1 3 1 2 1 1 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age           : num  47 62 82 57 69 47 50 68 75 53 ...
##  $ bmi           : num  21 20.8 26.9 23.2 23.6 ...
##  $ smoke         : num  270 0 0 0 0 ...
##  $ child_num     : num  11 9 8 8 8 6 7 7 8 14 ...
##  $ gcs           : num  15 13 15 15 15 15 15 15 15 3 ...
##  $ cci_num       : num  4 4 4 3 6 5 4 4 5 5 ...
##  $ map           : num  0 8 6 2 4 0 2 4 4 4 ...
##  $ bt            : num  36.4 36.8 36.9 37.1 37.2 ...
##  $ sBP           : num  102 88.9 95 111 125 ...
##  $ dBP           : num  67 51.5 49 68 70 ...
##  $ hr            : num  83 78.4 60 101 121 ...
##  $ bil           : num  7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
##  $ ast           : num  112 56 37 42 74 18 19 24 31 62 ...
##  $ alt           : num  53 30 19 35 43 13 18 16 25 44 ...
##  $ wbc           : num  9800 14600 3350 12030 6800 ...
##  $ hb            : num  12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
##  $ plt           : num  69 437 109 107 107 60 140 84 152 178 ...
##  $ tp            : num  5.7 5.6 5.8 5.4 5.9 ...
##  $ alb           : num  2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
##  $ eGFR          : num  89.3 65.8 46.6 94.2 90.7 ...
##  $ bun           : num  13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
##  $ cre           : num  0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
##  $ crp           : num  0.1 0.17 0.22 0.04 0.111 ...
##  $ pt            : num  28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
##  $ aptt          : num  37.8 30.9 30.2 33.4 34.5 ...
##  $ los           : num  6 2 14 5 12 4 1 6 9 16 ...
##  $ pt_id         : int  6 17 24 32 33 37 38 40 42 44 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1002 1002 1003 1003 1003 1003 1003 1003 ...
##  $ year          : int  2017 2019 2022 2022 2022 2017 2022 2020 2021 2022 ...

classの変換

val_imp <-
  data_imp_val|>  
  mutate(
        hosp_id=as.integer(hosp_id),
        pt_id=as.integer(pt_id),
        hosp_num=as.integer(hosp_num),
        year=as.integer(year),
        age=as.integer(age),
        sex= factor(sex, levels = c("M", "F")),
        smoke= as.integer(smoke),
        barthel= factor(barthel, levels = c("0", "1", "2")),
        child_num= as.integer(round(data_imp_val$child_num)),
        child_score=factor(child_score, levels = c("0", "1", "2")),
        gcs=as.integer(gcs),
        cci_num=as.integer(cci_num),
        pad=factor(pad),
        stroke=factor(stroke),
        dimentia=factor(dimentia),
        ch_lung=factor(ch_lung),
        rheumati=factor(rheumati),
        pept_ulcer=factor(pept_ulcer),
        dm=factor(dm),
        dm_compli=factor(dm_compli),
        paralysis=factor(paralysis),
        malignancy=factor(malignancy),
        meta_tumor=factor(meta_tumor),
        aids=factor(aids),
        eGFR30=factor(eGFR30),
        hd=factor(hd),
        hcc=factor(hcc),
        alcohol=factor(alcohol),
        past_rupture=factor(past_rupture),
        antiplate=factor(antiplate),
        anticoag=factor(anticoag),
        antithro=factor(antithro),
        nsaids=factor(nsaids),
        steroid=factor(steroid),
        beta=factor(beta),
        vaso=factor(vaso),
        map= as.integer(map),
        ffp=factor(ffp),
        pc=factor(pc),
        albner=factor(albner),
        sBP= as.integer(sBP),
        dBP= as.integer(dBP),
        hr=as.integer(hr),
        shock=factor(shock),
        los=as.integer(los),
          )

連続量をカテゴリー化

#新規カテゴリ列を作成
val_imp$age_cate <- ifelse(val_imp$age >= 60, 1, 0)
val_imp$bmi_cate <- ifelse(val_imp$bmi >= 25, 1, 0) # bmiは25以上:1, 25未満:0
val_imp$gcs_cate <- ifelse(val_imp$gcs >= 12, 1, 0)
val_imp$cci_cate <- ifelse(val_imp$cci_num >= 6, 1, 0)
val_imp$bt_cate <- ifelse(val_imp$bt >= 37, 1, 0)
val_imp$sBP_cate <- ifelse(val_imp$sBP < 80, 1, 0)
val_imp$dBP_cate <- ifelse(val_imp$dBP < 50, 1, 0)
val_imp$hr_cate <- ifelse(val_imp$hr >= 100, 1, 0)
val_imp$bil_cate <- ifelse(val_imp$bil >= 5, 1, 0)
val_imp$ast_cate <- ifelse(val_imp$ast >= 200, 1, 0)
val_imp$alt_cate <- ifelse(val_imp$alt >= 100, 1, 0)
val_imp$wbc_cate <- ifelse(val_imp$wbc >= 12000, 1, 0)
val_imp$hb_cate <- ifelse(val_imp$hb < 8, 1, 0)
val_imp$plt_cate <- ifelse(val_imp$plt < 100, 1, 0) # pltは100未満:1、100以上:0
val_imp$tp_cate <- ifelse(val_imp$tp < 6, 1, 0)
val_imp$alb_cate <- ifelse(val_imp$alb < 2.8, 1, 0)
val_imp$cre_cate <- ifelse(val_imp$cre >= 1.5, 1, 0)
val_imp$crp_cate <- ifelse(val_imp$crp >= 2, 1, 0)
val_imp$pt_cate <- ifelse(val_imp$pt < 50, 1, 0)
val_imp$aptt_cate <- ifelse(val_imp$aptt >= 50, 1, 0) # apttは50以上:1, 50未満:0
str(val_imp)
## 'data.frame':    444 obs. of  82 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 2 3 2 1 3 1 2 1 1 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age           : int  47 62 82 57 69 47 50 68 75 53 ...
##  $ bmi           : num  21 20.8 26.9 23.2 23.6 ...
##  $ smoke         : int  270 0 0 0 0 135 428 0 0 0 ...
##  $ child_num     : int  11 9 8 8 8 6 7 7 8 14 ...
##  $ gcs           : int  15 13 15 15 15 15 15 15 15 3 ...
##  $ cci_num       : int  4 4 4 3 6 5 4 4 5 5 ...
##  $ map           : int  0 8 6 2 4 0 2 4 4 4 ...
##  $ bt            : num  36.4 36.8 36.9 37.1 37.2 ...
##  $ sBP           : int  102 88 95 111 125 116 102 96 97 71 ...
##  $ dBP           : int  67 51 49 68 70 84 88 55 54 49 ...
##  $ hr            : int  83 78 60 101 121 56 79 72 75 56 ...
##  $ bil           : num  7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
##  $ ast           : num  112 56 37 42 74 18 19 24 31 62 ...
##  $ alt           : num  53 30 19 35 43 13 18 16 25 44 ...
##  $ wbc           : num  9800 14600 3350 12030 6800 ...
##  $ hb            : num  12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
##  $ plt           : num  69 437 109 107 107 60 140 84 152 178 ...
##  $ tp            : num  5.7 5.6 5.8 5.4 5.9 ...
##  $ alb           : num  2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
##  $ eGFR          : num  89.3 65.8 46.6 94.2 90.7 ...
##  $ bun           : num  13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
##  $ cre           : num  0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
##  $ crp           : num  0.1 0.17 0.22 0.04 0.111 ...
##  $ pt            : num  28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
##  $ aptt          : num  37.8 30.9 30.2 33.4 34.5 ...
##  $ los           : int  6 2 14 5 12 4 1 6 9 16 ...
##  $ pt_id         : int  6 17 24 32 33 37 38 40 42 44 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1002 1002 1003 1003 1003 1003 1003 1003 ...
##  $ year          : int  2017 2019 2022 2022 2022 2017 2022 2020 2021 2022 ...
##  $ age_cate      : num  0 1 1 0 1 0 0 1 1 0 ...
##  $ bmi_cate      : num  0 0 1 0 0 1 1 1 0 1 ...
##  $ gcs_cate      : num  1 1 1 1 1 1 1 1 1 0 ...
##  $ cci_cate      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ bt_cate       : num  0 0 0 1 1 0 0 1 1 0 ...
##  $ sBP_cate      : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ dBP_cate      : num  0 0 1 0 0 0 0 0 0 1 ...
##  $ hr_cate       : num  0 0 0 1 1 0 0 0 0 0 ...
##  $ bil_cate      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ ast_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alt_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ wbc_cate      : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ hb_cate       : num  0 1 1 0 0 0 1 0 0 1 ...
##  $ plt_cate      : num  1 0 0 0 0 1 0 1 0 0 ...
##  $ tp_cate       : num  1 1 1 1 1 0 1 0 0 0 ...
##  $ alb_cate      : num  1 1 0 0 0 0 0 0 0 1 ...
##  $ cre_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ crp_cate      : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ pt_cate       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ aptt_cate     : num  0 0 0 0 0 0 0 0 0 0 ...

tableの作成

Createtableone

col_fact_cate=c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality","age_cate","bmi_cate","gcs_cate","cci_cate","bt_cate","sBP_cate","dBP_cate","hr_cate","bil_cate","ast_cate","alt_cate","wbc_cate","hb_cate","plt_cate","tp_cate","alb_cate","cre_cate","crp_cate","pt_cate","aptt_cate")

# Create your table
val_imp %>%  
  select(c(col_fact_cate)) %>% 
  CreateTableOne(vars = c(col_fact_cate), strata="hosp_mortality",factorVars = col_fact_cate, addOverall = T) -> tableone_dev_imp_cate


# Print your table
print(tableone_dev_imp_cate, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE) 
##                         Stratified by hosp_mortality
##                          Overall      0            1           p      test
##   n                      444          399          45                     
##   sex = F (%)            115 ( 25.9)  103 ( 25.8)  12 ( 26.7)   1.000     
##   barthel (%)                                                  <0.001     
##      0                   178 ( 40.1)  174 ( 43.6)   4 (  8.9)             
##      1                   138 ( 31.1)  127 ( 31.8)  11 ( 24.4)             
##      2                   128 ( 28.8)   98 ( 24.6)  30 ( 66.7)             
##   child_score (%)                                              <0.001     
##      0                    84 ( 18.9)   82 ( 20.6)   2 (  4.4)             
##      1                   239 ( 53.8)  226 ( 56.6)  13 ( 28.9)             
##      2                   121 ( 27.3)   91 ( 22.8)  30 ( 66.7)             
##   pad = 1 (%)              1 (  0.2)    1 (  0.3)   0 (  0.0)   1.000     
##   stroke = 1 (%)          11 (  2.5)    8 (  2.0)   3 (  6.7)   0.161     
##   dimentia = 1 (%)         6 (  1.4)    5 (  1.3)   1 (  2.2)   1.000     
##   ch_lung = 1 (%)          4 (  0.9)    3 (  0.8)   1 (  2.2)   0.875     
##   rheumati = 0 (%)       444 (100.0)  399 (100.0)  45 (100.0)      NA     
##   pept_ulcer = 1 (%)      42 (  9.5)   40 ( 10.0)   2 (  4.4)   0.345     
##   dm = 1 (%)             104 ( 23.4)   96 ( 24.1)   8 ( 17.8)   0.449     
##   dm_compli = 1 (%)        7 (  1.6)    6 (  1.5)   1 (  2.2)   1.000     
##   paralysis = 0 (%)      444 (100.0)  399 (100.0)  45 (100.0)      NA     
##   malignancy = 1 (%)      57 ( 12.8)   50 ( 12.5)   7 ( 15.6)   0.734     
##   meta_tumor = 1 (%)      11 (  2.5)    7 (  1.8)   4 (  8.9)   0.016     
##   aids = 0 (%)           444 (100.0)  399 (100.0)  45 (100.0)      NA     
##   eGFR30 = 1 (%)          36 (  8.1)   22 (  5.5)  14 ( 31.1)  <0.001     
##   hd = 1 (%)              10 (  2.3)    7 (  1.8)   3 (  6.7)   0.115     
##   hcc = 1 (%)             64 ( 14.4)   56 ( 14.0)   8 ( 17.8)   0.650     
##   alcohol = 1 (%)        241 ( 54.3)  212 ( 53.1)  29 ( 64.4)   0.198     
##   past_rupture = 1 (%)   121 ( 27.3)  115 ( 28.8)   6 ( 13.3)   0.042     
##   antiplate = 1 (%)        3 (  0.7)    2 (  0.5)   1 (  2.2)   0.707     
##   anticoag = 1 (%)         3 (  0.7)    3 (  0.8)   0 (  0.0)   1.000     
##   antithro = 1 (%)         6 (  1.4)    5 (  1.3)   1 (  2.2)   1.000     
##   nsaids = 1 (%)           4 (  0.9)    4 (  1.0)   0 (  0.0)   1.000     
##   steroid = 1 (%)          3 (  0.7)    3 (  0.8)   0 (  0.0)   1.000     
##   beta = 1 (%)            40 (  9.0)   40 ( 10.0)   0 (  0.0)   0.051     
##   vaso = 1 (%)            27 (  6.1)   18 (  4.5)   9 ( 20.0)  <0.001     
##   ffp = 1 (%)            154 ( 34.7)  126 ( 31.6)  28 ( 62.2)  <0.001     
##   pc = 1 (%)              11 (  2.5)    6 (  1.5)   5 ( 11.1)   0.001     
##   albner = 1 (%)          33 (  7.4)   26 (  6.5)   7 ( 15.6)   0.059     
##   shock = 1 (%)          196 ( 44.1)  153 ( 38.3)  43 ( 95.6)  <0.001     
##   hosp_mortality = 1 (%)  45 ( 10.1)    0 (  0.0)  45 (100.0)  <0.001     
##   age_cate = 1 (%)       226 ( 50.9)  200 ( 50.1)  26 ( 57.8)   0.414     
##   bmi_cate = 1 (%)       139 ( 31.3)  126 ( 31.6)  13 ( 28.9)   0.842     
##   gcs_cate = 1 (%)       434 ( 97.7)  391 ( 98.0)  43 ( 95.6)   0.606     
##   cci_cate = 1 (%)        71 ( 16.0)   62 ( 15.5)   9 ( 20.0)   0.576     
##   bt_cate = 1 (%)        150 ( 33.8)  135 ( 33.8)  15 ( 33.3)   1.000     
##   sBP_cate = 1 (%)       102 ( 23.0)   68 ( 17.0)  34 ( 75.6)  <0.001     
##   dBP_cate = 1 (%)       152 ( 34.2)  118 ( 29.6)  34 ( 75.6)  <0.001     
##   hr_cate = 1 (%)        127 ( 28.6)  102 ( 25.6)  25 ( 55.6)  <0.001     
##   bil_cate = 1 (%)        50 ( 11.3)   38 (  9.5)  12 ( 26.7)   0.001     
##   ast_cate = 1 (%)        30 (  6.8)   21 (  5.3)   9 ( 20.0)   0.001     
##   alt_cate = 1 (%)        25 (  5.6)   18 (  4.5)   7 ( 15.6)   0.007     
##   wbc_cate = 1 (%)        92 ( 20.7)   75 ( 18.8)  17 ( 37.8)   0.005     
##   hb_cate = 1 (%)        164 ( 36.9)  138 ( 34.6)  26 ( 57.8)   0.004     
##   plt_cate = 1 (%)       208 ( 46.8)  187 ( 46.9)  21 ( 46.7)   1.000     
##   tp_cate = 1 (%)        186 ( 41.9)  156 ( 39.1)  30 ( 66.7)   0.001     
##   alb_cate = 1 (%)       177 ( 39.9)  141 ( 35.3)  36 ( 80.0)  <0.001     
##   cre_cate = 1 (%)        51 ( 11.5)   33 (  8.3)  18 ( 40.0)  <0.001     
##   crp_cate = 1 (%)        47 ( 10.6)   34 (  8.5)  13 ( 28.9)  <0.001     
##   pt_cate = 1 (%)        138 ( 31.1)  107 ( 26.8)  31 ( 68.9)  <0.001     
##   aptt_cate = 1 (%)       27 (  6.1)   11 (  2.8)  16 ( 35.6)  <0.001     
##                         Stratified by hosp_mortality
##                          SMD    Missing
##   n                                    
##   sex = F (%)             0.019 0.0    
##   barthel (%)             1.052 0.0    
##      0                                 
##      1                                 
##      2                                 
##   child_score (%)         1.014 0.0    
##      0                                 
##      1                                 
##      2                                 
##   pad = 1 (%)             0.071 0.0    
##   stroke = 1 (%)          0.230 0.0    
##   dimentia = 1 (%)        0.074 0.0    
##   ch_lung = 1 (%)         0.122 0.0    
##   rheumati = 0 (%)       <0.001 0.0    
##   pept_ulcer = 1 (%)      0.217 0.0    
##   dm = 1 (%)              0.155 0.0    
##   dm_compli = 1 (%)       0.053 0.0    
##   paralysis = 0 (%)      <0.001 0.0    
##   malignancy = 1 (%)      0.087 0.0    
##   meta_tumor = 1 (%)      0.322 0.0    
##   aids = 0 (%)           <0.001 0.0    
##   eGFR30 = 1 (%)          0.701 0.0    
##   hd = 1 (%)              0.246 0.0    
##   hcc = 1 (%)             0.102 0.0    
##   alcohol = 1 (%)         0.231 0.0    
##   past_rupture = 1 (%)    0.387 0.0    
##   antiplate = 1 (%)       0.149 0.0    
##   anticoag = 1 (%)        0.123 0.0    
##   antithro = 1 (%)        0.074 0.0    
##   nsaids = 1 (%)          0.142 0.0    
##   steroid = 1 (%)         0.123 0.0    
##   beta = 1 (%)            0.472 0.0    
##   vaso = 1 (%)            0.486 0.0    
##   ffp = 1 (%)             0.645 0.0    
##   pc = 1 (%)              0.403 0.0    
##   albner = 1 (%)          0.292 0.0    
##   shock = 1 (%)           1.532 0.0    
##   hosp_mortality = 1 (%)    NaN 0.0    
##   age_cate = 1 (%)        0.154 0.0    
##   bmi_cate = 1 (%)        0.059 0.0    
##   gcs_cate = 1 (%)        0.138 0.0    
##   cci_cate = 1 (%)        0.117 0.0    
##   bt_cate = 1 (%)         0.011 0.0    
##   sBP_cate = 1 (%)        1.449 0.0    
##   dBP_cate = 1 (%)        1.037 0.0    
##   hr_cate = 1 (%)         0.641 0.0    
##   bil_cate = 1 (%)        0.457 0.0    
##   ast_cate = 1 (%)        0.455 0.0    
##   alt_cate = 1 (%)        0.374 0.0    
##   wbc_cate = 1 (%)        0.431 0.0    
##   hb_cate = 1 (%)         0.478 0.0    
##   plt_cate = 1 (%)        0.004 0.0    
##   tp_cate = 1 (%)         0.575 0.0    
##   alb_cate = 1 (%)        1.013 0.0    
##   cre_cate = 1 (%)        0.798 0.0    
##   crp_cate = 1 (%)        0.541 0.0    
##   pt_cate = 1 (%)         0.929 0.0    
##   aptt_cate = 1 (%)       0.917 0.0

tbl_summary

# specify your data and variables
tbl_summary(data = val_imp, 
            by = "hosp_mortality",
            type = list(gcs ~ "continuous", year ~ "categorical"),
            statistic = all_continuous() ~ "{median} ({p25}, {p75})",
            digits = all_continuous() ~ c(0, 2))
Characteristic 0, N = 3991 1, N = 451
sex
    M 296 (74%) 33 (73%)
    F 103 (26%) 12 (27%)
barthel
    0 174 (44%) 4 (8.9%)
    1 127 (32%) 11 (24%)
    2 98 (25%) 30 (67%)
child_score
    0 82 (21%) 2 (4.4%)
    1 226 (57%) 13 (29%)
    2 91 (23%) 30 (67%)
pad
    0 398 (100%) 45 (100%)
    1 1 (0.3%) 0 (0%)
stroke
    0 391 (98%) 42 (93%)
    1 8 (2.0%) 3 (6.7%)
dimentia
    0 394 (99%) 44 (98%)
    1 5 (1.3%) 1 (2.2%)
ch_lung
    0 396 (99%) 44 (98%)
    1 3 (0.8%) 1 (2.2%)
rheumati
    0 399 (100%) 45 (100%)
pept_ulcer
    0 359 (90%) 43 (96%)
    1 40 (10%) 2 (4.4%)
dm
    0 303 (76%) 37 (82%)
    1 96 (24%) 8 (18%)
dm_compli
    0 393 (98%) 44 (98%)
    1 6 (1.5%) 1 (2.2%)
paralysis
    0 399 (100%) 45 (100%)
malignancy
    0 349 (87%) 38 (84%)
    1 50 (13%) 7 (16%)
meta_tumor
    0 392 (98%) 41 (91%)
    1 7 (1.8%) 4 (8.9%)
aids
    0 399 (100%) 45 (100%)
eGFR30
    0 377 (94%) 31 (69%)
    1 22 (5.5%) 14 (31%)
hd
    0 392 (98%) 42 (93%)
    1 7 (1.8%) 3 (6.7%)
hcc
    0 343 (86%) 37 (82%)
    1 56 (14%) 8 (18%)
alcohol
    0 187 (47%) 16 (36%)
    1 212 (53%) 29 (64%)
past_rupture
    0 284 (71%) 39 (87%)
    1 115 (29%) 6 (13%)
antiplate
    0 397 (99%) 44 (98%)
    1 2 (0.5%) 1 (2.2%)
anticoag
    0 396 (99%) 45 (100%)
    1 3 (0.8%) 0 (0%)
antithro
    0 394 (99%) 44 (98%)
    1 5 (1.3%) 1 (2.2%)
nsaids
    0 395 (99%) 45 (100%)
    1 4 (1.0%) 0 (0%)
steroid
    0 396 (99%) 45 (100%)
    1 3 (0.8%) 0 (0%)
beta
    0 359 (90%) 45 (100%)
    1 40 (10%) 0 (0%)
vaso
    0 381 (95%) 36 (80%)
    1 18 (4.5%) 9 (20%)
ffp
    0 273 (68%) 17 (38%)
    1 126 (32%) 28 (62%)
pc
    0 393 (98%) 40 (89%)
    1 6 (1.5%) 5 (11%)
albner
    0 373 (93%) 38 (84%)
    1 26 (6.5%) 7 (16%)
shock
    0 246 (62%) 2 (4.4%)
    1 153 (38%) 43 (96%)
age 60 (50.00, 70) 64 (52.00, 70)
bmi 23 (20.68, 26) 23 (22.31, 25)
smoke 0 (0.00, 340) 0 (0.00, 296)
child_num 8 (7.00, 9) 11 (9.00, 12)
gcs 15 (15.00, 15) 15 (14.00, 15)
cci_num 4 (4.00, 5) 4 (4.00, 5)
map 2 (0.00, 4) 4 (4.00, 8)
bt 37 (36.50, 37) 37 (36.30, 37)
sBP 93 (82.50, 101) 68 (56.00, 79)
dBP 56 (48.00, 64) 43 (34.00, 49)
hr 83 (72.00, 100) 105 (91.00, 117)
bil 2 (1.00, 3) 3 (1.70, 5)
ast 48 (31.50, 84) 81 (40.00, 171)
alt 29 (19.00, 42) 34 (19.00, 69)
wbc 7,960 (5,700.00, 10,760) 10,000 (8,900.00, 13,833)
hb 9 (7.30, 11) 8 (6.90, 9)
plt 102 (76.00, 140) 114 (56.00, 146)
tp 6 (5.70, 7) 6 (5.00, 6)
alb 3 (2.55, 3) 2 (1.80, 3)
eGFR 72 (54.21, 93) 42 (28.38, 57)
bun 24 (16.40, 36) 33 (20.20, 47)
cre 1 (0.64, 1) 1 (0.94, 2)
crp 0 (0.12, 1) 1 (0.19, 2)
pt 61 (49.00, 73) 40 (31.00, 53)
aptt 32 (29.70, 36) 42 (35.50, 52)
los 7 (5.00, 13) 6 (2.00, 12)
pt_id 488 (232.50, 640) 510 (299.00, 738)
hosp_num
    1 326 (82%) 39 (87%)
    2 46 (12%) 4 (8.9%)
    3 12 (3.0%) 1 (2.2%)
    4 10 (2.5%) 0 (0%)
    5 3 (0.8%) 1 (2.2%)
    6 2 (0.5%) 0 (0%)
hosp_id 1,022 (1,005.00, 1,024) 1,022 (1,006.00, 1,062)
year
    2017 57 (14%) 7 (16%)
    2018 53 (13%) 9 (20%)
    2019 63 (16%) 9 (20%)
    2020 74 (19%) 8 (18%)
    2021 76 (19%) 7 (16%)
    2022 76 (19%) 5 (11%)
age_cate 200 (50%) 26 (58%)
bmi_cate 126 (32%) 13 (29%)
gcs_cate 391 (98%) 43 (96%)
cci_cate 62 (16%) 9 (20%)
bt_cate 135 (34%) 15 (33%)
sBP_cate 68 (17%) 34 (76%)
dBP_cate 118 (30%) 34 (76%)
hr_cate 102 (26%) 25 (56%)
bil_cate 38 (9.5%) 12 (27%)
ast_cate 21 (5.3%) 9 (20%)
alt_cate 18 (4.5%) 7 (16%)
wbc_cate 75 (19%) 17 (38%)
hb_cate 138 (35%) 26 (58%)
plt_cate 187 (47%) 21 (47%)
tp_cate 156 (39%) 30 (67%)
alb_cate 141 (35%) 36 (80%)
cre_cate 33 (8.3%) 18 (40%)
crp_cate 34 (8.5%) 13 (29%)
pt_cate 107 (27%) 31 (69%)
aptt_cate 11 (2.8%) 16 (36%)
1 n (%); Median (IQR)

baerthelはダミー変数を追加する。

# ダミー変数を作成
dummy_vars_val <- model.matrix(~barthel, data = val_imp)

# データフレームに追加
val_imp <- cbind(val_imp, dummy_vars_val)

intercept列は抜く

val_imp <- val_imp[ , !(names(val_imp) %in% "(Intercept)")]

確認

str(val_imp)
## 'data.frame':    444 obs. of  84 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 2 3 2 1 3 1 2 1 1 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ age           : int  47 62 82 57 69 47 50 68 75 53 ...
##  $ bmi           : num  21 20.8 26.9 23.2 23.6 ...
##  $ smoke         : int  270 0 0 0 0 135 428 0 0 0 ...
##  $ child_num     : int  11 9 8 8 8 6 7 7 8 14 ...
##  $ gcs           : int  15 13 15 15 15 15 15 15 15 3 ...
##  $ cci_num       : int  4 4 4 3 6 5 4 4 5 5 ...
##  $ map           : int  0 8 6 2 4 0 2 4 4 4 ...
##  $ bt            : num  36.4 36.8 36.9 37.1 37.2 ...
##  $ sBP           : int  102 88 95 111 125 116 102 96 97 71 ...
##  $ dBP           : int  67 51 49 68 70 84 88 55 54 49 ...
##  $ hr            : int  83 78 60 101 121 56 79 72 75 56 ...
##  $ bil           : num  7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
##  $ ast           : num  112 56 37 42 74 18 19 24 31 62 ...
##  $ alt           : num  53 30 19 35 43 13 18 16 25 44 ...
##  $ wbc           : num  9800 14600 3350 12030 6800 ...
##  $ hb            : num  12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
##  $ plt           : num  69 437 109 107 107 60 140 84 152 178 ...
##  $ tp            : num  5.7 5.6 5.8 5.4 5.9 ...
##  $ alb           : num  2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
##  $ eGFR          : num  89.3 65.8 46.6 94.2 90.7 ...
##  $ bun           : num  13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
##  $ cre           : num  0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
##  $ crp           : num  0.1 0.17 0.22 0.04 0.111 ...
##  $ pt            : num  28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
##  $ aptt          : num  37.8 30.9 30.2 33.4 34.5 ...
##  $ los           : int  6 2 14 5 12 4 1 6 9 16 ...
##  $ pt_id         : int  6 17 24 32 33 37 38 40 42 44 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1002 1002 1003 1003 1003 1003 1003 1003 ...
##  $ year          : int  2017 2019 2022 2022 2022 2017 2022 2020 2021 2022 ...
##  $ age_cate      : num  0 1 1 0 1 0 0 1 1 0 ...
##  $ bmi_cate      : num  0 0 1 0 0 1 1 1 0 1 ...
##  $ gcs_cate      : num  1 1 1 1 1 1 1 1 1 0 ...
##  $ cci_cate      : num  0 0 0 0 1 0 0 0 0 0 ...
##  $ bt_cate       : num  0 0 0 1 1 0 0 1 1 0 ...
##  $ sBP_cate      : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ dBP_cate      : num  0 0 1 0 0 0 0 0 0 1 ...
##  $ hr_cate       : num  0 0 0 1 1 0 0 0 0 0 ...
##  $ bil_cate      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ ast_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ alt_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ wbc_cate      : num  0 1 0 1 0 0 0 0 0 0 ...
##  $ hb_cate       : num  0 1 1 0 0 0 1 0 0 1 ...
##  $ plt_cate      : num  1 0 0 0 0 1 0 1 0 0 ...
##  $ tp_cate       : num  1 1 1 1 1 0 1 0 0 0 ...
##  $ alb_cate      : num  1 1 0 0 0 0 0 0 0 1 ...
##  $ cre_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ crp_cate      : num  0 0 0 0 0 0 0 0 0 1 ...
##  $ pt_cate       : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ aptt_cate     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ barthel1      : num  1 0 1 0 0 0 1 0 0 0 ...
##  $ barthel2      : num  0 1 0 0 1 0 0 0 0 1 ...

開発データにもbaerthelはダミー変数を追加する。

# ダミー変数を作成
dummy_vars <- model.matrix(~barthel, data = dev_imp)

# データフレームに追加
dev_imp <- cbind(dev_imp, dummy_vars)

確認

str(dev_imp)
## 'data.frame':    536 obs. of  87 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 3 3 1 2 3 1 1 1 3 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 2 2 2 2 2 2 3 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
##  $ hosp_mortality: num  0 0 0 0 0 0 0 0 1 1 ...
##  $ age           : int  50 80 59 44 67 65 49 73 69 62 ...
##  $ bmi           : num  23.1 25.3 23 14.5 22.5 ...
##  $ smoke         : int  0 0 0 240 0 0 245 1000 0 0 ...
##  $ child_num     : int  11 6 9 8 8 9 7 9 15 11 ...
##  $ gcs           : int  15 15 15 15 15 15 15 15 6 15 ...
##  $ cci_num       : int  4 4 3 4 4 3 4 4 4 4 ...
##  $ map           : int  0 2 0 6 2 2 0 4 14 6 ...
##  $ bt            : num  36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
##  $ sBP           : int  78 88 100 69 66 84 132 90 52 58 ...
##  $ dBP           : int  48 49 56 44 40 46 69 54 37 37 ...
##  $ hr            : int  118 72 110 104 72 127 83 114 98 106 ...
##  $ bil           : num  2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
##  $ ast           : num  217 31 60 129 52 90 154 55 96 121 ...
##  $ alt           : num  63 22 40 46 36 19 109 20 87 63 ...
##  $ wbc           : num  7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
##  $ hb            : num  6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
##  $ plt           : num  115 77 74 162 63 93 132 124 168 84 ...
##  $ tp            : num  6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
##  $ alb           : num  2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
##  $ eGFR          : num  58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num  13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
##  $ cre           : num  1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
##  $ crp           : num  0.92 0.29 0.68 0.29 0.29 ...
##  $ pt            : num  37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
##  $ aptt          : num  29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
##  $ los           : int  12 7 0 10 3 2 1 8 0 16 ...
##  $ pt_id         : int  1 2 3 4 5 7 8 9 10 11 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ year          : int  2012 2011 2010 2011 2010 2010 2010 2010 2011 2012 ...
##  $ age_cate      : num  0 1 0 0 1 1 0 1 1 1 ...
##  $ bmi_cate      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ gcs_cate      : num  1 1 1 1 1 1 1 1 0 1 ...
##  $ cci_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bt_cate       : num  0 0 0 0 0 1 1 1 0 0 ...
##  $ sBP_cate      : num  1 0 0 1 1 0 0 0 1 1 ...
##  $ dBP_cate      : num  1 1 0 1 1 1 0 0 1 1 ...
##  $ hr_cate       : num  1 0 1 1 0 1 0 1 0 1 ...
##  $ bil_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ ast_cate      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ alt_cate      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ wbc_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ hb_cate       : num  1 0 0 0 1 0 0 1 1 0 ...
##  $ plt_cate      : num  0 1 1 0 1 1 0 0 0 1 ...
##  $ tp_cate       : num  0 1 0 0 1 0 0 1 1 0 ...
##  $ alb_cate      : num  1 0 0 0 0 0 0 1 1 1 ...
##  $ cre_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ crp_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ pt_cate       : num  1 0 1 1 0 1 1 1 1 0 ...
##  $ aptt_cate     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ fitted        : num  0.2985 0.0491 0.0808 0.1244 0.2985 ...
##  $ diff2         : num  0.08912 0.00241 0.00653 0.01548 0.08912 ...
##  $ (Intercept)   : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ barthel1      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ barthel2      : num  1 1 0 0 1 0 0 0 1 1 ...

intercept列は抜く

dev_imp <- dev_imp[ , !(names(dev_imp) %in% "(Intercept)")]
dev_imp <- dev_imp[ , !(names(dev_imp) %in% "fitted")]
dev_imp <- dev_imp[ , !(names(dev_imp) %in% "diff2")]

確認

str(dev_imp)
## 'data.frame':    536 obs. of  84 variables:
##  $ sex           : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
##  $ barthel       : Factor w/ 3 levels "0","1","2": 3 3 1 2 3 1 1 1 3 3 ...
##  $ child_score   : Factor w/ 3 levels "0","1","2": 3 1 2 2 2 2 2 2 3 3 ...
##  $ pad           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ stroke        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dimentia      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ ch_lung       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ rheumati      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ pept_ulcer    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ dm            : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
##  $ dm_compli     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ paralysis     : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ malignancy    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ meta_tumor    : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ aids          : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
##  $ eGFR30        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ hd            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ hcc           : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ alcohol       : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
##  $ past_rupture  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antiplate     : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ anticoag      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ antithro      : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ nsaids        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ steroid       : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ beta          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ vaso          : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ ffp           : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
##  $ pc            : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
##  $ albner        : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
##  $ shock         : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
##  $ hosp_mortality: num  0 0 0 0 0 0 0 0 1 1 ...
##  $ age           : int  50 80 59 44 67 65 49 73 69 62 ...
##  $ bmi           : num  23.1 25.3 23 14.5 22.5 ...
##  $ smoke         : int  0 0 0 240 0 0 245 1000 0 0 ...
##  $ child_num     : int  11 6 9 8 8 9 7 9 15 11 ...
##  $ gcs           : int  15 15 15 15 15 15 15 15 6 15 ...
##  $ cci_num       : int  4 4 3 4 4 3 4 4 4 4 ...
##  $ map           : int  0 2 0 6 2 2 0 4 14 6 ...
##  $ bt            : num  36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
##  $ sBP           : int  78 88 100 69 66 84 132 90 52 58 ...
##  $ dBP           : int  48 49 56 44 40 46 69 54 37 37 ...
##  $ hr            : int  118 72 110 104 72 127 83 114 98 106 ...
##  $ bil           : num  2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
##  $ ast           : num  217 31 60 129 52 90 154 55 96 121 ...
##  $ alt           : num  63 22 40 46 36 19 109 20 87 63 ...
##  $ wbc           : num  7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
##  $ hb            : num  6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
##  $ plt           : num  115 77 74 162 63 93 132 124 168 84 ...
##  $ tp            : num  6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
##  $ alb           : num  2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
##  $ eGFR          : num  58 57.7 63.7 112.4 123.6 ...
##  $ bun           : num  13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
##  $ cre           : num  1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
##  $ crp           : num  0.92 0.29 0.68 0.29 0.29 ...
##  $ pt            : num  37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
##  $ aptt          : num  29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
##  $ los           : int  12 7 0 10 3 2 1 8 0 16 ...
##  $ pt_id         : int  1 2 3 4 5 7 8 9 10 11 ...
##  $ hosp_num      : int  1 1 1 1 1 1 1 1 1 1 ...
##  $ hosp_id       : int  1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
##  $ year          : int  2012 2011 2010 2011 2010 2010 2010 2010 2011 2012 ...
##  $ age_cate      : num  0 1 0 0 1 1 0 1 1 1 ...
##  $ bmi_cate      : num  0 1 0 0 0 0 0 0 0 0 ...
##  $ gcs_cate      : num  1 1 1 1 1 1 1 1 0 1 ...
##  $ cci_cate      : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ bt_cate       : num  0 0 0 0 0 1 1 1 0 0 ...
##  $ sBP_cate      : num  1 0 0 1 1 0 0 0 1 1 ...
##  $ dBP_cate      : num  1 1 0 1 1 1 0 0 1 1 ...
##  $ hr_cate       : num  1 0 1 1 0 1 0 1 0 1 ...
##  $ bil_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ ast_cate      : num  1 0 0 0 0 0 0 0 0 0 ...
##  $ alt_cate      : num  0 0 0 0 0 0 1 0 0 0 ...
##  $ wbc_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ hb_cate       : num  1 0 0 0 1 0 0 1 1 0 ...
##  $ plt_cate      : num  0 1 1 0 1 1 0 0 0 1 ...
##  $ tp_cate       : num  0 1 0 0 1 0 0 1 1 0 ...
##  $ alb_cate      : num  1 0 0 0 0 0 0 1 1 1 ...
##  $ cre_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ crp_cate      : num  0 0 0 0 0 0 0 0 1 0 ...
##  $ pt_cate       : num  1 0 1 1 0 1 1 1 1 0 ...
##  $ aptt_cate     : num  0 0 0 0 0 0 0 0 0 0 ...
##  $ barthel1      : num  0 0 0 1 0 0 0 0 0 0 ...
##  $ barthel2      : num  1 1 0 0 1 0 0 0 1 1 ...

スコアリングモデル作成

# モデルのサマリーを表示
fit_reduced_model <- lrm(hosp_mortality ~ barthel + shock + gcs_cate + crp_cate, data = dev_imp)
# モデルの係数を表示
coefficients(fit_reduced_model)
##  Intercept  barthel=1  barthel=2    shock=1   gcs_cate   crp_cate 
## -2.2705171  0.4802429  1.5770981  2.1101229 -2.2710242  1.4483711
# datadistを設定
ddist <- datadist(dev_imp)
options(datadist='ddist')

# モデルのサマリーを表示
fit_reduced_model <- lrm(hosp_mortality ~ barthel + shock + gcs_cate + crp_cate, data = dev_imp)
summary(fit_reduced_model)
##              Effects              Response : hosp_mortality 
## 
##  Factor        Low High Diff. Effect   S.E.    Lower 0.95 Upper 0.95
##  gcs_cate      0   1     1    -2.27100 0.48224 -3.216200  -1.32580  
##   Odds Ratio   0   1     1     0.10321      NA  0.040107   0.26558  
##  crp_cate      0   1     1     1.44840 0.42289  0.619520   2.27720  
##   Odds Ratio   0   1     1     4.25620      NA  1.858000   9.74960  
##  barthel - 1:0 1   2    NA     0.48024 0.52127 -0.541430   1.50190  
##   Odds Ratio   1   2    NA     1.61650      NA  0.581910   4.49030  
##  barthel - 2:0 1   3    NA     1.57710 0.45345  0.688360   2.46580  
##   Odds Ratio   1   3    NA     4.84090      NA  1.990400  11.77300  
##  shock - 1:0   1   2    NA     2.11010 0.37827  1.368700   2.85150  
##   Odds Ratio   1   2    NA     8.24930      NA  3.930300  17.31400
# 新たな予測値を計算
#new_pred <- -2.2705171 +
#            0.5 * dev_imp$barthel1 +
#            1.5 * dev_imp$barthel2 +
#            2 * dev_imp$shock +
#            -2.0 * dev_imp$gcs_cate +
#            1.5 * dev_imp$crp_cate

スコアを列に加える。

dev_imp$barthel1_score <- ifelse(dev_imp$barthel1 == 1, 1, 0)
dev_imp$barthel2_score <- ifelse(dev_imp$barthel2 == 1, 3, 0)
dev_imp$shock_score <- ifelse(dev_imp$shock == 1, 4, 0)
dev_imp$gcs_score <- ifelse(dev_imp$gcs_cate == 1, -4, 0)
dev_imp$crp_score <- ifelse(dev_imp$crp_cate == 1, 3, 0)
dev_imp$sum_score <- dev_imp$barthel1_score + dev_imp$barthel2_score + dev_imp$shock_score + dev_imp$gcs_score + dev_imp$crp_score

val_imp$barthel1_score <- ifelse(val_imp$barthel1 == 1, 1, 0)
val_imp$barthel2_score <- ifelse(val_imp$barthel2 == 1, 3, 0)
val_imp$shock_score <- ifelse(val_imp$shock == 1, 4, 0)
val_imp$gcs_score <- ifelse(val_imp$gcs_cate == 1, -4, 0)
val_imp$crp_score <- ifelse(val_imp$crp_cate == 1, 3, 0)
val_imp$sum_score <- val_imp$barthel1_score + val_imp$barthel2_score + val_imp$shock_score + val_imp$gcs_score + val_imp$crp_score

開発データのROC曲線の描図とAUC

# 開発データセットのROCカーブとAUC
roc_obj_dev <- roc(dev_imp$hosp_mortality, dev_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc_obj_dev, main="ROC curve for the development data")

# Calculate AUC and its confidence interval for development data
auc_roc_dev <- auc(roc_obj_dev)
ci_dev <- ci.auc(roc_obj_dev)

# Print the AUC and its confidence interval for development data
cat("AUC for the development data: ", auc_roc_dev, "\n")
## AUC for the development data:  0.8775851
cat("95% CI for AUC (development): (", ci_dev[1], ",", ci_dev[2], ")\n")
## 95% CI for AUC (development): ( 0.8394229 , 0.8775851 )

検証データのROC曲線の描図とAUC

# 検証データセットのROCカーブとAUC
roc_obj_val <- roc(val_imp$hosp_mortality, val_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc_obj_val, main="ROC curve for the validation data")

# Calculate AUC and its confidence interval for validation data
auc_roc_val <- auc(roc_obj_val)
ci_val <- ci.auc(roc_obj_val)

# Print the AUC and its confidence interval for validation data
cat("AUC for the validation data: ", auc_roc_val, "\n")
## AUC for the validation data:  0.8677249
cat("95% CI for AUC (validation): (", ci_val[1], ",", ci_val[2], ")\n")
## 95% CI for AUC (validation): ( 0.8189511 , 0.8677249 )

開発データ・検証データを共に描図する

# 開発データと検証データのROCオブジェクトの計算
roc_obj_dev <- roc(dev_imp$hosp_mortality, dev_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_obj_val <- roc(val_imp$hosp_mortality, val_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# 開発データと検証データのAUCの計算
auc_dev <- auc(roc_obj_dev)
auc_val <- auc(roc_obj_val)

# ROC dataをデータフレームに変換
roc_data <- data.frame(
  Specificity = c(roc_obj_dev$specificities, roc_obj_val$specificities),
  Sensitivity = c(roc_obj_dev$sensitivities, roc_obj_val$sensitivities),
  ROC = c(rep("Development", length(roc_obj_dev$sensitivities)), rep("Validation", length(roc_obj_val$sensitivities)))
)

# Plot ROC curve
library(ggplot2)
ggplot(data = roc_data, aes(x = Specificity, y = Sensitivity, color = ROC)) +
  geom_line() +
  scale_x_reverse(limits = c(1, 0)) +
  labs(x = "Specificity", y = "Sensitivity", title = "ROC curves") +
  theme_minimal() +
  scale_color_manual(values = c("Development" = "blue", "Validation" = "red")) +
  annotate("text", x = 0.75, y = 0.25, label = paste("AUC for Development = ", round(auc_dev, 2)), color="blue") +
  annotate("text", x = 0.75, y = 0.15, label = paste("AUC for Validation = ", round(auc_val, 2)), color="red")

# dev_impデータセットのsum_scoreのヒストグラム
hist(dev_imp$sum_score, main = "Histogram of total score for development data", xlab = "sum_score")

# val_impデータセットのsum_scoreのヒストグラム
hist(val_imp$sum_score, main = "Histogram of total score for validation data", xlab = "sum_score")

## calibration

スコアリング前のモデルでのcalibration評価

まずはモデル式を定義

#モデルの構築
dev_imp <-
  dev_imp|>  
  mutate(hosp_mortality=as.numeric(hosp_mortality)) #numericにしないと回帰が回らない
val_imp <- val_imp |>
  mutate(hosp_mortality = as.numeric(hosp_mortality) - 1) #valは1,2になってしまっているため対処

model <- glm(hosp_mortality ~ barthel + shock + gcs_cate +crp_cate, family = binomial, data = dev_imp)

# サマリー統計量の取得
summary_stats <- summary(model)

# 係数(coefficient)、標準誤差(SE)、およびp値の取得
coefficients <- summary_stats$coefficients

# 各項目の結果を表示
print(coefficients)
##               Estimate Std. Error    z value        Pr(>|z|)
## (Intercept) -2.2705171  0.6399523 -3.5479473 0.0003882458953
## barthel1     0.4802429  0.5212677  0.9212981 0.3568948301549
## barthel2     1.5770981  0.4534406  3.4780696 0.0005050387733
## shock1       2.1101229  0.3782694  5.5783599 0.0000000242797
## gcs_cate    -2.2710242  0.4822417 -4.7093073 0.0000024856009
## crp_cate     1.4483711  0.4228898  3.4249373 0.0006149413370

開発データ,モデル式でのcalibrationのための割合比較

# 予測確率の計算
pred_dev <- predict(model, data = dev_imp, type = "response")
dev_imp$predicted <- pred_dev

# スコアごとに要約
dev_by <- dplyr::group_by(dev_imp, sum_score)
table_dev <- dplyr::summarize(dev_by,
      hosp_mortality = sum(hosp_mortality), # outcomeは1/0
      n = n(),
      mortality_rate = round(hosp_mortality / n, 3),
      prediction = round(mean(predicted), 3)
    )
table_dev
## # A tibble: 13 × 5
##    sum_score hosp_mortality     n mortality_rate prediction
##        <dbl>          <dbl> <int>          <dbl>      <dbl>
##  1        -4              0   131          0          0.011
##  2        -3              3   102          0.029      0.017
##  3        -1              3    63          0.048      0.048
##  4         0              5    47          0.106      0.08 
##  5         1              4    53          0.075      0.125
##  6         2              1     7          0.143      0.18 
##  7         3             30    95          0.316      0.3  
##  8         4              3     6          0.5        0.391
##  9         5              1     1          1          0.579
## 10         6              7    11          0.636      0.648
## 11         7             12    15          0.8        0.805
## 12         8              1     1          1          0.854
## 13        10              3     4          0.75       0.946

検証データ,モデル式でのcalibrationのための割合比較

# 予測確率の計算
pred_val <- predict(model, newdata = val_imp, type = "response")
val_imp$predicted <- pred_val


# スコアごとに要約
val_by <- dplyr::group_by(val_imp, sum_score)
table_val <- dplyr::summarize(val_by,
      hosp_mortality = sum(hosp_mortality), # outcomeは1/0
      n = n(),
      mortality_rate = round(hosp_mortality / n, 3),
      prediction = round(mean(predicted), 3)
    )
table_val
## # A tibble: 11 × 5
##    sum_score hosp_mortality     n mortality_rate prediction
##        <dbl>          <dbl> <int>          <dbl>      <dbl>
##  1        -4              1   109          0.009      0.011
##  2        -3              0    81          0          0.017
##  3        -1              1    43          0.023      0.049
##  4         0              3    67          0.045      0.08 
##  5         1              5    39          0.128      0.124
##  6         2              0     5          0          0.18 
##  7         3             20    66          0.303      0.298
##  8         4              6    11          0.545      0.377
##  9         5              0     1          0          0.579
## 10         6              7    17          0.412      0.649
## 11         7              2     5          0.4        0.805

開発データ、モデル式でのグラフ化

# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
  mutate(sum_score_group = case_when(
    sum_score >= -4 & sum_score <= -1 ~ "-4 to -1",
    sum_score >= 0 & sum_score <= 2 ~ "0 to 2",
    sum_score >= 3 & sum_score <= 4 ~ "3 to 4",
    sum_score >= 5 & sum_score <= 7 ~ "5 to 7",
    sum_score >= 8 & sum_score <= 10 ~ "8 to 10"
  ))

# 'predicted'列の名前を'prediction'に変更します
names(dev_imp)[names(dev_imp) == "predicted"] <- "prediction"

# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table <- dev_imp %>%
  group_by(sum_score_group) %>%
  summarise(hosp_mortality = sum(hosp_mortality), n = n(), prediction = mean(prediction, na.rm = TRUE)) %>%
  mutate(mortality_rate = hosp_mortality / n)

# pivot_longerを使って長い形式に変換
summary_table_long <- summary_table %>%
  pivot_longer(c(mortality_rate, prediction), names_to = "variable", values_to = "value")

# 新しい形式で棒グラフをプロット
ggplot(summary_table_long, aes(x = sum_score_group, y = value*100, fill = variable)) +
  geom_col(position = "dodge", width = 0.6) +
  labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
  scale_fill_manual(values = c("mortality_rate" = "blue", "prediction" = "red")) +
  theme_minimal()

検証データ、モデル式でのグラフ化

# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
  mutate(sum_score_group = case_when(
    sum_score >= -4 & sum_score <= -1 ~ "-4 to -1",
    sum_score >= 0 & sum_score <= 2 ~ "0 to 2",
    sum_score >= 3 & sum_score <= 4 ~ "3 to 4",
    sum_score >= 5 & sum_score <= 7 ~ "5 to 7",
    sum_score >= 8 & sum_score <= 10 ~ "8 to 10"
  ))

# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val <- val_imp %>%
  group_by(sum_score_group) %>%
  summarise(hosp_mortality = sum(hosp_mortality), n = n(), prediction = mean(predicted)) %>%
  mutate(mortality_rate = hosp_mortality / n)

# データを長い形式に変換します
summary_table_long_val <- summary_table_val %>%
  pivot_longer(c(mortality_rate, prediction), names_to = "variable", values_to = "value")

# 棒グラフをプロットします
ggplot(summary_table_long_val, aes(x = sum_score_group, y = value*100, fill = variable)) +
  geom_col(position = "dodge", width = 0.6) +
  labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
  scale_fill_manual(values = c("mortality_rate" = "blue", "prediction" = "red")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

スコアモデル 開発データでのcaliration評価

# 'shock' 列を数値に変換します
dev_imp$shock <- as.numeric(as.character(dev_imp$shock))

# 手動で予測値を計算します
liner_predict_manual <- -2.2705171 + 0.5 * dev_imp$barthel1 +1.5 * dev_imp$barthel2 +2 * dev_imp$shock -2.0 * dev_imp$gcs_cate +1.5 * dev_imp$crp_cate
pred_manual <- exp(liner_predict_manual) / (exp(liner_predict_manual) + 1)

# 新しい予測値でテーブルを作成します
dev_imp$predicted_manual <- pred_manual
dev_by_manual <- dplyr::group_by(dev_imp, sum_score)
table_dev_manual <- dplyr::summarize(dev_by_manual,
      hosp_mortality = sum(hosp_mortality),
      n = n(),
      mortality_rate = round(hosp_mortality / n, 3),
      prediction_manual = round(mean(predicted_manual), 3)
    )
table_dev_manual
## # A tibble: 13 × 5
##    sum_score hosp_mortality     n mortality_rate prediction_manual
##        <dbl>          <dbl> <int>          <dbl>             <dbl>
##  1        -4              0   131          0                 0.014
##  2        -3              3   102          0.029             0.023
##  3        -1              3    63          0.048             0.059
##  4         0              5    47          0.106             0.094
##  5         1              4    53          0.075             0.145
##  6         2              1     7          0.143             0.219
##  7         3             30    95          0.316             0.316
##  8         4              3     6          0.5               0.433
##  9         5              1     1          1                 0.557
## 10         6              7    11          0.636             0.675
## 11         7             12    15          0.8               0.774
## 12         8              1     1          1                 0.849
## 13        10              3     4          0.75              0.939

スコアモデル 開発データでのcaliration グラフ化

# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
  mutate(sum_score_group = case_when(
    sum_score >= -4 & sum_score <= -1 ~ "-4 to -1",
    sum_score >= 0 & sum_score <= 2 ~ "0 to 2",
    sum_score >= 3 & sum_score <= 4 ~ "3 to 4",
    sum_score >= 5 & sum_score <= 7 ~ "5 to 7",
    sum_score >= 8 & sum_score <= 10 ~ "8 to 10"
  ))

# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_dev_manual <- dev_imp %>%
  group_by(sum_score_group) %>%
  summarise(hosp_mortality = sum(hosp_mortality), n = n(), prediction = mean(predicted_manual)) %>%
  mutate(mortality_rate = hosp_mortality / n)

# データを長い形式に変換します
summary_table_long_dev_manual <- summary_table_dev_manual %>%
  pivot_longer(c(mortality_rate, prediction), names_to = "variable", values_to = "value")

# 棒グラフをプロットします
ggplot(summary_table_long_dev_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
  geom_col(position = "dodge", width = 0.6) +
  labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
  scale_fill_manual(values = c("mortality_rate" = "blue", "prediction" = "red")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

スコアモデル 検証データでのcaliration評価

# 'shock' 列を数値に変換します
val_imp$shock <- as.numeric(as.character(val_imp$shock))

# 手動で予測値を計算します
liner_predict_manual_val <- -2.2705171 + 0.5 * val_imp$barthel1 +1.5 * val_imp$barthel2 +2 * val_imp$shock -2.0 * val_imp$gcs_cate +1.5 * val_imp$crp_cate
pred_manual_val <- exp(liner_predict_manual_val) / (exp(liner_predict_manual_val) + 1)

# 新しい予測値でテーブルを作成します
val_imp$predicted_manual <- pred_manual_val
val_by_manual <- dplyr::group_by(val_imp, sum_score)
table_val_manual <- dplyr::summarize(val_by_manual,
      hosp_mortality = sum(hosp_mortality),
      n = n(),
      mortality_rate = round(hosp_mortality / n, 3),
      prediction_manual = round(mean(predicted_manual), 3)
    )
table_val_manual
## # A tibble: 11 × 5
##    sum_score hosp_mortality     n mortality_rate prediction_manual
##        <dbl>          <dbl> <int>          <dbl>             <dbl>
##  1        -4              1   109          0.009             0.014
##  2        -3              0    81          0                 0.023
##  3        -1              1    43          0.023             0.059
##  4         0              3    67          0.045             0.094
##  5         1              5    39          0.128             0.145
##  6         2              0     5          0                 0.219
##  7         3             20    66          0.303             0.316
##  8         4              6    11          0.545             0.433
##  9         5              0     1          0                 0.557
## 10         6              7    17          0.412             0.675
## 11         7              2     5          0.4               0.774

スコアモデル 検証データでのcaliration グラフ化

# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
  mutate(sum_score_group = case_when(
    sum_score >= -4 & sum_score <= -1 ~ "-4 to -1",
    sum_score >= 0 & sum_score <= 2 ~ "0 to 2",
    sum_score >= 3 & sum_score <= 4 ~ "3 to 4",
    sum_score >= 5 & sum_score <= 7 ~ "5 to 7",
    sum_score >= 8 & sum_score <= 10 ~ "8 to 10"
  ))

# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val_manual <- val_imp %>%
  group_by(sum_score_group) %>%
  summarise(hosp_mortality = sum(hosp_mortality), n = n(), prediction = mean(predicted_manual)) %>%
  mutate(mortality_rate = hosp_mortality / n)

# データを長い形式に変換します
summary_table_long_val_manual <- summary_table_val_manual %>%
  pivot_longer(c(mortality_rate, prediction), names_to = "variable", values_to = "value")

# 棒グラフをプロットします
ggplot(summary_table_long_val_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
  geom_col(position = "dodge", width = 0.6) +
  labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
  scale_fill_manual(values = c("mortality_rate" = "blue", "prediction" = "red")) +
  theme_minimal() +
  theme(plot.title = element_text(hjust = 0.5))

## netbenefitの計算

liner_predict_manual <- -2.2705171 + 0.5 * dev_imp$barthel1 +1.5 * dev_imp$barthel2 +2 * dev_imp$shock -2.0 * dev_imp$gcs_cate +1.5 * dev_imp$crp_cate
pred_manual <- exp(liner_predict_manual) / (exp(liner_predict_manual) + 1)
roc_obj <- roc(dev_imp$hosp_mortality, pred_manual)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
threshold <- 0.7 # この値は臨床的な意義に基づいて設定する必要があります
net_benefit <- sum(roc_obj$sensitivities * (1 - threshold)) - sum(roc_obj$specificities * threshold)
print(net_benefit)
## [1] -5.208367

曲線を描く

# Calculate the coordinates of the ROC curve
roc_coords <- pROC::coords(roc_obj, seq(0, 1, by = 0.01), transpose = FALSE)

# Calculate the net benefit for the model
net_benefit_model <- roc_coords$sensitivity - roc_coords$specificity * (1 - seq(0, 1, by = 0.01)) / seq(0, 1, by = 0.01)

# Calculate the net benefit for the "All" strategy
net_benefit_all <- roc_coords$sensitivity

# Calculate the net benefit for the "None" strategy
net_benefit_none <- rep(0, length(seq(0, 1, by = 0.01)))

# Combine the net benefits into one data frame
net_benefit_data <- data.frame(
  Threshold = seq(0, 1, by = 0.01),
  Model = net_benefit_model,
  All = net_benefit_all,
  None = net_benefit_none
)

# Plot the decision curve
ggplot(net_benefit_data, aes(x = Threshold)) +
  geom_line(aes(y = Model, color = "Model")) +
  geom_line(aes(y = All, color = "All")) +
  geom_line(aes(y = None, color = "None")) +
  labs(x = "Threshold Probability", y = "Net Benefit", color = "Strategy") +
  theme_minimal()
## Warning: Removed 1 row containing missing values (`geom_line()`).