pacman::p_load(reader, stringr, missForest, mice, ggplot2, sandwich, tidyverse, readxl, tableone, lubridate, skimr, summarytools, naniar, norm2, lmtest, car, ROCR, pROC, Hmisc, rms, glmnet, ggpubr, ggcorrplot)
data<-read_excel("varix_prediction.xlsx")
str(data)
## tibble [980 × 62] (S3: tbl_df/tbl/data.frame)
## $ hosp_id : num [1:980] 1001 1001 1001 1001 1001 ...
## $ pt_id : num [1:980] 1 2 3 4 5 6 7 8 9 10 ...
## $ hosp_num : num [1:980] 1 1 1 1 1 1 1 1 1 1 ...
## $ year : num [1:980] 2012 2011 2010 2011 2010 ...
## $ age : num [1:980] 50 80 59 44 67 47 65 49 73 69 ...
## $ sex : chr [1:980] "M" "M" "F" "M" ...
## $ bmi : num [1:980] NA 25.3 NA 14.5 NA ...
## $ smoke : num [1:980] 0 0 0 240 0 270 0 NA 1000 0 ...
## $ barthel : num [1:980] NA 2 0 1 NA NA NA NA 0 NA ...
## $ child_num : num [1:980] 11 6 NA 8 NA 11 NA NA 9 15 ...
## $ child_score : num [1:980] 2 0 NA 1 NA 2 NA NA 1 2 ...
## $ gcs : num [1:980] 15 15 15 15 15 15 15 15 15 6 ...
## $ cci_num : num [1:980] 4 4 3 4 4 4 3 4 4 4 ...
## $ pad : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ stroke : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ dimentia : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ ch_lung : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ rheumati : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ pept_ulcer : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ dm : num [1:980] 0 1 0 0 0 0 0 0 0 0 ...
## $ dm_compli : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ paralysis : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ malignancy : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ meta_tumor : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ aids : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ eGFR30 : num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
## $ hd : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ hcc : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ alcohol : num [1:980] 1 0 0 0 0 1 0 1 1 0 ...
## $ past_rupture : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ antiplate : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ anticoag : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ antithro : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ nsaids : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ steroid : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ beta : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ vaso : num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
## $ map : num [1:980] 0 2 0 6 2 0 2 0 4 14 ...
## $ ffp : num [1:980] 0 1 0 0 1 0 1 0 1 1 ...
## $ pc : num [1:980] 0 0 0 0 0 0 0 0 0 0 ...
## $ albner : num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
## $ bt : num [1:980] 36.4 36.8 35.9 36 36.6 36.4 38.4 37 37 35.5 ...
## $ sBP : num [1:980] 78 88 100 69 66 102 84 132 90 52 ...
## $ dBP : num [1:980] 48 49 56 44 40 67 46 69 54 37 ...
## $ hr : num [1:980] 118 72 110 104 72 83 127 83 114 98 ...
## $ shock : num [1:980] 1 0 1 1 1 0 1 0 1 1 ...
## $ bil : num [1:980] 2.2 1.2 3.1 3.4 1.2 7.5 2.4 1.2 2.2 8.7 ...
## $ ast : num [1:980] 217 31 60 129 52 112 90 154 55 96 ...
## $ alt : num [1:980] 63 22 40 46 36 53 19 109 20 87 ...
## $ wbc : num [1:980] 7400 5000 7800 9100 3900 9800 8000 7900 11100 12800 ...
## $ hb : num [1:980] 6.9 10.8 9.7 10.7 6.3 12.2 9.8 13.5 5.6 6 ...
## $ plt : num [1:980] 115 77 74 162 63 69 93 132 124 168 ...
## $ tp : num [1:980] 6.3 5.6 6.4 6.1 5.1 5.7 7.2 7.6 4.9 5.3 ...
## $ alb : num [1:980] 2.2 3.2 2.8 2.9 2.8 2.6 3.2 4 2.3 1.2 ...
## $ eGFR : num [1:980] 58 57.7 63.7 112.4 123.6 ...
## $ bun : num [1:980] 13.2 41.5 26.3 2.9 27.4 13.7 15.8 15.5 27 63.2 ...
## $ cre : num [1:980] 1.08 0.96 0.72 0.61 0.38 0.74 0.44 0.4 0.97 2.07 ...
## $ crp : num [1:980] 0.92 0.29 0.68 0.29 0.29 0.1 0.96 0.29 NA 2.75 ...
## $ pt : num [1:980] 37.8 55 46.7 37.8 74.6 28.5 45.9 49.7 45.9 37.2 ...
## $ aptt : num [1:980] 29.4 29 27.2 35.3 30.1 37.8 27.6 32.6 27.5 36 ...
## $ hosp_mortality: num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
## $ los : num [1:980] 12 7 0 10 3 6 2 1 8 0 ...
colnames(data)
## [1] "hosp_id" "pt_id" "hosp_num" "year"
## [5] "age" "sex" "bmi" "smoke"
## [9] "barthel" "child_num" "child_score" "gcs"
## [13] "cci_num" "pad" "stroke" "dimentia"
## [17] "ch_lung" "rheumati" "pept_ulcer" "dm"
## [21] "dm_compli" "paralysis" "malignancy" "meta_tumor"
## [25] "aids" "eGFR30" "hd" "hcc"
## [29] "alcohol" "past_rupture" "antiplate" "anticoag"
## [33] "antithro" "nsaids" "steroid" "beta"
## [37] "vaso" "map" "ffp" "pc"
## [41] "albner" "bt" "sBP" "dBP"
## [45] "hr" "shock" "bil" "ast"
## [49] "alt" "wbc" "hb" "plt"
## [53] "tp" "alb" "eGFR" "bun"
## [57] "cre" "crp" "pt" "aptt"
## [61] "hosp_mortality" "los"
df <-
data|> #miceパッケージが順序ありの因子型でないと読めないためこれを指定。
mutate(
hosp_id=as.integer(hosp_id),
pt_id=as.integer(pt_id),
hosp_num=as.integer(hosp_num),
year=as.integer(year),
age=as.integer(age),
sex= factor(sex, levels = c("M", "F")),
smoke= as.integer(smoke),
barthel= factor(barthel, levels = c("0", "1", "2")),
child_num= as.integer(child_num),
child_score=factor(child_score, levels = c("0", "1", "2")),
gcs=as.integer(gcs),
cci_num=as.integer(cci_num),
pad=factor(pad),
stroke=factor(stroke),
dimentia=factor(dimentia),
ch_lung=factor(ch_lung),
rheumati=factor(rheumati),
pept_ulcer=factor(pept_ulcer),
dm=factor(dm),
dm_compli=factor(dm_compli),
paralysis=factor(paralysis),
malignancy=factor(malignancy),
meta_tumor=factor(meta_tumor),
aids=factor(aids),
eGFR30=factor(eGFR30),
hd=factor(hd),
hcc=factor(hcc),
alcohol=factor(alcohol),
past_rupture=factor(past_rupture),
antiplate=factor(antiplate),
anticoag=factor(anticoag),
antithro=factor(antithro),
nsaids=factor(nsaids),
steroid=factor(steroid),
beta=factor(beta),
vaso=factor(vaso),
map= as.integer(map),
ffp=factor(ffp),
pc=factor(pc),
albner=factor(albner),
sBP= as.integer(sBP),
dBP= as.integer(dBP),
hr=as.integer(hr),
shock=factor(shock),
los=as.integer(los)
)
str(df)
## tibble [980 × 62] (S3: tbl_df/tbl/data.frame)
## $ hosp_id : int [1:980] 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
## $ pt_id : int [1:980] 1 2 3 4 5 6 7 8 9 10 ...
## $ hosp_num : int [1:980] 1 1 1 1 1 1 1 1 1 1 ...
## $ year : int [1:980] 2012 2011 2010 2011 2010 2017 2010 2010 2010 2011 ...
## $ age : int [1:980] 50 80 59 44 67 47 65 49 73 69 ...
## $ sex : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 1 2 1 2 ...
## $ bmi : num [1:980] NA 25.3 NA 14.5 NA ...
## $ smoke : int [1:980] 0 0 0 240 0 270 0 NA 1000 0 ...
## $ barthel : Factor w/ 3 levels "0","1","2": NA 3 1 2 NA NA NA NA 1 NA ...
## $ child_num : int [1:980] 11 6 NA 8 NA 11 NA NA 9 15 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 1 NA 2 NA 3 NA NA 2 3 ...
## $ gcs : int [1:980] 15 15 15 15 15 15 15 15 15 6 ...
## $ cci_num : int [1:980] 4 4 3 4 4 4 3 4 4 4 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 2 1 2 2 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ map : int [1:980] 0 2 0 6 2 0 2 0 4 14 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 2 1 2 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ bt : num [1:980] 36.4 36.8 35.9 36 36.6 36.4 38.4 37 37 35.5 ...
## $ sBP : int [1:980] 78 88 100 69 66 102 84 132 90 52 ...
## $ dBP : int [1:980] 48 49 56 44 40 67 46 69 54 37 ...
## $ hr : int [1:980] 118 72 110 104 72 83 127 83 114 98 ...
## $ shock : Factor w/ 2 levels "0","1": 2 1 2 2 2 1 2 1 2 2 ...
## $ bil : num [1:980] 2.2 1.2 3.1 3.4 1.2 7.5 2.4 1.2 2.2 8.7 ...
## $ ast : num [1:980] 217 31 60 129 52 112 90 154 55 96 ...
## $ alt : num [1:980] 63 22 40 46 36 53 19 109 20 87 ...
## $ wbc : num [1:980] 7400 5000 7800 9100 3900 9800 8000 7900 11100 12800 ...
## $ hb : num [1:980] 6.9 10.8 9.7 10.7 6.3 12.2 9.8 13.5 5.6 6 ...
## $ plt : num [1:980] 115 77 74 162 63 69 93 132 124 168 ...
## $ tp : num [1:980] 6.3 5.6 6.4 6.1 5.1 5.7 7.2 7.6 4.9 5.3 ...
## $ alb : num [1:980] 2.2 3.2 2.8 2.9 2.8 2.6 3.2 4 2.3 1.2 ...
## $ eGFR : num [1:980] 58 57.7 63.7 112.4 123.6 ...
## $ bun : num [1:980] 13.2 41.5 26.3 2.9 27.4 13.7 15.8 15.5 27 63.2 ...
## $ cre : num [1:980] 1.08 0.96 0.72 0.61 0.38 0.74 0.44 0.4 0.97 2.07 ...
## $ crp : num [1:980] 0.92 0.29 0.68 0.29 0.29 0.1 0.96 0.29 NA 2.75 ...
## $ pt : num [1:980] 37.8 55 46.7 37.8 74.6 28.5 45.9 49.7 45.9 37.2 ...
## $ aptt : num [1:980] 29.4 29 27.2 35.3 30.1 37.8 27.6 32.6 27.5 36 ...
## $ hosp_mortality: num [1:980] 0 0 0 0 0 0 0 0 0 1 ...
## $ los : int [1:980] 12 7 0 10 3 6 2 1 8 0 ...
col_continuous = c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")
col_factors = c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality")
# Create your table
df %>%
select(c(col_continuous, col_factors)) %>%
CreateTableOne(vars = c(col_continuous, col_factors), strata="hosp_mortality",factorVars = col_factors, addOverall = T) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_continuous)
##
## # Now:
## data %>% select(all_of(col_continuous))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_factors)
##
## # Now:
## data %>% select(all_of(col_factors))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## Stratified by hosp_mortality
## Overall 0 1
## n 980 862 118
## age (mean (SD)) 60.86 (13.01) 60.52 (13.02) 63.37 (12.71)
## bmi (mean (SD)) 23.61 (11.49) 23.73 (12.06) 22.62 (3.75)
## smoke (mean (SD)) 265.31 (483.85) 259.50 (381.11) 311.74 (975.09)
## child_num (mean (SD)) 8.59 (2.12) 8.31 (1.95) 10.74 (2.14)
## gcs (mean (SD)) 14.46 (1.87) 14.69 (1.26) 12.74 (3.75)
## cci_num (mean (SD)) 4.47 (1.32) 4.44 (1.22) 4.68 (1.91)
## map (mean (SD)) 3.37 (3.96) 3.00 (3.70) 6.03 (4.73)
## bt (mean (SD)) 36.73 (0.72) 36.74 (0.61) 36.62 (1.27)
## sBP (mean (SD)) 88.55 (16.32) 91.20 (14.67) 69.19 (14.68)
## dBP (mean (SD)) 52.78 (12.72) 54.48 (12.03) 40.29 (10.54)
## hr (mean (SD)) 86.90 (21.06) 84.87 (19.78) 101.98 (24.05)
## bil (mean (SD)) 2.30 (2.19) 2.07 (1.83) 4.02 (3.49)
## ast (mean (SD)) 83.87 (100.75) 76.25 (84.23) 138.98 (170.37)
## alt (mean (SD)) 41.79 (46.84) 39.22 (41.36) 60.38 (73.04)
## wbc (mean (SD)) 8676.10 (4747.62) 8492.40 (4679.66) 9999.74 (5037.08)
## hb (mean (SD)) 8.72 (2.49) 8.80 (2.51) 8.14 (2.22)
## plt (mean (SD)) 113.92 (66.44) 113.46 (65.16) 117.21 (75.29)
## tp (mean (SD)) 6.11 (0.92) 6.17 (0.88) 5.71 (1.14)
## alb (mean (SD)) 2.85 (0.61) 2.93 (0.58) 2.30 (0.59)
## eGFR (mean (SD)) 69.80 (31.04) 73.20 (30.71) 45.36 (20.79)
## bun (mean (SD)) 28.52 (17.92) 27.67 (17.16) 34.68 (21.79)
## cre (mean (SD)) 1.06 (0.90) 1.00 (0.90) 1.49 (0.84)
## crp (mean (SD)) 0.84 (1.76) 0.72 (1.61) 1.68 (2.42)
## pt (mean (SD)) 54.98 (17.67) 56.80 (16.85) 42.18 (18.11)
## aptt (mean (SD)) 34.08 (17.48) 32.40 (12.89) 45.63 (33.51)
## los (mean (SD)) 12.37 (14.69) 12.60 (14.15) 10.69 (18.12)
## sex = F (%) 246 ( 25.1) 214 ( 24.8) 32 ( 27.1)
## barthel (%)
## 0 313 ( 38.4) 303 ( 42.1) 10 ( 10.3)
## 1 249 ( 30.5) 228 ( 31.7) 21 ( 21.6)
## 2 254 ( 31.1) 188 ( 26.1) 66 ( 68.0)
## child_score (%)
## 0 149 ( 16.9) 146 ( 18.9) 3 ( 2.7)
## 1 437 ( 49.5) 410 ( 53.0) 27 ( 24.5)
## 2 297 ( 33.6) 217 ( 28.1) 80 ( 72.7)
## pad = 1 (%) 2 ( 0.2) 2 ( 0.2) 0 ( 0.0)
## stroke = 1 (%) 21 ( 2.1) 18 ( 2.1) 3 ( 2.5)
## dimentia = 1 (%) 10 ( 1.0) 9 ( 1.0) 1 ( 0.8)
## ch_lung = 1 (%) 15 ( 1.5) 12 ( 1.4) 3 ( 2.5)
## rheumati = 1 (%) 3 ( 0.3) 3 ( 0.3) 0 ( 0.0)
## pept_ulcer = 1 (%) 101 ( 10.3) 98 ( 11.4) 3 ( 2.5)
## dm = 1 (%) 210 ( 21.4) 194 ( 22.5) 16 ( 13.6)
## dm_compli = 1 (%) 13 ( 1.3) 12 ( 1.4) 1 ( 0.8)
## paralysis = 0 (%) 980 (100.0) 862 (100.0) 118 (100.0)
## malignancy = 1 (%) 115 ( 11.7) 96 ( 11.1) 19 ( 16.1)
## meta_tumor = 1 (%) 17 ( 1.7) 9 ( 1.0) 8 ( 6.8)
## aids = 0 (%) 980 (100.0) 862 (100.0) 118 (100.0)
## eGFR30 = 1 (%) 87 ( 9.1) 56 ( 6.7) 31 ( 26.5)
## hd = 1 (%) 13 ( 1.3) 10 ( 1.2) 3 ( 2.5)
## hcc = 1 (%) 175 ( 17.9) 151 ( 17.5) 24 ( 20.3)
## alcohol = 1 (%) 472 ( 48.2) 417 ( 48.4) 55 ( 46.6)
## past_rupture = 1 (%) 225 ( 23.0) 211 ( 24.5) 14 ( 11.9)
## antiplate = 1 (%) 7 ( 0.7) 5 ( 0.6) 2 ( 1.7)
## anticoag = 1 (%) 4 ( 0.4) 4 ( 0.5) 0 ( 0.0)
## antithro = 1 (%) 10 ( 1.0) 8 ( 0.9) 2 ( 1.7)
## nsaids = 1 (%) 9 ( 0.9) 9 ( 1.0) 0 ( 0.0)
## steroid = 1 (%) 5 ( 0.5) 4 ( 0.5) 1 ( 0.8)
## beta = 1 (%) 58 ( 5.9) 58 ( 6.7) 0 ( 0.0)
## vaso = 1 (%) 58 ( 5.9) 29 ( 3.4) 29 ( 24.6)
## ffp = 1 (%) 291 ( 29.7) 226 ( 26.2) 65 ( 55.1)
## pc = 1 (%) 17 ( 1.7) 9 ( 1.0) 8 ( 6.8)
## albner = 1 (%) 80 ( 8.2) 58 ( 6.7) 22 ( 18.6)
## shock = 1 (%) 409 ( 43.0) 308 ( 36.7) 101 ( 89.4)
## hosp_mortality = 1 (%) 118 ( 12.0) 0 ( 0.0) 118 (100.0)
## Stratified by hosp_mortality
## p test SMD Missing
## n
## age (mean (SD)) 0.025 0.222 0.0
## bmi (mean (SD)) 0.390 0.124 11.7
## smoke (mean (SD)) 0.321 0.071 12.9
## child_num (mean (SD)) <0.001 1.190 13.2
## gcs (mean (SD)) <0.001 0.699 0.0
## cci_num (mean (SD)) 0.070 0.147 0.0
## map (mean (SD)) <0.001 0.712 0.0
## bt (mean (SD)) 0.112 0.118 3.8
## sBP (mean (SD)) <0.001 1.500 2.3
## dBP (mean (SD)) <0.001 1.256 2.3
## hr (mean (SD)) <0.001 0.777 2.9
## bil (mean (SD)) <0.001 0.700 3.8
## ast (mean (SD)) <0.001 0.467 2.6
## alt (mean (SD)) <0.001 0.356 2.6
## wbc (mean (SD)) 0.001 0.310 2.0
## hb (mean (SD)) 0.007 0.279 2.0
## plt (mean (SD)) 0.568 0.053 2.0
## tp (mean (SD)) <0.001 0.454 7.9
## alb (mean (SD)) <0.001 1.086 4.7
## eGFR (mean (SD)) <0.001 1.062 2.1
## bun (mean (SD)) <0.001 0.358 2.1
## cre (mean (SD)) <0.001 0.570 2.9
## crp (mean (SD)) <0.001 0.464 5.3
## pt (mean (SD)) <0.001 0.836 6.0
## aptt (mean (SD)) <0.001 0.521 11.4
## los (mean (SD)) 0.188 0.117 0.0
## sex = F (%) 0.670 0.052 0.0
## barthel (%) <0.001 0.998 16.7
## 0
## 1
## 2
## child_score (%) <0.001 1.036 9.9
## 0
## 1
## 2
## pad = 1 (%) 1.000 0.068 0.0
## stroke = 1 (%) 1.000 0.030 0.0
## dimentia = 1 (%) 1.000 0.020 0.0
## ch_lung = 1 (%) 0.579 0.083 0.0
## rheumati = 1 (%) 1.000 0.084 0.0
## pept_ulcer = 1 (%) 0.005 0.352 0.0
## dm = 1 (%) 0.036 0.234 0.0
## dm_compli = 1 (%) 0.955 0.052 0.0
## paralysis = 0 (%) NA <0.001 0.0
## malignancy = 1 (%) 0.156 0.145 0.0
## meta_tumor = 1 (%) <0.001 0.299 0.0
## aids = 0 (%) NA <0.001 0.0
## eGFR30 = 1 (%) <0.001 0.554 2.1
## hd = 1 (%) 0.423 0.103 0.0
## hcc = 1 (%) 0.534 0.072 0.0
## alcohol = 1 (%) 0.793 0.035 0.0
## past_rupture = 1 (%) 0.003 0.332 0.0
## antiplate = 1 (%) 0.444 0.105 0.0
## anticoag = 1 (%) 1.000 0.097 0.0
## antithro = 1 (%) 0.773 0.067 0.0
## nsaids = 1 (%) 0.548 0.145 0.0
## steroid = 1 (%) 1.000 0.048 0.0
## beta = 1 (%) 0.007 0.380 0.0
## vaso = 1 (%) <0.001 0.643 0.0
## ffp = 1 (%) <0.001 0.615 0.0
## pc = 1 (%) <0.001 0.299 0.0
## albner = 1 (%) <0.001 0.364 0.0
## shock = 1 (%) <0.001 1.302 2.9
## hosp_mortality = 1 (%) <0.001 NaN 0.0
df |> #全体
select(col_continuous) |>
pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |>
ggplot()+
geom_histogram(aes(x = value), color = "black")+
facet_wrap(~ name, scales = "free", ncol = 5) +
theme_bw()+
theme(text = element_text(size = 12))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 1044 rows containing non-finite values (`stat_bin()`).
# 指定した変数だけで相関行列を作成します。
corresult <- df %>%
dplyr::select(all_of(col_continuous)) %>%
drop_na() %>%
cor(method = "pearson")
# 相関行列の数値を表示します。
print(corresult)
## age bmi smoke child_num gcs
## age 1.000000000 -0.0836856830 -0.1030432494 -0.11532157 -0.029765488
## bmi -0.083685683 1.0000000000 0.0300070548 -0.06205996 -0.003933984
## smoke -0.103043249 0.0300070548 1.0000000000 0.05570718 0.014443388
## child_num -0.115321574 -0.0620599629 0.0557071836 1.00000000 -0.398642612
## gcs -0.029765488 -0.0039339840 0.0144433884 -0.39864261 1.000000000
## cci_num 0.189054010 0.0151040658 0.0007541775 -0.02107435 0.020195287
## map -0.034080140 -0.1002045503 0.0167592044 0.31022161 -0.238511159
## bt -0.159171182 0.0400759875 -0.0314451418 0.08796750 0.035710109
## sBP -0.003553962 0.0994623895 -0.0249117512 -0.31560557 0.231281712
## dBP -0.035371276 0.0769375514 0.0340903324 -0.27357860 0.226396654
## hr -0.170417688 0.0005776778 0.0424078291 0.27096039 -0.052205044
## bil -0.275277489 -0.0503555087 0.0951954651 0.62091885 -0.230616604
## ast -0.239942463 -0.1226764279 -0.0144673264 0.21510256 -0.056066152
## alt -0.073180367 -0.0224991340 -0.0343036850 0.04977977 -0.003007327
## wbc -0.072822566 0.0155731107 0.0459858923 0.14416954 -0.087888163
## hb -0.037084320 0.1561822336 0.0538305838 -0.23955322 0.059902667
## plt 0.098002934 -0.0030810777 -0.0693506283 -0.13074305 -0.002834339
## tp -0.087020334 0.0526073942 -0.0248881145 -0.21198841 0.047297528
## alb -0.132938753 0.0545768985 0.0079741569 -0.63647109 0.243137323
## eGFR -0.412776549 0.0215964915 0.0095727137 -0.13198414 0.197706550
## bun 0.261555897 -0.0390854964 -0.0110387883 -0.04257567 -0.089929556
## cre 0.138821544 -0.0531175288 0.0122796667 0.09087990 -0.167038052
## crp 0.044771776 -0.0459572450 -0.0118945421 0.20708388 -0.037334570
## pt 0.181701617 0.0477928040 -0.0773375131 -0.70284859 0.194416323
## aptt 0.038816549 -0.0619956642 -0.0049792656 0.28669126 -0.112356676
## los 0.114263795 -0.1111178379 -0.0450300079 0.13624296 -0.040538560
## cci_num map bt sBP dBP
## age 0.1890540105 -0.034080140 -0.1591711824 -0.003553962 -0.035371276
## bmi 0.0151040658 -0.100204550 0.0400759875 0.099462389 0.076937551
## smoke 0.0007541775 0.016759204 -0.0314451418 -0.024911751 0.034090332
## child_num -0.0210743540 0.310221611 0.0879674980 -0.315605565 -0.273578595
## gcs 0.0201952874 -0.238511159 0.0357101093 0.231281712 0.226396654
## cci_num 1.0000000000 -0.013084758 -0.0370318828 -0.044976370 -0.083537092
## map -0.0130847578 1.000000000 0.0057615199 -0.310568280 -0.295058357
## bt -0.0370318828 0.005761520 1.0000000000 0.113951446 0.092868079
## sBP -0.0449763703 -0.310568280 0.1139514459 1.000000000 0.676892618
## dBP -0.0835370916 -0.295058357 0.0928680794 0.676892618 1.000000000
## hr -0.0503131628 0.177325517 0.2737750179 -0.121261864 -0.032104375
## bil -0.1525660253 0.117874076 0.1196263065 -0.125416270 -0.137379960
## ast -0.0850293362 0.031412518 0.0857017384 -0.020936377 0.003905327
## alt 0.0724000799 -0.063328574 -0.0134775722 -0.009074588 -0.010225221
## wbc -0.0170060378 0.136092233 0.0814465841 -0.187422764 -0.121956606
## hb -0.0308465564 -0.457998042 -0.0002519479 0.234358309 0.260565799
## plt 0.0687420490 -0.046073792 -0.0859568732 -0.058442402 -0.014481801
## tp -0.0309910195 -0.290917505 -0.0045907049 0.255393037 0.189640521
## alb -0.0913911585 -0.339151516 -0.0303835483 0.329406942 0.290084418
## eGFR -0.1316832935 -0.231260712 0.0619466271 0.220847282 0.222863015
## bun 0.1179889869 0.077631937 -0.0542283504 -0.134737843 -0.113796341
## cre 0.1643842038 0.135471299 -0.0241751059 -0.064381658 -0.100591803
## crp 0.1332803653 0.008689486 0.0023633207 -0.101310832 -0.061777674
## pt -0.0028383704 -0.343047953 -0.0864311845 0.237336843 0.241271644
## aptt -0.0342688162 0.199282167 0.0773348901 -0.181047012 -0.184841742
## los -0.0076579357 0.021718031 0.0148842687 -0.160695920 -0.104468910
## hr bil ast alt wbc
## age -0.1704176880 -0.275277489 -0.239942463 -0.073180367 -0.07282257
## bmi 0.0005776778 -0.050355509 -0.122676428 -0.022499134 0.01557311
## smoke 0.0424078291 0.095195465 -0.014467326 -0.034303685 0.04598589
## child_num 0.2709603884 0.620918853 0.215102562 0.049779766 0.14416954
## gcs -0.0522050438 -0.230616604 -0.056066152 -0.003007327 -0.08788816
## cci_num -0.0503131628 -0.152566025 -0.085029336 0.072400080 -0.01700604
## map 0.1773255167 0.117874076 0.031412518 -0.063328574 0.13609223
## bt 0.2737750179 0.119626307 0.085701738 -0.013477572 0.08144658
## sBP -0.1212618644 -0.125416270 -0.020936377 -0.009074588 -0.18742276
## dBP -0.0321043746 -0.137379960 0.003905327 -0.010225221 -0.12195661
## hr 1.0000000000 0.283697599 0.140895234 0.026762968 0.17602935
## bil 0.2836975992 1.000000000 0.302422661 0.090551980 0.13150758
## ast 0.1408952345 0.302422661 1.000000000 0.720155422 0.10128185
## alt 0.0267629682 0.090551980 0.720155422 1.000000000 0.04253651
## wbc 0.1760293491 0.131507582 0.101281849 0.042536508 1.00000000
## hb -0.0806652198 0.050166038 -0.009587753 0.067143363 -0.06623161
## plt -0.0766741681 -0.195063935 -0.073463078 0.004121659 0.49538955
## tp -0.0874577000 0.034113681 0.073620250 0.046276833 -0.11628316
## alb -0.1852456995 -0.191177646 -0.034876750 0.033688307 -0.06014093
## eGFR -0.0241579739 -0.001960715 0.059988401 0.028581768 -0.17260179
## bun 0.0300234288 -0.059887023 -0.072740272 0.056148602 0.26431605
## cre 0.0610362422 0.079345427 0.001922576 -0.016173234 0.09887551
## crp 0.0782172666 0.177763017 0.132902240 0.128460605 0.17422558
## pt -0.2339026905 -0.497721734 -0.208271322 -0.062425736 -0.19145959
## aptt 0.0965794827 0.214250749 0.013094804 -0.031131796 0.07282673
## los -0.1332941242 0.032263183 0.077848295 0.023696953 0.01117657
## hb plt tp alb eGFR
## age -0.0370843197 0.098002934 -0.087020334 -0.132938753 -0.412776549
## bmi 0.1561822336 -0.003081078 0.052607394 0.054576898 0.021596491
## smoke 0.0538305838 -0.069350628 -0.024888115 0.007974157 0.009572714
## child_num -0.2395532154 -0.130743052 -0.211988410 -0.636471086 -0.131984141
## gcs 0.0599026668 -0.002834339 0.047297528 0.243137323 0.197706550
## cci_num -0.0308465564 0.068742049 -0.030991019 -0.091391158 -0.131683293
## map -0.4579980423 -0.046073792 -0.290917505 -0.339151516 -0.231260712
## bt -0.0002519479 -0.085956873 -0.004590705 -0.030383548 0.061946627
## sBP 0.2343583090 -0.058442402 0.255393037 0.329406942 0.220847282
## dBP 0.2605657986 -0.014481801 0.189640521 0.290084418 0.222863015
## hr -0.0806652198 -0.076674168 -0.087457700 -0.185245699 -0.024157974
## bil 0.0501660383 -0.195063935 0.034113681 -0.191177646 -0.001960715
## ast -0.0095877532 -0.073463078 0.073620250 -0.034876750 0.059988401
## alt 0.0671433628 0.004121659 0.046276833 0.033688307 0.028581768
## wbc -0.0662316113 0.495389551 -0.116283164 -0.060140931 -0.172601795
## hb 1.0000000000 -0.031249885 0.410814845 0.455021639 0.267550502
## plt -0.0312498847 1.000000000 0.011564275 0.063691267 -0.121022879
## tp 0.4108148445 0.011564275 1.000000000 0.469938593 0.193389634
## alb 0.4550216395 0.063691267 0.469938593 1.000000000 0.255246914
## eGFR 0.2675505020 -0.121022879 0.193389634 0.255246914 1.000000000
## bun -0.2426806993 0.171230560 -0.161836978 -0.042886158 -0.488892798
## cre -0.1669469476 0.027234662 -0.094345303 -0.153877541 -0.638631152
## crp -0.0705158860 0.123529251 -0.051463581 -0.251078305 -0.115854620
## pt 0.2817458923 0.162259905 0.209186089 0.470931996 0.085698826
## aptt -0.1075630819 -0.040467717 -0.255855561 -0.374059567 -0.130872759
## los -0.0717282157 0.007980212 -0.011777560 -0.137322777 -0.055197075
## bun cre crp pt aptt
## age 0.26155590 0.138821544 0.044771776 0.18170162 0.038816549
## bmi -0.03908550 -0.053117529 -0.045957245 0.04779280 -0.061995664
## smoke -0.01103879 0.012279667 -0.011894542 -0.07733751 -0.004979266
## child_num -0.04257567 0.090879898 0.207083875 -0.70284859 0.286691263
## gcs -0.08992956 -0.167038052 -0.037334570 0.19441632 -0.112356676
## cci_num 0.11798899 0.164384204 0.133280365 -0.00283837 -0.034268816
## map 0.07763194 0.135471299 0.008689486 -0.34304795 0.199282167
## bt -0.05422835 -0.024175106 0.002363321 -0.08643118 0.077334890
## sBP -0.13473784 -0.064381658 -0.101310832 0.23733684 -0.181047012
## dBP -0.11379634 -0.100591803 -0.061777674 0.24127164 -0.184841742
## hr 0.03002343 0.061036242 0.078217267 -0.23390269 0.096579483
## bil -0.05988702 0.079345427 0.177763017 -0.49772173 0.214250749
## ast -0.07274027 0.001922576 0.132902240 -0.20827132 0.013094804
## alt 0.05614860 -0.016173234 0.128460605 -0.06242574 -0.031131796
## wbc 0.26431605 0.098875505 0.174225578 -0.19145959 0.072826728
## hb -0.24268070 -0.166946948 -0.070515886 0.28174589 -0.107563082
## plt 0.17123056 0.027234662 0.123529251 0.16225991 -0.040467717
## tp -0.16183698 -0.094345303 -0.051463581 0.20918609 -0.255855561
## alb -0.04288616 -0.153877541 -0.251078305 0.47093200 -0.374059567
## eGFR -0.48889280 -0.638631152 -0.115854620 0.08569883 -0.130872759
## bun 1.00000000 0.514910065 0.108784779 0.01932169 0.027465356
## cre 0.51491006 1.000000000 0.153689986 -0.07648649 0.103488023
## crp 0.10878478 0.153689986 1.000000000 -0.10429396 0.086830690
## pt 0.01932169 -0.076486494 -0.104293964 1.00000000 -0.341667669
## aptt 0.02746536 0.103488023 0.086830690 -0.34166767 1.000000000
## los 0.03640477 -0.010374943 0.044141321 -0.06794976 0.003187148
## los
## age 0.114263795
## bmi -0.111117838
## smoke -0.045030008
## child_num 0.136242957
## gcs -0.040538560
## cci_num -0.007657936
## map 0.021718031
## bt 0.014884269
## sBP -0.160695920
## dBP -0.104468910
## hr -0.133294124
## bil 0.032263183
## ast 0.077848295
## alt 0.023696953
## wbc 0.011176572
## hb -0.071728216
## plt 0.007980212
## tp -0.011777560
## alb -0.137322777
## eGFR -0.055197075
## bun 0.036404771
## cre -0.010374943
## crp 0.044141321
## pt -0.067949764
## aptt 0.003187148
## los 1.000000000
# 連続変数のリストを指定します。
col_continuous <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")
# 指定した連続変数だけで相関行列を作成します。
corresult <- df %>%
dplyr::select(all_of(col_continuous)) %>%
drop_na() %>%
cor(method = "pearson")
# 相関行列を基に相関プロットを作成します。
corrplot <- ggcorrplot(corr = corresult, hc.order = FALSE, method = "square", title = "cor plot",
colors = c("#4b61ba", "white", "red"), lab = TRUE)
# プロットを表示します。
corrplot
# 連続変数の変数名をまとめる
con_var <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los")
# datadistを計算
ddist <- datadist(df)
options(datadist='ddist')
# プロット結果をまとめるリストを用意する
plot <- list()
for (x in con_var){
# lrmに投入するformulaを文字列で、for文で順番に指定していく
formula_tmp <- as.formula(paste("hosp_mortality ~ rcs(", x, ", 4)"))
fit_tmp <- lrm(formula_tmp, data = df)
# Predict関数の呼び出しを文字列として作成し、それをパースして評価する
plot_cmd <- paste("plot(Predict(fit_tmp, ", x, "))")
plot_tmp <- eval(parse(text = plot_cmd))
plot[[x]] <- plot_tmp
}
# 結果をggarrangeでまとめて表示する
logOR_plot <- ggarrange(plotlist = plot, ncol = 2, nrow = 2)
logOR_plot
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## $`7`
##
## attr(,"class")
## [1] "list" "ggarrange"
・cci_numはスプライン関数にしてもよいか ・btはU字polへ ・hrもスプライン関数に ・hbもスプライン ・pltもスプライン ・cre/crp/apttはスプライン
naplot_1 <- gg_miss_var(df, show_pct = TRUE)
naplot_1
## Warning: The `guide` argument in `scale_*()` cannot be `FALSE`. This was deprecated in
## ggplot2 3.3.4.
## ℹ Please use "none" instead.
## ℹ The deprecated feature was likely used in the naniar package.
## Please report the issue at <]8;;https://github.com/njtierney/naniar/issueshttps://github.com/njtierney/naniar/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
### その2
naplot_2 <- vis_miss(df)
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## ℹ Please use `gather()` instead.
## ℹ The deprecated feature was likely used in the visdat package.
## Please report the issue at <]8;;https://github.com/ropensci/visdat/issueshttps://github.com/ropensci/visdat/issues]8;;>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
naplot_2
#imp <- mice(df,
# m=20, #mは代入の結果として作成するデータセットの数
# maxit =20, #maxitは反復回数
# method="pmm",
# printFlag = FALSE,
# seed = 10
# )
#
#save(imp, file = "imp.Rdata")
load("imp.Rdata")
plot_mice <- plot(imp)
plot_mice
densityplot_mice <- densityplot(imp)
densityplot_mice
predictors <- c("age", "sex", "bmi", "smoke", "barthel", "child_score", "gcs", "cci_num", "eGFR", "hd", "hcc", "alcohol", "past_rupture", "antithro","nsaids", "steroid","beta", "bt", "shock","bil", "ast", "alt", "wbc", "hb", "plt", "tp", "alb", "crp", "pt", "aptt")
predictors1 <- c("age", "sex", "bmi", "smoke", "barthel", "gcs", "eGFR", "hd", "hcc", "alcohol", "past_rupture", "antithro","nsaids", "steroid","beta", "bt", "shock","bil", "ast", "alt", "wbc", "hb", "plt", "tp", "alb", "crp", "pt", "aptt")
# 予測変数を定義します
predictors1 <- c("age", "sex", "bmi", "smoke", "barthel", "gcs", "eGFR", "hd", "hcc", "alcohol", "past_rupture", "antithro","nsaids", "steroid","beta", "bt", "shock","bil", "ast", "alt", "wbc", "hb", "plt", "tp", "alb", "crp", "pt", "aptt")
# rcsを適用したい変数を定義します
rcs_vars <- c("gcs", "hb", "plt", "crp", "aptt")
# 二次項を適用したい変数を定義します
squared_vars <- "bt"
# 補完データで選択された変数を格納するリストを初期化します
selected_vars <- list()
set.seed(123) # 再現性のために seed を設定
# 各補完データに対して実行します
for(i in 1:20){
# i 番目の補完データを取得し、予測変数に相当する列だけを選択します
data <- complete(imp, i) %>% select(all_of(predictors1), hosp_mortality)
# rcs関数を適用する
for(var in rcs_vars){
data[[var]] <- rcs(data[[var]], 4)
}
# 二次項を適用する
data[[squared_vars]] <- I(data[[squared_vars]]^2)
# 応答変数と予測変数を準備します
x <- model.matrix(hosp_mortality ~., data = data)
y <- data$hosp_mortality
# Lasso ロジスティック回帰を適用します
cv_fit <- cv.glmnet(x, y, family = "binomial")
# "1se" ルールに従って最適な lambda を選択し、モデルをフィットします
best_lambda <- cv_fit$lambda.1se
best_fit <- glmnet(x, y, family = "binomial", alpha = 1, lambda = best_lambda)
# 選択された変数を取得します
selected_vars[[i]] <- rownames(coef(best_fit))[which(coef(best_fit) != 0)]
}
# 各補完データで選択された変数のリストを表示します
selected_vars
## [[1]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[2]]
## [1] "(Intercept)" "barthel2" "gcsdata" "eGFR" "shock1"
## [6] "bil" "ast" "alb" "crpdata" "pt"
## [11] "apttdata"
##
## [[3]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
##
## [[4]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt"
##
## [[5]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
##
## [[6]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[7]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "beta1" "shock1" "bil" "ast" "alb"
## [11] "crpdata" "pt" "apttdata"
##
## [[8]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[9]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[10]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt"
##
## [[11]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "apttdata"
##
## [[12]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "pt"
##
## [[13]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[14]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[15]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[16]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[17]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt"
##
## [[18]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[19]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb" "crpdata"
## [11] "pt" "apttdata"
##
## [[20]]
## [1] "(Intercept)" "barthel2" "gcsdata" "gcsdata''" "eGFR"
## [6] "shock1" "bil" "ast" "alb"
# selected_vars リストをフラットなベクトルに変換
flat_vars <- unlist(selected_vars)
# 各変数がいくつの補完データセットで選択されたかをカウント
table(flat_vars)
## flat_vars
## (Intercept) alb apttdata ast barthel2 beta1
## 20 20 13 20 20 1
## bil crpdata eGFR gcsdata gcsdata'' pt
## 20 18 20 20 19 16
## shock1
## 20
imp1<-complete(imp,1)
final_formula<-lrm(hosp_mortality ~ alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,data = imp1 )
summary(final_formula)
## Effects Response : hosp_mortality
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## alb 2.500 3.200 0.700 -0.421690 0.209110 -0.8315400 -0.011847
## Odds Ratio 2.500 3.200 0.700 0.655940 NA 0.4353800 0.988220
## aptt 27.200 35.800 8.600 -0.075024 0.295770 -0.6547200 0.504680
## Odds Ratio 27.200 35.800 8.600 0.927720 NA 0.5195900 1.656400
## ast 33.000 93.000 60.000 0.126810 0.060268 0.0086893 0.244940
## Odds Ratio 33.000 93.000 60.000 1.135200 NA 1.0087000 1.277500
## bil 0.970 2.885 1.915 0.250670 0.112790 0.0296090 0.471720
## Odds Ratio 0.970 2.885 1.915 1.284900 NA 1.0301000 1.602800
## crp 0.110 0.790 0.680 0.386470 0.287160 -0.1763400 0.949290
## Odds Ratio 0.110 0.790 0.680 1.471800 NA 0.8383300 2.583900
## eGFR 48.898 89.236 40.338 -1.164400 0.227950 -1.6111000 -0.717590
## Odds Ratio 48.898 89.236 40.338 0.312120 NA 0.1996600 0.487930
## gcs 3.000 15.000 12.000 -3.431400 1.001700 -5.3947000 -1.468100
## Odds Ratio 3.000 15.000 12.000 0.032341 NA 0.0045405 0.230350
## pt 42.600 67.000 24.400 -0.297610 0.260740 -0.8086500 0.213430
## Odds Ratio 42.600 67.000 24.400 0.742590 NA 0.4454600 1.237900
## barthel - 1:0 1.000 2.000 NA 0.519640 0.423390 -0.3101800 1.349500
## Odds Ratio 1.000 2.000 NA 1.681400 NA 0.7333100 3.855300
## barthel - 2:0 1.000 3.000 NA 0.998850 0.384740 0.2447700 1.752900
## Odds Ratio 1.000 3.000 NA 2.715200 NA 1.2773000 5.771500
## shock - 1:0 1.000 2.000 NA 2.223200 0.383010 1.4725000 2.973900
## Odds Ratio 1.000 2.000 NA 9.236500 NA 4.3600000 19.567000
# モデルをフィットします
fit <- lrm(hosp_mortality ~ alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,data = imp1 )
# 回帰係数を取得します
coefficients <- coef(fit)
# オッズ比を計算します
odds_ratios <- exp(coefficients)
# 小数点以下3桁まで表示する
odds_ratios_formatted <- format(odds_ratios, digits = 3, scientific = FALSE)
print(odds_ratios_formatted)
## Intercept
## " 49.1220137450622047481374465860426425933837890625000000000000000000000000000000000000000000000000000000000000000000"
## alb
## " 0.5474866929826283090676497522508725523948669433593750000000000000000000000000000000000000000000000000000000000000"
## aptt
## " 1.0131949801997497395689151744591072201728820800781250000000000000000000000000000000000000000000000000000000000000"
## aptt'
## " 0.8533078867507345455223344288242515176534652709960937500000000000000000000000000000000000000000000000000000000000"
## aptt''
## " 1.4940459130949692578838039480615407228469848632812500000000000000000000000000000000000000000000000000000000000000"
## ast
## " 1.0021157786623380303581143380142748355865478515625000000000000000000000000000000000000000000000000000000000000000"
## barthel=1
## " 1.6814203733248649363929416722385212779045104980468750000000000000000000000000000000000000000000000000000000000000"
## barthel=2
## " 2.7151684122092758499888986989390105009078979492187500000000000000000000000000000000000000000000000000000000000000"
## bil
## " 1.1398495357933264848782073386246338486671447753906250000000000000000000000000000000000000000000000000000000000000"
## crp
## " 0.1011075639571644146919027207331964746117591857910156250000000000000000000000000000000000000000000000000000000000"
## crp'
## "12758080848842324716977539676677042458095203380089585308415726829881910099968.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000"
## crp''
## " 0.0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000366"
## eGFR
## " 0.9715474282845987241685747903829906135797500610351562500000000000000000000000000000000000000000000000000000000000"
## gcs
## " 0.5232881925495114217028458369895815849304199218750000000000000000000000000000000000000000000000000000000000000000"
## gcs'
## " 1.6408426954412047571452148986281827092170715332031250000000000000000000000000000000000000000000000000000000000000"
## gcs''
## " 0.0000000000271609360723319205908327334694866159528481297513735626125708222389221191406250000000000000000000000000"
## pt
## " 0.9878769662371085225061051460215821862220764160156250000000000000000000000000000000000000000000000000000000000000"
## shock=1
## " 9.2365247125562390806408075150102376937866210937500000000000000000000000000000000000000000000000000000000000000000"
#install.packages("ggplot2")
#install.packages("pROC")
library(pROC)
library(ggplot2)
fit_k<- lrm(hosp_mortality ~alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,x=TRUE, y=TRUE, data = imp1)
imp1$fitted <- predict(fit_k,type="fitted")
ROC <- roc(imp1$hosp_mortality ~ imp1$fitted, ci=TRUE, direction="auto")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
ggroc(ROC)
print(ROC)
##
## Call:
## roc.formula(formula = imp1$hosp_mortality ~ imp1$fitted, ci = TRUE, direction = "auto")
##
## Data: imp1$fitted in 862 controls (imp1$hosp_mortality 0) < 118 cases (imp1$hosp_mortality 1).
## Area under the curve: 0.9279
## 95% CI: 0.9081-0.9478 (DeLong)
val.prob(imp1$fitted, imp1$hosp_mortality, g=10, cex=.5)
## Dxy C (ROC) R2 D D:Chi-sq
## 8.559125e-01 9.279563e-01 5.384151e-01 3.279903e-01 3.224305e+02
## D:p U U:Chi-sq U:p Q
## NA -2.040816e-03 1.136868e-13 1.000000e+00 3.300311e-01
## Brier Intercept Slope Emax E90
## 6.200715e-02 -3.690602e-10 1.000000e+00 5.400114e-02 2.507903e-02
## Eavg S:z S:p
## 8.710764e-03 3.041256e-01 7.610322e-01
set.seed(10)
fit_k<- lrm(hosp_mortality ~alb+rcs(aptt,4)+ast+barthel+bil+rcs(crp,4)+eGFR+rcs(gcs,4)+ pt+ shock,x=TRUE, y=TRUE, data = imp1)
cv <-validate (fit_k, bw=FALSE,B=5,method="crossvalidation")
cv
## index.orig training test optimism index.corrected n
## Dxy 0.8559 0.8589 0.8232 0.0357 0.8202 5
## R2 0.5384 0.5448 0.4813 0.0635 0.4749 5
## Intercept 0.0000 0.0000 -0.1858 0.1858 -0.1858 5
## Slope 1.0000 1.0000 0.8614 0.1386 0.8614 5
## Emax 0.0000 0.0000 0.0685 0.0685 0.0685 5
## D 0.3280 0.3325 0.2837 0.0488 0.2792 5
## U -0.0020 -0.0026 0.0068 -0.0094 0.0073 5
## Q 0.3300 0.3350 0.2769 0.0581 0.2719 5
## B 0.0620 0.0613 0.0697 -0.0084 0.0704 5
## g 2.6963 2.7544 2.3631 0.3913 2.3049 5
## gp 0.1800 0.1808 0.1712 0.0096 0.1704 5
print("cv_AUC")
## [1] "cv_AUC"
print(cv[1,3]*0.5 + 0.5)
## [1] 0.9115976
print("corrected_AUC")
## [1] "corrected_AUC"
print(cv[1,5]*0.5 + 0.5)
## [1] 0.9100847
set.seed(10)
cv <-validate (fit_k, bw=FALSE,B=100,method="boot")
##
## Divergence or singularity in 2 samples
cv
## index.orig training test optimism index.corrected n
## Dxy 0.8559 0.8677 0.8425 0.0253 0.8306 98
## R2 0.5384 0.5628 0.5150 0.0477 0.4907 98
## Intercept 0.0000 0.0000 -0.1470 0.1470 -0.1470 98
## Slope 1.0000 1.0000 0.8811 0.1189 0.8811 98
## Emax 0.0000 0.0000 0.0556 0.0556 0.0556 98
## D 0.3280 0.3458 0.3112 0.0346 0.2934 98
## U -0.0020 -0.0020 0.0025 -0.0045 0.0025 98
## Q 0.3300 0.3479 0.3088 0.0391 0.2909 98
## B 0.0620 0.0589 0.0651 -0.0062 0.0682 98
## g 2.6963 2.9010 2.5549 0.3461 2.3502 98
## gp 0.1800 0.1827 0.1765 0.0063 0.1737 98
print("corrected_AUC")
## [1] "corrected_AUC"
print(cv[1,5]*0.5 + 0.5)
## [1] 0.9153031