pacman::p_load(reader, stringr, missForest, mice, ggplot2, sandwich, tidyverse, readxl, tableone, lubridate, skimr, summarytools, naniar, norm2, lmtest, car, ROCR, pROC, Hmisc, rms, glmnet, ggpubr, ggcorrplot,VIM,stringr,doParallel,gtsummary,rmda)
data<-read_excel("varix_prediction_meld.xlsx")
# "cohort"という新しい列を作成します。これは、年度に基づいて "develop"または "validation"となります。
data_cohort <- data %>%
mutate(cohort = ifelse(year %in% 2010:2016, "develop",
ifelse(year %in% 2017:2022, "validation", NA)))
df <-
data_cohort|>
mutate(
hosp_id=as.integer(hosp_id),
pt_id=as.integer(pt_id),
hosp_num=as.integer(hosp_num),
year=as.integer(year),
age=as.integer(age),
sex= factor(sex, levels = c("M", "F")),
smoke= as.integer(smoke),
barthel= factor(barthel, levels = c("0", "1", "2")),
child_num= as.integer(child_num),
child_score=factor(child_score, levels = c("0", "1", "2")),
gcs=as.integer(gcs),
cci_num=as.integer(cci_num),
pad=factor(pad),
stroke=factor(stroke),
dimentia=factor(dimentia),
ch_lung=factor(ch_lung),
rheumati=factor(rheumati),
pept_ulcer=factor(pept_ulcer),
dm=factor(dm),
dm_compli=factor(dm_compli),
paralysis=factor(paralysis),
malignancy=factor(malignancy),
meta_tumor=factor(meta_tumor),
aids=factor(aids),
eGFR30=factor(eGFR30),
hd=factor(hd),
hcc=factor(hcc),
alcohol=factor(alcohol),
past_rupture=factor(past_rupture),
antiplate=factor(antiplate),
anticoag=factor(anticoag),
antithro=factor(antithro),
nsaids=factor(nsaids),
steroid=factor(steroid),
beta=factor(beta),
vaso=factor(vaso),
map= as.integer(map),
ffp=factor(ffp),
pc=factor(pc),
albner=factor(albner),
sBP= as.integer(sBP),
dBP= as.integer(dBP),
hr=as.integer(hr),
shock=factor(shock),
los=as.integer(los),
cohort=factor(cohort),
meld=as.integer(meld)
)
# 新しい列 "cohort" を使用してデータフレームを分割します
df_dev <- df %>%
filter(cohort == "develop")
df_val <- df %>%
filter(cohort == "validation")
#str(df)
col_continuous = c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los","meld")
col_factors = c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality", "cohort")
# Create your table
df %>%
select(c(col_continuous, col_factors)) %>%
CreateTableOne(vars = c(col_continuous, col_factors), strata="cohort",factorVars = col_factors, addOverall = T) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_continuous)
##
## # Now:
## data %>% select(all_of(col_continuous))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_factors)
##
## # Now:
## data %>% select(all_of(col_factors))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## Stratified by cohort
## Overall develop validation
## n 980 536 444
## age (mean (SD)) 60.86 (13.01) 61.23 (13.06) 60.42 (12.96)
## bmi (mean (SD)) 23.61 (11.49) 22.91 (3.73) 24.38 (16.12)
## smoke (mean (SD)) 265.31 (483.85) 299.62 (571.82) 226.73 (356.99)
## child_num (mean (SD)) 8.59 (2.12) 8.77 (2.13) 8.38 (2.09)
## gcs (mean (SD)) 14.46 (1.87) 14.33 (2.20) 14.61 (1.34)
## cci_num (mean (SD)) 4.47 (1.32) 4.44 (1.20) 4.50 (1.45)
## map (mean (SD)) 3.37 (3.96) 3.49 (4.51) 3.21 (3.18)
## bt (mean (SD)) 36.73 (0.72) 36.65 (0.76) 36.81 (0.65)
## sBP (mean (SD)) 88.55 (16.32) 87.35 (15.48) 89.95 (17.16)
## dBP (mean (SD)) 52.78 (12.72) 51.00 (11.90) 54.84 (13.33)
## hr (mean (SD)) 86.90 (21.06) 85.58 (21.10) 88.43 (20.93)
## bil (mean (SD)) 2.30 (2.19) 2.20 (2.08) 2.42 (2.32)
## ast (mean (SD)) 83.87 (100.75) 85.41 (92.54) 82.05 (109.78)
## alt (mean (SD)) 41.79 (46.84) 43.36 (50.96) 39.93 (41.41)
## wbc (mean (SD)) 8676.10 (4747.62) 8348.30 (4401.79) 9068.42 (5108.48)
## hb (mean (SD)) 8.72 (2.49) 8.55 (2.46) 8.92 (2.51)
## plt (mean (SD)) 113.92 (66.44) 110.70 (57.80) 117.76 (75.39)
## tp (mean (SD)) 6.11 (0.92) 6.09 (0.93) 6.13 (0.92)
## alb (mean (SD)) 2.85 (0.61) 2.82 (0.59) 2.90 (0.64)
## eGFR (mean (SD)) 69.80 (31.04) 69.15 (30.82) 70.59 (31.31)
## bun (mean (SD)) 28.52 (17.92) 28.41 (18.64) 28.64 (17.04)
## cre (mean (SD)) 1.06 (0.90) 1.07 (0.92) 1.05 (0.89)
## crp (mean (SD)) 0.84 (1.76) 0.81 (1.67) 0.87 (1.86)
## pt (mean (SD)) 54.98 (17.67) 51.73 (16.99) 58.78 (17.72)
## aptt (mean (SD)) 34.08 (17.48) 32.43 (17.30) 36.06 (17.52)
## los (mean (SD)) 12.37 (14.69) 13.77 (15.28) 10.67 (13.77)
## meld (mean (SD)) 11.91 (6.07) 12.13 (6.16) 11.66 (5.97)
## sex = F (%) 246 ( 25.1) 131 ( 24.4) 115 ( 25.9)
## barthel (%)
## 0 313 ( 38.4) 142 ( 35.9) 171 ( 40.7)
## 1 249 ( 30.5) 118 ( 29.8) 131 ( 31.2)
## 2 254 ( 31.1) 136 ( 34.3) 118 ( 28.1)
## child_score (%)
## 0 149 ( 16.9) 66 ( 13.9) 83 ( 20.3)
## 1 437 ( 49.5) 231 ( 48.7) 206 ( 50.4)
## 2 297 ( 33.6) 177 ( 37.3) 120 ( 29.3)
## pad = 1 (%) 2 ( 0.2) 1 ( 0.2) 1 ( 0.2)
## stroke = 1 (%) 21 ( 2.1) 10 ( 1.9) 11 ( 2.5)
## dimentia = 1 (%) 10 ( 1.0) 4 ( 0.7) 6 ( 1.4)
## ch_lung = 1 (%) 15 ( 1.5) 11 ( 2.1) 4 ( 0.9)
## rheumati = 1 (%) 3 ( 0.3) 3 ( 0.6) 0 ( 0.0)
## pept_ulcer = 1 (%) 101 ( 10.3) 59 ( 11.0) 42 ( 9.5)
## dm = 1 (%) 210 ( 21.4) 106 ( 19.8) 104 ( 23.4)
## dm_compli = 1 (%) 13 ( 1.3) 6 ( 1.1) 7 ( 1.6)
## paralysis = 0 (%) 980 (100.0) 536 (100.0) 444 (100.0)
## malignancy = 1 (%) 115 ( 11.7) 58 ( 10.8) 57 ( 12.8)
## meta_tumor = 1 (%) 17 ( 1.7) 6 ( 1.1) 11 ( 2.5)
## aids = 0 (%) 980 (100.0) 536 (100.0) 444 (100.0)
## eGFR30 = 1 (%) 87 ( 9.1) 51 ( 9.8) 36 ( 8.2)
## hd = 1 (%) 13 ( 1.3) 3 ( 0.6) 10 ( 2.3)
## hcc = 1 (%) 175 ( 17.9) 111 ( 20.7) 64 ( 14.4)
## alcohol = 1 (%) 472 ( 48.2) 231 ( 43.1) 241 ( 54.3)
## past_rupture = 1 (%) 225 ( 23.0) 104 ( 19.4) 121 ( 27.3)
## antiplate = 1 (%) 7 ( 0.7) 4 ( 0.7) 3 ( 0.7)
## anticoag = 1 (%) 4 ( 0.4) 1 ( 0.2) 3 ( 0.7)
## antithro = 1 (%) 10 ( 1.0) 4 ( 0.7) 6 ( 1.4)
## nsaids = 1 (%) 9 ( 0.9) 5 ( 0.9) 4 ( 0.9)
## steroid = 1 (%) 5 ( 0.5) 2 ( 0.4) 3 ( 0.7)
## beta = 1 (%) 58 ( 5.9) 18 ( 3.4) 40 ( 9.0)
## vaso = 1 (%) 58 ( 5.9) 31 ( 5.8) 27 ( 6.1)
## ffp = 1 (%) 291 ( 29.7) 137 ( 25.6) 154 ( 34.7)
## pc = 1 (%) 17 ( 1.7) 6 ( 1.1) 11 ( 2.5)
## albner = 1 (%) 80 ( 8.2) 47 ( 8.8) 33 ( 7.4)
## shock = 1 (%) 409 ( 43.0) 215 ( 42.0) 194 ( 44.1)
## hosp_mortality = 1 (%) 118 ( 12.0) 73 ( 13.6) 45 ( 10.1)
## cohort = validation (%) 444 ( 45.3) 0 ( 0.0) 444 (100.0)
## Stratified by cohort
## p test SMD Missing
## n
## age (mean (SD)) 0.336 0.062 0.0
## bmi (mean (SD)) 0.060 0.126 11.7
## smoke (mean (SD)) 0.028 0.153 12.9
## child_num (mean (SD)) 0.007 0.186 13.2
## gcs (mean (SD)) 0.023 0.149 0.0
## cci_num (mean (SD)) 0.476 0.045 0.0
## map (mean (SD)) 0.273 0.071 0.0
## bt (mean (SD)) 0.001 0.224 3.8
## sBP (mean (SD)) 0.014 0.160 2.3
## dBP (mean (SD)) <0.001 0.304 2.3
## hr (mean (SD)) 0.038 0.135 2.9
## bil (mean (SD)) 0.135 0.097 3.8
## ast (mean (SD)) 0.608 0.033 2.6
## alt (mean (SD)) 0.261 0.074 2.6
## wbc (mean (SD)) 0.019 0.151 2.0
## hb (mean (SD)) 0.022 0.148 2.0
## plt (mean (SD)) 0.101 0.105 2.0
## tp (mean (SD)) 0.517 0.043 7.9
## alb (mean (SD)) 0.042 0.133 4.7
## eGFR (mean (SD)) 0.474 0.046 2.1
## bun (mean (SD)) 0.844 0.013 2.1
## cre (mean (SD)) 0.820 0.015 2.9
## crp (mean (SD)) 0.632 0.031 5.3
## pt (mean (SD)) <0.001 0.407 6.0
## aptt (mean (SD)) 0.002 0.208 11.4
## los (mean (SD)) 0.001 0.213 0.0
## meld (mean (SD)) 0.251 0.077 8.2
## sex = F (%) 0.652 0.034 0.0
## barthel (%) 0.140 0.139 16.7
## 0
## 1
## 2
## child_score (%) 0.008 0.210 9.9
## 0
## 1
## 2
## pad = 1 (%) 1.000 0.009 0.0
## stroke = 1 (%) 0.662 0.042 0.0
## dimentia = 1 (%) 0.536 0.059 0.0
## ch_lung = 1 (%) 0.230 0.096 0.0
## rheumati = 1 (%) 0.318 0.106 0.0
## pept_ulcer = 1 (%) 0.492 0.051 0.0
## dm = 1 (%) 0.191 0.089 0.0
## dm_compli = 1 (%) 0.732 0.040 0.0
## paralysis = 0 (%) NA <0.001 0.0
## malignancy = 1 (%) 0.381 0.062 0.0
## meta_tumor = 1 (%) 0.169 0.102 0.0
## aids = 0 (%) NA <0.001 0.0
## eGFR30 = 1 (%) 0.478 0.054 2.1
## hd = 1 (%) 0.043 0.144 0.0
## hcc = 1 (%) 0.013 0.166 0.0
## alcohol = 1 (%) 0.001 0.225 0.0
## past_rupture = 1 (%) 0.005 0.186 0.0
## antiplate = 1 (%) 1.000 0.008 0.0
## anticoag = 1 (%) 0.489 0.075 0.0
## antithro = 1 (%) 0.536 0.059 0.0
## nsaids = 1 (%) 1.000 0.003 0.0
## steroid = 1 (%) 0.833 0.042 0.0
## beta = 1 (%) <0.001 0.236 0.0
## vaso = 1 (%) 0.952 0.013 0.0
## ffp = 1 (%) 0.002 0.200 0.0
## pc = 1 (%) 0.169 0.102 0.0
## albner = 1 (%) 0.520 0.049 0.0
## shock = 1 (%) 0.558 0.042 2.9
## hosp_mortality = 1 (%) 0.116 0.108 0.0
## cohort = validation (%) <0.001 NaN 0.0
tbl_summary(data = df,
by = "cohort",
type = list(gcs ~ "continuous", year ~ "categorical"),
statistic = all_continuous() ~ "{median} ({p25}, {p75})",
digits = all_continuous() ~ c(2, 2))
| Characteristic | develop, N = 5361 | validation, N = 4441 |
|---|---|---|
| hosp_id | 1,011.00 (1,004.00, 1,024.00) | 1,022.00 (1,006.00, 1,024.00) |
| pt_id | 372.00 (177.75, 589.25) | 491.50 (244.75, 643.25) |
| hosp_num | ||
| 1 | 472 (88%) | 365 (82%) |
| 2 | 48 (9.0%) | 50 (11%) |
| 3 | 15 (2.8%) | 13 (2.9%) |
| 4 | 1 (0.2%) | 10 (2.3%) |
| 5 | 0 (0%) | 4 (0.9%) |
| 6 | 0 (0%) | 2 (0.5%) |
| year | ||
| 2010 | 76 (14%) | 0 (0%) |
| 2011 | 79 (15%) | 0 (0%) |
| 2012 | 88 (16%) | 0 (0%) |
| 2013 | 70 (13%) | 0 (0%) |
| 2014 | 77 (14%) | 0 (0%) |
| 2015 | 72 (13%) | 0 (0%) |
| 2016 | 74 (14%) | 0 (0%) |
| 2017 | 0 (0%) | 64 (14%) |
| 2018 | 0 (0%) | 62 (14%) |
| 2019 | 0 (0%) | 72 (16%) |
| 2020 | 0 (0%) | 82 (18%) |
| 2021 | 0 (0%) | 83 (19%) |
| 2022 | 0 (0%) | 81 (18%) |
| age | 62.00 (51.75, 70.00) | 60.00 (50.00, 70.00) |
| sex | ||
| M | 405 (76%) | 329 (74%) |
| F | 131 (24%) | 115 (26%) |
| bmi | 22.60 (20.32, 24.91) | 23.07 (20.59, 26.23) |
| Unknown | 85 | 30 |
| smoke | 20.00 (0.00, 440.00) | 0.00 (0.00, 360.00) |
| Unknown | 84 | 42 |
| barthel | ||
| 0 | 142 (36%) | 171 (41%) |
| 1 | 118 (30%) | 131 (31%) |
| 2 | 136 (34%) | 118 (28%) |
| Unknown | 140 | 24 |
| child_num | 8.00 (7.00, 10.00) | 8.00 (7.00, 10.00) |
| Unknown | 81 | 48 |
| child_score | ||
| 0 | 66 (14%) | 83 (20%) |
| 1 | 231 (49%) | 206 (50%) |
| 2 | 177 (37%) | 120 (29%) |
| Unknown | 62 | 35 |
| gcs | 15.00 (15.00, 15.00) | 15.00 (15.00, 15.00) |
| cci_num | 4.00 (4.00, 5.00) | 4.00 (4.00, 5.00) |
| pad | ||
| 0 | 535 (100%) | 443 (100%) |
| 1 | 1 (0.2%) | 1 (0.2%) |
| stroke | ||
| 0 | 526 (98%) | 433 (98%) |
| 1 | 10 (1.9%) | 11 (2.5%) |
| dimentia | ||
| 0 | 532 (99%) | 438 (99%) |
| 1 | 4 (0.7%) | 6 (1.4%) |
| ch_lung | ||
| 0 | 525 (98%) | 440 (99%) |
| 1 | 11 (2.1%) | 4 (0.9%) |
| rheumati | ||
| 0 | 533 (99%) | 444 (100%) |
| 1 | 3 (0.6%) | 0 (0%) |
| pept_ulcer | ||
| 0 | 477 (89%) | 402 (91%) |
| 1 | 59 (11%) | 42 (9.5%) |
| dm | ||
| 0 | 430 (80%) | 340 (77%) |
| 1 | 106 (20%) | 104 (23%) |
| dm_compli | ||
| 0 | 530 (99%) | 437 (98%) |
| 1 | 6 (1.1%) | 7 (1.6%) |
| paralysis | ||
| 0 | 536 (100%) | 444 (100%) |
| malignancy | ||
| 0 | 478 (89%) | 387 (87%) |
| 1 | 58 (11%) | 57 (13%) |
| meta_tumor | ||
| 0 | 530 (99%) | 433 (98%) |
| 1 | 6 (1.1%) | 11 (2.5%) |
| aids | ||
| 0 | 536 (100%) | 444 (100%) |
| eGFR30 | ||
| 0 | 471 (90%) | 401 (92%) |
| 1 | 51 (9.8%) | 36 (8.2%) |
| Unknown | 14 | 7 |
| hd | ||
| 0 | 533 (99%) | 434 (98%) |
| 1 | 3 (0.6%) | 10 (2.3%) |
| hcc | ||
| 0 | 425 (79%) | 380 (86%) |
| 1 | 111 (21%) | 64 (14%) |
| alcohol | ||
| 0 | 305 (57%) | 203 (46%) |
| 1 | 231 (43%) | 241 (54%) |
| past_rupture | ||
| 0 | 432 (81%) | 323 (73%) |
| 1 | 104 (19%) | 121 (27%) |
| antiplate | ||
| 0 | 532 (99%) | 441 (99%) |
| 1 | 4 (0.7%) | 3 (0.7%) |
| anticoag | ||
| 0 | 535 (100%) | 441 (99%) |
| 1 | 1 (0.2%) | 3 (0.7%) |
| antithro | ||
| 0 | 532 (99%) | 438 (99%) |
| 1 | 4 (0.7%) | 6 (1.4%) |
| nsaids | ||
| 0 | 531 (99%) | 440 (99%) |
| 1 | 5 (0.9%) | 4 (0.9%) |
| steroid | ||
| 0 | 534 (100%) | 441 (99%) |
| 1 | 2 (0.4%) | 3 (0.7%) |
| beta | ||
| 0 | 518 (97%) | 404 (91%) |
| 1 | 18 (3.4%) | 40 (9.0%) |
| vaso | ||
| 0 | 505 (94%) | 417 (94%) |
| 1 | 31 (5.8%) | 27 (6.1%) |
| map | 4.00 (0.00, 4.00) | 4.00 (0.00, 4.00) |
| ffp | ||
| 0 | 399 (74%) | 290 (65%) |
| 1 | 137 (26%) | 154 (35%) |
| pc | ||
| 0 | 530 (99%) | 433 (98%) |
| 1 | 6 (1.1%) | 11 (2.5%) |
| albner | ||
| 0 | 489 (91%) | 411 (93%) |
| 1 | 47 (8.8%) | 33 (7.4%) |
| bt | 36.70 (36.30, 37.00) | 36.80 (36.50, 37.10) |
| Unknown | 32 | 5 |
| sBP | 89.00 (79.00, 96.00) | 91.00 (80.00, 100.00) |
| Unknown | 22 | 1 |
| dBP | 51.00 (44.00, 58.00) | 54.00 (46.00, 63.00) |
| Unknown | 22 | 1 |
| hr | 82.00 (70.00, 98.00) | 85.00 (73.00, 101.00) |
| Unknown | 24 | 4 |
| shock | ||
| 0 | 297 (58%) | 246 (56%) |
| 1 | 215 (42%) | 194 (44%) |
| Unknown | 24 | 4 |
| bil | 1.46 (0.90, 2.70) | 1.65 (1.00, 3.00) |
| Unknown | 28 | 9 |
| ast | 56.00 (33.00, 96.00) | 49.00 (32.00, 89.00) |
| Unknown | 18 | 7 |
| alt | 29.00 (20.00, 47.00) | 29.00 (19.00, 44.00) |
| Unknown | 18 | 7 |
| wbc | 7,330.00 (5,315.00, 10,395.00) | 8,300.00 (5,800.00, 11,100.00) |
| Unknown | 13 | 7 |
| hb | 8.30 (6.90, 10.00) | 8.80 (7.20, 10.50) |
| Unknown | 13 | 7 |
| plt | 97.00 (72.00, 135.00) | 103.00 (75.00, 141.00) |
| Unknown | 13 | 7 |
| tp | 6.10 (5.50, 6.70) | 6.10 (5.50, 6.70) |
| Unknown | 45 | 32 |
| alb | 2.80 (2.50, 3.20) | 2.90 (2.50, 3.30) |
| Unknown | 34 | 12 |
| eGFR | 66.63 (48.25, 88.92) | 67.61 (49.43, 89.32) |
| Unknown | 14 | 7 |
| bun | 23.00 (15.85, 36.25) | 24.90 (16.60, 36.50) |
| Unknown | 14 | 7 |
| cre | 0.86 (0.67, 1.15) | 0.83 (0.65, 1.11) |
| Unknown | 18 | 10 |
| crp | 0.29 (0.11, 0.84) | 0.27 (0.12, 0.77) |
| Unknown | 37 | 15 |
| pt | 51.00 (40.00, 63.00) | 59.00 (46.00, 71.73) |
| Unknown | 39 | 20 |
| aptt | 28.70 (25.90, 33.40) | 32.40 (29.65, 37.35) |
| Unknown | 63 | 49 |
| meld | 11.00 (7.00, 15.00) | 10.00 (6.00, 15.00) |
| Unknown | 54 | 26 |
| hosp_mortality | 73 (14%) | 45 (10%) |
| los | 10.00 (6.00, 18.00) | 7.00 (5.00, 13.00) |
| 1 Median (IQR); n (%) | ||
#df |> #全体
# select(col_continuous) |>
# pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |>
# ggplot()+
# geom_histogram(aes(x = value), color = "black")+
# facet_wrap(~ name, scales = "free", ncol = 5) +
# theme_bw()+
# theme(text = element_text(size = 12))
col_cont = c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los","meld")
col_fact = c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality")
# Create your table
df_dev %>%
select(c(col_cont, col_fact)) %>%
CreateTableOne(vars = c(col_cont, col_fact), strata="hosp_mortality",factorVars = col_fact, addOverall = T) -> tableone_dev
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_cont)
##
## # Now:
## data %>% select(all_of(col_cont))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_fact)
##
## # Now:
## data %>% select(all_of(col_fact))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone_dev, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## Stratified by hosp_mortality
## Overall 0 1
## n 536 463 73
## age (mean (SD)) 61.23 (13.06) 60.82 (13.02) 63.84 (13.09)
## bmi (mean (SD)) 22.91 (3.73) 23.02 (3.76) 22.17 (3.41)
## smoke (mean (SD)) 299.62 (571.82) 283.65 (394.78) 406.02 (1214.88)
## child_num (mean (SD)) 8.77 (2.13) 8.47 (1.95) 10.91 (2.13)
## gcs (mean (SD)) 14.33 (2.20) 14.72 (1.27) 11.90 (4.33)
## cci_num (mean (SD)) 4.44 (1.20) 4.43 (1.08) 4.53 (1.78)
## map (mean (SD)) 3.49 (4.51) 3.10 (4.35) 5.96 (4.71)
## bt (mean (SD)) 36.65 (0.76) 36.68 (0.67) 36.48 (1.24)
## sBP (mean (SD)) 87.35 (15.48) 90.12 (13.62) 69.74 (15.05)
## dBP (mean (SD)) 51.00 (11.90) 52.83 (11.00) 39.39 (10.83)
## hr (mean (SD)) 85.58 (21.10) 83.17 (19.18) 100.84 (25.93)
## bil (mean (SD)) 2.20 (2.08) 1.97 (1.80) 3.69 (2.92)
## ast (mean (SD)) 85.41 (92.54) 79.65 (88.38) 121.08 (109.08)
## alt (mean (SD)) 43.36 (50.96) 41.65 (50.23) 53.94 (54.43)
## wbc (mean (SD)) 8348.30 (4401.79) 8193.93 (4211.45) 9299.86 (5366.73)
## hb (mean (SD)) 8.55 (2.46) 8.62 (2.47) 8.11 (2.38)
## plt (mean (SD)) 110.70 (57.80) 109.10 (52.04) 120.60 (84.92)
## tp (mean (SD)) 6.09 (0.93) 6.15 (0.86) 5.75 (1.21)
## alb (mean (SD)) 2.82 (0.59) 2.90 (0.54) 2.30 (0.62)
## eGFR (mean (SD)) 69.15 (30.82) 72.85 (30.55) 46.36 (21.32)
## bun (mean (SD)) 28.41 (18.64) 27.53 (17.80) 33.94 (22.59)
## cre (mean (SD)) 1.07 (0.92) 1.00 (0.92) 1.46 (0.82)
## crp (mean (SD)) 0.81 (1.67) 0.71 (1.59) 1.46 (1.96)
## pt (mean (SD)) 51.73 (16.99) 53.40 (16.04) 41.66 (19.06)
## aptt (mean (SD)) 32.43 (17.30) 30.76 (12.33) 42.18 (32.59)
## los (mean (SD)) 13.77 (15.28) 14.21 (14.24) 11.01 (20.59)
## meld (mean (SD)) 12.13 (6.16) 11.04 (4.91) 18.84 (8.52)
## sex = F (%) 131 ( 24.4) 111 ( 24.0) 20 ( 27.4)
## barthel (%)
## 0 142 ( 35.9) 136 ( 39.8) 6 ( 11.1)
## 1 118 ( 29.8) 108 ( 31.6) 10 ( 18.5)
## 2 136 ( 34.3) 98 ( 28.7) 38 ( 70.4)
## child_score (%)
## 0 66 ( 13.9) 65 ( 15.9) 1 ( 1.5)
## 1 231 ( 48.7) 217 ( 53.1) 14 ( 21.5)
## 2 177 ( 37.3) 127 ( 31.1) 50 ( 76.9)
## pad = 1 (%) 1 ( 0.2) 1 ( 0.2) 0 ( 0.0)
## stroke = 1 (%) 10 ( 1.9) 10 ( 2.2) 0 ( 0.0)
## dimentia = 1 (%) 4 ( 0.7) 4 ( 0.9) 0 ( 0.0)
## ch_lung = 1 (%) 11 ( 2.1) 9 ( 1.9) 2 ( 2.7)
## rheumati = 1 (%) 3 ( 0.6) 3 ( 0.6) 0 ( 0.0)
## pept_ulcer = 1 (%) 59 ( 11.0) 58 ( 12.5) 1 ( 1.4)
## dm = 1 (%) 106 ( 19.8) 98 ( 21.2) 8 ( 11.0)
## dm_compli = 1 (%) 6 ( 1.1) 6 ( 1.3) 0 ( 0.0)
## paralysis = 0 (%) 536 (100.0) 463 (100.0) 73 (100.0)
## malignancy = 1 (%) 58 ( 10.8) 46 ( 9.9) 12 ( 16.4)
## meta_tumor = 1 (%) 6 ( 1.1) 2 ( 0.4) 4 ( 5.5)
## aids = 0 (%) 536 (100.0) 463 (100.0) 73 (100.0)
## eGFR30 = 1 (%) 51 ( 9.8) 34 ( 7.6) 17 ( 23.3)
## hd = 1 (%) 3 ( 0.6) 3 ( 0.6) 0 ( 0.0)
## hcc = 1 (%) 111 ( 20.7) 95 ( 20.5) 16 ( 21.9)
## alcohol = 1 (%) 231 ( 43.1) 205 ( 44.3) 26 ( 35.6)
## past_rupture = 1 (%) 104 ( 19.4) 96 ( 20.7) 8 ( 11.0)
## antiplate = 1 (%) 4 ( 0.7) 3 ( 0.6) 1 ( 1.4)
## anticoag = 1 (%) 1 ( 0.2) 1 ( 0.2) 0 ( 0.0)
## antithro = 1 (%) 4 ( 0.7) 3 ( 0.6) 1 ( 1.4)
## nsaids = 1 (%) 5 ( 0.9) 5 ( 1.1) 0 ( 0.0)
## steroid = 1 (%) 2 ( 0.4) 1 ( 0.2) 1 ( 1.4)
## beta = 1 (%) 18 ( 3.4) 18 ( 3.9) 0 ( 0.0)
## vaso = 1 (%) 31 ( 5.8) 11 ( 2.4) 20 ( 27.4)
## ffp = 1 (%) 137 ( 25.6) 100 ( 21.6) 37 ( 50.7)
## pc = 1 (%) 6 ( 1.1) 3 ( 0.6) 3 ( 4.1)
## albner = 1 (%) 47 ( 8.8) 32 ( 6.9) 15 ( 20.5)
## shock = 1 (%) 215 ( 42.0) 155 ( 35.1) 60 ( 85.7)
## hosp_mortality = 1 (%) 73 ( 13.6) 0 ( 0.0) 73 (100.0)
## Stratified by hosp_mortality
## p test SMD Missing
## n
## age (mean (SD)) 0.066 0.231 0.0
## bmi (mean (SD)) 0.108 0.236 15.9
## smoke (mean (SD)) 0.125 0.135 15.7
## child_num (mean (SD)) <0.001 1.191 15.1
## gcs (mean (SD)) <0.001 0.882 0.0
## cci_num (mean (SD)) 0.490 0.071 0.0
## map (mean (SD)) <0.001 0.630 0.0
## bt (mean (SD)) 0.056 0.197 6.0
## sBP (mean (SD)) <0.001 1.420 4.1
## dBP (mean (SD)) <0.001 1.231 4.1
## hr (mean (SD)) <0.001 0.775 4.5
## bil (mean (SD)) <0.001 0.708 5.2
## ast (mean (SD)) <0.001 0.417 3.4
## alt (mean (SD)) 0.057 0.235 3.4
## wbc (mean (SD)) 0.046 0.229 2.4
## hb (mean (SD)) 0.099 0.211 2.4
## plt (mean (SD)) 0.115 0.163 2.4
## tp (mean (SD)) 0.001 0.381 8.4
## alb (mean (SD)) <0.001 1.019 6.3
## eGFR (mean (SD)) <0.001 1.006 2.6
## bun (mean (SD)) 0.007 0.316 2.6
## cre (mean (SD)) <0.001 0.533 3.4
## crp (mean (SD)) <0.001 0.421 6.9
## pt (mean (SD)) <0.001 0.667 7.3
## aptt (mean (SD)) <0.001 0.463 11.8
## los (mean (SD)) 0.097 0.180 0.0
## meld (mean (SD)) <0.001 1.121 10.1
## sex = F (%) 0.627 0.078 0.0
## barthel (%) <0.001 0.955 26.1
## 0
## 1
## 2
## child_score (%) <0.001 1.072 11.6
## 0
## 1
## 2
## pad = 1 (%) 1.000 0.066 0.0
## stroke = 1 (%) 0.422 0.210 0.0
## dimentia = 1 (%) 0.948 0.132 0.0
## ch_lung = 1 (%) 0.999 0.053 0.0
## rheumati = 1 (%) 1.000 0.114 0.0
## pept_ulcer = 1 (%) 0.009 0.450 0.0
## dm = 1 (%) 0.061 0.281 0.0
## dm_compli = 1 (%) 0.704 0.162 0.0
## paralysis = 0 (%) NA <0.001 0.0
## malignancy = 1 (%) 0.144 0.193 0.0
## meta_tumor = 1 (%) 0.001 0.301 0.0
## aids = 0 (%) NA <0.001 0.0
## eGFR30 = 1 (%) <0.001 0.446 2.6
## hd = 1 (%) 1.000 0.114 0.0
## hcc = 1 (%) 0.905 0.034 0.0
## alcohol = 1 (%) 0.207 0.178 0.0
## past_rupture = 1 (%) 0.071 0.270 0.0
## antiplate = 1 (%) 1.000 0.072 0.0
## anticoag = 1 (%) 1.000 0.066 0.0
## antithro = 1 (%) 1.000 0.072 0.0
## nsaids = 1 (%) 0.813 0.148 0.0
## steroid = 1 (%) 0.638 0.130 0.0
## beta = 1 (%) 0.173 0.284 0.0
## vaso = 1 (%) <0.001 0.751 0.0
## ffp = 1 (%) <0.001 0.635 0.0
## pc = 1 (%) 0.044 0.229 0.0
## albner = 1 (%) <0.001 0.404 0.0
## shock = 1 (%) <0.001 1.210 4.5
## hosp_mortality = 1 (%) <0.001 NaN 0.0
# specify your data and variables
tbl_summary(data = df_dev,
by = "hosp_mortality",
type = list(gcs ~ "continuous", year ~ "categorical"),
statistic = all_continuous() ~ "{median} ({p25}, {p75})",
digits = all_continuous() ~ c(2, 2))
| Characteristic | 0, N = 4631 | 1, N = 731 |
|---|---|---|
| hosp_id | 1,010.00 (1,003.00, 1,024.00) | 1,017.00 (1,005.00, 1,024.00) |
| pt_id | 359.00 (149.50, 583.00) | 426.00 (235.00, 599.00) |
| hosp_num | ||
| 1 | 404 (87%) | 68 (93%) |
| 2 | 43 (9.3%) | 5 (6.8%) |
| 3 | 15 (3.2%) | 0 (0%) |
| 4 | 1 (0.2%) | 0 (0%) |
| year | ||
| 2010 | 68 (15%) | 8 (11%) |
| 2011 | 70 (15%) | 9 (12%) |
| 2012 | 72 (16%) | 16 (22%) |
| 2013 | 55 (12%) | 15 (21%) |
| 2014 | 69 (15%) | 8 (11%) |
| 2015 | 67 (14%) | 5 (6.8%) |
| 2016 | 62 (13%) | 12 (16%) |
| age | 62.00 (51.00, 70.00) | 66.00 (55.00, 74.00) |
| sex | ||
| M | 352 (76%) | 53 (73%) |
| F | 111 (24%) | 20 (27%) |
| bmi | 22.63 (20.39, 25.08) | 22.49 (20.15, 23.71) |
| Unknown | 69 | 16 |
| smoke | 60.00 (0.00, 440.00) | 0.00 (0.00, 400.00) |
| Unknown | 70 | 14 |
| barthel | ||
| 0 | 136 (40%) | 6 (11%) |
| 1 | 108 (32%) | 10 (19%) |
| 2 | 98 (29%) | 38 (70%) |
| Unknown | 121 | 19 |
| child_num | 8.00 (7.00, 10.00) | 11.00 (9.00, 12.00) |
| Unknown | 64 | 17 |
| child_score | ||
| 0 | 65 (16%) | 1 (1.5%) |
| 1 | 217 (53%) | 14 (22%) |
| 2 | 127 (31%) | 50 (77%) |
| Unknown | 54 | 8 |
| gcs | 15.00 (15.00, 15.00) | 15.00 (9.00, 15.00) |
| cci_num | 4.00 (4.00, 5.00) | 4.00 (4.00, 5.00) |
| pad | ||
| 0 | 462 (100%) | 73 (100%) |
| 1 | 1 (0.2%) | 0 (0%) |
| stroke | ||
| 0 | 453 (98%) | 73 (100%) |
| 1 | 10 (2.2%) | 0 (0%) |
| dimentia | ||
| 0 | 459 (99%) | 73 (100%) |
| 1 | 4 (0.9%) | 0 (0%) |
| ch_lung | ||
| 0 | 454 (98%) | 71 (97%) |
| 1 | 9 (1.9%) | 2 (2.7%) |
| rheumati | ||
| 0 | 460 (99%) | 73 (100%) |
| 1 | 3 (0.6%) | 0 (0%) |
| pept_ulcer | ||
| 0 | 405 (87%) | 72 (99%) |
| 1 | 58 (13%) | 1 (1.4%) |
| dm | ||
| 0 | 365 (79%) | 65 (89%) |
| 1 | 98 (21%) | 8 (11%) |
| dm_compli | ||
| 0 | 457 (99%) | 73 (100%) |
| 1 | 6 (1.3%) | 0 (0%) |
| paralysis | ||
| 0 | 463 (100%) | 73 (100%) |
| malignancy | ||
| 0 | 417 (90%) | 61 (84%) |
| 1 | 46 (9.9%) | 12 (16%) |
| meta_tumor | ||
| 0 | 461 (100%) | 69 (95%) |
| 1 | 2 (0.4%) | 4 (5.5%) |
| aids | ||
| 0 | 463 (100%) | 73 (100%) |
| eGFR30 | ||
| 0 | 415 (92%) | 56 (77%) |
| 1 | 34 (7.6%) | 17 (23%) |
| Unknown | 14 | 0 |
| hd | ||
| 0 | 460 (99%) | 73 (100%) |
| 1 | 3 (0.6%) | 0 (0%) |
| hcc | ||
| 0 | 368 (79%) | 57 (78%) |
| 1 | 95 (21%) | 16 (22%) |
| alcohol | ||
| 0 | 258 (56%) | 47 (64%) |
| 1 | 205 (44%) | 26 (36%) |
| past_rupture | ||
| 0 | 367 (79%) | 65 (89%) |
| 1 | 96 (21%) | 8 (11%) |
| antiplate | ||
| 0 | 460 (99%) | 72 (99%) |
| 1 | 3 (0.6%) | 1 (1.4%) |
| anticoag | ||
| 0 | 462 (100%) | 73 (100%) |
| 1 | 1 (0.2%) | 0 (0%) |
| antithro | ||
| 0 | 460 (99%) | 72 (99%) |
| 1 | 3 (0.6%) | 1 (1.4%) |
| nsaids | ||
| 0 | 458 (99%) | 73 (100%) |
| 1 | 5 (1.1%) | 0 (0%) |
| steroid | ||
| 0 | 462 (100%) | 72 (99%) |
| 1 | 1 (0.2%) | 1 (1.4%) |
| beta | ||
| 0 | 445 (96%) | 73 (100%) |
| 1 | 18 (3.9%) | 0 (0%) |
| vaso | ||
| 0 | 452 (98%) | 53 (73%) |
| 1 | 11 (2.4%) | 20 (27%) |
| map | 4.00 (0.00, 4.00) | 6.00 (4.00, 8.00) |
| ffp | ||
| 0 | 363 (78%) | 36 (49%) |
| 1 | 100 (22%) | 37 (51%) |
| pc | ||
| 0 | 460 (99%) | 70 (96%) |
| 1 | 3 (0.6%) | 3 (4.1%) |
| albner | ||
| 0 | 431 (93%) | 58 (79%) |
| 1 | 32 (6.9%) | 15 (21%) |
| bt | 36.70 (36.30, 37.00) | 36.40 (36.00, 36.95) |
| Unknown | 22 | 10 |
| sBP | 90.00 (82.00, 98.00) | 64.00 (59.00, 78.75) |
| Unknown | 19 | 3 |
| dBP | 52.00 (46.00, 60.00) | 39.00 (33.00, 46.00) |
| Unknown | 19 | 3 |
| hr | 80.00 (70.00, 94.00) | 101.00 (89.00, 112.00) |
| Unknown | 21 | 3 |
| shock | ||
| 0 | 287 (65%) | 10 (14%) |
| 1 | 155 (35%) | 60 (86%) |
| Unknown | 21 | 3 |
| bil | 1.40 (0.90, 2.40) | 2.99 (1.53, 4.60) |
| Unknown | 24 | 4 |
| ast | 53.00 (32.00, 91.75) | 82.50 (48.75, 153.00) |
| Unknown | 17 | 1 |
| alt | 28.00 (20.00, 44.00) | 33.00 (25.00, 63.25) |
| Unknown | 17 | 1 |
| wbc | 7,215.00 (5,300.00, 10,300.00) | 7,990.00 (6,200.00, 10,800.00) |
| Unknown | 13 | 0 |
| hb | 8.40 (7.00, 10.10) | 7.90 (6.60, 9.80) |
| Unknown | 13 | 0 |
| plt | 97.00 (72.00, 135.00) | 98.00 (76.00, 130.00) |
| Unknown | 13 | 0 |
| tp | 6.20 (5.50, 6.70) | 5.90 (5.10, 6.40) |
| Unknown | 37 | 8 |
| alb | 2.90 (2.58, 3.20) | 2.40 (1.80, 2.70) |
| Unknown | 31 | 3 |
| eGFR | 71.30 (52.10, 91.98) | 44.28 (30.40, 58.10) |
| Unknown | 14 | 0 |
| bun | 22.60 (15.25, 35.15) | 27.55 (17.98, 44.93) |
| Unknown | 13 | 1 |
| cre | 0.81 (0.65, 1.08) | 1.18 (0.99, 1.72) |
| Unknown | 18 | 0 |
| crp | 0.25 (0.10, 0.68) | 0.84 (0.37, 1.87) |
| Unknown | 33 | 4 |
| pt | 53.00 (42.00, 63.98) | 39.70 (26.90, 55.80) |
| Unknown | 37 | 2 |
| aptt | 28.30 (25.40, 32.52) | 31.70 (29.10, 43.80) |
| Unknown | 59 | 4 |
| meld | 10.00 (7.00, 14.00) | 16.00 (13.00, 24.50) |
| Unknown | 48 | 6 |
| los | 10.00 (7.00, 18.00) | 4.00 (1.00, 16.00) |
| cohort | ||
| develop | 463 (100%) | 73 (100%) |
| validation | 0 (0%) | 0 (0%) |
| 1 Median (IQR); n (%) | ||
#df_dev |> #全体
# select(col_continuous) |>
# pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |>
# ggplot()+
# geom_histogram(aes(x = value), color = "black")+
# facet_wrap(~ name, scales = "free", ncol = 5) +
# theme_bw()+
# theme(text = element_text(size = 12))
# Create your table
df_val %>%
select(c(col_cont, col_fact)) %>%
CreateTableOne(vars = c(col_cont, col_fact), strata="hosp_mortality",factorVars = col_fact, addOverall = T) -> tableone_val
# Print your table
print(tableone_val, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## Stratified by hosp_mortality
## Overall 0 1
## n 444 399 45
## age (mean (SD)) 60.42 (12.96) 60.18 (13.03) 62.62 (12.19)
## bmi (mean (SD)) 24.38 (16.12) 24.46 (16.74) 23.43 (4.22)
## smoke (mean (SD)) 226.73 (356.99) 233.57 (364.62) 157.22 (261.19)
## child_num (mean (SD)) 8.38 (2.09) 8.13 (1.93) 10.52 (2.14)
## gcs (mean (SD)) 14.61 (1.34) 14.66 (1.24) 14.09 (1.94)
## cci_num (mean (SD)) 4.50 (1.45) 4.46 (1.36) 4.91 (2.11)
## map (mean (SD)) 3.21 (3.18) 2.88 (2.76) 6.13 (4.81)
## bt (mean (SD)) 36.81 (0.65) 36.81 (0.54) 36.84 (1.30)
## sBP (mean (SD)) 89.95 (17.16) 92.40 (15.70) 68.33 (14.21)
## dBP (mean (SD)) 54.84 (13.33) 56.33 (12.84) 41.69 (10.03)
## hr (mean (SD)) 88.43 (20.93) 86.76 (20.28) 103.84 (20.79)
## bil (mean (SD)) 2.42 (2.32) 2.18 (1.86) 4.54 (4.21)
## ast (mean (SD)) 82.05 (109.78) 72.39 (79.19) 168.27 (237.83)
## alt (mean (SD)) 39.93 (41.41) 36.47 (27.91) 70.91 (95.88)
## wbc (mean (SD)) 9068.42 (5108.48) 8834.15 (5148.20) 11160.91 (4243.62)
## hb (mean (SD)) 8.92 (2.51) 9.00 (2.55) 8.18 (1.96)
## plt (mean (SD)) 117.76 (75.39) 118.45 (77.27) 111.57 (56.24)
## tp (mean (SD)) 6.13 (0.92) 6.19 (0.90) 5.64 (1.03)
## alb (mean (SD)) 2.90 (0.64) 2.97 (0.61) 2.29 (0.54)
## eGFR (mean (SD)) 70.59 (31.31) 73.60 (30.92) 43.71 (20.02)
## bun (mean (SD)) 28.64 (17.04) 27.83 (16.43) 35.88 (20.60)
## cre (mean (SD)) 1.05 (0.89) 1.00 (0.87) 1.54 (0.88)
## crp (mean (SD)) 0.87 (1.86) 0.74 (1.64) 2.03 (3.01)
## pt (mean (SD)) 58.78 (17.72) 60.61 (16.94) 43.01 (16.65)
## aptt (mean (SD)) 36.06 (17.52) 34.27 (13.26) 51.43 (34.63)
## los (mean (SD)) 10.67 (13.77) 10.72 (13.83) 10.18 (13.38)
## meld (mean (SD)) 11.66 (5.97) 10.71 (4.98) 19.75 (7.47)
## sex = F (%) 115 ( 25.9) 103 ( 25.8) 12 ( 26.7)
## barthel (%)
## 0 171 ( 40.7) 167 ( 44.3) 4 ( 9.3)
## 1 131 ( 31.2) 120 ( 31.8) 11 ( 25.6)
## 2 118 ( 28.1) 90 ( 23.9) 28 ( 65.1)
## child_score (%)
## 0 83 ( 20.3) 81 ( 22.3) 2 ( 4.4)
## 1 206 ( 50.4) 193 ( 53.0) 13 ( 28.9)
## 2 120 ( 29.3) 90 ( 24.7) 30 ( 66.7)
## pad = 1 (%) 1 ( 0.2) 1 ( 0.3) 0 ( 0.0)
## stroke = 1 (%) 11 ( 2.5) 8 ( 2.0) 3 ( 6.7)
## dimentia = 1 (%) 6 ( 1.4) 5 ( 1.3) 1 ( 2.2)
## ch_lung = 1 (%) 4 ( 0.9) 3 ( 0.8) 1 ( 2.2)
## rheumati = 0 (%) 444 (100.0) 399 (100.0) 45 (100.0)
## pept_ulcer = 1 (%) 42 ( 9.5) 40 ( 10.0) 2 ( 4.4)
## dm = 1 (%) 104 ( 23.4) 96 ( 24.1) 8 ( 17.8)
## dm_compli = 1 (%) 7 ( 1.6) 6 ( 1.5) 1 ( 2.2)
## paralysis = 0 (%) 444 (100.0) 399 (100.0) 45 (100.0)
## malignancy = 1 (%) 57 ( 12.8) 50 ( 12.5) 7 ( 15.6)
## meta_tumor = 1 (%) 11 ( 2.5) 7 ( 1.8) 4 ( 8.9)
## aids = 0 (%) 444 (100.0) 399 (100.0) 45 (100.0)
## eGFR30 = 1 (%) 36 ( 8.2) 22 ( 5.6) 14 ( 31.8)
## hd = 1 (%) 10 ( 2.3) 7 ( 1.8) 3 ( 6.7)
## hcc = 1 (%) 64 ( 14.4) 56 ( 14.0) 8 ( 17.8)
## alcohol = 1 (%) 241 ( 54.3) 212 ( 53.1) 29 ( 64.4)
## past_rupture = 1 (%) 121 ( 27.3) 115 ( 28.8) 6 ( 13.3)
## antiplate = 1 (%) 3 ( 0.7) 2 ( 0.5) 1 ( 2.2)
## anticoag = 1 (%) 3 ( 0.7) 3 ( 0.8) 0 ( 0.0)
## antithro = 1 (%) 6 ( 1.4) 5 ( 1.3) 1 ( 2.2)
## nsaids = 1 (%) 4 ( 0.9) 4 ( 1.0) 0 ( 0.0)
## steroid = 1 (%) 3 ( 0.7) 3 ( 0.8) 0 ( 0.0)
## beta = 1 (%) 40 ( 9.0) 40 ( 10.0) 0 ( 0.0)
## vaso = 1 (%) 27 ( 6.1) 18 ( 4.5) 9 ( 20.0)
## ffp = 1 (%) 154 ( 34.7) 126 ( 31.6) 28 ( 62.2)
## pc = 1 (%) 11 ( 2.5) 6 ( 1.5) 5 ( 11.1)
## albner = 1 (%) 33 ( 7.4) 26 ( 6.5) 7 ( 15.6)
## shock = 1 (%) 194 ( 44.1) 153 ( 38.5) 41 ( 95.3)
## hosp_mortality = 1 (%) 45 ( 10.1) 0 ( 0.0) 45 (100.0)
## Stratified by hosp_mortality
## p test SMD Missing
## n
## age (mean (SD)) 0.230 0.194 0.0
## bmi (mean (SD)) 0.730 0.084 6.8
## smoke (mean (SD)) 0.221 0.241 9.5
## child_num (mean (SD)) <0.001 1.174 10.8
## gcs (mean (SD)) 0.006 0.353 0.0
## cci_num (mean (SD)) 0.048 0.255 0.0
## map (mean (SD)) <0.001 0.828 0.0
## bt (mean (SD)) 0.785 0.029 1.1
## sBP (mean (SD)) <0.001 1.607 0.2
## dBP (mean (SD)) <0.001 1.271 0.2
## hr (mean (SD)) <0.001 0.832 0.9
## bil (mean (SD)) <0.001 0.724 2.0
## ast (mean (SD)) <0.001 0.541 1.6
## alt (mean (SD)) <0.001 0.488 1.6
## wbc (mean (SD)) 0.004 0.493 1.6
## hb (mean (SD)) 0.040 0.361 1.6
## plt (mean (SD)) 0.566 0.102 1.6
## tp (mean (SD)) <0.001 0.566 7.2
## alb (mean (SD)) <0.001 1.177 2.7
## eGFR (mean (SD)) <0.001 1.148 1.6
## bun (mean (SD)) 0.003 0.432 1.6
## cre (mean (SD)) <0.001 0.623 2.3
## crp (mean (SD)) <0.001 0.532 3.4
## pt (mean (SD)) <0.001 1.048 4.5
## aptt (mean (SD)) <0.001 0.654 11.0
## los (mean (SD)) 0.801 0.040 0.0
## meld (mean (SD)) <0.001 1.424 5.9
## sex = F (%) 1.000 0.019 0.0
## barthel (%) <0.001 1.041 5.4
## 0
## 1
## 2
## child_score (%) <0.001 0.972 7.9
## 0
## 1
## 2
## pad = 1 (%) 1.000 0.071 0.0
## stroke = 1 (%) 0.161 0.230 0.0
## dimentia = 1 (%) 1.000 0.074 0.0
## ch_lung = 1 (%) 0.875 0.122 0.0
## rheumati = 0 (%) NA <0.001 0.0
## pept_ulcer = 1 (%) 0.345 0.217 0.0
## dm = 1 (%) 0.449 0.155 0.0
## dm_compli = 1 (%) 1.000 0.053 0.0
## paralysis = 0 (%) NA <0.001 0.0
## malignancy = 1 (%) 0.734 0.087 0.0
## meta_tumor = 1 (%) 0.016 0.322 0.0
## aids = 0 (%) NA <0.001 0.0
## eGFR30 = 1 (%) <0.001 0.714 1.6
## hd = 1 (%) 0.115 0.246 0.0
## hcc = 1 (%) 0.650 0.102 0.0
## alcohol = 1 (%) 0.198 0.231 0.0
## past_rupture = 1 (%) 0.042 0.387 0.0
## antiplate = 1 (%) 0.707 0.149 0.0
## anticoag = 1 (%) 1.000 0.123 0.0
## antithro = 1 (%) 1.000 0.074 0.0
## nsaids = 1 (%) 1.000 0.142 0.0
## steroid = 1 (%) 1.000 0.123 0.0
## beta = 1 (%) 0.051 0.472 0.0
## vaso = 1 (%) <0.001 0.486 0.0
## ffp = 1 (%) <0.001 0.645 0.0
## pc = 1 (%) 0.001 0.403 0.0
## albner = 1 (%) 0.059 0.292 0.0
## shock = 1 (%) <0.001 1.515 0.9
## hosp_mortality = 1 (%) <0.001 NaN 0.0
# specify your data and variables
tbl_summary(data = df_val,
by = "hosp_mortality",
type = list(gcs ~ "continuous", year ~ "categorical"),
statistic = all_continuous() ~ "{median} ({p25}, {p75})",
digits = all_continuous() ~ c(2, 2))
| Characteristic | 0, N = 3991 | 1, N = 451 |
|---|---|---|
| hosp_id | 1,022.00 (1,005.00, 1,024.00) | 1,022.00 (1,006.00, 1,062.00) |
| pt_id | 488.00 (232.50, 639.50) | 510.00 (299.00, 738.00) |
| hosp_num | ||
| 1 | 326 (82%) | 39 (87%) |
| 2 | 46 (12%) | 4 (8.9%) |
| 3 | 12 (3.0%) | 1 (2.2%) |
| 4 | 10 (2.5%) | 0 (0%) |
| 5 | 3 (0.8%) | 1 (2.2%) |
| 6 | 2 (0.5%) | 0 (0%) |
| year | ||
| 2017 | 57 (14%) | 7 (16%) |
| 2018 | 53 (13%) | 9 (20%) |
| 2019 | 63 (16%) | 9 (20%) |
| 2020 | 74 (19%) | 8 (18%) |
| 2021 | 76 (19%) | 7 (16%) |
| 2022 | 76 (19%) | 5 (11%) |
| age | 60.00 (50.00, 70.00) | 64.00 (52.00, 70.00) |
| sex | ||
| M | 296 (74%) | 33 (73%) |
| F | 103 (26%) | 12 (27%) |
| bmi | 23.14 (20.59, 26.28) | 22.78 (20.16, 25.62) |
| Unknown | 17 | 13 |
| smoke | 0.00 (0.00, 390.00) | 0.00 (0.00, 312.50) |
| Unknown | 33 | 9 |
| barthel | ||
| 0 | 167 (44%) | 4 (9.3%) |
| 1 | 120 (32%) | 11 (26%) |
| 2 | 90 (24%) | 28 (65%) |
| Unknown | 22 | 2 |
| child_num | 8.00 (7.00, 9.00) | 11.00 (9.00, 12.00) |
| Unknown | 45 | 3 |
| child_score | ||
| 0 | 81 (22%) | 2 (4.4%) |
| 1 | 193 (53%) | 13 (29%) |
| 2 | 90 (25%) | 30 (67%) |
| Unknown | 35 | 0 |
| gcs | 15.00 (15.00, 15.00) | 15.00 (14.00, 15.00) |
| cci_num | 4.00 (4.00, 5.00) | 4.00 (4.00, 5.00) |
| pad | ||
| 0 | 398 (100%) | 45 (100%) |
| 1 | 1 (0.3%) | 0 (0%) |
| stroke | ||
| 0 | 391 (98%) | 42 (93%) |
| 1 | 8 (2.0%) | 3 (6.7%) |
| dimentia | ||
| 0 | 394 (99%) | 44 (98%) |
| 1 | 5 (1.3%) | 1 (2.2%) |
| ch_lung | ||
| 0 | 396 (99%) | 44 (98%) |
| 1 | 3 (0.8%) | 1 (2.2%) |
| rheumati | ||
| 0 | 399 (100%) | 45 (100%) |
| 1 | 0 (0%) | 0 (0%) |
| pept_ulcer | ||
| 0 | 359 (90%) | 43 (96%) |
| 1 | 40 (10%) | 2 (4.4%) |
| dm | ||
| 0 | 303 (76%) | 37 (82%) |
| 1 | 96 (24%) | 8 (18%) |
| dm_compli | ||
| 0 | 393 (98%) | 44 (98%) |
| 1 | 6 (1.5%) | 1 (2.2%) |
| paralysis | ||
| 0 | 399 (100%) | 45 (100%) |
| malignancy | ||
| 0 | 349 (87%) | 38 (84%) |
| 1 | 50 (13%) | 7 (16%) |
| meta_tumor | ||
| 0 | 392 (98%) | 41 (91%) |
| 1 | 7 (1.8%) | 4 (8.9%) |
| aids | ||
| 0 | 399 (100%) | 45 (100%) |
| eGFR30 | ||
| 0 | 371 (94%) | 30 (68%) |
| 1 | 22 (5.6%) | 14 (32%) |
| Unknown | 6 | 1 |
| hd | ||
| 0 | 392 (98%) | 42 (93%) |
| 1 | 7 (1.8%) | 3 (6.7%) |
| hcc | ||
| 0 | 343 (86%) | 37 (82%) |
| 1 | 56 (14%) | 8 (18%) |
| alcohol | ||
| 0 | 187 (47%) | 16 (36%) |
| 1 | 212 (53%) | 29 (64%) |
| past_rupture | ||
| 0 | 284 (71%) | 39 (87%) |
| 1 | 115 (29%) | 6 (13%) |
| antiplate | ||
| 0 | 397 (99%) | 44 (98%) |
| 1 | 2 (0.5%) | 1 (2.2%) |
| anticoag | ||
| 0 | 396 (99%) | 45 (100%) |
| 1 | 3 (0.8%) | 0 (0%) |
| antithro | ||
| 0 | 394 (99%) | 44 (98%) |
| 1 | 5 (1.3%) | 1 (2.2%) |
| nsaids | ||
| 0 | 395 (99%) | 45 (100%) |
| 1 | 4 (1.0%) | 0 (0%) |
| steroid | ||
| 0 | 396 (99%) | 45 (100%) |
| 1 | 3 (0.8%) | 0 (0%) |
| beta | ||
| 0 | 359 (90%) | 45 (100%) |
| 1 | 40 (10%) | 0 (0%) |
| vaso | ||
| 0 | 381 (95%) | 36 (80%) |
| 1 | 18 (4.5%) | 9 (20%) |
| map | 2.00 (0.00, 4.00) | 4.00 (4.00, 8.00) |
| ffp | ||
| 0 | 273 (68%) | 17 (38%) |
| 1 | 126 (32%) | 28 (62%) |
| pc | ||
| 0 | 393 (98%) | 40 (89%) |
| 1 | 6 (1.5%) | 5 (11%) |
| albner | ||
| 0 | 373 (93%) | 38 (84%) |
| 1 | 26 (6.5%) | 7 (16%) |
| bt | 36.80 (36.50, 37.10) | 36.60 (36.20, 37.70) |
| Unknown | 1 | 4 |
| sBP | 93.00 (82.25, 101.00) | 68.00 (56.00, 79.00) |
| Unknown | 1 | 0 |
| dBP | 56.00 (48.00, 64.00) | 43.00 (34.00, 49.00) |
| Unknown | 1 | 0 |
| hr | 83.00 (72.00, 100.00) | 106.00 (90.50, 117.50) |
| Unknown | 2 | 2 |
| shock | ||
| 0 | 244 (61%) | 2 (4.7%) |
| 1 | 153 (39%) | 41 (95%) |
| Unknown | 2 | 2 |
| bil | 1.60 (0.96, 2.71) | 3.06 (1.63, 5.55) |
| Unknown | 8 | 1 |
| ast | 47.00 (31.00, 84.00) | 81.00 (39.00, 167.25) |
| Unknown | 6 | 1 |
| alt | 29.00 (19.00, 42.00) | 33.00 (18.75, 69.75) |
| Unknown | 6 | 1 |
| wbc | 8,050.00 (5,600.00, 10,800.00) | 9,850.00 (8,842.50, 13,925.00) |
| Unknown | 6 | 1 |
| hb | 9.00 (7.30, 10.60) | 7.75 (6.88, 9.15) |
| Unknown | 6 | 1 |
| plt | 102.00 (75.00, 141.00) | 109.00 (55.75, 147.25) |
| Unknown | 6 | 1 |
| tp | 6.10 (5.60, 6.70) | 5.50 (5.00, 6.30) |
| Unknown | 30 | 2 |
| alb | 3.00 (2.50, 3.40) | 2.25 (1.80, 2.62) |
| Unknown | 11 | 1 |
| eGFR | 71.39 (53.87, 92.42) | 41.26 (28.14, 57.66) |
| Unknown | 6 | 1 |
| bun | 24.60 (16.40, 35.90) | 32.10 (19.82, 47.60) |
| Unknown | 6 | 1 |
| cre | 0.80 (0.64, 1.06) | 1.32 (0.92, 1.91) |
| Unknown | 9 | 1 |
| crp | 0.25 (0.11, 0.66) | 1.17 (0.19, 2.32) |
| Unknown | 13 | 2 |
| pt | 60.40 (48.22, 72.05) | 41.00 (30.75, 53.67) |
| Unknown | 19 | 1 |
| aptt | 32.00 (29.33, 36.30) | 40.30 (35.30, 55.70) |
| Unknown | 45 | 4 |
| meld | 10.00 (6.00, 14.00) | 20.00 (14.00, 24.00) |
| Unknown | 25 | 1 |
| los | 7.00 (5.00, 13.00) | 6.00 (2.00, 12.00) |
| cohort | ||
| develop | 0 (0%) | 0 (0%) |
| validation | 399 (100%) | 45 (100%) |
| 1 Median (IQR); n (%) | ||
#df_val |> #全体
# select(col_continuous) |>
# pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |>
# ggplot()+
# geom_histogram(aes(x = value), color = "black")+
# facet_wrap(~ name, scales = "free", ncol = 5) +
# theme_bw()+
# theme(text = element_text(size = 12))
## 連続変数のリストを指定します。
#col_continu <- c("age", "bmi","smoke","child_num","gcs","cci_num","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","b#un","cre","crp","pt","aptt")
## 指定した変数だけで相関行列を作成します。
#corresult <- df_dev %>%
# dplyr::select(all_of(col_continu)) %>%
# drop_na() %>%
# cor(method = "pearson")
#
## 相関行列の数値を表示します。
#print(corresult)
#
## 指定した連続変数だけで相関行列を作成します。
#corresult <- df_dev %>%
# dplyr::select(all_of(col_continu)) %>%
# drop_na() %>%
# cor(method = "pearson")
#
## 相関行列を基に相関プロットを作成します。
#corrplot <- ggcorrplot(corr = corresult, hc.order = FALSE, method = "square", title = "cor plot",
# colors = c("#4b61ba", "white", "red"), lab = TRUE)
#
## プロットを表示します。
#corrplot
bilとchild_numlが0.65と相関係数は高め。ただ他は許容できそう。
# 連続変数の変数名をまとめる
con_var <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","meld")
# datadistを計算
ddist <- datadist(df_dev)
options(datadist='ddist')
# プロット結果をまとめるリストを用意する
plot <- list()
for (x in con_var){
# lrmに投入するformulaを文字列で、for文で順番に指定していく
formula_tmp <- as.formula(paste("hosp_mortality ~ rcs(", x, ", 4)"))
fit_tmp <- lrm(formula_tmp, data = df_dev)
# Predict関数の呼び出しを文字列として作成し、それをパースして評価する
plot_cmd <- paste("plot(Predict(fit_tmp, ", x, "))")
plot_tmp <- eval(parse(text = plot_cmd))
plot[[x]] <- plot_tmp
}
# 結果をggarrangeでまとめて表示する
logOR_plot <- ggarrange(plotlist = plot, ncol = 1, nrow = 1)
logOR_plot
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## $`7`
##
## $`8`
##
## $`9`
##
## $`10`
##
## $`11`
##
## $`12`
##
## $`13`
##
## $`14`
##
## $`15`
##
## $`16`
##
## $`17`
##
## $`18`
##
## $`19`
##
## $`20`
##
## $`21`
##
## $`22`
##
## $`23`
##
## $`24`
##
## $`25`
##
## $`26`
##
## attr(,"class")
## [1] "list" "ggarrange"
#naplot_1 <- gg_miss_var(df_dev, show_pct = TRUE)
#
#naplot_1
#naplot_2 <- vis_miss(df_dev)
#
#naplot_2
vari_cat.f <-c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality")
vari_numeric <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","los","meld")
# vari_cat.f (defined as above) in the data_original is changed to factor type and set into data_factor_for_imp
data_factor_for_imp <- as.data.frame(lapply(df_dev[vari_cat.f],as.factor))
#check all the variable is factor
str(data_factor_for_imp)
## 'data.frame': 536 obs. of 32 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": NA 3 1 2 NA NA NA 1 NA 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 1 NA 2 NA NA NA 2 3 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ shock : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
#summary numeric type
data_numeric_for_imp <- as.data.frame(lapply(df_dev[vari_numeric],as.numeric))
# check all the variable is numeric
str(data_numeric_for_imp)
## 'data.frame': 536 obs. of 27 variables:
## $ age : num 50 80 59 44 67 65 49 73 69 62 ...
## $ bmi : num NA 25.3 NA 14.5 NA ...
## $ smoke : num 0 0 0 240 0 0 NA 1000 0 0 ...
## $ child_num: num 11 6 NA 8 NA NA NA 9 15 11 ...
## $ gcs : num 15 15 15 15 15 15 15 15 6 15 ...
## $ cci_num : num 4 4 3 4 4 3 4 4 4 4 ...
## $ map : num 0 2 0 6 2 2 0 4 14 6 ...
## $ bt : num 36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
## $ sBP : num 78 88 100 69 66 84 132 90 52 58 ...
## $ dBP : num 48 49 56 44 40 46 69 54 37 37 ...
## $ hr : num 118 72 110 104 72 127 83 114 98 106 ...
## $ bil : num 2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
## $ ast : num 217 31 60 129 52 90 154 55 96 121 ...
## $ alt : num 63 22 40 46 36 19 109 20 87 63 ...
## $ wbc : num 7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
## $ hb : num 6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
## $ plt : num 115 77 74 162 63 93 132 124 168 84 ...
## $ tp : num 6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
## $ alb : num 2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
## $ eGFR : num 58 57.7 63.7 112.4 123.6 ...
## $ bun : num 13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
## $ cre : num 1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
## $ crp : num 0.92 0.29 0.68 0.29 0.29 0.96 0.29 NA 2.75 0.12 ...
## $ pt : num 37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
## $ aptt : num 29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
## $ los : num 12 7 0 10 3 2 1 8 0 16 ...
## $ meld : num 16 11 13 14 6 8 6 15 29 12 ...
# combine the factor type and numeric type
data_for_imp <- cbind(data_factor_for_imp, data_numeric_for_imp)
#check all the variable type
str(data_for_imp)
## 'data.frame': 536 obs. of 59 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": NA 3 1 2 NA NA NA 1 NA 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 1 NA 2 NA NA NA 2 3 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ shock : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
## $ age : num 50 80 59 44 67 65 49 73 69 62 ...
## $ bmi : num NA 25.3 NA 14.5 NA ...
## $ smoke : num 0 0 0 240 0 0 NA 1000 0 0 ...
## $ child_num : num 11 6 NA 8 NA NA NA 9 15 11 ...
## $ gcs : num 15 15 15 15 15 15 15 15 6 15 ...
## $ cci_num : num 4 4 3 4 4 3 4 4 4 4 ...
## $ map : num 0 2 0 6 2 2 0 4 14 6 ...
## $ bt : num 36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
## $ sBP : num 78 88 100 69 66 84 132 90 52 58 ...
## $ dBP : num 48 49 56 44 40 46 69 54 37 37 ...
## $ hr : num 118 72 110 104 72 127 83 114 98 106 ...
## $ bil : num 2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
## $ ast : num 217 31 60 129 52 90 154 55 96 121 ...
## $ alt : num 63 22 40 46 36 19 109 20 87 63 ...
## $ wbc : num 7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
## $ hb : num 6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
## $ plt : num 115 77 74 162 63 93 132 124 168 84 ...
## $ tp : num 6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
## $ alb : num 2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
## $ eGFR : num 58 57.7 63.7 112.4 123.6 ...
## $ bun : num 13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
## $ cre : num 1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
## $ crp : num 0.92 0.29 0.68 0.29 0.29 0.96 0.29 NA 2.75 0.12 ...
## $ pt : num 37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
## $ aptt : num 29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
## $ los : num 12 7 0 10 3 2 1 8 0 16 ...
## $ meld : num 16 11 13 14 6 8 6 15 29 12 ...
cores <- detectCores(logical = FALSE) ###並列化処理
registerDoParallel(cores = cores) ###並列化処理
set.seed(2023)
md.pattern(data_for_imp) #see patern the missing
## sex pad stroke dimentia ch_lung rheumati pept_ulcer dm dm_compli paralysis
## 216 1 1 1 1 1 1 1 1 1 1
## 72 1 1 1 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1 1 1 1
## 10 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## malignancy meta_tumor aids hd hcc alcohol past_rupture antiplate anticoag
## 216 1 1 1 1 1 1 1 1 1
## 72 1 1 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1 1 1
## 10 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0
## antithro nsaids steroid beta vaso ffp pc albner hosp_mortality age gcs
## 216 1 1 1 1 1 1 1 1 1 1 1
## 72 1 1 1 1 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1 1 1 1 1
## 10 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0 0
## cci_num map los wbc hb plt eGFR30 eGFR bun ast alt cre sBP dBP shock hr bil
## 216 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 72 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 16 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 25 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 10 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 4 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 3 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 1
## 3 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0
## 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0
## 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 0 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 1 0 0 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 0 0 0 1 1 1 1 1 0
## 1 1 1 1 1 1 1 0 0 1 1 1 0 1 1 1 1 1
## 1 1 1 1 1 1 1 0 0 0 0 0 0 1 1 1 1 0
## 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0
## 4 1 1 1 0 0 0 0 0 0 0 0 0 1 1 1 1 0
## 2 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0
## 0 0 0 13 13 13 14 14 14 18 18 18 22 22 24 24 28
## bt alb crp pt tp meld child_score aptt child_num smoke bmi barthel
## 216 1 1 1 1 1 1 1 1 1 1 1 1 0
## 72 1 1 1 1 1 1 1 1 1 1 1 0 1
## 16 1 1 1 1 1 1 1 1 1 1 0 1 1
## 5 1 1 1 1 1 1 1 1 1 1 0 0 2
## 25 1 1 1 1 1 1 1 1 1 0 1 1 1
## 10 1 1 1 1 1 1 1 1 1 0 1 0 2
## 9 1 1 1 1 1 1 1 1 1 0 0 1 2
## 1 1 1 1 1 1 1 1 1 1 0 0 0 3
## 9 1 1 1 1 1 1 1 1 0 1 1 1 1
## 2 1 1 1 1 1 1 1 1 0 1 1 0 2
## 1 1 1 1 1 1 1 1 1 0 1 0 1 2
## 1 1 1 1 1 1 1 1 1 0 1 0 0 3
## 2 1 1 1 1 1 1 1 1 0 0 0 1 3
## 1 1 1 1 1 1 1 1 1 0 0 0 0 4
## 9 1 1 1 1 1 1 1 0 1 1 1 1 1
## 1 1 1 1 1 1 1 1 0 1 1 1 0 2
## 2 1 1 1 1 1 1 1 0 1 0 1 1 2
## 1 1 1 1 1 1 1 1 0 1 0 1 0 3
## 2 1 1 1 1 1 1 1 0 1 0 0 1 3
## 14 1 1 1 1 1 1 0 1 0 1 1 1 2
## 2 1 1 1 1 1 1 0 1 0 1 1 0 3
## 3 1 1 1 1 1 1 0 1 0 1 0 1 3
## 3 1 1 1 1 1 1 0 1 0 1 0 0 4
## 1 1 1 1 1 1 1 0 1 0 0 1 1 3
## 7 1 1 1 1 1 1 0 1 0 0 0 1 4
## 9 1 1 1 1 1 1 0 1 0 0 0 0 5
## 1 1 1 1 1 1 1 0 0 0 1 1 0 4
## 1 1 1 1 1 1 1 0 0 0 0 0 1 5
## 6 1 1 1 1 0 1 1 1 1 1 1 1 1
## 3 1 1 1 1 0 1 1 1 1 1 1 0 2
## 2 1 1 1 1 0 1 1 1 1 1 0 1 2
## 2 1 1 1 1 0 1 1 0 1 1 1 1 2
## 9 1 1 1 0 1 0 1 0 1 1 1 1 3
## 3 1 1 1 0 1 0 1 0 1 1 1 0 4
## 2 1 1 1 0 1 0 0 0 0 1 1 1 5
## 2 1 1 0 1 1 1 1 1 1 1 1 1 1
## 2 1 1 0 1 1 1 1 1 1 1 1 0 2
## 1 1 1 0 1 1 1 1 1 1 1 0 1 2
## 1 1 1 0 1 1 1 1 1 1 0 1 1 2
## 1 1 1 0 1 1 1 1 1 1 0 0 1 3
## 1 1 1 0 1 1 1 1 1 1 0 0 0 4
## 1 1 1 0 1 1 1 1 0 1 0 0 1 4
## 1 1 1 0 1 1 1 0 1 0 1 0 0 5
## 1 1 1 0 1 1 1 0 0 0 0 0 0 7
## 1 1 1 0 1 0 1 1 1 1 1 1 1 2
## 2 1 1 0 0 1 0 1 0 1 1 1 1 4
## 1 1 0 1 1 1 1 1 1 1 1 1 0 2
## 1 1 0 1 1 1 1 1 1 1 0 1 0 3
## 3 1 0 1 1 0 1 1 1 1 1 1 1 2
## 1 1 0 1 0 1 0 1 0 1 1 1 0 5
## 1 1 0 1 0 1 0 0 0 0 1 1 1 6
## 1 1 0 1 0 0 0 1 0 1 1 1 1 5
## 1 1 0 0 0 0 0 1 0 1 1 1 1 6
## 4 0 1 1 1 1 1 1 1 1 1 1 0 2
## 1 0 1 1 1 1 1 1 1 1 0 1 1 2
## 1 0 1 1 1 1 1 1 1 1 0 1 0 3
## 1 0 1 1 1 1 1 1 1 0 1 1 1 2
## 1 0 1 1 1 1 1 1 1 0 0 0 0 5
## 1 0 1 0 1 1 1 0 1 0 1 1 0 5
## 1 0 0 1 1 1 1 1 1 1 0 0 1 4
## 1 1 1 1 1 1 0 1 1 1 1 0 1 3
## 3 1 1 1 1 1 0 0 1 0 1 1 1 4
## 1 1 0 0 1 0 0 1 1 1 0 0 1 7
## 1 1 1 1 1 1 1 1 1 1 1 1 0 3
## 1 1 1 1 1 1 1 0 1 0 1 1 1 4
## 3 0 1 1 1 1 1 1 1 1 1 1 0 6
## 1 0 1 1 1 1 1 0 1 0 0 0 1 9
## 1 0 1 1 1 0 1 1 1 1 1 0 1 7
## 1 0 1 0 1 1 1 1 1 1 1 1 1 6
## 1 0 1 0 1 0 1 1 1 1 1 1 1 7
## 1 0 0 1 1 0 1 1 1 1 1 1 1 7
## 1 0 0 1 1 0 1 1 1 1 1 1 0 8
## 2 0 0 1 1 0 1 1 1 1 1 0 1 8
## 1 0 0 1 1 0 1 1 1 1 1 0 0 9
## 1 0 0 1 1 0 1 0 1 0 1 0 1 10
## 1 0 0 1 0 0 0 1 0 1 1 1 1 10
## 1 0 1 1 1 1 0 1 1 0 1 1 1 8
## 1 0 1 1 1 1 0 1 0 1 1 1 1 8
## 1 0 1 1 1 1 0 0 1 0 1 1 1 9
## 1 0 1 1 0 1 0 1 0 1 1 0 1 10
## 1 0 1 0 0 1 0 1 0 1 1 1 1 10
## 1 1 1 1 1 1 0 1 1 1 1 1 1 2
## 2 1 1 1 1 1 0 1 1 1 1 0 1 3
## 1 1 1 1 1 1 0 0 1 0 1 1 1 4
## 1 1 1 1 0 1 0 1 0 1 1 1 0 7
## 1 1 1 0 0 1 0 1 0 1 1 0 0 9
## 1 1 0 0 1 0 0 0 0 0 0 0 0 13
## 1 1 0 0 0 0 0 1 0 1 1 1 1 9
## 1 1 0 0 1 0 0 0 0 0 1 1 1 11
## 1 1 1 0 1 1 0 1 1 1 1 0 1 6
## 1 1 0 1 0 0 0 1 0 1 1 1 0 13
## 1 1 1 1 1 1 1 1 1 1 1 1 1 3
## 5 1 0 0 0 0 0 1 0 1 1 1 1 16
## 4 1 0 0 0 0 0 0 0 0 1 1 1 18
## 2 0 0 0 0 0 0 1 0 1 1 1 1 21
## 1 0 0 0 0 0 0 0 0 0 1 1 1 23
## 32 34 37 39 45 54 62 63 81 84 85 140 1011
imp.mf <- missForest(data_for_imp,
maxiter = 10,
ntree = 100,
mtry = floor(sqrt(ncol(data_for_imp))),
parallelize = "variables",
verbose = TRUE)
## parallelizing over the variables of the input data matrix 'xmis'
## missForest iteration 1 in progress...
## randomForest 4.7-1.1
## Type rfNews() to see new features/changes/bug fixes.
##
## 次のパッケージを付け加えます: 'randomForest'
## 以下のオブジェクトは 'package:dplyr' からマスクされています:
##
## combine
## 以下のオブジェクトは 'package:ggplot2' からマスクされています:
##
## margin
## 要求されたパッケージ rngtools をロード中です
## done!
## estimated error(s): 0.4019074 0.02111852
## difference(s): 0.0004840515 0.005597015
## time: 1.147 seconds
##
## missForest iteration 2 in progress...done!
## estimated error(s): 0.4004004 0.0207036
## difference(s): 0.0001136674 0.002390392
## time: 1.238 seconds
##
## missForest iteration 3 in progress...done!
## estimated error(s): 0.3995062 0.02053885
## difference(s): 0.0001359508 0.002390392
## time: 1.193 seconds
summary(imp.mf$ximp)
## sex barthel child_score pad stroke dimentia ch_lung rheumati
## M:405 0:193 0: 66 0:535 0:526 0:532 0:525 0:533
## F:131 1:154 1:291 1: 1 1: 10 1: 4 1: 11 1: 3
## 2:189 2:179
##
##
##
## pept_ulcer dm dm_compli paralysis malignancy meta_tumor aids eGFR30
## 0:477 0:430 0:530 0:536 0:478 0:530 0:536 0:485
## 1: 59 1:106 1: 6 1: 58 1: 6 1: 51
##
##
##
##
## hd hcc alcohol past_rupture antiplate anticoag antithro nsaids
## 0:533 0:425 0:305 0:432 0:532 0:535 0:532 0:531
## 1: 3 1:111 1:231 1:104 1: 4 1: 1 1: 4 1: 5
##
##
##
##
## steroid beta vaso ffp pc albner shock hosp_mortality
## 0:534 0:518 0:505 0:399 0:530 0:489 0:318 0:463
## 1: 2 1: 18 1: 31 1:137 1: 6 1: 47 1:218 1: 73
##
##
##
##
## age bmi smoke child_num
## Min. :26.00 Min. :13.72 Min. : 0.0 Min. : 5.000
## 1st Qu.:51.75 1st Qu.:20.81 1st Qu.: 0.0 1st Qu.: 7.045
## Median :62.00 Median :22.66 Median : 200.0 Median : 8.291
## Mean :61.23 Mean :22.90 Mean : 301.3 Mean : 8.805
## 3rd Qu.:70.00 3rd Qu.:24.45 3rd Qu.: 411.6 3rd Qu.:10.000
## Max. :90.00 Max. :33.95 Max. :9000.0 Max. :15.000
## gcs cci_num map bt
## Min. : 3.00 Min. : 3.000 Min. : 0.000 Min. :32.80
## 1st Qu.:15.00 1st Qu.: 4.000 1st Qu.: 0.000 1st Qu.:36.30
## Median :15.00 Median : 4.000 Median : 4.000 Median :36.70
## Mean :14.33 Mean : 4.444 Mean : 3.493 Mean :36.66
## 3rd Qu.:15.00 3rd Qu.: 5.000 3rd Qu.: 4.000 3rd Qu.:37.00
## Max. :15.00 Max. :12.000 Max. :68.000 Max. :41.70
## sBP dBP hr bil
## Min. : 50.00 Min. : 3.00 Min. : 39.00 Min. : 0.2000
## 1st Qu.: 79.75 1st Qu.:44.00 1st Qu.: 71.00 1st Qu.: 0.9775
## Median : 90.00 Median :51.00 Median : 82.00 Median : 1.4750
## Mean : 87.50 Mean :51.14 Mean : 85.39 Mean : 2.1906
## 3rd Qu.: 96.00 3rd Qu.:58.00 3rd Qu.: 97.25 3rd Qu.: 2.6775
## Max. :150.00 Max. :93.00 Max. :222.00 Max. :15.8000
## ast alt wbc hb
## Min. : 10.00 Min. : 7.00 Min. : 1300 Min. : 2.800
## 1st Qu.: 34.00 1st Qu.: 20.00 1st Qu.: 5400 1st Qu.: 7.000
## Median : 58.16 Median : 30.00 Median : 7300 Median : 8.400
## Mean : 85.04 Mean : 43.35 Mean : 8316 Mean : 8.578
## 3rd Qu.: 95.00 3rd Qu.: 47.00 3rd Qu.:10300 3rd Qu.:10.000
## Max. :984.00 Max. :562.00 Max. :37700 Max. :16.500
## plt tp alb eGFR
## Min. : 21.0 Min. :2.200 Min. :1.100 Min. : 4.274
## 1st Qu.: 73.0 1st Qu.:5.500 1st Qu.:2.500 1st Qu.: 48.745
## Median : 98.0 Median :6.200 Median :2.812 Median : 67.746
## Mean :110.6 Mean :6.107 Mean :2.828 Mean : 69.256
## 3rd Qu.:134.2 3rd Qu.:6.635 3rd Qu.:3.200 3rd Qu.: 88.257
## Max. :482.0 Max. :9.100 Max. :4.400 Max. :196.029
## bun cre crp pt
## Min. : 2.90 Min. : 0.330 Min. : 0.0000 Min. : 7.50
## 1st Qu.: 16.10 1st Qu.: 0.670 1st Qu.: 0.1200 1st Qu.: 40.58
## Median : 23.20 Median : 0.860 Median : 0.3268 Median : 52.57
## Mean : 28.34 Mean : 1.061 Mean : 0.7968 Mean : 52.20
## 3rd Qu.: 35.62 3rd Qu.: 1.140 3rd Qu.: 0.8145 3rd Qu.: 63.50
## Max. :124.80 Max. :10.950 Max. :18.6370 Max. :107.90
## aptt los meld
## Min. : 17.60 Min. : 0.00 Min. : 6.00
## 1st Qu.: 26.40 1st Qu.: 6.00 1st Qu.: 7.00
## Median : 28.70 Median : 10.00 Median :11.00
## Mean : 32.27 Mean : 13.77 Mean :12.06
## 3rd Qu.: 33.23 3rd Qu.: 18.00 3rd Qu.:15.00
## Max. :240.00 Max. :159.00 Max. :40.00
md.pattern(imp.mf$ximp)
## /\ /\
## { `---' }
## { O O }
## ==> V <== No need for mice. This data set is completely observed.
## \ \|/ /
## `-----'
## sex barthel child_score pad stroke dimentia ch_lung rheumati pept_ulcer dm
## 536 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## dm_compli paralysis malignancy meta_tumor aids eGFR30 hd hcc alcohol
## 536 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0
## past_rupture antiplate anticoag antithro nsaids steroid beta vaso ffp pc
## 536 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## albner shock hosp_mortality age bmi smoke child_num gcs cci_num map bt sBP
## 536 1 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0 0 0
## dBP hr bil ast alt wbc hb plt tp alb eGFR bun cre crp pt aptt los meld
## 536 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#if exclude ID is necessary add the ID
data_imp <- imp.mf$ximp
data_imp$pt_id <- df_dev$pt_id
data_imp$hosp_num<- df_dev$hosp_num
data_imp$hosp_id<- df_dev$hosp_id
data_imp$year<- df_dev$year
#check
str(data_imp)
## 'data.frame': 536 obs. of 63 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": 3 3 1 2 3 1 1 1 3 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 1 2 2 2 2 2 2 3 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ shock : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
## $ age : num 50 80 59 44 67 65 49 73 69 62 ...
## $ bmi : num 23.4 25.3 23.5 14.5 21.9 ...
## $ smoke : num 0 0 0 240 0 ...
## $ child_num : num 11 6 8.75 8 7.55 ...
## $ gcs : num 15 15 15 15 15 15 15 15 6 15 ...
## $ cci_num : num 4 4 3 4 4 3 4 4 4 4 ...
## $ map : num 0 2 0 6 2 2 0 4 14 6 ...
## $ bt : num 36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
## $ sBP : num 78 88 100 69 66 84 132 90 52 58 ...
## $ dBP : num 48 49 56 44 40 46 69 54 37 37 ...
## $ hr : num 118 72 110 104 72 127 83 114 98 106 ...
## $ bil : num 2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
## $ ast : num 217 31 60 129 52 90 154 55 96 121 ...
## $ alt : num 63 22 40 46 36 19 109 20 87 63 ...
## $ wbc : num 7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
## $ hb : num 6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
## $ plt : num 115 77 74 162 63 93 132 124 168 84 ...
## $ tp : num 6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
## $ alb : num 2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
## $ eGFR : num 58 57.7 63.7 112.4 123.6 ...
## $ bun : num 13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
## $ cre : num 1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
## $ crp : num 0.92 0.29 0.68 0.29 0.29 ...
## $ pt : num 37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
## $ aptt : num 29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
## $ los : num 12 7 0 10 3 2 1 8 0 16 ...
## $ meld : num 16 11 13 14 6 8 6 15 29 12 ...
## $ pt_id : int 1 2 3 4 5 7 8 9 10 11 ...
## $ hosp_num : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hosp_id : int 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
## $ year : int 2012 2011 2010 2011 2010 2010 2010 2010 2011 2012 ...
#write the csv
#write.csv(imp.mf$ximp, file = "data_after_imputation.csv")
str(data_imp)
## 'data.frame': 536 obs. of 63 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 1 2 1 2 1 2 1 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": 3 3 1 2 3 1 1 1 3 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 1 2 2 2 2 2 2 3 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 2 2 1 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 2 2 1 2 2 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 1 ...
## $ shock : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 2 2 2 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
## $ age : num 50 80 59 44 67 65 49 73 69 62 ...
## $ bmi : num 23.4 25.3 23.5 14.5 21.9 ...
## $ smoke : num 0 0 0 240 0 ...
## $ child_num : num 11 6 8.75 8 7.55 ...
## $ gcs : num 15 15 15 15 15 15 15 15 6 15 ...
## $ cci_num : num 4 4 3 4 4 3 4 4 4 4 ...
## $ map : num 0 2 0 6 2 2 0 4 14 6 ...
## $ bt : num 36.4 36.8 35.9 36 36.6 38.4 37 37 35.5 36.6 ...
## $ sBP : num 78 88 100 69 66 84 132 90 52 58 ...
## $ dBP : num 48 49 56 44 40 46 69 54 37 37 ...
## $ hr : num 118 72 110 104 72 127 83 114 98 106 ...
## $ bil : num 2.2 1.2 3.1 3.4 1.2 2.4 1.2 2.2 8.7 2.1 ...
## $ ast : num 217 31 60 129 52 90 154 55 96 121 ...
## $ alt : num 63 22 40 46 36 19 109 20 87 63 ...
## $ wbc : num 7400 5000 7800 9100 3900 8000 7900 11100 12800 8300 ...
## $ hb : num 6.9 10.8 9.7 10.7 6.3 9.8 13.5 5.6 6 9.8 ...
## $ plt : num 115 77 74 162 63 93 132 124 168 84 ...
## $ tp : num 6.3 5.6 6.4 6.1 5.1 7.2 7.6 4.9 5.3 6.3 ...
## $ alb : num 2.2 3.2 2.8 2.9 2.8 3.2 4 2.3 1.2 2.5 ...
## $ eGFR : num 58 57.7 63.7 112.4 123.6 ...
## $ bun : num 13.2 41.5 26.3 2.9 27.4 15.8 15.5 27 63.2 13.8 ...
## $ cre : num 1.08 0.96 0.72 0.61 0.38 0.44 0.4 0.97 2.07 0.87 ...
## $ crp : num 0.92 0.29 0.68 0.29 0.29 ...
## $ pt : num 37.8 55 46.7 37.8 74.6 45.9 49.7 45.9 37.2 54 ...
## $ aptt : num 29.4 29 27.2 35.3 30.1 27.6 32.6 27.5 36 29.1 ...
## $ los : num 12 7 0 10 3 2 1 8 0 16 ...
## $ meld : num 16 11 13 14 6 8 6 15 29 12 ...
## $ pt_id : int 1 2 3 4 5 7 8 9 10 11 ...
## $ hosp_num : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hosp_id : int 1001 1001 1001 1001 1001 1001 1001 1001 1001 1001 ...
## $ year : int 2012 2011 2010 2011 2010 2010 2010 2010 2011 2012 ...
data_imp$meld <- round(data_imp$meld)
min(data_imp$meld, na.rm = TRUE)
## [1] 6
max(data_imp$meld, na.rm = TRUE)
## [1] 40
dev_imp <-
data_imp|>
mutate(
hosp_id=as.integer(hosp_id),
pt_id=as.integer(pt_id),
hosp_num=as.integer(hosp_num),
year=as.integer(year),
age=as.integer(age),
sex= factor(sex, levels = c("M", "F")),
smoke= as.integer(smoke),
barthel= factor(barthel, levels = c("0", "1", "2")),
child_num= as.integer(round(data_imp$child_num)),
child_score=factor(child_score, levels = c("0", "1", "2")),
gcs=as.integer(gcs),
cci_num=as.integer(cci_num),
pad=factor(pad),
stroke=factor(stroke),
dimentia=factor(dimentia),
ch_lung=factor(ch_lung),
rheumati=factor(rheumati),
pept_ulcer=factor(pept_ulcer),
dm=factor(dm),
dm_compli=factor(dm_compli),
paralysis=factor(paralysis),
malignancy=factor(malignancy),
meta_tumor=factor(meta_tumor),
aids=factor(aids),
eGFR30=factor(eGFR30),
hd=factor(hd),
hcc=factor(hcc),
alcohol=factor(alcohol),
past_rupture=factor(past_rupture),
antiplate=factor(antiplate),
anticoag=factor(anticoag),
antithro=factor(antithro),
nsaids=factor(nsaids),
steroid=factor(steroid),
beta=factor(beta),
vaso=factor(vaso),
map= as.integer(map),
ffp=factor(ffp),
pc=factor(pc),
albner=factor(albner),
sBP= as.integer(sBP),
dBP= as.integer(dBP),
hr=as.integer(hr),
shock=factor(shock),
los=as.integer(los),
meld=as.integer(meld)
)
#table(dev_imp$child_num)
#str(dev_imp)
## Create your table
#dev_imp %>%
# select(c(col_cont, col_fact)) %>%
# CreateTableOne(vars = c(col_cont, col_fact), strata="hosp_mortality",factorVars = col_fact, addOverall = T) -> tableone_dev_imp
#
#
## Print your table
#print(tableone_dev_imp, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## specify your data and variables
#tbl_summary(data = dev_imp,
# by = "hosp_mortality",
# type = list(gcs ~ "continuous", year ~ "categorical"),
# statistic = all_continuous() ~ "{median} ({p25}, {p75})",
# digits = all_continuous() ~ c(0, 2))
#
#dev_imp |> #全体
# select(col_continuous) |>
# pivot_longer(cols = col_continuous, names_to = "name", values_to = "value") |>
# ggplot()+
# geom_histogram(aes(x = value), color = "black")+
# facet_wrap(~ name, scales = "free", ncol = 5) +
# theme_bw()+
# theme(text = element_text(size = 12))
# 連続変数の変数名をまとめる
con_var <- c("age", "bmi","smoke","child_num","gcs","cci_num","map","bt","sBP","dBP","hr","bil","ast","alt","wbc","hb","plt","tp","alb","eGFR","bun","cre","crp","pt","aptt","meld")
# datadistを計算
ddist <- datadist(dev_imp)
options(datadist='ddist')
# プロット結果をまとめるリストを用意する
plot <- list()
for (x in con_var){
# lrmに投入するformulaを文字列で、for文で順番に指定していく
formula_tmp <- as.formula(paste("hosp_mortality ~ rcs(", x, ", 4)"))
fit_tmp <- lrm(formula_tmp, data = dev_imp)
# Predict関数の呼び出しを文字列として作成し、それをパースして評価する
plot_cmd <- paste("plot(Predict(fit_tmp, ", x, "))")
plot_tmp <- eval(parse(text = plot_cmd))
plot[[x]] <- plot_tmp
}
# 結果をggarrangeでまとめて表示する
logOR_plot <- ggarrange(plotlist = plot, ncol = 1, nrow = 1)
logOR_plot
## $`1`
##
## $`2`
##
## $`3`
##
## $`4`
##
## $`5`
##
## $`6`
##
## $`7`
##
## $`8`
##
## $`9`
##
## $`10`
##
## $`11`
##
## $`12`
##
## $`13`
##
## $`14`
##
## $`15`
##
## $`16`
##
## $`17`
##
## $`18`
##
## $`19`
##
## $`20`
##
## $`21`
##
## $`22`
##
## $`23`
##
## $`24`
##
## $`25`
##
## $`26`
##
## attr(,"class")
## [1] "list" "ggarrange"
以下のCut offを設ける ・age:60以上・未満 60未満:0, 60以上:1 ・bmi:25以上・未満 25未満:0, 25以上:1 ・smoke:cut offなし →むしろ変数から削除 ・child_numl:cut offなし→むしろ変数から削除 ・gcs:12以上・未満 12未満:1, 12以上:0 ・cci:6点以上・未満 6未満:0, 6以上:1 ・bt:37度以上・未満 37度未満:0,37度以上:1 ・sBP:80以上・未満 80以上:0,80未満:1 ・dBP:50以上・未満 50以上:0,50未満:1 ・HR:100以上・未満 100未満:0,100以上:1 ・bil:5以上・未満: 5未満:0,5以上:1 ・ast:200以上・未満 200未満:0, 200以上:1 ・alt:100以上・未満 100未満:0, 100以上:1 ・wbc:12000以上・未満 12000未満:0, 12000以上:1 ・hb:8以上・未満 9以上:0, 8未満:1 ・plt:100以上・未満 100以上:0, 100未満:1 ・tp:6以上・未満 6以上:0, 6未満:1 ・alb:2.8以上・未満 2.8以上:0, 2.8未満:1 ・Cre:1.5以上・未満 1.5未満:0, Cre1.5以上:1 ・CRP:2以上・未満 2未満:0, CRP2以上:1 ・pt:50以上・未満 50以上:0, 50未満:1 ・aptt:50以上・未満 aptt50未満:0,50以上:1
#新規カテゴリ列を作成
dev_imp$age_cate <- ifelse(dev_imp$age >= 60, 1, 0)
dev_imp$bmi_cate <- ifelse(dev_imp$bmi >= 25, 1, 0) # bmiは25以上:1, 25未満:0
dev_imp$gcs_cate <- ifelse(dev_imp$gcs <= 12, 1, 0) ##ここを以上に変更!!
dev_imp$cci_cate <- ifelse(dev_imp$cci_num >= 6, 1, 0)
dev_imp$bt_cate <- ifelse(dev_imp$bt >= 37, 1, 0)
dev_imp$sBP_cate <- ifelse(dev_imp$sBP < 80, 1, 0)
dev_imp$dBP_cate <- ifelse(dev_imp$dBP < 50, 1, 0)
dev_imp$hr_cate <- ifelse(dev_imp$hr >= 100, 1, 0)
dev_imp$bil_cate <- ifelse(dev_imp$bil >= 5, 1, 0)
dev_imp$ast_cate <- ifelse(dev_imp$ast >= 200, 1, 0)
dev_imp$alt_cate <- ifelse(dev_imp$alt >= 100, 1, 0)
dev_imp$wbc_cate <- ifelse(dev_imp$wbc >= 12000, 1, 0)
dev_imp$hb_cate <- ifelse(dev_imp$hb < 8, 1, 0)
dev_imp$plt_cate <- ifelse(dev_imp$plt < 100, 1, 0) # pltは100未満:1、100以上:0
dev_imp$tp_cate <- ifelse(dev_imp$tp < 6, 1, 0)
dev_imp$alb_cate <- ifelse(dev_imp$alb < 2.8, 1, 0)
dev_imp$cre_cate <- ifelse(dev_imp$cre >= 1.5, 1, 0)
dev_imp$crp_cate <- ifelse(dev_imp$crp >= 2, 1, 0)
dev_imp$pt_cate <- ifelse(dev_imp$pt < 50, 1, 0)
dev_imp$aptt_cate <- ifelse(dev_imp$aptt >= 50, 1, 0) # apttは50以上:1, 50未満:0
#col_fact_cate=c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","maligna#ncy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", #"vaso","ffp","pc", "albner","shock","hosp_mortality","age_cate","bmi_cate","gcs_cate","cci_cate","bt_cate","sBP_cate","dBP_cate","hr_cate","bil#_cate","ast_cate","alt_cate","wbc_cate","hb_cate","plt_cate","tp_cate","alb_cate","cre_cate","crp_cate","pt_cate","aptt_cate")
#
## Create your table
#dev_imp %>%
# select(c(col_fact_cate)) %>%
# CreateTableOne(vars = c(col_fact_cate), strata="hosp_mortality",factorVars = col_fact_cate, addOverall = T) -> tableone_dev_imp_cate
#
#
## Print your table
#print(tableone_dev_imp_cate, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## specify your data and variables
#tbl_summary(data = dev_imp,
# by = "hosp_mortality",
# type = list(gcs ~ "continuous", year ~ "categorical"),
# statistic = all_continuous() ~ "{median} ({p25}, {p75})",
# digits = all_continuous() ~ c(0, 2))
#
# "initial_vars" vector containing the names of variables
initial_vars <- c("sex", "barthel", "pept_ulcer", "dm", "malignancy", "hcc", "alcohol",
"past_rupture", "antithro", "steroid", "beta", "sBP_cate",
"age_cate", "bmi_cate", "gcs_cate", "bt_cate", "bil_cate", "ast_cate", "alt_cate",
"wbc_cate", "hb_cate", "plt_cate", "alb_cate", "cre_cate",
"crp_cate", "pt_cate","aptt_cate")
# Loop through each variable in "initial_vars" and print its class
for (var in initial_vars) {
print(paste(var, ": ", class(dev_imp[[var]]), sep=""))
}
## [1] "sex: factor"
## [1] "barthel: factor"
## [1] "pept_ulcer: factor"
## [1] "dm: factor"
## [1] "malignancy: factor"
## [1] "hcc: factor"
## [1] "alcohol: factor"
## [1] "past_rupture: factor"
## [1] "antithro: factor"
## [1] "steroid: factor"
## [1] "beta: factor"
## [1] "sBP_cate: numeric"
## [1] "age_cate: numeric"
## [1] "bmi_cate: numeric"
## [1] "gcs_cate: numeric"
## [1] "bt_cate: numeric"
## [1] "bil_cate: numeric"
## [1] "ast_cate: numeric"
## [1] "alt_cate: numeric"
## [1] "wbc_cate: numeric"
## [1] "hb_cate: numeric"
## [1] "plt_cate: numeric"
## [1] "alb_cate: numeric"
## [1] "cre_cate: numeric"
## [1] "crp_cate: numeric"
## [1] "pt_cate: numeric"
## [1] "aptt_cate: numeric"
# 変数リストの設定(応答変数を除外)
initial_vars <- c("sex", "pept_ulcer", "dm", "malignancy", "hcc", "alcohol",
"past_rupture", "antithro", "steroid", "beta", "sBP_cate",
"age_cate", "bmi_cate", "gcs_cate", "bt_cate", "bil_cate", "ast_cate", "alt_cate",
"wbc_cate", "hb_cate", "plt_cate", "alb_cate", "cre_cate",
"crp_cate", "pt_cate","aptt_cate")
# one-hot encoding を行う
x <- model.matrix(~.-1, dev_imp[initial_vars])
# y のデータ型変換
y <- as.numeric(as.character(dev_imp$hosp_mortality))
# 乱数のシードを設定
set.seed(2023)
# cv.glmnetを用いた1SE ruleによるLasso
cvfit <- cv.glmnet(x, y, family = "binomial")
# lambda.1seを用いての最適モデルの選択
lasso_model <- glmnet(x, y, family = "binomial", alpha = 1, lambda = cvfit$lambda.1se)
# 変数の重要度の表示
coef(lasso_model)
## 28 x 1 sparse Matrix of class "dgCMatrix"
## s0
## (Intercept) -2.98748170
## sexM .
## sexF .
## pept_ulcer1 .
## dm1 .
## malignancy1 .
## hcc1 .
## alcohol1 .
## past_rupture1 .
## antithro1 .
## steroid1 .
## beta1 .
## sBP_cate 1.69308373
## age_cate .
## bmi_cate .
## gcs_cate 1.02517028
## bt_cate .
## bil_cate 0.41663678
## ast_cate 0.18104451
## alt_cate .
## wbc_cate .
## hb_cate .
## plt_cate .
## alb_cate 0.33725053
## cre_cate 0.51911880
## crp_cate .
## pt_cate .
## aptt_cate 0.03839149
# Lasso path plot without the red line
plot_glmnet <- function(cvfit, s = "lambda.min", ...) {
plot(cvfit, ...)
abline(v = log(cvfit$lambda[s]), col = "red", lty = 2)
}
plot_glmnet(cvfit, s = "lambda.min")
# 変数とその重要度(オッズ比)を指定
varImp <- data.frame(
variable = c("Systolic blood pressure", "Glasgow Come Scale",
"Total bilirubin", "Creatinine", "Albumin"),
importance = c(1.69308373, 1.02517028, 0.41663678, 0.51911880, 0.33725053)
)
# 変数重要度のバープロットを作成
library(ggplot2)
ggplot(varImp, aes(x = reorder(variable, importance), y = importance)) +
geom_bar(stat = "identity", fill = "steelblue") +
coord_flip() +
labs(x = "Variable", y = "Lasso Coefficients") +
theme_minimal()
# hosp_mortalityを数値に変換
dev_imp$hosp_mortality <- as.numeric(as.character(dev_imp$hosp_mortality))
# 予測変数として選択された変数を使って新たにモデルを作成
fit_reduced_model <- lrm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = dev_imp)
# 予測確率を計算
dev_imp$fitted <- predict(fit_reduced_model, type="fitted")
# Calibration in large
calibration <- mean(dev_imp$hosp_mortality) - mean(dev_imp$fitted)
fitstat <- data.frame(Name = "Calibration in the large", Value = calibration)
# Brier Score
dev_imp$diff2 <- (dev_imp$fitted - dev_imp$hosp_mortality)^2
brier_score <- mean(dev_imp$diff2)
fitstat <- rbind(fitstat, data.frame(Name = "Brier score", Value = brier_score))
# Pseudo R-square
pseudo_r2 <- fit_reduced_model$stats["R2"]
fitstat <- rbind(fitstat, data.frame(Name = "R2", Value = pseudo_r2))
# AIC
aic <- AIC(fit_reduced_model)
fitstat <- rbind(fitstat, data.frame(Name = "AIC", Value = aic))
# Change the display option
options(scipen = 999)
# Display the final table
fitstat
## Name Value
## 1 Calibration in the large -0.000000000000004440892
## 2 Brier score 0.071752221408604280328
## R2 R2 0.484264461407449664510
## 11 AIC 273.042712121431009109074
# Load necessary libraries
library(pROC)
library(ggplot2)
# Re-run the model using the selected variables from earlier
fit_reduced_model <- lrm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = dev_imp)
# Calculate fitted probabilities
dev_imp$fitted <- predict(fit_reduced_model, type="fitted")
# Create ROC object, setting the response variable and predictor
roc_obj <- roc(dev_imp$hosp_mortality ~ dev_imp$fitted, ci=TRUE, direction="auto")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot ROC curve
roc_plot <- ggroc(roc_obj)
# Add the theme you used before. For example, if you used theme_minimal:
roc_plot <- roc_plot + theme_minimal()
# Calculate AUC and its confidence interval
auc_roc <- auc(roc_obj)
ci <- ci.auc(roc_obj)
# Calculate the upper bound of the CI
upper_bound <- 2*auc_roc - ci[1]
# Print the AUC and its confidence interval
cat("AUC for the development logistic model: ", auc_roc, "\n")
## AUC for the development logistic model: 0.8942572
cat("95% CI for AUC: (", ci[1], ",", upper_bound, ")\n")
## 95% CI for AUC: ( 0.8525718 , 0.9359427 )
# Load necessary libraries
library(pROC)
library(ggplot2)
# Re-run the model using the selected variables from earlier
fit_reduced_model <- lrm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = dev_imp)
# Calculate fitted probabilities
dev_imp$fitted <- predict(fit_reduced_model, type="fitted")
# Create ROC object, setting the response variable and predictor
roc_obj <- roc(dev_imp$hosp_mortality ~ dev_imp$fitted, ci=TRUE, direction="auto")
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# Plot ROC curve
plot(roc_obj, main="ROC curve for the development logistic model")
cat("AUC for the development logistic mode: ", auc(roc_obj), "\n")
## AUC for the development logistic mode: 0.8942572
# Create calibration plot
val.prob(dev_imp$fitted, dev_imp$hosp_mortality, g=10, cex=.5)
## Dxy C (ROC) R2
## 0.788514453090328082 0.894257226545164041 0.484264461407449665
## D D:Chi-sq D:p
## 0.307102197863714232 165.606778054950837031 NA
## U U:Chi-sq U:p
## -0.003731343283588135 -0.000000000003240075 1.000000000000000000
## Q Brier Intercept
## 0.310833541147302383 0.071752221408604280 -0.000000022251851024
## Slope Emax E90
## 0.999999929020504830 0.045598356783793897 0.025583732758442883
## Eavg S:z S:p
## 0.008004960660536889 0.017098347097183400 0.986358157545818459
# Re-run the model using the selected variables from earlier
fit_reduced_model <- lrm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = dev_imp, x = TRUE, y = TRUE)
# RMSの関数を使用してブートストラップによる内的検証を実行
cv <- validate(fit_reduced_model, bw = FALSE, B = 1000, method = "boot", seed = 2023)
# Corrected AUCの計算
corrected_AUC <- cv[1, 5] * 0.5 + 0.5
# 結果の表示
cv
## index.orig training test optimism index.corrected n
## Dxy 0.7885 0.7919 0.7832 0.0087 0.7798 1000
## R2 0.4843 0.4950 0.4745 0.0205 0.4638 1000
## Intercept 0.0000 0.0000 -0.0313 0.0313 -0.0313 1000
## Slope 1.0000 1.0000 0.9587 0.0413 0.9587 1000
## Emax 0.0000 0.0000 0.0147 0.0147 0.0147 1000
## D 0.3071 0.3151 0.2998 0.0153 0.2918 1000
## U -0.0037 -0.0037 0.0008 -0.0045 0.0008 1000
## Q 0.3108 0.3189 0.2991 0.0198 0.2911 1000
## B 0.0718 0.0699 0.0735 -0.0036 0.0753 1000
## g 1.7941 1.8561 1.7607 0.0954 1.6988 1000
## gp 0.1851 0.1858 0.1832 0.0026 0.1825 1000
print("corrected_AUC")
## [1] "corrected_AUC"
print(corrected_AUC)
## [1] 0.8899068
信頼区間も出す
# 必要なパッケージの読み込み
library(pROC)
# 各ブートストラップサンプルでAUCを保存するためのベクトルの初期化
aucs <- numeric(1000)
# ブートストラップ検証
set.seed(2023) # 再現性のための固定シード
for(i in 1:1000) {
# ブートストラップサンプルの作成
sample_rows <- sample(nrow(dev_imp), replace = TRUE)
boot_data <- dev_imp[sample_rows, ]
# モデルのフィット
fit <- lrm(hosp_mortality ~ sBP_cate + gcs_cate + bil_cate + cre_cate + alb_cate, data = boot_data)
# 予測値の計算とAUCの保存
predictions <- predict(fit, type = "fitted")
aucs[i] <- auc(roc(response = boot_data$hosp_mortality, predictor = predictions, quiet = TRUE))
}
# 信頼区間の計算と表示
ci <- quantile(aucs, c(0.025, 0.975))
cat("Bootstrap 95% CI for AUC: (", ci[1], ",", ci[2], ")\n")
## Bootstrap 95% CI for AUC: ( 0.8522771 , 0.9349086 )
# Load necessary library
library(boot)
##
## 次のパッケージを付け加えます: 'boot'
## 以下のオブジェクトは 'package:car' からマスクされています:
##
## logit
# Define a function to calculate the metrics
calc_metrics <- function(data, indices) {
data_boot <- data[indices, ]
fit_model <- lrm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = data_boot, x = TRUE, y = TRUE)
predicted <- predict(fit_model, type="fitted")
calibration <- mean(data_boot$hosp_mortality) - mean(predicted)
brier_score <- mean((predicted - data_boot$hosp_mortality)^2)
pseudo_r2 <- fit_model$stats["R2"]
aic <- AIC(fit_model)
return(c(calibration, brier_score, pseudo_r2, aic))
}
# Bootstrap resampling
set.seed(2023)
results <- boot(data = dev_imp, statistic = calc_metrics, R = 1000)
# Display the results
print(results)
##
## ORDINARY NONPARAMETRIC BOOTSTRAP
##
##
## Call:
## boot(data = dev_imp, statistic = calc_metrics, R = 1000)
##
##
## Bootstrap Statistics :
## original bias std. error
## t1* -0.000000000000004440892 -0.0000000003609623 0.0000000009241876
## t2* 0.071752221408604280328 -0.0019036277383014 0.0079044100875209
## t3* 0.484264461407449664510 0.0113946783951797 0.0543409727092246
## t4* 273.042712121431009109074 -6.5142428476403893 26.5032022844468571
df_val->df_val_sens
df_val_sens$age_cate <- ifelse(df_val_sens$age >= 60, 1, 0)
df_val_sens$bmi_cate <- ifelse(df_val_sens$bmi >= 25, 1, 0) # bmiは25以上:1, 25未満:0
df_val_sens$gcs_cate <- ifelse(df_val_sens$gcs < 12, 1, 0)
df_val_sens$cci_cate <- ifelse(df_val_sens$cci_num >= 6, 1, 0)
df_val_sens$bt_cate <- ifelse(df_val_sens$bt >= 37, 1, 0)
df_val_sens$sBP_cate <- ifelse(df_val_sens$sBP < 80, 1, 0)
df_val_sens$dBP_cate <- ifelse(df_val_sens$dBP < 50, 1, 0)
df_val_sens$hr_cate <- ifelse(df_val_sens$hr >= 100, 1, 0)
df_val_sens$bil_cate <- ifelse(df_val_sens$bil >= 5, 1, 0)
df_val_sens$ast_cate <- ifelse(df_val_sens$ast >= 200, 1, 0)
df_val_sens$alt_cate <- ifelse(df_val_sens$alt >= 100, 1, 0)
df_val_sens$wbc_cate <- ifelse(df_val_sens$wbc >= 12000, 1, 0)
df_val_sens$hb_cate <- ifelse(df_val_sens$hb < 8, 1, 0)
df_val_sens$plt_cate <- ifelse(df_val_sens$plt < 100, 1, 0) # pltは100未満:1、100以上:0
df_val_sens$tp_cate <- ifelse(df_val_sens$tp < 6, 1, 0)
df_val_sens$alb_cate <- ifelse(df_val_sens$alb < 2.8, 1, 0)
df_val_sens$cre_cate <- ifelse(df_val_sens$cre >= 1.5, 1, 0)
df_val_sens$crp_cate <- ifelse(df_val_sens$crp >= 2, 1, 0)
df_val_sens$pt_cate <- ifelse(df_val_sens$pt < 50, 1, 0)
df_val_sens$aptt_cate <- ifelse(df_val_sens$aptt >= 50, 1, 0) # apttは50以上:1, 50未満:0
completedata_val <- na.omit(df_val_sens)
str(completedata_val)
## tibble [269 × 84] (S3: tbl_df/tbl/data.frame)
## $ hosp_id : int [1:269] 1002 1002 1003 1003 1003 1003 1003 1003 1003 1003 ...
## $ pt_id : int [1:269] 24 32 33 40 42 44 46 49 53 53 ...
## $ hosp_num : int [1:269] 1 1 1 1 1 1 1 1 3 4 ...
## $ year : int [1:269] 2022 2022 2022 2020 2021 2022 2019 2018 2019 2019 ...
## $ age : int [1:269] 82 57 69 68 75 53 43 44 50 50 ...
## $ sex : Factor w/ 2 levels "M","F": 1 1 1 2 2 1 1 2 1 1 ...
## $ bmi : num [1:269] 26.9 23.2 23.6 34.2 19.6 ...
## $ smoke : int [1:269] 0 0 0 0 0 0 69 0 600 600 ...
## $ barthel : Factor w/ 3 levels "0","1","2": 2 1 3 1 1 3 3 2 3 2 ...
## $ child_num : int [1:269] 8 8 8 7 8 14 10 8 13 12 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 2 2 2 2 2 3 3 2 3 3 ...
## $ gcs : int [1:269] 15 15 15 15 15 3 15 15 15 15 ...
## $ cci_num : int [1:269] 4 3 6 4 5 5 4 11 5 5 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 2 2 ...
## $ dm : Factor w/ 2 levels "0","1": 2 1 2 2 2 2 1 1 1 1 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 2 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 1 1 1 1 2 1 2 1 2 2 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 2 2 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ map : int [1:269] 6 2 4 4 4 4 0 0 6 8 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 1 2 1 1 1 1 1 1 1 ...
## $ bt : num [1:269] 36.9 37.1 37.2 37 37 36 38.3 39.2 37.2 36.5 ...
## $ sBP : int [1:269] 95 111 125 96 97 71 81 60 99 80 ...
## $ dBP : int [1:269] 49 68 70 55 54 49 54 43 65 48 ...
## $ hr : int [1:269] 60 101 121 72 75 56 115 151 77 93 ...
## $ shock : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 2 2 1 2 ...
## $ bil : num [1:269] 0.91 2.35 2.49 2.18 1.13 ...
## $ ast : num [1:269] 37 42 74 24 31 62 234 105 113 58 ...
## $ alt : num [1:269] 19 35 43 16 25 44 30 98 37 25 ...
## $ wbc : num [1:269] 3350 12030 6800 6000 8500 ...
## $ hb : num [1:269] 7.7 9.8 11.2 12.9 10.4 7.7 12.8 8.1 8.4 7.8 ...
## $ plt : num [1:269] 109 107 107 84 152 178 86 183 54 44 ...
## $ tp : num [1:269] 5.8 5.4 5.9 6.1 7 6.8 6.5 5.6 7 5.7 ...
## $ alb : num [1:269] 2.8 3.4 3 3.7 3.3 2.7 2.9 2 1.8 1.5 ...
## $ eGFR : num [1:269] 46.6 94.2 90.7 76.1 65.4 ...
## $ bun : num [1:269] 24.2 26.9 7.9 24.9 35.3 23.9 19.6 13.7 7.6 6.7 ...
## $ cre : num [1:269] 1.16 0.67 0.66 0.59 0.66 1.43 0.51 1.18 0.44 0.64 ...
## $ crp : num [1:269] 0.22 0.04 0.111 0.255 0.579 ...
## $ pt : num [1:269] 60.4 54.8 54 68 78 69 43 47 29 35 ...
## $ aptt : num [1:269] 30.2 33.4 34.5 30.2 33.1 30.2 37.3 46.9 53.6 75.9 ...
## $ meld : int [1:269] 10 9 10 7 6 17 11 14 16 16 ...
## $ hosp_mortality: num [1:269] 0 0 0 0 0 0 0 1 0 0 ...
## $ los : int [1:269] 14 5 12 6 9 16 3 6 20 39 ...
## $ cohort : Factor w/ 2 levels "develop","validation": 2 2 2 2 2 2 2 2 2 2 ...
## $ age_cate : num [1:269] 1 0 1 1 1 0 0 0 0 0 ...
## $ bmi_cate : num [1:269] 1 0 0 1 0 1 1 0 0 0 ...
## $ gcs_cate : num [1:269] 0 0 0 0 0 1 0 0 0 0 ...
## $ cci_cate : num [1:269] 0 0 1 0 0 0 0 1 0 0 ...
## $ bt_cate : num [1:269] 0 1 1 1 1 0 1 1 1 0 ...
## $ sBP_cate : num [1:269] 0 0 0 0 0 1 0 1 0 0 ...
## $ dBP_cate : num [1:269] 1 0 0 0 0 1 0 1 0 1 ...
## $ hr_cate : num [1:269] 0 1 1 0 0 0 1 1 0 0 ...
## $ bil_cate : num [1:269] 0 0 0 0 0 0 0 0 1 1 ...
## $ ast_cate : num [1:269] 0 0 0 0 0 0 1 0 0 0 ...
## $ alt_cate : num [1:269] 0 0 0 0 0 0 0 0 0 0 ...
## $ wbc_cate : num [1:269] 0 1 0 0 0 0 0 1 0 0 ...
## $ hb_cate : num [1:269] 1 0 0 0 0 1 0 0 0 1 ...
## $ plt_cate : num [1:269] 0 0 0 1 0 0 1 0 1 1 ...
## $ tp_cate : num [1:269] 1 1 1 0 0 0 0 1 0 1 ...
## $ alb_cate : num [1:269] 0 0 0 0 0 1 0 1 1 1 ...
## $ cre_cate : num [1:269] 0 0 0 0 0 0 0 0 0 0 ...
## $ crp_cate : num [1:269] 0 0 0 0 0 1 0 1 0 0 ...
## $ pt_cate : num [1:269] 0 0 0 0 0 0 1 1 1 1 ...
## $ aptt_cate : num [1:269] 0 0 0 0 0 0 0 0 1 1 ...
## - attr(*, "na.action")= 'omit' Named int [1:175] 1 2 6 7 13 14 18 19 25 29 ...
## ..- attr(*, "names")= chr [1:175] "1" "2" "6" "7" ...
col_fact_cate=c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","malignancy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", "vaso","ffp","pc", "albner","shock","hosp_mortality","age_cate","bmi_cate","gcs_cate","cci_cate","bt_cate","sBP_cate","dBP_cate","hr_cate","bil_cate","ast_cate","alt_cate","wbc_cate","hb_cate","plt_cate","tp_cate","alb_cate","cre_cate","crp_cate","pt_cate","aptt_cate")
# Create your table
completedata_val %>%
select(c(col_fact_cate)) %>%
CreateTableOne(vars = c(col_fact_cate), strata="hosp_mortality",factorVars = col_fact_cate, addOverall = T) -> tableone_completedata_val
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_fact_cate)
##
## # Now:
## data %>% select(all_of(col_fact_cate))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Print your table
print(tableone_completedata_val, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## Stratified by hosp_mortality
## Overall 0 1 p test
## n 269 251 18
## sex = F (%) 68 ( 25.3) 65 ( 25.9) 3 ( 16.7) 0.555
## barthel (%) 0.003
## 0 104 ( 38.7) 103 ( 41.0) 1 ( 5.6)
## 1 85 ( 31.6) 79 ( 31.5) 6 ( 33.3)
## 2 80 ( 29.7) 69 ( 27.5) 11 ( 61.1)
## child_score (%) 0.001
## 0 56 ( 20.8) 54 ( 21.5) 2 ( 11.1)
## 1 146 ( 54.3) 141 ( 56.2) 5 ( 27.8)
## 2 67 ( 24.9) 56 ( 22.3) 11 ( 61.1)
## pad = 1 (%) 1 ( 0.4) 1 ( 0.4) 0 ( 0.0) 1.000
## stroke = 1 (%) 5 ( 1.9) 4 ( 1.6) 1 ( 5.6) 0.765
## dimentia = 1 (%) 3 ( 1.1) 3 ( 1.2) 0 ( 0.0) 1.000
## ch_lung = 1 (%) 2 ( 0.7) 2 ( 0.8) 0 ( 0.0) 1.000
## rheumati = 0 (%) 269 (100.0) 251 (100.0) 18 (100.0) NA
## pept_ulcer = 1 (%) 31 ( 11.5) 30 ( 12.0) 1 ( 5.6) 0.661
## dm = 1 (%) 76 ( 28.3) 72 ( 28.7) 4 ( 22.2) 0.751
## dm_compli = 1 (%) 2 ( 0.7) 1 ( 0.4) 1 ( 5.6) 0.298
## paralysis = 0 (%) 269 (100.0) 251 (100.0) 18 (100.0) NA
## malignancy = 1 (%) 32 ( 11.9) 28 ( 11.2) 4 ( 22.2) 0.306
## meta_tumor = 1 (%) 3 ( 1.1) 1 ( 0.4) 2 ( 11.1) 0.003
## aids = 0 (%) 269 (100.0) 251 (100.0) 18 (100.0) NA
## eGFR30 = 1 (%) 19 ( 7.1) 13 ( 5.2) 6 ( 33.3) <0.001
## hd = 1 (%) 6 ( 2.2) 3 ( 1.2) 3 ( 16.7) 0.001
## hcc = 1 (%) 36 ( 13.4) 32 ( 12.7) 4 ( 22.2) 0.434
## alcohol = 1 (%) 148 ( 55.0) 136 ( 54.2) 12 ( 66.7) 0.434
## past_rupture = 1 (%) 75 ( 27.9) 72 ( 28.7) 3 ( 16.7) 0.409
## antiplate = 1 (%) 2 ( 0.7) 1 ( 0.4) 1 ( 5.6) 0.298
## anticoag = 1 (%) 2 ( 0.7) 2 ( 0.8) 0 ( 0.0) 1.000
## antithro = 1 (%) 4 ( 1.5) 3 ( 1.2) 1 ( 5.6) 0.639
## nsaids = 1 (%) 2 ( 0.7) 2 ( 0.8) 0 ( 0.0) 1.000
## steroid = 1 (%) 2 ( 0.7) 2 ( 0.8) 0 ( 0.0) 1.000
## beta = 1 (%) 25 ( 9.3) 25 ( 10.0) 0 ( 0.0) 0.324
## vaso = 1 (%) 15 ( 5.6) 13 ( 5.2) 2 ( 11.1) 0.598
## ffp = 1 (%) 90 ( 33.5) 80 ( 31.9) 10 ( 55.6) 0.072
## pc = 1 (%) 5 ( 1.9) 4 ( 1.6) 1 ( 5.6) 0.765
## albner = 1 (%) 25 ( 9.3) 22 ( 8.8) 3 ( 16.7) 0.487
## shock = 1 (%) 117 ( 43.5) 99 ( 39.4) 18 (100.0) <0.001
## hosp_mortality = 1 (%) 18 ( 6.7) 0 ( 0.0) 18 (100.0) <0.001
## age_cate = 1 (%) 129 ( 48.0) 121 ( 48.2) 8 ( 44.4) 0.949
## bmi_cate = 1 (%) 84 ( 31.2) 77 ( 30.7) 7 ( 38.9) 0.643
## gcs_cate = 1 (%) 5 ( 1.9) 4 ( 1.6) 1 ( 5.6) 0.765
## cci_cate = 1 (%) 37 ( 13.8) 32 ( 12.7) 5 ( 27.8) 0.152
## bt_cate = 1 (%) 91 ( 33.8) 85 ( 33.9) 6 ( 33.3) 1.000
## sBP_cate = 1 (%) 55 ( 20.4) 43 ( 17.1) 12 ( 66.7) <0.001
## dBP_cate = 1 (%) 88 ( 32.7) 75 ( 29.9) 13 ( 72.2) 0.001
## hr_cate = 1 (%) 77 ( 28.6) 68 ( 27.1) 9 ( 50.0) 0.071
## bil_cate = 1 (%) 22 ( 8.2) 17 ( 6.8) 5 ( 27.8) 0.007
## ast_cate = 1 (%) 16 ( 5.9) 16 ( 6.4) 0 ( 0.0) 0.556
## alt_cate = 1 (%) 14 ( 5.2) 14 ( 5.6) 0 ( 0.0) 0.631
## wbc_cate = 1 (%) 55 ( 20.4) 49 ( 19.5) 6 ( 33.3) 0.271
## hb_cate = 1 (%) 95 ( 35.3) 84 ( 33.5) 11 ( 61.1) 0.034
## plt_cate = 1 (%) 120 ( 44.6) 112 ( 44.6) 8 ( 44.4) 1.000
## tp_cate = 1 (%) 112 ( 41.6) 100 ( 39.8) 12 ( 66.7) 0.047
## alb_cate = 1 (%) 97 ( 36.1) 85 ( 33.9) 12 ( 66.7) 0.011
## cre_cate = 1 (%) 27 ( 10.0) 19 ( 7.6) 8 ( 44.4) <0.001
## crp_cate = 1 (%) 28 ( 10.4) 22 ( 8.8) 6 ( 33.3) 0.004
## pt_cate = 1 (%) 72 ( 26.8) 63 ( 25.1) 9 ( 50.0) 0.042
## aptt_cate = 1 (%) 8 ( 3.0) 6 ( 2.4) 2 ( 11.1) 0.166
## Stratified by hosp_mortality
## SMD Missing
## n
## sex = F (%) 0.227 0.0
## barthel (%) 0.997 0.0
## 0
## 1
## 2
## child_score (%) 0.856 0.0
## 0
## 1
## 2
## pad = 1 (%) 0.089 0.0
## stroke = 1 (%) 0.215 0.0
## dimentia = 1 (%) 0.156 0.0
## ch_lung = 1 (%) 0.127 0.0
## rheumati = 0 (%) <0.001 0.0
## pept_ulcer = 1 (%) 0.228 0.0
## dm = 1 (%) 0.149 0.0
## dm_compli = 1 (%) 0.307 0.0
## paralysis = 0 (%) <0.001 0.0
## malignancy = 1 (%) 0.300 0.0
## meta_tumor = 1 (%) 0.473 0.0
## aids = 0 (%) <0.001 0.0
## eGFR30 = 1 (%) 0.764 0.0
## hd = 1 (%) 0.564 0.0
## hcc = 1 (%) 0.251 0.0
## alcohol = 1 (%) 0.257 0.0
## past_rupture = 1 (%) 0.290 0.0
## antiplate = 1 (%) 0.307 0.0
## anticoag = 1 (%) 0.127 0.0
## antithro = 1 (%) 0.243 0.0
## nsaids = 1 (%) 0.127 0.0
## steroid = 1 (%) 0.127 0.0
## beta = 1 (%) 0.470 0.0
## vaso = 1 (%) 0.218 0.0
## ffp = 1 (%) 0.492 0.0
## pc = 1 (%) 0.215 0.0
## albner = 1 (%) 0.239 0.0
## shock = 1 (%) 1.752 0.0
## hosp_mortality = 1 (%) NaN 0.0
## age_cate = 1 (%) 0.076 0.0
## bmi_cate = 1 (%) 0.173 0.0
## gcs_cate = 1 (%) 0.215 0.0
## cci_cate = 1 (%) 0.381 0.0
## bt_cate = 1 (%) 0.011 0.0
## sBP_cate = 1 (%) 1.161 0.0
## dBP_cate = 1 (%) 0.935 0.0
## hr_cate = 1 (%) 0.484 0.0
## bil_cate = 1 (%) 0.578 0.0
## ast_cate = 1 (%) 0.369 0.0
## alt_cate = 1 (%) 0.344 0.0
## wbc_cate = 1 (%) 0.317 0.0
## hb_cate = 1 (%) 0.576 0.0
## plt_cate = 1 (%) 0.004 0.0
## tp_cate = 1 (%) 0.558 0.0
## alb_cate = 1 (%) 0.694 0.0
## cre_cate = 1 (%) 0.926 0.0
## crp_cate = 1 (%) 0.632 0.0
## pt_cate = 1 (%) 0.532 0.0
## aptt_cate = 1 (%) 0.353 0.0
tbl_summary(data = completedata_val,
by = "hosp_mortality",
type = list(gcs ~ "continuous", year ~ "categorical"),
statistic = all_continuous() ~ "{median} ({p25}, {p75})",
digits = all_continuous() ~ c(2, 2))
| Characteristic | 0, N = 2511 | 1, N = 181 |
|---|---|---|
| hosp_id | 1,017.00 (1,004.00, 1,024.00) | 1,015.50 (1,003.00, 1,023.50) |
| pt_id | 442.00 (207.00, 625.50) | 428.00 (136.50, 544.50) |
| hosp_num | ||
| 1 | 206 (82%) | 14 (78%) |
| 2 | 27 (11%) | 2 (11%) |
| 3 | 8 (3.2%) | 1 (5.6%) |
| 4 | 6 (2.4%) | 0 (0%) |
| 5 | 3 (1.2%) | 1 (5.6%) |
| 6 | 1 (0.4%) | 0 (0%) |
| year | ||
| 2017 | 25 (10.0%) | 2 (11%) |
| 2018 | 36 (14%) | 4 (22%) |
| 2019 | 44 (18%) | 4 (22%) |
| 2020 | 49 (20%) | 3 (17%) |
| 2021 | 48 (19%) | 2 (11%) |
| 2022 | 49 (20%) | 3 (17%) |
| age | 59.00 (50.00, 71.00) | 56.50 (50.00, 70.00) |
| sex | ||
| M | 186 (74%) | 15 (83%) |
| F | 65 (26%) | 3 (17%) |
| bmi | 22.89 (20.59, 26.10) | 23.23 (21.96, 28.67) |
| smoke | 0.00 (0.00, 380.00) | 0.00 (0.00, 277.50) |
| barthel | ||
| 0 | 103 (41%) | 1 (5.6%) |
| 1 | 79 (31%) | 6 (33%) |
| 2 | 69 (27%) | 11 (61%) |
| child_num | 8.00 (7.00, 9.00) | 11.00 (9.00, 11.00) |
| child_score | ||
| 0 | 54 (22%) | 2 (11%) |
| 1 | 141 (56%) | 5 (28%) |
| 2 | 56 (22%) | 11 (61%) |
| gcs | 15.00 (15.00, 15.00) | 15.00 (13.00, 15.00) |
| cci_num | ||
| 3 | 43 (17%) | 2 (11%) |
| 4 | 115 (46%) | 9 (50%) |
| 5 | 61 (24%) | 2 (11%) |
| 6 | 20 (8.0%) | 1 (5.6%) |
| 7 | 9 (3.6%) | 2 (11%) |
| 8 | 2 (0.8%) | 0 (0%) |
| 11 | 0 (0%) | 2 (11%) |
| 13 | 1 (0.4%) | 0 (0%) |
| pad | ||
| 0 | 250 (100%) | 18 (100%) |
| 1 | 1 (0.4%) | 0 (0%) |
| stroke | ||
| 0 | 247 (98%) | 17 (94%) |
| 1 | 4 (1.6%) | 1 (5.6%) |
| dimentia | ||
| 0 | 248 (99%) | 18 (100%) |
| 1 | 3 (1.2%) | 0 (0%) |
| ch_lung | ||
| 0 | 249 (99%) | 18 (100%) |
| 1 | 2 (0.8%) | 0 (0%) |
| rheumati | ||
| 0 | 251 (100%) | 18 (100%) |
| 1 | 0 (0%) | 0 (0%) |
| pept_ulcer | ||
| 0 | 221 (88%) | 17 (94%) |
| 1 | 30 (12%) | 1 (5.6%) |
| dm | ||
| 0 | 179 (71%) | 14 (78%) |
| 1 | 72 (29%) | 4 (22%) |
| dm_compli | ||
| 0 | 250 (100%) | 17 (94%) |
| 1 | 1 (0.4%) | 1 (5.6%) |
| paralysis | ||
| 0 | 251 (100%) | 18 (100%) |
| malignancy | ||
| 0 | 223 (89%) | 14 (78%) |
| 1 | 28 (11%) | 4 (22%) |
| meta_tumor | ||
| 0 | 250 (100%) | 16 (89%) |
| 1 | 1 (0.4%) | 2 (11%) |
| aids | ||
| 0 | 251 (100%) | 18 (100%) |
| eGFR30 | ||
| 0 | 238 (95%) | 12 (67%) |
| 1 | 13 (5.2%) | 6 (33%) |
| hd | ||
| 0 | 248 (99%) | 15 (83%) |
| 1 | 3 (1.2%) | 3 (17%) |
| hcc | ||
| 0 | 219 (87%) | 14 (78%) |
| 1 | 32 (13%) | 4 (22%) |
| alcohol | ||
| 0 | 115 (46%) | 6 (33%) |
| 1 | 136 (54%) | 12 (67%) |
| past_rupture | ||
| 0 | 179 (71%) | 15 (83%) |
| 1 | 72 (29%) | 3 (17%) |
| antiplate | ||
| 0 | 250 (100%) | 17 (94%) |
| 1 | 1 (0.4%) | 1 (5.6%) |
| anticoag | ||
| 0 | 249 (99%) | 18 (100%) |
| 1 | 2 (0.8%) | 0 (0%) |
| antithro | ||
| 0 | 248 (99%) | 17 (94%) |
| 1 | 3 (1.2%) | 1 (5.6%) |
| nsaids | ||
| 0 | 249 (99%) | 18 (100%) |
| 1 | 2 (0.8%) | 0 (0%) |
| steroid | ||
| 0 | 249 (99%) | 18 (100%) |
| 1 | 2 (0.8%) | 0 (0%) |
| beta | ||
| 0 | 226 (90%) | 18 (100%) |
| 1 | 25 (10.0%) | 0 (0%) |
| vaso | ||
| 0 | 238 (95%) | 16 (89%) |
| 1 | 13 (5.2%) | 2 (11%) |
| map | 4.00 (0.00, 4.00) | 4.00 (2.50, 8.00) |
| ffp | ||
| 0 | 171 (68%) | 8 (44%) |
| 1 | 80 (32%) | 10 (56%) |
| pc | ||
| 0 | 247 (98%) | 17 (94%) |
| 1 | 4 (1.6%) | 1 (5.6%) |
| albner | ||
| 0 | 229 (91%) | 15 (83%) |
| 1 | 22 (8.8%) | 3 (17%) |
| bt | 36.80 (36.50, 37.10) | 36.65 (36.40, 37.70) |
| sBP | 92.00 (82.00, 101.00) | 70.00 (57.00, 80.00) |
| dBP | 55.00 (47.50, 64.00) | 43.50 (37.25, 49.50) |
| hr | 84.00 (73.50, 100.00) | 102.50 (90.25, 122.75) |
| shock | ||
| 0 | 152 (61%) | 0 (0%) |
| 1 | 99 (39%) | 18 (100%) |
| bil | 1.60 (0.91, 2.50) | 2.86 (1.19, 6.91) |
| ast | 46.00 (31.00, 87.00) | 58.00 (26.25, 97.25) |
| alt | 28.00 (19.00, 44.00) | 26.50 (17.00, 37.75) |
| wbc | 8,190.00 (5,935.00, 11,080.00) | 9,950.00 (7,775.00, 12,867.50) |
| hb | 9.00 (7.50, 10.50) | 7.50 (6.58, 8.62) |
| plt | 104.00 (79.00, 143.50) | 109.00 (60.75, 148.00) |
| tp | 6.20 (5.70, 6.70) | 5.55 (4.78, 6.22) |
| alb | 3.00 (2.50, 3.40) | 2.30 (1.80, 2.80) |
| eGFR | 69.52 (53.96, 90.50) | 39.87 (29.41, 55.90) |
| bun | 24.60 (16.85, 36.40) | 34.65 (22.42, 43.47) |
| cre | 0.81 (0.65, 1.06) | 1.45 (1.05, 1.94) |
| crp | 0.24 (0.11, 0.68) | 0.86 (0.15, 3.66) |
| pt | 61.30 (49.50, 72.50) | 50.00 (33.58, 61.48) |
| aptt | 31.60 (29.30, 35.45) | 36.35 (31.50, 43.95) |
| meld | 9.00 (6.00, 14.00) | 17.50 (13.25, 23.75) |
| los | 8.00 (5.00, 13.00) | 5.50 (2.25, 10.00) |
| cohort | ||
| develop | 0 (0%) | 0 (0%) |
| validation | 251 (100%) | 18 (100%) |
| age_cate | 121 (48%) | 8 (44%) |
| bmi_cate | 77 (31%) | 7 (39%) |
| gcs_cate | 4 (1.6%) | 1 (5.6%) |
| cci_cate | 32 (13%) | 5 (28%) |
| bt_cate | 85 (34%) | 6 (33%) |
| sBP_cate | 43 (17%) | 12 (67%) |
| dBP_cate | 75 (30%) | 13 (72%) |
| hr_cate | 68 (27%) | 9 (50%) |
| bil_cate | 17 (6.8%) | 5 (28%) |
| ast_cate | 16 (6.4%) | 0 (0%) |
| alt_cate | 14 (5.6%) | 0 (0%) |
| wbc_cate | 49 (20%) | 6 (33%) |
| hb_cate | 84 (33%) | 11 (61%) |
| plt_cate | 112 (45%) | 8 (44%) |
| tp_cate | 100 (40%) | 12 (67%) |
| alb_cate | 85 (34%) | 12 (67%) |
| cre_cate | 19 (7.6%) | 8 (44%) |
| crp_cate | 22 (8.8%) | 6 (33%) |
| pt_cate | 63 (25%) | 9 (50%) |
| aptt_cate | 6 (2.4%) | 2 (11%) |
| 1 Median (IQR); n (%) | ||
###準備
# vari_cat.f (defined as above) in the data_original is changed to factor type and set into data_factor_for_imp
data_factor_for_imp_val <- as.data.frame(lapply(df_val[vari_cat.f],as.factor))
#check all the variable is factor
str(data_factor_for_imp_val)
## 'data.frame': 444 obs. of 32 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": NA 3 2 1 3 1 2 1 1 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
## $ shock : Factor w/ 2 levels "0","1": 1 NA 1 1 1 1 1 1 1 1 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
#summary numeric type
data_numeric_for_imp_val <- as.data.frame(lapply(df_val[vari_numeric],as.numeric))
# check all the variable is numeric
str(data_numeric_for_imp_val)
## 'data.frame': 444 obs. of 27 variables:
## $ age : num 47 62 82 57 69 47 50 68 75 53 ...
## $ bmi : num 21 20.8 26.9 23.2 23.6 ...
## $ smoke : num 270 0 0 0 0 135 NA 0 0 0 ...
## $ child_num: num 11 9 8 8 8 6 7 7 8 14 ...
## $ gcs : num 15 13 15 15 15 15 15 15 15 3 ...
## $ cci_num : num 4 4 4 3 6 5 4 4 5 5 ...
## $ map : num 0 8 6 2 4 0 2 4 4 4 ...
## $ bt : num 36.4 NA 36.9 37.1 37.2 36.1 36.5 37 37 36 ...
## $ sBP : num 102 NA 95 111 125 116 102 96 97 71 ...
## $ dBP : num 67 NA 49 68 70 84 88 55 54 49 ...
## $ hr : num 83 NA 60 101 121 56 79 72 75 56 ...
## $ bil : num 7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
## $ ast : num 112 56 37 42 74 18 19 24 31 62 ...
## $ alt : num 53 30 19 35 43 13 18 16 25 44 ...
## $ wbc : num 9800 14600 3350 12030 6800 ...
## $ hb : num 12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
## $ plt : num 69 437 109 107 107 60 140 84 152 178 ...
## $ tp : num 5.7 5.6 5.8 5.4 5.9 NA 5.9 6.1 7 6.8 ...
## $ alb : num 2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
## $ eGFR : num 89.3 65.8 46.6 94.2 90.7 ...
## $ bun : num 13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
## $ cre : num 0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
## $ crp : num 0.1 0.17 0.22 0.04 0.111 ...
## $ pt : num 28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
## $ aptt : num 37.8 30.9 30.2 33.4 34.5 NA 29.1 30.2 33.1 30.2 ...
## $ los : num 6 2 14 5 12 4 1 6 9 16 ...
## $ meld : num 20 6 10 9 10 6 10 7 6 17 ...
# combine the factor type and numeric type
data_for_imp_val <- cbind(data_factor_for_imp_val, data_numeric_for_imp_val)
#check all the variable type
str(data_for_imp_val)
## 'data.frame': 444 obs. of 59 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": NA 3 2 1 3 1 2 1 1 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
## $ shock : Factor w/ 2 levels "0","1": 1 NA 1 1 1 1 1 1 1 1 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ age : num 47 62 82 57 69 47 50 68 75 53 ...
## $ bmi : num 21 20.8 26.9 23.2 23.6 ...
## $ smoke : num 270 0 0 0 0 135 NA 0 0 0 ...
## $ child_num : num 11 9 8 8 8 6 7 7 8 14 ...
## $ gcs : num 15 13 15 15 15 15 15 15 15 3 ...
## $ cci_num : num 4 4 4 3 6 5 4 4 5 5 ...
## $ map : num 0 8 6 2 4 0 2 4 4 4 ...
## $ bt : num 36.4 NA 36.9 37.1 37.2 36.1 36.5 37 37 36 ...
## $ sBP : num 102 NA 95 111 125 116 102 96 97 71 ...
## $ dBP : num 67 NA 49 68 70 84 88 55 54 49 ...
## $ hr : num 83 NA 60 101 121 56 79 72 75 56 ...
## $ bil : num 7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
## $ ast : num 112 56 37 42 74 18 19 24 31 62 ...
## $ alt : num 53 30 19 35 43 13 18 16 25 44 ...
## $ wbc : num 9800 14600 3350 12030 6800 ...
## $ hb : num 12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
## $ plt : num 69 437 109 107 107 60 140 84 152 178 ...
## $ tp : num 5.7 5.6 5.8 5.4 5.9 NA 5.9 6.1 7 6.8 ...
## $ alb : num 2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
## $ eGFR : num 89.3 65.8 46.6 94.2 90.7 ...
## $ bun : num 13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
## $ cre : num 0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
## $ crp : num 0.1 0.17 0.22 0.04 0.111 ...
## $ pt : num 28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
## $ aptt : num 37.8 30.9 30.2 33.4 34.5 NA 29.1 30.2 33.1 30.2 ...
## $ los : num 6 2 14 5 12 4 1 6 9 16 ...
## $ meld : num 20 6 10 9 10 6 10 7 6 17 ...
###実行
cores <- detectCores(logical = FALSE) ###並列化処理
registerDoParallel(cores = cores) ###並列化処理
set.seed(2023)
md.pattern(data_for_imp_val) #see patern the missing
## sex pad stroke dimentia ch_lung rheumati pept_ulcer dm dm_compli paralysis
## 269 1 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 17 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## malignancy meta_tumor aids hd hcc alcohol past_rupture antiplate anticoag
## 269 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 17 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0
## antithro nsaids steroid beta vaso ffp pc albner hosp_mortality age gcs
## 269 1 1 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 17 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 6 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0 0
## cci_num map los sBP dBP shock hr bt eGFR30 ast alt wbc hb plt eGFR bun bil
## 269 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 9 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 14 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 20 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 7 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 13 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 12 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 17 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 8 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 5 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 6 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0
## 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0
## 2 1 1 1 1 1 1 1 0 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 0 0 1 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1
## 1 1 1 1 1 1 0 0 0 1 1 1 1 1 1 1 1 1
## 1 1 1 1 0 0 0 0 0 1 1 1 1 1 1 1 1 1
## 0 0 0 1 1 4 4 5 7 7 7 7 7 7 7 7 9
## cre alb crp pt barthel meld bmi tp child_score smoke child_num aptt
## 269 1 1 1 1 1 1 1 1 1 1 1 1 0
## 13 1 1 1 1 1 1 1 1 1 1 1 0 1
## 9 1 1 1 1 1 1 1 1 1 1 0 1 1
## 1 1 1 1 1 1 1 1 1 1 1 0 0 2
## 14 1 1 1 1 1 1 1 1 1 0 1 1 1
## 1 1 1 1 1 1 1 1 1 1 0 1 0 2
## 1 1 1 1 1 1 1 1 1 1 0 0 1 2
## 20 1 1 1 1 1 1 1 1 0 1 0 1 2
## 1 1 1 1 1 1 1 1 1 0 1 0 0 3
## 7 1 1 1 1 1 1 1 1 0 0 0 1 3
## 1 1 1 1 1 1 1 1 1 0 0 0 0 4
## 7 1 1 1 1 1 1 1 0 1 1 1 1 1
## 8 1 1 1 1 1 1 1 0 1 1 1 0 2
## 1 1 1 1 1 1 1 1 0 1 1 0 1 2
## 13 1 1 1 1 1 1 0 1 1 1 1 1 1
## 1 1 1 1 1 1 1 0 1 1 1 1 0 2
## 12 1 1 1 1 1 1 0 1 1 0 1 1 2
## 1 1 1 1 1 1 1 0 0 1 1 1 0 3
## 1 1 1 1 1 1 0 1 1 1 1 1 1 1
## 17 1 1 1 1 0 1 1 1 1 1 1 1 1
## 2 1 1 1 1 0 1 1 1 1 1 1 0 2
## 1 1 1 1 1 0 1 1 1 1 0 1 1 2
## 1 1 1 1 1 0 1 1 1 1 0 0 1 3
## 1 1 1 1 1 0 1 1 0 1 1 1 1 2
## 1 1 1 1 1 0 1 0 1 1 1 1 1 2
## 8 1 1 1 0 1 0 1 1 1 1 1 0 3
## 2 1 1 1 0 1 0 1 1 0 1 0 0 5
## 1 1 1 1 0 1 0 1 1 0 0 0 0 6
## 1 1 1 1 0 1 0 1 0 1 1 1 0 4
## 1 1 1 1 0 1 0 1 0 1 0 1 0 5
## 5 1 1 0 1 1 1 1 1 1 1 1 1 1
## 1 1 0 1 1 1 1 1 1 0 1 0 1 3
## 2 1 0 1 1 1 1 1 0 1 1 1 1 2
## 1 1 0 0 1 1 1 1 0 1 1 1 1 3
## 2 0 1 1 1 1 0 1 1 1 1 1 1 2
## 1 0 1 0 1 1 0 1 1 0 1 0 1 5
## 1 1 1 0 1 1 0 1 1 1 0 1 1 4
## 1 1 0 1 1 1 0 1 0 1 1 1 1 4
## 6 0 0 0 0 1 0 1 0 1 1 1 0 16
## 1 0 0 0 0 1 0 1 0 0 1 0 0 18
## 2 1 1 1 1 1 1 0 1 1 1 1 1 2
## 1 1 1 1 1 0 1 1 1 1 1 1 1 3
## 1 1 1 1 1 1 1 1 1 1 0 1 1 4
## 1 1 1 1 1 1 1 1 0 1 1 1 1 4
## 1 1 1 1 1 1 1 1 1 1 1 1 1 5
## 10 12 15 20 24 26 30 32 35 42 48 49 423
imp.mf_val <- missForest(data_for_imp_val,
maxiter = 10,
ntree = 100,
mtry = floor(sqrt(ncol(data_for_imp_val))),
parallelize = "variables",
verbose = TRUE)
## parallelizing over the variables of the input data matrix 'xmis'
## missForest iteration 1 in progress...done!
## estimated error(s): 0.453138 0.02131973
## difference(s): 0.00009726191 0.001618806
## time: 0.915 seconds
##
## missForest iteration 2 in progress...done!
## estimated error(s): 0.4419382 0.0192313
## difference(s): 0.0001107889 0.0004222973
## time: 0.914 seconds
##
## missForest iteration 3 in progress...done!
## estimated error(s): 0.4409547 0.0197921
## difference(s): 0.0000704322 0.0005630631
## time: 0.91 seconds
##
## missForest iteration 4 in progress...done!
## estimated error(s): 0.4446167 0.02060827
## difference(s): 0.00007781678 0.0004222973
## time: 0.972 seconds
##
## missForest iteration 5 in progress...done!
## estimated error(s): 0.4480345 0.01957716
## difference(s): 0.00005413141 0.0005630631
## time: 0.911 seconds
##
## missForest iteration 6 in progress...done!
## estimated error(s): 0.4424111 0.01935843
## difference(s): 0.00005504244 0.0003519144
## time: 0.926 seconds
##
## missForest iteration 7 in progress...done!
## estimated error(s): 0.44556 0.01935092
## difference(s): 0.00008066198 0.0004222973
## time: 1.002 seconds
summary(imp.mf_val$ximp)
## sex barthel child_score pad stroke dimentia ch_lung rheumati
## M:329 0:174 0: 83 0:443 0:433 0:438 0:440 0:444
## F:115 1:141 1:240 1: 1 1: 11 1: 6 1: 4 1: 0
## 2:129 2:121
##
##
##
## pept_ulcer dm dm_compli paralysis malignancy meta_tumor aids eGFR30
## 0:402 0:340 0:437 0:444 0:387 0:433 0:444 0:408
## 1: 42 1:104 1: 7 1: 57 1: 11 1: 36
##
##
##
##
## hd hcc alcohol past_rupture antiplate anticoag antithro nsaids
## 0:434 0:380 0:203 0:323 0:441 0:441 0:438 0:440
## 1: 10 1: 64 1:241 1:121 1: 3 1: 3 1: 6 1: 4
##
##
##
##
## steroid beta vaso ffp pc albner shock hosp_mortality
## 0:441 0:404 0:417 0:290 0:433 0:411 0:248 0:399
## 1: 3 1: 40 1: 27 1:154 1: 11 1: 33 1:196 1: 45
##
##
##
##
## age bmi smoke child_num
## Min. :24.00 Min. : 15.28 Min. : 0.0 Min. : 5.000
## 1st Qu.:50.00 1st Qu.: 20.76 1st Qu.: 0.0 1st Qu.: 7.000
## Median :60.00 Median : 23.11 Median : 0.0 Median : 8.000
## Mean :60.42 Mean : 24.37 Mean : 228.5 Mean : 8.426
## 3rd Qu.:70.00 3rd Qu.: 25.99 3rd Qu.: 320.0 3rd Qu.:10.000
## Max. :93.00 Max. :339.76 Max. :2100.0 Max. :14.000
## gcs cci_num map bt
## Min. : 3.00 Min. : 3.000 Min. : 0.000 Min. :33.90
## 1st Qu.:15.00 1st Qu.: 4.000 1st Qu.: 0.000 1st Qu.:36.50
## Median :15.00 Median : 4.000 Median : 4.000 Median :36.80
## Mean :14.61 Mean : 4.505 Mean : 3.214 Mean :36.81
## 3rd Qu.:15.00 3rd Qu.: 5.000 3rd Qu.: 4.000 3rd Qu.:37.10
## Max. :15.00 Max. :13.000 Max. :20.000 Max. :40.20
## sBP dBP hr bil
## Min. : 50.00 Min. :22.00 Min. : 41.00 Min. : 0.200
## 1st Qu.: 80.00 1st Qu.:46.00 1st Qu.: 73.00 1st Qu.: 1.000
## Median : 91.08 Median :54.00 Median : 85.00 Median : 1.660
## Mean : 89.96 Mean :54.84 Mean : 88.46 Mean : 2.415
## 3rd Qu.:100.00 3rd Qu.:63.00 3rd Qu.:101.00 3rd Qu.: 2.993
## Max. :149.00 Max. :92.00 Max. :160.00 Max. :16.180
## ast alt wbc hb
## Min. : 13.00 Min. : 7.00 Min. : 2100 Min. : 2.100
## 1st Qu.: 32.00 1st Qu.: 19.00 1st Qu.: 5800 1st Qu.: 7.200
## Median : 50.00 Median : 29.00 Median : 8215 Median : 8.800
## Mean : 82.03 Mean : 39.96 Mean : 9046 Mean : 8.952
## 3rd Qu.: 88.25 3rd Qu.: 44.00 3rd Qu.:11070 3rd Qu.:10.500
## Max. :1122.00 Max. :462.00 Max. :58400 Max. :16.500
## plt tp alb eGFR
## Min. : 16.00 Min. :3.000 Min. :1.400 Min. : 5.057
## 1st Qu.: 75.75 1st Qu.:5.600 1st Qu.:2.500 1st Qu.: 49.568
## Median : 103.00 Median :6.100 Median :2.900 Median : 67.749
## Mean : 117.56 Mean :6.149 Mean :2.911 Mean : 70.869
## 3rd Qu.: 140.25 3rd Qu.:6.700 3rd Qu.:3.400 3rd Qu.: 90.404
## Max. :1073.00 Max. :9.000 Max. :4.900 Max. :186.229
## bun cre crp pt
## Min. : 3.50 Min. :0.300 Min. : 0.0000 Min. : 9.99
## 1st Qu.: 16.48 1st Qu.:0.650 1st Qu.: 0.1195 1st Qu.: 46.20
## Median : 24.90 Median :0.820 Median : 0.2890 Median : 59.41
## Mean : 28.51 Mean :1.046 Mean : 0.8568 Mean : 59.11
## 3rd Qu.: 36.40 3rd Qu.:1.110 3rd Qu.: 0.7615 3rd Qu.: 72.00
## Max. :107.80 Max. :9.080 Max. :18.9370 Max. :100.20
## aptt los meld
## Min. : 18.90 Min. : 0.00 Min. : 6.00
## 1st Qu.: 29.84 1st Qu.: 5.00 1st Qu.: 6.00
## Median : 32.40 Median : 7.00 Median :10.00
## Mean : 35.85 Mean : 10.67 Mean :11.57
## 3rd Qu.: 37.18 3rd Qu.: 13.00 3rd Qu.:15.00
## Max. :200.01 Max. :217.00 Max. :40.00
md.pattern(imp.mf_val$ximp)
## /\ /\
## { `---' }
## { O O }
## ==> V <== No need for mice. This data set is completely observed.
## \ \|/ /
## `-----'
## sex barthel child_score pad stroke dimentia ch_lung rheumati pept_ulcer dm
## 444 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## dm_compli paralysis malignancy meta_tumor aids eGFR30 hd hcc alcohol
## 444 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0
## past_rupture antiplate anticoag antithro nsaids steroid beta vaso ffp pc
## 444 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0
## albner shock hosp_mortality age bmi smoke child_num gcs cci_num map bt sBP
## 444 1 1 1 1 1 1 1 1 1 1 1 1
## 0 0 0 0 0 0 0 0 0 0 0 0
## dBP hr bil ast alt wbc hb plt tp alb eGFR bun cre crp pt aptt los meld
## 444 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0
## 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
#if exclude ID is necessary add the ID
data_imp_val <- imp.mf_val$ximp
data_imp_val$pt_id <- df_val$pt_id
data_imp_val$hosp_num<- df_val$hosp_num
data_imp_val$hosp_id<- df_val$hosp_id
data_imp_val$year<- df_val$year
#check
str(data_imp_val)
## 'data.frame': 444 obs. of 63 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": 2 3 2 1 3 1 2 1 1 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
## $ shock : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ age : num 47 62 82 57 69 47 50 68 75 53 ...
## $ bmi : num 21 20.8 26.9 23.2 23.6 ...
## $ smoke : num 270 0 0 0 0 ...
## $ child_num : num 11 9 8 8 8 6 7 7 8 14 ...
## $ gcs : num 15 13 15 15 15 15 15 15 15 3 ...
## $ cci_num : num 4 4 4 3 6 5 4 4 5 5 ...
## $ map : num 0 8 6 2 4 0 2 4 4 4 ...
## $ bt : num 36.4 36.8 36.9 37.1 37.2 ...
## $ sBP : num 102 91.2 95 111 125 ...
## $ dBP : num 67 54.3 49 68 70 ...
## $ hr : num 83 82.3 60 101 121 ...
## $ bil : num 7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
## $ ast : num 112 56 37 42 74 18 19 24 31 62 ...
## $ alt : num 53 30 19 35 43 13 18 16 25 44 ...
## $ wbc : num 9800 14600 3350 12030 6800 ...
## $ hb : num 12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
## $ plt : num 69 437 109 107 107 60 140 84 152 178 ...
## $ tp : num 5.7 5.6 5.8 5.4 5.9 ...
## $ alb : num 2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
## $ eGFR : num 89.3 65.8 46.6 94.2 90.7 ...
## $ bun : num 13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
## $ cre : num 0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
## $ crp : num 0.1 0.17 0.22 0.04 0.111 ...
## $ pt : num 28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
## $ aptt : num 37.8 30.9 30.2 33.4 34.5 ...
## $ los : num 6 2 14 5 12 4 1 6 9 16 ...
## $ meld : num 20 6 10 9 10 6 10 7 6 17 ...
## $ pt_id : int 6 17 24 32 33 37 38 40 42 44 ...
## $ hosp_num : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hosp_id : int 1001 1001 1002 1002 1003 1003 1003 1003 1003 1003 ...
## $ year : int 2017 2019 2022 2022 2022 2017 2022 2020 2021 2022 ...
#write the csv
#write.csv(imp.mf$ximp, file = "data_after_imputation.csv")
str(data_imp_val)
## 'data.frame': 444 obs. of 63 variables:
## $ sex : Factor w/ 2 levels "M","F": 1 2 1 1 1 2 1 2 2 1 ...
## $ barthel : Factor w/ 3 levels "0","1","2": 2 3 2 1 3 1 2 1 1 3 ...
## $ child_score : Factor w/ 3 levels "0","1","2": 3 2 2 2 2 1 2 2 2 3 ...
## $ pad : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ stroke : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ dimentia : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ch_lung : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 1 ...
## $ rheumati : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ pept_ulcer : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 1 ...
## $ dm : Factor w/ 2 levels "0","1": 1 1 2 1 2 1 1 2 2 2 ...
## $ dm_compli : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ paralysis : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ malignancy : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ meta_tumor : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ aids : Factor w/ 1 level "0": 1 1 1 1 1 1 1 1 1 1 ...
## $ eGFR30 : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hd : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hcc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ alcohol : Factor w/ 2 levels "0","1": 2 1 1 1 1 1 1 1 2 1 ...
## $ past_rupture : Factor w/ 2 levels "0","1": 1 1 1 1 1 2 1 1 1 2 ...
## $ antiplate : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ anticoag : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ antithro : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ nsaids : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ steroid : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 2 ...
## $ beta : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ vaso : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ ffp : Factor w/ 2 levels "0","1": 1 2 1 1 1 1 1 1 1 2 ...
## $ pc : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ albner : Factor w/ 2 levels "0","1": 1 2 1 1 2 1 1 1 1 1 ...
## $ shock : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ hosp_mortality: Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
## $ age : num 47 62 82 57 69 47 50 68 75 53 ...
## $ bmi : num 21 20.8 26.9 23.2 23.6 ...
## $ smoke : num 270 0 0 0 0 ...
## $ child_num : num 11 9 8 8 8 6 7 7 8 14 ...
## $ gcs : num 15 13 15 15 15 15 15 15 15 3 ...
## $ cci_num : num 4 4 4 3 6 5 4 4 5 5 ...
## $ map : num 0 8 6 2 4 0 2 4 4 4 ...
## $ bt : num 36.4 36.8 36.9 37.1 37.2 ...
## $ sBP : num 102 91.2 95 111 125 ...
## $ dBP : num 67 54.3 49 68 70 ...
## $ hr : num 83 82.3 60 101 121 ...
## $ bil : num 7.5 0.5 0.91 2.35 2.49 0.76 1.84 2.18 1.13 3.88 ...
## $ ast : num 112 56 37 42 74 18 19 24 31 62 ...
## $ alt : num 53 30 19 35 43 13 18 16 25 44 ...
## $ wbc : num 9800 14600 3350 12030 6800 ...
## $ hb : num 12.2 3.7 7.7 9.8 11.2 10.7 6.6 12.9 10.4 7.7 ...
## $ plt : num 69 437 109 107 107 60 140 84 152 178 ...
## $ tp : num 5.7 5.6 5.8 5.4 5.9 ...
## $ alb : num 2.6 2.1 2.8 3.4 3 3.7 3.4 3.7 3.3 2.7 ...
## $ eGFR : num 89.3 65.8 46.6 94.2 90.7 ...
## $ bun : num 13.7 42.8 24.2 26.9 7.9 7.6 35 24.9 35.3 23.9 ...
## $ cre : num 0.74 0.69 1.16 0.67 0.66 0.54 0.97 0.59 0.66 1.43 ...
## $ crp : num 0.1 0.17 0.22 0.04 0.111 ...
## $ pt : num 28.5 51.3 60.4 54.8 54 80 70 68 78 69 ...
## $ aptt : num 37.8 30.9 30.2 33.4 34.5 ...
## $ los : num 6 2 14 5 12 4 1 6 9 16 ...
## $ meld : num 20 6 10 9 10 6 10 7 6 17 ...
## $ pt_id : int 6 17 24 32 33 37 38 40 42 44 ...
## $ hosp_num : int 1 1 1 1 1 1 1 1 1 1 ...
## $ hosp_id : int 1001 1001 1002 1002 1003 1003 1003 1003 1003 1003 ...
## $ year : int 2017 2019 2022 2022 2022 2017 2022 2020 2021 2022 ...
data_imp_val$meld
## [1] 20.000000 6.000000 10.000000 9.000000 10.000000 6.000000 10.000000
## [8] 7.000000 6.000000 17.000000 11.000000 14.000000 10.284000 13.000000
## [15] 16.000000 16.000000 30.000000 6.000000 13.000000 40.000000 15.000000
## [22] 12.000000 15.000000 6.000000 13.000000 6.000000 9.000000 12.000000
## [29] 9.000000 8.000000 17.000000 7.000000 13.000000 15.000000 6.000000
## [36] 9.000000 6.000000 11.000000 7.000000 6.000000 11.000000 14.000000
## [43] 24.000000 16.000000 14.000000 15.000000 17.000000 6.000000 15.000000
## [50] 12.000000 20.000000 11.000000 8.000000 13.000000 10.000000 9.000000
## [57] 10.000000 6.000000 6.000000 17.000000 6.000000 14.000000 15.000000
## [64] 6.000000 11.000000 15.000000 13.270000 10.000000 12.000000 10.000000
## [71] 10.000000 8.000000 23.000000 16.000000 17.510000 15.000000 15.000000
## [78] 14.000000 6.000000 14.000000 11.000000 10.000000 6.000000 6.000000
## [85] 15.000000 12.000000 10.000000 6.000000 6.000000 6.000000 10.000000
## [92] 18.000000 13.000000 6.000000 6.000000 11.000000 6.000000 11.000000
## [99] 19.000000 6.000000 13.000000 13.000000 13.000000 6.000000 15.876667
## [106] 10.270000 8.760000 10.000000 11.000000 16.000000 18.000000 6.000000
## [113] 6.000000 17.000000 24.000000 8.000000 22.000000 26.000000 15.000000
## [120] 8.000000 14.000000 12.000000 6.000000 16.000000 9.000000 14.000000
## [127] 6.000000 23.000000 8.000000 11.000000 6.000000 6.000000 9.000000
## [134] 12.000000 7.000000 24.000000 26.000000 13.720000 6.000000 23.000000
## [141] 7.000000 13.000000 17.000000 16.000000 6.000000 6.000000 7.000000
## [148] 33.000000 6.000000 11.000000 11.000000 6.000000 15.000000 6.000000
## [155] 26.000000 14.100000 8.754286 6.000000 7.000000 6.000000 6.886667
## [162] 6.670000 9.000000 6.000000 9.000000 12.000000 16.000000 12.000000
## [169] 6.000000 12.000000 6.000000 6.000000 6.000000 6.000000 12.000000
## [176] 20.000000 22.000000 6.000000 11.000000 7.000000 10.000000 8.000000
## [183] 6.000000 7.000000 6.000000 14.000000 14.000000 27.000000 14.000000
## [190] 6.000000 14.000000 10.000000 6.000000 6.000000 7.000000 13.000000
## [197] 23.000000 7.000000 6.000000 12.000000 6.000000 32.000000 15.000000
## [204] 6.000000 19.000000 6.000000 8.000000 15.000000 6.000000 18.000000
## [211] 8.180000 9.000000 6.000000 7.000000 17.000000 13.000000 6.000000
## [218] 9.000000 8.000000 8.000000 9.000000 24.000000 7.000000 9.000000
## [225] 12.000000 6.000000 6.000000 13.000000 6.000000 8.000000 12.000000
## [232] 12.000000 15.000000 29.000000 33.000000 8.000000 8.000000 23.000000
## [239] 22.000000 12.000000 12.000000 12.000000 7.000000 6.000000 8.000000
## [246] 6.000000 7.000000 8.000000 19.000000 15.000000 20.000000 14.000000
## [253] 12.000000 16.000000 15.000000 6.000000 6.364286 6.484286 6.000000
## [260] 6.754286 6.754286 6.854286 16.000000 6.000000 16.000000 6.000000
## [267] 6.000000 14.000000 8.942000 12.000000 22.000000 12.000000 10.000000
## [274] 19.000000 6.000000 8.000000 8.000000 11.000000 11.000000 8.000000
## [281] 7.134286 6.000000 6.000000 16.000000 18.000000 8.000000 8.000000
## [288] 8.000000 7.000000 8.000000 14.000000 10.000000 10.000000 11.000000
## [295] 6.000000 8.000000 9.000000 11.000000 8.000000 17.000000 6.000000
## [302] 6.000000 6.000000 6.000000 14.000000 15.000000 12.000000 13.000000
## [309] 9.750000 19.000000 11.000000 15.000000 20.000000 6.000000 21.000000
## [316] 11.000000 16.000000 13.000000 6.000000 6.000000 15.000000 18.000000
## [323] 10.000000 17.000000 12.000000 6.000000 12.000000 15.000000 16.000000
## [330] 6.000000 6.000000 7.000000 7.000000 32.000000 9.000000 24.000000
## [337] 15.000000 6.000000 11.000000 6.000000 6.000000 6.000000 6.000000
## [344] 6.000000 7.000000 25.000000 10.000000 7.000000 9.000000 26.000000
## [351] 19.000000 10.390000 14.610000 6.700000 9.000000 14.000000 11.000000
## [358] 8.000000 6.000000 6.000000 24.000000 15.000000 14.000000 11.000000
## [365] 8.000000 8.000000 7.000000 13.000000 20.000000 15.000000 8.000000
## [372] 6.000000 11.000000 14.000000 11.000000 11.000000 6.000000 6.000000
## [379] 6.000000 23.000000 20.000000 9.000000 29.000000 6.000000 6.000000
## [386] 11.000000 6.000000 8.000000 8.000000 6.000000 6.000000 13.000000
## [393] 19.000000 10.000000 6.000000 15.000000 6.000000 18.000000 21.000000
## [400] 6.000000 10.000000 19.000000 26.000000 6.000000 19.000000 23.000000
## [407] 16.000000 6.000000 8.000000 6.000000 14.000000 19.000000 15.000000
## [414] 14.000000 22.000000 11.000000 11.000000 7.000000 19.730000 15.000000
## [421] 11.000000 18.000000 20.000000 6.000000 6.000000 8.510000 6.000000
## [428] 6.000000 6.000000 15.000000 6.000000 6.000000 16.000000 6.000000
## [435] 11.000000 22.000000 20.000000 7.000000 6.000000 9.000000 6.000000
## [442] 6.000000 12.000000 10.000000
data_imp_val$meld <- round(data_imp_val$meld)
val_imp <-
data_imp_val|>
mutate(
hosp_id=as.integer(hosp_id),
pt_id=as.integer(pt_id),
hosp_num=as.integer(hosp_num),
year=as.integer(year),
age=as.integer(age),
sex= factor(sex, levels = c("M", "F")),
smoke= as.integer(smoke),
barthel= factor(barthel, levels = c("0", "1", "2")),
child_num= as.integer(round(data_imp_val$child_num)),
child_score=factor(child_score, levels = c("0", "1", "2")),
gcs=as.integer(gcs),
cci_num=as.integer(cci_num),
pad=factor(pad),
stroke=factor(stroke),
dimentia=factor(dimentia),
ch_lung=factor(ch_lung),
rheumati=factor(rheumati),
pept_ulcer=factor(pept_ulcer),
dm=factor(dm),
dm_compli=factor(dm_compli),
paralysis=factor(paralysis),
malignancy=factor(malignancy),
meta_tumor=factor(meta_tumor),
aids=factor(aids),
eGFR30=factor(eGFR30),
hd=factor(hd),
hcc=factor(hcc),
alcohol=factor(alcohol),
past_rupture=factor(past_rupture),
antiplate=factor(antiplate),
anticoag=factor(anticoag),
antithro=factor(antithro),
nsaids=factor(nsaids),
steroid=factor(steroid),
beta=factor(beta),
vaso=factor(vaso),
map= as.integer(map),
ffp=factor(ffp),
pc=factor(pc),
albner=factor(albner),
sBP= as.integer(sBP),
dBP= as.integer(dBP),
hr=as.integer(hr),
shock=factor(shock),
los=as.integer(los),
meld=as.integer(meld)
)
#新規カテゴリ列を作成
val_imp$age_cate <- ifelse(val_imp$age >= 60, 1, 0)
val_imp$bmi_cate <- ifelse(val_imp$bmi >= 25, 1, 0) # bmiは25以上:1, 25未満:0
val_imp$gcs_cate <- ifelse(val_imp$gcs <= 12, 1, 0)
val_imp$cci_cate <- ifelse(val_imp$cci_num >= 6, 1, 0)
val_imp$bt_cate <- ifelse(val_imp$bt >= 37, 1, 0)
val_imp$sBP_cate <- ifelse(val_imp$sBP < 80, 1, 0)
val_imp$dBP_cate <- ifelse(val_imp$dBP < 50, 1, 0)
val_imp$hr_cate <- ifelse(val_imp$hr >= 100, 1, 0)
val_imp$bil_cate <- ifelse(val_imp$bil >= 5, 1, 0)
val_imp$ast_cate <- ifelse(val_imp$ast >= 200, 1, 0)
val_imp$alt_cate <- ifelse(val_imp$alt >= 100, 1, 0)
val_imp$wbc_cate <- ifelse(val_imp$wbc >= 12000, 1, 0)
val_imp$hb_cate <- ifelse(val_imp$hb < 8, 1, 0)
val_imp$plt_cate <- ifelse(val_imp$plt < 100, 1, 0) # pltは100未満:1、100以上:0
val_imp$tp_cate <- ifelse(val_imp$tp < 6, 1, 0)
val_imp$alb_cate <- ifelse(val_imp$alb < 2.8, 1, 0)
val_imp$cre_cate <- ifelse(val_imp$cre >= 1.5, 1, 0)
val_imp$crp_cate <- ifelse(val_imp$crp >= 2, 1, 0)
val_imp$pt_cate <- ifelse(val_imp$pt < 50, 1, 0)
val_imp$aptt_cate <- ifelse(val_imp$aptt >= 50, 1, 0) # apttは50以上:1, 50未満:0
#str(val_imp)
#col_fact_cate=c("sex","barthel","child_score","pad","stroke","dimentia","ch_lung","rheumati","pept_ulcer","dm","dm_compli","paralysis","maligna#ncy","meta_tumor","aids","eGFR30","hd","hcc","alcohol","past_rupture","antiplate","anticoag","antithro","nsaids","steroid","beta", #"vaso","ffp","pc", "albner","shock","hosp_mortality","age_cate","bmi_cate","gcs_cate","cci_cate","bt_cate","sBP_cate","dBP_cate","hr_cate","bil#_cate","ast_cate","alt_cate","wbc_cate","hb_cate","plt_cate","tp_cate","alb_cate","cre_cate","crp_cate","pt_cate","aptt_cate")
#
## Create your table
#val_imp %>%
# select(c(col_fact_cate)) %>%
# CreateTableOne(vars = c(col_fact_cate), strata="hosp_mortality",factorVars = col_fact_cate, addOverall = T) -> tableone_dev_imp_cate
#
#
## Print your table
#print(tableone_dev_imp_cate, smd = TRUE, missing = TRUE, test = TRUE, explain = TRUE)
## specify your data and variables
#tbl_summary(data = val_imp,
# by = "hosp_mortality",
# type = list(gcs ~ "continuous", year ~ "categorical"),
# statistic = all_continuous() ~ "{median} ({p25}, {p75})",
# digits = all_continuous() ~ c(0, 2))
#
# ダミー変数を作成
dummy_vars_val <- model.matrix(~barthel, data = val_imp)
# データフレームに追加
val_imp <- cbind(val_imp, dummy_vars_val)
val_imp <- val_imp[ , !(names(val_imp) %in% "(Intercept)")]
確認
#str(val_imp)
# ダミー変数を作成
dummy_vars <- model.matrix(~barthel, data = dev_imp)
# データフレームに追加
dev_imp <- cbind(dev_imp, dummy_vars)
確認
#str(dev_imp)
dev_imp <- dev_imp[ , !(names(dev_imp) %in% "(Intercept)")]
dev_imp <- dev_imp[ , !(names(dev_imp) %in% "fitted")]
dev_imp <- dev_imp[ , !(names(dev_imp) %in% "diff2")]
確認
#str(dev_imp)
# モデルのサマリーを表示
fit_reduced_model <- lrm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = dev_imp)
# モデルの係数を表示
coefficients(fit_reduced_model)
## Intercept sBP_cate gcs_cate bil_cate cre_cate alb_cate
## -4.260968 2.428260 1.446221 1.340650 1.294727 1.125006
# datadistを設定
ddist <- datadist(dev_imp)
options(datadist='ddist')
# モデルのサマリーを表示
fit_reduced_model <- lrm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = dev_imp)
summary(fit_reduced_model)
## Effects Response : hosp_mortality
##
## Factor Low High Diff. Effect S.E. Lower 0.95 Upper 0.95
## sBP_cate 0 1 1 2.4283 0.34598 1.75010 3.1064
## Odds Ratio 0 1 1 11.3390 NA 5.75540 22.3400
## gcs_cate 0 1 1 1.4462 0.41117 0.64035 2.2521
## Odds Ratio 0 1 1 4.2470 NA 1.89710 9.5076
## bil_cate 0 1 1 1.3407 0.49251 0.37535 2.3060
## Odds Ratio 0 1 1 3.8215 NA 1.45550 10.0340
## cre_cate 0 1 1 1.2947 0.41088 0.48942 2.1000
## Odds Ratio 0 1 1 3.6500 NA 1.63140 8.1664
## alb_cate 0 1 1 1.1250 0.35302 0.43311 1.8169
## Odds Ratio 0 1 1 3.0802 NA 1.54200 6.1528
dev_imp$alb_score <- ifelse(dev_imp$alb_cate == 1, 1, 0)
dev_imp$sBP_score <- ifelse(dev_imp$sBP_cate == 1, 2, 0)
dev_imp$gcs_score <- ifelse(dev_imp$gcs_cate == 1, 1, 0)
dev_imp$bil_score <- ifelse(dev_imp$bil_cate == 1, 1, 0)
dev_imp$cre_score <- ifelse(dev_imp$cre_cate == 1, 1, 0)
dev_imp$sum_score <- dev_imp$alb_score + dev_imp$sBP_score + dev_imp$gcs_score + dev_imp$bil_score + dev_imp$cre_score
val_imp$alb_score <- ifelse(val_imp$alb_cate == 1, 1, 0)
val_imp$sBP_score <- ifelse(val_imp$sBP_cate == 1, 2, 0)
val_imp$gcs_score <- ifelse(val_imp$gcs_cate == 1, 1, 0)
val_imp$bil_score <- ifelse(val_imp$bil_cate == 1, 1, 0)
val_imp$cre_score <- ifelse(val_imp$cre_cate == 1, 1, 0)
val_imp$sum_score <- val_imp$alb_score + val_imp$sBP_score + val_imp$gcs_score + val_imp$bil_score + val_imp$cre_score
completedata_val$alb_score <- ifelse(completedata_val$alb_cate == 1, 1, 0)
completedata_val$sBP_score <- ifelse(completedata_val$sBP_cate == 1, 2, 0)
completedata_val$gcs_score <- ifelse(completedata_val$gcs_cate == 1, 1, 0)
completedata_val$bil_score <- ifelse(completedata_val$bil_cate == 1, 1, 0)
completedata_val$cre_score <- ifelse(completedata_val$cre_cate == 1, 1, 0)
completedata_val$sum_score <- completedata_val$alb_score + completedata_val$sBP_score + completedata_val$gcs_score + completedata_val$bil_score + completedata_val$cre_score
# 開発データセットのROCカーブとAUC
roc_obj_dev <- roc(dev_imp$hosp_mortality, dev_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc_obj_dev, main="ROC curve for the development data")
# Calculate AUC and its confidence interval for development data
auc_roc_dev <- auc(roc_obj_dev)
ci_dev <- ci.auc(roc_obj_dev)
# Calculate the upper bound of the CI
upper_bound_dev <- 2*auc_roc_dev - ci_dev[1]
# Print the AUC and its confidence interval for development data
cat("AUC for the development data: ", auc_roc_dev, "\n")
## AUC for the development data: 0.8916536
cat("95% CI for AUC (development): (", ci_dev[1], ",", upper_bound_dev, ")\n")
## 95% CI for AUC (development): ( 0.8498056 , 0.9335016 )
# 検証データセットのROCカーブとAUC
roc_obj_val <- roc(val_imp$hosp_mortality, val_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc_obj_val, main="ROC curve for the validation data")
# Calculate AUC and its confidence interval for validation data
auc_roc_val <- auc(roc_obj_val)
ci_val <- ci.auc(roc_obj_val)
# Calculate the upper bound of the CI for validation data
upper_bound_val <- 2*auc_roc_val - ci_val[1]
# Print the AUC and its confidence interval for validation data
cat("AUC for the validation data: ", auc_roc_val, "\n")
## AUC for the validation data: 0.8896965
cat("95% CI for AUC (validation): (", ci_val[1], ",", upper_bound_val, ")\n")
## 95% CI for AUC (validation): ( 0.849892 , 0.9295009 )
# 検証データセットのROCカーブとAUC
roc_obj_completedata_val <- roc(completedata_val$hosp_mortality, completedata_val$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
plot(roc_obj_completedata_val, main="ROC curve for the completedata validation data")
# Calculate AUC and its confidence interval for validation data
auc_roc_completedata_val <- auc(roc_obj_completedata_val)
ci_completedata_val <- ci.auc(roc_obj_completedata_val)
# Calculate the upper bound of the CI for validation data
upper_bound_completedata_val <- 2*auc_roc_completedata_val- ci_completedata_val[1]
# Print the AUC and its confidence interval for validation data
cat("AUC for the completedata validation data: ", auc_roc_completedata_val, "\n")
## AUC for the completedata validation data: 0.8674192
cat("95% CI for AUC (completedata validation data): (", ci_completedata_val[1], ",", upper_bound_completedata_val, ")\n")
## 95% CI for AUC (completedata validation data): ( 0.8020284 , 0.93281 )
# 開発データと検証データのROCオブジェクトの計算
roc_obj_dev <- roc(dev_imp$hosp_mortality, dev_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_obj_val <- roc(val_imp$hosp_mortality, val_imp$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# 開発データと検証データのAUCの計算
auc_dev <- auc(roc_obj_dev)
auc_val <- auc(roc_obj_val)
# ROC dataをデータフレームに変換
roc_data <- data.frame(
Specificity = c(roc_obj_dev$specificities, roc_obj_val$specificities),
Sensitivity = c(roc_obj_dev$sensitivities, roc_obj_val$sensitivities),
ROC = c(rep("Development", length(roc_obj_dev$sensitivities)), rep("Validation: Imputation data", length(roc_obj_val$sensitivities)))
)
# Plot ROC curve
library(ggplot2)
ggplot(data = roc_data, aes(x = Specificity, y = Sensitivity, color = ROC)) +
geom_line() +
scale_x_reverse(limits = c(1, 0)) +
labs(x = "Specificity", y = "Sensitivity") +
theme_minimal() +
scale_color_manual(values = c("Development" = "blue", "Validation: Imputation data" = "red")) +
annotate("text", x = 0.75, y = 0.25, label = paste("AUC for Development = ", round(auc_dev, 3)), color="blue") +
annotate("text", x = 0.75, y = 0.15, label = paste("AUC for Validation Imputation data = ", round(auc_val, 3)), color="red")
# 開発データセットのAUCの表示
print(paste("AUC for the development data: ", auc_dev))
## [1] "AUC for the development data: 0.891653599218912"
# 検証データセットのAUCの表示
print(paste("AUC for the Validation: After imputation: ", auc_val))
## [1] "AUC for the Validation: After imputation: 0.889696463380674"
# 検証データセットのROCオブジェクトの計算
roc_obj_completedata_val <- roc(completedata_val$hosp_mortality, completedata_val$sum_score)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# 検証データセットのAUCの計算
auc_completedata_val <- auc(roc_obj_completedata_val)
# ROC dataにcompletedataに基づくROC dataを追加
roc_data <- rbind(roc_data, data.frame(
Specificity = roc_obj_completedata_val$specificities,
Sensitivity = roc_obj_completedata_val$sensitivities,
ROC = rep("Validation: Complete data", length(roc_obj_completedata_val$sensitivities))
))
# ROCカラムのファクターレベルを調整
roc_data$ROC <- factor(roc_data$ROC, levels = c("Development", "Validation: Imputation data", "Validation: Complete data"))
# ROC曲線の描画
ggplot(data = roc_data, aes(x = Specificity, y = Sensitivity, color = ROC)) +
geom_line() +
scale_x_reverse(limits = c(1, 0)) +
labs(x = "Specificity", y = "Sensitivity") +
theme_minimal() +
scale_color_manual(values = c("Development" = "blue", "Validation: Imputation data" = "red", "Validation: Complete data" = "green4")) +
annotate("text", x = 0.75, y = 0.35, label = paste("Development = ", round(auc_dev, 3)), color="blue") +
annotate("text", x = 0.75, y = 0.25, label = paste("Imputation data = ", round(auc_val, 3)), color="red") +
annotate("text", x = 0.75, y = 0.15, label = paste("Complete data = ", round(auc_completedata_val, 3)), color="green4")
# 開発データセットのAUCの表示
print(paste("AUC:Development: ", auc_dev))
## [1] "AUC:Development: 0.891653599218912"
# 検証データセットのAUCの表示
print(paste("AUC:Validation Imputation data: ", auc_val))
## [1] "AUC:Validation Imputation data: 0.889696463380674"
# 完全データセットのAUCの表示
print(paste("AUC:Validation Complete data: ", auc_completedata_val))
## [1] "AUC:Validation Complete data: 0.867419212040726"
# child_numのみを用いたモデル
model_child_num <- glm(hosp_mortality ~ child_num, data = dev_imp, family = binomial())
pred_child_num_dev <- predict(model_child_num, dev_imp, type = "response")
# ageのみを用いたモデル
model_age <- glm(hosp_mortality ~ age, data = dev_imp, family = binomial())
pred_age_dev <- predict(model_age, dev_imp, type = "response")
# MELDスコアのみを用いたモデル
model_meld <- glm(hosp_mortality ~ meld, data = dev_imp, family = binomial())
pred_meld_dev <- predict(model_meld, dev_imp, type = "response")
# ROC curveとAUCの計算
roc_obj_child_num_dev <- roc(dev_imp$hosp_mortality, pred_child_num_dev)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_obj_age_dev <- roc(dev_imp$hosp_mortality, pred_age_dev)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# AUCの計算
auc_child_num_dev <- auc(roc_obj_child_num_dev)
auc_age_dev <- auc(roc_obj_age_dev)
# ROC curveをデータフレームに変換
roc_data <- data.frame(
Specificity = c(roc_obj_dev$specificities, roc_obj_child_num_dev$specificities, roc_obj_age_dev$specificities),
Sensitivity = c(roc_obj_dev$sensitivities, roc_obj_child_num_dev$sensitivities, roc_obj_age_dev$sensitivities),
ROC = c(rep("HOPE-EVL score", length(roc_obj_dev$sensitivities)),
rep("Child-Pugh score", length(roc_obj_child_num_dev$sensitivities)),
rep("Age", length(roc_obj_age_dev$sensitivities)))
)
# MELDスコアのみを用いたモデル
model_meld <- glm(hosp_mortality ~ meld, data = dev_imp, family = binomial())
pred_meld_dev <- predict(model_meld, dev_imp, type = "response")
# ROC curveとAUCの計算
roc_obj_meld_dev <- roc(dev_imp$hosp_mortality, pred_meld_dev)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# AUCの計算
auc_meld_dev <- auc(roc_obj_meld_dev)
# ROC curveをデータフレームに追加
roc_data <- rbind(roc_data,
data.frame(Specificity = roc_obj_meld_dev$specificities,
Sensitivity = roc_obj_meld_dev$sensitivities,
ROC = rep("MELD score", length(roc_obj_meld_dev$sensitivities))))
# ROCカラムのファクターレベルを調整
roc_data$ROC <- factor(roc_data$ROC, levels = c("HOPE-EVL score", "MELD score", "Child-Pugh score", "Age"))
# Plot ROC curve
ggplot(data = roc_data, aes(x = Specificity, y = Sensitivity, color = ROC)) +
geom_line() +
scale_x_reverse(limits = c(1, 0)) +
labs(x = "Specificity", y = "Sensitivity") +
theme_minimal() +
scale_color_manual(values = c("HOPE-EVL score" = "#374E55FF",
"MELD score" = "#DF8F44FF",
"Child-Pugh score" = "#00A1D5FF",
"Age" = "#B24745FF")) +
annotate("text", x = 0.75, y = 0.25, label = paste("HOPE-EVL score = ", round(auc_dev, 3)), color="#374E55FF") +
annotate("text", x = 0.75, y = 0.20, label = paste("MELD score = ", round(auc_meld_dev, 3)), color="#DF8F44FF") +
annotate("text", x = 0.75, y = 0.15, label = paste("Child-Pugh score = ", round(auc_child_num_dev, 3)), color="#00A1D5FF") +
annotate("text", x = 0.75, y = 0.10, label = paste("Age = ", round(auc_age_dev, 3)), color="#B24745FF")
# Compute CI for Child-Pugh score
ci_child_num_dev <- ci.auc(roc_obj_child_num_dev)
ci_child_num_dev_upper <- 2*auc_child_num_dev - ci_child_num_dev[1]
cat("AUC for Child-Pugh score: ", auc_child_num_dev, "\n")
## AUC for Child-Pugh score: 0.8069913
cat("95% CI for AUC (Child-Pugh score): (", ci_child_num_dev[1], ",", ci_child_num_dev_upper, ")\n")
## 95% CI for AUC (Child-Pugh score): ( 0.7525509 , 0.8614318 )
# Compute CI for Age
ci_age_dev <- ci.auc(roc_obj_age_dev)
ci_age_dev_upper <- 2*auc_age_dev - ci_age_dev[1]
cat("AUC for Age: ", auc_age_dev, "\n")
## AUC for Age: 0.5713335
cat("95% CI for AUC (Age): (", ci_age_dev[1], ",", ci_age_dev_upper, ")\n")
## 95% CI for AUC (Age): ( 0.4999139 , 0.6427531 )
# Compute CI for MELD score
ci_meld_dev <- ci.auc(roc_obj_meld_dev)
ci_meld_dev_upper <- 2*auc_meld_dev - ci_meld_dev[1]
cat("AUC for MELD score: ", auc_meld_dev, "\n")
## AUC for MELD score: 0.8087517
cat("95% CI for AUC (MELD score): (", ci_meld_dev[1], ",", ci_meld_dev_upper, ")\n")
## 95% CI for AUC (MELD score): ( 0.757591 , 0.8599124 )
# HOPE-EVL score と Child-Pugh score のAUCの比較
roc_obj_hope <- roc(dev_imp$hosp_mortality, dev_imp$sum_score) # HOPE-EVL scoreのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_obj_child <- roc(dev_imp$hosp_mortality, pred_child_num_dev) # Child-Pugh scoreのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_test_hope_child <- roc.test(roc_obj_hope, roc_obj_child)
cat("DeLong's test for HOPE-EVL score and Child-Pugh score: ", roc_test_hope_child$p.value, "\n")
## DeLong's test for HOPE-EVL score and Child-Pugh score: 0.003914966
# HOPE-EVL score と Age のAUCの比較
roc_obj_age <- roc(dev_imp$hosp_mortality, pred_age_dev) # AgeのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_test_hope_age <- roc.test(roc_obj_hope, roc_obj_age)
cat("DeLong's test for HOPE-EVL score and Age: ", roc_test_hope_age$p.value, "\n")
## DeLong's test for HOPE-EVL score and Age: 0.00000000000003015109
# HOPE-EVL score と MELD score のAUCの比較
roc_obj_meld <- roc(dev_imp$hosp_mortality, pred_meld_dev) # MELD scoreのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_test_hope_meld <- roc.test(roc_obj_hope, roc_obj_meld)
cat("DeLong's test for HOPE-EVL score and MELD score: ", roc_test_hope_meld$p.value, "\n")
## DeLong's test for HOPE-EVL score and MELD score: 0.003090362
# 検証データセットでの予測
pred_child_num_val <- predict(model_child_num, val_imp, type = "response")
pred_age_val <- predict(model_age, val_imp, type = "response")
# 検証データセットでのROC curveとAUCの計算
roc_obj_child_num_val <- roc(val_imp$hosp_mortality, pred_child_num_val)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_obj_age_val <- roc(val_imp$hosp_mortality, pred_age_val)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# 検証データセットでのAUCの計算
auc_child_num_val <- auc(roc_obj_child_num_val)
auc_age_val <- auc(roc_obj_age_val)
# ROC curveをデータフレームに変換
roc_data <- data.frame(
Specificity = c(roc_obj_val$specificities, roc_obj_child_num_val$specificities, roc_obj_age_val$specificities),
Sensitivity = c(roc_obj_val$sensitivities, roc_obj_child_num_val$sensitivities, roc_obj_age_val$sensitivities),
ROC = c(rep("HOPE-EVL score", length(roc_obj_val$sensitivities)),
rep("Child-Pugh score", length(roc_obj_child_num_val$sensitivities)),
rep("Age", length(roc_obj_age_val$sensitivities)))
)
# MELDスコアのみを用いたモデルで予測
pred_meld_val <- predict(model_meld, val_imp, type = "response")
# 検証データセットでのROC curveとAUCの計算
roc_obj_meld_val <- roc(val_imp$hosp_mortality, pred_meld_val)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# 検証データセットでのAUCの計算
auc_meld_val <- auc(roc_obj_meld_val)
# ROC curveをデータフレームに変換
roc_data <- rbind(roc_data,
data.frame(Specificity = roc_obj_meld_val$specificities,
Sensitivity = roc_obj_meld_val$sensitivities,
ROC = rep("MELD score", length(roc_obj_meld_val$sensitivities))))
# ROCカラムのファクターレベルを調整
roc_data$ROC <- factor(roc_data$ROC, levels = c("HOPE-EVL score", "MELD score", "Child-Pugh score", "Age"))
# Plot ROC curve
ggplot(data = roc_data, aes(x = Specificity, y = Sensitivity, color = ROC)) +
geom_line() +
scale_x_reverse(limits = c(1, 0)) +
labs(x = "Specificity", y = "Sensitivity") +
theme_minimal() +
scale_color_manual(values = c("HOPE-EVL score" = "#374E55FF",
"MELD score" = "#DF8F44FF",
"Child-Pugh score" = "#00A1D5FF",
"Age" = "#B24745FF")) +
annotate("text", x = 0.75, y = 0.25, label = paste("HOPE-EVL score = ", round(auc_val, 3)), color="#374E55FF") +
annotate("text", x = 0.75, y = 0.20, label = paste("MELD score = ", round(auc_meld_val, 3)), color="#DF8F44FF") +
annotate("text", x = 0.75, y = 0.15, label = paste("Child-Pugh score = ", round(auc_child_num_val, 3)), color="#00A1D5FF") +
annotate("text", x = 0.75, y = 0.10, label = paste("Age = ", round(auc_age_val, 3)), color="#B24745FF")
# Compute CI for Child-Pugh score
ci_child_num_val <- ci.auc(roc_obj_child_num_val)
ci_child_num_val_upper <- 2*auc_child_num_val - ci_child_num_val[1]
cat("AUC for Child-Pugh score: ", auc_child_num_val, "\n")
## AUC for Child-Pugh score: 0.797995
cat("95% CI for AUC (Child-Pugh score): (", ci_child_num_val[1], ",", ci_child_num_val_upper, ")\n")
## 95% CI for AUC (Child-Pugh score): ( 0.7269886 , 0.8690014 )
# Compute CI for Age
ci_age_val <- ci.auc(roc_obj_age_val)
ci_age_val_upper <- 2*auc_age_val - ci_age_val[1]
cat("AUC for Age: ", auc_age_val, "\n")
## AUC for Age: 0.5504595
cat("95% CI for AUC (Age): (", ci_age_val[1], ",", ci_age_val_upper, ")\n")
## 95% CI for AUC (Age): ( 0.4638899 , 0.6370291 )
# Compute CI for MELD score
ci_meld_val <- ci.auc(roc_obj_meld_val)
ci_meld_val_upper <- 2*auc_meld_val - ci_meld_val[1]
cat("AUC for MELD score: ", auc_meld_val, "\n")
## AUC for MELD score: 0.8529936
cat("95% CI for AUC (MELD score): (", ci_meld_val[1], ",", ci_meld_val_upper, ")\n")
## 95% CI for AUC (MELD score): ( 0.7942431 , 0.9117441 )
# HOPE-EVL score と Child-Pugh score のAUCの比較(検証データ)
roc_obj_hope_val <- roc(val_imp$hosp_mortality, val_imp$sum_score) # HOPE-EVL scoreのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_obj_child_val <- roc(val_imp$hosp_mortality, pred_child_num_val) # Child-Pugh scoreのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_test_hope_child_val <- roc.test(roc_obj_hope_val, roc_obj_child_val)
cat("DeLong's test for HOPE-EVL score and Child-Pugh score (validation data): ", roc_test_hope_child_val$p.value, "\n")
## DeLong's test for HOPE-EVL score and Child-Pugh score (validation data): 0.01626644
# HOPE-EVL score と Age のAUCの比較(検証データ)
roc_obj_age_val <- roc(val_imp$hosp_mortality, pred_age_val) # AgeのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_test_hope_age_val <- roc.test(roc_obj_hope_val, roc_obj_age_val)
cat("DeLong's test for HOPE-EVL score and Age (validation data): ", roc_test_hope_age_val$p.value, "\n")
## DeLong's test for HOPE-EVL score and Age (validation data): 0.00000000001817155
# HOPE-EVL score と MELD score のAUCの比較(検証データ)
roc_obj_meld_val <- roc(val_imp$hosp_mortality, pred_meld_val) # MELD scoreのROC curve計算
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_test_hope_meld_val <- roc.test(roc_obj_hope_val, roc_obj_meld_val)
cat("DeLong's test for HOPE-EVL score and MELD score (validation data): ", roc_test_hope_meld_val$p.value, "\n")
## DeLong's test for HOPE-EVL score and MELD score (validation data): 0.2446392
# 完全なデータセットでの予測
pred_child_num_val_comp <- predict(model_child_num, completedata_val, type = "response")
pred_age_val_comp <- predict(model_age, completedata_val, type = "response")
pred_meld_val_comp <- predict(model_meld, completedata_val, type = "response")
# 完全なデータセットでのROC curveとAUCの計算
roc_obj_child_num_val_comp <- roc(completedata_val$hosp_mortality, pred_child_num_val_comp)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
roc_obj_age_val_comp <- roc(completedata_val$hosp_mortality, pred_age_val_comp)
## Setting levels: control = 0, case = 1
## Setting direction: controls > cases
roc_obj_meld_val_comp <- roc(completedata_val$hosp_mortality, pred_meld_val_comp)
## Setting levels: control = 0, case = 1
## Setting direction: controls < cases
# 完全なデータセットでのAUCの計算
auc_child_num_val_comp <- auc(roc_obj_child_num_val_comp)
auc_age_val_comp <- auc(roc_obj_age_val_comp)
auc_meld_val_comp <- auc(roc_obj_meld_val_comp)
# ROC curveをデータフレームに変換
roc_data <- data.frame(
Specificity = c(roc_obj_completedata_val$specificities, roc_obj_child_num_val_comp$specificities, roc_obj_age_val_comp$specificities, roc_obj_meld_val_comp$specificities),
Sensitivity = c(roc_obj_completedata_val$sensitivities, roc_obj_child_num_val_comp$sensitivities, roc_obj_age_val_comp$sensitivities, roc_obj_meld_val_comp$sensitivities),
ROC = c(rep("HOPE-EVL score", length(roc_obj_completedata_val$sensitivities)),
rep("Child-Pugh score", length(roc_obj_child_num_val_comp$sensitivities)),
rep("Age", length(roc_obj_age_val_comp$sensitivities)),
rep("MELD score", length(roc_obj_meld_val_comp$sensitivities)))
)
# ROCカラムのファクターレベルを調整
roc_data$ROC <- factor(roc_data$ROC, levels = c("HOPE-EVL score", "MELD score", "Child-Pugh score", "Age"))
# Plot ROC curve
ggplot(data = roc_data, aes(x = Specificity, y = Sensitivity, color = ROC)) +
geom_line() +
scale_x_reverse(limits = c(1, 0)) +
labs(x = "Specificity", y = "Sensitivity") +
theme_minimal() +
scale_color_manual(values = c("HOPE-EVL score" = "#374E55FF",
"MELD score" = "#DF8F44FF",
"Child-Pugh score" = "#00A1D5FF",
"Age" = "#B24745FF")) +
annotate("text", x = 0.75, y = 0.25, label = paste("HOPE-EVL score = ", round(auc_completedata_val, 3)), color="#374E55FF") +
annotate("text", x = 0.75, y = 0.20, label = paste("MELD score = ", round(auc_meld_val_comp, 3)), color="#DF8F44FF") +
annotate("text", x = 0.75, y = 0.15, label = paste("Child-Pugh score = ", round(auc_child_num_val_comp, 3)), color="#00A1D5FF") +
annotate("text", x = 0.75, y = 0.10, label = paste("Age = ", round(auc_age_val_comp, 3)), color="#B24745FF")
# Compute CI for Child-Pugh score
ci_child_num_val_comp <- ci.auc(roc_obj_child_num_val_comp)
ci_child_num_val_comp_upper <- 2*auc_child_num_val_comp - ci_child_num_val_comp[1]
cat("AUC for Child-Pugh score: ", auc_child_num_val_comp, "\n")
## AUC for Child-Pugh score: 0.748008
cat("95% CI for AUC (Child-Pugh score): (", ci_child_num_val_comp[1], ",", ci_child_num_val_comp_upper, ")\n")
## 95% CI for AUC (Child-Pugh score): ( 0.6115139 , 0.8845021 )
# Compute CI for Age
ci_age_val_comp <- ci.auc(roc_obj_age_val_comp)
ci_age_val_comp_upper <- 2*auc_age_val_comp - ci_age_val_comp[1]
cat("AUC for Age: ", auc_age_val_comp, "\n")
## AUC for Age: 0.5130589
cat("95% CI for AUC (Age): (", ci_age_val_comp[1], ",", ci_age_val_comp_upper, ")\n")
## 95% CI for AUC (Age): ( 0.3775144 , 0.6486033 )
# Compute CI for MELD score
ci_meld_val_comp <- ci.auc(roc_obj_meld_val_comp)
ci_meld_val_comp_upper <- 2*auc_meld_val_comp - ci_meld_val_comp[1]
cat("AUC for MELD score: ", auc_meld_val_comp, "\n")
## AUC for MELD score: 0.8173971
cat("95% CI for AUC (MELD score): (", ci_meld_val_comp[1], ",", ci_meld_val_comp_upper, ")\n")
## 95% CI for AUC (MELD score): ( 0.709188 , 0.9256062 )
# HOPE-EVL score と Child-Pugh score のAUCの比較(完全なデータ)
roc_test_hope_child_val_comp <- roc.test(roc_obj_completedata_val, roc_obj_child_num_val_comp)
cat("DeLong's test for HOPE-EVL score and Child-Pugh score (complete data): ", roc_test_hope_child_val_comp$p.value, "\n")
## DeLong's test for HOPE-EVL score and Child-Pugh score (complete data): 0.09065988
# HOPE-EVL score と Age のAUCの比較(完全なデータ)
roc_test_hope_age_val_comp <- roc.test(roc_obj_completedata_val, roc_obj_age_val_comp)
cat("DeLong's test for HOPE-EVL score and Age (complete data): ", roc_test_hope_age_val_comp$p.value, "\n")
## DeLong's test for HOPE-EVL score and Age (complete data): 0.00000004647915
# HOPE-EVL score と MELD score のAUCの比較(完全なデータ)
roc_test_hope_meld_val_comp <- roc.test(roc_obj_completedata_val, roc_obj_meld_val_comp)
cat("DeLong's test for HOPE-EVL score and MELD score (complete data): ", roc_test_hope_meld_val_comp$p.value, "\n")
## DeLong's test for HOPE-EVL score and MELD score (complete data): 0.2621558
# Compute CI for Child-Pugh score
ci_child_num_val <- ci.auc(roc_obj_child_num_val)
ci_child_num_val_upper <- 2*auc_child_num_val - ci_child_num_val[1]
cat("AUC for Child-Pugh score: ", auc_child_num_val, "\n")
## AUC for Child-Pugh score: 0.797995
cat("95% CI for AUC (Child-Pugh score): (", ci_child_num_val[1], ",", ci_child_num_val_upper, ")\n")
## 95% CI for AUC (Child-Pugh score): ( 0.7269886 , 0.8690014 )
# Compute CI for Age
ci_age_val <- ci.auc(roc_obj_age_val)
ci_age_val_upper <- 2*auc_age_val - ci_age_val[1]
cat("AUC for Age: ", auc_age_val, "\n")
## AUC for Age: 0.5504595
cat("95% CI for AUC (Age): (", ci_age_val[1], ",", ci_age_val_upper, ")\n")
## 95% CI for AUC (Age): ( 0.4638899 , 0.6370291 )
# Compute CI for MELD score
ci_meld_val <- ci.auc(roc_obj_meld_val)
ci_meld_val_upper <- 2*auc_meld_val - ci_meld_val[1]
cat("AUC for MELD score: ", auc_meld_val, "\n")
## AUC for MELD score: 0.8529936
cat("95% CI for AUC (MELD score): (", ci_meld_val[1], ",", ci_meld_val_upper, ")\n")
## 95% CI for AUC (MELD score): ( 0.7942431 , 0.9117441 )
# dev_impデータセットのsum_scoreのヒストグラム
hist(dev_imp$sum_score, main = "Histogram of total score for development data", xlab = "sum_score")
# val_impデータセットのsum_scoreのヒストグラム
hist(val_imp$sum_score, main = "Histogram of total score for validation data", xlab = "sum_score")
#モデルの構築
dev_imp <-
dev_imp|>
mutate(hosp_mortality=as.numeric(hosp_mortality)) #numericにしないと回帰が回らない
val_imp <- val_imp |>
mutate(hosp_mortality = as.numeric(hosp_mortality)-1) #valは1,2になってしまっているため対処
model <- glm(hosp_mortality ~ sBP_cate +gcs_cate + bil_cate + cre_cate + alb_cate, data = dev_imp, family = binomial())
# サマリー統計量の取得
summary_stats <- summary(model)
# 係数(coefficient)、標準誤差(SE)、およびp値の取得
coefficients <- summary_stats$coefficients
# 各項目の結果を表示
print(coefficients)
## Estimate Std. Error z value
## (Intercept) -4.260968 0.3743901 -11.381093
## sBP_cate 2.428260 0.3459835 7.018427
## gcs_cate 1.446221 0.4111649 3.517375
## bil_cate 1.340650 0.4925096 2.722079
## cre_cate 1.294727 0.4108772 3.151128
## alb_cate 1.125006 0.3530143 3.186855
## Pr(>|z|)
## (Intercept) 0.000000000000000000000000000005194457
## sBP_cate 0.000000000002243801090793020168152533
## gcs_cate 0.000435836822957486220986300651247802
## bil_cate 0.006487261661891025738002625189437822
## cre_cate 0.001626411725390087322645760181671903
## alb_cate 0.001438287161487352035227349489332482
# 95% confidence intervals for the coefficients
conf_int <- confint(model)
## Waiting for profiling to be done...
# Display the coefficients, standard errors, p-values, and 95% confidence intervals
coefficients_with_CI <- cbind(coefficients, conf_int)
colnames(coefficients_with_CI) <- c("Estimate", "Std. Error", "z value", "Pr(>|z|)", "2.5 %", "97.5 %")
# Print the results
print(coefficients_with_CI)
## Estimate Std. Error z value
## (Intercept) -4.260968 0.3743901 -11.381093
## sBP_cate 2.428260 0.3459835 7.018427
## gcs_cate 1.446221 0.4111649 3.517375
## bil_cate 1.340650 0.4925096 2.722079
## cre_cate 1.294727 0.4108772 3.151128
## alb_cate 1.125006 0.3530143 3.186855
## Pr(>|z|) 2.5 % 97.5 %
## (Intercept) 0.000000000000000000000000000005194457 -5.0565644 -3.581665
## sBP_cate 0.000000000002243801090793020168152533 1.7714724 3.135984
## gcs_cate 0.000435836822957486220986300651247802 0.6421754 2.260623
## bil_cate 0.006487261661891025738002625189437822 0.3657365 2.307385
## cre_cate 0.001626411725390087322645760181671903 0.4842156 2.102799
## alb_cate 0.001438287161487352035227349489332482 0.4475275 1.839981
# Odds Ratiosの計算
odds_ratios <- exp(coefficients_with_CI[, "Estimate"])
# 95% confidence intervals for the Odds Ratios
conf_int_OR <- exp(conf_int)
# 各変数のOdds Ratiosとその95%信頼区間をまとめた表の作成
OR_with_CI <- cbind(odds_ratios, conf_int_OR)
colnames(OR_with_CI) <- c("Odds Ratio", "2.5 %", "97.5 %")
# Print the results
print(OR_with_CI)
## Odds Ratio 2.5 % 97.5 %
## (Intercept) 0.01410863 0.006367398 0.02782933
## sBP_cate 11.33913655 5.879504064 23.01127085
## gcs_cate 4.24703619 1.900610880 9.58905677
## bil_cate 3.82152734 1.441575329 10.04811112
## cre_cate 3.64999813 1.622901468 8.18905964
## alb_cate 3.08023423 1.564439294 6.29641639
# 変数リストの設定(応答変数を除外)
initial_vars_dammy <- c("sex","barthel1","barthel2", "pept_ulcer", "dm", "malignancy", "hcc", "alcohol",
"past_rupture", "antithro", "steroid", "beta", "sBP_cate",
"age_cate", "bmi_cate", "gcs_cate", "bt_cate", "bil_cate", "ast_cate", "alt_cate",
"wbc_cate", "hb_cate", "plt_cate", "alb_cate", "cre_cate",
"crp_cate", "aptt_cate","pt_cate" )
# Initialize an empty data frame for results
results <- data.frame()
# Loop through each variable in the list
for (var in initial_vars_dammy) {
# Perform logistic regression
model_crude <- glm(formula(paste("hosp_mortality ~", var)), data = dev_imp, family = "binomial")
# Calculate odds ratio, 95% CI, and p-value
coefs <- coef(summary(model_crude))
OR <- round(exp(coefs[2, "Estimate"]), 2)
lower <- round(exp(coefs[2, "Estimate"] - 1.96 * coefs[2, "Std. Error"]), 2)
upper <- round(exp(coefs[2, "Estimate"] + 1.96 * coefs[2, "Std. Error"]), 2)
p_value <- round(coefs[2, "Pr(>|z|)"], 3)
# Add results to the data frame
results <- rbind(results, data.frame(Variable = var, OR = OR, Lower = lower, Upper = upper, P_value = p_value))
}
# Show the results
print(results)
## Variable OR Lower Upper P_value
## 1 sex 1.20 0.69 2.09 0.527
## 2 barthel1 0.44 0.23 0.85 0.015
## 3 barthel2 6.91 3.94 12.09 0.000
## 4 pept_ulcer 0.10 0.01 0.71 0.022
## 5 dm 0.46 0.21 0.99 0.046
## 6 malignancy 1.78 0.89 3.55 0.100
## 7 hcc 1.09 0.60 1.98 0.784
## 8 alcohol 0.70 0.42 1.16 0.166
## 9 past_rupture 0.47 0.22 1.01 0.054
## 10 antithro 2.13 0.22 20.75 0.515
## 11 steroid 6.42 0.40 103.74 0.190
## 12 beta 0.00 0.00 Inf 0.979
## 13 sBP_cate 16.26 8.97 29.47 0.000
## 14 age_cate 1.20 0.72 1.98 0.488
## 15 bmi_cate 0.57 0.28 1.15 0.116
## 16 gcs_cate 12.35 6.47 23.57 0.000
## 17 bt_cate 0.71 0.39 1.28 0.258
## 18 bil_cate 5.81 2.93 11.53 0.000
## 19 ast_cate 3.99 1.97 8.07 0.000
## 20 alt_cate 2.67 1.23 5.79 0.013
## 21 wbc_cate 1.50 0.82 2.76 0.192
## 22 hb_cate 1.44 0.88 2.36 0.150
## 23 plt_cate 1.04 0.64 1.71 0.863
## 24 alb_cate 6.36 3.50 11.57 0.000
## 25 cre_cate 6.00 3.33 10.80 0.000
## 26 crp_cate 4.38 2.27 8.45 0.000
## 27 aptt_cate 10.93 4.48 26.66 0.000
## 28 pt_cate 2.38 1.42 3.97 0.001
# 予測確率の計算
pred_dev <- predict(model, data = dev_imp, type = "response")
dev_imp$predicted <- pred_dev
# スコアごとに要約
dev_by <- dplyr::group_by(dev_imp, sum_score)
table_dev <- dplyr::summarize(dev_by,
hosp_mortality = sum(hosp_mortality), # outcomeは1/0
n = n(),
mortality_rate = round(hosp_mortality / n, 3),
prediction = round(mean(predicted), 3)
)
table_dev
## # A tibble: 7 × 5
## sum_score hosp_mortality n mortality_rate prediction
## <dbl> <dbl> <int> <dbl> <dbl>
## 1 0 3 220 0.014 0.014
## 2 1 7 154 0.045 0.044
## 3 2 6 52 0.115 0.142
## 4 3 24 65 0.369 0.343
## 5 4 21 31 0.677 0.672
## 6 5 8 10 0.8 0.881
## 7 6 4 4 1 0.967
# 予測確率の計算
pred_val <- predict(model, newdata = val_imp, type = "response")
val_imp$predicted <- pred_val
# スコアごとに要約
val_by <- dplyr::group_by(val_imp, sum_score)
table_val <- dplyr::summarize(val_by,
hosp_mortality = sum(hosp_mortality), # outcomeは1/0
n = n(),
mortality_rate = round(hosp_mortality / n, 3),
prediction = round(mean(predicted), 3)
)
table_val
## # A tibble: 7 × 5
## sum_score hosp_mortality n mortality_rate prediction
## <dbl> <dbl> <int> <dbl> <dbl>
## 1 0 0 197 0 0.014
## 2 1 6 109 0.055 0.044
## 3 2 8 60 0.133 0.142
## 4 3 12 45 0.267 0.343
## 5 4 14 25 0.56 0.662
## 6 5 4 6 0.667 0.873
## 7 6 1 2 0.5 0.967
# For each cut-off point, calculate the metrics
for (cut_off in 0:6) {
# Create a binary variable to separate the group from the others
dev_imp$group_binary_dev <- ifelse(dev_imp$sum_score >= cut_off, 1, 0)
# Calculate confusion matrix
CM_dev <- table(Predicted = dev_imp$group_binary_dev, Actual = dev_imp$hosp_mortality)
# If confusion matrix is not 2x2, adjust it
if (any(dim(CM_dev) != c(2, 2))) {
levels_factor_dev <- levels(as.factor(dev_imp$hosp_mortality))
CM_dev <- matrix(c(CM_dev, rep(0, 4 - length(CM_dev))), nrow = 2,
dimnames = list(Predicted = c(0, 1), Actual = levels_factor_dev))
}
# Values from confusion matrix
TP_dev <- ifelse(is.na(CM_dev[2, 2]), 0, CM_dev[2, 2])
FP_dev <- ifelse(is.na(CM_dev[2, 1]), 0, CM_dev[2, 1])
TN_dev <- ifelse(is.na(CM_dev[1, 1]), 0, CM_dev[1, 1])
FN_dev <- ifelse(is.na(CM_dev[1, 2]), 0, CM_dev[1, 2])
# Calculate metrics
if ((FP_dev + TN_dev) != 0) {Sp_dev <- TN_dev / (FP_dev + TN_dev)} else {Sp_dev <- NA}
if ((TP_dev + FN_dev) != 0) {Se_dev <- TP_dev / (TP_dev + FN_dev)} else {Se_dev <- NA}
if ((1 - Sp_dev) != 0) {LR_plus_dev <- Se_dev / (1 - Sp_dev)} else {LR_plus_dev <- NA}
if (Sp_dev != 0) {LR_minus_dev <- (1 - Se_dev) / Sp_dev} else {LR_minus_dev <- NA}
# Calculate PPV and NPV
if ((TP_dev + FP_dev) != 0) {PPV_dev <- TP_dev / (TP_dev + FP_dev)} else {PPV_dev <- NA}
if ((TN_dev + FN_dev) != 0) {NPV_dev <- TN_dev / (TN_dev + FN_dev)} else {NPV_dev <- NA}
# Print results
cat("Cut-off: ≥", cut_off, "\n")
print(CM_dev) # Added here to print the confusion matrix
cat("特異度(Sp): ", Sp_dev, "\n")
cat("感度(Se): ", Se_dev, "\n")
cat("陽性尤度比(LR+): ", LR_plus_dev, "\n")
cat("陰性尤度比(LR-): ", LR_minus_dev, "\n")
cat("陽性的中率(PPV): ", PPV_dev, "\n")
cat("陰性的中率(NPV): ", NPV_dev, "\n")
cat("真陽性数(TP): ", TP_dev, "\n")
cat("真陰性数(TN): ", TN_dev, "\n")
cat("偽陽性数(FP): ", FP_dev, "\n")
cat("偽陰性数(FN): ", FN_dev, "\n\n")
}
## Cut-off: ≥ 0
## Actual
## Predicted 0 1
## 0 463 0
## 1 73 0
## 特異度(Sp): 0.863806
## 感度(Se): NA
## 陽性尤度比(LR+): NA
## 陰性尤度比(LR-): NA
## 陽性的中率(PPV): 0
## 陰性的中率(NPV): 1
## 真陽性数(TP): 0
## 真陰性数(TN): 463
## 偽陽性数(FP): 73
## 偽陰性数(FN): 0
##
## Cut-off: ≥ 1
## Actual
## Predicted 0 1
## 0 217 3
## 1 246 70
## 特異度(Sp): 0.4686825
## 感度(Se): 0.9589041
## 陽性尤度比(LR+): 1.804767
## 陰性尤度比(LR-): 0.08768386
## 陽性的中率(PPV): 0.221519
## 陰性的中率(NPV): 0.9863636
## 真陽性数(TP): 70
## 真陰性数(TN): 217
## 偽陽性数(FP): 246
## 偽陰性数(FN): 3
##
## Cut-off: ≥ 2
## Actual
## Predicted 0 1
## 0 364 10
## 1 99 63
## 特異度(Sp): 0.7861771
## 感度(Se): 0.8630137
## 陽性尤度比(LR+): 4.036115
## 陰性尤度比(LR-): 0.1742436
## 陽性的中率(PPV): 0.3888889
## 陰性的中率(NPV): 0.973262
## 真陽性数(TP): 63
## 真陰性数(TN): 364
## 偽陽性数(FP): 99
## 偽陰性数(FN): 10
##
## Cut-off: ≥ 3
## Actual
## Predicted 0 1
## 0 410 16
## 1 53 57
## 特異度(Sp): 0.8855292
## 感度(Se): 0.7808219
## 陽性尤度比(LR+): 6.821142
## 陰性尤度比(LR-): 0.2475109
## 陽性的中率(PPV): 0.5181818
## 陰性的中率(NPV): 0.9624413
## 真陽性数(TP): 57
## 真陰性数(TN): 410
## 偽陽性数(FP): 53
## 偽陰性数(FN): 16
##
## Cut-off: ≥ 4
## Actual
## Predicted 0 1
## 0 451 40
## 1 12 33
## 特異度(Sp): 0.9740821
## 感度(Se): 0.4520548
## 陽性尤度比(LR+): 17.44178
## 陰性尤度比(LR-): 0.5625247
## 陽性的中率(PPV): 0.7333333
## 陰性的中率(NPV): 0.9185336
## 真陽性数(TP): 33
## 真陰性数(TN): 451
## 偽陽性数(FP): 12
## 偽陰性数(FN): 40
##
## Cut-off: ≥ 5
## Actual
## Predicted 0 1
## 0 461 61
## 1 2 12
## 特異度(Sp): 0.9956803
## 感度(Se): 0.1643836
## 陽性尤度比(LR+): 38.05479
## 陰性尤度比(LR-): 0.8392417
## 陽性的中率(PPV): 0.8571429
## 陰性的中率(NPV): 0.8831418
## 真陽性数(TP): 12
## 真陰性数(TN): 461
## 偽陽性数(FP): 2
## 偽陰性数(FN): 61
##
## Cut-off: ≥ 6
## Actual
## Predicted 0 1
## 0 463 69
## 1 0 4
## 特異度(Sp): 1
## 感度(Se): 0.05479452
## 陽性尤度比(LR+): NA
## 陰性尤度比(LR-): 0.9452055
## 陽性的中率(PPV): 1
## 陰性的中率(NPV): 0.8703008
## 真陽性数(TP): 4
## 真陰性数(TN): 463
## 偽陽性数(FP): 0
## 偽陰性数(FN): 69
#### 注意 cut-off≥0のみ誤って記載される。真陽性:73,偽陽性:463が正しい。感度・特異度・尤度比の数値は注意
cutoff 0だけ別計算
# Values from confusion matrix
TP <- 73
FP <- 463
TN <- 0
FN <- 0
# Calculate PPV and NPV
if ((TP + FP) != 0) {PPV <- TP / (TP + FP)} else {PPV <- NA}
if ((TN + FN) != 0) {NPV <- TN / (TN + FN)} else {NPV <- NA}
# Print results
cat("陽性的中率(PPV): ", PPV, "\n")
## 陽性的中率(PPV): 0.136194
cat("陰性的中率(NPV): ", NPV, "\n")
## 陰性的中率(NPV): NA
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "0 to 1",
sum_score >= 2 & sum_score <= 3 ~ "2 to 3",
sum_score == 4 ~ "4",
sum_score == 5 ~ "5",
sum_score == 6 ~ "6"
))
# 'sum_score_group'を順序付きの因子として扱う
dev_imp$sum_score_group <- factor(dev_imp$sum_score_group, levels = c("0 to 1", "2 to 3", "4", "5", "6"), ordered = TRUE)
# 'predicted'列の名前を'prediction'に変更します
names(dev_imp)[names(dev_imp) == "predicted"] <- "prediction"
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table <- dev_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), prediction = mean(prediction, na.rm = TRUE)) %>%
mutate(mortality_rate = hosp_mortality / n)
# pivot_longerを使って長い形式に変換
summary_table_long <- summary_table %>%
pivot_longer(c(mortality_rate, prediction), names_to = "variable", values_to = "value")
# 新しい形式で棒グラフをプロット
ggplot(summary_table_long, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
scale_fill_manual(values = c("mortality_rate" = "#374e55", "prediction" = "#79af97")) +
theme_minimal()
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "0 to 1",
sum_score >= 2 & sum_score <= 3 ~ "2 to 3",
sum_score == 4 ~ "4",
sum_score == 5 ~ "5",
sum_score == 6 ~ "6"
))
# 'sum_score_group'を順序付きの因子として扱う
val_imp$sum_score_group <- factor(val_imp$sum_score_group, levels = c("0 to 1", "2 to 3", "4", "5", "6"), ordered = TRUE)
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val <- val_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), prediction = mean(predicted)) %>%
mutate(mortality_rate = hosp_mortality / n)
# データを長い形式に変換します
summary_table_long_val <- summary_table_val %>%
pivot_longer(c(mortality_rate, prediction), names_to = "variable", values_to = "value")
# 棒グラフをプロットします
ggplot(summary_table_long_val, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
scale_fill_manual(values = c("mortality_rate" = "#374e55", "prediction" = "#79af97")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# 手動で予測値を計算します
liner_predict_manual <- -4.260968 + 2.5 * dev_imp$sBP_cate + 1.25 * dev_imp$gcs_cate + 1.25 * dev_imp$bil_cate + 1.25 * dev_imp$cre_cate + 1.25 * dev_imp$alb_cate
pred_manual <- exp(liner_predict_manual) / (exp(liner_predict_manual) + 1)
# 新しい予測値でテーブルを作成します
dev_imp$predicted_manual <- pred_manual
dev_by_manual <- dplyr::group_by(dev_imp, sum_score)
table_dev_manual <- dplyr::summarize(dev_by_manual,
hosp_mortality = sum(hosp_mortality),
n = n(),
mortality_rate = round(hosp_mortality / n, 3),
prediction_manual = round(mean(predicted_manual), 3)
)
table_dev_manual
## # A tibble: 7 × 5
## sum_score hosp_mortality n mortality_rate prediction_manual
## <dbl> <dbl> <int> <dbl> <dbl>
## 1 0 3 220 0.014 0.014
## 2 1 7 154 0.045 0.047
## 3 2 6 52 0.115 0.147
## 4 3 24 65 0.369 0.375
## 5 4 21 31 0.677 0.677
## 6 5 8 10 0.8 0.88
## 7 6 4 4 1 0.962
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "0 to 1",
sum_score >= 2 & sum_score <= 3 ~ "2 to 3",
sum_score == 4 ~ "4",
sum_score == 5 ~ "5",
sum_score == 6 ~ "6"
))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_dev_manual <- dev_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# データを長い形式に変換します
summary_table_long_dev_manual <- summary_table_dev_manual %>%
pivot_longer(c(Observed, Prediction), names_to = "variable", values_to = "value")
# 棒グラフをプロットします
ggplot(summary_table_long_dev_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
scale_fill_manual(values = c("Observed" = "#374e55", "Prediction" = "#79af97")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk (Score 0,1)",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk (Score 2,3)",
sum_score >= 4 & sum_score <= 5 ~ "High risk (Score 4,5)",
sum_score == 6 ~ "Very high risk (Score 6)"
))
# 因子として定義し、レベルを指定します
dev_imp$sum_score_group <- factor(dev_imp$sum_score_group,
levels = c("Low risk (Score 0,1)", "Middle risk (Score 2,3)",
"High risk (Score 4,5)", "Very high risk (Score 6)"))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_dev_manual <- dev_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# データを長い形式に変換します
summary_table_long_dev_manual <- summary_table_dev_manual %>%
pivot_longer(c(Observed, Prediction), names_to = "variable", values_to = "value")
# 棒グラフをプロットします
ggplot(summary_table_long_dev_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Group", fill = "Variable") +
scale_fill_manual(values = c("Observed" = "#374e55", "Prediction" = "#79af97")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk (Score 0,1)",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk (Score 2,3)",
sum_score >= 4 & sum_score <= 6 ~ "High risk (Score ≥4)"
))
# 因子として定義し、レベルを指定します
dev_imp$sum_score_group <- factor(dev_imp$sum_score_group,
levels = c("Low risk (Score 0,1)", "Middle risk (Score 2,3)",
"High risk (Score ≥4)"))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_dev_manual <- dev_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# データを長い形式に変換します
summary_table_long_dev_manual <- summary_table_dev_manual %>%
pivot_longer(c(Observed, Prediction), names_to = "variable", values_to = "value")
# 棒グラフをプロットします
ggplot(summary_table_long_dev_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Group", fill = "Variable") +
scale_fill_manual(values = c("Observed" = "#374e55", "Prediction" = "#79af97")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
mutate(risk_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk",
sum_score >= 4 & sum_score <= 5 ~ "High risk",
sum_score == 6 ~ "Very high risk"
))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_dev_manual <- dev_imp %>%
group_by(risk_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# 各リスクグループのObservedとPredictionの具体的な数値を表示します
summary_table_dev_manual %>% select(risk_group, Observed, Prediction)
## # A tibble: 4 × 3
## risk_group Observed Prediction
## <chr> <dbl> <dbl>
## 1 High risk 0.707 0.726
## 2 Low risk 0.0267 0.0275
## 3 Middle risk 0.256 0.274
## 4 Very high risk 1 0.962
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
dev_imp <- dev_imp %>%
mutate(risk_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk",
sum_score >= 4 & sum_score <= 6 ~ "High risk"
))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_dev_manual <- dev_imp %>%
group_by(risk_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# 各リスクグループのObservedとPredictionの具体的な数値を表示します
summary_table_dev_manual %>% select(risk_group, Observed, Prediction)
## # A tibble: 3 × 3
## risk_group Observed Prediction
## <chr> <dbl> <dbl>
## 1 High risk 0.733 0.747
## 2 Low risk 0.0267 0.0275
## 3 Middle risk 0.256 0.274
# Load necessary library
library(Hmisc)
# Add standard error and confidence intervals
summary_table_dev_manual <- summary_table_dev_manual %>%
group_by(risk_group) %>%
mutate(
se = sqrt((Prediction * (1 - Prediction)) / n),
ci_lower = Prediction - qt(1 - (0.05 / 2), df = n - 1) * se,
ci_upper = Prediction + qt(1 - (0.05 / 2), df = n - 1) * se
)
summary_table_dev_manual
## # A tibble: 3 × 8
## # Groups: risk_group [3]
## risk_group hosp_mortality n Prediction Observed se ci_lower ci_upper
## <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High risk 33 45 0.747 0.733 0.0648 0.617 0.878
## 2 Low risk 10 374 0.0275 0.0267 0.00846 0.0109 0.0441
## 3 Middle risk 30 117 0.274 0.256 0.0412 0.192 0.355
# 手動で予測値を計算します
liner_predict_manual_val <- -4.260968 + 2.5 * val_imp$sBP_cate + 1.25 * val_imp$gcs_cate + 1.25 * val_imp$bil_cate + 1.25 * val_imp$cre_cate + 1.25 * val_imp$alb_cate
pred_manual_val <- exp(liner_predict_manual_val) / (exp(liner_predict_manual_val) + 1)
# 新しい予測値でテーブルを作成します
val_imp$predicted_manual <- pred_manual_val
val_by_manual <- dplyr::group_by(val_imp, sum_score)
table_val_manual <- dplyr::summarize(val_by_manual,
hosp_mortality = sum(hosp_mortality),
n = n(),
mortality_rate = round(hosp_mortality / n, 3),
prediction_manual = round(mean(predicted_manual), 3)
)
table_val_manual
## # A tibble: 7 × 5
## sum_score hosp_mortality n mortality_rate prediction_manual
## <dbl> <dbl> <int> <dbl> <dbl>
## 1 0 0 197 0 0.014
## 2 1 6 109 0.055 0.047
## 3 2 8 60 0.133 0.147
## 4 3 12 45 0.267 0.375
## 5 4 14 25 0.56 0.677
## 6 5 4 6 0.667 0.88
## 7 6 1 2 0.5 0.962
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "0 to 1",
sum_score >= 2 & sum_score <= 3 ~ "2 to 3",
sum_score == 4 ~ "4",
sum_score == 5 ~ "5",
sum_score == 6 ~ "6"
))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val_manual <- val_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# データを長い形式に変換します
summary_table_long_val_manual <- summary_table_val_manual %>%
pivot_longer(c(Observed, Prediction), names_to = "variable", values_to = "value")
# 棒グラフをプロットします
ggplot(summary_table_long_val_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Sum Score Group", fill = "Variable") +
scale_fill_manual(values = c("Observed" = "#374e55", "Prediction" = "#79af97")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk (Score 0,1)",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk (Score 2,3)",
sum_score >= 4 & sum_score <= 5 ~ "High risk (Score 4,5)",
sum_score == 6 ~ "6"
))
# 因子として定義し、レベルを指定します
val_imp$sum_score_group <- factor(val_imp$sum_score_group,
levels = c("Low risk (Score 0,1)", "Middle risk (Score 2,3)",
"High risk (Score 4,5)", "6"))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val_manual <- val_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# データを長い形式に変換します
summary_table_long_val_manual <- summary_table_val_manual %>%
pivot_longer(c(Observed, Prediction), names_to = "variable", values_to = "value")
# 棒グラフをプロットします
ggplot(summary_table_long_val_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Group", fill = "Variable") +
scale_fill_manual(values = c("Observed" = "#374e55", "Prediction" = "#79af97")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
mutate(sum_score_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk (Score 0,1)",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk (Score 2,3)",
sum_score >= 4 & sum_score <= 6 ~ "High risk (Score ≥4)"
))
# 因子として定義し、レベルを指定します
val_imp$sum_score_group <- factor(val_imp$sum_score_group,
levels = c("Low risk (Score 0,1)", "Middle risk (Score 2,3)",
"High risk (Score ≥4)"))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val_manual <- val_imp %>%
group_by(sum_score_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# データを長い形式に変換します
summary_table_long_val_manual <- summary_table_val_manual %>%
pivot_longer(c(Observed, Prediction), names_to = "variable", values_to = "value")
# 棒グラフをプロットします
ggplot(summary_table_long_val_manual, aes(x = sum_score_group, y = value*100, fill = variable)) +
geom_col(position = "dodge", width = 0.6) +
labs(y = "Percentage (%)", x = "Group", fill = "Variable") +
scale_fill_manual(values = c("Observed" = "#374e55", "Prediction" = "#79af97")) +
theme_minimal() +
theme(plot.title = element_text(hjust = 0.5))
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
mutate(risk_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk",
sum_score >= 4 & sum_score <= 5 ~ "High risk",
sum_score == 6 ~ "Very high risk"
))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val_manual <- val_imp %>%
group_by(risk_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# 各リスクグループのObservedとPredictionの具体的な数値を表示します
summary_table_val_manual %>% select(risk_group, Observed, Prediction)
## # A tibble: 4 × 3
## risk_group Observed Prediction
## <chr> <dbl> <dbl>
## 1 High risk 0.581 0.716
## 2 Low risk 0.0196 0.0257
## 3 Middle risk 0.190 0.245
## 4 Very high risk 0.5 0.962
# 'sum_score' の範囲に基づいて新しいカテゴリ列を作成します
val_imp <- val_imp %>%
mutate(risk_group = case_when(
sum_score >= 0 & sum_score <= 1 ~ "Low risk",
sum_score >= 2 & sum_score <= 3 ~ "Middle risk",
sum_score >= 4 & sum_score <= 6 ~ "High risk"
))
# グループごとの 'hosp_mortality' の合計と 'n' の合計を計算します
summary_table_val_manual <- val_imp %>%
group_by(risk_group) %>%
summarise(hosp_mortality = sum(hosp_mortality), n = n(), Prediction = mean(predicted_manual)) %>%
mutate(Observed = hosp_mortality / n)
# 各リスクグループのObservedとPredictionの具体的な数値を表示します
summary_table_val_manual %>% select(risk_group, Observed, Prediction)
## # A tibble: 3 × 3
## risk_group Observed Prediction
## <chr> <dbl> <dbl>
## 1 High risk 0.576 0.731
## 2 Low risk 0.0196 0.0257
## 3 Middle risk 0.190 0.245
# Load necessary library
library(Hmisc)
# Add standard error and confidence intervals
summary_table_val_manual <- summary_table_val_manual %>%
group_by(risk_group) %>%
mutate(
se = sqrt((Prediction * (1 - Prediction)) / n),
ci_lower = Prediction - qt(1 - (0.05 / 2), df = n - 1) * se,
ci_upper = Prediction + qt(1 - (0.05 / 2), df = n - 1) * se
)
summary_table_val_manual
## # A tibble: 3 × 8
## # Groups: risk_group [3]
## risk_group hosp_mortality n Prediction Observed se ci_lower ci_upper
## <chr> <dbl> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 High risk 19 33 0.731 0.576 0.0772 0.574 0.888
## 2 Low risk 6 306 0.0257 0.0196 0.00904 0.00788 0.0435
## 3 Middle risk 20 105 0.245 0.190 0.0419 0.161 0.328
# For each cut-off point, calculate the metrics
for (cut_off in 0:5) {
# Create a binary variable to separate the group from the others
val_imp$group_binary <- ifelse(val_imp$sum_score >= cut_off, 1, 0)
# Calculate confusion matrix
CM <- table(Predicted = val_imp$group_binary, Actual = val_imp$hosp_mortality)
# If confusion matrix is not 2x2, adjust it
if (any(dim(CM) != c(2, 2))) {
levels_factor <- levels(as.factor(val_imp$hosp_mortality))
CM <- matrix(c(CM, rep(0, 4 - length(CM))), nrow = 2,
dimnames = list(Predicted = c(0, 1), Actual = levels_factor))
}
# Values from confusion matrix
TP <- ifelse(is.na(CM[2, 2]), 0, CM[2, 2])
FP <- ifelse(is.na(CM[2, 1]), 0, CM[2, 1])
TN <- ifelse(is.na(CM[1, 1]), 0, CM[1, 1])
FN <- ifelse(is.na(CM[1, 2]), 0, CM[1, 2])
# Calculate metrics
if ((FP + TN) != 0) {Sp <- TN / (FP + TN)} else {Sp <- NA}
if ((TP + FN) != 0) {Se <- TP / (TP + FN)} else {Se <- NA}
if ((1 - Sp) != 0) {LR_plus <- Se / (1 - Sp)} else {LR_plus <- NA}
if (Sp != 0) {LR_minus <- (1 - Se) / Sp} else {LR_minus <- NA}
# Print results
cat("Cut-off: ≥", cut_off, "\n")
print(CM) # Added here to print the confusion matrix
cat("特異度(Sp): ", Sp, "\n")
cat("感度(Se): ", Se, "\n")
cat("陽性尤度比(LR+): ", LR_plus, "\n")
cat("陰性尤度比(LR-): ", LR_minus, "\n")
cat("真陽性数(TP): ", TP, "\n")
cat("真陰性数(TN): ", TN, "\n")
cat("偽陽性数(FP): ", FP, "\n")
cat("偽陰性数(FN): ", FN, "\n\n")
}
## Cut-off: ≥ 0
## Actual
## Predicted 0 1
## 0 399 0
## 1 45 0
## 特異度(Sp): 0.8986486
## 感度(Se): NA
## 陽性尤度比(LR+): NA
## 陰性尤度比(LR-): NA
## 真陽性数(TP): 0
## 真陰性数(TN): 399
## 偽陽性数(FP): 45
## 偽陰性数(FN): 0
##
## Cut-off: ≥ 1
## Actual
## Predicted 0 1
## 0 197 0
## 1 202 45
## 特異度(Sp): 0.4937343
## 感度(Se): 1
## 陽性尤度比(LR+): 1.975248
## 陰性尤度比(LR-): 0
## 真陽性数(TP): 45
## 真陰性数(TN): 197
## 偽陽性数(FP): 202
## 偽陰性数(FN): 0
##
## Cut-off: ≥ 2
## Actual
## Predicted 0 1
## 0 300 6
## 1 99 39
## 特異度(Sp): 0.7518797
## 感度(Se): 0.8666667
## 陽性尤度比(LR+): 3.492929
## 陰性尤度比(LR-): 0.1773333
## 真陽性数(TP): 39
## 真陰性数(TN): 300
## 偽陽性数(FP): 99
## 偽陰性数(FN): 6
##
## Cut-off: ≥ 3
## Actual
## Predicted 0 1
## 0 352 14
## 1 47 31
## 特異度(Sp): 0.8822055
## 感度(Se): 0.6888889
## 陽性尤度比(LR+): 5.848227
## 陰性尤度比(LR-): 0.3526515
## 真陽性数(TP): 31
## 真陰性数(TN): 352
## 偽陽性数(FP): 47
## 偽陰性数(FN): 14
##
## Cut-off: ≥ 4
## Actual
## Predicted 0 1
## 0 385 26
## 1 14 19
## 特異度(Sp): 0.9649123
## 感度(Se): 0.4222222
## 陽性尤度比(LR+): 12.03333
## 陰性尤度比(LR-): 0.5987879
## 真陽性数(TP): 19
## 真陰性数(TN): 385
## 偽陽性数(FP): 14
## 偽陰性数(FN): 26
##
## Cut-off: ≥ 5
## Actual
## Predicted 0 1
## 0 396 40
## 1 3 5
## 特異度(Sp): 0.9924812
## 感度(Se): 0.1111111
## 陽性尤度比(LR+): 14.77778
## 陰性尤度比(LR-): 0.8956229
## 真陽性数(TP): 5
## 真陰性数(TN): 396
## 偽陽性数(FP): 3
## 偽陰性数(FN): 40
dca_optin <-
decision_curve(hosp_mortality ~ predicted_manual,
data = dev_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# データをプロット
plot_decision_curve(dca_optin,
curve.names = "Model",
legend.position = "topright",
confidence.intervals = F,
standardize = F)
# 初期のモデルのDCAを計算
dca_optin <- decision_curve(hosp_mortality ~ predicted_manual,
data = dev_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# 年齢に基づくロジスティック回帰モデルを作成
age_model <- glm(hosp_mortality ~ age, data = dev_imp, family = binomial)
# 予測値(予測確率)を取得
dev_imp$age_predicted <- predict(age_model, newdata = dev_imp, type = "response")
# 年齢に基づくモデルのDCAを計算
dca_age <- decision_curve(hosp_mortality ~ age_predicted,
data = dev_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# child_numに基づくロジスティック回帰モデルを作成
child_num_model <- glm(hosp_mortality ~ child_num, data = dev_imp, family = binomial)
# 予測値(予測確率)を取得
dev_imp$child_num_predicted <- predict(child_num_model, newdata = dev_imp, type = "response")
# child_numに基づくモデルのDCAを計算
dca_child_num <- decision_curve(hosp_mortality ~ child_num_predicted,
data = dev_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# MELDに基づくロジスティック回帰モデルを作成
meld_model <- glm(hosp_mortality ~ meld, data = dev_imp, family = binomial)
# 予測値(予測確率)を取得
dev_imp$meld_predicted <- predict(meld_model, newdata = dev_imp, type = "response")
# MELDに基づくモデルのDCAを計算
dca_meld <- decision_curve(hosp_mortality ~ meld_predicted,
data = dev_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# すべてのモデルをプロット
plot_decision_curve(list(dca_optin, dca_meld, dca_child_num, dca_age),
curve.names = c("HOPE-EVL score", "MELD score", "Child-Pugh score","Age"),
col = c("#374E55FF","#DF8F44FF","#00A1D5FF","#B24745FF"),
legend.position = "topright",
confidence.intervals = F,
standardize = F)
## Note: When multiple decision curves are plotted, decision curves for 'All' are calculated using the prevalence from the first DecisionCurve object in the list provided.
# 初期のモデルのDCAを計算
dca_optin_val <- decision_curve(hosp_mortality ~ predicted_manual,
data = val_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# 年齢に基づくロジスティック回帰モデルを作成
age_model_val <- glm(hosp_mortality ~ age, data = val_imp, family = binomial)
# 予測値(予測確率)を取得
val_imp$age_predicted <- predict(age_model_val, newdata = val_imp, type = "response")
# 年齢に基づくモデルのDCAを計算
dca_age_val <- decision_curve(hosp_mortality ~ age_predicted,
data = val_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# child_numに基づくロジスティック回帰モデルを作成
child_num_model_val <- glm(hosp_mortality ~ child_num, data = val_imp, family = binomial)
# 予測値(予測確率)を取得
val_imp$child_num_predicted <- predict(child_num_model_val, newdata = val_imp, type = "response")
# child_numに基づくモデルのDCAを計算
dca_child_num_val <- decision_curve(hosp_mortality ~ child_num_predicted,
data = val_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# MELDに基づくロジスティック回帰モデルを作成
meld_model_val <- glm(hosp_mortality ~ meld, data = val_imp, family = binomial)
# 予測値(予測確率)を取得
val_imp$meld_predicted <- predict(meld_model_val, newdata = val_imp, type = "response")
# MELDに基づくモデルのDCAを計算
dca_meld_val <- decision_curve(hosp_mortality ~ meld_predicted,
data = val_imp,
policy = "opt-in",
fitted.risk = T,
thresholds = seq(0, 0.5 ,by = 0.05))
# すべてのモデルをプロット
plot_decision_curve(list(dca_optin_val, dca_meld_val, dca_child_num_val, dca_age_val),
curve.names = c("HOPE-EVL score", "MELD score", "Child-Pugh score","Age"),
col = c("#374E55FF","#DF8F44FF","#00A1D5FF","#B24745FF"),
legend.position = "topright",
confidence.intervals = F,
standardize = F)
## Note: When multiple decision curves are plotted, decision curves for 'All' are calculated using the prevalence from the first DecisionCurve object in the list provided.