#libraryロード
pacman::p_load(tidyverse,readxl,ggplot2,dplyr,tableone,ggpubr,ggrepel,DBI,skimr,dbplyr,pryr,lubridate, survival, survminer,car,svglite,MatchIt, cobalt)
con_mimic <- dbConnect(RSQLite::SQLite(), dbname = "/Volumes/drive/RSQLite/temp/mimic.db")
db_ad <- tbl(con_mimic, "admissions")
db_ad |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID"
## [4] "ADMITTIME" "DISCHTIME" "DEATHTIME"
## [7] "ADMISSION_TYPE" "ADMISSION_LOCATION" "DISCHARGE_LOCATION"
## [10] "INSURANCE" "LANGUAGE" "RELIGION"
## [13] "MARITAL_STATUS" "ETHNICITY" "EDREGTIME"
## [16] "EDOUTTIME" "DIAGNOSIS" "HOSPITAL_EXPIRE_FLAG"
## [19] "HAS_CHARTEVENTS_DATA"
db_ad |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 530784
db_pt <- tbl(con_mimic, "patients")
db_pt |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "GENDER" "DOB" "DOD"
## [6] "DOD_HOSP" "DOD_SSN" "EXPIRE_FLAG"
db_pt |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 46520
#診断名を確認
db_diag <- tbl(con_mimic, "diagnoses_icd")
db_diag |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID" "SEQ_NUM" "ICD9_CODE"
db_diag |> head(10)
## # Source: SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ROW_ID SUBJECT_ID HADM_ID SEQ_NUM ICD9_CODE
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1297 109 172335 1 40301
## 2 1298 109 172335 2 486
## 3 1299 109 172335 3 58281
## 4 1300 109 172335 4 5855
## 5 1301 109 172335 5 4254
## 6 1302 109 172335 6 2762
## 7 1303 109 172335 7 7100
## 8 1304 109 172335 8 2767
## 9 1305 109 172335 9 7243
## 10 1306 109 172335 10 45829
db_d_icd_diag <- tbl(con_mimic, "d_icd_diagnoses")
db_d_icd_diag |> colnames()
## [1] "ROW_ID" "ICD9_CODE" "SHORT_TITLE" "LONG_TITLE"
db_d_icd_diag |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 14567
db_d_icd_diag |> head(10)
## # Source: SQL [10 x 4]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ROW_ID ICD9_CODE SHORT_TITLE LONG_TITLE
## <dbl> <chr> <chr> <chr>
## 1 174 01166 TB pneumonia-oth test Tuberculous pneumonia [any form], …
## 2 175 01170 TB pneumothorax-unspec Tuberculous pneumothorax, unspecif…
## 3 176 01171 TB pneumothorax-no exam Tuberculous pneumothorax, bacterio…
## 4 177 01172 TB pneumothorx-exam unkn Tuberculous pneumothorax, bacterio…
## 5 178 01173 TB pneumothorax-micro dx Tuberculous pneumothorax, tubercle…
## 6 179 01174 TB pneumothorax-cult dx Tuberculous pneumothorax, tubercle…
## 7 180 01175 TB pneumothorax-histo dx Tuberculous pneumothorax, tubercle…
## 8 181 01176 TB pneumothorax-oth test Tuberculous pneumothorax, tubercle…
## 9 182 01180 Pulmonary TB NEC-unspec Other specified pulmonary tubercul…
## 10 183 01181 Pulmonary TB NEC-no exam Other specified pulmonary tubercul…
df_d_icd_diag <- collect(db_d_icd_diag)
# df_d_icd_diag |> View()
hf_candidates <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, regex("heart failure", ignore_case = T))) #coll()
icd_hf <-
hf_candidates |>
filter(!str_detect(LONG_TITLE, regex("without heart failure", ignore_case = T))) |>
select(ICD9_CODE) |> pull()
icd_hf
## [1] "39891" "40201" "40211" "40291" "40401" "40403" "40411" "40413" "40491"
## [10] "40493" "4280" "4281" "42820" "42821" "42822" "42823" "42830" "42831"
## [19] "42832" "42833" "42840" "42841" "42842" "42843" "4289"
icd_hf_from_article <-
c(4280, 4281, 4289, 39891, 40201, 40211, 40291, 40401, 40403, 40411, 40413, 40491,
40493, 42820, 42821, 42822, 42823, 42830, 42831, 42832, 42833, 42840, 42841, 42842, 42843)
icd_hf %in% icd_hf_from_article |> as.character()
## [1] "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE"
## [11] "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE"
## [21] "TRUE" "TRUE" "TRUE" "TRUE" "TRUE"
length(icd_hf)
## [1] 25
length(icd_hf_from_article)
## [1] 25
#HADM_ID for HF
db_diag_hf <-
db_diag |>
filter(ICD9_CODE %in% icd_hf)
db_diag_hf |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 21274
db_diag_hf |> head(10)
## # Source: SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ROW_ID SUBJECT_ID HADM_ID SEQ_NUM ICD9_CODE
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 1511 115 114585 10 4280
## 2 1527 117 140784 5 4280
## 3 1610 124 138376 12 42833
## 4 1613 124 138376 15 4280
## 5 1671 130 198214 2 4280
## 6 550 68 108329 10 4280
## 7 555 68 170467 4 42820
## 8 556 68 170467 5 4280
## 9 620 77 142768 2 4280
## 10 649 83 158569 2 39891
db_diag_hf |>
distinct(HADM_ID) |>
count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 14040
db_diag_hf |>
mutate_if(is.numeric, as.character) |>
skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_diag_hf, is…. |
| Number of rows | 21274 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ROW_ID | 0 | 1 | 3 | 8 | 0 | 21274 | 0 |
| SUBJECT_ID | 0 | 1 | 3 | 7 | 0 | 10436 | 0 |
| HADM_ID | 0 | 1 | 8 | 8 | 0 | 14040 | 0 |
| SEQ_NUM | 0 | 1 | 3 | 4 | 0 | 36 | 0 |
| ICD9_CODE | 0 | 1 | 4 | 5 | 0 | 25 | 0 |
db_icustay <- tbl(con_mimic, "icustays")
db_icustay |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID" "ICUSTAY_ID"
## [5] "DBSOURCE" "FIRST_CAREUNIT" "LAST_CAREUNIT" "FIRST_WARDID"
## [9] "LAST_WARDID" "INTIME" "OUTTIME" "LOS"
#一度の入院で何回もICU入室している可能性あるので、HADM_IDで重複を削除する ※ICU_IDで重複削除しても複数回ICU入室しているもの全てuniqueとしてカウントしてしまう。
db_icustay_hadm <-
db_icustay |>
distinct(HADM_ID, .keep_all = T) |>
select(HADM_ID, ICUSTAY_ID)
hadm_icu_vec <-
db_icustay_hadm |>
select(HADM_ID) |>
pull()
icustay_id_vec <-
db_icustay_hadm |>
select(ICUSTAY_ID) |>
pull()
db_diag_hf_icu <-
db_diag_hf |>
filter(HADM_ID %in% hadm_icu_vec)
db_diag_hf_icu |>
mutate_if(is.numeric, as.character) |>
skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_diag_hf_icu,… |
| Number of rows | 21061 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ROW_ID | 0 | 1 | 3 | 8 | 0 | 21061 | 0 |
| SUBJECT_ID | 0 | 1 | 3 | 7 | 0 | 10405 | 0 |
| HADM_ID | 0 | 1 | 8 | 8 | 0 | 13874 | 0 |
| SEQ_NUM | 0 | 1 | 3 | 4 | 0 | 36 | 0 |
| ICD9_CODE | 0 | 1 | 4 | 5 | 0 | 25 | 0 |
hadm_hf <-
db_diag_hf_icu |>
distinct(HADM_ID) |>
pull(HADM_ID)
length(hadm_hf)
## [1] 13874
#18歳以上に限定
db_ad_pt_age <-
db_ad |>
left_join(db_pt, by = "SUBJECT_ID") |>
mutate(birthday = DOB/(60*60*24),
admitday = ADMITTIME/(60*60*24),
age = (admitday - birthday)/365.25)
db_ad_pt_age |> head(10)
## # Source: SQL [10 x 29]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ROW_ID.x SUBJECT_ID HADM_ID ADMITTIME DISCHTIME DEATHTIME ADMISSION_TYPE
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr>
## 1 21 22 165315 7140486360 7140585240 NA EMERGENCY
## 2 22 23 152223 5796170100 5796645000 NA ELECTIVE
## 3 23 23 124321 5926332840 5926917600 NA EMERGENCY
## 4 24 24 161859 5346663240 5346910080 NA EMERGENCY
## 5 25 25 129635 6022260360 6022565700 NA EMERGENCY
## 6 26 26 197661 4933754160 4934358000 NA EMERGENCY
## 7 27 27 134931 7002972960 7003205100 NA NEWBORN
## 8 28 28 162569 6553379700 6553843200 NA ELECTIVE
## 9 29 30 104557 6399353820 6399787020 NA URGENT
## 10 30 31 128652 4375121220 4375782000 4375782000 EMERGENCY
## # ℹ 22 more variables: ADMISSION_LOCATION <chr>, DISCHARGE_LOCATION <chr>,
## # INSURANCE <chr>, LANGUAGE <chr>, RELIGION <chr>, MARITAL_STATUS <chr>,
## # ETHNICITY <chr>, EDREGTIME <dbl>, EDOUTTIME <dbl>, DIAGNOSIS <chr>,
## # HOSPITAL_EXPIRE_FLAG <dbl>, HAS_CHARTEVENTS_DATA <dbl>, ROW_ID.y <dbl>,
## # GENDER <chr>, DOB <dbl>, DOD <dbl>, DOD_HOSP <dbl>, DOD_SSN <dbl>,
## # EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, age <dbl>
db_ad_pt_age18 <-
db_ad_pt_age |>
filter(age >= 18) |>
filter(HADM_ID %in% hadm_hf) |>
distinct(SUBJECT_ID, .keep_all = T)
db_ad_pt_age18 |>
mutate_if(is.numeric, as.character) |>
skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_ad_pt_age18,… |
| Number of rows | 10390 |
| Number of columns | 29 |
| _______________________ | |
| Column type frequency: | |
| character | 29 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ROW_ID.x | 0 | 1.00 | 3 | 7 | 0 | 10390 | 0 |
| SUBJECT_ID | 0 | 1.00 | 3 | 7 | 0 | 10390 | 0 |
| HADM_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| ADMITTIME | 0 | 1.00 | 12 | 12 | 0 | 10381 | 0 |
| DISCHTIME | 0 | 1.00 | 12 | 12 | 0 | 10379 | 0 |
| DEATHTIME | 8834 | 0.15 | 12 | 12 | 0 | 1556 | 0 |
| ADMISSION_TYPE | 0 | 1.00 | 6 | 9 | 0 | 3 | 0 |
| ADMISSION_LOCATION | 0 | 1.00 | 20 | 25 | 0 | 7 | 0 |
| DISCHARGE_LOCATION | 0 | 1.00 | 3 | 25 | 0 | 16 | 0 |
| INSURANCE | 0 | 1.00 | 7 | 10 | 0 | 5 | 0 |
| LANGUAGE | 4276 | 0.59 | 4 | 4 | 0 | 53 | 0 |
| RELIGION | 77 | 0.99 | 5 | 22 | 0 | 19 | 0 |
| MARITAL_STATUS | 439 | 0.96 | 6 | 17 | 0 | 7 | 0 |
| ETHNICITY | 0 | 1.00 | 5 | 56 | 0 | 35 | 0 |
| EDREGTIME | 4574 | 0.56 | 12 | 12 | 0 | 5816 | 0 |
| EDOUTTIME | 4574 | 0.56 | 12 | 12 | 0 | 5816 | 0 |
| DIAGNOSIS | 1 | 1.00 | 2 | 190 | 0 | 4206 | 0 |
| HOSPITAL_EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| HAS_CHARTEVENTS_DATA | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| ROW_ID.y | 0 | 1.00 | 3 | 7 | 0 | 10390 | 0 |
| GENDER | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
| DOB | 0 | 1.00 | 12 | 13 | 0 | 9448 | 0 |
| DOD | 4223 | 0.59 | 12 | 12 | 0 | 5762 | 0 |
| DOD_HOSP | 6498 | 0.37 | 12 | 12 | 0 | 3722 | 0 |
| DOD_SSN | 5033 | 0.52 | 12 | 12 | 0 | 5055 | 0 |
| EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| birthday | 0 | 1.00 | 7 | 8 | 0 | 9448 | 0 |
| admitday | 0 | 1.00 | 7 | 16 | 0 | 10381 | 0 |
| age | 0 | 1.00 | 13 | 16 | 0 | 10089 | 0 |
db_ad_pt_age18 |>
filter(age >150) |>
summarize(count = n())
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## count
## <int>
## 1 1032
db_ad_pt_age18 |> select(age, GENDER) |>
ggplot() +
geom_histogram(aes(x = age), color = "black", alpha = 0.8, ) +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
db_ad_pt_age18_mod <-
db_ad_pt_age18 |>
mutate(age = ifelse(age > 100, 95, age))
db_ad_pt_age18_mod |>
select(age, GENDER) |>
ggplot() +
geom_histogram(aes(x = age), color = "black", alpha = 0.8) +
theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
#10390データの日付整理したファイル作成
cols <- c("SUBJECT_ID", "HADM_ID", "ICUSTAY_ID", "age", "ADMISSION_TYPE",
"INSURANCE", "ETHNICITY", "DIAGNOSIS",
"HOSPITAL_EXPIRE_FLAG", "GENDER", "EXPIRE_FLAG",
"birthday", "admitday", "dischday", "deathday", "hosp_deathday")
db_patients <-
db_ad_pt_age18_mod |>
left_join(db_icustay_hadm, by = "HADM_ID") |>
mutate(dischday = DISCHTIME/(60*60*24),
hosp_deathday = DEATHTIME/(60*60*24),
deathday = DOD/(60*60*24)) |>
select(cols)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(cols)
##
## # Now:
## data %>% select(all_of(cols))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_patients |> head(10)
## # Source: SQL [10 x 16]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## SUBJECT_ID HADM_ID ICUSTAY_ID age ADMISSION_TYPE INSURANCE ETHNICITY
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 3 145834 211552 76.5 EMERGENCY Medicare WHITE
## 2 9 150750 220597 41.8 EMERGENCY Medicaid UNKNOWN/NOT SPE…
## 3 21 109451 217847 87.4 EMERGENCY Medicare WHITE
## 4 26 197661 244882 72.0 EMERGENCY Medicare UNKNOWN/NOT SPE…
## 5 30 104557 225176 95 URGENT Medicare UNKNOWN/NOT SPE…
## 6 34 115799 263086 95 EMERGENCY Medicare WHITE
## 7 37 188670 213503 68.9 EMERGENCY Medicare WHITE
## 8 38 185910 248910 75.9 EMERGENCY Medicare WHITE
## 9 42 119203 210828 61.2 EMERGENCY Private UNKNOWN/NOT SPE…
## 10 49 190539 249195 80.9 ELECTIVE Medicare WHITE
## # ℹ 9 more variables: DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## # GENDER <chr>, EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>,
## # dischday <dbl>, deathday <dbl>, hosp_deathday <dbl>
db_patients |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10390
#曝露情報H2RA
db_presc <- tbl(con_mimic,"prescriptions")
db_presc |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID"
## [4] "ICUSTAY_ID" "STARTDATE" "ENDDATE"
## [7] "DRUG_TYPE" "DRUG" "DRUG_NAME_POE"
## [10] "DRUG_NAME_GENERIC" "FORMULARY_DRUG_CD" "GSN"
## [13] "NDC" "PROD_STRENGTH" "DOSE_VAL_RX"
## [16] "DOSE_UNIT_RX" "FORM_VAL_DISP" "FORM_UNIT_DISP"
## [19] "ROUTE"
db_presc |> head(10)
## # Source: SQL [10 x 19]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ROW_ID SUBJECT_ID HADM_ID ICUSTAY_ID STARTDATE ENDDATE DRUG_TYPE DRUG
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr>
## 1 2214776 6 107064 NA 6483110400 6483196800 MAIN Tacrol…
## 2 2214775 6 107064 NA 6483110400 6483196800 MAIN Warfar…
## 3 2215524 6 107064 NA 6483110400 6483196800 MAIN Hepari…
## 4 2216265 6 107064 NA 6483110400 6483196800 BASE D5W
## 5 2214773 6 107064 NA 6483110400 6483196800 MAIN Furose…
## 6 2214774 6 107064 NA 6483110400 6483456000 MAIN Warfar…
## 7 2215525 6 107064 NA 6483196800 6483196800 MAIN Hepari…
## 8 2216266 6 107064 NA 6483196800 6483196800 BASE D5W
## 9 2215526 6 107064 NA 6483196800 6483283200 MAIN Hepari…
## 10 2214778 6 107064 NA 6483196800 6483283200 MAIN Warfar…
## # ℹ 11 more variables: DRUG_NAME_POE <chr>, DRUG_NAME_GENERIC <chr>,
## # FORMULARY_DRUG_CD <chr>, GSN <chr>, NDC <chr>, PROD_STRENGTH <chr>,
## # DOSE_VAL_RX <chr>, DOSE_UNIT_RX <chr>, FORM_VAL_DISP <chr>,
## # FORM_UNIT_DISP <chr>, ROUTE <chr>
db_presc |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 4156450
col_presc <- c("SUBJECT_ID", "HADM_ID", "ICUSTAY_ID", "DRUG", "DRUG_NAME_POE", "DRUG_NAME_GENERIC", "FORMULARY_DRUG_CD")
db_presc_drug <- db_presc |> select(col_presc)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_presc)
##
## # Now:
## data %>% select(all_of(col_presc))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_presc_drug |> head(10)
## # Source: SQL [10 x 7]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## SUBJECT_ID HADM_ID ICUSTAY_ID DRUG DRUG_NAME_POE DRUG_NAME_GENERIC
## <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 6 107064 NA Tacrolimus Tacrolimus Tacrolimus
## 2 6 107064 NA Warfarin Warfarin Warfarin
## 3 6 107064 NA Heparin Sodium <NA> <NA>
## 4 6 107064 NA D5W <NA> <NA>
## 5 6 107064 NA Furosemide Furosemide Furosemide
## 6 6 107064 NA Warfarin Warfarin Warfarin
## 7 6 107064 NA Heparin Sodium <NA> <NA>
## 8 6 107064 NA D5W <NA> <NA>
## 9 6 107064 NA Heparin Sodium <NA> <NA>
## 10 6 107064 NA Warfarin Warfarin Warfarin
## # ℹ 1 more variable: FORMULARY_DRUG_CD <chr>
drugs <-
db_presc_drug |>
distinct(DRUG, DRUG_NAME_POE, DRUG_NAME_GENERIC, FORMULARY_DRUG_CD)
drugs |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 8575
drugs |> head(10)
## # Source: SQL [10 x 4]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## DRUG DRUG_NAME_POE DRUG_NAME_GENERIC FORMULARY_DRUG_CD
## <chr> <chr> <chr> <chr>
## 1 Tacrolimus Tacrolimus Tacrolimus TACR1
## 2 Warfarin Warfarin Warfarin WARF5
## 3 Heparin Sodium <NA> <NA> HEPAPREMIX
## 4 D5W <NA> <NA> HEPBASE
## 5 Furosemide Furosemide Furosemide FURO20
## 6 Warfarin Warfarin Warfarin WARF0
## 7 Warfarin Warfarin Warfarin WARF2
## 8 Tacrolimus Tacrolimus Tacrolimus TACR5
## 9 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO500T
## 10 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO250
df_drugs <- drugs |> collect()
df_drugs |> head(20)
## # A tibble: 20 × 4
## DRUG DRUG_NAME_POE DRUG_NAME_GENERIC FORMULARY_DRUG_CD
## <chr> <chr> <chr> <chr>
## 1 Tacrolimus Tacrolimus Tacrolimus TACR1
## 2 Warfarin Warfarin Warfarin WARF5
## 3 Heparin Sodium <NA> <NA> HEPAPREMIX
## 4 D5W <NA> <NA> HEPBASE
## 5 Furosemide Furosemide Furosemide FURO20
## 6 Warfarin Warfarin Warfarin WARF0
## 7 Warfarin Warfarin Warfarin WARF2
## 8 Tacrolimus Tacrolimus Tacrolimus TACR5
## 9 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO500T
## 10 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO250
## 11 Neutra-Phos Neutra-Phos Neutra-Phos NEUT
## 12 Nitroglycerin <NA> <NA> NTG100PB
## 13 Docusate Sodium Docusate Sodium Docusate Sodium DOCU100
## 14 Insulin Insulin Insulin INSULIN
## 15 Atropine Sulfate Atropine Sulfate Atropine Sulfate ATRO1I
## 16 Zolpidem Tartrate Zolpidem Tartrate Zolpidem Tartrate AMBI5
## 17 Midazolam HCl Midazolam HCl Midazolam HCl MIDA2I
## 18 Nitroglycerin SL Nitroglycerin SL Nitroglycerin SL NTG3SL
## 19 Lorazepam Lorazepam Lorazepam LORA2I
## 20 Magnesium Sulfate Magnesium Sulfate Magnesium Sulfate MAGS1I
col_h2 <-
df_drugs |>
filter(str_detect(DRUG, regex("tidine", ignore_case = T))|
str_detect(DRUG_NAME_POE, regex("tidine", ignore_case = T))|
str_detect(DRUG_NAME_GENERIC, regex("tidine", ignore_case = T))
) |>
select(DRUG) |> pull() |> unique()
col_h2 |> dput()
## c("Ranitidine", "Famotidine", "Famotidine (IV)", "Ranitidine (Liquid)",
## "Cimetidine", "NEO*PO*Ranitidine", "Ranitidine HCl", "Famotidine (PO)",
## "Azacitidine", "Claritin", "Loratadine", "Azacitidine (Subcut)",
## "CLARITIN", "Ranitidine Hcl", "Famoti", "Famo", "Nizatidine",
## "famotidine", "Cimetidine HCl")
vec_h2ras <- c("Cimetidine", "Cimetidine HCl", "Famo", "Famoti", "Famotidine",
"Famotidine (IV)", "Famotidine (PO)", "NEO*PO*Ranitidine",
"Nizatidine", "Ranitidine", "Ranitidine (Liquid)", "Ranitidine HCl",
"Ranitidine Hcl", "famotidine",
"Zantac 75", "Zantac Maximum Strength")
H2RAs_user <-
db_presc_drug |>
filter(DRUG %in% vec_h2ras) |>
select(SUBJECT_ID) |>
pull()
db_patients_h2 <-
db_patients |>
mutate(h2ras = ifelse(SUBJECT_ID %in% H2RAs_user, 1, 0),
.after = ICUSTAY_ID)
db_patients_h2 |> head(10)
## # Source: SQL [10 x 17]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## SUBJECT_ID HADM_ID ICUSTAY_ID h2ras age ADMISSION_TYPE INSURANCE ETHNICITY
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 3 145834 211552 0 76.5 EMERGENCY Medicare WHITE
## 2 9 150750 220597 0 41.8 EMERGENCY Medicaid UNKNOWN/N…
## 3 21 109451 217847 0 87.4 EMERGENCY Medicare WHITE
## 4 26 197661 244882 0 72.0 EMERGENCY Medicare UNKNOWN/N…
## 5 30 104557 225176 0 95 URGENT Medicare UNKNOWN/N…
## 6 34 115799 263086 0 95 EMERGENCY Medicare WHITE
## 7 37 188670 213503 0 68.9 EMERGENCY Medicare WHITE
## 8 38 185910 248910 1 75.9 EMERGENCY Medicare WHITE
## 9 42 119203 210828 0 61.2 EMERGENCY Private UNKNOWN/N…
## 10 49 190539 249195 1 80.9 ELECTIVE Medicare WHITE
## # ℹ 9 more variables: DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## # GENDER <chr>, EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>,
## # dischday <dbl>, deathday <dbl>, hosp_deathday <dbl>
db_patients_h2 |> count(h2ras)
## # Source: SQL [2 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## h2ras n
## <dbl> <int>
## 1 0 5949
## 2 1 4441
db_PEO <-
db_patients_h2 |>
mutate(death_elasp = deathday - admitday,
mortality30 = ifelse(death_elasp <= 30, 1, 0),
# hospital_death = HOSPITAL_EXPIRE_FLAG,
.after = h2ras) |>
mutate(INSURANCE = ifelse(INSURANCE == "Medicaid", "Medicare", INSURANCE)) |>
# mutate(mortality30 = coalesce(mortality30, 0))
replace_na(list(mortality30 = 0))
db_PEO <- db_PEO |> compute()
db_PEO |> head(10)
## # Source: SQL [10 x 19]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30 age
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 145834 211552 0 236. 0 76.5
## 2 9 150750 220597 0 4.45 1 41.8
## 3 21 109451 217847 0 149. 0 87.4
## 4 26 197661 244882 0 659. 0 72.0
## 5 30 104557 225176 0 NA 0 95
## 6 34 115799 263086 0 2021. 0 95
## 7 37 188670 213503 0 503. 0 68.9
## 8 38 185910 248910 1 NA 0 75.9
## 9 42 119203 210828 0 1999. 0 61.2
## 10 49 190539 249195 1 NA 0 80.9
## # ℹ 12 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## # DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <chr>,
## # EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## # deathday <dbl>, hosp_deathday <dbl>
db_heightweight <- tbl(con_mimic, "heightweight")
db_heightFirst <- tbl(con_mimic, "heightfirstday")
db_weightFirst <- tbl(con_mimic, "weightfirstday")
db_weightDuration <- tbl(con_mimic, "weightduration")
db_pivot_height <- tbl(con_mimic, "pivoted_height") #subject_id
db_heightweight |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 61532
db_weightDuration1 <-
db_weightDuration |>
arrange(icustay_id, starttime) |>
distinct(icustay_id, .keep_all = T) |>
rename(weight_duration = weight) |>
select(icustay_id, weight_duration)
db_pivot_height1 <-
db_pivot_height |> distinct(subject_id, .keep_all = T) |>
rename(height_pivot = height) |>
select(subject_id, height_pivot)
db_pivot_height1
## # Source: SQL [?? x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## subject_id height_pivot
## <dbl> <dbl>
## 1 3 178.
## 2 4 173.
## 3 8 49
## 4 10 38
## 5 17 165.
## 6 18 183.
## 7 21 185.
## 8 24 185.
## 9 25 183.
## 10 26 188.
## # ℹ more rows
db_hw_1 <-
db_PEO |>
left_join(db_heightweight, by = c("ICUSTAY_ID" = "icustay_id"), copy = T)
db_hw_1 <- db_hw_1 |> compute()
db_hw_2 <-
db_hw_1 |>
left_join(db_heightFirst, by = c("ICUSTAY_ID" = "icustay_id"), copy = T)
db_hw_2 <- db_hw_2 |> compute()
db_hw_3 <-
db_hw_2 |>
left_join(db_weightFirst, by = c("ICUSTAY_ID" = "icustay_id"), copy = T)
db_hw_3 <- db_hw_3 |> compute()
db_hw_4 <-
db_hw_3 |>
left_join(db_weightDuration1, by = c("ICUSTAY_ID" = "icustay_id"), copy = T)
db_hw_4 <- db_hw_4 |> compute()
## Warning: ORDER BY is ignored in subqueries without LIMIT
## ℹ Do you need to move arrange() later in the pipeline or use window_order() instead?
db_hw_4 |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10390
db_PEO_HtWt <-
db_hw_4 |>
mutate(
Height = case_when(
!is.na(height_first) ~ height_first,
!is.na(Height) ~ Height,
!is.na(Height_echo) ~ Height_echo,
!is.na(Height_chart) ~ Height_chart),
#ここでのcase_whenは、height_firstがNAでなければheight_firstを、height_firstがNAならばHeightを、HeightもNAならばHeight_echoを、Height_echoもNAならばHeight_chartを、Height_chartもNAならば、NAが入力されるという意味
#height_firstがNAでなければheight_firstが採用され、case_whenは終了となる。(case_whenは与えられた条件を順番に採用する)
weight = case_when(
!is.na(weight_first) ~ weight_first,
!is.na(weight_admit) ~weight_admit,
!is.na(weight) ~ weight,
!is.na(weight_duration) ~ weight_duration,
!is.na(weight_echoinhosp) ~ weight_echoinhosp,
!is.na(weight_daily) ~ weight_daily
)
) |>
select(!c("weight_first", "weight_min", "weight_max", "height_first", "height_min",
"height_max", "Height_chart", "Height_echo", "weight_admit",
"weight_daily", "weight_echoinhosp", "weight_echoprehosp", "weight_duration"))
db_PEO_HtWt |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10390
db_PEO_HtWt1 <-
db_PEO_HtWt |>
left_join(db_pivot_height1, by = c("SUBJECT_ID" = "subject_id"), copy = T) |>
mutate(Height = ifelse(!is.na(Height), Height, height_pivot)) |>
select(!c(height_pivot))
db_PEO_HtWt1 <- db_PEO_HtWt1 |> compute()
db_PEO_HtWt2 <-
db_PEO_HtWt1 |>
mutate(Height = ifelse(Height < 250 & Height > 80, Height, NA),
weight = ifelse(weight > 10, weight, NA)) |>
mutate(BMI = weight/(Height/100)^2)
db_PEO_HtWt2 <- db_PEO_HtWt2 |> compute()
db_HtWt <- db_PEO_HtWt2 |>
select("HADM_ID", "weight", "Height", "BMI")
db_HtWt <- db_HtWt |> compute()
db_vital <- tbl(con_mimic, "pivoted_vitals" )
db_vital |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 9152810
icustay_id_vec |> length()
## [1] 57786
db_vital_target <- db_vital |> filter(icustay_id %in% icustay_id_vec)
db_vital_target |> head(10)
## # Source: SQL [10 x 10]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id charttime HeartRate SysBP DiasBP MeanBP RespRate TempC SpO2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 226115 6134934600 NA NA NA NA NA NA NA
## 2 226403 7056760620 NA NA NA NA 38 NA NA
## 3 226651 5053984200 NA NA NA NA NA NA NA
## 4 226656 4901881680 NA NA NA NA NA NA NA
## 5 226889 5308514400 NA NA NA NA NA NA NA
## 6 227129 6998653080 NA NA NA NA NA NA NA
## 7 227347 6997553760 NA NA NA NA NA NA NA
## 8 227368 5121366300 NA NA NA NA NA NA NA
## 9 227489 6517883340 NA NA NA NA NA NA NA
## 10 227865 7199496660 NA NA NA NA NA NA NA
## # ℹ 1 more variable: Glucose <dbl>
db_vital_target |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_vital_target… |
| Number of rows | 8439687 |
| Number of columns | 10 |
| _______________________ | |
| Column type frequency: | |
| character | 10 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| icustay_id | 0 | 1.00 | 8 | 8 | 0 | 56696 | 0 |
| charttime | 0 | 1.00 | 12 | 12 | 0 | 3126810 | 0 |
| HeartRate | 1119085 | 0.87 | 3 | 17 | 0 | 419 | 0 |
| SysBP | 3209874 | 0.62 | 3 | 18 | 0 | 726 | 0 |
| DiasBP | 3211509 | 0.62 | 3 | 17 | 0 | 539 | 0 |
| MeanBP | 3198031 | 0.62 | 3 | 17 | 0 | 2907 | 0 |
| RespRate | 2778181 | 0.67 | 3 | 17 | 0 | 244 | 0 |
| TempC | 6852744 | 0.19 | 4 | 16 | 0 | 2088 | 0 |
| SpO2 | 2943503 | 0.65 | 3 | 17 | 0 | 123 | 0 |
| Glucose | 7281949 | 0.14 | 3 | 16 | 0 | 1724 | 0 |
#バイタル毎にicustay_idに紐づくデータを抽出する。 1入室中に複数のバイタルデータがあることがほとんどだが、どの時点でのデータを抽出するかは論文未記載(たぶん)。 以下では入室最初のデータを採用したが、平均値・中央値にすることもあり得る。 (以下のコードをcompute()なしで回してみると、compute()の意味が分かると思います。)
db_HR <-
db_vital_target |>
select(icustay_id, charttime, HeartRate) |>
filter(!is.na(HeartRate)) |>
group_by(icustay_id) |>
summarize(HeartRate = case_when(min(charttime) ~ HeartRate)) |>
compute()
## Warning: Missing values are always removed in SQL aggregation functions.
## Use `na.rm = TRUE` to silence this warning
## This warning is displayed once every 8 hours.
db_HR |> head(10)
## # Source: SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id HeartRate
## <dbl> <dbl>
## 1 200001 115
## 2 200003 132
## 3 200006 84
## 4 200007 90
## 5 200009 92
## 6 200010 109
## 7 200011 77
## 8 200012 109
## 9 200014 71
## 10 200016 67
db_HR|>mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_HR, is.numer… |
| Number of rows | 56566 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| icustay_id | 0 | 1 | 8 | 8 | 0 | 56566 | 0 |
| HeartRate | 0 | 1 | 3 | 16 | 0 | 190 | 0 |
db_sBP <-
db_vital_target |>
select(icustay_id, charttime,SysBP) |>
filter(!is.na(SysBP)) |>
group_by(icustay_id) |>
summarize(SysBP = case_when(min(charttime) ~ SysBP)) |>
compute()
db_sBP |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_sBP, is.nume… |
| Number of rows | 48901 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| icustay_id | 0 | 1 | 8 | 8 | 0 | 48901 | 0 |
| SysBP | 0 | 1 | 4 | 16 | 0 | 346 | 0 |
db_dBP <-
db_vital_target |>
select(icustay_id, charttime,DiasBP) |>
filter(!is.na(DiasBP)) |>
group_by(icustay_id) |>
summarize(DiasBP = case_when(min(charttime) ~ DiasBP)) |>
compute()
db_mBP <-
db_vital_target |>
select(icustay_id, charttime,MeanBP) |>
filter(!is.na(MeanBP)) |>
group_by(icustay_id) |>
summarize(MeanBP = case_when(min(charttime) ~ MeanBP)) |>
compute()
db_mBP |> head(10)
## # Source: SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id MeanBP
## <dbl> <dbl>
## 1 200001 77
## 2 200003 73
## 3 200006 97.3
## 4 200007 87.7
## 5 200009 100
## 6 200010 92
## 7 200011 73
## 8 200012 84
## 9 200014 89
## 10 200016 93
db_RR <-
db_vital_target |>
select(icustay_id, charttime,RespRate) |>
filter(!is.na(RespRate)) |>
group_by(icustay_id) |>
summarize(RespRate = case_when(min(charttime) ~ RespRate)) |>
compute()
db_tmpc <-
db_vital_target |>
select(icustay_id, charttime,TempC) |>
filter(!is.na(TempC)) |>
group_by(icustay_id) |>
summarize(TempC = case_when(min(charttime) ~ TempC)) |>
compute()
db_spo2 <-
db_vital_target |>
select(icustay_id, charttime,SpO2) |>
filter(!is.na(SpO2)) |>
group_by(icustay_id) |>
summarize(SpO2 = case_when(min(charttime) ~ SpO2)) |>
compute()
#バイタルデータを結合する
db_vitals_first <-
db_HR |>
left_join(db_sBP, by = "icustay_id") |>
left_join(db_dBP, by = "icustay_id") |>
left_join(db_mBP, by = "icustay_id") |>
left_join(db_RR, by = "icustay_id") |>
left_join(db_tmpc, by = "icustay_id") |>
left_join(db_spo2, by = "icustay_id")
db_vitals_first |> head(10)
## # Source: SQL [10 x 8]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id HeartRate SysBP DiasBP MeanBP RespRate TempC SpO2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 200001 115 113 65 77 22 37.3 94
## 2 200003 132 106 68 73 25 39.6 97
## 3 200006 84 118 87 97.3 20 36.2 100
## 4 200007 90 121 71 87.7 13 36.4 97
## 5 200009 92 125 84 100 14 34.7 100
## 6 200010 109 138 78 92 17 36.7 98
## 7 200011 77 137 62 73 31 36.8 98
## 8 200012 109 104 79 84 15 37.5 96
## 9 200014 71 133 67 89 10 35.7 100
## 10 200016 67 126 69 93 14 35.1 100
left_joinなのでdb_PEOにあるicustay_idのみ結合され、それ以外は削除される
db_vitals_first <- db_vitals_first |> compute()
db_PEO_vital <-
db_PEO |>
left_join(db_vitals_first, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE)
db_PEO_vital <- db_PEO_vital |> compute()
db_PEO_vital |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10390
cols_vital <- c("HeartRate", "SysBP", "DiasBP", "MeanBP", "RespRate", "TempC", "SpO2")
db_vital <-
db_PEO_vital |>
select("HADM_ID", cols_vital)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(cols_vital)
##
## # Now:
## data %>% select(all_of(cols_vital))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_vital <- db_vital |> compute()
#urine output
#db_pivot_urine <- tbl(con_mimic, "pivot_urine_output")
db_urine_out <- tbl(con_mimic, "urineoutput")
db_urine_firstday <- tbl(con_mimic, "urine_output_firstday")
#db_pivot_urine: icustya_id・時刻ごとの尿量が記載されている。一日の尿量に直す必要がある。 時間を秒単位から日単位に修正し、 各icustay_idの入室時刻をもとに、 そこから1日(24時間)後までのデータから尿量を計算する。 (コードは結構乱雑です)
db_urine_out |> head(10)
## # Source: SQL [10 x 3]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id charttime value
## <dbl> <dbl> <dbl>
## 1 262400 6803121600 720
## 2 201728 5799682800 330
## 3 268544 4561884000 270
## 4 269568 6926698800 27
## 5 270592 4923151200 7
## 6 270592 4923162000 27
## 7 270848 6946108200 48
## 8 271616 4766220000 7
## 9 271616 4768909200 8
## 10 271616 4765392000 9
db_urine_out |> arrange(icustay_id)
## # Source: SQL [?? x 3]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
## icustay_id charttime value
## <dbl> <dbl> <dbl>
## 1 200001 6687221700 50
## 2 200001 6687007320 250
## 3 200001 6687089220 60
## 4 200003 7245248400 170
## 5 200003 7245100800 230
## 6 200003 7245360000 420
## 7 200003 7245432000 370
## 8 200003 7245085500 360
## 9 200003 7245082800 275
## 10 200003 7245482400 40
## # ℹ more rows
db_urine1 <-
db_urine_out |> filter(!is.na(icustay_id)) |>
mutate(charttime = charttime/(60*60*24))
db_urine1 |>arrange(icustay_id)|> head(10)
## # Source: SQL [10 x 3]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
## icustay_id charttime value
## <dbl> <dbl> <dbl>
## 1 200001 77398. 50
## 2 200001 77396. 250
## 3 200001 77397. 60
## 4 200003 83857. 170
## 5 200003 83855. 230
## 6 200003 83858. 420
## 7 200003 83859. 370
## 8 200003 83855. 360
## 9 200003 83855. 275
## 10 200003 83860. 40
db_urine_time0 <-
db_urine1 |>
group_by(icustay_id) |>
summarize(time0 = min(charttime))
db_urine_time0 |> head(10)
## # Source: SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id time0
## <dbl> <dbl>
## 1 200001 77396.
## 2 200003 83855.
## 3 200006 69277.
## 4 200007 50817.
## 5 200009 80323.
## 6 200010 59386.
## 7 200011 79841.
## 8 200012 67196.
## 9 200014 49355.
## 10 200016 66080.
#1日(24時間)以内の尿量(value)のデータを抽出する
db_urine2 <-
db_urine1 |>
left_join(db_urine_time0, by = "icustay_id") |>
mutate(elaspe = charttime - time0) |>
filter(elaspe <= 1) |>
compute()
db_urine2 |> arrange(icustay_id)|>head(10)
## # Source: SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
## icustay_id charttime value time0 elaspe
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 200001 77396. 250 77396. 0
## 2 200001 77397. 60 77396. 0.948
## 3 200003 83855. 230 83855. 0.490
## 4 200003 83855. 360 83855. 0.312
## 5 200003 83855. 275 83855. 0.281
## 6 200003 83855. 320 83855. 0.323
## 7 200003 83855. 250 83855. 0.531
## 8 200003 83856. 220 83855. 0.948
## 9 200003 83855. 230 83855. 0
## 10 200003 83856. 130 83855. 0.698
#1日(24時間)以内の尿量の合計をみる
db_urine3 <-
db_urine2 |>
group_by(icustay_id) |>
summarize(value = sum(value))
db_urine3 |> arrange(icustay_id)|>head(10)
## # Source: SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
## icustay_id value
## <dbl> <dbl>
## 1 200001 310
## 2 200003 3732
## 3 200006 2355
## 4 200007 1295
## 5 200009 2170
## 6 200010 2050
## 7 200011 3085
## 8 200012 1200
## 9 200014 844
## 10 200016 1275
db_urine3 <- db_urine3 |> compute()
db_urine3 |> head(10)
## # Source: SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id value
## <dbl> <dbl>
## 1 200001 310
## 2 200003 3732
## 3 200006 2355
## 4 200007 1295
## 5 200009 2170
## 6 200010 2050
## 7 200011 3085
## 8 200012 1200
## 9 200014 844
## 10 200016 1275
#db_urine_firstday: 初日のデータのみなので処理は楽。
db_urine_firstday |>head(10)
## # Source: SQL [10 x 4]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## subject_id hadm_id icustay_id urineoutput
## <dbl> <dbl> <dbl> <dbl>
## 1 120 188923 273725 NA
## 2 1007 141227 267926 NA
## 3 1137 120833 281449 NA
## 4 1319 197140 276358 NA
## 5 1662 180788 207701 NA
## 6 1718 121066 238595 NA
## 7 1855 142848 298888 NA
## 8 1860 195382 283907 NA
## 9 1991 187693 285178 NA
## 10 2061 174783 256004 NA
#is.na(db_urine_firstday$urineoutput) |> sum()ではdbへアクセスできないため、使えない。
#使いたいならcollect()を使う。
db_urine_firstday |>mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_urine_firstd… |
| Number of rows | 53359 |
| Number of columns | 4 |
| _______________________ | |
| Column type frequency: | |
| character | 4 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| subject_id | 0 | 1 | 3 | 7 | 0 | 41338 | 0 |
| hadm_id | 0 | 1 | 8 | 8 | 0 | 50763 | 0 |
| icustay_id | 0 | 1 | 8 | 8 | 0 | 53359 | 0 |
| urineoutput | 158 | 1 | 3 | 13 | 0 | 4357 | 0 |
db_urine_firstday1 <-
db_urine_firstday |>
select(icustay_id, urineoutput)
is.na(db_urine_firstday1$urineoutput) |> sum()
## [1] 0
db_urine_firstday1 |>arrange(icustay_id)|> head(10)
## # Source: SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
## icustay_id urineoutput
## <dbl> <dbl>
## 1 200001 250
## 2 200003 3652
## 3 200006 1955
## 4 200007 1295
## 5 200009 1570
## 6 200010 2050
## 7 200011 3085
## 8 200012 1200
## 9 200014 664
## 10 200016 1275
#3つのファイルをdb_PEOにマージして、 どのファイルのデータを優先して使うかを決めたうえでデータを固定する(論文未記載)
db_u1 <-
db_PEO |>
left_join(db_urine_firstday1, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) |>
compute()
db_u2 <-
db_u1 |>
left_join(db_urine3, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) |>
compute()
db_u2 |> mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_u2, is.numer… |
| Number of rows | 10390 |
| Number of columns | 21 |
| _______________________ | |
| Column type frequency: | |
| character | 21 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECT_ID | 0 | 1.00 | 3 | 7 | 0 | 10390 | 0 |
| HADM_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| ICUSTAY_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| h2ras | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| death_elasp | 4223 | 0.59 | 3 | 20 | 0 | 6107 | 0 |
| mortality30 | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| age | 0 | 1.00 | 4 | 16 | 0 | 9355 | 0 |
| ADMISSION_TYPE | 0 | 1.00 | 6 | 9 | 0 | 3 | 0 |
| INSURANCE | 0 | 1.00 | 7 | 10 | 0 | 4 | 0 |
| ETHNICITY | 0 | 1.00 | 5 | 56 | 0 | 35 | 0 |
| DIAGNOSIS | 1 | 1.00 | 2 | 190 | 0 | 4206 | 0 |
| HOSPITAL_EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| GENDER | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
| EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| birthday | 0 | 1.00 | 7 | 8 | 0 | 9448 | 0 |
| admitday | 0 | 1.00 | 7 | 16 | 0 | 10381 | 0 |
| dischday | 0 | 1.00 | 7 | 16 | 0 | 10379 | 0 |
| deathday | 4223 | 0.59 | 7 | 7 | 0 | 5762 | 0 |
| hosp_deathday | 8834 | 0.15 | 7 | 16 | 0 | 1556 | 0 |
| urineoutput | 480 | 0.95 | 3 | 11 | 0 | 2693 | 0 |
| value | 416 | 0.96 | 3 | 11 | 0 | 2941 | 0 |
db_PEO_urine <-
db_u2 |>
mutate(urine_out = case_when(
!is.na(urineoutput) ~ urineoutput,
!is.na(value) ~ value
))
db_PEO_urine|>mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_PEO_urine, i… |
| Number of rows | 10390 |
| Number of columns | 22 |
| _______________________ | |
| Column type frequency: | |
| character | 22 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECT_ID | 0 | 1.00 | 3 | 7 | 0 | 10390 | 0 |
| HADM_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| ICUSTAY_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| h2ras | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| death_elasp | 4223 | 0.59 | 3 | 20 | 0 | 6107 | 0 |
| mortality30 | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| age | 0 | 1.00 | 4 | 16 | 0 | 9355 | 0 |
| ADMISSION_TYPE | 0 | 1.00 | 6 | 9 | 0 | 3 | 0 |
| INSURANCE | 0 | 1.00 | 7 | 10 | 0 | 4 | 0 |
| ETHNICITY | 0 | 1.00 | 5 | 56 | 0 | 35 | 0 |
| DIAGNOSIS | 1 | 1.00 | 2 | 190 | 0 | 4206 | 0 |
| HOSPITAL_EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| GENDER | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
| EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| birthday | 0 | 1.00 | 7 | 8 | 0 | 9448 | 0 |
| admitday | 0 | 1.00 | 7 | 16 | 0 | 10381 | 0 |
| dischday | 0 | 1.00 | 7 | 16 | 0 | 10379 | 0 |
| deathday | 4223 | 0.59 | 7 | 7 | 0 | 5762 | 0 |
| hosp_deathday | 8834 | 0.15 | 7 | 16 | 0 | 1556 | 0 |
| urineoutput | 480 | 0.95 | 3 | 11 | 0 | 2693 | 0 |
| value | 416 | 0.96 | 3 | 11 | 0 | 2941 | 0 |
| urine_out | 416 | 0.96 | 3 | 11 | 0 | 2695 | 0 |
db_PEO_urine <- db_PEO_urine |> compute()
db_PEO_urine |> mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_PEO_urine, i… |
| Number of rows | 10390 |
| Number of columns | 22 |
| _______________________ | |
| Column type frequency: | |
| character | 22 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECT_ID | 0 | 1.00 | 3 | 7 | 0 | 10390 | 0 |
| HADM_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| ICUSTAY_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| h2ras | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| death_elasp | 4223 | 0.59 | 3 | 20 | 0 | 6107 | 0 |
| mortality30 | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| age | 0 | 1.00 | 4 | 16 | 0 | 9355 | 0 |
| ADMISSION_TYPE | 0 | 1.00 | 6 | 9 | 0 | 3 | 0 |
| INSURANCE | 0 | 1.00 | 7 | 10 | 0 | 4 | 0 |
| ETHNICITY | 0 | 1.00 | 5 | 56 | 0 | 35 | 0 |
| DIAGNOSIS | 1 | 1.00 | 2 | 190 | 0 | 4206 | 0 |
| HOSPITAL_EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| GENDER | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
| EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| birthday | 0 | 1.00 | 7 | 8 | 0 | 9448 | 0 |
| admitday | 0 | 1.00 | 7 | 16 | 0 | 10381 | 0 |
| dischday | 0 | 1.00 | 7 | 16 | 0 | 10379 | 0 |
| deathday | 4223 | 0.59 | 7 | 7 | 0 | 5762 | 0 |
| hosp_deathday | 8834 | 0.15 | 7 | 16 | 0 | 1556 | 0 |
| urineoutput | 480 | 0.95 | 3 | 11 | 0 | 2693 | 0 |
| value | 416 | 0.96 | 3 | 11 | 0 | 2941 | 0 |
| urine_out | 416 | 0.96 | 3 | 11 | 0 | 2695 | 0 |
vars <- c("urine_out")
db_PEO_urine |> select(c(vars, "h2ras"))|> collect() |>
CreateTableOne(vars = vars, strata = "h2ras") -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(vars)
##
## # Now:
## data %>% select(all_of(vars))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test2.csv")
## Stratified by h2ras
## 0 1 p test SMD
## n 5949 4441
## urine_out (mean (SD)) 1781.19 (1381.17) 1842.99 (1309.36) 0.024 0.046
## Stratified by h2ras
## Missing
## n
## urine_out (mean (SD)) 4.0
cols_continuous <- c("urine_out")
db_PEO_urine |>
select(cols_continuous) |>
pivot_longer(cols = cols_continuous, names_to = "name", values_to = "value") |>
ggplot()+
geom_histogram(aes(x = value), color = "black")+
facet_wrap(~ name, scales = "free", ncol = 2) +
theme_bw()+
theme(text = element_text(size = 20))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(cols_continuous)
##
## # Now:
## data %>% select(all_of(cols_continuous))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 416 rows containing non-finite values (`stat_bin()`).
db_urine <-
db_PEO_urine |>
select("HADM_ID", "urine_out")
db_urine <- db_urine |> compute()
#SOFA, SAPSiii 重症度スコアは各ICU入室ごとに作成されている。正直このまま使えばいいのは楽。
db_sofa <- tbl(con_mimic, "sofa")
db_saps <- tbl(con_mimic, "apsiii")
db_sofa_1 <- db_sofa |> select(icustay_id, SOFA)
db_saps_1 <- db_saps |> select(icustay_id, apsiii)
db_PEO_sofa <-
db_PEO |>
left_join(db_sofa_1, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) |>
left_join(db_saps_1, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE)
db_PEO_sofa <- db_PEO_sofa |> compute()
vars <- c("SOFA", "apsiii")
db_PEO_sofa |> select(c(vars, "h2ras"))|> collect() |>
CreateTableOne(vars = vars, strata = "h2ras") -> tableone
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test3.csv")
## Stratified by h2ras
## 0 1 p test SMD Missing
## n 5949 4441
## SOFA (mean (SD)) 4.59 (3.04) 5.04 (3.02) <0.001 0.147 0.0
## apsiii (mean (SD)) 48.66 (20.54) 46.51 (19.58) <0.001 0.107 0.0
cols_continuous <- c( "SOFA", "apsiii")
db_PEO_sofa |>
select(cols_continuous) |>
pivot_longer(cols = cols_continuous, names_to = "name", values_to = "value") |>
ggplot()+
geom_histogram(aes(x = value), color = "black")+
facet_wrap(~ name, scales = "free", ncol = 2) +
theme_bw()+
theme(text = element_text(size = 20))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
db_sofa <-
db_PEO_sofa |>
select("HADM_ID", "SOFA", "apsiii")
db_sofa <- db_sofa |> compute()
#CRRT(持続的腎代替療法) rrtしてる人のうち、dialysis_typeがCRRT, CVVH, CVVHD, CVVHDFの全員を対象とする。 pivot_RRTからicustay_idを取得する。
dbListTables(con_mimic)
## [1] "admissions" "apsiii"
## [3] "callout" "caregivers"
## [5] "chartevents" "cptevents"
## [7] "d_cpt" "d_icd_diagnoses"
## [9] "d_icd_procedures" "d_items"
## [11] "d_labitems" "datetimeevents"
## [13] "db_LVEF_assess" "dbplyr_001"
## [15] "dbplyr_002" "dbplyr_003"
## [17] "dbplyr_004" "dbplyr_005"
## [19] "dbplyr_006" "dbplyr_007"
## [21] "dbplyr_008" "dbplyr_009"
## [23] "dbplyr_010" "dbplyr_011"
## [25] "dbplyr_012" "dbplyr_013"
## [27] "dbplyr_014" "dbplyr_015"
## [29] "dbplyr_016" "dbplyr_017"
## [31] "dbplyr_018" "dbplyr_019"
## [33] "dbplyr_020" "dbplyr_021"
## [35] "dbplyr_022" "dbplyr_023"
## [37] "dbplyr_024" "dbplyr_025"
## [39] "dbplyr_026" "df_lab_list"
## [41] "diagnoses_icd" "drgcodes"
## [43] "elixhauser_ahrq_v37" "elixhauser_quan"
## [45] "heightfirstday" "heightweight"
## [47] "icustay_details" "icustays"
## [49] "inputevents_cv" "inputevents_mv"
## [51] "labevents" "microbiologyevents"
## [53] "noteevents" "outputevents"
## [55] "patients" "pivoted _ rrt"
## [57] "pivoted_height" "pivoted_labo"
## [59] "pivoted_vitals" "prescriptions"
## [61] "procedureevents_mv" "procedures_icd"
## [63] "services" "sofa"
## [65] "sqlite_stat1" "sqlite_stat4"
## [67] "transfers" "urine_output_firstday"
## [69] "urineoutput" "ventilation_classification"
## [71] "ventilation_duration" "weightduration"
## [73] "weightfirstday"
db_rrt <- tbl(con_mimic, "pivoted _ rrt")
db_rrt |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 1399045
db_rrt |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_rrt, is.nume… |
| Number of rows | 1399045 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| icustay_id | 0 | 1.00 | 8 | 8 | 0 | 3622 | 0 |
| charttime | 0 | 1.00 | 12 | 12 | 0 | 253399 | 0 |
| dialysis_present | 0 | 1.00 | 3 | 3 | 0 | 1 | 0 |
| dialysis_active | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| dialysis_type | 342019 | 0.76 | 3 | 10 | 0 | 8 | 0 |
db_rrt |> head(10)
## # Source: SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id charttime dialysis_present dialysis_active dialysis_type
## <dbl> <dbl> <dbl> <dbl> <chr>
## 1 200001 6687101640 1 0 <NA>
## 2 200001 6687117720 1 0 <NA>
## 3 200001 6687259560 1 0 <NA>
## 4 200055 6602027940 1 0 <NA>
## 5 200055 6602058300 1 0 <NA>
## 6 200065 4740669900 1 0 <NA>
## 7 200065 4740732000 1 0 <NA>
## 8 200065 4740782400 1 0 <NA>
## 9 200065 4740832800 1 0 <NA>
## 10 200065 4740843600 1 0 <NA>
#そもそもdialysis_typeがどのくらいあるのかチェックする. #列の入力要素を確認できる
db_rrt |> distinct(dialysis_type)
## # Source: SQL [9 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## dialysis_type
## <chr>
## 1 <NA>
## 2 IHD
## 3 CAVH
## 4 CRRT
## 5 CVVH
## 6 SCUF
## 7 CVVHD
## 8 CVVHDF
## 9 Peritoneal
#特定のdialysis_typeに限定し、該当するicustay_idを取得する
RRT_icu_id <-
db_rrt |> filter(dialysis_type %in% c("CRRT", "CVVH", "CVVHD", "CVVHDF")) |>
distinct(icustay_id) |> pull(icustay_id)
#db_PEOにrrtの有無を示す列を作る。
db_PEO_rrt <-
db_PEO |>
mutate(rrt = ifelse(ICUSTAY_ID %in% RRT_icu_id, 1, 0))
db_PEO_rrt <- db_PEO_rrt |> compute()
vars <- c("rrt")
vars_fact <- vars
db_PEO_rrt |> select(c(vars, "h2ras"))|> collect() |>
CreateTableOne(vars = vars, strata = "h2ras", factorVars = vars_fact) -> tableone
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test4.csv")
## Stratified by h2ras
## 0 1 p test SMD Missing
## n 5949 4441
## rrt = 1 (%) 155 (2.6) 138 (3.1) 0.142 0.030 0.0
db_rrt <-
db_PEO_rrt |>
select("HADM_ID", "rrt")
db_rrt <- db_rrt |> compute("db_rrt")
#use of ventilator ventilation_durationのあるICUSTAY_IDを人工呼吸ありとする
dbListTables(con_mimic)
## [1] "admissions" "apsiii"
## [3] "callout" "caregivers"
## [5] "chartevents" "cptevents"
## [7] "d_cpt" "d_icd_diagnoses"
## [9] "d_icd_procedures" "d_items"
## [11] "d_labitems" "datetimeevents"
## [13] "db_LVEF_assess" "db_rrt"
## [15] "dbplyr_001" "dbplyr_002"
## [17] "dbplyr_003" "dbplyr_004"
## [19] "dbplyr_005" "dbplyr_006"
## [21] "dbplyr_007" "dbplyr_008"
## [23] "dbplyr_009" "dbplyr_010"
## [25] "dbplyr_011" "dbplyr_012"
## [27] "dbplyr_013" "dbplyr_014"
## [29] "dbplyr_015" "dbplyr_016"
## [31] "dbplyr_017" "dbplyr_018"
## [33] "dbplyr_019" "dbplyr_020"
## [35] "dbplyr_021" "dbplyr_022"
## [37] "dbplyr_023" "dbplyr_024"
## [39] "dbplyr_025" "dbplyr_026"
## [41] "dbplyr_027" "df_lab_list"
## [43] "diagnoses_icd" "drgcodes"
## [45] "elixhauser_ahrq_v37" "elixhauser_quan"
## [47] "heightfirstday" "heightweight"
## [49] "icustay_details" "icustays"
## [51] "inputevents_cv" "inputevents_mv"
## [53] "labevents" "microbiologyevents"
## [55] "noteevents" "outputevents"
## [57] "patients" "pivoted _ rrt"
## [59] "pivoted_height" "pivoted_labo"
## [61] "pivoted_vitals" "prescriptions"
## [63] "procedureevents_mv" "procedures_icd"
## [65] "services" "sofa"
## [67] "sqlite_stat1" "sqlite_stat4"
## [69] "transfers" "urine_output_firstday"
## [71] "urineoutput" "ventilation_classification"
## [73] "ventilation_duration" "weightduration"
## [75] "weightfirstday"
db_venti_d <- tbl(con_mimic, "ventilation_duration")
db_venti_d |> head(10)
## # Source: SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## icustay_id ventnum starttime endtime duration_hours
## <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 200241 1 5542520400 5542527600 2
## 2 200614 1 5608353600 5608360800 2
## 3 201027 1 7091373600 7091380800 2
## 4 201705 1 5497311600 5497318800 2
## 5 201706 1 7235535600 7235542800 2
## 6 202391 1 6914916000 6914923200 2
## 7 202404 1 6504073200 6504080400 2
## 8 203143 1 5711580000 5711587200 2
## 9 203480 1 6343106400 6343113600 2
## 10 203674 1 4264786800 4264794000 2
db_venti_d |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 38518
db_venti_d |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_venti_d, is…. |
| Number of rows | 38518 |
| Number of columns | 5 |
| _______________________ | |
| Column type frequency: | |
| character | 5 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| icustay_id | 34 | 1 | 8 | 8 | 0 | 26796 | 0 |
| ventnum | 0 | 1 | 3 | 4 | 0 | 62 | 0 |
| starttime | 0 | 1 | 12 | 12 | 0 | 38089 | 0 |
| endtime | 0 | 1 | 12 | 12 | 0 | 38039 | 0 |
| duration_hours | 0 | 1 | 3 | 18 | 0 | 7114 | 0 |
venti_icu_id <-
db_venti_d |> distinct(icustay_id) |> pull(icustay_id)
db_PEO_venti <-
db_PEO |>
mutate(ventilation = ifelse(ICUSTAY_ID %in% venti_icu_id, 1, 0)) |>
replace_na(list(ventilation = 0)) #|> replace_na(list(ventilation = 0))なくて良い
db_PEO_venti <- db_PEO_venti |> compute()
db_PEO_venti |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_PEO_venti, i… |
| Number of rows | 10390 |
| Number of columns | 20 |
| _______________________ | |
| Column type frequency: | |
| character | 20 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| SUBJECT_ID | 0 | 1.00 | 3 | 7 | 0 | 10390 | 0 |
| HADM_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| ICUSTAY_ID | 0 | 1.00 | 8 | 8 | 0 | 10390 | 0 |
| h2ras | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| death_elasp | 4223 | 0.59 | 3 | 20 | 0 | 6107 | 0 |
| mortality30 | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| age | 0 | 1.00 | 4 | 16 | 0 | 9355 | 0 |
| ADMISSION_TYPE | 0 | 1.00 | 6 | 9 | 0 | 3 | 0 |
| INSURANCE | 0 | 1.00 | 7 | 10 | 0 | 4 | 0 |
| ETHNICITY | 0 | 1.00 | 5 | 56 | 0 | 35 | 0 |
| DIAGNOSIS | 1 | 1.00 | 2 | 190 | 0 | 4206 | 0 |
| HOSPITAL_EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| GENDER | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
| EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| birthday | 0 | 1.00 | 7 | 8 | 0 | 9448 | 0 |
| admitday | 0 | 1.00 | 7 | 16 | 0 | 10381 | 0 |
| dischday | 0 | 1.00 | 7 | 16 | 0 | 10379 | 0 |
| deathday | 4223 | 0.59 | 7 | 7 | 0 | 5762 | 0 |
| hosp_deathday | 8834 | 0.15 | 7 | 16 | 0 | 1556 | 0 |
| ventilation | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
vars <- c("ventilation")
vars_fact <- vars
db_PEO_venti |> select(c(vars, "h2ras"))|> collect() |>
CreateTableOne(vars = vars, strata = "h2ras", factorVars = vars_fact) -> tableone
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test5.csv")
## Stratified by h2ras
## 0 1 p test SMD Missing
## n 5949 4441
## ventilation = 1 (%) 2403 (40.4) 2837 (63.9) <0.001 0.484 0.0
db_venti <-
db_PEO_venti |>
select("HADM_ID", "ventilation")
# dbRemoveTable(con_mimic, "db_venti")
db_venti <- db_venti |> compute("db_venti")
#details: Ethnicity もともとEthnicityはadmissionsに入っているが分類が細かすぎる。 より一般的な分類にした表を公式が作成していて、icustay_detailsから参照できる。
db_details <- tbl(con_mimic, "icustay_details")
db_details |> head(10)
## # Source: SQL [10 x 19]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## subject_id hadm_id icustay_id gender dod admittime dischtime los_hospital
## <dbl> <dbl> <dbl> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 2 163353 243653 M NA 5318679840 5319013680 4
## 2 5 178980 214757 M NA 4199833860 4200034500 2
## 3 110 154943 282073 M NA 4430849220 4431438720 7
## 4 116 127203 208829 M NA 4632845160 4633184700 4
## 5 168 141436 224501 M NA 5347083300 5347250040 2
## 6 204 181170 232244 F NA 4810846260 4811056800 2
## 7 258 189406 284648 F NA 4882391940 4882693920 3
## 8 258 189406 207952 F NA 4882391940 4882693920 3
## 9 299 195143 204434 M NA 6516529620 6516691200 2
## 10 442 168519 257970 M NA 6627264060 6627444780 2
## # ℹ 11 more variables: admission_age <dbl>, ethnicity <chr>,
## # ethnicity_grouped <chr>, hospital_expire_flag <dbl>, hospstay_seq <dbl>,
## # first_hosp_stay <int>, intime <dbl>, outtime <dbl>, los_icu <dbl>,
## # icustay_seq <dbl>, first_icu_stay <int>
db_details |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_details, is…. |
| Number of rows | 61051 |
| Number of columns | 19 |
| _______________________ | |
| Column type frequency: | |
| character | 19 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| subject_id | 0 | 1.00 | 3 | 7 | 0 | 46428 | 0 |
| hadm_id | 0 | 1.00 | 8 | 8 | 0 | 57328 | 0 |
| icustay_id | 0 | 1.00 | 8 | 8 | 0 | 61051 | 0 |
| gender | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
| dod | 37162 | 0.39 | 12 | 12 | 0 | 12884 | 0 |
| admittime | 0 | 1.00 | 12 | 12 | 0 | 57047 | 0 |
| dischtime | 0 | 1.00 | 12 | 12 | 0 | 57036 | 0 |
| los_hospital | 0 | 1.00 | 3 | 5 | 0 | 162 | 0 |
| admission_age | 0 | 1.00 | 3 | 5 | 0 | 89 | 0 |
| ethnicity | 0 | 1.00 | 5 | 56 | 0 | 41 | 0 |
| ethnicity_grouped | 0 | 1.00 | 5 | 8 | 0 | 7 | 0 |
| hospital_expire_flag | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| hospstay_seq | 0 | 1.00 | 3 | 4 | 0 | 41 | 0 |
| first_hosp_stay | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
| intime | 0 | 1.00 | 12 | 12 | 0 | 61050 | 0 |
| outtime | 10 | 1.00 | 12 | 12 | 0 | 61037 | 0 |
| los_icu | 10 | 1.00 | 3 | 5 | 0 | 146 | 0 |
| icustay_seq | 0 | 1.00 | 3 | 3 | 0 | 7 | 0 |
| first_icu_stay | 0 | 1.00 | 1 | 1 | 0 | 2 | 0 |
#ethinicityの種類を確認する
db_details |> distinct(ethnicity_grouped)
## # Source: SQL [7 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ethnicity_grouped
## <chr>
## 1 asian
## 2 other
## 3 white
## 4 black
## 5 unknown
## 6 hispanic
## 7 native
db_details |> group_by(ethnicity_grouped) |> count()
## # Source: SQL [7 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Groups: ethnicity_grouped
## ethnicity_grouped n
## <chr> <int>
## 1 asian 2024
## 2 black 5955
## 3 hispanic 2182
## 4 native 57
## 5 other 1827
## 6 unknown 6173
## 7 white 42833
#native, other, unknownはotherとした。
db_details_ethnicity <-
db_details |>
distinct(hadm_id, .keep_all = T) |>
select(hadm_id, ethnicity_grouped) |>
mutate(ethnicity_5 = case_when(
ethnicity_grouped %in% c("native", "other", "unknown") ~ "other",
TRUE ~ ethnicity_grouped
)) #TRUE ~ ethnicity_groupedはethnicity_groupedがそのまま代入。
db_details_ethnicity |> group_by(ethnicity_5) |> count()
## # Source: SQL [5 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Groups: ethnicity_5
## ethnicity_5 n
## <chr> <int>
## 1 asian 1932
## 2 black 5625
## 3 hispanic 2077
## 4 other 7565
## 5 white 40129
db_PEO_ethnicity <-
db_PEO |>
left_join(db_details_ethnicity, by = c("HADM_ID" = "hadm_id"),copy = TRUE)
db_PEO_ethnicity <- db_PEO_ethnicity |> compute()
asiaの数が逆転している
var = c("ethnicity_5")
tableone<-
db_PEO_ethnicity |> select(c(var, "h2ras")) |> collect() |>
CreateTableOne(vars = var, strata = "h2ras", factorVars = var)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(var)
##
## # Now:
## data %>% select(all_of(var))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd=T, missing = T)
## Stratified by h2ras
## 0 1 p test SMD Missing
## n 5949 4441
## ethnicity_5 (%) 0.002 0.081 0.9
## asian 101 ( 1.7) 80 ( 1.8)
## black 499 ( 8.5) 362 ( 8.2)
## hispanic 123 ( 2.1) 125 ( 2.8)
## other 892 (15.2) 569 (12.9)
## white 4262 (72.5) 3279 (74.3)
db_ethnicity <-
db_PEO_ethnicity |>
select(HADM_ID, ethnicity_5)
db_ethnicity <- db_ethnicity |> compute()
#comorbidity Quanの定義による合併症リストはBigQueryから入手可能 しかし、論文ではこの合併症リストにないものが多い(H2ブロッカー使用に関連のありそうな合併症を選択している) また、Quanのリストにある合併症に関してもBigQueryファイルと論文とでかなり食い違う。おそらく著者らは病名のICD9コードから自分たちで抽出しているっぽい
我々もQuanのリストからは採用せず、手動でデータ抽出していく事にする
hypertension “d_icd_diagnoses”と”diagnoses_icd”を使う やることは心不全を抽出した時と同じなので、比較的単純
HT_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("hypertension", ignore_case = T))) |>
filter(!str_detect(LONG_TITLE, coll("without", ignore_case = T))) |>
pull(ICD9_CODE)
hadm_id_vec <- db_PEO |> pull(HADM_ID)
db_ht <-
db_diag |>
filter(HADM_ID %in% hadm_id_vec) |>
mutate(HT = ifelse(ICD9_CODE %in% HT_icd_9, 1, 0)) |>
group_by(HADM_ID) |>
summarize(hypertension = max(HT))
db_ht |> head(10)
## # Source: SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## HADM_ID hypertension
## <dbl> <dbl>
## 1 100018 0
## 2 100035 0
## 3 100036 0
## 4 100050 1
## 5 100078 1
## 6 100087 1
## 7 100095 0
## 8 100099 1
## 9 100113 0
## 10 100124 0
db_ht |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_ht, is.numer… |
| Number of rows | 10390 |
| Number of columns | 2 |
| _______________________ | |
| Column type frequency: | |
| character | 2 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| HADM_ID | 0 | 1 | 8 | 8 | 0 | 10390 | 0 |
| hypertension | 0 | 1 | 3 | 3 | 0 | 2 | 0 |
db_ht |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10390
#論文にある全ての合併症 以下にコード例を示す。自分でも試行錯誤してみると良い。
df_d_icd_diag|>head(10)
## # A tibble: 10 × 4
## ROW_ID ICD9_CODE SHORT_TITLE LONG_TITLE
## <dbl> <chr> <chr> <chr>
## 1 174 01166 TB pneumonia-oth test Tuberculous pneumonia [any form], …
## 2 175 01170 TB pneumothorax-unspec Tuberculous pneumothorax, unspecif…
## 3 176 01171 TB pneumothorax-no exam Tuberculous pneumothorax, bacterio…
## 4 177 01172 TB pneumothorx-exam unkn Tuberculous pneumothorax, bacterio…
## 5 178 01173 TB pneumothorax-micro dx Tuberculous pneumothorax, tubercle…
## 6 179 01174 TB pneumothorax-cult dx Tuberculous pneumothorax, tubercle…
## 7 180 01175 TB pneumothorax-histo dx Tuberculous pneumothorax, tubercle…
## 8 181 01176 TB pneumothorax-oth test Tuberculous pneumothorax, tubercle…
## 9 182 01180 Pulmonary TB NEC-unspec Other specified pulmonary tubercul…
## 10 183 01181 Pulmonary TB NEC-no exam Other specified pulmonary tubercul…
#hypertension
HT_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("hypertension", ignore_case = T))) |>
filter(!str_detect(LONG_TITLE, coll("without", ignore_case = T))) |>
pull(ICD9_CODE)
#diabetes
DM_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("diabetes me", ignore_case = T))|
str_detect(SHORT_TITLE, coll("DMI", ignore_case = F))) |>
filter(str_starts(ICD9_CODE, "2")) |>
pull(ICD9_CODE)
#anemia 貧血
anemia_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("anemia", ignore_case = T))) |>
filter(str_starts(ICD9_CODE, "2")) |>
pull(ICD9_CODE)
#af
af_icd_9 <- c("42731")
#coronary atherosclerosis 冠動脈アテローム性動脈硬化症
coronary_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("coronary atherosclerosis", ignore_case = T))) |>
pull(ICD9_CODE)
#venous thrombus 静脈血栓
venous_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("venous embo", ignore_case = T))|
str_detect(SHORT_TITLE, coll("vein thromb", ignore_case = T))) |>
pull(ICD9_CODE)
#myocardial infarction 心筋梗塞
mi_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("myocardial infarction", ignore_case = T))) |>
filter(!ICD9_CODE %in% c("4110", "41181", "42979")) |>
pull(ICD9_CODE)
#gastritis 胃炎
gastritis_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("gastritis", ignore_case = T))) |>
pull(ICD9_CODE)
#duodenal ulcer 十二指腸潰瘍
d_ulcer_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("duodenal ulcer", ignore_case = T))) |>
pull(ICD9_CODE)
#gastric ulcer 胃潰瘍
g_ulcer_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("gastric ulcer", ignore_case = T))) |>
pull(ICD9_CODE)
#gastric bleeding 胃出血。
g_bleed_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("gastr", ignore_case = T))|
str_detect(LONG_TITLE, coll("intest", ignore_case = T))|
str_detect(LONG_TITLE, coll("duodenal", ignore_case = T))|
str_detect(LONG_TITLE, coll("colon", ignore_case = T))) |>
filter(str_detect(LONG_TITLE, coll("hemorrh", ignore_case = T))) |>
filter(!str_detect(LONG_TITLE, coll("without mention of hemorrhage", ignore_case = T))) |>
pull(ICD9_CODE)
#AKI 急性腎障害
aki_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("acute kidney", ignore_case = T))) |>
pull(ICD9_CODE)
#septic shock 敗血症性ショック
s_shock_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("septic shock", ignore_case = T))) |>
pull(ICD9_CODE)
#pneumonia 肺炎
pneumonia_icd_9 <-
df_d_icd_diag |>
filter(str_detect(LONG_TITLE, coll("pneumonia", ignore_case = T))) |>
pull(ICD9_CODE)
hadm_id_vec <- db_PEO |> pull(HADM_ID)
db_comorb_manual <-
db_diag |>
filter(HADM_ID %in% hadm_id_vec) |>
mutate(HT = ifelse(ICD9_CODE %in% HT_icd_9, 1, 0),
DM = ifelse(ICD9_CODE %in% DM_icd_9, 1, 0),
anemia = ifelse(ICD9_CODE %in% anemia_icd_9, 1, 0),
Af = ifelse(ICD9_CODE %in% af_icd_9, 1, 0),
coronary_atherosclerosis = ifelse(ICD9_CODE %in% coronary_icd_9, 1, 0),
venous_thrombus = ifelse(ICD9_CODE %in% venous_icd_9, 1, 0),
MI = ifelse(ICD9_CODE %in% mi_icd_9, 1, 0),
gastritis = ifelse(ICD9_CODE %in% gastritis_icd_9, 1, 0),
g_ulcer = ifelse(ICD9_CODE %in% g_ulcer_icd_9, 1, 0),
d_ulcer = ifelse(ICD9_CODE %in% d_ulcer_icd_9, 1, 0),
g_bleeding = ifelse(ICD9_CODE %in% g_bleed_icd_9, 1, 0),
aki = ifelse(ICD9_CODE %in% aki_icd_9, 1, 0),
s_shock = ifelse(ICD9_CODE %in% s_shock_icd_9, 1, 0),
pneumonia = ifelse(ICD9_CODE %in% pneumonia_icd_9, 1, 0)) |>
group_by(HADM_ID) |>
summarize(hypertension = max(HT),
DM = max(DM),
anemia = max(anemia),
Af = max(Af),
coronary_atherosclerosis = max(coronary_atherosclerosis),
venous_thrombus = max(venous_thrombus),
MI = max(MI),
gastritis = max(gastritis),
gastric_ulcer = max(g_ulcer),
duodenal_ulcer = max(d_ulcer),
gastric_bleeding = max(g_bleeding),
aki = max(aki),
septic_shock = max(s_shock),
pneumonia = max(pneumonia))
db_comorb_manual |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10390
db_PEO_comorb <-
db_PEO |>
left_join(db_comorb_manual, by = "HADM_ID",copy = TRUE)
db_PEO_comorb <- db_PEO_comorb |> compute()
cols_comorb <- c(
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", "venous_thrombus", "MI",
"gastritis", "gastric_ulcer", "duodenal_ulcer", "gastric_bleeding", "aki", "septic_shock",
"pneumonia")
#Pneumonia(肺炎)がH2RAS使用群と非使用群で論文と逆になっている。
vars <- cols_comorb
factors <- cols_comorb
tableone <-db_PEO_comorb |> select(c(vars, "h2ras"))|> collect() |>
CreateTableOne(vars = vars, strata = "h2ras", factorVars = factors)
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test6.csv")
## Stratified by h2ras
## 0 1 p test SMD
## n 5949 4441
## hypertension = 1 (%) 2356 (39.6) 2027 (45.6) <0.001 0.122
## DM = 1 (%) 2132 (35.8) 1643 (37.0) 0.233 0.024
## anemia = 1 (%) 1996 (33.6) 1416 (31.9) 0.077 0.036
## Af = 1 (%) 2537 (42.6) 2034 (45.8) 0.001 0.064
## coronary_atherosclerosis = 1 (%) 2268 (38.1) 2047 (46.1) <0.001 0.162
## venous_thrombus = 1 (%) 112 ( 1.9) 76 ( 1.7) 0.566 0.013
## MI = 1 (%) 1002 (16.8) 716 (16.1) 0.341 0.019
## gastritis = 1 (%) 121 ( 2.0) 61 ( 1.4) 0.014 0.051
## gastric_ulcer = 1 (%) 66 ( 1.1) 20 ( 0.5) <0.001 0.075
## duodenal_ulcer = 1 (%) 71 ( 1.2) 25 ( 0.6) 0.001 0.068
## gastric_bleeding = 1 (%) 516 ( 8.7) 184 ( 4.1) <0.001 0.186
## aki = 1 (%) 2262 (38.0) 1410 (31.7) <0.001 0.132
## septic_shock = 1 (%) 415 ( 7.0) 260 ( 5.9) 0.024 0.046
## pneumonia = 1 (%) 1343 (22.6) 926 (20.9) 0.038 0.042
## Stratified by h2ras
## Missing
## n
## hypertension = 1 (%) 0.0
## DM = 1 (%) 0.0
## anemia = 1 (%) 0.0
## Af = 1 (%) 0.0
## coronary_atherosclerosis = 1 (%) 0.0
## venous_thrombus = 1 (%) 0.0
## MI = 1 (%) 0.0
## gastritis = 1 (%) 0.0
## gastric_ulcer = 1 (%) 0.0
## duodenal_ulcer = 1 (%) 0.0
## gastric_bleeding = 1 (%) 0.0
## aki = 1 (%) 0.0
## septic_shock = 1 (%) 0.0
## pneumonia = 1 (%) 0.0
db_comorb <-
db_PEO_comorb |>
select("HADM_ID", all_of(cols_comorb))
db_comorb <- db_comorb |> compute()
#LVEF noteeventsを使う 超音波検査の結果を記述した文章からEFの値を抽出するという難作業。 文章データなので大きなファイルだがstringrを使いたいので、ある程度絞った後にcollectする メモリに余裕のある人向けのタスクかもしれない(4GBだとおそらく不可、16GBなら可能、8GBは手元にないので検証できていません→是非やってみて結果を教えてください。)
db_note <- tbl(con_mimic, "noteevents")
db_note |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID" "CHARTDATE" "CHARTTIME"
## [6] "STORETIME" "CATEGORY" "DESCRIPTION" "CGID" "ISERROR"
## [11] "TEXT"
db_note |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 2083180
db_note |> head(10)
## # Source: SQL [10 x 11]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ROW_ID SUBJECT_ID HADM_ID CHARTDATE CHARTTIME STORETIME CATEGORY DESCRIPTION
## <dbl> <dbl> <dbl> <dbl> <int> <int> <chr> <chr>
## 1 174 22532 167853 66324 NA NA Discharg… Report
## 2 175 13702 107527 54220 NA NA Discharg… Report
## 3 176 13702 167118 54565 NA NA Discharg… Report
## 4 177 13702 196489 56477 NA NA Discharg… Report
## 5 178 26880 135453 70210 NA NA Discharg… Report
## 6 179 53181 170490 73846 NA NA Discharg… Report
## 7 180 20646 134727 52208 NA NA Discharg… Report
## 8 181 42130 114236 65803 NA NA Discharg… Report
## 9 182 56174 163469 54279 NA NA Discharg… Report
## 10 183 56174 189681 54398 NA NA Discharg… Report
## # ℹ 3 more variables: CGID <int>, ISERROR <int>, TEXT <chr>
#文章カテゴリーを見てみる 15カテゴリーの文章情報。ここからLVEFが記載されていそうな文章を選ぶ。 通常はEchoと各種サマリー系だが、面倒であれば全部選ぶのでも問題ない。 TEXTの列をwrite.CSV()でcsvファイルに書き出して、開くと見やすい。
db_note |> distinct(CATEGORY)
## # Source: SQL [?? x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## CATEGORY
## <chr>
## 1 Discharge summary
## 2 Echo
## 3 ECG
## 4 Nursing
## 5 Physician
## 6 Rehab Services
## 7 Case Management
## 8 Respiratory
## 9 Nutrition
## 10 General
## # ℹ more rows
ptids <- db_PEO |> pull(HADM_ID)
db_PEO |> distinct(HADM_ID)|>count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10390
db_note_echo <-
db_note |>
filter(HADM_ID %in% ptids) |>
# filter(CATEGORY %in% c("Echo")) |>
compute()
db_note_echo |> distinct(HADM_ID) |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 10347
10390人中、検査・医療資源治療が記載されている人は10347人であった。
df_note_echo <- collect(db_note_echo)
pryr::object_size(df_note_echo)
## 803.97 MB
normal_text <-
c("systolic function.{0,10}normal", #systolic function とnormalの間には、最大で10文字までの任意の文字列(スペース、記号、数字など)が存在できる。
"Normal regional LV systolic function",
"left ventricular systolic function appears grossly preserved",
"LV systolic function appears grossly preserved",
"left ventricular systolic function is.{0,12}normal",
"LV systolic function is.{0,12}normal"
)
mild_text <-
c("left ventricular systolic function is mildly depressed",
"LV systolic function is mildly depressed",
"left ventricular systolic fxn is slightly depressed",
"LV systolic fxn is slightly depressed",
"mild regional left ventricular systolic dysfunction",
"mild regional LV systolic dysfunction",
"mildly depressed LVEF",
"mild global left ventricular hypokinesis",
"mild global LV hypokinesis",
"mild left ventricular hypokinesis",
"mild LV hypokinesis",
"left ventricular systolic function is.{0,5}slightly worse",
"LV systolic function is.{0,5}slightly worse")
moderate_text <-
c("left ventricular systolic function is moderately depressed",
"LV systolic function is moderately depressed",
"left ventricular systolic fxn is moderately depressed",
"LV systolic fxn is moderately depressed",
"moderate regional left ventricular systolic dysfunction",
"moderate regional LV systolic dysfunction",
"moderately depressed LVEF",
"moderate global left ventricular hypokinesis",
"moderate global LV hypokinesis",
"moderate left ventricular hypokinesis",
"moderate LV hypokinesis",
"left ventricular systolic function is.{0,5}moderately worse",
"LV systolic function is.{0,5}moderately worse"
)
severe_text <-
c("left ventricular systolic function is severely depressed",
"LV systolic function is severely depressed",
"left ventricular systolic fxn is severely depressed",
"LV systolic fxn is severely depressed",
"severe regional left ventricular systolic dysfunction",
"severe regional LV systolic dysfunction",
"severely depressed LVEF",
"severe global left ventricular hypokinesis",
"severe global LV hypokinesis",
"severe left ventricular hypokinesis",
"severe LV hypokinesis",
"left ventricular systolic function is.{0,5}severely worse",
"LV systolic function is.{0,5}severely worse"
)
difficult_text <-
c("LV systolic function is difficult to assess",
"left ventricular systolic function is difficult to assess",
"LV systolic function cannot be reliably assessed",
"left ventricular systolic function cannot be reliably assessed",
"LV not well seen",
"LVEF cannot be estimated")
abnormal_text <-
c("left ventricular systolic function is now depressed",
"left ventricular systolic function is depressed",
"LV systolic function appears depressed",
"depressed left ventricular.{0,10}function",
"depressed LV.{0,10}function"
)
df_note_echo_1 <-
df_note_echo |>
select(HADM_ID, TEXT, CHARTDATE, CATEGORY) |>
#1
mutate(TEXT = str_replace_all(TEXT, pattern = "\n", replacement = " ")) |> #\nは改行を示す正規表現。改行をスペースに置換すれば文章の続きとして認識される。
mutate(
#2
LVEF_ex1 = str_extract(TEXT, "EF.{0,8}[0-9]{2}"),
LVEF_ex2 = str_extract(TEXT, "ejection fraction.{0,8}[0-9]{2}"),
#3
EF_assess = case_when(
str_detect(TEXT, regex(str_c(severe_text, collapse = "|"),ignore_case = T)) ~ "severe",
str_detect(TEXT, regex(str_c(moderate_text, collapse = "|"),ignore_case = T)) ~ "moderate",
str_detect(TEXT, regex(str_c(mild_text, collapse = "|"),ignore_case = T)) ~ "mild",
str_detect(TEXT, regex(str_c(normal_text, collapse = "|"),ignore_case = T)) ~ "normal",
# str_detect(TEXT, regex(str_c(difficult_text, collapse = "|"),ignore_case = T)) ~ "difficult",
str_detect(TEXT,regex(str_c(abnormal_text, collapse = "|"),ignore_case = T)) ~ "mild"
)) |> select(!TEXT)
df_note_echo_1
## # A tibble: 377,474 × 6
## HADM_ID CHARTDATE CATEGORY LVEF_ex1 LVEF_ex2 EF_assess
## <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 135453 70210 Discharge summary <NA> <NA> <NA>
## 2 134727 52208 Discharge summary <NA> <NA> <NA>
## 3 121936 56659 Discharge summary EF 25-30 <NA> moderate
## 4 139574 81227 Discharge summary EF >55 <NA> <NA>
## 5 121504 83685 Discharge summary <NA> <NA> <NA>
## 6 120644 82020 Discharge summary EF 35 <NA> moderate
## 7 116471 57553 Discharge summary <NA> <NA> <NA>
## 8 146533 79757 Discharge summary <NA> <NA> <NA>
## 9 106238 79054 Discharge summary EF of 30 <NA> <NA>
## 10 114242 77490 Discharge summary <NA> <NA> <NA>
## # ℹ 377,464 more rows
#LVEFの数値部分のみ抽出する 数値から分類カテゴリー列を作成する。 分類カテゴリーは文章表現を優先し、数値は文章がなかった場合とした。このあたりの優先も論文未記載。
df_note_echo_assess <-
df_note_echo_1 |>
mutate(LVEF_num = case_when(
!is.na(LVEF_ex1) ~ str_extract(LVEF_ex1, pattern = "[0-9]{2}"),
!is.na(LVEF_ex2) ~ str_extract(LVEF_ex2, pattern = "[0-9]{2}"))
) |>
mutate(LVEF_num_assess = case_when(
LVEF_num >= 55 ~ "normal",
LVEF_num >= 45 ~ "mild",
LVEF_num >= 30 ~ "moderate",
LVEF_num >= 0 ~ "severe"
)) |>
mutate(LVEF_ctg = case_when(
!is.na(EF_assess) ~ EF_assess,
!is.na(LVEF_num_assess) ~ LVEF_num_assess
)) |>
arrange(CHARTDATE) |>
select(HADM_ID, CHARTDATE, LVEF_ctg) |>
filter(!is.na(LVEF_ctg)) |>
distinct(HADM_ID, .keep_all = T)
df_note_echo_assess
## # A tibble: 8,860 × 3
## HADM_ID CHARTDATE LVEF_ctg
## <dbl> <dbl> <chr>
## 1 125380 47665 moderate
## 2 137275 47668 normal
## 3 176771 47678 severe
## 4 192798 47684 normal
## 5 116534 47699 normal
## 6 185717 47704 mild
## 7 175778 47706 normal
## 8 175719 47709 moderate
## 9 134899 47712 severe
## 10 184844 47712 moderate
## # ℹ 8,850 more rows
#データフレームのままだとmimic.dbファイルに結合できないので、.dbに戻す dbRemoveTable()は初回は不要。2回目以降は既に作成されているdb_LVEF_assessを削除する必要あり。
dbRemoveTable(con_mimic, "db_LVEF_assess")
dbWriteTable(con_mimic, "db_LVEF_assess", df_note_echo_assess)
db_LVEF_assess <- tbl(con_mimic, "db_LVEF_assess")
db_PEO_LVEF <-
db_PEO |>
left_join(db_LVEF_assess, by = "HADM_ID",copy = TRUE)
db_PEO_LVEF <- db_PEO_LVEF |> compute()
#欠測は15%未満におさまった。
cols_ctg <- c("LVEF_ctg")
db_PEO_LVEF |> select(cols_ctg, "h2ras")|> collect() |>
mutate(LVEF_ctg = factor(LVEF_ctg,
levels = c("severe", "moderate", "mild", "normal"))) |>
CreateTableOne(vars = cols_ctg, strata = "h2ras", factorVars = cols_ctg) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(cols_ctg)
##
## # Now:
## data %>% select(all_of(cols_ctg))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("test.csv")
## Stratified by h2ras
## 0 1 p test SMD Missing
## n 5949 4441
## LVEF_ctg (%) <0.001 0.104 14.7
## severe 1294 (25.9) 845 (21.9)
## moderate 922 (18.4) 689 (17.9)
## mild 741 (14.8) 645 (16.7)
## normal 2044 (40.9) 1680 (43.5)
db_LVEF <-
db_PEO_LVEF |>
select("HADM_ID", cols_ctg)
db_LVEF <- db_LVEF |> compute()
db_labitems <- tbl(con_mimic, "d_labitems")
db_labitems |> colnames()
## [1] "ROW_ID" "ITEMID" "LABEL" "FLUID" "CATEGORY"
## [6] "LOINC_CODE"
df_labitems <- collect(db_labitems)
# blood
df_lab_items <-
df_labitems |>
filter(str_detect(FLUID, coll("blood", ignore_case = T))) |>
filter(str_detect(LABEL, coll("white",ignore_case = T )) |
str_detect(LABEL, coll("red blood",ignore_case = T )) |
str_detect(LABEL, coll("platelet count",ignore_case = T )) |
str_detect(LABEL, coll("gluco",ignore_case = T )) |
str_detect(LABEL, coll("urea",ignore_case = T )) |
str_detect(LABEL, coll("creatinine",ignore_case = T )) |
str_detect(LABEL, coll("sodium",ignore_case = T )) |
str_detect(LABEL, coll("calcium",ignore_case = T )) |
str_detect(LABEL, coll("magne",ignore_case = T ))
) |>
filter(CATEGORY != "Blood Gas") |>
filter(!ITEMID %in% c(51278, 51529)) |>
select(ITEMID, LABEL)
df_lab_items
## # A tibble: 9 × 2
## ITEMID LABEL
## <dbl> <chr>
## 1 50893 Calcium, Total
## 2 50912 Creatinine
## 3 50931 Glucose
## 4 50960 Magnesium
## 5 50983 Sodium
## 6 51006 Urea Nitrogen
## 7 51265 Platelet Count
## 8 51279 Red Blood Cells
## 9 51301 White Blood Cells
vec_labitems <-
df_lab_items |>
select(ITEMID) |>
pull()
db_labeve <- tbl(con_mimic, "labevents")
db_labeve |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID" "ITEMID" "CHARTTIME"
## [6] "VALUE" "VALUENUM" "VALUEUOM" "FLAG"
#2.検査結果表(膨大)を上記9つに限定する 3.さらにHADM_IDを研究対象の入院に限定する/ 入院中複数回検査しているが、どのタイミングを採用するのか論文に書いてない。 最初のタイミングを採用した。
db_labeve |> head(20)
## # Source: SQL [?? x 9]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ROW_ID SUBJECT_ID HADM_ID ITEMID CHARTTIME VALUE VALUENUM VALUEUOM FLAG
## <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <chr> <chr>
## 1 281 3 NA 50820 4158576420 7.39 7.39 units <NA>
## 2 282 3 NA 50800 4158584220 ART NA <NA> <NA>
## 3 283 3 NA 50802 4158584220 -1 -1 mEq/L <NA>
## 4 284 3 NA 50804 4158584220 22 22 mEq/L <NA>
## 5 285 3 NA 50808 4158584220 0.93 0.93 mmol/L abno…
## 6 286 3 NA 50812 4158584220 NOT INTU… NA <NA> <NA>
## 7 287 3 NA 50813 4158584220 1.8 1.8 mmol/L <NA>
## 8 288 3 NA 50818 4158584220 33 33 mm Hg <NA>
## 9 289 3 NA 50820 4158584220 7.42 7.42 units <NA>
## 10 290 3 NA 50821 4158584220 80 80 mm Hg <NA>
## # ℹ more rows
vec_hadms <-
db_PEO |>
select(HADM_ID) |>
pull()
db_labo_res <-
db_labeve |>
filter(ITEMID %in% vec_labitems) |>
filter(HADM_ID %in% vec_hadms) |>
group_by(HADM_ID) |>
distinct(ITEMID, .keep_all = T) |>
ungroup() |>
compute()
db_labo_res |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## n
## <int>
## 1 92348
#4.wide formatにして、データをJoinする
df_lab_list <-
df_lab_items |>
mutate(lab = str_sub(LABEL, start = 1, end = 3)) |>
select(ITEMID, lab)
dbRemoveTable(con_mimic, "df_lab_list")
dbWriteTable(con_mimic, "df_lab_list", df_lab_list)
db_lab_items_list <- tbl(con_mimic, "df_lab_list")
db_lab_items_list
## # Source: table<df_lab_list> [9 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## ITEMID lab
## <dbl> <chr>
## 1 50893 Cal
## 2 50912 Cre
## 3 50931 Glu
## 4 50960 Mag
## 5 50983 Sod
## 6 51006 Ure
## 7 51265 Pla
## 8 51279 Red
## 9 51301 Whi
db_labo_res_names <-
db_labo_res |>
left_join(db_lab_items_list, by = "ITEMID") |>
mutate(new_lab = paste(lab, VALUEUOM, sep = "_")) |>
select(!c(lab, VALUEUOM, VALUE, ROW_ID, FLAG, ITEMID, CHARTTIME)) |>
pivot_wider(names_from = new_lab, values_from = VALUENUM) |>
compute()
#<db_PEO_laboで完成> 割り算記号の列名は修正しておく
db_PEO_labo <-
db_PEO |>
left_join(db_labo_res_names, by = c("HADM_ID", "SUBJECT_ID"),copy = TRUE) |>
rename(Calcium = "Cal_mg/dL",
Cre = "Cre_mg/dL",
Glucose = "Glu_mg/dL",
Mg = "Mag_mg/dL",
Sodium = "Sod_mEq/L",
BUN = "Ure_mg/dL",
Plate = "Pla_K/uL",
RBC = "Red_m/uL",
WBC = "Whi_K/uL"
) |>
compute()
db_PEO_labo |> head(10)
## # Source: SQL [10 x 28]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30 age
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 145834 211552 0 236. 0 76.5
## 2 9 150750 220597 0 4.45 1 41.8
## 3 21 109451 217847 0 149. 0 87.4
## 4 26 197661 244882 0 659. 0 72.0
## 5 30 104557 225176 0 NA 0 95
## 6 34 115799 263086 0 2021. 0 95
## 7 37 188670 213503 0 503. 0 68.9
## 8 38 185910 248910 1 NA 0 75.9
## 9 42 119203 210828 0 1999. 0 61.2
## 10 49 190539 249195 1 NA 0 80.9
## # ℹ 21 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## # DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <chr>,
## # EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## # deathday <dbl>, hosp_deathday <dbl>, Calcium <dbl>, Cre <dbl>,
## # Glucose <dbl>, Mg <dbl>, Sodium <dbl>, BUN <dbl>, Plate <dbl>, RBC <dbl>,
## # WBC <dbl>
#自作関数 表作成とヒストグラム作成の自作関数を作成する
f_table = function(cols_cont, cols_fact, db){
tableone <-
db |>
select(cols_cont, cols_fact, "h2ras")|>
collect() |>
CreateTableOne(vars = c(cols_cont, cols_fact),
strata = "h2ras",
factorVars = cols_fact)
print(tableone, smd = TRUE, missing = TRUE, test = FALSE, explain = FALSE)
}
#ヒストグラム
f_hist = function(cols_cont, db){
db |>
select(cols_cont) |>
pivot_longer(cols = cols_cont, names_to = "name", values_to = "value") |>
ggplot()+
geom_histogram(aes(x = value), color = "black")+
facet_wrap(~ name, scales = "free", ncol = 4) +
theme_bw()+
theme(text = element_text(size = 16))
}
cols_labo <- c("Calcium", "Cre", "Glucose", "Mg",
"Sodium", "BUN", "Plate", "RBC", "WBC")
db_PEO_labo |>
f_table(cols_cont = cols_labo, cols_fact = c())
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(cols_cont)
##
## # Now:
## data %>% select(all_of(cols_cont))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(cols_fact)
##
## # Now:
## data %>% select(all_of(cols_fact))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## Calcium 8.50 (0.82) 8.54 (0.79) 0.053 2.9
## Cre 1.75 (1.62) 1.56 (1.44) 0.125 1.0
## Glucose 144.74 (77.12) 139.09 (65.81) 0.079 1.0
## Mg 2.04 (0.59) 2.08 (0.39) 0.090 1.4
## Sodium 138.57 (5.05) 138.54 (4.60) 0.005 1.0
## BUN 36.27 (25.29) 30.92 (21.24) 0.229 1.0
## Plate 237.54 (119.76) 228.76 (118.48) 0.074 1.0
## RBC 3.65 (0.70) 3.62 (0.67) 0.050 1.0
## WBC 11.72 (10.45) 11.68 (10.45) 0.004 1.0
#最後のmerge用ファイル
db_labo <-
db_PEO_labo |>
select("HADM_ID", cols_labo)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(cols_labo)
##
## # Now:
## data %>% select(all_of(cols_labo))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_labo <- db_labo |> compute()
#Medications - RAAs xxxpril xxxartan aliskiren
RAA_vec <- c(".+pril", ".+artan", "alis.+")
RAA_or <- str_c(RAA_vec, collapse = "|")
RAA_names <-
df_drugs |>
filter(str_detect(DRUG, regex(RAA_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(RAA_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(RAA_or, ignore_case = T))) |>
filter(!DRUG %in% c("Lidocaine-Prilocaine")) |>
select(DRUG) |> distinct() |> pull()
#“Lidocaine-Prilocaine”はノイズなので除去
使用薬剤歴全リストから、RAA阻害薬を使ったレコードのみに限定し、その患者IDを得る
# H2RAsの時作ったdb_presc_drug
RAA_user <-
db_presc_drug |>
filter(DRUG %in% RAA_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_RAA <-
db_PEO |>
mutate(RAA = ifelse(SUBJECT_ID %in% RAA_user, 1, 0))
f_table(cols_cont = c(),
cols_fact = c("RAA"),
db = db_patients_RAA)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## RAA = 1 2880 (48.4) 2722 (61.3) 0.261 0.0
for merge
db_RAA <- db_patients_RAA |> select(HADM_ID, RAA)
#これ以降、同じようなコードの繰り返しになる。 本来はコードを関数化するべきだが、内容の確認・学習の意味も込めて関数化は避けた。興味のある方は関数化してコードをスッキリさせると良い。
hydrochlorothiazide chlorthalidone metolazone indapamide
amiloride triamterene (Maxzide) spironolactone (Aldactone) eplerenone (Inspra)
diurit_vec <-
c("furosemide", "lasix", "torsemide", "Demadex", "bumetanide", "hydrochlorothiazide",
"chlorthalidone", "metolazone", "indapamide", "amiloride", "triamterene",
"spironolactone", "Aldactone", "eplerenone", "Inspra" )
diurit_or <- str_c(diurit_vec, collapse = "|")
diurit_or
## [1] "furosemide|lasix|torsemide|Demadex|bumetanide|hydrochlorothiazide|chlorthalidone|metolazone|indapamide|amiloride|triamterene|spironolactone|Aldactone|eplerenone|Inspra"
diuritic_names <-
df_drugs |>
filter(str_detect(DRUG, regex(diurit_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(diurit_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(diurit_or, ignore_case = T))) |>
select(DRUG) |>
filter(!DRUG %in% c("Captopril","Enalaprilat", "Heparin", "Metoprolol" )) |>
distinct() |> pull()
#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
diuritic_user <-
db_presc_drug |>
filter(DRUG %in% diuritic_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_diuritic <-
db_PEO |>
mutate(Diuritic = ifelse(SUBJECT_ID %in% diuritic_user, 1, 0))
#表作成
f_table(cols_cont = c(),
cols_fact = c("Diuritic"),
db = db_patients_diuritic)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## Diuritic = 1 4311 (72.5) 4083 (91.9) 0.526 0.0
#for merge
db_diuritic <- db_patients_diuritic |> select(HADM_ID, Diuritic)
Epinephrine (Adrenalin® or Auvi-Q®) Norepinephrine (Levophed® or Levarterenol®) Dopamine Dobutamine Ephedrine Phenylephrine
Levosimendan Digoxin (Cardoxin® or Lanoxin®) Milrinone Amrinone Enoximone
inotrop_vec <-
c("Epinephrine", "Adrenalin", "Norepinephrine", "Levophed", "Levarterenol",
"Dopamine", "Dobutamine"
)
inotrop_or <- str_c(inotrop_vec, collapse = "|")
inotrop_or
## [1] "Epinephrine|Adrenalin|Norepinephrine|Levophed|Levarterenol|Dopamine|Dobutamine"
inotrop_names <-
df_drugs |>
filter(str_detect(DRUG, regex(inotrop_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(inotrop_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(inotrop_or, ignore_case = T))) |>
select(DRUG) |>
filter(!str_detect(DRUG, regex("bupivacaine|lidocaine", ignore_case = T))) |>
filter(!DRUG %in% c("1", "Epinephrine Topical Soln", "Epinephrine Inhalation",
"Racepinephrine", "racemic epinephrine")) |>
distinct() |> pull()
inotrop_names
## [1] "Norepinephrine" "Epinephrine"
## [3] "Epinephrine 1:1000" "DopAmine"
## [5] "Dopamine HCl" "DOPamine"
## [7] "Norepinephrine Bitartrate" "DOBUTamine"
## [9] "Dobutamine" "Dobutamine HCl"
## [11] "NORepinephrine" "Dopamine Hcl"
## [13] "Epinephrine HCl" "Epinephrine-Sodium Chloride"
## [15] "EPINEPHrine" "Epinephrin"
## [17] "Epin" "Dopamine"
## [19] "EpiPen" "Dobutamine Hcl"
## [21] "Epinephri" "EPINEPHRINE"
## [23] "epinephrine" "Epine"
## [25] "Epineph" "epi"
## [27] "EPINEPHrine Auto Injector" "Epi"
## [29] "Epinephrine Auto Injector" "Epinephrine Base"
## [31] "Epinephrine Kit"
#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
inotrop_user <-
db_presc_drug |>
filter(DRUG %in% inotrop_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_inotrop <-
db_PEO |>
mutate(inotrop = ifelse(SUBJECT_ID %in% inotrop_user, 1, 0))
#データ確認:
f_table(cols_cont = c(),
cols_fact = c("inotrop"),
db = db_patients_inotrop)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## inotrop = 1 1780 (29.9) 2266 (51.0) 0.440 0.0
#for merge
db_inotrop <- db_patients_inotrop |> select(HADM_ID, inotrop)
#- adrenaline receptor antagonists Clonidine (Catapres) Phentolamine phenoxybenzamine Tamsulosin
Atenolol Propranolol Nebivilol Atenolol Oxprenolol Metoprolol Timolol Pindolol Nadolol Pindolol Esmolol Acebutolol Sotalol Talinolol Betaxolol Labetalol Carvedilol
anti_adrenal_vec <-
c("Clonidine", "Catapres", "Phentolamine", "phenoxybenzamine",
"Tamsulosin",
"lol"
)
anti_adrenal_or <- str_c(anti_adrenal_vec, collapse = "|")
anti_adrenal_or
## [1] "Clonidine|Catapres|Phentolamine|phenoxybenzamine|Tamsulosin|lol"
anti_adrenal_names <-
df_drugs |>
filter(str_detect(DRUG, regex(anti_adrenal_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(anti_adrenal_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(anti_adrenal_or, ignore_case = T))) |>
select(DRUG) |>
filter(!str_detect(DRUG, regex("bupivacaine|lidocaine", ignore_case = T))) |>
filter(!DRUG %in% c("Fentanyl Citrate", "Heparin")) |>
distinct() |> pull()
anti_adrenal_names
## [1] "Labetalol HCl" "Metoprolol"
## [3] "Clonidine HCl" "Atenolol"
## [5] "Metoprolol XL" "Esmolol"
## [7] "Tamsulosin HCl" "Metoprolol Tartrate"
## [9] "Dorzolamide 2%/Timolol 0.5% Ophth." "Carvedilol"
## [11] "Timolol Maleate 0.25%" "Tamsulosin"
## [13] "Timolol Maleate 0.5%" "Nadolol"
## [15] "Metoprolol Succinate XL" "Labetalol"
## [17] "Clonidine Patch 0.3 mg/24 hr" "Clonidine Patch 0.1 mg/24 hr"
## [19] "CloniDINE" "Clonidine TTS 3 Patch"
## [21] "Clonidine TTS 1 Patch" "Clonidine Patch 0.2 mg/24 hr"
## [23] "Propranolol" "Propranolol HCl"
## [25] "Sotalol HCl" "Metoprolol XL (Toprol XL)"
## [27] "Sterile Diluent for Flolan" "Clonidine TTS 2 Patch"
## [29] "METOPROLOL" "Sotalol"
## [31] "Betaxolol Hcl 0.25%" "Phentolamine Mesylate"
## [33] "Bisoprolol Fumarate" "Acebutolol HCl"
## [35] "Levobunolol Hcl 0.25%" "Levobunolol"
## [37] "Propranolol LA" "Clonidine TTS"
## [39] "Betoptic S" "Levobunolol 0.25%"
## [41] "Levobunolol 0.5%" "Levobunolol Hcl 0.5%"
## [43] "LABETALOL" "ESMOLOL"
## [45] "Clonidine T" "Proprano"
## [47] "Toprol XL" "Betimol (timolol hemihydrate)"
## [49] "Apraclonidine 0.5%" "Carteolol 1% Ophth Soln"
## [51] "Betaxolol Ophth Susp 0.25%" "Zebeta"
## [53] "Betimol ( Timolol )" "Cosopt"
## [55] "Betaxolol HCl 0.25%" "Betaxolol HCl"
## [57] "Atenolol-Chlorthalidone" "Pindolol"
## [59] "NEO*PO*Propranolol" "Stanozolol (Bulk)"
## [61] "Betaxolol" "*NF* TIMOLOL HEMIHYDRATE (BETIMOL)"
## [63] "timolol" "Esmolol in Saline (Iso-osm)"
## [65] "Propranolol Oral Solution" "Clonidine"
## [67] "Timolol" "Levobunolol HCl 0.5%"
## [69] "Timolol Maleate 0.5% XE" "Metipranolol"
## [71] "TIMOP" "Phenoxybenzamine HCl"
## [73] "Catapres" "flomax"
## [75] "Labet" "Metoprolo"
## [77] "Timolol Maleate 0.5% GFS" "Propra"
## [79] "Acebutolol" "Prop"
## [81] "bisoprolol fumarate" "metipranolol"
## [83] "sotalol" "Istalol"
## [85] "Clonidine Patch" "Sotalol AF"
## [87] "timolo" "dorzolamide-timolol"
## [89] "bisopro" "bisopr"
## [91] "bisop" "bisoprolol"
#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
anti_adrenal_user <-
db_presc_drug |>
filter(DRUG %in% anti_adrenal_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_anti_adrenal <-
db_PEO |>
mutate(anti_adrenal = ifelse(SUBJECT_ID %in% anti_adrenal_user, 1, 0))
#データ確認:
f_table(cols_cont = c(),
cols_fact = c("anti_adrenal"),
db = db_patients_anti_adrenal)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## anti_adrenal = 1 4439 (74.6) 4098 (92.3) 0.489 0.0
#for merge
db_anti_adrenal <- db_patients_anti_adrenal |> select(HADM_ID, anti_adrenal)
#calcium antagonists Amlodipine (Norvasc) Aranidipine (Sapresta) Azelnidipine (Calblock) Barnidipine (HypoCa) Benidipine (Coniel) Cilnidipine (Atelec, Cinalong, Siscard) Clevidipine (Cleviprex) Efonidipine (Landel) Felodipine (Plendil) Isradipine (DynaCirc, Prescal) Lacidipine (Motens, Lacipil) Lercanidipine (Zanidip) Manidipine (Calslot, Madipine) Nicardipine (Cardene, Carden SR) Nifedipine (Procardia, Adalat) Nilvadipine (Nivadil) Nimodipine (Nimotop) Nisoldipine (Baymycard, Sular, Syscor) Nitrendipine (Cardif, Nitrepin, Baylotensin) Pranidipine (Acalas)
Fendiline Gallopamil Verapamil (Calan, Isoptin) Diltiazem (Cardizem)
ca_blocker_vec <-
c("dipine", "Fendiline", "Gallopamil", "Verapamil",
"Diltiazem"
)
ca_blocker_or <- str_c(ca_blocker_vec, collapse = "|")
ca_blocker_or
## [1] "dipine|Fendiline|Gallopamil|Verapamil|Diltiazem"
ca_blocker_names <-
df_drugs |>
filter(str_detect(DRUG, regex(ca_blocker_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(ca_blocker_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(ca_blocker_or, ignore_case = T))) |>
select(DRUG) |>
distinct() |> pull()
ca_blocker_names
## [1] "*NF* Nicardipine HCl IV" "Amlodipine"
## [3] "NiCARdipine IV" "Diltiazem"
## [5] "Diltiazem Extended-Release" "Nifedipine CR"
## [7] "NiCARdipine" "NIFEdipine CR"
## [9] "Nicardipine" "Nifedipine"
## [11] "NIFEdipine" "Verapamil HCl"
## [13] "Verapamil SR" "Felodipine"
## [15] "Amlodipine Besylate" "Nimodipine"
## [17] "Nicardipine HCl IV" "Lotrel"
## [19] "Verapamil" "diltiazem"
## [21] "Nicardipine HCl" "Procainamide HCl"
## [23] "Procardia XL" "Dilti"
## [25] "*NF* Nifedipine XL" "nifed"
## [27] "diltia" "diltiaz"
## [29] "nif" "DILT"
## [31] "PROCARDIA XL" "nifedipine"
## [33] "*NF* Diltiazem SR" "Nisoldipine (Sular)"
## [35] "Nifedi" "Nifedipine (Bulk)"
## [37] "dilt" "Tiazac"
## [39] "Isradipine" "Diltiaz"
## [41] "Verapa" "Diltia"
## [43] "Nifed" "Calan SR"
## [45] "amlodipine-benazepril" "Vera"
#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
ca_blocker_user <-
db_presc_drug |>
filter(DRUG %in% ca_blocker_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_ca_blocker <-
db_PEO |>
mutate(ca_blocker = ifelse(SUBJECT_ID %in% ca_blocker_user, 1, 0))
#データ確認:
f_table(cols_cont = c(),
cols_fact = c("ca_blocker"),
db = db_patients_ca_blocker)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## ca_blocker = 1 1747 (29.4) 1714 (38.6) 0.196 0.0
#for merge
db_ca_blocker <- db_patients_ca_blocker |> select(HADM_ID, ca_blocker)
#anticoagultants Fondaparinux Idraparinux Idrabiotaparinux dabigatran, rivaroxaban, apixaban, edoxaban, betrixaban argatroban hirudin, lepirudin, bivalirudin
anticoag_vec <-
c("Coumadin", "Warfarin", "Fondaparinux", "Idraparinux", "Idrabiotaparinux",
"heparin",
"dabigatran", "rivaroxaban", "apixaban", "edoxaban", "betrixaban", "argatroban",
"hirudin", "lepirudin", "bivalirudin"
)
anticoag_or <- str_c(anticoag_vec, collapse = "|")
anticoag_or
## [1] "Coumadin|Warfarin|Fondaparinux|Idraparinux|Idrabiotaparinux|heparin|dabigatran|rivaroxaban|apixaban|edoxaban|betrixaban|argatroban|hirudin|lepirudin|bivalirudin"
anticoag_names <-
df_drugs |>
filter(str_detect(DRUG, regex(anticoag_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(anticoag_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(anticoag_or, ignore_case = T))) |>
select(DRUG) |>
distinct() |>
pull()
anticoag_names
## [1] "Warfarin"
## [2] "Heparin Sodium"
## [3] "Heparin"
## [4] "Heparin Flush CVL (100 units/ml)"
## [5] "Heparin Flush CRRT (5000 Units/mL)"
## [6] "Heparin Flush PICC (100 units/ml)"
## [7] "Argatroban"
## [8] "HEPARIN"
## [9] "Heparin Flush Hickman (100 units/ml)"
## [10] "Heparin Flush"
## [11] "Heparin (Preservative Free)"
## [12] "Heparin Flush (10 units/ml)"
## [13] "Heparin Sodium (Preservative Free)"
## [14] "Heparin Flush (1000 units/mL)"
## [15] "Heparin Flush (5000 Units/mL)"
## [16] "Heparin Flush (100 units/ml)"
## [17] "Lepirudin"
## [18] "Heparin Flush Port (10units/ml)"
## [19] "Heparin Flush Midline (100 units/ml)"
## [20] "Heparin (CRRT Machine Priming)"
## [21] "Dabigatran Etexilate"
## [22] "Heparin (Preserv. Free)"
## [23] "Heparin CRRT"
## [24] "Fondaparinux Sodium"
## [25] "heparin"
## [26] "Heparin Flush Port (10 units/mL)"
## [27] "Heparin Lock Flush"
## [28] "Heparin (IABP)"
## [29] "Bivalirudin"
## [30] "Heparin Dwell (1000 Units/mL)"
## [31] "Heparin (Hemodialysis)"
## [32] "Heparin (Porcine)"
## [33] "Hepa"
## [34] "Hepar"
## [35] "H"
## [36] "Fondaparinux"
## [37] "Heparin lock"
## [38] "Lansoprazole"
## [39] "Hepari"
## [40] "HEParin (Porcine) in NS (PF)"
## [41] "Heparin Flu"
## [42] "hepar"
## [43] "Heparin Flush (1000 units/ml)"
## [44] "hep"
## [45] "*NF* Warfarin"
## [46] "Hep"
## [47] "Warf"
## [48] "Heparin Flus"
## [49] "HEPARIN FLUSH"
## [50] "*NF* Argatroban"
## [51] "Rivaroxaban"
## [52] "Heparin flush"
## [53] "Heparin Flush Port"
## [54] "Heparin Flush (10 units/mL)"
## [55] "Heparin Flush (100 units/mL)"
## [56] "Coumadin"
## [57] "dabigatran etexilate"
## [58] "Heparin Flush (10 Units/mL)"
## [59] "warfarin"
## [60] "Pradaxa"
#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
anticoag_user <-
db_presc_drug |>
filter(DRUG %in% anticoag_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_anticoag <-
db_PEO |>
mutate(anticoag = ifelse(SUBJECT_ID %in% anticoag_user, 1, 0))
#データ確認:
db_patients_anticoag |>
f_table(cols_cont = c(),
cols_fact = c("anticoag"))
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## anticoag = 1 4793 (80.6) 4091 (92.1) 0.341 0.0
#for merge
db_anticoag <- db_patients_anticoag |> select(HADM_ID, anticoag)
#antiplatelets Aspirin Triflusal (Disgren) Cangrelor (Kengreal) Clopidogrel (Plavix) Prasugrel (Effient) Ticagrelor (Brilinta) Ticlopidine (Ticlid) Cilostazol (Pletal) Vorapaxar (Zontivity) Abciximab (ReoPro) Eptifibatide (Integrilin) Tirofiban (Aggrastat) Dipyridamole (Persantine)
antiplate_vec <-
c("Aspirin", "Cangrelor", "Clopidogrel", "Plavix", "Prasugrel",
"Ticagrelor", "Brilinta", "Ticlopidine", "Cilostazol", "Pletal",
"Abciximab", "ReoPro", "Eptifibatide", "Tirofiban", "Aggrastat",
"Dipyridamole", "Persantine"
)
antiplate_or <- str_c(antiplate_vec, collapse = "|")
antiplate_or
## [1] "Aspirin|Cangrelor|Clopidogrel|Plavix|Prasugrel|Ticagrelor|Brilinta|Ticlopidine|Cilostazol|Pletal|Abciximab|ReoPro|Eptifibatide|Tirofiban|Aggrastat|Dipyridamole|Persantine"
antiplate_names <-
df_drugs |>
filter(str_detect(DRUG, regex(antiplate_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(antiplate_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(antiplate_or, ignore_case = T))) |>
select(DRUG) |>
distinct() |>
pull()
antiplate_names
## [1] "Aspirin EC"
## [2] "Aspirin"
## [3] "Tirofiban"
## [4] "Clopidogrel Bisulfate"
## [5] "Eptifibatide"
## [6] "Clopidogrel"
## [7] "Prasugrel"
## [8] "Aspirin (Buffered)"
## [9] "Butalbital-Aspirin-Caffeine"
## [10] "Dipyridamole-Aspirin"
## [11] "Abciximab"
## [12] "Ticlopidine HCl"
## [13] "Aspir"
## [14] "Aspiri"
## [15] "Aspirin Desensitization (Angioedema)"
## [16] "asa"
## [17] "Pletal"
## [18] "Aspirin-Caffeine-Butalbital"
## [19] "Dipyridamole"
## [20] "cilostazol"
## [21] "Cilostazol"
## [22] "*nf"
## [23] "cilo"
## [24] "ASPIR"
## [25] "cilos"
## [26] "PleTAL"
## [27] "aspirin"
## [28] "ASPIRIN"
## [29] "Aspi"
## [30] "aspi"
## [31] "Aspirin (Rectal)"
## [32] "Clopidogrel Desensitization"
## [33] "ASA"
## [34] "cil"
## [35] "Dopamine"
## [36] "Aspirin Childrens"
## [37] "aspir"
## [38] "as"
## [39] "Aspirin 81 mg /placebo"
## [40] "asp"
## [41] "Aspirin 325 mg or placebo"
## [42] "Aggrastat"
## [43] "ASA81"
## [44] "Excedrin Aspirin Free"
## [45] "Excedrin Migraine"
## [46] "ticagrelor"
## [47] "CILOSTAZOL"
## [48] "cilostaz"
## [49] "Cilostaz"
## [50] "Aspirin Desensitization (AERD)"
## [51] "Asp"
## [52] "Aspirin 325mg/ placebo"
## [53] "Aspirin 81 mg/ Placebo"
## [54] "Aspirin 325 mg or Placebo"
## [55] "Aspirin 81 mg or Placebo"
## [56] "Aspirin 81mg or placbo"
## [57] "Cangrelor Study Drug (*IND*)"
## [58] "Clopidogrel 150 mg or placebo"
## [59] "Aspirin 81 mg or placebo"
## [60] "Reopro"
## [61] "Butalbital-Acetaminophen-Caff"
## [62] "Aspirin Desensitization"
## [63] "Aspirin Desens"
## [64] "Aspirin 81 mg or placebo"
## [65] "Aspirin 325mg or placebo"
## [66] "Aspirin 81 mg /Placebo"
## [67] "Brilinta"
#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
antiplate_user <-
db_presc_drug |>
filter(DRUG %in% antiplate_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_antiplate <-
db_PEO |>
mutate(antiplate = ifelse(SUBJECT_ID %in% antiplate_user, 1, 0))
#データ確認:
db_patients_antiplate |>
f_table(cols_cont = c(),
cols_fact = c("antiplate"))
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## antiplate = 1 3679 (61.8) 3662 (82.5) 0.473 0.0
#for merge
db_antiplate <- db_patients_antiplate |> select(HADM_ID, antiplate)
#PPIs Omeprazole Lansoprazole Dexlansoprazole Esomeprazole Pantoprazole Rabeprazole Ilaprazole
ppi_vec <-
c("Omeprazole", "Prilosec", "Nexium", "Lansoprazole", "Prevacid",
"Dexlansoprazole", "Pantoprazole", "Rabeprazole", "Ilaprazole"
)
ppi_or <- str_c(ppi_vec, collapse = "|")
ppi_or
## [1] "Omeprazole|Prilosec|Nexium|Lansoprazole|Prevacid|Dexlansoprazole|Pantoprazole|Rabeprazole|Ilaprazole"
ppi_names <-
df_drugs |>
filter(str_detect(DRUG, regex(ppi_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(ppi_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(ppi_or, ignore_case = T))) |>
select(DRUG) |>
filter(!str_detect(DRUG, regex("Heparin", ignore_case = T))) |>
distinct() |>
pull()
ppi_names
## [1] "Pantoprazole Sodium"
## [2] "Pantoprazole"
## [3] "Lansoprazole Oral Suspension"
## [4] "Lansoprazole"
## [5] "Omeprazole"
## [6] "Lansoprazole Oral Disintegrating Tab"
## [7] "Lansoprazole Oral Solution"
## [8] "Nexium"
## [9] "Neo*PO*Omeprazole"
## [10] "Prilosec"
## [11] "Prevacid"
## [12] "prevacid"
## [13] "PRILOSEC"
## [14] "esomeprazole"
## [15] "*NF* Esomeprazole"
## [16] "*NF*ESOMEPRAZOLE"
## [17] "nexium"
## [18] "omeprazole"
## [19] "rabeprazole"
## [20] "LANSOPR"
## [21] "lans"
## [22] "Lansop"
## [23] "LANS"
## [24] "lansoprazole"
## [25] "Esomeprazole Magnesium"
## [26] "lansopr"
## [27] "NexIUM"
## [28] "prevac"
## [29] "LANSOP"
## [30] "lanso"
## [31] "panto"
## [32] "Pantoprazole (Self Med)"
## [33] "Omeprazole-Sodium Bicarbonate"
## [34] "Prilosec OTC"
## [35] "esomeprazole magnesium"
## [36] "PrevACID 24Hr"
## [37] "Nexium 40mg capsules"
## [38] "dexlansoprazole (Dexilant)"
#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
ppi_user <-
db_presc_drug |>
filter(DRUG %in% ppi_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_ppi <-
db_PEO |>
mutate(ppi = ifelse(SUBJECT_ID %in% ppi_user, 1, 0))
#データ確認:
db_patients_ppi |>
f_table(cols_cont = c(),
cols_fact = c("ppi"))
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## ppi = 1 4330 (72.8) 2987 (67.3) 0.121 0.0
#for merge
db_ppi <- db_patients_ppi |> select(HADM_ID, ppi)
db_PEO_total <-
db_PEO |>
left_join(db_HtWt, by = "HADM_ID") |>
left_join(db_vital, by = "HADM_ID") |>
left_join(db_urine, by = "HADM_ID") |>
left_join(db_sofa, by = "HADM_ID") |>
left_join(db_rrt, by = "HADM_ID") |>
left_join(db_venti, by = "HADM_ID") |>
left_join(db_ethnicity, by = "HADM_ID") |>
left_join(db_comorb, by = "HADM_ID") |>
compute()
db_PEO_total <-
db_PEO_total |>
left_join(db_labo, by = "HADM_ID") |>
left_join(db_LVEF, by = "HADM_ID") |>
left_join(db_RAA, by = "HADM_ID") |>
left_join(db_diuritic, by = "HADM_ID") |>
left_join(db_inotrop, by = "HADM_ID") |>
left_join(db_anti_adrenal, by = "HADM_ID") |>
left_join(db_ca_blocker, by = "HADM_ID") |>
left_join(db_anticoag, by = "HADM_ID") |>
left_join(db_antiplate, by = "HADM_ID") |>
left_join(db_ppi, by = "HADM_ID") |>
compute()
#最終固定データとしてcsv形式で出力しておく (結局最終データは3.3MBしかない)
df_PEO_total <- db_PEO_total |> collect()
df_PEO_total |> write_csv("MIMIC_HDS.csv")
# file.size("MIMIC_HDS.csv")
#csvファイル読み込み
df_PEO_total <- read_csv("MIMIC_HDS.csv")
## Rows: 10390 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): ADMISSION_TYPE, INSURANCE, ETHNICITY, DIAGNOSIS, GENDER, ethnicity...
## dbl (60): SUBJECT_ID, HADM_ID, ICUSTAY_ID, h2ras, death_elasp, mortality30, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_PEO_total
## # A tibble: 10,390 × 67
## SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30 age
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 145834 211552 0 236. 0 76.5
## 2 9 150750 220597 0 4.45 1 41.8
## 3 21 109451 217847 0 149. 0 87.4
## 4 26 197661 244882 0 659. 0 72.0
## 5 30 104557 225176 0 NA 0 95
## 6 34 115799 263086 0 2021. 0 95
## 7 37 188670 213503 0 503. 0 68.9
## 8 38 185910 248910 1 NA 0 75.9
## 9 42 119203 210828 0 1999. 0 61.2
## 10 49 190539 249195 1 NA 0 80.9
## # ℹ 10,380 more rows
## # ℹ 60 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## # DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <chr>,
## # EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## # deathday <dbl>, hosp_deathday <dbl>, weight <dbl>, Height <dbl>, BMI <dbl>,
## # HeartRate <dbl>, SysBP <dbl>, DiasBP <dbl>, MeanBP <dbl>, RespRate <dbl>,
## # TempC <dbl>, SpO2 <dbl>, urine_out <dbl>, SOFA <dbl>, apsiii <dbl>, …
df_PEO_total <-
read_csv("MIMIC_HDS.csv") |>
mutate(GENDER = ifelse(GENDER == "M", 1, 0))
## Rows: 10390 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): ADMISSION_TYPE, INSURANCE, ETHNICITY, DIAGNOSIS, GENDER, ethnicity...
## dbl (60): SUBJECT_ID, HADM_ID, ICUSTAY_ID, h2ras, death_elasp, mortality30, ...
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_PEO_total
## # A tibble: 10,390 × 67
## SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30 age
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 145834 211552 0 236. 0 76.5
## 2 9 150750 220597 0 4.45 1 41.8
## 3 21 109451 217847 0 149. 0 87.4
## 4 26 197661 244882 0 659. 0 72.0
## 5 30 104557 225176 0 NA 0 95
## 6 34 115799 263086 0 2021. 0 95
## 7 37 188670 213503 0 503. 0 68.9
## 8 38 185910 248910 1 NA 0 75.9
## 9 42 119203 210828 0 1999. 0 61.2
## 10 49 190539 249195 1 NA 0 80.9
## # ℹ 10,380 more rows
## # ℹ 60 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## # DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <dbl>,
## # EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## # deathday <dbl>, hosp_deathday <dbl>, weight <dbl>, Height <dbl>, BMI <dbl>,
## # HeartRate <dbl>, SysBP <dbl>, DiasBP <dbl>, MeanBP <dbl>, RespRate <dbl>,
## # TempC <dbl>, SpO2 <dbl>, urine_out <dbl>, SOFA <dbl>, apsiii <dbl>, …
#unmatched_table
col_continuous <-
c("age", "Height", "weight", "BMI",
"HeartRate", "SysBP", "DiasBP","RespRate", "TempC", "SpO2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")
col_factors <-
c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"mortality30", "HOSPITAL_EXPIRE_FLAG",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
df_PEO_total |>
select(c(col_continuous, col_factors, "h2ras"))|>
CreateTableOne(vars = c(col_continuous, col_factors), strata = "h2ras", factorVars = col_factors) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_continuous)
##
## # Now:
## data %>% select(all_of(col_continuous))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
## # Was:
## data %>% select(col_factors)
##
## # Now:
## data %>% select(all_of(col_factors))
##
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd = T, missing = T, test = F, explain = F) |>
as.data.frame() #|>
## Stratified by h2ras
## 0 1 SMD
## n 5949 4441
## age 74.35 (14.30) 71.26 (13.97) 0.219
## Height 167.67 (10.98) 168.20 (10.77) 0.048
## weight 80.15 (24.65) 81.99 (24.63) 0.075
## BMI 28.62 (8.03) 29.00 (8.11) 0.046
## HeartRate 88.36 (20.63) 88.12 (18.73) 0.012
## SysBP 123.44 (25.95) 122.21 (25.21) 0.048
## DiasBP 62.25 (17.36) 62.13 (16.40) 0.008
## RespRate 19.99 (6.23) 18.30 (6.31) 0.270
## TempC 36.54 (0.99) 36.49 (0.95) 0.056
## SpO2 96.42 (5.44) 97.16 (4.90) 0.143
## urine_out 1781.19 (1381.17) 1842.99 (1309.36) 0.046
## SOFA 4.59 (3.04) 5.04 (3.02) 0.147
## apsiii 48.66 (20.54) 46.51 (19.58) 0.107
## Calcium 8.50 (0.82) 8.54 (0.79) 0.053
## Cre 1.75 (1.62) 1.56 (1.44) 0.125
## Glucose 144.74 (77.12) 139.09 (65.81) 0.079
## Mg 2.04 (0.59) 2.08 (0.39) 0.090
## Sodium 138.57 (5.05) 138.54 (4.60) 0.005
## BUN 36.27 (25.29) 30.92 (21.24) 0.229
## Plate 237.54 (119.76) 228.76 (118.48) 0.074
## RBC 3.65 (0.70) 3.62 (0.67) 0.050
## WBC 11.72 (10.45) 11.68 (10.45) 0.004
## ADMISSION_TYPE 0.353
## ELECTIVE 411 ( 6.9) 793 (17.9)
## EMERGENCY 5270 (88.6) 3548 (79.9)
## URGENT 268 ( 4.5) 100 ( 2.3)
## INSURANCE 0.122
## Government 80 ( 1.3) 58 ( 1.3)
## Medicare 4831 (81.2) 3401 (76.6)
## Private 1012 (17.0) 968 (21.8)
## Self Pay 26 ( 0.4) 14 ( 0.3)
## GENDER = 1 3074 (51.7) 2446 (55.1) 0.068
## ethnicity_5 0.081
## asian 101 ( 1.7) 80 ( 1.8)
## black 499 ( 8.5) 362 ( 8.2)
## hispanic 123 ( 2.1) 125 ( 2.8)
## other 892 (15.2) 569 (12.9)
## white 4262 (72.5) 3279 (74.3)
## mortality30 = 1 1283 (21.6) 538 (12.1) 0.255
## HOSPITAL_EXPIRE_FLAG = 1 1075 (18.1) 481 (10.8) 0.207
## rrt = 1 155 ( 2.6) 138 ( 3.1) 0.030
## ventilation = 1 2403 (40.4) 2837 (63.9) 0.484
## hypertension = 1 2356 (39.6) 2027 (45.6) 0.122
## DM = 1 2132 (35.8) 1643 (37.0) 0.024
## anemia = 1 1996 (33.6) 1416 (31.9) 0.036
## Af = 1 2537 (42.6) 2034 (45.8) 0.064
## coronary_atherosclerosis = 1 2268 (38.1) 2047 (46.1) 0.162
## venous_thrombus = 1 112 ( 1.9) 76 ( 1.7) 0.013
## MI = 1 1002 (16.8) 716 (16.1) 0.019
## gastritis = 1 121 ( 2.0) 61 ( 1.4) 0.051
## gastric_ulcer = 1 66 ( 1.1) 20 ( 0.5) 0.075
## duodenal_ulcer = 1 71 ( 1.2) 25 ( 0.6) 0.068
## gastric_bleeding = 1 516 ( 8.7) 184 ( 4.1) 0.186
## aki = 1 2262 (38.0) 1410 (31.7) 0.132
## septic_shock = 1 415 ( 7.0) 260 ( 5.9) 0.046
## pneumonia = 1 1343 (22.6) 926 (20.9) 0.042
## LVEF_ctg 0.104
## mild 741 (14.8) 645 (16.7)
## moderate 922 (18.4) 689 (17.9)
## normal 2044 (40.9) 1680 (43.5)
## severe 1294 (25.9) 845 (21.9)
## RAA = 1 2880 (48.4) 2722 (61.3) 0.261
## Diuritic = 1 4311 (72.5) 4083 (91.9) 0.526
## inotrop = 1 1780 (29.9) 2266 (51.0) 0.440
## anti_adrenal = 1 4439 (74.6) 4098 (92.3) 0.489
## ca_blocker = 1 1747 (29.4) 1714 (38.6) 0.196
## anticoag = 1 4793 (80.6) 4091 (92.1) 0.341
## antiplate = 1 3679 (61.8) 3662 (82.5) 0.473
## ppi = 1 4330 (72.8) 2987 (67.3) 0.121
## Stratified by h2ras
## Missing
## n
## age 0.0
## Height 12.6
## weight 3.0
## BMI 13.9
## HeartRate 1.7
## SysBP 1.7
## DiasBP 1.7
## RespRate 1.7
## TempC 2.0
## SpO2 1.8
## urine_out 4.0
## SOFA 0.0
## apsiii 0.0
## Calcium 2.9
## Cre 1.0
## Glucose 1.0
## Mg 1.4
## Sodium 1.0
## BUN 1.0
## Plate 1.0
## RBC 1.0
## WBC 1.0
## ADMISSION_TYPE 0.0
## ELECTIVE
## EMERGENCY
## URGENT
## INSURANCE 0.0
## Government
## Medicare
## Private
## Self Pay
## GENDER = 1 0.0
## ethnicity_5 0.9
## asian
## black
## hispanic
## other
## white
## mortality30 = 1 0.0
## HOSPITAL_EXPIRE_FLAG = 1 0.0
## rrt = 1 0.0
## ventilation = 1 0.0
## hypertension = 1 0.0
## DM = 1 0.0
## anemia = 1 0.0
## Af = 1 0.0
## coronary_atherosclerosis = 1 0.0
## venous_thrombus = 1 0.0
## MI = 1 0.0
## gastritis = 1 0.0
## gastric_ulcer = 1 0.0
## duodenal_ulcer = 1 0.0
## gastric_bleeding = 1 0.0
## aki = 1 0.0
## septic_shock = 1 0.0
## pneumonia = 1 0.0
## LVEF_ctg 14.7
## mild
## moderate
## normal
## severe
## RAA = 1 0.0
## Diuritic = 1 0.0
## inotrop = 1 0.0
## anti_adrenal = 1 0.0
## ca_blocker = 1 0.0
## anticoag = 1 0.0
## antiplate = 1 0.0
## ppi = 1 0.0
## 0 1 SMD Missing
## n 5949 4441
## age 74.35 (14.30) 71.26 (13.97) 0.219 0.0
## Height 167.67 (10.98) 168.20 (10.77) 0.048 12.6
## weight 80.15 (24.65) 81.99 (24.63) 0.075 3.0
## BMI 28.62 (8.03) 29.00 (8.11) 0.046 13.9
## HeartRate 88.36 (20.63) 88.12 (18.73) 0.012 1.7
## SysBP 123.44 (25.95) 122.21 (25.21) 0.048 1.7
## DiasBP 62.25 (17.36) 62.13 (16.40) 0.008 1.7
## RespRate 19.99 (6.23) 18.30 (6.31) 0.270 1.7
## TempC 36.54 (0.99) 36.49 (0.95) 0.056 2.0
## SpO2 96.42 (5.44) 97.16 (4.90) 0.143 1.8
## urine_out 1781.19 (1381.17) 1842.99 (1309.36) 0.046 4.0
## SOFA 4.59 (3.04) 5.04 (3.02) 0.147 0.0
## apsiii 48.66 (20.54) 46.51 (19.58) 0.107 0.0
## Calcium 8.50 (0.82) 8.54 (0.79) 0.053 2.9
## Cre 1.75 (1.62) 1.56 (1.44) 0.125 1.0
## Glucose 144.74 (77.12) 139.09 (65.81) 0.079 1.0
## Mg 2.04 (0.59) 2.08 (0.39) 0.090 1.4
## Sodium 138.57 (5.05) 138.54 (4.60) 0.005 1.0
## BUN 36.27 (25.29) 30.92 (21.24) 0.229 1.0
## Plate 237.54 (119.76) 228.76 (118.48) 0.074 1.0
## RBC 3.65 (0.70) 3.62 (0.67) 0.050 1.0
## WBC 11.72 (10.45) 11.68 (10.45) 0.004 1.0
## ADMISSION_TYPE 0.353 0.0
## ELECTIVE 411 ( 6.9) 793 (17.9)
## EMERGENCY 5270 (88.6) 3548 (79.9)
## URGENT 268 ( 4.5) 100 ( 2.3)
## INSURANCE 0.122 0.0
## Government 80 ( 1.3) 58 ( 1.3)
## Medicare 4831 (81.2) 3401 (76.6)
## Private 1012 (17.0) 968 (21.8)
## Self Pay 26 ( 0.4) 14 ( 0.3)
## GENDER = 1 3074 (51.7) 2446 (55.1) 0.068 0.0
## ethnicity_5 0.081 0.9
## asian 101 ( 1.7) 80 ( 1.8)
## black 499 ( 8.5) 362 ( 8.2)
## hispanic 123 ( 2.1) 125 ( 2.8)
## other 892 (15.2) 569 (12.9)
## white 4262 (72.5) 3279 (74.3)
## mortality30 = 1 1283 (21.6) 538 (12.1) 0.255 0.0
## HOSPITAL_EXPIRE_FLAG = 1 1075 (18.1) 481 (10.8) 0.207 0.0
## rrt = 1 155 ( 2.6) 138 ( 3.1) 0.030 0.0
## ventilation = 1 2403 (40.4) 2837 (63.9) 0.484 0.0
## hypertension = 1 2356 (39.6) 2027 (45.6) 0.122 0.0
## DM = 1 2132 (35.8) 1643 (37.0) 0.024 0.0
## anemia = 1 1996 (33.6) 1416 (31.9) 0.036 0.0
## Af = 1 2537 (42.6) 2034 (45.8) 0.064 0.0
## coronary_atherosclerosis = 1 2268 (38.1) 2047 (46.1) 0.162 0.0
## venous_thrombus = 1 112 ( 1.9) 76 ( 1.7) 0.013 0.0
## MI = 1 1002 (16.8) 716 (16.1) 0.019 0.0
## gastritis = 1 121 ( 2.0) 61 ( 1.4) 0.051 0.0
## gastric_ulcer = 1 66 ( 1.1) 20 ( 0.5) 0.075 0.0
## duodenal_ulcer = 1 71 ( 1.2) 25 ( 0.6) 0.068 0.0
## gastric_bleeding = 1 516 ( 8.7) 184 ( 4.1) 0.186 0.0
## aki = 1 2262 (38.0) 1410 (31.7) 0.132 0.0
## septic_shock = 1 415 ( 7.0) 260 ( 5.9) 0.046 0.0
## pneumonia = 1 1343 (22.6) 926 (20.9) 0.042 0.0
## LVEF_ctg 0.104 14.7
## mild 741 (14.8) 645 (16.7)
## moderate 922 (18.4) 689 (17.9)
## normal 2044 (40.9) 1680 (43.5)
## severe 1294 (25.9) 845 (21.9)
## RAA = 1 2880 (48.4) 2722 (61.3) 0.261 0.0
## Diuritic = 1 4311 (72.5) 4083 (91.9) 0.526 0.0
## inotrop = 1 1780 (29.9) 2266 (51.0) 0.440 0.0
## anti_adrenal = 1 4439 (74.6) 4098 (92.3) 0.489 0.0
## ca_blocker = 1 1747 (29.4) 1714 (38.6) 0.196 0.0
## anticoag = 1 4793 (80.6) 4091 (92.1) 0.341 0.0
## antiplate = 1 3679 (61.8) 3662 (82.5) 0.473 0.0
## ppi = 1 4330 (72.8) 2987 (67.3) 0.121 0.0
write.csv("unmatched.csv")
## "","x"
## "1","unmatched.csv"
df_unmatched_table <- read.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/unmatched.csv",
col.names = c("name", "non_user", "user", "smd", "missing"))
df_unmatched_table
## name non_user user smd
## 1 n 5949 4441 NA
## 2 age 74.35 (14.30) 71.26 (13.97) 0.219
## 3 Height 167.93 (14.24) 168.53 (14.46) 0.042
## 4 weight 80.12 (24.69) 81.99 (24.63) 0.076
## 5 BMI 28.67 (9.76) 28.97 (8.16) 0.033
## 6 HeartRate 88.36 (20.63) 88.12 (18.73) 0.012
## 7 SysBP 123.44 (25.95) 122.21 (25.21) 0.048
## 8 DiasBP 62.25 (17.36) 62.13 (16.40) 0.008
## 9 RespRate 19.99 (6.23) 18.30 (6.31) 0.270
## 10 TempC 36.54 (0.99) 36.49 (0.95) 0.056
## 11 SpO2 96.42 (5.44) 97.16 (4.90) 0.143
## 12 urine_out 1781.19 (1381.17) 1842.99 (1309.36) 0.046
## 13 SOFA 4.59 (3.04) 5.04 (3.02) 0.147
## 14 apsiii 48.66 (20.54) 46.51 (19.58) 0.107
## 15 Calcium 8.50 (0.82) 8.54 (0.79) 0.053
## 16 Cre 1.75 (1.62) 1.56 (1.44) 0.125
## 17 Glucose 144.74 (77.12) 139.09 (65.81) 0.079
## 18 Mg 2.04 (0.59) 2.08 (0.39) 0.090
## 19 Sodium 138.57 (5.05) 138.54 (4.60) 0.005
## 20 BUN 36.27 (25.29) 30.92 (21.24) 0.229
## 21 Plate 237.54 (119.76) 228.76 (118.48) 0.074
## 22 RBC 3.65 (0.70) 3.62 (0.67) 0.050
## 23 WBC 11.72 (10.45) 11.68 (10.45) 0.004
## 24 ADMISSION_TYPE 0.353
## 25 ELECTIVE 411 ( 6.9) 793 (17.9) NA
## 26 EMERGENCY 5270 (88.6) 3548 (79.9) NA
## 27 URGENT 268 ( 4.5) 100 ( 2.3) NA
## 28 INSURANCE 0.122
## 29 Government 80 ( 1.3) 58 ( 1.3) NA
## 30 Medicare 4831 (81.2) 3401 (76.6) NA
## 31 Private 1012 (17.0) 968 (21.8) NA
## 32 Self Pay 26 ( 0.4) 14 ( 0.3) NA
## 33 GENDER = M 3074 (51.7) 2446 (55.1) 0.068
## 34 ethnicity_5 0.081
## 35 asian 101 ( 1.7) 80 ( 1.8) NA
## 36 black 499 ( 8.5) 362 ( 8.2) NA
## 37 hispanic 123 ( 2.1) 125 ( 2.8) NA
## 38 other 892 (15.2) 569 (12.9) NA
## 39 white 4262 (72.5) 3279 (74.3) NA
## 40 mortality30 = 1 1283 (21.6) 538 (12.1) 0.255
## 41 HOSPITAL_EXPIRE_FLAG = 1 1075 (18.1) 481 (10.8) 0.207
## 42 rrt = 1 155 ( 2.6) 138 ( 3.1) 0.030
## 43 ventilation = 1 2403 (40.4) 2837 (63.9) 0.484
## 44 hypertension = 1 2356 (39.6) 2027 (45.6) 0.122
## 45 DM = 1 2132 (35.8) 1643 (37.0) 0.024
## 46 anemia = 1 1996 (33.6) 1416 (31.9) 0.036
## 47 Af = 1 2537 (42.6) 2034 (45.8) 0.064
## 48 coronary_atherosclerosis = 1 2268 (38.1) 2047 (46.1) 0.162
## 49 venous_thrombus = 1 112 ( 1.9) 76 ( 1.7) 0.013
## 50 MI = 1 1002 (16.8) 716 (16.1) 0.019
## 51 gastritis = 1 121 ( 2.0) 61 ( 1.4) 0.051
## 52 gastric_ulcer = 1 66 ( 1.1) 20 ( 0.5) 0.075
## 53 duodenal_ulcer = 1 71 ( 1.2) 25 ( 0.6) 0.068
## 54 gastric_bleeding = 1 516 ( 8.7) 184 ( 4.1) 0.186
## 55 aki = 1 2262 (38.0) 1410 (31.7) 0.132
## 56 septic_shock = 1 415 ( 7.0) 260 ( 5.9) 0.046
## 57 pneumonia = 1 1343 (22.6) 926 (20.9) 0.042
## 58 LVEF_ctg 0.104
## 59 mild 741 (14.8) 645 (16.7) NA
## 60 moderate 922 (18.4) 689 (17.9) NA
## 61 normal 2044 (40.9) 1680 (43.5) NA
## 62 severe 1294 (25.9) 845 (21.9) NA
## 63 RAA = 1 2880 (48.4) 2722 (61.3) 0.261
## 64 Diuritic = 1 4311 (72.5) 4083 (91.9) 0.526
## 65 inotrop = 1 1780 (29.9) 2266 (51.0) 0.440
## 66 anti_adrenal = 1 4439 (74.6) 4098 (92.3) 0.489
## 67 ca_blocker = 1 1747 (29.4) 1714 (38.6) 0.196
## 68 anticoag = 1 4793 (80.6) 4091 (92.1) 0.341
## 69 antiplate = 1 3679 (61.8) 3662 (82.5) 0.473
## 70 ppi = 1 4330 (72.8) 2987 (67.3) 0.121
## missing
## 1 NA
## 2 0.0
## 3 12.5
## 4 3.0
## 5 13.8
## 6 1.7
## 7 1.7
## 8 1.7
## 9 1.7
## 10 2.0
## 11 1.8
## 12 4.0
## 13 0.0
## 14 0.0
## 15 2.9
## 16 1.0
## 17 1.0
## 18 1.4
## 19 1.0
## 20 1.0
## 21 1.0
## 22 1.0
## 23 1.0
## 24 0.0
## 25 NA
## 26 NA
## 27 NA
## 28 0.0
## 29 NA
## 30 NA
## 31 NA
## 32 NA
## 33 0.0
## 34 0.9
## 35 NA
## 36 NA
## 37 NA
## 38 NA
## 39 NA
## 40 0.0
## 41 0.0
## 42 0.0
## 43 0.0
## 44 0.0
## 45 0.0
## 46 0.0
## 47 0.0
## 48 0.0
## 49 0.0
## 50 0.0
## 51 0.0
## 52 0.0
## 53 0.0
## 54 0.0
## 55 0.0
## 56 0.0
## 57 0.0
## 58 14.7
## 59 NA
## 60 NA
## 61 NA
## 62 NA
## 63 0.0
## 64 0.0
## 65 0.0
## 66 0.0
## 67 0.0
## 68 0.0
## 69 0.0
## 70 0.0
df_PEO_total_complete <- df_PEO_total[complete.cases(df_PEO_total[c("BMI","RespRate", "TempC", "SpO2", "urine_out", "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")]), ]
#"hosp_deathday","HeartRate", "SysBP", "DiasBP"
df_PEO_total_complete |> mutate_if(is.character, as.factor) |> skim()
| Name | mutate_if(df_PEO_total_co… |
| Number of rows | 8460 |
| Number of columns | 67 |
| _______________________ | |
| Column type frequency: | |
| factor | 6 |
| numeric | 61 |
| ________________________ | |
| Group variables | None |
Variable type: factor
| skim_variable | n_missing | complete_rate | ordered | n_unique | top_counts |
|---|---|---|---|---|---|
| ADMISSION_TYPE | 0 | 1.00 | FALSE | 3 | EME: 7186, ELE: 986, URG: 288 |
| INSURANCE | 0 | 1.00 | FALSE | 4 | Med: 6624, Pri: 1683, Gov: 119, Sel: 34 |
| ETHNICITY | 0 | 1.00 | FALSE | 35 | WHI: 6157, UNK: 819, BLA: 656, HIS: 163 |
| DIAGNOSIS | 1 | 1.00 | FALSE | 3533 | CON: 492, PNE: 335, CHE: 215, SEP: 210 |
| ethnicity_5 | 0 | 1.00 | FALSE | 5 | whi: 6217, oth: 1202, bla: 685, his: 204 |
| LVEF_ctg | 792 | 0.91 | FALSE | 4 | nor: 3267, sev: 1831, mod: 1364, mil: 1206 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| SUBJECT_ID | 0 | 1.00 | 35083.82 | 28643.26 | 3.00 | 12477.75 | 24870.50 | 57057.75 | 99995.00 | ▇▅▂▂▂ |
| HADM_ID | 0 | 1.00 | 150496.60 | 28972.72 | 100018.00 | 125420.00 | 150302.00 | 175735.75 | 199993.00 | ▇▇▇▇▇ |
| ICUSTAY_ID | 0 | 1.00 | 249799.85 | 28924.84 | 200014.00 | 224679.00 | 249885.50 | 275065.50 | 299987.00 | ▇▇▇▇▇ |
| h2ras | 0 | 1.00 | 0.45 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| death_elasp | 3614 | 0.57 | 559.25 | 772.62 | -0.69 | 25.52 | 173.27 | 832.93 | 3907.26 | ▇▂▁▁▁ |
| mortality30 | 0 | 1.00 | 0.15 | 0.36 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| age | 0 | 1.00 | 72.51 | 14.17 | 19.16 | 63.43 | 74.60 | 82.73 | 95.00 | ▁▁▅▇▇ |
| HOSPITAL_EXPIRE_FLAG | 0 | 1.00 | 0.14 | 0.34 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| GENDER | 0 | 1.00 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
| EXPIRE_FLAG | 0 | 1.00 | 0.57 | 0.49 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
| birthday | 0 | 1.00 | 33198.08 | 26532.80 | -61909.00 | 28365.25 | 38708.00 | 48988.50 | 77986.00 | ▁▁▁▇▃ |
| admitday | 0 | 1.00 | 66285.03 | 10678.91 | 47663.50 | 57031.46 | 66332.59 | 75496.55 | 87074.11 | ▇▇▇▇▆ |
| dischday | 0 | 1.00 | 66297.48 | 10679.11 | 47669.75 | 57044.31 | 66343.61 | 75501.22 | 87084.60 | ▇▇▇▇▆ |
| deathday | 3614 | 0.57 | 66824.94 | 10702.84 | 47683.00 | 57690.00 | 66852.50 | 76066.50 | 88183.00 | ▇▇▇▇▅ |
| hosp_deathday | 7317 | 0.14 | 65789.52 | 10695.71 | 47683.44 | 56528.16 | 65601.01 | 74764.55 | 85821.46 | ▇▇▇▇▆ |
| weight | 0 | 1.00 | 81.50 | 24.78 | 27.30 | 65.20 | 77.80 | 93.00 | 295.00 | ▇▆▁▁▁ |
| Height | 0 | 1.00 | 167.83 | 10.91 | 86.36 | 160.02 | 167.64 | 175.26 | 212.29 | ▁▁▃▇▁ |
| BMI | 0 | 1.00 | 28.84 | 8.13 | 10.22 | 23.70 | 27.37 | 32.08 | 136.09 | ▇▂▁▁▁ |
| HeartRate | 0 | 1.00 | 88.50 | 19.82 | 1.00 | 75.00 | 87.00 | 100.00 | 191.00 | ▁▃▇▁▁ |
| SysBP | 0 | 1.00 | 122.54 | 25.23 | 11.00 | 105.00 | 120.00 | 138.00 | 281.00 | ▁▇▇▁▁ |
| DiasBP | 0 | 1.00 | 62.53 | 16.89 | 10.00 | 51.00 | 61.00 | 72.00 | 202.00 | ▂▇▁▁▁ |
| MeanBP | 0 | 1.00 | 80.62 | 17.89 | 0.83 | 68.33 | 79.00 | 91.00 | 223.00 | ▁▇▃▁▁ |
| RespRate | 0 | 1.00 | 19.21 | 6.37 | 1.00 | 14.00 | 18.00 | 23.00 | 65.00 | ▂▇▂▁▁ |
| TempC | 0 | 1.00 | 36.52 | 0.97 | 30.56 | 35.94 | 36.50 | 37.06 | 40.83 | ▁▁▇▅▁ |
| SpO2 | 0 | 1.00 | 96.75 | 5.10 | 9.00 | 95.00 | 98.00 | 100.00 | 100.00 | ▁▁▁▁▇ |
| urine_out | 0 | 1.00 | 1836.68 | 1378.79 | -1305.00 | 939.75 | 1590.00 | 2500.00 | 45680.00 | ▇▁▁▁▁ |
| SOFA | 0 | 1.00 | 4.89 | 3.04 | 0.00 | 3.00 | 4.00 | 7.00 | 21.00 | ▇▆▂▁▁ |
| apsiii | 0 | 1.00 | 48.30 | 19.30 | 1.00 | 35.00 | 45.00 | 58.00 | 174.00 | ▃▇▂▁▁ |
| rrt | 0 | 1.00 | 0.03 | 0.18 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| ventilation | 0 | 1.00 | 0.54 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▇▁▁▁▇ |
| hypertension | 0 | 1.00 | 0.43 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| DM | 0 | 1.00 | 0.36 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
| anemia | 0 | 1.00 | 0.33 | 0.47 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
| Af | 0 | 1.00 | 0.44 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| coronary_atherosclerosis | 0 | 1.00 | 0.43 | 0.50 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| venous_thrombus | 0 | 1.00 | 0.02 | 0.14 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| MI | 0 | 1.00 | 0.17 | 0.38 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| gastritis | 0 | 1.00 | 0.02 | 0.13 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| gastric_ulcer | 0 | 1.00 | 0.01 | 0.09 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| duodenal_ulcer | 0 | 1.00 | 0.01 | 0.09 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| gastric_bleeding | 0 | 1.00 | 0.06 | 0.24 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| aki | 0 | 1.00 | 0.37 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
| septic_shock | 0 | 1.00 | 0.07 | 0.25 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▁ |
| pneumonia | 0 | 1.00 | 0.23 | 0.42 | 0.00 | 0.00 | 0.00 | 0.00 | 1.00 | ▇▁▁▁▂ |
| Calcium | 0 | 1.00 | 8.51 | 0.80 | 0.00 | 8.00 | 8.50 | 9.00 | 15.60 | ▁▁▇▁▁ |
| Cre | 0 | 1.00 | 1.62 | 1.45 | 0.10 | 0.90 | 1.20 | 1.80 | 18.80 | ▇▁▁▁▁ |
| Glucose | 0 | 1.00 | 142.32 | 72.48 | 17.00 | 103.00 | 124.00 | 159.00 | 1140.00 | ▇▁▁▁▁ |
| Mg | 0 | 1.00 | 2.06 | 0.54 | 0.30 | 1.80 | 2.00 | 2.30 | 33.00 | ▇▁▁▁▁ |
| Sodium | 0 | 1.00 | 138.44 | 4.75 | 97.00 | 136.00 | 139.00 | 141.00 | 170.00 | ▁▁▇▃▁ |
| BUN | 0 | 1.00 | 33.55 | 23.42 | 2.00 | 18.00 | 26.00 | 42.00 | 272.00 | ▇▁▁▁▁ |
| Plate | 0 | 1.00 | 233.51 | 119.51 | 5.00 | 155.00 | 215.00 | 290.00 | 1256.00 | ▇▃▁▁▁ |
| RBC | 0 | 1.00 | 3.64 | 0.69 | 1.13 | 3.16 | 3.57 | 4.06 | 7.10 | ▁▇▇▁▁ |
| WBC | 0 | 1.00 | 11.60 | 8.94 | 0.10 | 7.50 | 10.20 | 13.90 | 486.80 | ▇▁▁▁▁ |
| RAA | 0 | 1.00 | 0.57 | 0.50 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▆▁▁▁▇ |
| Diuritic | 0 | 1.00 | 0.84 | 0.36 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▂▁▁▁▇ |
| inotrop | 0 | 1.00 | 0.42 | 0.49 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▆ |
| anti_adrenal | 0 | 1.00 | 0.85 | 0.36 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▂▁▁▁▇ |
| ca_blocker | 0 | 1.00 | 0.35 | 0.48 | 0.00 | 0.00 | 0.00 | 1.00 | 1.00 | ▇▁▁▁▅ |
| anticoag | 0 | 1.00 | 0.88 | 0.32 | 0.00 | 1.00 | 1.00 | 1.00 | 1.00 | ▁▁▁▁▇ |
| antiplate | 0 | 1.00 | 0.74 | 0.44 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
| ppi | 0 | 1.00 | 0.71 | 0.45 | 0.00 | 0.00 | 1.00 | 1.00 | 1.00 | ▃▁▁▁▇ |
df_PEO_total_complete |> count()
## # A tibble: 1 × 1
## n
## <int>
## 1 8460
df_PEO_total_complete |>head()
## # A tibble: 6 × 67
## SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30 age
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3 145834 211552 0 236. 0 76.5
## 2 9 150750 220597 0 4.45 1 41.8
## 3 21 109451 217847 0 149. 0 87.4
## 4 26 197661 244882 0 659. 0 72.0
## 5 30 104557 225176 0 NA 0 95
## 6 34 115799 263086 0 2021. 0 95
## # ℹ 60 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## # DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <dbl>,
## # EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## # deathday <dbl>, hosp_deathday <dbl>, weight <dbl>, Height <dbl>, BMI <dbl>,
## # HeartRate <dbl>, SysBP <dbl>, DiasBP <dbl>, MeanBP <dbl>, RespRate <dbl>,
## # TempC <dbl>, SpO2 <dbl>, urine_out <dbl>, SOFA <dbl>, apsiii <dbl>,
## # rrt <dbl>, ventilation <dbl>, ethnicity_5 <chr>, hypertension <dbl>, …
col_continuous <-
c("age", "Height", "weight", "BMI",
"HeartRate", "SysBP", "DiasBP","RespRate", "TempC", "SpO2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")
col_factors <-
c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"mortality30", "HOSPITAL_EXPIRE_FLAG",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
df_survival_complete <-
df_PEO_total_complete |>
mutate(futime30 = ifelse(mortality30 == 1, deathday-admitday, 30)) |>
# filter(futime30 < -1)
mutate(futime30 = ifelse(futime30 <=0, 1, futime30)) |>
select(c(col_continuous, col_factors, h2ras, futime30))
formula_1_complete = Surv(futime30, mortality30)~ h2ras
sfit_1 = surv_fit(formula_1_complete, data = df_survival_complete, conf.type = "log-log")
ggsurvplot(sfit_1,
conf.int=TRUE, pval=T, pval.method = T,
risk.table=TRUE,
break.time.by = 5,
break.y.by = 0.2,
palette = "Set1",
xlab = "Days",
ylab = "Cum survival",
legend.labs=c("Non-H2RA", "H2RA"),
legend.title = "",
risk.table.height=0.25,
main = "before matching"
)
#cox regression model with multiple imputation - model_1: unadjusted
# Cox回帰モデルの構築
model_1_complete <- with(data=df_survival_complete,
exp=coxph(Surv(futime30, mortality30) ~ h2ras))
# Cox回帰モデルのサマリー取得
summary_result <- summary(model_1_complete)
# ハザード比 (HR) と信頼区間の計算
hr_results <- summary_result$coefficients
hr_results_df <- as.data.frame(hr_results)
# 行名をカラムに変換
hr_results_df$term <- rownames(hr_results_df)
# 正しい列名を使用して mutate() を実行
hr_results_df <- hr_results_df %>%
mutate(HR = exp(coef),
lower = exp(coef - 1.96 * `se(coef)`),
upper = exp(coef + 1.96 * `se(coef)`)) %>%
select(term, HR, lower, upper, p.value = `Pr(>|z|)`) %>%
mutate_if(is.numeric, round, 3)
# CSVファイルに保存
write.csv(hr_results_df, "model_1_complete.csv")
# CSVファイルを読み込む
model_1_complete <- read.csv("model_1_complete.csv")
model_1_complete
## X term HR lower upper p.value
## 1 h2ras h2ras 0.545 0.486 0.612 0
#model_2: adjusted, partial formula
cols_cph_2_complete <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
)
formula_2_complete <-
cols_cph_2_complete |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_2_complete
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5
## <environment: 0x11a9f2a58>
model_2_complete <-
with(data=df_survival_complete,
exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI +
GENDER + ADMISSION_TYPE + INSURANCE + ethnicity_5))
summary_result <- summary(model_2_complete)
# ハザード比 (HR) と信頼区間の計算
hr_results <- summary_result$coefficients
hr_results_df <- as.data.frame(hr_results)
# 行名をカラムに変換
hr_results_df$term <- rownames(hr_results_df)
# 正しい列名を使用して mutate() を実行
hr_results_df <- hr_results_df %>%
mutate(HR = exp(coef),
lower = exp(coef - 1.96 * `se(coef)`),
upper = exp(coef + 1.96 * `se(coef)`)) %>%
select(term, HR, lower, upper, p.value = `Pr(>|z|)`) %>%
mutate_if(is.numeric, round, 3)
# CSVファイルに保存
write.csv(hr_results_df, "model_2_complete.csv")
# CSVファイルを読み込む
model_2_complete <- read.csv("model_2_complete.csv")
model_2_complete
## X term HR lower upper p.value
## 1 h2ras h2ras 0.631 0.561 0.709 0.000
## 2 age age 1.024 1.019 1.030 0.000
## 3 BMI BMI 0.986 0.978 0.995 0.001
## 4 GENDER GENDER 1.052 0.942 1.174 0.371
## 5 ADMISSION_TYPEEMERGENCY ADMISSION_TYPEEMERGENCY 2.854 2.158 3.774 0.000
## 6 ADMISSION_TYPEURGENT ADMISSION_TYPEURGENT 2.276 1.513 3.423 0.000
## 7 INSURANCEMedicare INSURANCEMedicare 1.192 0.633 2.245 0.586
## 8 INSURANCEPrivate INSURANCEPrivate 1.139 0.601 2.158 0.690
## 9 INSURANCESelf Pay INSURANCESelf Pay 2.201 0.837 5.791 0.110
## 10 ethnicity_5black ethnicity_5black 0.756 0.466 1.226 0.257
## 11 ethnicity_5hispanic ethnicity_5hispanic 0.781 0.418 1.460 0.439
## 12 ethnicity_5other ethnicity_5other 1.619 1.045 2.506 0.031
## 13 ethnicity_5white ethnicity_5white 1.062 0.695 1.624 0.780
#model_3: adjusted, full formula
cols_cph_3_complete <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
"resprate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
"urine_out", "Glucose", "Calcium", "Cre", "Mg", "Sodium", "BUN",
"Plate", "RBC", "WBC",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia"
)
formula_3_complete <-
cols_cph_3_complete |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_3_complete
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii +
## rrt + ventilation + urine_out + Glucose + Calcium + Cre +
## Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic +
## inotrop + anti_adrenal + ca_blocker + anticoag + antiplate +
## ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis +
## venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
## aki + septic_shock + pneumonia
## <environment: 0x17f708d68>
model_3_complete <-
with(data=df_survival_complete,
exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
INSURANCE + ethnicity_5 + RespRate + LVEF_ctg + SOFA + apsiii +
rrt + ventilation + urine_out + Glucose + Calcium + Cre +
Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic +
inotrop + anti_adrenal + ca_blocker + anticoag + antiplate +
ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis +
venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
aki + septic_shock + pneumonia))
# モデルのサマリー取得
summary_result <- summary(model_3_complete)
# ハザード比 (HR) と信頼区間の計算
hr_results <- summary_result$coefficients
hr_results_df <- as.data.frame(hr_results)
# 行名をカラムに変換
hr_results_df$term <- rownames(hr_results_df)
# 正しい列名を使用して mutate() を実行
hr_results_df <- hr_results_df %>%
mutate(HR = exp(coef),
lower = exp(coef - 1.96 * `se(coef)`),
upper = exp(coef + 1.96 * `se(coef)`)) %>%
select(term, HR, lower, upper, p.value = `Pr(>|z|)`) %>%
mutate_if(is.numeric, round, 3)
# CSVファイルに保存
write.csv(hr_results_df, "model_3_complete.csv")
# CSVファイルを読み込む
model_3_complete <- read.csv("model_3_complete.csv")
model_3_complete
## X term HR lower upper p.value
## 1 h2ras h2ras 0.663 0.578 0.761 0.000
## 2 age age 1.023 1.018 1.029 0.000
## 3 BMI BMI 0.982 0.973 0.991 0.000
## 4 GENDER GENDER 1.072 0.948 1.211 0.266
## 5 ADMISSION_TYPEEMERGENCY ADMISSION_TYPEEMERGENCY 2.042 1.515 2.753 0.000
## 6 ADMISSION_TYPEURGENT ADMISSION_TYPEURGENT 1.150 0.738 1.791 0.537
## 7 INSURANCEMedicare INSURANCEMedicare 1.061 0.542 2.075 0.864
## 8 INSURANCEPrivate INSURANCEPrivate 0.914 0.464 1.801 0.794
## 9 INSURANCESelf Pay INSURANCESelf Pay 1.896 0.695 5.173 0.212
## 10 ethnicity_5black ethnicity_5black 1.032 0.626 1.700 0.902
## 11 ethnicity_5hispanic ethnicity_5hispanic 1.034 0.539 1.984 0.920
## 12 ethnicity_5other ethnicity_5other 1.672 1.071 2.611 0.024
## 13 ethnicity_5white ethnicity_5white 1.232 0.802 1.892 0.341
## 14 RespRate RespRate 1.028 1.019 1.037 0.000
## 15 LVEF_ctgmoderate LVEF_ctgmoderate 1.053 0.848 1.307 0.641
## 16 LVEF_ctgnormal LVEF_ctgnormal 0.956 0.794 1.150 0.634
## 17 LVEF_ctgsevere LVEF_ctgsevere 1.198 0.985 1.458 0.071
## 18 SOFA SOFA 0.989 0.962 1.017 0.426
## 19 apsiii apsiii 1.019 1.015 1.023 0.000
## 20 rrt rrt 1.249 0.996 1.567 0.055
## 21 ventilation ventilation 1.486 1.287 1.716 0.000
## 22 urine_out urine_out 1.000 1.000 1.000 0.000
## 23 Glucose Glucose 1.001 1.001 1.002 0.000
## 24 Calcium Calcium 0.971 0.904 1.043 0.425
## 25 Cre Cre 0.878 0.825 0.935 0.000
## 26 Mg Mg 1.064 0.975 1.161 0.167
## 27 Sodium Sodium 0.986 0.975 0.998 0.017
## 28 BUN BUN 1.008 1.005 1.011 0.000
## 29 Plate Plate 1.000 0.999 1.000 0.050
## 30 RBC RBC 1.002 0.910 1.104 0.962
## 31 WBC WBC 1.008 1.006 1.011 0.000
## 32 RAA RAA 0.543 0.472 0.624 0.000
## 33 Diuritic Diuritic 1.069 0.870 1.314 0.523
## 34 inotrop inotrop 1.403 1.211 1.626 0.000
## 35 anti_adrenal anti_adrenal 0.502 0.425 0.594 0.000
## 36 ca_blocker ca_blocker 0.885 0.772 1.014 0.078
## 37 anticoag anticoag 1.226 0.962 1.564 0.100
## 38 antiplate antiplate 0.853 0.730 0.997 0.046
## 39 ppi ppi 1.013 0.858 1.197 0.876
## 40 hypertension hypertension 1.110 0.980 1.258 0.100
## 41 DM DM 0.951 0.832 1.087 0.460
## 42 anemia anemia 0.717 0.628 0.818 0.000
## 43 Af Af 1.083 0.957 1.225 0.208
## 44 coronary_atherosclerosis coronary_atherosclerosis 0.869 0.758 0.997 0.045
## 45 venous_thrombus venous_thrombus 0.743 0.501 1.103 0.141
## 46 MI MI 1.189 1.010 1.398 0.037
## 47 gastric_ulcer gastric_ulcer 0.814 0.392 1.690 0.581
## 48 duodenal_ulcer duodenal_ulcer 0.895 0.479 1.672 0.728
## 49 gastric_bleeding gastric_bleeding 0.950 0.750 1.205 0.674
## 50 aki aki 1.346 1.176 1.541 0.000
## 51 septic_shock septic_shock 1.170 0.980 1.397 0.082
## 52 pneumonia pneumonia 1.171 1.029 1.332 0.017
#model_1_complete から model_3_completeまでの結果。
bind_rows(model_1_complete |> filter(term == "h2ras"),
model_2_complete |> filter(term == "h2ras"),
model_3_complete |> filter(term == "h2ras")) |>
mutate(model = c("model_1_complete", "model_2_complete", "model_3_complete"), .before=HR) |>
select(!X) |>
write_csv("unadjusted_models_complete.csv")
(unadjusted_models <- read_csv("unadjusted_models_complete.csv"))
## Rows: 3 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): term, model
## dbl (4): HR, lower, upper, p.value
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 3 × 6
## term model HR lower upper p.value
## <chr> <chr> <dbl> <dbl> <dbl> <dbl>
## 1 h2ras model_1_complete 0.545 0.486 0.612 0
## 2 h2ras model_2_complete 0.631 0.561 0.709 0
## 3 h2ras model_3_complete 0.663 0.578 0.761 0
df_survival_complete_PS<- na.omit(df_survival_complete)
df_survival_complete_PS |> count()
## # A tibble: 1 × 1
## n
## <int>
## 1 7668
#PS machiting
cols <-
c(
"age", "BMI",
"HeartRate","RespRate", "TempC", "SpO2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
"ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
ps_f <-
cols %>%
str_c(collapse = " + ") %>%
str_c("h2ras ~ ", .) %>%
formula()
ps_f
## h2ras ~ age + BMI + HeartRate + RespRate + TempC + SpO2 + urine_out +
## SOFA + apsiii + Calcium + Cre + Glucose + Mg + Sodium + BUN +
## Plate + RBC + WBC + ADMISSION_TYPE + INSURANCE + GENDER +
## ethnicity_5 + rrt + ventilation + hypertension + DM + anemia +
## Af + coronary_atherosclerosis + venous_thrombus + MI + gastritis +
## gastric_ulcer + duodenal_ulcer + gastric_bleeding + aki +
## septic_shock + pneumonia + LVEF_ctg + RAA + Diuritic + inotrop +
## anti_adrenal + ca_blocker + anticoag + antiplate + ppi
## <environment: 0x12d9b7108>
m_near_complete <-
matchit(
formula= ps_f,
data = df_survival_complete_PS,
method = "nearest",
distance = "glm",
caliper = 0.2,
ratio = 1,
replace = F)
matched_data_complete <- MatchIt::match.data(m_near_complete)
matched_data_complete <-
matched_data_complete |>
mutate(h2ras = as.numeric(as.character(h2ras)))
cols <-
c(
"age", "BMI",
"HeartRate","RespRate", "TempC", "SpO2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
"ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
col_factors <-
c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"mortality30", "HOSPITAL_EXPIRE_FLAG",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
tableone<-
matched_data_complete |> select(c(col_continuous, col_factors, "h2ras"))|>
CreateTableOne(vars = c(col_continuous, col_factors), strata = "h2ras", factorVars = col_factors)
print(tableone, smd = T, missing = T, test = F, explain = F) |>
as.data.frame() |>
mutate(SMD = ifelse(SMD == "<0.001", 0.001, SMD)) |>
write.csv("matched_complete.csv")
## Stratified by h2ras
## 0 1 SMD
## n 2513 2513
## age 72.13 (14.47) 71.78 (14.12) 0.025
## Height 168.12 (11.16) 167.92 (10.90) 0.019
## weight 82.76 (26.22) 82.33 (25.33) 0.017
## BMI 29.16 (8.48) 29.11 (8.29) 0.005
## HeartRate 88.90 (20.88) 88.78 (19.79) 0.006
## SysBP 122.08 (25.10) 122.27 (25.19) 0.008
## DiasBP 63.30 (17.53) 62.91 (16.89) 0.023
## RespRate 19.72 (5.98) 19.39 (6.48) 0.053
## TempC 36.54 (0.99) 36.53 (0.97) 0.012
## SpO2 96.63 (4.77) 96.70 (5.07) 0.014
## urine_out 1816.68 (1259.76) 1813.71 (1268.90) 0.002
## SOFA 4.87 (3.10) 5.03 (3.14) 0.051
## apsiii 49.02 (19.12) 49.10 (19.77) 0.004
## Calcium 8.53 (0.81) 8.52 (0.81) 0.013
## Cre 1.63 (1.47) 1.61 (1.42) 0.013
## Glucose 142.82 (69.59) 141.97 (71.96) 0.012
## Mg 2.06 (0.78) 2.07 (0.40) 0.020
## Sodium 138.46 (4.75) 138.54 (4.72) 0.017
## BUN 33.53 (22.66) 33.02 (22.42) 0.023
## Plate 234.57 (118.81) 235.91 (122.03) 0.011
## RBC 3.66 (0.71) 3.65 (0.68) 0.019
## WBC 11.61 (12.28) 11.59 (6.63) 0.002
## ADMISSION_TYPE 0.060
## ELECTIVE 212 ( 8.4) 255 (10.1)
## EMERGENCY 2241 (89.2) 2194 (87.3)
## URGENT 60 ( 2.4) 64 ( 2.5)
## INSURANCE 0.011
## Government 38 ( 1.5) 37 ( 1.5)
## Medicare 1946 (77.4) 1954 (77.8)
## Private 519 (20.7) 511 (20.3)
## Self Pay 10 ( 0.4) 11 ( 0.4)
## GENDER = 1 1351 (53.8) 1373 (54.6) 0.018
## ethnicity_5 0.021
## asian 51 ( 2.0) 54 ( 2.1)
## black 208 ( 8.3) 217 ( 8.6)
## hispanic 73 ( 2.9) 71 ( 2.8)
## other 300 (11.9) 309 (12.3)
## white 1881 (74.9) 1862 (74.1)
## mortality30 = 1 438 (17.4) 323 (12.9) 0.128
## HOSPITAL_EXPIRE_FLAG = 1 384 (15.3) 297 (11.8) 0.101
## rrt = 1 90 ( 3.6) 100 ( 4.0) 0.021
## ventilation = 1 1308 (52.0) 1443 (57.4) 0.108
## hypertension = 1 1080 (43.0) 1089 (43.3) 0.007
## DM = 1 917 (36.5) 917 (36.5) <0.001
## anemia = 1 872 (34.7) 840 (33.4) 0.027
## Af = 1 1115 (44.4) 1138 (45.3) 0.018
## coronary_atherosclerosis = 1 1048 (41.7) 1085 (43.2) 0.030
## venous_thrombus = 1 52 ( 2.1) 54 ( 2.1) 0.006
## MI = 1 426 (17.0) 432 (17.2) 0.006
## gastritis = 1 40 ( 1.6) 44 ( 1.8) 0.012
## gastric_ulcer = 1 16 ( 0.6) 16 ( 0.6) <0.001
## duodenal_ulcer = 1 17 ( 0.7) 16 ( 0.6) 0.005
## gastric_bleeding = 1 138 ( 5.5) 137 ( 5.5) 0.002
## aki = 1 1017 (40.5) 1007 (40.1) 0.008
## septic_shock = 1 212 ( 8.4) 194 ( 7.7) 0.026
## pneumonia = 1 624 (24.8) 620 (24.7) 0.004
## LVEF_ctg 0.028
## mild 382 (15.2) 401 (16.0)
## moderate 437 (17.4) 416 (16.6)
## normal 1109 (44.1) 1107 (44.1)
## severe 585 (23.3) 589 (23.4)
## RAA = 1 1547 (61.6) 1571 (62.5) 0.020
## Diuritic = 1 2303 (91.6) 2314 (92.1) 0.016
## inotrop = 1 1140 (45.4) 1220 (48.5) 0.064
## anti_adrenal = 1 2299 (91.5) 2296 (91.4) 0.004
## ca_blocker = 1 953 (37.9) 976 (38.8) 0.019
## anticoag = 1 2403 (95.6) 2368 (94.2) 0.063
## antiplate = 1 2015 (80.2) 2043 (81.3) 0.028
## ppi = 1 1962 (78.1) 1930 (76.8) 0.030
## Stratified by h2ras
## Missing
## n
## age 0.0
## Height 0.0
## weight 0.0
## BMI 0.0
## HeartRate 0.0
## SysBP 0.0
## DiasBP 0.0
## RespRate 0.0
## TempC 0.0
## SpO2 0.0
## urine_out 0.0
## SOFA 0.0
## apsiii 0.0
## Calcium 0.0
## Cre 0.0
## Glucose 0.0
## Mg 0.0
## Sodium 0.0
## BUN 0.0
## Plate 0.0
## RBC 0.0
## WBC 0.0
## ADMISSION_TYPE 0.0
## ELECTIVE
## EMERGENCY
## URGENT
## INSURANCE 0.0
## Government
## Medicare
## Private
## Self Pay
## GENDER = 1 0.0
## ethnicity_5 0.0
## asian
## black
## hispanic
## other
## white
## mortality30 = 1 0.0
## HOSPITAL_EXPIRE_FLAG = 1 0.0
## rrt = 1 0.0
## ventilation = 1 0.0
## hypertension = 1 0.0
## DM = 1 0.0
## anemia = 1 0.0
## Af = 1 0.0
## coronary_atherosclerosis = 1 0.0
## venous_thrombus = 1 0.0
## MI = 1 0.0
## gastritis = 1 0.0
## gastric_ulcer = 1 0.0
## duodenal_ulcer = 1 0.0
## gastric_bleeding = 1 0.0
## aki = 1 0.0
## septic_shock = 1 0.0
## pneumonia = 1 0.0
## LVEF_ctg 0.0
## mild
## moderate
## normal
## severe
## RAA = 1 0.0
## Diuritic = 1 0.0
## inotrop = 1 0.0
## anti_adrenal = 1 0.0
## ca_blocker = 1 0.0
## anticoag = 1 0.0
## antiplate = 1 0.0
## ppi = 1 0.0
df_matched_table_complete <- read.csv("matched_complete.csv",
col.names = c("name", "non_user", "user", "smd", "missing"))
df_matched_table_complete
## name non_user user smd
## 1 n 2513 2513 NA
## 2 age 72.13 (14.47) 71.78 (14.12) 0.025
## 3 Height 168.12 (11.16) 167.92 (10.90) 0.019
## 4 weight 82.76 (26.22) 82.33 (25.33) 0.017
## 5 BMI 29.16 (8.48) 29.11 (8.29) 0.005
## 6 HeartRate 88.90 (20.88) 88.78 (19.79) 0.006
## 7 SysBP 122.08 (25.10) 122.27 (25.19) 0.008
## 8 DiasBP 63.30 (17.53) 62.91 (16.89) 0.023
## 9 RespRate 19.72 (5.98) 19.39 (6.48) 0.053
## 10 TempC 36.54 (0.99) 36.53 (0.97) 0.012
## 11 SpO2 96.63 (4.77) 96.70 (5.07) 0.014
## 12 urine_out 1816.68 (1259.76) 1813.71 (1268.90) 0.002
## 13 SOFA 4.87 (3.10) 5.03 (3.14) 0.051
## 14 apsiii 49.02 (19.12) 49.10 (19.77) 0.004
## 15 Calcium 8.53 (0.81) 8.52 (0.81) 0.013
## 16 Cre 1.63 (1.47) 1.61 (1.42) 0.013
## 17 Glucose 142.82 (69.59) 141.97 (71.96) 0.012
## 18 Mg 2.06 (0.78) 2.07 (0.40) 0.020
## 19 Sodium 138.46 (4.75) 138.54 (4.72) 0.017
## 20 BUN 33.53 (22.66) 33.02 (22.42) 0.023
## 21 Plate 234.57 (118.81) 235.91 (122.03) 0.011
## 22 RBC 3.66 (0.71) 3.65 (0.68) 0.019
## 23 WBC 11.61 (12.28) 11.59 (6.63) 0.002
## 24 ADMISSION_TYPE 0.060
## 25 ELECTIVE 212 ( 8.4) 255 (10.1) NA
## 26 EMERGENCY 2241 (89.2) 2194 (87.3) NA
## 27 URGENT 60 ( 2.4) 64 ( 2.5) NA
## 28 INSURANCE 0.011
## 29 Government 38 ( 1.5) 37 ( 1.5) NA
## 30 Medicare 1946 (77.4) 1954 (77.8) NA
## 31 Private 519 (20.7) 511 (20.3) NA
## 32 Self Pay 10 ( 0.4) 11 ( 0.4) NA
## 33 GENDER = 1 1351 (53.8) 1373 (54.6) 0.018
## 34 ethnicity_5 0.021
## 35 asian 51 ( 2.0) 54 ( 2.1) NA
## 36 black 208 ( 8.3) 217 ( 8.6) NA
## 37 hispanic 73 ( 2.9) 71 ( 2.8) NA
## 38 other 300 (11.9) 309 (12.3) NA
## 39 white 1881 (74.9) 1862 (74.1) NA
## 40 mortality30 = 1 438 (17.4) 323 (12.9) 0.128
## 41 HOSPITAL_EXPIRE_FLAG = 1 384 (15.3) 297 (11.8) 0.101
## 42 rrt = 1 90 ( 3.6) 100 ( 4.0) 0.021
## 43 ventilation = 1 1308 (52.0) 1443 (57.4) 0.108
## 44 hypertension = 1 1080 (43.0) 1089 (43.3) 0.007
## 45 DM = 1 917 (36.5) 917 (36.5) 0.001
## 46 anemia = 1 872 (34.7) 840 (33.4) 0.027
## 47 Af = 1 1115 (44.4) 1138 (45.3) 0.018
## 48 coronary_atherosclerosis = 1 1048 (41.7) 1085 (43.2) 0.030
## 49 venous_thrombus = 1 52 ( 2.1) 54 ( 2.1) 0.006
## 50 MI = 1 426 (17.0) 432 (17.2) 0.006
## 51 gastritis = 1 40 ( 1.6) 44 ( 1.8) 0.012
## 52 gastric_ulcer = 1 16 ( 0.6) 16 ( 0.6) 0.001
## 53 duodenal_ulcer = 1 17 ( 0.7) 16 ( 0.6) 0.005
## 54 gastric_bleeding = 1 138 ( 5.5) 137 ( 5.5) 0.002
## 55 aki = 1 1017 (40.5) 1007 (40.1) 0.008
## 56 septic_shock = 1 212 ( 8.4) 194 ( 7.7) 0.026
## 57 pneumonia = 1 624 (24.8) 620 (24.7) 0.004
## 58 LVEF_ctg 0.028
## 59 mild 382 (15.2) 401 (16.0) NA
## 60 moderate 437 (17.4) 416 (16.6) NA
## 61 normal 1109 (44.1) 1107 (44.1) NA
## 62 severe 585 (23.3) 589 (23.4) NA
## 63 RAA = 1 1547 (61.6) 1571 (62.5) 0.020
## 64 Diuritic = 1 2303 (91.6) 2314 (92.1) 0.016
## 65 inotrop = 1 1140 (45.4) 1220 (48.5) 0.064
## 66 anti_adrenal = 1 2299 (91.5) 2296 (91.4) 0.004
## 67 ca_blocker = 1 953 (37.9) 976 (38.8) 0.019
## 68 anticoag = 1 2403 (95.6) 2368 (94.2) 0.063
## 69 antiplate = 1 2015 (80.2) 2043 (81.3) 0.028
## 70 ppi = 1 1962 (78.1) 1930 (76.8) 0.030
## missing
## 1 NA
## 2 0
## 3 0
## 4 0
## 5 0
## 6 0
## 7 0
## 8 0
## 9 0
## 10 0
## 11 0
## 12 0
## 13 0
## 14 0
## 15 0
## 16 0
## 17 0
## 18 0
## 19 0
## 20 0
## 21 0
## 22 0
## 23 0
## 24 0
## 25 NA
## 26 NA
## 27 NA
## 28 0
## 29 NA
## 30 NA
## 31 NA
## 32 NA
## 33 0
## 34 0
## 35 NA
## 36 NA
## 37 NA
## 38 NA
## 39 NA
## 40 0
## 41 0
## 42 0
## 43 0
## 44 0
## 45 0
## 46 0
## 47 0
## 48 0
## 49 0
## 50 0
## 51 0
## 52 0
## 53 0
## 54 0
## 55 0
## 56 0
## 57 0
## 58 0
## 59 NA
## 60 NA
## 61 NA
## 62 NA
## 63 0
## 64 0
## 65 0
## 66 0
## 67 0
## 68 0
## 69 0
## 70 0
df_love_plot_complete <-
df_unmatched_table |> select(name, smd) |>
rename(unmatched = smd) |>
left_join(df_matched_table_complete, by="name") |>
rename(matched = smd) |>
select(name, unmatched, matched) |>
filter(!is.na(matched)) |>
mutate(name = str_remove(name, pattern = " =.*")) |>
arrange(unmatched)
order <- df_love_plot_complete |> pull(name)
df_love_plot_complete <-
df_love_plot_complete |>
mutate(name = factor(name, levels = order))
df_love_plot_complete
## name unmatched matched
## 1 WBC 0.004 0.002
## 2 Sodium 0.005 0.017
## 3 DiasBP 0.008 0.023
## 4 HeartRate 0.012 0.006
## 5 venous_thrombus 0.013 0.006
## 6 MI 0.019 0.006
## 7 DM 0.024 0.001
## 8 rrt 0.030 0.021
## 9 BMI 0.033 0.005
## 10 anemia 0.036 0.027
## 11 Height 0.042 0.019
## 12 pneumonia 0.042 0.004
## 13 urine_out 0.046 0.002
## 14 septic_shock 0.046 0.026
## 15 SysBP 0.048 0.008
## 16 RBC 0.050 0.019
## 17 gastritis 0.051 0.012
## 18 Calcium 0.053 0.013
## 19 TempC 0.056 0.012
## 20 Af 0.064 0.018
## 21 duodenal_ulcer 0.068 0.005
## 22 Plate 0.074 0.011
## 23 gastric_ulcer 0.075 0.001
## 24 weight 0.076 0.017
## 25 Glucose 0.079 0.012
## 26 ethnicity_5 0.081 0.021
## 27 Mg 0.090 0.020
## 28 LVEF_ctg 0.104 0.028
## 29 apsiii 0.107 0.004
## 30 ppi 0.121 0.030
## 31 INSURANCE 0.122 0.011
## 32 hypertension 0.122 0.007
## 33 Cre 0.125 0.013
## 34 aki 0.132 0.008
## 35 SpO2 0.143 0.014
## 36 SOFA 0.147 0.051
## 37 coronary_atherosclerosis 0.162 0.030
## 38 gastric_bleeding 0.186 0.002
## 39 ca_blocker 0.196 0.019
## 40 HOSPITAL_EXPIRE_FLAG 0.207 0.101
## 41 age 0.219 0.025
## 42 BUN 0.229 0.023
## 43 mortality30 0.255 0.128
## 44 RAA 0.261 0.020
## 45 RespRate 0.270 0.053
## 46 anticoag 0.341 0.063
## 47 ADMISSION_TYPE 0.353 0.060
## 48 inotrop 0.440 0.064
## 49 antiplate 0.473 0.028
## 50 ventilation 0.484 0.108
## 51 anti_adrenal 0.489 0.004
## 52 Diuritic 0.526 0.016
df_love_plot_complete |>
filter(!name %in% c("mortality30", "hospital_death")) |>
pivot_longer(cols = !name, names_to = "Method", values_to = "smd") |>
ggplot() +
geom_point(aes(x = name,
y = smd,
color = Method)) +
geom_hline(yintercept = 0.1, color = "black", lty = 2) +
labs(x = "variable",
y = "SMD") +
coord_flip() +
theme_bw()
love.plot(m_near_complete,
thresholds = 0.1,
abs = TRUE,
position = "top",
shapes = c("circle", "triangle"),
colors = c("red", "blue"))
## Warning: Standardized mean differences and raw mean differences are present in the same plot.
## Use the `stars` argument to distinguish between them and appropriately label the x-axis.
bal.plot(m_near_complete,
which = "both",
type = "histogram", bins=30,
mirror = TRUE) +
labs(title = "Distributional balance for Propensity score",
x = "Propensity score")
## No `var.name` was provided. Displaying balance for distance.
result_matched_complete<-
matched_data_complete |> select(h2ras, mortality30, HOSPITAL_EXPIRE_FLAG) |>
CreateTableOne(vars = c("mortality30"),
strata = "h2ras",
factorVars = c("mortality30"))
print(result_matched_complete) |> as.data.frame() |>
rename(H2RA = '1',
Non_H2RA = '0') |>
write.csv("result_matched_complete.csv")
## Stratified by h2ras
## 0 1 p test
## n 2513 2513
## mortality30 = 1 (%) 438 (17.4) 323 (12.9) <0.001
#KM curve: 30day mortality after matching
matched_data_complete
## # A tibble: 5,026 × 58
## age Height weight BMI HeartRate SysBP DiasBP RespRate TempC SpO2
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 72.0 188. 123. 34.8 61 102 45 20 37.2 98
## 2 95 170. 71.7 24.8 73 153 77 20 36.3 93
## 3 68.9 175. 106. 34.6 91 129 38 22 38.2 97
## 4 80.9 183. 98.2 29.4 71 104 56 12 35.6 100
## 5 41.8 173. 50 16.8 138 83 49 30 37.6 98
## 6 45.5 173. 98 32.9 91 142. 85.5 20 36.3 99
## 7 75.7 160. 94 36.7 93 102 58 12 36.3 100
## 8 61.4 180. 109. 33.5 96 112 72 25 36.9 93
## 9 95 165. 56.7 20.8 86 131 70 24 36.1 97
## 10 95 165. 68 24.9 82 146 70 15 35.6 98
## # ℹ 5,016 more rows
## # ℹ 48 more variables: urine_out <dbl>, SOFA <dbl>, apsiii <dbl>,
## # Calcium <dbl>, Cre <dbl>, Glucose <dbl>, Mg <dbl>, Sodium <dbl>, BUN <dbl>,
## # Plate <dbl>, RBC <dbl>, WBC <dbl>, ADMISSION_TYPE <chr>, INSURANCE <chr>,
## # GENDER <dbl>, ethnicity_5 <chr>, mortality30 <dbl>,
## # HOSPITAL_EXPIRE_FLAG <dbl>, rrt <dbl>, ventilation <dbl>,
## # hypertension <dbl>, DM <dbl>, anemia <dbl>, Af <dbl>, …
df_survival_matched_complete <-
matched_data_complete |>
mutate(futime30 = ifelse(futime30 <=0, 1, futime30))
formula_1_complete2 = Surv(futime30, mortality30)~ h2ras
sfit_m1_complete2 = surv_fit(formula_1_complete2, data = df_survival_matched_complete, conf.type = "log-log")
ggsurvplot(sfit_m1_complete2,
conf.int=TRUE, pval=T, pval.method = T,
risk.table=TRUE,
break.time.by = 5,
break.y.by = 0.2,
palette = "Set1",
xlab = "Days",
ylab = "Cum survival",
legend.labs=c("Non-H2AR", "H2AR"),
legend.title = "",
risk.table.height=0.25,
main = "before matching"
)
#cox regression model - model_1_complete
cph_m1_complete <- coxph(formula_1_complete2, data = df_survival_matched_complete)
res_m1_complete <- summary(cph_m1_complete)
res_model_m1_complete <-
res_m1_complete$coefficients |> as.data.frame() |>
rename(HR = "exp(coef)",
se = "se(coef)",
P = "Pr(>|z|)") |>
mutate(lower = exp(coef - 1.96*se),
upper = exp(coef + 1.96*se)) |>
select(HR, lower, upper, P) |>
mutate_if(is.numeric, round, 3) |>
write.csv("model_m1_complete.csv")
model_m1_complete <- read.csv("model_m1_complete.csv")
model_m1_complete
## X HR lower upper P
## 1 h2ras 0.714 0.618 0.824 0
#model_2_complete
cols_cph_2_complete2 <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
)
formula_2_complete2 <-
cols_cph_2_complete2 |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_2_complete2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5
## <environment: 0x113b54008>
cph_m2_complete <- coxph(formula_2_complete2, data = df_survival_matched_complete)
res_m2_complete <- summary(cph_m2_complete)
res_model_m2_complete <-
res_m2_complete$coefficients |> as.data.frame() |>
rename(HR = "exp(coef)",
se = "se(coef)",
P = "Pr(>|z|)") |>
mutate(lower = exp(coef - 1.96*se),
upper = exp(coef + 1.96*se)) |>
select(HR, lower, upper, P) |>
mutate_if(is.numeric, round, 3) |>
write.csv("model_m2_complete.csv")
model_m2_complete <- read.csv("model_m2_complete.csv")
model_m2_complete
## X HR lower upper P
## 1 h2ras 0.725 0.628 0.838 0.000
## 2 age 1.025 1.018 1.032 0.000
## 3 BMI 0.991 0.981 1.002 0.108
## 4 GENDER 0.987 0.854 1.141 0.861
## 5 ADMISSION_TYPEEMERGENCY 2.104 1.504 2.941 0.000
## 6 ADMISSION_TYPEURGENT 1.542 0.817 2.911 0.182
## 7 INSURANCEMedicare 1.213 0.534 2.755 0.645
## 8 INSURANCEPrivate 1.115 0.487 2.551 0.796
## 9 INSURANCESelf Pay 2.155 0.606 7.669 0.236
## 10 ethnicity_5black 0.601 0.344 1.051 0.074
## 11 ethnicity_5hispanic 0.650 0.322 1.311 0.229
## 12 ethnicity_5other 1.204 0.732 1.980 0.465
## 13 ethnicity_5white 0.837 0.522 1.343 0.461
#model_3_complete
cols_cph_3_complete2 <-
c("h2ras",
"age", "BMI", "GENDER",
"ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
"RespRate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
"urine_out", "Glucose", "Calcium", "Cre", "Mg", "Sodium", "BUN",
"Plate", "RBC", "WBC",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia"
)
formula_3_complete2 <-
cols_cph_3_complete2 |>
str_c(collapse = " + ") %>%
str_c("Surv(futime30, mortality30) ~ ", .) %>%
formula()
formula_3_complete2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE +
## INSURANCE + ethnicity_5 + RespRate + LVEF_ctg + SOFA + apsiii +
## rrt + ventilation + urine_out + Glucose + Calcium + Cre +
## Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic +
## inotrop + anti_adrenal + ca_blocker + anticoag + antiplate +
## ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis +
## venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
## aki + septic_shock + pneumonia
## <environment: 0x11a634f28>
cph_m3_complete <- coxph(formula_3_complete2, data = df_survival_matched_complete)
res_m3_complete <- summary(cph_m3_complete)
res_model_m3_complete <-
res_m3_complete$coefficients |> as.data.frame() |>
rename(HR = "exp(coef)",
se = "se(coef)",
P = "Pr(>|z|)") |>
mutate(lower = exp(coef - 1.96*se),
upper = exp(coef + 1.96*se)) |>
select(HR, lower, upper, P) |>
mutate_if(is.numeric, round, 3) |>
write.csv("model_m3_complete.csv")
model_m3_complete <- read.csv("model_m3_complete.csv")
model_m3_complete
## X HR lower upper P
## 1 h2ras 0.662 0.572 0.766 0.000
## 2 age 1.027 1.020 1.035 0.000
## 3 BMI 0.985 0.975 0.996 0.007
## 4 GENDER 1.053 0.904 1.227 0.504
## 5 ADMISSION_TYPEEMERGENCY 1.765 1.250 2.493 0.001
## 6 ADMISSION_TYPEURGENT 1.142 0.599 2.177 0.687
## 7 INSURANCEMedicare 1.148 0.501 2.632 0.744
## 8 INSURANCEPrivate 1.019 0.441 2.352 0.965
## 9 INSURANCESelf Pay 1.887 0.518 6.873 0.336
## 10 ethnicity_5black 0.995 0.562 1.762 0.987
## 11 ethnicity_5hispanic 0.938 0.458 1.921 0.861
## 12 ethnicity_5other 1.314 0.787 2.193 0.296
## 13 ethnicity_5white 0.998 0.614 1.622 0.993
## 14 RespRate 1.021 1.010 1.033 0.000
## 15 LVEF_ctgmoderate 1.035 0.792 1.353 0.799
## 16 LVEF_ctgnormal 0.937 0.749 1.174 0.573
## 17 LVEF_ctgsevere 1.092 0.854 1.396 0.483
## 18 SOFA 0.987 0.954 1.021 0.446
## 19 apsiii 1.019 1.014 1.025 0.000
## 20 rrt 1.252 0.953 1.644 0.106
## 21 ventilation 1.597 1.332 1.916 0.000
## 22 urine_out 1.000 1.000 1.000 0.000
## 23 Glucose 1.001 1.001 1.002 0.002
## 24 Calcium 0.997 0.912 1.089 0.944
## 25 Cre 0.843 0.776 0.915 0.000
## 26 Mg 1.043 0.948 1.148 0.387
## 27 Sodium 0.984 0.970 0.999 0.034
## 28 BUN 1.010 1.006 1.014 0.000
## 29 Plate 1.000 0.999 1.000 0.170
## 30 RBC 0.993 0.882 1.118 0.912
## 31 WBC 1.008 1.005 1.011 0.000
## 32 RAA 0.551 0.468 0.650 0.000
## 33 Diuritic 0.805 0.608 1.065 0.129
## 34 inotrop 1.413 1.182 1.689 0.000
## 35 anti_adrenal 0.445 0.360 0.549 0.000
## 36 ca_blocker 0.905 0.772 1.060 0.216
## 37 anticoag 0.909 0.647 1.278 0.584
## 38 antiplate 0.796 0.660 0.961 0.017
## 39 ppi 0.855 0.700 1.046 0.127
## 40 hypertension 1.081 0.929 1.257 0.315
## 41 DM 0.897 0.761 1.058 0.197
## 42 anemia 0.659 0.560 0.776 0.000
## 43 Af 1.034 0.888 1.204 0.671
## 44 coronary_atherosclerosis 0.913 0.769 1.084 0.300
## 45 venous_thrombus 1.083 0.716 1.639 0.706
## 46 MI 1.281 1.043 1.573 0.018
## 47 gastric_ulcer 0.584 0.183 1.863 0.364
## 48 duodenal_ulcer 1.178 0.533 2.601 0.685
## 49 gastric_bleeding 1.056 0.772 1.444 0.733
## 50 aki 1.313 1.110 1.552 0.001
## 51 septic_shock 1.108 0.899 1.367 0.336
## 52 pneumonia 1.197 1.018 1.406 0.029
bind_rows(model_m1_complete |> filter(X == "h2ras"),
model_m2_complete |> filter(X == "h2ras"),
model_m3_complete |> filter(X == "h2ras")) |>
mutate(model = c("model_1", "model_2", "model_3"), .before=HR) |>
select(!X) |>
write_csv("adjusted_models_complete.csv")
(adjusted_models_complete <- read_csv("adjusted_models_complete.csv"))
## Rows: 3 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): model
## dbl (4): HR, lower, upper, P
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 3 × 5
## model HR lower upper P
## <chr> <dbl> <dbl> <dbl> <dbl>
## 1 model_1 0.714 0.618 0.824 0
## 2 model_2 0.725 0.628 0.838 0
## 3 model_3 0.662 0.572 0.766 0