(前回までの資料も一部記載していますが、コードは最低限にしています。復習は前回の資料を参照してください。)
pacman::p_load(tidyverse, lubridate, tableone, skimr,
duckdb, arrow, RSQLite)
FドライブのDatabaseフォルダ内のmimic.dbにアクセスする場合は以下のコード(各自の状況に合わせて適宜修正)。
(mimic.dbが無い場合は以下の操作で空のmimic.dbが作成される)
con_mimic <- dbConnect(duckdb::duckdb(), "F:/Database/mimic.db")
mimic.dbの中に入っているデータリスト
dbListTables(con_mimic)
## [1] "admissions" "apsiii"
## [3] "callout" "caregivers"
## [5] "chartevents" "comorbidities_AHRQ"
## [7] "comorbidities_Quan" "cptevents"
## [9] "d_cpt" "d_icd_diagnoses"
## [11] "d_icd_procedures" "d_items"
## [13] "d_labitems" "datetimeevents"
## [15] "db_HtWt" "db_LVEF"
## [17] "db_LVEF_assess" "db_PEO"
## [19] "db_comorb" "db_ethnicity"
## [21] "db_patients" "db_rrt"
## [23] "db_sofa" "db_urine"
## [25] "db_venti" "db_vital"
## [27] "df_lab_list" "diagnoses_icd"
## [29] "drgcodes" "height_firstday"
## [31] "height_weight" "icustay_details"
## [33] "icustays" "inputevents_cv"
## [35] "inputevents_mv" "labevents"
## [37] "microbiologyevents" "noteevents"
## [39] "outputevents" "patients"
## [41] "pivot_RRT" "pivot_height"
## [43] "pivot_labo" "pivot_urine_output"
## [45] "pivot_vitals" "prescriptions"
## [47] "procedureevents_mv" "procedures_icd"
## [49] "services" "sofa"
## [51] "test" "transfers"
## [53] "urine_output" "urine_output_firstday"
## [55] "ventilation_classification" "ventilation_duration"
## [57] "weight_duration" "weight_firstday"
# dbDisconnect(con_mimic, shutdown=TRUE)
作成データ
db_patients <- tbl(con_mimic, "db_patients")
db_PEO <- tbl(con_mimic, "db_PEO")
db_HtWt <- tbl(con_mimic, "db_HtWt")
db_vital <- tbl(con_mimic, "db_vital")
db_urine <- tbl(con_mimic, "db_urine")
db_sofa <- tbl(con_mimic, "db_sofa")
db_rrt <- tbl(con_mimic, "db_rrt")
db_venti <- tbl(con_mimic, "db_venti")
db_ethnicity <- tbl(con_mimic, "db_ethnicity")
db_comorb <- tbl(con_mimic, "db_comorb")
db_LVEF <- tbl(con_mimic, "db_LVEF")
db_ad <- tbl(con_mimic, "admissions")
db_pt <- tbl(con_mimic, "patients")
db_diag <- tbl(con_mimic, "diagnoses_icd")
db_icustay <- tbl(con_mimic, "icustays")
db_icustay_hadm <-
db_icustay |>
distinct(HADM_ID, .keep_all = T) |>
select(HADM_ID, ICUSTAY_ID)
hadm_icu_vec <-
db_icustay_hadm |>
select(HADM_ID) |>
pull()
icustay_id_vec <-
db_icustay_hadm |>
select(ICUSTAY_ID) |>
pull()
db_d_icd_diag <- tbl(con_mimic, "d_icd_diagnoses")
df_d_icd_diag <- collect(db_d_icd_diag)
db_presc <- tbl(con_mimic,"prescriptions")
col_presc <-
c("SUBJECT_ID", "HADM_ID", "ICUSTAY_ID", "DRUG", "DRUG_NAME_POE", "DRUG_NAME_GENERIC", "FORMULARY_DRUG_CD")
db_presc_drug <- db_presc |> select(col_presc)
drugs <-
db_presc_drug |>
distinct(DRUG, DRUG_NAME_POE, DRUG_NAME_GENERIC, FORMULARY_DRUG_CD)
df_drugs <- drugs |> collect()
以下の抽出を行う。今まで通りHADM_IDと目的の変数群に絞ったファイルを作成する事。
1. Laboratory data(BUN, Cr, Na, Ca, Mg, Glu, WBC, RBC, Plt)
2. Medications(RAAs, Diuritics, inotropic agents, adrenaline receptor
antagonists, calcium antagonists, antiplatelets, PPIs)
pivot_laboの値と合わなさすぎるので、論文では以下のファイルから直接データを取ってきていると思われる。我々も直接抽出する方針とする。
“d_labitems”
“labevents”
<方針>
BUN, Cr, Na, Ca, Mg, Glu, WBC, RBC, Plt
1.d_labitemsから上記9つのITEMIDを特定する
2.検査結果表(膨大)を上記9つに限定する
3.さらにHADM_IDを研究対象の入院に限定する
4.wide formatにして、データをJoinする
db_labitems <- tbl(con_mimic, "d_labitems")
db_labitems |> colnames()
## [1] "ROW_ID" "ITEMID" "LABEL" "FLUID" "CATEGORY"
## [6] "LOINC_CODE"
db_labitems |> count()
db_labitems |> head(5)
df_labitems <- collect(db_labitems)
1.上記9つのITEMIDを特定する
# blood
df_lab_items <-
df_labitems |>
filter(str_detect(FLUID, coll("blood", ignore_case = T))) |>
filter(str_detect(LABEL, coll("white",ignore_case = T )) |
str_detect(LABEL, coll("red blood",ignore_case = T )) |
str_detect(LABEL, coll("platelet count",ignore_case = T )) |
str_detect(LABEL, coll("gluco",ignore_case = T )) |
str_detect(LABEL, coll("urea",ignore_case = T )) |
str_detect(LABEL, coll("creatinine",ignore_case = T )) |
str_detect(LABEL, coll("sodium",ignore_case = T )) |
str_detect(LABEL, coll("calcium",ignore_case = T )) |
str_detect(LABEL, coll("magne",ignore_case = T ))
) |>
filter(CATEGORY != "Blood Gas") |>
filter(!ITEMID %in% c(51278, 51529)) |>
select(ITEMID, LABEL)
df_lab_items
vec_labitems <-
df_lab_items |>
select(ITEMID) |>
pull()
db_labeve <- tbl(con_mimic, "labevents")
db_labeve |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID" "ITEMID" "CHARTTIME"
## [6] "VALUE" "VALUENUM" "VALUEUOM" "FLAG"
db_labeve |> count()
db_labeve |> head(5)
2.検査結果表(膨大)を上記9つに限定する
3.さらにHADM_IDを研究対象の入院に限定する/
入院中複数回検査しているが、どのタイミングを採用するのか論文に書いてない。
最初のタイミングを採用した。
db_labeve |> head(20)
vec_hadms <-
db_PEO |>
select(HADM_ID) |>
pull()
db_labo_res <-
db_labeve |>
filter(ITEMID %in% vec_labitems) |>
filter(HADM_ID %in% vec_hadms) |>
group_by(HADM_ID) |>
distinct(ITEMID, .keep_all = T) |>
ungroup() |>
compute()
db_labo_res |> count()
db_labo_res |> head(100)
4.wide formatにして、データをJoinする
df_lab_list <-
df_lab_items |>
mutate(lab = str_sub(LABEL, start = 1, end = 3)) |>
select(ITEMID, lab)
dbRemoveTable(con_mimic, "df_lab_list")
dbWriteTable(con_mimic, "df_lab_list", df_lab_list)
db_lab_items_list <- tbl(con_mimic, "df_lab_list")
db_lab_items_list
db_labo_res_names <-
db_labo_res |>
left_join(db_lab_items_list, by = "ITEMID") |>
mutate(new_lab = paste(lab, VALUEUOM, sep = "_")) |>
select(!c(lab, VALUEUOM, VALUE, ROW_ID, FLAG, ITEMID, CHARTTIME)) |>
pivot_wider(names_from = new_lab, values_from = VALUENUM) |>
compute()
db_labo_res_names |> head(100)
db_labo_res_names |> count()
# db_labo_res_names |>
# group_by() |>
# summarise(count = n())
<db_PEO_laboで完成> 割り算記号の列名は修正しておく
db_PEO_labo <-
db_PEO |>
left_join(db_labo_res_names, by = c("HADM_ID", "SUBJECT_ID")) |>
rename(Calcium = "Cal_mg/dL",
Cre = "Cre_mg/dL",
Glucose = "Glu_mg/dL",
Mg = "Mag_mg/dL",
Sodium = "Sod_mEq/L",
BUN = "Ure_mg/dL",
Plate = "Pla_K/uL",
RBC = "Red_m/uL",
WBC = "Whi_K/uL"
) |>
compute()
db_PEO_labo |> head(10)
db_PEO_labo |> count()
表作成とヒストグラム作成の自作関数を作成する
f_table = function(cols_cont, cols_fact, db){
tableone <-
db |>
select(cols_cont, cols_fact, "h2ras")|>
collect() |>
CreateTableOne(vars = c(cols_cont, cols_fact),
strata = "h2ras",
factorVars = cols_fact)
print(tableone, smd = TRUE, missing = TRUE, test = FALSE, explain = FALSE)
}
f_hist = function(cols_cont, db){
db |>
select(cols_cont) |>
pivot_longer(cols = cols_cont, names_to = "name", values_to = "value") |>
ggplot()+
geom_histogram(aes(x = value), color = "black")+
facet_wrap(~ name, scales = "free", ncol = 4) +
theme_bw()+
theme(text = element_text(size = 16))
}
cols_labo <- c("Calcium", "Cre", "Glucose", "Mg",
"Sodium", "BUN", "Plate", "RBC", "WBC")
db_PEO_labo |>
f_table(cols_cont = cols_labo, cols_fact = c())
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## Calcium 8.57 (0.89) 8.67 (0.84) 0.122 2.9
## Cre 1.82 (1.72) 1.60 (1.51) 0.137 1.0
## Glucose 157.16 (88.46) 153.63 (81.05) 0.042 1.0
## Mg 1.99 (0.41) 2.04 (0.51) 0.108 1.4
## Sodium 138.01 (5.21) 138.22 (4.78) 0.041 1.0
## BUN 36.87 (26.00) 30.68 (21.02) 0.262 1.0
## Plate 245.71 (118.16) 241.97 (115.02) 0.032 1.0
## RBC 3.81 (0.76) 3.88 (0.73) 0.083 1.0
## WBC 12.13 (12.49) 11.57 (14.37) 0.041 1.0
db_PEO_labo |>
f_hist(cols_cont = cols_labo)
db_labo <-
db_PEO_labo |>
select("HADM_ID", cols_labo)
db_labo <- db_labo |> compute()
xxxpril
xxxartan
aliskiren
RAA_vec <- c(".+pril", ".+artan", "alis.+")
RAA_or <- str_c(RAA_vec, collapse = "|")
RAA_names <-
df_drugs |>
filter(str_detect(DRUG, regex(RAA_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(RAA_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(RAA_or, ignore_case = T))) |>
filter(!DRUG %in% c("Lidocaine-Prilocaine")) |>
select(DRUG) |> distinct() |> pull()
“Lidocaine-Prilocaine”はノイズなので除去
使用薬剤歴全リストから、RAA阻害薬を使ったレコードのみに限定し、その患者IDを得る
# H2RAsの時作ったdb_presc_drug
RAA_user <-
db_presc_drug |>
filter(DRUG %in% RAA_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_RAA <-
db_PEO |>
mutate(RAA = ifelse(SUBJECT_ID %in% RAA_user, 1, 0))
f_table(cols_cont = c(),
cols_fact = c("RAA"),
db = db_patients_RAA)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## RAA = 1 2880 (48.4) 2722 (61.3) 0.261 0.0
for merge
db_RAA <- db_patients_RAA |> select(HADM_ID, RAA)
これ以降、同じようなコードの繰り返しになる。
本来はコードを関数化するべきだが、内容の確認・学習の意味も込めて関数化は避けた。興味のある方は関数化してコードをスッキリさせると良い。
furosemide (Lasix)
torsemide (Demadex)
bumetanide
hydrochlorothiazide
chlorthalidone
metolazone
indapamide
amiloride
triamterene (Maxzide)
spironolactone (Aldactone)
eplerenone (Inspra)
diurit_vec <-
c("furosemide", "lasix", "torsemide", "Demadex", "bumetanide", "hydrochlorothiazide",
"chlorthalidone", "metolazone", "indapamide", "amiloride", "triamterene",
"spironolactone", "Aldactone", "eplerenone", "Inspra" )
diurit_or <- str_c(diurit_vec, collapse = "|")
diurit_or
## [1] "furosemide|lasix|torsemide|Demadex|bumetanide|hydrochlorothiazide|chlorthalidone|metolazone|indapamide|amiloride|triamterene|spironolactone|Aldactone|eplerenone|Inspra"
diuritic_names <-
df_drugs |>
filter(str_detect(DRUG, regex(diurit_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(diurit_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(diurit_or, ignore_case = T))) |>
select(DRUG) |>
filter(!DRUG %in% c("Captopril","Enalaprilat", "Heparin", "Metoprolol" )) |>
distinct() |> pull()
使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
diuritic_user <-
db_presc_drug |>
filter(DRUG %in% diuritic_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_diuritic <-
db_PEO |>
mutate(Diuritic = ifelse(SUBJECT_ID %in% diuritic_user, 1, 0))
表作成
f_table(cols_cont = c(),
cols_fact = c("Diuritic"),
db = db_patients_diuritic)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## Diuritic = 1 4311 (72.5) 4083 (91.9) 0.526 0.0
for merge
db_diuritic <- db_patients_diuritic |> select(HADM_ID, Diuritic)
Epinephrine (Adrenalin® or Auvi-Q®)
Norepinephrine (Levophed® or Levarterenol®)
Dopamine
Dobutamine
Ephedrine
Phenylephrine
Levosimendan
Digoxin (Cardoxin® or Lanoxin®)
Milrinone
Amrinone
Enoximone
inotrop_vec <-
c("Epinephrine", "Adrenalin", "Norepinephrine", "Levophed", "Levarterenol",
"Dopamine", "Dobutamine"
)
inotrop_or <- str_c(inotrop_vec, collapse = "|")
inotrop_or
## [1] "Epinephrine|Adrenalin|Norepinephrine|Levophed|Levarterenol|Dopamine|Dobutamine"
inotrop_names <-
df_drugs |>
filter(str_detect(DRUG, regex(inotrop_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(inotrop_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(inotrop_or, ignore_case = T))) |>
select(DRUG) |>
filter(!str_detect(DRUG, regex("bupivacaine|lidocaine", ignore_case = T))) |>
filter(!DRUG %in% c("1", "Epinephrine Topical Soln", "Epinephrine Inhalation",
"Racepinephrine", "racemic epinephrine")) |>
distinct() |> pull()
inotrop_names
## [1] "Norepinephrine" "DOBUTamine"
## [3] "Epinephrine 1:1000" "DopAmine"
## [5] "DOPamine" "Dobutamine HCl"
## [7] "Dopamine" "Dopamine HCl"
## [9] "EPINEPHRINE" "Epinephrine HCl"
## [11] "Dobutamine" "Epinephrine"
## [13] "EPINEPHrine" "NORepinephrine"
## [15] "epi" "Epi"
## [17] "Epin" "Epinephrin"
## [19] "EpiPen" "Epine"
## [21] "Epinephri" "Dobutamine Hcl"
## [23] "EPINEPHrine Auto Injector" "Norepinephrine Bitartrate"
## [25] "Epineph" "Epinephrine Base"
## [27] "epinephrine" "Dopamine Hcl"
## [29] "Epinephrine Auto Injector" "Epinephrine-Sodium Chloride"
## [31] "Epinephrine Kit"
使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
inotrop_user <-
db_presc_drug |>
filter(DRUG %in% inotrop_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_inotrop <-
db_PEO |>
mutate(inotrop = ifelse(SUBJECT_ID %in% inotrop_user, 1, 0))
データ確認:
f_table(cols_cont = c(),
cols_fact = c("inotrop"),
db = db_patients_inotrop)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## inotrop = 1 1780 (29.9) 2266 (51.0) 0.440 0.0
for merge
db_inotrop <- db_patients_inotrop |> select(HADM_ID, inotrop)
Clonidine (Catapres)
Phentolamine
phenoxybenzamine
Tamsulosin
Atenolol
Propranolol
Nebivilol
Atenolol
Oxprenolol
Metoprolol
Timolol
Pindolol
Nadolol
Pindolol
Esmolol
Acebutolol
Sotalol
Talinolol
Betaxolol
Labetalol
Carvedilol
anti_adrenal_vec <-
c("Clonidine", "Catapres", "Phentolamine", "phenoxybenzamine",
"Tamsulosin",
"lol"
)
anti_adrenal_or <- str_c(anti_adrenal_vec, collapse = "|")
anti_adrenal_or
## [1] "Clonidine|Catapres|Phentolamine|phenoxybenzamine|Tamsulosin|lol"
anti_adrenal_names <-
df_drugs |>
filter(str_detect(DRUG, regex(anti_adrenal_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(anti_adrenal_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(anti_adrenal_or, ignore_case = T))) |>
select(DRUG) |>
filter(!str_detect(DRUG, regex("bupivacaine|lidocaine", ignore_case = T))) |>
filter(!DRUG %in% c("Fentanyl Citrate", "Heparin")) |>
distinct() |> pull()
anti_adrenal_names
## [1] "Metoprolol" "Metoprolol XL"
## [3] "Metoprolol XL (Toprol XL)" "Metoprolol Tartrate"
## [5] "Dorzolamide 2%/Timolol 0.5% Ophth." "Propranolol HCl"
## [7] "Propranolol LA" "Apraclonidine 0.5%"
## [9] "Propranolol" "flomax"
## [11] "Esmolol" "Betaxolol HCl 0.25%"
## [13] "Betaxolol HCl" "Labetalol"
## [15] "Metoprolo" "Atenolol"
## [17] "Metoprolol Succinate XL" "Timolol Maleate 0.5%"
## [19] "CloniDINE" "Labetalol HCl"
## [21] "timolol" "Esmolol in Saline (Iso-osm)"
## [23] "Carteolol 1% Ophth Soln" "Metipranolol"
## [25] "Phentolamine Mesylate" "Catapres"
## [27] "Clonidine Patch" "Betoptic S"
## [29] "ESMOLOL" "Clonidine T"
## [31] "Betaxolol Hcl 0.25%" "Tamsulosin"
## [33] "Levobunolol Hcl 0.5%" "Clonidine Patch 0.1 mg/24 hr"
## [35] "Sotalol" "Betaxolol Ophth Susp 0.25%"
## [37] "Acebutolol HCl" "Levobunolol 0.25%"
## [39] "Proprano" "Clonidine TTS 1 Patch"
## [41] "NEO*PO*Propranolol" "bisoprolol"
## [43] "timolo" "dorzolamide-timolol"
## [45] "Carvedilol" "Sotalol HCl"
## [47] "Timolol Maleate 0.5% GFS" "Clonidine"
## [49] "bisopr" "Clonidine HCl"
## [51] "Clonidine TTS 2 Patch" "Nadolol"
## [53] "Levobunolol 0.5%" "Sterile Diluent for Flolan"
## [55] "Toprol XL" "Betimol (timolol hemihydrate)"
## [57] "Propra" "Timolol Maleate 0.5% XE"
## [59] "Sotalol AF" "Cosopt"
## [61] "Tamsulosin HCl" "*NF* TIMOLOL HEMIHYDRATE (BETIMOL)"
## [63] "Phenoxybenzamine HCl" "Stanozolol (Bulk)"
## [65] "METOPROLOL" "Levobunolol"
## [67] "metipranolol" "Clonidine TTS 3 Patch"
## [69] "Bisoprolol Fumarate" "Clonidine Patch 0.2 mg/24 hr"
## [71] "Levobunolol Hcl 0.25%" "Atenolol-Chlorthalidone"
## [73] "Clonidine TTS" "Istalol"
## [75] "Betimol ( Timolol )" "Labet"
## [77] "Timolol Maleate 0.25%" "Prop"
## [79] "Clonidine Patch 0.3 mg/24 hr" "Acebutolol"
## [81] "Pindolol" "bisoprolol fumarate"
## [83] "TIMOP" "Propranolol Oral Solution"
## [85] "Zebeta" "Betaxolol"
## [87] "sotalol" "Timolol"
## [89] "Levobunolol HCl 0.5%" "LABETALOL"
## [91] "bisopro" "bisop"
使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
anti_adrenal_user <-
db_presc_drug |>
filter(DRUG %in% anti_adrenal_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_anti_adrenal <-
db_PEO |>
mutate(anti_adrenal = ifelse(SUBJECT_ID %in% anti_adrenal_user, 1, 0))
データ確認:
f_table(cols_cont = c(),
cols_fact = c("anti_adrenal"),
db = db_patients_anti_adrenal)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## anti_adrenal = 1 4439 (74.6) 4098 (92.3) 0.489 0.0
for merge
db_anti_adrenal <- db_patients_anti_adrenal |> select(HADM_ID, anti_adrenal)
Amlodipine (Norvasc)
Aranidipine (Sapresta)
Azelnidipine (Calblock)
Barnidipine (HypoCa)
Benidipine (Coniel)
Cilnidipine (Atelec, Cinalong, Siscard)
Clevidipine (Cleviprex)
Efonidipine (Landel)
Felodipine (Plendil)
Isradipine (DynaCirc, Prescal)
Lacidipine (Motens, Lacipil)
Lercanidipine (Zanidip)
Manidipine (Calslot, Madipine)
Nicardipine (Cardene, Carden SR)
Nifedipine (Procardia, Adalat)
Nilvadipine (Nivadil)
Nimodipine (Nimotop)
Nisoldipine (Baymycard, Sular, Syscor)
Nitrendipine (Cardif, Nitrepin, Baylotensin)
Pranidipine (Acalas)
Fendiline
Gallopamil
Verapamil (Calan, Isoptin)
Diltiazem (Cardizem)
ca_blocker_vec <-
c("dipine", "Fendiline", "Gallopamil", "Verapamil",
"Diltiazem"
)
ca_blocker_or <- str_c(ca_blocker_vec, collapse = "|")
ca_blocker_or
## [1] "dipine|Fendiline|Gallopamil|Verapamil|Diltiazem"
ca_blocker_names <-
df_drugs |>
filter(str_detect(DRUG, regex(ca_blocker_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(ca_blocker_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(ca_blocker_or, ignore_case = T))) |>
select(DRUG) |>
distinct() |> pull()
ca_blocker_names
## [1] "NiCARdipine IV" "Diltiazem"
## [3] "Diltiazem Extended-Release" "NIFEdipine"
## [5] "NIFEdipine CR" "Verapamil SR"
## [7] "diltiazem" "Nifed"
## [9] "Procardia XL" "Nicardipine"
## [11] "Nifedipine" "DILT"
## [13] "diltiaz" "Amlodipine"
## [15] "Nifedipine CR" "Verapamil HCl"
## [17] "Nicardipine HCl" "diltia"
## [19] "Verapamil" "Isradipine"
## [21] "nifedipine" "dilt"
## [23] "Lotrel" "Nifedi"
## [25] "Calan SR" "NiCARdipine"
## [27] "Procainamide HCl" "Diltia"
## [29] "nif" "*NF* Diltiazem SR"
## [31] "Amlodipine Besylate" "Verapa"
## [33] "nifed" "Felodipine"
## [35] "Diltiaz" "Nimodipine"
## [37] "Nifedipine (Bulk)" "Tiazac"
## [39] "*NF* Nifedipine XL" "Nisoldipine (Sular)"
## [41] "amlodipine-benazepril" "Nicardipine HCl IV"
## [43] "*NF* Nicardipine HCl IV" "Dilti"
## [45] "PROCARDIA XL" "Vera"
使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
ca_blocker_user <-
db_presc_drug |>
filter(DRUG %in% ca_blocker_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_ca_blocker <-
db_PEO |>
mutate(ca_blocker = ifelse(SUBJECT_ID %in% ca_blocker_user, 1, 0))
データ確認:
f_table(cols_cont = c(),
cols_fact = c("ca_blocker"),
db = db_patients_ca_blocker)
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## ca_blocker = 1 1747 (29.4) 1714 (38.6) 0.196 0.0
for merge
db_ca_blocker <- db_patients_ca_blocker |> select(HADM_ID, ca_blocker)
Fondaparinux
Idraparinux
Idrabiotaparinux
dabigatran, rivaroxaban, apixaban, edoxaban, betrixaban
argatroban
hirudin, lepirudin, bivalirudin
anticoag_vec <-
c("Coumadin", "Warfarin", "Fondaparinux", "Idraparinux", "Idrabiotaparinux",
"heparin",
"dabigatran", "rivaroxaban", "apixaban", "edoxaban", "betrixaban", "argatroban",
"hirudin", "lepirudin", "bivalirudin"
)
anticoag_or <- str_c(anticoag_vec, collapse = "|")
anticoag_or
## [1] "Coumadin|Warfarin|Fondaparinux|Idraparinux|Idrabiotaparinux|heparin|dabigatran|rivaroxaban|apixaban|edoxaban|betrixaban|argatroban|hirudin|lepirudin|bivalirudin"
anticoag_names <-
df_drugs |>
filter(str_detect(DRUG, regex(anticoag_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(anticoag_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(anticoag_or, ignore_case = T))) |>
select(DRUG) |>
distinct() |>
pull()
anticoag_names
## [1] "Heparin Flush (5000 Units/mL)"
## [2] "Heparin Flush Hickman (100 units/ml)"
## [3] "Heparin Flush (100 units/ml)"
## [4] "Heparin (CRRT Machine Priming)"
## [5] "Heparin Flush CVL (100 units/ml)"
## [6] "Heparin (Preservative Free)"
## [7] "Dabigatran Etexilate"
## [8] "Lepirudin"
## [9] "Hepari"
## [10] "Rivaroxaban"
## [11] "Hepar"
## [12] "Heparin Lock Flush"
## [13] "Heparin Flush (10 units/mL)"
## [14] "Heparin (Porcine)"
## [15] "Heparin Flush (100 units/mL)"
## [16] "Heparin"
## [17] "Warfarin"
## [18] "*NF* Argatroban"
## [19] "HEPARIN"
## [20] "Heparin Flush (10 units/ml)"
## [21] "Heparin CRRT"
## [22] "Heparin (Preserv. Free)"
## [23] "hepar"
## [24] "hep"
## [25] "Heparin Flus"
## [26] "warfarin"
## [27] "Pradaxa"
## [28] "Heparin Flush Port (10units/ml)"
## [29] "Heparin Flush (1000 units/mL)"
## [30] "Heparin Sodium"
## [31] "Heparin Flush Port (10 units/mL)"
## [32] "Heparin Flush PICC (100 units/ml)"
## [33] "Hepa"
## [34] "Heparin Flush"
## [35] "*NF* Warfarin"
## [36] "HEPARIN FLUSH"
## [37] "Heparin Flush (10 Units/mL)"
## [38] "Hep"
## [39] "dabigatran etexilate"
## [40] "H"
## [41] "Heparin flush"
## [42] "Heparin Flush Midline (100 units/ml)"
## [43] "Heparin Flush CRRT (5000 Units/mL)"
## [44] "Heparin (IABP)"
## [45] "heparin"
## [46] "Heparin Flu"
## [47] "Argatroban"
## [48] "Bivalirudin"
## [49] "Heparin Flush (1000 units/ml)"
## [50] "Heparin Flush Port"
## [51] "Heparin Sodium (Preservative Free)"
## [52] "Fondaparinux Sodium"
## [53] "Warf"
## [54] "Heparin Dwell (1000 Units/mL)"
## [55] "Heparin (Hemodialysis)"
## [56] "HEParin (Porcine) in NS (PF)"
## [57] "Heparin lock"
## [58] "Lansoprazole"
## [59] "Coumadin"
## [60] "Fondaparinux"
使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
anticoag_user <-
db_presc_drug |>
filter(DRUG %in% anticoag_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_anticoag <-
db_PEO |>
mutate(anticoag = ifelse(SUBJECT_ID %in% anticoag_user, 1, 0))
データ確認:
db_patients_anticoag |>
f_table(cols_cont = c(),
cols_fact = c("anticoag"))
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## anticoag = 1 4793 (80.6) 4091 (92.1) 0.341 0.0
for merge
db_anticoag <- db_patients_anticoag |> select(HADM_ID, anticoag)
Aspirin
Triflusal (Disgren)
Cangrelor (Kengreal)
Clopidogrel (Plavix)
Prasugrel (Effient)
Ticagrelor (Brilinta)
Ticlopidine (Ticlid)
Cilostazol (Pletal)
Vorapaxar (Zontivity)
Abciximab (ReoPro)
Eptifibatide (Integrilin)
Tirofiban (Aggrastat)
Dipyridamole (Persantine)
antiplate_vec <-
c("Aspirin", "Cangrelor", "Clopidogrel", "Plavix", "Prasugrel",
"Ticagrelor", "Brilinta", "Ticlopidine", "Cilostazol", "Pletal",
"Abciximab", "ReoPro", "Eptifibatide", "Tirofiban", "Aggrastat",
"Dipyridamole", "Persantine"
)
antiplate_or <- str_c(antiplate_vec, collapse = "|")
antiplate_or
## [1] "Aspirin|Cangrelor|Clopidogrel|Plavix|Prasugrel|Ticagrelor|Brilinta|Ticlopidine|Cilostazol|Pletal|Abciximab|ReoPro|Eptifibatide|Tirofiban|Aggrastat|Dipyridamole|Persantine"
antiplate_names <-
df_drugs |>
filter(str_detect(DRUG, regex(antiplate_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(antiplate_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(antiplate_or, ignore_case = T))) |>
select(DRUG) |>
distinct() |>
pull()
antiplate_names
## [1] "Aspirin EC"
## [2] "Clopidogrel Desensitization"
## [3] "Cilostazol"
## [4] "Aspirin Desensitization (Angioedema)"
## [5] "Aspirin (Buffered)"
## [6] "PleTAL"
## [7] "cilostazol"
## [8] "aspirin"
## [9] "Aspirin Desensitization (AERD)"
## [10] "Aspir"
## [11] "asa"
## [12] "Dopamine"
## [13] "cilostaz"
## [14] "Aspirin"
## [15] "Tirofiban"
## [16] "Dipyridamole"
## [17] "Prasugrel"
## [18] "Butalbital-Aspirin-Caffeine"
## [19] "cilos"
## [20] "cil"
## [21] "Aspirin 81 mg or placebo"
## [22] "Aspirin 325mg/ placebo"
## [23] "CILOSTAZOL"
## [24] "Aggrastat"
## [25] "Clopidogrel Bisulfate"
## [26] "Clopidogrel"
## [27] "Excedrin Aspirin Free"
## [28] "ASPIR"
## [29] "Cilostaz"
## [30] "Aspirin 325 mg or placebo"
## [31] "Aspi"
## [32] "asp"
## [33] "Asp"
## [34] "as"
## [35] "ASA81"
## [36] "*nf"
## [37] "Aspirin-Caffeine-Butalbital"
## [38] "Aspirin Desensitization"
## [39] "Eptifibatide"
## [40] "Brilinta"
## [41] "Ticlopidine HCl"
## [42] "Aspirin 81 mg or Placebo"
## [43] "Aspirin 81mg or placbo"
## [44] "Abciximab"
## [45] "Aspirin (Rectal)"
## [46] "Aspiri"
## [47] "aspi"
## [48] "Butalbital-Acetaminophen-Caff"
## [49] "Aspirin Desens"
## [50] "Aspirin 81 mg /placebo"
## [51] "ASA"
## [52] "Aspirin 325mg or placebo"
## [53] "Aspirin 81 mg /Placebo"
## [54] "Cangrelor Study Drug (*IND*)"
## [55] "aspir"
## [56] "Excedrin Migraine"
## [57] "ASPIRIN"
## [58] "Aspirin 81 mg or placebo"
## [59] "Aspirin Childrens"
## [60] "Dipyridamole-Aspirin"
## [61] "cilo"
## [62] "ticagrelor"
## [63] "Pletal"
## [64] "Aspirin 325 mg or Placebo"
## [65] "Clopidogrel 150 mg or placebo"
## [66] "Aspirin 81 mg/ Placebo"
## [67] "Reopro"
使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
antiplate_user <-
db_presc_drug |>
filter(DRUG %in% antiplate_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_antiplate <-
db_PEO |>
mutate(antiplate = ifelse(SUBJECT_ID %in% antiplate_user, 1, 0))
データ確認:
db_patients_antiplate |>
f_table(cols_cont = c(),
cols_fact = c("antiplate"))
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## antiplate = 1 3679 (61.8) 3662 (82.5) 0.473 0.0
for merge
db_antiplate <- db_patients_antiplate |> select(HADM_ID, antiplate)
Omeprazole
Lansoprazole
Dexlansoprazole
Esomeprazole
Pantoprazole
Rabeprazole
Ilaprazole
ppi_vec <-
c("Omeprazole", "Prilosec", "Nexium", "Lansoprazole", "Prevacid",
"Dexlansoprazole", "Pantoprazole", "Rabeprazole", "Ilaprazole"
)
ppi_or <- str_c(ppi_vec, collapse = "|")
ppi_or
## [1] "Omeprazole|Prilosec|Nexium|Lansoprazole|Prevacid|Dexlansoprazole|Pantoprazole|Rabeprazole|Ilaprazole"
ppi_names <-
df_drugs |>
filter(str_detect(DRUG, regex(ppi_or, ignore_case = T)) |
str_detect(DRUG_NAME_POE, regex(ppi_or, ignore_case = T)) |
str_detect(DRUG_NAME_GENERIC, regex(ppi_or, ignore_case = T))) |>
select(DRUG) |>
filter(!str_detect(DRUG, regex("Heparin", ignore_case = T))) |>
distinct() |>
pull()
ppi_names
## [1] "Pantoprazole"
## [2] "Lansoprazole Oral Solution"
## [3] "Lansoprazole"
## [4] "Nexium"
## [5] "*NF* Esomeprazole"
## [6] "Omeprazole"
## [7] "Prevacid"
## [8] "prevacid"
## [9] "Lansoprazole Oral Disintegrating Tab"
## [10] "PRILOSEC"
## [11] "Prilosec"
## [12] "Esomeprazole Magnesium"
## [13] "lansopr"
## [14] "Pantoprazole Sodium"
## [15] "PrevACID 24Hr"
## [16] "Nexium 40mg capsules"
## [17] "Neo*PO*Omeprazole"
## [18] "LANSOP"
## [19] "LANS"
## [20] "NexIUM"
## [21] "Lansoprazole Oral Suspension"
## [22] "LANSOPR"
## [23] "Pantoprazole (Self Med)"
## [24] "nexium"
## [25] "Omeprazole-Sodium Bicarbonate"
## [26] "esomeprazole magnesium"
## [27] "lans"
## [28] "Prilosec OTC"
## [29] "omeprazole"
## [30] "dexlansoprazole (Dexilant)"
## [31] "esomeprazole"
## [32] "rabeprazole"
## [33] "Lansop"
## [34] "lanso"
## [35] "panto"
## [36] "*NF*ESOMEPRAZOLE"
## [37] "lansoprazole"
## [38] "prevac"
使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る
ppi_user <-
db_presc_drug |>
filter(DRUG %in% ppi_names) |>
select(SUBJECT_ID) |>
pull()
db_patients_ppi <-
db_PEO |>
mutate(ppi = ifelse(SUBJECT_ID %in% ppi_user, 1, 0))
データ確認:
db_patients_ppi |>
f_table(cols_cont = c(),
cols_fact = c("ppi"))
## Stratified by h2ras
## 0 1 SMD Missing
## n 5949 4441
## ppi = 1 4330 (72.8) 2987 (67.3) 0.121 0.0
for merge
db_ppi <- db_patients_ppi |> select(HADM_ID, ppi)
今まで作ったデータを結合して1つのファイルにする
stack overflowになるので何回かに分けてcompute()する
db_PEO_total_1 <-
db_PEO |>
left_join(db_HtWt, by = "HADM_ID") |>
left_join(db_vital, by = "HADM_ID") |>
left_join(db_urine, by = "HADM_ID") |>
left_join(db_sofa, by = "HADM_ID") |>
left_join(db_rrt, by = "HADM_ID") |>
left_join(db_venti, by = "HADM_ID") |>
left_join(db_ethnicity, by = "HADM_ID") |>
left_join(db_comorb, by = "HADM_ID") |>
compute()
db_PEO_total <-
db_PEO_total_1 |>
left_join(db_labo, by = "HADM_ID") |>
left_join(db_LVEF, by = "HADM_ID") |>
left_join(db_RAA, by = "HADM_ID") |>
left_join(db_diuritic, by = "HADM_ID") |>
left_join(db_inotrop, by = "HADM_ID") |>
left_join(db_anti_adrenal, by = "HADM_ID") |>
left_join(db_ca_blocker, by = "HADM_ID") |>
left_join(db_anticoag, by = "HADM_ID") |>
left_join(db_antiplate, by = "HADM_ID") |>
left_join(db_ppi, by = "HADM_ID") |>
compute()
最終固定データとしてcsv形式で出力しておく
(結局最終データは3.3MBしかない)
df_PEO_total <- db_PEO_total |> collect()
df_PEO_total |> write_csv("MIMIC_HDS.csv")
# file.size("MIMIC_HDS.csv")
csvファイル読み込み
df_PEO_total <- read_csv("MIMIC_HDS.csv")
df_PEO_total
表作成
col_continuous <-
c("age", "height", "weight", "BMI",
"heartrate", "sysbp", "diasbp", "meanbp", "resprate", "tempc", "spo2",
"urine_out","SOFA", "apsiii",
"Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")
col_factors <-
c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5",
"mortality30", "hospital_death",
"rrt", "ventilation",
"hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis",
"venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
"gastric_bleeding", "aki", "septic_shock", "pneumonia",
"LVEF_ctg",
"RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
"anticoag", "antiplate", "ppi")
表作成 PS matching前の結果として保存しておく(unmatched.csv)
db_PEO_total |>
f_table(cols_cont = col_continuous,
cols_fact = col_factors) |>
write.csv("unmatched.csv")
## Stratified by h2ras
## 0 1 SMD
## n 5949 4441
## age 74.35 (14.30) 71.25 (13.97) 0.219
## height 167.67 (10.97) 168.20 (10.78) 0.049
## weight 80.16 (24.65) 81.98 (24.62) 0.074
## BMI 28.63 (8.03) 28.99 (8.11) 0.044
## heartrate 88.36 (20.63) 88.09 (18.75) 0.014
## sysbp 123.44 (25.95) 122.17 (25.18) 0.050
## diasbp 62.25 (17.36) 62.11 (16.40) 0.009
## meanbp 80.51 (18.28) 80.41 (17.61) 0.005
## resprate 19.99 (6.22) 18.30 (6.30) 0.269
## tempc 36.54 (0.99) 36.49 (0.95) 0.057
## spo2 96.42 (5.44) 97.16 (4.89) 0.144
## urine_out 1775.04 (1252.05) 1838.51 (1214.97) 0.051
## SOFA 4.59 (3.04) 5.04 (3.02) 0.146
## apsiii 48.63 (20.54) 46.49 (19.59) 0.107
## Calcium 8.57 (0.89) 8.67 (0.84) 0.122
## Cre 1.82 (1.72) 1.60 (1.51) 0.137
## Glucose 157.16 (88.46) 153.63 (81.05) 0.042
## Mg 1.99 (0.41) 2.04 (0.51) 0.108
## Sodium 138.01 (5.21) 138.22 (4.78) 0.041
## BUN 36.87 (26.00) 30.68 (21.02) 0.262
## Plate 245.71 (118.16) 241.97 (115.02) 0.032
## RBC 3.81 (0.76) 3.88 (0.73) 0.083
## WBC 12.13 (12.49) 11.57 (14.37) 0.041
## ADMISSION_TYPE 0.354
## ELECTIVE 412 ( 6.9) 795 (17.9)
## EMERGENCY 5267 (88.5) 3546 (79.8)
## URGENT 270 ( 4.5) 100 ( 2.3)
## INSURANCE 0.122
## Government 80 ( 1.3) 58 ( 1.3)
## Medicare 4830 (81.2) 3400 (76.6)
## Private 1013 (17.0) 969 (21.8)
## Self Pay 26 ( 0.4) 14 ( 0.3)
## GENDER = M 3074 (51.7) 2446 (55.1) 0.068
## ethnicity_5 0.081
## asian 101 ( 1.7) 80 ( 1.8)
## black 499 ( 8.5) 362 ( 8.2)
## hispanic 122 ( 2.1) 125 ( 2.8)
## other 892 (15.2) 569 (12.9)
## white 4262 (72.5) 3279 (74.3)
## mortality30 = 1 1301 (21.9) 546 (12.3) 0.256
## hospital_death = 1 1074 (18.1) 479 (10.8) 0.208
## rrt = 1 155 ( 2.6) 138 ( 3.1) 0.030
## ventilation = 1 2402 (40.4) 2839 (63.9) 0.485
## hypertension = 1 2359 (39.7) 2025 (45.6) 0.120
## DM = 1 2133 (35.9) 1642 (37.0) 0.023
## anemia = 1 1993 (33.5) 1414 (31.8) 0.035
## Af = 1 2537 (42.6) 2034 (45.8) 0.064
## coronary_atherosclerosis = 1 2264 (38.1) 2049 (46.1) 0.164
## venous_thrombus = 1 112 ( 1.9) 77 ( 1.7) 0.011
## MI = 1 1001 (16.8) 717 (16.1) 0.018
## gastritis = 1 121 ( 2.0) 61 ( 1.4) 0.051
## gastric_ulcer = 1 67 ( 1.1) 20 ( 0.5) 0.076
## duodenal_ulcer = 1 71 ( 1.2) 25 ( 0.6) 0.068
## gastric_bleeding = 1 514 ( 8.6) 184 ( 4.1) 0.185
## aki = 1 2260 (38.0) 1409 (31.7) 0.132
## septic_shock = 1 414 ( 7.0) 261 ( 5.9) 0.044
## pneumonia = 1 1343 (22.6) 927 (20.9) 0.041
## LVEF_ctg 0.097
## mild 736 (14.7) 638 (16.5)
## moderate 936 (18.7) 708 (18.3)
## normal 2041 (40.8) 1663 (43.1)
## severe 1289 (25.8) 850 (22.0)
## RAA = 1 2880 (48.4) 2722 (61.3) 0.261
## Diuritic = 1 4311 (72.5) 4083 (91.9) 0.526
## inotrop = 1 1780 (29.9) 2266 (51.0) 0.440
## anti_adrenal = 1 4439 (74.6) 4098 (92.3) 0.489
## ca_blocker = 1 1747 (29.4) 1714 (38.6) 0.196
## anticoag = 1 4793 (80.6) 4091 (92.1) 0.341
## antiplate = 1 3679 (61.8) 3662 (82.5) 0.473
## ppi = 1 4330 (72.8) 2987 (67.3) 0.121
## Stratified by h2ras
## Missing
## n
## age 0.0
## height 12.6
## weight 3.0
## BMI 13.9
## heartrate 1.7
## sysbp 1.7
## diasbp 1.7
## meanbp 1.7
## resprate 1.7
## tempc 2.0
## spo2 1.8
## urine_out 4.0
## SOFA 0.0
## apsiii 0.0
## Calcium 2.9
## Cre 1.0
## Glucose 1.0
## Mg 1.4
## Sodium 1.0
## BUN 1.0
## Plate 1.0
## RBC 1.0
## WBC 1.0
## ADMISSION_TYPE 0.0
## ELECTIVE
## EMERGENCY
## URGENT
## INSURANCE 0.0
## Government
## Medicare
## Private
## Self Pay
## GENDER = M 0.0
## ethnicity_5 1.0
## asian
## black
## hispanic
## other
## white
## mortality30 = 1 0.0
## hospital_death = 1 0.0
## rrt = 1 0.0
## ventilation = 1 0.0
## hypertension = 1 0.0
## DM = 1 0.0
## anemia = 1 0.0
## Af = 1 0.0
## coronary_atherosclerosis = 1 0.0
## venous_thrombus = 1 0.0
## MI = 1 0.0
## gastritis = 1 0.0
## gastric_ulcer = 1 0.0
## duodenal_ulcer = 1 0.0
## gastric_bleeding = 1 0.0
## aki = 1 0.0
## septic_shock = 1 0.0
## pneumonia = 1 0.0
## LVEF_ctg 14.7
## mild
## moderate
## normal
## severe
## RAA = 1 0.0
## Diuritic = 1 0.0
## inotrop = 1 0.0
## anti_adrenal = 1 0.0
## ca_blocker = 1 0.0
## anticoag = 1 0.0
## antiplate = 1 0.0
## ppi = 1 0.0
# df_unmatched_table <- read.csv("unmatched.csv",
# col.names = c("name", "non_user", "user", "smd", "missing"))
# df_unmatched_table
ヒストグラム作成
db_PEO_total |>
f_hist(cols_cont = col_continuous)
# dbDisconnect(con_mimic, shutdown=TRUE)