#libraryロード

pacman::p_load(tidyverse,readxl,ggplot2,dplyr,tableone,ggpubr,ggrepel,DBI,skimr,dbplyr,pryr,lubridate, survival, survminer,car,svglite,MatchIt, cobalt)
con_mimic <- dbConnect(RSQLite::SQLite(), dbname = "/Volumes/drive/RSQLite/temp/mimic.db")
db_ad <- tbl(con_mimic, "admissions")
db_ad |> colnames()
##  [1] "ROW_ID"               "SUBJECT_ID"           "HADM_ID"             
##  [4] "ADMITTIME"            "DISCHTIME"            "DEATHTIME"           
##  [7] "ADMISSION_TYPE"       "ADMISSION_LOCATION"   "DISCHARGE_LOCATION"  
## [10] "INSURANCE"            "LANGUAGE"             "RELIGION"            
## [13] "MARITAL_STATUS"       "ETHNICITY"            "EDREGTIME"           
## [16] "EDOUTTIME"            "DIAGNOSIS"            "HOSPITAL_EXPIRE_FLAG"
## [19] "HAS_CHARTEVENTS_DATA"
db_ad |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##        n
##    <int>
## 1 530784
db_pt <- tbl(con_mimic, "patients")
db_pt |> colnames()
## [1] "ROW_ID"      "SUBJECT_ID"  "GENDER"      "DOB"         "DOD"        
## [6] "DOD_HOSP"    "DOD_SSN"     "EXPIRE_FLAG"
db_pt |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 46520

#診断名を確認

db_diag <- tbl(con_mimic, "diagnoses_icd")
db_diag |> colnames()
## [1] "ROW_ID"     "SUBJECT_ID" "HADM_ID"    "SEQ_NUM"    "ICD9_CODE"
db_diag |> head(10)
## # Source:   SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    ROW_ID SUBJECT_ID HADM_ID SEQ_NUM ICD9_CODE
##     <dbl>      <dbl>   <dbl>   <dbl> <chr>    
##  1   1297        109  172335       1 40301    
##  2   1298        109  172335       2 486      
##  3   1299        109  172335       3 58281    
##  4   1300        109  172335       4 5855     
##  5   1301        109  172335       5 4254     
##  6   1302        109  172335       6 2762     
##  7   1303        109  172335       7 7100     
##  8   1304        109  172335       8 2767     
##  9   1305        109  172335       9 7243     
## 10   1306        109  172335      10 45829
db_d_icd_diag <- tbl(con_mimic, "d_icd_diagnoses")
db_d_icd_diag |> colnames()
## [1] "ROW_ID"      "ICD9_CODE"   "SHORT_TITLE" "LONG_TITLE"
db_d_icd_diag |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 14567
db_d_icd_diag |> head(10)
## # Source:   SQL [10 x 4]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    ROW_ID ICD9_CODE SHORT_TITLE              LONG_TITLE                         
##     <dbl> <chr>     <chr>                    <chr>                              
##  1    174 01166     TB pneumonia-oth test    Tuberculous pneumonia [any form], …
##  2    175 01170     TB pneumothorax-unspec   Tuberculous pneumothorax, unspecif…
##  3    176 01171     TB pneumothorax-no exam  Tuberculous pneumothorax, bacterio…
##  4    177 01172     TB pneumothorx-exam unkn Tuberculous pneumothorax, bacterio…
##  5    178 01173     TB pneumothorax-micro dx Tuberculous pneumothorax, tubercle…
##  6    179 01174     TB pneumothorax-cult dx  Tuberculous pneumothorax, tubercle…
##  7    180 01175     TB pneumothorax-histo dx Tuberculous pneumothorax, tubercle…
##  8    181 01176     TB pneumothorax-oth test Tuberculous pneumothorax, tubercle…
##  9    182 01180     Pulmonary TB NEC-unspec  Other specified pulmonary tubercul…
## 10    183 01181     Pulmonary TB NEC-no exam Other specified pulmonary tubercul…
df_d_icd_diag <- collect(db_d_icd_diag)
# df_d_icd_diag |> View()

hf_candidates <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, regex("heart failure", ignore_case = T))) #coll()

icd_hf <- 
  hf_candidates |> 
  filter(!str_detect(LONG_TITLE, regex("without heart failure", ignore_case = T))) |>
  select(ICD9_CODE) |> pull()

icd_hf
##  [1] "39891" "40201" "40211" "40291" "40401" "40403" "40411" "40413" "40491"
## [10] "40493" "4280"  "4281"  "42820" "42821" "42822" "42823" "42830" "42831"
## [19] "42832" "42833" "42840" "42841" "42842" "42843" "4289"
icd_hf_from_article <- 
  c(4280, 4281, 4289, 39891, 40201, 40211, 40291, 40401, 40403, 40411, 40413, 40491,
    40493, 42820, 42821, 42822, 42823, 42830, 42831, 42832, 42833, 42840, 42841, 42842, 42843)
icd_hf %in% icd_hf_from_article |> as.character()
##  [1] "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE"
## [11] "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE" "TRUE"
## [21] "TRUE" "TRUE" "TRUE" "TRUE" "TRUE"
length(icd_hf)
## [1] 25
length(icd_hf_from_article)
## [1] 25

#HADM_ID for HF

db_diag_hf <- 
  db_diag |> 
  filter(ICD9_CODE %in% icd_hf)

db_diag_hf |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 21274
db_diag_hf |> head(10)
## # Source:   SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    ROW_ID SUBJECT_ID HADM_ID SEQ_NUM ICD9_CODE
##     <dbl>      <dbl>   <dbl>   <dbl> <chr>    
##  1   1511        115  114585      10 4280     
##  2   1527        117  140784       5 4280     
##  3   1610        124  138376      12 42833    
##  4   1613        124  138376      15 4280     
##  5   1671        130  198214       2 4280     
##  6    550         68  108329      10 4280     
##  7    555         68  170467       4 42820    
##  8    556         68  170467       5 4280     
##  9    620         77  142768       2 4280     
## 10    649         83  158569       2 39891
db_diag_hf |> 
  distinct(HADM_ID) |> 
  count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 14040
db_diag_hf |> 
  mutate_if(is.numeric, as.character) |> 
  skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_diag_hf, is….
Number of rows 21274
Number of columns 5
_______________________
Column type frequency:
character 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ROW_ID 0 1 3 8 0 21274 0
SUBJECT_ID 0 1 3 7 0 10436 0
HADM_ID 0 1 8 8 0 14040 0
SEQ_NUM 0 1 3 4 0 36 0
ICD9_CODE 0 1 4 5 0 25 0
db_icustay <- tbl(con_mimic, "icustays")
db_icustay |> colnames()
##  [1] "ROW_ID"         "SUBJECT_ID"     "HADM_ID"        "ICUSTAY_ID"    
##  [5] "DBSOURCE"       "FIRST_CAREUNIT" "LAST_CAREUNIT"  "FIRST_WARDID"  
##  [9] "LAST_WARDID"    "INTIME"         "OUTTIME"        "LOS"

#一度の入院で何回もICU入室している可能性あるので、HADM_IDで重複を削除する ※ICU_IDで重複削除しても複数回ICU入室しているもの全てuniqueとしてカウントしてしまう。

db_icustay_hadm <- 
  db_icustay |> 
  distinct(HADM_ID, .keep_all = T) |> 
  select(HADM_ID, ICUSTAY_ID)

hadm_icu_vec <- 
  db_icustay_hadm |> 
  select(HADM_ID) |> 
  pull()

icustay_id_vec <- 
  db_icustay_hadm |> 
  select(ICUSTAY_ID) |> 
  pull()
db_diag_hf_icu <- 
  db_diag_hf |> 
  filter(HADM_ID %in% hadm_icu_vec)

db_diag_hf_icu |> 
  mutate_if(is.numeric, as.character) |> 
  skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_diag_hf_icu,…
Number of rows 21061
Number of columns 5
_______________________
Column type frequency:
character 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ROW_ID 0 1 3 8 0 21061 0
SUBJECT_ID 0 1 3 7 0 10405 0
HADM_ID 0 1 8 8 0 13874 0
SEQ_NUM 0 1 3 4 0 36 0
ICD9_CODE 0 1 4 5 0 25 0
hadm_hf <- 
  db_diag_hf_icu |> 
  distinct(HADM_ID) |> 
  pull(HADM_ID)

length(hadm_hf)
## [1] 13874

#18歳以上に限定

db_ad_pt_age <-
  db_ad |> 
  left_join(db_pt, by = "SUBJECT_ID") |> 
  mutate(birthday = DOB/(60*60*24),
         admitday = ADMITTIME/(60*60*24),
         age = (admitday - birthday)/365.25)

db_ad_pt_age |> head(10)
## # Source:   SQL [10 x 29]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    ROW_ID.x SUBJECT_ID HADM_ID  ADMITTIME  DISCHTIME  DEATHTIME ADMISSION_TYPE
##       <dbl>      <dbl>   <dbl>      <dbl>      <dbl>      <dbl> <chr>         
##  1       21         22  165315 7140486360 7140585240         NA EMERGENCY     
##  2       22         23  152223 5796170100 5796645000         NA ELECTIVE      
##  3       23         23  124321 5926332840 5926917600         NA EMERGENCY     
##  4       24         24  161859 5346663240 5346910080         NA EMERGENCY     
##  5       25         25  129635 6022260360 6022565700         NA EMERGENCY     
##  6       26         26  197661 4933754160 4934358000         NA EMERGENCY     
##  7       27         27  134931 7002972960 7003205100         NA NEWBORN       
##  8       28         28  162569 6553379700 6553843200         NA ELECTIVE      
##  9       29         30  104557 6399353820 6399787020         NA URGENT        
## 10       30         31  128652 4375121220 4375782000 4375782000 EMERGENCY     
## # ℹ 22 more variables: ADMISSION_LOCATION <chr>, DISCHARGE_LOCATION <chr>,
## #   INSURANCE <chr>, LANGUAGE <chr>, RELIGION <chr>, MARITAL_STATUS <chr>,
## #   ETHNICITY <chr>, EDREGTIME <dbl>, EDOUTTIME <dbl>, DIAGNOSIS <chr>,
## #   HOSPITAL_EXPIRE_FLAG <dbl>, HAS_CHARTEVENTS_DATA <dbl>, ROW_ID.y <dbl>,
## #   GENDER <chr>, DOB <dbl>, DOD <dbl>, DOD_HOSP <dbl>, DOD_SSN <dbl>,
## #   EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, age <dbl>
db_ad_pt_age18 <-
  db_ad_pt_age |> 
  filter(age >= 18) |> 
  filter(HADM_ID %in% hadm_hf) |> 
  distinct(SUBJECT_ID, .keep_all = T)

db_ad_pt_age18 |> 
  mutate_if(is.numeric, as.character) |> 
  skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_ad_pt_age18,…
Number of rows 10390
Number of columns 29
_______________________
Column type frequency:
character 29
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ROW_ID.x 0 1.00 3 7 0 10390 0
SUBJECT_ID 0 1.00 3 7 0 10390 0
HADM_ID 0 1.00 8 8 0 10390 0
ADMITTIME 0 1.00 12 12 0 10381 0
DISCHTIME 0 1.00 12 12 0 10379 0
DEATHTIME 8834 0.15 12 12 0 1556 0
ADMISSION_TYPE 0 1.00 6 9 0 3 0
ADMISSION_LOCATION 0 1.00 20 25 0 7 0
DISCHARGE_LOCATION 0 1.00 3 25 0 16 0
INSURANCE 0 1.00 7 10 0 5 0
LANGUAGE 4276 0.59 4 4 0 53 0
RELIGION 77 0.99 5 22 0 19 0
MARITAL_STATUS 439 0.96 6 17 0 7 0
ETHNICITY 0 1.00 5 56 0 35 0
EDREGTIME 4574 0.56 12 12 0 5816 0
EDOUTTIME 4574 0.56 12 12 0 5816 0
DIAGNOSIS 1 1.00 2 190 0 4206 0
HOSPITAL_EXPIRE_FLAG 0 1.00 3 3 0 2 0
HAS_CHARTEVENTS_DATA 0 1.00 3 3 0 2 0
ROW_ID.y 0 1.00 3 7 0 10390 0
GENDER 0 1.00 1 1 0 2 0
DOB 0 1.00 12 13 0 9448 0
DOD 4223 0.59 12 12 0 5762 0
DOD_HOSP 6498 0.37 12 12 0 3722 0
DOD_SSN 5033 0.52 12 12 0 5055 0
EXPIRE_FLAG 0 1.00 3 3 0 2 0
birthday 0 1.00 7 8 0 9448 0
admitday 0 1.00 7 16 0 10381 0
age 0 1.00 13 16 0 10089 0
db_ad_pt_age18 |> 
  filter(age >150) |> 
  summarize(count = n())
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##   count
##   <int>
## 1  1032
db_ad_pt_age18 |> select(age, GENDER) |> 
  ggplot() +
  geom_histogram(aes(x = age), color = "black", alpha = 0.8, ) +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

db_ad_pt_age18_mod <-
  db_ad_pt_age18 |> 
  mutate(age = ifelse(age > 100, 95, age))

db_ad_pt_age18_mod |> 
  select(age, GENDER) |> 
  ggplot() +
  geom_histogram(aes(x = age), color = "black", alpha = 0.8) +
  theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#10390データの日付整理したファイル作成

cols <- c("SUBJECT_ID", "HADM_ID", "ICUSTAY_ID", "age", "ADMISSION_TYPE", 
          "INSURANCE", "ETHNICITY", "DIAGNOSIS",
          "HOSPITAL_EXPIRE_FLAG", "GENDER", "EXPIRE_FLAG",
          "birthday", "admitday", "dischday", "deathday", "hosp_deathday")
db_patients <-
  db_ad_pt_age18_mod |> 
  left_join(db_icustay_hadm, by = "HADM_ID") |>
  mutate(dischday = DISCHTIME/(60*60*24),
         hosp_deathday = DEATHTIME/(60*60*24),
         deathday = DOD/(60*60*24)) |> 
  select(cols)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols)
## 
##   # Now:
##   data %>% select(all_of(cols))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_patients |> head(10)
## # Source:   SQL [10 x 16]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    SUBJECT_ID HADM_ID ICUSTAY_ID   age ADMISSION_TYPE INSURANCE ETHNICITY       
##         <dbl>   <dbl>      <dbl> <dbl> <chr>          <chr>     <chr>           
##  1          3  145834     211552  76.5 EMERGENCY      Medicare  WHITE           
##  2          9  150750     220597  41.8 EMERGENCY      Medicaid  UNKNOWN/NOT SPE…
##  3         21  109451     217847  87.4 EMERGENCY      Medicare  WHITE           
##  4         26  197661     244882  72.0 EMERGENCY      Medicare  UNKNOWN/NOT SPE…
##  5         30  104557     225176  95   URGENT         Medicare  UNKNOWN/NOT SPE…
##  6         34  115799     263086  95   EMERGENCY      Medicare  WHITE           
##  7         37  188670     213503  68.9 EMERGENCY      Medicare  WHITE           
##  8         38  185910     248910  75.9 EMERGENCY      Medicare  WHITE           
##  9         42  119203     210828  61.2 EMERGENCY      Private   UNKNOWN/NOT SPE…
## 10         49  190539     249195  80.9 ELECTIVE       Medicare  WHITE           
## # ℹ 9 more variables: DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## #   GENDER <chr>, EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>,
## #   dischday <dbl>, deathday <dbl>, hosp_deathday <dbl>
db_patients |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10390

#曝露情報H2RA

db_presc <- tbl(con_mimic,"prescriptions")
db_presc |> colnames()
##  [1] "ROW_ID"            "SUBJECT_ID"        "HADM_ID"          
##  [4] "ICUSTAY_ID"        "STARTDATE"         "ENDDATE"          
##  [7] "DRUG_TYPE"         "DRUG"              "DRUG_NAME_POE"    
## [10] "DRUG_NAME_GENERIC" "FORMULARY_DRUG_CD" "GSN"              
## [13] "NDC"               "PROD_STRENGTH"     "DOSE_VAL_RX"      
## [16] "DOSE_UNIT_RX"      "FORM_VAL_DISP"     "FORM_UNIT_DISP"   
## [19] "ROUTE"
db_presc |> head(10)
## # Source:   SQL [10 x 19]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##     ROW_ID SUBJECT_ID HADM_ID ICUSTAY_ID  STARTDATE    ENDDATE DRUG_TYPE DRUG   
##      <dbl>      <dbl>   <dbl>      <dbl>      <dbl>      <dbl> <chr>     <chr>  
##  1 2214776          6  107064         NA 6483110400 6483196800 MAIN      Tacrol…
##  2 2214775          6  107064         NA 6483110400 6483196800 MAIN      Warfar…
##  3 2215524          6  107064         NA 6483110400 6483196800 MAIN      Hepari…
##  4 2216265          6  107064         NA 6483110400 6483196800 BASE      D5W    
##  5 2214773          6  107064         NA 6483110400 6483196800 MAIN      Furose…
##  6 2214774          6  107064         NA 6483110400 6483456000 MAIN      Warfar…
##  7 2215525          6  107064         NA 6483196800 6483196800 MAIN      Hepari…
##  8 2216266          6  107064         NA 6483196800 6483196800 BASE      D5W    
##  9 2215526          6  107064         NA 6483196800 6483283200 MAIN      Hepari…
## 10 2214778          6  107064         NA 6483196800 6483283200 MAIN      Warfar…
## # ℹ 11 more variables: DRUG_NAME_POE <chr>, DRUG_NAME_GENERIC <chr>,
## #   FORMULARY_DRUG_CD <chr>, GSN <chr>, NDC <chr>, PROD_STRENGTH <chr>,
## #   DOSE_VAL_RX <chr>, DOSE_UNIT_RX <chr>, FORM_VAL_DISP <chr>,
## #   FORM_UNIT_DISP <chr>, ROUTE <chr>
db_presc |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##         n
##     <int>
## 1 4156450
col_presc <- c("SUBJECT_ID", "HADM_ID", "ICUSTAY_ID", "DRUG", "DRUG_NAME_POE", "DRUG_NAME_GENERIC", "FORMULARY_DRUG_CD")
db_presc_drug <- db_presc |> select(col_presc)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_presc)
## 
##   # Now:
##   data %>% select(all_of(col_presc))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_presc_drug |> head(10)
## # Source:   SQL [10 x 7]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    SUBJECT_ID HADM_ID ICUSTAY_ID DRUG           DRUG_NAME_POE DRUG_NAME_GENERIC
##         <dbl>   <dbl>      <dbl> <chr>          <chr>         <chr>            
##  1          6  107064         NA Tacrolimus     Tacrolimus    Tacrolimus       
##  2          6  107064         NA Warfarin       Warfarin      Warfarin         
##  3          6  107064         NA Heparin Sodium <NA>          <NA>             
##  4          6  107064         NA D5W            <NA>          <NA>             
##  5          6  107064         NA Furosemide     Furosemide    Furosemide       
##  6          6  107064         NA Warfarin       Warfarin      Warfarin         
##  7          6  107064         NA Heparin Sodium <NA>          <NA>             
##  8          6  107064         NA D5W            <NA>          <NA>             
##  9          6  107064         NA Heparin Sodium <NA>          <NA>             
## 10          6  107064         NA Warfarin       Warfarin      Warfarin         
## # ℹ 1 more variable: FORMULARY_DRUG_CD <chr>
drugs <-
  db_presc_drug |>
  distinct(DRUG, DRUG_NAME_POE, DRUG_NAME_GENERIC, FORMULARY_DRUG_CD)

drugs |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1  8575
drugs |> head(10)
## # Source:   SQL [10 x 4]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    DRUG                  DRUG_NAME_POE       DRUG_NAME_GENERIC FORMULARY_DRUG_CD
##    <chr>                 <chr>               <chr>             <chr>            
##  1 Tacrolimus            Tacrolimus          Tacrolimus        TACR1            
##  2 Warfarin              Warfarin            Warfarin          WARF5            
##  3 Heparin Sodium        <NA>                <NA>              HEPAPREMIX       
##  4 D5W                   <NA>                <NA>              HEPBASE          
##  5 Furosemide            Furosemide          Furosemide        FURO20           
##  6 Warfarin              Warfarin            Warfarin          WARF0            
##  7 Warfarin              Warfarin            Warfarin          WARF2            
##  8 Tacrolimus            Tacrolimus          Tacrolimus        TACR5            
##  9 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO500T         
## 10 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO250
df_drugs <- drugs |> collect()
df_drugs |> head(20)
## # A tibble: 20 × 4
##    DRUG                  DRUG_NAME_POE       DRUG_NAME_GENERIC FORMULARY_DRUG_CD
##    <chr>                 <chr>               <chr>             <chr>            
##  1 Tacrolimus            Tacrolimus          Tacrolimus        TACR1            
##  2 Warfarin              Warfarin            Warfarin          WARF5            
##  3 Heparin Sodium        <NA>                <NA>              HEPAPREMIX       
##  4 D5W                   <NA>                <NA>              HEPBASE          
##  5 Furosemide            Furosemide          Furosemide        FURO20           
##  6 Warfarin              Warfarin            Warfarin          WARF0            
##  7 Warfarin              Warfarin            Warfarin          WARF2            
##  8 Tacrolimus            Tacrolimus          Tacrolimus        TACR5            
##  9 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO500T         
## 10 Mycophenolate Mofetil Mycophenolate Mofe… Mycophenolate Mo… MYCO250          
## 11 Neutra-Phos           Neutra-Phos         Neutra-Phos       NEUT             
## 12 Nitroglycerin         <NA>                <NA>              NTG100PB         
## 13 Docusate Sodium       Docusate Sodium     Docusate Sodium   DOCU100          
## 14 Insulin               Insulin             Insulin           INSULIN          
## 15 Atropine Sulfate      Atropine Sulfate    Atropine Sulfate  ATRO1I           
## 16 Zolpidem Tartrate     Zolpidem Tartrate   Zolpidem Tartrate AMBI5            
## 17 Midazolam HCl         Midazolam HCl       Midazolam HCl     MIDA2I           
## 18 Nitroglycerin SL      Nitroglycerin SL    Nitroglycerin SL  NTG3SL           
## 19 Lorazepam             Lorazepam           Lorazepam         LORA2I           
## 20 Magnesium Sulfate     Magnesium Sulfate   Magnesium Sulfate MAGS1I
col_h2 <- 
  df_drugs |> 
  filter(str_detect(DRUG, regex("tidine", ignore_case = T))|
         str_detect(DRUG_NAME_POE, regex("tidine", ignore_case = T))|
         str_detect(DRUG_NAME_GENERIC, regex("tidine", ignore_case = T))
         ) |> 
  select(DRUG) |> pull() |> unique()

col_h2 |> dput()
## c("Ranitidine", "Famotidine", "Famotidine (IV)", "Ranitidine (Liquid)", 
## "Cimetidine", "NEO*PO*Ranitidine", "Ranitidine HCl", "Famotidine (PO)", 
## "Azacitidine", "Claritin", "Loratadine", "Azacitidine (Subcut)", 
## "CLARITIN", "Ranitidine Hcl", "Famoti", "Famo", "Nizatidine", 
## "famotidine", "Cimetidine HCl")
vec_h2ras <- c("Cimetidine", "Cimetidine HCl", "Famo", "Famoti", "Famotidine", 
"Famotidine (IV)", "Famotidine (PO)", "NEO*PO*Ranitidine", 
"Nizatidine", "Ranitidine", "Ranitidine (Liquid)", "Ranitidine HCl", 
"Ranitidine Hcl", "famotidine",
"Zantac 75", "Zantac Maximum Strength")
H2RAs_user <- 
  db_presc_drug |> 
  filter(DRUG %in% vec_h2ras) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_h2 <-
  db_patients |> 
  mutate(h2ras = ifelse(SUBJECT_ID %in% H2RAs_user, 1, 0),
         .after = ICUSTAY_ID)

db_patients_h2 |> head(10)
## # Source:   SQL [10 x 17]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    SUBJECT_ID HADM_ID ICUSTAY_ID h2ras   age ADMISSION_TYPE INSURANCE ETHNICITY 
##         <dbl>   <dbl>      <dbl> <dbl> <dbl> <chr>          <chr>     <chr>     
##  1          3  145834     211552     0  76.5 EMERGENCY      Medicare  WHITE     
##  2          9  150750     220597     0  41.8 EMERGENCY      Medicaid  UNKNOWN/N…
##  3         21  109451     217847     0  87.4 EMERGENCY      Medicare  WHITE     
##  4         26  197661     244882     0  72.0 EMERGENCY      Medicare  UNKNOWN/N…
##  5         30  104557     225176     0  95   URGENT         Medicare  UNKNOWN/N…
##  6         34  115799     263086     0  95   EMERGENCY      Medicare  WHITE     
##  7         37  188670     213503     0  68.9 EMERGENCY      Medicare  WHITE     
##  8         38  185910     248910     1  75.9 EMERGENCY      Medicare  WHITE     
##  9         42  119203     210828     0  61.2 EMERGENCY      Private   UNKNOWN/N…
## 10         49  190539     249195     1  80.9 ELECTIVE       Medicare  WHITE     
## # ℹ 9 more variables: DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## #   GENDER <chr>, EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>,
## #   dischday <dbl>, deathday <dbl>, hosp_deathday <dbl>
db_patients_h2 |> count(h2ras)
## # Source:   SQL [2 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##   h2ras     n
##   <dbl> <int>
## 1     0  5949
## 2     1  4441
db_PEO <-
  db_patients_h2 |> 
  mutate(death_elasp = deathday - admitday,
         mortality30 = ifelse(death_elasp <= 30, 1, 0),
         # hospital_death = HOSPITAL_EXPIRE_FLAG,
         .after = h2ras) |> 
  mutate(INSURANCE = ifelse(INSURANCE == "Medicaid", "Medicare", INSURANCE)) |> 
  # mutate(mortality30 = coalesce(mortality30, 0)) 
  replace_na(list(mortality30 = 0))

db_PEO <- db_PEO |> compute()

db_PEO |> head(10)
## # Source:   SQL [10 x 19]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30   age
##         <dbl>   <dbl>      <dbl> <dbl>       <dbl>       <dbl> <dbl>
##  1          3  145834     211552     0      236.             0  76.5
##  2          9  150750     220597     0        4.45           1  41.8
##  3         21  109451     217847     0      149.             0  87.4
##  4         26  197661     244882     0      659.             0  72.0
##  5         30  104557     225176     0       NA              0  95  
##  6         34  115799     263086     0     2021.             0  95  
##  7         37  188670     213503     0      503.             0  68.9
##  8         38  185910     248910     1       NA              0  75.9
##  9         42  119203     210828     0     1999.             0  61.2
## 10         49  190539     249195     1       NA              0  80.9
## # ℹ 12 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## #   DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <chr>,
## #   EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## #   deathday <dbl>, hosp_deathday <dbl>
db_heightweight <- tbl(con_mimic, "heightweight")
db_heightFirst <- tbl(con_mimic, "heightfirstday")
db_weightFirst <- tbl(con_mimic, "weightfirstday")
db_weightDuration <- tbl(con_mimic, "weightduration")
db_pivot_height <- tbl(con_mimic, "pivoted_height") #subject_id

db_heightweight |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 61532
db_weightDuration1 <- 
  db_weightDuration |> 
  arrange(icustay_id, starttime) |> 
  distinct(icustay_id, .keep_all = T) |> 
  rename(weight_duration = weight) |> 
  select(icustay_id, weight_duration) 
db_pivot_height1 <-
  db_pivot_height |> distinct(subject_id, .keep_all = T) |> 
  rename(height_pivot = height) |> 
  select(subject_id, height_pivot)
db_pivot_height1
## # Source:   SQL [?? x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    subject_id height_pivot
##         <dbl>        <dbl>
##  1          3         178.
##  2          4         173.
##  3          8          49 
##  4         10          38 
##  5         17         165.
##  6         18         183.
##  7         21         185.
##  8         24         185.
##  9         25         183.
## 10         26         188.
## # ℹ more rows
db_hw_1 <-
  db_PEO |> 
  left_join(db_heightweight, by = c("ICUSTAY_ID" = "icustay_id"), copy = T)
db_hw_1 <- db_hw_1 |> compute()

db_hw_2 <-
  db_hw_1 |> 
  left_join(db_heightFirst, by = c("ICUSTAY_ID" = "icustay_id"), copy = T)
db_hw_2 <- db_hw_2 |> compute()

db_hw_3 <-
  db_hw_2 |> 
  left_join(db_weightFirst, by = c("ICUSTAY_ID" = "icustay_id"), copy = T) 
db_hw_3 <- db_hw_3 |> compute()

db_hw_4 <-
  db_hw_3 |> 
  left_join(db_weightDuration1, by = c("ICUSTAY_ID" = "icustay_id"), copy = T) 
db_hw_4 <- db_hw_4 |> compute()
## Warning: ORDER BY is ignored in subqueries without LIMIT
## ℹ Do you need to move arrange() later in the pipeline or use window_order() instead?
db_hw_4 |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10390
db_PEO_HtWt <-
  db_hw_4 |> 
  mutate(
    Height = case_when(
    !is.na(height_first) ~ height_first,
    !is.na(Height) ~ Height,
    !is.na(Height_echo) ~ Height_echo,
    !is.na(Height_chart) ~ Height_chart),
    #ここでのcase_whenは、height_firstがNAでなければheight_firstを、height_firstがNAならばHeightを、HeightもNAならばHeight_echoを、Height_echoもNAならばHeight_chartを、Height_chartもNAならば、NAが入力されるという意味
    #height_firstがNAでなければheight_firstが採用され、case_whenは終了となる。(case_whenは与えられた条件を順番に採用する)
   weight = case_when(
    !is.na(weight_first) ~ weight_first,
    !is.na(weight_admit) ~weight_admit,
    !is.na(weight) ~ weight,
    !is.na(weight_duration) ~ weight_duration,
    !is.na(weight_echoinhosp) ~ weight_echoinhosp,
    !is.na(weight_daily) ~ weight_daily
    )
  ) |> 
  select(!c("weight_first", "weight_min", "weight_max", "height_first", "height_min", 
            "height_max", "Height_chart", "Height_echo", "weight_admit",
            "weight_daily", "weight_echoinhosp", "weight_echoprehosp", "weight_duration"))

db_PEO_HtWt |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10390
db_PEO_HtWt1 <-
  db_PEO_HtWt |> 
  left_join(db_pivot_height1, by = c("SUBJECT_ID" = "subject_id"), copy = T) |> 
  mutate(Height = ifelse(!is.na(Height), Height, height_pivot)) |> 
  select(!c(height_pivot))

db_PEO_HtWt1 <- db_PEO_HtWt1 |> compute()
db_PEO_HtWt2 <-
  db_PEO_HtWt1 |> 
  mutate(Height = ifelse(Height < 250 & Height > 80, Height, NA),
         weight = ifelse(weight > 10, weight, NA)) |> 
  mutate(BMI = weight/(Height/100)^2)

db_PEO_HtWt2 <- db_PEO_HtWt2 |> compute()
db_HtWt <- db_PEO_HtWt2 |> 
  select("HADM_ID", "weight", "Height", "BMI")

db_HtWt <- db_HtWt |> compute()
db_vital <- tbl(con_mimic, "pivoted_vitals" )
db_vital |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##         n
##     <int>
## 1 9152810
icustay_id_vec |> length()
## [1] 57786
db_vital_target <- db_vital |>  filter(icustay_id %in% icustay_id_vec)
db_vital_target |> head(10)
## # Source:   SQL [10 x 10]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id  charttime HeartRate SysBP DiasBP MeanBP RespRate TempC  SpO2
##         <dbl>      <dbl>     <dbl> <dbl>  <dbl>  <dbl>    <dbl> <dbl> <dbl>
##  1     226115 6134934600        NA    NA     NA     NA       NA    NA    NA
##  2     226403 7056760620        NA    NA     NA     NA       38    NA    NA
##  3     226651 5053984200        NA    NA     NA     NA       NA    NA    NA
##  4     226656 4901881680        NA    NA     NA     NA       NA    NA    NA
##  5     226889 5308514400        NA    NA     NA     NA       NA    NA    NA
##  6     227129 6998653080        NA    NA     NA     NA       NA    NA    NA
##  7     227347 6997553760        NA    NA     NA     NA       NA    NA    NA
##  8     227368 5121366300        NA    NA     NA     NA       NA    NA    NA
##  9     227489 6517883340        NA    NA     NA     NA       NA    NA    NA
## 10     227865 7199496660        NA    NA     NA     NA       NA    NA    NA
## # ℹ 1 more variable: Glucose <dbl>
db_vital_target |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_vital_target…
Number of rows 8439687
Number of columns 10
_______________________
Column type frequency:
character 10
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
icustay_id 0 1.00 8 8 0 56696 0
charttime 0 1.00 12 12 0 3126810 0
HeartRate 1119085 0.87 3 17 0 419 0
SysBP 3209874 0.62 3 18 0 726 0
DiasBP 3211509 0.62 3 17 0 539 0
MeanBP 3198031 0.62 3 17 0 2907 0
RespRate 2778181 0.67 3 17 0 244 0
TempC 6852744 0.19 4 16 0 2088 0
SpO2 2943503 0.65 3 17 0 123 0
Glucose 7281949 0.14 3 16 0 1724 0

#バイタル毎にicustay_idに紐づくデータを抽出する。 1入室中に複数のバイタルデータがあることがほとんどだが、どの時点でのデータを抽出するかは論文未記載(たぶん)。 以下では入室最初のデータを採用したが、平均値・中央値にすることもあり得る。   (以下のコードをcompute()なしで回してみると、compute()の意味が分かると思います。)

db_HR <-
  db_vital_target |> 
  select(icustay_id, charttime, HeartRate) |> 
  filter(!is.na(HeartRate)) |> 
  group_by(icustay_id) |> 
  summarize(HeartRate = case_when(min(charttime) ~ HeartRate)) |> 
  compute()
## Warning: Missing values are always removed in SQL aggregation functions.
## Use `na.rm = TRUE` to silence this warning
## This warning is displayed once every 8 hours.
db_HR |> head(10)
## # Source:   SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id HeartRate
##         <dbl>     <dbl>
##  1     200001       115
##  2     200003       132
##  3     200006        84
##  4     200007        90
##  5     200009        92
##  6     200010       109
##  7     200011        77
##  8     200012       109
##  9     200014        71
## 10     200016        67
db_HR|>mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_HR, is.numer…
Number of rows 56566
Number of columns 2
_______________________
Column type frequency:
character 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
icustay_id 0 1 8 8 0 56566 0
HeartRate 0 1 3 16 0 190 0
db_sBP <-
  db_vital_target |> 
  select(icustay_id, charttime,SysBP) |> 
  filter(!is.na(SysBP)) |> 
  group_by(icustay_id) |> 
  summarize(SysBP = case_when(min(charttime) ~ SysBP)) |> 
  compute()
db_sBP |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_sBP, is.nume…
Number of rows 48901
Number of columns 2
_______________________
Column type frequency:
character 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
icustay_id 0 1 8 8 0 48901 0
SysBP 0 1 4 16 0 346 0
db_dBP <-
  db_vital_target |> 
  select(icustay_id, charttime,DiasBP) |> 
  filter(!is.na(DiasBP)) |> 
  group_by(icustay_id) |> 
  summarize(DiasBP = case_when(min(charttime) ~ DiasBP)) |> 
  compute()
db_mBP <-
  db_vital_target |> 
  select(icustay_id, charttime,MeanBP) |> 
  filter(!is.na(MeanBP)) |> 
  group_by(icustay_id) |> 
  summarize(MeanBP = case_when(min(charttime) ~ MeanBP)) |> 
  compute()
db_mBP |> head(10)
## # Source:   SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id MeanBP
##         <dbl>  <dbl>
##  1     200001   77  
##  2     200003   73  
##  3     200006   97.3
##  4     200007   87.7
##  5     200009  100  
##  6     200010   92  
##  7     200011   73  
##  8     200012   84  
##  9     200014   89  
## 10     200016   93
db_RR <-
  db_vital_target |> 
  select(icustay_id, charttime,RespRate) |> 
  filter(!is.na(RespRate)) |> 
  group_by(icustay_id) |> 
  summarize(RespRate = case_when(min(charttime) ~ RespRate)) |> 
  compute()
db_tmpc <-
  db_vital_target |> 
  select(icustay_id, charttime,TempC) |> 
  filter(!is.na(TempC)) |> 
  group_by(icustay_id) |> 
  summarize(TempC = case_when(min(charttime) ~ TempC)) |> 
  compute()
db_spo2 <-
  db_vital_target |> 
  select(icustay_id, charttime,SpO2) |> 
  filter(!is.na(SpO2)) |> 
  group_by(icustay_id) |> 
  summarize(SpO2 = case_when(min(charttime) ~ SpO2)) |> 
  compute()

#バイタルデータを結合する

db_vitals_first <-
  db_HR |> 
  left_join(db_sBP, by = "icustay_id") |> 
  left_join(db_dBP, by = "icustay_id") |> 
  left_join(db_mBP, by = "icustay_id") |> 
  left_join(db_RR, by = "icustay_id") |> 
  left_join(db_tmpc, by = "icustay_id") |> 
  left_join(db_spo2, by = "icustay_id")
db_vitals_first |> head(10)
## # Source:   SQL [10 x 8]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id HeartRate SysBP DiasBP MeanBP RespRate TempC  SpO2
##         <dbl>     <dbl> <dbl>  <dbl>  <dbl>    <dbl> <dbl> <dbl>
##  1     200001       115   113     65   77         22  37.3    94
##  2     200003       132   106     68   73         25  39.6    97
##  3     200006        84   118     87   97.3       20  36.2   100
##  4     200007        90   121     71   87.7       13  36.4    97
##  5     200009        92   125     84  100         14  34.7   100
##  6     200010       109   138     78   92         17  36.7    98
##  7     200011        77   137     62   73         31  36.8    98
##  8     200012       109   104     79   84         15  37.5    96
##  9     200014        71   133     67   89         10  35.7   100
## 10     200016        67   126     69   93         14  35.1   100

left_joinなのでdb_PEOにあるicustay_idのみ結合され、それ以外は削除される

db_vitals_first <- db_vitals_first |> compute()
db_PEO_vital <- 
  db_PEO |> 
  left_join(db_vitals_first, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) 

db_PEO_vital <- db_PEO_vital |> compute()
db_PEO_vital |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10390
cols_vital <- c("HeartRate", "SysBP", "DiasBP", "MeanBP", "RespRate", "TempC", "SpO2")
db_vital <- 
  db_PEO_vital |> 
  select("HADM_ID", cols_vital)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols_vital)
## 
##   # Now:
##   data %>% select(all_of(cols_vital))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_vital <- db_vital |> compute()

#urine output

#db_pivot_urine <- tbl(con_mimic, "pivot_urine_output")
db_urine_out <- tbl(con_mimic, "urineoutput")
db_urine_firstday <- tbl(con_mimic, "urine_output_firstday")

#db_pivot_urine: icustya_id・時刻ごとの尿量が記載されている。一日の尿量に直す必要がある。 時間を秒単位から日単位に修正し、 各icustay_idの入室時刻をもとに、 そこから1日(24時間)後までのデータから尿量を計算する。 (コードは結構乱雑です)

db_urine_out |> head(10)
## # Source:   SQL [10 x 3]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id  charttime value
##         <dbl>      <dbl> <dbl>
##  1     262400 6803121600   720
##  2     201728 5799682800   330
##  3     268544 4561884000   270
##  4     269568 6926698800    27
##  5     270592 4923151200     7
##  6     270592 4923162000    27
##  7     270848 6946108200    48
##  8     271616 4766220000     7
##  9     271616 4768909200     8
## 10     271616 4765392000     9
db_urine_out |> arrange(icustay_id)
## # Source:     SQL [?? x 3]
## # Database:   sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
##    icustay_id  charttime value
##         <dbl>      <dbl> <dbl>
##  1     200001 6687221700    50
##  2     200001 6687007320   250
##  3     200001 6687089220    60
##  4     200003 7245248400   170
##  5     200003 7245100800   230
##  6     200003 7245360000   420
##  7     200003 7245432000   370
##  8     200003 7245085500   360
##  9     200003 7245082800   275
## 10     200003 7245482400    40
## # ℹ more rows
db_urine1 <-
  db_urine_out |> filter(!is.na(icustay_id)) |> 
  mutate(charttime = charttime/(60*60*24))
db_urine1 |>arrange(icustay_id)|> head(10)
## # Source:     SQL [10 x 3]
## # Database:   sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
##    icustay_id charttime value
##         <dbl>     <dbl> <dbl>
##  1     200001    77398.    50
##  2     200001    77396.   250
##  3     200001    77397.    60
##  4     200003    83857.   170
##  5     200003    83855.   230
##  6     200003    83858.   420
##  7     200003    83859.   370
##  8     200003    83855.   360
##  9     200003    83855.   275
## 10     200003    83860.    40
db_urine_time0 <-
  db_urine1 |> 
  group_by(icustay_id) |> 
  summarize(time0 = min(charttime))
db_urine_time0 |> head(10)
## # Source:   SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id  time0
##         <dbl>  <dbl>
##  1     200001 77396.
##  2     200003 83855.
##  3     200006 69277.
##  4     200007 50817.
##  5     200009 80323.
##  6     200010 59386.
##  7     200011 79841.
##  8     200012 67196.
##  9     200014 49355.
## 10     200016 66080.

#1日(24時間)以内の尿量(value)のデータを抽出する

db_urine2 <-
  db_urine1 |> 
  left_join(db_urine_time0, by = "icustay_id") |> 
  mutate(elaspe = charttime - time0) |> 
  filter(elaspe <= 1) |> 
  compute()
db_urine2 |> arrange(icustay_id)|>head(10)
## # Source:     SQL [10 x 5]
## # Database:   sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
##    icustay_id charttime value  time0 elaspe
##         <dbl>     <dbl> <dbl>  <dbl>  <dbl>
##  1     200001    77396.   250 77396.  0    
##  2     200001    77397.    60 77396.  0.948
##  3     200003    83855.   230 83855.  0.490
##  4     200003    83855.   360 83855.  0.312
##  5     200003    83855.   275 83855.  0.281
##  6     200003    83855.   320 83855.  0.323
##  7     200003    83855.   250 83855.  0.531
##  8     200003    83856.   220 83855.  0.948
##  9     200003    83855.   230 83855.  0    
## 10     200003    83856.   130 83855.  0.698

#1日(24時間)以内の尿量の合計をみる

db_urine3 <-
  db_urine2 |> 
  group_by(icustay_id) |> 
  summarize(value = sum(value))
db_urine3 |> arrange(icustay_id)|>head(10)
## # Source:     SQL [10 x 2]
## # Database:   sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
##    icustay_id value
##         <dbl> <dbl>
##  1     200001   310
##  2     200003  3732
##  3     200006  2355
##  4     200007  1295
##  5     200009  2170
##  6     200010  2050
##  7     200011  3085
##  8     200012  1200
##  9     200014   844
## 10     200016  1275
db_urine3 <- db_urine3 |> compute()
db_urine3 |> head(10)
## # Source:   SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id value
##         <dbl> <dbl>
##  1     200001   310
##  2     200003  3732
##  3     200006  2355
##  4     200007  1295
##  5     200009  2170
##  6     200010  2050
##  7     200011  3085
##  8     200012  1200
##  9     200014   844
## 10     200016  1275

#db_urine_firstday: 初日のデータのみなので処理は楽。

db_urine_firstday |>head(10)
## # Source:   SQL [10 x 4]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    subject_id hadm_id icustay_id urineoutput
##         <dbl>   <dbl>      <dbl>       <dbl>
##  1        120  188923     273725          NA
##  2       1007  141227     267926          NA
##  3       1137  120833     281449          NA
##  4       1319  197140     276358          NA
##  5       1662  180788     207701          NA
##  6       1718  121066     238595          NA
##  7       1855  142848     298888          NA
##  8       1860  195382     283907          NA
##  9       1991  187693     285178          NA
## 10       2061  174783     256004          NA
#is.na(db_urine_firstday$urineoutput) |> sum()ではdbへアクセスできないため、使えない。
#使いたいならcollect()を使う。
db_urine_firstday |>mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_urine_firstd…
Number of rows 53359
Number of columns 4
_______________________
Column type frequency:
character 4
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
subject_id 0 1 3 7 0 41338 0
hadm_id 0 1 8 8 0 50763 0
icustay_id 0 1 8 8 0 53359 0
urineoutput 158 1 3 13 0 4357 0
db_urine_firstday1 <-
  db_urine_firstday |> 
  select(icustay_id, urineoutput)

is.na(db_urine_firstday1$urineoutput) |> sum()
## [1] 0
db_urine_firstday1 |>arrange(icustay_id)|> head(10)
## # Source:     SQL [10 x 2]
## # Database:   sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Ordered by: icustay_id
##    icustay_id urineoutput
##         <dbl>       <dbl>
##  1     200001         250
##  2     200003        3652
##  3     200006        1955
##  4     200007        1295
##  5     200009        1570
##  6     200010        2050
##  7     200011        3085
##  8     200012        1200
##  9     200014         664
## 10     200016        1275

#3つのファイルをdb_PEOにマージして、 どのファイルのデータを優先して使うかを決めたうえでデータを固定する(論文未記載)

db_u1 <-
  db_PEO |> 
  left_join(db_urine_firstday1, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) |> 
  compute()

db_u2 <-
  db_u1 |> 
  left_join(db_urine3, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) |> 
  compute()
db_u2 |> mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_u2, is.numer…
Number of rows 10390
Number of columns 21
_______________________
Column type frequency:
character 21
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECT_ID 0 1.00 3 7 0 10390 0
HADM_ID 0 1.00 8 8 0 10390 0
ICUSTAY_ID 0 1.00 8 8 0 10390 0
h2ras 0 1.00 3 3 0 2 0
death_elasp 4223 0.59 3 20 0 6107 0
mortality30 0 1.00 3 3 0 2 0
age 0 1.00 4 16 0 9355 0
ADMISSION_TYPE 0 1.00 6 9 0 3 0
INSURANCE 0 1.00 7 10 0 4 0
ETHNICITY 0 1.00 5 56 0 35 0
DIAGNOSIS 1 1.00 2 190 0 4206 0
HOSPITAL_EXPIRE_FLAG 0 1.00 3 3 0 2 0
GENDER 0 1.00 1 1 0 2 0
EXPIRE_FLAG 0 1.00 3 3 0 2 0
birthday 0 1.00 7 8 0 9448 0
admitday 0 1.00 7 16 0 10381 0
dischday 0 1.00 7 16 0 10379 0
deathday 4223 0.59 7 7 0 5762 0
hosp_deathday 8834 0.15 7 16 0 1556 0
urineoutput 480 0.95 3 11 0 2693 0
value 416 0.96 3 11 0 2941 0
db_PEO_urine <-
  db_u2 |> 
  mutate(urine_out = case_when(
    !is.na(urineoutput) ~ urineoutput,
    !is.na(value) ~ value
  )) 
db_PEO_urine|>mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_PEO_urine, i…
Number of rows 10390
Number of columns 22
_______________________
Column type frequency:
character 22
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECT_ID 0 1.00 3 7 0 10390 0
HADM_ID 0 1.00 8 8 0 10390 0
ICUSTAY_ID 0 1.00 8 8 0 10390 0
h2ras 0 1.00 3 3 0 2 0
death_elasp 4223 0.59 3 20 0 6107 0
mortality30 0 1.00 3 3 0 2 0
age 0 1.00 4 16 0 9355 0
ADMISSION_TYPE 0 1.00 6 9 0 3 0
INSURANCE 0 1.00 7 10 0 4 0
ETHNICITY 0 1.00 5 56 0 35 0
DIAGNOSIS 1 1.00 2 190 0 4206 0
HOSPITAL_EXPIRE_FLAG 0 1.00 3 3 0 2 0
GENDER 0 1.00 1 1 0 2 0
EXPIRE_FLAG 0 1.00 3 3 0 2 0
birthday 0 1.00 7 8 0 9448 0
admitday 0 1.00 7 16 0 10381 0
dischday 0 1.00 7 16 0 10379 0
deathday 4223 0.59 7 7 0 5762 0
hosp_deathday 8834 0.15 7 16 0 1556 0
urineoutput 480 0.95 3 11 0 2693 0
value 416 0.96 3 11 0 2941 0
urine_out 416 0.96 3 11 0 2695 0
db_PEO_urine <- db_PEO_urine |> compute()
db_PEO_urine |> mutate_if(is.numeric,as.character)|>skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_PEO_urine, i…
Number of rows 10390
Number of columns 22
_______________________
Column type frequency:
character 22
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECT_ID 0 1.00 3 7 0 10390 0
HADM_ID 0 1.00 8 8 0 10390 0
ICUSTAY_ID 0 1.00 8 8 0 10390 0
h2ras 0 1.00 3 3 0 2 0
death_elasp 4223 0.59 3 20 0 6107 0
mortality30 0 1.00 3 3 0 2 0
age 0 1.00 4 16 0 9355 0
ADMISSION_TYPE 0 1.00 6 9 0 3 0
INSURANCE 0 1.00 7 10 0 4 0
ETHNICITY 0 1.00 5 56 0 35 0
DIAGNOSIS 1 1.00 2 190 0 4206 0
HOSPITAL_EXPIRE_FLAG 0 1.00 3 3 0 2 0
GENDER 0 1.00 1 1 0 2 0
EXPIRE_FLAG 0 1.00 3 3 0 2 0
birthday 0 1.00 7 8 0 9448 0
admitday 0 1.00 7 16 0 10381 0
dischday 0 1.00 7 16 0 10379 0
deathday 4223 0.59 7 7 0 5762 0
hosp_deathday 8834 0.15 7 16 0 1556 0
urineoutput 480 0.95 3 11 0 2693 0
value 416 0.96 3 11 0 2941 0
urine_out 416 0.96 3 11 0 2695 0
vars <- c("urine_out")

db_PEO_urine |> select(c(vars, "h2ras"))|> collect() |> 
  CreateTableOne(vars = vars, strata = "h2ras") -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(vars)
## 
##   # Now:
##   data %>% select(all_of(vars))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test2.csv")
##                        Stratified by h2ras
##                         0                 1                 p      test SMD   
##   n                        5949              4441                             
##   urine_out (mean (SD)) 1781.19 (1381.17) 1842.99 (1309.36)  0.024       0.046
##                        Stratified by h2ras
##                         Missing
##   n                            
##   urine_out (mean (SD)) 4.0
cols_continuous <- c("urine_out")

db_PEO_urine |> 
  select(cols_continuous) |> 
  pivot_longer(cols = cols_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 2) +
  theme_bw()+
  theme(text = element_text(size = 20))
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols_continuous)
## 
##   # Now:
##   data %>% select(all_of(cols_continuous))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 416 rows containing non-finite values (`stat_bin()`).

db_urine <- 
  db_PEO_urine |> 
  select("HADM_ID", "urine_out")

db_urine <- db_urine |> compute()

#SOFA, SAPSiii 重症度スコアは各ICU入室ごとに作成されている。正直このまま使えばいいのは楽。

db_sofa <- tbl(con_mimic, "sofa")
db_saps <- tbl(con_mimic, "apsiii")

db_sofa_1 <- db_sofa |> select(icustay_id, SOFA)
db_saps_1 <- db_saps |> select(icustay_id, apsiii)
db_PEO_sofa <-
  db_PEO |> 
  left_join(db_sofa_1, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) |> 
  left_join(db_saps_1, by = c("ICUSTAY_ID" = "icustay_id"),copy = TRUE) 

db_PEO_sofa <- db_PEO_sofa |> compute()
vars <- c("SOFA", "apsiii")

db_PEO_sofa |> select(c(vars, "h2ras"))|> collect() |> 
  CreateTableOne(vars = vars, strata = "h2ras") -> tableone

print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test3.csv")
##                     Stratified by h2ras
##                      0             1             p      test SMD    Missing
##   n                   5949          4441                                   
##   SOFA (mean (SD))    4.59 (3.04)   5.04 (3.02)  <0.001       0.147 0.0    
##   apsiii (mean (SD)) 48.66 (20.54) 46.51 (19.58) <0.001       0.107 0.0
cols_continuous <- c( "SOFA", "apsiii")

db_PEO_sofa |> 
  select(cols_continuous) |> 
  pivot_longer(cols = cols_continuous, names_to = "name", values_to = "value") |> 
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 2) +
  theme_bw()+
  theme(text = element_text(size = 20))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

db_sofa <- 
  db_PEO_sofa |> 
  select("HADM_ID", "SOFA", "apsiii")

db_sofa <- db_sofa |> compute()

#CRRT(持続的腎代替療法) rrtしてる人のうち、dialysis_typeがCRRT, CVVH, CVVHD, CVVHDFの全員を対象とする。 pivot_RRTからicustay_idを取得する。

dbListTables(con_mimic)
##  [1] "admissions"                 "apsiii"                    
##  [3] "callout"                    "caregivers"                
##  [5] "chartevents"                "cptevents"                 
##  [7] "d_cpt"                      "d_icd_diagnoses"           
##  [9] "d_icd_procedures"           "d_items"                   
## [11] "d_labitems"                 "datetimeevents"            
## [13] "db_LVEF_assess"             "dbplyr_001"                
## [15] "dbplyr_002"                 "dbplyr_003"                
## [17] "dbplyr_004"                 "dbplyr_005"                
## [19] "dbplyr_006"                 "dbplyr_007"                
## [21] "dbplyr_008"                 "dbplyr_009"                
## [23] "dbplyr_010"                 "dbplyr_011"                
## [25] "dbplyr_012"                 "dbplyr_013"                
## [27] "dbplyr_014"                 "dbplyr_015"                
## [29] "dbplyr_016"                 "dbplyr_017"                
## [31] "dbplyr_018"                 "dbplyr_019"                
## [33] "dbplyr_020"                 "dbplyr_021"                
## [35] "dbplyr_022"                 "dbplyr_023"                
## [37] "dbplyr_024"                 "dbplyr_025"                
## [39] "dbplyr_026"                 "df_lab_list"               
## [41] "diagnoses_icd"              "drgcodes"                  
## [43] "elixhauser_ahrq_v37"        "elixhauser_quan"           
## [45] "heightfirstday"             "heightweight"              
## [47] "icustay_details"            "icustays"                  
## [49] "inputevents_cv"             "inputevents_mv"            
## [51] "labevents"                  "microbiologyevents"        
## [53] "noteevents"                 "outputevents"              
## [55] "patients"                   "pivoted _ rrt"             
## [57] "pivoted_height"             "pivoted_labo"              
## [59] "pivoted_vitals"             "prescriptions"             
## [61] "procedureevents_mv"         "procedures_icd"            
## [63] "services"                   "sofa"                      
## [65] "sqlite_stat1"               "sqlite_stat4"              
## [67] "transfers"                  "urine_output_firstday"     
## [69] "urineoutput"                "ventilation_classification"
## [71] "ventilation_duration"       "weightduration"            
## [73] "weightfirstday"
db_rrt <- tbl(con_mimic, "pivoted _ rrt")
db_rrt |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##         n
##     <int>
## 1 1399045
db_rrt |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_rrt, is.nume…
Number of rows 1399045
Number of columns 5
_______________________
Column type frequency:
character 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
icustay_id 0 1.00 8 8 0 3622 0
charttime 0 1.00 12 12 0 253399 0
dialysis_present 0 1.00 3 3 0 1 0
dialysis_active 0 1.00 3 3 0 2 0
dialysis_type 342019 0.76 3 10 0 8 0
db_rrt |> head(10)
## # Source:   SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id  charttime dialysis_present dialysis_active dialysis_type
##         <dbl>      <dbl>            <dbl>           <dbl> <chr>        
##  1     200001 6687101640                1               0 <NA>         
##  2     200001 6687117720                1               0 <NA>         
##  3     200001 6687259560                1               0 <NA>         
##  4     200055 6602027940                1               0 <NA>         
##  5     200055 6602058300                1               0 <NA>         
##  6     200065 4740669900                1               0 <NA>         
##  7     200065 4740732000                1               0 <NA>         
##  8     200065 4740782400                1               0 <NA>         
##  9     200065 4740832800                1               0 <NA>         
## 10     200065 4740843600                1               0 <NA>

#そもそもdialysis_typeがどのくらいあるのかチェックする. #列の入力要素を確認できる

db_rrt |> distinct(dialysis_type)
## # Source:   SQL [9 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##   dialysis_type
##   <chr>        
## 1 <NA>         
## 2 IHD          
## 3 CAVH         
## 4 CRRT         
## 5 CVVH         
## 6 SCUF         
## 7 CVVHD        
## 8 CVVHDF       
## 9 Peritoneal

#特定のdialysis_typeに限定し、該当するicustay_idを取得する

RRT_icu_id <-
  db_rrt |> filter(dialysis_type %in% c("CRRT", "CVVH", "CVVHD", "CVVHDF")) |>
  distinct(icustay_id) |> pull(icustay_id)

#db_PEOにrrtの有無を示す列を作る。

db_PEO_rrt <-
  db_PEO |> 
  mutate(rrt = ifelse(ICUSTAY_ID %in% RRT_icu_id, 1, 0))

db_PEO_rrt <- db_PEO_rrt |> compute()
vars <- c("rrt")
vars_fact <- vars
  
db_PEO_rrt |> select(c(vars, "h2ras"))|> collect() |> 
  CreateTableOne(vars = vars, strata = "h2ras", factorVars = vars_fact) -> tableone

print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test4.csv")
##              Stratified by h2ras
##               0           1           p      test SMD    Missing
##   n           5949        4441                                  
##   rrt = 1 (%)  155 (2.6)   138 (3.1)   0.142       0.030 0.0
db_rrt <- 
  db_PEO_rrt |> 
  select("HADM_ID", "rrt")

db_rrt <- db_rrt |> compute("db_rrt")

#use of ventilator ventilation_durationのあるICUSTAY_IDを人工呼吸ありとする

dbListTables(con_mimic)
##  [1] "admissions"                 "apsiii"                    
##  [3] "callout"                    "caregivers"                
##  [5] "chartevents"                "cptevents"                 
##  [7] "d_cpt"                      "d_icd_diagnoses"           
##  [9] "d_icd_procedures"           "d_items"                   
## [11] "d_labitems"                 "datetimeevents"            
## [13] "db_LVEF_assess"             "db_rrt"                    
## [15] "dbplyr_001"                 "dbplyr_002"                
## [17] "dbplyr_003"                 "dbplyr_004"                
## [19] "dbplyr_005"                 "dbplyr_006"                
## [21] "dbplyr_007"                 "dbplyr_008"                
## [23] "dbplyr_009"                 "dbplyr_010"                
## [25] "dbplyr_011"                 "dbplyr_012"                
## [27] "dbplyr_013"                 "dbplyr_014"                
## [29] "dbplyr_015"                 "dbplyr_016"                
## [31] "dbplyr_017"                 "dbplyr_018"                
## [33] "dbplyr_019"                 "dbplyr_020"                
## [35] "dbplyr_021"                 "dbplyr_022"                
## [37] "dbplyr_023"                 "dbplyr_024"                
## [39] "dbplyr_025"                 "dbplyr_026"                
## [41] "dbplyr_027"                 "df_lab_list"               
## [43] "diagnoses_icd"              "drgcodes"                  
## [45] "elixhauser_ahrq_v37"        "elixhauser_quan"           
## [47] "heightfirstday"             "heightweight"              
## [49] "icustay_details"            "icustays"                  
## [51] "inputevents_cv"             "inputevents_mv"            
## [53] "labevents"                  "microbiologyevents"        
## [55] "noteevents"                 "outputevents"              
## [57] "patients"                   "pivoted _ rrt"             
## [59] "pivoted_height"             "pivoted_labo"              
## [61] "pivoted_vitals"             "prescriptions"             
## [63] "procedureevents_mv"         "procedures_icd"            
## [65] "services"                   "sofa"                      
## [67] "sqlite_stat1"               "sqlite_stat4"              
## [69] "transfers"                  "urine_output_firstday"     
## [71] "urineoutput"                "ventilation_classification"
## [73] "ventilation_duration"       "weightduration"            
## [75] "weightfirstday"
db_venti_d <- tbl(con_mimic, "ventilation_duration")
db_venti_d |> head(10)
## # Source:   SQL [10 x 5]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    icustay_id ventnum  starttime    endtime duration_hours
##         <dbl>   <dbl>      <dbl>      <dbl>          <dbl>
##  1     200241       1 5542520400 5542527600              2
##  2     200614       1 5608353600 5608360800              2
##  3     201027       1 7091373600 7091380800              2
##  4     201705       1 5497311600 5497318800              2
##  5     201706       1 7235535600 7235542800              2
##  6     202391       1 6914916000 6914923200              2
##  7     202404       1 6504073200 6504080400              2
##  8     203143       1 5711580000 5711587200              2
##  9     203480       1 6343106400 6343113600              2
## 10     203674       1 4264786800 4264794000              2
db_venti_d |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 38518
db_venti_d |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_venti_d, is….
Number of rows 38518
Number of columns 5
_______________________
Column type frequency:
character 5
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
icustay_id 34 1 8 8 0 26796 0
ventnum 0 1 3 4 0 62 0
starttime 0 1 12 12 0 38089 0
endtime 0 1 12 12 0 38039 0
duration_hours 0 1 3 18 0 7114 0
venti_icu_id <-
  db_venti_d |> distinct(icustay_id) |> pull(icustay_id)
db_PEO_venti <-
  db_PEO |> 
  mutate(ventilation = ifelse(ICUSTAY_ID %in% venti_icu_id, 1, 0)) |> 
  replace_na(list(ventilation = 0))    #|> replace_na(list(ventilation = 0))なくて良い
  
db_PEO_venti <- db_PEO_venti |> compute()
db_PEO_venti |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_PEO_venti, i…
Number of rows 10390
Number of columns 20
_______________________
Column type frequency:
character 20
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
SUBJECT_ID 0 1.00 3 7 0 10390 0
HADM_ID 0 1.00 8 8 0 10390 0
ICUSTAY_ID 0 1.00 8 8 0 10390 0
h2ras 0 1.00 3 3 0 2 0
death_elasp 4223 0.59 3 20 0 6107 0
mortality30 0 1.00 3 3 0 2 0
age 0 1.00 4 16 0 9355 0
ADMISSION_TYPE 0 1.00 6 9 0 3 0
INSURANCE 0 1.00 7 10 0 4 0
ETHNICITY 0 1.00 5 56 0 35 0
DIAGNOSIS 1 1.00 2 190 0 4206 0
HOSPITAL_EXPIRE_FLAG 0 1.00 3 3 0 2 0
GENDER 0 1.00 1 1 0 2 0
EXPIRE_FLAG 0 1.00 3 3 0 2 0
birthday 0 1.00 7 8 0 9448 0
admitday 0 1.00 7 16 0 10381 0
dischday 0 1.00 7 16 0 10379 0
deathday 4223 0.59 7 7 0 5762 0
hosp_deathday 8834 0.15 7 16 0 1556 0
ventilation 0 1.00 3 3 0 2 0
vars <- c("ventilation")
vars_fact <- vars
  
db_PEO_venti |> select(c(vars, "h2ras"))|> collect() |> 
  CreateTableOne(vars = vars, strata = "h2ras", factorVars = vars_fact) -> tableone

print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test5.csv")
##                      Stratified by h2ras
##                       0            1            p      test SMD    Missing
##   n                   5949         4441                                   
##   ventilation = 1 (%) 2403 (40.4)  2837 (63.9)  <0.001       0.484 0.0
db_venti <- 
  db_PEO_venti |> 
  select("HADM_ID", "ventilation")

# dbRemoveTable(con_mimic, "db_venti")
db_venti <- db_venti |> compute("db_venti")

#details: Ethnicity もともとEthnicityはadmissionsに入っているが分類が細かすぎる。 より一般的な分類にした表を公式が作成していて、icustay_detailsから参照できる。

db_details <- tbl(con_mimic, "icustay_details")
db_details |> head(10)
## # Source:   SQL [10 x 19]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    subject_id hadm_id icustay_id gender   dod  admittime  dischtime los_hospital
##         <dbl>   <dbl>      <dbl> <chr>  <dbl>      <dbl>      <dbl>        <dbl>
##  1          2  163353     243653 M         NA 5318679840 5319013680            4
##  2          5  178980     214757 M         NA 4199833860 4200034500            2
##  3        110  154943     282073 M         NA 4430849220 4431438720            7
##  4        116  127203     208829 M         NA 4632845160 4633184700            4
##  5        168  141436     224501 M         NA 5347083300 5347250040            2
##  6        204  181170     232244 F         NA 4810846260 4811056800            2
##  7        258  189406     284648 F         NA 4882391940 4882693920            3
##  8        258  189406     207952 F         NA 4882391940 4882693920            3
##  9        299  195143     204434 M         NA 6516529620 6516691200            2
## 10        442  168519     257970 M         NA 6627264060 6627444780            2
## # ℹ 11 more variables: admission_age <dbl>, ethnicity <chr>,
## #   ethnicity_grouped <chr>, hospital_expire_flag <dbl>, hospstay_seq <dbl>,
## #   first_hosp_stay <int>, intime <dbl>, outtime <dbl>, los_icu <dbl>,
## #   icustay_seq <dbl>, first_icu_stay <int>
db_details |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_details, is….
Number of rows 61051
Number of columns 19
_______________________
Column type frequency:
character 19
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
subject_id 0 1.00 3 7 0 46428 0
hadm_id 0 1.00 8 8 0 57328 0
icustay_id 0 1.00 8 8 0 61051 0
gender 0 1.00 1 1 0 2 0
dod 37162 0.39 12 12 0 12884 0
admittime 0 1.00 12 12 0 57047 0
dischtime 0 1.00 12 12 0 57036 0
los_hospital 0 1.00 3 5 0 162 0
admission_age 0 1.00 3 5 0 89 0
ethnicity 0 1.00 5 56 0 41 0
ethnicity_grouped 0 1.00 5 8 0 7 0
hospital_expire_flag 0 1.00 3 3 0 2 0
hospstay_seq 0 1.00 3 4 0 41 0
first_hosp_stay 0 1.00 1 1 0 2 0
intime 0 1.00 12 12 0 61050 0
outtime 10 1.00 12 12 0 61037 0
los_icu 10 1.00 3 5 0 146 0
icustay_seq 0 1.00 3 3 0 7 0
first_icu_stay 0 1.00 1 1 0 2 0

#ethinicityの種類を確認する

db_details |> distinct(ethnicity_grouped)
## # Source:   SQL [7 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##   ethnicity_grouped
##   <chr>            
## 1 asian            
## 2 other            
## 3 white            
## 4 black            
## 5 unknown          
## 6 hispanic         
## 7 native
db_details |> group_by(ethnicity_grouped) |> count()
## # Source:   SQL [7 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Groups:   ethnicity_grouped
##   ethnicity_grouped     n
##   <chr>             <int>
## 1 asian              2024
## 2 black              5955
## 3 hispanic           2182
## 4 native               57
## 5 other              1827
## 6 unknown            6173
## 7 white             42833

#native, other, unknownはotherとした。

db_details_ethnicity <-
  db_details |> 
  distinct(hadm_id, .keep_all = T) |> 
  select(hadm_id, ethnicity_grouped) |> 
  mutate(ethnicity_5 = case_when(
    ethnicity_grouped %in% c("native", "other", "unknown") ~ "other",
    TRUE ~ ethnicity_grouped
  ))                       #TRUE ~ ethnicity_groupedはethnicity_groupedがそのまま代入。
db_details_ethnicity |> group_by(ethnicity_5) |> count()
## # Source:   SQL [5 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
## # Groups:   ethnicity_5
##   ethnicity_5     n
##   <chr>       <int>
## 1 asian        1932
## 2 black        5625
## 3 hispanic     2077
## 4 other        7565
## 5 white       40129
db_PEO_ethnicity <-
  db_PEO |> 
  left_join(db_details_ethnicity, by = c("HADM_ID" = "hadm_id"),copy = TRUE)

db_PEO_ethnicity <- db_PEO_ethnicity |> compute()

asiaの数が逆転している

var = c("ethnicity_5")
tableone<-
db_PEO_ethnicity |> select(c(var, "h2ras")) |> collect() |> 
  CreateTableOne(vars = var, strata = "h2ras", factorVars = var) 
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(var)
## 
##   # Now:
##   data %>% select(all_of(var))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd=T, missing = T)
##                  Stratified by h2ras
##                   0            1            p      test SMD    Missing
##   n               5949         4441                                   
##   ethnicity_5 (%)                            0.002       0.081 0.9    
##      asian         101 ( 1.7)    80 ( 1.8)                            
##      black         499 ( 8.5)   362 ( 8.2)                            
##      hispanic      123 ( 2.1)   125 ( 2.8)                            
##      other         892 (15.2)   569 (12.9)                            
##      white        4262 (72.5)  3279 (74.3)
db_ethnicity <-
  db_PEO_ethnicity |> 
  select(HADM_ID, ethnicity_5)

db_ethnicity <- db_ethnicity |> compute()

#comorbidity Quanの定義による合併症リストはBigQueryから入手可能 しかし、論文ではこの合併症リストにないものが多い(H2ブロッカー使用に関連のありそうな合併症を選択している) また、Quanのリストにある合併症に関してもBigQueryファイルと論文とでかなり食い違う。おそらく著者らは病名のICD9コードから自分たちで抽出しているっぽい

我々もQuanのリストからは採用せず、手動でデータ抽出していく事にする

hypertension “d_icd_diagnoses”と”diagnoses_icd”を使う やることは心不全を抽出した時と同じなので、比較的単純

HT_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("hypertension", ignore_case = T))) |> 
  filter(!str_detect(LONG_TITLE, coll("without", ignore_case = T))) |> 
  pull(ICD9_CODE)
hadm_id_vec <- db_PEO |> pull(HADM_ID)

db_ht <- 
  db_diag |> 
  filter(HADM_ID %in% hadm_id_vec) |> 
  mutate(HT = ifelse(ICD9_CODE %in% HT_icd_9, 1, 0)) |> 
  group_by(HADM_ID) |> 
  summarize(hypertension = max(HT))
db_ht |> head(10)
## # Source:   SQL [10 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    HADM_ID hypertension
##      <dbl>        <dbl>
##  1  100018            0
##  2  100035            0
##  3  100036            0
##  4  100050            1
##  5  100078            1
##  6  100087            1
##  7  100095            0
##  8  100099            1
##  9  100113            0
## 10  100124            0
db_ht |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_ht, is.numer…
Number of rows 10390
Number of columns 2
_______________________
Column type frequency:
character 2
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
HADM_ID 0 1 8 8 0 10390 0
hypertension 0 1 3 3 0 2 0
db_ht |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10390

#論文にある全ての合併症 以下にコード例を示す。自分でも試行錯誤してみると良い。

df_d_icd_diag|>head(10)
## # A tibble: 10 × 4
##    ROW_ID ICD9_CODE SHORT_TITLE              LONG_TITLE                         
##     <dbl> <chr>     <chr>                    <chr>                              
##  1    174 01166     TB pneumonia-oth test    Tuberculous pneumonia [any form], …
##  2    175 01170     TB pneumothorax-unspec   Tuberculous pneumothorax, unspecif…
##  3    176 01171     TB pneumothorax-no exam  Tuberculous pneumothorax, bacterio…
##  4    177 01172     TB pneumothorx-exam unkn Tuberculous pneumothorax, bacterio…
##  5    178 01173     TB pneumothorax-micro dx Tuberculous pneumothorax, tubercle…
##  6    179 01174     TB pneumothorax-cult dx  Tuberculous pneumothorax, tubercle…
##  7    180 01175     TB pneumothorax-histo dx Tuberculous pneumothorax, tubercle…
##  8    181 01176     TB pneumothorax-oth test Tuberculous pneumothorax, tubercle…
##  9    182 01180     Pulmonary TB NEC-unspec  Other specified pulmonary tubercul…
## 10    183 01181     Pulmonary TB NEC-no exam Other specified pulmonary tubercul…
#hypertension
HT_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("hypertension", ignore_case = T))) |> 
  filter(!str_detect(LONG_TITLE, coll("without", ignore_case = T))) |> 
  pull(ICD9_CODE)

#diabetes
DM_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("diabetes me", ignore_case = T))|
         str_detect(SHORT_TITLE, coll("DMI", ignore_case = F))) |> 
  filter(str_starts(ICD9_CODE, "2")) |>
  pull(ICD9_CODE)

#anemia 貧血
anemia_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("anemia", ignore_case = T))) |> 
  filter(str_starts(ICD9_CODE, "2")) |> 
  pull(ICD9_CODE)

#af
af_icd_9 <- c("42731")

#coronary atherosclerosis 冠動脈アテローム性動脈硬化症 
coronary_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("coronary atherosclerosis", ignore_case = T))) |> 
  pull(ICD9_CODE)

#venous thrombus 静脈血栓
venous_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("venous embo", ignore_case = T))|
         str_detect(SHORT_TITLE, coll("vein thromb", ignore_case = T))) |> 
  pull(ICD9_CODE)

#myocardial infarction 心筋梗塞
mi_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("myocardial infarction", ignore_case = T))) |> 
  filter(!ICD9_CODE %in% c("4110", "41181", "42979")) |> 
  pull(ICD9_CODE)

#gastritis 胃炎
gastritis_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("gastritis", ignore_case = T))) |> 
  pull(ICD9_CODE)

#duodenal ulcer 十二指腸潰瘍
d_ulcer_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("duodenal ulcer", ignore_case = T))) |> 
  pull(ICD9_CODE)

#gastric ulcer 胃潰瘍
g_ulcer_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("gastric ulcer", ignore_case = T))) |> 
  pull(ICD9_CODE)

#gastric bleeding 胃出血。
g_bleed_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("gastr", ignore_case = T))|
         str_detect(LONG_TITLE, coll("intest", ignore_case = T))|
         str_detect(LONG_TITLE, coll("duodenal", ignore_case = T))|
         str_detect(LONG_TITLE, coll("colon", ignore_case = T))) |> 
  filter(str_detect(LONG_TITLE, coll("hemorrh", ignore_case = T))) |> 
  filter(!str_detect(LONG_TITLE, coll("without mention of hemorrhage", ignore_case = T))) |>
  pull(ICD9_CODE)

#AKI 急性腎障害
aki_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("acute kidney", ignore_case = T))) |> 
  pull(ICD9_CODE)

#septic shock 敗血症性ショック
s_shock_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("septic shock", ignore_case = T))) |> 
  pull(ICD9_CODE)

#pneumonia 肺炎
pneumonia_icd_9 <- 
  df_d_icd_diag |> 
  filter(str_detect(LONG_TITLE, coll("pneumonia", ignore_case = T))) |> 
  pull(ICD9_CODE)
hadm_id_vec <- db_PEO |> pull(HADM_ID)

db_comorb_manual <- 
  db_diag |> 
  filter(HADM_ID %in% hadm_id_vec) |> 

  mutate(HT = ifelse(ICD9_CODE %in% HT_icd_9, 1, 0),
         DM = ifelse(ICD9_CODE %in% DM_icd_9, 1, 0),
         anemia = ifelse(ICD9_CODE %in% anemia_icd_9, 1, 0),
         Af = ifelse(ICD9_CODE %in% af_icd_9, 1, 0),
         coronary_atherosclerosis = ifelse(ICD9_CODE %in% coronary_icd_9, 1, 0),
         venous_thrombus = ifelse(ICD9_CODE %in% venous_icd_9, 1, 0),
         MI = ifelse(ICD9_CODE %in% mi_icd_9, 1, 0),
         gastritis = ifelse(ICD9_CODE %in% gastritis_icd_9, 1, 0),
         g_ulcer = ifelse(ICD9_CODE %in% g_ulcer_icd_9, 1, 0),
         d_ulcer = ifelse(ICD9_CODE %in% d_ulcer_icd_9, 1, 0),
         g_bleeding = ifelse(ICD9_CODE %in% g_bleed_icd_9, 1, 0),
         aki = ifelse(ICD9_CODE %in% aki_icd_9, 1, 0),
         s_shock = ifelse(ICD9_CODE %in% s_shock_icd_9, 1, 0),
         pneumonia = ifelse(ICD9_CODE %in% pneumonia_icd_9, 1, 0)) |> 
  
  group_by(HADM_ID) |> 
  summarize(hypertension = max(HT),
            DM = max(DM),
            anemia = max(anemia),
            Af = max(Af),
            coronary_atherosclerosis = max(coronary_atherosclerosis),
            venous_thrombus = max(venous_thrombus),
            MI = max(MI),
            gastritis = max(gastritis),
            gastric_ulcer = max(g_ulcer),
            duodenal_ulcer = max(d_ulcer),
            gastric_bleeding = max(g_bleeding),
            aki = max(aki),
            septic_shock = max(s_shock),
            pneumonia = max(pneumonia))
db_comorb_manual |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10390
db_PEO_comorb <-
  db_PEO |> 
    left_join(db_comorb_manual, by = "HADM_ID",copy = TRUE)

db_PEO_comorb <- db_PEO_comorb |> compute()
cols_comorb <- c(
  "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", "venous_thrombus", "MI",
  "gastritis", "gastric_ulcer", "duodenal_ulcer", "gastric_bleeding", "aki", "septic_shock",
  "pneumonia")

#Pneumonia(肺炎)がH2RAS使用群と非使用群で論文と逆になっている。

vars <- cols_comorb
factors <- cols_comorb

tableone <-db_PEO_comorb |> select(c(vars, "h2ras"))|> collect() |> 
  CreateTableOne(vars = vars, strata = "h2ras", factorVars = factors) 

print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/test6.csv")
##                                   Stratified by h2ras
##                                    0            1            p      test SMD   
##   n                                5949         4441                           
##   hypertension = 1 (%)             2356 (39.6)  2027 (45.6)  <0.001       0.122
##   DM = 1 (%)                       2132 (35.8)  1643 (37.0)   0.233       0.024
##   anemia = 1 (%)                   1996 (33.6)  1416 (31.9)   0.077       0.036
##   Af = 1 (%)                       2537 (42.6)  2034 (45.8)   0.001       0.064
##   coronary_atherosclerosis = 1 (%) 2268 (38.1)  2047 (46.1)  <0.001       0.162
##   venous_thrombus = 1 (%)           112 ( 1.9)    76 ( 1.7)   0.566       0.013
##   MI = 1 (%)                       1002 (16.8)   716 (16.1)   0.341       0.019
##   gastritis = 1 (%)                 121 ( 2.0)    61 ( 1.4)   0.014       0.051
##   gastric_ulcer = 1 (%)              66 ( 1.1)    20 ( 0.5)  <0.001       0.075
##   duodenal_ulcer = 1 (%)             71 ( 1.2)    25 ( 0.6)   0.001       0.068
##   gastric_bleeding = 1 (%)          516 ( 8.7)   184 ( 4.1)  <0.001       0.186
##   aki = 1 (%)                      2262 (38.0)  1410 (31.7)  <0.001       0.132
##   septic_shock = 1 (%)              415 ( 7.0)   260 ( 5.9)   0.024       0.046
##   pneumonia = 1 (%)                1343 (22.6)   926 (20.9)   0.038       0.042
##                                   Stratified by h2ras
##                                    Missing
##   n                                       
##   hypertension = 1 (%)             0.0    
##   DM = 1 (%)                       0.0    
##   anemia = 1 (%)                   0.0    
##   Af = 1 (%)                       0.0    
##   coronary_atherosclerosis = 1 (%) 0.0    
##   venous_thrombus = 1 (%)          0.0    
##   MI = 1 (%)                       0.0    
##   gastritis = 1 (%)                0.0    
##   gastric_ulcer = 1 (%)            0.0    
##   duodenal_ulcer = 1 (%)           0.0    
##   gastric_bleeding = 1 (%)         0.0    
##   aki = 1 (%)                      0.0    
##   septic_shock = 1 (%)             0.0    
##   pneumonia = 1 (%)                0.0
db_comorb <- 
  db_PEO_comorb |> 
  select("HADM_ID", all_of(cols_comorb))

db_comorb <- db_comorb |> compute()

#LVEF noteeventsを使う 超音波検査の結果を記述した文章からEFの値を抽出するという難作業。 文章データなので大きなファイルだがstringrを使いたいので、ある程度絞った後にcollectする メモリに余裕のある人向けのタスクかもしれない(4GBだとおそらく不可、16GBなら可能、8GBは手元にないので検証できていません→是非やってみて結果を教えてください。)

db_note <- tbl(con_mimic, "noteevents")
db_note |> colnames()
##  [1] "ROW_ID"      "SUBJECT_ID"  "HADM_ID"     "CHARTDATE"   "CHARTTIME"  
##  [6] "STORETIME"   "CATEGORY"    "DESCRIPTION" "CGID"        "ISERROR"    
## [11] "TEXT"
db_note |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##         n
##     <int>
## 1 2083180
db_note |> head(10)
## # Source:   SQL [10 x 11]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    ROW_ID SUBJECT_ID HADM_ID CHARTDATE CHARTTIME STORETIME CATEGORY  DESCRIPTION
##     <dbl>      <dbl>   <dbl>     <dbl>     <int>     <int> <chr>     <chr>      
##  1    174      22532  167853     66324        NA        NA Discharg… Report     
##  2    175      13702  107527     54220        NA        NA Discharg… Report     
##  3    176      13702  167118     54565        NA        NA Discharg… Report     
##  4    177      13702  196489     56477        NA        NA Discharg… Report     
##  5    178      26880  135453     70210        NA        NA Discharg… Report     
##  6    179      53181  170490     73846        NA        NA Discharg… Report     
##  7    180      20646  134727     52208        NA        NA Discharg… Report     
##  8    181      42130  114236     65803        NA        NA Discharg… Report     
##  9    182      56174  163469     54279        NA        NA Discharg… Report     
## 10    183      56174  189681     54398        NA        NA Discharg… Report     
## # ℹ 3 more variables: CGID <int>, ISERROR <int>, TEXT <chr>

#文章カテゴリーを見てみる 15カテゴリーの文章情報。ここからLVEFが記載されていそうな文章を選ぶ。 通常はEchoと各種サマリー系だが、面倒であれば全部選ぶのでも問題ない。 TEXTの列をwrite.CSV()でcsvファイルに書き出して、開くと見やすい。

db_note |> distinct(CATEGORY)
## # Source:   SQL [?? x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    CATEGORY         
##    <chr>            
##  1 Discharge summary
##  2 Echo             
##  3 ECG              
##  4 Nursing          
##  5 Physician        
##  6 Rehab Services   
##  7 Case Management  
##  8 Respiratory      
##  9 Nutrition        
## 10 General          
## # ℹ more rows
ptids <- db_PEO |> pull(HADM_ID)
db_PEO |> distinct(HADM_ID)|>count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10390
db_note_echo <- 
  db_note |> 
  filter(HADM_ID %in% ptids) |> 
  # filter(CATEGORY %in% c("Echo")) |>
  compute()

db_note_echo |> distinct(HADM_ID) |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 10347

10390人中、検査・医療資源治療が記載されている人は10347人であった。

df_note_echo <- collect(db_note_echo)
pryr::object_size(df_note_echo)
## 803.97 MB
normal_text <-
  c("systolic function.{0,10}normal",    #systolic function とnormalの間には、最大で10文字までの任意の文字列(スペース、記号、数字など)が存在できる。
    "Normal regional LV systolic function", 
    "left ventricular systolic function appears grossly preserved",
    "LV systolic function appears grossly preserved",
    "left ventricular systolic function is.{0,12}normal",
    "LV systolic function is.{0,12}normal"
    )

mild_text <-
  c("left ventricular systolic function is mildly depressed",
    "LV systolic function is mildly depressed",
    "left ventricular systolic fxn is slightly depressed",
    "LV systolic fxn is slightly depressed",
    "mild regional left ventricular systolic dysfunction",
    "mild regional LV systolic dysfunction",
    "mildly depressed LVEF",
    "mild global left ventricular hypokinesis",
    "mild global LV hypokinesis",
    "mild left ventricular hypokinesis",
    "mild LV hypokinesis",
    "left ventricular systolic function is.{0,5}slightly worse",
    "LV systolic function is.{0,5}slightly worse")   

moderate_text <- 
  c("left ventricular systolic function is moderately depressed",
    "LV systolic function is moderately depressed",
    "left ventricular systolic fxn is moderately depressed",
    "LV systolic fxn is moderately depressed",
    "moderate regional left ventricular systolic dysfunction",
    "moderate regional LV systolic dysfunction",
    "moderately depressed LVEF",
    "moderate global left ventricular hypokinesis",
    "moderate global LV hypokinesis",
    "moderate left ventricular hypokinesis",
    "moderate LV hypokinesis",
    "left ventricular systolic function is.{0,5}moderately worse",
    "LV systolic function is.{0,5}moderately worse"
    )

severe_text <-
  c("left ventricular systolic function is severely depressed",
    "LV systolic function is severely depressed",
    "left ventricular systolic fxn is severely depressed",
    "LV systolic fxn is severely depressed",
    "severe regional left ventricular systolic dysfunction",
    "severe regional LV systolic dysfunction",
    "severely depressed LVEF",
    "severe global left ventricular hypokinesis",
    "severe global LV hypokinesis",
    "severe left ventricular hypokinesis",
    "severe LV hypokinesis",
    "left ventricular systolic function is.{0,5}severely worse",
    "LV systolic function is.{0,5}severely worse"
    )

difficult_text <-
  c("LV systolic function is difficult to assess",
    "left ventricular systolic function is difficult to assess",
    "LV systolic function cannot be reliably assessed",
    "left ventricular systolic function cannot be reliably assessed",
    "LV not well seen",
    "LVEF cannot be estimated")


abnormal_text <-
  c("left ventricular systolic function is now depressed",
    "left ventricular systolic function is depressed",
    "LV systolic function appears depressed",
    "depressed left ventricular.{0,10}function",
    "depressed LV.{0,10}function"
  )
df_note_echo_1 <-
  df_note_echo |> 
  select(HADM_ID, TEXT, CHARTDATE, CATEGORY) |> 
  #1
  mutate(TEXT = str_replace_all(TEXT, pattern = "\n", replacement = " ")) |>   #\nは改行を示す正規表現。改行をスペースに置換すれば文章の続きとして認識される。

  mutate(
    #2
    LVEF_ex1 = str_extract(TEXT, "EF.{0,8}[0-9]{2}"),
    LVEF_ex2 = str_extract(TEXT, "ejection fraction.{0,8}[0-9]{2}"),
    
    #3
    EF_assess = case_when(
      str_detect(TEXT, regex(str_c(severe_text, collapse = "|"),ignore_case = T)) ~ "severe",
      str_detect(TEXT, regex(str_c(moderate_text, collapse = "|"),ignore_case = T)) ~ "moderate",
      str_detect(TEXT, regex(str_c(mild_text, collapse = "|"),ignore_case = T)) ~ "mild",
      str_detect(TEXT, regex(str_c(normal_text, collapse = "|"),ignore_case = T)) ~ "normal",
      # str_detect(TEXT, regex(str_c(difficult_text, collapse = "|"),ignore_case = T)) ~ "difficult",
      str_detect(TEXT,regex(str_c(abnormal_text, collapse = "|"),ignore_case = T)) ~ "mild"
      
    )) |> select(!TEXT)

df_note_echo_1
## # A tibble: 377,474 × 6
##    HADM_ID CHARTDATE CATEGORY          LVEF_ex1 LVEF_ex2 EF_assess
##      <dbl>     <dbl> <chr>             <chr>    <chr>    <chr>    
##  1  135453     70210 Discharge summary <NA>     <NA>     <NA>     
##  2  134727     52208 Discharge summary <NA>     <NA>     <NA>     
##  3  121936     56659 Discharge summary EF 25-30 <NA>     moderate 
##  4  139574     81227 Discharge summary EF >55   <NA>     <NA>     
##  5  121504     83685 Discharge summary <NA>     <NA>     <NA>     
##  6  120644     82020 Discharge summary EF 35    <NA>     moderate 
##  7  116471     57553 Discharge summary <NA>     <NA>     <NA>     
##  8  146533     79757 Discharge summary <NA>     <NA>     <NA>     
##  9  106238     79054 Discharge summary EF of 30 <NA>     <NA>     
## 10  114242     77490 Discharge summary <NA>     <NA>     <NA>     
## # ℹ 377,464 more rows

#LVEFの数値部分のみ抽出する 数値から分類カテゴリー列を作成する。 分類カテゴリーは文章表現を優先し、数値は文章がなかった場合とした。このあたりの優先も論文未記載。

df_note_echo_assess <-
  df_note_echo_1 |> 
  mutate(LVEF_num = case_when(
    !is.na(LVEF_ex1) ~ str_extract(LVEF_ex1, pattern = "[0-9]{2}"),
    !is.na(LVEF_ex2) ~ str_extract(LVEF_ex2, pattern = "[0-9]{2}"))
  ) |> 
  mutate(LVEF_num_assess = case_when(
    LVEF_num >= 55 ~ "normal",
    LVEF_num >= 45 ~ "mild",
    LVEF_num >= 30 ~ "moderate",
    LVEF_num >= 0 ~ "severe"
    
  )) |> 
  mutate(LVEF_ctg = case_when(
    !is.na(EF_assess) ~ EF_assess,
    !is.na(LVEF_num_assess) ~ LVEF_num_assess
  )) |> 
  arrange(CHARTDATE) |> 
  select(HADM_ID, CHARTDATE, LVEF_ctg) |> 
  filter(!is.na(LVEF_ctg)) |> 
  distinct(HADM_ID, .keep_all = T)

df_note_echo_assess
## # A tibble: 8,860 × 3
##    HADM_ID CHARTDATE LVEF_ctg
##      <dbl>     <dbl> <chr>   
##  1  125380     47665 moderate
##  2  137275     47668 normal  
##  3  176771     47678 severe  
##  4  192798     47684 normal  
##  5  116534     47699 normal  
##  6  185717     47704 mild    
##  7  175778     47706 normal  
##  8  175719     47709 moderate
##  9  134899     47712 severe  
## 10  184844     47712 moderate
## # ℹ 8,850 more rows

#データフレームのままだとmimic.dbファイルに結合できないので、.dbに戻す dbRemoveTable()は初回は不要。2回目以降は既に作成されているdb_LVEF_assessを削除する必要あり。

dbRemoveTable(con_mimic, "db_LVEF_assess")
dbWriteTable(con_mimic, "db_LVEF_assess", df_note_echo_assess)
db_LVEF_assess <- tbl(con_mimic, "db_LVEF_assess")
db_PEO_LVEF <-
  db_PEO |> 
    left_join(db_LVEF_assess, by = "HADM_ID",copy = TRUE)

db_PEO_LVEF <- db_PEO_LVEF |> compute()

#欠測は15%未満におさまった。

cols_ctg <- c("LVEF_ctg")

db_PEO_LVEF |> select(cols_ctg, "h2ras")|> collect() |> 
  mutate(LVEF_ctg = factor(LVEF_ctg, 
                           levels = c("severe", "moderate", "mild", "normal"))) |> 
  CreateTableOne(vars = cols_ctg, strata = "h2ras", factorVars = cols_ctg) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols_ctg)
## 
##   # Now:
##   data %>% select(all_of(cols_ctg))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd = TRUE, missing = TRUE) #|> write.csv("test.csv")
##               Stratified by h2ras
##                0            1            p      test SMD    Missing
##   n            5949         4441                                   
##   LVEF_ctg (%)                           <0.001       0.104 14.7   
##      severe    1294 (25.9)   845 (21.9)                            
##      moderate   922 (18.4)   689 (17.9)                            
##      mild       741 (14.8)   645 (16.7)                            
##      normal    2044 (40.9)  1680 (43.5)
db_LVEF <- 
  db_PEO_LVEF |> 
  select("HADM_ID", cols_ctg)

db_LVEF <- db_LVEF |> compute()
db_labitems <- tbl(con_mimic, "d_labitems")
db_labitems |> colnames()
## [1] "ROW_ID"     "ITEMID"     "LABEL"      "FLUID"      "CATEGORY"  
## [6] "LOINC_CODE"
df_labitems <- collect(db_labitems)
# blood
df_lab_items <-
  df_labitems |> 
  filter(str_detect(FLUID, coll("blood", ignore_case = T))) |> 
  filter(str_detect(LABEL, coll("white",ignore_case = T )) |
         str_detect(LABEL, coll("red blood",ignore_case = T )) |
         str_detect(LABEL, coll("platelet count",ignore_case = T )) |
         str_detect(LABEL, coll("gluco",ignore_case = T )) |
         str_detect(LABEL, coll("urea",ignore_case = T )) |
         str_detect(LABEL, coll("creatinine",ignore_case = T )) |
         str_detect(LABEL, coll("sodium",ignore_case = T )) |
         str_detect(LABEL, coll("calcium",ignore_case = T )) |
         str_detect(LABEL, coll("magne",ignore_case = T )) 
         ) |> 
  filter(CATEGORY != "Blood Gas") |>
  filter(!ITEMID %in% c(51278, 51529)) |>
  select(ITEMID, LABEL)

df_lab_items
## # A tibble: 9 × 2
##   ITEMID LABEL            
##    <dbl> <chr>            
## 1  50893 Calcium, Total   
## 2  50912 Creatinine       
## 3  50931 Glucose          
## 4  50960 Magnesium        
## 5  50983 Sodium           
## 6  51006 Urea Nitrogen    
## 7  51265 Platelet Count   
## 8  51279 Red Blood Cells  
## 9  51301 White Blood Cells
vec_labitems <- 
  df_lab_items |> 
  select(ITEMID) |> 
  pull()
db_labeve <- tbl(con_mimic, "labevents")
db_labeve |> colnames()
## [1] "ROW_ID"     "SUBJECT_ID" "HADM_ID"    "ITEMID"     "CHARTTIME" 
## [6] "VALUE"      "VALUENUM"   "VALUEUOM"   "FLAG"

#2.検査結果表(膨大)を上記9つに限定する 3.さらにHADM_IDを研究対象の入院に限定する/ 入院中複数回検査しているが、どのタイミングを採用するのか論文に書いてない。 最初のタイミングを採用した。

db_labeve |> head(20)
## # Source:   SQL [?? x 9]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    ROW_ID SUBJECT_ID HADM_ID ITEMID  CHARTTIME VALUE     VALUENUM VALUEUOM FLAG 
##     <dbl>      <dbl>   <dbl>  <dbl>      <dbl> <chr>        <dbl> <chr>    <chr>
##  1    281          3      NA  50820 4158576420 7.39          7.39 units    <NA> 
##  2    282          3      NA  50800 4158584220 ART          NA    <NA>     <NA> 
##  3    283          3      NA  50802 4158584220 -1           -1    mEq/L    <NA> 
##  4    284          3      NA  50804 4158584220 22           22    mEq/L    <NA> 
##  5    285          3      NA  50808 4158584220 0.93          0.93 mmol/L   abno…
##  6    286          3      NA  50812 4158584220 NOT INTU…    NA    <NA>     <NA> 
##  7    287          3      NA  50813 4158584220 1.8           1.8  mmol/L   <NA> 
##  8    288          3      NA  50818 4158584220 33           33    mm Hg    <NA> 
##  9    289          3      NA  50820 4158584220 7.42          7.42 units    <NA> 
## 10    290          3      NA  50821 4158584220 80           80    mm Hg    <NA> 
## # ℹ more rows
vec_hadms <- 
  db_PEO |> 
  select(HADM_ID) |> 
  pull()

db_labo_res <- 
  db_labeve |> 
  filter(ITEMID %in% vec_labitems) |> 
  filter(HADM_ID %in% vec_hadms) |> 
  group_by(HADM_ID) |> 
  distinct(ITEMID, .keep_all = T) |> 
  ungroup() |>
  compute()

db_labo_res |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 92348

#4.wide formatにして、データをJoinする

df_lab_list <- 
  df_lab_items |> 
  mutate(lab = str_sub(LABEL, start = 1, end = 3)) |> 
  select(ITEMID, lab)

dbRemoveTable(con_mimic, "df_lab_list")
dbWriteTable(con_mimic, "df_lab_list", df_lab_list)
db_lab_items_list <- tbl(con_mimic, "df_lab_list")
db_lab_items_list
## # Source:   table<df_lab_list> [9 x 2]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##   ITEMID lab  
##    <dbl> <chr>
## 1  50893 Cal  
## 2  50912 Cre  
## 3  50931 Glu  
## 4  50960 Mag  
## 5  50983 Sod  
## 6  51006 Ure  
## 7  51265 Pla  
## 8  51279 Red  
## 9  51301 Whi
db_labo_res_names <- 
  db_labo_res |> 
  left_join(db_lab_items_list, by = "ITEMID") |> 
  mutate(new_lab = paste(lab, VALUEUOM, sep = "_")) |> 
  select(!c(lab, VALUEUOM, VALUE, ROW_ID, FLAG, ITEMID, CHARTTIME)) |> 
  pivot_wider(names_from = new_lab, values_from = VALUENUM) |> 
  compute()

#<db_PEO_laboで完成> 割り算記号の列名は修正しておく

db_PEO_labo <-
  db_PEO |> 
  left_join(db_labo_res_names, by = c("HADM_ID", "SUBJECT_ID"),copy = TRUE) |> 
  rename(Calcium = "Cal_mg/dL",
         Cre = "Cre_mg/dL",
         Glucose = "Glu_mg/dL",
         Mg = "Mag_mg/dL",
         Sodium = "Sod_mEq/L",
         BUN = "Ure_mg/dL",
         Plate = "Pla_K/uL",
         RBC = "Red_m/uL",
         WBC = "Whi_K/uL"
         ) |> 
  compute()

db_PEO_labo |> head(10)
## # Source:   SQL [10 x 28]
## # Database: sqlite 3.41.2 [/Volumes/drive/RSQLite/temp/mimic.db]
##    SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30   age
##         <dbl>   <dbl>      <dbl> <dbl>       <dbl>       <dbl> <dbl>
##  1          3  145834     211552     0      236.             0  76.5
##  2          9  150750     220597     0        4.45           1  41.8
##  3         21  109451     217847     0      149.             0  87.4
##  4         26  197661     244882     0      659.             0  72.0
##  5         30  104557     225176     0       NA              0  95  
##  6         34  115799     263086     0     2021.             0  95  
##  7         37  188670     213503     0      503.             0  68.9
##  8         38  185910     248910     1       NA              0  75.9
##  9         42  119203     210828     0     1999.             0  61.2
## 10         49  190539     249195     1       NA              0  80.9
## # ℹ 21 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## #   DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <chr>,
## #   EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## #   deathday <dbl>, hosp_deathday <dbl>, Calcium <dbl>, Cre <dbl>,
## #   Glucose <dbl>, Mg <dbl>, Sodium <dbl>, BUN <dbl>, Plate <dbl>, RBC <dbl>,
## #   WBC <dbl>

#自作関数 表作成とヒストグラム作成の自作関数を作成する

f_table = function(cols_cont, cols_fact, db){
  tableone <-
    db |> 
    select(cols_cont, cols_fact, "h2ras")|> 
    collect() |> 
    CreateTableOne(vars = c(cols_cont, cols_fact), 
                 strata = "h2ras", 
                 factorVars = cols_fact) 
  print(tableone, smd = TRUE, missing = TRUE, test = FALSE, explain = FALSE)
}

#ヒストグラム

f_hist = function(cols_cont, db){
  db |> 
  select(cols_cont) |> 
  pivot_longer(cols = cols_cont, names_to = "name", values_to = "value") |>
  ggplot()+
  geom_histogram(aes(x = value), color = "black")+
  facet_wrap(~ name, scales = "free", ncol = 4) +
  theme_bw()+
  theme(text = element_text(size = 16))
}
cols_labo <- c("Calcium", "Cre", "Glucose", "Mg", 
          "Sodium", "BUN", "Plate", "RBC", "WBC")

db_PEO_labo |> 
  f_table(cols_cont = cols_labo, cols_fact = c())
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols_cont)
## 
##   # Now:
##   data %>% select(all_of(cols_cont))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols_fact)
## 
##   # Now:
##   data %>% select(all_of(cols_fact))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
##          Stratified by h2ras
##           0               1               SMD    Missing
##   n         5949            4441                        
##   Calcium   8.50 (0.82)     8.54 (0.79)    0.053 2.9    
##   Cre       1.75 (1.62)     1.56 (1.44)    0.125 1.0    
##   Glucose 144.74 (77.12)  139.09 (65.81)   0.079 1.0    
##   Mg        2.04 (0.59)     2.08 (0.39)    0.090 1.4    
##   Sodium  138.57 (5.05)   138.54 (4.60)    0.005 1.0    
##   BUN      36.27 (25.29)   30.92 (21.24)   0.229 1.0    
##   Plate   237.54 (119.76) 228.76 (118.48)  0.074 1.0    
##   RBC       3.65 (0.70)     3.62 (0.67)    0.050 1.0    
##   WBC      11.72 (10.45)   11.68 (10.45)   0.004 1.0

#最後のmerge用ファイル

db_labo <- 
  db_PEO_labo |> 
  select("HADM_ID", cols_labo)
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(cols_labo)
## 
##   # Now:
##   data %>% select(all_of(cols_labo))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
db_labo <- db_labo |> compute()

#Medications - RAAs xxxpril xxxartan aliskiren

RAA_vec <- c(".+pril", ".+artan", "alis.+")
RAA_or <- str_c(RAA_vec, collapse = "|")

RAA_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(RAA_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(RAA_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(RAA_or, ignore_case = T))) |>
  filter(!DRUG %in% c("Lidocaine-Prilocaine")) |> 
  select(DRUG) |> distinct() |> pull()

#“Lidocaine-Prilocaine”はノイズなので除去

使用薬剤歴全リストから、RAA阻害薬を使ったレコードのみに限定し、その患者IDを得る

# H2RAsの時作ったdb_presc_drug
RAA_user <- 
  db_presc_drug |> 
  filter(DRUG %in% RAA_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_RAA <-
  db_PEO |> 
  mutate(RAA = ifelse(SUBJECT_ID %in% RAA_user, 1, 0))
f_table(cols_cont = c(), 
        cols_fact = c("RAA"), 
        db = db_patients_RAA)
##          Stratified by h2ras
##           0            1            SMD    Missing
##   n       5949         4441                       
##   RAA = 1 2880 (48.4)  2722 (61.3)   0.261 0.0

for merge

db_RAA <- db_patients_RAA |> select(HADM_ID, RAA)

#これ以降、同じようなコードの繰り返しになる。 本来はコードを関数化するべきだが、内容の確認・学習の意味も込めて関数化は避けた。興味のある方は関数化してコードをスッキリさせると良い。

  • Diuritics furosemide (Lasix) torsemide (Demadex) bumetanide

hydrochlorothiazide chlorthalidone metolazone indapamide

amiloride triamterene (Maxzide) spironolactone (Aldactone) eplerenone (Inspra)

diurit_vec <-
  c("furosemide", "lasix", "torsemide", "Demadex", "bumetanide", "hydrochlorothiazide", 
    "chlorthalidone", "metolazone", "indapamide", "amiloride", "triamterene", 
    "spironolactone", "Aldactone", "eplerenone", "Inspra" )


diurit_or <- str_c(diurit_vec, collapse = "|")
diurit_or
## [1] "furosemide|lasix|torsemide|Demadex|bumetanide|hydrochlorothiazide|chlorthalidone|metolazone|indapamide|amiloride|triamterene|spironolactone|Aldactone|eplerenone|Inspra"

[1] “furosemide|lasix|torsemide|Demadex|bumetanide|hydrochlorothiazide|chlorthalidone|metolazone|indapamide|amiloride|triamterene|spironolactone|Aldactone|eplerenone|Inspra”

diuritic_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(diurit_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(diurit_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(diurit_or, ignore_case = T))) |> 
  select(DRUG) |> 
  filter(!DRUG %in% c("Captopril","Enalaprilat", "Heparin", "Metoprolol" )) |> 
  distinct() |> pull()

#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る

diuritic_user <- 
  db_presc_drug |> 
  filter(DRUG %in% diuritic_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_diuritic <-
  db_PEO |> 
  mutate(Diuritic = ifelse(SUBJECT_ID %in% diuritic_user, 1, 0))

#表作成

f_table(cols_cont = c(), 
        cols_fact = c("Diuritic"), 
        db = db_patients_diuritic)
##               Stratified by h2ras
##                0            1            SMD    Missing
##   n            5949         4441                       
##   Diuritic = 1 4311 (72.5)  4083 (91.9)   0.526 0.0

#for merge

db_diuritic <- db_patients_diuritic |> select(HADM_ID, Diuritic)

inotropic agents

Epinephrine (Adrenalin® or Auvi-Q®) Norepinephrine (Levophed® or Levarterenol®) Dopamine Dobutamine Ephedrine Phenylephrine

Levosimendan Digoxin (Cardoxin® or Lanoxin®) Milrinone Amrinone Enoximone

inotrop_vec <-
  c("Epinephrine", "Adrenalin", "Norepinephrine", "Levophed", "Levarterenol",
    "Dopamine", "Dobutamine"
    )

inotrop_or <- str_c(inotrop_vec, collapse = "|")
inotrop_or
## [1] "Epinephrine|Adrenalin|Norepinephrine|Levophed|Levarterenol|Dopamine|Dobutamine"
inotrop_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(inotrop_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(inotrop_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(inotrop_or, ignore_case = T))) |> 
  select(DRUG) |> 
  filter(!str_detect(DRUG, regex("bupivacaine|lidocaine", ignore_case = T))) |> 
  filter(!DRUG %in% c("1", "Epinephrine Topical Soln", "Epinephrine Inhalation",
                      "Racepinephrine", "racemic epinephrine")) |>
  distinct() |> pull()

inotrop_names
##  [1] "Norepinephrine"              "Epinephrine"                
##  [3] "Epinephrine 1:1000"          "DopAmine"                   
##  [5] "Dopamine HCl"                "DOPamine"                   
##  [7] "Norepinephrine Bitartrate"   "DOBUTamine"                 
##  [9] "Dobutamine"                  "Dobutamine HCl"             
## [11] "NORepinephrine"              "Dopamine Hcl"               
## [13] "Epinephrine HCl"             "Epinephrine-Sodium Chloride"
## [15] "EPINEPHrine"                 "Epinephrin"                 
## [17] "Epin"                        "Dopamine"                   
## [19] "EpiPen"                      "Dobutamine Hcl"             
## [21] "Epinephri"                   "EPINEPHRINE"                
## [23] "epinephrine"                 "Epine"                      
## [25] "Epineph"                     "epi"                        
## [27] "EPINEPHrine Auto Injector"   "Epi"                        
## [29] "Epinephrine Auto Injector"   "Epinephrine Base"           
## [31] "Epinephrine Kit"

#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る

inotrop_user <- 
  db_presc_drug |> 
  filter(DRUG %in% inotrop_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_inotrop <-
  db_PEO |> 
  mutate(inotrop = ifelse(SUBJECT_ID %in% inotrop_user, 1, 0))

#データ確認:

f_table(cols_cont = c(), 
        cols_fact = c("inotrop"), 
        db = db_patients_inotrop)
##              Stratified by h2ras
##               0            1            SMD    Missing
##   n           5949         4441                       
##   inotrop = 1 1780 (29.9)  2266 (51.0)   0.440 0.0

#for merge

db_inotrop <- db_patients_inotrop |> select(HADM_ID, inotrop)

#- adrenaline receptor antagonists Clonidine (Catapres) Phentolamine phenoxybenzamine Tamsulosin

Atenolol Propranolol Nebivilol Atenolol Oxprenolol Metoprolol Timolol Pindolol Nadolol Pindolol Esmolol Acebutolol Sotalol Talinolol Betaxolol Labetalol Carvedilol

anti_adrenal_vec <-
  c("Clonidine", "Catapres", "Phentolamine", "phenoxybenzamine", 
    "Tamsulosin", 
    "lol"
    )

anti_adrenal_or <- str_c(anti_adrenal_vec, collapse = "|")
anti_adrenal_or
## [1] "Clonidine|Catapres|Phentolamine|phenoxybenzamine|Tamsulosin|lol"
anti_adrenal_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(anti_adrenal_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(anti_adrenal_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(anti_adrenal_or, ignore_case = T))) |> 
  select(DRUG) |> 
  filter(!str_detect(DRUG, regex("bupivacaine|lidocaine", ignore_case = T))) |> 
  filter(!DRUG %in% c("Fentanyl Citrate", "Heparin")) |>
  distinct() |> pull()

anti_adrenal_names
##  [1] "Labetalol HCl"                       "Metoprolol"                         
##  [3] "Clonidine HCl"                       "Atenolol"                           
##  [5] "Metoprolol XL"                       "Esmolol"                            
##  [7] "Tamsulosin HCl"                      "Metoprolol Tartrate"                
##  [9] "Dorzolamide 2%/Timolol 0.5% Ophth."  "Carvedilol"                         
## [11] "Timolol Maleate 0.25%"               "Tamsulosin"                         
## [13] "Timolol Maleate 0.5%"                "Nadolol"                            
## [15] "Metoprolol Succinate XL"             "Labetalol"                          
## [17] "Clonidine Patch 0.3 mg/24 hr"        "Clonidine Patch 0.1 mg/24 hr"       
## [19] "CloniDINE"                           "Clonidine TTS 3 Patch"              
## [21] "Clonidine TTS 1 Patch"               "Clonidine Patch 0.2 mg/24 hr"       
## [23] "Propranolol"                         "Propranolol HCl"                    
## [25] "Sotalol HCl"                         "Metoprolol XL (Toprol XL)"          
## [27] "Sterile Diluent for Flolan"          "Clonidine TTS 2 Patch"              
## [29] "METOPROLOL"                          "Sotalol"                            
## [31] "Betaxolol Hcl  0.25%"                "Phentolamine Mesylate"              
## [33] "Bisoprolol Fumarate"                 "Acebutolol HCl"                     
## [35] "Levobunolol Hcl 0.25%"               "Levobunolol"                        
## [37] "Propranolol LA"                      "Clonidine TTS"                      
## [39] "Betoptic S"                          "Levobunolol 0.25%"                  
## [41] "Levobunolol 0.5%"                    "Levobunolol Hcl 0.5%"               
## [43] "LABETALOL"                           "ESMOLOL"                            
## [45] "Clonidine T"                         "Proprano"                           
## [47] "Toprol XL"                           "Betimol (timolol hemihydrate)"      
## [49] "Apraclonidine 0.5%"                  "Carteolol 1% Ophth Soln"            
## [51] "Betaxolol Ophth Susp  0.25%"         "Zebeta"                             
## [53] "Betimol ( Timolol )"                 "Cosopt"                             
## [55] "Betaxolol HCl  0.25%"                "Betaxolol HCl"                      
## [57] "Atenolol-Chlorthalidone"             "Pindolol"                           
## [59] "NEO*PO*Propranolol"                  "Stanozolol (Bulk)"                  
## [61] "Betaxolol"                           "*NF* TIMOLOL  HEMIHYDRATE (BETIMOL)"
## [63] "timolol"                             "Esmolol in Saline (Iso-osm)"        
## [65] "Propranolol Oral Solution"           "Clonidine"                          
## [67] "Timolol"                             "Levobunolol HCl 0.5%"               
## [69] "Timolol Maleate 0.5% XE"             "Metipranolol"                       
## [71] "TIMOP"                               "Phenoxybenzamine HCl"               
## [73] "Catapres"                            "flomax"                             
## [75] "Labet"                               "Metoprolo"                          
## [77] "Timolol Maleate 0.5% GFS"            "Propra"                             
## [79] "Acebutolol"                          "Prop"                               
## [81] "bisoprolol fumarate"                 "metipranolol"                       
## [83] "sotalol"                             "Istalol"                            
## [85] "Clonidine Patch"                     "Sotalol AF"                         
## [87] "timolo"                              "dorzolamide-timolol"                
## [89] "bisopro"                             "bisopr"                             
## [91] "bisop"                               "bisoprolol"

#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る

anti_adrenal_user <- 
  db_presc_drug |> 
  filter(DRUG %in% anti_adrenal_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_anti_adrenal <-
  db_PEO |> 
  mutate(anti_adrenal = ifelse(SUBJECT_ID %in% anti_adrenal_user, 1, 0))

#データ確認:

f_table(cols_cont = c(), 
        cols_fact = c("anti_adrenal"),
        db = db_patients_anti_adrenal)
##                   Stratified by h2ras
##                    0            1            SMD    Missing
##   n                5949         4441                       
##   anti_adrenal = 1 4439 (74.6)  4098 (92.3)   0.489 0.0

#for merge

db_anti_adrenal <- db_patients_anti_adrenal |> select(HADM_ID, anti_adrenal)

#calcium antagonists Amlodipine (Norvasc) Aranidipine (Sapresta) Azelnidipine (Calblock) Barnidipine (HypoCa) Benidipine (Coniel) Cilnidipine (Atelec, Cinalong, Siscard) Clevidipine (Cleviprex) Efonidipine (Landel) Felodipine (Plendil) Isradipine (DynaCirc, Prescal) Lacidipine (Motens, Lacipil) Lercanidipine (Zanidip) Manidipine (Calslot, Madipine) Nicardipine (Cardene, Carden SR) Nifedipine (Procardia, Adalat) Nilvadipine (Nivadil) Nimodipine (Nimotop) Nisoldipine (Baymycard, Sular, Syscor) Nitrendipine (Cardif, Nitrepin, Baylotensin) Pranidipine (Acalas)

Fendiline Gallopamil Verapamil (Calan, Isoptin) Diltiazem (Cardizem)

ca_blocker_vec <-
  c("dipine", "Fendiline", "Gallopamil", "Verapamil", 
    "Diltiazem"
    )

ca_blocker_or <- str_c(ca_blocker_vec, collapse = "|")
ca_blocker_or
## [1] "dipine|Fendiline|Gallopamil|Verapamil|Diltiazem"
ca_blocker_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(ca_blocker_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(ca_blocker_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(ca_blocker_or, ignore_case = T))) |> 
  select(DRUG) |> 
  distinct() |> pull()

ca_blocker_names
##  [1] "*NF* Nicardipine HCl IV"    "Amlodipine"                
##  [3] "NiCARdipine IV"             "Diltiazem"                 
##  [5] "Diltiazem Extended-Release" "Nifedipine CR"             
##  [7] "NiCARdipine"                "NIFEdipine CR"             
##  [9] "Nicardipine"                "Nifedipine"                
## [11] "NIFEdipine"                 "Verapamil HCl"             
## [13] "Verapamil SR"               "Felodipine"                
## [15] "Amlodipine Besylate"        "Nimodipine"                
## [17] "Nicardipine HCl IV"         "Lotrel"                    
## [19] "Verapamil"                  "diltiazem"                 
## [21] "Nicardipine HCl"            "Procainamide HCl"          
## [23] "Procardia XL"               "Dilti"                     
## [25] "*NF* Nifedipine XL"         "nifed"                     
## [27] "diltia"                     "diltiaz"                   
## [29] "nif"                        "DILT"                      
## [31] "PROCARDIA XL"               "nifedipine"                
## [33] "*NF* Diltiazem SR"          "Nisoldipine (Sular)"       
## [35] "Nifedi"                     "Nifedipine (Bulk)"         
## [37] "dilt"                       "Tiazac"                    
## [39] "Isradipine"                 "Diltiaz"                   
## [41] "Verapa"                     "Diltia"                    
## [43] "Nifed"                      "Calan SR"                  
## [45] "amlodipine-benazepril"      "Vera"

#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る

ca_blocker_user <- 
  db_presc_drug |> 
  filter(DRUG %in% ca_blocker_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_ca_blocker <-
  db_PEO |> 
  mutate(ca_blocker = ifelse(SUBJECT_ID %in% ca_blocker_user, 1, 0))

#データ確認:

f_table(cols_cont = c(), 
        cols_fact = c("ca_blocker"), 
        db = db_patients_ca_blocker)
##                 Stratified by h2ras
##                  0            1            SMD    Missing
##   n              5949         4441                       
##   ca_blocker = 1 1747 (29.4)  1714 (38.6)   0.196 0.0

#for merge

db_ca_blocker <- db_patients_ca_blocker |> select(HADM_ID, ca_blocker)

#anticoagultants Fondaparinux Idraparinux Idrabiotaparinux dabigatran, rivaroxaban, apixaban, edoxaban, betrixaban argatroban hirudin, lepirudin, bivalirudin

anticoag_vec <-
  c("Coumadin", "Warfarin", "Fondaparinux", "Idraparinux", "Idrabiotaparinux",
    "heparin",
    "dabigatran", "rivaroxaban", "apixaban", "edoxaban", "betrixaban", "argatroban",
    "hirudin", "lepirudin", "bivalirudin"
    )

anticoag_or <- str_c(anticoag_vec, collapse = "|")
anticoag_or
## [1] "Coumadin|Warfarin|Fondaparinux|Idraparinux|Idrabiotaparinux|heparin|dabigatran|rivaroxaban|apixaban|edoxaban|betrixaban|argatroban|hirudin|lepirudin|bivalirudin"
anticoag_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(anticoag_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(anticoag_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(anticoag_or, ignore_case = T))) |> 
  select(DRUG) |> 
  distinct() |> 
  pull()

anticoag_names
##  [1] "Warfarin"                            
##  [2] "Heparin Sodium"                      
##  [3] "Heparin"                             
##  [4] "Heparin Flush CVL  (100 units/ml)"   
##  [5] "Heparin Flush CRRT (5000 Units/mL)"  
##  [6] "Heparin Flush PICC (100 units/ml)"   
##  [7] "Argatroban"                          
##  [8] "HEPARIN"                             
##  [9] "Heparin Flush Hickman (100 units/ml)"
## [10] "Heparin Flush"                       
## [11] "Heparin (Preservative Free)"         
## [12] "Heparin Flush (10 units/ml)"         
## [13] "Heparin Sodium (Preservative Free)"  
## [14] "Heparin Flush (1000 units/mL)"       
## [15] "Heparin Flush (5000 Units/mL)"       
## [16] "Heparin Flush (100 units/ml)"        
## [17] "Lepirudin"                           
## [18] "Heparin Flush Port (10units/ml)"     
## [19] "Heparin Flush Midline (100 units/ml)"
## [20] "Heparin (CRRT Machine Priming)"      
## [21] "Dabigatran Etexilate"                
## [22] "Heparin (Preserv. Free)"             
## [23] "Heparin CRRT"                        
## [24] "Fondaparinux Sodium"                 
## [25] "heparin"                             
## [26] "Heparin Flush Port (10 units/mL)"    
## [27] "Heparin Lock Flush"                  
## [28] "Heparin (IABP)"                      
## [29] "Bivalirudin"                         
## [30] "Heparin Dwell (1000 Units/mL)"       
## [31] "Heparin (Hemodialysis)"              
## [32] "Heparin (Porcine)"                   
## [33] "Hepa"                                
## [34] "Hepar"                               
## [35] "H"                                   
## [36] "Fondaparinux"                        
## [37] "Heparin lock"                        
## [38] "Lansoprazole"                        
## [39] "Hepari"                              
## [40] "HEParin (Porcine) in NS (PF)"        
## [41] "Heparin Flu"                         
## [42] "hepar"                               
## [43] "Heparin Flush (1000 units/ml)"       
## [44] "hep"                                 
## [45] "*NF* Warfarin"                       
## [46] "Hep"                                 
## [47] "Warf"                                
## [48] "Heparin Flus"                        
## [49] "HEPARIN  FLUSH"                      
## [50] "*NF* Argatroban"                     
## [51] "Rivaroxaban"                         
## [52] "Heparin flush"                       
## [53] "Heparin Flush Port"                  
## [54] "Heparin Flush (10 units/mL)"         
## [55] "Heparin Flush (100 units/mL)"        
## [56] "Coumadin"                            
## [57] "dabigatran etexilate"                
## [58] "Heparin Flush (10 Units/mL)"         
## [59] "warfarin"                            
## [60] "Pradaxa"

#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る

anticoag_user <- 
  db_presc_drug |> 
  filter(DRUG %in% anticoag_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_anticoag <-
  db_PEO |> 
  mutate(anticoag = ifelse(SUBJECT_ID %in% anticoag_user, 1, 0))

#データ確認:

db_patients_anticoag |> 
  f_table(cols_cont = c(), 
          cols_fact = c("anticoag"))
##               Stratified by h2ras
##                0            1            SMD    Missing
##   n            5949         4441                       
##   anticoag = 1 4793 (80.6)  4091 (92.1)   0.341 0.0

#for merge

db_anticoag <- db_patients_anticoag |> select(HADM_ID, anticoag)

#antiplatelets Aspirin Triflusal (Disgren) Cangrelor (Kengreal) Clopidogrel (Plavix) Prasugrel (Effient) Ticagrelor (Brilinta) Ticlopidine (Ticlid) Cilostazol (Pletal) Vorapaxar (Zontivity) Abciximab (ReoPro) Eptifibatide (Integrilin) Tirofiban (Aggrastat) Dipyridamole (Persantine)

antiplate_vec <-
  c("Aspirin", "Cangrelor", "Clopidogrel", "Plavix", "Prasugrel",
    "Ticagrelor", "Brilinta",  "Ticlopidine", "Cilostazol", "Pletal", 
    "Abciximab", "ReoPro", "Eptifibatide", "Tirofiban", "Aggrastat", 
    "Dipyridamole", "Persantine"
    )

antiplate_or <- str_c(antiplate_vec, collapse = "|")
antiplate_or
## [1] "Aspirin|Cangrelor|Clopidogrel|Plavix|Prasugrel|Ticagrelor|Brilinta|Ticlopidine|Cilostazol|Pletal|Abciximab|ReoPro|Eptifibatide|Tirofiban|Aggrastat|Dipyridamole|Persantine"
antiplate_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(antiplate_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(antiplate_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(antiplate_or, ignore_case = T))) |> 
  select(DRUG) |> 
  distinct() |> 
  pull()

antiplate_names
##  [1] "Aspirin EC"                          
##  [2] "Aspirin"                             
##  [3] "Tirofiban"                           
##  [4] "Clopidogrel Bisulfate"               
##  [5] "Eptifibatide"                        
##  [6] "Clopidogrel"                         
##  [7] "Prasugrel"                           
##  [8] "Aspirin (Buffered)"                  
##  [9] "Butalbital-Aspirin-Caffeine"         
## [10] "Dipyridamole-Aspirin"                
## [11] "Abciximab"                           
## [12] "Ticlopidine HCl"                     
## [13] "Aspir"                               
## [14] "Aspiri"                              
## [15] "Aspirin Desensitization (Angioedema)"
## [16] "asa"                                 
## [17] "Pletal"                              
## [18] "Aspirin-Caffeine-Butalbital"         
## [19] "Dipyridamole"                        
## [20] "cilostazol"                          
## [21] "Cilostazol"                          
## [22] "*nf"                                 
## [23] "cilo"                                
## [24] "ASPIR"                               
## [25] "cilos"                               
## [26] "PleTAL"                              
## [27] "aspirin"                             
## [28] "ASPIRIN"                             
## [29] "Aspi"                                
## [30] "aspi"                                
## [31] "Aspirin (Rectal)"                    
## [32] "Clopidogrel Desensitization"         
## [33] "ASA"                                 
## [34] "cil"                                 
## [35] "Dopamine"                            
## [36] "Aspirin Childrens"                   
## [37] "aspir"                               
## [38] "as"                                  
## [39] "Aspirin 81 mg /placebo"              
## [40] "asp"                                 
## [41] "Aspirin 325 mg or placebo"           
## [42] "Aggrastat"                           
## [43] "ASA81"                               
## [44] "Excedrin Aspirin Free"               
## [45] "Excedrin Migraine"                   
## [46] "ticagrelor"                          
## [47] "CILOSTAZOL"                          
## [48] "cilostaz"                            
## [49] "Cilostaz"                            
## [50] "Aspirin Desensitization (AERD)"      
## [51] "Asp"                                 
## [52] "Aspirin 325mg/ placebo"              
## [53] "Aspirin 81 mg/ Placebo"              
## [54] "Aspirin 325 mg or Placebo"           
## [55] "Aspirin 81 mg or Placebo"            
## [56] "Aspirin 81mg or placbo"              
## [57] "Cangrelor Study Drug (*IND*)"        
## [58] "Clopidogrel 150 mg or placebo"       
## [59] "Aspirin 81 mg  or placebo"           
## [60] "Reopro"                              
## [61] "Butalbital-Acetaminophen-Caff"       
## [62] "Aspirin Desensitization"             
## [63] "Aspirin Desens"                      
## [64] "Aspirin 81 mg or placebo"            
## [65] "Aspirin 325mg or placebo"            
## [66] "Aspirin 81 mg /Placebo"              
## [67] "Brilinta"

#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る

antiplate_user <- 
  db_presc_drug |> 
  filter(DRUG %in% antiplate_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_antiplate <-
  db_PEO |> 
  mutate(antiplate = ifelse(SUBJECT_ID %in% antiplate_user, 1, 0))

#データ確認:

db_patients_antiplate |> 
  f_table(cols_cont = c(), 
          cols_fact = c("antiplate"))
##                Stratified by h2ras
##                 0            1            SMD    Missing
##   n             5949         4441                       
##   antiplate = 1 3679 (61.8)  3662 (82.5)   0.473 0.0

#for merge

db_antiplate <- db_patients_antiplate |> select(HADM_ID, antiplate)

#PPIs Omeprazole Lansoprazole Dexlansoprazole Esomeprazole Pantoprazole Rabeprazole Ilaprazole

ppi_vec <-
  c("Omeprazole", "Prilosec", "Nexium", "Lansoprazole", "Prevacid",
    "Dexlansoprazole", "Pantoprazole", "Rabeprazole", "Ilaprazole"
    )

ppi_or <- str_c(ppi_vec, collapse = "|")
ppi_or
## [1] "Omeprazole|Prilosec|Nexium|Lansoprazole|Prevacid|Dexlansoprazole|Pantoprazole|Rabeprazole|Ilaprazole"
ppi_names <-
  df_drugs |> 
  filter(str_detect(DRUG, regex(ppi_or, ignore_case = T)) |
         str_detect(DRUG_NAME_POE, regex(ppi_or, ignore_case = T)) |
         str_detect(DRUG_NAME_GENERIC, regex(ppi_or, ignore_case = T))) |> 
  select(DRUG) |> 
  filter(!str_detect(DRUG, regex("Heparin", ignore_case = T))) |>
  distinct() |> 
  pull()

ppi_names
##  [1] "Pantoprazole Sodium"                 
##  [2] "Pantoprazole"                        
##  [3] "Lansoprazole Oral Suspension"        
##  [4] "Lansoprazole"                        
##  [5] "Omeprazole"                          
##  [6] "Lansoprazole Oral Disintegrating Tab"
##  [7] "Lansoprazole Oral Solution"          
##  [8] "Nexium"                              
##  [9] "Neo*PO*Omeprazole"                   
## [10] "Prilosec"                            
## [11] "Prevacid"                            
## [12] "prevacid"                            
## [13] "PRILOSEC"                            
## [14] "esomeprazole"                        
## [15] "*NF* Esomeprazole"                   
## [16] "*NF*ESOMEPRAZOLE"                    
## [17] "nexium"                              
## [18] "omeprazole"                          
## [19] "rabeprazole"                         
## [20] "LANSOPR"                             
## [21] "lans"                                
## [22] "Lansop"                              
## [23] "LANS"                                
## [24] "lansoprazole"                        
## [25] "Esomeprazole Magnesium"              
## [26] "lansopr"                             
## [27] "NexIUM"                              
## [28] "prevac"                              
## [29] "LANSOP"                              
## [30] "lanso"                               
## [31] "panto"                               
## [32] "Pantoprazole (Self Med)"             
## [33] "Omeprazole-Sodium Bicarbonate"       
## [34] "Prilosec OTC"                        
## [35] "esomeprazole magnesium"              
## [36] "PrevACID 24Hr"                       
## [37] "Nexium 40mg capsules"                
## [38] "dexlansoprazole (Dexilant)"

#使用薬剤歴全リストから、薬を使ったレコードのみに限定し、その患者IDを得る

ppi_user <- 
  db_presc_drug |> 
  filter(DRUG %in% ppi_names) |> 
  select(SUBJECT_ID) |> 
  pull()

db_patients_ppi <-
  db_PEO |> 
  mutate(ppi = ifelse(SUBJECT_ID %in% ppi_user, 1, 0))

#データ確認:

db_patients_ppi |> 
  f_table(cols_cont = c(), 
          cols_fact = c("ppi"))
##          Stratified by h2ras
##           0            1            SMD    Missing
##   n       5949         4441                       
##   ppi = 1 4330 (72.8)  2987 (67.3)   0.121 0.0

#for merge

db_ppi <- db_patients_ppi |> select(HADM_ID, ppi)
db_PEO_total <-
  db_PEO |>
  left_join(db_HtWt, by = "HADM_ID") |> 
  left_join(db_vital, by = "HADM_ID") |> 
  left_join(db_urine, by = "HADM_ID") |> 
  left_join(db_sofa, by = "HADM_ID") |> 
  left_join(db_rrt, by = "HADM_ID") |> 
  left_join(db_venti, by = "HADM_ID") |> 
  left_join(db_ethnicity, by = "HADM_ID") |> 
  left_join(db_comorb, by = "HADM_ID") |> 
  compute()

db_PEO_total <-
  db_PEO_total |> 
  left_join(db_labo, by = "HADM_ID") |> 
  left_join(db_LVEF, by = "HADM_ID") |> 
  left_join(db_RAA, by = "HADM_ID") |> 
  left_join(db_diuritic, by = "HADM_ID") |> 
  left_join(db_inotrop, by = "HADM_ID") |> 
  left_join(db_anti_adrenal, by = "HADM_ID") |> 
  left_join(db_ca_blocker, by = "HADM_ID") |> 
  left_join(db_anticoag, by = "HADM_ID") |> 
  left_join(db_antiplate, by = "HADM_ID") |> 
  left_join(db_ppi, by = "HADM_ID") |> 
  compute()

#最終固定データとしてcsv形式で出力しておく (結局最終データは3.3MBしかない)

df_PEO_total <- db_PEO_total |> collect()
df_PEO_total |> write_csv("MIMIC_HDS.csv")
# file.size("MIMIC_HDS.csv")

#csvファイル読み込み

df_PEO_total <- read_csv("MIMIC_HDS.csv")
## Rows: 10390 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): ADMISSION_TYPE, INSURANCE, ETHNICITY, DIAGNOSIS, GENDER, ethnicity...
## dbl (60): SUBJECT_ID, HADM_ID, ICUSTAY_ID, h2ras, death_elasp, mortality30, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_PEO_total
## # A tibble: 10,390 × 67
##    SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30   age
##         <dbl>   <dbl>      <dbl> <dbl>       <dbl>       <dbl> <dbl>
##  1          3  145834     211552     0      236.             0  76.5
##  2          9  150750     220597     0        4.45           1  41.8
##  3         21  109451     217847     0      149.             0  87.4
##  4         26  197661     244882     0      659.             0  72.0
##  5         30  104557     225176     0       NA              0  95  
##  6         34  115799     263086     0     2021.             0  95  
##  7         37  188670     213503     0      503.             0  68.9
##  8         38  185910     248910     1       NA              0  75.9
##  9         42  119203     210828     0     1999.             0  61.2
## 10         49  190539     249195     1       NA              0  80.9
## # ℹ 10,380 more rows
## # ℹ 60 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## #   DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <chr>,
## #   EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## #   deathday <dbl>, hosp_deathday <dbl>, weight <dbl>, Height <dbl>, BMI <dbl>,
## #   HeartRate <dbl>, SysBP <dbl>, DiasBP <dbl>, MeanBP <dbl>, RespRate <dbl>,
## #   TempC <dbl>, SpO2 <dbl>, urine_out <dbl>, SOFA <dbl>, apsiii <dbl>, …
df_PEO_total <- 
  read_csv("MIMIC_HDS.csv") |> 
  mutate(GENDER = ifelse(GENDER == "M", 1, 0))
## Rows: 10390 Columns: 67
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (7): ADMISSION_TYPE, INSURANCE, ETHNICITY, DIAGNOSIS, GENDER, ethnicity...
## dbl (60): SUBJECT_ID, HADM_ID, ICUSTAY_ID, h2ras, death_elasp, mortality30, ...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
df_PEO_total
## # A tibble: 10,390 × 67
##    SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30   age
##         <dbl>   <dbl>      <dbl> <dbl>       <dbl>       <dbl> <dbl>
##  1          3  145834     211552     0      236.             0  76.5
##  2          9  150750     220597     0        4.45           1  41.8
##  3         21  109451     217847     0      149.             0  87.4
##  4         26  197661     244882     0      659.             0  72.0
##  5         30  104557     225176     0       NA              0  95  
##  6         34  115799     263086     0     2021.             0  95  
##  7         37  188670     213503     0      503.             0  68.9
##  8         38  185910     248910     1       NA              0  75.9
##  9         42  119203     210828     0     1999.             0  61.2
## 10         49  190539     249195     1       NA              0  80.9
## # ℹ 10,380 more rows
## # ℹ 60 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## #   DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <dbl>,
## #   EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## #   deathday <dbl>, hosp_deathday <dbl>, weight <dbl>, Height <dbl>, BMI <dbl>,
## #   HeartRate <dbl>, SysBP <dbl>, DiasBP <dbl>, MeanBP <dbl>, RespRate <dbl>,
## #   TempC <dbl>, SpO2 <dbl>, urine_out <dbl>, SOFA <dbl>, apsiii <dbl>, …

#unmatched_table

col_continuous <- 
  c("age", "Height", "weight", "BMI",
    "HeartRate", "SysBP", "DiasBP","RespRate", "TempC", "SpO2",
    "urine_out","SOFA", "apsiii",
    "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")

col_factors <- 
  c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "mortality30", "HOSPITAL_EXPIRE_FLAG",
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")
df_PEO_total |> 
  select(c(col_continuous, col_factors, "h2ras"))|>
  CreateTableOne(vars = c(col_continuous, col_factors), strata = "h2ras", factorVars = col_factors) -> tableone
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_continuous)
## 
##   # Now:
##   data %>% select(all_of(col_continuous))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
## Warning: Using an external vector in selections was deprecated in tidyselect 1.1.0.
## ℹ Please use `all_of()` or `any_of()` instead.
##   # Was:
##   data %>% select(col_factors)
## 
##   # Now:
##   data %>% select(all_of(col_factors))
## 
## See <https://tidyselect.r-lib.org/reference/faq-external-vector.html>.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
print(tableone, smd = T, missing = T, test = F, explain = F) |> 
  as.data.frame() #|> 
##                               Stratified by h2ras
##                                0                 1                 SMD   
##   n                               5949              4441                 
##   age                            74.35 (14.30)     71.26 (13.97)    0.219
##   Height                        167.67 (10.98)    168.20 (10.77)    0.048
##   weight                         80.15 (24.65)     81.99 (24.63)    0.075
##   BMI                            28.62 (8.03)      29.00 (8.11)     0.046
##   HeartRate                      88.36 (20.63)     88.12 (18.73)    0.012
##   SysBP                         123.44 (25.95)    122.21 (25.21)    0.048
##   DiasBP                         62.25 (17.36)     62.13 (16.40)    0.008
##   RespRate                       19.99 (6.23)      18.30 (6.31)     0.270
##   TempC                          36.54 (0.99)      36.49 (0.95)     0.056
##   SpO2                           96.42 (5.44)      97.16 (4.90)     0.143
##   urine_out                    1781.19 (1381.17) 1842.99 (1309.36)  0.046
##   SOFA                            4.59 (3.04)       5.04 (3.02)     0.147
##   apsiii                         48.66 (20.54)     46.51 (19.58)    0.107
##   Calcium                         8.50 (0.82)       8.54 (0.79)     0.053
##   Cre                             1.75 (1.62)       1.56 (1.44)     0.125
##   Glucose                       144.74 (77.12)    139.09 (65.81)    0.079
##   Mg                              2.04 (0.59)       2.08 (0.39)     0.090
##   Sodium                        138.57 (5.05)     138.54 (4.60)     0.005
##   BUN                            36.27 (25.29)     30.92 (21.24)    0.229
##   Plate                         237.54 (119.76)   228.76 (118.48)   0.074
##   RBC                             3.65 (0.70)       3.62 (0.67)     0.050
##   WBC                            11.72 (10.45)     11.68 (10.45)    0.004
##   ADMISSION_TYPE                                                    0.353
##      ELECTIVE                      411 ( 6.9)        793 (17.9)          
##      EMERGENCY                    5270 (88.6)       3548 (79.9)          
##      URGENT                        268 ( 4.5)        100 ( 2.3)          
##   INSURANCE                                                         0.122
##      Government                     80 ( 1.3)         58 ( 1.3)          
##      Medicare                     4831 (81.2)       3401 (76.6)          
##      Private                      1012 (17.0)        968 (21.8)          
##      Self Pay                       26 ( 0.4)         14 ( 0.3)          
##   GENDER = 1                      3074 (51.7)       2446 (55.1)     0.068
##   ethnicity_5                                                       0.081
##      asian                         101 ( 1.7)         80 ( 1.8)          
##      black                         499 ( 8.5)        362 ( 8.2)          
##      hispanic                      123 ( 2.1)        125 ( 2.8)          
##      other                         892 (15.2)        569 (12.9)          
##      white                        4262 (72.5)       3279 (74.3)          
##   mortality30 = 1                 1283 (21.6)        538 (12.1)     0.255
##   HOSPITAL_EXPIRE_FLAG = 1        1075 (18.1)        481 (10.8)     0.207
##   rrt = 1                          155 ( 2.6)        138 ( 3.1)     0.030
##   ventilation = 1                 2403 (40.4)       2837 (63.9)     0.484
##   hypertension = 1                2356 (39.6)       2027 (45.6)     0.122
##   DM = 1                          2132 (35.8)       1643 (37.0)     0.024
##   anemia = 1                      1996 (33.6)       1416 (31.9)     0.036
##   Af = 1                          2537 (42.6)       2034 (45.8)     0.064
##   coronary_atherosclerosis = 1    2268 (38.1)       2047 (46.1)     0.162
##   venous_thrombus = 1              112 ( 1.9)         76 ( 1.7)     0.013
##   MI = 1                          1002 (16.8)        716 (16.1)     0.019
##   gastritis = 1                    121 ( 2.0)         61 ( 1.4)     0.051
##   gastric_ulcer = 1                 66 ( 1.1)         20 ( 0.5)     0.075
##   duodenal_ulcer = 1                71 ( 1.2)         25 ( 0.6)     0.068
##   gastric_bleeding = 1             516 ( 8.7)        184 ( 4.1)     0.186
##   aki = 1                         2262 (38.0)       1410 (31.7)     0.132
##   septic_shock = 1                 415 ( 7.0)        260 ( 5.9)     0.046
##   pneumonia = 1                   1343 (22.6)        926 (20.9)     0.042
##   LVEF_ctg                                                          0.104
##      mild                          741 (14.8)        645 (16.7)          
##      moderate                      922 (18.4)        689 (17.9)          
##      normal                       2044 (40.9)       1680 (43.5)          
##      severe                       1294 (25.9)        845 (21.9)          
##   RAA = 1                         2880 (48.4)       2722 (61.3)     0.261
##   Diuritic = 1                    4311 (72.5)       4083 (91.9)     0.526
##   inotrop = 1                     1780 (29.9)       2266 (51.0)     0.440
##   anti_adrenal = 1                4439 (74.6)       4098 (92.3)     0.489
##   ca_blocker = 1                  1747 (29.4)       1714 (38.6)     0.196
##   anticoag = 1                    4793 (80.6)       4091 (92.1)     0.341
##   antiplate = 1                   3679 (61.8)       3662 (82.5)     0.473
##   ppi = 1                         4330 (72.8)       2987 (67.3)     0.121
##                               Stratified by h2ras
##                                Missing
##   n                                   
##   age                           0.0   
##   Height                       12.6   
##   weight                        3.0   
##   BMI                          13.9   
##   HeartRate                     1.7   
##   SysBP                         1.7   
##   DiasBP                        1.7   
##   RespRate                      1.7   
##   TempC                         2.0   
##   SpO2                          1.8   
##   urine_out                     4.0   
##   SOFA                          0.0   
##   apsiii                        0.0   
##   Calcium                       2.9   
##   Cre                           1.0   
##   Glucose                       1.0   
##   Mg                            1.4   
##   Sodium                        1.0   
##   BUN                           1.0   
##   Plate                         1.0   
##   RBC                           1.0   
##   WBC                           1.0   
##   ADMISSION_TYPE                0.0   
##      ELECTIVE                         
##      EMERGENCY                        
##      URGENT                           
##   INSURANCE                     0.0   
##      Government                       
##      Medicare                         
##      Private                          
##      Self Pay                         
##   GENDER = 1                    0.0   
##   ethnicity_5                   0.9   
##      asian                            
##      black                            
##      hispanic                         
##      other                            
##      white                            
##   mortality30 = 1               0.0   
##   HOSPITAL_EXPIRE_FLAG = 1      0.0   
##   rrt = 1                       0.0   
##   ventilation = 1               0.0   
##   hypertension = 1              0.0   
##   DM = 1                        0.0   
##   anemia = 1                    0.0   
##   Af = 1                        0.0   
##   coronary_atherosclerosis = 1  0.0   
##   venous_thrombus = 1           0.0   
##   MI = 1                        0.0   
##   gastritis = 1                 0.0   
##   gastric_ulcer = 1             0.0   
##   duodenal_ulcer = 1            0.0   
##   gastric_bleeding = 1          0.0   
##   aki = 1                       0.0   
##   septic_shock = 1              0.0   
##   pneumonia = 1                 0.0   
##   LVEF_ctg                     14.7   
##      mild                             
##      moderate                         
##      normal                           
##      severe                           
##   RAA = 1                       0.0   
##   Diuritic = 1                  0.0   
##   inotrop = 1                   0.0   
##   anti_adrenal = 1              0.0   
##   ca_blocker = 1                0.0   
##   anticoag = 1                  0.0   
##   antiplate = 1                 0.0   
##   ppi = 1                       0.0
##                                              0                 1    SMD Missing
## n                                         5949              4441               
## age                              74.35 (14.30)     71.26 (13.97)  0.219     0.0
## Height                          167.67 (10.98)    168.20 (10.77)  0.048    12.6
## weight                           80.15 (24.65)     81.99 (24.63)  0.075     3.0
## BMI                               28.62 (8.03)      29.00 (8.11)  0.046    13.9
## HeartRate                        88.36 (20.63)     88.12 (18.73)  0.012     1.7
## SysBP                           123.44 (25.95)    122.21 (25.21)  0.048     1.7
## DiasBP                           62.25 (17.36)     62.13 (16.40)  0.008     1.7
## RespRate                          19.99 (6.23)      18.30 (6.31)  0.270     1.7
## TempC                             36.54 (0.99)      36.49 (0.95)  0.056     2.0
## SpO2                              96.42 (5.44)      97.16 (4.90)  0.143     1.8
## urine_out                    1781.19 (1381.17) 1842.99 (1309.36)  0.046     4.0
## SOFA                               4.59 (3.04)       5.04 (3.02)  0.147     0.0
## apsiii                           48.66 (20.54)     46.51 (19.58)  0.107     0.0
## Calcium                            8.50 (0.82)       8.54 (0.79)  0.053     2.9
## Cre                                1.75 (1.62)       1.56 (1.44)  0.125     1.0
## Glucose                         144.74 (77.12)    139.09 (65.81)  0.079     1.0
## Mg                                 2.04 (0.59)       2.08 (0.39)  0.090     1.4
## Sodium                           138.57 (5.05)     138.54 (4.60)  0.005     1.0
## BUN                              36.27 (25.29)     30.92 (21.24)  0.229     1.0
## Plate                          237.54 (119.76)   228.76 (118.48)  0.074     1.0
## RBC                                3.65 (0.70)       3.62 (0.67)  0.050     1.0
## WBC                              11.72 (10.45)     11.68 (10.45)  0.004     1.0
## ADMISSION_TYPE                                                    0.353     0.0
##    ELECTIVE                        411 ( 6.9)        793 (17.9)                
##    EMERGENCY                      5270 (88.6)       3548 (79.9)                
##    URGENT                          268 ( 4.5)        100 ( 2.3)                
## INSURANCE                                                         0.122     0.0
##    Government                       80 ( 1.3)         58 ( 1.3)                
##    Medicare                       4831 (81.2)       3401 (76.6)                
##    Private                        1012 (17.0)        968 (21.8)                
##    Self Pay                         26 ( 0.4)         14 ( 0.3)                
## GENDER = 1                        3074 (51.7)       2446 (55.1)   0.068     0.0
## ethnicity_5                                                       0.081     0.9
##    asian                           101 ( 1.7)         80 ( 1.8)                
##    black                           499 ( 8.5)        362 ( 8.2)                
##    hispanic                        123 ( 2.1)        125 ( 2.8)                
##    other                           892 (15.2)        569 (12.9)                
##    white                          4262 (72.5)       3279 (74.3)                
## mortality30 = 1                   1283 (21.6)        538 (12.1)   0.255     0.0
## HOSPITAL_EXPIRE_FLAG = 1          1075 (18.1)        481 (10.8)   0.207     0.0
## rrt = 1                            155 ( 2.6)        138 ( 3.1)   0.030     0.0
## ventilation = 1                   2403 (40.4)       2837 (63.9)   0.484     0.0
## hypertension = 1                  2356 (39.6)       2027 (45.6)   0.122     0.0
## DM = 1                            2132 (35.8)       1643 (37.0)   0.024     0.0
## anemia = 1                        1996 (33.6)       1416 (31.9)   0.036     0.0
## Af = 1                            2537 (42.6)       2034 (45.8)   0.064     0.0
## coronary_atherosclerosis = 1      2268 (38.1)       2047 (46.1)   0.162     0.0
## venous_thrombus = 1                112 ( 1.9)         76 ( 1.7)   0.013     0.0
## MI = 1                            1002 (16.8)        716 (16.1)   0.019     0.0
## gastritis = 1                      121 ( 2.0)         61 ( 1.4)   0.051     0.0
## gastric_ulcer = 1                   66 ( 1.1)         20 ( 0.5)   0.075     0.0
## duodenal_ulcer = 1                  71 ( 1.2)         25 ( 0.6)   0.068     0.0
## gastric_bleeding = 1               516 ( 8.7)        184 ( 4.1)   0.186     0.0
## aki = 1                           2262 (38.0)       1410 (31.7)   0.132     0.0
## septic_shock = 1                   415 ( 7.0)        260 ( 5.9)   0.046     0.0
## pneumonia = 1                     1343 (22.6)        926 (20.9)   0.042     0.0
## LVEF_ctg                                                          0.104    14.7
##    mild                            741 (14.8)        645 (16.7)                
##    moderate                        922 (18.4)        689 (17.9)                
##    normal                         2044 (40.9)       1680 (43.5)                
##    severe                         1294 (25.9)        845 (21.9)                
## RAA = 1                           2880 (48.4)       2722 (61.3)   0.261     0.0
## Diuritic = 1                      4311 (72.5)       4083 (91.9)   0.526     0.0
## inotrop = 1                       1780 (29.9)       2266 (51.0)   0.440     0.0
## anti_adrenal = 1                  4439 (74.6)       4098 (92.3)   0.489     0.0
## ca_blocker = 1                    1747 (29.4)       1714 (38.6)   0.196     0.0
## anticoag = 1                      4793 (80.6)       4091 (92.1)   0.341     0.0
## antiplate = 1                     3679 (61.8)       3662 (82.5)   0.473     0.0
## ppi = 1                           4330 (72.8)       2987 (67.3)   0.121     0.0
  write.csv("unmatched.csv")
## "","x"
## "1","unmatched.csv"
df_unmatched_table <- read.csv("/Users/abekohei/Desktop/大学院授業/mimic 演習/mimic演習/unmatched.csv", 
                               col.names = c("name", "non_user", "user", "smd", "missing"))
df_unmatched_table
##                            name          non_user              user   smd
## 1                             n              5949              4441    NA
## 2                           age     74.35 (14.30)     71.26 (13.97) 0.219
## 3                        Height    167.93 (14.24)    168.53 (14.46) 0.042
## 4                        weight     80.12 (24.69)     81.99 (24.63) 0.076
## 5                           BMI      28.67 (9.76)      28.97 (8.16) 0.033
## 6                     HeartRate     88.36 (20.63)     88.12 (18.73) 0.012
## 7                         SysBP    123.44 (25.95)    122.21 (25.21) 0.048
## 8                        DiasBP     62.25 (17.36)     62.13 (16.40) 0.008
## 9                      RespRate      19.99 (6.23)      18.30 (6.31) 0.270
## 10                        TempC      36.54 (0.99)      36.49 (0.95) 0.056
## 11                         SpO2      96.42 (5.44)      97.16 (4.90) 0.143
## 12                    urine_out 1781.19 (1381.17) 1842.99 (1309.36) 0.046
## 13                         SOFA       4.59 (3.04)       5.04 (3.02) 0.147
## 14                       apsiii     48.66 (20.54)     46.51 (19.58) 0.107
## 15                      Calcium       8.50 (0.82)       8.54 (0.79) 0.053
## 16                          Cre       1.75 (1.62)       1.56 (1.44) 0.125
## 17                      Glucose    144.74 (77.12)    139.09 (65.81) 0.079
## 18                           Mg       2.04 (0.59)       2.08 (0.39) 0.090
## 19                       Sodium     138.57 (5.05)     138.54 (4.60) 0.005
## 20                          BUN     36.27 (25.29)     30.92 (21.24) 0.229
## 21                        Plate   237.54 (119.76)   228.76 (118.48) 0.074
## 22                          RBC       3.65 (0.70)       3.62 (0.67) 0.050
## 23                          WBC     11.72 (10.45)     11.68 (10.45) 0.004
## 24               ADMISSION_TYPE                                     0.353
## 25                     ELECTIVE       411 ( 6.9)        793 (17.9)     NA
## 26                    EMERGENCY      5270 (88.6)       3548 (79.9)     NA
## 27                       URGENT       268 ( 4.5)        100 ( 2.3)     NA
## 28                    INSURANCE                                     0.122
## 29                   Government        80 ( 1.3)         58 ( 1.3)     NA
## 30                     Medicare      4831 (81.2)       3401 (76.6)     NA
## 31                      Private      1012 (17.0)        968 (21.8)     NA
## 32                     Self Pay        26 ( 0.4)         14 ( 0.3)     NA
## 33                   GENDER = M      3074 (51.7)       2446 (55.1)  0.068
## 34                  ethnicity_5                                     0.081
## 35                        asian       101 ( 1.7)         80 ( 1.8)     NA
## 36                        black       499 ( 8.5)        362 ( 8.2)     NA
## 37                     hispanic       123 ( 2.1)        125 ( 2.8)     NA
## 38                        other       892 (15.2)        569 (12.9)     NA
## 39                        white      4262 (72.5)       3279 (74.3)     NA
## 40              mortality30 = 1      1283 (21.6)        538 (12.1)  0.255
## 41     HOSPITAL_EXPIRE_FLAG = 1      1075 (18.1)        481 (10.8)  0.207
## 42                      rrt = 1       155 ( 2.6)        138 ( 3.1)  0.030
## 43              ventilation = 1      2403 (40.4)       2837 (63.9)  0.484
## 44             hypertension = 1      2356 (39.6)       2027 (45.6)  0.122
## 45                       DM = 1      2132 (35.8)       1643 (37.0)  0.024
## 46                   anemia = 1      1996 (33.6)       1416 (31.9)  0.036
## 47                       Af = 1      2537 (42.6)       2034 (45.8)  0.064
## 48 coronary_atherosclerosis = 1      2268 (38.1)       2047 (46.1)  0.162
## 49          venous_thrombus = 1       112 ( 1.9)         76 ( 1.7)  0.013
## 50                       MI = 1      1002 (16.8)        716 (16.1)  0.019
## 51                gastritis = 1       121 ( 2.0)         61 ( 1.4)  0.051
## 52            gastric_ulcer = 1        66 ( 1.1)         20 ( 0.5)  0.075
## 53           duodenal_ulcer = 1        71 ( 1.2)         25 ( 0.6)  0.068
## 54         gastric_bleeding = 1       516 ( 8.7)        184 ( 4.1)  0.186
## 55                      aki = 1      2262 (38.0)       1410 (31.7)  0.132
## 56             septic_shock = 1       415 ( 7.0)        260 ( 5.9)  0.046
## 57                pneumonia = 1      1343 (22.6)        926 (20.9)  0.042
## 58                     LVEF_ctg                                     0.104
## 59                         mild       741 (14.8)        645 (16.7)     NA
## 60                     moderate       922 (18.4)        689 (17.9)     NA
## 61                       normal      2044 (40.9)       1680 (43.5)     NA
## 62                       severe      1294 (25.9)        845 (21.9)     NA
## 63                      RAA = 1      2880 (48.4)       2722 (61.3)  0.261
## 64                 Diuritic = 1      4311 (72.5)       4083 (91.9)  0.526
## 65                  inotrop = 1      1780 (29.9)       2266 (51.0)  0.440
## 66             anti_adrenal = 1      4439 (74.6)       4098 (92.3)  0.489
## 67               ca_blocker = 1      1747 (29.4)       1714 (38.6)  0.196
## 68                 anticoag = 1      4793 (80.6)       4091 (92.1)  0.341
## 69                antiplate = 1      3679 (61.8)       3662 (82.5)  0.473
## 70                      ppi = 1      4330 (72.8)       2987 (67.3)  0.121
##    missing
## 1       NA
## 2      0.0
## 3     12.5
## 4      3.0
## 5     13.8
## 6      1.7
## 7      1.7
## 8      1.7
## 9      1.7
## 10     2.0
## 11     1.8
## 12     4.0
## 13     0.0
## 14     0.0
## 15     2.9
## 16     1.0
## 17     1.0
## 18     1.4
## 19     1.0
## 20     1.0
## 21     1.0
## 22     1.0
## 23     1.0
## 24     0.0
## 25      NA
## 26      NA
## 27      NA
## 28     0.0
## 29      NA
## 30      NA
## 31      NA
## 32      NA
## 33     0.0
## 34     0.9
## 35      NA
## 36      NA
## 37      NA
## 38      NA
## 39      NA
## 40     0.0
## 41     0.0
## 42     0.0
## 43     0.0
## 44     0.0
## 45     0.0
## 46     0.0
## 47     0.0
## 48     0.0
## 49     0.0
## 50     0.0
## 51     0.0
## 52     0.0
## 53     0.0
## 54     0.0
## 55     0.0
## 56     0.0
## 57     0.0
## 58    14.7
## 59      NA
## 60      NA
## 61      NA
## 62      NA
## 63     0.0
## 64     0.0
## 65     0.0
## 66     0.0
## 67     0.0
## 68     0.0
## 69     0.0
## 70     0.0

1 完全ケース解析

欠測除外した完全ケース解析用データ

df_PEO_total_complete <- df_PEO_total[complete.cases(df_PEO_total[c("BMI","RespRate", "TempC", "SpO2", "urine_out", "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")]), ]
#"hosp_deathday","HeartRate", "SysBP", "DiasBP"
df_PEO_total_complete |> mutate_if(is.character, as.factor) |> skim()
Data summary
Name mutate_if(df_PEO_total_co…
Number of rows 8460
Number of columns 67
_______________________
Column type frequency:
factor 6
numeric 61
________________________
Group variables None

Variable type: factor

skim_variable n_missing complete_rate ordered n_unique top_counts
ADMISSION_TYPE 0 1.00 FALSE 3 EME: 7186, ELE: 986, URG: 288
INSURANCE 0 1.00 FALSE 4 Med: 6624, Pri: 1683, Gov: 119, Sel: 34
ETHNICITY 0 1.00 FALSE 35 WHI: 6157, UNK: 819, BLA: 656, HIS: 163
DIAGNOSIS 1 1.00 FALSE 3533 CON: 492, PNE: 335, CHE: 215, SEP: 210
ethnicity_5 0 1.00 FALSE 5 whi: 6217, oth: 1202, bla: 685, his: 204
LVEF_ctg 792 0.91 FALSE 4 nor: 3267, sev: 1831, mod: 1364, mil: 1206

Variable type: numeric

skim_variable n_missing complete_rate mean sd p0 p25 p50 p75 p100 hist
SUBJECT_ID 0 1.00 35083.82 28643.26 3.00 12477.75 24870.50 57057.75 99995.00 ▇▅▂▂▂
HADM_ID 0 1.00 150496.60 28972.72 100018.00 125420.00 150302.00 175735.75 199993.00 ▇▇▇▇▇
ICUSTAY_ID 0 1.00 249799.85 28924.84 200014.00 224679.00 249885.50 275065.50 299987.00 ▇▇▇▇▇
h2ras 0 1.00 0.45 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
death_elasp 3614 0.57 559.25 772.62 -0.69 25.52 173.27 832.93 3907.26 ▇▂▁▁▁
mortality30 0 1.00 0.15 0.36 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▂
age 0 1.00 72.51 14.17 19.16 63.43 74.60 82.73 95.00 ▁▁▅▇▇
HOSPITAL_EXPIRE_FLAG 0 1.00 0.14 0.34 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
GENDER 0 1.00 0.54 0.50 0.00 0.00 1.00 1.00 1.00 ▇▁▁▁▇
EXPIRE_FLAG 0 1.00 0.57 0.49 0.00 0.00 1.00 1.00 1.00 ▆▁▁▁▇
birthday 0 1.00 33198.08 26532.80 -61909.00 28365.25 38708.00 48988.50 77986.00 ▁▁▁▇▃
admitday 0 1.00 66285.03 10678.91 47663.50 57031.46 66332.59 75496.55 87074.11 ▇▇▇▇▆
dischday 0 1.00 66297.48 10679.11 47669.75 57044.31 66343.61 75501.22 87084.60 ▇▇▇▇▆
deathday 3614 0.57 66824.94 10702.84 47683.00 57690.00 66852.50 76066.50 88183.00 ▇▇▇▇▅
hosp_deathday 7317 0.14 65789.52 10695.71 47683.44 56528.16 65601.01 74764.55 85821.46 ▇▇▇▇▆
weight 0 1.00 81.50 24.78 27.30 65.20 77.80 93.00 295.00 ▇▆▁▁▁
Height 0 1.00 167.83 10.91 86.36 160.02 167.64 175.26 212.29 ▁▁▃▇▁
BMI 0 1.00 28.84 8.13 10.22 23.70 27.37 32.08 136.09 ▇▂▁▁▁
HeartRate 0 1.00 88.50 19.82 1.00 75.00 87.00 100.00 191.00 ▁▃▇▁▁
SysBP 0 1.00 122.54 25.23 11.00 105.00 120.00 138.00 281.00 ▁▇▇▁▁
DiasBP 0 1.00 62.53 16.89 10.00 51.00 61.00 72.00 202.00 ▂▇▁▁▁
MeanBP 0 1.00 80.62 17.89 0.83 68.33 79.00 91.00 223.00 ▁▇▃▁▁
RespRate 0 1.00 19.21 6.37 1.00 14.00 18.00 23.00 65.00 ▂▇▂▁▁
TempC 0 1.00 36.52 0.97 30.56 35.94 36.50 37.06 40.83 ▁▁▇▅▁
SpO2 0 1.00 96.75 5.10 9.00 95.00 98.00 100.00 100.00 ▁▁▁▁▇
urine_out 0 1.00 1836.68 1378.79 -1305.00 939.75 1590.00 2500.00 45680.00 ▇▁▁▁▁
SOFA 0 1.00 4.89 3.04 0.00 3.00 4.00 7.00 21.00 ▇▆▂▁▁
apsiii 0 1.00 48.30 19.30 1.00 35.00 45.00 58.00 174.00 ▃▇▂▁▁
rrt 0 1.00 0.03 0.18 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
ventilation 0 1.00 0.54 0.50 0.00 0.00 1.00 1.00 1.00 ▇▁▁▁▇
hypertension 0 1.00 0.43 0.49 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
DM 0 1.00 0.36 0.48 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▅
anemia 0 1.00 0.33 0.47 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▅
Af 0 1.00 0.44 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
coronary_atherosclerosis 0 1.00 0.43 0.50 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
venous_thrombus 0 1.00 0.02 0.14 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
MI 0 1.00 0.17 0.38 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▂
gastritis 0 1.00 0.02 0.13 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
gastric_ulcer 0 1.00 0.01 0.09 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
duodenal_ulcer 0 1.00 0.01 0.09 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
gastric_bleeding 0 1.00 0.06 0.24 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
aki 0 1.00 0.37 0.48 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▅
septic_shock 0 1.00 0.07 0.25 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▁
pneumonia 0 1.00 0.23 0.42 0.00 0.00 0.00 0.00 1.00 ▇▁▁▁▂
Calcium 0 1.00 8.51 0.80 0.00 8.00 8.50 9.00 15.60 ▁▁▇▁▁
Cre 0 1.00 1.62 1.45 0.10 0.90 1.20 1.80 18.80 ▇▁▁▁▁
Glucose 0 1.00 142.32 72.48 17.00 103.00 124.00 159.00 1140.00 ▇▁▁▁▁
Mg 0 1.00 2.06 0.54 0.30 1.80 2.00 2.30 33.00 ▇▁▁▁▁
Sodium 0 1.00 138.44 4.75 97.00 136.00 139.00 141.00 170.00 ▁▁▇▃▁
BUN 0 1.00 33.55 23.42 2.00 18.00 26.00 42.00 272.00 ▇▁▁▁▁
Plate 0 1.00 233.51 119.51 5.00 155.00 215.00 290.00 1256.00 ▇▃▁▁▁
RBC 0 1.00 3.64 0.69 1.13 3.16 3.57 4.06 7.10 ▁▇▇▁▁
WBC 0 1.00 11.60 8.94 0.10 7.50 10.20 13.90 486.80 ▇▁▁▁▁
RAA 0 1.00 0.57 0.50 0.00 0.00 1.00 1.00 1.00 ▆▁▁▁▇
Diuritic 0 1.00 0.84 0.36 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
inotrop 0 1.00 0.42 0.49 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▆
anti_adrenal 0 1.00 0.85 0.36 0.00 1.00 1.00 1.00 1.00 ▂▁▁▁▇
ca_blocker 0 1.00 0.35 0.48 0.00 0.00 0.00 1.00 1.00 ▇▁▁▁▅
anticoag 0 1.00 0.88 0.32 0.00 1.00 1.00 1.00 1.00 ▁▁▁▁▇
antiplate 0 1.00 0.74 0.44 0.00 0.00 1.00 1.00 1.00 ▃▁▁▁▇
ppi 0 1.00 0.71 0.45 0.00 0.00 1.00 1.00 1.00 ▃▁▁▁▇
df_PEO_total_complete |> count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1  8460
df_PEO_total_complete |>head()
## # A tibble: 6 × 67
##   SUBJECT_ID HADM_ID ICUSTAY_ID h2ras death_elasp mortality30   age
##        <dbl>   <dbl>      <dbl> <dbl>       <dbl>       <dbl> <dbl>
## 1          3  145834     211552     0      236.             0  76.5
## 2          9  150750     220597     0        4.45           1  41.8
## 3         21  109451     217847     0      149.             0  87.4
## 4         26  197661     244882     0      659.             0  72.0
## 5         30  104557     225176     0       NA              0  95  
## 6         34  115799     263086     0     2021.             0  95  
## # ℹ 60 more variables: ADMISSION_TYPE <chr>, INSURANCE <chr>, ETHNICITY <chr>,
## #   DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, GENDER <dbl>,
## #   EXPIRE_FLAG <dbl>, birthday <dbl>, admitday <dbl>, dischday <dbl>,
## #   deathday <dbl>, hosp_deathday <dbl>, weight <dbl>, Height <dbl>, BMI <dbl>,
## #   HeartRate <dbl>, SysBP <dbl>, DiasBP <dbl>, MeanBP <dbl>, RespRate <dbl>,
## #   TempC <dbl>, SpO2 <dbl>, urine_out <dbl>, SOFA <dbl>, apsiii <dbl>,
## #   rrt <dbl>, ventilation <dbl>, ethnicity_5 <chr>, hypertension <dbl>, …
col_continuous <- 
  c("age", "Height", "weight", "BMI",
    "HeartRate", "SysBP", "DiasBP","RespRate", "TempC", "SpO2",
    "urine_out","SOFA", "apsiii",
    "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC")

col_factors <- 
  c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "mortality30", "HOSPITAL_EXPIRE_FLAG",
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")


df_survival_complete <- 
  df_PEO_total_complete |> 
  mutate(futime30 = ifelse(mortality30 == 1, deathday-admitday, 30)) |> 
  # filter(futime30 < -1)
  mutate(futime30 = ifelse(futime30 <=0, 1, futime30)) |> 
  select(c(col_continuous, col_factors, h2ras, futime30))
formula_1_complete = Surv(futime30, mortality30)~ h2ras
sfit_1 = surv_fit(formula_1_complete, data = df_survival_complete, conf.type = "log-log")
ggsurvplot(sfit_1,
           conf.int=TRUE, pval=T, pval.method = T,
           risk.table=TRUE,
           break.time.by = 5,
           break.y.by = 0.2,
           palette = "Set1",
           xlab = "Days",
           ylab = "Cum survival",
           legend.labs=c("Non-H2RA", "H2RA"),
           legend.title = "",
           risk.table.height=0.25,
           main = "before matching"
           )

#cox regression model with multiple imputation - model_1: unadjusted

# Cox回帰モデルの構築
model_1_complete <- with(data=df_survival_complete, 
                         exp=coxph(Surv(futime30, mortality30) ~ h2ras))
# Cox回帰モデルのサマリー取得
summary_result <- summary(model_1_complete)

# ハザード比 (HR) と信頼区間の計算
hr_results <- summary_result$coefficients
hr_results_df <- as.data.frame(hr_results)

# 行名をカラムに変換
hr_results_df$term <- rownames(hr_results_df)

# 正しい列名を使用して mutate() を実行
hr_results_df <- hr_results_df %>%
  mutate(HR = exp(coef),
         lower = exp(coef - 1.96 * `se(coef)`),
         upper = exp(coef + 1.96 * `se(coef)`)) %>%
  select(term, HR, lower, upper, p.value = `Pr(>|z|)`) %>%
  mutate_if(is.numeric, round, 3)

# CSVファイルに保存
write.csv(hr_results_df, "model_1_complete.csv")

# CSVファイルを読み込む
model_1_complete <- read.csv("model_1_complete.csv")
model_1_complete
##       X  term    HR lower upper p.value
## 1 h2ras h2ras 0.545 0.486 0.612       0

#model_2: adjusted, partial formula

cols_cph_2_complete <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
    )

formula_2_complete <-
cols_cph_2_complete |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()
formula_2_complete
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5
## <environment: 0x11a9f2a58>
model_2_complete <- 
  with(data=df_survival_complete, 
       exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI + 
                   GENDER + ADMISSION_TYPE + INSURANCE + ethnicity_5))
summary_result <- summary(model_2_complete)

# ハザード比 (HR) と信頼区間の計算
hr_results <- summary_result$coefficients
hr_results_df <- as.data.frame(hr_results)

# 行名をカラムに変換
hr_results_df$term <- rownames(hr_results_df)

# 正しい列名を使用して mutate() を実行
hr_results_df <- hr_results_df %>%
  mutate(HR = exp(coef),
         lower = exp(coef - 1.96 * `se(coef)`),
         upper = exp(coef + 1.96 * `se(coef)`)) %>%
  select(term, HR, lower, upper, p.value = `Pr(>|z|)`) %>%
  mutate_if(is.numeric, round, 3)

# CSVファイルに保存
write.csv(hr_results_df, "model_2_complete.csv")

# CSVファイルを読み込む
model_2_complete <- read.csv("model_2_complete.csv")
model_2_complete
##                          X                    term    HR lower upper p.value
## 1                    h2ras                   h2ras 0.631 0.561 0.709   0.000
## 2                      age                     age 1.024 1.019 1.030   0.000
## 3                      BMI                     BMI 0.986 0.978 0.995   0.001
## 4                   GENDER                  GENDER 1.052 0.942 1.174   0.371
## 5  ADMISSION_TYPEEMERGENCY ADMISSION_TYPEEMERGENCY 2.854 2.158 3.774   0.000
## 6     ADMISSION_TYPEURGENT    ADMISSION_TYPEURGENT 2.276 1.513 3.423   0.000
## 7        INSURANCEMedicare       INSURANCEMedicare 1.192 0.633 2.245   0.586
## 8         INSURANCEPrivate        INSURANCEPrivate 1.139 0.601 2.158   0.690
## 9        INSURANCESelf Pay       INSURANCESelf Pay 2.201 0.837 5.791   0.110
## 10        ethnicity_5black        ethnicity_5black 0.756 0.466 1.226   0.257
## 11     ethnicity_5hispanic     ethnicity_5hispanic 0.781 0.418 1.460   0.439
## 12        ethnicity_5other        ethnicity_5other 1.619 1.045 2.506   0.031
## 13        ethnicity_5white        ethnicity_5white 1.062 0.695 1.624   0.780

#model_3: adjusted, full formula

cols_cph_3_complete <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
    
    "resprate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
    "urine_out", "Glucose", "Calcium", "Cre",  "Mg", "Sodium", "BUN",
    "Plate", "RBC", "WBC",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi",
    
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia"
    
    )

formula_3_complete <-
cols_cph_3_complete |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()

formula_3_complete
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5 + resprate + LVEF_ctg + SOFA + apsiii + 
##     rrt + ventilation + urine_out + Glucose + Calcium + Cre + 
##     Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic + 
##     inotrop + anti_adrenal + ca_blocker + anticoag + antiplate + 
##     ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis + 
##     venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding + 
##     aki + septic_shock + pneumonia
## <environment: 0x17f708d68>
model_3_complete <- 
  with(data=df_survival_complete, 
       exp=coxph(Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
                   INSURANCE + ethnicity_5 + RespRate + LVEF_ctg + SOFA + apsiii + 
                   rrt + ventilation + urine_out + Glucose + Calcium + Cre + 
                   Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic + 
                   inotrop + anti_adrenal + ca_blocker + anticoag + antiplate + 
                   ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis + 
                   venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding +
                   aki + septic_shock + pneumonia))
# モデルのサマリー取得
summary_result <- summary(model_3_complete)

# ハザード比 (HR) と信頼区間の計算
hr_results <- summary_result$coefficients
hr_results_df <- as.data.frame(hr_results)

# 行名をカラムに変換
hr_results_df$term <- rownames(hr_results_df)

# 正しい列名を使用して mutate() を実行
hr_results_df <- hr_results_df %>%
  mutate(HR = exp(coef),
         lower = exp(coef - 1.96 * `se(coef)`),
         upper = exp(coef + 1.96 * `se(coef)`)) %>%
  select(term, HR, lower, upper, p.value = `Pr(>|z|)`) %>%
  mutate_if(is.numeric, round, 3)

# CSVファイルに保存
write.csv(hr_results_df, "model_3_complete.csv")

# CSVファイルを読み込む
model_3_complete <- read.csv("model_3_complete.csv")
model_3_complete
##                           X                     term    HR lower upper p.value
## 1                     h2ras                    h2ras 0.663 0.578 0.761   0.000
## 2                       age                      age 1.023 1.018 1.029   0.000
## 3                       BMI                      BMI 0.982 0.973 0.991   0.000
## 4                    GENDER                   GENDER 1.072 0.948 1.211   0.266
## 5   ADMISSION_TYPEEMERGENCY  ADMISSION_TYPEEMERGENCY 2.042 1.515 2.753   0.000
## 6      ADMISSION_TYPEURGENT     ADMISSION_TYPEURGENT 1.150 0.738 1.791   0.537
## 7         INSURANCEMedicare        INSURANCEMedicare 1.061 0.542 2.075   0.864
## 8          INSURANCEPrivate         INSURANCEPrivate 0.914 0.464 1.801   0.794
## 9         INSURANCESelf Pay        INSURANCESelf Pay 1.896 0.695 5.173   0.212
## 10         ethnicity_5black         ethnicity_5black 1.032 0.626 1.700   0.902
## 11      ethnicity_5hispanic      ethnicity_5hispanic 1.034 0.539 1.984   0.920
## 12         ethnicity_5other         ethnicity_5other 1.672 1.071 2.611   0.024
## 13         ethnicity_5white         ethnicity_5white 1.232 0.802 1.892   0.341
## 14                 RespRate                 RespRate 1.028 1.019 1.037   0.000
## 15         LVEF_ctgmoderate         LVEF_ctgmoderate 1.053 0.848 1.307   0.641
## 16           LVEF_ctgnormal           LVEF_ctgnormal 0.956 0.794 1.150   0.634
## 17           LVEF_ctgsevere           LVEF_ctgsevere 1.198 0.985 1.458   0.071
## 18                     SOFA                     SOFA 0.989 0.962 1.017   0.426
## 19                   apsiii                   apsiii 1.019 1.015 1.023   0.000
## 20                      rrt                      rrt 1.249 0.996 1.567   0.055
## 21              ventilation              ventilation 1.486 1.287 1.716   0.000
## 22                urine_out                urine_out 1.000 1.000 1.000   0.000
## 23                  Glucose                  Glucose 1.001 1.001 1.002   0.000
## 24                  Calcium                  Calcium 0.971 0.904 1.043   0.425
## 25                      Cre                      Cre 0.878 0.825 0.935   0.000
## 26                       Mg                       Mg 1.064 0.975 1.161   0.167
## 27                   Sodium                   Sodium 0.986 0.975 0.998   0.017
## 28                      BUN                      BUN 1.008 1.005 1.011   0.000
## 29                    Plate                    Plate 1.000 0.999 1.000   0.050
## 30                      RBC                      RBC 1.002 0.910 1.104   0.962
## 31                      WBC                      WBC 1.008 1.006 1.011   0.000
## 32                      RAA                      RAA 0.543 0.472 0.624   0.000
## 33                 Diuritic                 Diuritic 1.069 0.870 1.314   0.523
## 34                  inotrop                  inotrop 1.403 1.211 1.626   0.000
## 35             anti_adrenal             anti_adrenal 0.502 0.425 0.594   0.000
## 36               ca_blocker               ca_blocker 0.885 0.772 1.014   0.078
## 37                 anticoag                 anticoag 1.226 0.962 1.564   0.100
## 38                antiplate                antiplate 0.853 0.730 0.997   0.046
## 39                      ppi                      ppi 1.013 0.858 1.197   0.876
## 40             hypertension             hypertension 1.110 0.980 1.258   0.100
## 41                       DM                       DM 0.951 0.832 1.087   0.460
## 42                   anemia                   anemia 0.717 0.628 0.818   0.000
## 43                       Af                       Af 1.083 0.957 1.225   0.208
## 44 coronary_atherosclerosis coronary_atherosclerosis 0.869 0.758 0.997   0.045
## 45          venous_thrombus          venous_thrombus 0.743 0.501 1.103   0.141
## 46                       MI                       MI 1.189 1.010 1.398   0.037
## 47            gastric_ulcer            gastric_ulcer 0.814 0.392 1.690   0.581
## 48           duodenal_ulcer           duodenal_ulcer 0.895 0.479 1.672   0.728
## 49         gastric_bleeding         gastric_bleeding 0.950 0.750 1.205   0.674
## 50                      aki                      aki 1.346 1.176 1.541   0.000
## 51             septic_shock             septic_shock 1.170 0.980 1.397   0.082
## 52                pneumonia                pneumonia 1.171 1.029 1.332   0.017

#model_1_complete から model_3_completeまでの結果。

bind_rows(model_1_complete |> filter(term == "h2ras"),
          model_2_complete |> filter(term == "h2ras"),
          model_3_complete |> filter(term == "h2ras")) |> 
  mutate(model = c("model_1_complete", "model_2_complete", "model_3_complete"), .before=HR) |> 
  select(!X) |> 
  write_csv("unadjusted_models_complete.csv")

(unadjusted_models <- read_csv("unadjusted_models_complete.csv"))
## Rows: 3 Columns: 6
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): term, model
## dbl (4): HR, lower, upper, p.value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 3 × 6
##   term  model               HR lower upper p.value
##   <chr> <chr>            <dbl> <dbl> <dbl>   <dbl>
## 1 h2ras model_1_complete 0.545 0.486 0.612       0
## 2 h2ras model_2_complete 0.631 0.561 0.709       0
## 3 h2ras model_3_complete 0.663 0.578 0.761       0
df_survival_complete_PS<- na.omit(df_survival_complete)
df_survival_complete_PS |> count()
## # A tibble: 1 × 1
##       n
##   <int>
## 1  7668

#PS machiting

cols <- 
  c(
    "age", "BMI",
    "HeartRate","RespRate", "TempC", "SpO2",
    "urine_out","SOFA", "apsiii",
    "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
    "ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")

ps_f <-
cols %>%
  str_c(collapse = " + ") %>%
  str_c("h2ras ~ ", .) %>%
  formula()
ps_f
## h2ras ~ age + BMI + HeartRate + RespRate + TempC + SpO2 + urine_out + 
##     SOFA + apsiii + Calcium + Cre + Glucose + Mg + Sodium + BUN + 
##     Plate + RBC + WBC + ADMISSION_TYPE + INSURANCE + GENDER + 
##     ethnicity_5 + rrt + ventilation + hypertension + DM + anemia + 
##     Af + coronary_atherosclerosis + venous_thrombus + MI + gastritis + 
##     gastric_ulcer + duodenal_ulcer + gastric_bleeding + aki + 
##     septic_shock + pneumonia + LVEF_ctg + RAA + Diuritic + inotrop + 
##     anti_adrenal + ca_blocker + anticoag + antiplate + ppi
## <environment: 0x12d9b7108>
m_near_complete <- 
  matchit(
    formula= ps_f,
    data = df_survival_complete_PS,
    method = "nearest", 
    distance = "glm", 
    caliper = 0.2, 
    ratio = 1, 
    replace = F)

matched_data_complete <- MatchIt::match.data(m_near_complete)
matched_data_complete <- 
  matched_data_complete |> 
  mutate(h2ras = as.numeric(as.character(h2ras)))
cols <- 
  c(
    "age", "BMI",
    "HeartRate","RespRate", "TempC", "SpO2",
    "urine_out","SOFA", "apsiii",
    "Calcium", "Cre", "Glucose", "Mg", "Sodium", "BUN", "Plate", "RBC", "WBC",
    "ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")

col_factors <- 
  c("ADMISSION_TYPE", "INSURANCE", "GENDER", "ethnicity_5", 
    "mortality30", "HOSPITAL_EXPIRE_FLAG",
    "rrt", "ventilation",
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastritis", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia",
    "LVEF_ctg",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi")
tableone<-
matched_data_complete |> select(c(col_continuous, col_factors, "h2ras"))|>
  CreateTableOne(vars = c(col_continuous, col_factors), strata = "h2ras", factorVars = col_factors) 

print(tableone, smd = T, missing = T, test = F, explain = F) |> 
  as.data.frame() |> 
  mutate(SMD = ifelse(SMD == "<0.001", 0.001, SMD)) |> 
  write.csv("matched_complete.csv")
##                               Stratified by h2ras
##                                0                 1                 SMD   
##   n                               2513              2513                 
##   age                            72.13 (14.47)     71.78 (14.12)    0.025
##   Height                        168.12 (11.16)    167.92 (10.90)    0.019
##   weight                         82.76 (26.22)     82.33 (25.33)    0.017
##   BMI                            29.16 (8.48)      29.11 (8.29)     0.005
##   HeartRate                      88.90 (20.88)     88.78 (19.79)    0.006
##   SysBP                         122.08 (25.10)    122.27 (25.19)    0.008
##   DiasBP                         63.30 (17.53)     62.91 (16.89)    0.023
##   RespRate                       19.72 (5.98)      19.39 (6.48)     0.053
##   TempC                          36.54 (0.99)      36.53 (0.97)     0.012
##   SpO2                           96.63 (4.77)      96.70 (5.07)     0.014
##   urine_out                    1816.68 (1259.76) 1813.71 (1268.90)  0.002
##   SOFA                            4.87 (3.10)       5.03 (3.14)     0.051
##   apsiii                         49.02 (19.12)     49.10 (19.77)    0.004
##   Calcium                         8.53 (0.81)       8.52 (0.81)     0.013
##   Cre                             1.63 (1.47)       1.61 (1.42)     0.013
##   Glucose                       142.82 (69.59)    141.97 (71.96)    0.012
##   Mg                              2.06 (0.78)       2.07 (0.40)     0.020
##   Sodium                        138.46 (4.75)     138.54 (4.72)     0.017
##   BUN                            33.53 (22.66)     33.02 (22.42)    0.023
##   Plate                         234.57 (118.81)   235.91 (122.03)   0.011
##   RBC                             3.66 (0.71)       3.65 (0.68)     0.019
##   WBC                            11.61 (12.28)     11.59 (6.63)     0.002
##   ADMISSION_TYPE                                                    0.060
##      ELECTIVE                      212 ( 8.4)        255 (10.1)          
##      EMERGENCY                    2241 (89.2)       2194 (87.3)          
##      URGENT                         60 ( 2.4)         64 ( 2.5)          
##   INSURANCE                                                         0.011
##      Government                     38 ( 1.5)         37 ( 1.5)          
##      Medicare                     1946 (77.4)       1954 (77.8)          
##      Private                       519 (20.7)        511 (20.3)          
##      Self Pay                       10 ( 0.4)         11 ( 0.4)          
##   GENDER = 1                      1351 (53.8)       1373 (54.6)     0.018
##   ethnicity_5                                                       0.021
##      asian                          51 ( 2.0)         54 ( 2.1)          
##      black                         208 ( 8.3)        217 ( 8.6)          
##      hispanic                       73 ( 2.9)         71 ( 2.8)          
##      other                         300 (11.9)        309 (12.3)          
##      white                        1881 (74.9)       1862 (74.1)          
##   mortality30 = 1                  438 (17.4)        323 (12.9)     0.128
##   HOSPITAL_EXPIRE_FLAG = 1         384 (15.3)        297 (11.8)     0.101
##   rrt = 1                           90 ( 3.6)        100 ( 4.0)     0.021
##   ventilation = 1                 1308 (52.0)       1443 (57.4)     0.108
##   hypertension = 1                1080 (43.0)       1089 (43.3)     0.007
##   DM = 1                           917 (36.5)        917 (36.5)    <0.001
##   anemia = 1                       872 (34.7)        840 (33.4)     0.027
##   Af = 1                          1115 (44.4)       1138 (45.3)     0.018
##   coronary_atherosclerosis = 1    1048 (41.7)       1085 (43.2)     0.030
##   venous_thrombus = 1               52 ( 2.1)         54 ( 2.1)     0.006
##   MI = 1                           426 (17.0)        432 (17.2)     0.006
##   gastritis = 1                     40 ( 1.6)         44 ( 1.8)     0.012
##   gastric_ulcer = 1                 16 ( 0.6)         16 ( 0.6)    <0.001
##   duodenal_ulcer = 1                17 ( 0.7)         16 ( 0.6)     0.005
##   gastric_bleeding = 1             138 ( 5.5)        137 ( 5.5)     0.002
##   aki = 1                         1017 (40.5)       1007 (40.1)     0.008
##   septic_shock = 1                 212 ( 8.4)        194 ( 7.7)     0.026
##   pneumonia = 1                    624 (24.8)        620 (24.7)     0.004
##   LVEF_ctg                                                          0.028
##      mild                          382 (15.2)        401 (16.0)          
##      moderate                      437 (17.4)        416 (16.6)          
##      normal                       1109 (44.1)       1107 (44.1)          
##      severe                        585 (23.3)        589 (23.4)          
##   RAA = 1                         1547 (61.6)       1571 (62.5)     0.020
##   Diuritic = 1                    2303 (91.6)       2314 (92.1)     0.016
##   inotrop = 1                     1140 (45.4)       1220 (48.5)     0.064
##   anti_adrenal = 1                2299 (91.5)       2296 (91.4)     0.004
##   ca_blocker = 1                   953 (37.9)        976 (38.8)     0.019
##   anticoag = 1                    2403 (95.6)       2368 (94.2)     0.063
##   antiplate = 1                   2015 (80.2)       2043 (81.3)     0.028
##   ppi = 1                         1962 (78.1)       1930 (76.8)     0.030
##                               Stratified by h2ras
##                                Missing
##   n                                   
##   age                          0.0    
##   Height                       0.0    
##   weight                       0.0    
##   BMI                          0.0    
##   HeartRate                    0.0    
##   SysBP                        0.0    
##   DiasBP                       0.0    
##   RespRate                     0.0    
##   TempC                        0.0    
##   SpO2                         0.0    
##   urine_out                    0.0    
##   SOFA                         0.0    
##   apsiii                       0.0    
##   Calcium                      0.0    
##   Cre                          0.0    
##   Glucose                      0.0    
##   Mg                           0.0    
##   Sodium                       0.0    
##   BUN                          0.0    
##   Plate                        0.0    
##   RBC                          0.0    
##   WBC                          0.0    
##   ADMISSION_TYPE               0.0    
##      ELECTIVE                         
##      EMERGENCY                        
##      URGENT                           
##   INSURANCE                    0.0    
##      Government                       
##      Medicare                         
##      Private                          
##      Self Pay                         
##   GENDER = 1                   0.0    
##   ethnicity_5                  0.0    
##      asian                            
##      black                            
##      hispanic                         
##      other                            
##      white                            
##   mortality30 = 1              0.0    
##   HOSPITAL_EXPIRE_FLAG = 1     0.0    
##   rrt = 1                      0.0    
##   ventilation = 1              0.0    
##   hypertension = 1             0.0    
##   DM = 1                       0.0    
##   anemia = 1                   0.0    
##   Af = 1                       0.0    
##   coronary_atherosclerosis = 1 0.0    
##   venous_thrombus = 1          0.0    
##   MI = 1                       0.0    
##   gastritis = 1                0.0    
##   gastric_ulcer = 1            0.0    
##   duodenal_ulcer = 1           0.0    
##   gastric_bleeding = 1         0.0    
##   aki = 1                      0.0    
##   septic_shock = 1             0.0    
##   pneumonia = 1                0.0    
##   LVEF_ctg                     0.0    
##      mild                             
##      moderate                         
##      normal                           
##      severe                           
##   RAA = 1                      0.0    
##   Diuritic = 1                 0.0    
##   inotrop = 1                  0.0    
##   anti_adrenal = 1             0.0    
##   ca_blocker = 1               0.0    
##   anticoag = 1                 0.0    
##   antiplate = 1                0.0    
##   ppi = 1                      0.0
df_matched_table_complete <- read.csv("matched_complete.csv", 
                             col.names = c("name", "non_user", "user", "smd", "missing"))
df_matched_table_complete
##                            name          non_user              user   smd
## 1                             n              2513              2513    NA
## 2                           age     72.13 (14.47)     71.78 (14.12) 0.025
## 3                        Height    168.12 (11.16)    167.92 (10.90) 0.019
## 4                        weight     82.76 (26.22)     82.33 (25.33) 0.017
## 5                           BMI      29.16 (8.48)      29.11 (8.29) 0.005
## 6                     HeartRate     88.90 (20.88)     88.78 (19.79) 0.006
## 7                         SysBP    122.08 (25.10)    122.27 (25.19) 0.008
## 8                        DiasBP     63.30 (17.53)     62.91 (16.89) 0.023
## 9                      RespRate      19.72 (5.98)      19.39 (6.48) 0.053
## 10                        TempC      36.54 (0.99)      36.53 (0.97) 0.012
## 11                         SpO2      96.63 (4.77)      96.70 (5.07) 0.014
## 12                    urine_out 1816.68 (1259.76) 1813.71 (1268.90) 0.002
## 13                         SOFA       4.87 (3.10)       5.03 (3.14) 0.051
## 14                       apsiii     49.02 (19.12)     49.10 (19.77) 0.004
## 15                      Calcium       8.53 (0.81)       8.52 (0.81) 0.013
## 16                          Cre       1.63 (1.47)       1.61 (1.42) 0.013
## 17                      Glucose    142.82 (69.59)    141.97 (71.96) 0.012
## 18                           Mg       2.06 (0.78)       2.07 (0.40) 0.020
## 19                       Sodium     138.46 (4.75)     138.54 (4.72) 0.017
## 20                          BUN     33.53 (22.66)     33.02 (22.42) 0.023
## 21                        Plate   234.57 (118.81)   235.91 (122.03) 0.011
## 22                          RBC       3.66 (0.71)       3.65 (0.68) 0.019
## 23                          WBC     11.61 (12.28)      11.59 (6.63) 0.002
## 24               ADMISSION_TYPE                                     0.060
## 25                     ELECTIVE       212 ( 8.4)        255 (10.1)     NA
## 26                    EMERGENCY      2241 (89.2)       2194 (87.3)     NA
## 27                       URGENT        60 ( 2.4)         64 ( 2.5)     NA
## 28                    INSURANCE                                     0.011
## 29                   Government        38 ( 1.5)         37 ( 1.5)     NA
## 30                     Medicare      1946 (77.4)       1954 (77.8)     NA
## 31                      Private       519 (20.7)        511 (20.3)     NA
## 32                     Self Pay        10 ( 0.4)         11 ( 0.4)     NA
## 33                   GENDER = 1      1351 (53.8)       1373 (54.6)  0.018
## 34                  ethnicity_5                                     0.021
## 35                        asian        51 ( 2.0)         54 ( 2.1)     NA
## 36                        black       208 ( 8.3)        217 ( 8.6)     NA
## 37                     hispanic        73 ( 2.9)         71 ( 2.8)     NA
## 38                        other       300 (11.9)        309 (12.3)     NA
## 39                        white      1881 (74.9)       1862 (74.1)     NA
## 40              mortality30 = 1       438 (17.4)        323 (12.9)  0.128
## 41     HOSPITAL_EXPIRE_FLAG = 1       384 (15.3)        297 (11.8)  0.101
## 42                      rrt = 1        90 ( 3.6)        100 ( 4.0)  0.021
## 43              ventilation = 1      1308 (52.0)       1443 (57.4)  0.108
## 44             hypertension = 1      1080 (43.0)       1089 (43.3)  0.007
## 45                       DM = 1       917 (36.5)        917 (36.5)  0.001
## 46                   anemia = 1       872 (34.7)        840 (33.4)  0.027
## 47                       Af = 1      1115 (44.4)       1138 (45.3)  0.018
## 48 coronary_atherosclerosis = 1      1048 (41.7)       1085 (43.2)  0.030
## 49          venous_thrombus = 1        52 ( 2.1)         54 ( 2.1)  0.006
## 50                       MI = 1       426 (17.0)        432 (17.2)  0.006
## 51                gastritis = 1        40 ( 1.6)         44 ( 1.8)  0.012
## 52            gastric_ulcer = 1        16 ( 0.6)         16 ( 0.6)  0.001
## 53           duodenal_ulcer = 1        17 ( 0.7)         16 ( 0.6)  0.005
## 54         gastric_bleeding = 1       138 ( 5.5)        137 ( 5.5)  0.002
## 55                      aki = 1      1017 (40.5)       1007 (40.1)  0.008
## 56             septic_shock = 1       212 ( 8.4)        194 ( 7.7)  0.026
## 57                pneumonia = 1       624 (24.8)        620 (24.7)  0.004
## 58                     LVEF_ctg                                     0.028
## 59                         mild       382 (15.2)        401 (16.0)     NA
## 60                     moderate       437 (17.4)        416 (16.6)     NA
## 61                       normal      1109 (44.1)       1107 (44.1)     NA
## 62                       severe       585 (23.3)        589 (23.4)     NA
## 63                      RAA = 1      1547 (61.6)       1571 (62.5)  0.020
## 64                 Diuritic = 1      2303 (91.6)       2314 (92.1)  0.016
## 65                  inotrop = 1      1140 (45.4)       1220 (48.5)  0.064
## 66             anti_adrenal = 1      2299 (91.5)       2296 (91.4)  0.004
## 67               ca_blocker = 1       953 (37.9)        976 (38.8)  0.019
## 68                 anticoag = 1      2403 (95.6)       2368 (94.2)  0.063
## 69                antiplate = 1      2015 (80.2)       2043 (81.3)  0.028
## 70                      ppi = 1      1962 (78.1)       1930 (76.8)  0.030
##    missing
## 1       NA
## 2        0
## 3        0
## 4        0
## 5        0
## 6        0
## 7        0
## 8        0
## 9        0
## 10       0
## 11       0
## 12       0
## 13       0
## 14       0
## 15       0
## 16       0
## 17       0
## 18       0
## 19       0
## 20       0
## 21       0
## 22       0
## 23       0
## 24       0
## 25      NA
## 26      NA
## 27      NA
## 28       0
## 29      NA
## 30      NA
## 31      NA
## 32      NA
## 33       0
## 34       0
## 35      NA
## 36      NA
## 37      NA
## 38      NA
## 39      NA
## 40       0
## 41       0
## 42       0
## 43       0
## 44       0
## 45       0
## 46       0
## 47       0
## 48       0
## 49       0
## 50       0
## 51       0
## 52       0
## 53       0
## 54       0
## 55       0
## 56       0
## 57       0
## 58       0
## 59      NA
## 60      NA
## 61      NA
## 62      NA
## 63       0
## 64       0
## 65       0
## 66       0
## 67       0
## 68       0
## 69       0
## 70       0
df_love_plot_complete <- 
  df_unmatched_table |> select(name, smd) |> 
  rename(unmatched = smd) |> 
  left_join(df_matched_table_complete, by="name") |> 
  rename(matched = smd) |> 
  select(name, unmatched, matched) |> 
  filter(!is.na(matched)) |> 
  mutate(name = str_remove(name, pattern = " =.*")) |> 
  arrange(unmatched)

order <- df_love_plot_complete |> pull(name)
df_love_plot_complete <-
  df_love_plot_complete |> 
  mutate(name = factor(name, levels = order))

df_love_plot_complete
##                        name unmatched matched
## 1                       WBC     0.004   0.002
## 2                    Sodium     0.005   0.017
## 3                    DiasBP     0.008   0.023
## 4                 HeartRate     0.012   0.006
## 5           venous_thrombus     0.013   0.006
## 6                        MI     0.019   0.006
## 7                        DM     0.024   0.001
## 8                       rrt     0.030   0.021
## 9                       BMI     0.033   0.005
## 10                   anemia     0.036   0.027
## 11                   Height     0.042   0.019
## 12                pneumonia     0.042   0.004
## 13                urine_out     0.046   0.002
## 14             septic_shock     0.046   0.026
## 15                    SysBP     0.048   0.008
## 16                      RBC     0.050   0.019
## 17                gastritis     0.051   0.012
## 18                  Calcium     0.053   0.013
## 19                    TempC     0.056   0.012
## 20                       Af     0.064   0.018
## 21           duodenal_ulcer     0.068   0.005
## 22                    Plate     0.074   0.011
## 23            gastric_ulcer     0.075   0.001
## 24                   weight     0.076   0.017
## 25                  Glucose     0.079   0.012
## 26              ethnicity_5     0.081   0.021
## 27                       Mg     0.090   0.020
## 28                 LVEF_ctg     0.104   0.028
## 29                   apsiii     0.107   0.004
## 30                      ppi     0.121   0.030
## 31                INSURANCE     0.122   0.011
## 32             hypertension     0.122   0.007
## 33                      Cre     0.125   0.013
## 34                      aki     0.132   0.008
## 35                     SpO2     0.143   0.014
## 36                     SOFA     0.147   0.051
## 37 coronary_atherosclerosis     0.162   0.030
## 38         gastric_bleeding     0.186   0.002
## 39               ca_blocker     0.196   0.019
## 40     HOSPITAL_EXPIRE_FLAG     0.207   0.101
## 41                      age     0.219   0.025
## 42                      BUN     0.229   0.023
## 43              mortality30     0.255   0.128
## 44                      RAA     0.261   0.020
## 45                 RespRate     0.270   0.053
## 46                 anticoag     0.341   0.063
## 47           ADMISSION_TYPE     0.353   0.060
## 48                  inotrop     0.440   0.064
## 49                antiplate     0.473   0.028
## 50              ventilation     0.484   0.108
## 51             anti_adrenal     0.489   0.004
## 52                 Diuritic     0.526   0.016
df_love_plot_complete |> 
  filter(!name %in% c("mortality30", "hospital_death")) |> 
  pivot_longer(cols = !name, names_to = "Method", values_to = "smd") |> 
  ggplot() +
  geom_point(aes(x = name, 
                 y = smd, 
                 color = Method)) + 
  geom_hline(yintercept = 0.1, color = "black", lty = 2) +
  labs(x = "variable",
       y = "SMD") +
  coord_flip() +
  theme_bw()

love.plot(m_near_complete,
          thresholds = 0.1,
          abs = TRUE,
          position = "top",
          shapes = c("circle", "triangle"),
          colors = c("red", "blue"))
## Warning: Standardized mean differences and raw mean differences are present in the same plot. 
## Use the `stars` argument to distinguish between them and appropriately label the x-axis.

bal.plot(m_near_complete,
         which = "both",
         type = "histogram", bins=30,
         mirror = TRUE) +
  labs(title = "Distributional balance for Propensity score",
       x = "Propensity score")
## No `var.name` was provided. Displaying balance for distance.

result_matched_complete<-
matched_data_complete |> select(h2ras, mortality30, HOSPITAL_EXPIRE_FLAG) |> 
  CreateTableOne(vars = c("mortality30"),
                 strata = "h2ras", 
                 factorVars = c("mortality30")) 

print(result_matched_complete) |> as.data.frame() |> 
  rename(H2RA = '1',
         Non_H2RA = '0') |> 
  write.csv("result_matched_complete.csv")
##                      Stratified by h2ras
##                       0            1            p      test
##   n                   2513         2513                    
##   mortality30 = 1 (%)  438 (17.4)   323 (12.9)  <0.001

#KM curve: 30day mortality after matching

matched_data_complete
## # A tibble: 5,026 × 58
##      age Height weight   BMI HeartRate SysBP DiasBP RespRate TempC  SpO2
##    <dbl>  <dbl>  <dbl> <dbl>     <dbl> <dbl>  <dbl>    <dbl> <dbl> <dbl>
##  1  72.0   188.  123.   34.8        61  102    45         20  37.2    98
##  2  95     170.   71.7  24.8        73  153    77         20  36.3    93
##  3  68.9   175.  106.   34.6        91  129    38         22  38.2    97
##  4  80.9   183.   98.2  29.4        71  104    56         12  35.6   100
##  5  41.8   173.   50    16.8       138   83    49         30  37.6    98
##  6  45.5   173.   98    32.9        91  142.   85.5       20  36.3    99
##  7  75.7   160.   94    36.7        93  102    58         12  36.3   100
##  8  61.4   180.  109.   33.5        96  112    72         25  36.9    93
##  9  95     165.   56.7  20.8        86  131    70         24  36.1    97
## 10  95     165.   68    24.9        82  146    70         15  35.6    98
## # ℹ 5,016 more rows
## # ℹ 48 more variables: urine_out <dbl>, SOFA <dbl>, apsiii <dbl>,
## #   Calcium <dbl>, Cre <dbl>, Glucose <dbl>, Mg <dbl>, Sodium <dbl>, BUN <dbl>,
## #   Plate <dbl>, RBC <dbl>, WBC <dbl>, ADMISSION_TYPE <chr>, INSURANCE <chr>,
## #   GENDER <dbl>, ethnicity_5 <chr>, mortality30 <dbl>,
## #   HOSPITAL_EXPIRE_FLAG <dbl>, rrt <dbl>, ventilation <dbl>,
## #   hypertension <dbl>, DM <dbl>, anemia <dbl>, Af <dbl>, …
df_survival_matched_complete <- 
  matched_data_complete |> 
  mutate(futime30 = ifelse(futime30 <=0, 1, futime30))
formula_1_complete2 = Surv(futime30, mortality30)~ h2ras
sfit_m1_complete2 = surv_fit(formula_1_complete2, data = df_survival_matched_complete, conf.type = "log-log")
ggsurvplot(sfit_m1_complete2,
           conf.int=TRUE, pval=T, pval.method = T,
           risk.table=TRUE,
           break.time.by = 5,
           break.y.by = 0.2,
           palette = "Set1",
           xlab = "Days",
           ylab = "Cum survival",
           legend.labs=c("Non-H2AR", "H2AR"),
           legend.title = "",
           risk.table.height=0.25,
           main = "before matching"
           )

#cox regression model - model_1_complete

cph_m1_complete <- coxph(formula_1_complete2, data = df_survival_matched_complete)
res_m1_complete <- summary(cph_m1_complete)

res_model_m1_complete <- 
  res_m1_complete$coefficients |>  as.data.frame() |>
  rename(HR = "exp(coef)",
         se = "se(coef)",
         P = "Pr(>|z|)") |> 
  mutate(lower = exp(coef - 1.96*se),
         upper = exp(coef + 1.96*se)) |> 
  select(HR, lower, upper, P) |>
  mutate_if(is.numeric, round, 3) |> 
  write.csv("model_m1_complete.csv")
  
model_m1_complete <- read.csv("model_m1_complete.csv")
model_m1_complete
##       X    HR lower upper P
## 1 h2ras 0.714 0.618 0.824 0

#model_2_complete

cols_cph_2_complete2 <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5"
    )

formula_2_complete2 <-
cols_cph_2_complete2 |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()
formula_2_complete2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5
## <environment: 0x113b54008>
cph_m2_complete <- coxph(formula_2_complete2, data = df_survival_matched_complete)
res_m2_complete <- summary(cph_m2_complete)

res_model_m2_complete <- 
  res_m2_complete$coefficients |>  as.data.frame() |>
  rename(HR = "exp(coef)",
         se = "se(coef)",
         P = "Pr(>|z|)") |> 
  mutate(lower = exp(coef - 1.96*se),
         upper = exp(coef + 1.96*se)) |> 
  select(HR, lower, upper, P) |> 
  mutate_if(is.numeric, round, 3) |> 
  write.csv("model_m2_complete.csv")
  
model_m2_complete <- read.csv("model_m2_complete.csv")
model_m2_complete
##                          X    HR lower upper     P
## 1                    h2ras 0.725 0.628 0.838 0.000
## 2                      age 1.025 1.018 1.032 0.000
## 3                      BMI 0.991 0.981 1.002 0.108
## 4                   GENDER 0.987 0.854 1.141 0.861
## 5  ADMISSION_TYPEEMERGENCY 2.104 1.504 2.941 0.000
## 6     ADMISSION_TYPEURGENT 1.542 0.817 2.911 0.182
## 7        INSURANCEMedicare 1.213 0.534 2.755 0.645
## 8         INSURANCEPrivate 1.115 0.487 2.551 0.796
## 9        INSURANCESelf Pay 2.155 0.606 7.669 0.236
## 10        ethnicity_5black 0.601 0.344 1.051 0.074
## 11     ethnicity_5hispanic 0.650 0.322 1.311 0.229
## 12        ethnicity_5other 1.204 0.732 1.980 0.465
## 13        ethnicity_5white 0.837 0.522 1.343 0.461

#model_3_complete

cols_cph_3_complete2 <- 
  c("h2ras", 
    "age", "BMI", "GENDER", 
    "ADMISSION_TYPE", "INSURANCE", "ethnicity_5",
    "RespRate", "LVEF_ctg", "SOFA", "apsiii", "rrt", "ventilation",
    "urine_out", "Glucose", "Calcium", "Cre",  "Mg", "Sodium", "BUN",
    "Plate", "RBC", "WBC",
    "RAA", "Diuritic", "inotrop", "anti_adrenal", "ca_blocker",
    "anticoag", "antiplate", "ppi",
    
    "hypertension", "DM", "anemia", "Af", "coronary_atherosclerosis", 
    "venous_thrombus", "MI", "gastric_ulcer", "duodenal_ulcer",
    "gastric_bleeding", "aki", "septic_shock", "pneumonia"
    
    )

formula_3_complete2 <-
cols_cph_3_complete2 |> 
  str_c(collapse = " + ") %>%
  str_c("Surv(futime30, mortality30) ~ ", .) %>%
  formula()
formula_3_complete2
## Surv(futime30, mortality30) ~ h2ras + age + BMI + GENDER + ADMISSION_TYPE + 
##     INSURANCE + ethnicity_5 + RespRate + LVEF_ctg + SOFA + apsiii + 
##     rrt + ventilation + urine_out + Glucose + Calcium + Cre + 
##     Mg + Sodium + BUN + Plate + RBC + WBC + RAA + Diuritic + 
##     inotrop + anti_adrenal + ca_blocker + anticoag + antiplate + 
##     ppi + hypertension + DM + anemia + Af + coronary_atherosclerosis + 
##     venous_thrombus + MI + gastric_ulcer + duodenal_ulcer + gastric_bleeding + 
##     aki + septic_shock + pneumonia
## <environment: 0x11a634f28>
cph_m3_complete <- coxph(formula_3_complete2, data = df_survival_matched_complete) 
res_m3_complete <- summary(cph_m3_complete)

res_model_m3_complete <- 
  res_m3_complete$coefficients |>  as.data.frame() |>
  rename(HR = "exp(coef)",
         se = "se(coef)",
         P = "Pr(>|z|)") |> 
  mutate(lower = exp(coef - 1.96*se),
         upper = exp(coef + 1.96*se)) |> 
  select(HR, lower, upper, P) |> 
  mutate_if(is.numeric, round, 3) |> 
  write.csv("model_m3_complete.csv")
  
model_m3_complete <- read.csv("model_m3_complete.csv")
model_m3_complete
##                           X    HR lower upper     P
## 1                     h2ras 0.662 0.572 0.766 0.000
## 2                       age 1.027 1.020 1.035 0.000
## 3                       BMI 0.985 0.975 0.996 0.007
## 4                    GENDER 1.053 0.904 1.227 0.504
## 5   ADMISSION_TYPEEMERGENCY 1.765 1.250 2.493 0.001
## 6      ADMISSION_TYPEURGENT 1.142 0.599 2.177 0.687
## 7         INSURANCEMedicare 1.148 0.501 2.632 0.744
## 8          INSURANCEPrivate 1.019 0.441 2.352 0.965
## 9         INSURANCESelf Pay 1.887 0.518 6.873 0.336
## 10         ethnicity_5black 0.995 0.562 1.762 0.987
## 11      ethnicity_5hispanic 0.938 0.458 1.921 0.861
## 12         ethnicity_5other 1.314 0.787 2.193 0.296
## 13         ethnicity_5white 0.998 0.614 1.622 0.993
## 14                 RespRate 1.021 1.010 1.033 0.000
## 15         LVEF_ctgmoderate 1.035 0.792 1.353 0.799
## 16           LVEF_ctgnormal 0.937 0.749 1.174 0.573
## 17           LVEF_ctgsevere 1.092 0.854 1.396 0.483
## 18                     SOFA 0.987 0.954 1.021 0.446
## 19                   apsiii 1.019 1.014 1.025 0.000
## 20                      rrt 1.252 0.953 1.644 0.106
## 21              ventilation 1.597 1.332 1.916 0.000
## 22                urine_out 1.000 1.000 1.000 0.000
## 23                  Glucose 1.001 1.001 1.002 0.002
## 24                  Calcium 0.997 0.912 1.089 0.944
## 25                      Cre 0.843 0.776 0.915 0.000
## 26                       Mg 1.043 0.948 1.148 0.387
## 27                   Sodium 0.984 0.970 0.999 0.034
## 28                      BUN 1.010 1.006 1.014 0.000
## 29                    Plate 1.000 0.999 1.000 0.170
## 30                      RBC 0.993 0.882 1.118 0.912
## 31                      WBC 1.008 1.005 1.011 0.000
## 32                      RAA 0.551 0.468 0.650 0.000
## 33                 Diuritic 0.805 0.608 1.065 0.129
## 34                  inotrop 1.413 1.182 1.689 0.000
## 35             anti_adrenal 0.445 0.360 0.549 0.000
## 36               ca_blocker 0.905 0.772 1.060 0.216
## 37                 anticoag 0.909 0.647 1.278 0.584
## 38                antiplate 0.796 0.660 0.961 0.017
## 39                      ppi 0.855 0.700 1.046 0.127
## 40             hypertension 1.081 0.929 1.257 0.315
## 41                       DM 0.897 0.761 1.058 0.197
## 42                   anemia 0.659 0.560 0.776 0.000
## 43                       Af 1.034 0.888 1.204 0.671
## 44 coronary_atherosclerosis 0.913 0.769 1.084 0.300
## 45          venous_thrombus 1.083 0.716 1.639 0.706
## 46                       MI 1.281 1.043 1.573 0.018
## 47            gastric_ulcer 0.584 0.183 1.863 0.364
## 48           duodenal_ulcer 1.178 0.533 2.601 0.685
## 49         gastric_bleeding 1.056 0.772 1.444 0.733
## 50                      aki 1.313 1.110 1.552 0.001
## 51             septic_shock 1.108 0.899 1.367 0.336
## 52                pneumonia 1.197 1.018 1.406 0.029
bind_rows(model_m1_complete |> filter(X == "h2ras"),
          model_m2_complete |> filter(X == "h2ras"),
          model_m3_complete |> filter(X == "h2ras")) |> 
  mutate(model = c("model_1", "model_2", "model_3"), .before=HR) |> 
  select(!X) |> 
  write_csv("adjusted_models_complete.csv")

(adjusted_models_complete <- read_csv("adjusted_models_complete.csv"))
## Rows: 3 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (1): model
## dbl (4): HR, lower, upper, P
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
## # A tibble: 3 × 5
##   model      HR lower upper     P
##   <chr>   <dbl> <dbl> <dbl> <dbl>
## 1 model_1 0.714 0.618 0.824     0
## 2 model_2 0.725 0.628 0.838     0
## 3 model_3 0.662 0.572 0.766     0