##パッケージのlibrary

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6      ✔ purrr   0.3.5 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.2.1      ✔ stringr 1.4.1 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(RSQLite)
library(lubridate)
## 
##  次のパッケージを付け加えます: 'lubridate' 
## 
##  以下のオブジェクトは 'package:base' からマスクされています:
## 
##     date, intersect, setdiff, union
library(tableone)
library(skimr)
library(svglite)
library(haven)
library(summarytools)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE): 命令
## ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/Resources/library/tcltk/
## libs//tcltk.so'' の実行は状態 1 を持ちました
## 
##  次のパッケージを付け加えます: 'summarytools' 
## 
##  以下のオブジェクトは 'package:tibble' からマスクされています:
## 
##     view
library(naniar)
## 
##  次のパッケージを付け加えます: 'naniar' 
## 
##  以下のオブジェクトは 'package:skimr' からマスクされています:
## 
##     n_complete
library(devtools)
##  要求されたパッケージ usethis をロード中です
library(reader)
##  要求されたパッケージ NCmisc をロード中です 
## 
##  次のパッケージを付け加えます: 'reader' 
## 
##  以下のオブジェクトは 'package:NCmisc' からマスクされています:
## 
##     cat.path, get.ext, rmv.ext
library(stringr)
library(missForest)

データベースの作成

con_mimic <- dbConnect(SQLite(),"/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db")

データの確認(最初は何もない)

dbListTables(con_mimic)
##  [1] "admissions"         "callout"            "caregivers"        
##  [4] "chartevents"        "cptevents"          "d_cpt"             
##  [7] "d_icd_diagnoses"    "d_icd_procedures"   "d_items"           
## [10] "d_labitems"         "datetimeevents"     "diagnoses_icd"     
## [13] "drgcodes"           "icustays"           "inputevents_cv"    
## [16] "inputevents_mv"     "labevents"          "microbiologyevents"
## [19] "noteevents"         "outputevents"       "patients"          
## [22] "prescriptions"      "procedureevents_mv" "procedures_icd"    
## [25] "services"           "transfers"

ファイル名リストの抽出

csv_list <- list.files(path = "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4",
                       pattern = ".csv.gz$",
                       full.names = T)
csv_list
##  [1] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/ADMISSIONS.csv.gz"        
##  [2] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CALLOUT.csv.gz"           
##  [3] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CAREGIVERS.csv.gz"        
##  [4] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CHARTEVENTS.csv.gz"       
##  [5] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CPTEVENTS.csv.gz"         
##  [6] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_CPT.csv.gz"             
##  [7] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ICD_DIAGNOSES.csv.gz"   
##  [8] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ICD_PROCEDURES.csv.gz"  
##  [9] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ITEMS.csv.gz"           
## [10] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_LABITEMS.csv.gz"        
## [11] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DATETIMEEVENTS.csv.gz"    
## [12] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DIAGNOSES_ICD.csv.gz"     
## [13] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DRGCODES.csv.gz"          
## [14] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/ICUSTAYS.csv.gz"          
## [15] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/INPUTEVENTS_CV.csv.gz"    
## [16] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/INPUTEVENTS_MV.csv.gz"    
## [17] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/LABEVENTS.csv.gz"         
## [18] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/MICROBIOLOGYEVENTS.csv.gz"
## [19] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/NOTEEVENTS.csv.gz"        
## [20] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/OUTPUTEVENTS.csv.gz"      
## [21] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PATIENTS.csv.gz"          
## [22] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PRESCRIPTIONS.csv.gz"     
## [23] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PROCEDUREEVENTS_MV.csv.gz"
## [24] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PROCEDURES_ICD.csv.gz"    
## [25] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/SERVICES.csv.gz"          
## [26] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/TRANSFERS.csv.gz"
short_list <- list.files(path = "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4",
                       pattern = ".csv.gz$",
                       full.names = F)
name_list <- short_list |> str_remove_all(pattern = ".csv.gz")
name_list <- name_list |> str_to_lower()
name_list
##  [1] "admissions"         "callout"            "caregivers"        
##  [4] "chartevents"        "cptevents"          "d_cpt"             
##  [7] "d_icd_diagnoses"    "d_icd_procedures"   "d_items"           
## [10] "d_labitems"         "datetimeevents"     "diagnoses_icd"     
## [13] "drgcodes"           "icustays"           "inputevents_cv"    
## [16] "inputevents_mv"     "labevents"          "microbiologyevents"
## [19] "noteevents"         "outputevents"       "patients"          
## [22] "prescriptions"      "procedureevents_mv" "procedures_icd"    
## [25] "services"           "transfers"

各ファイルををDB上に登録する

一回読んだら二度と読まない!

append = TRUEなので、同じ処理がエラーにならずにデータ量が追加されていく。危険。

name_list名で各ファイルをDB上に登録する

#for (i in seq_along(name_list)) {
#callback <- function(df, pos) {
#   dbWriteTable(con_mimic, name_list[i], df, append = TRUE)
# }
# 
## デフォルトではチャンクサイズは 10,000 レコードずつ。
# read_csv_chunked(csv_list[i],
#                  DataFrameCallback$new(callback),
#                  chunk_size = 100000)
# }

データベース内に格納されているファイルの確認

dbListTables(con_mimic)
##  [1] "admissions"         "callout"            "caregivers"        
##  [4] "chartevents"        "cptevents"          "d_cpt"             
##  [7] "d_icd_diagnoses"    "d_icd_procedures"   "d_items"           
## [10] "d_labitems"         "datetimeevents"     "diagnoses_icd"     
## [13] "drgcodes"           "icustays"           "inputevents_cv"    
## [16] "inputevents_mv"     "labevents"          "microbiologyevents"
## [19] "noteevents"         "outputevents"       "patients"          
## [22] "prescriptions"      "procedureevents_mv" "procedures_icd"    
## [25] "services"           "transfers"

各データへのアクセスを作成

db_adm<- tbl(con_mimic, "admissions")

1.行数を数える

db_adm |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 58976

2.列名を表示する

db_adm |> colnames()
##  [1] "ROW_ID"               "SUBJECT_ID"           "HADM_ID"             
##  [4] "ADMITTIME"            "DISCHTIME"            "DEATHTIME"           
##  [7] "ADMISSION_TYPE"       "ADMISSION_LOCATION"   "DISCHARGE_LOCATION"  
## [10] "INSURANCE"            "LANGUAGE"             "RELIGION"            
## [13] "MARITAL_STATUS"       "ETHNICITY"            "EDREGTIME"           
## [16] "EDOUTTIME"            "DIAGNOSIS"            "HOSPITAL_EXPIRE_FLAG"
## [19] "HAS_CHARTEVENTS_DATA"

3.先頭の数行表示する

db_adm |> head(5)
## # Source:   SQL [5 x 19]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##   ROW_ID SUBJE…¹ HADM_ID ADMIT…² DISCH…³ DEATH…⁴ ADMIS…⁵ ADMIS…⁶ DISCH…⁷ INSUR…⁸
##    <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <chr>   <chr>   <chr>   <chr>  
## 1     21      22  165315  7.14e9  7.14e9      NA EMERGE… EMERGE… DISC-T… Private
## 2     22      23  152223  5.80e9  5.80e9      NA ELECTI… PHYS R… HOME H… Medica…
## 3     23      23  124321  5.93e9  5.93e9      NA EMERGE… TRANSF… HOME H… Medica…
## 4     24      24  161859  5.35e9  5.35e9      NA EMERGE… TRANSF… HOME    Private
## 5     25      25  129635  6.02e9  6.02e9      NA EMERGE… EMERGE… HOME    Private
## # … with 9 more variables: LANGUAGE <chr>, RELIGION <chr>,
## #   MARITAL_STATUS <chr>, ETHNICITY <chr>, EDREGTIME <dbl>, EDOUTTIME <dbl>,
## #   DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, HAS_CHARTEVENTS_DATA <dbl>,
## #   and abbreviated variable names ¹​SUBJECT_ID, ²​ADMITTIME, ³​DISCHTIME,
## #   ⁴​DEATHTIME, ⁵​ADMISSION_TYPE, ⁶​ADMISSION_LOCATION, ⁷​DISCHARGE_LOCATION,
## #   ⁸​INSURANCE

4.特定のユニーク数を数える

db_adm |> distinct(SUBJECT_ID) |> count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 46520

5.処理した後のデータを一時的にmimic.dbに保存する

6.データをメモリに上げる

db_call<- tbl(con_mimic,"callout")
db_care<- tbl(con_mimic, "caregivers")
db_chart<- tbl(con_mimic, "chartevents")
db_cpt<- tbl(con_mimic, "cptevents")
master_cpt<- tbl(con_mimic, "d_cpt")
master_icd_d<- tbl(con_mimic, "d_icd_diagnoses")
master_icd_p<- tbl(con_mimic, "d_icd_procedures")
master_item<- tbl(con_mimic, "d_items")
master_lab<- tbl(con_mimic, "d_labitems")
db_datetime<- tbl(con_mimic, "datetimeevents")
db_diagnose<- tbl(con_mimic, "diagnoses_icd")
db_drg<- tbl(con_mimic, "drgcodes")
db_icustay<- tbl(con_mimic, "icustays")
db_inpcv<- tbl(con_mimic, "inputevents_cv")
db_inpmv<- tbl(con_mimic, "inputevents_mv")
db_lab<- tbl(con_mimic, "labevents")
db_micro<- tbl(con_mimic, "microbiologyevents")
db_noteeve<- tbl(con_mimic, "noteevents")
db_outputeve<- tbl(con_mimic, "outputevents")
db_patient<- tbl(con_mimic, "patients")
db_presc<- tbl(con_mimic, "prescriptions")
db_promv<- tbl(con_mimic, "procedureevents_mv")
db_proicd<- tbl(con_mimic, "procedures_icd")
db_serv<- tbl(con_mimic, "services")
db_trans<- tbl(con_mimic, "transfers")

distinct関数でuniqueなものを探す

db_adm %>% distinct(ROW_ID) %>% count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 58976
db_adm %>% distinct(ADMITTIME) %>% count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 58651
db_adm %>% distinct(HADM_ID) %>% count()
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 58976

なぜcharacterへ変換するのか?? mutate_at()

db_adm |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
Data summary
Name mutate_if(db_adm, is.nume…
Number of rows 58976
Number of columns 19
_______________________
Column type frequency:
character 19
________________________
Group variables None

Variable type: character

skim_variable n_missing complete_rate min max empty n_unique whitespace
ROW_ID 0 1.00 3 7 0 58976 0
SUBJECT_ID 0 1.00 3 7 0 46520 0
HADM_ID 0 1.00 8 8 0 58976 0
ADMITTIME 0 1.00 12 12 0 58651 0
DISCHTIME 0 1.00 12 12 0 58657 0
DEATHTIME 53122 0.10 12 12 0 5834 0
ADMISSION_TYPE 0 1.00 6 9 0 4 0
ADMISSION_LOCATION 0 1.00 17 25 0 9 0
DISCHARGE_LOCATION 0 1.00 3 25 0 17 0
INSURANCE 0 1.00 7 10 0 5 0
LANGUAGE 25332 0.57 4 4 0 75 0
RELIGION 458 0.99 5 22 0 20 0
MARITAL_STATUS 10128 0.83 6 17 0 7 0
ETHNICITY 0 1.00 5 56 0 41 0
EDREGTIME 28099 0.52 12 12 0 30874 0
EDOUTTIME 28099 0.52 12 12 0 30864 0
DIAGNOSIS 25 1.00 2 190 0 15646 0
HOSPITAL_EXPIRE_FLAG 0 1.00 3 3 0 2 0
HAS_CHARTEVENTS_DATA 0 1.00 3 3 0 2 0

こちらで一覧にみればざっくりしたヒストグラムも見える。#skimが外観把握でよい。

view(dfSummary(db_adm))
## db_adm was converted to a data frame
## Switching method to 'browser'
## Output file written: /var/folders/n9/tf_wmwpn3gl2t7l1cz4tqk000000gn/T//RtmpnGKAwv/file6fa122cd75a.html

年齢の作成

年齢の情報の作成 欠測の情報の確認どうする?? まずはleft_joinしてadmにpatietを入れ込む

ds_age1<-left_join(db_adm,db_patient,by="SUBJECT_ID")
glimpse(ds_age1)
## Rows: ??
## Columns: 26
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x             <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID           <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID              <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME            <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME            <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE       <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION   <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION   <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE            <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE             <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION             <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS       <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY            <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME            <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME            <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS            <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y             <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER               <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB                  <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD                  <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN              <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG          <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…
glimpse(ds_age1)
## Rows: ??
## Columns: 26
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x             <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID           <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID              <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME            <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME            <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE       <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION   <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION   <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE            <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE             <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION             <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS       <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY            <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME            <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME            <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS            <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y             <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER               <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB                  <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD                  <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN              <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG          <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…

365.242でもよい。

ds_age <- ds_age1 %>% 
mutate(age=(ADMITTIME - DOB)/(60*60*24*365.25))  

年齢をfloorで切り捨てしたらダメ??

glimpse(ds_age)
## Rows: ??
## Columns: 27
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x             <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID           <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID              <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME            <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME            <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE       <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION   <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION   <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE            <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE             <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION             <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS       <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY            <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME            <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME            <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS            <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y             <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER               <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB                  <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD                  <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN              <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG          <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…
## $ age                  <dbl> 6.492681e+01, 7.113019e+01, 7.525480e+01, 3.90162…
ds_age
## # Source:   SQL [?? x 27]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##    ROW_ID.x SUBJECT_ID HADM_ID ADMITTIME DISCH…¹ DEATH…² ADMIS…³ ADMIS…⁴ DISCH…⁵
##       <dbl>      <dbl>   <dbl>     <dbl>   <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1       21         22  165315    7.14e9  7.14e9 NA      EMERGE… EMERGE… DISC-T…
##  2       22         23  152223    5.80e9  5.80e9 NA      ELECTI… PHYS R… HOME H…
##  3       23         23  124321    5.93e9  5.93e9 NA      EMERGE… TRANSF… HOME H…
##  4       24         24  161859    5.35e9  5.35e9 NA      EMERGE… TRANSF… HOME   
##  5       25         25  129635    6.02e9  6.02e9 NA      EMERGE… EMERGE… HOME   
##  6       26         26  197661    4.93e9  4.93e9 NA      EMERGE… TRANSF… HOME   
##  7       27         27  134931    7.00e9  7.00e9 NA      NEWBORN PHYS R… HOME   
##  8       28         28  162569    6.55e9  6.55e9 NA      ELECTI… PHYS R… HOME H…
##  9       29         30  104557    6.40e9  6.40e9 NA      URGENT  TRANSF… HOME H…
## 10       30         31  128652    4.38e9  4.38e9  4.38e9 EMERGE… TRANSF… DEAD/E…
## # … with more rows, 18 more variables: INSURANCE <chr>, LANGUAGE <chr>,
## #   RELIGION <chr>, MARITAL_STATUS <chr>, ETHNICITY <chr>, EDREGTIME <dbl>,
## #   EDOUTTIME <dbl>, DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## #   HAS_CHARTEVENTS_DATA <dbl>, ROW_ID.y <dbl>, GENDER <chr>, DOB <dbl>,
## #   DOD <dbl>, DOD_HOSP <dbl>, DOD_SSN <dbl>, EXPIRE_FLAG <dbl>, age <dbl>, and
## #   abbreviated variable names ¹​DISCHTIME, ²​DEATHTIME, ³​ADMISSION_TYPE,
## #   ⁴​ADMISSION_LOCATION, ⁵​DISCHARGE_LOCATION

89歳以上は300歳  bin_width

ds_age %>% 
distinct(SUBJECT_ID, .keep_all = T) %>% 
ggplot(aes(x=age))+geom_histogram(color= "black")+theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

こうやってもデータベースには格納されない、、、 一個ずつ作ったデータを新しい名前をつけて格納するしかないのか??

# ds_age1$BIRTH <- ds_age1 |> mutate(birth = DOB/(60*60*24))

70歳で層別してみる

ds_age %>% 
  mutate(age_70=ifelse(age>70,"over70","under70")) ->ds_age2
glimpse(ds_age2)
## Rows: ??
## Columns: 28
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x             <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID           <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID              <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME            <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME            <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME            <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE       <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION   <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION   <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE            <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE             <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION             <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS       <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY            <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME            <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME            <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS            <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y             <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER               <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB                  <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD                  <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP             <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN              <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG          <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…
## $ age                  <dbl> 6.492681e+01, 7.113019e+01, 7.525480e+01, 3.90162…
## $ age_70               <chr> "under70", "over70", "over70", "under70", "under7…
ds_age2  |> 
collect() |>  #.dbには直前にcollect()しておかないとtableone使えない
  CreateTableOne(vars = "EXPIRE_FLAG", 
                 strata = c("GENDER", "age_70"), factorVars = "EXPIRE_FLAG",test=FALSE) ->table
print(table) |> as.data.frame()
##                      Stratified by GENDER:age_70
##                       F:over70      M:over70      F:under70     M:under70    
##   n                   10224         10476         15802         22474        
##   EXPIRE_FLAG = 1 (%)  6221 (60.8)   6391 (61.0)   4015 (25.4)   5959 (26.5)
##                          F:over70      M:over70     F:under70     M:under70
## n                           10224         10476         15802         22474
## EXPIRE_FLAG = 1 (%)  6221 (60.8)   6391 (61.0)   4015 (25.4)   5959 (26.5)

死亡割合は男女差なさそう。

——————————————————————————-

心不全のICD-9コードを確認する。ただ、これは論文から抽出とする

View(master_icd_d)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE): 命
## 令 ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/Resources/modules/
## R_de.so'' の実行は状態 1 を持ちました
colnames(db_diagnose)
## [1] "ROW_ID"     "SUBJECT_ID" "HADM_ID"    "SEQ_NUM"    "ICD9_CODE"

ICU入室患者とdb_diagnoseをHADM_IDで結合する

count(db_icustay)
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 61532
db_ICU <- 
db_icustay %>% left_join(db_diagnose,by="HADM_ID")
db_ICU
## # Source:   SQL [?? x 16]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##    ROW_ID.x SUBJECT_ID.x HADM_ID ICUST…¹ DBSOU…² FIRST…³ LAST_…⁴ FIRST…⁵ LAST_…⁶
##       <dbl>        <dbl>   <dbl>   <dbl> <chr>   <chr>   <chr>     <dbl>   <dbl>
##  1      365          268  110404  280836 carevue MICU    MICU         52      52
##  2      365          268  110404  280836 carevue MICU    MICU         52      52
##  3      365          268  110404  280836 carevue MICU    MICU         52      52
##  4      365          268  110404  280836 carevue MICU    MICU         52      52
##  5      365          268  110404  280836 carevue MICU    MICU         52      52
##  6      365          268  110404  280836 carevue MICU    MICU         52      52
##  7      365          268  110404  280836 carevue MICU    MICU         52      52
##  8      365          268  110404  280836 carevue MICU    MICU         52      52
##  9      365          268  110404  280836 carevue MICU    MICU         52      52
## 10      366          269  106296  206613 carevue MICU    MICU         52      52
## # … with more rows, 7 more variables: INTIME <dbl>, OUTTIME <dbl>, LOS <dbl>,
## #   ROW_ID.y <dbl>, SUBJECT_ID.y <dbl>, SEQ_NUM <dbl>, ICD9_CODE <chr>, and
## #   abbreviated variable names ¹​ICUSTAY_ID, ²​DBSOURCE, ³​FIRST_CAREUNIT,
## #   ⁴​LAST_CAREUNIT, ⁵​FIRST_WARDID, ⁶​LAST_WARDID
count(db_ICU)
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##        n
##    <int>
## 1 705921

ICU入室から心不全患者を抽出する。

ds_ICU_HF <- db_ICU %>%  filter(ICD9_CODE %in% "4280", "4281", "4289", "39891", "40201", "40211", "40291", "40401", "40403", "40411", "40413", "40491","40493", "42820", "42821", "42822", "42823", "42830", "42831", "42832", "42833", "42840", "42841", "42842", "42843")
count(ds_ICU_HF)
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 14226
ds_ICU_HF_1<-ds_ICU_HF|> distinct(HADM_ID) 
count(ds_ICU_HF_1)
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 12951

年齢18歳以上にする

ds_age
## # Source:   SQL [?? x 27]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##    ROW_ID.x SUBJECT_ID HADM_ID ADMITTIME DISCH…¹ DEATH…² ADMIS…³ ADMIS…⁴ DISCH…⁵
##       <dbl>      <dbl>   <dbl>     <dbl>   <dbl>   <dbl> <chr>   <chr>   <chr>  
##  1       21         22  165315    7.14e9  7.14e9 NA      EMERGE… EMERGE… DISC-T…
##  2       22         23  152223    5.80e9  5.80e9 NA      ELECTI… PHYS R… HOME H…
##  3       23         23  124321    5.93e9  5.93e9 NA      EMERGE… TRANSF… HOME H…
##  4       24         24  161859    5.35e9  5.35e9 NA      EMERGE… TRANSF… HOME   
##  5       25         25  129635    6.02e9  6.02e9 NA      EMERGE… EMERGE… HOME   
##  6       26         26  197661    4.93e9  4.93e9 NA      EMERGE… TRANSF… HOME   
##  7       27         27  134931    7.00e9  7.00e9 NA      NEWBORN PHYS R… HOME   
##  8       28         28  162569    6.55e9  6.55e9 NA      ELECTI… PHYS R… HOME H…
##  9       29         30  104557    6.40e9  6.40e9 NA      URGENT  TRANSF… HOME H…
## 10       30         31  128652    4.38e9  4.38e9  4.38e9 EMERGE… TRANSF… DEAD/E…
## # … with more rows, 18 more variables: INSURANCE <chr>, LANGUAGE <chr>,
## #   RELIGION <chr>, MARITAL_STATUS <chr>, ETHNICITY <chr>, EDREGTIME <dbl>,
## #   EDOUTTIME <dbl>, DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## #   HAS_CHARTEVENTS_DATA <dbl>, ROW_ID.y <dbl>, GENDER <chr>, DOB <dbl>,
## #   DOD <dbl>, DOD_HOSP <dbl>, DOD_SSN <dbl>, EXPIRE_FLAG <dbl>, age <dbl>, and
## #   abbreviated variable names ¹​DISCHTIME, ²​DEATHTIME, ³​ADMISSION_TYPE,
## #   ⁴​ADMISSION_LOCATION, ⁵​DISCHARGE_LOCATION
ds_ICU_HF_18 <- 
  ds_ICU_HF_1 %>% left_join(ds_age,by="HADM_ID" ) %>% 
  filter(age>=18)
count(ds_ICU_HF_18) 
## # Source:   SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
##       n
##   <int>
## 1 12937

水原先生 ここまでです。