##パッケージのlibrary
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(RSQLite)
library(lubridate)
##
## 次のパッケージを付け加えます: 'lubridate'
##
## 以下のオブジェクトは 'package:base' からマスクされています:
##
## date, intersect, setdiff, union
library(tableone)
library(skimr)
library(svglite)
library(haven)
library(summarytools)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE): 命令
## ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/Resources/library/tcltk/
## libs//tcltk.so'' の実行は状態 1 を持ちました
##
## 次のパッケージを付け加えます: 'summarytools'
##
## 以下のオブジェクトは 'package:tibble' からマスクされています:
##
## view
library(naniar)
##
## 次のパッケージを付け加えます: 'naniar'
##
## 以下のオブジェクトは 'package:skimr' からマスクされています:
##
## n_complete
library(devtools)
## 要求されたパッケージ usethis をロード中です
library(reader)
## 要求されたパッケージ NCmisc をロード中です
##
## 次のパッケージを付け加えます: 'reader'
##
## 以下のオブジェクトは 'package:NCmisc' からマスクされています:
##
## cat.path, get.ext, rmv.ext
library(stringr)
library(missForest)
con_mimic <- dbConnect(SQLite(),"/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db")
dbListTables(con_mimic)
## [1] "admissions" "callout" "caregivers"
## [4] "chartevents" "cptevents" "d_cpt"
## [7] "d_icd_diagnoses" "d_icd_procedures" "d_items"
## [10] "d_labitems" "datetimeevents" "diagnoses_icd"
## [13] "drgcodes" "icustays" "inputevents_cv"
## [16] "inputevents_mv" "labevents" "microbiologyevents"
## [19] "noteevents" "outputevents" "patients"
## [22] "prescriptions" "procedureevents_mv" "procedures_icd"
## [25] "services" "transfers"
csv_list <- list.files(path = "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4",
pattern = ".csv.gz$",
full.names = T)
csv_list
## [1] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/ADMISSIONS.csv.gz"
## [2] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CALLOUT.csv.gz"
## [3] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CAREGIVERS.csv.gz"
## [4] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CHARTEVENTS.csv.gz"
## [5] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CPTEVENTS.csv.gz"
## [6] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_CPT.csv.gz"
## [7] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ICD_DIAGNOSES.csv.gz"
## [8] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ICD_PROCEDURES.csv.gz"
## [9] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ITEMS.csv.gz"
## [10] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_LABITEMS.csv.gz"
## [11] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DATETIMEEVENTS.csv.gz"
## [12] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DIAGNOSES_ICD.csv.gz"
## [13] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DRGCODES.csv.gz"
## [14] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/ICUSTAYS.csv.gz"
## [15] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/INPUTEVENTS_CV.csv.gz"
## [16] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/INPUTEVENTS_MV.csv.gz"
## [17] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/LABEVENTS.csv.gz"
## [18] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/MICROBIOLOGYEVENTS.csv.gz"
## [19] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/NOTEEVENTS.csv.gz"
## [20] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/OUTPUTEVENTS.csv.gz"
## [21] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PATIENTS.csv.gz"
## [22] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PRESCRIPTIONS.csv.gz"
## [23] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PROCEDUREEVENTS_MV.csv.gz"
## [24] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PROCEDURES_ICD.csv.gz"
## [25] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/SERVICES.csv.gz"
## [26] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/TRANSFERS.csv.gz"
short_list <- list.files(path = "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4",
pattern = ".csv.gz$",
full.names = F)
name_list <- short_list |> str_remove_all(pattern = ".csv.gz")
name_list <- name_list |> str_to_lower()
name_list
## [1] "admissions" "callout" "caregivers"
## [4] "chartevents" "cptevents" "d_cpt"
## [7] "d_icd_diagnoses" "d_icd_procedures" "d_items"
## [10] "d_labitems" "datetimeevents" "diagnoses_icd"
## [13] "drgcodes" "icustays" "inputevents_cv"
## [16] "inputevents_mv" "labevents" "microbiologyevents"
## [19] "noteevents" "outputevents" "patients"
## [22] "prescriptions" "procedureevents_mv" "procedures_icd"
## [25] "services" "transfers"
一回読んだら二度と読まない!
append = TRUEなので、同じ処理がエラーにならずにデータ量が追加されていく。危険。
name_list名で各ファイルをDB上に登録する
#for (i in seq_along(name_list)) {
#callback <- function(df, pos) {
# dbWriteTable(con_mimic, name_list[i], df, append = TRUE)
# }
#
## デフォルトではチャンクサイズは 10,000 レコードずつ。
# read_csv_chunked(csv_list[i],
# DataFrameCallback$new(callback),
# chunk_size = 100000)
# }
データベース内に格納されているファイルの確認
dbListTables(con_mimic)
## [1] "admissions" "callout" "caregivers"
## [4] "chartevents" "cptevents" "d_cpt"
## [7] "d_icd_diagnoses" "d_icd_procedures" "d_items"
## [10] "d_labitems" "datetimeevents" "diagnoses_icd"
## [13] "drgcodes" "icustays" "inputevents_cv"
## [16] "inputevents_mv" "labevents" "microbiologyevents"
## [19] "noteevents" "outputevents" "patients"
## [22] "prescriptions" "procedureevents_mv" "procedures_icd"
## [25] "services" "transfers"
db_adm<- tbl(con_mimic, "admissions")
db_adm |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 58976
db_adm |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID"
## [4] "ADMITTIME" "DISCHTIME" "DEATHTIME"
## [7] "ADMISSION_TYPE" "ADMISSION_LOCATION" "DISCHARGE_LOCATION"
## [10] "INSURANCE" "LANGUAGE" "RELIGION"
## [13] "MARITAL_STATUS" "ETHNICITY" "EDREGTIME"
## [16] "EDOUTTIME" "DIAGNOSIS" "HOSPITAL_EXPIRE_FLAG"
## [19] "HAS_CHARTEVENTS_DATA"
db_adm |> head(5)
## # Source: SQL [5 x 19]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## ROW_ID SUBJE…¹ HADM_ID ADMIT…² DISCH…³ DEATH…⁴ ADMIS…⁵ ADMIS…⁶ DISCH…⁷ INSUR…⁸
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 21 22 165315 7.14e9 7.14e9 NA EMERGE… EMERGE… DISC-T… Private
## 2 22 23 152223 5.80e9 5.80e9 NA ELECTI… PHYS R… HOME H… Medica…
## 3 23 23 124321 5.93e9 5.93e9 NA EMERGE… TRANSF… HOME H… Medica…
## 4 24 24 161859 5.35e9 5.35e9 NA EMERGE… TRANSF… HOME Private
## 5 25 25 129635 6.02e9 6.02e9 NA EMERGE… EMERGE… HOME Private
## # … with 9 more variables: LANGUAGE <chr>, RELIGION <chr>,
## # MARITAL_STATUS <chr>, ETHNICITY <chr>, EDREGTIME <dbl>, EDOUTTIME <dbl>,
## # DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, HAS_CHARTEVENTS_DATA <dbl>,
## # and abbreviated variable names ¹SUBJECT_ID, ²ADMITTIME, ³DISCHTIME,
## # ⁴DEATHTIME, ⁵ADMISSION_TYPE, ⁶ADMISSION_LOCATION, ⁷DISCHARGE_LOCATION,
## # ⁸INSURANCE
db_adm |> distinct(SUBJECT_ID) |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 46520
db_call<- tbl(con_mimic,"callout")
db_care<- tbl(con_mimic, "caregivers")
db_chart<- tbl(con_mimic, "chartevents")
db_cpt<- tbl(con_mimic, "cptevents")
master_cpt<- tbl(con_mimic, "d_cpt")
master_icd_d<- tbl(con_mimic, "d_icd_diagnoses")
master_icd_p<- tbl(con_mimic, "d_icd_procedures")
master_item<- tbl(con_mimic, "d_items")
master_lab<- tbl(con_mimic, "d_labitems")
db_datetime<- tbl(con_mimic, "datetimeevents")
db_diagnose<- tbl(con_mimic, "diagnoses_icd")
db_drg<- tbl(con_mimic, "drgcodes")
db_icustay<- tbl(con_mimic, "icustays")
db_inpcv<- tbl(con_mimic, "inputevents_cv")
db_inpmv<- tbl(con_mimic, "inputevents_mv")
db_lab<- tbl(con_mimic, "labevents")
db_micro<- tbl(con_mimic, "microbiologyevents")
db_noteeve<- tbl(con_mimic, "noteevents")
db_outputeve<- tbl(con_mimic, "outputevents")
db_patient<- tbl(con_mimic, "patients")
db_presc<- tbl(con_mimic, "prescriptions")
db_promv<- tbl(con_mimic, "procedureevents_mv")
db_proicd<- tbl(con_mimic, "procedures_icd")
db_serv<- tbl(con_mimic, "services")
db_trans<- tbl(con_mimic, "transfers")
distinct関数でuniqueなものを探す
db_adm %>% distinct(ROW_ID) %>% count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 58976
db_adm %>% distinct(ADMITTIME) %>% count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 58651
db_adm %>% distinct(HADM_ID) %>% count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 58976
なぜcharacterへ変換するのか?? mutate_at()
db_adm |> mutate_if(is.numeric, as.character) |> skim()
## Applying predicate on the first 100 rows
| Name | mutate_if(db_adm, is.nume… |
| Number of rows | 58976 |
| Number of columns | 19 |
| _______________________ | |
| Column type frequency: | |
| character | 19 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| ROW_ID | 0 | 1.00 | 3 | 7 | 0 | 58976 | 0 |
| SUBJECT_ID | 0 | 1.00 | 3 | 7 | 0 | 46520 | 0 |
| HADM_ID | 0 | 1.00 | 8 | 8 | 0 | 58976 | 0 |
| ADMITTIME | 0 | 1.00 | 12 | 12 | 0 | 58651 | 0 |
| DISCHTIME | 0 | 1.00 | 12 | 12 | 0 | 58657 | 0 |
| DEATHTIME | 53122 | 0.10 | 12 | 12 | 0 | 5834 | 0 |
| ADMISSION_TYPE | 0 | 1.00 | 6 | 9 | 0 | 4 | 0 |
| ADMISSION_LOCATION | 0 | 1.00 | 17 | 25 | 0 | 9 | 0 |
| DISCHARGE_LOCATION | 0 | 1.00 | 3 | 25 | 0 | 17 | 0 |
| INSURANCE | 0 | 1.00 | 7 | 10 | 0 | 5 | 0 |
| LANGUAGE | 25332 | 0.57 | 4 | 4 | 0 | 75 | 0 |
| RELIGION | 458 | 0.99 | 5 | 22 | 0 | 20 | 0 |
| MARITAL_STATUS | 10128 | 0.83 | 6 | 17 | 0 | 7 | 0 |
| ETHNICITY | 0 | 1.00 | 5 | 56 | 0 | 41 | 0 |
| EDREGTIME | 28099 | 0.52 | 12 | 12 | 0 | 30874 | 0 |
| EDOUTTIME | 28099 | 0.52 | 12 | 12 | 0 | 30864 | 0 |
| DIAGNOSIS | 25 | 1.00 | 2 | 190 | 0 | 15646 | 0 |
| HOSPITAL_EXPIRE_FLAG | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
| HAS_CHARTEVENTS_DATA | 0 | 1.00 | 3 | 3 | 0 | 2 | 0 |
こちらで一覧にみればざっくりしたヒストグラムも見える。#skimが外観把握でよい。
view(dfSummary(db_adm))
## db_adm was converted to a data frame
## Switching method to 'browser'
## Output file written: /var/folders/n9/tf_wmwpn3gl2t7l1cz4tqk000000gn/T//RtmpnGKAwv/file6fa122cd75a.html
年齢の情報の作成 欠測の情報の確認どうする?? まずはleft_joinしてadmにpatietを入れ込む
ds_age1<-left_join(db_adm,db_patient,by="SUBJECT_ID")
glimpse(ds_age1)
## Rows: ??
## Columns: 26
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…
glimpse(ds_age1)
## Rows: ??
## Columns: 26
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…
365.242でもよい。
ds_age <- ds_age1 %>%
mutate(age=(ADMITTIME - DOB)/(60*60*24*365.25))
年齢をfloorで切り捨てしたらダメ??
glimpse(ds_age)
## Rows: ??
## Columns: 27
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…
## $ age <dbl> 6.492681e+01, 7.113019e+01, 7.525480e+01, 3.90162…
ds_age
## # Source: SQL [?? x 27]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## ROW_ID.x SUBJECT_ID HADM_ID ADMITTIME DISCH…¹ DEATH…² ADMIS…³ ADMIS…⁴ DISCH…⁵
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 21 22 165315 7.14e9 7.14e9 NA EMERGE… EMERGE… DISC-T…
## 2 22 23 152223 5.80e9 5.80e9 NA ELECTI… PHYS R… HOME H…
## 3 23 23 124321 5.93e9 5.93e9 NA EMERGE… TRANSF… HOME H…
## 4 24 24 161859 5.35e9 5.35e9 NA EMERGE… TRANSF… HOME
## 5 25 25 129635 6.02e9 6.02e9 NA EMERGE… EMERGE… HOME
## 6 26 26 197661 4.93e9 4.93e9 NA EMERGE… TRANSF… HOME
## 7 27 27 134931 7.00e9 7.00e9 NA NEWBORN PHYS R… HOME
## 8 28 28 162569 6.55e9 6.55e9 NA ELECTI… PHYS R… HOME H…
## 9 29 30 104557 6.40e9 6.40e9 NA URGENT TRANSF… HOME H…
## 10 30 31 128652 4.38e9 4.38e9 4.38e9 EMERGE… TRANSF… DEAD/E…
## # … with more rows, 18 more variables: INSURANCE <chr>, LANGUAGE <chr>,
## # RELIGION <chr>, MARITAL_STATUS <chr>, ETHNICITY <chr>, EDREGTIME <dbl>,
## # EDOUTTIME <dbl>, DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## # HAS_CHARTEVENTS_DATA <dbl>, ROW_ID.y <dbl>, GENDER <chr>, DOB <dbl>,
## # DOD <dbl>, DOD_HOSP <dbl>, DOD_SSN <dbl>, EXPIRE_FLAG <dbl>, age <dbl>, and
## # abbreviated variable names ¹DISCHTIME, ²DEATHTIME, ³ADMISSION_TYPE,
## # ⁴ADMISSION_LOCATION, ⁵DISCHARGE_LOCATION
89歳以上は300歳 bin_width
ds_age %>%
distinct(SUBJECT_ID, .keep_all = T) %>%
ggplot(aes(x=age))+geom_histogram(color= "black")+theme_bw()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
こうやってもデータベースには格納されない、、、 一個ずつ作ったデータを新しい名前をつけて格納するしかないのか??
# ds_age1$BIRTH <- ds_age1 |> mutate(birth = DOB/(60*60*24))
70歳で層別してみる
ds_age %>%
mutate(age_70=ifelse(age>70,"over70","under70")) ->ds_age2
glimpse(ds_age2)
## Rows: ??
## Columns: 28
## Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## $ ROW_ID.x <dbl> 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 3…
## $ SUBJECT_ID <dbl> 22, 23, 23, 24, 25, 26, 27, 28, 30, 31, 32, 33, 3…
## $ HADM_ID <dbl> 165315, 152223, 124321, 161859, 129635, 197661, 1…
## $ ADMITTIME <dbl> 7140486360, 5796170100, 5926332840, 5346663240, 6…
## $ DISCHTIME <dbl> 7140585240, 5796645000, 5926917600, 5346910080, 6…
## $ DEATHTIME <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375782000, N…
## $ ADMISSION_TYPE <chr> "EMERGENCY", "ELECTIVE", "EMERGENCY", "EMERGENCY"…
## $ ADMISSION_LOCATION <chr> "EMERGENCY ROOM ADMIT", "PHYS REFERRAL/NORMAL DEL…
## $ DISCHARGE_LOCATION <chr> "DISC-TRAN CANCER/CHLDRN H", "HOME HEALTH CARE", …
## $ INSURANCE <chr> "Private", "Medicare", "Medicare", "Private", "Pr…
## $ LANGUAGE <chr> NA, NA, "ENGL", NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ RELIGION <chr> "UNOBTAINABLE", "CATHOLIC", "CATHOLIC", "PROTESTA…
## $ MARITAL_STATUS <chr> "MARRIED", "MARRIED", "MARRIED", "SINGLE", "MARRI…
## $ ETHNICITY <chr> "WHITE", "WHITE", "WHITE", "WHITE", "WHITE", "UNK…
## $ EDREGTIME <dbl> 7140477960, NA, NA, NA, 6022256460, NA, NA, NA, N…
## $ EDOUTTIME <dbl> 7140489840, NA, NA, NA, 6022268820, NA, NA, NA, N…
## $ DIAGNOSIS <chr> "BENZODIAZEPINE OVERDOSE", "CORONARY ARTERY DISEA…
## $ HOSPITAL_EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0…
## $ HAS_CHARTEVENTS_DATA <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ ROW_ID.y <dbl> 19, 20, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 3…
## $ GENDER <chr> "F", "M", "M", "M", "M", "M", "F", "M", "M", "M",…
## $ DOB <dbl> 5091552000, 3551472000, 3551472000, 4115404800, 4…
## $ DOD <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ DOD_HOSP <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, 4375728000, N…
## $ DOD_SSN <dbl> NA, NA, NA, NA, NA, 4990723200, NA, NA, NA, 43757…
## $ EXPIRE_FLAG <dbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 1, 0, 0, 0…
## $ age <dbl> 6.492681e+01, 7.113019e+01, 7.525480e+01, 3.90162…
## $ age_70 <chr> "under70", "over70", "over70", "under70", "under7…
ds_age2 |>
collect() |> #.dbには直前にcollect()しておかないとtableone使えない
CreateTableOne(vars = "EXPIRE_FLAG",
strata = c("GENDER", "age_70"), factorVars = "EXPIRE_FLAG",test=FALSE) ->table
print(table) |> as.data.frame()
## Stratified by GENDER:age_70
## F:over70 M:over70 F:under70 M:under70
## n 10224 10476 15802 22474
## EXPIRE_FLAG = 1 (%) 6221 (60.8) 6391 (61.0) 4015 (25.4) 5959 (26.5)
## F:over70 M:over70 F:under70 M:under70
## n 10224 10476 15802 22474
## EXPIRE_FLAG = 1 (%) 6221 (60.8) 6391 (61.0) 4015 (25.4) 5959 (26.5)
死亡割合は男女差なさそう。
心不全のICD-9コードを確認する。ただ、これは論文から抽出とする
View(master_icd_d)
## Warning in system2("/usr/bin/otool", c("-L", shQuote(DSO)), stdout = TRUE): 命
## 令 ''/usr/bin/otool' -L '/Library/Frameworks/R.framework/Resources/modules/
## R_de.so'' の実行は状態 1 を持ちました
colnames(db_diagnose)
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID" "SEQ_NUM" "ICD9_CODE"
ICU入室患者とdb_diagnoseをHADM_IDで結合する
count(db_icustay)
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 61532
db_ICU <-
db_icustay %>% left_join(db_diagnose,by="HADM_ID")
db_ICU
## # Source: SQL [?? x 16]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## ROW_ID.x SUBJECT_ID.x HADM_ID ICUST…¹ DBSOU…² FIRST…³ LAST_…⁴ FIRST…⁵ LAST_…⁶
## <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <dbl> <dbl>
## 1 365 268 110404 280836 carevue MICU MICU 52 52
## 2 365 268 110404 280836 carevue MICU MICU 52 52
## 3 365 268 110404 280836 carevue MICU MICU 52 52
## 4 365 268 110404 280836 carevue MICU MICU 52 52
## 5 365 268 110404 280836 carevue MICU MICU 52 52
## 6 365 268 110404 280836 carevue MICU MICU 52 52
## 7 365 268 110404 280836 carevue MICU MICU 52 52
## 8 365 268 110404 280836 carevue MICU MICU 52 52
## 9 365 268 110404 280836 carevue MICU MICU 52 52
## 10 366 269 106296 206613 carevue MICU MICU 52 52
## # … with more rows, 7 more variables: INTIME <dbl>, OUTTIME <dbl>, LOS <dbl>,
## # ROW_ID.y <dbl>, SUBJECT_ID.y <dbl>, SEQ_NUM <dbl>, ICD9_CODE <chr>, and
## # abbreviated variable names ¹ICUSTAY_ID, ²DBSOURCE, ³FIRST_CAREUNIT,
## # ⁴LAST_CAREUNIT, ⁵FIRST_WARDID, ⁶LAST_WARDID
count(db_ICU)
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 705921
ICU入室から心不全患者を抽出する。
ds_ICU_HF <- db_ICU %>% filter(ICD9_CODE %in% "4280", "4281", "4289", "39891", "40201", "40211", "40291", "40401", "40403", "40411", "40413", "40491","40493", "42820", "42821", "42822", "42823", "42830", "42831", "42832", "42833", "42840", "42841", "42842", "42843")
count(ds_ICU_HF)
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 14226
ds_ICU_HF_1<-ds_ICU_HF|> distinct(HADM_ID)
count(ds_ICU_HF_1)
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 12951
年齢18歳以上にする
ds_age
## # Source: SQL [?? x 27]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## ROW_ID.x SUBJECT_ID HADM_ID ADMITTIME DISCH…¹ DEATH…² ADMIS…³ ADMIS…⁴ DISCH…⁵
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr>
## 1 21 22 165315 7.14e9 7.14e9 NA EMERGE… EMERGE… DISC-T…
## 2 22 23 152223 5.80e9 5.80e9 NA ELECTI… PHYS R… HOME H…
## 3 23 23 124321 5.93e9 5.93e9 NA EMERGE… TRANSF… HOME H…
## 4 24 24 161859 5.35e9 5.35e9 NA EMERGE… TRANSF… HOME
## 5 25 25 129635 6.02e9 6.02e9 NA EMERGE… EMERGE… HOME
## 6 26 26 197661 4.93e9 4.93e9 NA EMERGE… TRANSF… HOME
## 7 27 27 134931 7.00e9 7.00e9 NA NEWBORN PHYS R… HOME
## 8 28 28 162569 6.55e9 6.55e9 NA ELECTI… PHYS R… HOME H…
## 9 29 30 104557 6.40e9 6.40e9 NA URGENT TRANSF… HOME H…
## 10 30 31 128652 4.38e9 4.38e9 4.38e9 EMERGE… TRANSF… DEAD/E…
## # … with more rows, 18 more variables: INSURANCE <chr>, LANGUAGE <chr>,
## # RELIGION <chr>, MARITAL_STATUS <chr>, ETHNICITY <chr>, EDREGTIME <dbl>,
## # EDOUTTIME <dbl>, DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>,
## # HAS_CHARTEVENTS_DATA <dbl>, ROW_ID.y <dbl>, GENDER <chr>, DOB <dbl>,
## # DOD <dbl>, DOD_HOSP <dbl>, DOD_SSN <dbl>, EXPIRE_FLAG <dbl>, age <dbl>, and
## # abbreviated variable names ¹DISCHTIME, ²DEATHTIME, ³ADMISSION_TYPE,
## # ⁴ADMISSION_LOCATION, ⁵DISCHARGE_LOCATION
ds_ICU_HF_18 <-
ds_ICU_HF_1 %>% left_join(ds_age,by="HADM_ID" ) %>%
filter(age>=18)
count(ds_ICU_HF_18)
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 12937