##パッケージのlibrary
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.5
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.4.1
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(RSQLite)
library(lubridate)
##
## 次のパッケージを付け加えます: 'lubridate'
##
## 以下のオブジェクトは 'package:base' からマスクされています:
##
## date, intersect, setdiff, union
library(tableone)
con_mimic <- dbConnect(SQLite(),"/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db")
dbListTables(con_mimic)
## [1] "admissions" "callout" "caregivers"
## [4] "chartevents" "cptevents" "d_cpt"
## [7] "d_icd_diagnoses" "d_icd_procedures" "d_items"
## [10] "d_labitems" "datetimeevents" "diagnoses_icd"
## [13] "drgcodes" "icustays" "inputevents_cv"
## [16] "inputevents_mv" "labevents" "microbiologyevents"
## [19] "noteevents" "outputevents" "patients"
## [22] "prescriptions" "procedureevents_mv" "procedures_icd"
## [25] "services" "transfers"
csv_list <- list.files(path = "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4",
pattern = ".csv.gz$",
full.names = T)
csv_list
## [1] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/ADMISSIONS.csv.gz"
## [2] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CALLOUT.csv.gz"
## [3] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CAREGIVERS.csv.gz"
## [4] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CHARTEVENTS.csv.gz"
## [5] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/CPTEVENTS.csv.gz"
## [6] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_CPT.csv.gz"
## [7] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ICD_DIAGNOSES.csv.gz"
## [8] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ICD_PROCEDURES.csv.gz"
## [9] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_ITEMS.csv.gz"
## [10] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/D_LABITEMS.csv.gz"
## [11] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DATETIMEEVENTS.csv.gz"
## [12] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DIAGNOSES_ICD.csv.gz"
## [13] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/DRGCODES.csv.gz"
## [14] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/ICUSTAYS.csv.gz"
## [15] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/INPUTEVENTS_CV.csv.gz"
## [16] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/INPUTEVENTS_MV.csv.gz"
## [17] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/LABEVENTS.csv.gz"
## [18] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/MICROBIOLOGYEVENTS.csv.gz"
## [19] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/NOTEEVENTS.csv.gz"
## [20] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/OUTPUTEVENTS.csv.gz"
## [21] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PATIENTS.csv.gz"
## [22] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PRESCRIPTIONS.csv.gz"
## [23] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PROCEDUREEVENTS_MV.csv.gz"
## [24] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/PROCEDURES_ICD.csv.gz"
## [25] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/SERVICES.csv.gz"
## [26] "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4/TRANSFERS.csv.gz"
short_list <- list.files(path = "/Users/ichitachikamasa/Desktop/BicDatas/mimic_iii_clinical_database_1.4",
pattern = ".csv.gz$",
full.names = F)
name_list <- short_list |> str_remove_all(pattern = ".csv.gz")
name_list <- name_list |> str_to_lower()
name_list
## [1] "admissions" "callout" "caregivers"
## [4] "chartevents" "cptevents" "d_cpt"
## [7] "d_icd_diagnoses" "d_icd_procedures" "d_items"
## [10] "d_labitems" "datetimeevents" "diagnoses_icd"
## [13] "drgcodes" "icustays" "inputevents_cv"
## [16] "inputevents_mv" "labevents" "microbiologyevents"
## [19] "noteevents" "outputevents" "patients"
## [22] "prescriptions" "procedureevents_mv" "procedures_icd"
## [25] "services" "transfers"
一回読んだら二度と読まない!
append = TRUEなので、同じ処理がエラーにならずにデータ量が追加されていく。危険。
name_list名で各ファイルをDB上に登録する
#for (i in seq_along(name_list)) {
#callback <- function(df, pos) {
# dbWriteTable(con_mimic, name_list[i], df, append = TRUE)
# }
#
## デフォルトではチャンクサイズは 10,000 レコードずつ。
# read_csv_chunked(csv_list[i],
# DataFrameCallback$new(callback),
# chunk_size = 100000)
# }
データベース内に格納されているファイルの確認
dbListTables(con_mimic)
## [1] "admissions" "callout" "caregivers"
## [4] "chartevents" "cptevents" "d_cpt"
## [7] "d_icd_diagnoses" "d_icd_procedures" "d_items"
## [10] "d_labitems" "datetimeevents" "diagnoses_icd"
## [13] "drgcodes" "icustays" "inputevents_cv"
## [16] "inputevents_mv" "labevents" "microbiologyevents"
## [19] "noteevents" "outputevents" "patients"
## [22] "prescriptions" "procedureevents_mv" "procedures_icd"
## [25] "services" "transfers"
db_adm<- tbl(con_mimic, "admissions")
db_adm |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 58976
db_adm |> colnames()
## [1] "ROW_ID" "SUBJECT_ID" "HADM_ID"
## [4] "ADMITTIME" "DISCHTIME" "DEATHTIME"
## [7] "ADMISSION_TYPE" "ADMISSION_LOCATION" "DISCHARGE_LOCATION"
## [10] "INSURANCE" "LANGUAGE" "RELIGION"
## [13] "MARITAL_STATUS" "ETHNICITY" "EDREGTIME"
## [16] "EDOUTTIME" "DIAGNOSIS" "HOSPITAL_EXPIRE_FLAG"
## [19] "HAS_CHARTEVENTS_DATA"
db_adm |> head(5)
## # Source: SQL [5 x 19]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## ROW_ID SUBJE…¹ HADM_ID ADMIT…² DISCH…³ DEATH…⁴ ADMIS…⁵ ADMIS…⁶ DISCH…⁷ INSUR…⁸
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <chr> <chr> <chr> <chr>
## 1 21 22 165315 7.14e9 7.14e9 NA EMERGE… EMERGE… DISC-T… Private
## 2 22 23 152223 5.80e9 5.80e9 NA ELECTI… PHYS R… HOME H… Medica…
## 3 23 23 124321 5.93e9 5.93e9 NA EMERGE… TRANSF… HOME H… Medica…
## 4 24 24 161859 5.35e9 5.35e9 NA EMERGE… TRANSF… HOME Private
## 5 25 25 129635 6.02e9 6.02e9 NA EMERGE… EMERGE… HOME Private
## # … with 9 more variables: LANGUAGE <chr>, RELIGION <chr>,
## # MARITAL_STATUS <chr>, ETHNICITY <chr>, EDREGTIME <dbl>, EDOUTTIME <dbl>,
## # DIAGNOSIS <chr>, HOSPITAL_EXPIRE_FLAG <dbl>, HAS_CHARTEVENTS_DATA <dbl>,
## # and abbreviated variable names ¹SUBJECT_ID, ²ADMITTIME, ³DISCHTIME,
## # ⁴DEATHTIME, ⁵ADMISSION_TYPE, ⁶ADMISSION_LOCATION, ⁷DISCHARGE_LOCATION,
## # ⁸INSURANCE
db_adm |> distinct(SUBJECT_ID) |> count()
## # Source: SQL [1 x 1]
## # Database: sqlite 3.39.4 [/Users/ichitachikamasa/Desktop/RSQLite/temp/mimic.db]
## n
## <int>
## 1 46520
db_call<- tbl(con_mimic,"callout")
db_care<- tbl(con_mimic, "caregivers")
db_chart<- tbl(con_mimic, "chartevents")
db_cpt<- tbl(con_mimic, "cptevents")
master_cpt<- tbl(con_mimic, "d_cpt")
master_icd_d<- tbl(con_mimic, "d_icd_diagnoses")
master_icd_p<- tbl(con_mimic, "d_icd_procedures")
master_item<- tbl(con_mimic, "d_items")
master_lab<- tbl(con_mimic, "d_labitems")
db_datetime<- tbl(con_mimic, "datetimeevents")
db_diagnose<- tbl(con_mimic, "diagnoses_icd")
db_drg<- tbl(con_mimic, "drgcodes")
db_icustay<- tbl(con_mimic, "icustays")
db_inpcv<- tbl(con_mimic, "inputevents_cv")
db_inpmv<- tbl(con_mimic, "inputevents_mv")
db_lab<- tbl(con_mimic, "labevents")
db_micro<- tbl(con_mimic, "microbiologyevents")
db_noteeve<- tbl(con_mimic, "noteevents")
db_outputeve<- tbl(con_mimic, "outputevents")
db_patient<- tbl(con_mimic, "patients")
db_presc<- tbl(con_mimic, "prescriptions")
db_promv<- tbl(con_mimic, "procedureevents_mv")
db_proicd<- tbl(con_mimic, "procedures_icd")
db_serv<- tbl(con_mimic, "services")
db_trans<- tbl(con_mimic, "transfers")