Load data
# Load the combined notes extract from CSV with data.table::fread.
# NOTE(review): the file name suggests notes joined to stays, admissions and
# patients -- confirm against the export that produced it.
library("data.table")
dat <- fread(
  "~/goals_of_care/external_validation/NOTES_STAYS_ADM_PAT.csv",
  header = TRUE,            # spelled out: T/F are ordinary, reassignable variables
  stringsAsFactors = FALSE  # keep text columns as character
)
##
Read 0.0% of 385794 rows
Read 5.2% of 385794 rows
Read 10.4% of 385794 rows
Read 15.6% of 385794 rows
Read 20.7% of 385794 rows
Read 25.9% of 385794 rows
Read 31.1% of 385794 rows
Read 33.7% of 385794 rows
Read 38.9% of 385794 rows
Read 169621 rows and 44 (of 44) columns from 1.243 GB file in 00:00:19
# Derived time variables used by the cohort filters further down.
# NOTE(review): the arithmetic assumes DOD and ADMITTIME are numeric and on the
# same day-based scale as as.numeric(as.Date(...)) -- confirm against the CSV.

# Days from admission until death
dat$DAYS_UNTIL_DEATH <- dat[["DOD"]] - dat[["ADMITTIME"]]

# Days from admission until the note's chart date
chart_day <- as.numeric(as.Date(dat[["CHARTDATE"]], "%Y-%m-%d"))
dat$TIME_SINCE_ADMIT <- chart_day - dat[["ADMITTIME"]]
Selection Process
# Starting point: number of distinct patients before any exclusion
length(unique(dat[["SUBJECT_ID"]]))
## [1] 7564
# 7564 potentially eligible patients

# Inclusion 1: keep rows whose AGE is 75 or above
is_75_plus <- dat[["AGE"]] >= 75
dat <- dat[is_75_plus, ]
length(unique(dat[["SUBJECT_ID"]])) # 5166 patients dropped
## [1] 2398
# 2398 patients aged 75 or older on admission
# Inclusion 2: death recorded at least 2 days (48 hours) after admission.
# NOTE(review): a missing DAYS_UNTIL_DEATH makes this comparison NA; confirm
# that patients without a recorded death date are meant to be excluded here.
survived_2_days <- dat[["DAYS_UNTIL_DEATH"]] >= 2
dat <- dat[survived_2_days, ]
length(unique(dat[["SUBJECT_ID"]])) # 1103 patients dropped
## [1] 1295
# 1295 patients survived 2 or more days after admission
# Inclusion 3: keep only notes charted no later than 2 days after admission.
# This is a row-level (note-level) filter; patients left with no rows drop out.
# NOTE(review): there is no lower bound, so notes with a negative
# TIME_SINCE_ADMIT would also be kept -- confirm this is intended.
within_2_days <- dat[["TIME_SINCE_ADMIT"]] <= 2
dat <- dat[within_2_days, ]
length(unique(dat[["SUBJECT_ID"]])) # 154 patients dropped (1295 - 1141)
## [1] 1141
# 1141 patients had notes within two days of admission
# Number of note rows remaining in the cohort
dim(dat)[1L]
## [1] 11575
# 11575 notes associated with these patients

# Number of distinct note texts. This only counts them: dat itself is not
# deduplicated by this statement.
length(unique(dat[["TEXT"]])) # 1325 fewer than the row count
## [1] 10250
# 10250 notes associated with these patients once duplicate texts are ignored
# Keep only the columns that carry the noteevents column names
note_columns <- c(
  "ROW_ID", "SUBJECT_ID", "HADM_ID",
  "CHARTDATE", "CHARTTIME", "STORETIME",
  "CATEGORY", "DESCRIPTION", "CGID",
  "ISERROR", "TEXT"
)
# with = FALSE tells data.table to read note_columns as a vector of column
# names rather than as an expression evaluated inside dat
notes_out <- dat[, note_columns, with = FALSE]
# Export step, left disabled:
#write.csv(notes_out, file = "/Users/Edward/Desktop/over_75_cohort_17Jan18.csv", row.names = F)