This is a report focusing on the quantity and type of patient notes containing phrases associated with end of life decisions and their implementation by the clinical care team.
Notes consist of all patient notes from MIMIC-IIIv1.4 contained in the NOTEEVENTS table.
Contextual information comes from sources provided by Charlotta Lindvall, which include Keywords_and_definitions.docx as well as the following phrases:
Code status: “CPR,ventilator,breathing machine,breathing tube,life-sustaining treatments,chest compressions,intubation,shocks,feeding tube”
Goals of care: “goals of care,GOC,life-sustaining treatment,comfort measures,comfort care,family meeting,hospice,quality of life,end of life, understanding of illness,understanding of prognosis,priorities,quality of life,extending life,comfort-focused care, supportive care”
Illness severity: “advanced cancer,progressing cancer,poor function,poor functional status,worsening cancer,widely metastatic,functional decline,frail,ill-appearing,poor prognosis,no treatment,end of life,progressive cancer”
Advance care planning: “health care proxy,HCP,living will,MOLST,advance directives,advance care planning,ACP,durable power of attorney”
Further:
We have six categories within our note annotation GUI:
To generate a subset of notes containing inclusive phrases we will use regex according to the following strategy:
- ',' for n unique phrases
- grepl() to generate a logical vector to capture all TRUE evaluations
- TRUE
- Not dictionary

We will include the NOTEEVENTS table, as well as the ADMISSIONS and PATIENTS tables from MIMIC-III, to generate a DAYS_UNTIL_DEATH variable [relative to admission date].
# ---- Read the three MIMIC-III tables ----
# Free-text clinical notes
notes <- read.csv(file = "NOTEEVENTS.csv", header = TRUE, stringsAsFactors = FALSE)
# Admissions: source of the admission and discharge timestamps
adm <- read.csv(file = "ADMISSIONS.csv", header = TRUE, stringsAsFactors = FALSE)
# Patients: source of the date of death
pat <- read.csv(file = "PATIENTS.csv", header = TRUE, stringsAsFactors = FALSE)

# ---- Timestamps -> numeric day counts ----
# as.Date() keeps only the calendar day; as.numeric() turns it into a day
# number, so the differences computed below are plain subtractions in days.
adm[c("ADMITTIME", "DISCHTIME")] <- lapply(
  adm[c("ADMITTIME", "DISCHTIME")],
  function(stamp) as.numeric(as.Date(stamp, "%Y-%m-%d %H:%M:%S"))
)
pat[["DOD"]] <- as.numeric(as.Date(pat[["DOD"]], "%Y-%m-%d %H:%M:%S"))

# ---- Join everything onto the notes ----
# The per-table ROW_ID columns are removed before merging
adm[["ROW_ID"]] <- NULL
pat[["ROW_ID"]] <- NULL
# One row per admission, with the patient's columns attached
dat <- merge(x = adm, y = pat, by = "SUBJECT_ID")
# pat is no longer needed; adm is kept because it is used again further down
rm(pat)
# Attach admission/patient columns to each note via patient and admission ids
notes <- merge(x = notes, y = dat, by = c("SUBJECT_ID", "HADM_ID"))
rm(dat)

# ---- Derived day differences ----
# Days from the admission date to the date of death
notes[["DAYS_UNTIL_DEATH"]] <- notes[["DOD"]] - notes[["ADMITTIME"]]
# Days from the admission date to the date the note was charted
notes[["TIME_SINCE_ADMIT"]] <-
  as.numeric(as.Date(notes[["CHARTDATE"]], "%Y-%m-%d")) - notes[["ADMITTIME"]]
colnames(notes)
## [1] "SUBJECT_ID" "HADM_ID" "ROW_ID"
## [4] "CHARTDATE" "CHARTTIME" "STORETIME"
## [7] "CATEGORY" "DESCRIPTION" "CGID"
## [10] "ISERROR" "TEXT" "ADMITTIME"
## [13] "DISCHTIME" "DEATHTIME" "ADMISSION_TYPE"
## [16] "ADMISSION_LOCATION" "DISCHARGE_LOCATION" "INSURANCE"
## [19] "LANGUAGE" "RELIGION" "MARITAL_STATUS"
## [22] "ETHNICITY" "EDREGTIME" "EDOUTTIME"
## [25] "DIAGNOSIS" "HOSPITAL_EXPIRE_FLAG" "HAS_CHARTEVENTS_DATA"
## [28] "GENDER" "DOB" "DOD"
## [31] "DOD_HOSP" "DOD_SSN" "EXPIRE_FLAG"
## [34] "DAYS_UNTIL_DEATH" "TIME_SINCE_ADMIT"
# Set the plot margins (in inches) before drawing the horizontal bar chart
par(mai = c(1, 2, 1, 1))
# Number of notes per note category, drawn as horizontal bars
barplot(height = table(factor(notes$CATEGORY)),
        names.arg = names(table(factor(notes$CATEGORY))),
        main = "Note Count by Type (All of MIMIC)",
        horiz = TRUE,
        las = 1)
# Distribution of days from admission to death across all notes
hist(x = notes$DAYS_UNTIL_DEATH,
     main = "Note Event Frequency by Days Until Death From Admission (All of MIMIC)",
     xlab = "Days Until Death",
     ylab = "Note Event Frequency",
     breaks = 50)
summary(notes$DAYS_UNTIL_DEATH)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## -3 25 97 421 527 4333 1078824
# Distribution of days from admission to the note's chart date
hist(x = notes$TIME_SINCE_ADMIT,
     main = "Note Event Frequency Since Admission (All of MIMIC)",
     xlab = "Time Since Admission",
     ylab = "Note Event Frequency",
     breaks = 50)
summary(notes$TIME_SINCE_ADMIT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -7 2 5 12 14 295
# Patient count over every admission (adm still holds the full ADMISSIONS table)
cat("There are", length(unique(adm[["SUBJECT_ID"]])), "unique patients in MIMIC-IIIv1.4\n")
## There are 46520 unique patients in MIMIC-IIIv1.4
# The admissions table is not used after this point
rm(adm)
# Keep only notes from admissions flagged as an in-hospital death
notes <- notes[notes[["HOSPITAL_EXPIRE_FLAG"]] == 1, ]
cat("We have", length(unique(notes[["SUBJECT_ID"]])), "unique patients who expired in the hospital\n")
## We have 5604 unique patients who expired in the hospital
# Keep only physician notes. The category literal ends with a space; it is
# compared exactly as written.
notes <- notes[notes[["CATEGORY"]] == "Physician ", ]
cat("We have", length(unique(notes[["SUBJECT_ID"]])), "unique patients who expired in the hospital and had physician's notes.\n")
## We have 887 unique patients who expired in the hospital and had physician's notes.
# Row count of the remaining notes
cat("We have", nrow(notes), "observations of these patients.\n")
## We have 24460 observations of these patients.
# Length of every note in characters
notes[["CHARS"]] <- nchar(notes[["TEXT"]])
# Sort by patient, longest note first. duplicated() below keeps the first row
# it meets for each distinct text, so this ordering decides which copy survives.
notes <- notes[order(notes[["SUBJECT_ID"]], -notes[["CHARS"]]), ]
# Drop every row whose text has already been seen earlier in this ordering
notes <- notes[!duplicated(notes[["TEXT"]]), ]
cat("We have", nrow(notes), "notes after removing duplicates\n")
## We have 23253 notes after removing duplicates
# Same two histograms as before, now restricted to the filtered cohort
hist(x = notes$DAYS_UNTIL_DEATH,
     main = "Note Event Frequency by Days Until Death From Admission (Entire Cohort)",
     xlab = "Days Until Death",
     ylab = "Note Event Frequency",
     breaks = 50)
summary(notes$DAYS_UNTIL_DEATH)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 8.00 17.00 25.94 33.00 165.00
hist(x = notes$TIME_SINCE_ADMIT,
     main = "Note Event Frequency Since Admission (Entire Cohort)",
     xlab = "Time Since Admission",
     ylab = "Note Event Frequency",
     breaks = 50)
summary(notes$TIME_SINCE_ADMIT)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 3.00 7.00 14.67 18.00 131.00
# Keep notes charted at most 2 days after the admission date.
# NOTE(review): TIME_SINCE_ADMIT is a difference of calendar days, so this
# "48hrs" window is day-granular rather than hour-exact -- confirm intended.
notes <- notes[notes[["TIME_SINCE_ADMIT"]] <= 2, ]
cat("We have", length(unique(notes[["SUBJECT_ID"]])), "unique patients who have notes within 48hrs of admission.\n")
## We have 730 unique patients who have notes within 48hrs of admission.
cat("We have", nrow(notes), "notes after focusing on 48hrs since admission.\n")
## We have 5339 notes after focusing on 48hrs since admission.
Phrases will be searched for in text according to the dictionary of terms generated earlier.
# Keep the original-case text so it can be restored once matching is done
txtHolder <- notes$TEXT
# Lower-case the notes; the phrases are lower-cased too, so matching ignores case
notes$TEXT <- tolower(notes$TEXT)
# Turn escaped newlines (a literal backslash followed by "n") into real newlines
notes$TEXT <- gsub("\\\\n", '\n', notes$TEXT)
# Replace every newline with a single space
notes$TEXT <- gsub('\n', ' ', notes$TEXT)
# Phrase groups from above, each a comma-separated string, converted to lower case
phrases <- tolower(c("CPR,ventilator,breathing machine,breathing tube,chest compressions,intubation,shocks,feeding tube,",
"goals of care,GOC,life-sustaining treatment,comfort measures,comfort care,family meeting,hospice,quality of life,end of life,understanding of illness,understanding of prognosis,priorities,quality of life,extending life,comfort-focused care,supportive care,",
"advanced cancer,progressing cancer,poor function,poor functional status,worsening cancer,widely metastatic,functional decline,frail,ill-appearing,poor prognosis,no treatment,end of life,progressive cancer,",
"health care proxy,HCP,living will,MOLST,advance directives,advance care planning,ACP,durable power of attorney,",
"chest compressions,no endotracheal intubation,no mechanical intubation,shocks,cmo,comfort measures,",
"goals of care,goc,goals for care,goals of treatment,goals for treatment,treatment goals,family meeting,family discussion,family discussions,",
"pallcare,palliative care,pall care,pallcare,palliative medicine,",
"hospice"))
# Split each group on ',' on its own instead of gluing the groups together
# first: gluing only worked while every group but the last ended in a comma,
# otherwise the last phrase of one group fused with the first of the next.
incPhrases <- trimws(unlist(strsplit(phrases, ",", fixed = TRUE)))
# An empty phrase would match every note, so discard any that slip in
incPhrases <- incPhrases[nzchar(incPhrases)]
# unique() gives the union across groups, in first-seen order
incPhrases <- unique(incPhrases)
#Display inclusive phrases
print(incPhrases)
## [1] "cpr" "ventilator"
## [3] "breathing machine" "breathing tube"
## [5] "chest compressions" "intubation"
## [7] "shocks" "feeding tube"
## [9] "goals of care" "goc"
## [11] "life-sustaining treatment" "comfort measures"
## [13] "comfort care" "family meeting"
## [15] "hospice" "quality of life"
## [17] "end of life" "understanding of illness"
## [19] "understanding of prognosis" "priorities"
## [21] "extending life" "comfort-focused care"
## [23] "supportive care" "advanced cancer"
## [25] "progressing cancer" "poor function"
## [27] "poor functional status" "worsening cancer"
## [29] "widely metastatic" "functional decline"
## [31] "frail" "ill-appearing"
## [33] "poor prognosis" "no treatment"
## [35] "progressive cancer" "health care proxy"
## [37] "hcp" "living will"
## [39] "molst" "advance directives"
## [41] "advance care planning" "acp"
## [43] "durable power of attorney" "no endotracheal intubation"
## [45] "no mechanical intubation" "cmo"
## [47] "goals for care" "goals of treatment"
## [49] "goals for treatment" "treatment goals"
## [51] "family discussion" "family discussions"
## [53] "pallcare" "palliative care"
## [55] "pall care" "palliative medicine"
# Exclusionary phrases arrive as one comma-separated string: lower-case it,
# break it apart on ',' and drop repeated entries
excPhrases <- unique(unlist(strsplit(
  tolower("Full code per np admit note,full code per admission,full code per orders,full code per review of chart,full code per LMR,full code per chart,full code per recent,full code per last admission,full code per order set,full code per records,full code per pepl,full code per cas,full code per team,Full code as per np admit note,full code as per admission,full code as per orders,full code as per review of chart,full code as per LMR,full code as per chart,full code as per recent,full code as per last admission,full code as per order set,full code as per records,full code as per pepl,full code as per cas,full code as per team"),
  ','
)))
# Show the resulting exclusionary phrase list
print(excPhrases)
## [1] "full code per np admit note" "full code per admission"
## [3] "full code per orders" "full code per review of chart"
## [5] "full code per lmr" "full code per chart"
## [7] "full code per recent" "full code per last admission"
## [9] "full code per order set" "full code per records"
## [11] "full code per pepl" "full code per cas"
## [13] "full code per team" "full code as per np admit note"
## [15] "full code as per admission" "full code as per orders"
## [17] "full code as per review of chart" "full code as per lmr"
## [19] "full code as per chart" "full code as per recent"
## [21] "full code as per last admission" "full code as per order set"
## [23] "full code as per records" "full code as per pepl"
## [25] "full code as per cas" "full code as per team"
strictRegex() accepts all phrases (kwds) and all note texts (texts); it uses grepl() to find direct matches in the text and returns a list of logical vectors, one per phrase.
# strictRegex: flag which texts contain each keyword.
#
# Arguments:
#   kwds  - character vector of keywords; each one is handed to grepl() as a
#           regular expression.
#   texts - character vector of texts to search.
#
# Returns an unnamed list with one entry per keyword, in the order of kwds.
# Entry i is the logical vector grepl(kwds[i], texts), i.e. one TRUE/FALSE per
# text. An empty kwds returns an empty list.
#
# Matching is by substring: a keyword is also found when it occurs inside a
# longer word.
strictRegex <- function(kwds, texts){
  # seq_along() instead of 1:length(): with zero keywords, 1:length(kwds) is
  # c(1, 0) and the loop ran grepl() on a missing/empty pattern and failed.
  lapply(seq_along(kwds), function(i) grepl(kwds[i], texts))
}
# Timed search for the inclusive phrases; hold receives one logical vector per phrase
system.time({
  hold <- strictRegex(kwds = incPhrases, texts = notes$TEXT)
})
## user system elapsed
## 27.44 0.00 27.61
# Timed search for the exclusionary phrases, stored in excHold
system.time({
  excHold <- strictRegex(kwds = excPhrases, texts = notes$TEXT)
})
## user system elapsed
## 13.78 0.00 13.79
# Turn the list of per-phrase logical vectors into a data frame:
# one column per phrase, one row per note
hold <- as.data.frame(hold)
# Each column corresponds to the phrase at the same position in incPhrases
colnames(hold) <- incPhrases
# Multiply logicals by 1 to get 0/1 numeric columns
hold <- hold*1
# Number of notes containing each phrase. colSums() is used instead of
# apply(hold[,1:length(colnames(hold))], 2, sum): with a single phrase that
# column subset collapsed to a plain vector and apply() failed, and with no
# phrases 1:0 selected a non-existent column.
posTable <- colSums(hold)
# Print phrases and counts, omitting phrases with no matching note
posTable[posTable > 0]
## cpr ventilator
## 275 2942
## chest compressions intubation
## 59 1365
## shocks feeding tube
## 123 24
## goals of care goc
## 542 37
## life-sustaining treatment comfort measures
## 2 164
## comfort care family meeting
## 83 619
## hospice quality of life
## 132 13
## end of life comfort-focused care
## 12 10
## supportive care poor function
## 150 12
## poor functional status widely metastatic
## 4 61
## functional decline frail
## 4 106
## ill-appearing poor prognosis
## 51 189
## no treatment health care proxy
## 22 63
## hcp living will
## 712 13
## cmo goals for care
## 342 7
## treatment goals family discussion
## 5 70
## family discussions palliative care
## 7 204
## pall care palliative medicine
## 1 1
# Same tabulation for the exclusionary phrases:
# one column per phrase, one row per note
excHold <- as.data.frame(excHold)
colnames(excHold) <- excPhrases
# Logical -> 0/1 numeric
excHold <- excHold*1
# Number of notes containing each exclusionary phrase. colSums() avoids the
# excHold[,1:length(colnames(excHold))] subset, which collapsed to a vector
# (breaking apply()) when only one phrase was present.
excTable <- colSums(excHold)
# Show only phrases that matched at least one note
excTable[excTable > 0]
## named numeric(0)
No exclusive phrases are found.
Strategy: Include any patient note that contained any inclusive phrase.
# TRUE for every note that contains at least one inclusive phrase. hold is a
# 0/1 table (one row per note), so a positive row sum means "some phrase matched".
# This replaces a loop over 1:nrow(hold) that appended to inc one element at a
# time: it was slow on thousands of rows and produced spurious entries when
# hold had no rows (1:0 iterates over 1 and 0).
inc <- unname(rowSums(hold) > 0)
# Put the original (not lower-cased) text back
notes$TEXT <- txtHolder
# The temporary copy is no longer needed
rm(txtHolder)
# Notes with at least one inclusive phrase
results <- notes[inc,]
nrow(results)
## [1] 4158
# Notes with no inclusive phrase
negatives <- notes[!inc,]
nrow(negatives)
## [1] 1181
# Flag matched notes with COHORT = 1 and unmatched notes with COHORT = 0,
# then stack them (matched notes first)
results$COHORT <- rep(1, times = nrow(results))
negatives$COHORT <- rep(0, times = nrow(negatives))
strictResults <- rbind(results, negatives)
#Initial subset
#write.csv(strictResults, file = "strict_regex_MIMIC_results06Nov17.csv", row.names = F)
# Share of notes that matched at least one inclusive phrase, as a percentage.
# The proportion is multiplied by 100 before rounding; previously the raw
# proportion (0.78) was printed next to the '%' sign.
cat(paste(round(100 * nrow(results)/(nrow(negatives)+nrow(results))), '%'))
## 78 %