Analysis
## Latest dataset of NeuroNER predictions
dat <- read.csv(
  "~/nqf_caregivers/data/20180607_EOL_data_ICU.csv",
  header = TRUE, stringsAsFactors = FALSE
)
dim(dat)
## [1] 10250 57
## Note: notes had been logged by multiple care providers; we will reintroduce those annotations below
## Load labeled note data for the NQF caremeasure cohort (from the NOTEEVENTS table)
tmp <- read.csv(
  "~/nqf_caregivers/data/note_labels_over75.csv",
  header = TRUE, stringsAsFactors = FALSE
)
dim(tmp)
## [1] 11575 25
## Keep only ROW_ID and TEXT from tmp
tmp <- tmp[, c("ROW_ID", "TEXT")]
## Inner join on the note text (merge() keeps only matching rows by default).
## Both inputs carry a ROW_ID column, so merge() suffixes them:
## ROW_ID.x comes from tmp and ROW_ID.y from dat.
dat <- merge(tmp, dat, by = "TEXT")
## Clean tmp
rm(tmp)
## Check column names
colnames(dat)
## [1] "TEXT" "ROW_ID.x" "SUBJECT_ID"
## [4] "HADM_ID" "ROW_ID.y" "CHARTDATE"
## [7] "CHARTTIME" "STORETIME" "CATEGORY"
## [10] "DESCRIPTION" "CGID" "ISERROR"
## [13] "ADMITTIME" "DISCHTIME" "DEATHTIME"
## [16] "ADMISSION_TYPE" "ADMISSION_LOCATION" "DISCHARGE_LOCATION"
## [19] "INSURANCE" "LANGUAGE" "RELIGION"
## [22] "MARITAL_STATUS" "ETHNICITY" "EDREGTIME"
## [25] "EDOUTTIME" "DIAGNOSIS" "HOSPITAL_EXPIRE_FLAG"
## [28] "HAS_CHARTEVENTS_DATA" "GENDER" "DOB"
## [31] "DOD" "DOD_HOSP" "DOD_SSN"
## [34] "EXPIRE_FLAG" "ICUSTAY_ID" "DBSOURCE"
## [37] "FIRST_CAREUNIT" "LAST_CAREUNIT" "FIRST_WARDID"
## [40] "LAST_WARDID" "INTIME" "OUTTIME"
## [43] "LOS" "AGE" "ADMISSION_NUMBER"
## [46] "DAYS_UNTIL_DEATH" "TIME_SINCE_ADMIT" "CGID.1"
## [49] "HADM_ID.1" "FAM.machine" "CIM.machine"
## [52] "LIM.machine" "CAR.machine" "COD.machine"
## [55] "check.CGID" "check.dadm_id" "CIM.or.FAM"
## [58] "Died.in.Hospital"
## Inspect HADM_ID.1
head(table(dat[["HADM_ID.1"]]))
##
## #N/A 100102 100153 100347 100391 100525
## 32 8 3 12 15 2
## Inspect HADM_ID for comparison
head(table(dat[["HADM_ID"]]))
##
## 100102 100153 100347 100391 100525 100575
## 8 3 12 15 2 11
## HADM_ID.1 carries "#N/A" entries that HADM_ID does not; drop it
dat[["HADM_ID.1"]] <- NULL
## Inspect CGID.1
head(table(dat[["CGID.1"]]))
##
## #N/A 14010 14022 14037 14045 14056
## 32 22 1 93 37 14
## Inspect CGID for comparison
head(table(dat[["CGID"]]))
##
## 14010 14022 14037 14045 14056 14080
## 22 1 93 37 14 6
## CGID.1 carries "#N/A" entries that CGID does not; drop it
dat[["CGID.1"]] <- NULL
## Inspect check.CGID
head(table(dat[["check.CGID"]]))
## 0
## 11575
## Single value (0) on every row; drop it
dat[["check.CGID"]] <- NULL
## Inspect check.dadm_id
head(table(dat[["check.dadm_id"]]))
## 0
## 11575
## Single value (0) on every row; drop it
dat[["check.dadm_id"]] <- NULL
## Tidy the ROW_ID columns left by the merge: drop ROW_ID.y and
## rename ROW_ID.x back to ROW_ID
dat[["ROW_ID.y"]] <- NULL
names(dat)[names(dat) == "ROW_ID.x"] <- "ROW_ID"
## Load CAREGIVERS table for the join on CGID
cg <- read.csv(
  "~/nqf_caregivers/data/mimic/CAREGIVERS.csv",
  header = TRUE, stringsAsFactors = FALSE
)
## Rename NOTEEVENTS "DESCRIPTION" so it explicitly describes the note
colnames(dat)[which(colnames(dat) == "DESCRIPTION")] <- "NOTE_DESCRIPTION"
## Rename CAREGIVERS "DESCRIPTION" so it explicitly describes the care provider
colnames(cg)[which(colnames(cg) == "DESCRIPTION")] <- "CG_DESCRIPTION"
## Remove ROW_ID from cg so it does not collide with the note ROW_ID in dat
cg$ROW_ID <- NULL
## Remove TEXT (no longer needed once the TEXT-keyed join is done)
dat$TEXT <- NULL
## Merge to caregivers (inner join on CGID)
dat <- merge(dat, cg, by = "CGID")
dim(dat)
## [1] 11575 54
## Clean cg
rm(cg)
Load Severity of Illness Data
## Load SOFA scores; the OASIS and SAPS loads below are left disabled
sofa <- read.csv(
  "~/nqf_caregivers/data/sofa.csv",
  header = TRUE, stringsAsFactors = FALSE
)
#oasis <- read.csv("~/nqf_caregivers/data/oasis.csv", header = T, stringsAsFactors = F)
#saps <- read.csv("~/nqf_caregivers/data/saps.csv", header = T, stringsAsFactors = F)
## Upper-case the column names so the key columns match those in dat
colnames(sofa) <- toupper(colnames(sofa))
## Inner join on patient, admission and ICU stay
dat <- merge(dat, sofa, by = c("SUBJECT_ID", "HADM_ID", "ICUSTAY_ID"))
dim(dat)
## [1] 11575 61
## Clean environment
rm(sofa)
Data Cleaning
## Clean ethnicity to Black/White/Other.
## The recodes run in order, each on the result of the previous one, so the
## sequence below must be kept as is.
## Assignment goes through dat$ETHNICITY[mask]: when a pattern matches no
## rows this is a no-op, whereas dat[mask, ]$ETHNICITY <- value stops with
## "replacement has 1 row, data has 0".
## NOTE(review): values matching none of the patterns are left unchanged.
dat$ETHNICITY[grepl("WHITE|PORTUGUESE", dat$ETHNICITY)] <- "WHITE"
dat$ETHNICITY[grepl("ASIAN", dat$ETHNICITY)] <- "OTHER"
dat$ETHNICITY[grepl("BLACK", dat$ETHNICITY)] <- "BLACK"
dat$ETHNICITY[grepl("HISPANIC", dat$ETHNICITY)] <- "OTHER"
dat$ETHNICITY[grepl("MIDDLE|NATIVE|MULTI|DECLINED|UNABLE|OTHER|NOT", dat$ETHNICITY)] <- "OTHER"
## Clean marital status to Married, Single, Widowed, Unknown.
## Blank and "UNKNOWN (DEFAULT)" both become "UNKNOWN"; separated and
## divorced are folded into "SINGLE". %in% is never NA, so missing values
## are left as they are.
dat$MARITAL_STATUS[dat$MARITAL_STATUS %in% c("", "UNKNOWN (DEFAULT)")] <- "UNKNOWN"
dat$MARITAL_STATUS[dat$MARITAL_STATUS %in% c("SEPARATED", "DIVORCED")] <- "SINGLE"
Check Attending and Caremeasure Implementation
## Run the two project checks in sequence (both helpers are defined outside
## this section). The result is kept in temp so dat stays available for the
## before/after counts below.
temp <- caremeasure_check(dat)
temp <- expire_check(temp)
## Expose the CIM.machine column under the name NQF
names(temp)[names(temp) == "CIM.machine"] <- "NQF"
## Distinct clinicians present in dat but no longer in temp
cat(
  length(unique(dat[["CGID"]])) - length(unique(temp[["CGID"]])),
  "Clinicians dropped due to no attending data in notes.\n"
)
## 4 Clinicians dropped due to no attending data in notes.
## Distinct patients present in dat but no longer in temp
cat(
  length(unique(dat[["SUBJECT_ID"]])) - length(unique(temp[["SUBJECT_ID"]])),
  "Patients dropped due to no attending data in notes.\n"
)
## 78 Patients dropped due to no attending data in notes.
## Distinct hospital admissions present in dat but no longer in temp
cat(
  length(unique(dat[["HADM_ID"]])) - length(unique(temp[["HADM_ID"]])),
  "Hospital Admissions dropped due to no attending data in notes.\n"
)
## 95 Hospital Admissions dropped due to no attending data in notes.
Care Provider Stats
## Number of distinct care providers
length(unique(temp$CGID))
## [1] 493
## One row per care provider (first occurrence of each CGID)
test <- temp[!duplicated(temp$CGID), ]
## Count providers described as "Attending". sum(..., na.rm = TRUE) ignores
## missing descriptions; subsetting by the comparison and taking length()
## would count NA entries as well.
sum(test$CG_DESCRIPTION == "Attending", na.rm = TRUE)
## [1] 68
#table(test$LABEL)
#table(test$CG_DESCRIPTION)
#table(test$NOTE_DESCRIPTION)
## Cross-tabulate provider label against provider description
table(test$LABEL, test$CG_DESCRIPTION)
##
## Attending Dietitian Read Only Resident/Fellow/PA/NP Respiratory
## 1390 0 0 0 1 0
## 9596 0 0 0 1 0
## eaw 0 0 0 1 0
## HMS MS 0 0 0 1 0
## MD 64 0 2 58 0
## Mds 0 0 0 1 0
## MDs 2 0 1 91 0
## MDS 0 0 0 3 0
## Med St 0 0 0 1 0
## MedRes 0 0 0 1 0
## MedSt 1 0 0 0 0
## ms 0 0 0 2 0
## MS 0 0 0 7 0
## NP 0 0 0 2 0
## PA 0 0 0 4 0
## PHD 1 0 0 0 0
## RD 0 1 0 0 0
## Res 0 0 0 215 0
## RF 0 0 0 1 0
## Rn 0 0 0 0 0
## RN 0 0 2 1 0
## RRT 0 0 0 0 2
## RTH 0 0 0 0 1
## Std 0 0 0 1 0
## STD 0 0 0 1 0
## Studen 0 0 0 1 0
##
## RN
## 1390 0
## 9596 0
## eaw 0
## HMS MS 0
## MD 0
## Mds 0
## MDs 0
## MDS 0
## Med St 0
## MedRes 0
## MedSt 0
## ms 0
## MS 0
## NP 0
## PA 0
## PHD 0
## RD 0
## Res 0
## RF 0
## Rn 1
## RN 21
## RRT 0
## RTH 0
## Std 0
## STD 0
## Studen 0
#table(test$NOTE_DESCRIPTION, test$CG_DESCRIPTION)
## Work on this one
#table(test$CG_DESCRIPTION, test$NQF)
## Subject_id level: one row per patient (first occurrence of each SUBJECT_ID)
test <- temp[!duplicated(temp$SUBJECT_ID), ]
## plotDat is a project helper defined outside this section
plotDat(test, "HOSP_DEATH", "FIRST_CAREUNIT", FALSE, "Hospital Expiration by ICU", "First Care Unit", "Proportion")

## Contingency table: first care unit (rows) by HOSP_DEATH (columns)
tmp <- table(test$FIRST_CAREUNIT, test$HOSP_DEATH)
tmp
##
## 0 1
## CCU 98 21
## CSRU 41 14
## MICU 491 119
## SICU 124 48
## TSICU 75 32
## Pairwise chi-square tests between care units with FDR-adjusted p-values
pairwiseNominalIndependence(
  as.matrix(tmp),
  fisher = FALSE, gtest = FALSE, chisq = TRUE, method = "fdr"
)
## Comparison p.Chisq p.adj.Chisq
## 1 CCU : CSRU 0.3220 0.632
## 2 CCU : MICU 0.7310 0.856
## 3 CCU : SICU 0.0597 0.149
## 4 CCU : TSICU 0.0439 0.146
## 5 CSRU : MICU 0.3790 0.632
## 6 CSRU : SICU 0.8560 0.856
## 7 CSRU : TSICU 0.6810 0.856
## 8 MICU : SICU 0.0233 0.116
## 9 MICU : TSICU 0.0212 0.116
## 10 SICU : TSICU 0.8240 0.856
## Distribution of days until death within each first care unit
boxplot(
  test$DAYS_UNTIL_DEATH ~ test$FIRST_CAREUNIT,
  main = "Days Until Death by ICU",
  xlab = "First Care Unit",
  ylab = "Days"
)

## Every unordered pair of care units, as a list of length-2 vectors, is
## passed to the project helper dud_t_tests (defined outside this section)
tmp <- dud_t_tests(
  test,
  combn(unique(test$FIRST_CAREUNIT), 2, simplify = FALSE),
  "FIRST_CAREUNIT"
)
tmp
## test_pair p.value adjusted.p signif
## 1 MICU vs. CCU.p.value 0.97264864 0.9726486
## 2 MICU vs. CSRU.p.value 0.05784444 0.1928148
## 3 MICU vs. SICU.p.value 0.49850317 0.7121474
## 4 MICU vs. TSICU.p.value 0.42890853 0.7121474
## 5 CCU vs. CSRU.p.value 0.07994306 0.1998577
## 6 CCU vs. SICU.p.value 0.58484436 0.7310555
## 7 CCU vs. TSICU.p.value 0.49832329 0.7121474
## 8 CSRU vs. SICU.p.value 0.03649755 0.1824877
## 9 CSRU vs. TSICU.p.value 0.03393579 0.1824877
## 10 SICU vs. TSICU.p.value 0.83878003 0.9319778
## Mean days until death within each first care unit
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "CCU"])
## [1] 296.084
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "CSRU"])
## [1] 421.6545
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "MICU"])
## [1] 294.9082
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "SICU"])
## [1] 273.3256
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "TSICU"])
## [1] 264.0561
## Distribution of SOFA scores within each first care unit
boxplot(
  test$SOFA ~ test$FIRST_CAREUNIT,
  main = "Sequential Organ Failure Assessment (SOFA)\nby ICU",
  xlab = "First Care Unit",
  ylab = "SOFA"
)

## Every unordered pair of care units, as a list of length-2 vectors, is
## passed to the project helper sofa_t_tests (defined outside this section)
tmp <- sofa_t_tests(
  test,
  combn(unique(test$FIRST_CAREUNIT), 2, simplify = FALSE),
  "FIRST_CAREUNIT"
)
tmp
## test_pair p.value adjusted.p signif
## 1 MICU vs. CCU.p.value 2.251465e-01 0.2814331498
## 2 MICU vs. CSRU.p.value 4.324493e-01 0.4804992420
## 3 MICU vs. SICU.p.value 6.539884e-05 0.0006539884 ***
## 4 MICU vs. TSICU.p.value 1.695166e-03 0.0066632998 **
## 5 CCU vs. CSRU.p.value 1.351286e-01 0.2252143717
## 6 CCU vs. SICU.p.value 1.313185e-01 0.2252143717
## 7 CCU vs. TSICU.p.value 2.075981e-01 0.2814331498
## 8 CSRU vs. SICU.p.value 1.998990e-03 0.0066632998 **
## 9 CSRU vs. TSICU.p.value 5.606970e-03 0.0140174245 *
## 10 SICU vs. TSICU.p.value 8.740909e-01 0.8740908542
## Mean SOFA score within each first care unit
mean(test$SOFA[test$FIRST_CAREUNIT == "CCU"])
## [1] 4.605042
mean(test$SOFA[test$FIRST_CAREUNIT == "CSRU"])
## [1] 5.290909
mean(test$SOFA[test$FIRST_CAREUNIT == "MICU"])
## [1] 5.009836
mean(test$SOFA[test$FIRST_CAREUNIT == "SICU"])
## [1] 4.040698
mean(test$SOFA[test$FIRST_CAREUNIT == "TSICU"])
## [1] 4.093458
## plotDat is a project helper defined outside this section
plotDat(test, "NQF", "FIRST_CAREUNIT", FALSE, "Care Measure Implementation", "First Care Unit", "Proportion")

## Contingency table: first care unit (rows) by NQF (columns)
tmp <- table(test$FIRST_CAREUNIT, test$NQF)
tmp
##
## 0 1
## CCU 60 59
## CSRU 43 12
## MICU 291 319
## SICU 107 65
## TSICU 70 37
## Pairwise chi-square tests between care units with FDR-adjusted p-values
pairwiseNominalIndependence(
  as.matrix(tmp),
  fisher = FALSE, gtest = FALSE, chisq = TRUE, method = "fdr"
)
## Comparison p.Chisq p.adj.Chisq
## 1 CCU : CSRU 9.72e-04 0.002650
## 2 CCU : MICU 6.59e-01 0.679000
## 3 CCU : SICU 6.03e-02 0.086100
## 4 CCU : TSICU 3.21e-02 0.064200
## 5 CSRU : MICU 2.81e-05 0.000281
## 6 CSRU : SICU 4.40e-02 0.073300
## 7 CSRU : TSICU 1.35e-01 0.169000
## 8 MICU : SICU 1.06e-03 0.002650
## 9 MICU : TSICU 1.05e-03 0.002650
## 10 SICU : TSICU 6.79e-01 0.679000
Patient Stats
## Number of distinct patients
length(unique(temp$SUBJECT_ID))
## [1] 1063
## One row per patient, so the statistics below are patient-level
test <- temp[!duplicated(temp$SUBJECT_ID), ]
summary(test$AGE)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 75.01 80.12 84.33 84.29 88.60 91.40
## Standard deviation over the same de-duplicated rows as the summary above.
## sd(temp$AGE) would weight each patient by their number of rows in temp.
sd(test$AGE)
## [1] 5.210141 (recorded from the earlier sd(temp$AGE) call; re-run to refresh)
table(test$GENDER)
##
## F M
## 545 518
table(test$ETHNICITY)
##
## BLACK OTHER WHITE
## 72 116 875
table(test$MARITAL_STATUS)
##
## MARRIED SINGLE UNKNOWN WIDOWED
## 444 201 54 364
table(test$FIRST_CAREUNIT)
##
## CCU CSRU MICU SICU TSICU
## 119 55 610 172 107
## Clinical
summary(test$SOFA)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.00 3.00 4.00 4.73 6.00 18.00
sd(test$SOFA)
## [1] 2.955016