Analysis

## Latest Dataset of NeuroNER Predictions
## TRUE/FALSE are spelled out: T and F are ordinary variables that can be
## reassigned, while TRUE and FALSE are reserved words.
dat <- read.csv(
  "~/nqf_caregivers/data/20180607_EOL_data_ICU.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
dim(dat)
## [1] 10250    57
## Note: Notes had been logged by multiple care providers, we will reintroduce those annotations

## Load Labeled Note Data for NQF Caremeasure Cohort (From NOTEEVENTS table)
tmp <- read.csv(
  "~/nqf_caregivers/data/note_labels_over75.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
dim(tmp)
## [1] 11575    25
## Keep only the join key (TEXT) and the note identifier (ROW_ID) from tmp
tmp <- tmp[, c("ROW_ID", "TEXT")]

## Inner join on the note text. Both inputs carry a ROW_ID column, so merge()
## suffixes them as ROW_ID.x (from tmp) and ROW_ID.y (from dat).
dat <- merge(tmp, dat, by = "TEXT")

## tmp is no longer needed once joined
rm(tmp)

## Check column names
colnames(dat)
##  [1] "TEXT"                 "ROW_ID.x"             "SUBJECT_ID"          
##  [4] "HADM_ID"              "ROW_ID.y"             "CHARTDATE"           
##  [7] "CHARTTIME"            "STORETIME"            "CATEGORY"            
## [10] "DESCRIPTION"          "CGID"                 "ISERROR"             
## [13] "ADMITTIME"            "DISCHTIME"            "DEATHTIME"           
## [16] "ADMISSION_TYPE"       "ADMISSION_LOCATION"   "DISCHARGE_LOCATION"  
## [19] "INSURANCE"            "LANGUAGE"             "RELIGION"            
## [22] "MARITAL_STATUS"       "ETHNICITY"            "EDREGTIME"           
## [25] "EDOUTTIME"            "DIAGNOSIS"            "HOSPITAL_EXPIRE_FLAG"
## [28] "HAS_CHARTEVENTS_DATA" "GENDER"               "DOB"                 
## [31] "DOD"                  "DOD_HOSP"             "DOD_SSN"             
## [34] "EXPIRE_FLAG"          "ICUSTAY_ID"           "DBSOURCE"            
## [37] "FIRST_CAREUNIT"       "LAST_CAREUNIT"        "FIRST_WARDID"        
## [40] "LAST_WARDID"          "INTIME"               "OUTTIME"             
## [43] "LOS"                  "AGE"                  "ADMISSION_NUMBER"    
## [46] "DAYS_UNTIL_DEATH"     "TIME_SINCE_ADMIT"     "CGID.1"              
## [49] "HADM_ID.1"            "FAM.machine"          "CIM.machine"         
## [52] "LIM.machine"          "CAR.machine"          "COD.machine"         
## [55] "check.CGID"           "check.dadm_id"        "CIM.or.FAM"          
## [58] "Died.in.Hospital"
## Inspect HADM_ID.1 (duplicate admission id column)
head(table(dat[["HADM_ID.1"]]))
## 
##   #N/A 100102 100153 100347 100391 100525 
##     32      8      3     12     15      2
## Compare against HADM_ID
head(table(dat[["HADM_ID"]]))
## 
## 100102 100153 100347 100391 100525 100575 
##      8      3     12     15      2     11
## HADM_ID.1 contains "#N/A" entries that HADM_ID does not show; drop it
dat[["HADM_ID.1"]] <- NULL

## Inspect CGID.1 (duplicate caregiver id column)
head(table(dat[["CGID.1"]]))
## 
##  #N/A 14010 14022 14037 14045 14056 
##    32    22     1    93    37    14
## Compare against CGID
head(table(dat[["CGID"]]))
## 
## 14010 14022 14037 14045 14056 14080 
##    22     1    93    37    14     6
## CGID.1 contains "#N/A" entries that CGID does not show; drop it
dat[["CGID.1"]] <- NULL

## Inspect check.CGID
head(table(dat[["check.CGID"]]))
##     0 
## 11575
## Constant column (all 0); drop it
dat[["check.CGID"]] <- NULL

## Inspect check.dadm_id
head(table(dat[["check.dadm_id"]]))
##     0 
## 11575
## Constant column (all 0); drop it
dat[["check.dadm_id"]] <- NULL

## Tidy the ROW_ID columns left by the merge: drop the .y copy and
## rename the .x copy back to ROW_ID
dat[["ROW_ID.y"]] <- NULL

names(dat)[names(dat) == "ROW_ID.x"] <- "ROW_ID"


## Load CAREGIVERS Table for join on CGID
## TRUE/FALSE are used instead of the reassignable T/F shorthands.
cg <- read.csv(
  "~/nqf_caregivers/data/mimic/CAREGIVERS.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

## Rename NOTEEVENTS "DESCRIPTION" so it explicitly describes the note
colnames(dat)[which(colnames(dat) == "DESCRIPTION")] <- "NOTE_DESCRIPTION"

## Rename CAREGIVERS "DESCRIPTION" so it explicitly describes the care provider
colnames(cg)[which(colnames(cg) == "DESCRIPTION")] <- "CG_DESCRIPTION"

## Remove ROW_ID from cg: dat already has a ROW_ID column, and a second one
## would be suffixed by merge()
cg$ROW_ID <- NULL

## Remove TEXT (the raw note text is no longer needed as a join key)
dat$TEXT <- NULL

## Inner join notes to caregivers on CGID
dat <- merge(dat, cg, by = "CGID")
dim(dat)
## [1] 11575    54
## Clean CG
rm(cg)

Load Severity of Illness Data

## Load SOFA severity-of-illness scores.
## TRUE/FALSE are used instead of the reassignable T/F shorthands.
sofa <- read.csv(
  "~/nqf_caregivers/data/sofa.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)
#oasis <- read.csv("~/nqf_caregivers/data/oasis.csv", header = T, stringsAsFactors = F)
#saps <- read.csv("~/nqf_caregivers/data/saps.csv", header = T, stringsAsFactors = F)

## Upper-case the SOFA column names so the join keys match those in dat
colnames(sofa) <- toupper(colnames(sofa))

## Inner join on patient, admission and ICU stay
dat <- merge(dat, sofa, by = c("SUBJECT_ID", "HADM_ID", "ICUSTAY_ID"))
dim(dat)
## [1] 11575    61
## Clean environment
rm(sofa)

Data Cleaning

## Clean ethnicity to Black/White/Other
## The column vector is assigned into directly (dat$ETHNICITY[mask] <- value)
## rather than via dat[mask, ]$ETHNICITY <- value. The row-subset form stops
## with "replacement has 1 row, data has 0" whenever a pattern matches no
## rows; the vector form is then simply a no-op.
## The statements run in the original order, so each value is recoded by the
## first pattern it matches.
dat$ETHNICITY[grepl("WHITE|PORTUGUESE", dat$ETHNICITY)] <- "WHITE"
dat$ETHNICITY[grepl("ASIAN", dat$ETHNICITY)] <- "OTHER"
dat$ETHNICITY[grepl("BLACK", dat$ETHNICITY)] <- "BLACK"
dat$ETHNICITY[grepl("HISPANIC", dat$ETHNICITY)] <- "OTHER"
dat$ETHNICITY[
  grepl("MIDDLE|NATIVE|MULTI|DECLINED|UNABLE|OTHER|NOT", dat$ETHNICITY)
] <- "OTHER"

## Collapse marital status to Married, Single, Widowed, Unknown:
## blank and "UNKNOWN (DEFAULT)" both become "UNKNOWN";
## separated and divorced are both counted as "SINGLE".
unknown_status <- dat$MARITAL_STATUS %in% c("", "UNKNOWN (DEFAULT)")
dat$MARITAL_STATUS[unknown_status] <- "UNKNOWN"
rm(unknown_status)
dat$MARITAL_STATUS[dat$MARITAL_STATUS %in% c("SEPARATED", "DIVORCED")] <- "SINGLE"

Check Attending and Caremeasure Implementation

## caremeasure_check() and expire_check() are project helpers defined outside
## this section. The drop counts below show that rows are filtered out here;
## presumably HOSP_DEATH (used later) is also added here -- TODO confirm.
temp <- caremeasure_check(dat)
temp <- expire_check(temp)

## Expose the machine-labelled care measure column under the name NQF
names(temp)[names(temp) == "CIM.machine"] <- "NQF"

## Report how many distinct clinicians, patients and admissions were lost
## between dat and temp
cat(
  length(unique(dat[["CGID"]])) - length(unique(temp[["CGID"]])),
  "Clinicians dropped due to no attending data in notes.\n"
)
## 4 Clinicians dropped due to no attending data in notes.
cat(
  length(unique(dat[["SUBJECT_ID"]])) - length(unique(temp[["SUBJECT_ID"]])),
  "Patients dropped due to no attending data in notes.\n"
)
## 78 Patients dropped due to no attending data in notes.
cat(
  length(unique(dat[["HADM_ID"]])) - length(unique(temp[["HADM_ID"]])),
  "Hospital Admissions dropped due to no attending data in notes.\n"
)
## 95 Hospital Admissions dropped due to no attending data in notes.

Care Provider Stats

## Number of distinct care providers
length(unique(temp$CGID))
## [1] 493
## One row per care provider (first occurrence of each CGID)
test <- temp[!duplicated(temp$CGID), ]

## Number of attendings. sum(..., na.rm = TRUE) is used instead of
## length(x[x == "Attending"]) because the subset form keeps an NA element for
## every NA in CG_DESCRIPTION and would count those as attendings.
sum(test$CG_DESCRIPTION == "Attending", na.rm = TRUE)
## [1] 68
#table(test$LABEL)

#table(test$CG_DESCRIPTION)

#table(test$NOTE_DESCRIPTION)

## Cross-tabulate the caregiver LABEL against the caregiver description
table(test$LABEL, test$CG_DESCRIPTION)
##         
##          Attending Dietitian Read Only Resident/Fellow/PA/NP Respiratory
##   1390           0         0         0                     1           0
##   9596           0         0         0                     1           0
##   eaw            0         0         0                     1           0
##   HMS MS         0         0         0                     1           0
##   MD            64         0         2                    58           0
##   Mds            0         0         0                     1           0
##   MDs            2         0         1                    91           0
##   MDS            0         0         0                     3           0
##   Med St         0         0         0                     1           0
##   MedRes         0         0         0                     1           0
##   MedSt          1         0         0                     0           0
##   ms             0         0         0                     2           0
##   MS             0         0         0                     7           0
##   NP             0         0         0                     2           0
##   PA             0         0         0                     4           0
##   PHD            1         0         0                     0           0
##   RD             0         1         0                     0           0
##   Res            0         0         0                   215           0
##   RF             0         0         0                     1           0
##   Rn             0         0         0                     0           0
##   RN             0         0         2                     1           0
##   RRT            0         0         0                     0           2
##   RTH            0         0         0                     0           1
##   Std            0         0         0                     1           0
##   STD            0         0         0                     1           0
##   Studen         0         0         0                     1           0
##         
##           RN
##   1390     0
##   9596     0
##   eaw      0
##   HMS MS   0
##   MD       0
##   Mds      0
##   MDs      0
##   MDS      0
##   Med St   0
##   MedRes   0
##   MedSt    0
##   ms       0
##   MS       0
##   NP       0
##   PA       0
##   PHD      0
##   RD       0
##   Res      0
##   RF       0
##   Rn       1
##   RN      21
##   RRT      0
##   RTH      0
##   Std      0
##   STD      0
##   Studen   0
#table(test$NOTE_DESCRIPTION, test$CG_DESCRIPTION)

## Work on this one
#table(test$CG_DESCRIPTION, test$NQF)
## Subject_id level: one row per patient (first occurrence of each SUBJECT_ID)
test <- temp[!duplicated(temp$SUBJECT_ID), ]

## plotDat() is a project helper defined outside this section.
## FALSE replaces the reassignable shorthand F; the meaning of this fourth
## argument is set by plotDat() -- TODO confirm against its definition.
plotDat(
  test, "HOSP_DEATH", "FIRST_CAREUNIT", FALSE,
  "Hospital Expiration by ICU", "First Care Unit", "Proportion"
)

## Contingency table: first care unit (rows) by in-hospital death (columns)
tmp <- table(test$FIRST_CAREUNIT, test$HOSP_DEATH)
tmp
##        
##           0   1
##   CCU    98  21
##   CSRU   41  14
##   MICU  491 119
##   SICU  124  48
##   TSICU  75  32
## Pairwise chi-square tests between care units on the table in tmp, with FDR
## adjustment. TRUE/FALSE replace the reassignable shorthands T/F.
pairwiseNominalIndependence(
  as.matrix(tmp),
  fisher = FALSE, gtest = FALSE, chisq = TRUE, method = "fdr")
##      Comparison p.Chisq p.adj.Chisq
## 1    CCU : CSRU  0.3220       0.632
## 2    CCU : MICU  0.7310       0.856
## 3    CCU : SICU  0.0597       0.149
## 4   CCU : TSICU  0.0439       0.146
## 5   CSRU : MICU  0.3790       0.632
## 6   CSRU : SICU  0.8560       0.856
## 7  CSRU : TSICU  0.6810       0.856
## 8   MICU : SICU  0.0233       0.116
## 9  MICU : TSICU  0.0212       0.116
## 10 SICU : TSICU  0.8240       0.856
## Distribution of days until death per first care unit
boxplot(
  test$DAYS_UNTIL_DEATH ~ test$FIRST_CAREUNIT,
  main = "Days Until Death by ICU",
  xlab = "First Care Unit",
  ylab = "Days"
)

## dud_t_tests() is a project helper defined outside this section; it receives
## every unordered pair of care units as a list. simplify = FALSE (rather
## than the reassignable shorthand F) keeps combn() returning that list.
tmp <- dud_t_tests(
  test,
  combn(unique(test$FIRST_CAREUNIT), 2, simplify = FALSE),
  "FIRST_CAREUNIT"
)
tmp
##                 test_pair    p.value adjusted.p signif
## 1    MICU vs. CCU.p.value 0.97264864  0.9726486       
## 2   MICU vs. CSRU.p.value 0.05784444  0.1928148       
## 3   MICU vs. SICU.p.value 0.49850317  0.7121474       
## 4  MICU vs. TSICU.p.value 0.42890853  0.7121474       
## 5    CCU vs. CSRU.p.value 0.07994306  0.1998577       
## 6    CCU vs. SICU.p.value 0.58484436  0.7310555       
## 7   CCU vs. TSICU.p.value 0.49832329  0.7121474       
## 8   CSRU vs. SICU.p.value 0.03649755  0.1824877       
## 9  CSRU vs. TSICU.p.value 0.03393579  0.1824877       
## 10 SICU vs. TSICU.p.value 0.83878003  0.9319778
## Mean days until death for each first care unit
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "CCU"])
## [1] 296.084
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "CSRU"])
## [1] 421.6545
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "MICU"])
## [1] 294.9082
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "SICU"])
## [1] 273.3256
mean(test$DAYS_UNTIL_DEATH[test$FIRST_CAREUNIT == "TSICU"])
## [1] 264.0561
## Distribution of SOFA scores per first care unit
boxplot(
  test$SOFA ~ test$FIRST_CAREUNIT,
  main = "Sequential Organ Failure Assessment (SOFA)\nby ICU",
  xlab = "First Care Unit",
  ylab = "SOFA"
)

## sofa_t_tests() is a project helper defined outside this section; it receives
## every unordered pair of care units as a list. simplify = FALSE (rather
## than the reassignable shorthand F) keeps combn() returning that list.
tmp <- sofa_t_tests(
  test,
  combn(unique(test$FIRST_CAREUNIT), 2, simplify = FALSE),
  "FIRST_CAREUNIT"
)
tmp
##                 test_pair      p.value   adjusted.p signif
## 1    MICU vs. CCU.p.value 2.251465e-01 0.2814331498       
## 2   MICU vs. CSRU.p.value 4.324493e-01 0.4804992420       
## 3   MICU vs. SICU.p.value 6.539884e-05 0.0006539884    ***
## 4  MICU vs. TSICU.p.value 1.695166e-03 0.0066632998     **
## 5    CCU vs. CSRU.p.value 1.351286e-01 0.2252143717       
## 6    CCU vs. SICU.p.value 1.313185e-01 0.2252143717       
## 7   CCU vs. TSICU.p.value 2.075981e-01 0.2814331498       
## 8   CSRU vs. SICU.p.value 1.998990e-03 0.0066632998     **
## 9  CSRU vs. TSICU.p.value 5.606970e-03 0.0140174245      *
## 10 SICU vs. TSICU.p.value 8.740909e-01 0.8740908542
## Mean SOFA score for each first care unit
mean(test$SOFA[test$FIRST_CAREUNIT == "CCU"])
## [1] 4.605042
mean(test$SOFA[test$FIRST_CAREUNIT == "CSRU"])
## [1] 5.290909
mean(test$SOFA[test$FIRST_CAREUNIT == "MICU"])
## [1] 5.009836
mean(test$SOFA[test$FIRST_CAREUNIT == "SICU"])
## [1] 4.040698
mean(test$SOFA[test$FIRST_CAREUNIT == "TSICU"])
## [1] 4.093458
## plotDat() is a project helper defined outside this section.
## FALSE replaces the reassignable shorthand F; the meaning of this fourth
## argument is set by plotDat() -- TODO confirm against its definition.
plotDat(
  test, "NQF", "FIRST_CAREUNIT", FALSE,
  "Care Measure Implementation", "First Care Unit", "Proportion"
)

## Contingency table: first care unit (rows) by NQF care measure (columns)
tmp <- table(test$FIRST_CAREUNIT, test$NQF)
tmp
##        
##           0   1
##   CCU    60  59
##   CSRU   43  12
##   MICU  291 319
##   SICU  107  65
##   TSICU  70  37
## Pairwise chi-square tests between care units on the table in tmp, with FDR
## adjustment. TRUE/FALSE replace the reassignable shorthands T/F.
pairwiseNominalIndependence(
  as.matrix(tmp),
  fisher = FALSE, gtest = FALSE, chisq = TRUE, method = "fdr")
##      Comparison  p.Chisq p.adj.Chisq
## 1    CCU : CSRU 9.72e-04    0.002650
## 2    CCU : MICU 6.59e-01    0.679000
## 3    CCU : SICU 6.03e-02    0.086100
## 4   CCU : TSICU 3.21e-02    0.064200
## 5   CSRU : MICU 2.81e-05    0.000281
## 6   CSRU : SICU 4.40e-02    0.073300
## 7  CSRU : TSICU 1.35e-01    0.169000
## 8   MICU : SICU 1.06e-03    0.002650
## 9  MICU : TSICU 1.05e-03    0.002650
## 10 SICU : TSICU 6.79e-01    0.679000

Patient Stats

## Number of distinct patients
length(unique(temp$SUBJECT_ID))
## [1] 1063
## One row per patient (first occurrence of each SUBJECT_ID)
test <- temp[!duplicated(temp$SUBJECT_ID), ]

summary(test$AGE)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   75.01   80.12   84.33   84.29   88.60   91.40
## Standard deviation of age at the patient level, matching summary() above.
## The earlier sd(temp$AGE) ran on note-level rows, so patients with more
## notes were weighted more heavily.
sd(test$AGE)
## (re-run required: the previously recorded value 5.210141 was the note-level
## result of sd(temp$AGE))
## Patient-level demographic counts
table(test[["GENDER"]])
## 
##   F   M 
## 545 518
table(test[["ETHNICITY"]])
## 
## BLACK OTHER WHITE 
##    72   116   875
table(test[["MARITAL_STATUS"]])
## 
## MARRIED  SINGLE UNKNOWN WIDOWED 
##     444     201      54     364
table(test[["FIRST_CAREUNIT"]])
## 
##   CCU  CSRU  MICU  SICU TSICU 
##   119    55   610   172   107
## Clinical: patient-level SOFA distribution

summary(test[["SOFA"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.00    3.00    4.00    4.73    6.00   18.00
sd(test[["SOFA"]])
## [1] 2.955016