library(openxlsx)
library(tidyverse)
library(Hmisc)
library(lubridate)
library(naniar)
Import and preprocess
# import data
dat <- read.xlsx("rough.xlsx")
# replace "NULL"s with NA. This automatically converts columns to numeric
library(naniar)
dat <- replace_with_na(dat, replace = list(ICU_Admit_1 = "NULL",
ICU_Discharge_1 = "NULL",
ICU_Admit_2 = "NULL",
ICU_Discharge_2 = "NULL"))
# positions of the columns that you want converted
# to datetime
posofint <- c(4, 5, 6, 7)
# two step process:
# step 1: define anonymous function to convert to Date Time using openxlsx
# Step 2: apply the anonymous function to columns of interest using lapply
convert_dtm <- function(x) convertToDateTime(x, origin = "1900-01-01")
dat[posofint] <- lapply(dat[posofint], convert_dtm)
# positions of the columns that you want converted
# to date
posofint2 <- c(2, 3)
# Step one: define anonymous function
# Step two: apply anonymous function to certain columns alone.
convert_dt <- function(x) convertToDate(x, origin = "1900-01-01")
dat[posofint2] <- lapply(dat[posofint2], convert_dt)
# remove clutter
rm(posofint, posofint2, convert_dt, convert_dtm)
# change names
names(dat) <- c("mrn", "hadm", "hdis", "icuadm1", "icudis1", "icuadm2", "icudis2", "disp")
# add inputsortorder
dat$inpsort <- 1:nrow(dat)
# View data, first five rows
head(dat, n = 5)
## mrn hadm hdis icuadm1 icudis1
## 1 123 2019-02-27 2019-03-14 2019-02-27 14:43:00 2019-03-04 15:15:00
## 2 124 2018-04-20 2018-04-22 <NA> <NA>
## 3 125 2018-04-20 2018-04-22 2018-04-20 18:21:00 2018-04-23 00:01:00
## 4 126 2018-04-20 2018-04-24 2018-04-20 18:01:00 2018-04-21 21:07:00
## 5 127 2018-04-20 2018-04-21 <NA> <NA>
## icuadm2 icudis2 disp
## 1 2019-03-13 04:47:00 2019-03-13 21:19:00 CHRONIC
## 2 <NA> <NA> HOME
## 3 <NA> <NA> AGAINST MEDICAL ADVICE
## 4 <NA> <NA> REHAB
## 5 <NA> <NA> HOME
## inpsort
## 1 1
## 2 2
## 3 3
## 4 4
## 5 5
Understand the data. This is useful to do before you start writing code.
# how many patients are missing a hospital admission?
# length(which(is.na(dat$hadm)))
# how many patients are missing a hospital discharge? .
# length(which(is.na(dat$hdis)))
# All patients have a hospital admission and discharge date.
# how many patients are missing a second ICU admission?
# length(which(is.na(dat$icuadm2)))
# how many patients are missing a second ICU discharge? Hopefully should be the same.
# length(which(is.na(dat$icudis2)))
# so, almost 99.67 % of the data does not have a second ICU admission-discharge.
# round((length(which(is.na(dat$icudis2)))/nrow(dat))*100, 2)
# how many patients are missing the first ICU admission?
# length(which(is.na(dat$icuadm1)))
# how many patients are missing a first ICU discharge? Hopefully should be the same.
# length(which(is.na(dat$icudis1)))
# so, almost 86.23 % of the data does not have a first ICU admission-discharge.
# round((length(which(is.na(dat$icudis1)))/nrow(dat))*100, 2)
# Are there any patients that have an ICU admission date but not a icu discharge date?
# which(is.na(dat$icuadm1) & !(is.na(dat$icudis1)))
# which(!(is.na(dat$icuadm1)) & is.na(dat$icudis1))
# which(is.na(dat$icuadm2) & !(is.na(dat$icudis2)))
# which(!(is.na(dat$icuadm2)) & is.na(dat$icudis2))
# seem to be none. Good.
# Are there any cases that have the details for the second ICU admission
# but not for the first ICU admission?
# sum(!(is.na(dat$icuadm2)) & is.na(dat$icuadm1))
# seem to be none. Good.
# Add indicator columns that tell you how many times each
# patient appears in the data, and which appearance the row
# we are looking at currently represents.
# which visit are we currently looking at?
dat2 <- dat %>%
arrange(mrn, hadm) %>%
group_by(mrn) %>%
mutate(which_visit = row_number())
# how many visits has a patient had?
dat2 <- dat2 %>%
group_by(mrn) %>%
mutate(howm_seen = max(row_number())) %>%
arrange(mrn)
# rearrange back in input order.
dat2 <- dat2 %>%
select(inpsort, everything()) %>%
arrange(inpsort)
# overwrite dat with dat2
dat <- dat2
# remove clutter
rm(dat2)
Replace the icuadm1, icudis1, icuadm2, icudis2 columns with date columns rather than date-time.
convert_dtm_to_dt <- function(x) lubridate::as_date(x)
dat[c(5:8)] <- lapply(dat[c(5:8)], convert_dtm_to_dt )
rm(convert_dtm_to_dt)
Write function:
floorweeks <- function(x) {
# set the start and end date for
# hospital weeks using
# hospital admission date and
# hospital discharge date.
x <- x %>%
mutate(hadm.wfloor = lubridate::floor_date(hadm, "week"),
hdis.wfloor = lubridate::floor_date(hdis, "week"))
hweeks <- seq(x$hadm.wfloor, x$hdis.wfloor, by = 7)
hweeks # all the weeks that the patient was in the hospital for.
if (is.na(x$icuadm1)) { # if the patient never went to the ICU
# prepare output list
mrn <- x$mrn
visitnum <- x$which_visit
howmvisits <- x$howm_seen
floorweeks <- hweeks # the weeks spent in floor is just the weeks between hadm and hdis
dimensions <- length(floorweeks ) # the number of individual weeks the patient was in the floor for.
outlist <- list(mrn = mrn,
visitnum = visitnum,
floorweeks = floorweeks,
length = dimensions)
return(outlist)
} # end condition where patient never went to ICU
if (!(is.na(x$icuadm1)) & (is.na(x$icuadm2))) { # if the patient went to the
# ICU exactly once.
# First ICU stint.
# get the start and end date for ICU weeks
# using icuadm1 and icudis1
x <- x %>% mutate(icuadm1.wfloor = lubridate::floor_date(icuadm1, "week"),
icudis1.wfloor = lubridate::floor_date(icudis1, "week"))
icuweeks <- seq(x$icuadm1.wfloor, x$icudis1.wfloor, by = 7)
icuweeks # all the weeks that the patient was in the ICU for during theirr first ICU stint.
#weeks in floor:
floorweeks <- hweeks[!(hweeks %in% icuweeks)] # weeks spent in floor is the weeks in hweeks (hospital weeks)
# that do not appear in icuweeks (weeks spent in ICU in the first ICU stint)
# prepare output list
mrn <- x$mrn
visitnum <- x$which_visit
howmvisits <- x$howm_seen
floorweeks <- floorweeks # the weeks spent in floor has been calculated above.
dimensions <- length(floorweeks ) # the number of individual weeks the patient was in the floor for.
outlist <- list(mrn = mrn,
visitnum = visitnum,
floorweeks = floorweeks,
length = dimensions)
return(outlist)
} # end condition where patient goes to ICU exactly once.
if (!(is.na(x$icuadm1)) & !(is.na(x$icuadm2))) { # if patient went to ICU twice
# First ICU stint.
# get the start and end date for ICU weeks
# using icuadm1 and icudis1
x <- x %>% mutate(icuadm1.wfloor = lubridate::floor_date(icuadm1, "week"),
icudis1.wfloor = lubridate::floor_date(icudis1, "week"))
icuweeks <- seq(x$icuadm1.wfloor, x$icudis1.wfloor, by = 7)
icuweeks # all the weeks that the patient was in the ICU for during theirr first ICU stint.
# Second ICU stint.
# get the start and end date for ICU weeks
# using icuadm2 and icudis2
x <- x %>% mutate(icuadm2.wfloor = lubridate::floor_date(icuadm2, "week"),
icudis2.wfloor = lubridate::floor_date(icudis2, "week"))
icuweeks2 <- seq(x$icuadm2.wfloor, x$icudis2.wfloor, by = 7)
icuweeks2 # all the weeks that the patient was in the ICU for during their second ICU stint.
# combine ICU stints.
icucombine <- c(icuweeks, icuweeks2)
#weeks in floor:
floorweeks <- hweeks[!(hweeks %in% icucombine)] # weeks spent in floor is the weeks in hweeks (hospital weeks)
# that do not appear in the combination of icuweeks
# (weeks spent in ICU in the first ICU stint)
# or icuweeks2 (weeks spent in ICU in second ICU stint)
# prepare output list
mrn <- x$mrn
visitnum <- x$which_visit
howmvisits <- x$howm_seen
floorweeks <- floorweeks # the weeks spent in floor has been calculated above.
dimensions <- length(floorweeks ) # the number of individual weeks the patient was in the floor for.
outlist <- list(mrn = mrn,
visitnum = visitnum,
floorweeks = floorweeks,
length = dimensions)
return(outlist)
} # end patient going to ICU twice.
} # end function
To use function on any row of dat-
floorweeks(dat[14, ]) # try it on row 14 of dat.
## $mrn
## [1] 136
##
## $visitnum
## [1] 1
##
## $floorweeks
## [1] "2018-04-15" "2018-04-22" "2018-04-29"
##
## $length
## [1] 3
To use function on multiple rows of dat - try on the first 20 rows of dat.
rows <- 20
dats <- dat[1:rows, ]
outputlist <- vector(mode = "list", length = rows)
for (i in 1:rows) {
datcurr <- dats[i, ] # set input data to one row at a time.
# apply the function floorweeks() to each row, and
# save the output to the corresponding element of outputlist
outputlist[[i]] <- floorweeks(datcurr) # get a list of mrn, visitnum, floorweeks, and length
}
str(outputlist)
## List of 20
## $ :List of 4
## ..$ mrn : num 123
## ..$ visitnum : int 1
## ..$ floorweeks: 'Date' num(0)
## ..$ length : int 0
## $ :List of 4
## ..$ mrn : num 124
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:2], format: "2018-04-15" ...
## ..$ length : int 2
## $ :List of 4
## ..$ mrn : num 125
## ..$ visitnum : int 1
## ..$ floorweeks: 'Date' num(0)
## ..$ length : int 0
## $ :List of 4
## ..$ mrn : num 126
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-04-22"
## ..$ length : int 1
## $ :List of 4
## ..$ mrn : num 127
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-04-15"
## ..$ length : int 1
## $ :List of 4
## ..$ mrn : num 128
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-04-15"
## ..$ length : int 1
## $ :List of 4
## ..$ mrn : num 129
## ..$ visitnum : int 1
## ..$ floorweeks: 'Date' num(0)
## ..$ length : int 0
## $ :List of 4
## ..$ mrn : num 130
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-05-06"
## ..$ length : int 1
## $ :List of 4
## ..$ mrn : num 131
## ..$ visitnum : int 1
## ..$ floorweeks: 'Date' num(0)
## ..$ length : int 0
## $ :List of 4
## ..$ mrn : num 132
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:4], format: "2018-04-15" ...
## ..$ length : int 4
## $ :List of 4
## ..$ mrn : num 133
## ..$ visitnum : int 1
## ..$ floorweeks: 'Date' num(0)
## ..$ length : int 0
## $ :List of 4
## ..$ mrn : num 134
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-05-20"
## ..$ length : int 1
## $ :List of 4
## ..$ mrn : num 135
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-04-29"
## ..$ length : int 1
## $ :List of 4
## ..$ mrn : num 136
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:3], format: "2018-04-15" ...
## ..$ length : int 3
## $ :List of 4
## ..$ mrn : num 137
## ..$ visitnum : int 1
## ..$ floorweeks: 'Date' num(0)
## ..$ length : int 0
## $ :List of 4
## ..$ mrn : num 138
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:3], format: "2018-04-15" ...
## ..$ length : int 3
## $ :List of 4
## ..$ mrn : num 139
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:2], format: "2018-04-15" ...
## ..$ length : int 2
## $ :List of 4
## ..$ mrn : num 140
## ..$ visitnum : int 1
## ..$ floorweeks: 'Date' num(0)
## ..$ length : int 0
## $ :List of 4
## ..$ mrn : num 141
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-04-22"
## ..$ length : int 1
## $ :List of 4
## ..$ mrn : num 142
## ..$ visitnum : int 1
## ..$ floorweeks: Date[1:1], format: "2018-04-22"
## ..$ length : int 1
# Save output list
# saveRDS(outputlist, file = "calculated")
For next steps, unbind list and aggregate by weeks to get counts of patients in floor each week.