Introduction

library(openxlsx)
library(tidyverse)
library(Hmisc)
library(lubridate)
library(naniar)

Import and preprocess data

Import and preprocess

# import data
dat <- read.xlsx("rough.xlsx")



# replace "NULL"s with NA. This automatically converts columns to numeric
library(naniar)
dat <- replace_with_na(dat, replace = list(ICU_Admit_1 = "NULL",
                                           ICU_Discharge_1 = "NULL",
                                           ICU_Admit_2 = "NULL",
                                           ICU_Discharge_2 = "NULL"))


# positions of the columns that you want converted 
# to datetime
posofint <- c(4, 5, 6, 7)


# two step process:
# step 1: define anonymous function to convert to Date Time using openxlsx
# Step 2: apply the anonymous function to columns of interest using lapply
convert_dtm <- function(x) convertToDateTime(x, origin = "1900-01-01")
dat[posofint] <- lapply(dat[posofint], convert_dtm)

# positions of the columns that you want converted 
# to date
posofint2 <- c(2, 3)

# Step one: define anonymous function
# Step two: apply anonymous function to certain columns alone.
convert_dt <- function(x) convertToDate(x, origin = "1900-01-01")
dat[posofint2] <- lapply(dat[posofint2], convert_dt)


# remove clutter
rm(posofint, posofint2, convert_dt, convert_dtm)


# change names
names(dat) <- c("mrn", "hadm", "hdis", "icuadm1", "icudis1", "icuadm2", "icudis2", "disp")

# add inputsortorder
dat$inpsort <- 1:nrow(dat)

View data

# View data, first five rows
head(dat, n = 5)
##   mrn       hadm       hdis             icuadm1             icudis1
## 1 123 2019-02-27 2019-03-14 2019-02-27 14:43:00 2019-03-04 15:15:00
## 2 124 2018-04-20 2018-04-22                <NA>                <NA>
## 3 125 2018-04-20 2018-04-22 2018-04-20 18:21:00 2018-04-23 00:01:00
## 4 126 2018-04-20 2018-04-24 2018-04-20 18:01:00 2018-04-21 21:07:00
## 5 127 2018-04-20 2018-04-21                <NA>                <NA>
##               icuadm2             icudis2                      disp
## 1 2019-03-13 04:47:00 2019-03-13 21:19:00 CHRONIC                  
## 2                <NA>                <NA> HOME                     
## 3                <NA>                <NA> AGAINST MEDICAL ADVICE   
## 4                <NA>                <NA> REHAB                    
## 5                <NA>                <NA> HOME                     
##   inpsort
## 1       1
## 2       2
## 3       3
## 4       4
## 5       5

Understand the data. This is useful to do before you start writing code.

# how many patients are missing a hospital admission?
# length(which(is.na(dat$hadm)))
# how many patients are missing a hospital discharge? .
# length(which(is.na(dat$hdis)))
# All patients have a hospital admission and discharge date.



# how many patients are missing a second ICU admission?
# length(which(is.na(dat$icuadm2)))
# how many patients are missing a second ICU discharge? Hopefully should be the same.
# length(which(is.na(dat$icudis2)))
# so, almost 99.67 % of the data does not have a second ICU admission-discharge.
# round((length(which(is.na(dat$icudis2)))/nrow(dat))*100, 2)



# how many patients are missing the first ICU admission?
# length(which(is.na(dat$icuadm1)))
# how many patients are missing a first ICU discharge? Hopefully should be the same.
# length(which(is.na(dat$icudis1)))
#  so, almost 86.23 % of the data does not have a first ICU admission-discharge.
# round((length(which(is.na(dat$icudis1)))/nrow(dat))*100, 2)


# Are there any patients that have an ICU admission date but not a icu discharge date?
# which(is.na(dat$icuadm1) & !(is.na(dat$icudis1)))
# which(!(is.na(dat$icuadm1)) & is.na(dat$icudis1))
# which(is.na(dat$icuadm2) & !(is.na(dat$icudis2)))
# which(!(is.na(dat$icuadm2)) & is.na(dat$icudis2))
# seem to be none. Good.


# Are there any cases that have the details for the second ICU admission
# but not for the first ICU admission?
# sum(!(is.na(dat$icuadm2)) & is.na(dat$icuadm1))
# seem to be none. Good.



# Add indicator columns that tell you how many times each
# patient appears in the data, and which appearance the row
# we are looking at currently represents.

# which visit are we currently looking at?
dat2 <- dat %>% 
        arrange(mrn, hadm) %>%
        group_by(mrn) %>%
        mutate(which_visit = row_number())

# how many visits has a patient had?
dat2 <- dat2 %>%
        group_by(mrn) %>%
        mutate(howm_seen = max(row_number())) %>%
        arrange(mrn)


# rearrange back in input order.
dat2 <- dat2 %>% 
       select(inpsort, everything()) %>% 
       arrange(inpsort)

# overwrite dat with dat2
dat <- dat2

# remove clutter
rm(dat2)

Replace the icuadm1, icudis1, icuadm2, icudis2 columns with date columns rather than date-time.

convert_dtm_to_dt <- function(x) lubridate::as_date(x)
dat[c(5:8)] <- lapply(dat[c(5:8)], convert_dtm_to_dt )
rm(convert_dtm_to_dt)

Function floorweeks()

Write function:

floorweeks <- function(x) {
  

  
# set the start and end date for 
# hospital weeks using 
# hospital admission date and 
# hospital discharge date.
x <- x %>%  
     mutate(hadm.wfloor = lubridate::floor_date(hadm, "week"),
            hdis.wfloor = lubridate::floor_date(hdis, "week"))

hweeks <- seq(x$hadm.wfloor, x$hdis.wfloor, by = 7) 
hweeks # all the weeks that the patient was in the hospital for.


if (is.na(x$icuadm1)) { # if the patient never went to the ICU
  
  # prepare output list
  mrn <- x$mrn
  visitnum <- x$which_visit
  howmvisits <- x$howm_seen
  floorweeks <- hweeks  # the weeks spent in floor is just the weeks between hadm and hdis
  dimensions <- length(floorweeks ) # the number of individual weeks the patient was in the floor for.
  outlist <- list(mrn = mrn, 
                  visitnum = visitnum,
                  floorweeks = floorweeks,
                  length = dimensions)
                  
  return(outlist)
  
                     } # end condition where patient never went to ICU


if (!(is.na(x$icuadm1)) & (is.na(x$icuadm2))) { # if the patient went to the 
                                                # ICU exactly once.


# First ICU stint.
# get the start and end date for ICU weeks 
# using icuadm1 and icudis1

x <- x %>% mutate(icuadm1.wfloor = lubridate::floor_date(icuadm1, "week"),
                  icudis1.wfloor = lubridate::floor_date(icudis1, "week"))

icuweeks <- seq(x$icuadm1.wfloor, x$icudis1.wfloor, by = 7) 
icuweeks # all the weeks that the patient was in the ICU for during theirr first ICU stint.


#weeks in floor:
floorweeks <- hweeks[!(hweeks %in% icuweeks)] # weeks spent in floor is the weeks in hweeks (hospital weeks)
                                              # that do not appear in icuweeks (weeks spent in ICU in the first ICU stint)

  # prepare output list
  mrn <- x$mrn
  visitnum <- x$which_visit
  howmvisits <- x$howm_seen
  floorweeks <- floorweeks # the weeks spent in floor has been calculated above.
  dimensions <- length(floorweeks ) # the number of individual weeks the patient was in the floor for.
  outlist <- list(mrn = mrn, 
                  visitnum = visitnum,
                  floorweeks = floorweeks,
                  length = dimensions)
                  
  return(outlist)

                                            } # end condition where patient goes to ICU exactly once.


if (!(is.na(x$icuadm1)) & !(is.na(x$icuadm2))) { # if patient went to ICU twice
  
  
# First ICU stint.
# get the start and end date for ICU weeks 
# using icuadm1 and icudis1

x <- x %>% mutate(icuadm1.wfloor = lubridate::floor_date(icuadm1, "week"),
                  icudis1.wfloor = lubridate::floor_date(icudis1, "week"))

icuweeks <- seq(x$icuadm1.wfloor, x$icudis1.wfloor, by = 7) 
icuweeks # all the weeks that the patient was in the ICU for during theirr first ICU stint.

  
# Second ICU stint.
# get the start and end date for ICU weeks 
# using icuadm2 and icudis2

x <- x %>% mutate(icuadm2.wfloor = lubridate::floor_date(icuadm2, "week"),
                  icudis2.wfloor = lubridate::floor_date(icudis2, "week"))

icuweeks2 <- seq(x$icuadm2.wfloor, x$icudis2.wfloor, by = 7) 
icuweeks2 # all the weeks that the patient was in the ICU for during their second ICU stint.
 
 
# combine ICU stints.
icucombine <- c(icuweeks, icuweeks2)
  
#weeks in floor:
floorweeks <- hweeks[!(hweeks %in% icucombine)] # weeks spent in floor is the weeks in hweeks (hospital weeks)
                                                # that do not appear in the combination of icuweeks
                                                # (weeks spent in ICU in the first ICU stint)  
                                                # or icuweeks2 (weeks spent in ICU in second ICU stint)
 

  # prepare output list
  mrn <- x$mrn
  visitnum <- x$which_visit
  howmvisits <- x$howm_seen
  floorweeks <- floorweeks # the weeks spent in floor has been calculated above.
  dimensions <- length(floorweeks ) # the number of individual weeks the patient was in the floor for.
  outlist <- list(mrn = mrn, 
                  visitnum = visitnum,
                  floorweeks = floorweeks,
                  length = dimensions)
                  
  return(outlist)

                                               } # end patient going to ICU twice.

  
} # end function

Using the floorweeks() function

To use function on any row of dat-

floorweeks(dat[14, ])   # try it on row 14 of dat.
## $mrn
## [1] 136
## 
## $visitnum
## [1] 1
## 
## $floorweeks
## [1] "2018-04-15" "2018-04-22" "2018-04-29"
## 
## $length
## [1] 3

To use function on multiple rows of dat - try on the first 20 rows of dat.

rows <- 20
dats <- dat[1:rows, ]

outputlist <- vector(mode = "list", length = rows)
for (i in 1:rows) {
        
        datcurr <- dats[i, ] # set input data to one row at a time.
                             # apply the function floorweeks() to each row, and 
                             # save the output to the corresponding element of outputlist
        outputlist[[i]] <- floorweeks(datcurr) # get a list of mrn, visitnum, floorweeks, and length
        
        
}


str(outputlist)
## List of 20
##  $ :List of 4
##   ..$ mrn       : num 123
##   ..$ visitnum  : int 1
##   ..$ floorweeks: 'Date' num(0) 
##   ..$ length    : int 0
##  $ :List of 4
##   ..$ mrn       : num 124
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:2], format: "2018-04-15" ...
##   ..$ length    : int 2
##  $ :List of 4
##   ..$ mrn       : num 125
##   ..$ visitnum  : int 1
##   ..$ floorweeks: 'Date' num(0) 
##   ..$ length    : int 0
##  $ :List of 4
##   ..$ mrn       : num 126
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-04-22"
##   ..$ length    : int 1
##  $ :List of 4
##   ..$ mrn       : num 127
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-04-15"
##   ..$ length    : int 1
##  $ :List of 4
##   ..$ mrn       : num 128
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-04-15"
##   ..$ length    : int 1
##  $ :List of 4
##   ..$ mrn       : num 129
##   ..$ visitnum  : int 1
##   ..$ floorweeks: 'Date' num(0) 
##   ..$ length    : int 0
##  $ :List of 4
##   ..$ mrn       : num 130
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-05-06"
##   ..$ length    : int 1
##  $ :List of 4
##   ..$ mrn       : num 131
##   ..$ visitnum  : int 1
##   ..$ floorweeks: 'Date' num(0) 
##   ..$ length    : int 0
##  $ :List of 4
##   ..$ mrn       : num 132
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:4], format: "2018-04-15" ...
##   ..$ length    : int 4
##  $ :List of 4
##   ..$ mrn       : num 133
##   ..$ visitnum  : int 1
##   ..$ floorweeks: 'Date' num(0) 
##   ..$ length    : int 0
##  $ :List of 4
##   ..$ mrn       : num 134
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-05-20"
##   ..$ length    : int 1
##  $ :List of 4
##   ..$ mrn       : num 135
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-04-29"
##   ..$ length    : int 1
##  $ :List of 4
##   ..$ mrn       : num 136
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:3], format: "2018-04-15" ...
##   ..$ length    : int 3
##  $ :List of 4
##   ..$ mrn       : num 137
##   ..$ visitnum  : int 1
##   ..$ floorweeks: 'Date' num(0) 
##   ..$ length    : int 0
##  $ :List of 4
##   ..$ mrn       : num 138
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:3], format: "2018-04-15" ...
##   ..$ length    : int 3
##  $ :List of 4
##   ..$ mrn       : num 139
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:2], format: "2018-04-15" ...
##   ..$ length    : int 2
##  $ :List of 4
##   ..$ mrn       : num 140
##   ..$ visitnum  : int 1
##   ..$ floorweeks: 'Date' num(0) 
##   ..$ length    : int 0
##  $ :List of 4
##   ..$ mrn       : num 141
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-04-22"
##   ..$ length    : int 1
##  $ :List of 4
##   ..$ mrn       : num 142
##   ..$ visitnum  : int 1
##   ..$ floorweeks: Date[1:1], format: "2018-04-22"
##   ..$ length    : int 1
# Save output list
# saveRDS(outputlist, file = "calculated")

For next steps, unbind list and aggregate by weeks to get counts of patients in floor each week.