In this post, I present a time series visualization of hospital stays using
the R package healthyR, based on the SyntheaMass dataset.
suppressPackageStartupMessages(
{
library(healthyR)
library(timetk)
library(dplyr)
library(purrr)
}
)
# Load the Synthea encounters table and list its column names
data_tbl <- read.csv(
  file = "/Users/nnthieu/SyntheaData/SyntheaCovid19/encounters.csv"
)
colnames(data_tbl)
## [1] "Id" "START" "STOP"
## [4] "PATIENT" "ORGANIZATION" "PROVIDER"
## [7] "PAYER" "ENCOUNTERCLASS" "CODE"
## [10] "DESCRIPTION" "BASE_ENCOUNTER_COST" "TOTAL_CLAIM_COST"
## [13] "PAYER_COVERAGE" "REASONCODE" "REASONDESCRIPTION"
# Keep inpatient encounters only and derive each stay's length in days
# (STOP minus START); STOP is then reduced to a calendar date.
# NOTE(review): STOP/START come straight from read.csv(), so difftime() has to
# parse them implicitly -- confirm the timestamp format and time zone handling.
df_tbl <- data_tbl |>
  filter(ENCOUNTERCLASS == "inpatient") |>
  mutate(
    # computed before STOP is overwritten below, so it sees the raw value
    length_of_stay = as.numeric(difftime(STOP, START, units = "days")),
    STOP = as.Date(STOP)
  ) |>
  select(STOP, length_of_stay)
head(df_tbl)
## STOP length_of_stay
## 1 2020-03-05 15
## 2 2019-10-27 1
## 3 2020-05-14 1
## 4 2010-12-18 1
## 5 2010-12-24 1
## 6 2019-07-07 1
# Average the length of stay per STOP date, restrict the series to the
# 2012-2019 window, and rename the columns to Date / Values for plotting
df_tbl <- df_tbl |>
  summarise_by_time(
    .date_var = STOP,
    .by = "day",
    avg_los = mean(length_of_stay, na.rm = TRUE)
  ) |>
  filter_by_time(
    .date_var = STOP,
    .start_date = "2012",
    .end_date = "2019"
  ) |>
  rename(Date = STOP, Values = avg_los)
head(df_tbl)
## # A tibble: 6 × 2
## Date Values
## <date> <dbl>
## 1 2012-01-01 1
## 2 2012-01-02 1
## 3 2012-01-03 1.33
## 4 2012-01-04 1
## 5 2012-01-05 1
## 6 2012-01-06 1.11
# Plot the average length of stay, aggregated by month, as an interactive
# chart. The grouping argument is spelled out in full (`.by_grouping`) instead
# of relying on R's partial argument matching of `.by`, which would break
# silently if the function ever gained another argument starting with `.by`.
ts_alos_plt(
  .data = df_tbl,
  .date_col = Date,
  .value_col = Values,
  .by_grouping = "month",
  .interactive = TRUE
)
As demonstrated above, the R package healthyR can be used to visualize
hospital stays through time series plots.