Problem Definition
Estimate survival time of patients with primary biliary cirrhosis (PBC), a rare autoimmune liver disease.
Data
Patient genealogy
Time-series: Bilirubin levels measured over time
Data ETL
dat <- pbc2
dat <- data.frame(lapply(dat, # Using Base R functions
function(x) if(is.numeric(x)) round(x, 3) else x))
PBC Data
Patient genealogy
pbc2.id %>% dplyr::group_by(status) %>% dplyr::summarise(mean_life = mean(years, na.rm = T))
#mean(pbc2.id[pbc2.id$status2 == 1, ]$years, na.rm = T)
pbc2.id$status2 <- as.numeric(pbc2.id$status2)
fit <- survfit(Surv(years, status2) ~ 1, data = pbc2.id)
plot(fit, xlab = "Years",
ylab = "Survival probability")

NA
NA
Bilirubin levels measured over time for each patient
lattice::xyplot(log(serBilir) ~ year | id, group = id,
data = pbc2[pbc2$id %in% c(1:10),],
xlab = "Years", ylab = expression(log("serBilir")), col = 1, type = "b")

Correlation of bilirubin level measurements to patients’ state (dead/alive)
Notice patient-to-patient variations
xyplot(log(serBilir) ~ year | as.factor(status2),
group = id, data = pbc2,
panel = function(x, y, ...){panel.xyplot(x, y, type = "l", col = 1, ...)
panel.loess(x, y, col = 2, lwd = 2)}, main = "",
ylab = expression(log ("serBilir")), xlab = "Years")

Model for Forecast Survival Time
tic("Build survival model...")
use_linear_model <- 1
use_nonlinear_model <- 0
if(use_linear_model){
## Linear model
lmeFit <- lme(log(serBilir) ~ drug*(year), random = ~ year|id, data = pbc2)
coxFit <- coxph(Surv(years, status2) ~ drug + prothrombin, data = pbc2.id, x = TRUE)
jmFit <- jointModel(lmeFit, coxFit, timeVar = "year", method = "weibull-AFT-GH")
}
if(use_nonlinear_model){
# Nonlinear model
lmeFit <-
lme(log(serBilir) ~ drug*(year + I(year^2)), random = ~ year + I(year^2)|id, data = pbc2)
coxFit <- coxph(Surv(years, status2) ~ drug + prothrombin, data = pbc2.id, x = TRUE)
jmFit <- jointModel(lmeFit, coxFit, timeVar = "year", method = "weibull-AFT-GH")
}
toc()
Build survival model...: 52.82 sec elapsed
Validation
Two methods:
Backtesting: Go back in time and assume that time is the present date, Forecast failure time and compare if they agreed with the real ground-truth failure time.
Forward validation: Implement predictions in the field and do post-mortem analysis
Get predictions for an example patient
bRunPred <- 1
bSavePlot <- 0
patient_id <- 2
tic()
ND <- pbc2[pbc2$id == patient_id, ]
survPreds <- vector("list", nrow(ND))
for (i in 1:nrow(ND)) {
set.seed(123)
survPreds[[i]] <- survfitJM(jmFit, newdata = ND[1:i, ])
}
par(mfrow = c(2, 2), oma = c(0, 2, 0, 2))
for (i in c(1,3,5,7)) {
plot(survPreds[[i]], estimator = "median", conf.int = TRUE,
include.y = TRUE, main = paste("Follow-up time:",
round(survPreds[[i]]$last.time, 1)))
}

toc()
4.07 sec elapsed
Result
Patient 180-day risk table
bGeneratePreds <- 1
if(bGeneratePreds){
ids_to_predict <- pbc2.id %>% dplyr::filter(status2 == 0) %>% dplyr::select(id) %>% pull()
surv_preds <- list()
for(i in ids_to_predict){
tmp_pred <-pbc2 %>% dplyr::filter(id == i)
last_time <- unique(tmp_pred$years)
tmp <- JM::survfitJM(jmFit, newdata = tmp_pred, last.time = last_time, idVar = "id", survTimes = last_time + 180/365)
pSurv_180day <- unlist(tmp[1]$summaries)[2]
surv_preds <- surv_preds %>% bind_rows(data.frame("id" = i, "psurv_180day" = pSurv_180day, "years" = last_time))
}
surv_preds <- surv_preds %>% dplyr::arrange(pSurv_180day)
us_cities <- maps::us.cities %>% dplyr::select(name, lat, long)
set.seed(123)
us_cities <- us_cities[sample(nrow(surv_preds)),]
surv_preds <- cbind(surv_preds, us_cities)
write.csv(surv_preds, "surv_preds.csv", row.names = F)
}
surv_preds$psurv_180day <- round(surv_preds$psurv_180day, digits = 4)
surv_preds$risk <- "Medium"
surv_preds$risk[surv_preds$psurv_180day > 0.8] <- "Low"
surv_preds$risk[surv_preds$psurv_180day <= 0.6] <- "High"
DT::datatable(surv_preds, options = list(scrollX = TRUE), class = 'cell-border stripe', rownames = FALSE)
NA
Patient 180-day Risk + Locations Info on a Map
surv_preds$id <- paste0("Patient ", surv_preds$id)
surv_preds_sf <- sf::st_as_sf(
x = surv_preds,
coords = c("long", "lat"), # columns with coordinates
crs = 'ESRI:102003' # coordinate reference system code for eastings/northings
) %>% sf::st_transform(crs = 'ESRI:102003') # the coord ref system code for latlong
saveRDS(surv_preds_sf, "surv_preds_sf.RDS")
sch <- surv_preds_sf
map <- sch %>%
leaflet::leaflet() %>%
leaflet::addProviderTiles(providers$OpenStreetMap) %>%
leaflet::addAwesomeMarkers(
popup = ~paste0(
"<h1>", sch$id, "</h1>",
"<table style='width:100%'>",
"<tr>",
"<th>RISK</th>",
"<th>", sch$risk, "</th>",
"</tr>",
"<tr>",
"<tr>",
"<th>pSurvival </th>",
"<th>", sch$psurv_180day, "</th>",
"</tr>",
"<tr>",
"<tr>",
"<th>city</th>",
"<th>", sch$name, "</th>",
"</tr>"
), # end popup()
icon = awesomeIcons(
library = "ion",
icon = ifelse(
test = sch$risk == "High",
yes = "ion-android-star-outline",
no = "ion-android-star-outline"
),
iconColor = "white",
markerColor = ifelse(
test = sch$risk == "High",
yes = "red",
no = ifelse(sch$risk == "Medium", yes = "orange", no = "green")
)
)
) %>% # end addAwesomeMarkers()
leaflet::addMeasure()
map
NA
Patient 180-day Risk + Ambulance Locations on a Map
- Re-distribute Resources (Ambulance + Labor) proactively
us_cities <- maps::us.cities %>% dplyr::select(name, lat, long) %>%
rename(lng = long)
set.seed(125)
ambulance_locations <- us_cities[sample(10),]
ambulance_locations$id <- 1:nrow(ambulance_locations)
ambulance_locations$id <- paste0("A", ambulance_locations$id)
map %>%
addPulseMarkers(
lng = ambulance_locations$lng,
lat = ambulance_locations$lat,
popup = ~paste0(
"<h1>", ambulance_locations$id, "</h1>"),
icon = makePulseIcon()
)
NA
---
title: 'Prognostics: Primary Biliary Cirrhosis (PBC) of the Liver'
author: "Satish Iyengar"
date: "4/13/2022"
output: html_notebook

---

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```

```{r install_packages, include=FALSE}
library(JM)
library(lattice)
library(DT)
library(tictoc)
library(survival)
library(dplyr)
library(leaflet)
library(flexdashboard)
library(sf)
library(leaflet.extras)

```


## Problem Definition
- **Estimate survival time of patients with primary biliary cirrhosis (PBC), a rare autoimmune liver disease.**

- Data
- Patient genealogy 
- Time-series: Bilirubin levels measured over time


## Data ETL


```{r, include=TRUE}
dat <- pbc2
dat <- data.frame(lapply(dat,    # Using Base R functions
                         function(x) if(is.numeric(x)) round(x, 3) else x))

```


## PBC Data

### Patient genealogy

```{r view_data, include=TRUE, message=FALSE, warning=FALSE, echo=FALSE}
DT::datatable(pbc2.id, options = list(scrollX = TRUE), class = 'cell-border stripe', rownames = FALSE)

```

```{r}

pbc2.id %>% dplyr::group_by(status) %>% dplyr::summarise(mean_life = mean(years, na.rm = T))
#mean(pbc2.id[pbc2.id$status2 == 1, ]$years, na.rm = T)

```



```{r}
pbc2.id$status2 <- as.numeric(pbc2.id$status2)
fit <- survfit(Surv(years, status2) ~ 1, data = pbc2.id)
plot(fit, xlab = "Years", 
     ylab = "Survival probability")


```


### Bilirubin levels measured over time for each patient


```{r}
lattice::xyplot(log(serBilir) ~ year | id, group = id, 
                data = pbc2[pbc2$id %in% c(1:10),], 
                xlab = "Years", ylab = expression(log("serBilir")), col = 1, type = "b")

```

### Correlation of bilirubin level measurements to patients' state (dead/alive)

Notice patient-to-patient variations

```{r}
xyplot(log(serBilir) ~ year | as.factor(status2),
       group = id, data = pbc2, 
       panel = function(x, y, ...){panel.xyplot(x, y, type = "l", col = 1, ...)
         panel.loess(x, y, col = 2, lwd = 2)}, main = "", 
       ylab = expression(log ("serBilir")), xlab = "Years") 

```






## Model for Forecast Survival Time


```{r build_joint_model, include=TRUE}
tic("Build survival model...")
use_linear_model <- 1
use_nonlinear_model <- 0

if(use_linear_model){
  ## Linear model
  lmeFit <- lme(log(serBilir) ~ drug*(year), random = ~ year|id, data = pbc2)
  coxFit <- coxph(Surv(years, status2) ~ drug + prothrombin, data = pbc2.id, x = TRUE)
  jmFit <- jointModel(lmeFit, coxFit, timeVar = "year", method = "weibull-AFT-GH")
}

if(use_nonlinear_model){
  # Nonlinear model
  lmeFit <-
    lme(log(serBilir) ~ drug*(year + I(year^2)), random = ~ year + I(year^2)|id, data = pbc2)
  coxFit <- coxph(Surv(years, status2) ~ drug + prothrombin, data = pbc2.id, x = TRUE)
  jmFit <- jointModel(lmeFit, coxFit, timeVar = "year", method = "weibull-AFT-GH")
}

toc()

```

## Validation

Two methods: 

1. Backtesting: Go back in time and assume that time is the present date, Forecast failure time and compare if they agreed with the real ground-truth failure time.

2. Forward validation: Implement predictions in the field and do post-mortem analysis



## Get predictions for an example patient

```{r}
bRunPred <- 1
bSavePlot <- 0
patient_id <- 2

tic()


ND <- pbc2[pbc2$id == patient_id, ]
survPreds <- vector("list", nrow(ND))
for (i in 1:nrow(ND)) {
  set.seed(123)
  survPreds[[i]] <- survfitJM(jmFit, newdata = ND[1:i, ])
}

par(mfrow = c(2, 2), oma = c(0, 2, 0, 2))
for (i in c(1,3,5,7)) {
  plot(survPreds[[i]], estimator = "median", conf.int = TRUE,
       include.y = TRUE, main = paste("Follow-up time:",
                                      round(survPreds[[i]]$last.time, 1)))
}

toc()

```


## Result
### Patient 180-day risk table

```{r}
bGeneratePreds <- 1

if(bGeneratePreds){
  ids_to_predict <- pbc2.id %>% dplyr::filter(status2 == 0) %>% dplyr::select(id) %>% pull()
  surv_preds <- list()
  for(i in ids_to_predict){
    tmp_pred <-pbc2 %>% dplyr::filter(id == i) 
    last_time <- unique(tmp_pred$years)
    tmp <- JM::survfitJM(jmFit, newdata = tmp_pred, last.time = last_time, idVar = "id", survTimes = last_time + 180/365)
    pSurv_180day <- unlist(tmp[1]$summaries)[2]
    surv_preds <- surv_preds %>% bind_rows(data.frame("id" = i, "psurv_180day" = pSurv_180day, "years" = last_time))
  }
  
  surv_preds <- surv_preds %>% dplyr::arrange(pSurv_180day)
  us_cities <- maps::us.cities %>% dplyr::select(name, lat, long)
  set.seed(123)
  us_cities <- us_cities[sample(nrow(surv_preds)),]
  surv_preds <- cbind(surv_preds, us_cities) 
  write.csv(surv_preds, "surv_preds.csv", row.names = F)
}
```


```{r}
surv_preds$psurv_180day <- round(surv_preds$psurv_180day, digits = 4)
surv_preds$risk <- "Medium"
surv_preds$risk[surv_preds$psurv_180day > 0.8] <- "Low"
surv_preds$risk[surv_preds$psurv_180day <= 0.6] <- "High"

DT::datatable(surv_preds, options = list(scrollX = TRUE), class = 'cell-border stripe', rownames = FALSE)

```



## Patient 180-day Risk + Locations Info on a Map

```{r}

surv_preds$id <- paste0("Patient ", surv_preds$id)


surv_preds_sf <- sf::st_as_sf(
  x = surv_preds,
  coords = c("long", "lat"),  # columns with coordinates
  crs = 'ESRI:102003'	 # coordinate reference system code for eastings/northings
) %>% sf::st_transform(crs = 'ESRI:102003')  # the coord ref system code for latlong

saveRDS(surv_preds_sf, "surv_preds_sf.RDS")


```



```{r, width = 8, height = 8, warning=FALSE}
sch <- surv_preds_sf
map <- sch %>% 
  leaflet::leaflet() %>%
  leaflet::addProviderTiles(providers$OpenStreetMap) %>% 
  leaflet::addAwesomeMarkers(
    popup = ~paste0(
      "<h1>", sch$id, "</h1>",
      
      "<table style='width:100%'>",
      
      "<tr>",
      "<th>RISK</th>",
      "<th>", sch$risk, "</th>",
      "</tr>",
      
      "<tr>",
      "<tr>",
      "<th>pSurvival </th>",
      "<th>", sch$psurv_180day, "</th>",
      "</tr>",
      
      "<tr>",
      "<tr>",
      "<th>city</th>",
      "<th>", sch$name, "</th>",
      "</tr>"
    ),  # end popup()
    icon = awesomeIcons(
      library = "ion",
      icon = ifelse(
        test = sch$risk == "High", 
        yes = "ion-android-star-outline",
        no = "ion-android-star-outline"
      ),
      iconColor = "white",
      markerColor = ifelse(
        test = sch$risk == "High", 
        yes = "red",
        no = ifelse(sch$risk == "Medium", yes = "orange", no = "green")
      )
    )
  ) %>%   # end addAwesomeMarkers()
  leaflet::addMeasure()



```



```{r, fig.width=9, fig.align='center', warning=FALSE}
map

```




## Patient 180-day Risk + Ambulance Locations on a Map

- **Re-distribute Resources (Ambulance + Labor) proactively**

```{r, fig.width=9, fig.align='center', warning=FALSE}
us_cities <- maps::us.cities %>% dplyr::select(name, lat, long) %>% 
  rename(lng = long)

set.seed(125)
ambulance_locations <- us_cities[sample(10),]
ambulance_locations$id <- 1:nrow(ambulance_locations)
ambulance_locations$id <- paste0("A", ambulance_locations$id)

map %>%
  addPulseMarkers(
    lng = ambulance_locations$lng,
    lat = ambulance_locations$lat,
    popup = ~paste0(
      "<h1>", ambulance_locations$id, "</h1>"),
    
    icon = makePulseIcon()
  )

```
