YouTube video with explanations for these examples: https://youtu.be/k-IN6HBhgq4

This is the easiest way to create a Sankey diagram from your own data. A data-driven Sankey chart is easy to create, customise, and save as an image.

Watch this video if you need to create a Sankey chart from your own data frame and put it in your presentation or your documents.

Create data labels with numbers and percentages at each node.

Just define the columns you want to use, customise the colours using the themes, and then save the chart as an image file on your desktop.

library(highcharter)

# Simulate a reproducible demo data set for the Sankey examples:
# 100 rows, each with a hospital, a gender and an outcome.

set.seed(111)

t1 <- sample(c("Hosp A", "Hosp B", "Hosp C"), size = 100, replace = TRUE)
t2 <- sample(c("Male", "Female"), size = 100, replace = TRUE)
t3 <- sample(c("Survived", "Died"), size = 100, replace = TRUE)

# Name the columns while constructing the data frame.
d <- data.frame(Hospital = t1, Gender = t2, Outcome = t3)

head(d)
##   Hospital Gender  Outcome
## 1   Hosp B   Male Survived
## 2   Hosp C Female Survived
## 3   Hosp C Female     Died
## 4   Hosp C   Male     Died
## 5   Hosp A   Male     Died
## 6   Hosp C Female Survived
# First Sankey diagram: built from every column of `d`.
d %>%
  data_to_sankey() %>%
  hchart("sankey", name = "Hospital and Gender based Outcomes")

# Second Sankey diagram: only the Hospital and Outcome columns.
dataForSankey <- dplyr::select(d, Hospital, Outcome)
dataForSankey %>%
  data_to_sankey() %>%
  hchart("sankey", name = "Hospital based Outcomes")

# Third Sankey diagram: only the Gender and Outcome columns.
dataForSankey <- dplyr::select(d, Gender, Outcome)
dataForSankey %>%
  data_to_sankey() %>%
  hchart("sankey", name = "Gender based Outcomes")
# Build the base chart and show it before any customisation.
pl <- hchart(data_to_sankey(d), "sankey", name = "Patient Outcomes")
pl

# Add a title, subtitle, caption and theme. The result is assigned back to
# `pl`; without the assignment the customised chart is only printed once and
# then discarded, so the final `pl` (and any later export of `pl`) would
# still be the unstyled chart.
pl <- pl %>%
  hc_title(text = "Sankey Diagram") %>%
  hc_subtitle(text = "Hospital and Gender based outcomes") %>%
  # The caption is HTML, so the bold tag has to be closed with </b>.
  hc_caption(text = "<b>This is my caption at the bottom.</b>") %>%
  hc_add_theme(hc_theme_economist())

# Show the customised chart.
pl
## Save the chart `pl` to a standalone HTML file.
## The export calls in this section are left commented out; uncomment them
## and adjust the hard-coded "D:\tmp" paths to run them.
library(htmlwidgets)
#htmlwidgets::saveWidget(widget = pl, file = "D:\\tmp\\map.html")
## Convert the saved HTML file to a PNG image (uses the webshot package).

#webshot::webshot(url = "D:\\tmp\\map.html",
#                 file = "D:\\tmp\\map.png" )
## Alternative attempt through the png() graphics device.
## NOTE(review): `pl` is an HTML widget, so printing it presumably does not
## draw on the png device -- confirm before relying on this variant.
#png('D:\\tmp\\hc.png', width = 800,height = 400)
#print(pl)
#dev.off()

How to add value labels and percentages to the Sankey

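This can be achieved with the following workaround: build new label columns that combine each category name with its count and percentage, and draw the Sankey from those columns instead of the raw ones.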

# Recreate the demo data frame (same seed, so the same 100 rows as before).
set.seed(111)

t1 <- sample(c("Hosp A", "Hosp B", "Hosp C"), size = 100, replace = TRUE)
t2 <- sample(c("Male", "Female"), size = 100, replace = TRUE)
t3 <- sample(c("Survived", "Died"), size = 100, replace = TRUE)

# Columns are named directly in the constructor.
d <- data.frame(Hospital = t1, Gender = t2, Outcome = t3)

head(d)
##   Hospital Gender  Outcome
## 1   Hosp B   Male Survived
## 2   Hosp C Female Survived
## 3   Hosp C Female     Died
## 4   Hosp C   Male     Died
## 5   Hosp A   Male     Died
## 6   Hosp C Female Survived
# Count the rows per hospital and build a label column that carries the
# hospital name, its count and its percentage of all rows.
d1 <- d %>%
  dplyr::count(Hospital) %>%
  dplyr::mutate(
    HospitalNew = paste(Hospital, n, '(', round(n / sum(n) * 100, 1), '%)')
  ) %>%
  dplyr::select(Hospital, HospitalNew)
d1
## # A tibble: 3 x 2
##   Hospital HospitalNew      
##   <chr>    <chr>            
## 1 Hosp A   Hosp A 38 ( 38 %)
## 2 Hosp B   Hosp B 30 ( 30 %)
## 3 Hosp C   Hosp C 32 ( 32 %)
# Attach the hospital label to every row of the original data.
dMain <- merge(d, d1, by = 'Hospital')


# Count the rows per gender and build a label column that carries the
# gender, its count and its percentage of all rows.
d2 <- d %>%
  dplyr::count(Gender) %>%
  dplyr::mutate(
    GenderNew = paste(Gender, n, '(', round(n / sum(n) * 100, 1), '%)')
  ) %>%
  dplyr::select(Gender, GenderNew)

# Attach the gender label to every row collected so far.
dMain <- merge(dMain, d2, by = 'Gender')

# Count the rows per outcome and build a label column that carries the
# outcome, its count and its percentage of all rows.
d3 <- d %>%
  dplyr::count(Outcome) %>%
  dplyr::mutate(
    OutcomeNew = paste(Outcome, n, '(', round(n / sum(n) * 100, 1), '%)')
  ) %>%
  dplyr::select(Outcome, OutcomeNew)

# Attach the outcome label to every row collected so far.
dMain <- merge(dMain, d3, by = 'Outcome')

# Keep only the three label columns; these feed the Sankey chart.
dFinal <- dplyr::select(dMain, HospitalNew, GenderNew, OutcomeNew)

dFinal
##           HospitalNew         GenderNew          OutcomeNew
## 1   Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 2   Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 3   Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 4   Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 5   Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 6   Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 7   Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 8   Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 9   Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 10  Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 11  Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 12  Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 13  Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 14  Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 15  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 16  Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 17  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 18  Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 19  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 20  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 21  Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 22  Hosp A 38 ( 38 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 23  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 24  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 25  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 26  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 27  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 28  Hosp B 30 ( 30 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 29  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 30  Hosp B 30 ( 30 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 31  Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 32  Hosp B 30 ( 30 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 33  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 34  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 35  Hosp B 30 ( 30 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 36  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 37  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 38  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 39  Hosp B 30 ( 30 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 40  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 41  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 42  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 43  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 44  Hosp C 32 ( 32 %) Female 50 ( 50 %)     Died 58 ( 58 %)
## 45  Hosp B 30 ( 30 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 46  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 47  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 48  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 49  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 50  Hosp B 30 ( 30 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 51  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 52  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 53  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 54  Hosp B 30 ( 30 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 55  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 56  Hosp A 38 ( 38 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 57  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 58  Hosp C 32 ( 32 %)   Male 50 ( 50 %)     Died 58 ( 58 %)
## 59  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 60  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 61  Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 62  Hosp B 30 ( 30 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 63  Hosp A 38 ( 38 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 64  Hosp B 30 ( 30 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 65  Hosp A 38 ( 38 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 66  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 67  Hosp A 38 ( 38 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 68  Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 69  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 70  Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 71  Hosp A 38 ( 38 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 72  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 73  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 74  Hosp B 30 ( 30 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 75  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 76  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 77  Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 78  Hosp B 30 ( 30 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 79  Hosp A 38 ( 38 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 80  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 81  Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 82  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 83  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 84  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 85  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 86  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 87  Hosp A 38 ( 38 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 88  Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 89  Hosp C 32 ( 32 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 90  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 91  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 92  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 93  Hosp C 32 ( 32 %) Female 50 ( 50 %) Survived 42 ( 42 %)
## 94  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 95  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 96  Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 97  Hosp B 30 ( 30 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 98  Hosp C 32 ( 32 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 99  Hosp C 32 ( 32 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
## 100 Hosp A 38 ( 38 %)   Male 50 ( 50 %) Survived 42 ( 42 %)
# Draw the Sankey from the label columns so each node shows its count and
# percentage, then add a title and subtitle.
dFinal %>%
  data_to_sankey() %>%
  hchart("sankey", name = "Patient Outcomes") %>%
  hc_title(text = "Sankey Diagram with value labels") %>%
  hc_subtitle(text = "Hospital and Gender based outcomes")

How to reduce the data labels size

This can be achieved by using `hc_plotOptions`. In this example the font size of the data labels is reduced to 5px.

# Recreate the demo data frame for this example (same seed, same rows).
set.seed(111)

t1 <- sample(c("Hosp A", "Hosp B", "Hosp C"), size = 100, replace = TRUE)
t2 <- sample(c("Male", "Female"), size = 100, replace = TRUE)
t3 <- sample(c("Survived", "Died"), size = 100, replace = TRUE)

# Columns are named directly in the constructor.
d <- data.frame(Hospital = t1, Gender = t2, Outcome = t3)
#head(d)


# Build one "<name> <count>" label column per variable and attach the labels
# to every row of `d`.
#
# Only the key column and the label column of each count table are merged.
# Merging the full count tables would also bring their `n` columns along; the
# three `n` columns then collide and merge() silently renames them
# (n.x, n.y, n), leaving unused, confusingly named columns in `dMain`.

# Hospital: count per hospital and label with the count in it.
d1 <- d %>%
  dplyr::count(Hospital) %>%
  dplyr::mutate(HospitalNew = paste(Hospital, n))

dMain <- merge(d, dplyr::select(d1, Hospital, HospitalNew), by = "Hospital")

# Gender: count per gender and label with the count in it.
d2 <- d %>%
  dplyr::count(Gender) %>%
  dplyr::mutate(GenderNew = paste(Gender, n))

dMain <- merge(dMain, dplyr::select(d2, Gender, GenderNew), by = "Gender")

# Outcome: count per outcome and label with the count in it.
d3 <- d %>%
  dplyr::count(Outcome) %>%
  dplyr::mutate(OutcomeNew = paste(Outcome, n))

dMain <- merge(dMain, dplyr::select(d3, Outcome, OutcomeNew), by = "Outcome")

# Keep only the three label columns; these feed the Sankey chart.
dFinal <- dMain %>%
  dplyr::select(HospitalNew, GenderNew, OutcomeNew)


# Data-label styling applied to every series: shrink the font to 5px.
small_labels <- list(style = list(fontSize = "5px"))

dFinal %>%
  data_to_sankey() %>%
  hchart("sankey", name = "Patient Outcomes") %>%
  hc_title(text = "Sankey Diagram with value labels with size reduced to 5px ") %>%
  hc_subtitle(text = "Hospital and Gender based outcomes") %>%
  hc_plotOptions(series = list(dataLabels = small_labels))

More control over the data labels

# Recreate the demo data frame for this example (same seed, same rows).
set.seed(111)

t1 <- sample(c("Hosp A", "Hosp B", "Hosp C"), size = 100, replace = TRUE)
t2 <- sample(c("Male", "Female"), size = 100, replace = TRUE)
t3 <- sample(c("Survived", "Died"), size = 100, replace = TRUE)

# Columns are named directly in the constructor.
d <- data.frame(Hospital = t1, Gender = t2, Outcome = t3)
#head(d)


# Build one "<name> <count>" label column per variable and attach the labels
# to every row of `d`.
#
# Only the key column and the label column of each count table are merged.
# Merging the full count tables would also bring their `n` columns along; the
# three `n` columns then collide and merge() silently renames them
# (n.x, n.y, n), leaving unused, confusingly named columns in `dMain`.

# Hospital: count per hospital and label with the count in it.
d1 <- d %>%
  dplyr::count(Hospital) %>%
  dplyr::mutate(HospitalNew = paste(Hospital, n))

dMain <- merge(d, dplyr::select(d1, Hospital, HospitalNew), by = "Hospital")

# Gender: count per gender and label with the count in it.
d2 <- d %>%
  dplyr::count(Gender) %>%
  dplyr::mutate(GenderNew = paste(Gender, n))

dMain <- merge(dMain, dplyr::select(d2, Gender, GenderNew), by = "Gender")

# Outcome: count per outcome and label with the count in it.
d3 <- d %>%
  dplyr::count(Outcome) %>%
  dplyr::mutate(OutcomeNew = paste(Outcome, n))

dMain <- merge(dMain, dplyr::select(d3, Outcome, OutcomeNew), by = "Outcome")

# Keep only the three label columns; these feed the Sankey chart.
dFinal <- dMain %>%
  dplyr::select(HospitalNew, GenderNew, OutcomeNew)




# Data-label options: red 12px text on a yellow box with a rounded blue
# border, some padding and no shadow.
label_options <- list(
  style = list(fontSize = "12px", color = "red"),
  backgroundColor = "yellow",
  borderRadius = 10,
  borderWidth = 1,
  borderColor = 'blue',
  padding = 5,
  shadow = FALSE
)

dFinal %>%
  data_to_sankey() %>%
  hchart("sankey", name = "Patient Outcomes") %>%
  hc_title(text = "Sankey Diagram with value labels ") %>%
  hc_subtitle(text = "Hospital and Gender based outcomes") %>%
  hc_plotOptions(series = list(dataLabels = label_options))

How to control the colour of the nodes manually

# Recreate the demo data frame (same seed, same rows as the other examples).
set.seed(111)

t1 <- sample(x = c("Hosp A", "Hosp B", "Hosp C"), size = 100, replace = TRUE)
t2 <- sample(x = c("Male", "Female"), size = 100, replace = TRUE)
t3 <- sample(x = c("Survived", "Died"), size = 100, replace = TRUE)

d <- data.frame(Hospital = t1, Gender = t2, Outcome = t3)

# Convert the data frame once into the from/to/weight rows that hcaes()
# maps below, and reuse it for the series. Previously `df` was computed but
# never used and the same conversion was repeated inline.
df <- data_to_sankey(d)

# Change the node colours as shown below. Give the node name (id) of each
# node you want to colour and set its colour manually. Nodes that are not
# listed here (e.g. "Survived") get no manual colour.
plot <- highchart() %>%
  hc_add_series(
    data = df,
    type = "sankey",
    hcaes(from = from, to = to, weight = weight),
    nodes = list(
      list(id = "Hosp A", color = "green"),
      list(id = "Female", color = "red"),
      list(id = "Male",   color = "blue"),
      list(id = "Hosp B", color = "red"),
      list(id = "Hosp C", color = "orange"),
      list(id = "Died",   color = "black")
    )
  )

plot