Youtube Channel: https://www.youtube.com/c/TechAnswers88

Video link https://youtu.be/jvb-xYXSHvI

How to create data for a Funnel Plot. How to transfor the data for creating the Funnel Plot. How to create the Funnel Plot in GGPLOT.

Packages used

library(ggplot2)
library(dplyr)

Prepare Data

We will construct some fictitious covid data. The followind data frame shows the covid cases where the patients had a comorbidity and the deaths for each comorbidity.

comorbidity <- c('asthma'        , 'cardio'  , 'diabetes'    , 'downsyn'    , 'hematologic','immuno'
                 ,'neurological' , 'obesity' , 'pneumopathy' , 'puerperium' , 'renal')


cases <-       c(145, 2768, 2369, 36, 179,289
                 ,198, 168, 44, 318, 14)


deaths <-   c(42, 1069 , 914 ,13, 63,  127
               , 62,  81,  6 ,137 ,7)



df <- data.frame(comorbidity,cases,deaths)

Transform the data for the funnel plot

df2 <- df%>%
  dplyr::mutate(DeathRatebyComorb =  deaths/cases
                , OverallDeathRate = sum(deaths)/sum(cases)
                , se = sqrt( OverallDeathRate * ((1- OverallDeathRate) /cases))
                , lcl95 = OverallDeathRate   - (1.96* se)
                , ucl95 = OverallDeathRate   + (1.96* se)
                , lcl99.7 = OverallDeathRate - (3* se)
                , ucl99.7 = OverallDeathRate + (3* se)

  )


#You can use the following values to create your own confidence intervals.

#Confidence Interval    Z
#  80%              1.282
#  85%              1.440
#  90%              1.645
#  95%              1.960
#  99%              2.576
#  99.5%              2.807
#  99.75%             3
#  99.9%              3.291

Video link https://youtu.be/jvb-xYXSHvI

Create the plot

pl <- ggplot(data = df2, aes(x = cases, group =OverallDeathRate  ))

pl <- pl + geom_smooth(aes(y =lcl95),se = FALSE,linetype ="solid",color = "red", size = 0.5)
pl <- pl + geom_smooth(aes(y =ucl95),se = FALSE, linetype ="solid",color = "red", size = 0.5)

pl <- pl + geom_smooth(aes(y =lcl99.7),se = FALSE, linetype ="solid",color = "blue", size = 0.5)
pl <- pl + geom_smooth(aes(y =ucl99.7),se = FALSE, linetype ="solid",color = "blue", size = 0.5)

pl <- pl + geom_smooth(aes(y =OverallDeathRate),se = FALSE, color = "blue")

pl <- pl + geom_point(aes(y =DeathRatebyComorb), color ="red")

pl <- pl + geom_text(aes(y =DeathRatebyComorb,label=comorbidity),color = "blue",size = 3,hjust= -0.2, vjust= -0.3)


pl <- pl + geom_label(aes(x = 1500, y = .5, label ="red  line shows 95% CI"))
pl <- pl + geom_label(aes(x = 1500, y = .25, label ="blue line shows 97.5% CI"))

pl <- pl +  theme_classic()
pl <- pl + scale_x_continuous(breaks = seq(0, 4000, by=500 ))
pl <- pl + scale_y_continuous(labels = scales::percent)
pl <- pl + labs(title ="Funnel plot showing the mortality rates for various comorbidities")
pl <- pl + labs(x ="Covid cases")
pl <- pl + labs(y= "Mortality rate %")
pl

Textbook oncologic outcome rates comparison for different facilities

library(ggrepel)

library(dplyr)
library(ggplot2)
Facility <- c('a','b','c','d','e','f','g','h','i','j','k','l','m'
                ,'n','o','p','q','r','s','t','u','v','w','x','y','z')


TotalSurgeries <- c(789,392,586,176,644,112,540,518,597,143,170,60,592
                ,132,582,484,527,41,86,593,21,104,470,27,109,177)


FacilityTOORate <- c(.36,.16,.2,.12,.20,.3,.25,.20,.88,.3,.9,.3,.30,.3,.33,.15,.25,.2,.2
                 ,.33,.1,.2,.18,.2,.4,.11)


df2 <- data.frame(Facility,TotalSurgeries,FacilityTOORate)                 





d <- df2%>%
  dplyr::mutate(OverallTOORate = mean(FacilityTOORate)
                 , se = sqrt( OverallTOORate * ((1- OverallTOORate) /TotalSurgeries))
                 , lcl95 = OverallTOORate -(1.96* se)
                 , ucl95 = OverallTOORate +(1.96* se)
                 , lcl99.7 = OverallTOORate -(3* se)
                 , ucl99.7 = OverallTOORate +(3* se) 
                
  )





pl <- ggplot(data = d, aes(x = TotalSurgeries, group =OverallTOORate  ))
pl <- pl + geom_line(aes(y =lcl95), linetype ="dotted",color = "red")
pl <- pl + geom_line(aes(y =ucl95), linetype ="dotted",color = "red")
pl <- pl + geom_line(aes(y =lcl99.7), linetype ="dotted")
pl <- pl + geom_line(aes(y =ucl99.7), linetype ="dotted")
pl <- pl + geom_line(aes(y =OverallTOORate), color = "red")
pl <- pl + geom_point(aes(y =FacilityTOORate), color ="red")
pl <- pl + geom_text_repel(aes(y =FacilityTOORate,label= Facility))
pl <- pl + labs(x = "No of surgeries")
pl <- pl + labs(y = "Textbook oncologic outcome rate")
pl <- pl + theme_bw()
#pl <- pl + scale_x_continuous(breaks = TotalSurgeries, labels = Facility)
#pl <- pl + scale_y_continuous(labels = scales::percent)
pl

Another idea to show the hospitals on the x axis but hiding the number of surgeries.

pl <- ggplot(data = d, aes(x = TotalSurgeries, group =OverallTOORate  ))
pl <- pl + geom_line(aes(y =lcl95), linetype ="dotted",color = "red")
pl <- pl + geom_line(aes(y =ucl95), linetype ="dotted",color = "red")
pl <- pl + geom_line(aes(y =lcl99.7), linetype ="dotted")
pl <- pl + geom_line(aes(y =ucl99.7), linetype ="dotted")
pl <- pl + geom_line(aes(y =OverallTOORate), color = "red")
pl <- pl + geom_point(aes(y =FacilityTOORate), color ="red")
#pl <- pl + geom_text_repel(aes(y =FacilityTOORate,label= Facility),hjust= -0.5, vjust= -0.3)
pl <- pl + labs(x = "No of surgeries (Not shown due to privacy)")
pl <- pl + labs(y = "Textbook oncologic outcome rate")
pl <- pl + theme_bw()
pl <- pl + scale_x_continuous(breaks = TotalSurgeries, labels = Facility)

pl

Additional labels on the funnel plot

library(ggrepel)

library(dplyr)
library(ggplot2)
Facility <- c('a','b','c','d','e','f','g','h','i','j','k','l','m'
                ,'n','o','p','q','r','s','t','u','v','w','x','y','z')


TotalSurgeries <- c(789,392,586,176,644,112,540,518,597,143,170,60,592
                ,132,582,484,527,41,86,593,21,104,470,27,109,177)


FacilityTOORate <- c(.36,.16,.2,.12,.20,.3,.25,.20,.88,.3,.9,.3,.30,.3,.33,.15,.25,.2,.2
                 ,.33,.1,.2,.18,.2,.4,.11)

FacilityVolume <- c(0,0,0,0,1,1,0,0,1,0,0,1,0,0,0,1,0,0,1,0,1,0,1,0,1,1)


df2 <- data.frame(Facility,TotalSurgeries,FacilityTOORate,FacilityVolume)                 





d <- df2%>%
  dplyr::mutate(OverallTOORate = mean(FacilityTOORate)
                 , se = sqrt( OverallTOORate * ((1- OverallTOORate) /TotalSurgeries))
                 , lcl95 = OverallTOORate -(1.96* se)
                 , ucl95 = OverallTOORate +(1.96* se)
                 , lcl99.7 = OverallTOORate -(3* se)
                 , ucl99.7 = OverallTOORate +(3* se) 
                
  )

d <- d%>%
     dplyr::mutate(FacVolumeLabel = if_else(FacilityVolume == 1, 'High', 'Low'))



pl <- ggplot(data = d, aes(x = TotalSurgeries, group =OverallTOORate  ))
pl <- pl + geom_line(aes(y =lcl95), linetype ="dotted",color = "red")
pl <- pl + geom_line(aes(y =ucl95), linetype ="dotted",color = "red")
pl <- pl + geom_line(aes(y =lcl99.7), linetype ="dotted")
pl <- pl + geom_line(aes(y =ucl99.7), linetype ="dotted")
pl <- pl + geom_line(aes(y =OverallTOORate), color = "red")
pl <- pl + geom_point(aes(y =FacilityTOORate), color ="red")
pl <- pl + geom_text_repel(aes(y =FacilityTOORate,color =FacVolumeLabel,
                               label=  paste0(Facility,"\n", FacVolumeLabel)))
pl <- pl + labs(x = "No of surgeries")
pl <- pl + labs(y = "Textbook oncologic outcome rate")
pl <- pl + theme_bw()
pl <- pl + theme(legend.position = "none")
#pl <- pl + scale_x_continuous(breaks = TotalSurgeries, labels = Facility)
#pl <- pl + scale_y_continuous(labels = scales::percent)
pl <- pl + scale_color_manual(values = c("red", "blue"))
pl

Youtube Channel: https://www.youtube.com/c/TechAnswers88

Video link https://youtu.be/jvb-xYXSHvI