@DataAnalytic Channel

Our YouTube Channel is @DataAnalytic

Watch the video below to see this script in action and for the finer details.

https://www.youtube.com/watch?v=at7PO5yZckM

# Set the fig.width and fig.height chunk options for this document
knitr::opts_chunk$set(
  fig.width = 10,
  fig.height = 8
)

A sophisticated, publication-ready scatter plot with a trendline, R-squared value, mean lines, and boxplots for the x and y axes.

We will create a simple scatter plot first. Then we will add the trendline, the equation for the trendline, and the R-squared value. We will then add the boxplots on the x and y axes and display the mean lines on the scatter plot and the boxplots. Finally, we will combine all the charts and provide a combined title, subtitle, and caption.

Libraries used

library(ggplot2)   # For plotting
library(ggpmisc)   # for misc. plotting utilities
library(aplot)     # for combining plot
library(dplyr)     # for data manipulation
library(bp)        # for using the sample data
library(patchwork) # for combining plot
library(gt)        # for displaying the dataset

Create sample data

# Load the bp_jhs sample dataset into the workspace
data("bp_jhs")
# Show the first rows of the dataset as a gt table
bp_jhs %>%
  head() %>%
  gt()
DateTime Month Day Year DayofWk Hour Meal_Time Sys.mmHg. Dias.mmHg. bpDelta Pulse.bpm.
2019-08-01 09:15:54 8 1 2019 Thu 9 Breakfast 132 80 52 79
2019-07-31 11:39:59 7 31 2019 Wed 11 Breakfast 126 77 49 62
2019-07-31 11:38:07 7 31 2019 Wed 11 Breakfast 128 76 52 60
2019-07-30 13:47:46 7 30 2019 Tue 13 Lunch 130 81 49 63
2019-07-30 13:46:15 7 30 2019 Tue 13 Lunch 134 83 51 62
2019-07-29 11:18:13 7 29 2019 Mon 11 Breakfast 140 84 56 74

Create Scatter Plot

# Scatter plot of systolic pressure (y) against pulse (x), with dashed red
# lines at the mean of each variable, a trendline, and the trendline's
# equation and R squared value.
# na.rm = TRUE: a plain mean() returns NA as soon as the column holds a single
# missing value, and the reference line would then be dropped from the plot.
pl1 <- ggplot(data = bp_jhs, aes(x = `Pulse.bpm.`, y = `Sys.mmHg.`)) +
  geom_point() +
  geom_vline(
    xintercept = mean(bp_jhs$Pulse.bpm., na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  ) +
  geom_hline(
    yintercept = mean(bp_jhs$Sys.mmHg., na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  ) +
  theme_bw() +
  # Trendline without the confidence band (se = FALSE)
  ggpmisc::stat_poly_line(se = FALSE) +
  # Label the plot with the fitted equation and R squared; use_label is
  # namespace-qualified like the other ggpmisc calls
  ggpmisc::stat_poly_eq(ggpmisc::use_label(c("eq", "R2")))
pl1

Second chart - boxplot for the x axis

# Boxplot of pulse along the x axis, drawn on an empty (void) theme, with a
# dashed red line at the mean pulse.
# na.rm = TRUE: a plain mean() returns NA if the column holds any missing
# value, and the reference line would then be dropped from the plot.
pl2 <- ggplot(data = bp_jhs, aes(x = `Pulse.bpm.`)) +
  geom_boxplot(fill = "darkolivegreen1") +
  theme_void() +
  geom_vline(
    xintercept = mean(bp_jhs$Pulse.bpm., na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  )
pl2

Third chart - boxplot for the y axis

# Boxplot of systolic pressure along the y axis, drawn on an empty (void)
# theme, with a dashed red line at the mean systolic pressure.
# na.rm = TRUE: a plain mean() returns NA if the column holds any missing
# value, and the reference line would then be dropped from the plot.
pl3 <- ggplot(data = bp_jhs, aes(y = `Sys.mmHg.`)) +
  geom_boxplot(fill = "gold") +
  theme_void() +
  geom_hline(
    yintercept = mean(bp_jhs$`Sys.mmHg.`, na.rm = TRUE),
    color = "red",
    linetype = "dashed"
  )

pl3

Assemble the combined plot

# Use aplot's insert_* helpers to attach the y-axis boxplot on the left of the
# scatter plot and the x-axis boxplot underneath it
aplot::insert_bottom(
  aplot::insert_left(pl1, pl3, width = 0.1),
  pl2,
  height = 0.1
)

Placing the title, subtitle, and caption

The patchwork package allows placing a title, subtitle, and caption on the overall combined chart, which looks much neater.

# Build the combined aplot layout and convert it to a patchwork object
ptchplot <- aplot::as.patchwork(
  aplot::insert_bottom(
    aplot::insert_left(pl1, pl3, width = 0.05),
    pl2,
    height = 0.1
  )
)

# Add an overall title, a two-line subtitle, and a caption to the combined plot
ptchplot + patchwork::plot_annotation(
  title = "Scatter Plot using GGPLOT",
  subtitle = paste(
    "Enriched by plotting the boxplot of x and y",
    "Title adjusted by aplot and patchwork",
    sep = "\n"
  ),
  caption = "Created by @DataAnalytic Channel on YouTube"
)

Watch this script in action on YouTube: https://www.youtube.com/watch?v=at7PO5yZckM