Introduction

Scatter plots visualize the relationship between two numerical variables. The values are recorded as pairs, one pair for each subject (row) in a dataset. It is also possible to visualize a third variable, and even a fourth, by mapping additional variables to the color or the size of the markers.

Creating simulated data

# Build a reproducible, simulated data set of 200 subjects.
# The random draws below are order-dependent: with a fixed seed, changing the
# order of the runif()/rnorm()/sample() calls changes every generated value.
set.seed(123)

# Age: uniform on [15, 85].
age <- runif(200, min = 15, max = 85)

# White cell count: normal with mean 15 and sd 4, rounded to one decimal.
wcc <- round(rnorm(200, mean = 15, sd = 4), digits = 1)

# C-reactive protein: WCC plus uniform noise on [-2, 10], scaled down by 10,
# rounded to one decimal, so that CRP is correlated with WCC by construction.
crp <- round((wcc + runif(200, min = -2, max = 10)) / 10, digits = 1)

# Systolic blood pressure: uniform on [70, 180], rounded to whole numbers.
sBP <- round(runif(200, min = 70, max = 180))

# Study arm: drawn with replacement from the two labels.
group <- sample(c("Treatment group", "Control group"), size = 200, replace = TRUE)

# Collect the simulated vectors into one data frame, one row per subject.
df <- data.frame(Age = age, WCC = wcc, CRP = crp, SBP = sBP, Group = group)

# Show the first six rows.
head(df, n = 6)
##        Age  WCC CRP SBP           Group
## 1 35.13043 12.2 1.3 122 Treatment group
## 2 70.18136 16.0 2.2 110   Control group
## 3 43.62838 14.0 1.5  83 Treatment group
## 4 76.81122 13.6 1.5  75   Control group
## 5 80.83271 11.2 1.1  99   Control group
## 6 18.18895 14.8 2.2 177 Treatment group

Simple scatter plot

# Basic scatter plot: one marker per row of df, WCC on x and CRP on y.
p1 <- plot_ly(data = df, x = ~WCC, y = ~CRP,
              type = "scatter", mode = "markers",
              marker = list(size = 12))

# Add the plot title and axis titles; zeroline = FALSE is set on both axes.
p1 <- layout(p1,
             title = "Correlation between WCC and CRP",
             xaxis = list(title = "White cell count", zeroline = FALSE),
             yaxis = list(title = "C-Reactive Protein", zeroline = FALSE))

# Print the plot object to render it.
p1

Scatter plot by group

# Scatter plot of CRP against WCC with markers colored by the Group column;
# the two entries of `colors` are the palette supplied for the group levels.
p2 <- df %>%
  plot_ly(x = ~WCC, y = ~CRP,
          color = ~Group, colors = c("deepskyblue", "orange"),
          type = "scatter", mode = "markers",
          marker = list(size = 12)) %>%
  layout(title = "Correlation between WCC and CRP",
         xaxis = list(title = "White cell count", zeroline = FALSE),
         yaxis = list(title = "C-Reactive Protein", zeroline = FALSE))

# Print the plot object to render it.
p2

Adding a third variable as a color scale

# Scatter plot of CRP against WCC with a third variable, the numeric Age
# column, mapped to marker color.
p3 <- layout(
  plot_ly(data = df,
          x = ~WCC, y = ~CRP,
          color = ~Age,
          type = "scatter", mode = "markers",
          marker = list(size = 12)),
  title = "Correlation between WCC and CRP",
  xaxis = list(title = "White cell count", zeroline = FALSE),
  yaxis = list(title = "C-Reactive Protein", zeroline = FALSE)
)

# Print the plot object to render it.
p3

Adding a fourth variable by marker size (bubble chart)

# Bubble chart: CRP against WCC, with Age mapped to marker color and systolic
# blood pressure mapped to marker size.
p4 <- plot_ly(data = df,
              x = ~WCC,
              y = ~CRP,
              color = ~Age,
              # Use the SBP column of df via a formula, like the other
              # aesthetics, instead of the free-standing sBP vector. The values
              # are the same (df$SBP is built from sBP), but the mapping now
              # stays aligned with the rows of df if df is filtered or
              # reordered.
              size = ~round(SBP / 10, digits = 0),
              type = "scatter",
              mode = "markers") %>%
  layout(title = "Correlation between WCC and CRP",
         xaxis = list(title = "White cell count",
                      zeroline = FALSE),
         yaxis = list(title = "C-Reactive Protein",
                      zeroline = FALSE))

# Print the plot object to render it.
p4