Libraries

library(readr)
library(dplyr)
library(plotly)
library(DT)

Data import

# Read the data set from disk and show which columns it contains
df <- read_csv(file = "Plotting.csv")
colnames(df)
## [1] "Age"         "Difference"  "CRP"         "Group"       "sBP"        
## [6] "Weight"      "SideEffects" "Survey"

Strip plots

# Strip plots of age, split by group.
# With vertical = TRUE the groups lie along the x-axis and the ages along
# the y-axis, so the axis labels below are assigned accordingly.

# Jittered points, default axis labels
stripchart(Age ~ Group,
           data = df,
           vertical = TRUE,
           method = "jitter",
           pch = 2,
           jitter = 0.1)

# Jittered points with a title, explicit axis labels and one colour per group
stripchart(Age ~ Group,
           data = df,
           vertical = TRUE,
           method = "jitter",
           main = "Age of patient cohort",
           xlab = "Group",
           ylab = "Age (in years)",
           pch = 3,
           col = c("orange", "deepskyblue"))

# Stacked points: identical values are placed next to each other
stripchart(Age ~ Group,
           data = df,
           method = "stack",
           vertical = TRUE,
           main = "Age of patient cohort",
           xlab = "Group",
           ylab = "Age (in years)",
           pch = "o",
           col = c("pink", "blue"))

Bar plot

Simple bar plot

# One bar per group; bar height is the number of patients in that group
barplot(height = table(df[["Group"]]),
        las = 1,  # Draw the y-axis tick labels upright
        xlab = "Group",
        ylab = "Count",
        main = "Number of patients in each group")

Horizontal bar plot

# Horizontal bars: counts run along the x-axis, answers along the y-axis
barplot(height = table(df[["Survey"]]),
        horiz = TRUE,
        las = 1,
        xlab = "Count",
        ylab = "Survey question answers",
        main = "Survey question")

# Vertical bars; names.arg supplies the text shown under each bar
barplot(height = table(df[["Survey"]]),
        names.arg = c("Strongly disagree",
                      "Disagree",
                      "Neither",
                      "Agree",
                      "Strongly agree"),
        las = 1,
        xlab = "Survey question answers",
        ylab = "Count",
        main = "Survey question")

Patterned bar plot

# Horizontal bars filled with grey shading lines and a black outline
barplot(height = table(df[["Survey"]]),
        horiz = TRUE,
        col = "grey",
        density = 20,  # Shading lines per inch
        border = "black",
        las = 1,
        xlab = "Count",
        ylab = "Survey question answers",
        main = "Survey question")

Colored bar plot

# Horizontal bars with the first bar highlighted in red, the rest in grey
barplot(table(df$Survey),
        main = "Survey question",
        ylab = "Survey question answers",
        xlab = "Count",
        horiz = TRUE,
        border = "black",
        col = c("red", "grey", "grey", "grey", "grey"),
        density = 20,
        las = 1)

# Stacked bars: one bar per survey answer, one segment per group.
# The cross-table is computed once and reused for the legend; the legend
# argument is spelled out in full (legend.text) rather than relying on
# partial argument matching.
survey_by_group <- table(df$Group, df$Survey)
barplot(survey_by_group,
        main = "Survey question per group",
        xlab = "Survey question answers",
        ylab = "Count",
        border = "black",
        col = c("black", "grey"),
        legend.text = rownames(survey_by_group),
        density = 20,
        las = 1)

Grouped bar plot

# Grouped bars: for every survey answer, the groups are drawn side by side.
# The cross-table is computed once and reused for the legend; legend.text is
# written in full instead of the partially matched `legend`.
survey_by_group <- table(df$Group, df$Survey)
barplot(survey_by_group,
        main = "Survey question per group",
        xlab = "Survey question answers",
        ylab = "Count",
        border = "black",
        col = c("black", "grey"),
        legend.text = rownames(survey_by_group),
        density = c(20, 40),  # A different shading density for each group
        beside = TRUE,
        las = 1)

Histogram

# Default bins, filled with shading lines
hist(df$Age,
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Count",
     density = 10,  # Density pattern
     las = 1)

# Ask for roughly five bins (hist() treats a single number as a suggestion)
hist(df$Age,
     breaks = 5,
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Count",
     las = 1)

# Print the count above each bin; ylim leaves room for the labels
hist(df$Age,
     labels = TRUE,  # Adding labels (frequency values above bins)
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Count",
     ylim = c(0, 90),
     las = 1)

# Bins of unequal width: hist() then plots densities (bar areas sum to 1)
# instead of counts, so the y-axis is labelled accordingly
hist(df$Age,
     breaks = c(min(df$Age), 50, 70, 80, max(df$Age)),
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Density")

# Semi-transparent greys (red, green, blue, alpha) so overlapping bars stay
# visible
transparent_dark_grey <- rgb(0.2, 0.2, 0.2, 0.5)
transparent_light_grey <- rgb(0.8, 0.8, 0.8, 0.5)

# Overlay the age histograms of the two groups on the same axes
hist(df$Age[df$Group == "I"],
     col = transparent_dark_grey,
     main = "Age distribution  for each group",
     xlab = "Age",
     ylab = "Count")
hist(df$Age[df$Group == "II"],
     col = transparent_light_grey,
     add = TRUE)  # Draw on top of the previous histogram
legend("topright",
       legend = c("Group I", "Group II"),
       col = c(transparent_dark_grey, transparent_light_grey),
       pt.cex = 2,
       pch = 15)

Density plots

# Kernel density estimate of the ages, drawn as a thick line
plot(x = density(df[["Age"]]),
     lwd = 2,
     xlab = "Age (in years)",
     ylab = "Density",
     main = "Distribution of patient ages")

Box and whisker plots

# One box per group, coloured with the two transparent greys
boxplot(Age ~ Group,
        data = df,
        boxwex = 0.4,  # Box width as a fraction of the available space
        col = c(transparent_dark_grey, transparent_light_grey),
        xlab = "Group",
        ylab = "Age",
        main = "Age per group")
# Key matching the box colours to the groups
legend(x = "topright",
       pch = 15,
       pt.cex = 2,
       col = c(transparent_dark_grey, transparent_light_grey),
       legend = c("Group I", "Group II"))

Scatter plots

# Plain scatter plot: age on the x-axis, weight on the y-axis
plot(x = df$Age,
     y = df$Weight,
     xlab = "Age (in years)",
     ylab = "Weight (in lbs)",
     main = "Patient age vs weight")

# The same scatter plot with a least-squares regression line on top
plot(x = df$Age,
     y = df$Weight,
     xlab = "Age (in years)",
     ylab = "Weight (in lbs)",
     main = "Patient age vs weight")
abline(lm(Weight ~ Age, data = df))

# Scatter plot with hand-built axes. The default axes are suppressed and
# redrawn in layers with different tick lengths. The tick length is passed
# to axis() directly (tcl) so the global graphics settings are left alone.
plot(df$Age, # Independent variable (x-axis)
     df$Weight,  # Dependent variable (y-axis)
     main = "Patient age vs weight",
     xlab = "Age (in years)",
     ylab = "Weight (in lbs)",
     axes = FALSE)
# x-axis
# Small tick marks every year; positive tcl makes ticks point into the plot
axis(1,
     at = seq(30, 90, by = 1),  # From 30 to 90, stepsize 1
     labels = FALSE,  # Don't add number labels
     tcl = 0.1)
# Slightly taller tick marks every 5 years
axis(1,
     at = seq(30, 90, by = 5),
     labels = FALSE,
     tcl = 0.2)
# Major, labelled tick marks every 10 years (negative tcl points outwards)
axis(1,
     at = seq(30, 90, by = 10),
     tcl = -0.5)
# y-axis
# Small tick marks every 2 lbs
axis(2,
     at = seq(110, 230, by = 2),
     labels = FALSE,
     tcl = 0.1)
# Major, labelled tick marks every 20 lbs
axis(2,
     at = seq(110, 230, by = 20),
     tcl = -0.5)

Scatter plot matrices

# Pairwise scatter plots of the numerical columns. Columns are selected by
# name rather than by position so the plot survives a change in column order.
# The marker fill colour is looked up from the integer code of each group.
pairs(df[c("Age", "CRP", "sBP", "Weight")],
      main = "Scatter plot matrix of numerical values",
      pch = 22,  # Filled square; bg sets its fill colour
      bg = c("orange", "deepskyblue")[as.integer(factor(df$Group))])

Multiple plots

grey_five <- c(rgb(0.1, 0.1, 0.1),
               rgb(0.3, 0.3, 0.3),
               rgb(0.5, 0.5, 0.5),
               rgb(0.7, 0.7, 0.7),
               rgb(0.9, 0.9, 0.9))  # Specifying five shades of grey
# Split the device into one row and two columns; par() returns the previous
# layout, which is restored once both plots are drawn
old_par <- par(mfrow = c(1, 2))
barplot(table(df$Survey),
        main = "Bar plots",
        col = grey_five)
pie(table(df$Survey),
    main = "Pie plots are bad",
    radius = 1,
    col = grey_five)
par(old_par)  # Back to the layout that was active before

# Three plots of the same variable placed by hand on one device.
# Save every settable graphics parameter first so the changes below do not
# leak into later plots.
old_par <- par(no.readonly = TRUE)
# Make labels and margins smaller
par(cex = 0.7, mai = c(0.1, 0.1, 0.2, 0.1))
# Define area for the histogram; fig = c(left, right, bottom, top) as
# fractions of the device
par(fig = c(0.1, 0.7, 0.3, 0.9))
hist(df$Age,
     main = "Three plots of patient age")
# Define area for the boxplot; new = TRUE keeps what is already drawn
par(fig = c(0.8, 1, 0, 1),
    new = TRUE)
boxplot(df$Age)
# Define area for the stripchart
par(fig = c(0.1, 0.67, 0.1, 0.25),
    new = TRUE)
stripchart(df$Age,
           method = "jitter")
# Restore the graphics settings saved at the top
par(old_par)

Plotly

Scatter plots

  • Simple
# Scatter plot of the iris data: fixed-size pink markers with a dark red rim
p <- iris %>%
  plot_ly(x = ~Sepal.Length,
          y = ~Petal.Length,
          type = "scatter",
          mode = "markers",
          marker = list(color = "rgba(255, 180, 190, 0.8)",
                        size = 14,
                        line = list(width = 2,
                                    color = "rgba(150, 0, 0, 0.8)"))) %>%
  layout(title = "Scatter plot",
         xaxis = list(title = "Sepal length", zeroline = FALSE),
         yaxis = list(title = "Petal length", zeroline = FALSE))
p
  • Bubble chart
# Bubble chart: marker size is ten times the sepal width of each flower
p <- iris %>%
  plot_ly(x = ~Sepal.Length,
          y = ~Petal.Length,
          type = "scatter",
          mode = "markers",
          marker = list(color = "rgba(255, 180, 190, 0.8)",
                        size = ~Sepal.Width * 10,
                        line = list(width = 2,
                                    color = "rgba(150, 0, 0, 0.8)"))) %>%
  layout(title = "Scatter plot",
         xaxis = list(title = "Sepal length", zeroline = FALSE),
         yaxis = list(title = "Petal length", zeroline = FALSE))
p
  • Color scale
# Bubble chart whose marker colour is mapped to the petal width
p <- iris %>%
  plot_ly(x = ~Sepal.Length,
          y = ~Petal.Length,
          color = ~Petal.Width,
          type = "scatter",
          mode = "markers",
          marker = list(size = ~Sepal.Width * 10,
                        line = list(width = 2,
                                    color = "rgba(10, 10, 10, 0.5)"))) %>%
  layout(title = "Scatter plot",
         xaxis = list(title = "Sepal length", zeroline = FALSE),
         yaxis = list(title = "Petal length", zeroline = FALSE))
p
  • Adding traces
# Two series of 100 random integers each, plotted against their index
trace0 <- sample(5:10, 100, replace = TRUE)
trace1 <- sample(1:5, 100, replace = TRUE)
x <- seq_len(100)

# NOTE: this replaces the `df` that was read from the CSV file
df <- data.frame(x, trace0, trace1)

# The first series is drawn as a line; the second is added as a further trace
# with lines and markers. Both y values are formulas so that they are looked
# up in `df` rather than in the global environment.
p <- plot_ly(df,
             x = ~x,
             y = ~trace0,
             name = "First run",
             type = "scatter",
             mode = "lines") %>%
  add_trace(y = ~trace1,
            name = "Second run",
            mode = "lines+markers")
p
  • Specific color palette
# Colour the markers by species, taking the colours from the "Set1" palette
p <- iris %>%
  plot_ly(x = ~Sepal.Length,
          y = ~Petal.Length,
          color = ~Species,
          colors = "Set1",
          type = "scatter",
          mode = "markers",
          marker = list(size = 16)) %>%
  layout(title = "Scatter plot",
         xaxis = list(title = "Sepal length", zeroline = FALSE),
         yaxis = list(title = "Petal length", zeroline = FALSE))
p
  • Specifying a color scale
# Hand-picked colours, passed to plot_ly() as the palette for the species
pal <- c("orange", "blue", "gray")

p <- iris %>%
  plot_ly(x = ~Sepal.Length,
          y = ~Petal.Length,
          color = ~Species,
          colors = pal,
          type = "scatter",
          mode = "markers",
          marker = list(size = 16)) %>%
  layout(title = "Scatter plot",
         xaxis = list(title = "Sepal length", zeroline = FALSE),
         yaxis = list(title = "Petal length", zeroline = FALSE))
p

Histogram

# Histogram of sepal length: teal bars separated by thin light grey outlines
p <- iris %>%
  plot_ly(x = ~Sepal.Length,
          type = "histogram",
          marker = list(line = list(color = "lightgray",
                                    width = 1),
                        color = "teal")) %>%
  layout(title = "Teal histogram",
         xaxis = list(title = "Sepal length", zeroline = FALSE),
         yaxis = list(title = "Count", zeroline = FALSE))
p
# Draw 100 values from a normal distribution (mean 15, standard deviation 4)
wcc <- rnorm(n = 100, mean = 15, sd = 4)
summary(wcc)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   5.692  13.665  15.858  15.949  18.263  24.323
# Histogram normalised with histnorm = "probability", values on the x-axis
p <- plot_ly(type = "histogram",
             histnorm = "probability",
             x = ~wcc) %>%
  layout(title = "Frequency distribution of white cell count",
         yaxis = list(zeroline = FALSE,
                      title = "Frequency distribution"),
         xaxis = list(zeroline = FALSE,
                      title = "White cell count"))
p
# The same histogram turned on its side: the values are mapped to the y-axis
p <- plot_ly(type = "histogram",
             histnorm = "probability",
             y = ~wcc) %>%
  layout(title = "Frequency distribution of white cell count",
         xaxis = list(zeroline = FALSE,
                      title = "Frequency"),
         yaxis = list(zeroline = FALSE,
                      title = "White cell count"))
p
# Assign every white cell count to group A or B at random.
# The number of labels is taken from `wcc` itself: drawing more labels than
# there are values would make data.frame() silently recycle `wcc`, so every
# measurement would appear more than once.
df <- data.frame(Group = sample(c("A", "B"),
                                length(wcc),
                                replace = TRUE),
                 WCC = wcc)
groupA <- df %>% filter(Group == "A")
groupB <- df %>% filter(Group == "B")
# Overlay one semi-transparent histogram per group; each trace receives its
# own data frame and refers to the column by name
p <- plot_ly(alpha = 0.7) %>%
  add_histogram(data = groupA,
                x = ~WCC,
                name = "Group A") %>%
  add_histogram(data = groupB,
                x = ~WCC,
                name = "Group B") %>%
  layout(barmode = "overlay",
         title = "Histogram of white cell count for groups A and B",
         xaxis = list(title = "White cell count",
                      zeroline = FALSE),
         yaxis = list(title = "Frequency",
                      zeroline = FALSE))
p
# Probability-normalised histogram: light grey bars with a dark grey outline;
# the zero line of the y-axis is kept visible
p <- plot_ly(type = "histogram",
             histnorm = "probability",
             x = ~wcc,
             marker = list(line = list(width = 2,
                                       color = "darkgray"),
                           color = "lightgray")) %>%
  layout(title = "Frequency distribution of white cell count",
         yaxis = list(zeroline = TRUE,
                      title = "Frequency"),
         xaxis = list(zeroline = FALSE,
                      title = "White cell count"))
p
# Overlaid histograms with named colours; only the second trace is made
# semi-transparent so the first one shows through
p <- plot_ly() %>%
  add_histogram(data = groupA,
                x = ~WCC,
                name = "Group A",
                marker = list(color = "teal",
                              line = list(color = "darkgray",
                                          width = 2))) %>%
  add_histogram(data = groupB,
                x = ~WCC,
                opacity = 0.7,  # Add opacity to overlay trace
                name = "Group B",
                marker = list(color = "orange",
                              line = list(color = "darkgray",
                                          width = 2))) %>%
  layout(barmode = "overlay",
         title = "Histogram of white cell count for groups A and B",
         xaxis = list(title = "White cell count",
                      zeroline = FALSE),
         yaxis = list(title = "Count",
                      zeroline = FALSE))
p
# The same overlay with the colours given as rgb()/rgba() strings; the
# transparency of the second trace comes from the alpha value of its colour
p <- plot_ly() %>%
  add_histogram(data = groupA,
                x = ~WCC,
                name = "Group A",
                marker = list(color = "rgba(255, 165, 0, 1.0)",
                              line = list(color = "rgb(169, 169, 169)",
                                          width = 2))) %>%
  add_histogram(data = groupB,
                x = ~WCC,
                name = "Group B",
                marker = list(color = "rgba(150, 150, 150, 0.7)",
                              line = list(color = "rgb(169, 169, 169)",
                                          width = 2))) %>%
  layout(barmode = "overlay",
         title = "Histogram of white cell count for groups A and B",
         xaxis = list(title = "White cell count",
                      zeroline = FALSE),
         yaxis = list(title = "Count",
                      zeroline = FALSE))
p

Box and whisker plots

# Fix the seed of the pseudo-random number generator so that the simulated
# data are the same on every run
set.seed(1234)
# Simulate three variables of 500 values each
# Income: normally distributed with mean 10000 and standard deviation 1000,
# rounded to two decimal places
income <- round(rnorm(n = 500, mean = 10000, sd = 1000),
                digits = 2)
# Stage: drawn with replacement from three career stages
stage <- sample(x = c("Early", "Mid", "Late"),
                size = 500,
                replace = TRUE)
# Country: drawn with replacement from two countries
country <- sample(x = c("USA", "Canada"),
                  size = 500,
                  replace = TRUE)
# Combine the three vectors into a tibble, one column per variable
df <- tibble(Income = income,
             Stage = stage,
             Country = country)
# Show the tibble as an interactive data table
datatable(df)
# Vertical box plot of all incomes
p1 <- df %>%
  plot_ly(y = ~Income,
          type = "box",
          name = "All income") %>%
  layout(title = "Overall income",
         yaxis = list(zeroline = FALSE,
                      title = "Income"),
         xaxis = list(zeroline = FALSE,
                      title = ""))
p1
# Horizontal version: the income is mapped to the x-axis instead
p2 <- df %>%
  plot_ly(x = ~Income,
          type = "box",
          name = "All income") %>%
  layout(title = "Overall income",
         xaxis = list(zeroline = FALSE,
                      title = "Income"),
         yaxis = list(zeroline = FALSE,
                      title = ""))
p2
# Show every data point as a jittered marker, offset to the side of the box
p3 <- df %>%
  plot_ly(y = ~Income,
          type = "box",
          name = "All income",
          boxpoints = "all",
          pointpos = -2,
          jitter = 0.3) %>%
  layout(title = "Overall income",
         yaxis = list(zeroline = FALSE,
                      title = "Income"),
         xaxis = list(zeroline = FALSE,
                      title = ""))
p3
# Add the mean and standard deviation to the box
p4 <- df %>%
  plot_ly(y = ~Income,
          type = "box",
          name = "All income",
          boxmean = "sd") %>%
  layout(title = "Overall income",
         yaxis = list(zeroline = FALSE,
                      title = "Income"),
         xaxis = list(zeroline = FALSE,
                      title = ""))
p4
# One box per career stage, coloured by stage
p5 <- df %>%
  plot_ly(type = "box",
          y = ~Income,
          color = ~Stage) %>%
  layout(title = "Income by career stage",
         yaxis = list(zeroline = FALSE,
                      title = "Income"),
         xaxis = list(zeroline = FALSE,
                      title = "Stage"))
p5
# Boxes grouped by country on the x-axis, with one box per stage in each group
p6 <- df %>%
  plot_ly(type = "box",
          x = ~Country,
          y = ~Income,
          color = ~Stage) %>%
  layout(title = "Income by career stage",
         boxmode = "group",
         yaxis = list(zeroline = FALSE,
                      title = "Income"),
         xaxis = list(zeroline = FALSE,
                      title = "Country"))
p6
# Use the "square-dot" marker symbol for the points drawn next to the box
p7 <- df %>%
  plot_ly(y = ~Income,
          type = "box",
          name = "All income",
          marker = list(symbol = "square-dot")) %>%
  layout(title = "Overall income",
         yaxis = list(zeroline = FALSE,
                      title = "Income"),
         xaxis = list(zeroline = FALSE,
                      title = ""))
p7
# Additionally fill the box in pink and draw its lines in grey, 2 wide
p8 <- df %>%
  plot_ly(y = ~Income,
          type = "box",
          name = "All income",
          fillcolor = "pink",
          line = list(width = 2,
                      color = "gray"),
          marker = list(symbol = "square-dot")) %>%
  layout(title = "Overall income",
         yaxis = list(zeroline = FALSE,
                      title = "Income"),
         xaxis = list(zeroline = FALSE,
                      title = ""))
p8
# One box per country, coloured from the "Set3" palette.
# Stored under its own name (p9) so it no longer overwrites the p7 plot
# created earlier, and titled to match what is shown.
p9 <- plot_ly(type = "box",
              y = ~Income,
              color = ~Country,
              data = df,
              marker = list(symbol = "square-dot"),
              colors = "Set3") %>%
  layout(title = "Income by country",
         xaxis = list(title = "Country",
                      zeroline = FALSE),
         yaxis = list(title = "Income",
                      zeroline = FALSE))
p9

Bar charts

# Draw 100 city labels at random, with replacement, from four cities
cities <- sample(x = c("NYC", "Boston", "LA", "Seattle"),
                 size = 100,
                 replace = TRUE)
# Frequency table of the cities, then its counts and its names separately
table(cities)
## cities
##  Boston      LA     NYC Seattle 
##      26      25      24      25
as.numeric(table(cities))
## [1] 26 25 24 25
names(table(cities))
## [1] "Boston"  "LA"      "NYC"     "Seattle"
# Bar chart: city names on the x-axis, their counts on the y-axis
p1 <- plot_ly(type = "bar",
              name = "Cities",
              x = names(table(cities)),
              y = as.numeric(table(cities)))
p1
# Pair every city with a randomly drawn group label (one label per city)
df <- data.frame(Cities = cities,
                 Group = sample(c("A", "B"),
                                length(cities),
                                replace = TRUE))
head(df)
##   Cities Group
## 1 Boston     B
## 2     LA     A
## 3 Boston     A
## 4     LA     B
## 5     LA     B
## 6     LA     B
groupA <- df %>% filter(Group == "A")
groupB <- df %>% filter(Group == "B")
table(groupA$Cities)
## 
##  Boston      LA     NYC Seattle 
##      11      11      16      11
names(table(groupA$Cities))
## [1] "Boston"  "LA"      "NYC"     "Seattle"
as.numeric(table(groupA$Cities))
## [1] 11 11 16 11
# Count cities per group in a single cross-table so both count columns refer
# to the same cities in the same order. Tabulating groupA and groupB
# separately would misalign (or fail) if a city were absent from one group.
# Fixing the group levels guarantees that both columns exist.
city_by_group <- table(df$Cities,
                       factor(df$Group, levels = c("A", "B")))
gBarChart <- data.frame(Cities = rownames(city_by_group),
                        GroupA = as.numeric(city_by_group[, "A"]),
                        GroupB = as.numeric(city_by_group[, "B"]))
head(gBarChart)
##    Cities GroupA GroupB
## 1  Boston     11     15
## 2      LA     11     14
## 3     NYC     16      8
## 4 Seattle     11     14
# Grouped bars: for every city, the counts of group A and B side by side.
# The cities are on the x-axis and the counts on the y-axis, and the axis
# titles say so.
p3 <- plot_ly(gBarChart,
              x = ~Cities,
              y = ~GroupA,
              type = "bar",
              name = "Group A") %>%
  add_trace(y = ~GroupB,
            name = "Group B") %>%
  layout(xaxis = list(title = "Cities"),
         yaxis = list(title = "Count"),
         barmode = "group")
p3
# Stacked bars: the same counts, with group B stacked on top of group A
p4 <- plot_ly(gBarChart,
              x = ~Cities,
              y = ~GroupA,
              type = "bar",
              name = "Group A") %>%
  add_trace(y = ~GroupB,
            name = "Group B") %>%
  layout(xaxis = list(title = "Cities"),
         yaxis = list(title = "Count"),
         barmode = "stack")
p4
# Bar chart with semi-transparent orange-red bars and a dark outline
p5 <- plot_ly(type = "bar",
              name = "Cities",
              x = names(table(cities)),
              y = as.numeric(table(cities)),
              marker = list(line = list(width = 1.5,
                                        color = "rgba(0, 0, 0, 0.5)"),
                            color = "rgba(255, 70, 0, 0.7)")) %>%
  layout(title = "Number of offices per city",
         yaxis = list(zeroline = FALSE,
                      title = "Number"),
         xaxis = list(zeroline = FALSE,
                      title = "Cities"))
p5
# The same chart with the x-axis tick labels set at an angle of -20
p6 <- plot_ly(type = "bar",
              name = "Cities",
              x = names(table(cities)),
              y = as.numeric(table(cities)),
              marker = list(line = list(width = 1.5,
                                        color = "rgba(0, 0, 0, 0.5)"),
                            color = "rgba(255, 70, 0, 0.7)")) %>%
  layout(title = "Number of offices per city",
         yaxis = list(zeroline = FALSE,
                      title = "Number"),
         xaxis = list(tickangle = -20,
                      zeroline = FALSE,
                      title = "Cities"))
p6
# Bar chart with one colour per bar: the third bar is highlighted in red and
# the others are grey. Every colour is a complete "rgba(r, g, b, a)" string,
# closing parenthesis included.
p7 <- plot_ly(x = names(table(cities)),
              y = as.numeric(table(cities)),
              name = "Cities",
              type = "bar",
              marker = list(color = c("rgba(150, 150, 150, 0.7)",
                                      "rgba(150, 150, 150, 0.7)",
                                      "rgba(255, 20, 0, 0.7)",
                                      "rgba(150, 150, 150, 0.7)"))) %>%
  layout(title = "Number of offices per city",
         xaxis = list(title = "Cities",
                      zeroline = FALSE),
         yaxis = list(title = "Number",
                      zeroline = FALSE))
p7