
Libraries
library(readr)
library(dplyr)
library(plotly)
library(DT)
Data import
# Read the plotting data set from the working directory into a tibble
df <- read_csv(file = "Plotting.csv")
# List the column names of the imported data
colnames(df)
## [1] "Age" "Difference" "CRP" "Group" "sBP"
## [6] "Weight" "SideEffects" "Survey"
Strip plots
# Strip charts of patient age split by group.
# With vertical = TRUE the groups run along the x-axis and the ages along the
# y-axis, so the age label is passed as ylab and the group label as xlab.

# Jittered open triangles (pch = 2) with the default axis labels
stripchart(Age ~ Group,
           data = df,
           vertical = TRUE,
           method = "jitter",
           jitter = 0.1,
           pch = 2)

# Jittered crosses (pch = 3), one colour per group
stripchart(Age ~ Group,
           data = df,
           vertical = TRUE,
           method = "jitter",
           main = "Age of patient cohort",
           xlab = "Group",
           ylab = "Ages (in years)",
           pch = 3,
           col = c("orange", "deepskyblue"))

# Stacked points: identical ages are placed next to each other
stripchart(Age ~ Group,
           data = df,
           vertical = TRUE,
           method = "stack",
           main = "Age of patient cohort",
           xlab = "Group",
           ylab = "Ages (in years)",
           pch = "o",
           col = c("pink", "blue"))

Bar plot
Simple bar plot
# Count the patients in each group, then draw one bar per group
group_counts <- table(df$Group)
barplot(group_counts,
        las = 1, # Print the axis labels horizontally
        xlab = "Group",
        ylab = "Count",
        main = "Number of patients in each group")

Horizontal bar plot
# Frequency of each survey answer, shared by both plots below
survey_counts <- table(df$Survey)

# Horizontal bars: the counts run along the x-axis
barplot(survey_counts,
        horiz = TRUE,
        las = 1,
        xlab = "Count",
        ylab = "Survey question answers",
        main = "Survey question")

# Vertical bars with descriptive bar labels.
# NOTE(review): names.arg assumes exactly five answer levels in this sorted
# order - confirm against the data.
answer_labels <- c("Strongly disagree",
                   "Disagree",
                   "Neither",
                   "Agree",
                   "Strongly agree")
barplot(survey_counts,
        names.arg = answer_labels,
        las = 1,
        xlab = "Survey question answers",
        ylab = "Count",
        main = "Survey question")

Patterned bar plot
# Horizontal bars filled with grey shading lines instead of a solid colour
survey_counts <- table(df$Survey)
barplot(survey_counts,
        horiz = TRUE,
        col = "grey",
        border = "black",
        density = 20, # Shading lines per inch
        las = 1,
        xlab = "Count",
        ylab = "Survey question answers",
        main = "Survey question")

Colored bar plot
# Highlight the first answer in red and leave the rest grey
barplot(table(df$Survey),
        main = "Survey question",
        ylab = "Survey question answers",
        xlab = "Count",
        horiz = TRUE,
        border = "black",
        col = c("red", "grey", "grey", "grey", "grey"),
        density = 20,
        las = 1)

# Stacked bars: one bar per survey answer, split by group.
# The cross-tabulation is computed once and reused for the legend, and the
# legend argument is spelled out in full (legend.text) instead of relying on
# partial argument matching.
group_by_survey <- table(df$Group, df$Survey)
barplot(group_by_survey,
        main = "Survey question per group",
        xlab = "Survey question answers",
        ylab = "Count",
        border = "black",
        col = c("black", "grey"),
        legend.text = rownames(group_by_survey),
        density = 20,
        las = 1)

Grouped bar plot
# Grouped bars: for every survey answer, one bar per group side by side.
# The cross-tabulation is computed once and reused for the legend, and the
# legend argument is spelled out in full (legend.text) instead of relying on
# partial argument matching.
group_by_survey <- table(df$Group, df$Survey)
barplot(group_by_survey,
        main = "Survey question per group",
        xlab = "Survey question answers",
        ylab = "Count",
        border = "black",
        col = c("black", "grey"),
        legend.text = rownames(group_by_survey),
        density = c(20, 40), # A different shading density for each group
        beside = TRUE,
        las = 1)

Histogram
# Histogram with shaded bars
hist(df$Age,
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Count",
     density = 10, # Shading lines per inch
     las = 1)

# Suggest roughly five bins (hist() treats a single number as a suggestion)
hist(df$Age,
     breaks = 5,
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Count",
     las = 1)

# Print the count above every bin; ylim leaves room for the labels
hist(df$Age,
     labels = TRUE,
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Count",
     ylim = c(0, 90),
     las = 1)

# User-defined bins of unequal width.
# With unequal bin widths hist() plots densities (bar area = proportion of
# observations), not counts or relative frequencies, so the y-axis is labelled
# accordingly and freq = FALSE states this explicitly.
hist(df$Age,
     breaks = c(min(df$Age), 50, 70, 80, max(df$Age)),
     freq = FALSE,
     main = "Histogram of patient ages",
     xlab = "Age (in years)",
     ylab = "Density")

# Semi-transparent greys (red, green, blue, alpha) so overlapping bars stay visible
transparent_dark_grey <- rgb(0.2, 0.2, 0.2, 0.5)
transparent_light_grey <- rgb(0.8, 0.8, 0.8, 0.5)

# Ages of the two groups
ages_group_1 <- filter(df, Group == "I")$Age
ages_group_2 <- filter(df, Group == "II")$Age

# Use the same bins for both groups so the bars are directly comparable, and
# size the y-axis for the taller histogram so the overlay is never clipped.
shared_breaks <- hist(c(ages_group_1, ages_group_2), plot = FALSE)$breaks
max_count <- max(hist(ages_group_1, breaks = shared_breaks, plot = FALSE)$counts,
                 hist(ages_group_2, breaks = shared_breaks, plot = FALSE)$counts)

hist(ages_group_1,
     breaks = shared_breaks,
     ylim = c(0, max_count),
     col = transparent_dark_grey,
     main = "Age distribution for each group",
     xlab = "Age",
     ylab = "Count")
# add = TRUE draws on top of the existing histogram
hist(ages_group_2,
     breaks = shared_breaks,
     col = transparent_light_grey,
     add = TRUE)
legend("topright",
       legend = c("Group I", "Group II"),
       col = c(transparent_dark_grey, transparent_light_grey),
       pt.cex = 2,
       pch = 15) # Filled squares as legend keys

Density plots
# Kernel density estimate of the ages, drawn as a thick line
age_density <- density(df$Age)
plot(age_density,
     lwd = 2,
     xlab = "Age (in years)",
     ylab = "Density",
     main = "Distribution of patient ages")

Box and whisker plots
# One box per group, filled with the semi-transparent greys defined earlier
group_fill <- c(transparent_dark_grey, transparent_light_grey)
boxplot(Age ~ Group,
        data = df,
        boxwex = 0.4, # Box width as a fraction of the available space
        col = group_fill,
        xlab = "Group",
        ylab = "Age",
        main = "Age per group")
# Legend with filled squares in the box colours
legend("topright",
       pch = 15,
       pt.cex = 2,
       col = group_fill,
       legend = c("Group I", "Group II"))

Scatter plots
# Plain scatter plot: age on the x-axis (independent variable), weight on the
# y-axis (dependent variable)
with(df,
     plot(Age,
          Weight,
          xlab = "Age (in years)",
          ylab = "Weight (in lbs)",
          main = "Patient age vs weight"))

# The same scatter plot with a least-squares regression line on top
with(df,
     plot(Age,
          Weight,
          xlab = "Age (in years)",
          ylab = "Weight (in lbs)",
          main = "Patient age vs weight"))
weight_on_age <- lm(Weight ~ Age, data = df)
abline(weight_on_age)

# Scatter plot with hand-built axes.
# axes = FALSE suppresses the default axes so they can be drawn tier by tier.
plot(df$Age, # Independent variable (x-axis)
     df$Weight, # Dependent variable (y-axis)
     main = "Patient age vs weight",
     xlab = "Age (in years)",
     ylab = "Weight (in lbs)",
     axes = FALSE)

# The tick length is passed to each axis() call through tcl rather than being
# set with par(), so the global graphics state is left untouched.
# Positive tcl values draw ticks into the plot, negative values outwards.

# x-axis
# Small unlabelled ticks every year
axis(1,
     at = seq(30, 90, by = 1),
     labels = FALSE,
     tcl = 0.1)
# Slightly taller unlabelled ticks every 5 years
axis(1,
     at = seq(30, 90, by = 5),
     labels = FALSE,
     tcl = 0.2)
# Major labelled ticks every 10 years
axis(1,
     at = seq(30, 90, by = 10),
     tcl = -0.5)

# y-axis
# Small unlabelled ticks every 2 lbs
axis(2,
     at = seq(110, 230, by = 2),
     labels = FALSE,
     tcl = 0.1)
# Major labelled ticks every 20 lbs
axis(2,
     at = seq(110, 230, by = 20),
     tcl = -0.5)

Scatter plot matrices
- The `pch =` argument is set to 22 (the code for filled squares)
# Pairwise scatter plots of columns 1, 3, 5 and 6, with each point filled
# according to its group (first group level orange, second deepskyblue)
group_index <- unclass(factor(df$Group))
group_fill <- c("orange", "deepskyblue")
pairs(df[c(1, 3, 5, 6)],
      pch = 22,
      bg = group_fill[group_index],
      main = "Scatter plot matrix of numerical values")

Multiple plots
# Five shades of grey, from dark to light
grey_five <- c(rgb(0.1, 0.1, 0.1),
               rgb(0.3, 0.3, 0.3),
               rgb(0.5, 0.5, 0.5),
               rgb(0.7, 0.7, 0.7),
               rgb(0.9, 0.9, 0.9))

# One row, two columns. par() returns the previous settings, which are kept so
# the layout can be restored once both plots are drawn.
old_par <- par(mfrow = c(1, 2))
barplot(table(df$Survey),
        main = "Bar plots",
        col = grey_five)
pie(table(df$Survey),
    main = "Pie plots are bad",
    radius = 1,
    col = grey_five)
# Restore the previous layout so later plots are not squeezed into two columns
par(old_par)

`par(fig = )` lets us control the location of a figure precisely within the plotting area
- The coordinates must be given in normalized form as `c(x1, x2, y1, y2)`; the whole plotting area is `c(0, 1, 0, 1)`, with (x1, y1) = (0, 0) being the lower-left corner and (x2, y2) = (1, 1) being the upper-right corner
- The parameter `cex` is used to decrease the size of labels and `mai` to define the margins.
# Three plots of the same variable placed by hand with par(fig = ).
# Save every settable graphics parameter first so the changes to cex, mai,
# fig and new can be undone afterwards.
old_par <- par(no.readonly = TRUE)

# Make labels and margins smaller
par(cex = 0.7, mai = c(0.1, 0.1, 0.2, 0.1))
# Define area for the histogram
par(fig = c(0.1, 0.7, 0.3, 0.9))
hist(df$Age,
     main = "Three plots of patient age")
# Define area for the boxplot; new = TRUE keeps the histogram on the page
par(fig = c(0.8, 1, 0, 1),
    new = TRUE)
boxplot(df$Age)
# Define area for the stripchart
par(fig = c(0.1, 0.67, 0.1, 0.25),
    new = TRUE)
stripchart(df$Age,
           method = "jitter")

# Restore the graphics parameters that were in effect before this figure
par(old_par)

Plotly
Scatter plots
# Fixed-size markers: pink fill with a dark red outline
p <- plot_ly(data = iris,
             type = "scatter",
             mode = "markers",
             x = ~Sepal.Length,
             y = ~Petal.Length,
             marker = list(size = 14,
                           color = "rgba(255, 180, 190, 0.8)",
                           line = list(color = "rgba(150, 0, 0, 0.8)",
                                       width = 2)))
p <- layout(p,
            title = "Scatter plot",
            xaxis = list(title = "Sepal length", zeroline = FALSE),
            yaxis = list(title = "Petal length", zeroline = FALSE))
p

# Bubble chart: marker size is ten times the sepal width
p <- plot_ly(data = iris,
             type = "scatter",
             mode = "markers",
             x = ~Sepal.Length,
             y = ~Petal.Length,
             marker = list(size = ~Sepal.Width * 10,
                           color = "rgba(255, 180, 190, 0.8)",
                           line = list(color = "rgba(150, 0, 0, 0.8)",
                                       width = 2)))
p <- layout(p,
            title = "Scatter plot",
            xaxis = list(title = "Sepal length", zeroline = FALSE),
            yaxis = list(title = "Petal length", zeroline = FALSE))
p

# Bubble chart with the marker colour mapped to the petal width
p <- plot_ly(data = iris,
             type = "scatter",
             mode = "markers",
             x = ~Sepal.Length,
             y = ~Petal.Length,
             color = ~Petal.Width,
             marker = list(size = ~Sepal.Width * 10,
                           line = list(color = "rgba(10, 10, 10, 0.5)",
                                       width = 2)))
p <- layout(p,
            title = "Scatter plot",
            xaxis = list(title = "Sepal length", zeroline = FALSE),
            yaxis = list(title = "Petal length", zeroline = FALSE))
p
# Two simulated runs of 100 integer observations each
trace0 <- sample(5:10, 100, replace = TRUE)
trace1 <- sample(1:5, 100, replace = TRUE)
x <- seq_len(100)
df <- data.frame(x, trace0, trace1)

# First run as a line; second run added as a line with markers.
# Both traces refer to columns of df through formulas, so the plot does not
# depend on the loose vectors in the global environment.
p <- plot_ly(df,
             x = ~x,
             y = ~trace0,
             name = "First run",
             type = "scatter",
             mode = "lines") %>%
  add_trace(y = ~trace1,
            name = "Second run",
            mode = "lines+markers")
p
# One colour per species, taken from the "Set1" palette
p <- plot_ly(data = iris,
             type = "scatter",
             mode = "markers",
             x = ~Sepal.Length,
             y = ~Petal.Length,
             color = ~Species,
             colors = "Set1",
             marker = list(size = 16))
p <- layout(p,
            title = "Scatter plot",
            xaxis = list(title = "Sepal length", zeroline = FALSE),
            yaxis = list(title = "Petal length", zeroline = FALSE))
p

# The same plot with a hand-picked colour for each species
pal <- c("orange", "blue", "gray")
p <- plot_ly(data = iris,
             type = "scatter",
             mode = "markers",
             x = ~Sepal.Length,
             y = ~Petal.Length,
             color = ~Species,
             colors = pal,
             marker = list(size = 16))
p <- layout(p,
            title = "Scatter plot",
            xaxis = list(title = "Sepal length", zeroline = FALSE),
            yaxis = list(title = "Petal length", zeroline = FALSE))
p
Histogram
# Histogram of sepal length: teal bars with a thin light grey outline
bar_style <- list(color = "teal",
                  line = list(width = 1,
                              color = "lightgray"))
p <- plot_ly(data = iris,
             type = "histogram",
             x = ~Sepal.Length,
             marker = bar_style)
p <- layout(p,
            title = "Teal histogram",
            xaxis = list(title = "Sepal length", zeroline = FALSE),
            yaxis = list(title = "Count", zeroline = FALSE))
p
# Simulate 100 white cell counts from a normal distribution
wcc <- rnorm(n = 100, mean = 15, sd = 4)
summary(wcc)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 5.692 13.665 15.858 15.949 18.263 24.323

# Vertical bars: bar heights are normalised to proportions
p <- plot_ly(x = ~wcc,
             type = "histogram",
             histnorm = "probability")
p <- layout(p,
            title = "Frequency distribution of white cell count",
            xaxis = list(title = "White cell count",
                         zeroline = FALSE),
            yaxis = list(title = "Frequency distribution",
                         zeroline = FALSE))
p

# Horizontal bars: mapping the data to y turns the histogram on its side
p <- plot_ly(y = ~wcc,
             type = "histogram",
             histnorm = "probability")
p <- layout(p,
            title = "Frequency distribution of white cell count",
            yaxis = list(title = "White cell count",
                         zeroline = FALSE),
            xaxis = list(title = "Frequency",
                         zeroline = FALSE))
p
# Assign every simulated white cell count to group A or B at random.
# One label is drawn per value in wcc; drawing more labels than values would
# make data.frame() silently recycle wcc and duplicate every observation.
df <- data.frame(Group = sample(c("A", "B"),
                                length(wcc),
                                replace = TRUE),
                 WCC = wcc)
groupA <- df %>% filter(Group == "A")
groupB <- df %>% filter(Group == "B")

# Overlaid semi-transparent histograms, one trace per group
p <- plot_ly(alpha = 0.7) %>%
  add_histogram(x = ~groupA$WCC,
                name = "Group A") %>%
  add_histogram(x = ~groupB$WCC,
                name = "Group B") %>%
  layout(barmode = "overlay",
         title = "Histogram of white cell count for groups A and B",
         xaxis = list(title = "White cell count",
                      zeroline = FALSE),
         yaxis = list(title = "Frequency",
                      zeroline = FALSE))
p
# Light grey bars with a darker outline; the y-axis zero line is shown
bar_style <- list(color = "lightgray",
                  line = list(color = "darkgray",
                              width = 2))
p <- plot_ly(x = ~wcc,
             type = "histogram",
             histnorm = "probability",
             marker = bar_style)
p <- layout(p,
            title = "Frequency distribution of white cell count",
            xaxis = list(title = "White cell count",
                         zeroline = FALSE),
            yaxis = list(title = "Frequency",
                         zeroline = TRUE))
p
- Overlay with different colors
# Overlaid histograms with a named colour for each group.
# The legend entries are spelled consistently ("Group A" / "Group B").
p <- plot_ly() %>%
  add_histogram(x = ~groupA$WCC,
                name = "Group A",
                marker = list(color = "teal",
                              line = list(color = "darkgray",
                                          width = 2))) %>%
  add_histogram(x = ~groupB$WCC,
                opacity = 0.7, # Make the top trace see-through
                name = "Group B",
                marker = list(color = "orange",
                              line = list(color = "darkgray",
                                          width = 2))) %>%
  layout(barmode = "overlay",
         title = "Histogram of white cell count for groups A and B",
         xaxis = list(title = "White cell count",
                      zeroline = FALSE),
         yaxis = list(title = "Count",
                      zeroline = FALSE))
p
- Using RGB and RGBA color values
# Overlaid histograms coloured with rgb()/rgba() strings; the fourth rgba
# value is the opacity, so the second trace is see-through.
# The legend entries are spelled consistently ("Group A" / "Group B").
p <- plot_ly() %>%
  add_histogram(x = ~groupA$WCC,
                name = "Group A",
                marker = list(color = "rgba(255, 165, 0, 1.0)",
                              line = list(color = "rgb(169, 169, 169)",
                                          width = 2))) %>%
  add_histogram(x = ~groupB$WCC,
                name = "Group B",
                marker = list(color = "rgba(150, 150, 150, 0.7)",
                              line = list(color = "rgb(169, 169, 169)",
                                          width = 2))) %>%
  layout(barmode = "overlay",
         title = "Histogram of white cell count for groups A and B",
         xaxis = list(title = "White cell count",
                      zeroline = FALSE),
         yaxis = list(title = "Count",
                      zeroline = FALSE))
p
Box and whisker plots
# Fix the seed of the pseudo-random number generator for reproducible results
set.seed(1234)

# Simulate three variables with 500 observations each.
# The draws are made in this order (income, stage, country) because the
# simulated values depend on the order in which the generator is used.
n_obs <- 500

# Income: normally distributed with mean 10000 and standard deviation 1000,
# rounded to two decimal places
income <- round(rnorm(n_obs, mean = 10000, sd = 1000), digits = 2)

# Career stage: drawn with replacement from three categories
stage <- sample(c("Early", "Mid", "Late"), n_obs, replace = TRUE)

# Country: drawn with replacement from two categories
country <- sample(c("USA", "Canada"), n_obs, replace = TRUE)

# Combine the three variables into a tibble
df <- tibble(Income = income,
             Stage = stage,
             Country = country)

# Show the data as an interactive table
datatable(df)
- Simple box-and-whisker plot
# Vertical box plot: income mapped to the y-axis
p1 <- plot_ly(data = df,
              type = "box",
              y = ~Income,
              name = "All income")
p1 <- layout(p1,
             title = "Overall income",
             xaxis = list(title = "",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p1

# Horizontal box plot: income mapped to the x-axis
p2 <- plot_ly(data = df,
              type = "box",
              x = ~Income,
              name = "All income")
p2 <- layout(p2,
             title = "Overall income",
             yaxis = list(title = "",
                          zeroline = FALSE),
             xaxis = list(title = "Income",
                          zeroline = FALSE))
p2
- Adding all the data point values
# Box plot with every observation drawn as a jittered point; the negative
# pointpos places the points to the left of the box
p3 <- plot_ly(data = df,
              type = "box",
              y = ~Income,
              name = "All income",
              boxpoints = "all",
              jitter = 0.3,
              pointpos = -2)
p3 <- layout(p3,
             title = "Overall income",
             xaxis = list(title = "",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p3
- Adding a mean and a standard deviation
# Box plot that also marks the mean and the standard deviation
p4 <- plot_ly(data = df,
              type = "box",
              y = ~Income,
              name = "All income",
              boxmean = "sd")
p4 <- layout(p4,
             title = "Overall income",
             xaxis = list(title = "",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p4
- Creating more than one box in a box plot
# One box per career stage
p5 <- plot_ly(data = df,
              type = "box",
              y = ~Income,
              color = ~Stage)
p5 <- layout(p5,
             title = "Income by career stage",
             xaxis = list(title = "Stage",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p5

# Boxes grouped by country on the x-axis and coloured by career stage
p6 <- plot_ly(data = df,
              type = "box",
              x = ~Country,
              y = ~Income,
              color = ~Stage)
p6 <- layout(p6,
             boxmode = "group",
             title = "Income by career stage",
             xaxis = list(title = "Country",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p6
- Changing the outlier marker shape
# Outliers drawn with the "square-dot" marker symbol
p7 <- plot_ly(data = df,
              type = "box",
              y = ~Income,
              name = "All income",
              marker = list(symbol = "square-dot"))
p7 <- layout(p7,
             title = "Overall income",
             xaxis = list(title = "",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p7

# As above, with a pink box fill and a grey outline
p8 <- plot_ly(data = df,
              type = "box",
              y = ~Income,
              name = "All income",
              marker = list(symbol = "square-dot"),
              fillcolor = "pink",
              line = list(color = "gray",
                          width = 2))
p8 <- layout(p8,
             title = "Overall income",
             xaxis = list(title = "",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p8

# One box per country, coloured from the "Set3" palette.
# NOTE(review): this reuses the name p7 and so overwrites the plot created at
# the top of this section.
p7 <- plot_ly(data = df,
              type = "box",
              y = ~Income,
              color = ~Country,
              colors = "Set3",
              marker = list(symbol = "square-dot"))
p7 <- layout(p7,
             title = "Overall income",
             xaxis = list(title = "Country",
                          zeroline = FALSE),
             yaxis = list(title = "Income",
                          zeroline = FALSE))
p7
Bar charts
# Draw 100 office locations at random from four cities
city_names <- c("NYC", "Boston", "LA", "Seattle")
cities <- sample(city_names, 100, replace = TRUE)

# Frequency table of the cities, then its counts and its labels on their own
table(cities)
## cities
## Boston LA NYC Seattle
## 26 25 24 25
as.numeric(table(cities))
## [1] 26 25 24 25
names(table(cities))
## [1] "Boston" "LA" "NYC" "Seattle"

# Bar chart: city labels on the x-axis, counts on the y-axis
city_counts <- table(cities)
p1 <- plot_ly(type = "bar",
              name = "Cities",
              x = names(city_counts),
              y = as.numeric(city_counts))
p1
- Creating simulated data for a `data.frame`
# Assign every office to group A or B at random (one label per city entry)
df <- data.frame(Cities = cities,
                 Group = sample(c("A", "B"),
                                length(cities),
                                replace = TRUE))
head(df)
## Cities Group
## 1 Boston B
## 2 LA A
## 3 Boston A
## 4 LA B
## 5 LA B
## 6 LA B
groupA <- df %>% filter(Group == "A")
groupB <- df %>% filter(Group == "B")
table(groupA$Cities)
##
## Boston LA NYC Seattle
## 11 11 16 11
names(table(groupA$Cities))
## [1] "Boston" "LA" "NYC" "Seattle"
as.numeric(table(groupA$Cities))
## [1] 11 11 16 11

# Count offices per city and group in a single cross-tabulation.
# Separate per-group tables only line up when every city occurs in both
# groups; the cross-tabulation always has one row per city and, because the
# group levels are fixed, one column per group (0 where a combination is absent).
city_by_group <- table(df$Cities,
                       factor(df$Group, levels = c("A", "B")))
gBarChart <- data.frame(Cities = rownames(city_by_group),
                        GroupA = as.numeric(city_by_group[, "A"]),
                        GroupB = as.numeric(city_by_group[, "B"]))
head(gBarChart)
## Cities GroupA GroupB
## 1 Boston 11 15
## 2 LA 11 14
## 3 NYC 16 8
## 4 Seattle 11 14

# Grouped bars: the cities are on the x-axis and the counts on the y-axis,
# so each axis is titled accordingly
p3 <- plot_ly(gBarChart,
              x = ~Cities,
              y = ~GroupA,
              type = "bar",
              name = "Group A") %>%
  add_trace(y = ~GroupB,
            name = "Group B") %>%
  layout(xaxis = list(title = "Cities"),
         yaxis = list(title = "Count"),
         barmode = "group")
p3

# Stacked bars: the same data with group B stacked on top of group A
p4 <- plot_ly(gBarChart,
              x = ~Cities,
              y = ~GroupA,
              type = "bar",
              name = "Group A") %>%
  add_trace(y = ~GroupB,
            name = "Group B") %>%
  layout(xaxis = list(title = "Cities"),
         yaxis = list(title = "Count"),
         barmode = "stack")
p4
# Bar chart with semi-transparent orange bars and a thin dark outline
city_counts <- table(cities)
p5 <- plot_ly(type = "bar",
              name = "Cities",
              x = names(city_counts),
              y = as.numeric(city_counts),
              marker = list(color = "rgba(255, 70, 0, 0.7)",
                            line = list(color = "rgba(0, 0, 0, 0.5)",
                                        width = 1.5)))
p5 <- layout(p5,
             title = "Number of offices per city",
             xaxis = list(title = "Cities",
                          zeroline = FALSE),
             yaxis = list(title = "Number",
                          zeroline = FALSE))
p5
- Changing the text angle on the \(x\) axis
- The `tickangle =` argument in the `xaxis` argument of the `layout` command changes the angle of the tick labels.
# The same bar chart with the x-axis tick labels rotated by -20 degrees
city_counts <- table(cities)
p6 <- plot_ly(type = "bar",
              name = "Cities",
              x = names(city_counts),
              y = as.numeric(city_counts),
              marker = list(color = "rgba(255, 70, 0, 0.7)",
                            line = list(color = "rgba(0, 0, 0, 0.5)",
                                        width = 1.5)))
p6 <- layout(p6,
             title = "Number of offices per city",
             xaxis = list(title = "Cities",
                          zeroline = FALSE,
                          tickangle = -20),
             yaxis = list(title = "Number",
                          zeroline = FALSE))
p6
- Specifying the color of each bar
- A separate color can be specified for each bar
# Bar chart with one colour per bar: the third bar is highlighted in red and
# the others are grey. Every colour is a complete "rgba(r, g, b, a)" string;
# a string without its closing parenthesis is not a valid colour.
p7 <- plot_ly(x = names(table(cities)),
              y = as.numeric(table(cities)),
              name = "Cities",
              type = "bar",
              marker = list(color = c("rgba(150, 150, 150, 0.7)",
                                      "rgba(150, 150, 150, 0.7)",
                                      "rgba(255, 20, 0, 0.7)",
                                      "rgba(150, 150, 150, 0.7)"))) %>%
  layout(title = "Number of offices per city",
         xaxis = list(title = "Cities",
                      zeroline = FALSE),
         yaxis = list(title = "Number",
                      zeroline = FALSE))
p7