Note: All of the following graphs were created using ggplot2 and are inspired by the book Storytelling With Data: Let’s Practice!
(Ref: Knaflic, Cole. Storytelling With Data: Let’s Practice! Wiley, © 2019.)
The book's author created all of the original figures using Excel and PowerPoint.
In this post, I will mainly use two ubiquitous packages: ggplot2 (for data visualization) and tidyverse (for data transformation).
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(ggrepel)
## Warning: package 'ggrepel' was built under R version 4.0.5
library(ggtext)
## Warning: package 'ggtext' was built under R version 4.0.5
# Meals served per campaign year, 2010 through 2019 (one value per year).
year <- 2010:2019
meals <- c(
  40139, 127020, 168193, 153115, 202102,
  232897, 277912, 205350, 233389, 232797
)
df <- data.frame(year = year, meals = meals)
# Figure 2.2d: one bar per campaign year showing the number of meals served.
ggplot(df, aes(x = year, y = meals)) +
  geom_col(fill = "#0070c0", width = 0.75) +
  labs(
    title = "Meals served over time",
    x = "CAMPAIGN YEAR",
    y = "# OF MEALS SERVED"
  ) +
  # Put a break (and label) on every year.
  scale_x_continuous(breaks = seq(2010, 2019, by = 1)) +
  # Print the y labels in full instead of scientific notation.
  scale_y_continuous(
    breaks = seq(0, 300000, by = 50000),
    labels = function(x) format(x, scientific = FALSE)
  ) +
  theme(
    # Remove the default panel background, gridlines and tick marks.
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    # Keep only a light baseline under the bars.
    axis.line.x = element_line(color = "grey"),
    axis.text = element_text(size = 12),
    axis.title.x = element_text(
      size = 12, hjust = 0, vjust = -1.5, color = "black"
    ),
    plot.title = element_text(size = 16),
    plot.title.position = "plot"
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.2d Bar graph.png",
  width = 8, height = 5, units = "in", dpi = 300
)
# Figure 2.2e: the meals data as a line, with only the first and last
# campaign years marked and labelled.
ggplot(df, aes(x = year, y = meals)) +
  geom_line(color = "#0070c0", size = 1.5) +
  # Emphasise the two end points of the series.
  geom_point(
    data = filter(df, year %in% c(2010, 2019)),
    color = "#0070c0",
    size = 5
  ) +
  # Hand-placed value labels: below the 2010 point, above the 2019 point.
  annotate("text", x = 2010, y = 25000, label = "40139", color = "#0070c0") +
  annotate("text", x = 2019, y = 250000, label = "232797", color = "#0070c0") +
  scale_x_continuous(breaks = seq(2010, 2019, by = 1)) +
  scale_y_continuous(limits = c(0, 300000)) +
  # The subtitle names the measure; the y-axis title and labels are removed
  # in the theme below.
  labs(
    title = "Meals served over time",
    subtitle = "# OF MEALS SERVED",
    x = "CAMPAIGN YEAR"
  ) +
  theme(
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    axis.ticks = element_blank(),
    axis.line.x = element_line(color = "grey"),
    axis.text.x = element_text(size = 12),
    axis.text.y = element_blank(),
    axis.title.x = element_text(
      size = 12, hjust = 0, vjust = -1.5, color = "black"
    ),
    axis.title.y = element_blank(),
    plot.title = element_text(size = 16)
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.2e Line graph.png",
  width = 8, height = 5, units = "in", dpi = 300
)
# Load the project data. readr reports the column types it guessed in the
# message shown below (DATE as character, CAPACITY and DEMAND as numbers).
project <- read_csv("project.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## DATE = col_character(),
## CAPACITY = col_number(),
## DEMAND = col_number()
## )
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Parse the year-month strings in DATE (e.g. "19-Apr") into Date values
# and store them in a new DATE2 column.
project[["DATE2"]] <- ym(project[["DATE"]])
Now we are ready to tidy our data and convert the table to long format.
To do this we will use the gather function from the tidyr package.
# Reshape to long format: the DEMAND and CAPACITY columns are collapsed into
# a `category` column (the old column name) and a `project` column (the value).
# Note: tidyr documents gather() as superseded by pivot_longer().
df2 <- gather(project, key = "category", value = "project", DEMAND:CAPACITY)
# Ordered factor so that DEMAND always comes before CAPACITY.
df2[["category"]] <- factor(
  df2[["category"]],
  levels = c("DEMAND", "CAPACITY"),
  ordered = TRUE
)
df2
## # A tibble: 18 x 4
## DATE DATE2 category project
## <chr> <date> <ord> <dbl>
## 1 19-Apr 2019-04-01 DEMAND 46193
## 2 19-May 2019-05-01 DEMAND 49131
## 3 19-Jun 2019-06-01 DEMAND 50124
## 4 19-Jul 2019-07-01 DEMAND 48850
## 5 19-Aug 2019-08-01 DEMAND 47602
## 6 19-Sep 2019-09-01 DEMAND 43697
## 7 19-Oct 2019-10-01 DEMAND 41058
## 8 19-Nov 2019-11-01 DEMAND 37364
## 9 19-Dec 2019-12-01 DEMAND 34364
## 10 19-Apr 2019-04-01 CAPACITY 29263
## 11 19-May 2019-05-01 CAPACITY 28037
## 12 19-Jun 2019-06-01 CAPACITY 21596
## 13 19-Jul 2019-07-01 CAPACITY 25895
## 14 19-Aug 2019-08-01 CAPACITY 25813
## 15 19-Sep 2019-09-01 CAPACITY 22427
## 16 19-Oct 2019-10-01 CAPACITY 23605
## 17 19-Nov 2019-11-01 CAPACITY 24263
## 18 19-Dec 2019-12-01 CAPACITY 24243
# Figure 2.4a: side-by-side bars comparing demand and capacity for each month.
ggplot(df2, aes(x = DATE2, y = project, fill = category)) +
  # Bar width is expressed in days because x is a Date.
  geom_col(
    position = "dodge",
    color = "#0070c0",
    size = 1.5,
    width = 22.5
  ) +
  # Factor levels are DEMAND, CAPACITY: hollow bars for demand, solid for
  # capacity.
  scale_fill_manual(values = c("white", "#0070c0")) +
  # One break per month, labelled with the abbreviated month name.
  scale_x_date(
    breaks = seq(as.Date("2019-04-01"), as.Date("2019-12-01"), by = "1 month"),
    date_labels = "%b"
  ) +
  # expand = c(0, 0) makes the bars sit directly on the x axis.
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 60000),
    breaks = seq(0, 60000, 10000)
  ) +
  # The subtitle doubles as the legend: plain text for demand, bold for
  # capacity.
  labs(
    title = "Demand vs Capacity over time",
    subtitle = bquote("DEMAND|" ~ bold("CAPACITY")),
    x = "2019",
    y = "NUMBER OF PROJECT HOURS"
  ) +
  theme(
    plot.title = element_text(size = 18),
    plot.title.position = "plot",
    plot.subtitle = element_text(color = "#0070c0", size = 14),
    axis.title.x = element_text(size = 12, hjust = 0, vjust = 0, color = "black"),
    axis.title.y = element_text(size = 12, hjust = 1, vjust = 2.5, color = "black"),
    axis.text = element_text(size = 12),
    axis.ticks = element_line(color = "#a9a9a9"),
    axis.line = element_line(color = "grey", size = 1),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.4a Basic bars.png",
  height = 6, width = 10, units = "in", dpi = 300
)
# Shared theme for the demand/capacity figures that need a wide right margin
# for labels drawn outside the panel.
theme_ex2_69 <- theme(
  # Blank canvas: no panel background, no gridlines, no legend.
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  legend.position = "none",
  # Light grey axes and tick marks.
  axis.line = element_line(color = "grey", size = 1),
  axis.ticks = element_line(color = "#a9a9a9"),
  axis.text = element_text(size = 12),
  axis.title.x = element_text(size = 12, hjust = 0, vjust = 0, color = "black"),
  axis.title.y = element_text(size = 12, hjust = 1, vjust = 2.5, color = "black"),
  plot.title = element_text(size = 18),
  plot.title.position = "plot",
  plot.subtitle = element_text(color = "#0070c0", size = 14),
  # Margins are given as top, right, bottom, left.
  plot.margin = unit(c(0.5, 2.5, 0.5, 1), "cm")
)
# Figure 2.4b: demand (thin line) and capacity (thick line) by month, each
# labelled directly to the right of its last point.
ggplot(df2, aes(x = DATE2, y = project)) +
  geom_line(
    data = filter(df2, category == "DEMAND"),
    size = 0.5, color = "#0070c0"
  ) +
  geom_line(
    data = filter(df2, category == "CAPACITY"),
    size = 1.5, color = "#0070c0"
  ) +
  # Positions on a date axis have to be supplied as Date objects, hence
  # as.Date(). The x values lie past the December break, i.e. outside the panel.
  annotate(
    "text", x = as.Date("2019-12-10"), y = 34360,
    label = "34K", color = "#0070c0"
  ) +
  annotate(
    "text", x = as.Date("2019-12-20"), y = 34360,
    label = "DEMAND", color = "#0070c0", hjust = 0
  ) +
  annotate(
    "text", x = as.Date("2019-12-10"), y = 24360,
    label = "24K", color = "#0070c0", fontface = "bold"
  ) +
  annotate(
    "text", x = as.Date("2019-12-20"), y = 24360,
    label = "CAPACITY", color = "#0070c0", fontface = "bold", hjust = 0
  ) +
  scale_x_date(
    breaks = seq(as.Date("2019-04-01"), as.Date("2019-12-01"), by = "1 month"),
    date_labels = "%b"
  ) +
  # clip = "off" allows the labels above to be drawn beyond the panel edge.
  coord_cartesian(
    xlim = c(as.Date("2019-04-01"), as.Date("2019-12-01")),
    clip = "off"
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 60000),
    breaks = seq(0, 60000, 10000)
  ) +
  labs(
    title = "Demand vs Capacity over time",
    subtitle = bquote("DEMAND|" ~ bold("CAPACITY")),
    x = "2019",
    y = "NUMBER OF PROJECT HOURS"
  ) +
  # theme_ex2_69 supplies the wider right margin the outside labels need.
  theme_ex2_69
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.4b Line graph.png",
  height = 6, width = 9, units = "in", dpi = 300
)
# Same styling as theme_ex2_69, with a narrower right margin for the figures
# that draw nothing outside the panel.
# Margins are given as top, right, bottom, left.
theme_ex2_75 <- theme_ex2_69 +
  theme(plot.margin = unit(c(0.25, 1, 0.5, 1), "cm"))
# Figure 2.4c: a narrow outlined demand bar with a wider, translucent capacity
# bar drawn over it for each month.
ggplot(df2, aes(x = DATE2, y = project)) +
  # Drawn first: demand as a white bar with a blue outline.
  geom_col(
    data = filter(df2, category == "DEMAND"),
    width = 12,
    color = "#0070c0",
    fill = "white",
    size = 1
  ) +
  # Drawn second, on top: capacity as a wider semi-transparent bar.
  geom_col(
    data = filter(df2, category == "CAPACITY"),
    width = 20,
    fill = "blue",
    alpha = 0.4
  ) +
  scale_x_date(
    breaks = seq(as.Date("2019-04-01"), as.Date("2019-12-01"), by = "1 month"),
    date_labels = "%b"
  ) +
  coord_cartesian(
    xlim = c(as.Date("2019-04-01"), as.Date("2019-12-01")),
    clip = "off"
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 60000),
    breaks = seq(0, 60000, 10000)
  ) +
  labs(
    title = "Demand vs Capacity over time",
    subtitle = bquote("DEMAND|" ~ bold("CAPACITY")),
    x = "2019",
    y = "NUMBER OF PROJECT HOURS"
  ) +
  theme_ex2_75
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.4c Overlapping bars.png",
  height = 5, width = 7.5, units = "in", dpi = 300
)
# Figure 2.4d: demand and capacity bars of equal width drawn at the same
# position. Both start at zero, so the grey capacity bar covers the lower
# part of the blue demand bar.
ggplot(df2, aes(x = DATE2, y = project)) +
  geom_col(
    data = filter(df2, category == "DEMAND"),
    width = 20,
    color = "#0070c0", fill = "#0070c0",
    size = 1
  ) +
  geom_col(
    data = filter(df2, category == "CAPACITY"),
    width = 20,
    color = "#0070c0", fill = "lightgrey",
    size = 1
  ) +
  scale_x_date(
    breaks = seq(as.Date("2019-04-01"), as.Date("2019-12-01"), by = "1 month"),
    date_labels = "%b"
  ) +
  coord_cartesian(
    xlim = c(as.Date("2019-04-01"), as.Date("2019-12-01")),
    clip = "off"
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 60000),
    breaks = seq(0, 60000, 10000)
  ) +
  labs(
    title = "Demand vs Capacity over time",
    subtitle = bquote("DEMAND|" ~ bold("CAPACITY")),
    x = "2019",
    y = "NUMBER OF PROJECT HOURS"
  ) +
  theme_ex2_75
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.4d Stacked bars.png",
  height = 5, width = 7.5, units = "in", dpi = 300
)
# Figure 2.4e: one large dot per month for demand and for capacity, each
# carrying its value in thousands, joined by a pale band.
ggplot(df2, aes(x = DATE2, y = project)) +
  # Pale bar from zero up to demand ...
  geom_col(
    data = filter(df2, category == "DEMAND"),
    width = 17.8, color = "white", fill = "#0070c0", alpha = 0.4
  ) +
  # ... painted over in white from zero up to capacity, so only the part
  # between capacity and demand stays visible.
  geom_col(
    data = filter(df2, category == "CAPACITY"),
    width = 17, color = "white", fill = "white"
  ) +
  # shape 21 has separate outline (color) and interior (fill) colours.
  geom_point(
    aes(color = factor(category), fill = factor(category)),
    shape = 21, size = 14, stroke = 2.2
  ) +
  # Factor levels are DEMAND, CAPACITY: hollow dots for demand, solid for
  # capacity.
  scale_fill_manual(values = c("white", "#0070c0")) +
  scale_colour_manual(values = c("steelblue", "#0070c0")) +
  # Values inside the dots, rounded to whole thousands.
  # NOTE(review): size = 10 sits inside aes(), so it is mapped through the
  # size scale rather than used as a literal text size - confirm this is
  # intended before moving it out of aes().
  geom_text(
    data = filter(df2, category == "DEMAND"),
    aes(label = round(project / 1000, 0), size = 10),
    color = "blue",
    show.legend = FALSE
  ) +
  geom_text(
    data = filter(df2, category == "CAPACITY"),
    aes(label = round(project / 1000, 0), size = 10),
    color = "white",
    show.legend = FALSE
  ) +
  # Series labels to the right of the December dots; positions on a date axis
  # have to be supplied as Date objects, hence as.Date().
  annotate(
    "text", x = as.Date("2019-12-30"), y = 34360,
    label = "DEMAND", color = "steelblue"
  ) +
  annotate(
    "text", x = as.Date("2019-12-30"), y = 24360,
    label = "CAPACITY", color = "#0070c0", fontface = "bold"
  ) +
  expand_limits(x = as.Date(c("2019-04-01", "2020-01-15"))) +
  scale_x_date(
    breaks = seq(as.Date("2019-04-01"), as.Date("2019-12-01"), by = "1 month"),
    date_labels = "%b"
  ) +
  # clip = "off" allows the series labels to be drawn beyond the panel edge.
  coord_cartesian(
    xlim = c(as.Date("2019-04-01"), as.Date("2019-12-01")),
    clip = "off"
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 60000),
    breaks = seq(0, 60000, 10000)
  ) +
  labs(
    title = "Demand vs Capacity over time",
    subtitle = bquote("DEMAND|" ~ bold("CAPACITY")),
    x = "2019",
    y = "NUMBER OF PROJECT HOURS"
  ) +
  theme_ex2_69
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.4e Dot plot.png",
  height = 6, width = 9, units = "in", dpi = 300
)
# Attrition rate (in percent) for each year from 2010 through 2019.
year <- 2010:2019
rate <- c(9.7, 2.0, 1.0, 7.0, 15.1, 5.6, 12.3, 4.5, 8.2, 9.1)
attrition <- data.frame(year = year, rate = rate)
attrition
## year rate
## 1 2010 9.7
## 2 2011 2.0
## 3 2012 1.0
## 4 2013 7.0
## 5 2014 15.1
## 6 2015 5.6
## 7 2016 12.3
## 8 2017 4.5
## 9 2018 8.2
## 10 2019 9.1
# Shared theme for the attrition-rate figures: blank canvas, grey axes,
# no x-axis title and no legend.
theme_ex3 <- theme(
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  legend.position = "none",
  axis.line = element_line(color = "grey", size = 1),
  axis.ticks = element_line(color = "#a9a9a9"),
  axis.text = element_text(size = 12),
  axis.title.x = element_blank(),
  axis.title.y = element_text(size = 12, hjust = 1, vjust = 2.5, color = "black"),
  plot.title = element_text(size = 18),
  plot.title.position = "plot",
  # Margins are given as top, right, bottom, left.
  plot.margin = unit(c(0.25, 0.5, 0.5, 1), "cm")
)
# Figure 2.5b: yearly attrition rate as dots, with a dashed line at the
# average over all years.
ggplot(attrition, aes(x = year, y = rate)) +
  geom_point(color = "#2554C7", size = 5) +
  # The average is taken from the plotted data frame so the line cannot
  # drift away from the data if the loose `rate` vector is changed.
  annotate(
    "segment",
    x = 2010, xend = 2019,
    y = mean(attrition$rate), yend = mean(attrition$rate),
    color = "#2554C7", linetype = "dashed"
  ) +
  annotate(
    "text", x = 2011, y = 8.0,
    label = "AVERAGE: 7.5%", color = "#2554C7"
  ) +
  # n.breaks is a simple way to request a number of axis breaks without
  # listing them by hand.
  scale_x_continuous(n.breaks = 9) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 16), n.breaks = 9) +
  labs(
    title = "Attrition rate over time",
    y = "ATTRITION RATE (%)"
  ) +
  theme_ex3
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.5b Dot plot.png",
  height = 5, width = 7, units = "in", dpi = 300
)
# Figure 2.5c: attrition rate as a line, with the latest year marked and
# labelled and a dashed line at the average.
ggplot(attrition, aes(x = year, y = rate)) +
  geom_line(color = "#2554C7", size = 1.2) +
  # Mark only the most recent year.
  geom_point(
    data = filter(attrition, year == 2019),
    color = "#2554C7", size = 4
  ) +
  # The average is taken from the plotted data frame so the line cannot
  # drift away from the data if the loose `rate` vector is changed.
  annotate(
    "segment",
    x = 2010, xend = 2019,
    y = mean(attrition$rate), yend = mean(attrition$rate),
    color = "#2554C7", linetype = "dashed"
  ) +
  annotate(
    "text", x = 2018.5, y = 6.25,
    label = "AVG: 7.5%", color = "#2554C7"
  ) +
  annotate("text", x = 2019, y = 10, label = "9.1%", color = "#2554C7") +
  # The upper x limit goes slightly past 2019 so the last point is not cut
  # off at the panel edge.
  scale_x_continuous(
    expand = c(0, 0), limits = c(2010, 2019.5), n.breaks = 9
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 16), n.breaks = 9) +
  labs(
    title = "Attrition rate over time",
    y = "ATTRITION RATE (%)"
  ) +
  theme_ex3
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.5c Line graph.png",
  height = 5, width = 7, units = "in", dpi = 300
)
# Figure 2.5c2: attrition rate as dots joined by a thin line, with a dashed
# line at the average.
ggplot(attrition, aes(x = year, y = rate)) +
  geom_line(color = "royalblue", size = 0.5) +
  geom_point(color = "#2554C7", size = 5) +
  # The average is taken from the plotted data frame so the line cannot
  # drift away from the data if the loose `rate` vector is changed.
  annotate(
    "segment",
    x = 2010, xend = 2019,
    y = mean(attrition$rate), yend = mean(attrition$rate),
    color = "royalblue", linetype = "dashed"
  ) +
  annotate(
    "text", x = 2011.5, y = 8.0,
    label = "AVERAGE: 7.5%", color = "#2554C7"
  ) +
  scale_x_continuous(n.breaks = 9) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 16), n.breaks = 9) +
  labs(
    title = "Attrition rate over time",
    y = "ATTRITION RATE (%)"
  ) +
  theme_ex3
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.5c2 Line and dots graph.png",
  height = 5, width = 7, units = "in", dpi = 300
)
# Figure 2.5d: attrition rate as a line, with the region up to the average
# shaded and the latest year marked and labelled.

# Average over all plotted years; used for both the shaded area and the
# dashed line so the two always coincide.
avg_rate <- mean(attrition$rate)

ggplot(attrition, aes(x = year, y = rate)) +
  geom_line(color = "#2554C7", size = 1.75) +
  # A single rectangle drawn with annotate(). geom_rect() with constant
  # aesthetics would draw one rectangle per data row (10 here), stacking
  # their transparency: ten layers at alpha 0.05 give 1 - 0.95^10, about 0.4,
  # which is the opacity used for the single rectangle.
  annotate(
    "rect",
    xmin = 2010, xmax = 2019,
    ymin = 0, ymax = avg_rate,
    alpha = 0.4, fill = "#2554C7"
  ) +
  # Mark only the most recent year.
  geom_point(
    data = filter(attrition, year == 2019),
    color = "#2554C7", size = 4
  ) +
  annotate(
    "segment",
    x = 2010, xend = 2019,
    y = avg_rate, yend = avg_rate,
    color = "#2554C7", linetype = "dashed"
  ) +
  annotate(
    "text", x = 2018.25, y = 6.5,
    label = "AVG: 7.5%", color = "#2554C7"
  ) +
  annotate("text", x = 2018.75, y = 10.5, label = "9.1%", color = "#2554C7") +
  # The upper x limit goes slightly past 2019 so the last point is not cut
  # off at the panel edge.
  scale_x_continuous(
    expand = c(0, 0), limits = c(2010, 2019.2), n.breaks = 9
  ) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 16), n.breaks = 9) +
  labs(
    title = "Attrition rate over time",
    y = "ATTRITION RATE (%)"
  ) +
  theme_ex3
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.5d Line graph with shaded area depicting average.png",
  height = 5, width = 7, units = "in", dpi = 300
)
# Figure 2.5e: attrition rate as a filled area, with a dashed line at the
# average.
ggplot(attrition, aes(x = year, y = rate)) +
  # scale_fill_brewer(palette = "Blues") was tried here and did not work;
  # the fill below is a fixed colour, not a mapped aesthetic.
  geom_area(fill = "royalblue", colour = "royalblue", size = 1) +
  # The average is taken from the plotted data frame so the line cannot
  # drift away from the data if the loose `rate` vector is changed.
  annotate(
    "segment",
    x = 2010, xend = 2019,
    y = mean(attrition$rate), yend = mean(attrition$rate),
    color = "royalblue", linetype = "dashed"
  ) +
  # White text because the label is positioned over the filled area.
  annotate(
    "text", x = 2018.25, y = 6.5,
    label = "AVG: 7.5%", color = "white"
  ) +
  scale_x_continuous(n.breaks = 9) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 16), n.breaks = 9) +
  labs(
    title = "Attrition rate over time",
    y = "ATTRITION RATE (%)"
  ) +
  theme_ex3
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.5e Area graph.png",
  height = 5, width = 7, units = "in", dpi = 300
)
# Figure 2.5f: attrition rate as bars, with a dashed line at the average.
ggplot(attrition, aes(x = year, y = rate)) +
  geom_col(fill = "royalblue", colour = "royalblue", size = 1) +
  # The average is taken from the plotted data frame so the line cannot
  # drift away from the data if the loose `rate` vector is changed.
  annotate(
    "segment",
    x = 2010, xend = 2019,
    y = mean(attrition$rate), yend = mean(attrition$rate),
    color = "royalblue", linetype = "dashed"
  ) +
  annotate(
    "text", x = 2011.5, y = 6.5,
    label = "AVG: 7.5%", color = "blue"
  ) +
  scale_x_continuous(n.breaks = 9) +
  scale_y_continuous(expand = c(0, 0), limits = c(0, 16), n.breaks = 9) +
  labs(
    title = "Attrition rate over time",
    y = "ATTRITION RATE (%)"
  ) +
  theme_ex3
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.5f Bar graph.png",
  height = 5, width = 7, units = "in", dpi = 300
)
# Satisfaction scores for 2012-2019, in long format: the eight
# "Industry Average" rows come first, then the eight "Financial Saving" rows.
year <- rep(2012:2019, times = 2)
category <- rep(c("Industry Average", "Financial Saving"), each = 8)
score <- c(
  795, 800, 804, 812, 830, 832, 824, 846,
  774, 785, 805, 833, 839, 843, 827, 836
)
data <- data.frame(year = year, category = category, score = score)
data
## year category score
## 1 2012 Industry Average 795
## 2 2013 Industry Average 800
## 3 2014 Industry Average 804
## 4 2015 Industry Average 812
## 5 2016 Industry Average 830
## 6 2017 Industry Average 832
## 7 2018 Industry Average 824
## 8 2019 Industry Average 846
## 9 2012 Financial Saving 774
## 10 2013 Financial Saving 785
## 11 2014 Financial Saving 805
## 12 2015 Financial Saving 833
## 13 2016 Financial Saving 839
## 14 2017 Financial Saving 843
## 15 2018 Financial Saving 827
## 16 2019 Financial Saving 836
library(ggtext)
# Figure 2.7b: satisfaction score over time for the industry average (black)
# and Financial Savings (blue), labelled directly at the right-hand end.
ggplot(data, aes(x = year, y = score)) +
  geom_line(
    data = filter(data, category == "Industry Average"),
    size = 1.5, color = "black"
  ) +
  geom_line(
    data = filter(data, category == "Financial Saving"),
    size = 1.5, color = "blue"
  ) +
  # Mark only the most recent year of each series.
  geom_point(
    data = filter(data, category == "Industry Average", year == 2019),
    size = 4, color = "black"
  ) +
  geom_point(
    data = filter(data, category == "Financial Saving", year == 2019),
    size = 4, color = "blue"
  ) +
  # Series labels just past 2019, i.e. outside the panel (see clip = "off").
  annotate(
    "text", x = 2019.2, y = 850,
    label = "Industry Average", color = "black", size = 5, hjust = 0
  ) +
  annotate(
    "text", x = 2019.2, y = 837.5,
    label = "Financial Savings", color = "blue", size = 5, hjust = 0
  ) +
  # n.breaks is a simple way to request a number of axis breaks without
  # listing them by hand.
  scale_x_continuous(expand = c(0, 0), n.breaks = 10) +
  scale_y_continuous(expand = c(0, 0), limits = c(700, 900), n.breaks = 10) +
  coord_cartesian(xlim = c(2012, 2019), clip = "off") +
  # The coloured words in the title act as the legend, so their colours match
  # the lines: #0000ff is R's "blue", #000000 is "black".
  labs(
    title = paste0(
      "BRANCH SATISFACTION<br>",
      "<span style = 'color:#0000ff;'>**Financial Savings**</span>",
      " below ",
      "<span style ='color:#000000;'>**industry**</span>",
      " for the first 5 years"
    ),
    x = "SURVEY YEAR",
    y = "SATISFACTION SCORE"
  ) +
  theme(
    # element_markdown() (ggtext) renders the HTML/markdown used in the title.
    plot.title = element_markdown(size = 20, lineheight = 1.25),
    plot.title.position = "plot",
    # Adjust the space between the axis labels and the plot area.
    axis.text.x = element_text(size = 12, vjust = -2),
    # NOTE: per the original author, this hjust does not move the y labels
    # as hoped.
    axis.text.y = element_text(size = 12, hjust = -3),
    # Position of the x-axis title.
    axis.title.x = element_text(
      size = 12, hjust = 0, vjust = -2.5, color = "black"
    ),
    # Position of the y-axis title.
    axis.title.y = element_text(
      size = 12, hjust = 1, vjust = 2.5, color = "black"
    ),
    axis.line.x = element_line(color = "grey"),
    axis.line.y = element_line(color = "grey", size = 1),
    axis.ticks = element_line(color = "#a9a9a9"),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    # Margins are top, right, bottom, left; the wide right margin leaves room
    # for the series labels.
    plot.margin = unit(c(0.5, 4.5, 1, 1), "cm")
  )
# No plot argument: ggsave() writes the most recently created plot.
ggsave(
  "Figure 2.7b Revamped graph.png",
  width = 9, height = 6, units = "in", dpi = 300
)