Recreation of Cole Nussbaumer Knaflic’s Storytelling with Data (Wiley, 2015) plots using R and ggplot2.
Original data provided by the book’s author: https://www.storytellingwithdata.com/book/downloads
library(ggtext)
## Warning: package 'ggtext' was built under R version 4.0.5
library(tidyselect)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble 3.0.5 v dplyr 1.0.3
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.4.0 v forcats 0.5.0
## v purrr 0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
library(gridtext)
## Warning: package 'gridtext' was built under R version 4.0.5
# One-row tibble holding the markdown/HTML label for the Figure 2.3 text box.
# The label is four styled <span> lines (each but the last ending in <br/>),
# joined with newline characters.
df_box <- tibble(
  label = paste(
    "<span style='font-size:128pt;color:#31859C'>**20%**</span> <br/>",
    "<span style='font-size:20pt;color:#7F7F7F'>**of children had a**</span><br/>",
    "<span style='font-size:20pt;color:#31859C'>**traditional stay-at-home mom** </span><br/>",
    "<span style='font-size:20pt;color:#7F7F7F'>**in 2012, compared to 41% in 1970**</span>",
    sep = "\n"
  ),
  # Position of the box inside the unit square used by the plot.
  x = 0.01,
  y = 0.5,
  color = "white",
  fill = "white"
)
# Figure 2.3: draw the label from df_box as a white, border-free text box on
# an otherwise empty canvas (all axis, grid and background elements blanked).
ggplot() +
  geom_textbox(
    data = df_box,
    mapping = aes(x = x, y = y, label = label),
    hjust = 0,
    lineheight = 2,
    width = unit(400, "pt"), # fixed width of the text box
    fill = "white",
    box.color = "white",
    box.padding = unit(rep(0, 4), "pt")
  ) +
  # Both axes span the unit square.
  scale_x_continuous(limits = c(0, 1)) +
  scale_y_continuous(limits = c(0, 1)) +
  theme(
    # Axis titles
    axis.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    # Axis text, ticks and lines
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line = element_blank(),
    # Panel
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # No outer margin
    plot.margin = unit(rep(0, 4), "cm")
  )
Figure 2.3 Stay-at-home moms simple text makeover
# Save the most recently displayed plot as an 11 x 7 inch PNG at 300 dpi.
ggsave(
  filename = "Figure 2.3 Stay-at-home moms simple text makeover.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)
# Load the data for the scatterplot from the working directory.
miles <- read_csv(file = "milesdriven.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## `Miles Driven` = col_number(),
## `Cost Per Mile` = col_double()
## )
# Figure 2.7: cost per mile against miles driven. Points at or above the
# average cost are orange, the rest grey; a dashed line and a labelled black
# marker show the overall average.
avg_cost <- mean(miles$`Cost Per Mile`)
avg_mile <- mean(miles$`Miles Driven`)
ggplot(data = miles, aes(x = `Miles Driven`, y = `Cost Per Mile`)) +
  geom_point(
    data = filter(miles, `Cost Per Mile` < avg_cost),
    color = "#BFBEBE", size = 5
  ) +
  geom_point(
    data = filter(miles, `Cost Per Mile` >= avg_cost),
    color = "#F79747", size = 5
  ) +
  geom_hline(yintercept = avg_cost, linetype = "longdash") +
  # annotate() draws the average marker and its label exactly once. A geom_*
  # layer with constant x/y would inherit `miles` and redraw the same marker
  # and label once per row, stacking identical copies on top of each other.
  annotate("point", x = avg_mile, y = avg_cost, size = 6) +
  annotate(
    "label",
    x = avg_mile, y = avg_cost, label = "AVG",
    hjust = 1.25, label.size = 0
  ) +
  scale_x_continuous(expand = c(0, 0), limits = c(0, 4000), n.breaks = 5) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 3),
    labels = scales::dollar_format(),
    n.breaks = 7
  ) +
  labs(
    title = "Cost per mile by miles driven",
    x = "Miles driven per month",
    y = "Cost per mile"
  ) +
  theme(
    plot.title = element_markdown(size = 16),
    plot.title.position = "plot",
    axis.title.x = element_text(size = 12, hjust = 0, vjust = -1, color = "#555655"),
    axis.title.y = element_text(size = 12, hjust = 1, vjust = 1, color = "#555655"),
    axis.text.x = element_text(color = "#777B7E", face = "bold"),
    axis.text.y = element_text(color = "#777B7E", face = "bold"),
    axis.ticks.x = element_line(color = "#a9a9a9"),
    axis.ticks.y = element_line(color = "#a9a9a9"),
    axis.line.x = element_line(color = "grey", size = 1),
    axis.line.y = element_line(color = "grey", size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.margin = margin(.5, .5, .5, .5, "cm")
  )
Figure 2.7 Modified Scatterplot
# Save the most recently displayed plot as an 11 x 7 inch PNG at 300 dpi.
ggsave(
  filename = "Figure 2.7 Modified Scatterplot.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)
# Load the passport-control wait-time data from the working directory.
passport <- read_csv(file = "passport.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## year = col_double(),
## month = col_character(),
## Min = col_double(),
## Avg = col_double(),
## Max = col_double(),
## `Max to graph` = col_double()
## )
# Reusable theme settings: markdown title/subtitle, grey axis lines and ticks,
# bold grey axis text, and no grid or panel background.
theme_ch2 <- theme(
  # Title block
  plot.title.position = "plot",
  plot.title = element_markdown(size = 16),
  plot.subtitle = element_markdown(size = 14),
  # Axis titles: x title left-aligned, y title aligned to the top
  axis.title.x = element_text(color = "#555655", size = 12, hjust = 0, vjust = -1),
  axis.title.y = element_text(color = "#555655", size = 12, hjust = 1, vjust = 1),
  # Axis text
  axis.text.x = element_text(color = "#777B7E", size = 12, face = "bold"),
  axis.text.y = element_text(color = "#777B7E", size = 12, face = "bold"),
  # Axis lines and ticks
  axis.line.x = element_line(color = "grey", size = 1),
  axis.line.y = element_line(color = "grey", size = 1),
  axis.ticks.x = element_line(color = "#a9a9a9"),
  axis.ticks.y = element_line(color = "#a9a9a9"),
  # Panel
  panel.background = element_blank(),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  # Outer margin
  plot.margin = margin(.5, .5, .5, .5, "cm")
)
# Add a `date` column: the first day of each year/month pair.
passport <- mutate(passport, date = ymd(paste(year, month, 1)))
# Inspect the row for September 2014.
filter(passport, date == "2014-09-01")
## # A tibble: 1 x 7
## year month Min Avg Max `Max to graph` date
## <dbl> <chr> <dbl> <dbl> <dbl> <dbl> <date>
## 1 2014 Sep 9 18 27 18 2014-09-01
# Figure 2.9: average wait time as a line inside a grey min-max ribbon, with
# the September 2015 average highlighted and the three series labelled.
ggplot(data = passport) +
  geom_ribbon(aes(x = date, ymin = Min, ymax = Max), fill = "grey70") +
  geom_line(aes(x = date, y = Avg), size = 2) +
  geom_point(
    data = filter(passport, date == "2015-09-01"),
    aes(x = date, y = Avg),
    size = 5
  ) +
  # Value label next to the highlighted point.
  annotate("text", x = as.Date("2015-09-20"), y = 21, label = "21", color = "black", size = 5) +
  # Series labels near the left edge of the ribbon.
  annotate("text", x = as.Date("2014-09-15"), y = 10, label = "MIN", color = "#76787B", size = 5) +
  annotate("text", x = as.Date("2014-09-15"), y = 20, label = "AVG", color = "black", size = 5, fontface = 2) +
  annotate("text", x = as.Date("2014-09-15"), y = 25, label = "MAX", color = "#76787B", size = 5) +
  scale_x_date(
    expand = c(0, 0),
    # Month abbreviation for every break; the year is added on a second line
    # for the first break and whenever the year differs from the previous one.
    labels = function(x) {
      previous <- dplyr::lag(x)
      if_else(
        is.na(previous) | year(previous) != year(x),
        paste(month(x, label = TRUE), "\n", year(x)),
        paste(month(x, label = TRUE))
      )
    },
    limits = c(ymd("2014-08-16"), ymd("2015-10-01")),
    # One break per observed month (n.breaks did not work here).
    breaks = passport$date
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 40),
    n.breaks = 9
  ) +
  labs(
    title = "Passport control wait time",
    subtitle = "Past 13 months",
    y = "Wait time (minutes)"
  ) +
  coord_cartesian(clip = "off") +
  theme_ch2 +
  theme(
    axis.title.x = element_blank(),
    plot.margin = margin(.5, 0.5, 0.5, 0.5, "cm")
  )
Figure 2.9 Showing average within a range in a line graph
# Save the most recently displayed plot as an 11 x 7 inch PNG at 300 dpi.
ggsave(
  filename = "Figure 2.9 Showing average within a range in a line graph.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)
# Load the employee feedback survey results from the working directory.
feedback <- read_csv(file = "feedback.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## Category = col_character(),
## `2014` = col_double(),
## `2015` = col_double()
## )
# Display the wide-format table (one row per category, one column per year).
feedback
## # A tibble: 7 x 3
## Category `2014` `2015`
## <chr> <dbl> <dbl>
## 1 Culture 80 96
## 2 Peers 85 91
## 3 Work environment 76 75
## 4 Leadership 59 62
## 5 Rewards & recognition 41 45
## 6 Perf management 33 42
## 7 Career development 49 33
# Reshape to long format: one row per category and survey year, with the
# year in `year` (as text) and the value in `percent`.
df_long <- pivot_longer(
  feedback,
  cols = all_of(c("2014", "2015")),
  names_to = "year",
  values_to = "percent"
)
# Preview the first ten rows.
head(df_long, n = 10)
## # A tibble: 10 x 3
## Category year percent
## <chr> <chr> <dbl>
## 1 Culture 2014 80
## 2 Culture 2015 96
## 3 Peers 2014 85
## 4 Peers 2015 91
## 5 Work environment 2014 76
## 6 Work environment 2015 75
## 7 Leadership 2014 59
## 8 Leadership 2015 62
## 9 Rewards & recognition 2014 41
## 10 Rewards & recognition 2015 45
# Figure 2.11: slopegraph of percent favorable by category, 2014 vs 2015.
# "Career development" is drawn in red; every other category is grey.
# Grey layers exclude the highlighted category so it is drawn only once.
# `year` is a character column, so it is compared against strings.
ggplot(data = df_long) +
  # Lines
  geom_line(
    data = filter(df_long, Category != "Career development"),
    aes(x = year, y = percent, group = Category),
    size = 1.5, color = "#76787B"
  ) +
  geom_line(
    data = filter(df_long, Category == "Career development"),
    aes(x = year, y = percent, group = Category),
    size = 1.5, color = "#FF3403"
  ) +
  # Points
  geom_point(
    data = filter(df_long, Category != "Career development"),
    aes(x = year, y = percent),
    size = 3, color = "#76787B"
  ) +
  geom_point(
    data = filter(df_long, Category == "Career development"),
    aes(x = year, y = percent),
    size = 3, color = "#FF3403"
  ) +
  # Percent labels: left of the 2014 points, right of the 2015 points
  geom_text(
    data = filter(df_long, year == "2014", Category != "Career development"),
    aes(x = year, y = percent, label = paste0(percent, "%")),
    nudge_x = -.2, color = "#76787B"
  ) +
  geom_text(
    data = filter(df_long, year == "2015", Category != "Career development"),
    aes(x = year, y = percent, label = paste0(percent, "%")),
    nudge_x = .25, color = "#76787B"
  ) +
  geom_text(
    data = filter(df_long, year == "2014", Category == "Career development"),
    aes(x = year, y = percent, label = paste0(percent, "%")),
    nudge_x = -.2, color = "#FF3403"
  ) +
  geom_text(
    data = filter(df_long, year == "2015", Category == "Career development"),
    aes(x = year, y = percent, label = paste0(percent, "%")),
    nudge_x = .25, color = "#FF3403"
  ) +
  # Category labels, right-aligned to the left of the 2014 percent labels
  geom_text(
    data = filter(df_long, year == "2014", Category != "Career development"),
    aes(x = year, y = percent, label = Category),
    nudge_x = -.4, hjust = 1, color = "#76787B"
  ) +
  geom_text(
    data = filter(df_long, year == "2014", Category == "Career development"),
    aes(x = year, y = percent, label = Category),
    nudge_x = -.4, hjust = 1, color = "#FF3403"
  ) +
  # Wide multiplicative expansion leaves room for the labels on both sides.
  scale_x_discrete(
    expand = expansion(mult = 2, add = 0),
    limits = c("2014", "2015"),
    labels = c("2014", "2015")
  ) +
  scale_y_continuous(limits = c(20, 100)) +
  labs(
    title = "Employee feedback over time",
    x = "Survey Year",
    subtitle = "Survey Category | Percent Favorable"
  ) +
  theme(
    plot.title = element_markdown(size = 16),
    plot.title.position = "plot",
    plot.subtitle = element_markdown(size = 14, hjust = 0.15, color = "#76787B"),
    axis.title.x = element_text(size = 12, hjust = 0, vjust = -1, color = "#555655"),
    axis.title.y = element_blank(),
    axis.text.x = element_text(size = 12, color = "#777B7E", face = "bold"),
    axis.text.y = element_blank(),
    axis.ticks.x = element_line(color = "#a9a9a9"),
    axis.ticks.y = element_blank(),
    axis.line.x = element_line(color = "grey", size = 1),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.margin = margin(.5, .5, .5, .5, "cm")
  )
Figure 2.11 Modified slopegraph (page 49)
# Save the most recently displayed plot as an 11 x 7 inch PNG at 300 dpi.
ggsave(
  filename = "Figure 2.11 Modified slopegraph.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)
# Load the survey results from the working directory.
survey <- read_csv(file = "survey.csv")
##
## -- Column specification --------------------------------------------------------
## cols(
## items = col_character(),
## `Strongly Disagree` = col_character(),
## Disagree = col_character(),
## Neutral = col_character(),
## Agree = col_character(),
## `Strongly Agree` = col_character()
## )
# Display the wide-format table (one row per survey item, one column per
# response level; values are text such as "5%").
survey
## # A tibble: 5 x 6
## items `Strongly Disagree` Disagree Neutral Agree `Strongly Agree`
## <chr> <chr> <chr> <chr> <chr> <chr>
## 1 Survey item E 5% 5% 30% 27% 33%
## 2 Survey item D 6% 11% 35% 28% 20%
## 3 Survey item C 6% 14% 45% 15% 20%
## 4 Survey item B 8% 32% 20% 23% 17%
## 5 Survey item A 16% 25% 32% 18% 9%
Transforming data
# Reshape to long format: one row per survey item and response level.
df_long <- pivot_longer(
  survey,
  cols = all_of(
    c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
  ),
  names_to = "Scale",
  values_to = "Percent"
)
df_long <- mutate(
  df_long,
  # Numeric copy of the percentage text (e.g. "5%" becomes 5).
  percent = as.numeric(str_remove(Percent, "%")),
  # Fix the level order of the response scale, from "Strongly Agree" down to
  # "Strongly Disagree".
  Scale = factor(
    Scale,
    levels = c("Strongly Agree", "Agree", "Neutral", "Disagree", "Strongly Disagree")
  )
)
# Preview the first ten rows.
head(df_long, n = 10)
## # A tibble: 10 x 4
## items Scale Percent percent
## <chr> <fct> <chr> <dbl>
## 1 Survey item E Strongly Disagree 5% 5
## 2 Survey item E Disagree 5% 5
## 3 Survey item E Neutral 30% 30
## 4 Survey item E Agree 27% 27
## 5 Survey item E Strongly Agree 33% 33
## 6 Survey item D Strongly Disagree 6% 6
## 7 Survey item D Disagree 11% 11
## 8 Survey item D Neutral 35% 35
## 9 Survey item D Agree 28% 28
## 10 Survey item D Strongly Agree 20% 20
# Fill colour for each response level: dark grey for the two disagree levels,
# light grey for neutral, dark blue for the two agree levels.
cols <- setNames(
  c("#404040", "#404040", "#BFBFBF", "#1F497D", "#1F497D"),
  c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
)
# Figure 2.19: 100% stacked horizontal bar chart, one bar per survey item,
# filled by response level. The coloured subtitle replaces the legend.
ggplot(data = df_long) +
  # geom_col() plots the values in `percent` directly as bar heights.
  geom_col(
    aes(x = items, y = percent, fill = Scale),
    width = 0.65,
    color = "white"
  ) +
  scale_y_continuous(
    name = "Percent of total",
    limits = c(0, 100),
    breaks = seq(0, 100, by = 20),
    labels = function(x) paste0(x, "%"),
    position = "right"
  ) +
  labs(
    title = "Survey results",
    # Every level is separated by the same " | " so the spacing is uniform.
    subtitle = paste(
      "<span style='color:#404040'>Strongly Disagree</span>",
      "<span style='color:#404040'>Disagree</span>",
      "<span style='color:#BFBFBF'>Neutral</span>",
      "<span style='color:#1F497D'>Agree</span>",
      "<span style='color:#1F497D'>Strongly Agree</span>",
      sep = " | "
    )
  ) +
  scale_fill_manual(values = cols) +
  # Flip so the bars run horizontally.
  coord_flip() +
  theme(
    plot.title = element_markdown(size = 18, hjust = -0.25),
    plot.subtitle = element_markdown(size = 12, face = "bold", color = "#777B7E"),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(color = "#777B7E", face = "bold", size = 12),
    axis.title.x = element_markdown(hjust = 0, size = 12),
    axis.text.x = element_text(color = "#777B7E", face = "bold", size = 12),
    axis.line.x = element_line(color = "grey", size = 1),
    axis.ticks.x = element_line(color = "#a9a9a9"),
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )
Figure 2.19 100% stacked horizontal bar chart (Page 59)
# Save the most recently displayed plot as a 7 x 4 inch PNG at 300 dpi.
ggsave(
  filename = "Figure 2.19.png",
  width = 7,
  height = 4,
  units = "in",
  dpi = 300
)