Recreation of Cole Nussbaumer Knaflic’s Storytelling with Data (Wiley, 2015) plots using R and ggplot2.

Original data provided by the book’s author: https://www.storytellingwithdata.com/book/downloads

library(ggtext)
## Warning: package 'ggtext' was built under R version 4.0.5
library(tidyselect)
library(ggplot2)
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.0 --
## v tibble  3.0.5     v dplyr   1.0.3
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.4.0     v forcats 0.5.0
## v purrr   0.3.4
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
library(gridtext)
## Warning: package 'gridtext' was built under R version 4.0.5

Chapter 2: Choosing an effective visual

Simple text

# One-row tibble for the "simple text" figure: the HTML/markdown label that
# geom_textbox() renders, plus the x/y position it is anchored at.
df_box <- tibble(
  label = "<span style='font-size:128pt;color:#31859C'>**20%**</span> <br/> 
  <span style='font-size:20pt;color:#7F7F7F'>**of children had a**</span><br/>
  <span style='font-size:20pt;color:#31859C'>**traditional stay-at-home mom** </span><br/>
  <span style='font-size:20pt;color:#7F7F7F'>**in 2012, compared to 41% in 1970**</span>",
  x = 0.01,
  y = 0.5,
  color = "white",
  fill = "white"
)

# Render the label as a borderless text box on an otherwise empty 0-1 canvas.
ggplot(data = df_box, mapping = aes(x = x, y = y, label = label)) +
  geom_textbox(
    width = unit(400, "pt"), # width of the text box
    hjust = 0,
    lineheight = 2,
    fill = "white",
    box.color = "white",
    box.padding = unit(c(0, 0, 0, 0), "pt")
  ) +
  lims(x = c(0, 1), y = c(0, 1)) +
  # Strip every axis, grid and panel element so only the text remains.
  theme(
    axis.title = element_blank(),
    axis.title.x = element_blank(),
    axis.title.y = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_blank(),
    axis.ticks.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.line = element_blank(),
    panel.background = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    plot.margin = unit(c(0, 0, 0, 0), "cm")
  )

Figure 2.3 Stay-at-home moms simple text makeover

# Write the plot to an 11 x 7 inch PNG at 300 dpi (no `plot` argument is
# given, so ggsave() falls back to its default plot).
ggsave(
  filename = "Figure 2.3 Stay-at-home moms simple text makeover.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)

Scatter Plot

# Load the scatterplot data; read_csv() guesses the column types and reports
# them in the column specification printed below.
miles <- read_csv("milesdriven.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   `Miles Driven` = col_number(),
##   `Cost Per Mile` = col_double()
## )
# Averages used for the dashed reference line, the "AVG" marker, and the
# below/above-average colour split of the points.
avg_cost <- mean(miles$`Cost Per Mile`)
avg_mile <- mean(miles$`Miles Driven`)

ggplot(data = miles, aes(x = `Miles Driven`, y = `Cost Per Mile`)) +
  # Below-average cost in grey; at-or-above-average cost highlighted in orange.
  geom_point(
    data = filter(miles, `Cost Per Mile` < avg_cost),
    color = "#BFBEBE", size = 5
  ) +
  geom_point(
    data = filter(miles, `Cost Per Mile` >= avg_cost),
    color = "#F79747", size = 5
  ) +

  geom_hline(yintercept = avg_cost, linetype = "longdash") +
  # annotate() draws the average marker and its label exactly once. A
  # geom_point()/geom_label() with constant x/y would inherit `miles` and
  # redraw the same marker and label once per data row, stacked on each other.
  annotate("point", x = avg_mile, y = avg_cost, size = 6) +
  annotate(
    "label",
    x = avg_mile, y = avg_cost, label = "AVG",
    hjust = 1.25, label.size = 0
  ) +

  scale_x_continuous(expand = c(0, 0), limits = c(0, 4000), n.breaks = 5) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 3),
    labels = scales::dollar_format(),
    n.breaks = 7
  ) +

  labs(
    title = "Cost per mile by miles driven",
    x = "Miles driven per month",
    y = "Cost per mile"
  ) +

  theme(
    plot.title = element_markdown(size = 16),
    plot.title.position = "plot",

    # Axis titles are pushed towards the origin corner of the plot.
    axis.title.x = element_text(size = 12, hjust = 0, vjust = -1, color = "#555655"),
    axis.title.y = element_text(size = 12, hjust = 1, vjust = 1, color = "#555655"),

    axis.text.x = element_text(color = "#777B7E", face = "bold"),
    axis.text.y = element_text(color = "#777B7E", face = "bold"),

    axis.ticks.x = element_line(color = "#a9a9a9"),
    axis.ticks.y = element_line(color = "#a9a9a9"),

    axis.line.x = element_line(color = "grey", size = 1),
    axis.line.y = element_line(color = "grey", size = 1),

    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),

    plot.margin = margin(.5, .5, .5, .5, "cm")
  )

Figure 2.7 Modified Scatterplot

# Write the plot to an 11 x 7 inch PNG at 300 dpi (no `plot` argument is
# given, so ggsave() falls back to its default plot).
ggsave(
  filename = "Figure 2.7 Modified Scatterplot.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)

Line graph

# Load the passport-control wait-time data; read_csv() guesses the column
# types and reports them in the column specification printed below.
passport <- read_csv("passport.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   year = col_double(),
##   month = col_character(),
##   Min = col_double(),
##   Avg = col_double(),
##   Max = col_double(),
##   `Max to graph` = col_double()
## )
# Reusable theme settings: markdown title/subtitle, muted grey axis text,
# ticks and axis lines, and no grid or panel background.
theme_ch2 <- theme(
  # Title block
  plot.title = element_markdown(size = 16),
  plot.title.position = "plot",
  plot.subtitle = element_markdown(size = 14),

  # Axis titles are pushed towards the origin corner of the plot.
  axis.title.x = element_text(size = 12, hjust = 0, vjust = -1, color = "#555655"),
  axis.title.y = element_text(size = 12, hjust = 1, vjust = 1, color = "#555655"),

  # Tick labels, tick marks and axis lines
  axis.text.x = element_text(size = 12, color = "#777B7E", face = "bold"),
  axis.text.y = element_text(size = 12, color = "#777B7E", face = "bold"),
  axis.ticks.x = element_line(color = "#a9a9a9"),
  axis.ticks.y = element_line(color = "#a9a9a9"),
  axis.line.x = element_line(color = "grey", size = 1),
  axis.line.y = element_line(color = "grey", size = 1),

  # Remove the grid and the panel background.
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  panel.background = element_blank(),

  plot.margin = margin(.5, .5, .5, .5, "cm")
)
# Build a Date column from the year and month columns, using day 1 of each
# month.
passport <- mutate(passport, date = ymd(paste(year, month, 1)))
# Inspect the September 2014 row.
filter(passport, date == "2014-09-01")
## # A tibble: 1 x 7
##    year month   Min   Avg   Max `Max to graph` date      
##   <dbl> <chr> <dbl> <dbl> <dbl>          <dbl> <date>    
## 1  2014 Sep       9    18    27             18 2014-09-01
# Average wait time drawn as a line over the min-max band, with the
# September 2015 value called out by a point and a text label.
ggplot(data = passport) +
  geom_ribbon(aes(x = date, ymin = Min, ymax = Max), fill = "grey70") +
  geom_line(aes(x = date, y = Avg), size = 2) +
  geom_point(
    data = filter(passport, date == "2015-09-01"),
    aes(x = date, y = Avg),
    size = 5
  ) +

  # Direct labels replace a legend. The "21" value label is hard-coded.
  annotate("text", x = as.Date("2015-09-20"), y = 21, label = "21", color = "black", size = 5) +
  annotate("text", x = as.Date("2014-09-15"), y = 10, label = "MIN", color = "#76787B", size = 5) +
  annotate("text", x = as.Date("2014-09-15"), y = 20, label = "AVG", color = "black", size = 5, fontface = 2) +
  annotate("text", x = as.Date("2014-09-15"), y = 25, label = "MAX", color = "#76787B", size = 5) +

  scale_x_date(
    expand = c(0, 0),
    # Show the year only under the first break and wherever the year changes
    # from the previous break; every other break shows just the month.
    # dplyr::lag is spelled out because stats::lag is masked by dplyr.
    labels = function(x) {
      if_else(
        is.na(dplyr::lag(x)) | year(dplyr::lag(x)) != year(x),
        paste(month(x, label = TRUE), "\n", year(x)),
        paste(month(x, label = TRUE))
      )
    },
    limits = c(ymd("2014-08-16"), ymd("2015-10-01")),
    breaks = passport$date # one break per observed month
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    limits = c(0, 40), # full argument name; `limit` relied on partial matching
    n.breaks = 9
  ) +

  labs(
    title = "Passport control wait time",
    subtitle = "Past 13 months",
    y = "Wait time (minutes)"
  ) +

  coord_cartesian(clip = "off") +
  theme_ch2 +
  theme(
    axis.title.x = element_blank(),
    plot.margin = margin(.5, 0.5, 0.5, 0.5, "cm")
  )

Figure 2.9 Showing average within a range in a line graph

# Write the plot to an 11 x 7 inch PNG at 300 dpi (no `plot` argument is
# given, so ggsave() falls back to its default plot).
ggsave(
  filename = "Figure 2.9 Showing average within a range in a line graph.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)

Slopegraph

# Load the employee-feedback data; read_csv() guesses the column types and
# reports them in the column specification printed below.
feedback <- read_csv("feedback.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   Category = col_character(),
##   `2014` = col_double(),
##   `2015` = col_double()
## )
# Print the tibble to inspect the wide (one column per year) layout.
feedback
## # A tibble: 7 x 3
##   Category              `2014` `2015`
##   <chr>                  <dbl>  <dbl>
## 1 Culture                   80     96
## 2 Peers                     85     91
## 3 Work environment          76     75
## 4 Leadership                59     62
## 5 Rewards & recognition     41     45
## 6 Perf management           33     42
## 7 Career development        49     33
# Reshape from one column per survey year to one row per (Category, year),
# which is the layout the slopegraph layers need.
df_long <- pivot_longer(
  feedback,
  cols = c("2014", "2015"),
  names_to = "year",
  values_to = "percent"
)

# Preview the first ten rows of the reshaped data.
head(df_long, 10)
## # A tibble: 10 x 3
##    Category              year  percent
##    <chr>                 <chr>   <dbl>
##  1 Culture               2014       80
##  2 Culture               2015       96
##  3 Peers                 2014       85
##  4 Peers                 2015       91
##  5 Work environment      2014       76
##  6 Work environment      2015       75
##  7 Leadership            2014       59
##  8 Leadership            2015       62
##  9 Rewards & recognition 2014       41
## 10 Rewards & recognition 2015       45
# Split the data so the highlighted category is drawn exactly once, in its
# own colour and after the grey layers (so it stays on top), instead of being
# painted over a grey copy of itself in every layer.
slope_focus <- filter(df_long, Category == "Career development")
slope_others <- filter(df_long, Category != "Career development")

ggplot(data = df_long, aes(x = year, y = percent)) +
  # Lines
  geom_line(data = slope_others, aes(group = Category),
            size = 1.5, color = "#76787B") +
  geom_line(data = slope_focus, aes(group = Category),
            size = 1.5, color = "#FF3403") +
  # Points
  geom_point(data = slope_others, size = 3, color = "#76787B") +
  geom_point(data = slope_focus, size = 3, color = "#FF3403") +

  # Percent labels: left of the 2014 points, right of the 2015 points.
  # `year` is a character column, so it is compared with strings.
  geom_text(data = filter(slope_others, year == "2014"),
            aes(label = paste0(percent, "%")),
            nudge_x = -.2, color = "#76787B") +
  geom_text(data = filter(slope_others, year == "2015"),
            aes(label = paste0(percent, "%")),
            nudge_x = .25, color = "#76787B") +
  geom_text(data = filter(slope_focus, year == "2014"),
            aes(label = paste0(percent, "%")),
            nudge_x = -.2, color = "#FF3403") +
  geom_text(data = filter(slope_focus, year == "2015"),
            aes(label = paste0(percent, "%")),
            nudge_x = .25, color = "#FF3403") +

  # Category labels, right-aligned to the left of the 2014 percent labels.
  geom_text(data = filter(slope_others, year == "2014"),
            aes(label = Category),
            nudge_x = -.4, hjust = 1, color = "#76787B") +
  geom_text(data = filter(slope_focus, year == "2014"),
            aes(label = Category),
            nudge_x = -.4, hjust = 1, color = "#FF3403") +

  # The large multiplicative expansion leaves room on both sides for the
  # direct labels.
  scale_x_discrete(expand = expansion(2, 0),
                   limits = c("2014", "2015"),
                   labels = c(2014, 2015)) +
  scale_y_continuous(limits = c(20, 100)) +

  labs(title = "Employee feedback over time",
       x = "Survey Year",
       subtitle = "Survey Category | Percent Favorable") +
  theme(plot.title = element_markdown(size = 16),
        plot.title.position = "plot",
        plot.subtitle = element_markdown(size = 14, hjust = 0.15, color = "#76787B"),
        axis.title.x = element_text(size = 12, hjust = 0, vjust = -1, color = "#555655"),
        axis.title.y = element_blank(),

        axis.text.x = element_text(size = 12, color = "#777B7E", face = "bold"),
        axis.text.y = element_blank(),

        axis.ticks.x = element_line(color = "#a9a9a9"),
        axis.ticks.y = element_blank(),

        # Only the x axis line is drawn; the y axis is replaced by direct labels.
        axis.line.x = element_line(color = "grey", size = 1),

        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank(),
        panel.background = element_blank(),

        plot.margin = margin(.5, .5, .5, .5, "cm"))

Figure 2.11 Modified slopegraph (page 49)

# Write the plot to an 11 x 7 inch PNG at 300 dpi (no `plot` argument is
# given, so ggsave() falls back to its default plot).
ggsave(
  filename = "Figure 2.11 Modified slopegraph.png",
  width = 11,
  height = 7,
  units = "in",
  dpi = 300
)

bars: Stacked horizontal bar chart (FIG2.19)

# Load the survey results; per the column specification printed below, every
# column (including the percentages) is read as character.
survey <- read_csv("survey.csv")
## 
## -- Column specification --------------------------------------------------------
## cols(
##   items = col_character(),
##   `Strongly Disagree` = col_character(),
##   Disagree = col_character(),
##   Neutral = col_character(),
##   Agree = col_character(),
##   `Strongly Agree` = col_character()
## )
# Print the tibble to inspect the raw values (percentages are strings like "5%").
survey
## # A tibble: 5 x 6
##   items         `Strongly Disagree` Disagree Neutral Agree `Strongly Agree`
##   <chr>         <chr>               <chr>    <chr>   <chr> <chr>           
## 1 Survey item E 5%                  5%       30%     27%   33%             
## 2 Survey item D 6%                  11%      35%     28%   20%             
## 3 Survey item C 6%                  14%      45%     15%   20%             
## 4 Survey item B 8%                  32%      20%     23%   17%             
## 5 Survey item A 16%                 25%      32%     18%   9%

Transforming data

# Reshape to one row per (item, response level), add a numeric `percent`
# column parsed from the "NN%" strings, and turn `Scale` into a factor with an
# explicit level order.
df_long <- survey %>%
  pivot_longer(
    cols = c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree"),
    names_to = "Scale",
    values_to = "Percent"
  ) %>%
  mutate(
    percent = as.numeric(str_remove(Percent, "%")),
    Scale = factor(
      Scale,
      levels = c("Strongly Agree", "Agree", "Neutral", "Disagree", "Strongly Disagree")
    )
  )

# Preview the first ten rows of the reshaped data.
head(df_long, 10)
## # A tibble: 10 x 4
##    items         Scale             Percent percent
##    <chr>         <fct>             <chr>     <dbl>
##  1 Survey item E Strongly Disagree 5%            5
##  2 Survey item E Disagree          5%            5
##  3 Survey item E Neutral           30%          30
##  4 Survey item E Agree             27%          27
##  5 Survey item E Strongly Agree    33%          33
##  6 Survey item D Strongly Disagree 6%            6
##  7 Survey item D Disagree          11%          11
##  8 Survey item D Neutral           35%          35
##  9 Survey item D Agree             28%          28
## 10 Survey item D Strongly Agree    20%          20
# Fill colour per response level: both "disagree" levels share dark grey,
# "Neutral" is light grey, and both "agree" levels share blue.
cols <- setNames(
  c("#404040", "#404040", "#BFBFBF", "#1F497D", "#1F497D"),
  c("Strongly Disagree", "Disagree", "Neutral", "Agree", "Strongly Agree")
)

# 100% stacked horizontal bar chart: one bar per survey item, segmented by
# response level and coloured via `cols`.
ggplot(data = df_long) +
  # geom_col() is the idiomatic form of geom_bar(stat = "identity").
  # The white outline separates adjacent segments that share a fill colour.
  geom_col(
    aes(x = items, y = percent, fill = Scale),
    width = 0.65,
    color = "white"
  ) +

  scale_y_continuous(
    name = "Percent of total",
    limits = c(0, 100),
    breaks = seq(0, 100, by = 20),
    labels = function(x) paste0(x, "%"),
    position = "right"
  ) +

  labs(
    title = "Survey results",
    # The coloured subtitle doubles as the legend. Building it with
    # sep = " | " gives every separator the same spacing.
    subtitle = paste(
      "<span style='color:#404040'>Strongly Disagree</span>",
      "<span style='color:#404040'>Disagree</span>",
      "<span style='color:#BFBFBF'>Neutral</span>",
      "<span style='color:#1F497D'>Agree</span>",
      "<span style='color:#1F497D'>Strongly Agree</span>",
      sep = " | "
    )
  ) +

  scale_fill_manual(values = cols) +

  # Flip so the survey items run down the vertical axis.
  coord_flip() +

  theme(
    plot.title = element_markdown(size = 18, hjust = -0.25),
    plot.subtitle = element_markdown(size = 12, face = "bold", color = "#777B7E"),
    axis.title.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_text(color = "#777B7E", face = "bold", size = 12),

    axis.title.x = element_markdown(hjust = 0, size = 12),
    axis.text.x = element_text(color = "#777B7E", face = "bold", size = 12),
    axis.line.x = element_line(color = "grey", size = 1),
    axis.ticks.x = element_line(color = "#a9a9a9"),

    # The subtitle replaces the legend.
    legend.position = "none",
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank()
  )

Figure 2.19 100% stacked horizontal bar chart (Page 59)

# Write the plot to a 7 x 4 inch PNG at 300 dpi (no `plot` argument is
# given, so ggsave() falls back to its default plot).
ggsave(
  filename = "Figure 2.19.png",
  width = 7,
  height = 4,
  units = "in",
  dpi = 300
)