Series bài giảng này về hình ảnh hóa dữ liệu hướng vào việc tái lập lại những sản phẩm được trình bày trong cuốn Storytelling with Data: Let’s Practice! của tác giả Cole Nussbaumer Knaflic chỉ bằng ngôn ngữ R. Cuốn sách này có 11 chương với nội dung như sau:
Cuốn sách này có thể được coi là tập 2 của cuốn Storytelling with Data: A Data Visualization Guide for Business Professionals nhằm hiện thực hóa các nguyên lí về Data Visualization. Nếu cần thiết bạn đọc cũng nên nghiên cứu cuốn sách này hoặc tham khảo từ sân chơi Storytelling with Data của chính tác giả hai cuốn sách trên. Data thực hành các bạn có thể download tại đây.
R codes một số kiểu Chart (chẳng hạn Pie Chart - tôi là người không ủng hộ sử dụng kiểu graph này) sẽ không được trình bày. Ngoài ra sẽ có một số Charts tôi bỏ qua do việc tạo những Charts này có dạng tương tự như một Chart nào đó đã được trình bày trước đó (tất nhiên là có R codes đi kèm). Điều này cũng có nghĩa là có thể có một số Exercises sẽ bị bỏ qua. Tôi chỉ trình bày R codes cho những Charts mà tôi cho là quan trọng.
Dưới đây là R codes để tái tạo lại Figure 2.1b (kích chuột vào cửa sổ có chữ “Show” màu xám nhạt):
#===========================================
# R codes for "improve this table" section
#===========================================
# NOTE: the original `rm(list = ls())` was dropped. Run this script in a
# fresh R session instead of wiping whatever workspace it is sourced into.

library(readxl)     # For loading xlsx files.
library(dplyr)      # For data processing and manipulation.
library(kableExtra) # For presenting beautiful tables.

# Load data (skip = 5 drops the rows above the table in the sheet):
rawData <- read_excel("E:/storytelling/2.1 EXERCISE.xlsx", skip = 5)

#----------------------------------------------------------------------------------
# FIGURE 2.1b Slightly improved table
# Ref for presenting nice table in R:
# 1. https://cran.r-project.org/web/packages/kableExtra/vignettes/awesome_table_in_html.html
# 2. https://themockup.blog/static/slides/intro-tables#15
# 3. https://haozhu233.github.io/kableExtra/
#----------------------------------------------------------------------------------

# Keep the original column headers so they can be restored for display:
columnNames <- names(rawData)

# Short working names for all columns:
names(rawData) <- c("tier", "numAccounts", "perAccounts", "revenue", "perRevenue")

# Divide each tier's value by its share to estimate the grand totals
# (presumably perAccounts / perRevenue are fractions - TODO confirm), then
# take what the listed tiers do not cover as the "All other" remainder:
rawData <- rawData %>%
  mutate(
    totalAccounts = numAccounts / perAccounts,
    totalRevenue = revenue / perRevenue,
    allOtherAcc = totalAccounts - sum(numAccounts),
    allOtherRev = totalRevenue - sum(revenue)
  )

dfAllOther <- data.frame(
  tier = "All other",
  numAccounts = rawData$allOtherAcc,
  revenue = rawData$allOtherRev
)

# Every row carries its own estimate of the remainder; only the first is used.
baseData <- rawData %>%
  select(tier, numAccounts, revenue) %>%
  bind_rows(dfAllOther %>% slice(1))

# Recompute shares including "All other", swap the first two rows, and round:
baseData <- baseData %>%
  mutate(
    perAccounts = 100 * numAccounts / sum(numAccounts),
    perRevenue = 100 * revenue / sum(revenue)
  ) %>%
  slice(c(2, 1, 3:6)) %>%
  mutate(
    perAccounts = round(perAccounts, 0),
    perRevenue = round(perRevenue, 0),
    revenue = round(revenue, 1)
  )

# TOTAL row: column sums of the (already rounded) numeric columns.
dfForAll <- baseData %>%
  summarise(across(where(is.numeric), sum)) %>%
  mutate(tier = "TOTAL")

dfForReporting <- baseData %>% bind_rows(dfForAll)

# Display version. sprintf() always prints one decimal, so whole-number
# revenues render as e.g. "$6.0" without special-casing a particular tier.
dfReportingFigure2 <- dfForReporting %>%
  select(tier, numAccounts, perAccounts, revenue, perRevenue) %>%
  mutate(
    perAccounts = paste0(perAccounts, "%"),
    perRevenue = paste0(perRevenue, "%"),
    revenue = sprintf("$%.1f", revenue)
  )

names(dfReportingFigure2) <- columnNames

dfReportingFigure2 %>%
  kbl(caption = "Figure 2.1b: Slightly improved table") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria") %>%
  row_spec(c(1, 3, 5, 7), bold = FALSE, color = "black", background = "#C5C5C5")
| Tier | # of Accounts | % Accounts | Revenue ($M) | % Revenue |
|---|---|---|---|---|
| A+ | 19 | 2% | $3.9 | 21% |
| A | 77 | 7% | $4.7 | 25% |
| B | 338 | 31% | $6.0 | 32% |
| C | 425 | 39% | $2.8 | 15% |
| D | 24 | 2% | $0.4 | 2% |
| All other | 205 | 19% | $0.9 | 5% |
| TOTAL | 1088 | 100% | $18.7 | 100% |
R Codes cho Figure 2.1c (version 1):
# Figure 2.1c (version 1): the two percentage columns get a heatmap
# background. Colours come from the numeric columns of dfForReporting,
# because dfReportingFigure2 holds the percentages as formatted strings.
dfReportingFigure2 %>%
  kbl(caption = "Figure 2.1c: Table with heatmapping, version 1") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria") %>%
  add_header_above(c(" " = 1, "ACCOUNTS" = 2, "REVENUE" = 2)) %>%
  column_spec(
    column = 3, color = "white",
    background = spec_color(dfForReporting$perAccounts, end = 0.8)
  ) %>%
  column_spec(
    column = 5, color = "white",
    background = spec_color(dfForReporting$perRevenue, end = 0.8)
  ) %>%
  column_spec(column = 1, color = "black", background = "#C5C5C5")
|
ACCOUNTS
|
REVENUE
|
|||
|---|---|---|---|---|
| Tier | # of Accounts | % Accounts | Revenue ($M) | % Revenue |
| A+ | 19 | 2% | $3.9 | 21% |
| A | 77 | 7% | $4.7 | 25% |
| B | 338 | 31% | $6.0 | 32% |
| C | 425 | 39% | $2.8 | 15% |
| D | 24 | 2% | $0.4 | 2% |
| All other | 205 | 19% | $0.9 | 5% |
| TOTAL | 1088 | 100% | $18.7 | 100% |
R codes for Figure 2.1c (version 2):
# Figure 2.1c (version 2): the heatmap is applied to the text colour instead
# of the cell background. The caption previously read "Figure 2.1d", which
# disagreed with the section heading and with the version 1 caption.
dfReportingFigure2 %>%
  kbl(caption = "Figure 2.1c: Table with heatmapping, version 2") %>%
  kable_classic(full_width = FALSE, html_font = "Cambria") %>%
  add_header_above(c(" " = 1, "ACCOUNTS" = 2, "REVENUE" = 2)) %>%
  column_spec(
    column = 3,
    link = "https://haozhu233.github.io/kableExtra/",
    color = spec_color(dfForReporting$perAccounts, end = 0.5)
  ) %>%
  column_spec(
    column = 5,
    link = "https://haozhu233.github.io/kableExtra/",
    color = spec_color(dfForReporting$perRevenue, end = 0.5)
  ) %>%
  column_spec(column = 1, color = "black", background = "#C5C5C5")
|
ACCOUNTS
|
REVENUE
|
|||
|---|---|---|---|---|
| Tier | # of Accounts | % Accounts | Revenue ($M) | % Revenue |
| A+ | 19 | 2% | $3.9 | 21% |
| A | 77 | 7% | $4.7 | 25% |
| B | 338 | 31% | $6.0 | 32% |
| C | 425 | 39% | $2.8 | 15% |
| D | 24 | 2% | $0.4 | 2% |
| All other | 205 | 19% | $0.9 | 5% |
| TOTAL | 1088 | 100% | $18.7 | 100% |
R codes cho Figure 2.1f:
library(tidyr)    # For data reshaping.
library(ggplot2)  # For data visualization.
library(showtext) # For using Google fonts in plots.

# Long format: one row per tier and metric. Rows are reversed before the
# factor is built, so the first tier is the last level and is drawn at the
# top of the horizontal bars.
dfLong <- dfForReporting %>%
  filter(tier != "TOTAL") %>%
  slice(6:1) %>%
  mutate(tier = factor(tier, levels = tier)) %>%
  select(tier, perAccounts, perRevenue) %>%
  rename(`% Accounts` = perAccounts, `% Revenue` = perRevenue) %>%
  pivot_longer(cols = c(`% Accounts`, `% Revenue`))

# Prepare for plotting:
color1 <- "#c74f4c"
color2 <- "#5687c2"

# Short bars get their label outside the bar, long bars inside it:
dfPresentingText1 <- dfLong %>% filter(value < 10)
dfPresentingText2 <- dfLong %>% filter(value >= 10)

my_font <- "Ubuntu"
font_add_google(name = my_font, family = my_font)
showtext_auto()

dfLong %>%
  ggplot(aes(y = tier, x = value)) +
  geom_col(fill = color2, width = 0.7) +
  facet_wrap(~ name) +
  geom_text(
    data = dfPresentingText1, aes(label = value),
    hjust = -0.5, color = color2, size = 5, family = my_font
  ) +
  geom_text(
    data = dfPresentingText2, aes(label = value),
    hjust = 1.3, color = "white", size = 5, family = my_font
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  labs(
    title = "Figure 2.1f: Two horizontal bar charts",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    axis.text.y = element_text(size = 14, family = my_font),
    strip.text = element_text(size = 14, family = my_font, color = "grey20"),
    strip.background = element_rect(color = "grey80", fill = "grey80"),
    plot.title = element_text(size = 18),
    plot.caption = element_text(color = "grey39", face = "italic")
  )
R codes cho Figure 2.1g:
# Order of the fill groups in the dodged bars and in the legend:
dfLong <- dfLong %>%
  mutate(name = factor(name, levels = c("% Revenue", "% Accounts")))

# Breaks and labels are defined together so they always have equal length;
# a bare `labels` vector only works while the default breaks happen to match.
breaks_on_x <- seq(0, 40, 10)
label_on_x <- paste0(breaks_on_x, "%")

# Figure 2.1g: both metrics in a single horizontal bar chart.
# NOTE(review): the title still says "Two horizontal bar charts" although a
# single chart is drawn - confirm the intended wording.
dfLong %>%
  ggplot(aes(y = tier, x = value, fill = name)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = c(color1, color2)) +
  scale_x_continuous(
    position = "top", expand = c(0, 0), limits = c(0, 42),
    breaks = breaks_on_x, labels = label_on_x
  ) +
  labs(
    title = "Figure 2.1g: Two horizontal bar charts",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    legend.position = "top",
    plot.title = element_text(size = 18),
    plot.caption = element_text(color = "grey39", face = "italic"),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.y = element_blank(),
    axis.text = element_text(size = 13),
    legend.text = element_text(size = 13, family = my_font, color = "grey30"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    plot.title.position = "plot"
  )

# Figure 2.1h: same data as vertical bars, tiers in their table order.
data2.1h <- dfLong %>%
  mutate(tier = factor(tier, levels = baseData$tier))

data2.1h %>%
  ggplot(aes(x = tier, y = value, fill = name)) +
  geom_col(position = "dodge", width = 0.7) +
  scale_fill_manual(values = c(color1, color2)) +
  scale_y_continuous(
    expand = c(0, 0), limits = c(0, 42),
    breaks = breaks_on_x, labels = label_on_x
  ) +
  labs(
    title = "Figure 2.1h: A vertical bar chart",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    legend.title = element_blank(),
    legend.position = "top",
    plot.title = element_text(size = 17, face = "bold", color = "grey20"),
    plot.caption = element_text(color = "grey39", face = "italic"),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.text = element_text(size = 13),
    legend.text = element_text(size = 12, family = my_font, color = "grey30"),
    plot.margin = margin(0.5, 0.5, 0.5, 0.5, "cm"),
    plot.title.position = "plot",
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(0.4, "cm")
  )
R codes cho Table/Figure 2.2a, Figure 2.2b tương tự như R codes cho các yêu cầu của Excercise 1.1 nên sẽ không trình bày lại. Dưới đây là R codes cho Figure 2.2c:
# NOTE: the original `rm(list = ls())` was dropped. Everything this section
# needs is (re)defined below, so clearing the workspace is unnecessary.
library(showtext)
library(scales) # For comma() / percent() axis labels.

rawData <- read_excel("E:/storytelling/2.2 EXERCISE.xlsx", skip = 5) # Load data.

# Prepare for plotting:
color_for_bar <- "#74ab45"
lato_font <- "Lato"
font_add_google(name = lato_font, family = lato_font)
showtext_auto()

# Figure 2.2c: meals served per campaign year as a bar chart.
rawData %>%
  ggplot(aes(x = `Campaign Year`, y = `Meals Served`)) +
  geom_col(fill = color_for_bar, width = 0.7) +
  scale_x_continuous(breaks = seq(2010, 2019, 1), expand = c(0, 0)) +
  scale_y_continuous(
    breaks = seq(0, 300000, 50000), labels = comma, expand = c(0, 0)
  ) +
  labs(
    title = "Figure 2.2c: Meals served over time",
    caption = "Source: https://www.storytellingwithdata.com/",
    x = "CAMPAIGN YEAR",
    y = "# OF MEALS SERVED"
  ) +
  theme_minimal() +
  theme(
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    text = element_text(family = lato_font),
    plot.title = element_text(size = 16),
    axis.title.x = element_text(color = "grey30", hjust = 0, vjust = -1),
    axis.title.y = element_text(color = "grey30", hjust = 0.85, vjust = 2),
    axis.text = element_text(size = 11),
    plot.caption = element_text(color = "grey39", face = "italic"),
    plot.title.position = "plot",
    plot.margin = margin(0.5, 1, 0.5, 0.5, "cm")
  )
R Codes cho Figure 2.2d:
library(ggrepel) # For non-overlapping text labels.

# Only the first and last campaign years are marked and labelled:
dfPoint <- rawData %>%
  filter(`Campaign Year` %in% c(2010, 2019)) %>%
  rename(
    year = `Campaign Year`,
    meal = `Meals Served`
  )

# Figure 2.2d: the same series as a line, with the y axis text removed in
# favour of direct labels on the two end points.
rawData %>%
  ggplot(aes(x = `Campaign Year`, y = `Meals Served`)) +
  geom_line(color = color_for_bar, linewidth = 1.5) +
  geom_point(
    data = dfPoint, aes(x = year, y = meal),
    size = 3, color = color_for_bar
  ) +
  geom_text_repel(
    data = dfPoint,
    aes(x = year, y = meal, label = comma(meal)),
    color = color_for_bar, size = 4,
    direction = "y", family = lato_font, force = 1
  ) +
  scale_x_continuous(breaks = seq(2010, 2019, 1), expand = c(0, 0.1)) +
  scale_y_continuous(
    breaks = seq(0, 300000, 50000), labels = comma, expand = c(0.1, 0)
  ) +
  labs(
    title = "Figure 2.2d: Meals served over time",
    caption = "Source: https://www.storytellingwithdata.com/",
    subtitle = "# OF MEALS SERVED",
    x = "CAMPAIGN YEAR"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    panel.grid = element_blank(),
    axis.title.x = element_text(color = "grey30", hjust = -0.02),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 10),
    axis.title.y = element_blank(),
    plot.margin = margin(0.5, 1, 0.5, 1, "cm"),
    plot.caption = element_text(color = "grey39", face = "italic"),
    plot.title = element_text(size = 18, hjust = -0.1),
    plot.subtitle = element_text(size = 11, hjust = -0.05, color = "grey30")
  )
R Codes cho Figure 2.3a:
library(tidyr) # For reshaping data form.

data2.3 <- read_excel("E:/storytelling/2.3 EXERCISE.xlsx", skip = 5)

# Reverse the first nine rows before fixing the factor order, so the
# earliest date is the last level and is drawn at the top of the chart:
data2.3a_wider <- data2.3 %>%
  slice(9:1) %>%
  mutate(DATE = factor(DATE, levels = DATE))

data2.3a_long <- data2.3a_wider %>%
  pivot_longer(cols = c("CAPACITY", "DEMAND")) %>%
  mutate(name = factor(name, levels = c("DEMAND", "CAPACITY")))

# Colours are matched to the factor levels in order: DEMAND, then CAPACITY.
colorBar <- c("#ff7f00", "#377eb8")

# Figure 2.3a: horizontal dodged bars with the value printed inside each bar.
data2.3a_long %>%
  ggplot(aes(y = DATE, x = value, fill = name)) +
  geom_col(position = "dodge") +
  geom_text(
    aes(label = comma(value)),
    position = position_dodge(0.9), hjust = 1.1,
    family = lato_font, color = "white"
  ) +
  scale_fill_manual(values = colorBar) +
  scale_x_continuous(expand = c(0, 0)) +
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(
    title = "Figure 2.3a: Demand and Capacity by Month",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    legend.position = "top",
    legend.title = element_blank(),
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 11),
    legend.text = element_text(family = lato_font, color = "grey30", size = 10),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    plot.caption = element_text(color = "grey39", face = "italic", size = 9),
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(0.4, "cm"),
    plot.title.position = "plot",
    plot.title = element_text(size = 18)
  )
R Codes cho một số phương án khác cho hình ảnh hóa dữ liệu như được đề cập ở trang 69:
# Prepare data for plotting:
# First nine rows in their original order, so dates run left to right.
dataTimeBar <- data2.3 %>%
  slice(1:9) %>%
  mutate(DATE = factor(DATE, levels = DATE)) %>%
  pivot_longer(cols = c("CAPACITY", "DEMAND")) %>%
  mutate(name = factor(name, levels = c("DEMAND", "CAPACITY")))

# Plot: vertical dodged bars, demand next to capacity for each month.
dataTimeBar %>%
  ggplot(aes(x = DATE, y = value, fill = name)) +
  geom_col(position = "dodge") +
  scale_fill_manual(values = colorBar) +
  scale_y_continuous(expand = c(0, 0), labels = comma) +
  scale_x_discrete(expand = c(0, 0)) +
  labs(
    title = "Figure 2.3a1: Demand and Capacity by Month",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    panel.grid.minor = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.title = element_blank(),
    legend.position = "top",
    legend.title = element_blank(),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(0.4, "cm"),
    plot.caption = element_text(color = "grey39", face = "italic", size = 10),
    plot.title.position = "plot",
    plot.title = element_text(size = 18),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    legend.text = element_text(family = lato_font, color = "grey30", size = 10)
  )
# Prepare data for plotting:
# Replace the dates with a numeric index 1..9 so the x axis is continuous
# and can be extended to make room for the direct labels on the right.
dataForLinePlot <- data2.3 %>%
  slice(1:9) %>%
  mutate(myTime = 1:9)

# Axis labels Apr..Dec; the first and last also carry the year. Two empty
# labels are appended for the extra breaks 10 and 11.
labels_on_x <- month.abb[4:12]
labels_on_x <- case_when(
  labels_on_x %in% c("Apr", "Dec") ~ paste0(labels_on_x, "\n 2019"),
  TRUE ~ labels_on_x
)
labels_on_x <- c(labels_on_x, c("", ""))

dataForLinePlot_long <- dataForLinePlot %>%
  select(-DATE) %>%
  pivot_longer(cols = c("CAPACITY", "DEMAND"))

# Last observation of each series; supplies the y position of its label.
dfText1 <- dataForLinePlot_long %>%
  filter(myTime == 9) %>%
  filter(name == "CAPACITY")

dfText2 <- dataForLinePlot_long %>%
  filter(myTime == 9) %>%
  filter(name != "CAPACITY")

dataForLinePlot_long %>%
  ggplot(aes(x = myTime, y = value, color = name)) +
  geom_line(linewidth = 1.3, show.legend = FALSE) +
  geom_point(
    data = dataForLinePlot_long %>% filter(myTime == 9),
    show.legend = FALSE, size = 4
  ) +
  geom_text(
    data = dfText1, aes(x = 10, label = "24K CAPACITY"),
    show.legend = FALSE, family = lato_font
  ) +
  geom_text(
    data = dfText2, aes(x = 10, label = "34K DEMAND"),
    show.legend = FALSE, family = lato_font
  ) +
  scale_colour_manual(values = colorBar) +
  scale_y_continuous(
    limits = c(0, 55000), breaks = seq(0, 60000, 10000),
    labels = comma, expand = c(0, 0)
  ) +
  scale_x_continuous(
    breaks = 1:11, expand = c(0, 0.1), limits = c(1, 11),
    labels = labels_on_x
  ) +
  labs(
    title = "Demand and Capacity by Month using Line Graph (version 1)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    plot.title.position = "plot",
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_text(size = 18),
    axis.text.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    plot.caption = element_text(color = "grey39", face = "italic", size = 10)
  )
library(ggtext)
# --- Line graph, version 2: the legend is replaced by a colour-coded title ---
# The title is HTML rendered by ggtext::element_markdown().
p_title <- "<span style = 'color:#377eb8'>Demand</span> vs <span style = 'color:#ff7f00'>Capacity</span> over time (version 2)"

dataForLinePlot_long %>%
  ggplot(aes(x = myTime, y = value, color = name)) +
  geom_line(linewidth = 1.3, show.legend = FALSE) +
  geom_point(
    data = dataForLinePlot_long %>% filter(myTime == 9),
    show.legend = FALSE, size = 4
  ) +
  geom_text(
    data = dfText1, aes(x = 10, label = "24K CAPACITY"),
    show.legend = FALSE, family = lato_font
  ) +
  geom_text(
    data = dfText2, aes(x = 10, label = "34K DEMAND"),
    show.legend = FALSE, family = lato_font
  ) +
  scale_colour_manual(values = colorBar) +
  scale_y_continuous(
    limits = c(0, 55000), breaks = seq(0, 60000, 10000),
    labels = comma, expand = c(0, 0)
  ) +
  scale_x_continuous(
    breaks = 1:11, expand = c(0, 0.1), limits = c(1, 11),
    labels = labels_on_x
  ) +
  labs(
    title = p_title,
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    plot.title.position = "plot",
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    axis.title = element_blank(),
    panel.grid.minor = element_blank(),
    plot.title = element_markdown(size = 18),
    axis.text.y = element_text(size = 11),
    axis.text.x = element_text(size = 11),
    plot.caption = element_text(color = "grey39", face = "italic", size = 10)
  )

# --- Figure 2.3a3: stacked bars (geom_col() stacks by default) ---
dataTimeBar %>%
  ggplot(aes(x = DATE, y = value, fill = name)) +
  geom_col() +
  scale_fill_manual(values = colorBar) +
  scale_y_continuous(
    expand = c(0, 0), labels = comma,
    limits = c(0, 80000), breaks = seq(0, 80000, 10000)
  ) +
  scale_x_discrete(expand = c(0, 0)) +
  labs(
    title = "Figure 2.3a3: Demand and Capacity by Month",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    panel.grid.major.y = element_line(color = "grey80", linewidth = 0.5),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    axis.title = element_blank(),
    legend.position = "top",
    legend.title = element_blank(),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(0.4, "cm"),
    plot.caption = element_text(color = "grey39", face = "italic", size = 10),
    plot.title.position = "plot",
    plot.title = element_text(size = 18),
    axis.text.y = element_text(size = 10),
    axis.text.x = element_text(size = 10),
    legend.text = element_text(family = lato_font, color = "grey30", size = 10)
  )

# --- Figure 2.4e: dot plot, a grey segment joins capacity and demand ---
# Wide form (one row per month) supplies both ends of each segment.
dataForSegment <- dataForLinePlot_long %>%
  pivot_wider(names_from = name, values_from = value)

# Values in thousands, rounded, as text printed inside the dots:
capText <- dataForLinePlot_long %>%
  filter(name == "CAPACITY") %>%
  mutate(valueK = as.character(round(value / 1000, 0)))

demText <- dataForLinePlot_long %>%
  filter(name != "CAPACITY") %>%
  mutate(valueK = as.character(round(value / 1000, 0)))

ggplot() +
  geom_segment(
    data = dataForSegment,
    aes(x = myTime, xend = myTime, y = CAPACITY, yend = DEMAND),
    linewidth = 10, color = "grey85"
  ) +
  geom_point(
    data = capText, aes(x = myTime, y = value, color = "CAPACITY"),
    size = 10
  ) +
  geom_point(
    data = demText, aes(x = myTime, y = value, color = "DEMAND"),
    size = 10
  ) +
  geom_text(
    data = capText, aes(x = myTime, y = value, label = valueK),
    color = "white", family = lato_font
  ) +
  geom_text(
    data = demText, aes(x = myTime, y = value, label = valueK),
    color = "white", family = lato_font
  ) +
  scale_color_manual(values = colorBar) +
  scale_x_continuous(
    expand = c(0.03, 0.03), breaks = 1:9, labels = labels_on_x[1:9]
  ) +
  guides(color = guide_legend(reverse = TRUE, override.aes = list(size = 5))) +
  labs(
    title = "Figure 2.4e: Demand and Capacity by Month",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_minimal() +
  theme(
    plot.title.position = "plot",
    text = element_text(family = lato_font),
    # A grey panel.grid line setting used to precede this and was
    # immediately overridden by element_blank(); it has been removed.
    panel.grid = element_blank(),
    axis.title = element_blank(),
    legend.position = "top",
    legend.title = element_blank(),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    axis.text.y = element_blank(),
    plot.caption = element_text(color = "grey40", face = "italic", size = 10),
    plot.title = element_text(size = 18),
    axis.text.x = element_text(size = 11),
    legend.text = element_text(family = lato_font, color = "grey30", size = 10)
  )

# --- Figure 2.4f: plot the gap (demand minus capacity) directly ---
dataForSegment %>%
  mutate(gapDemand = DEMAND - CAPACITY) %>%
  ggplot(aes(x = myTime, y = gapDemand)) +
  geom_line(color = colorBar[2], linewidth = 1.5) +
  scale_x_continuous(
    expand = c(0.03, 0.03), breaks = 1:9, labels = labels_on_x[1:9]
  ) +
  scale_y_continuous(limits = c(0, 30000), breaks = seq(0, 30000, 5000)) +
  labs(
    title = "Unmet Demand by Month (Figure 2.4f)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  theme_classic() +
  theme(
    plot.title.position = "plot",
    text = element_text(family = lato_font),
    axis.title = element_blank(),
    plot.margin = margin(1, 0.7, 0.5, 0.7, "cm"),
    plot.caption = element_text(color = "grey40", face = "italic", size = 10),
    plot.title = element_text(size = 18, vjust = 5),
    axis.text = element_text(size = 11)
  )
# Load data:
attritionData <- read_excel("E:/storytelling/2.5 EXERCISE.xlsx", skip = 5)

#----------------------------------------------
# R Codes for FIGURE 2.5b Dot plot (page 77)
#----------------------------------------------
names(attritionData) <- c("year", "attRate")

# Drop the "AVG" row so only yearly observations remain, then make the
# year numeric so it can sit on a continuous axis:
attritionData <- attritionData %>%
  filter(year != "AVG") %>%
  mutate(year = as.numeric(year))

avgAttr <- mean(attritionData$attRate)

# Theme, scales and the dashed average line shared by Figures 2.5b-2.5e.
# The list is added after each plot's own geoms, so layers are drawn in the
# same order as when every plot spelled these calls out itself.
attrition_common <- list(
  theme_classic(),
  theme(
    plot.title.position = "plot",
    text = element_text(family = lato_font),
    plot.caption = element_text(color = "grey40", face = "italic", size = 10),
    plot.title = element_text(size = 18, vjust = 1, color = "grey20"),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    axis.title = element_blank(),
    axis.text = element_text(size = 12)
  ),
  scale_y_continuous(
    breaks = seq(0, 0.16, 0.02), limits = c(0, 0.16), labels = percent
  ),
  scale_x_continuous(breaks = 2010:2019),
  geom_hline(yintercept = avgAttr, linetype = "dashed", color = "grey40")
)

# Direct labels shared by Figures 2.5c and 2.5d.
# NOTE(review): the label texts and positions are hard-coded rather than
# derived from avgAttr / the data - update them if the data changes.
attrition_line_labels <- list(
  annotate(
    "text", label = "AVG 7.5%", family = lato_font,
    x = 2018, y = 0.068, size = 4, hjust = 0, vjust = 0.5,
    color = colorBar[2]
  ),
  annotate(
    "text", label = "9.1%", family = lato_font,
    x = 2018.5, y = 0.1, size = 4.5, hjust = 0, vjust = 0.5,
    color = colorBar[2]
  )
)

# --- Figure 2.5b: dot plot ---
attritionData %>%
  ggplot(aes(x = year, y = attRate)) +
  geom_point(size = 4, color = colorBar[2]) +
  labs(
    title = "Attrition Rate over Time (Figure 2.5b)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  attrition_common +
  annotate(
    "text", label = "AVERAGE 7.5%", family = lato_font,
    x = 2010, y = 0.08, size = 4.5, hjust = 0, vjust = 0.5,
    color = colorBar[2]
  )

# --- Figure 2.5c: line graph with the latest year highlighted ---
dfPoint <- attritionData %>%
  filter(year == max(year))

attritionData %>%
  ggplot(aes(x = year, y = attRate)) +
  geom_line(linewidth = 1.2, color = colorBar[2]) +
  geom_point(data = dfPoint, color = colorBar[2], size = 4) +
  labs(
    title = "Attrition Rate over Time (Figure 2.5c)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  attrition_common +
  attrition_line_labels

# --- Figure 2.5d: line graph with the area below the average shaded ---
# The average is stored as a column so it can be mapped inside aes().
attritionData <- attritionData %>%
  mutate(avgAttr = mean(attRate))

attritionData %>%
  ggplot(aes(x = year, y = attRate)) +
  # geom_rect() inherits the plot data, so one rectangle is drawn per row;
  # the very small alpha presumably compensates for that stacking. The
  # former aes(fill = "Stage 1") was overridden by the constant fill and
  # has been removed.
  geom_rect(
    aes(xmin = -Inf, xmax = Inf, ymin = -Inf, ymax = avgAttr),
    fill = colorBar[2], alpha = 0.1 / 7, show.legend = FALSE
  ) +
  geom_line(linewidth = 1.2, color = colorBar[2]) +
  geom_point(data = dfPoint, color = colorBar[2], size = 4) +
  labs(
    title = "Attrition Rate over Time (Figure 2.5d)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  attrition_common +
  attrition_line_labels

# --- Figure 2.5e: area graph ---
attritionData %>%
  ggplot(aes(x = year, y = attRate)) +
  geom_area(linewidth = 1.2, fill = colorBar[2]) +
  geom_point(data = dfPoint, color = colorBar[2], size = 4) +
  labs(
    title = "Attrition Rate over Time (Figure 2.5e)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  attrition_common +
  annotate(
    "text", label = "AVG 7.5%", family = lato_font, fontface = "bold",
    x = 2018 - 0.2, y = 0.068, size = 3.5, hjust = 0, vjust = 0.5,
    color = "white"
  )
# Load data:
library(ggrepel)   # For non-overlapping text labels.
library(patchwork) # For Plot Composition: https://patchwork.data-imaginist.com/articles/guides/assembly.html

dataEx2.8 <- read_excel("E:/storytelling/2.8 EXERCISE.xlsx", skip = 3)

# Prepare data for plotting:
names(dataEx2.8) <- c("timeline", "loanLossRev", "npl", "loanRevPer", "nplRate")

# Dollar labels with exactly two decimals. sprintf() keeps trailing zeros
# (e.g. "$1.50"), which previously had to be patched in by hand for 2018.
dataEx2.8 <- dataEx2.8 %>%
  filter(!is.na(loanLossRev)) %>%
  mutate(
    labelLoan = sprintf("$%.2f", round(loanLossRev, 2)),
    labelNPL = sprintf("$%.2f", round(npl, 2))
  )

# Rows 1-5 and 10 go to the annual (left) panel and rows 6-9 to the
# quarterly (right) panel. Presumably rows 6-9 are the 2019 quarters and
# row 10 the 2019 annual figure - TODO confirm against the sheet.
dataLeft <- dataEx2.8 %>%
  slice(c(1:5, 10)) %>%
  mutate(timeNew = 1:6)

dataRight <- dataEx2.8 %>%
  slice(6:9) %>%
  mutate(timeNew = 1:4)

dataLeftLong <- dataLeft %>%
  select(-timeline) %>%
  pivot_longer(cols = c("loanLossRev", "npl"))

colorsLine <- c("grey40", "firebrick")
label_on_xLine <- 2014:2019
textSize <- 4

# --- Left panel: annual view, with a grey band behind the last year ---
figLeft <- dataLeftLong %>%
  ggplot(aes(x = timeNew, y = value, color = name)) +
  geom_rect(
    aes(xmin = 5.75, xmax = 6.25, ymin = -Inf, ymax = Inf),
    fill = "grey85", color = "white", show.legend = FALSE
  ) +
  geom_line(linewidth = 1.2, show.legend = FALSE) +
  geom_point(size = 3.5, show.legend = FALSE) +
  geom_text(
    data = dataLeftLong %>% filter(name == "loanLossRev"),
    aes(label = labelLoan),
    vjust = -1.2, show.legend = FALSE, family = lato_font, size = textSize
  ) +
  geom_text(
    data = dataLeftLong %>% filter(name != "loanLossRev"),
    aes(label = labelNPL),
    vjust = 2.2, show.legend = FALSE, family = lato_font, size = textSize
  ) +
  scale_colour_manual(values = colorsLine) +
  scale_x_continuous(
    breaks = 1:6, labels = label_on_xLine,
    expand = c(0, 0), limits = c(0.8, 6.25)
  ) +
  scale_y_continuous(limits = c(0, 1.9)) +
  labs(
    title = "Annual Loan Loss Reserves & Non-Performing Loans (NPLs)",
    subtitle = "BILLIONS",
    x = "FISCAL YEAR"
  ) +
  theme_classic() +
  theme(
    plot.title.position = "plot",
    text = element_text(family = lato_font),
    plot.subtitle = element_text(color = "grey40", size = 10),
    plot.title = element_text(size = 14, vjust = 1, color = "grey20"),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    axis.text.x = element_text(size = 12, color = "grey40"),
    axis.text.y = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    axis.title.y = element_blank(),
    axis.title.x = element_text(hjust = 0.01, color = "grey40", size = 10)
  )

# --- Right panel: quarterly view on the same y range ---
dataRightLong <- dataRight %>%
  select(-timeline) %>%
  pivot_longer(cols = c("loanLossRev", "npl"))

# Horizontal extent of the grey background and of the x axis:
a <- 0.6
b <- 4.4

figRight <- dataRightLong %>%
  ggplot(aes(x = timeNew, y = value, color = name)) +
  geom_rect(
    aes(xmin = a, xmax = b, ymin = -Inf, ymax = Inf),
    fill = "grey85", color = "white", show.legend = FALSE
  ) +
  geom_line(linewidth = 1.2, show.legend = FALSE) +
  geom_point(size = 3.5, show.legend = FALSE) +
  # Labels are split by series and by quarter so each group can be nudged
  # above (vjust = -1) or below (vjust = 2) its point.
  geom_text_repel(
    data = dataRightLong %>% filter(name == "loanLossRev", timeNew %in% c(1, 2)),
    aes(label = labelLoan), direction = "y", vjust = -1, show.legend = FALSE,
    family = lato_font, size = textSize
  ) +
  geom_text_repel(
    data = dataRightLong %>% filter(name == "loanLossRev", timeNew %in% c(3, 4)),
    aes(label = labelLoan), direction = "y", vjust = 2, show.legend = FALSE,
    family = lato_font, size = textSize
  ) +
  geom_text_repel(
    data = dataRightLong %>% filter(name != "loanLossRev", timeNew %in% c(1, 2)),
    aes(label = labelNPL), direction = "y", vjust = 2, show.legend = FALSE,
    family = lato_font, size = textSize
  ) +
  geom_text_repel(
    data = dataRightLong %>% filter(name != "loanLossRev", !timeNew %in% c(1, 2)),
    aes(label = labelNPL), direction = "y", vjust = -1, show.legend = FALSE,
    family = lato_font, size = textSize
  ) +
  scale_colour_manual(values = colorsLine) +
  scale_x_continuous(
    labels = c("Q1", "Q2", "Q3", "Q4"), breaks = 1:4,
    expand = c(0, 0), limits = c(a, b)
  ) +
  scale_y_continuous(limits = c(0, 1.9)) +
  labs(
    title = "2019 Quarterly View",
    subtitle = "BILLIONS",
    x = "FISCAL YEAR"
  ) +
  theme_classic() +
  theme(
    plot.title.position = "plot",
    text = element_text(family = lato_font),
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.line.y = element_blank(),
    axis.ticks.y = element_blank(),
    plot.subtitle = element_text(color = "grey40", size = 10),
    plot.title = element_text(size = 14, vjust = 1, color = "grey20"),
    axis.text.x = element_text(size = 12, color = "grey40"),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    axis.title.x = element_text(hjust = 0.02, color = "grey40", size = 10)
  )

# Compose the panels; the negative-width spacer pulls them closer together.
figLeft +
  plot_spacer() +
  figRight +
  plot_layout(widths = c(2.5, -0.2, 1.2))

dataEx2.12 <- read_excel("E:/storytelling/2.12 EXERCISE.xlsx", skip = 6)
# Keep rows 2-5 and the first three columns, then give them short names.
dataEx2.12 <- dataEx2.12 %>%
  slice(2:5) %>%
  select(1:3) %>%
  setNames(c("resp", "lastY", "thisY"))

# Wide table: both year columns coerced to numeric.
dataEx2.12Wide <- mutate(
  dataEx2.12,
  lastY = as.numeric(lastY),
  thisY = as.numeric(thisY)
)

# Long table: one row per (resp, year column). `resp` becomes a factor whose
# levels follow the row order of the wide table, and `labelPer` is the value
# formatted as a whole-number percentage string.
dataEx2.12Long <- dataEx2.12Wide %>%
  mutate(resp = factor(resp, levels = resp)) %>%
  pivot_longer(cols = c("thisY", "lastY")) %>%
  mutate(labelPer = paste0(round(100 * value), "%"))

# Fill colours for the four response categories.
col_agr_alot <- "#3e6487"
col_agr <- "#829cb2"
col_dis <- "#edad88"
col_dis_alot <- "#e36c33"

# Rows whose share is above 10%.
dataPercent <- filter(dataEx2.12Long, value > 0.1)
# Figure 2.12c: horizontal stacked bars, one bar per year column, filled by
# response category, with percentage labels centred inside the segments.
dataEx2.12Long %>%
  ggplot(aes(y = name, x = value, fill = resp)) +
  geom_col() +
  # Labels are drawn from the full data so that position_stack() stacks them
  # exactly like the bars; segments at or below 10% get an empty label instead
  # of being dropped, which would shift the remaining labels off their bars.
  geom_text(
    aes(label = ifelse(value > 0.1, labelPer, "")),
    position = position_stack(vjust = 0.5),
    color = "white",
    size = 4.5,
    family = lato_font
  ) +
  scale_fill_manual(
    values = c(
      `STRONGLY DISAGREE` = col_dis_alot,
      DISAGREE = col_dis,
      AGREE = col_agr,
      `STRONGLY AGREE` = col_agr_alot
    )
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  scale_y_discrete(labels = c("LAST YEAR", "THIS YEAR")) +
  guides(fill = guide_legend(reverse = TRUE)) +
  labs(
    title = "Divergent Stacked Bars (Figure 2.12c)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  # The complete theme goes first; the tweaks below are layered on top of it.
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    plot.title.position = "plot",
    plot.title = element_text(size = 16, vjust = 1, color = "grey20"),
    plot.caption = element_text(color = "grey40", face = "italic", size = 10),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.x = element_blank(),
    axis.text.y = element_text(size = 11),
    legend.position = "top",
    legend.title = element_blank(),
    legend.text = element_text(color = "grey30", size = 9),
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(0.4, "cm")
  )

library(stringr)
# Data for the slopegraph:
#   timeStart - numeric x position (1 for the "lastY" rows, 2 otherwise).
#   respAdj   - response label with "STRONGLY " broken onto its own line.
# `resp` is converted to character explicitly so the result never mixes factor
# and character values; labels without "STRONGLY " pass through unchanged.
dfSlopChart <- dataEx2.12Long %>%
  mutate(
    timeStart = case_when(
      name == "lastY" ~ 1,
      TRUE ~ 2
    ),
    respAdj = str_replace_all(as.character(resp), "STRONGLY ", "STRONGLY\n")
  )
# Figure 2.12d: slopegraph with one line per response category running from
# LAST YEAR (x = 1) to THIS YEAR (x = 2).
dfSlopChart %>%
  ggplot(aes(x = timeStart, y = value, group = resp)) +
  geom_line(size = 1, color = "grey50") +
  geom_point(size = 4, color = "grey30") +
  # Percentage labels just left of the first-year points.
  geom_text(
    data = filter(dfSlopChart, name == "lastY"),
    aes(x = 0.92, label = labelPer),
    size = 4.5, family = lato_font, color = "grey30"
  ) +
  # Percentage labels just right of the second-year points.
  geom_text(
    data = filter(dfSlopChart, name != "lastY"),
    aes(x = 2.08, label = labelPer),
    size = 4.5, family = lato_font, color = "grey30"
  ) +
  # Category names, left-aligned, further to the right of the second-year points.
  geom_text(
    data = filter(dfSlopChart, name != "lastY"),
    aes(x = 2.15, label = respAdj),
    hjust = 0, size = 4, family = lato_font, color = "grey30"
  ) +
  # The x limits extend past 2 to leave room for the text drawn at x = 2.08
  # and x = 2.15.
  scale_x_continuous(
    limits = c(0.9, 2.35),
    breaks = 1:2,
    labels = c("LAST YEAR", "THIS YEAR")
  ) +
  labs(
    title = "Slopegraph (Figure 2.12d)",
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  # The complete theme goes first; the tweaks below are layered on top of it.
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    plot.title.position = "plot",
    plot.title = element_text(size = 18, vjust = 1, color = "grey20"),
    plot.caption = element_text(color = "grey40", face = "italic", size = 10),
    plot.margin = margin(0.5, 0.3, 0.5, 0.3, "cm"),
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 11)
  )

# Load the Exercise 2.13 sheet, skipping the first 4 rows of the workbook.
dataEx2.13 <- read_excel("E:/storytelling/2.13 EXERCISE.xlsx", skip = 4)
# Keep the first three columns, give them short names, and add a numeric
# index (1-9) used as the x position in the plot.
# NOTE(review): `1:9` assumes the sheet yields exactly nine rows, matching the
# nine axis labels below -- mutate() will error on any other row count.
dataEx2.13 <- dataEx2.13 %>%
  select(1:3) %>%
  setNames(c("timeQ", "comRate", "resRate")) %>%
  mutate(timeN = 1:9)

# X-axis tick labels: the year is shown only under each Q1.
label_Xaxis <- c(
  "Q1\n 2017", "Q2", "Q3", "Q4",
  "Q1\n 2018", "Q2", "Q3", "Q4",
  "Q1\n 2019"
)
library(ggtext)
library(ggrepel)
library(scales)
# Markdown/HTML title rendered by ggtext::element_markdown(); the coloured
# words act as an inline legend for the two series.
# NOTE(review): "Compalation" looks like a typo (perhaps "Completion") --
# confirm the intended wording before changing the rendered title.
title2.13 <- "<span style = 'color:#377eb8'>Compalation</span> and <span style = 'color:#ff7f00'>Response</span> Rate from Q1-2017 to Q1-2019"

# Figure 2.13: bars for `comRate` with a line for `resRate` overlaid.
# `resRate` is multiplied by 20 so the line can be drawn on the bars' y scale;
# its text labels still show the unscaled percentage.
dataEx2.13 %>%
  ggplot(aes(x = timeN, y = comRate)) +
  geom_col(fill = colorBar[2], width = 0.7) +
  geom_text(
    aes(label = percent(comRate)),
    family = lato_font, color = colorBar[2], vjust = -0.5, size = 4
  ) +
  geom_line(aes(y = 20 * resRate), color = colorBar[1], size = 1) +
  geom_point(aes(y = 20 * resRate), color = colorBar[1], size = 3) +
  geom_text_repel(
    aes(y = 20 * resRate, label = percent(resRate)),
    family = lato_font, color = colorBar[1], size = 4,
    direction = "y", force = 19
  ) +
  scale_x_continuous(breaks = 1:9, labels = label_Xaxis, expand = c(0, 0)) +
  scale_y_continuous(limits = c(0, 1.05), expand = c(0, 0)) +
  labs(
    title = title2.13,
    caption = "Source: https://www.storytellingwithdata.com/"
  ) +
  # theme_minimal() is a complete theme and replaces anything set before it,
  # so it must come before the font family and title-position tweaks.
  theme_minimal() +
  theme(
    text = element_text(family = lato_font),
    plot.title.position = "plot",
    plot.title = element_markdown(
      size = 14.9, vjust = 1, color = "grey20", face = "bold"
    ),
    plot.caption = element_text(color = "grey40", face = "italic", size = 10),
    plot.margin = margin(0.5, 0.7, 0.5, 0.7, "cm"),
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text.y = element_blank(),
    axis.text.x = element_text(hjust = 0.5, size = 12)
  )

# Another option is to use a dual-axis chart; interested readers can consult
# the references linked at this point in the original post. Note that this
# style of data visualization also has its critics (further reading is linked
# in the original post).