ex_1 <- read_excel("../data/storytelling_practice/chapter_2/2.1 EXERCISE.xlsx",
col_types = c(
"text", "numeric", "numeric","numeric", "numeric"
)
)
# save and rds file
write_rds(ex_1, "2_1_EX.rds")
# plot the table
kable(ex_1 %>%
mutate(
`% Accounts` = percent(`% Accounts`),
`Revenue ($M)` = dollar(`Revenue ($M)`),
`% Revenue` = percent(`% Revenue`)
)
) %>%
kable_styling(bootstrap_options = c("responsive","striped"),
full_width = F)
| Tier | # of Accounts | % Accounts | Revenue ($M) | % Revenue |
|---|---|---|---|---|
| A | 77 | 7.08% | $4.68 | 25.00% |
| A+ | 19 | 1.75% | $3.93 | 21.00% |
| B | 338 | 31.07% | $5.98 | 32.00% |
| C | 425 | 39.06% | $2.80 | 15.00% |
| D | 24 | 2.21% | $0.37 | 2.00% |
Add extra information into the table:
# Find out the actual information
rest <- ex_1 %>%
summarise_if(is.numeric, sum) %>%
mutate(Tier = "Rest") %>%
select(Tier, everything()) %>%
bind_rows(ex_1) %>%
filter(Tier == "Rest")
# Create the other missing data
Other <- tibble(
Tier = "Other",
`# of Accounts` = (rest[[2]]*(1 - rest[[3]]))/rest[[3]],
`% Accounts` = 1 - rest[[3]],
`Revenue ($M)` = (rest[[4]]*(1 - rest[[5]]))/rest[[5]],
`% Revenue` = 1 - rest[[5]]
)
# bind the information to add the total
final_table <- ex_1 %>%
bind_rows(Other) %>%
summarise_if(is.numeric, sum) %>%
mutate(Tier = "Total") %>%
select(Tier, everything()) %>%
bind_rows(ex_1 %>%
bind_rows(Other)) %>%
mutate(
Tier = factor(Tier, levels = c("A+","A", "B", "C", "D", "Other", "Total"))
) %>%
arrange(Tier)
Create an HTML table:
final_table %>%
mutate(
`% Accounts` = percent(`% Accounts`, digits = 0),
`Revenue ($M)` = dollar(`Revenue ($M)`),
`% Revenue` = percent(`% Revenue`, digits = 0)
) %>%
kable(
col.names = c("Tier","#", "% OF TOT", "$M", "% OF TOT"), align = "c"
) %>%
column_spec(5, width = "3cm") %>%
column_spec(
3, border_right = T, extra_css = "border-right: 2px solid;"
) %>%
pack_rows(
start_row = 7, end_row = 7, hline_before = T,
label_row_css = "border-bottom: 2px solid;line-height: 0;padding: 0;"
) %>%
row_spec(0, bold = T, color = "black", background = "ghostwhite",
extra_css = "border-bottom: 2px solid #777777;") %>%
column_spec(1, bold = T, color = "black", background = "ghostwhite",
extra_css = "border-right: 2px solid #777777;") %>%
add_header_above(header = c(" " = 1, "Accounts" = 2, "Revenue" = 2),
bold = T, color = "black", background = "ghostwhite",
extra_css = "border-left: 2px solid #777777;") %>%
kable_styling(bootstrap_options = c("responsive", "bordered"),
full_width = F)
| Tier | # | % OF TOT | $M | % OF TOT |
|---|---|---|---|---|
| A+ | 19 | 2% | $3.93 | 21% |
| A | 77 | 7% | $4.68 | 25% |
| B | 338 | 31% | $5.98 | 32% |
| C | 425 | 39% | $2.80 | 15% |
| D | 24 | 2% | $0.37 | 2% |
| Other | 205 | 19% | $0.94 | 5% |
| Total | 1088 | 100% | $18.70 | 100% |
Create the pie chart similar to this one:
# add other in the data
all_ex_1 <- ex_1 %>%
bind_rows(Other) %>%
mutate(
Tier = factor(Tier, levels = c("Other","D","C", "B", "A","A+"))
)
# plot the pie chart Accounts
pie_accounts <- all_ex_1 %>%
ggplot(aes(x = "", y = `% Accounts`, fill = Tier)) +
geom_col(stat = "identity", width = 1) +
# Convert to pie (polar coordinates) and add labels
coord_polar("y", start = 0) +
geom_text(
aes(
label = Tier#percent(`% Accounts`, digits = 0)
),
position = position_stack(vjust = 0.7),
color = "white", cex = 4.25
) +
# Add color scale (hex colors)
scale_fill_manual(
values=c("#55DDE0", "#33658A", "#2F4858", "#F6AE2D", "#F26419", "#999999")
) +
# Remove labels and add title
labs(x = NULL, y = NULL, fill = NULL, title = "% Total of Accounts") +
# Tidy up the theme
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5, color = "#666666"),
legend.position = "none")
# plot the pie chart Accounts
pie_revenue <- all_ex_1 %>%
ggplot(aes(x = "", y = `% Revenue`, fill = Tier)) +
geom_col(stat = "identity", width = 1) +
# Convert to pie (polar coordinates) and add labels
coord_polar("y", start = 0) +
geom_text(
aes(label = Tier),
position = position_stack(vjust = 0.7),
color = "white", cex = 4.25
) +
# Add color scale (hex colors)
scale_fill_manual(
values=c("#55DDE0", "#33658A", "#2F4858", "#F6AE2D", "#F26419", "#999999")
) +
# Remove labels and add title
labs(x = NULL, y = NULL, fill = NULL, title = "% Total of Revenue") +
# Tidy up the theme
theme_classic() +
theme(axis.line = element_blank(),
axis.text = element_blank(),
axis.ticks = element_blank(),
plot.title = element_text(hjust = 0.5, color = "#666666"),
legend.position = "none")
# grid the plots
grid.arrange(pie_accounts, pie_revenue, nrow = 1)
Other ways to improve a pie chart in this stackOverflow link
Bar chart to improve the visualization:
# coord plots for accounts
bar_accounts <- all_ex_1 %>%
ggplot(aes(x = Tier, y = `% Accounts`)) +
geom_col(fill = "dodgerblue4", width = 0.7) +
coord_flip() +
geom_text(aes(label = if_else(`% Accounts` > 0.05,
percent(`% Accounts`, digits = 0), NULL)),
hjust = 1.1, color = "white", size = 4.5) +
labs(
title = expression(paste("TIER | % OF TOTAL ", bold("ACCOUNTS")))
) +
theme_void() +
theme(
plot.title = element_text(hjust = -0.75),
axis.title.y = element_blank(),
axis.text.y = element_text(face = "bold", size = 13),
axis.title.x = element_blank(),
axis.text.x = element_blank()
)
# coord plots for revenue
bar_revenue <- all_ex_1 %>%
ggplot(aes(x = Tier, y = `% Revenue`)) +
geom_col(fill = "dodgerblue4", width = 0.7) +
coord_flip() +
geom_text(aes(label = if_else(`% Revenue` > 0.05,
percent(`% Revenue`, digits = 0), NULL)),
hjust = 1.1, color = "white", size = 4.5) +
labs(title = expression(paste(" | % OF TOTAL ", bold("REVENUE")))) +
theme_void() +
theme(
plot.title = element_text(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_blank(),
)
# merge both chart
grid.arrange(bar_accounts, bar_revenue, nrow = 1)
Bar chart improvment:
# create teh table
pivot_ex_1 <- all_ex_1 %>%
select(Tier, `% Accounts`, `% Revenue`) %>%
mutate(`% Accounts` = round(`% Accounts`, 2)) %>%
pivot_longer(cols = -Tier, names_to = "names", values_to = "perc")
# define the colors used on the plot
used_colors <- c("#6fc3ff", "dodgerblue4")
# create the plot
pivot_ex_1 %>%
ggplot(aes(x = Tier, y = perc, fill = names)) +
geom_col(position = "dodge", width = 0.7) +
coord_flip() +
scale_y_continuous(labels = percent_format(accuracy = 1), position = "right") +
scale_fill_manual(
values = c("#6FB8FF", "dodgerblue4"),
labels = c("% ACCOUNTS","% REVENUE")
) +
labs(
title = "% OF TOTAL ACCOUNTS vs. REVENUE\n"
) +
theme_void() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_text(face = "bold", size = 12),
axis.line.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(),
axis.line.x = element_line(size = 0.5),
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
plot.margin = unit(c(1, 4, 1, 1), "lines")
)
Flip the chart:
library(grid)
pivot_ex_1 %>%
ggplot(aes(x = Tier, y = perc, fill = names)) +
geom_col(position = "dodge", width = 0.7) +
scale_y_continuous(labels = percent_format(accuracy = 1),
position = "left",
breaks = seq(0, 0.40, 0.1),
limits = c(0, 0.45)) +
scale_fill_manual(
values = c("#6FB8FF", "dodgerblue4"),
labels = c("% ACCOUNTS","% REVENUE")
) +
theme_void() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_text(face = "bold", size = 10),
axis.title.x = element_blank(),
axis.text.x = element_text(),
axis.line.x = element_blank(),
legend.position = "none",
plot.margin = unit(c(1, 4, 1, 1), "lines")
) +
annotation_custom(textGrob(
expression('% OF TOTAL '),
gp = gpar(col = 'black', fontsize = 13), just = "left"
),
xmin = 0, xmax = 1, ymin = 0.45) +
annotation_custom(textGrob(
expression(phantom('% OF TOTAL ') * bold("ACCOUNTS ")),
gp = gpar(col = used_colors[1], fontsize = 13), just = "left"
),
xmin = 0, xmax = 1, ymin = 0.45) +
annotation_custom(textGrob(
expression(phantom("% OF TOTAL") * phantom(bold("ACCOUNTS ")) *
" vs. "),
gp = gpar(col = "black", fontsize = 13), just = "left"
),
xmin = 0, xmax = 1, ymin = 0.45) +
annotation_custom(textGrob(
expression(phantom("% OF TOTAL ACCOUNTS vs. ") * bold(" REVENUE")),
gp = gpar(col = used_colors[2], fontsize = 13), just = "left"
),
xmin = 0, xmax = 1, ymin = 0.45)
Help the user to understand the difference between the different levels:
pivot_ex_1 %>%
ggplot(aes(x = Tier, y = perc)) +
geom_col(aes(fill = names), position = "dodge", width = 0.7) +
geom_point(aes(group = names), position = position_dodge(width = 0.75), inherit.aes = TRUE) +
geom_line(aes(group = Tier)) +
scale_y_continuous(labels = percent_format(accuracy = 1), position = "left") +
scale_fill_manual(
values = c("#6fc3ff", "dodgerblue4"),
labels = c("% ACCOUNTS","% REVENUE")
) +
labs(
title = "% OF TOTAL ACCOUNTS vs. REVENUE\n"
) +
theme_void() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_text(face = "bold", size = 12),
axis.line.y = element_line(size = 0.5),
axis.title.x = element_blank(),
axis.text.x = element_text(),
axis.line.x = element_blank(),
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
plot.margin = unit(c(1, 4, 1, 1), "lines")
)
That would be the same than:
pivot_ex_1 %>%
ggplot(aes(x = Tier, y = perc)) +
geom_point(aes(group = names), position = "identity") +
geom_line(aes(group = Tier)) +
scale_y_continuous(labels = percent_format(accuracy = 1), position = "left") +
scale_fill_manual(
values = c("#6FB8FF", "dodgerblue4"),
labels = c("% ACCOUNTS","% REVENUE")
) +
labs(
title = "% OF TOTAL ACCOUNTS vs. REVENUE\n"
) +
theme_void() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_text(face = "bold", size = 12),
axis.line.y = element_line(size = 0.5),
axis.title.x = element_blank(),
axis.text.x = element_text(),
axis.line.x = element_blank(),
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
plot.margin = unit(c(1, 4, 1, 1), "lines")
)
Create a slope graph based on the previous transformations:
# filter the percentages to plot
ex_1_revenue <- pivot_ex_1 %>%
filter(names == "% Revenue")
ex_1_accounts <- pivot_ex_1 %>%
filter(names == "% Accounts")
# # define palette of colors
# library(RColorBrewer)
# my_blues <- brewer.pal(n = 9, "Blues")[4:9]
pivot_ex_1 %>%
# pivot_wider(names_from = names, values_from = perc) %>%
ggplot(aes(x = names, y = perc, group = Tier)) + #, col = Tier
geom_point() +
geom_line() +
annotate("text", x = 0.95, y = ex_1_accounts$perc,
label = percent(ex_1_accounts$perc, digits = 0), hjust = 1)+
annotate("text", x = 2.05, y = ex_1_revenue$perc,
label = percent(ex_1_revenue$perc, digits = 0), hjust = 0) +
annotate("text", x = 2.25, y = ex_1_revenue$perc,
label = ex_1_revenue$Tier, hjust = 0) +
scale_x_discrete(
labels = c("% TOT.\nACCOUNTS", "% TOT.\nREVENUE")
) +
labs(title = "% OF TOTAL ACCOUNTS vs. REVENUE\n") +
theme_void() +
theme(
plot.title = element_text(hjust = 0.5),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
axis.title.x = element_blank(),
axis.text.x = element_text(),
axis.line.x = element_blank(),
legend.position = "right",
legend.justification = "left",
legend.text = element_text(size = 12),
legend.title = element_blank(),
plot.margin = unit(c(1, 4, 1, 1), "lines")
)
Load the data:
ex_2 <- read_excel("../data/storytelling_practice/chapter_2/2.2 EXERCISE.xlsx")
# transform to year
library(lubridate)
# ex_2 %>%
# mutate(`Campaign Year` = ymd(`Campaign Year`))
Let’s do a bar graph:
ex_2 %>%
ggplot(aes(x = factor(`Campaign Year`), y = `Meals Served`)) +
geom_col(fill = "dodgerblue4", width = 0.7) +
theme_void() +
labs(title = "Meals served over time") +
xlab(label = "CAMPAIGN YEAR") +
ylab(label = "# OF MEALS SERVED") +
scale_y_continuous(labels = scales::number,
breaks = seq(0, max(ex_2$`Meals Served`),50000)) +
# scale_x_continuous(labels = scales::number_format(accuracy = 1, big.mark = "")) +
theme(
axis.title.x = element_text(hjust = 0),
axis.title.y = element_text(hjust = 1),
axis.text = element_text()
)
Now a line chart:
ex_2 %>%
ggplot(aes(x = `Campaign Year`, y = `Meals Served`)) +
geom_line(color = "dodgerblue4") +
geom_point(aes(y = if_else(`Campaign Year` == min(`Campaign Year`) |
`Campaign Year` == max(`Campaign Year`),
`Meals Served`, NULL)), col = "dodgerblue4") +
geom_text(aes(label = if_else(`Campaign Year` == min(`Campaign Year`),
number(`Meals Served`, big.mark = ","), NULL)),
vjust = 1.5, col = "dodgerblue4") +
geom_text(aes(label = if_else(`Campaign Year` == max(`Campaign Year`),
number(`Meals Served`, big.mark = ","), NULL)),
vjust = -1, col = "dodgerblue4") +
theme_void() +
labs(title = "Meals served over time",
subtitle = "# OF MEALS SERVED") +
xlab(label = "CAMPAIGN YEAR") +
ylim(0, max(ex_2$`Meals Served`)) +
scale_x_continuous(labels = as.character(ex_2$`Campaign Year`),
breaks = ex_2$`Campaign Year`) +
theme(
axis.title.x = element_text(hjust = 0),
axis.text.x = element_text(),
axis.line.x = element_line(),
axis.title.y = element_blank(),
axis.text.y = element_blank()
)
ex_3 <- read_excel("../data/storytelling_practice/chapter_2/2.3 EXERCISE.xlsx")
# format to the data
library(lubridate)
library(magrittr)
ex_3 %<>%
mutate(DATE = parse_date_time(DATE, "%Y-%m"),
YEAR = year(DATE),
MONTH = factor(
str_to_upper(month(DATE, label = F)),
levels = seq(1,12,1),
labels = c("JAN","FEB","MAR","APR","MAY","JUN","JUL",
"AUG","SEP","OCT","NOV","DEC")),
UNMET_DEMAND = DEMAND - CAPACITY
) %>%
arrange(desc(YEAR, MONTH))
# prepare the data
ex_3_2019 <- ex_3 %>%
filter(YEAR == 2019) %>%
select(-DATE, -YEAR) %>%
pivot_longer(cols = -MONTH, names_to = "names", values_to = "values")
# plot the data
ex_3_2019 %>%
filter(names != "UNMET_DEMAND") %>%
ggplot(aes(x = MONTH, y = values)) +
geom_col(aes(fill = names), position = "dodge", col = "black") +
coord_flip() +
geom_text(aes(group = names, label = number(values, big.mark = ".")),
position = position_dodge(width = 1), inherit.aes = TRUE,
hjust = -0.1, size = 3.5) +
ylim(0, 57000) +
labs(title = "Demand and capacity by Month") +
theme_void() +
theme(
legend.position = "bottom",
legend.title = element_blank(),
axis.text.y = element_text(size = 10)
)
Bar chart:
# initial plot
ex_3_2019 %>%
filter(names != "UNMET_DEMAND") %>%
ggplot(aes(x = MONTH, y = values)) +
geom_col(aes(fill = factor(names, levels = c("DEMAND","CAPACITY"))),
position = "dodge", color = "dodgerblue4") +
scale_fill_manual(values = used_colors) +
scale_y_continuous(labels = number_format(big.mark = ".")) +
labs(title = "Demand and capacity by Month\n") +
ylab("NUMBER OF PROJECT HOURS") + xlab("2019") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.title = element_text(colour = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0),
)
Or a line chart:
# plot the results
ex_3_2019 %>%
filter(names != "UNMET_DEMAND") %>%
ggplot(aes(x = MONTH, y = values)) +
geom_line(aes(color = names, group = names)) +
geom_text(aes(label = if_else(
MONTH == "DEC" & names == "DEMAND",
number(round(values/1000), suffix = "K DEMAND"), NULL)
),
col = used_colors[2], hjust = 0.6, vjust = 1.5) +
geom_text(aes(label = if_else(
MONTH == "DEC" & names == "CAPACITY",
number(round(values/1000), suffix = "K CAPACITY"), NULL)),
col = used_colors[1], hjust = 0.6, vjust = -0.75) +
scale_color_manual(values = used_colors) +
scale_y_continuous(labels = scales::number_format(big.mark = "."),
breaks = seq(0, max(ex_3_2019$values),10000),
limits = c(0, 55000)) +
labs(title = "Demand and capacity by Month\n") +
ylab("NUMBER OF PROJECT HOURS") + xlab("2019") +
theme_void() +
theme(
legend.position = "none",
axis.title = element_text(colour = "gray58"),
axis.line = element_line(color = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0),
)
A bar chart with alpha parameter:
ex_3_2019 %>%
filter(names != "UNMET_DEMAND") %>%
pivot_wider(names_from = "names", values_from = "values") %>%
ggplot(aes(x = MONTH)) +
geom_col(aes(y = DEMAND),
color = "dodgerblue4", fill = "white", width = 0.4) +
geom_col(aes(y = CAPACITY), fill = "dodgerblue4", alpha = 0.4, width = 0.7) +
scale_fill_manual(values = used_colors) +
scale_y_continuous(labels = number_format(big.mark = ".")) +
labs(title = "Demand and capacity by Month",
subtitle = "Demand vs. capacity month by month\n") +
ylab("NUMBER OF PROJECT HOURS") + xlab("MONTH") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.title = element_text(colour = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0),
)
A bar chart to show the unmet demand:
ex_3_2019 %>%
filter(names != "DEMAND") %>%
ggplot(aes(x = MONTH, y = values,
fill = factor(names, levels = c("UNMET_DEMAND", "CAPACITY")))) +
geom_col(position = "stack", color = "dodgerblue4") +
scale_fill_manual(values = c(used_colors[2], used_colors[1])) +
scale_y_continuous(labels = number_format(big.mark = "."),
limits = c(0, 60000),
breaks = seq(0, 60000, 10000)) +
labs(title = "Demand vs. capacity over 2019\n") +
ylab("NUMBER OF PROJECT HOURS") + xlab("MONTH") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.title = element_text(colour = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0),
)
Use points to show the same information:
ex_3_2019 %>%
filter(names != "UNMET_DEMAND") %>%
ggplot(aes(x = MONTH, y = values)) +
geom_line(aes(group = MONTH), size = 10, color = "gray58") +
geom_point(aes(color = names, group = names), size = 10, shape = 21, fill = "white") +
geom_text(aes(label = round(values/1000), color = names)) +
scale_color_manual(values = used_colors) +
scale_y_continuous(labels = scales::number_format(big.mark = "."),
breaks = seq(0, max(ex_3_2019$values),5000),
limits = c(0, 55000)) +
labs(title = "Demand vs. capacity over 2019\n") +
ylab("NUMBER OF PROJECT HOURS (THOUSANDS)") + xlab("2019") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.title = element_text(colour = "gray58"),
axis.title.y = element_text(hjust = 1),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58"),
axis.line.x = element_line(color = "gray58")
)
Or create a line plot with the unmet demand:
ex_3_2019 %>%
filter(names == "UNMET_DEMAND") %>%
mutate(GROUP = "1") %>%
ggplot(aes(x = MONTH, y = values)) +
geom_line(aes(group = GROUP), color = "dodgerblue4") +
geom_point(color = "dodgerblue4") +
scale_y_continuous(labels = number_format(big.mark = "."),
breaks = seq(0, 30000, 5000),
limits = c(0, 30000)) +
labs(title = "Unmet demand over 2019\n") +
ylab("NUMBER OF PROJECT HOURS") + xlab("2019") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.line = element_line(color = "gray58"),
axis.title = element_text(colour = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0),
)
The following table shows attrition rate for a 1-year associate training program for a given company. Spend a moment familiarizing yourself with it, then answer the following questions.
ex_5 <- read_excel("../data/storytelling_practice/chapter_2/2.5 EXERCISE.xlsx")
Define average value and filter it our from the data:
# filter out average
ex_5_raw <- ex_5 %>%
filter(Year != "AVG")
# select average
ex_5_avg <- ex_5 %>%
filter(Year == "AVG")
# define colo
col <- "dodgerblue4"
# scatter plot
ex_5_raw %>%
ggplot(aes(x = Year, y = `Attrition Rate`)) +
geom_point(color = col, size = 3) +
geom_hline(yintercept = ex_5_avg$`Attrition Rate`, linetype = "dashed",
col = col) +
annotate(geom = "text", x = 1.5, y = ex_5_avg$`Attrition Rate` + 0.005,
label = paste("AVERAGE: ", round(ex_5_avg$`Attrition Rate`*100, 1),
"%"),
col = col) +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0,0.18,0.02),
limits = c(0,0.16)) +
labs(title = "Attrition rate over time\n") +
ylab("ATTRITION RATE") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.line = element_line(color = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)
Use a line graph to show the evolution over time:
ex_5_raw %>%
mutate(GROUP = "1") %>%
ggplot(aes(x = Year, y = `Attrition Rate`)) +
geom_line(aes(group = GROUP), color = col, size = 1) +
geom_point(aes(y = if_else(Year == "2019", `Attrition Rate`, NULL)),
col = col, size = 3) +
geom_hline(yintercept = ex_5_avg$`Attrition Rate`, linetype = "dashed",
col = col) +
geom_text(aes(label = if_else(Year == "2019",
paste(round(`Attrition Rate`*100,1),"%"),
NULL)),
col = col, vjust = -1, hjust = 0.25) +
annotate(geom = "text", x = 10, y = ex_5_avg$`Attrition Rate` - 0.005,
label = paste("AVG: ", round(ex_5_avg$`Attrition Rate`*100, 1),
"%"),
col = col, hjust = 0.65) +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0,0.18,0.02),
limits = c(0,0.16)) +
labs(title = "Attrition rate over time\n") +
ylab("ATTRITION RATE") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.line = element_line(color = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)
Another approach would be to add area below the horizontal line:
ex_5_raw %>%
mutate(GROUP = "1") %>%
ggplot(aes(x = Year, y = `Attrition Rate`)) +
geom_line(aes(group = GROUP), color = col, size = 1) +
geom_point(aes(y = if_else(Year == "2019", `Attrition Rate`, NULL)),
col = col, size = 3) +
# geom_hline(yintercept = ex_5_avg$`Attrition Rate`, linetype = "dashed",
# col = col) +
geom_text(aes(label = if_else(Year == "2019",
paste(round(`Attrition Rate`*100,1),"%"),
NULL)),
col = col, vjust = -1, hjust = 0.25) +
annotate(geom = "text", x = 10, y = ex_5_avg$`Attrition Rate` - 0.005,
label = paste("AVG: ", round(ex_5_avg$`Attrition Rate`*100, 1),
"%"),
col = col, hjust = 0.65) +
annotate(geom = "rect", xmin = 1, xmax = 10.75,
ymin = 0, ymax = ex_5_avg$`Attrition Rate`, alpha = 0.3,
fill = col) +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0,0.18,0.02),
limits = c(0,0.16)) +
labs(title = "Attrition rate over time\n") +
ylab("ATTRITION RATE") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.line = element_line(color = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)
Area graph:
ex_5_raw %>%
mutate(GROUP = "1") %>%
ggplot(aes(x = Year, y = `Attrition Rate`)) +
geom_area(aes(group = GROUP), fill = col, size = 1) +
geom_hline(yintercept = ex_5_avg$`Attrition Rate`, linetype = "dashed",
col = "#5783AD") +
annotate(geom = "text", x = 10, y = ex_5_avg$`Attrition Rate` - 0.005,
label = paste("AVG.\n", round(ex_5_avg$`Attrition Rate`*100, 1),
"%"),
col = "#93AFCA", hjust = 1, vjust = 0.1) +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0,0.18,0.02),
limits = c(0,0.16)) +
labs(title = "Attrition rate over time\n") +
ylab("ATTRITION RATE") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.line = element_line(color = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)
Bar chart:
ex_5_raw %>%
mutate(GROUP = "1") %>%
ggplot(aes(x = Year, y = `Attrition Rate`)) +
geom_col(fill = "#275F96", width = 0.7) +
geom_hline(yintercept = ex_5_avg$`Attrition Rate`, linetype = "dashed",
col = col) +
annotate(geom = "text", x = 2, y = ex_5_avg$`Attrition Rate` + 0.005,
label = paste("AVG: ", round(ex_5_avg$`Attrition Rate`*100, 1),
"%"),
col = col, hjust = 0.25) +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0,0.18,0.02),
limits = c(0,0.16)) +
labs(title = "Attrition rate over time\n") +
ylab("ATTRITION RATE") +
theme_void() +
theme(
legend.position = "top",
legend.justification = "left",
legend.title = element_blank(),
axis.line = element_line(color = "gray58"),
axis.text.y = element_text(size = 10, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)
Original graph:
How to transform it to a better chart:
ex_7 <- read_excel("../data/storytelling_practice/chapter_2/2.7 EXERCISE.xlsx")
# pivot the data and get finance and banking data
ex_7_pivot <- ex_7 %>%
rename(name_1 = ...1) %>%
pivot_longer(cols = -name_1, names_to = "year", values_to = "values") %>%
mutate(year = as.integer(year))
# ex_7_finance <- filter(.data = ex_7_pivot, name_1 == "Financial Savings")
# ex_7_pivot <- filter(.data = ex_7_pivot, name_1 == "")
Now create a line plot:
# prepare the titles
y <- 910
x <- 0.5
library(grid)
title_finance <- textGrob(
expression(bold("Financial Savings") * phantom("below") *
phantom(bold("industry")) *
phantom("for first time in 5 years\n")),
x = x, y = y, gp = gpar(col = col))
title_industry <- textGrob(
expression(phantom("Financial Savings") * "below" *
phantom(bold("industry")) * "for first time in 5 years\n"),
x = x, y = y, gp = gpar(col = "black"))
title <- textGrob(
expression("Financial Savings" * phantom("below") * bold("industry") *
phantom("for first time in 5 years\n")),
x = x, y = y, gp = gpar(col = "black"))
# line plot
ex_7_pivot %>%
filter(name_1 %in% c("Financial Savings", "Industry Average")) %>%
ggplot(aes(x = year, y = values)) +
# gometries
geom_line(aes(color = name_1, group = name_1), size = 1) +
geom_point(aes(y = if_else(year == 2019, values, NULL), col = name_1)) +
# annotatios
annotate("text", x = 2019, y = 840, label = "Industry avg",
color = "black", hjust = -0.1, vjust = -1.1, size = 4) +
annotate("text", x = 2019, y = 830, label = "Financial\nSavings",
color = col, hjust = -0.15, vjust = 0.70, size = 4) +
# scales
scale_color_manual(values = c("dodgerblue4","black")) +
scale_y_continuous(breaks = seq(700, 900, 20), limits = c(700, 920)) +
scale_x_continuous(breaks = seq(2012, 2019, 1),
limits = c(2012, 2020)) +
# axis titles
xlab("SURVEY YEAR") + ylab("SATISFACTION SCORE") +
# theme
theme_void() +
theme(
title = element_text(size = 9, color = "gray58"),
axis.line.x = element_line(color = "gray58"),
axis.text.y = element_text(size = 9, color = "gray58"),
axis.title.y = element_text(size = 9, hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58", size = 9),
legend.position = "none"
) +
# plot title
annotation_custom(textGrob(
expression(bold('Financial Savings ')),
gp = gpar(col = 'dodgerblue4', fontsize = 13), just = "left"
),
xmin = 2012, xmax = 2013, ymin = 920) +
annotation_custom(textGrob(
expression(phantom(bold('Financial Savings ')) * "below "),
gp = gpar(col = 'black', fontsize = 13), just = "left"
),
xmin = 2012, xmax = 2013, ymin = 920) +
annotation_custom(textGrob(
expression(phantom(bold('Financial Savings ')) *
phantom("below ") * bold("industry ")),
gp = gpar(col = 'black', fontsize = 13), just = "left"
),
xmin = 2012, xmax = 2013, ymin = 920) +
annotation_custom(textGrob(
expression(phantom(bold('Financial Savings ')) *
phantom("below ") * phantom(bold("industry ")) *
"for first time in 5 years"),
gp = gpar(col = 'black', fontsize = 13), just = "left"
),
xmin = 2012, xmax = 2013, ymin = 920)
EXERCISE 8. What’s wrong with this graph?
ex_8 <- read_excel("../data/storytelling_practice/chapter_2/2.8 EXERCISE.xlsx")
ex_8 %<>%
rename(names_1 = ...1) %>%
mutate(names_1 = factor(names_1,
labels = c("2014","2015","2016","2017","2018",
"2019 Q1","2019 Q2","2019 Q3","2019 Q4",
"Dec-19")))
Initial graph (approx):
ex_8 %>%
pivot_longer(cols = c(`Loan Loss Reserves`,`NPLs`)) %>%
ggplot(aes(x = names_1)) +
geom_col(aes(y = value, fill = name), position = "dodge") +
geom_line(aes(y = value, group = name, col = name)) +
scale_y_continuous(labels = dollar)
Improvement of the chart:
# YEAR PLOTS
# prepare the table
ex_8_pivot_filter <- ex_8 %>%
pivot_longer(cols = c(`Loan Loss Reserves`,`NPLs`)) %>%
filter(names_1 %in% c("2014","2015","2016","2017","2018","Dec-19"))
ex_8_pivot_reserves <- filter(ex_8_pivot_filter, name == "Loan Loss Reserves")
ex_8_pivot_NPLs <- filter(ex_8_pivot_filter, name == "NPLs")
# define colors
darker_col <- c(col,"red4")
# line plot by years
line_plot <- ex_8_pivot_filter %>%
ggplot(aes(x = names_1, y = value, group = name, col = name)) +
geom_line() +
geom_point() +
# annotations
annotate(geom = "text", x = 1.1, y = ex_8_pivot_reserves$value[1] + 0.175,
label = expression(bold("Reserves")), col = darker_col[1], ) +
annotate(geom = "text", x = ex_8_pivot_reserves$names_1,
y = ex_8_pivot_reserves$value + 0.1,
label = dollar(ex_8_pivot_reserves$value),
col = darker_col[1]) +
annotate(geom = "text", x = 1, y = ex_8_pivot_NPLs$value[1] + 0.075,
label = expression(bold("NPLs")), col = darker_col[2]) +
annotate(geom = "text", x = ex_8_pivot_NPLs$names_1,
y = ex_8_pivot_NPLs$value - 0.1,
label = dollar(ex_8_pivot_NPLs$value),
col = darker_col[2]) +
annotate(geom = "rect", xmin = 5.7, xmax = 6.3, ymin = 0, ymax = 1.8,
alpha = 0.2) +
# scales
scale_y_continuous(limits = c(0, 1.8)) +
scale_color_manual(values = darker_col) +
# titles
labs(title = "Annual Loan Loss Reserves & Non-Performing Loans (NPLs)",
subtitle = "BILLIONS") +
xlab("FISCAL YEAR") +
# theme
theme_void() +
theme(
title = element_text(size = 9, color = "black", face = "bold",
hjust = 1),
axis.line.x = element_line(color = "gray58", lineend = "round"),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58", size = 9),
legend.position = "none",
plot.margin = unit(c(0,0.5,0,0), units = "cm")
)
# QUARTER PLOT
# prepare the data
ex_8_Q <- ex_8 %>%
filter(names_1 %in% c("2019 Q1","2019 Q2","2019 Q3","2019 Q4")) %>%
pivot_longer(cols = c(`Loan Loss Reserves`,`NPLs`)) %>%
mutate(names_1 = str_sub(string = names_1, start = 6, end = 8))
ex_8_Q_reserves <- filter(ex_8_Q, name == "Loan Loss Reserves")
ex_8_Q_NPLs <- filter(ex_8_Q, name == "NPLs")
# line plot by years
Q_plot <- ex_8_Q %>%
ggplot(aes(x = names_1, y = value, group = name, col = name)) +
geom_line() +
geom_point() +
# annotations
annotate(geom = "text", x = ex_8_Q_reserves$names_1,
y = ex_8_Q_reserves$value + 0.1,
label = dollar(ex_8_Q_reserves$value),
col = darker_col[1], size = 3) +
annotate(geom = "text", x = ex_8_Q_NPLs$names_1,
y = ex_8_Q_NPLs$value - 0.1,
label = dollar(ex_8_Q_NPLs$value),
col = darker_col[2], size = 3) +
annotate(geom = "rect", xmin = 0.5, xmax = 4.5, ymin = 0, ymax = 1.8,
alpha = 0.2) +
# scales
scale_y_continuous(limits = c(0, 1.8)) +
scale_color_manual(values = darker_col) +
# titles
labs(title = "2019 quarterly view",
subtitle = "BILLIONS") +
xlab("2019") +
# theme
theme_void() +
theme(
title = element_text(size = 9, color = "black", face = "bold",
hjust = 1),
axis.line.x = element_line(color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58", size = 9),
legend.position = "none"
)
# JOIN CHARTS
grid.arrange(line_plot, Q_plot, widths = c(4,1), nrow = 1)
ex_3_1 <- read_excel("../data/storytelling_practice/chapter_3/ex_1.xlsx")
Transform the data:
# add factors
ex_3_1 %<>%
mutate(MONTH = factor(MONTH,
levels = c("JAN","FEB","MAR","APR","MAY","JUN","JUL",
"AUG","SEP","OCT","NOV","DEC")),
REL_SALES = if_else(RELEVANT == 1, SALES, NULL),
DATE = as_date(DATE))
Create the line chart:
ex_3_1 %>%
ggplot(aes(x = DATE)) +
# geometries
geom_line(aes(y = if_else(ORIGIN == "ACTUAL" |
(YEAR == 2019 & MONTH == "JUN"),
SALES, NULL)), size = 1) +
geom_line(aes(y = if_else(ORIGIN == "FORECAST", SALES, NULL)),
linetype = "dashed", size = 1) +
geom_point(aes(y = REL_SALES), size = 3) +
geom_point(aes(y = if_else(MONTH == "JUL" & YEAR == 2018, SALES, NULL)),
shape = 21, fill = "darkorange", size = 3) +
geom_point(aes(y = if_else(MONTH == "FEB" & YEAR == 2019, SALES, NULL)),
shape = 21, fill = col, size = 3) +
geom_text(aes(y = SALES, label = dollar(round(REL_SALES,1),
suffix = "B", accuracy = 0.1)),
vjust = -1.5, hjust = 0.2, size = 3) +
# annotations
# square text
annotate(geom = "rect", xmin = as_date("2019-05-20"),
xmax = as_date("2020-01-10"), ymin = 0, ymax = 2.55,
alpha = 0.1) +
annotate(geom = "text",
x = as_date("2020-01-01"), y = 1, hjust = 1, vjust = -1,
label = expression(bold("2019 FORECAST")),
col = "gray60", size = 3.25) +
annotate(geom = "text",
x = as_date("2020-01-01"), y = 1, hjust = 1, vjust = 1,
label = paste0("This is provided by ABC\n",
"consultants and based on\n",
"market data through June.\n",
"The forecast assumes no\n",
"major market changes.\n"),
col = "gray60", size = 3.5) +
# 2018 notes
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = paste0("2018: Jan-Jun was a period of stability, with\n",
"fairly steady growth (averaging +3% per\n",
"month). There was a nearly 20% decrease\n",
"in July, when Product X was recalled and\n",
"pulled from the market. Total sales remained\n",
"at reduced volume for the rest of the year."),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = expression(bold("2018:")),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.175, hjust = 0, vjust = 1,
label = expression(phantom("month). There was a ")*
"nearly 20% decrease"),
size = 3.5, col = "darkorange") +
annotate(
geom = "text", x = as_date("2018-01-10"), y = 3.0, hjust = 0, vjust = 1,
label = "in July",
size = 3.5, col = "darkorange") +
# 2019 notes
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = paste0("2019: The year started at less than $1.6B, but\n",
"Increased markedly in February, when a new\n",
"study was released. Total sales have increased\n",
"steadly since then and this projected to continue.\n",
"The latest forecast is for $2.4B in monthly sales by\n",
"the end of the year."),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.5, hjust = 0, vjust = 1,
label = expression(bold("2019:")),
col = "gray60", size = 3.5) +
annotate(
geom = "text", x = as_date("2019-01-10"), y = 3.34, hjust = 0, vjust = 1,
label = "Increased markedly in February",
size = 3.5, col = col) +
# scales
scale_x_date(date_labels = "%b'%y", date_breaks = "3 month") +
scale_y_continuous(labels = dollar, breaks = c(seq(0,3.5,0.5)),
limits = c(0, 3.5)) +
# titles
labs(title = "Market size over time") +
ylab("SALES ($USD BILLIONS)") +
# themes
theme_void() +
theme(
axis.line.x = element_line(color = "gray58"),
axis.text.y = element_text(size = 11, color = "gray58"),
axis.title.y = element_text(hjust = 1, color = "gray58"),
axis.text.x = element_text(size = 9, color = "gray58")
)
ex_3_2 <- read_excel("../data/storytelling_practice/chapter_3/ex_2.xlsx")
# give format to the data
ex_3_2 %<>%
mutate(Year_2019 = factor(Year_2019,
levels = c("JAN","FEB","MAR","APR","MAY","JUN","JUL",
"AUG","SEP","OCT","NOV","DEC")))
Plot evolution to understand the different trends:
text_col <- "grey58"
ex_3_2 %>%
ggplot(aes(x = Year_2019, y = Rate)) +
geom_col(fill = "gray58", col = "gray40", width = 0.8) +
scale_y_continuous(labels = percent_format(accuracy = 0.1),
breaks = seq(0, 0.01, 0.001),
limits = c(0, 0.01)) +
labs(title = "2019 monthly voluntary attrition rate",
subtitle = "") +
ylab("ATTRITION RATE") +
xlab("2019") +
theme_void() +
theme(
title = element_text(size = 12, color = "grey20", face = "bold",
hjust = 1),
axis.title.y = element_text(color = text_col, angle = 90, hjust = 1),
axis.text.y = element_text(color = text_col),
axis.line = element_line(color = text_col),
axis.text = element_text(color = text_col),
axis.title.x = element_text(hjust = 0, color = text_col, size = 12)
)
Highlight the relevant parts:
text_col <- "grey58"
rel_months <- c("APR", "JUL", "AUG", "NOV", "DEC")
ex_3_2 %>%
ggplot(aes(x = Year_2019, y = Rate)) +
geom_col(aes(fill = (Year_2019 %in% rel_months),
col = (Year_2019 %in% rel_months)),
width = 0.8) +
scale_y_continuous(labels = percent_format(accuracy = 0.1),
breaks = seq(0, 0.01, 0.001),
limits = c(0, 0.01)) +
scale_fill_manual(values = c("grey58", "grey20")) +
scale_color_manual(values = c("grey20", "grey58")) +
labs(title = "2019 monthly voluntary attrition rate",
subtitle = "") +
ylab("ATTRITION RATE") +
xlab("2019") +
theme_void() +
theme(
title = element_text(size = 12, color = "grey20", face = "bold",
hjust = 1),
axis.title.y = element_text(color = text_col, angle = 90, hjust = 1),
axis.text.y = element_text(color = text_col),
axis.line = element_line(color = text_col),
axis.text = element_text(color = text_col),
axis.title.x = element_text(hjust = 0, color = text_col, size = 12),
legend.position = "none"
)
text_col <- "grey58"
rel_months <- c("APR", "JUL", "AUG", "NOV", "DEC")
ex_3_2 %<>%
mutate(Highlight = if_else(
Year_2019 %in% c("JUL","AUG"), "Summer", if_else(
Year_2019 %in% c("APR"), "April", if_else(
Year_2019 %in% c("NOV","DEC"), "Winter","Other"
)))
)
ex_3_2 %>%
ggplot(aes(x = Year_2019, y = Rate)) +
geom_col(aes(fill = Highlight), col = "grey50", width = 0.8) +
scale_y_continuous(labels = percent_format(accuracy = 0.1),
breaks = seq(0, 0.01, 0.001),
limits = c(0, 0.01)) +
scale_fill_manual(values = c("red","grey58","orange","blue")) +
labs(title = "2019 monthly voluntary attrition rate",
subtitle = "") +
ylab("ATTRITION RATE") +
xlab("2019") +
theme_void() +
theme(
title = element_text(size = 12, color = "grey20", face = "bold",
hjust = 1),
axis.title.y = element_text(color = text_col, angle = 90, hjust = 1),
axis.text.y = element_text(color = text_col),
axis.line = element_line(color = text_col),
axis.text = element_text(color = text_col),
axis.title.x = element_text(hjust = 0, color = text_col, size = 12),
legend.position = "none"
)
Define grey areas:
text_col <- "grey58"
ex_3_2 %>%
ggplot(aes(x = Year_2019, y = Rate)) +
geom_col(fill = "gray58", col = "gray40", width = 0.8) +
annotate(geom = "rect",
xmin = 3.5, xmax = 13,
ymin = 0.007, ymax = 0.0095,
alpha = 0.3, fill = "grey60") +
annotate(geom = "rect",
xmin = 6.5, xmax = 13,
ymin = 0.0025, ymax = 0.0065,
alpha = 0.3, fill = "grey60") +
annotate(geom = "rect",
xmin = 10.5, xmax = 13,
ymin = 0, ymax = 0.002,
alpha = 0.3, fill = "grey60") +
scale_y_continuous(labels = percent_format(accuracy = 0.1),
breaks = seq(0, 0.01, 0.001),
limits = c(0, 0.01)) +
labs(title = "2019 monthly voluntary attrition rate",
subtitle = "") +
ylab("ATTRITION RATE") +
xlab("2019") +
theme_void() +
theme(
title = element_text(size = 12, color = "grey20", face = "bold",
hjust = 1),
axis.title.y = element_text(color = text_col, angle = 90, hjust = 1),
axis.text.y = element_text(color = text_col),
axis.line = element_line(color = text_col),
axis.text = element_text(color = text_col),
axis.title.x = element_text(hjust = 0, color = text_col, size = 12),
plot.margin = unit(c(0,4,0,0), "cm")
)
Other way to higlight specific information:
text_col <- "grey58"
ex_3_2 %>%
ggplot(aes(x = Year_2019, y = Rate)) +
geom_col(fill = "gray58", col = "gray40", width = 0.8) +
annotate(geom = "segment",
x = 13, xend = 4.5,
y = 0.0075, yend = 0.0075,
alpha = 0.3, fill = "grey60", arrow=arrow()) +
annotate(geom = "segment",
x = 13, xend = 8.5,
y = 0.0045, yend = 0.0045,
alpha = 0.3, fill = "grey60", arrow=arrow()) +
scale_y_continuous(labels = percent_format(accuracy = 0.1),
breaks = seq(0, 0.01, 0.001),
limits = c(0, 0.01)) +
labs(title = "2019 monthly voluntary attrition rate",
subtitle = "") +
ylab("ATTRITION RATE") +
xlab("2019") +
theme_void() +
theme(
title = element_text(size = 12, color = "grey20", face = "bold",
hjust = 1),
axis.title.y = element_text(color = text_col, angle = 90, hjust = 1),
axis.text.y = element_text(color = text_col),
axis.line = element_line(color = text_col),
axis.text = element_text(color = text_col),
axis.title.x = element_text(hjust = 0, color = text_col, size = 12),
plot.margin = unit(c(0,4,0,0), "cm")
)
ex_3_2 %>%
ggplot(aes(x = Year_2019, y = Rate)) +
geom_col(aes(fill = Highlight), col = "grey50", width = 0.8) +
annotate(geom = "rect",
xmin = 3.5, xmax = 13,
ymin = 0.007, ymax = 0.0095,
alpha = 0.3, fill = "grey60") +
annotate(geom = "rect",
xmin = 6.5, xmax = 13,
ymin = 0.0025, ymax = 0.0065,
alpha = 0.3, fill = "grey60") +
annotate(geom = "rect",
xmin = 10.5, xmax = 13,
ymin = 0, ymax = 0.002,
alpha = 0.3, fill = "grey60") +
scale_y_continuous(labels = percent_format(accuracy = 0.1),
breaks = seq(0, 0.01, 0.001),
limits = c(0, 0.01)) +
scale_fill_manual(values = c("red","grey58","orange","blue")) +
labs(title = "2019 monthly voluntary attrition rate",
subtitle = "") +
ylab("ATTRITION RATE") +
xlab("2019") +
theme_void() +
theme(
title = element_text(size = 12, color = "grey20", face = "bold",
hjust = 1),
axis.title.y = element_text(color = text_col, angle = 90, hjust = 1),
axis.text.y = element_text(color = text_col),
axis.line = element_line(color = text_col),
axis.text = element_text(color = text_col),
axis.title.x = element_text(hjust = 0, color = text_col, size = 12),
legend.position = "none"
)
Show the best way to plot a promotion:
ex_3_3 <- read_excel("../data/storytelling_practice/chapter_3/ex_3.xlsx")
Plot the dot plot:
# give the right shape
ex_3_3 %<>%
pivot_longer(c(-PROMO)) %>%
mutate(name = factor(name, levels = c("Repeat_writers_92K",
"New_writers_45K")))
repeat_ex_3_3 <- ex_3_3 %>%
filter(name == "Repeat_writers_92K")
new_ex_3_3 <- ex_3_3 %>%
filter(name == "New_writers_45K")
# plot the results
colors <- c("dodgerblue4", "darkorange2", "grey50")
ex_3_3 %>%
ggplot(aes(x = name, y = value, col = PROMO)) +
geom_point() +
geom_line(aes(group = PROMO)) +
annotate("text", x = 0.975, y = repeat_ex_3_3$value,
label = percent(repeat_ex_3_3$value, digits = 0),
col = colors, hjust = 1) +
annotate("text", x = 2.125, y = new_ex_3_3$value,
label = percent(new_ex_3_3$value, digits = 0),
color = colors, hjust = 1) +
annotate("text", x = 1.6, y = repeat_ex_3_3$value,
label = new_ex_3_3$PROMO,
color = colors, hjust = 1) +
scale_color_manual(values = colors) +
scale_x_discrete(
labels = c("Repeat writers\n92K", "New writers\n45K")
) +
labs(title = "Product X writers by promo type",
subtitle = "% OF TOTAL\n") +
theme_void() +
theme(
plot.title = element_text(hjust = 0.5),
plot.subtitle = element_text(hjust = 0.5, size = 9),
legend.position = "none",
axis.text.x = element_text(colour = "grey50"),
axis.line.x = element_line(colour = "grey50", size = 0.2)
)
Learn how to put focus on:
ex_4_2 <- read_excel("../data/storytelling_practice/chapter_4/ex_4_2.xlsx")
Initial chart:
ex_4_2 %>%
ggplot(aes(x = reorder(Brands, -perc), y = perc)) +
geom_col(fill = "grey65") +
coord_flip() +
geom_text(aes(x = reorder(Brands, -perc), y = 0.005,
label = if_else(perc < 0, Brands, NULL)),
hjust = 0, size = 3, col = "grey65") +
geom_text(aes(x = reorder(Brands, -perc), y = -0.005,
label = if_else(perc > 0, Brands, NULL)),
hjust = 1, size = 3, col = "grey65") +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(-0.2, 0.2, 0.05),
limits = c(-0.2, 0.2),
position = "bottom") +
labs(title = "Cat food brands: YoY sales change",
subtitle = "% CHANGE IN VOLUME ($)\n") +
theme_void() +
theme(
title = element_text(colour = "grey20"),
plot.subtitle = element_text(size = 10),
axis.text.x = element_text(colour = "grey65", size = 10),
axis.ticks.x = element_line(linetype = 1, colour = "grey65"),
axis.ticks.length.x = unit(.1, "cm")
)
Highlight relevant information:
ex_4_2 %>%
ggplot(aes(x = reorder(Brands, -perc), y = perc)) +
geom_col(fill = "grey65") +
geom_col(aes(y = if_else(Brands %in% c("Lifestyle",
"Diet Lifestyle",
"Lifestyle Plus"),
perc, NULL)), fill = "black") +
coord_flip() +
geom_text(aes(x = reorder(Brands, -perc), y = 0.005,
label = if_else(perc < 0, Brands, NULL)),
hjust = 0, size = 3, col = "grey65") +
geom_text(aes(x = reorder(Brands, -perc), y = 0.005,
label = if_else(perc < 0 &
Brands %in% c("Lifestyle",
"Diet Lifestyle",
"Lifestyle Plus"),
Brands, NULL)),
hjust = 0, size = 3, col = "black") +
geom_text(aes(x = reorder(Brands, -perc), y = -0.005,
label = if_else(perc > 0, Brands, NULL)),
hjust = 1, size = 3, col = "grey65") +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(-0.2, 0.2, 0.05),
limits = c(-0.2, 0.2),
position = "bottom") +
labs(title = "Cat food brands: Lifestyle line brands declined",
subtitle = "YEAR-OVER-YEAR % CHANGE IN SALES VOLUME ($)\n") +
theme_void() +
theme(
title = element_text(colour = "grey20"),
plot.subtitle = element_text(size = 10),
axis.text.x = element_text(colour = "grey65", size = 10),
axis.ticks.x = element_line(linetype = 1, colour = "grey65"),
axis.ticks.length.x = unit(.1, "cm")
)
Focus on factors in increasing values:
rel_brands <- c("Lifestyle", "Diet Lifestyle", "Lifestyle Plus", "Feline Gold",
"Feline Platinum", "Feline Instinct", "Feline Pro",
"Feline Royal", "Feline Focus", "Feline Grain Free",
"Feline Silver")
ex_4_2 %>%
ggplot(aes(x = reorder(Brands, -perc), y = perc)) +
geom_col(fill = "grey65") +
geom_col(aes(y = if_else(Brands %in% rel_brands,
perc, NULL)), fill = "dodgerblue4") +
coord_flip() +
geom_text(aes(x = reorder(Brands, -perc), y = 0.005,
label = if_else(perc < 0, Brands, NULL)),
hjust = 0, size = 3, col = "grey65") +
geom_text(aes(x = reorder(Brands, -perc), y = 0.005,
label = if_else(Brands %in% c("Lifestyle",
"Diet Lifestyle",
"Lifestyle Plus"),
Brands, NULL)),
hjust = 0, size = 3, col = "dodgerblue4") +
geom_text(aes(x = reorder(Brands, -perc), y = -0.005,
label = if_else(perc > 0, Brands, NULL)),
hjust = 1, size = 3, col = "grey65") +
geom_text(aes(x = reorder(Brands, -perc), y = -0.005,
label = if_else(Brands %in% c("Feline Gold",
"Feline Platinum", "Feline Instinct", "Feline Pro",
"Feline Royal", "Feline Focus", "Feline Grain Free",
"Feline Silver"), Brands, NULL)),
hjust = 1, size = 3, col = "dodgerblue4") +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(-0.2, 0.2, 0.05),
limits = c(-0.2, 0.2),
position = "bottom") +
labs(title = "Cat food brands: Lifestyle line brands declined",
subtitle = "YEAR-OVER-YEAR % CHANGE IN SALES VOLUME ($)\n") +
theme_void() +
theme(
title = element_text(colour = "grey20"),
plot.subtitle = element_text(size = 10),
axis.text.x = element_text(colour = "grey65", size = 10),
axis.ticks.x = element_line(linetype = 1, colour = "grey65"),
axis.ticks.length.x = unit(.1, "cm")
)
# Load the data
ex_7_2_perc <- read_excel(
"../data/storytelling_practice/chapter_7/ex_7_2.xlsx",
sheet = "perc"
)
ex_7_2_values <- read_excel(
"../data/storytelling_practice/chapter_7/ex_7_2.xlsx",
sheet = "values"
)
# transform the data
ex_7_2_values %<>%
pivot_longer(cols = c(-sales_channel),
names_to = "year", values_to = "units")
ex_7_2_perc %<>%
pivot_longer(cols = c(-sales_channel), names_to = "year", values_to = "perc")
Total units sold:
ex_7_2_values_all <- ex_7_2_values %>%
group_by(year) %>%
summarise(units = sum(units))
p_ex_7_2_values_all <- ex_7_2_values_all %>%
ggplot(aes(x = year, y = units)) +
geom_line(aes(y = if_else(year != 2020, units, NULL), group = "")) +
geom_line(aes(y = if_else(year %in% c(2019, 2020), units, NULL),
group = ""), linetype = "dashed") +
geom_point(aes(y = if_else(!(year %in% c(2013, 2015, 2016, 2020)), units, NULL))) +
geom_point(aes(y = if_else(year == 2020, units, NULL)),
fill = "white", shape = 21, size = 3) +
geom_text(aes(y = if_else(!(year %in% c(2013, 2015, 2016)), units, NULL),
label = dollar(round(units/1000,1),
suffix = "K", accuracy = 0.1, prefix = "")),
vjust = -2, hjust = 0.5, size = 3.5) +
scale_y_continuous(limits = c(0, max(ex_7_2_values_all$units + 1000))) +
labs(title = "Total units sold") +
xlab("FISCAL YEAR") +
theme_void() +
theme(
axis.line.x = element_line(color = "gray58"),
axis.text.x = element_text(size = 10, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58", size = 9)
)
p_ex_7_2_values_all
Create chart percentage units sold by channel:
ex_7_2_values %>%
ggplot(aes(x = year, y = units, fill = sales_channel)) +
geom_col(position = "fill", col = "white", fill = "grey58") +
geom_col(aes(y = if_else(year == 2020, units, NULL)),
position = "fill", col = "white", fill = "grey80") +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0, 1, 0.1)) +
labs(title = "Units sold by channel") +
ylab("% OF TOTAL") +
xlab("FISCAL YEAR") +
theme_void() +
theme(
plot.title = element_text(color = "grey10"),
axis.text = element_text(size = 10, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58", size = 10),
axis.title.y = element_text(color = "gray58", angle = 90,
hjust = 1, size = 10),
plot.margin = unit(c(0,3,0,0), "cm")
)
Focus on retail:
ex_7_2_values %>%
ggplot(aes(x = year, y = units, fill = sales_channel)) +
geom_col(position = "fill", col = "white", fill = "grey58", width = 0.8) +
geom_col(aes(fill = (sales_channel == "Retail")),
position = "fill", col = "white", width = 0.8) +
geom_col(aes(y = if_else(year == 2020, units, NULL)),
position = "fill", col = "white", fill = "grey80", width = 0.8) +
geom_col(aes(y = if_else(year == 2020, units, NULL),
fill = (sales_channel == "Retail")),
position = "fill", col = "white", width = 0.8) +
scale_fill_manual(values = c("grey58","black")) +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0, 1, 0.1)) +
labs(title = "Units sold in Retail") +
ylab("% OF TOTAL") +
xlab("FISCAL YEAR") +
theme_void() +
theme(
plot.title = element_text(color = "grey10"),
axis.text = element_text(size = 10, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58", size = 10),
axis.title.y = element_text(color = "gray58", angle = 90,
hjust = 1, size = 10),
legend.position = "none"
)
Highlight what is increasing or decreasing:
p_ex_7_2_values <- ex_7_2_values %>%
mutate(rel_sales_channel = if_else(sales_channel %in% c("Retail", "Partner"),
sales_channel, "Other"),
rel_sales_channel = factor(rel_sales_channel,
levels = c("Partner","Other","Retail"))) %>%
ggplot(aes(x = year, y = units, fill = rel_sales_channel)) +
geom_col(position = "fill", col = "white", width = 0.8) +
scale_fill_manual(values = c("dodgerblue4","grey58", "dodgerblue")) +
scale_y_continuous(labels = percent_format(accuracy = 1),
breaks = seq(0, 1, 0.1)) +
labs(title = "Units sold by sales channel") +
ylab("% OF TOTAL") +
xlab("FISCAL YEAR") +
theme_void() +
theme(
plot.title = element_text(color = "grey10"),
axis.text = element_text(size = 10, color = "gray58"),
axis.title.x = element_text(hjust = 0, color = "gray58", size = 10),
axis.title.y = element_text(color = "gray58", angle = 90,
hjust = 1, size = 10),
legend.position = "top",
legend.title = element_blank()
)
grid.arrange(p_ex_7_2_values_all, p_ex_7_2_values, nrow = 1)