This post is inspired by Denise Lu and Albert Sun and a post by Cole Nussbaumer Knaflic.
R code for creating the chart:
#===========================
# Prepare data for plotting
#===========================
# Start from an empty workspace.
# NOTE(review): this also deletes every object already present in an
# interactive session; running the script in a fresh R session avoids the need.
rm(list = ls())

# Load data: read the PCI workbook from the current working directory.
pci_data <- readxl::read_excel("PCI_20111020.xlsx")
# Select some columns:
library(dplyr)
library(tidyr)
# Keep only the 2011 and 2020 PCI scores, with province names in plain ASCII.
pci_data_mini <- pci_data %>%
  # Transliterate accented characters in the province names to ASCII.
  mutate(Province = stringi::stri_trans_general(Province, "Latin-ASCII")) %>%
  # Recode two province spellings; every other name is kept as is.
  mutate(Province = case_when(
    Province == "TT-Hue" ~ "TT.Hue",
    Province == "Bac Can" ~ "Bac Kan",
    TRUE ~ Province
  )) %>%
  select(Year, Province, PCI) %>%
  filter(Year %in% c(2011, 2020)) %>%
  # Constant zero columns; not referenced anywhere else in the visible script.
  mutate(y_start = 0, y_end = 0)
# Percentage change in PCI between 2011 and 2020, one row per province, sorted
# from the largest improvement to the largest decline.
pci_change_percent <- pci_data_mini %>%
  # Put the 2020 row first within each province so that lead() returns the
  # 2011 score regardless of the row order in the input file.
  arrange(Province, desc(Year)) %>%
  group_by(Province) %>%
  mutate(
    pci_diff = PCI - lead(PCI),
    # Change relative to the 2011 baseline, the same base used for the
    # percentage labels drawn on the chart.
    pci_percent = pci_diff * 100 / lead(PCI)
  ) %>%
  ungroup() %>%
  # Drops the 2011 rows (no following row) and provinces missing a year.
  filter(!is.na(pci_diff)) %>%
  arrange(desc(pci_percent))

# The five provinces with the largest percentage improvement (ties are kept).
top5 <- pci_change_percent %>%
  slice_max(pci_percent, n = 5) %>%
  pull(Province)

# Provinces whose PCI score fell between 2011 and 2020.
poor_provinces <- pci_change_percent %>%
  filter(pci_percent < 0) %>%
  pull(Province)

# Despite the name, these are the highlighted provinces: every province that is
# NOT in this vector is the one drawn in grey.
grey_provinces <- c(top5, poor_provinces)

# Province order (best to worst change) used as factor levels for the facets.
prov_levels <- pci_change_percent$Province
# Order provinces by their change in PCI and tag each row with the category
# that drives its colour: "best5", "worse5" or "greyprov".
pci_data_mini <- pci_data_mini %>%
  mutate(Province = factor(Province, prov_levels)) %>%
  mutate(cat = case_when(
    Province %in% top5 ~ "best5",
    Province %in% poor_provinces ~ "worse5",
    TRUE ~ "greyprov"
  ))

# Rows of the top-ranked province; its panel carries the year labels.
df_year_dis <- pci_data_mini %>%
  filter(Province == top5[1])
# Build one percentage-change label per province from a frame of yearly scores.
#
# scores: data frame with Province, Year and PCI columns (two years per
#         province).
# prefix: text placed in front of the rounded percentage, e.g. "+".
#
# Returns one row per province with midpoint_y (mean of the two scores),
# prev_pci (the 2011 score), delta, perc and the formatted perc_text.
build_percent_labels <- function(scores, prefix = "") {
  scores %>%
    # 2020 row first within each province, so lead() yields the 2011 score.
    arrange(Province, desc(Year)) %>%
    group_by(Province) %>%
    mutate(
      midpoint_y = mean(PCI),
      prev_pci = lead(PCI),
      delta = PCI - prev_pci,
      perc = 100 * delta / prev_pci
    ) %>%
    ungroup() %>%
    # Keep only the 2020 row, the one that has a previous score to compare to.
    filter(!is.na(delta)) %>%
    # paste0() is base R; stringr is not attached by this script.
    mutate(perc_text = paste0(prefix, as.character(round(perc, 0)), "%"))
}

# Score labels and percentage labels for the five most improved provinces.
df_scores <- pci_data_mini %>%
  filter(Province %in% top5) %>%
  mutate(PCI_text = as.character(round(PCI, 0)))

df_percent <- build_percent_labels(df_scores, prefix = "+")

# Same labels for the provinces whose score declined; the rounded percentage
# already carries its minus sign, so no prefix is added.
df_scores_down <- pci_data_mini %>%
  filter(Province %in% poor_provinces) %>%
  mutate(PCI_text = as.character(round(PCI, 0)))

df_percent_down <- build_percent_labels(df_scores_down)
library(ggplot2)
library(gghighlight)
library(ggtext)
library(showtext)

# Chart title.
# Alternative wording: 5 provinces with the biggest improvement
my_title <- "Investment climate change from 2011 to 2020 based on PCI score by province"

# Chart subtitle, rendered as markdown/HTML (bold, coloured spans and <br>).
# NOTE(review): the text says "Seven provinces" but lists eight names, and both
# province lists are hard-coded rather than derived from the data; confirm
# them against top5 and poor_provinces.
my_subtitle <- paste0(
  "The provincial competitiveness index (PCI) ranks Vietnam's 63 provinces ",
  "based on economic governance areas that affect the private sector ",
  "development.<br>Five provinces with ",
  "<b style='color:#0074A5;'>the highest improvement</b> were Vinh Long, ",
  "Lam Dong, Thai Nguyen, Ha Nam and Cao Bang. Seven provinces where<br>",
  "the investment environment has ",
  "<b style='color:#E7331A;'>not been improved </b>: Quang Tri, Dong Nai, ",
  "Soc Trang, Bac Ninh, BRVT, Ha Tinh, Binh Phuoc and Lao Cai."
)

# Colours for improving (in_color) and declining (de_color) provinces; the
# same hex codes are embedded in the subtitle markup above.
in_color <- "#0074A5"
de_color <- "#E7331A"

# Select fonts for our chart: both are registered from Google Fonts under their
# own names, then showtext is switched on for text rendering.
my_font <- "Roboto Condensed"
font_add_google(name = my_font, family = my_font)

my_font2 <- "Lato"
font_add_google(name = my_font2, family = my_font2)

showtext_auto()
# Colour per province category. Naming the values ties each colour to its
# category instead of relying on the alphabetical order of the categories.
cat_colors <- c(best5 = in_color, greyprov = "grey60", worse5 = de_color)

# Small-multiple area chart: one panel per province, coloured by category.
# Each panel holds a single province, so one area/line/point layer over the
# whole data set draws the best, worse and normal provinces alike.
draft_chart <- ggplot() +
  geom_area(
    data = pci_data_mini,
    aes(x = Year, y = PCI, fill = cat),
    alpha = 0.1
  ) +
  geom_line(
    data = pci_data_mini,
    aes(x = Year, y = PCI, color = cat),
    size = 1
  ) +
  geom_point(
    data = pci_data_mini,
    aes(x = Year, y = PCI, color = cat),
    size = 2
  ) +
  facet_wrap(~ Province) +
  theme_minimal() +
  labs(
    title = my_title,
    subtitle = my_subtitle,
    caption = "Source: https://pcivietnam.vn/"
  ) +
  # Strip axes, grid and legend: values are shown as text labels instead.
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none",
    plot.margin = unit(rep(0.7, 4), "cm")
  ) +
  scale_color_manual(values = cat_colors) +
  scale_fill_manual(values = cat_colors) +
  scale_y_continuous(limits = c(0, 90)) +
  scale_x_continuous(limits = c(2010.5, 2020.5), breaks = seq(2010, 2020, 10))
# Year labels, drawn only in the panel of the top-ranked province. Position
# and text are keyed on Year so they do not depend on the row order of
# df_year_dis.
year_labels <- df_year_dis %>%
  mutate(
    label_x = if_else(Year == 2011, 2012.1, 2018.7),
    label_text = as.character(Year)
  )

# Final chart: fonts, score labels, percentage labels and a baseline per panel.
draft_chart +
  theme(
    text = element_text(family = my_font),
    strip.text = element_text(
      color = "grey20", family = my_font2, size = 9, face = "bold", vjust = -1
    ),
    plot.title = element_text(
      family = my_font, size = 21, face = "bold", color = "grey20"
    ),
    # element_markdown() renders the HTML tags embedded in the subtitle.
    plot.subtitle = element_markdown(
      family = my_font, size = 11, color = "grey30"
    ),
    plot.caption = element_text(family = my_font, size = 10, color = "grey40")
  ) +
  geom_text(
    data = year_labels,
    aes(x = label_x, y = 0, label = label_text),
    size = 3.5, vjust = -0.5, color = "grey20", family = my_font
  ) +
  # Up: scores at both ends and the percentage change in between.
  geom_text(
    data = df_scores %>% filter(Year == 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.3, color = in_color, family = my_font
  ) +
  geom_text(
    data = df_scores %>% filter(Year != 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.8, color = in_color, family = my_font
  ) +
  geom_text(
    data = df_percent,
    aes(x = 2014, y = midpoint_y + 15, label = perc_text),
    hjust = 0, color = in_color, family = my_font
  ) +
  # Down:
  geom_text(
    data = df_scores_down %>% filter(Year == 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.1, color = de_color, family = my_font
  ) +
  geom_text(
    data = df_scores_down %>% filter(Year != 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.8, color = de_color, family = my_font
  ) +
  geom_text(
    data = df_percent_down,
    aes(x = 2015, y = midpoint_y + 15, label = perc_text),
    hjust = 0, color = de_color, family = my_font
  ) +
  # Baseline at y = 0, drawn once per province panel.
  geom_segment(
    data = distinct(pci_data_mini, Province),
    aes(x = 2011, xend = 2020, y = 0, yend = 0),
    color = "grey50"
  )