Introduction

This post is inspired by the work of Denise Lu and Albert Sun, and by a post from Cole Nussbaumer Knaflic.
R codes

R code for creating the chart:
#===========================
# Prepare data for plotting
#===========================

# NOTE: the original script started with `rm(list = ls())`. It was removed
# because it silently wipes the caller's workspace when the script is sourced;
# run the script in a fresh R session instead.

# Load data (the workbook is read from the current working directory):

pci_data <- readxl::read_excel("PCI_20111020.xlsx")

# Select some columns:

library(dplyr)
library(tidyr)

# Keep only the columns and the two years needed for the chart.
pci_data_mini <- pci_data %>%
  mutate(
    # Transliterate province names to plain ASCII (drops diacritics).
    Province = stringi::stri_trans_general(Province, "Latin-ASCII"),
    # Harmonise two spellings after transliteration.
    Province = case_when(
      Province == "TT-Hue" ~ "TT.Hue",
      Province == "Bac Can" ~ "Bac Kan",
      TRUE ~ Province
    )
  ) %>%
  select(Year, Province, PCI) %>%
  filter(Year %in% c(2011, 2020)) %>%
  # Baseline columns kept from the original script; they are not referenced
  # by the rest of the visible code.
  mutate(y_start = 0, y_end = 0)


# Rank provinces by their relative PCI change between 2011 and 2020.
# One row per province survives (the 2020 row), carrying:
#   pci_diff    = PCI(2020) - PCI(2011)
#   pci_percent = pci_diff as a percentage of the 2011 score
pci_change_percent <- pci_data_mini %>%
  # Put the newest year first inside each province so that lead() is always
  # the 2011 score, whatever the row order of the workbook is.
  arrange(Province, desc(Year)) %>%
  group_by(Province) %>%
  mutate(
    pci_diff = PCI - lead(PCI),
    # Divide by the 2011 base so this matches the percentages printed on the
    # chart. For positive scores this yields the same ranking and sign as
    # dividing by the 2020 score.
    pci_percent = 100 * pci_diff / lead(PCI)
  ) %>%
  ungroup() %>%
  # The 2011 row of every province has no later row to compare against.
  filter(!is.na(pci_diff)) %>%
  arrange(desc(pci_percent))

# Five provinces with the largest relative improvement (ties are kept).
top5 <- pci_change_percent %>%
  top_n(n = 5, wt = pci_percent) %>%
  pull(Province)

# Provinces whose score went down.
poor_provinces <- pci_change_percent %>%
  filter(pci_percent < 0) %>%
  pull(Province)

# All highlighted provinces (despite the name, these are the coloured ones;
# everything outside this set is drawn in grey).
grey_provinces <- c(top5, poor_provinces)

# Facet order: best improvement first.
prov_levels <- pci_change_percent$Province

# Turn Province into a factor ordered by the ranking, then tag every row with
# the highlight category used for colour and fill on the chart.
pci_data_mini <- mutate(
  pci_data_mini,
  Province = factor(Province, levels = prov_levels),
  cat = case_when(
    Province %in% top5 ~ "best5",
    Province %in% poor_provinces ~ "worse5",
    TRUE ~ "greyprov"
  )
)

# Rows of the best-ranked province only; used to print the year labels in a
# single facet.
df_year_dis <- filter(pci_data_mini, Province == top5[1])

# Build the percent-change label for each province in `scores`.
#
# scores: rows of pci_data_mini (columns Year, Province, PCI) for the
#         provinces of interest, two years per province.
# prefix: text put in front of the rounded percentage (e.g. "+").
#
# Returns one row per province (the 2020 row) with midpoint_y (mean of the two
# scores), prev_pci (2011 score), delta, perc and the label perc_text.
make_percent_labels <- function(scores, prefix = "") {
  scores %>%
    # Newest year first so lead() is the 2011 score regardless of input order.
    arrange(Province, desc(Year)) %>%
    group_by(Province) %>%
    mutate(
      midpoint_y = mean(PCI),
      prev_pci = lead(PCI),
      delta = PCI - prev_pci,
      perc = 100 * delta / prev_pci,
      # paste0() is base R; the original called str_c() although stringr is
      # never attached in this script.
      perc_text = paste0(prefix, as.character(round(perc, 0)), "%")
    ) %>%
    ungroup() %>%
    # Drop the 2011 rows, which have nothing to be compared with.
    filter(!is.na(delta))
}

# Score labels and percent labels for the five most improved provinces.
df_scores <- pci_data_mini %>%
  filter(Province %in% top5) %>%
  mutate(PCI_text = as.character(round(PCI, 0)))

df_percent <- make_percent_labels(df_scores, prefix = "+")

# Same for the provinces whose score fell; the minus sign comes from the
# number itself, so no prefix is needed.
df_scores_down <- pci_data_mini %>%
  filter(Province %in% poor_provinces) %>%
  mutate(PCI_text = as.character(round(PCI, 0)))

df_percent_down <- make_percent_labels(df_scores_down)



library(ggplot2)
library(gghighlight)
library(ggtext) 
library(showtext)


# Highlight colours: in_color for the most improved provinces, de_color for
# provinces whose score fell. Defined before the subtitle so that its inline
# HTML styling reuses exactly the same values.
in_color <- "#0074A5"

de_color <- "#E7331A"

my_title <- "Investment climate change from 2011 to 2020 based on PCI score by province"  # 5 provinces with the biggest improvement

# Subtitle written as markdown/HTML (rendered by ggtext::element_markdown).
# Fixes versus the original text: the second list names eight provinces, not
# seven, and a stray closing </span> without an opening tag was removed.
# NOTE(review): the province names are hard-coded; confirm they still match
# `top5` and `poor_provinces` whenever the data changes.
my_subtitle <- paste0(
  "The provincial competitiveness index (PCI) ranks Vietnam's 63 provinces based on economic governance areas that affect the private sector development.",
  "<br>Five provinces with <b style='color:", in_color, ";'>the highest improvement</b> were Vinh Long, Lam Dong, Thai Nguyen, Ha Nam and Cao Bang. ",
  "Eight provinces where<br>the investment environment has <b style='color:", de_color, ";'>not been improved </b>: ",
  "Quang Tri, Dong Nai, Soc Trang, Bac Ninh, BRVT, Ha Tinh, Binh Phuoc and Lao Cai."
)

# Select font for our chart:
# font_add_google() fetches the fonts from Google Fonts, so this step needs
# internet access.

my_font <- "Roboto Condensed"

font_add_google(name = my_font, family = my_font)

my_font2 <- "Lato"

font_add_google(name = my_font2, family = my_font2)

# Let showtext render text on all graphics devices opened from now on.
showtext_auto()

# Colour per highlight category. The vector is named so the mapping no longer
# depends on the alphabetical order of the `cat` values.
highlight_colors <- c(best5 = in_color, greyprov = "grey60", worse5 = de_color)

# Small-multiples area chart: one facet per province, 2011 -> 2020.
# The original drew the best / worse / normal provinces in three separate
# groups of layers with identical aesthetics. Those three subsets together
# cover every row and each facet holds a single province, so one set of layers
# on the full data renders the same picture.
# ggplot() is left empty on purpose so that layers added later do not inherit
# any data or aesthetics from here.
draft_chart <- ggplot() +
  geom_area(
    data = pci_data_mini,
    aes(x = Year, y = PCI, fill = cat),
    alpha = 0.1
  ) +
  # `size` kept for compatibility with older ggplot2; releases from 3.4.0 on
  # prefer `linewidth` for lines.
  geom_line(
    data = pci_data_mini,
    aes(x = Year, y = PCI, color = cat),
    size = 1
  ) +
  geom_point(
    data = pci_data_mini,
    aes(x = Year, y = PCI, color = cat),
    size = 2
  ) +
  facet_wrap(~ Province) +
  theme_minimal() +
  labs(
    title = my_title,
    subtitle = my_subtitle,
    caption = "Source: https://pcivietnam.vn/"
  ) +
  # Strip axes, grid and legend: values are printed directly on the panels.
  theme(
    axis.text = element_blank(),
    axis.title = element_blank(),
    panel.grid = element_blank(),
    legend.position = "none",
    plot.margin = unit(rep(0.7, 4), "cm")
  ) +
  scale_color_manual(values = highlight_colors) +
  scale_fill_manual(values = highlight_colors) +
  # Head-room above the highest score leaves space for the text labels.
  scale_y_continuous(limits = c(0, 90)) +
  scale_x_continuous(limits = c(2010.5, 2020.5), breaks = seq(2010, 2020, 10))


# Final chart: typography first, then the annotation layers. The expression is
# deliberately not assigned so that the plot is printed.
draft_chart +
  theme(
    axis.text = element_blank(),
    text = element_text(family = my_font),
    strip.text = element_text(
      color = "grey20", family = my_font2, size = 9, face = "bold", vjust = -1
    ),
    plot.title = element_text(
      family = my_font, size = 21, face = "bold", color = "grey20"
    ),
    # element_markdown() so the HTML tags inside the subtitle are rendered.
    plot.subtitle = element_markdown(family = my_font, size = 11, color = "grey30"),
    plot.caption = element_text(family = my_font, size = 10, color = "grey40")
  ) +
  # Year labels, printed only in the facet of the best-ranked province.
  geom_text(
    data = df_year_dis,
    aes(x = c(2012.1, 2018.7), y = 0, label = c("2011", "2020")),
    size = 3.5, vjust = -0.5, color = "grey20", family = my_font
  ) +
  # Most improved provinces: start score, end score, percent change.
  geom_text(
    data = filter(df_scores, Year == 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.3, color = in_color, family = my_font
  ) +
  geom_text(
    data = filter(df_scores, Year != 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.8, color = in_color, family = my_font
  ) +
  geom_text(
    data = df_percent,
    aes(x = 2014, y = midpoint_y + 15, label = perc_text),
    hjust = 0, color = in_color, family = my_font
  ) +
  # Down:
  geom_text(
    data = filter(df_scores_down, Year == 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.1, color = de_color, family = my_font
  ) +
  geom_text(
    data = filter(df_scores_down, Year != 2011),
    aes(x = Year, y = PCI + 15, label = PCI_text),
    hjust = 0.8, color = de_color, family = my_font
  ) +
  geom_text(
    data = df_percent_down,
    aes(x = 2015, y = midpoint_y + 15, label = perc_text),
    hjust = 0, color = de_color, family = my_font
  ) +
  # Baseline under every panel.
  geom_segment(
    data = pci_data_mini,
    aes(x = 2011, xend = 2020, y = 0, yend = 0),
    color = "grey50"
  )
Investment climate change from 2011 to 2020 based on PCI score

Data Visualization Series

Author: Nguyen Chi Dung

Introduction

R codes