This post is inspired by David Sjoberg and Cédric Scherer.
R code for creating the chart:
#===========================
#  Prepare data for plotting
#===========================
# NOTE: run this script in a fresh R session. The former `rm(list = ls())`
# call was dropped: it wipes the caller's workspace but does not reset
# loaded packages or options, so it never gave a truly clean state.

# Load geospatial data (downloaded from
# https://data.opendevelopmentmekong.net/vi/dataset/a-phn-tnh):
library(sf)
sf_data_vietnam <- read_sf("diaphantinhenglish.geojson")

# Extract one "central" point per province. st_point_on_surface() returns a
# point guaranteed to lie on the geometry; its coordinates become the X / Y
# columns, and the province name is attached as `id` (row order is preserved
# by every step, so the names line up with the coordinates).
library(dplyr)
df_central_point <- sf_data_vietnam %>%
  st_geometry() %>%
  st_point_on_surface() %>%
  st_coordinates() %>%
  as_tibble() %>%
  mutate(id = sf_data_vietnam$Name)
# Get geospatial data for Viet Nam from GADM. `level = 1` requests the
# first administrative subdivision (provinces), whose names are stored in
# the NAME_1 field used below.
# NOTE(review): raster::getData() needs network access and is deprecated in
# recent raster releases in favour of the geodata package -- confirm the
# installed version still provides it.
geoData_vn_province <- raster::getData("GADM", country = "Vietnam", level = 1)

# Convert the polygons to a plain data frame (long / lat / group / id),
# using the province name as the region id:
library(ggplot2)
vn_province_df <- geoData_vn_province %>%
  fortify(region = "NAME_1")

# Strip Vietnamese diacritics so province names are plain ASCII:
vn_province_df <- vn_province_df %>%
  mutate(id = stringi::stri_trans_general(id, "Latin-ASCII"))
# Load PCI data (first sheet of the workbook):
pci2018 <- readxl::read_excel(
  "_data_pci_file_general_file_1588582407-Indicator_PCI2018_VN.xlsx",
  sheet = 1
)

# Keep only the first three columns:
pci2018_mini <- pci2018 %>%
  select(1, 2, 3)

# Keep only the first 63 rows; anything below them in the sheet is dropped:
pci2018_63 <- pci2018_mini %>%
  slice(1:63)

# Rename all columns:
names(pci2018_63) <- c("id", "rank", "pci_score")

# Strip Vietnamese diacritics so province names are plain ASCII:
pci2018_63 <- pci2018_63 %>%
  mutate(id = stringi::stri_trans_general(id, "Latin-ASCII"))
# Harmonize province names so that both tables use the same spelling in
# `id` before they are joined.

# Names coming from the GeoJSON file:
df_central_point <- df_central_point %>%
  mutate(id = case_when(
    id == "TP. Ho Chi Minh" ~ "Ho Chi Minh",
    id == "Thua Thien - Hue" ~ "Thua Thien Hue",
    TRUE ~ id
  ))

# Abbreviations used in the PCI workbook:
pci2018_63 <- pci2018_63 %>%
  mutate(id = case_when(
    id == "BRVT" ~ "Ba Ria - Vung Tau",
    id == "TP.HCM" ~ "Ho Chi Minh",
    id == "TT-Hue" ~ "Thua Thien Hue",
    TRUE ~ id
  ))

# Attach rank and score to the central points. A full join keeps rows from
# either side that have no partner, so any spelling mismatch shows up as a
# row containing NA instead of disappearing silently.
df_central_point <- df_central_point %>%
  full_join(pci2018_63, by = "id")
# Layout of the ranked list drawn to the right of the map, adapted from
# https://github.com/davidsjoberg/tidytuesday/blob/master/2020w17/2020w17_skript.R
library(stringr)

# Order provinces by rank and give each one an end point for its curve:
# a fixed x (118) and evenly spaced y values running from 24 down to 8.
# `length.out = 63` must equal the number of rows; mutate() raises an error
# if the join above produced a different row count.
df_bum <- df_central_point %>%
  arrange(rank) %>%
  mutate(x_end_dum = 118, y_end_dum = seq(24, 8, length.out = 63))

# Build the text labels:
#   rank_full - rank left-padded with a zero to two characters ("01", "12")
#   prov_rank - "<rank_full> <province>"
#   pci_text  - score rounded to one decimal, always showing that decimal
#   label3    - "<province> - <pci_text>"
df_bum <- df_bum %>%
  mutate(
    rank_full = case_when(
      rank < 10 ~ str_c("0", rank),
      TRUE ~ as.character(rank)
    ),
    prov_rank = paste(rank_full, id),
    pci_text = as.character(round(pci_score, 1)),
    # as.character() drops a trailing ".0" (e.g. 65 -> "65"); put it back.
    pci_text = case_when(
      str_detect(pci_text, "\\.") ~ pci_text,
      TRUE ~ str_c(pci_text, ".0")
    ),
    label3 = str_c(id, pci_text, sep = " - ")
  )
library(showtext) # For using a new font.

# Select the font for our map. The same string is used both as the Google
# Fonts name to fetch and as the family name referenced in the plots.
my_font <- "Roboto Condensed"
font_add_google(name = my_font, family = my_font)

# Route text rendering through showtext for all subsequent plots:
showtext_auto()
#====================
#  Version 1
#====================
# Map on the left, ranked list on the right: each province's point is linked
# by a sigmoid curve to its "<rank> <name>" label, which sits just above a
# bar whose length is pci_score / 20 with the score printed at the bar's end.
library(ggbump)

ggplot() +
  # Province outlines.
  geom_path(
    data = vn_province_df,
    mapping = aes(long, lat, group = group),
    color = "grey30",
    size = 0.1
  ) +
  # One point per province, coloured by score.
  geom_point(
    data = df_central_point,
    mapping = aes(X, Y, color = pci_score),
    size = 1
  ) +
  # Curves from each point to its slot in the ranked list.
  geom_sigmoid(
    data = df_bum,
    mapping = aes(
      X, Y,
      xend = x_end_dum, yend = y_end_dum,
      color = pci_score, group = id
    ),
    alpha = 0.6,
    smooth = 10,
    size = 0
  ) +
  # "<rank> <province>" label, nudged above the bar.
  geom_text(
    data = df_bum,
    mapping = aes(x_end_dum, y_end_dum, label = prov_rank, color = pci_score),
    size = 2,
    vjust = -0.5,
    hjust = 0,
    family = my_font
  ) +
  # Score printed just past the end of the bar.
  geom_text(
    data = df_bum,
    mapping = aes(
      x = x_end_dum + pci_score / 20,
      y = y_end_dum,
      label = pci_text,
      color = pci_score
    ),
    size = 3,
    hjust = -0.1,
    family = my_font
  ) +
  # Horizontal bar proportional to the score.
  geom_segment(
    data = df_bum,
    mapping = aes(
      x_end_dum, y_end_dum,
      xend = x_end_dum + pci_score / 20,
      yend = y_end_dum,
      color = pci_score
    ),
    size = 1
  ) +
  scale_x_continuous(limits = c(102, 122)) +
  scale_fill_viridis_c() +
  scale_color_viridis_c() +
  labs(
    title = "PCI 2018 Score by Province (Version 1)",
    subtitle = "The provincial competitiveness index (PCI) ranks Vietnam's 63 provinces\nbased on economic governance areas that affect the private sector development.\nThe PCI measures the following ten sub-indices on a scale from 0 (minimum) to 100 (maximum).",
    caption = "Source: https://pcivietnam.vn"
  ) +
  theme_void() +
  # Dark theme: black background, grey text, no legend.
  theme(
    text = element_text(family = my_font),
    plot.margin = margin(0.5, 1, 0.5, 0.5, unit = "cm"),
    legend.position = "none",
    plot.background = element_rect(fill = "black"),
    plot.caption = element_text(color = "gray60"),
    plot.title = element_text(color = "gray90", size = 16),
    plot.subtitle = element_text(color = "gray70", size = 10)
  )
#====================
#  Version 2
#====================
# Same layers as Version 1, but the "<rank> <name>" label is drawn on the
# row itself and the score bar starts 2.5 x-units to its right.
ggplot() +
  # Province outlines.
  geom_path(
    data = vn_province_df,
    mapping = aes(long, lat, group = group),
    color = "grey30",
    size = 0.1
  ) +
  # One point per province, coloured by score.
  geom_point(
    data = df_central_point,
    mapping = aes(X, Y, color = pci_score),
    size = 1
  ) +
  # Curves from each point to its slot in the ranked list.
  geom_sigmoid(
    data = df_bum,
    mapping = aes(
      X, Y,
      xend = x_end_dum, yend = y_end_dum,
      color = pci_score, group = id
    ),
    alpha = 0.6,
    smooth = 10,
    size = 0
  ) +
  # "<rank> <province>" label at the end of the curve.
  geom_text(
    data = df_bum,
    mapping = aes(x_end_dum, y_end_dum, label = prov_rank, color = pci_score),
    size = 3,
    vjust = 0.2,
    hjust = 0,
    family = my_font
  ) +
  # Score printed just past the end of the (shifted) bar.
  geom_text(
    data = df_bum,
    mapping = aes(
      x = x_end_dum + 2.5 + pci_score / 20,
      y = y_end_dum,
      label = pci_text,
      color = pci_score
    ),
    size = 3,
    hjust = -0.1,
    family = my_font
  ) +
  # Horizontal bar proportional to the score, offset to clear the label.
  geom_segment(
    data = df_bum,
    mapping = aes(
      x = x_end_dum + 2.5,
      y = y_end_dum,
      xend = x_end_dum + 2.5 + pci_score / 20,
      yend = y_end_dum,
      color = pci_score
    ),
    size = 1
  ) +
  scale_x_continuous(limits = c(102, 124.5)) +
  scale_fill_viridis_c() +
  scale_color_viridis_c() +
  labs(
    title = "PCI 2018 Score by Province (Version 2)",
    subtitle = "The provincial competitiveness index (PCI) ranks Vietnam's 63 provinces\nbased on economic governance areas that affect the private sector development.\nThe PCI measures the following ten sub-indices on a scale from 0 (minimum) to 100 (maximum).",
    caption = "Source: https://pcivietnam.vn"
  ) +
  theme_void() +
  # Dark theme: black background, grey text, no legend.
  theme(
    text = element_text(family = my_font),
    plot.margin = margin(0.3, 0.3, 0.3, 0.5, unit = "cm"),
    legend.position = "none",
    plot.background = element_rect(fill = "black"),
    plot.caption = element_text(color = "gray60"),
    plot.title = element_text(color = "gray90", size = 16),
    plot.subtitle = element_text(color = "gray70", size = 10)
  )
#====================
#  Version 3
#====================
# Same layers again, but only the zero-padded rank is printed at the end of
# the curve; the "<province> - <score>" text follows the score bar.
ggplot() +
  # Province outlines.
  geom_path(
    data = vn_province_df,
    mapping = aes(long, lat, group = group),
    color = "grey30",
    size = 0.1
  ) +
  # One point per province, coloured by score.
  geom_point(
    data = df_central_point,
    mapping = aes(X, Y, color = pci_score),
    size = 1
  ) +
  # Curves from each point to its slot in the ranked list.
  geom_sigmoid(
    data = df_bum,
    mapping = aes(
      X, Y,
      xend = x_end_dum, yend = y_end_dum,
      color = pci_score, group = id
    ),
    alpha = 0.6,
    smooth = 10,
    size = 0
  ) +
  # Two-digit rank at the end of the curve.
  geom_text(
    data = df_bum,
    mapping = aes(x_end_dum, y_end_dum, label = rank_full, color = pci_score),
    size = 3,
    vjust = 0,
    hjust = 0,
    family = my_font
  ) +
  # "<province> - <score>" printed past the end of the bar.
  geom_text(
    data = df_bum,
    mapping = aes(
      x = x_end_dum + 0.3 + pci_score / 20,
      y = y_end_dum,
      label = label3,
      color = pci_score
    ),
    size = 2.7,
    hjust = -0.1,
    family = my_font
  ) +
  # Horizontal bar proportional to the score, offset to clear the rank.
  geom_segment(
    data = df_bum,
    mapping = aes(
      x = x_end_dum + 0.4,
      y = y_end_dum,
      xend = x_end_dum + 0.4 + pci_score / 20,
      yend = y_end_dum,
      color = pci_score
    ),
    size = 1
  ) +
  scale_x_continuous(limits = c(102, 126)) +
  scale_fill_viridis_c() +
  scale_color_viridis_c() +
  labs(
    title = "PCI 2018 Score by Province (Version 3)",
    subtitle = "The provincial competitiveness index (PCI) ranks Vietnam's 63 provinces\nbased on economic governance areas that affect the private sector development.\nThe PCI measures the following ten sub-indices on a scale from 0 (minimum) to 100 (maximum).",
    caption = "Source: https://pcivietnam.vn"
  ) +
  theme_void() +
  # Dark theme: black background, grey text, no legend.
  theme(
    text = element_text(family = my_font),
    plot.margin = margin(0.5, 1, 0.5, 0.5, unit = "cm"),
    legend.position = "none",
    plot.background = element_rect(fill = "black"),
    plot.caption = element_text(color = "gray60"),
    plot.title = element_text(color = "gray90", size = 16),
    plot.subtitle = element_text(color = "gray70", size = 10)
  )