Motivations

Báo cáo The 2018 Atlas of Sustainable Development Goals (gọi tắt là SDG 2018) của World Bank (WB) có hơn 180 charts và maps được thực hiện gần như hoàn toàn bằng R, ngoại trừ một số bản đồ phức tạp được vẽ bằng ArcGIS.

Dưới đây là chart kí hiệu SDG 1.1 tại trang 2 của báo cáo này:

Kiểu chart này có tên gọi là Mosaic Chart.

R Codes

Dưới đây là R codes để tạo ra plot trên:

# Setup ----
# NOTE: the script used to start with `rm(list = ls())`. That call is dropped:
# it deletes every object in the caller's workspace when the script is
# sourced, yet it does not give a clean session (attached packages and
# options stay as they were). Restart R if a fresh session is needed.

# Load some packages:
library(readxl)
library(tidyverse)
library(stringr)
library(showtext) # -> Package for using extra fonts.

# Select color for our plot:

pink_color <- "#cc0641"

grey_color <- "#A6A6A6"

pink_box_color <- "#cb6a72"

# Fonts ----

showtext_auto() # -> Automatically render text.

title_font <- "Ubuntu" # -> Font for the title, axis, strip and legend text.

font_add_google(name = title_font, family = title_font) # -> Load font

subtitle_font <- "PT Sans" # -> Font for the region labels and the caption.

font_add_google(name = subtitle_font, family = subtitle_font) # -> Load font

# Load main data ----
# Columns used below: `year`, `regioncid`, `population` and `hc`.
# `hc` is treated as the poor share of the population on a 0-1 scale
# (its complement `1 - hc` becomes `not_poverty`) -- TODO confirm against
# the workbook.

df <- read_excel("SDG1_f2_global_dist.xls")

years <- c(1990, 2013)

# Keep the two years of interest and drop the "WLD" row (presumably the world
# aggregate), then turn the region code into a factor ordered with
# fct_reorder2() on the `year == min(years)` flag and `poverty`, and sort the
# rows by year and by that factor order.
df <- df %>%
  filter(year %in% years, regioncid != "WLD") %>%
  mutate(poverty = hc, not_poverty = 1 - hc) %>%
  select(year, iso3c = regioncid, population, poverty, not_poverty) %>%
  mutate(iso3c = fct_reorder2(iso3c, year == min(years), poverty)) %>%
  arrange(year, iso3c)

# Tile extent along the population axis: within each year the regions are
# stacked in row order, so each tile spans [x_min, x_max] with a width equal
# to its population.
df <- df %>%
  group_by(year) %>%
  mutate(
    x_max = cumsum(population),
    x_min = x_max - population
  ) %>%
  ungroup()

# Tile extent along the share axis: one row per region/year/indicator.
# `poverty` is listed first in `cols`, so inside every region/year group the
# poverty row precedes the not_poverty row and the cumulative sum stacks
# poverty from 0 and not_poverty on top of it.
df <- df %>%
  pivot_longer(
    cols = c(poverty, not_poverty),
    names_to = "indicatorID",
    values_to = "value"
  ) %>%
  group_by(iso3c, year) %>%
  mutate(
    y_max = cumsum(value),
    y_min = y_max - value
  ) %>%
  ungroup() %>%
  mutate(indicatorID = factor(indicatorID, levels = c("poverty", "not_poverty")))

# Load ISO 3C data (from https://apps.who.int/gho/data/node.searo-metadata.WORLDBANKREGION?lang=en):
# The file is read without a header row, so the columns are referred to by
# their generated names (X2 = code, X3 = region name). Only codes containing
# "WB" are kept, and the "WB_" part is stripped so that the codes can be
# matched against `df$iso3c`.

df_region_iso3c_codes <- read_csv("data-text.csv", col_names = FALSE) %>%
  select(iso3c = X2, region_name = X3) %>%
  filter(str_detect(iso3c, "WB")) %>%
  mutate(iso3c = str_replace_all(iso3c, "WB_", ""))

# Attach the region names. The join key is spelled out so the join cannot
# silently pick up any other column the two tables might come to share.
# "SAS" and "OHI" get their display names set explicitly, whatever the lookup
# returned for them.
df <- df %>%
  left_join(df_region_iso3c_codes, by = "iso3c") %>%
  mutate(
    region_name = case_when(
      iso3c == "SAS" ~ "South Asia",
      iso3c == "OHI" ~ "Other high income",
      TRUE ~ region_name
    )
  )

# Base mosaic: one rectangle per region/indicator, filled by indicator and
# outlined in white, with the axes flipped and one facet column per year.
draft_plot <- ggplot(
  data = df,
  mapping = aes(
    xmin = x_min,
    xmax = x_max,
    ymin = y_min,
    ymax = y_max,
    fill = indicatorID
  )
) +
  geom_rect(colour = "white", size = 0.01) +
  coord_flip() +
  facet_grid(. ~ year, scales = "free_x", space = "free_x") +
  theme_minimal() +
  # Margins are given as top, right, bottom, left.
  theme(plot.margin = margin(t = 0.7, r = 1, b = 1, l = 1, unit = "cm"))


# Plot title, hard-wrapped onto three lines.
p_title <- paste(
  "The world's population has grown and the regional distribution of poverty has",
  "changed. Compared with 1990, there are now more poor people in Sub-Saharan",
  "Africa and fewer in South Asia and East Asia & Pacific.",
  sep = "\n"
)

# Plot caption: note, data source and credit, one per line.
p_caption <- paste(
  "Note: Poor refers to people living on less than $1.90 a day (2011 PPP). Regional aggregates exclude certain high-income countries.",
  "Source: World Bank PovcalNet (database). http://iresearch.worldbank.org/PovcalNet/home.aspx",
  "Graphic Designer: Nguyen Chi Dung",
  sep = "\n"
)

# Final chart: add value labels, region labels, scales and theme to the
# base mosaic. The result is left as a bare expression so that it prints.
draft_plot +
  # Poverty share in percent, placed slightly past the end of each poverty
  # tile (y_max of a poverty row is the poverty share itself).
  geom_text(
    data = filter(df, indicatorID == "poverty"),
    aes(
      x = x_min + (x_max - x_min) / 2,
      y = y_max + 0.03,
      label = round(value * 100, 1)
    ),
    family = title_font,
    color = "grey20",
    hjust = 0.3
  ) +
  # Region names, drawn once (2013 facet only) at a fixed position on the
  # share axis and centred on each tile along the population axis.
  geom_text(
    data = filter(df, year == 2013, indicatorID != "poverty"),
    aes(
      x = x_min + (x_max - x_min) / 2,
      y = 0.52,
      label = region_name
    ),
    hjust = 0,
    family = subtitle_font,
    color = "grey20",
    size = 3.8
  ) +
  labs(y = "Share of population (%)", title = p_title, caption = p_caption) +
  # Breaks are pinned to match the five hand-written labels; without them the
  # labels only fit while the automatic breaks happen to be 0, 0.25, ..., 1.
  scale_y_continuous(
    breaks = seq(0, 1, by = 0.25),
    labels = c("0", "25", "50", "75", "100"),
    expand = c(0, 0)
  ) +
  scale_x_continuous(expand = c(0, 0)) +
  # Breaks are listed so that each label is tied to its key by position.
  scale_fill_manual(
    values = c("poverty" = pink_color, "not_poverty" = "grey90"),
    breaks = c("poverty", "not_poverty"),
    labels = c("Poor", "Not Poor")
  ) +
  theme(
    # Axes and grid: the population axis carries no text, title or ticks.
    axis.text.y = element_blank(),
    axis.title.y = element_blank(),
    axis.ticks = element_blank(),
    panel.grid = element_blank(),
    axis.text = element_text(family = title_font, size = 11, color = "grey40"),
    axis.title.x = element_text(color = "grey30", family = title_font, size = 11, vjust = -2),
    # Legend: horizontal, untitled, positioned inside the plot area.
    legend.position = c(0.23, 0.9),
    legend.direction = "horizontal",
    legend.title = element_blank(),
    legend.text = element_text(color = "grey30", family = title_font, size = 10),
    legend.key.size = unit(0.5, "cm"),
    # Facets.
    panel.spacing.y = unit(0, "lines"),
    panel.spacing = unit(3, "lines"),
    strip.text = element_text(family = title_font, size = 11, color = "grey30", hjust = -0.02, vjust = 2),
    # Title and caption, both aligned to the whole plot rather than the panel.
    plot.title = element_text(family = title_font, size = 15, hjust = 0, vjust = 2),
    plot.title.position = "plot",
    plot.caption = element_text(family = subtitle_font, size = 10.5, color = "grey40", hjust = 0, vjust = -5),
    plot.caption.position = "plot"
  )