Motivations

Báo cáo The 2018 Atlas of Sustainable Development Goals (gọi tắt là SDG 2018) của World Bank có hơn 180 charts và maps được thực hiện gần như hoàn toàn bằng R, ngoại trừ một số bản đồ phức tạp được vẽ bằng ArcGIS.
Dưới đây là một kiểu plot thường xuyên xuất hiện trong báo cáo này:
R Codes

R codes cho plot ở trên:
# NOTE: the workspace is deliberately NOT wiped with rm(list = ls()).
# That call deletes every object the caller has in the global environment
# when this script is sourced, yet it leaves loaded packages and options
# untouched, so it does not give a clean session. Restart R if a fresh
# session is needed.

# Load some R packages:
library(tidyverse)
library(readxl)

# Load data (download from https://github.com/worldbank/sdgatlas2018/blob/master/inputs/sdg1/registering_property.xlsx):
# Both sheets are read from the same workbook, so the path is defined once.
property_file <- "registering_property.xlsx"

geographic <- read_excel(property_file, sheet = "geographic_coverage")

infrastructure <- read_excel(property_file, sheet = "reliability_infrastructure")

# Join data: a full join keeps every economy present in either sheet.
df_total <- full_join(geographic, infrastructure, by = "Economy")

# Select and rename some columns, then score each economy from 0 to 4:
# one point per indicator whose (recoded) answer is "Yes".
#   immovable - answer to the "formally registered" question, used as is
#   mapped    - answer to the "mapped" question, used as is
#   format    - "Yes" only when the answer is "Computer/Fully digital"
#   database  - "No" only when the answer is "Separate databases"
# NOTE(review): a missing `database` answer falls through to "Yes" and so
# earns a point, whereas a missing answer on the other three indicators
# scores 0. Confirm that this asymmetry is intended.

df_for_mapping1 <- df_total %>%
  select(
    region = Economy,
    immovable = "Are all privately held land plots in the largest business city formally registered at the immovable property registry?",
    mapped = "Are all privately held land plots in the largest business city mapped?",
    format = "In what format are the majority of maps of land plots kept in the largest business city—in a paper format or in a computerized format (scanned or fully digital)?",
    database = "Is the information recorded by the immovable property registration agency and the cadastral or mapping agency kept in a single database, in different but linked databases or in separate databases?"
  ) %>%
  mutate(
    format = case_when(format == "Computer/Fully digital" ~ "Yes", TRUE ~ "No"),
    database = case_when(database == "Separate databases" ~ "No", TRUE ~ "Yes")
  ) %>%
  # pivot_longer() replaces the superseded gather(); the per-region sum
  # below does not depend on row order, so the totals are unchanged.
  pivot_longer(
    cols = c(immovable, mapped, format, database),
    names_to = "indicatorID",
    values_to = "value"
  ) %>%
  # Anything other than an explicit "Yes" (including NA) counts as 0.
  mutate(coded = case_when(value == "Yes" ~ 1, TRUE ~ 0)) %>%
  group_by(region) %>%
  summarize(total = sum(coded))

# Prepare data for plotting:
# Rewrite economy names so they match the `region` names used by
# map_data("world"). case_when() takes the first matching branch, so more
# specific patterns must come before more general ones.

df_for_mapping1 <- df_for_mapping1 %>%
  mutate(region = case_when(
    str_detect(region, "Antigua") ~ "Antigua",
    str_detect(region, "Bahamas") ~ "Bahamas",
    str_detect(region, "Bangladesh") ~ "Bangladesh",
    str_detect(region, "Brazil") ~ "Brazil",
    str_detect(region, "Brunei") ~ "Brunei",
    str_detect(region, "Verde") ~ "Cape Verde",
    str_detect(region, "^China") ~ "China",
    str_detect(region, "Taiwan") ~ "Taiwan",
    # The Republic of Congo must be matched before the generic "Congo"
    # pattern; otherwise it is renamed to the Democratic Republic of the
    # Congo, loses its own score on the map and can leak its score into the
    # DRC through the per-region max taken below.
    str_detect(region, "Congo, Rep") ~ "Republic of Congo",
    str_detect(region, "Congo") ~ "Democratic Republic of the Congo",
    str_detect(region, "Ivoire") ~ "Ivory Coast",
    str_detect(region, "Egypt") ~ "Egypt",
    str_detect(region, "Gambia") ~ "Gambia",
    str_detect(region, "India") ~ "India",
    str_detect(region, "Indonesia") ~ "Indonesia",
    str_detect(region, "Iran") ~ "Iran",
    str_detect(region, "Japan") ~ "Japan",
    str_detect(region, "Korea") ~ "South Korea",
    str_detect(region, "Kyrgyz") ~ "Kyrgyzstan",
    str_detect(region, "Lao") ~ "Laos",
    str_detect(region, "Macedonia") ~ "North Macedonia",
    str_detect(region, "Mexico") ~ "Mexico",
    str_detect(region, "Micronesia") ~ "Micronesia",
    str_detect(region, "Nigeria") ~ "Nigeria",
    str_detect(region, "Pakistan") ~ "Pakistan",
    str_detect(region, "Puerto Rico") ~ "Puerto Rico",
    str_detect(region, "Russian") ~ "Russia",
    # NOTE(review): "Tome" will not match an accented spelling such as
    # "Tomé"; check how the workbook spells this economy.
    str_detect(region, "Tome") ~ "Sao Tome and Principe",
    str_detect(region, "Slovak Republic") ~ "Slovakia",
    str_detect(region, "Nevis") ~ "Nevis",
    str_detect(region, "Lucia") ~ "Saint Lucia",
    str_detect(region, "Vincent") ~ "Saint Vincent",
    str_detect(region, "Syrian") ~ "Syria",
    str_detect(region, "Trinidad") ~ "Trinidad",
    str_detect(region, "United Kingdom") ~ "UK",
    str_detect(region, "United States") ~ "USA",
    str_detect(region, "Venezuela") ~ "Venezuela",
    str_detect(region, "Yemen") ~ "Yemen",
    TRUE ~ region
  ))

# Several source rows can collapse onto the same map name after renaming
# (presumably economies reported for more than one city - TODO confirm).
# Keep one row per region, carrying the highest score.
df_for_mapping1 <- df_for_mapping1 %>%
  group_by(region) %>%
  summarize(total = max(total))

# Load geo-spatial data (polygon vertices with long/lat/group/region):
world <- map_data("world")

# Join the two datasets on the harmonised region name. The full join keeps
# map regions without a score as well as scored economies without polygons.
df_indicator <- full_join(world, df_for_mapping1, by = "region")

# Legend categories in display order; regions without a score get "No data".
score_levels <- c("0", "1", "2", "3", "4", "No data")

df_indicator <- df_indicator %>%
  mutate(
    total = coalesce(as.character(total), "No data"),
    total = factor(total, levels = score_levels)
  )

#----------------------------
#      Data Visualization
#----------------------------

# Pink ramp from darkest (pink1) to lightest (pink5), plus a neutral grey.
pink1 <- "#cc1e41"
pink2 <- "#da4e69"
pink3 <- "#e37895"
pink4 <- "#efa5b9"
pink5 <- "#f9d2db"
grey <- "#cccccc"

# Fill colour per legend category: higher scores get darker pinks and
# regions without a score are grey.
score_fills <- c(
  "0" = pink5,
  "1" = pink4,
  "2" = pink3,
  "3" = pink2,
  "4" = pink1,
  "No data" = grey
)

library(showtext) # Package for using extra fonts.

showtext_auto() # Automatically render plot text through showtext.

my_font <- "Outfit" # Font family used for all plot text.

font_add_google(name = my_font, family = my_font) # Load the font from Google Fonts.

# Title, subtitle and caption.
map_labels <- labs(
  title = "Land rights provide security of tenure and are important for reducing poverty. But\nmany countries lack comprehensive land registries that record ownership.",
  subtitle = "Number of components related to property registration from Doing Business Index (0-4, higher is better)",
  caption = "Source: World Bank Doing Business (database). http://www.doingbusiness.org\nGraphic Creator: Nguyen Chi Dung"
)

# Bare map look: no grid, axes or legend title; horizontal legend on top.
map_theme <- theme_minimal() +
  theme(
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank(),
    legend.title = element_blank(),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1.2, "cm"),
    legend.text = element_text(size = 11, family = my_font, color = "grey20"),
    plot.title = element_text(size = 18, family = my_font),
    plot.subtitle = element_text(size = 13, color = "grey30", family = my_font),
    plot.caption = element_text(size = 10, color = "grey30", family = my_font, hjust = 0),
    plot.margin = unit(rep(0.4, 4), "cm")
  )

# Draw the choropleth. The latitude filter drops every vertex south of 55
# degrees S, and also the rows with missing coordinates that the full join
# produced for economies without a matching map region.
# NOTE(review): recent ggplot2 releases deprecate `size` for line widths in
# favour of `linewidth`; switch once the minimum ggplot2 version is confirmed.
df_indicator %>%
  filter(lat >= -55) %>%
  ggplot(aes(x = long, y = lat, group = group, fill = total)) +
  geom_polygon(color = "white", size = 0.01) +
  scale_fill_manual(values = score_fills) +
  coord_cartesian() +
  map_labels +
  map_theme +
  guides(fill = guide_legend(nrow = 1))
Data Visualization using R: The 2018 Atlas of Sustainable Development Goals

Daily Graph Series

Author: Nguyen Chi Dung

Motivations

R Codes