Báo cáo The 2018 Atlas of Sustainable Development Goals (gọi tắt là SDG 2018) của World Bank có hơn 180 charts và maps được thực hiện gần như hoàn toàn bằng R, ngoại trừ một số bản đồ phức tạp được vẽ bằng ArcGIS.
Dưới đây là một kiểu plot thường xuyên xuất hiện trong báo cáo này:
R code để tạo plot ở trên:
# NOTE: `rm(list = ls())` was removed. It only deletes user-created objects
# (attached packages, options and the working directory survive), so it does
# not give a clean session. Restart R before running this script instead.

# Load some R packages:
library(tidyverse) # dplyr, tidyr, stringr and ggplot2 are all used below.
library(readxl)    # read_excel()

# Load data (download from https://github.com/worldbank/sdgatlas2018/blob/master/inputs/sdg1/registering_property.xlsx):
# The workbook is read from the current working directory; each sheet goes
# into its own data frame.
geographic <- read_excel(
  "registering_property.xlsx",
  sheet = "geographic_coverage"
)

infrastructure <- read_excel(
  "registering_property.xlsx",
  sheet = "reliability_infrastructure"
)
# Join data:
# Merge the two sheets on the economy name. A full join keeps economies that
# appear in only one sheet; their answers from the other sheet become NA.
df_total <- full_join(geographic, infrastructure, by = "Economy")

# Select and rename for some columns:
# Keep the four survey questions under short names, reduce every answer to
# "Yes"/"No", and count the "Yes" answers per economy (a score from 0 to 4).
df_for_mapping1 <- df_total %>%
  select(
    region = Economy,
    immovable = "Are all privately held land plots in the largest business city formally registered at the immovable property registry?",
    mapped = "Are all privately held land plots in the largest business city mapped?",
    format = "In what format are the majority of maps of land plots kept in the largest business city—in a paper format or in a computerized format (scanned or fully digital)?",
    database = "Is the information recorded by the immovable property registration agency and the cadastral or mapping agency kept in a single database, in different but linked databases or in separate databases?"
  ) %>%
  mutate(
    # Only "Computer/Fully digital" counts as "Yes"; anything else is "No".
    format = case_when(format == "Computer/Fully digital" ~ "Yes", TRUE ~ "No"),
    # Anything other than "Separate databases" counts as "Yes".
    # NOTE(review): a missing answer (NA) also falls through to "Yes" here,
    # while NA counts as "No" for the other three questions - confirm intent.
    database = case_when(database == "Separate databases" ~ "No", TRUE ~ "Yes")
  ) %>%
  # One row per economy and question (pivot_longer() replaces the superseded
  # gather(); the row order differs but the per-economy sum does not).
  pivot_longer(
    cols = c(immovable, mapped, format, database),
    names_to = "indicatorID",
    values_to = "value"
  ) %>%
  # "Yes" scores 1; "No" and NA score 0.
  mutate(coded = case_when(value == "Yes" ~ 1, TRUE ~ 0)) %>%
  group_by(region) %>%
  summarize(total = sum(coded))
# Prepare data for plotting:
# Rename economies so that they match the `region` names returned by
# map_data("world"). case_when() uses the first matching branch, so the order
# of the branches matters; names that match no pattern are kept unchanged.
df_for_mapping1 <- df_for_mapping1 %>%
  mutate(region = case_when(
    str_detect(region, "Antigua") ~ "Antigua",
    str_detect(region, "Bahamas") ~ "Bahamas",
    str_detect(region, "Bangladesh") ~ "Bangladesh",
    str_detect(region, "Brazil") ~ "Brazil",
    str_detect(region, "Brunei") ~ "Brunei",
    str_detect(region, "Verde") ~ "Cape Verde",
    # Anchored so that names merely ending in "China" are not swallowed.
    str_detect(region, "^China") ~ "China",
    str_detect(region, "Taiwan") ~ "Taiwan",
    # The two Congos are different countries on the map. The original single
    # "Congo" pattern sent both to the Democratic Republic of the Congo, so
    # the Republic of Congo was drawn as "No data". The more specific pattern
    # must come first.
    str_detect(region, "Dem.*Congo|Congo.*Dem") ~
      "Democratic Republic of the Congo",
    str_detect(region, "Congo") ~ "Republic of Congo",
    str_detect(region, "Ivoire") ~ "Ivory Coast",
    str_detect(region, "Egypt") ~ "Egypt",
    str_detect(region, "Gambia") ~ "Gambia",
    str_detect(region, "India") ~ "India",
    str_detect(region, "Indonesia") ~ "Indonesia",
    str_detect(region, "Iran") ~ "Iran",
    str_detect(region, "Japan") ~ "Japan",
    str_detect(region, "Korea") ~ "South Korea",
    str_detect(region, "Kyrgyz") ~ "Kyrgyzstan",
    str_detect(region, "Lao") ~ "Laos",
    str_detect(region, "Macedonia") ~ "North Macedonia",
    str_detect(region, "Mexico") ~ "Mexico",
    str_detect(region, "Micronesia") ~ "Micronesia",
    str_detect(region, "Nigeria") ~ "Nigeria",
    str_detect(region, "Pakistan") ~ "Pakistan",
    str_detect(region, "Puerto Rico") ~ "Puerto Rico",
    str_detect(region, "Russian") ~ "Russia",
    str_detect(region, "Tome") ~ "Sao Tome and Principe",
    str_detect(region, "Slovak Republic") ~ "Slovakia",
    str_detect(region, "Nevis") ~ "Nevis",
    str_detect(region, "Lucia") ~ "Saint Lucia",
    str_detect(region, "Vincent") ~ "Saint Vincent",
    str_detect(region, "Syrian") ~ "Syria",
    str_detect(region, "Trinidad") ~ "Trinidad",
    str_detect(region, "United Kingdom") ~ "UK",
    str_detect(region, "United States") ~ "USA",
    str_detect(region, "Venezuela") ~ "Venezuela",
    str_detect(region, "Yemen") ~ "Yemen",
    TRUE ~ region
  )) %>%
  # After renaming, several source rows can share one region name. Keep a
  # single row per region carrying its highest score. The table only has the
  # columns `region` and `total`, so this equals picking the which.max() row.
  group_by(region) %>%
  summarize(total = max(total))
# Load geo-spatial data:
# Polygon outlines (long, lat, group, order, region, subregion) of the world.
world <- map_data("world")

# Join the two datasets:
# Attach the score to every polygon vertex. A left join keeps the vertices in
# their original drawing order and does not append rows without coordinates
# for economies that have no matching polygon.
df_indicator <- world %>%
  left_join(df_for_mapping1, by = "region") %>%
  mutate(
    # Regions without a score are labelled "No data".
    total = coalesce(as.character(total), "No data"),
    # Fix the level order so the legend runs 0, 1, 2, 3, 4, No data.
    total = factor(total, levels = c("0", "1", "2", "3", "4", "No data"))
  )
#----------------------------
#  Data Visualization
#----------------------------

# Fill colours: pink1 (darkest) is used for score 4, pink5 (lightest) for
# score 0, and grey for regions without data.
pink1 <- "#cc1e41"
pink2 <- "#da4e69"
pink3 <- "#e37895"
pink4 <- "#efa5b9"
pink5 <- "#f9d2db"
grey <- "#cccccc"

library(showtext) # -> Package for using extra fonts.

my_font <- "Outfit" # -> Set Outfit font for plot.
font_add_google(name = my_font, family = my_font) # -> Load font
showtext_auto() # -> Automatically render text.

# Draw the choropleth. The plot is not assigned, so it prints when the script
# is run at top level.
df_indicator %>%
  # Keep only vertices at or north of 55 degrees south (presumably to leave
  # Antarctica out of the map - confirm). Rows with a missing `lat` are
  # dropped as well.
  filter(lat >= -55) %>%
  ggplot(aes(long, lat, group = group, fill = total)) +
  # `linewidth` replaces `size` for line widths (ggplot2 >= 3.4.0).
  geom_polygon(color = "white", linewidth = 0.01) +
  scale_fill_manual(
    values = c(
      "0" = pink5,
      "1" = pink4,
      "2" = pink3,
      "3" = pink2,
      "4" = pink1,
      "No data" = grey
    )
  ) +
  coord_cartesian() +
  labs(
    title = "Land rights provide security of tenure and are important for reducing poverty. But\nmany countries lack comprehensive land registries that record ownership.",
    subtitle = "Number of components related to property registration from Doing Business Index (0-4, higher is better)",
    caption = "Source: World Bank Doing Business (database). http://www.doingbusiness.org\nGraphic Creator: Nguyen Chi Dung"
  ) +
  theme_minimal() +
  # Single-row legend on top of the map.
  guides(fill = guide_legend(nrow = 1)) +
  theme(
    # Strip grid lines and axes: they carry no meaning on a map.
    panel.grid = element_blank(),
    axis.title = element_blank(),
    axis.text = element_blank(),
    # Legend layout and text.
    legend.title = element_blank(),
    legend.position = "top",
    legend.direction = "horizontal",
    legend.key.height = unit(0.5, "cm"),
    legend.key.width = unit(1.2, "cm"),
    legend.text = element_text(size = 11, family = my_font, color = "grey20"),
    # Title, subtitle and left-aligned caption.
    plot.title = element_text(size = 18, family = my_font),
    plot.subtitle = element_text(size = 13, color = "grey30", family = my_font),
    plot.caption = element_text(
      size = 10, color = "grey30", family = my_font, hjust = 0
    ),
    plot.margin = unit(rep(0.4, 4), "cm")
  )