# R code for replicating the chart:
#===========================================
# A Short Introduction to wbstats package
#===========================================
# Run this script in a fresh R session instead of calling rm(list = ls()):
# wiping the global environment silently destroys the caller's objects and
# still does not reset loaded packages or options.

# Load wbstats package:
library(wbstats)

# General information in list structure (cached metadata shipped with wbstats):
general_information <- wb_cachelist

# Show some basic information (top-level elements only):
str(general_information, max.level = 1)
#---------------------------------------------------------------------
# Extract data frame that contains general information for countries
#---------------------------------------------------------------------
# Extract by name rather than by position so the code does not depend on the
# order of the elements inside the cache list.
df_countries <- general_information[["countries"]]

library(tidyverse)

# Some insights, for example, the number of entries in each region:
df_countries %>%
  count(region)
#------------------------------
# Indicators provided by WB
#------------------------------
df_indicators <- general_information[["indicators"]]

# Number of indicators (rows) and of descriptive columns:
dim(df_indicators)

# Description for indicators whose name mentions "Poverty":
poverty_indicators <- df_indicators %>%
  filter(str_detect(indicator, "Poverty")) %>%
  head()

# View() needs an interactive data viewer; fall back to printing otherwise.
if (interactive()) {
  View(poverty_indicators)
} else {
  print(poverty_indicators)
}

# A list of indicators (World Bank indicator codes used by the case study):
my_indicator <- c("SP.POP.TOTL", "SP.DYN.LE00.IN", "NY.GDP.PCAP.PP.CD")

# Look up the code and the name of the selected indicators:
df_indicators %>%
  filter(indicator_id %in% my_indicator) %>%
  select(indicator_id, indicator)
# NOTE(review): the object names below say "educ", but the indicator codes are
# used exactly as given; look them up in df_indicators to confirm what each
# one measures.

# Collect a given indicator for all nations:
educ_data <- wb_data(
  country = "all",
  indicator = "PRJ.POP.2024.3.MF",
  start_date = 2000,
  end_date = 2018
)

# Collect a given indicator for some nations:
educ_data_3nations <- wb_data(
  country = c("AFG", "AGO", "VNM"),
  indicator = "SH.TBS.MORT",
  start_date = 2016,
  end_date = 2017
)

# Collect some indicators for some nations:
educ_tuberculosis <- wb_data(
  country = c("AFG", "AGO", "VNM"),
  indicator = c("SH.TBS.MORT", "PRJ.POP.2024.3.MF"),
  start_date = 2016,
  end_date = 2017
) # Example 1.
#===========================================================
# A Case Study: Replicate chart created by The Economist
# with data collected from World Bank by using wbstats
#===========================================================
#---------------------------------------
# Collect and prepare data for plotting
#---------------------------------------
# Download the three indicators in my_indicator for every entity, year 2020.
mydf <- wb_data(
  country = "all",
  indicator = my_indicator,
  start_date = 2020,
  end_date = 2020
)

# Keep complete rows only, give the indicator columns short names and drop
# the two ISO code columns:
mydf_small_wide <- mydf %>%
  filter(!is.na(iso2c)) %>%
  na.omit() %>%
  rename(
    pop = SP.POP.TOTL,
    life = SP.DYN.LE00.IN,
    gdp = NY.GDP.PCAP.PP.CD
  ) %>%
  select(-c(iso2c, iso3c))

# Region and income level for entries that have a capital city recorded:
country_info <- df_countries %>%
  filter(!is.na(capital_city)) %>%
  select(country, region, income_level)

# Attach region / income level; rows without a match on either side get NA
# and are removed by the na.omit() in the next step.
mydf_small_wide <- full_join(mydf_small_wide, country_info, by = "country")

# Min-max rescaling of a numeric vector to the range [0, 1].
rescale_01 <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}

# Put GDP on a log10 scale, then rescale both axes variables to [0, 1]:
df_for_ploting <- mydf_small_wide %>%
  na.omit() %>%
  mutate(gdp = log10(gdp)) %>%
  mutate(across(c(gdp, life), rescale_01))

# Drop the observations sitting exactly on the extremes (0 or 1) of an axis:
df_for_ploting <- df_for_ploting %>%
  filter(life != 1, gdp != 1, gdp != 0, life != 0)

# Two-row helper frames for the background shapes of the chart. They reuse
# the first two rows of the data and overwrite only the plotted coordinates.
# Diagonal from (0, 0) to (1, 1):
df_bgr1 <- df_for_ploting %>%
  slice(1:2) %>%
  mutate(gdp = 0:1, life = 0:1)

# Horizontal top edge from (0, 1) to (1, 1):
df_bgr2 <- df_for_ploting %>%
  slice(1:2) %>%
  mutate(gdp = 0:1, life = c(1, 1))

# Both rows at (0, 1):
df_abline <- df_for_ploting %>%
  slice(1:2) %>%
  mutate(gdp = c(0, 0), life = c(1, 1))
#-----------------------------------------------------------------------------
# Replicate plot created by The Economist
# Ref: https://www.economist.com/briefing/2016/03/26/too-much-of-a-good-thing
#-----------------------------------------------------------------------------
library(ggsci)
library(showtext) # Package for using extra fonts.

# Font used for titles, axes and legend:
my_font <- "Roboto Condensed"

# Load font for plotting (registered under its own name as the family):
font_add_google(name = my_font, family = my_font)

# Font used for the in-panel annotations:
anno_font <- "Oswald"

font_add_google(name = anno_font, family = anno_font)

showtext_auto() # Automatically render text.

# Text elements of the chart:
p_title <- "A widespread effect"

p_subtitle <- "Top four firms' share of total industry revenue, %\nUnited States, 893 industries, grouped by region"

p_caption <- "Source: US Census Bureau | Graphic Designer: Nguyen Chi Dung"

# Axis titles (legend_y labels the y axis, legend_x the x axis):
legend_y <- "Top four firms' share of total industry revenue, 2000, %"

legend_x <- "Top four firms' share of total industry revenue, 1997, %"

# Background colour of the whole plot:
bgr_color <- "#d9e9f0"
# Build the chart. Layers are added in drawing order, so their sequence below
# must be preserved; scales, labels and theme settings are order-independent.
ggplot() +
  theme_minimal() +
  # Tinted background built from the df_bgr2 helper frame:
  geom_area(
    data = df_bgr2, aes(x = gdp, y = life),
    fill = "#008a84", alpha = 0.08
  ) +
  # Area built from df_bgr1, filled with the background colour and outlined
  # with a long-dashed line:
  geom_area(
    data = df_bgr1, aes(x = gdp, y = life),
    fill = bgr_color, color = "grey40", linetype = "longdash", size = 0.6
  ) +
  # Hand-drawn white horizontal grid lines:
  geom_hline(yintercept = seq(0, 1, 0.2), color = "white", size = 0.8) +
  # Same area again with a fully transparent fill, so that only the dashed
  # outline is drawn on top of the white grid lines:
  geom_area(
    data = df_bgr1, aes(x = gdp, y = life),
    fill = bgr_color, color = "grey40", linetype = "longdash", size = 0.6,
    alpha = 0
  ) +
  # One bubble per row of df_for_ploting, sized by population:
  geom_point(
    data = df_for_ploting,
    aes(x = gdp, y = life, size = pop, color = region, fill = region),
    alpha = 0.5
  ) +
  # NOTE(review): this layer has no data or mapping of its own and ggplot()
  # supplies none, so it appears to draw nothing - confirm whether these
  # settings were meant for the point layer above.
  geom_point(shape = 21, alpha = 0.45, stroke = 0) +
  # Baseline of the x axis and its tick marks, drawn by hand:
  geom_hline(yintercept = 0, color = "grey30", size = 0.71) +
  annotate(
    "segment",
    x = seq(0, 1, 0.1), xend = seq(0, 1, 0.1), y = 0, yend = -0.015,
    size = 1, color = "grey30"
  ) +
  annotate(
    "text", x = 0.1, y = 0.9, label = "BECOMING MORE\nCONCENTRATED",
    hjust = 0, color = "#008a84", alpha = 0.9, family = anno_font, size = 3.5
  ) +
  annotate(
    "text", x = 0.5, y = 0.1, label = "BECOMING LESS\nCONCENTRATED",
    hjust = 0, color = "#008a84", alpha = 0.9, family = anno_font, size = 3.5
  ) +
  # Scales: the data are in [0, 1] but the axes are labelled 0-100.
  scale_size(range = c(1, 20)) +
  scale_color_jama(name = "Region") +
  scale_fill_jama(name = "Region") +
  scale_y_continuous(
    expand = c(0, 0), breaks = seq(0, 1, 0.2), labels = seq(0, 100, 20),
    limits = c(-0.015, 1.05)
  ) +
  scale_x_continuous(
    expand = c(0.001, 0), breaks = seq(0, 1, 0.1), labels = seq(0, 100, 10)
  ) +
  # Hide the size legend ("none" replaces the deprecated FALSE) and enlarge
  # the colour legend keys:
  guides(
    size = "none",
    color = guide_legend(override.aes = list(size = 4.5))
  ) +
  labs(
    title = p_title, subtitle = p_subtitle, caption = p_caption,
    x = legend_x, y = legend_y
  ) +
  theme(
    plot.background = element_rect(fill = bgr_color, color = NA),
    panel.grid = element_blank(),
    plot.margin = unit(c(0.5, 1, 0.5, 0.8), "cm"),
    axis.title = element_text(
      family = my_font, color = "grey20", size = 11, face = "italic"
    ),
    axis.text = element_text(family = my_font, color = "grey20", size = 11),
    plot.title = element_text(
      family = my_font, size = 17, hjust = 0, face = "bold"
    ),
    plot.subtitle = element_text(
      family = my_font, size = 11.6, color = "grey20"
    ),
    plot.caption = element_text(
      family = my_font, color = "grey40", hjust = 0, size = 11, vjust = -1
    ),
    plot.title.position = "plot",
    plot.caption.position = "plot",
    # Adjust legend:
    legend.position = c(0.857, 0.27),
    legend.title = element_text(
      color = "grey20", family = my_font, size = 10.5, face = "bold",
      hjust = 0.05
    ),
    legend.text = element_text(
      family = my_font, size = 10.2, color = "grey20", hjust = 0
    ),
    legend.background = element_rect(fill = "white", color = NA)
  )
# Draw the small red rectangle in the top-left corner of the current graphics
# page (the icon of the Economist-style chart being replicated). It is drawn
# with grid on top of the plot printed above, so run it after the plot.
library(grid)

red_icon <- "#ed1c24"

grid.rect(
  x = 0, y = 1, width = 0.018, height = 0.008 * 8,
  just = c("left", "top"),
  gp = gpar(fill = red_icon, col = red_icon)
)