Motivations

The original chart by The Economist can be replicated as follows:
R Codes

R codes for replicating the chart:
#===========================================
# A Short Introduction to wbstats package
#===========================================

# Clear R: 

rm(list = ls())

# Load wbstats package: 

library(wbstats)

# General information in list structure: 

general_information <-  wb_cachelist

# Show some basic information: 
str(general_information, max.level = 1)


#---------------------------------------------------------------------
# Extract data frame that contains general information for countries
#---------------------------------------------------------------------

df_countries <- general_information[[1]]

# Some insights, for example, income group: 

library(tidyverse)

df_countries %>% 
  group_by(region) %>% 
  count() %>% 
  ungroup()

#------------------------------
#  Indicators provided by WB
#------------------------------

df_indicators <- general_information[[2]]

# Number of indicators: 

dim(df_indicators)

# Description for indicators: 

df_indicators %>% 
  filter(str_detect(indicator, "Poverty")) %>% 
  head() %>% 
  View()

# A list of indicators: 

my_indicator <- c("SP.POP.TOTL", "SP.DYN.LE00.IN", "NY.GDP.PCAP.PP.CD")

df_indicators %>% 
  filter(indicator_id %in% my_indicator) %>% 
  select(1:2)

# Collect a given indicator for all nations: 

educ_data <- wb_data(country = "all",
                     indicator = "PRJ.POP.2024.3.MF",
                     start_date = 2000,
                     end_date = 2018)

# Collect a given indicator for some nations: 

educ_data_3nations <- wb_data(country = c("AFG", "AGO", "VNM"),
                              indicator = "SH.TBS.MORT",
                              start_date = 2016,
                              end_date = 2017)

# Collect some indicators for some nations: 

educ_tuberculosis <- wb_data(country = c("AFG", "AGO", "VNM"),
                             indicator = c("SH.TBS.MORT", "PRJ.POP.2024.3.MF"), 
                             start_date = 2016,
                             end_date = 2017) # Example 1. 


#===========================================================
#  A Case Study: Replicate chart created by The Economist
#  with data collected from World Bank by using wbstats
#===========================================================

#---------------------------------------
# Collect and prepare data for ploting 
#---------------------------------------

mydf <- wb_data(country = "all",
                indicator = my_indicator, 
                start_date = 2020,
                end_date = 2020) 

# Rename for some columns: 

mydf %>% 
  filter(!is.na(iso2c)) %>% 
  na.omit() %>% 
  rename(pop = SP.POP.TOTL, life = SP.DYN.LE00.IN, gdp = NY.GDP.PCAP.PP.CD) %>% 
  select(-c(1, 2)) -> mydf_small_wide

df_countries %>% 
  filter(!is.na(capital_city)) %>% 
  select(country, region, income_level) -> country_info

full_join(mydf_small_wide, country_info, by = "country") -> mydf_small_wide

mydf_small_wide %>% 
  na.omit() %>% 
  mutate(gdp = log10(gdp)) %>% 
  mutate_at(.vars = c("gdp", "life"), .funs = function(x) {(x - min(x)) / (max(x) - min(x))}) -> df_for_ploting

df_for_ploting %>% 
  filter(life != 1, gdp != 1,  gdp != 0, life != 0) -> df_for_ploting

df_for_ploting %>% 
  slice(1:2) %>% 
  mutate(gdp = 0:1, life = 0:1) -> df_bgr1

df_for_ploting %>% 
  slice(1:2) %>% 
  mutate(gdp = 0:1, life = c(1, 1)) -> df_bgr2

df_for_ploting %>% 
  slice(1:2) %>% 
  mutate(gdp = c(0, 0), life = c(1, 1)) -> df_abline 

#-----------------------------------------------------------------------------
# Replicate plot created by The Economist
# Ref: https://www.economist.com/briefing/2016/03/26/too-much-of-a-good-thing
#-----------------------------------------------------------------------------

library(ggsci)

library(showtext) # Package for using extra fonts. 

my_font <- "Roboto Condensed" 

# Load font for ploting: 

font_add_google(name = my_font, family = my_font) 

anno_font <- "Oswald"

font_add_google(name = anno_font, family = anno_font) 

showtext_auto() # Automatically render text. 

p_title <- "A widespread effect"

p_subtitle <- "Top four firms' share of total industry revenue, %\nUnited States, 893 industries, grouped by region"

p_caption <- "Source: US Census Bureau | Graphic Designer: Nguyen Chi Dung"

legend_y <- "Top four firms' share of total industry revenue, 2000, %"

legend_x <- "Top four firms' share of total industry revenue, 1997, %"

bgr_color <- "#d9e9f0"

ggplot() + 
  theme_minimal() + 
  theme(plot.background = element_rect(fill = bgr_color, color = NA)) + 
  geom_area(data = df_bgr2, aes(x = gdp, y = life), fill = "#008a84", alpha = 0.08) + 
  geom_area(data = df_bgr1, aes(x = gdp, y = life), fill = bgr_color, color = "grey40", linetype = "longdash", size = 0.6) + 
  geom_hline(yintercept = seq(0, 1, 0.2), color = "white", size = 0.8) +  
  geom_area(data = df_bgr1, aes(x = gdp, y = life), fill = bgr_color, color = "grey40", linetype = "longdash", size = 0.6, alpha = 0) + 
  geom_point(data = df_for_ploting, aes(x = gdp, y = life, size = pop, color = region, fill = region), alpha = 0.5) + 
  geom_point(shape = 21, alpha = 0.45, stroke = 0) + 
  scale_size(range = c(1, 20)) + 
  guides(size = FALSE)  + 
  scale_color_jama(name = "Region") + 
  scale_fill_jama(name = "Region") + 
  theme(legend.position = c(0.857, 0.27)) + 
  theme(panel.grid = element_blank()) + 
  scale_y_continuous(expand = c(0, 0), breaks = seq(0, 1, 0.2), labels = seq(0, 100, 20), limits = c(-0.015, 1.05)) + 
  scale_x_continuous(expand = c(0.001, 0), breaks = seq(0, 1, 0.1), labels = seq(0,100, 10)) + 
  theme(plot.margin = unit(c(0.5, 1, 0.5, 0.8), "cm")) + 
  labs(title = p_title, subtitle = p_subtitle, caption = p_caption, x = legend_x, y = legend_y) + 
  geom_hline(yintercept = 0, color = "grey30", size = 0.71) + 
  geom_segment(aes(x = seq(0, 1, 0.1), xend = seq(0, 1, 0.1), y = 0, yend = -0.015), size = 1, color = "grey30") + 
  theme(axis.title = element_text(family = my_font, color = "grey20", size = 11, face = "italic")) + 
  theme(axis.text = element_text(family = my_font, color = "grey20", size = 11)) + 
  theme(plot.title = element_text(family = my_font, size = 17, hjust = 0, face = "bold")) + 
  theme(plot.subtitle = element_text(family = my_font, size = 11.6, color = "grey20")) + 
  theme(plot.caption = element_text(family = my_font, color = "grey40", hjust = 0, size = 11, vjust = -1)) + 
  theme(plot.title.position = "plot") +  
  theme(plot.caption.position = "plot") + 
  # Adjust legend: 
  theme(legend.title = element_text(color = "grey20", family = my_font, size = 10.5, face = "bold", hjust = 0.05)) +
  theme(legend.text = element_text(family = my_font, size = 10.2, color = "grey20", hjust = 0)) + 
  guides(color = guide_legend(override.aes = list(size = 4.5))) + 
  theme(legend.background = element_rect(fill = "white", color = NA)) + 
  annotate("text", x = 0.1, y = 0.9, label = "BECOMING MORE\nCONCENTRATED", 
           hjust = 0, color = "#008a84", alpha = 0.9, family = anno_font, size = 3.5) + 
  annotate("text", x = 0.5, y = 0.1, label = "BECOMING LESS\nCONCENTRATED", 
           hjust = 0, color = "#008a84", alpha = 0.9, family = anno_font, size = 3.5)


# Make Financial Times icon: 

library(grid)

red_icon <- "#ed1c24"

grid.rect(x = 0, y = 1, width = 0.018, height = 0.008*8, just = c("left", "top"), gp = gpar(fill = red_icon, col = red_icon))
R for Data Visualization: Recreate Bubble Chart by The Economist

Daily Graph Series

Author: Nguyen Chi Dung

Motivations

R Codes