# Collect data from World Bank and make a Bubble Plot

#=======================================================================================================
# Reference: https://cran.r-project.org/web/packages/wbstats/vignettes/Using_the_wbstats_package.html
# https://www.r-bloggers.com/life-expectancy-animated/
#=======================================================================================================
# Attach wbstats, which provides the World Bank helpers used below.
library(wbstats)

# `wb_cachelist` is the cached metadata list shipped with wbstats; keep it
# under a descriptive name for the exploration that follows.
general_information <- wbstats::wb_cachelist

# Print only the top level of the list to see which tables it holds.
str(object = general_information, max.level = 1)
#---------------------------------------------------------------------
# Extract data frame that contains general information for countries
#---------------------------------------------------------------------
# Pick the table by name rather than by position so the script does not
# depend on the ordering of the elements inside the cache list.
df_countries <- general_information[["countries"]]

# The tidyverse supplies the pipe and the dplyr / tidyr / stringr / ggplot2
# functions used from here on.
library(tidyverse)

# Some insights, for example the number of entries per income group.
# count(income) groups and tallies in one step, so no explicit
# group_by() / ungroup() pair is needed.
df_countries %>%
  count(income)
#------------------------------
# Indicators provided by WB
#------------------------------
# Pick the table by name rather than by position in the cache list.
df_indicators <- general_information[["indicators"]]

# Number of indicators (rows) and of descriptive columns:
dim(df_indicators)

# Show the first `n` indicators whose name matches `keyword`.
#
# indicators: data frame with a character column named `indicator`.
# keyword:    regular expression; matched case-insensitively so that, for
#             example, "death" also finds names that start with "Death".
# n:          maximum number of matching rows to show.
#
# Opens the data viewer in an interactive session and prints the rows
# otherwise (View() needs a display). Returns the matching rows invisibly.
preview_indicators <- function(indicators, keyword, n = 6L) {
  hits <- indicators %>%
    filter(str_detect(indicator, regex(keyword, ignore_case = TRUE))) %>%
    head(n)
  if (interactive()) {
    View(hits, title = keyword)
  } else {
    print(hits)
  }
  invisible(hits)
}

# Description for indicators, one preview per topic of interest:
indicator_keywords <- c(
  "gross domestic", "capital", "death", "tax", "Population"
)
walk(
  indicator_keywords,
  function(keyword) preview_indicators(df_indicators, keyword)
)
# Indicator codes used for the bubble plot (later renamed to pop, life and
# gdp when the data is reshaped):
my_indicator <- c("SP.POP.TOTL", "SP.DYN.LE00.IN", "NY.GDP.PCAP.PP.CD")

# Look the codes up in the indicator table, keeping its first two columns.
df_indicators %>%
  filter(indicatorID %in% my_indicator) %>%
  select(1:2)

# One indicator, every country, 2000-2018:
educ_data <- wb(
  country = "all",
  indicator = "PRJ.POP.2024.3.MF",
  startdate = 2000,
  enddate = 2018
)

# One indicator, three countries, 2016-2017:
educ_data_3nations <- wb(
  country = c("AFG", "AGO", "VNM"),
  indicator = "SH.TBS.MORT",
  startdate = 2016,
  enddate = 2017
)

# Several indicators, three countries, 2016-2017 (example 1):
educ_tuberculosis <- wb(
  country = c("AFG", "AGO", "VNM"),
  indicator = c("SH.TBS.MORT", "PRJ.POP.2024.3.MF"),
  startdate = 2016,
  enddate = 2017
)

# The plot indicators for every country, 2016-2017 (example 2):
mydf <- wb(
  country = "all",
  indicator = my_indicator,
  startdate = 2016,
  enddate = 2017
)
# Keep the 2016 observations and only the columns needed for reshaping:
mydf_small <- mydf %>%
  filter(date == 2016) %>%
  select(country, iso2c, indicatorID, value)

# Convert to wide form (one column per indicator), drop countries that lack
# any of the indicators, and give the indicator columns short names.
# pivot_wider() replaces the superseded spread(); the result is assigned
# with `<-` at the start so the target name is visible up front.
mydf_small_wide <- mydf_small %>%
  pivot_wider(names_from = indicatorID, values_from = value) %>%
  na.omit() %>%
  rename(pop = SP.POP.TOTL, life = SP.DYN.LE00.IN, gdp = NY.GDP.PCAP.PP.CD)

# Country attributes, without the rows labelled "Aggregates" in `income`.
# Factors are converted to character so the comparison and the join below
# work on plain strings.
income_group <- df_countries %>%
  mutate(across(where(is.factor), as.character)) %>%
  filter(income != "Aggregates") %>%
  select(iso2c, region, income)

# Merge data sets and remove missing points. An inner join keeps exactly the
# countries present in both tables, which is what the previous
# right_join() + na.omit() combination amounted to; na.omit() still drops
# rows with a missing region or income group.
total_df <- mydf_small_wide %>%
  inner_join(income_group, by = "iso2c") %>%
  na.omit()
#-------------------------------------------------------------------------------------------------------------
# Page 38 from A. Deaton's book (https://www.amazon.com/gp/product/0691165629/ref=dbs_a_def_rwt_bibl_vppi_i0)
#-------------------------------------------------------------------------------------------------------------
library(ggrepel)   # geom_text_repel()
library(scales)    # dollar
library(ggsci)     # scale_color_lancet()
library(extrafont) # loaded for the custom font family used below

my_font <- "Roboto Condensed"

# Countries that receive a text label in the plot.
my_country <- c(
  "Vietnam", "China", "India", "Thailand", "Malaysia", "France", "Cambodia",
  "Germany", "Japan", "Nigeria", "Indonesia", "Singapore", "Philippines"
)

total_df %>%
  filter(gdp < 100000) %>%
  # Only x and y are shared by every layer; size and colour belong to the
  # bubbles alone, so the trend line no longer inherits a `size` mapping.
  ggplot(aes(gdp, life)) +
  geom_point(aes(size = pop, color = income), alpha = 0.5) +
  # A single log-linear trend across all countries.
  geom_smooth(
    method = "lm", formula = y ~ log(x),
    color = "orange", alpha = 0.1, se = FALSE
  ) +
  # Derive the labelled subset from the plot data itself so the labels obey
  # the same gdp filter as the bubbles.
  geom_text_repel(
    data = function(d) filter(d, country %in% my_country),
    aes(label = country),
    color = "gray20", size = 5, force = 19, family = my_font
  ) +
  scale_x_continuous(breaks = seq(0, 100000, 10000), labels = dollar) +
  scale_y_continuous(breaks = seq(50, 85, 5)) +
  scale_size(range = c(1, 30)) +
  scale_color_lancet(name = "Group:") +
  # "none" is the supported way to hide a guide; FALSE is deprecated.
  guides(size = "none") +
  labs(
    x = "GDP per capita",
    y = "Life expectancy",
    title = "The relationship between Life Expectancy and GDP per capita in 2016",
    subtitle = "According to WHO definitions, Life Expectancy at birth reflects the overall mortality level of a population and it is defined as\nthe average number of years that a newborn is expected to live if current mortality rates continue to apply. ",
    caption = "Data Source: The World Bank"
  ) +
  theme_minimal() +
  theme(
    # NOTE(review): a numeric legend.position is deprecated from ggplot2
    # 3.5.0 in favour of legend.position = "inside" plus
    # legend.position.inside; kept as is for older ggplot2 versions.
    legend.position = c(0.83, 0.30),
    # Single definition that combines the two earlier legend.title settings.
    legend.title = element_text(
      family = my_font, size = 12, color = "grey30", face = "bold"
    ),
    legend.text = element_text(family = my_font, size = 12, color = "grey30"),
    plot.margin = unit(c(1, 1, 1, 1), "cm"),
    plot.title = element_text(family = my_font, size = 26, color = "gray10"),
    plot.subtitle = element_text(family = my_font, size = 15, color = "gray40"),
    plot.caption = element_text(
      family = my_font, size = 12, color = "gray40", face = "italic"
    ),
    axis.text = element_text(family = my_font, size = 14, color = "gray30"),
    axis.title = element_text(family = my_font, size = 15),
    # axis.title.x = element_text(family = my_font, size = 15, hjust = 0),
    # axis.title.y = element_text(family = my_font, size = 15, hjust = 1),
    panel.grid.minor = element_blank(),
    plot.background = element_rect(fill = "seashell", color = NA)
  )
