# Recreate VISUAL CAPITALIST chart: Global Tobacco Use by Country and Sex
# Session setup.
# The workspace is intentionally NOT cleared with rm(list = ls()): nothing
# below depends on an empty environment, and wiping it would destroy
# unrelated objects in the caller's session.
# The working directory is switched to the project folder only when that
# folder exists, so the script does not abort on other machines (output is
# then written to the current working directory instead).
project_dir <- "D:/0 - My documents/TOOLS/R/The Economist/Nobel"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}
# Load data
library(rvest)
library(tidyverse)
# Specify the URL
url <- "https://www.visualcapitalist.com/tobacco-use-by-country/"
# Read the HTML content of the webpage and keep the first <table> found on it
webpage <- read_html(url)
tobaco <- html_table(html_nodes(webpage, "table"))[[1]]
# Convert columns 2-4 to numeric
# (across() replaces the superseded mutate_at())
tobaco <- tobaco %>%
  mutate(across(c(2, 3, 4), as.numeric))
# Delete the leading run of non-blank characters, plus the whitespace that
# follows it, from each country name
tobaco$Country <- sub("^\\S+\\s+", "", tobaco$Country)
# Rename columns to snake_case
library(janitor)
tobaco <- tobaco %>%
  janitor::clean_names()
# Show the cleaned column names
names(tobaco)
library(ggtext)
# Markdown-formatted headline (bold country names) and a one-row data frame
# carrying it together with a set of coordinate values.
# The country name is spelled "Nauru", matching the label drawn on the chart.
text <- "**Nauru**, **Indonesia**, and **Myanmar** have some of the highest rates of tobacco use in the world"
df_text <- data.frame(
  country = "Afghanistan",
  total_prevalence_percent = 49.5,
  male_percent = 50.3,
  female_percent = 48.7,
  text = text
)
# Bubble chart: female share on x, male share on y; point size and colour
# both encode total prevalence. The plot is stored in a variable so that
# ggsave() below writes exactly this object instead of relying on last_plot().
tobaco_plot <- ggplot(
  tobaco,
  aes(
    x = female_percent,
    y = male_percent,
    size = total_prevalence_percent,
    color = total_prevalence_percent
  )
) +
  geom_point() +
  # Adjust scale size and legend of scale
  scale_size(
    range = c(1, 5),
    guide = guide_legend(
      direction = "vertical",
      nrow = 1,
      label.position = "right"
    )
  ) +
  # Adjust scale color
  scale_color_gradientn(colours = hcl.colors(5, "Sunset", rev = TRUE)) +
  guides(color = guide_legend(title = "")) +
  # Scale x.y
  scale_y_continuous(
    breaks = c(0, 20, 40, 60),
    expand = c(0, 0),
    limits = c(0, 80),
    labels = c("", "20%", "40%", "60%")
  ) +
  scale_x_continuous(
    breaks = c(0, 20, 40),
    expand = c(0, 0),
    limits = c(0, 60),
    labels = c("0%", "20%", "40%")
  ) +
  # Axis titles, plot title, caption, and an empty size-legend title
  labs(
    size = "",
    x = "Female",
    y = "Male",
    title = "TOBACCO USE AROUND THE WORLD",
    caption = "Source: World Health Organization"
  ) +
  theme(
    # Adjust background
    plot.background = element_rect(fill = "#E2BFB3", color = "#E2BFB3"),
    panel.background = element_rect(fill = "#E2BFB3", color = "#E2BFB3"),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    panel.grid.minor.y = element_blank(),
    # Axis lines end in a closed arrow head
    axis.line = element_line(
      color = "grey30",
      linewidth = 0.2,
      arrow = arrow(
        angle = 30,
        length = unit(0.07, "inches"),
        ends = "last",
        type = "closed"
      )
    ),
    axis.ticks = element_line(color = "grey30", linewidth = 0.2),
    # Legend position (numeric pair places the legend inside the plot area;
    # newer ggplot2 releases prefer `legend.position.inside`, the numeric
    # form is kept so older releases still work)
    legend.position = c(0.7, 0.1),
    # Legend color
    legend.background = element_rect(
      fill = "#E2BFB3",
      colour = "#E2BFB3",
      linewidth = 0.2
    ),
    legend.key = element_rect(fill = "#E2BFB3"),
    # Adjust title, sub, caption, axis and legend text
    plot.title = element_text(size = 43, color = "grey30", face = "bold"),
    plot.title.position = "plot",
    plot.caption = element_text(size = 20, color = "grey40", face = "italic"),
    axis.text = element_text(size = 20, color = "grey40"),
    axis.title = element_text(hjust = 1, size = 25, color = "grey30"),
    legend.text = element_text(size = 20, color = "grey40"),
    # Adjust plot margin
    plot.margin = unit(c(0.3, 0.5, 0.1, 0.5), "cm")
  ) +
  # Country labels at fixed coordinates. annotate() draws each label exactly
  # once; geom_text() with a constant label would inherit `tobaco` and
  # overplot the same text once per data row.
  annotate(
    "text",
    label = "Indonesia",
    x = 7,
    y = 73.6,
    size = 10,
    hjust = 0,
    color = "#B859A8"
  ) +
  annotate(
    "text",
    label = "Myanmar",
    x = 21.5,
    y = 69.4,
    size = 10,
    hjust = 0,
    color = "#9653A3"
  ) +
  annotate(
    "text",
    label = "Nauru",
    x = 50.7,
    y = 50.3,
    size = 10,
    hjust = 0,
    color = "#704D9E"
  ) +
  annotate(
    "text",
    label = "Nigeria",
    x = 2,
    y = 5.4,
    size = 7,
    hjust = 0,
    color = "#F5DC95"
  )
# Display the chart (auto-printed when evaluated at top level)
tobaco_plot
# Write the chart to disk
ggsave(
  "tobaco.png",
  plot = tobaco_plot,
  width = 3.86,
  height = 4.18,
  dpi = 300,
  units = "in"
)