# Motivation ----

# Recreate the Visual Capitalist chart "Global Tobacco Use by Country and Sex".

# Data Processing ----

# Working directory ----
# The original script wiped the global environment with rm(list = ls()) and
# called setwd() unconditionally. Both are hostile to anyone sourcing the
# file: the first deletes the caller's objects, the second stops the script
# on any machine where the hard-coded folder does not exist.
#
# The author's project folder is still honoured when it is present, so
# relative outputs land in the same place as before; elsewhere the current
# working directory is left untouched.
project_dir <- "D:/0 - My documents/TOOLS/R/The Economist/Nobel"
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Packages ----
# All packages used by the data-processing step are loaded together so that a
# missing dependency fails immediately rather than halfway through.
library(rvest)
library(tidyverse)
library(janitor)

# Scrape the source table ----
url <- "https://www.visualcapitalist.com/tobacco-use-by-country/"

# Read the page and parse every <table> element into a data frame.
webpage <- read_html(url)
tables <- html_table(html_elements(webpage, "table"))

# Fail with a readable message instead of "subscript out of bounds" when the
# page layout changes and no table is found.
if (length(tables) == 0) {
  stop("No <table> element found at ", url, call. = FALSE)
}

# Clean the first table on the page:
#   1. coerce columns 2-4 to numeric (selected by position; presumably the
#      total / male / female percentages -- confirm against the page),
#   2. drop the first whitespace-delimited token of `Country` (everything up
#      to and including the first run of whitespace),
#      NOTE(review): this also truncates any name that has no such prefix,
#      e.g. "A B" becomes "B" -- verify every row carries a prefix,
#   3. normalise the column names with janitor::clean_names().
tobaco <- tables[[1]] %>%
  mutate(across(c(2, 3, 4), as.numeric)) %>%
  mutate(Country = sub("^\\S+\\s+", "", Country)) %>%
  janitor::clean_names()

# Show the cleaned column names (printed when run interactively).
names(tobaco)

# Visualization ----

library(ggtext)

# Headline annotation. The `**...**` markers are Markdown bold, presumably
# meant to be rendered through ggtext -- confirm where it is drawn.
# Fixed the country name typo: "Naura" -> "Nauru".
text <- paste0(
  "**Nauru**, **Indonesia**, and **Myanmar** have some of the highest ",
  "rates of tobacco use in the world"
)

# One-row data frame carrying the annotation text together with the same
# column names the plot aesthetics use (country, total/male/female percent).
# NOTE(review): the values look like placement coordinates rather than real
# figures for the named country -- confirm before relying on them.
df_text <- data.frame(
  country = "Afghanistan",
  total_prevalence_percent = 49.5,
  male_percent = 50.3,
  female_percent = 48.7,
  text = text
)

# Bubble chart: female prevalence on x, male prevalence on y, with total
# prevalence mapped to both point size and point colour.
tobacco_plot <- ggplot(
  tobaco,
  aes(
    x = female_percent,
    y = male_percent,
    size = total_prevalence_percent,
    color = total_prevalence_percent
  )
) +
  geom_point() +
  # Point-size range and the layout of its legend.
  scale_size(
    range = c(1, 5),
    guide = guide_legend(
      direction = "vertical",
      nrow = 1,
      label.position = "right"
    )
  ) +
  # Colour ramp for total prevalence.
  scale_color_gradientn(colours = hcl.colors(5, "Sunset", rev = TRUE)) +
  # Colour is shown as a discrete legend with an empty title; the size legend
  # title is also blanked in labs() below.
  guides(color = guide_legend(title = "")) +
  # Axes start at the origin with no padding. The y-axis label at 0 is left
  # blank; the x-axis keeps its "0%".
  scale_y_continuous(
    breaks = c(0, 20, 40, 60),
    expand = c(0, 0),
    limits = c(0, 80),
    labels = c("", "20%", "40%", "60%")
  ) +
  scale_x_continuous(
    breaks = c(0, 20, 40),
    expand = c(0, 0),
    limits = c(0, 60),
    labels = c("0%", "20%", "40%")
  ) +
  # Axis titles, plot title, caption, and an empty size-legend title.
  labs(
    x = "Female",
    y = "Male",
    size = "",
    title = "TOBACCO USE AROUND THE WORLD",
    caption = "Source: World Health Organization"
  ) +
  # All theme tweaks in a single call. The original set axis.title twice
  # (hjust, then size/colour); the two are merged here.
  theme(
    # Flat background, no grid lines.
    plot.background = element_rect(fill = "#E2BFB3", color = "#E2BFB3"),
    panel.background = element_rect(fill = "#E2BFB3", color = "#E2BFB3"),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    # Thin axis lines ending in a closed arrow head.
    axis.line = element_line(
      color = "grey30",
      linewidth = 0.2,
      arrow = arrow(
        angle = 30,
        length = unit(0.07, "inches"),
        ends = "last",
        type = "closed"
      )
    ),
    axis.ticks = element_line(color = "grey30", linewidth = 0.2),
    axis.text = element_text(size = 20, color = "grey40"),
    axis.title = element_text(size = 25, color = "grey30", hjust = 1),
    # Legend placed inside the panel, lower right.
    # NOTE(review): a numeric legend.position is deprecated from ggplot2
    # 3.5.0 in favour of legend.position.inside; kept for compatibility with
    # older installs -- revisit once the minimum version is settled.
    legend.position = c(0.7, 0.1),
    legend.background = element_rect(
      fill = "#E2BFB3",
      colour = "#E2BFB3",
      linewidth = 0.2
    ),
    legend.key = element_rect(fill = "#E2BFB3"),
    legend.text = element_text(size = 20, color = "grey40"),
    # Title and caption.
    # NOTE(review): these font sizes look large for the 3.86 x 4.18 in canvas
    # used by ggsave() below -- confirm the intended output size.
    plot.title = element_text(size = 43, color = "grey30", face = "bold"),
    plot.title.position = "plot",
    plot.caption = element_text(size = 20, color = "grey40", face = "italic"),
    plot.margin = unit(c(0.3, 0.5, 0.1, 0.5), "cm")
  ) +
  # Country call-outs. annotate() draws each label exactly once; geom_text()
  # with a constant label inherits the plot data and redraws the same text
  # for every row, which overplots and produces jagged lettering.
  annotate(
    "text",
    label = "Indonesia",
    x = 7,
    y = 73.6,
    size = 10,
    hjust = 0,
    color = "#B859A8"
  ) +
  annotate(
    "text",
    label = "Myanmar",
    x = 21.5,
    y = 69.4,
    size = 10,
    hjust = 0,
    color = "#9653A3"
  ) +
  annotate(
    "text",
    label = "Nauru",
    x = 50.7,
    y = 50.3,
    size = 10,
    hjust = 0,
    color = "#704D9E"
  ) +
  annotate(
    "text",
    label = "Nigeria",
    x = 2,
    y = 5.4,
    size = 7,
    hjust = 0,
    color = "#F5DC95"
  )

# Display the chart (auto-printed when run at top level).
tobacco_plot

# Write the chart to disk, passing the plot explicitly rather than relying on
# whatever last_plot() happens to hold.
ggsave(
  "tobaco.png",
  plot = tobacco_plot,
  width = 3.86,
  height = 4.18,
  dpi = 300,
  units = "in"
)