# Life expectancy at birth by gender

#==============================
#   Collect and clean data
#==============================

# Load some packages: 
library(tidyverse)
library(rvest)

# Extract data from link:
link <- "https://en.wikipedia.org/wiki/List_of_countries_by_life_expectancy?fbclid=IwAR0jRtf0RacPLxVPgcuu4zgYJl9HDk01cNy6u48vvyTuoK9CC1jZH8_pZPQ"

# Download the page, select the table matched by the XPath and parse it.
# html_table() returns a list of tables; the first (only) match is kept.
data_raw <- link %>%
  read_html() %>%
  html_nodes(xpath = '//*[@id="mw-content-text"]/div/table[2]') %>%
  html_table(fill = TRUE) %>%
  .[[1]]

# Rename the columns. Fail early with a readable message if the scraped table
# does not have the expected number of columns (the page layout can change).
life_cols <- c("Rank", "Country", "Both", "Female", "Male")
if (ncol(data_raw) != length(life_cols)) {
  stop(
    "Expected ", length(life_cols), " columns in the scraped table, got ",
    ncol(data_raw), "; the page layout may have changed.",
    call. = FALSE
  )
}
names(data_raw) <- life_cols

# Remove the header row that was scraped as data (Rank cell equal to "Rank"):
data_raw <- data_raw %>%
  filter(Rank != "Rank")

# Convert to numeric and remove missing data:
#   - every character column except Country is coerced with as.numeric();
#     cells that are not numbers become NA (with a coercion warning),
#   - any country whose name contains "Republic of China" is relabelled "China",
#   - rows containing any NA are dropped.
df_life <- data_raw %>%
  mutate(across(where(is.character) & !Country, as.numeric)) %>%
  mutate(Country = as.character(Country)) %>%
  mutate(
    Country = case_when(
      str_detect(Country, "Republic of China") ~ "China",
      TRUE ~ Country
    )
  ) %>%
  na.omit()


# ASEAN country names:
# Read the ASEAN page and parse the table matched by the XPath. The filter
# step further down reads the `Country` column of this table.
asean_link <- "https://en.wikipedia.org/wiki/Association_of_Southeast_Asian_Nations"

asean_page <- read_html(asean_link)
asean_nodes <- html_nodes(
  asean_page,
  xpath = '//*[@id="mw-content-text"]/div/table[2]'
)
asean_names <- html_table(asean_nodes, fill = TRUE)[[1]]

# G7 Countries:
# Same approach for the G7 page; the filter step further down reads the
# `Member` column of this table.
g7_link <- "https://en.wikipedia.org/wiki/Group_of_Seven"

g7_page <- read_html(g7_link)
g7_nodes <- html_nodes(
  g7_page,
  xpath = '//*[@id="mw-content-text"]/div/table[5]'
)
g7_names <- html_table(g7_nodes, fill = TRUE)[[1]]

# Filter data for ASEAN + G7 countries + China + India + South Korea:
selected_countries <- c(
  asean_names$Country,
  g7_names$Member,
  "China", "India", "South Korea"
)

# Keep only the selected countries and attach the axis-label colour:
# Vietnam is highlighted, every other country uses dark gray.
df_mini <- mutate(
  filter(df_life, Country %in% selected_countries),
  text_color = case_when(
    Country == "Vietnam" ~ "firebrick",
    TRUE ~ "gray20"
  )
)

# Order countries by male life expectancy and freeze that order as the factor
# levels, so the plot axis follows the sorted order.
aseanG7_life <- arrange(df_mini, Male)
aseanG7_life <- mutate(
  aseanG7_life,
  Country = factor(Country, levels = Country)
)

#===========
#  Plot 1
#===========

library(ggeconodist)
my_font <- "Ubuntu Condensed"

# One econodist bar per country: the bar spans Male (ymin) to Female (ymax)
# and the median marker shows the value for both sexes combined.
p <- ggplot(aseanG7_life, aes(x = Country)) +
  geom_econodist(
    aes(ymin = Male, median = Both, ymax = Female),
    stat = "identity",
    show.legend = FALSE
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    position = "right",
    limits = c(60, 90),
    breaks = seq(60, 90, 5)
  ) +
  coord_flip() +
  theme_econodist() +
  theme(
    plot.margin = unit(rep(1.3, 4), "cm"),
    # One colour per country label, taken in the same row order as the
    # factor levels of Country.
    axis.text.y = element_text(
      color = aseanG7_life$text_color,
      size = 14,
      family = my_font
    ),
    axis.text.x = element_text(size = 14, family = my_font),
    plot.title = element_text(size = 26, family = my_font),
    plot.caption = element_text(size = 10, face = "italic")
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Figure 1: Life expectancy at birth by gender, 2018",
    caption = "Data Source: United Nations Development Programme"
  )

# Start a fresh page, then draw the plot with left-aligned title/caption and
# a custom legend placed below the title.
grid.newpage()

grid.draw(
  add_econodist_legend(
    left_align(p, c("title", "caption")),
    econodist_legend_grob(
      tenth_lab = "Male",
      ninetieth_lab = "Female",
      med_lab = "Both",
      family = my_font,
      label_size = 14
    ),
    below = "title"
  )
)

#==============
#   Plot 2
#==============

# Same countries as Plot 1, now ordered by female life expectancy. The label
# positions sit 0.6 years inside each end of the bar.
dfPlot2 <- arrange(df_mini, Female)
dfPlot2 <- mutate(
  dfPlot2,
  Country = factor(Country, levels = Country),
  upperLabel = Female - 0.6,
  lowerLabel = Male + 0.6
)

# Only Vietnam gets numeric labels on its bar.
dfvietnam <- filter(dfPlot2, Country == "Vietnam")

p2 <- ggplot(dfPlot2, aes(x = Country)) +
  geom_econodist(
    aes(ymin = Male, median = Both, ymax = Female),
    stat = "identity",
    show.legend = FALSE
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    position = "right",
    limits = c(60, 90),
    breaks = seq(60, 90, 5)
  ) +
  geom_text(
    data = dfvietnam,
    aes(x = Country, y = upperLabel, label = Female),
    color = "gray30",
    size = 4.3,
    family = my_font
  ) +
  geom_text(
    data = dfvietnam,
    aes(x = Country, y = lowerLabel, label = Male),
    color = "gray30",
    size = 4.3,
    family = my_font
  ) +
  coord_flip() +
  theme_econodist() +
  theme(
    plot.margin = unit(rep(1.3, 4), "cm"),
    # One colour per country label, in the row order of dfPlot2.
    axis.text.y = element_text(
      color = dfPlot2$text_color,
      size = 14,
      family = my_font
    ),
    axis.text.x = element_text(size = 14, family = my_font),
    plot.title = element_text(size = 26, family = my_font),
    plot.caption = element_text(size = 10, face = "italic")
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Figure 2: Life expectancy at birth by Gender, 2018",
    caption = "Data Source: United Nations Development Programme"
  )

# Fresh page, then draw with left-aligned title/caption and the legend
# placed below the title.
grid.newpage()

grid.draw(
  add_econodist_legend(
    left_align(p2, c("title", "caption")),
    econodist_legend_grob(
      tenth_lab = "Male",
      ninetieth_lab = "Female",
      med_lab = "Both",
      family = my_font,
      label_size = 14
    ),
    below = "title"
  )
)

# Countries with the largest life expectancy gender gap

#===========
#  Plot 3
#===========

# Show as many countries as Plot 2 does.
n <- nrow(dfPlot2)
# Countries whose axis label is highlighted and whose bar gets value labels.
some_countries <- c("Vietnam", "Russia", "Syria", "Ukraine")

# Rank all countries (except the two excluded ones) by the female-male gap,
# keep the n largest gaps, then order the survivors by male life expectancy
# and freeze that order as factor levels for the plot axis.
dfPlot3 <- df_life %>%
  filter(!Country %in% c("Kiribati", "Eswatini")) %>%
  mutate(lifeGap = Female - Male) %>%
  arrange(desc(lifeGap)) %>%
  # seq_len() instead of 1:n so that n == 0 selects no rows (1:0 is c(1, 0)).
  slice(seq_len(n)) %>%
  # Label positions sit 0.5 years inside each end of the bar.
  mutate(upperLabel = Female - 0.5, lowerLabel = Male + 0.5) %>%
  mutate(
    text_color = case_when(
      Country %in% some_countries ~ "firebrick",
      TRUE ~ "gray20"
    )
  ) %>%
  arrange(Male) %>%
  mutate(Country = factor(Country, levels = Country))

# Rows for the highlighted countries only; used for the value labels.
df_someCon <- dfPlot3 %>%
  filter(Country %in% some_countries)

p3 <- dfPlot3 %>%
  ggplot(aes(x = Country)) +
  geom_econodist(
    aes(ymin = Male, median = Both, ymax = Female),
    stat = "identity",
    show.legend = FALSE
  ) +
  scale_y_continuous(
    expand = c(0, 0),
    position = "right",
    limits = c(65, 85),
    breaks = seq(60, 90, 5)
  ) +
  coord_flip() +
  theme_econodist() +
  theme(
    plot.margin = unit(rep(1.3, 4), "cm"),
    # One colour per country label, in the row order of dfPlot3.
    axis.text.y = element_text(
      size = 14,
      family = my_font,
      color = dfPlot3$text_color
    ),
    axis.text.x = element_text(size = 14, family = my_font),
    plot.title = element_text(size = 26, family = my_font),
    plot.caption = element_text(size = 10, face = "italic")
  ) +
  geom_text(
    data = df_someCon,
    aes(x = Country, y = upperLabel, label = Female),
    color = "gray30",
    size = 4.3,
    family = my_font
  ) +
  geom_text(
    data = df_someCon,
    aes(x = Country, y = lowerLabel, label = Male),
    color = "gray30",
    size = 4.3,
    family = my_font
  ) +
  labs(
    x = NULL,
    y = NULL,
    title = "Figure 3: Life expectancy at birth by Gender, 2018",
    caption = "Data Source: United Nations Development Programme"
  )

# Fresh page, then draw with left-aligned title/caption and the legend
# placed below the title.
grid.newpage()

p3 %>%
  left_align(c("title", "caption")) %>%
  add_econodist_legend(
    econodist_legend_grob(
      tenth_lab = "Male",
      ninetieth_lab = "Female",
      med_lab = "Both",
      family = my_font,
      label_size = 14
    ),
    below = "title"
  ) %>%
  grid.draw()

