Motivation

Recreate the Visual Capitalist chart "Charted: Average Years Left to Live by Age".

Data Processing

# Project folder on the original author's machine. Relative paths used later
# in the script (e.g. the saved chart) resolve against the working directory.
project_dir <- "D:/0 - My documents/TOOLS/R/Visual Capitalist/Life Expectancy"

# Switch to the project folder only when it exists: setwd() raises an error on
# a missing directory, which would stop the script on any other machine.
# The workspace is deliberately NOT cleared with rm(list = ls()); that call
# deletes every object in the caller's session, not just this script's.
if (dir.exists(project_dir)) {
  setwd(project_dir)
}

# Load data
library(rvest)
library(tidyverse)
  
  # Load data from visualcapitalist
{
# Visual Capitalist article whose HTML tables are scraped below
url <- "https://www.visualcapitalist.com/charted-american-life-expectancy-trends-2023/"

# Download and parse the page once; both tables are read from this object
webpage <- read_html(url)

# First table on the page -> male figures, named and converted to numeric
male_life <- webpage %>%
  html_nodes("table") %>%
  html_table() %>%
  .[[1]] %>%
  setNames(c("age", "male_re", "male_ex")) %>%
  mutate(across(1:3, as.numeric))

# Second table on the page -> female figures, same treatment
fe_life <- webpage %>%
  html_nodes("table") %>%
  html_table() %>%
  .[[2]] %>%
  setNames(c("age", "fe_re", "fe_ex")) %>%
  mutate(across(1:3, as.numeric))

# Combine both sexes into one table keyed by age
life <- full_join(male_life, fe_life, by = "age")

# Keep only the ages shown on the chart: every 5 years from 0 to 100
life_chart <- filter(life, age %in% seq(0, 100, by = 5))

}

  # Load data from source Government
{
# Life table page of the U.S. Social Security Administration
url1 <- "https://www.ssa.gov/oact/STATS/table4c6.html"

# Download the page and parse its first HTML table
webpage1 <- read_html(url1)
life1 <- html_table(html_nodes(webpage1, "table"))[[1]]

# Keep rows 2-121 only.
# NOTE(review): presumably row 1 is a sub-header and rows after 121 are
# footnotes; verify against the live table if the page layout changes.
# Base indexing is used here (before renaming) so that it also works when the
# scraped header contains repeated column names.
life1 <- life1[2:121, ]

# Column names: age, then death / number / remaining-years columns for males
# and for females
names(life1) <- c("age", "male_de", "male_no", "male_re", "fe_de", "fe_no", "fe_re")

# Convert only the columns needed for the chart to numeric. Columns are
# addressed by name rather than by position (1, 4, 7) so the intent is visible
# and the code does not break silently if the column order changes.
life1 <- life1 %>%
  mutate(across(c(age, male_re, fe_re), as.numeric))

# Expected age at death = current age + remaining years; then keep the ages
# shown on the chart (every 5 years from 0 to 100).
life_chart1 <- life1 %>%
  mutate(
    male_ex = age + male_re,
    fe_ex = age + fe_re
  ) %>%
  select(age, male_re, male_ex, fe_re, fe_ex) %>%
  filter(age %in% seq(0, 100, by = 5))

}

Visualization

# Visualization

library(shadowtext)
library(extrafont)
library(ggplot2)
library(scales)
library(ggtext)

# Import font files whose name matches "SFU" into extrafont's font table.
# prompt = FALSE skips the interactive y/n confirmation so the script can be
# sourced unattended. The console answer `y` must not appear as a line of the
# script: no object named `y` is defined here, so evaluating it raises an error.
font_import(pattern = "SFU", prompt = FALSE)

# List the font families known to extrafont
fonts()

# Register the imported fonts with the Windows graphics device
loadfonts(device = "win")

# Show the fonts registered with the Windows device
windowsFonts()

# Visualization

# Markdown paragraph shown inside the plot by geom_textbox(); the ** ** span is
# rendered in bold.
addtext <- "Average life expectancy **naturally increases as\nwe get older**, while the gap between women\nand men's life expectancies decreases."
dftext <- data.frame(age = 30, addtext = addtext)

# Three overlapping bar layers per age, drawn in this order: `fe_ex` (pink),
# `male_ex` (blue) on top of it, and the current age (grey) on top of both.
# NOTE(review): the bars use `life_chart` while the caption credits the
# Social Security data (`life_chart1` has the same columns) - confirm which
# data set is intended.
ggplot(life_chart) +
  geom_col(aes(x = age, y = fe_ex), fill = "#FF407D") +
  geom_col(aes(x = age, y = male_ex), fill = "#40A2E3") +
  geom_col(aes(x = age, y = age), fill = "grey50") +
  # Axes: the top y label doubles as the unit ("Years"); x labels read "Age n"
  scale_y_continuous(
    breaks = c(0, 20, 40, 60, 80, 100),
    expand = c(0, 0),
    limits = c(0, 110),
    labels = c("0", "20", "40", "60", "80", "Years\n100")
  ) +
  scale_x_continuous(
    breaks = seq(0, 100, by = 5),
    expand = c(0, 0),
    labels = paste("Age", seq(0, 100, by = 5))
  ) +
  # Title, subtitle, caption
  labs(
    title = "How Much Longer Do You Have To Live?",
    subtitle = "Life Expectancy in U.S",
    caption = "Source: Office of Social Security, U.S Government"
  ) +
  # Theme: all tweaks on top of theme_minimal() collected in a single call
  theme_minimal() +
  theme(
    plot.background = element_rect(fill = "#f0f0f0", color = "#f0f0f0"),
    panel.background = element_rect(fill = "#f0f0f0", color = "#f0f0f0"),
    axis.ticks = element_blank(),
    axis.title = element_blank(),
    panel.grid.major.x = element_blank(),
    panel.grid.major.y = element_line(color = "grey50", linewidth = 0.2),
    axis.text = element_text(size = 6, color = "black"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    plot.margin = unit(c(0.5, 0.3, 0.2, 0.3), "cm"),
    plot.title = element_text(
      size = 17,
      family = "#9Slide04 SFU Fenice",
      hjust = 0.5,
      vjust = 1
    ),
    plot.subtitle = element_text(size = 10, hjust = 0.5, vjust = 1),
    plot.caption = element_text(size = 6, color = "grey60")
  ) +
  # Free-floating labels. Each one gets its own one-row data frame (or goes
  # through annotate()): a text layer that reuses the plot data draws the
  # label once per data row, stacking identical copies on top of each other.
  geom_shadowtext(
    data = data.frame(x = 78, y = 30),
    aes(x = x, y = y),
    label = "Current age",
    size = 3,
    color = "grey60",
    bg.color = "white"
  ) +
  geom_shadowtext(
    data = data.frame(x = 15, y = 50),
    aes(x = x, y = y),
    label = "Men's remaining years",
    size = 3,
    color = "#40A2E3",
    bg.color = "white"
  ) +
  annotate(
    "text",
    x = 11,
    y = 84,
    label = "Women's remaining years",
    size = 3,
    colour = "#FF407D"
  ) +
  # Explanatory paragraph; the box outline is switched off with a constant
  # parameter (constants do not belong inside aes()).
  geom_textbox(
    data = dftext,
    aes(x = age, y = 106, label = addtext),
    box.colour = NA,
    width = 0.4,
    hjust = 0,
    vjust = 1,
    fill = NA,
    color = "black",
    size = 2.2
  ) +
  # Women-minus-men gap labels at ages 0, 70 and 90
  annotate(
    "text",
    x = c(0, 70, 90),
    y = c(77, 89, 97),
    label = c("+6", "+2", "+1"),
    colour = c("white", "#FF407D", "#FF407D"),
    size = 3
  )
  
# Write the most recently created plot to life.png in the working directory
# (6 x 4 inches at 300 dpi)
ggsave(
  filename = "life.png",
  width = 6,
  height = 4,
  units = "in",
  dpi = 300
)