# Population Pyramids


# R Code for Collecting Data and Visualization
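#==================================================================================
# Scraping population data. Inputs are:
# 1. A given year,
# 2. A country code selected from https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2
#    NOTE(review): the Vietnam example below passes "VM", whereas the ISO 3166-1
#    alpha-2 code for Vietnam is "VN" - confirm which code set the census.gov
#    endpoint actually expects.
#==================================================================================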
# Clear workspace: removes all (non-hidden) objects from the current environment.
# NOTE(review): this also wipes whatever the caller had in their session when
# the script is sourced - confirm that is intended.
rm(list = ls())
# Load some packages for scraping data and data manipulation:
library(rvest)
library(magrittr)
library(tidyverse)
# Fetch the population-by-age table for one country and one year from the
# census.gov IDB results page and return it as a cleaned data frame.
#
# Arguments:
#   countryCode_selected  Country code appended to the query string (C=...).
#   year_selected         Year appended to the query string (Y=...).
#
# Returns a data frame whose first columns are Year, Country and Age, followed
# by the population count columns converted to numeric, plus recomputed
# Percent_Both_Sexes, Percent_Male and Percent_Female (each count divided by
# the first-row value of its column) and Sex_Ratio (male / female).
# Requires network access. The page layout (first table on the page, the
# '.query_source a' links, the column names) is assumed, not checked.
get_population <- function(countryCode_selected, year_selected) {
  # Build the request URL: fixed prefix + year + region part + country code.
  url_prefix <- "https://www.census.gov/data-tools/demo/idb/region.php?N=%20Results%20&T=10&A=separate&RT=0&Y="
  query_url <- paste0(url_prefix, year_selected, "&R=-1&C=", countryCode_selected)

  page <- read_html(query_url)

  # The first table on the page is taken as the population table; spaces in
  # its column names become underscores so columns can be used as bare names.
  raw_tbl <- html_table(page, fill = TRUE)[[1]]
  names(raw_tbl) <- str_replace_all(names(raw_tbl), " ", "_")

  # Country label comes from the link texts: the second one when exactly two
  # links are found, otherwise the first one.
  link_texts <- html_text(html_nodes(page, '.query_source a'))
  country_label <- if (length(link_texts) == 2) link_texts[2] else link_texts[1]

  # Strip every non-digit character (e.g. thousands separators) before
  # converting to numeric.
  digits_only <- function(x) {
    as.numeric(str_replace_all(x, "[^0-9]", ""))
  }

  counts <- raw_tbl %>%
    select(-Year, -Age, -contains("Percent"), -Sex_Ratio) %>%
    mutate_all(digits_only)

  counts %>%
    mutate(Year = raw_tbl$Year, Country = country_label, Age = raw_tbl$Age) %>%
    select(Year, Country, Age, everything()) %>%
    mutate(
      # Shares are relative to the first row of each column
      # (presumably the all-ages total - verify against the page).
      Percent_Both_Sexes = Both_Sexes_Population / Both_Sexes_Population[1],
      Percent_Male = Male_Population / Male_Population[1],
      Percent_Female = Female_Population / Female_Population[1],
      Sex_Ratio = Male_Population / Female_Population,
      # Zero-pad the two youngest groups so age labels sort in order as text.
      Age = case_when(
        Age == "0-4" ~ "00-04",
        Age == "5-9" ~ "05-09",
        TRUE ~ Age
      )
    )
}
# Population data for Vietnam (country code "VM") for a given year, reshaped
# for pyramid plots.
#
# Argument:
#   year_selected  Year passed on to get_population().
#
# Returns a long data frame with columns Year, Age, Gender and Value, where
# Gender is "Male_Population" or "Female_Population" and female counts are
# negated so they plot on the opposite side of zero.
pop_data_ByYear <- function(year_selected) {
  vietnam_pop <- get_population("VM", year_selected)

  # Drop row 1 and rows 20-22 (presumably the total row and the oldest
  # open-ended groups - verify against the source table).
  age_rows <- slice(vietnam_pop, -c(1, 20:22))

  by_sex <- select(age_rows, Year, Age, Male_Population, Female_Population)
  by_sex <- mutate(by_sex, Female_Population = -1*Female_Population)

  # Wide -> long: one row per Year / Age / Gender.
  gather(by_sex, Gender, Value, -Age, -Year)
}
# Collect the Vietnam data for each selected year, then stack the per-year
# data frames into one:
total_list <- lapply(c(2018, 2005, 2000, 1995), pop_data_ByYear)
total_df <- do.call(rbind, total_list)
#=========================
# Data Visualization
#=========================
# NOTE(review): extrafont is presumably loaded so the font family below is
# available to the graphics device - confirm the font has been imported.
library(extrafont)
# Colors and font selected:
# my_colors supplies the fill colours for Version 1 (scale_fill_manual).
my_colors <- c("#2E74C0", "#CB454A")
# my_font is used as the text family in the themes of both plot versions.
my_font <- "Roboto Condensed"
#---------------------------------
# Version 1: Using geom_col()
#---------------------------------
# Version 1 draws each year's pyramid as mirrored bars: Female_Population is
# stored as negative values in total_df, so after coord_flip() female bars
# extend to the left of zero and male bars to the right.
total_df %>%
  ggplot(aes(Age, Value, fill = Gender)) +
  geom_col(position = "stack", width = 0.85) +
  facet_wrap(~ Year) +
  coord_flip() +
  # Exactly one y scale. The script previously added an earlier
  # scale_y_continuous(expand = c(0, 0.5)) as well; ggplot2 replaces an
  # existing scale when a second one for the same aesthetic is added, so that
  # call had no effect on the plot and only produced a
  # "Scale for y is already present" message.
  # Breaks run from -5M to 5M in 1M steps; labels show absolute values.
  scale_y_continuous(
    breaks = seq(-5000000, 5000000, 1000000),
    limits = c(-5000000, 5000000),
    labels = c(paste0(seq(5, 0, -1), "M"), paste0(1:5, "M"))
  ) +
  theme_minimal() +
  # Fill levels sort as Female_Population, Male_Population, so the unnamed
  # colours and labels are matched in that order.
  scale_fill_manual(values = my_colors, name = "", labels = c("Female", "Male")) +
  # guides(fill = guide_legend(reverse = TRUE)) +
  theme(
    # Keep only dotted vertical guide lines.
    panel.grid.major.x = element_line(linetype = "dotted", size = 0.2, color = "grey40"),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank(),
    panel.grid.minor.x = element_blank(),
    legend.position = "top",
    axis.text.y = element_text(size = 10, family = my_font),
    axis.text.x = element_text(size = 10, family = my_font),
    plot.title = element_text(family = my_font, size = 28),
    plot.subtitle = element_text(family = my_font, size = 13, color = "gray40"),
    plot.caption = element_text(family = my_font, size = 12, colour = "grey40", face = "italic"),
    plot.margin = unit(c(1.2, 1.2, 1.2, 1.2), "cm"),
    legend.text = element_text(size = 12, face = "bold", color = "grey30", family = my_font),
    strip.text = element_text(color = "grey20", size = 13, face = "bold", family = my_font)
  ) +
  labs(x = NULL, y = NULL,
       title = "Population Pyramids of Vietnam: 1995 - 2018",
       subtitle = "The age-sex structure of a country's population and may provide insights about political and social stability,\nas well as economic development. Countries with young populations need to invest more in schools,\nwhile countries with older populations need to invest more in the health sector.",
       caption = "Data Source: https://www.census.gov")
#-----------------------------------------
# Version 2: Using geom_linerange()
#-----------------------------------------
# Version 2 draws each age group as a thick line segment per sex, with the age
# group printed as a label at y = 0 instead of on the axis.
total_df %>%
  ggplot(aes(x = Age, color = Gender)) +
  # Male values are positive, female values are negative (negated upstream),
  # so the two layers extend in opposite directions from the centre.
  # NOTE(review): the 0.3 offsets are in the same units as Value (head counts,
  # with axis breaks every 1M), so they are visually negligible - confirm
  # whether a visible central gap for the age labels was intended.
  geom_linerange(data = total_df %>% filter(Gender == "Male_Population"),
                 aes(ymin = -0.3, ymax = -0.3 + Value), size = 3.5, alpha = 1) +
  geom_linerange(data = total_df %>% filter(Gender != "Male_Population"),
                 aes(ymin = 0.3, ymax = 0.3 + Value), size = 3.5, alpha = 1) +
  geom_label(aes(x = Age, y = 0, label = Age), inherit.aes = TRUE, family = my_font,
             size = 3.5, label.padding = unit(0, "lines"), label.size = 0,
             label.r = unit(0.0, "lines"), fill = "#f5f5f2", alpha = 1, color = "gray20") +
  theme_minimal() +
  coord_flip() +
  facet_wrap( ~ Year) +
  # Breaks run from -5M to 5M in 1M steps; labels show absolute values.
  scale_y_continuous(
    breaks = seq(-5000000, 5000000, 1000000),
    limits = c(-5000000, 5000000),
    labels = c(paste0(seq(5, 0, -1), "M"), paste0(1:5, "M"))
  ) +
  # Legend fix: labels are applied to the breaks in order, and the breaks sort
  # as Female_Population, Male_Population. The previous labels
  # c("Male", "Female") therefore labelled the female series "Male" and the
  # male series "Female". Breaks are now spelled out so each label is tied to
  # its series explicitly; the colours are unchanged.
  scale_color_manual(
    name = "",
    values = c(Female_Population = "#3E606F", Male_Population = "#8C3F4D"),
    breaks = c("Female_Population", "Male_Population"),
    labels = c("Female", "Male")
  ) +
  theme(
    panel.grid.major.y = element_blank(),
    plot.title = element_text(face = "bold", size = 28, family = my_font, margin = margin(b = 10), hjust = 0, color = "grey20"),
    plot.subtitle = element_text(size = 14, margin = margin(b = 20), hjust = 0, family = my_font, color = "grey30"),
    plot.caption = element_text(size = 13, color = "grey50", family = my_font),
    # Age labels are drawn inside the panel by geom_label(), so hide the axis text.
    axis.text.y = element_blank(),
    axis.text.x = element_text(size = 12, color = "grey20", family = my_font, face = "bold"),
    plot.background = element_rect(fill = "#EFF2F4", color = NA),
    panel.grid.major.x = element_line(linetype = "dotted", size = 0.2, color = "grey40"),
    plot.margin = unit(c(1.2, 1.2, 1.2, 1.2), "cm"),
    legend.position = "top",
    legend.text = element_text(size = 13, face = "bold", color = "grey30", family = my_font),
    legend.text.align = 1,
    strip.text = element_text(color = "grey20", size = 14, face = "bold", family = my_font)
    # Alternative strip text kept from the original script:
    # strip.text = element_text(color = "grey20", size = 14, face = "bold", hjust = 0.026)
  ) +
  labs(x = NULL, y = NULL,
       title = "Population Pyramids of Vietnam: 1995 - 2018",
       subtitle = "The age-sex structure of a country's population and may provide insights about political and social stability,\nas well as economic development. Countries with young populations need to invest more in schools,\nwhile countries with older populations need to invest more in the health sector.",
       caption = "Data Source: https://www.census.gov")
