# Load packages ----
# Install pacman only when it is missing. requireNamespace() checks for the
# package without attaching it, so it is the appropriate test for an
# "install if absent" guard (require() is meant for loading, not checking).
if (!requireNamespace("pacman", quietly = TRUE)) {
  install.packages("pacman")
}
# p_load() installs any missing package and then attaches it. janitor is
# listed because the script calls janitor::clean_names() when reading the data.
pacman::p_load(tidyverse, zoo, highcharter, janitor)
# Import the population CSV and normalise its column names with
# janitor::clean_names(); later steps refer to the cleaned names
# (entity, code, year, population_historical_estimates).
pop <- janitor::clean_names(
  read_csv("/Users/kendavidn/Dropbox/Mac (2)/Downloads/population.csv")
)
## Rows: 58252 Columns: 4
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (2): Entity, Code
## dbl (2): Year, Population (historical estimates)
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Build a yearly population series per entity and derive growth figures.
# Columns added: growth (absolute year-on-year change), growth_rate (percent
# change on the previous year) and growth_rate_smooth (trailing 20-year mean
# of growth_rate). The result is returned ungrouped.
annual_growth_rate <-
  pop %>%
  # filter out rows where code is empty
  filter(!is.na(code)) %>%
  filter(year > 0) %>%
  rename(population = population_historical_estimates) %>%
  select(-code) %>%
  group_by(entity) %>%
  # expand each entity to include every year from 100 to 2020;
  # years without data get an NA population
  complete(year = seq(100, 2020, 1)) %>%
  arrange(entity, year) %>%
  # For missing years, interpolate linearly within each entity. For example,
  # if Afghanistan has data for year 100 (200,000 people) and year 300
  # (300,000 people) but NAs in between, the populations for years 101, 102,
  # 103, etc. are estimated from the straight line joining the two points.
  # `x = year` makes the interpolation follow calendar years rather than row
  # position; `na.rm = FALSE` keeps leading/trailing NAs so the vector length
  # is unchanged.
  mutate(population = na.approx(population, x = year, na.rm = FALSE)) %>%
  # year-on-year absolute growth
  mutate(growth = population - lag(population)) %>%
  # growth rate in percent of the previous year's population
  mutate(growth_rate = 100 * growth / lag(population)) %>%
  # smooth the growth rate with a rolling mean over the past 20 years
  # (right-aligned window); positions without a full window are filled with NA
  mutate(
    growth_rate_smooth = rollmean(
      growth_rate,
      k = 20,
      fill = NA,
      align = "right"
    )
  ) %>%
  # drop the per-entity grouping so later verbs do not silently run by group
  ungroup()
# Subset the growth-rate table to a handful of key countries per continent:
# India, China; United States, Mexico; Brazil, Argentina; Russia, Turkey;
# Nigeria, Egypt; United Kingdom, France, Italy.
# Brazil is included here so the filter matches the intended list above.
select_countries <-
  annual_growth_rate %>%
  filter(
    entity %in% c(
      "India", "China",
      "United States", "Mexico",
      "Brazil", "Argentina",
      "Russia", "Turkey",
      "Nigeria", "Egypt",
      "United Kingdom", "France", "Italy"
    )
  )
# Static line chart: one line per selected country showing the smoothed
# annual growth rate over time.
ggplot(
  data = select_countries,
  mapping = aes(x = year, y = growth_rate_smooth, colour = entity)
) +
  geom_line() +
  labs(
    title = "Annual population growth rate",
    subtitle = "For key countries in each continent",
    x = "Year",
    y = "Annual growth rate"
  )
## Warning: Removed 240 rows containing missing values (`geom_line()`).
# Same chart rendered with highcharter: one line series per entity
hchart(
  select_countries,
  "line",
  hcaes(x = year, y = growth_rate_smooth, group = entity)
)
# Top 30 largest countries in 2020
# The 30 most populous entities in 2020, largest first.
# Rows without a code are dropped, mirroring the filter used to build
# annual_growth_rate; otherwise code-less rows could occupy slots in the
# ranking and then match nothing in the growth-rate table.
# NOTE(review): entities that do carry a code but are not countries (e.g. a
# world total, if the data has one) would still be ranked here - confirm
# against the data.
top_30 <- pop %>%
  filter(!is.na(code)) %>%
  filter(year == 2020) %>%
  # with_ties = FALSE guarantees at most 30 rows, like head(30) did
  slice_max(population_historical_estimates, n = 30, with_ties = FALSE)
# Interactive line chart of the smoothed growth rate, restricted to the
# entities listed in top_30
hchart(
  filter(annual_growth_rate, entity %in% top_30$entity),
  "line",
  hcaes(x = year, y = growth_rate_smooth, group = entity)
)