library(foreign)
library(tidyverse)
library(countrycode)
library(gganimate)
library(scales)
library(RColorBrewer)
# Read the billionaires data set from the Stata file.
data <- read.dta("Billionaires1996-2014.dta")

# Add a readable country name derived from the 3-letter country code, then
# override the converted name for a handful of codes.
# Columns are referenced bare (not via `data$...` or `.$...`) so every step
# operates on the data frame flowing through the pipe. R skips the
# non-function `countrycode` column when resolving the `countrycode()` call.
data <- data %>%
  mutate(
    countryname = countrycode(countrycode, "iso3c", "country.name"),
    countryname = case_when(
      countrycode == "ROM" ~ "Romania",
      countrycode == "Taiwan" ~ "Taiwan",
      countrycode == "CHI" ~ "Channel Islands",
      countrycode == "HKG" ~ "Hong Kong",
      TRUE ~ countryname
    )
  )

# Add the continent for each country name and drop the columns that are not
# used in the rest of the script.
data <- data %>%
  mutate(continent = countrycode(countryname, "country.name", "continent")) %>%
  select(-c(citizenship, company))
# Line chart of the number of rows (billionaires) per year, with two marked
# years and explanatory text labels.
data %>%
  count(year) %>%
  ggplot(aes(x = year, y = n)) +
  geom_line(size = 1) +
  expand_limits(y = 0) +
  labs(
    x = "",
    y = "Billionaires",
    title = "Number of Billionaires in the World (1996-2016)"
  ) +
  # annotate() draws each marker exactly once. geom_point() with constant
  # aesthetics would repeat the same point for every row of the plot data.
  annotate("point", x = 2001, y = 538, colour = "#e41a1c", size = 2) +
  annotate("point", x = 2008, y = 1124, colour = "#e41a1c", size = 2) +
  annotate("text", x = 2001, y = 750, label = "2001:\nDot-com bubble burst") +
  annotate("text", x = 2008, y = 1330, label = "2008:\nOnset of financial crisis") +
  theme_classic() +
  theme(
    # Extra right margin separates the y-axis title from the tick labels.
    axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
    plot.title = element_text(size = 16)
  )
# Keep only rows with a known continent and fix the continent order, which
# controls the order of the legend entries and colours.
continent_data <- data %>%
  filter(!is.na(continent)) %>%
  mutate(
    continent = factor(
      continent,
      levels = c("Americas", "Asia", "Europe", "Oceania", "Africa")
    )
  )

# One line per continent showing the number of rows (billionaires) per year.
# Missing continents were already removed above, so no second filter is needed.
continent_data %>%
  count(year, continent) %>%
  ggplot(aes(x = year, y = n, colour = continent)) +
  geom_line(size = 1) +
  labs(
    x = "",
    y = "Billionaires",
    title = "Billionaires by Continent (1996-2016)",
    colour = ""
  ) +
  theme_classic() +
  theme(
    axis.title.y = element_text(margin = margin(t = 0, r = 15, b = 0, l = 0)),
    plot.title = element_text(size = 16)
  ) +
  scale_colour_brewer(type = "qual", palette = 6)
# I wanted to label each line directly instead of using a legend; however, since
# the Oceania and Africa lines are so close together, the labels overlapped.
# Total real net worth per year, split by the `north` indicator, drawn as one
# line per group.
data %>%
  mutate(
    # Relabel the 0/1 indicator; the argument order fixes the factor levels.
    north = recode_factor(
      north,
      "0" = "Emerging economies",
      "1" = "Advanced economies"
    )
  ) %>%
  group_by(year, north) %>%
  summarise(totalnetworth = sum(realnetworth, na.rm = TRUE)) %>%
  ggplot(aes(x = year, y = totalnetworth, colour = north)) +
  geom_line(size = 1) +
  expand_limits(y = 0) +
  scale_y_continuous(labels = dollar_format()) +
  # The legend is reversed relative to the factor level order.
  scale_colour_brewer(
    type = "qual",
    palette = 6,
    guide = guide_legend(reverse = TRUE)
  ) +
  labs(
    title = "Total Net Worth of Billionaires in Emerging & Advanced Economies",
    subtitle = "Measured in billions, 1996 US dollars",
    x = "",
    y = "",
    colour = ""
  ) +
  theme_classic() +
  theme(plot.title = element_text(size = 16))
# I also wanted to label each line directly instead of using a legend; however,
# I chose not to do this because I wanted to ensure consistency with the other
# graphs + fitting in the full labels ("advanced economies" and "emerging
# economies") distorted the x-axis significantly.
# Share of billionaires per industry group for selected years, in long format
# with columns `year`, `industry` and `percentage`.

# Maps the raw IndustryAggregates values to the short labels used in the chart.
# The order of the entries is the order of the factor levels (stacking order).
industry_labels <- c(
  "other" = "Other",
  "New Sectors" = "New",
  "Resource Related" = "Resource Related",
  "Traded Sectors" = "Traded",
  "Non-Traded Sectors" = "Non-Traded",
  "Financial" = "Financial"
)

x <- data %>%
  filter(
    year %in% c(2000, 2004, 2008, 2012, 2016),
    # Keeps only the six known groups; %in% also drops missing values.
    IndustryAggregates %in% names(industry_labels)
  ) %>%
  count(year, IndustryAggregates) %>%
  # Shares are computed within each year from the long-format counts, so a
  # group that is absent in some year cannot turn that year's shares into NA.
  group_by(year) %>%
  mutate(percentage = n / sum(n)) %>%
  ungroup() %>%
  mutate(
    industry = factor(
      unname(industry_labels[as.character(IndustryAggregates)]),
      levels = unname(industry_labels)
    )
  ) %>%
  select(year, industry, percentage)
# Stacked bar chart of industry shares per year, with each segment labelled
# by its rounded percentage.
ggplot(x, aes(x = year, y = percentage)) +
  geom_col(aes(fill = industry)) +
  # position_stack(vjust = .5) centres each label inside its bar segment.
  geom_text(
    aes(label = paste0(round(percentage * 100), "%"), group = industry),
    position = position_stack(vjust = .5),
    size = 3.5
  ) +
  scale_x_continuous(breaks = seq(2000, 2016, 4)) +
  scale_y_continuous(labels = percent_format()) +
  scale_fill_brewer(type = "qual", palette = 4, direction = -1) +
  labs(
    title = "Billionaires by Industry",
    x = "",
    y = "Percentage of Billionaires",
    fill = "Industry"
  ) +
  theme_classic() +
  theme(plot.title = element_text(size = 16))
# Count rows per country and year, rank countries within each year (largest
# count = rank 1) and keep the top 10 of every year.
x <- data %>%
  count(year, countryname) %>%
  group_by(year) %>%
  # ties.method = "first" gives tied countries distinct ranks in row order.
  mutate(rank = rank(-n, ties.method = "first")) %>%
  # Drop the grouping before filtering: the filter does not depend on groups,
  # and the result should not carry a leftover grouping downstream.
  ungroup() %>%
  filter(rank <= 10)
# The ColorBrewer palette I'm using doesn't have enough unique colours (I need
# 19 since that's how many countries make the top 10 at least once). This code
# creates enough new shades by interpolating between the existing colours.
colourCount <- length(unique(x$countryname))
# "Set1" provides at most 9 colours. Request exactly those and let
# colorRampPalette() interpolate; asking brewer.pal() for more than the
# palette holds triggers a warning.
getPalette <- colorRampPalette(brewer.pal(9, "Set1"))
# Animated bar chart race: one frame state per year, one bar per country,
# positioned by its rank in that year.
anim <- ggplot(
  x,
  aes(x = rank, group = countryname, fill = as.factor(countryname))
) +
  # geom_tile is used because it slides more smoothly than a regular bar
  # graph. Each tile is centred at n/2 with height n, so it spans 0 to n.
  geom_tile(aes(y = n / 2, height = n, width = 0.9), alpha = 0.8) +
  # Country name to the left of the bar, count to the right of it.
  geom_text(
    aes(y = 0, label = paste(countryname, " ")),
    vjust = 0.2,
    hjust = 1
  ) +
  geom_text(aes(y = n, label = paste(n, " ")), hjust = -0.2) +
  # clip = "off" lets the text labels extend beyond the panel area.
  coord_flip(clip = "off", expand = FALSE) +
  scale_x_reverse() +
  scale_fill_manual(values = getPalette(colourCount)) +
  scale_colour_manual(values = getPalette(colourCount)) +
  # {closest_state} is replaced by the year of the current frame.
  labs(
    title = "Countries with the most billionaires in {closest_state}",
    x = "",
    y = ""
  ) +
  theme(
    legend.position = "none",
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank(),
    axis.ticks.y = element_blank(),
    axis.text.y = element_blank(),
    panel.grid.major = element_blank(),
    panel.grid.minor = element_blank(),
    panel.background = element_blank(),
    plot.margin = margin(2, 2, 2, 4, "cm"),
    plot.title = element_text(size = 16)
  ) +
  transition_states(year, transition_length = 10, state_length = 10) +
  ease_aes("cubic-in-out")

# Render the animation.
animate(anim, nframes = 80, fps = 4, end_pause = 20)