Init
options(
digits = 2
)
library(pacman)
p_load(kirkegaard, readxl)
theme_set(theme_bw())
Data
#read data files, transform to long format
GDP = read_excel("data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_1217845.xls", skip = 3) %>%
df_legalize_names() %>%
pivot_longer(
cols = x1960:x2019,
names_to = "year"
)
POP = read_excel("data/API_SP.POP.TOTL_DS2_en_excel_v2_1307373.xls", skip = 3) %>%
df_legalize_names() %>%
pivot_longer(
cols = x1960:x2019,
names_to = "year"
)
#metadata for filtering
meta = read_excel("data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_1217845.xls", sheet = 2) %>%
df_legalize_names()
#included couontries
included_countries = meta %>%
filter(
#aggregates
!str_detect(SpecialNotes, "aggregate") | is.na(SpecialNotes),
#Hong kong and Macao
!str_detect(SpecialNotes, "exercise of sovereignty") | is.na(SpecialNotes),
#some other random aggregates
!str_detect(TableName, "dividend|excluding|developed|income|members|Other|&"),
#non-sovereign
!str_detect(TableName, "Dutch|French|Virgin|American")
)
#append
d = bind_rows(GDP, POP) %>%
mutate(
year = year %>% str_sub(start = 2) %>% as.numeric()
)
Top 20, all countries
#extract top 20, all countries
top = d %>%
filter(
Country_Code %in% included_countries$Country_Code
) %>%
plyr::ddply("year", function(dd) {
#GDP data, sort
dd_sort = dd %>%
filter(Indicator_Code == "NY.GDP.PCAP.PP.CD") %>%
arrange(-value)
#no data, then return empty
if (all(is.na(dd_sort$value))) return(NULL)
dd_sort %>%
mutate(
rank = 1:n()
)
})
#plot it
top %>%
filter(
rank %in% 1:20
) %>%
ggplot(aes(year, rank, color = Country_Name, label = Country_Name)) +
geom_line()

#print a table
top %>%
filter(
rank %in% 1:20
) %>%
pivot_wider(
names_from = year,
values_from = rank,
id_cols = Country_Name
)
#print it in another way
top %>%
filter(
rank %in% 1:20
) %>%
arrange(rank) %>%
pivot_wider(
names_from = year,
values_from = Country_Name,
id_cols = rank
)
Top 20, independent, larger countries
#large are those with 1M pop in 2019
large_countries = d %>%
filter(year == 2019, value > 1e6, Indicator_Code == "SP.POP.TOTL") %>%
pull(Country_Name)
#extract top 20, large only
top_filter = d %>%
filter(
Country_Name %in% large_countries,
Country_Code %in% included_countries$Country_Code
) %>%
plyr::ddply("year", function(dd) {
#GDP data, sort
dd_sort = dd %>%
filter(Indicator_Code == "NY.GDP.PCAP.PP.CD") %>%
arrange(-value)
#no data, then return empty
if (all(is.na(dd_sort$value))) return(NULL)
#add rank
dd_sort %>%
mutate(
rank = 1:n()
)
})
#plot it
top_filter %>%
filter(
rank %in% 1:20
) %>%
ggplot(aes(year, rank, color = Country_Name, label = Country_Name)) +
geom_line()

#print a table
top_filter %>%
filter(
rank %in% 1:20
) %>%
pivot_wider(
names_from = year,
values_from = rank,
id_cols = Country_Name
)
#print it in another way
top_filter %>%
filter(
rank %in% 1:20
) %>%
arrange(rank) %>%
pivot_wider(
names_from = year,
values_from = Country_Name,
id_cols = rank
)
#nordics
top_filter %>%
filter(
Country_Name %in% c("Denmark", "Norway", "Sweden", "Finland")
) %>%
ggplot(aes(year, rank, color = Country_Name, group = Country_Name)) +
geom_line() +
scale_color_discrete("Country") +
# scale_y_continuous("rank", breaks = 1:100, limits = c(1, 25))
scale_y_reverse("rank", breaks = 1:100, lim = c(NA, NA)) +
ggtitle("GDP per capita (PPP), World Bank, 1990-2019",
"Soverign countries only, countries with <1M population removed") +
coord_cartesian(ylim = c(1, 25))

GG_save("figs/nordics.png")