Init

options(
  digits = 2
)

library(pacman)
p_load(kirkegaard, readxl)

theme_set(theme_bw())

Data

#read data files, transform to long format
GDP = read_excel("data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_1217845.xls", skip = 3) %>% 
  df_legalize_names() %>% 
  pivot_longer(
    cols = x1960:x2019,
    names_to = "year"
  )
  
POP = read_excel("data/API_SP.POP.TOTL_DS2_en_excel_v2_1307373.xls", skip = 3) %>% 
  df_legalize_names() %>% 
    pivot_longer(
    cols = x1960:x2019,
    names_to = "year"
  )

#metadata for filtering
meta = read_excel("data/API_NY.GDP.PCAP.PP.CD_DS2_en_excel_v2_1217845.xls", sheet = 2) %>% 
  df_legalize_names()

#included couontries
included_countries = meta %>% 
  filter(
    #aggregates
    !str_detect(SpecialNotes, "aggregate") | is.na(SpecialNotes),
    #Hong kong and Macao
    !str_detect(SpecialNotes, "exercise of sovereignty") | is.na(SpecialNotes),
    #some other random aggregates
    !str_detect(TableName, "dividend|excluding|developed|income|members|Other|&"),
    #non-sovereign
    !str_detect(TableName, "Dutch|French|Virgin|American")
  )

#append
d = bind_rows(GDP, POP) %>% 
  mutate(
    year = year %>% str_sub(start = 2) %>% as.numeric()
  )

Top 20, all countries

#extract top 20, all countries
top = d %>% 
  filter(
    Country_Code %in% included_countries$Country_Code
  ) %>% 
  plyr::ddply("year", function(dd) {
    #GDP data, sort
    dd_sort = dd %>% 
      filter(Indicator_Code == "NY.GDP.PCAP.PP.CD") %>% 
      arrange(-value)
    
    #no data, then return empty
    if (all(is.na(dd_sort$value))) return(NULL)
    
    dd_sort %>% 
      mutate(
        rank = 1:n()
      )
  })

#plot it
top %>% 
  filter(
    rank %in% 1:20
  ) %>% 
  ggplot(aes(year, rank, color = Country_Name, label = Country_Name)) +
  geom_line()

#print a table
top %>% 
  filter(
    rank %in% 1:20
  ) %>% 
  pivot_wider(
    names_from = year,
    values_from = rank,
    id_cols = Country_Name
  )
#print it in another way
top %>% 
  filter(
    rank %in% 1:20
  ) %>% 
  arrange(rank) %>% 
  pivot_wider(
    names_from = year,
    values_from = Country_Name,
    id_cols = rank
  )

Top 20, independent, larger countries

#large are those with 1M pop in 2019
large_countries = d %>% 
  filter(year == 2019, value > 1e6, Indicator_Code == "SP.POP.TOTL") %>% 
  pull(Country_Name)

#extract top 20, large only
top_filter = d %>% 
  filter(
    Country_Name %in% large_countries,
    Country_Code %in% included_countries$Country_Code
  ) %>% 
  plyr::ddply("year", function(dd) {
    #GDP data, sort
    dd_sort = dd %>% 
      filter(Indicator_Code == "NY.GDP.PCAP.PP.CD") %>% 
      arrange(-value)
    
    #no data, then return empty
    if (all(is.na(dd_sort$value))) return(NULL)
    
    #add rank
    dd_sort %>% 
      mutate(
        rank = 1:n()
      )
  })

#plot it
top_filter %>% 
  filter(
    rank %in% 1:20
  ) %>% 
  ggplot(aes(year, rank, color = Country_Name, label = Country_Name)) +
  geom_line()

#print a table
top_filter %>% 
  filter(
    rank %in% 1:20
  ) %>% 
  pivot_wider(
    names_from = year,
    values_from = rank,
    id_cols = Country_Name
  )
#print it in another way
top_filter %>% 
  filter(
    rank %in% 1:20
  ) %>% 
  arrange(rank) %>% 
  pivot_wider(
    names_from = year,
    values_from = Country_Name,
    id_cols = rank
  )
#nordics
top_filter %>% 
  filter(
    Country_Name %in% c("Denmark", "Norway", "Sweden", "Finland")
  ) %>% 
  ggplot(aes(year, rank, color = Country_Name, group = Country_Name)) +
  geom_line() +
  scale_color_discrete("Country") +
  # scale_y_continuous("rank", breaks = 1:100, limits = c(1, 25))
  scale_y_reverse("rank", breaks = 1:100, lim = c(NA, NA)) +
  ggtitle("GDP per capita (PPP), World Bank, 1990-2019",
          "Soverign countries only, countries with <1M population removed") +
  coord_cartesian(ylim = c(1, 25))

GG_save("figs/nordics.png")