# Path helper: pre-fills the leading components of the data folder so that
# folder_path("file") resolves to a file inside that folder.
folder_path <- partial(here, "00_data", "sov_debt_paris_alignment")

# Show which files are present in the data folder.
list.files(folder_path())
##  [1] "country_list_bis_debt_securities.csv"  
##  [2] "country_list_combined_bond_indices.csv"
##  [3] "country_list_em_dm.csv"                
##  [4] "emissions_dataset.csv"                 
##  [5] "emissions_dataset.rds"                 
##  [6] "emissions_dataset_full.csv"            
##  [7] "emissions_dataset_full.rds"            
##  [8] "imf_wb_country_groups.csv"             
##  [9] "imf_wb_country_groups.rds"             
## [10] "wb_income_groups.csv"                  
## [11] "wb_income_groups.rds"                  
## [12] "weo_world_income_population.csv"       
## [13] "weo_world_income_population.rds"

Note that the `echo = FALSE` parameter was added to the code chunk to prevent the R code that generated the plot from being printed.

# Load the two emissions datasets (RDS) and the combined bond-index country
# list (CSV) from the data folder.
emissions_dataset <- read_rds(folder_path("emissions_dataset.rds"))

emissions_dataset_full <- read_rds(folder_path("emissions_dataset_full.rds"))

bond_indices <- read_csv(
  folder_path("country_list_combined_bond_indices.csv")
)
## Rows: 158 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): country_name, iso3c, index, em_dm
## dbl (1): value_pct
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Country-level debt securities table, read from CSV.
bis_gov_securities <- read_csv(
  folder_path("country_list_bis_debt_securities.csv")
)
## Rows: 55 Columns: 5
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (3): country_name, iso3c, em_dm
## dbl (2): total_debt_securities, pct_of_total
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the remaining lookup tables, all stored as RDS files in the data folder.
imf_wb_country_groups <- read_rds(folder_path("imf_wb_country_groups.rds"))

wb_income_groups <- read_rds(folder_path("wb_income_groups.rds"))

weo_world_income_population <- read_rds(
  folder_path("weo_world_income_population.rds")
)
# Keep only the rows for the two country groups of interest and drop the
# group_type column.
imf_ae_eme <- imf_wb_country_groups %>%
  filter(
    country_group %in% c(
      "Advanced Economies",
      "Emerging Market Economies"
    )
  ) %>%
  select(-group_type)

imf_ae_eme
## # A tibble: 135 × 2
##    country_name country_group     
##    <chr>        <chr>             
##  1 Australia    Advanced Economies
##  2 Austria      Advanced Economies
##  3 Belgium      Advanced Economies
##  4 Canada       Advanced Economies
##  5 Switzerland  Advanced Economies
##  6 Cyprus       Advanced Economies
##  7 Czechia      Advanced Economies
##  8 Germany      Advanced Economies
##  9 Denmark      Advanced Economies
## 10 Spain        Advanced Economies
## # … with 125 more rows
# Take the 2019 rows, drop the now-constant year column, and reshape every
# column from gdp_usd_current_prices through cumulative_co2_per_capita into
# long form. "name" and "value" are pivot_longer()'s default output columns,
# spelled out here because later steps refer to them.
emissions_dataset_2019 <- emissions_dataset %>%
  filter(year == 2019) %>%
  select(-year) %>%
  pivot_longer(
    cols = gdp_usd_current_prices:cumulative_co2_per_capita,
    names_to = "name",
    values_to = "value"
  )
# Enrich the debt securities table with country classifications (IMF group,
# UN region and sub-region, income group) and the long-form 2019 emissions
# characteristics.
bis_emissions <- bis_gov_securities %>%
  # Join keys are spelled out: a natural join would silently change its keys
  # if a column with a shared name were later added to either table. Explicit
  # keys also suppress the "Joining, by = ..." message.
  left_join(imf_ae_eme, by = "country_name") %>%
  rename(imf_ae_eme = country_group) %>%
  mutate(
    # countrycode() warns for codes it cannot match unambiguously; those rows
    # are left as NA in the region columns.
    region = countrycode(
      iso3c,
      origin = "iso3c",
      destination = "un.region.name"
    ),
    sub_region = countrycode(
      iso3c,
      origin = "iso3c",
      destination = "un.regionsub.name"
    )
  ) %>%
  left_join(wb_income_groups, by = "country_name") %>%
  left_join(
    emissions_dataset_2019,
    by = c("country_name", "iso3c", "em_dm")
  ) %>%
  # The long-form "name" column holds the characteristic each value measures.
  rename(characteristic = name)
## Joining, by = "country_name"
## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: TWN

## Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: TWN
## Joining, by = "country_name"
# Fill in the region fields by hand for the TWN rows; every other row keeps
# the value it already has.
bis_emissions_clean <- bis_emissions %>%
  # case_when() works like a chain of if_else() calls: the first condition
  # that is TRUE wins, and the final `TRUE ~ ...` line is the fallback.
  mutate(
    region = case_when(
      iso3c %in% "TWN" ~ "Asia",
      TRUE ~ region
    ),
    sub_region = case_when(
      iso3c %in% "TWN" ~ "Eastern Asia",
      TRUE ~ sub_region
    )
  )

bis_emissions_clean
## # A tibble: 1,485 × 11
##    country_…¹ iso3c total…² pct_o…³ em_dm imf_a…⁴ region sub_r…⁵ wb_in…⁶ chara…⁷
##    <chr>      <chr>   <dbl>   <dbl> <chr> <chr>   <chr>  <chr>   <fct>   <chr>  
##  1 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    gdp_us…
##  2 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    gdp_pp…
##  3 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    gdp_pc…
##  4 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    gdp_pc…
##  5 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    popula…
##  6 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    govt_e…
##  7 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    debt_p…
##  8 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    territ…
##  9 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    trade_…
## 10 United St… USA    19115.   0.327 Deve… Advanc… Ameri… Northe… High    consum…
## # … with 1,475 more rows, 1 more variable: value <dbl>, and abbreviated
## #   variable names ¹​country_name, ²​total_debt_securities, ³​pct_of_total,
## #   ⁴​imf_ae_eme, ⁵​sub_region, ⁶​wb_income_group, ⁷​characteristic
#' Add ranking and distribution features for a numeric column
#'
#' Appends five columns derived from `value_var`, each prefixed with that
#' column's name: `<var>_z_score`, `<var>_rank`, `<var>_percentile`,
#' `<var>_quantile_<quantile_n>` and `<var>_pct_total`. The columns are added
#' with `mutate()`, so on grouped input they are computed within each group.
#'
#' `mean()`, `sd()` and `sum()` are called without `na.rm`, so a missing value
#' in `value_var` propagates to the z-score and share-of-total columns.
#'
#' @param data A data frame (optionally grouped) that contains `value_var`.
#' @param value_var Unquoted column to rank. Defaults to `value`.
#' @param rank_by_lowest If `TRUE` (the default), the lowest value gets rank 1
#'   and the z-score keeps its natural sign. If `FALSE`, the ordering is
#'   reversed (the highest value gets rank 1) and the z-score is negated.
#' @param quantile_n Number of buckets passed to `ntile()`. Its *value* is used
#'   in the output column name, e.g. `value_quantile_5`.
#' @return `data` with the five feature columns appended.
add_rank_features <- function(data, value_var = value, rank_by_lowest = TRUE,
                              quantile_n = 5) {
  # Flip the sign of the input when the highest value should rank first, so a
  # single mutate() covers both orderings.
  orient <- if (rank_by_lowest) {
    identity
  } else {
    function(x) -x
  }

  # Evaluate once so the column name carries the number itself rather than the
  # caller's expression (`quantile_n = k` must yield "..._quantile_5", not
  # "..._quantile_k").
  n_groups <- quantile_n

  data %>%
    mutate(
      "{{value_var}}_z_score" := orient(
        ({{ value_var }} - mean({{ value_var }})) / sd({{ value_var }})
      ),
      "{{value_var}}_rank" := rank(orient({{ value_var }})),
      "{{value_var}}_percentile" := percent_rank(orient({{ value_var }})),
      # `!!` injects the number so a column that happens to be called
      # `n_groups` cannot shadow it.
      "{{value_var}}_quantile_{n_groups}" := ntile(
        orient({{ value_var }}),
        !!n_groups
      ),
      # Share of the total does not depend on the ranking direction.
      "{{value_var}}_pct_total" := {{ value_var }} / sum({{ value_var }})
    )
}
# Compute the rank features separately for each characteristic, with the
# highest value ranked first, then drop the grouping again.
bis_emissions_w_features <- ungroup(
  add_rank_features(
    group_by(bis_emissions_clean, characteristic),
    rank_by_lowest = FALSE
  )
)

# Total debt securities per market type (one row per em_dm value).
em_debt <- bis_gov_securities %>%
  group_by(em_dm) %>%
  summarize(total_debt = sum(total_debt_securities))

print(em_debt)
## # A tibble: 2 × 2
##   em_dm             total_debt
##   <chr>                  <dbl>
## 1 Developed Markets     44964.
## 2 Emerging Markets      13516.

# Bar chart of total debt for each market type.
# NOTE(review): the subtitle says "$mn" and the caption credits the World
# Bank, while the figures come from bis_gov_securities -- confirm both the
# unit and the source.
ggplot(em_debt, aes(x = em_dm, y = total_debt)) +
  geom_col(fill = "brown3", colour = "black") +
  scale_y_continuous(breaks = seq(10000, 50000, by = 5000)) +
  labs(
    title = "Total Debt by Market Type",
    subtitle = "$mn",
    x = "Market Type",
    y = "Total Debt",
    caption = "Source: World Bank Data"
  ) +
  theme_economist()

# Line chart of cumulative CO2 by year for a selection of countries
# (x-axis breaks run from 1990 to 2020 in 5-year steps).
emissions_dataset_full %>%
  # Rename so the legend title reads "Country" instead of "iso3c".
  rename(Country = iso3c) %>%
  filter(Country %in% c("SGP", "AUS", "PHL", "BGD", "IND", "IDN", "MYS")) %>%
  # The piped, filtered data frame is already ggplot()'s `data` argument, so
  # the unfiltered data set must not be passed a second time.
  ggplot(mapping = aes(x = year, y = cumulative_co2, group = Country)) +
  geom_line(aes(color = Country), size = 2) +
  labs(
    x = "Year",
    y = "Cumulative CO2",
    title = "Cumulative CO2 by Year",
    caption = "Source: IMF"
  ) +
  scale_y_continuous(breaks = seq(10000, 1500000, by = 10000)) +
  scale_x_continuous(breaks = seq(1990, 2020, by = 5)) +
  theme_bw()

# Sum the cumulative_co2 values within each sub-region and sort the result
# from largest to smallest.
sub_region_emissions <- bis_emissions_w_features %>%
  filter(characteristic == "cumulative_co2") %>%
  group_by(sub_region) %>%
  summarize(total_co2 = sum(value)) %>%
  arrange(desc(total_co2))

print(sub_region_emissions)
## # A tibble: 12 × 2
##    sub_region                      total_co2
##    <chr>                               <dbl>
##  1 Northern America                  445051.
##  2 Eastern Asia                      317591.
##  3 Eastern Europe                    174178.
##  4 Western Europe                    163685.
##  5 Northern Europe                    98878.
##  6 Southern Asia                      51975.
##  7 Latin America and the Caribbean    51792.
##  8 Southern Europe                    47678.
##  9 South-eastern Asia                 32082.
## 10 Western Asia                       28368.
## 11 Sub-Saharan Africa                 20712.
## 12 Australia and New Zealand          18244.

# Bar chart of total CO2 by sub-region, largest first.
# Long sub-region names are wrapped onto several lines when the axis is drawn.
# This replaces a manual fix() edit, which opens an interactive editor and so
# cannot be reproduced when the document is rendered. sub_region_emissions
# itself is left untouched.
sub_region_emissions %>%
  ggplot(aes(x = reorder(sub_region, -total_co2), y = total_co2)) +
  geom_col(fill = "brown") +
  scale_x_discrete(
    labels = function(labels) {
      vapply(
        labels,
        function(label) paste(strwrap(label, width = 15), collapse = "\n"),
        character(1),
        USE.NAMES = FALSE
      )
    }
  ) +
  labs(
    x = "Sub Region",
    y = "Total CO2",
    title = "Total CO2 Emissions by Sub-Region",
    caption = "Source: IMF"
  ) +
  scale_y_continuous(breaks = seq(1000, 1000000, by = 50000)) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0, hjust = .5),
    panel.background = element_rect(
      fill = "lightblue",
      colour = "lightblue",
      size = 0.5,
      linetype = "solid"
    )
  )