PAI Charts - Data Cleaning

This is the data cleaning process on which all further analysis is based for the paper: PAI CHARTS - Effects of Exclusion Policies Using Principal Adverse Impact Indicators on Sovereign Debt Portfolios.

Load Necessary Packages

Code

# load packages


library(tidyverse)
library(tidyquant)
library(scales)
library(ggrepel)
library(readxl)
library(readr)
library(janitor)
library(here)
library(rnaturalearth)
library(countrycode)
library(wbstats)
library(stringr)
library(devtools)

# library(broom)
# library(maps)

options(scipen=10)

Integrate useful functions

Countrycode Function

Code

# copy in useful functions for countrycodes (Teal)
country_name_regex_to_iso3c <- function(country_name) {
  country_name |>
    countrycode(origin = "country.name", 
                                     destination = "iso3c",
                                     origin_regex = TRUE)
}

iso3c_to_country_name <- function(iso3c) {
  iso3c |>
  countrycode(origin = "iso3c", destination = "country.name")
}

Simple Rank Percentile Function

Code

rank_pct <- function(data, value_var = value, rank_by_lowest = TRUE) {

  if (rank_by_lowest) #lowest value gets lowest percentile (e.g., value of 1 out of ten --> 10%)
  data %>% 
    mutate(
      "{{value_var}}_pct_rank" := rank({{value_var}})/length({{value_var}})*100 ) 
  else #lowest value gets highest percentile (e.g., value of 1 out of ten --> 100%)
    data %>% 
    mutate(
      "{{value_var}}_pct_rank" := rank(-{{ value_var}})/length({{value_var}})*100
    )
}


sample_data <- data.frame(
  variable1 = c(3, 8, 2, 5, 7, 1, 4, 10, 6, 9),
  variable2 = c(7, 2, 9, 1, 5, 8, 3, 6, 10, 4)
)

sample_data_1 <- sample_data %>% rank_pct(variable1, rank_by_lowest = FALSE)

Year/Country Summary Function

Code

# build function that counts how many countries and which years are covered by the dataset

count_countries_years <- function(data) {
  require(dplyr)
  
  # Check if the input data contains the required columns
  if (!all(c("year", "country_name") %in% names(data))) {
    stop("Input data must contain 'year' and 'country_name' columns.")
  }
  
  # Calculate the counts
  unique_year_count <- data %>%
    select(year) %>%
    distinct() %>%
    n_distinct()
  
  unique_country_count <- data %>%
    select(country_name) %>%
    distinct() %>%
    n_distinct()
  
  # Calculate the lowest and highest year values
  lowest_year <- min(data$year)
  highest_year <- max(data$year)
  
  # Create a summary data frame
  summary_countries_years <- data.frame(
    Number_of_Years = unique_year_count,
    Lowest_Year = lowest_year,
    Highest_Year = highest_year,
    Number_of_Countries = unique_country_count
  )
  
  return(summary_countries_years)
}

Data Cleaning and Preparation

EDGAR Data

Code

# cleaning EDGAR data 
# EDGAR notes: Total GHG emissions from EDGARv7.0: https://edgar.jrc.ec.europa.eu/dataset_ghg70. GHG emissions are expressed in Mton CO2eq. They are aggregated using Global Warming Potential values from IPCC AR5 (GWP-100 AR5).GHG emissions include CO2 (fossil only), CH4, N2O and F-gases. 

edgar_raw <- read_excel("EDGARv7.0_GHG_AR4_AR5 scope 1.xlsx", sheet = "Total GHG by country AR5", skip = 4)

# longer format and timeframe to 1995-2019, years as integers
edgar_1 <- edgar_raw %>% 
  select(`Country`, `1970`:`2021`) %>% 
    pivot_longer(
    cols = `1970`:`2021`, 
    names_to = "year",
    values_to = "GHG") %>% 
  mutate(year = as.integer(year)) 

# Drop International Aviation and Shipping
edgar_2 <- edgar_1 %>% 
  filter (Country != "International Aviation" & Country != "International Shipping")

#Check for NAs
edgar_2 %>%  filter(is.na(GHG))
  #drop_na()

#Clean Names
edgar_3 <- edgar_2 %>% janitor::clean_names()
colnames(edgar_3) <- make.names(colnames(edgar_3))

The Edgar data set summarizes emissions for the following territories and states: - “France and Monaco” - “Israel and the State of Palestine” - “Italy, San Marino and the Holy See” - “Spain and Andorra” - “Switzerland and Liechtenstein” - “Serbia and Montenegro”

The emissions contributions of these small and smallest states and territories to the emissions of the larger states (France, Israel, Italy, Spain, Switzerland) are difficult to separate cleanly. To get an understanding of the actual amounts, I compared the most recently officially declared emissions to the UNFCCC (often the only source for another estimate). Data for Annex 1 parties: https://di.unfccc.int/flex_annex1 Data for Non-Annex 1 parties: https://unfccc.int/non-annex-I-NCs

The most recent data for San Marino is from 2010. In 2010, total emission from San Marino (267.85 kt CO2-e) were less than 0.1% of declared emissions from Italy (523,465.7 kt CO2-e), specifically 0.05%. Given that estimates by EDGAR have an overall uncertainty of 11% and taking into the account the difficulty of creating a dataset that would accurately reflect differences at this level, I chose to attribute the EDGAR estimate for “Italy and San Marino” to Italy for further analyses.
I followed this approach with the other cases mentioned above. Monaco’s emissions amounted to 0.02% of France’s emission in 2020. Andorra’s emissions were equal to 0.16% of Spain’s emissions in 2019. Liechtenstein’s emission equaled 0.4% of Switzerland’s emissions in 2020. All were well below 1%.
The outliers in this respect were Israel and the Palestinian territories as well as Serbia and Montenegro. Comparing emissions data for 2011 as submitted to UNFCCC Palestinian emissions were at 4% of Israel’s. Montenegro’s emissions in 2014 were as high as 6% of Serbia’s. While these figures are substantially higher than in the other cases, they are still within uncertainties of estimates from different available sources, namely also EGDAR’s. I therefore attributed these pooled EDGAR estimates to Israel and Serbia respectively. I refrain from constructing different estimates for the smalls states in favor of a consistent methodological approach.

Code

edgar_4 <- edgar_3 %>% mutate(
    country = case_when(
      country == "France and Monaco" ~ "France",
      country == "Israel and Palestine, State of" ~ "Israel",
      country == "Italy, San Marino and the Holy See" ~ "Italy", 
      country == "Spain and Andorra" ~ "Spain", 
      country == "Switzerland and Liechtenstein" ~ "Switzerland", 
      country == "Serbia and Montenegro" ~ "Serbia",
      TRUE ~ country
    )
  )

#Standardize country names and add iso3c codes
edgar_5 <- edgar_4 %>% 
   mutate(iso3c = country_name_regex_to_iso3c(`country`)) %>%
  mutate(country_name = iso3c_to_country_name(`iso3c`))

# move iso3c and country names to the front
edgar_6 <- edgar_5 %>% 
  select(! country) %>% 
  relocate(iso3c, .before = 1) %>% 
  relocate(country_name, .before = 2)

Code

# save cleaned edgar dataset
edgar_clean <- edgar_6
write_csv(edgar_clean, "./processed_01/edgar_clean.csv")

World Economic Outlook Data

Data on GDP is provided in national currency and USD. The SFDR asks for reporting in Million Euro. I thus use the annual average exchange rates between USD and Euro from OECD (https://stats.oecd.org/index.aspx?queryid=169#) to convert all annual USD values to Euro values. Nominal GDP in billion USD is coded as “NGDPD” as a WEO subject code, these are the rows I extract.

Code

weo_raw <- read_excel("WEO.xlsx", na = c("n/a", "NA", ""))
weo_1 <- weo_raw %>% 
  janitor::clean_names()

colnames(weo_raw) <- make.names(colnames(weo_raw))

weo_2 <- weo_1 %>% 
  select(weo_subject_code, country, subject_descriptor, x1980:x2023) %>% 
  filter(weo_subject_code == "NGDPD") %>% 
  select(country, x1980:x2023) %>% 
  pivot_longer(
    cols = `x1980`:`x2023`, 
    names_to = "year",
    values_to = "GDP_USD") %>% 
   mutate(year = as.numeric(str_replace(year, "x", ""))) %>% 
  mutate(year = as.integer(year)) %>% 
  mutate(GDP_USD = coalesce(GDP_USD, NA))

weo_2$GDP_USD <- gsub(",", "", weo_2$GDP_USD)
weo_2$GDP_USD <- as.numeric(weo_2$GDP_USD)

weo_3 <- weo_2 %>% 
  mutate(iso3c = country_name_regex_to_iso3c(`country`)) %>%
  mutate(iso3c = case_when(
    country == "Micronesia" ~ "FSM", 
    country == "Türkiye" ~ "TUR", 
    country == "Kosovo" ~ "XKX", 
    TRUE ~ iso3c)) %>% 
  mutate(country_name = iso3c_to_country_name(`iso3c`)) %>% 
  mutate(country_name = ifelse(iso3c == "XKX", "Kosovo", country_name))

# move iso3c and country names to the front
weo_4 <- weo_3 %>% 
  select(! country) %>% 
  relocate(iso3c, .before = 1) %>% 
  relocate(country_name, .before = 2) 

# save weo_4
weo_clean <- weo_4
write_csv(weo_clean, "./processed_01/weo_clean.csv")

OECD dataset for exchange rates

Annual averages of Euro per USD exchange rate from https://data.oecd.org/conversion/exchange-rates.htm

Code

# clean OECD exchange rate data
OECD_exchange_raw <- read_excel("OECD_exchange_rates.xlsx", skip = 3) %>% 
  select('2001':'2022')

OECD_exchange_1 <- OECD_exchange_raw[-1, ]
OECD_exchange_2 <- OECD_exchange_1[-2, ]
OECD_exchange_3 <- OECD_exchange_2 %>% 
  pivot_longer(
     cols = `2001`:`2022`, 
    names_to = "year",
    values_to = "EUR_per_USD") %>% 
  mutate(year = as.integer(year)) 
  
# save exchange rates
OECD_exchange_clean <- OECD_exchange_3
write_csv(OECD_exchange_clean, "./processed_01/OECD_exchange_clean.csv")

GDP in Euro data

Data from the IMF World Economic Outlook: <https://www.imf.org/en/Publications/SPROLLs/world-economic-outlookdatabases# sort=%40imfdate%20descending>

Code

weo_euro_1 <- weo_clean %>% 
left_join(OECD_exchange_clean, by = "year") %>% 
  mutate(GDP_EUR = GDP_USD*EUR_per_USD, 
         GDP_EUR_MLN = GDP_EUR*1000)

weo_euro_2 <- weo_euro_1 %>% 
  select("iso3c", "country_name", "year", "GDP_EUR_MLN")


weo_euro_clean <- weo_euro_2
count_countries_years(weo_euro_clean)
write_csv(weo_euro_1, "./processed_01/weo_euro_1.csv")
write_csv(weo_euro_clean, "./processed_01/weo_euro_clean.csv")

OECD data for scopes

The TECO2 dataset by OECD (https://stats.oecd.org/Index.aspx?DataSetCode=IO_GHG_2021) is for the moment the only datasource that contains data close enough to the definition by PCAF of scope 2 and scope 3 data for sovereigns. Specifically, the combination of Indicator DFD_FCO2 and Industry D35 (Electricity, gas, steam and AC supply) can be interpreted as scope 2 emissions (though OECD follows a residence rather than territorial principle) and subtracting these scope 2 emissions from the combination of Indicator DFD_FCO2 and Industry DTOTAL can be read as scope 3 emissions. In other words, Indicator DFD_FCO2 and Industry DTOTAL represents the sum of scope 2 and 3 emissions.

Code

# read in only relevant datasets from earlier
weo_euro_clean <- read_csv("./processed_01/weo_euro_clean.csv")
edgar_clean <- read_csv("./processed_01/edgar_clean.csv")

Code

# clean OECD scope data. This is data on the sum of scope 2 and 3 emissions. 
OECD_sum_scopes <- read_excel("OECD_sum_scope_2_3.xlsx", skip = 6)

OECD_scopes_1  <- OECD_sum_scopes %>% 
    select(-`...2`) %>% 
    filter(Time != "Country") %>% 
    rename(geography = Time)

  geography_tbl_2 <- OECD_scopes_1 %>% 
    select(geography)

  geography_processed_2 <- geography_tbl_2 %>%  
    separate(col = geography, into = c("geography_code", "geography_name"), 
             sep = ":", remove = FALSE) %>% 
    mutate(across(c("geography_code", "geography_name"), str_trim)) %>%  
    mutate(country_name = countrycode(sourcevar = geography_code, 
                                      origin = "iso3c", 
                                      destination = "country.name")) %>% 
    mutate(country_or_region = if_else(is.na(country_name), 
                                       true = "region",
                                       false = "country")) %>% 
    mutate(country_name = if_else(is.na(country_name),
                                  true = geography_name,
                                  false = country_name)) %>% 
    select(geography, country_name, iso3c = geography_code, country_or_region)
  
  OECD_scopes_2 <- geography_processed_2 %>%  
    left_join(OECD_scopes_1, by = "geography") %>%  
    select(!geography) %>% 
    filter(country_or_region == "country") %>% 
    pivot_longer(cols = matches("\\d{4}"), 
                 names_to = "year", 
                 names_transform = as.integer, 
                 values_to = "scope_2_3_GHG") %>% 
    janitor::clean_names()
  
  OECD_scopes_3 <- OECD_scopes_2 %>% select(!country_or_region)
  count_countries_years(OECD_scopes_3)
  
  OECD_scopes_clean <- OECD_scopes_3
  write_csv(OECD_scopes_clean, "./processed_01/OECD_scopes_clean.csv")

GHG-Intensity Indicator

This indicator is the sum of scope 1, 2, and 3, emissions divided by GDP in Million Euro. As data for scope 2 and 3 is only available for 66 countries, substantially limiting comparability, I will construct two indicators, one with only scope 1 emissions and one with summed emissions of all scopes where possible.

Code

# merge data on scope 2 and 3 with data on GDP and scope 1
OECD_scopes_clean <- read_csv("processed_01/OECD_scopes_clean.csv")
weo_euro_clean <- read_csv("processed_01/weo_euro_clean.csv")
ghg_intensity_1 <- merge(edgar_clean, OECD_scopes_clean, by = c("country_name", "iso3c", "year"), all = TRUE) %>%
  merge(weo_euro_clean, by = c("country_name", "iso3c", "year"), all = TRUE)

# this is a good control data set to see what went wrong with merging:
ghg_intensity_2 <- ghg_intensity_1 %>% 
  mutate(
    has_scope_2_and_3 = ifelse(is.na(scope_2_3_ghg), 0, 1),
    has_GDP = ifelse(is.na(GDP_EUR_MLN), 0, 1),
     scope_1 = ghg,
    sum_scopes = ifelse(has_scope_2_and_3 == 1, ghg + scope_2_3_ghg, scope_1),
    ghg_intensity_sum_scopes = sum_scopes/GDP_EUR_MLN, 
    ghg_intensity_scope_1 = scope_1/GDP_EUR_MLN
    )

# as the euro does not exist before 2001 and therefore the GDP in euro figures are non-existent before 2001, I limit the dataset to 2001-2023 where possible
ghg_intensity_3 <- ghg_intensity_2 %>% 
  filter(year >= 2001) %>% 
  select(country_name, iso3c, year, has_scope_2_and_3, has_GDP, ghg_intensity_scope_1, ghg_intensity_sum_scopes)

# these intensities are very small in size, they refer to how many tons of ghg emissions are facilitated by holding one euro in sovereign debt of the specific country
# for ease of use I will add another intensity measure, which will refer to tons of ghg emissions per 1,000,000 euro

ghg_intensity_4 <- ghg_intensity_3 %>% 
  mutate(ghg_intensity_scope_1_per_Mln_EUR = ghg_intensity_scope_1*1000000, 
         ghg_intensity_sum_scopes_per_Mln_EUR = ghg_intensity_sum_scopes*1000000) %>% 
   filter(!is.na(ghg_intensity_scope_1_per_Mln_EUR)) %>% 
  mutate(ghg_intensity_scope_1_per_Mln_EUR_perc_rank = rank(- ghg_intensity_scope_1_per_Mln_EUR)/length(ghg_intensity_scope_1_per_Mln_EUR)*100)

count_countries_years(ghg_intensity_4)
ghg_intensity_4 %>% filter(is.na(ghg_intensity_sum_scopes))

# save cleaned files
ghg_intensity_control <- ghg_intensity_2
ghg_intensity_clean <- ghg_intensity_4

write_csv(ghg_intensity_control, "./processed_01/ghg_intensity_control.csv")
write_csv(ghg_intensity_clean, "./processed_01/ghg_intensity_clean.csv")
ghg_intensity_clean <- read_csv("./processed_01/ghg_intensity_clean.csv")

Code

ghg_intensity_with_trend <- ghg_intensity_clean %>% 
  filter(year >= 2017) %>% 
  mutate(intensity_rate_of_change = diff(ghg_intensity_scope_1_per_Mln_EUR) / lag(ghg_intensity_scope_1_per_Mln_EUR) * 100) %>% 
  filter(year >= 2018 & year <= 2022)

ghg_intensity_with_trend %>% filter(year == 2022 & !is.na(ghg_intensity_scope_1))
# as there is no ghg intensity data for 2022 for any country, the trend over the five most recent years has to be calculated with only three available growth rates (for the years 2018, 2019, 2020). As the compounding effect is important here, I will use a geometric average to describe the most recent trend in ghg intensity growth.

 # Function to calculate geometric mean
geometric_mean <- function(x) {
  exp(mean(log(x), na.rm = TRUE))
}

ghg_intensity_with_trend_2021 <- ghg_intensity_with_trend %>%
  group_by(country_name) %>% 
  mutate(geom_avg_intensity_growth = (geometric_mean(1 + intensity_rate_of_change/100) -1) *100) %>% 
  ungroup() %>% 
  filter(year == 2021)

write.csv(ghg_intensity_with_trend_2021, "./processed_01/ghg_intensity_with_trend_2021.csv")

V-Dem Data Varieties of Democracy

creates indices of types of democracy based on subindicators which assess the fulfillment of individual rights. Subindicators and subindices which do not aim to measure democracy are useful datasources for the principal adverse impact indicators. Specifically, I use: - v2clkill to assess whether the right to freedom from extrajudicial killings is de facto respected in a country; - v2cltort to assess whether the right to freedom from torture is de facto respected in a country; - v2clslavem and v2clslavef to assess whether the right to freedom from slavery and forced labor is de facto respected in a country; - v2x_cspart to assess whether civil society organisations can participate freely in a society; - v2x_rule to assess the rule of law status of a country. V-Dem offers an R package to work with its most recent datasets, which can be found under: https://github.com/vdeminstitute/vdemdata. (Maerz, Seraphine F., Amanda B. Edgell, Sebastian Hellemeier, and Nina Illchenko. 2022. vdemdata: An R package to load, explore and work with the most recent V-Dem (Varieties of Democracy) dataset. https://github.com/vdeminstitute/vdemdata)

Code

# devtools::install_github("vdeminstitute/vdemdata")
vdemdata::vdem

Code

vdem_1 <- vdemdata::vdem

# narrowing the dataset to relevant years and variables
# unfortunately have to drop Zanzibar, Somaliland, Palestine/Gaza, as these territories will be difficult to match with data that can not be measured on this detailed level (ghg emissions for example)
vdem_2 <- vdem_1 %>% 
  filter(year >= 2001) %>% 
  filter(country_name != "Zanzibar" & country_name != "Somaliland" & country_name != "Palestine/Gaza") %>% 
  select(country_name, year, v2clkill_ord, v2cltort_ord, v2clslavem_ord, v2clslavef_ord, v2x_cspart, v2x_civlib, v2x_corr, v2x_rule) %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country_name), 
         iso3c = ifelse(country_name == "Kosovo", "XKX", iso3c), 
         country_name_2 = iso3c_to_country_name(`iso3c`), 
         country_name_2 = ifelse(iso3c == "XKX", "Kosovo", country_name_2))

vdem_3 <- vdem_2 %>% 
  select(!country_name) %>% 
  rename(country_name = country_name_2) %>% 
  relocate(iso3c, .before = 1) %>% 
  relocate(country_name, .before = 2) 

#count_countries_years(vdem_3)

Creating PAI indicators from V-dem

The first indicator constructed from V-dem is the social violations indicator, I create the following versions: - social_violations_min_1 is a binary variable that indicates the presence (1) or absence (0) of social violations based on the values of the four specified variables (v2clkill_ord, v2cltort_ord, v2clslavem_ord, v2clslavef_ord). If any of these variables have a value less than 2 for a given row, it suggests that some form of social violation has occurred, and social_violations_min_1 is set to 1; otherwise, it is set to 0; - social_violations_min_2 will be equal to 1 when at least two of the underlying variables (v2clkill_ord, v2cltort_ord, v2clslavem_ord, v2clslavef_ord) are less than 2 for a given country in a given year;

The subindicators take values from 0-4, where 0 is “not respected by public authorities, systematic practice of torture/extrajudicial killings/slavery”, 1 is “weak respect, frequent practice, no active prevention”, 2 “somewhat respected, occasional practice but not approved by top leaders of government”, 3 is “mostly respected, only a few cases and not incited by of approved by top government leaders” and 4 is “full respect, non-existence of violation”. One can take different stances on which category will qualify a country for the “subject to social violations” states of the PAI indicator, which is why I create several ones. The subindices taken here range from 0-1 (low to high).

Code

# creating the PAI-indicators

# social violations: ordered from least stringent to most stringent
# a: 2 or more non-derogable rights need to be not respected at all to make a country "subject to social violations"
# b: 1 or more non-derogable rights need to be not respected at all to make a country "subject to social violations"
# c: 2 or more non-derogable rights need to be only weakly respected to make a country "subject to social violations"
# d: 2 or more non-derogable rights need to be only weakly respected to make a country "subject to social violations"
# e: unless all three selected non-derogable rights are fully respected and the respective violation non-existent, the country is declared "subject to social violations"

vdem_4 <- vdem_3 %>% 
  mutate(
          social_violations_a_min_2_no_respect = ifelse(rowSums(select(., starts_with("v2cl")) < 1) >= 2, 1, 0), 
          social_violations_b_min_1_no_respect = ifelse((v2clkill_ord == 0 | v2cltort_ord == 0 | v2clslavem_ord == 0 | v2clslavef_ord == 0), 1, 0), 
          social_violations_c_min_2_weak_respect = ifelse(rowSums(select(., starts_with("v2cl")) < 2) >= 2, 1, 0),
          social_violations_d_min_1_weak_respect = ifelse((v2clkill_ord <2 | v2cltort_ord <2 | v2clslavem_ord <2 | v2clslavef_ord <2), 1, 0), 
          social_violations_e_except_all_sull_respect = ifelse(rowSums(select(., starts_with("v2cl")) == 4) < 4, 1, 0)
          )
# the other indicators are simply created by assigning the value of the vdem-variable or subindex to the PAI-indicator. 
# additionally, I create some variables based on the subindices for later analysis
# For the indicators Human Rights performance and perceived corruption, V-dem data is only an alternative measure. 
# Maybe some relative measure will be useful
vdem_5 <- vdem_4 %>% 
  rename(freedom_of_expression_plain = v2x_cspart,
         hr_performance_alt = v2x_civlib, 
         corruption_alt = v2x_corr, 
         rule_of_law = v2x_rule) 

# for practicality reducing set to latest scores (2022)
# adding percentile ranks, the higher the percentile the "better"
vdem_6 <- vdem_5 %>% 
  filter(year == 2022) %>% 
  mutate(
          freedom_of_expression_pct_rank = rank(freedom_of_expression_plain)/length(freedom_of_expression_plain)*100, 
        hr_performance_alt_pct_rank = rank(hr_performance_alt)/length(hr_performance_alt)*100,
        corruption_alt_pct_rank = rank(-corruption_alt)/length(corruption_alt)*100,
        rule_of_law_pct_rank = rank(rule_of_law)/length(rule_of_law)*100
)

write_csv(vdem_6, "./processed_01/vdem_2022_control.csv")

vdem_2022_control <- read_csv("./processed_01/vdem_2022_control.csv")

EU and UN Sanctions

Many Asset managers approximate the status of a country as being subject to social violations by using the EU sanctions list. The list contains persons and entities subject to EU sanctions. As the EU is legally bound to implement UN sanctions, this also reflects UN sanctions. As there is no easy way to export the list of countries affected by EU sanctions (there is a list containing all persons and entities but in this case the country is the key variable) I will create a variable reflecting whether a country was mentioned for current sanctions on the EU’s Sanctions Map at https://www.sanctionsmap.eu/#/main (November, 9th, 2023). There is one special case, the US is mentioned not as being under sanctions but to incorporate an EU ruling that rebukes the applicability of secondary sanctions regimes by the US in EU countries. The US is thus not included in the following list. Sanctions frameworks that only enable the EU to impose sanctions against persons that undermine the sovereignty of the specific country but that are currently not applied to any person are not sufficient to declare a country sanctioned (in the absence of any other sanctions regimes that are effective). An example is Bosnia and Herzegovina.

I also create a variable that is slightly less conservative and aims at capturing the variety in width of the sanctions. Some sanctions often apply to individuals in countries, most notably asset freezes and admission restrictions. However, as asset freezes are specifically mentioned in some PAI statements, it is not reasonable to also exclude them from the more comprehensive/conservative variable.

Code

EU_sanctioned_countries <- c("Afghanistan", "Belarus", "Burundi", "Central African Republic", "China", "North Korea", "Congo - Kinshasa", "Guinea", "Guinea-Bissau", "Haiti", "Iran", "Iraq", "Lebanon", "Libya", "Moldova", "Myanmar (Burma)", "Nicaragua", "Russia", "Somalia", "South Sudan", "Sudan", "Syria", "Tunisia", "Turkey", "Ukraine", "Venezuela", "Yemen", "Zimbabwe") 
# without countries where only framework for restrictive measures exist but no persons or entities are affected

EU_sancframe_countries <- c("Afghanistan", "Belarus", "Bosnia & Herzegovina",  "Burundi", "Central African Republic", "China", "North Korea", "Congo - Kinshasa", "Guinea", "Guinea-Bissau", "Haiti", "Iran", "Iraq", "Lebanon", "Libya", "Mali", "Moldova", "Myanmar (Burma)", "Nicaragua", "Niger", "Russia", "Somalia", "South Sudan", "Sudan", "Syria", "Tunisia", "Turkey", "Ukraine", "Venezuela", "Yemen", "Zimbabwe")
# plus countries where frameworks are newly in place but no persons or entities are yet listed 
# only countries where the list explicitly states that there is no restrictive regime currently in place (Serbia and Montenegro) are excluded as internal review commissions would probably not regard this as a sanction regime

EU_sanctrade_countries <- c("Afghanistan", "Belarus", "Central African Republic", "China", "North Korea", "Congo - Kinshasa", "Haiti", "Iran", "Iraq", "Lebanon", "Libya", "Myanmar (Burma)", "Russia", "Somalia", "South Sudan", "Sudan", "Syria", "Venezuela", "Yemen", "Zimbabwe")
#arms embargoes treated as a type of trade restriction

UN_sanctioned_countries <- c("Afghanistan", "Central African Republic", "North Korea", "Congo - Kinshasa", "Guinea-Bissau", "Haiti", "Iran", "Iraq", "Lebanon", "Libya", "Somalia", "South Sudan", "Sudan", "Syria", "Yemen")

# vdem_2022_control %>% filter(str_detect(country_name, pattern = "Bosnia")) # check spellings

# for ease of use; this is also the point where I will introduce the variable on non-cooperative tax jurisdiction, although this only an optional indicator and does not serve to approximate social violations
# data on non-cooperative tax jurisdiction is from the most recent list of the EU Council, February 14 2023 <https://data.consilium.europa.eu/doc/document/ST-6375-2023-INIT/en/pdf>

non_coop_tax_countries <- c("American Samoa", "Anguilla", "Bahamas", "British Virgin Islands", "Costa Rica", "Fiji", "Guam", "Marshall Islands", "Palau", "Panama", "Russa", "Samoa", "Trinidad & Tobago", "Turks & Caicos Islands", "US Virgin Islands", "Vanuatu")

social_violations_opt <- vdem_2022_control %>%   
  mutate(sancEU = ifelse(country_name %in% EU_sanctioned_countries, 1, 0),
         sancframeEU = ifelse(country_name %in% EU_sancframe_countries, 1, 0),
         sancUN = ifelse(country_name %in% UN_sanctioned_countries, 1, 0), 
         sanctradeEU = ifelse(country_name %in% EU_sanctrade_countries, 1, 0), 
         non_coop_tax = ifelse(country_name %in% non_coop_tax_countries, 1, 0))  

write_csv(social_violations_opt, "./processed_01/social_violations_opt.csv")

Freedom House Data

MSCI uses Freedom House data for the indicator on Freedom of Expression. Since Freedom House has an extra subsection on freedom of expression, I assume they use this subsection for their Freedom of Expression indicator. I want to note here, that I do not think this is the best choice for capturing freedom of expression as described in the EU SFDR Annex 1, which aims at capturing how freely civil and political society organisations can act in a society. I would argue that the Freedom house general score reflects Human Rights Performance, especially in democracies given the outline of the questions. I will thus create two indicators from the Freedom House dataset: an MSCI freedom of expression indicator and a HR performance indicator. In my analysis I will focus on the effects of integrating PAI indicators as provided by MSCI due to its outsized position in the market for ESG data. But I also hope to test my own indicator set which I will from now on mark with a B.

I exclude the disputed territories Indian Kashmir, Pakistani Kashmir, Northern Cyprus as they will not match with other data and do not issue their own sovereign bonds (thus are irrlevant for end of analysis). https://freedomhouse.org/sites/default/files/2023-02/All_data_FIW_2013-2023.xlsx

Code

Freedom_House_All_data_FIW_2013_2023 <- read_excel("Freedom House All_data_FIW_2013-2023.xlsx", sheet = 2, skip = 1)

FH_1 <- Freedom_House_All_data_FIW_2013_2023 %>% 
  select("Country/Territory", "Edition", "D", "Total") %>% 
  rename(country = 'Country/Territory', 
         year = "Edition", 
         freedom_of_expression_MSCI = "D", 
         HR_perf_B = "Total") %>% 
  filter(country != "Pakistani Kashmir" & country != "Indian Kashmir" & country != "Northern Cyprus" & country != "Gaza Strip") %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country), 
         iso3c = case_when(
           country == "Kosovo" ~ "XKX", 
           country == "Micronesia" ~ "FSM", 
           TRUE ~ iso3c
         )) 

FH_2 <- FH_1 %>% 
  filter(!is.na(iso3c)) %>% 
  mutate(country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "XKX", "Kosovo", country_name), 
         country_name = ifelse(iso3c == "FSM", "Micronesia", country_name)) %>% 
  select(!"country") %>% 
   relocate(iso3c, .before = 1) %>% 
  relocate(country_name, .before = 2) 
# FH_2 %>%  filter(is.na(freedom_of_expression_MSCI) | is.na(HR_perf_B))

FH_3 <- FH_2 %>% 
  rank_pct(freedom_of_expression_MSCI) %>% 
  rank_pct(HR_perf_B) %>% 
  filter( year == 2022)

FH_clean <- FH_3
write_csv(FH_clean, "./processed_01/FH_clean.csv")

World Justice Project

MSCI uses the rule of law index of the World justice project for their Human Rights Performance indicator. https://worldjusticeproject.org/rule-of-law-index/global data from 2022

Code

wjp_rule_of_law <- read_excel("2023_wjp_rule_of_law_index_HISTORICAL_DATA.xlsx", sheet = 10)

wjp_1 <- wjp_rule_of_law %>% 
  slice(1:3) %>% 
 pivot_longer(
    cols = "Afghanistan":"Zimbabwe", 
    names_to = "country", 
    values_to = "value"
  ) %>% 
  pivot_wider(
    names_from = "Country", 
    values_from = "value"
  ) %>% 
  rename(
    hr_perf_MSCI = "WJP Rule of Law Index: Overall Score"
  ) %>% 
  mutate(
    iso3c = country_name_regex_to_iso3c(country), 
    iso3c = case_when(
   # country == "Micronesia" ~ "FSM", 
    country == "Turkiye" ~ "TUR", 
    country == "Kosovo" ~ "XKX", 
    TRUE ~ iso3c),
    country_name = iso3c_to_country_name(iso3c), 
    country_name = ifelse(iso3c == "XKX", "Kosovo", country_name)
  )
  
# wjp_1 %>% filter(country == "Turkiye" | country== "Kosovo")

wjp_2 <- wjp_1 %>% 
  select(country_name, iso3c, hr_perf_MSCI) %>% 
 rank_pct(hr_perf_MSCI)

wjp_clean <- wjp_2
write_csv(wjp_clean, "./processed_01/wjp_clean.csv")

CPI

The Corruption Perception Index by Transparency International is the natural choice for an indicator that asks for a quantitative measure of perceived corruption. MSCI uses it and it is also included in many of the internal ESG frameworks of asset managers already. Higher Index values indicate less perceived corruption. https://images.transparencycdn.org/images/CPI2022_GlobalResultsTrends.xlsx

Code

CPI2022 <- read_excel("CPI2022_GlobalResultsTrends.xlsx", skip = 1)

CPI_1 <- CPI2022 %>% 
  rename(country = 'Country / Territory', 
         corruption_MSCI_B = 'CPI score 2022') %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country), 
          iso3c = ifelse(country == "Kosovo", "XKX", iso3c),
    country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "XKX", "Kosovo", country_name),
         year = 2022) 
CPI_2 <- CPI_1 %>% 
  select(country_name, iso3c, year, corruption_MSCI_B) %>% 
  rank_pct(corruption_MSCI_B)

CPI_clean <- CPI_2
write_csv(CPI_clean, "./processed_01/CPI_clean.csv")

Fragile State Index

The Fragile State Index follows a thorough methodology for assessing the fragility and thus political stability of a state. While it is used by some asset managers, MSCI uses the Peace Index by Vision of Humanity. So, I will create two indicators measuring political stability. FSI: higher values indicate higher fragility (thus lower political stability)

Code

fsi_2022_download <- read_excel("fsi-2022-download.xlsx")

FSI_1 <- fsi_2022_download %>% 
  rename(country = 'Country', 
         pol_stability_B = 'Total') %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country), 
          iso3c = ifelse(country == "Kosovo", "XKX", iso3c),
         iso3c = ifelse(country == "Micronesia", "FSM", iso3c),
    country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "XKX", "Kosovo", country_name),
    country_name = ifelse(iso3c == "FSM", "Micronesia", country_name), 
         year = 2022) 
FSI_clean <- FSI_1 %>% 
  select(country_name, iso3c, year, pol_stability_B) %>% 
  rank_pct(pol_stability_B, rank_by_lowest = FALSE) # converting -> higher ranks reflect less fragility/higher political stability
write_csv(FSI_clean, "./processed_01/FSI_clean.csv")

Vision of Humanity (peace/political stability)

Taking the Global Peace Index 2023 data for MSCI’s political stability score. Source: https://www.visionofhumanity.org/wp-content/uploads/2023/06/GPI-2023-overall-scores-and-domains-2008-2023.xlsx

Code

GPI_2023_overall_scores_and_domains_2008_2023 <- read_excel("GPI-2023-overall-scores-and-domains-2008-2023.xlsx", sheet = 2, skip = 3)

GPI_1 <- GPI_2023_overall_scores_and_domains_2008_2023 %>% 
  rename(country = 'Country')  %>% 
  select(country, iso3c, '2022') %>% 
  pivot_longer(
    cols = "2022",
    names_to = "year", 
    values_to = "pol_stability_MSCI"
  ) %>% 
  mutate(
    iso3c = ifelse(country == "Kosovo", "XKX", iso3c),
    country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "XKX", "Kosovo", country_name)
         ) 
GPI_clean <- GPI_1 %>% 
  select(country_name, iso3c, year, pol_stability_MSCI) %>% 
  rank_pct(pol_stability_MSCI, rank_by_lowest = FALSE) #converting -> higher ranks reflect more stability
write_csv(GPI_clean, "./processed_01/GPI_clean.csv")

World Governance Indicators (WGI)

World Governance Indicators are used by various asset managers for the indicators of rule of law, political stability and control of corruption. MSCI uses the rule of law indicator which I will create here. MSCI also refers to WGI for its political stability indicator but also mentions the vision of humanity dataset for the same indicator. It is unclear in which relation they are used. https://databank.worldbank.org/source/worldwide-governance-indicators

Code

WGI_rule_stability <- read_csv("WGI_rule_stability.csv")

WGI_1 <- WGI_rule_stability %>% 
  slice(1:856) %>% 
  rename(indicator = 'Series Name', 
         country = 'Country Name') %>% 
  select(country, indicator, '2022 [YR2022]') %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country), 
             iso3c = case_when(
    country == "Turkiye" ~ "TUR", 
    country == "Kosovo" ~ "XKX", 
    TRUE ~ iso3c),
         country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "XKX", "Kosovo", country_name), 
    country_name = ifelse(iso3c == "FSM", "Micronesia", country_name)
) 

WGI_2 <- WGI_1 %>% 
  filter(!country == "Netherlands Antilles") %>% # excluding Netherlands Antilles because 
  filter(str_detect(indicator, "Estimate")) %>% # exclude percentile ranks - calculate them later for consistency acroos datasets
  mutate(
    indicator = case_when(
      indicator == "Rule of Law: Estimate" ~"rule_of_law_MSCI", 
      indicator == "Political Stability and Absence of Violence/Terrorism: Estimate" ~ "pol_stab_WGI_MSCI", 
      TRUE ~ indicator
    )
  ) %>% 
  pivot_wider(
    names_from = "indicator", 
    values_from = "2022 [YR2022]"
  ) %>% 
  mutate(year = 2022, 
         rule_of_law_MSCI = as.numeric(rule_of_law_MSCI), 
         pol_stab_WGI_MSCI = as.numeric(pol_stab_WGI_MSCI)) %>%  
  select(country_name, iso3c, year, rule_of_law_MSCI, pol_stab_WGI_MSCI)

WGI_3 <- WGI_2 %>% 
  rank_pct(rule_of_law_MSCI) %>% 
  rank_pct(pol_stab_WGI_MSCI)

WGI_clean <- WGI_3
write_csv(WGI_clean, "./processed_01/WGI_clean.csv")

Code

#test (correlation WGI percentile ranks, own percentile ranks)
WGI_2b <- WGI_1 %>% 
  filter(!country == "Netherlands Antilles") %>% # excluding Netherlands Antilles because 
 # filter(str_detect(indicator, "Estimate")) %>% # exclude percentile ranks - calculate them later 
  mutate(
    indicator = case_when(
      indicator == "Rule of Law: Estimate" ~"rule_of_law_MSCI", 
      indicator == "Political Stability and Absence of Violence/Terrorism: Estimate" ~ "pol_stab_WGI_MSCI", 
      indicator == "Rule of Law: Percentile Rank" ~ "rule_of_law_orig_pct", 
      indicator == "Political Stability and Absence of Violence/Terrorism: Percentile Rank" ~ "pol_stab_orig_pct", 
      TRUE ~ indicator
    )
  ) %>% 
  pivot_wider(
    names_from = "indicator", 
    values_from = "2022 [YR2022]"
  ) %>% 
  mutate(year = 2022, 
         rule_of_law_MSCI = as.numeric(rule_of_law_MSCI), 
         pol_stab_WGI_MSCI = as.numeric(pol_stab_WGI_MSCI), 
         rule_of_law_orig_pct = as.numeric(rule_of_law_orig_pct), 
         pol_stab_orig_pct = as.numeric(pol_stab_orig_pct)) %>% 
  select(country_name, iso3c, year, rule_of_law_MSCI, pol_stab_WGI_MSCI, rule_of_law_orig_pct, pol_stab_orig_pct)


WGI_3b <- WGI_2b %>% 
  rank_pct(rule_of_law_MSCI) %>% 
  rank_pct(pol_stab_WGI_MSCI)

 WGI_3b %>% summarize(corr = cor(rule_of_law_MSCI_pct_rank, rule_of_law_orig_pct), 
                      corr_2 = cor(pol_stab_orig_pct, pol_stab_WGI_MSCI_pct_rank))

# correlations of 0.9999971 and 0.9999989   
 identical(
WGI_3b %>% arrange(rule_of_law_MSCI_pct_rank),
WGI_3b %>% arrange(rule_of_law_orig_pct))
 
 identical(
WGI_3b %>% arrange(pol_stab_orig_pct),
WGI_3b %>% arrange(pol_stab_WGI_MSCI_pct_rank))
 
 # both ranking methods ordered the countries in the same order
 # unfortunately, testing the cutoffs (for example 10%), the original WGI percentiles each would exclude one more country than based on my own calculation. It is unclear which data an asset manager would use, whether they would prefer consistency across data sources and thus use their own calculation or whether they would use the percentiles given by WGI for WGI data. I will prefer consistency, also given the fact that it is not clear how the different datasources for political stability would be used and it seems unlikely that within one indicator tow different methods of ranking would be used. 
 
 
 identical(
WGI_3b %>% filter(pol_stab_orig_pct <= 10),
WGI_3b %>% filter(pol_stab_WGI_MSCI_pct_rank <= 10))
 
  identical(
WGI_3b %>% filter(rule_of_law_orig_pct <= 10),
WGI_3b %>% filter(rule_of_law_MSCI_pct_rank <= 10))

World Development Indicators

The World Development Indicators are the most comprehensive dataset capturing income inequality through a Gini-coefficient. MSCI uses them for this indicator as well together with data from the CIA factbook. However, sources for the CIA factbook are unclear and it is unclear, whether World Bank and CIA use the same definition of income (household vs individual, after transfers/before transfers, including pensions or not etc.). I will thus not mix them but rather just take the World Bank data which is better documented.

Code

wbg_income_ineq <- wb_data("SI.POV.GINI")

wbg_inc_ineq_1 <- wbg_income_ineq %>% 
  select(iso3c, country, date, 'SI.POV.GINI' ) %>% 
  rename(mrnev_year_inc_ineq = date, 
         inc_ineq_MSCI_B = 'SI.POV.GINI') %>% 
  mutate(country_name= iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "XKX", "Kosovo", country_name), 
          country_name = ifelse(iso3c == "FSM", "Micronesia", country_name)) %>% 
  select(!country) %>% 
 filter(!is.na(inc_ineq_MSCI_B)) %>% 
  group_by(country_name) %>% 
  filter(mrnev_year_inc_ineq == max(mrnev_year_inc_ineq)) %>% 
  ungroup()

#data for 2022 is limited. Assuming that MSCI takes the most recent data. 

wbg_inc_ineq_2 <- wbg_inc_ineq_1 %>% 
  rank_pct(-inc_ineq_MSCI_B) 
# this is a strong assumption but one that is necessary for the exclusion logic to work: less income inequality is "better"

wbg_inc_ineq_clean <- wbg_inc_ineq_2 
write_csv(wbg_inc_ineq_clean, "./processed_01/wbg_inc_ineq_clean.csv")

Debt to GDP Data

Sourced from World Bank via API.

Code

wbg_debt_to_gdp <- wb_data("GC.DOD.TOTL.GD.ZS")

wbg_debt_to_gdp_1 <- wbg_debt_to_gdp %>% 
  select(iso3c, country, date, 'GC.DOD.TOTL.GD.ZS' ) %>% 
  rename(mrnev_year_debt_to_gdp = date, 
         debt_to_gdp = 'GC.DOD.TOTL.GD.ZS') %>% 
 filter(!is.na(debt_to_gdp)) %>% 
  group_by(country) %>% 
  filter(mrnev_year_debt_to_gdp == max(mrnev_year_debt_to_gdp)) %>% 
  ungroup()
 

wbg_debt_to_gdp_2 <- wbg_debt_to_gdp_1 %>% 
  rank_pct(-debt_to_gdp) 

wbg_debt_to_gdp_clean <- wbg_debt_to_gdp_2 
write_csv(wbg_debt_to_gdp_clean, "./processed_01/wbg_debt_to_gdp_clean.csv")

SDR data

data from Sustainable Development Report, complementary to Sustainable Development Doals report of UN. https://dashboards.sdgindex.org/static/downloads/files/SDR2023-data.xlsx; formal citation: Sachs, J.D., Lafortune, G., Fuller, G., Drumm, E. (2023). Implementing the SDG Stimulus. Sustainable Development Report 2023. Paris: SDSN, Dublin: Dublin University Press, 2023. 10.25546/102924

Code

SDR2023_data <- read_excel("SDR2023-data.xlsx", sheet = 6)

SDR_1 <- SDR2023_data %>% 
  filter(year == 2022) %>% 
  mutate(iso3c = country_name_regex_to_iso3c(Country), 
         iso3c = ifelse(Country == "Türkiye", "TUR", iso3c),
    country_name = iso3c_to_country_name(iso3c)) %>% 
  select( iso3c, country_name, `SDG Index Score`) %>% 
  filter(!is.na(iso3c)) # only regions were not matched 

SDR_clean <- SDR_1
write_csv(SDR_clean, "./processed_01/SDR_clean.csv")

GNI per captia data (World Bank)

sourced via WBG API

Code

wbg_gni_pc <- wb_data("NY.GNP.PCAP.CD")

wbg_gni_pc_1 <- wbg_gni_pc %>% 
  select(iso3c, country, date, 'NY.GNP.PCAP.CD' ) %>% 
  rename(mrnev_year_gni_pc = date, 
         GNI_pc_MSCI_B = 'NY.GNP.PCAP.CD') %>% 
  mutate(country_name= iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "XKX", "Kosovo", country_name), 
          country_name = ifelse(iso3c == "FSM", "Micronesia", country_name)) %>% 
  select(!country) %>% 
 filter(!is.na(GNI_pc_MSCI_B)) %>% 
  group_by(country_name) %>% 
  filter(mrnev_year_gni_pc == max(mrnev_year_gni_pc)) %>% 
  ungroup()

wbg_gni_pc_2 <- wbg_gni_pc_1 %>% 
  rank_pct(GNI_pc_MSCI_B)

wbg_gni_pc_clean <- wbg_gni_pc_2
write_csv(wbg_gni_pc_clean, "./processed_01/wbg_gni_pc_clean.csv")

Debt to GDP

data from 2023 retrieved from https://www.imf.org/external/datamapper/GGXWDG_NGDP@WEO/OEMDC/ADVEC/WEOWORLD?year=2023

Code

imf_debt_to_gdp <- read_excel("imf-debt_to_gdp.xls", na = c("n/a", "NA", "no data", ""))

imf_debt_to_gdp_1 <- imf_debt_to_gdp %>% 
  janitor::clean_names()

 colnames(imf_debt_to_gdp_1) <- make.names(colnames(imf_debt_to_gdp_1))
 
imf_debt_to_gdp_2 <- imf_debt_to_gdp_1[2:195, ] %>% 
select(general_government_gross_debt_percent_of_gdp, x2023) %>% 
  rename(country = general_government_gross_debt_percent_of_gdp, 
         gov_debt_to_gdp = x2023) %>% 
  mutate(iso3c = country_name_regex_to_iso3c(`country`), 
      iso3c = case_when(
    country == "Micronesia" ~ "FSM", 
    country == "Türkiye, Republic of" ~ "TUR", 
    country == "Kosovo" ~ "XKX", 
    TRUE ~ iso3c)) %>% 
  mutate(country_name = iso3c_to_country_name(`iso3c`),
        country_name = ifelse(iso3c == "XKX", "Kosovo", country_name), 
        country_name = ifelse(iso3c == "FSM", "Micronesia", country_name), 
        gov_debt_to_gdp = as.numeric(gov_debt_to_gdp)) %>% 
  select(country_name, iso3c, gov_debt_to_gdp)
  
imf_debt_to_gdp_3 <- imf_debt_to_gdp_2 %>% 
  filter(!is.na(gov_debt_to_gdp)) %>% 
  rank_pct(gov_debt_to_gdp, rank_by_lowest = FALSE)
  # less debt per gdp is better
  
imf_debt_to_gdp_clean <- imf_debt_to_gdp_3 
write_csv(imf_debt_to_gdp_clean, "./processed_01/imf_debt_to_gdp_clean.csv")

ND GAIN

vulnerability score: assesses how vulnerable country is to climate change (exposure, sensitivity, adaptive capacity) -> lower is better (0-1) readiness score: economic, governance, social readiness to leverage investments and convert them to adaptation actions -> higher is better (0-1) GAIN score: (readiness score - vulnerability score +1)*50 -> higher is better data from 2021 https://gain.nd.edu/our-work/country-index/methodology/

Code

readiness <- read_csv("readiness.csv")
vulnerability <- read_csv("vulnerability.csv")
gain <- read_csv("gain.csv")

readiness_1 <-  readiness %>% 
  select(ISO3, Name, '2021') %>% 
    rename(gain_readiness = '2021', 
         iso3c = ISO3) %>% 
  mutate(country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "FSM", "Micronesia", country_name)) %>% 
  select(country_name, iso3c, gain_readiness)

readiness_2 <- readiness_1 %>% rank_pct(gain_readiness)
readiness_clean <- readiness_2

write_csv(readiness_clean, "./processed_01/readiness_clean.csv")

Code

vulnerability_1 <-  vulnerability %>% 
  select(ISO3, Name, '2021') %>% 
    rename(gain_vulnerability = '2021', 
         iso3c = ISO3) %>% 
  mutate(country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "FSM", "Micronesia", country_name)) %>% 
  select(country_name, iso3c, gain_vulnerability)

vulnerability_2 <- vulnerability_1 %>% 
  filter(! is.na(gain_vulnerability)) %>% 
  rank_pct(gain_vulnerability, rank_by_lowest = FALSE) 

vulnerability_clean <- vulnerability_2

write_csv(vulnerability_clean, "./processed_01/vulnerability_clean.csv")

Code

gain_1 <-  gain %>% 
  select(ISO3, Name, '2021') %>% 
    rename(gain_index = '2021', 
         iso3c = ISO3) %>% 
  mutate(country_name = iso3c_to_country_name(iso3c), 
         country_name = ifelse(iso3c == "FSM", "Micronesia", country_name)) %>% 
  select(country_name, iso3c, gain_index)

gain_2 <- gain_1 %>% 
  filter(! is.na(gain_index)) %>% 
  rank_pct(gain_index) 

gain_index_clean <- gain_2

write_csv(gain_index_clean, "./processed_01/gain_index_clean.csv")

Code

all_gain_clean <- full_join(readiness_clean, vulnerability_clean, by = c("iso3c", "country_name")) %>% 
  full_join(gain_index_clean, by = c("iso3c", "country_name"))

write_csv(all_gain_clean, "./processed_01/all_gain_clean.csv")

Classification by WBG

downloaded from https://datahelpdesk.worldbank.org/knowledgebase/articles/906519

Code

wbg_class <- read_excel("WBG_classification.xlsx")

wbg_class_1 <- wbg_class[1:218, ] %>% 
  mutate(country_name = iso3c_to_country_name(Code), 
         country_name = ifelse(Code == "CHI", "Channel Islands", country_name), 
          country_name = ifelse(Code == "XKX", "Kosovo", country_name), 
          country_name = ifelse(Code == "FSM", "Micronesia", country_name) 
         ) %>% 
  rename(iso3c = Code, 
         income_group = 'Income group', 
         region = Region
         ) %>% 
  select(! "Lending category" & !"Economy")

wbg_class_clean <- wbg_class_1
write_csv(wbg_class_clean, "./processed_01/wbg_class_clean.csv")

EMBI vs ESEB

These are ETFs based on the conventional and the ESG index for Emerging Markets by JPMorgan. I sources this data via a Bloomberg Terminal at Johns Hopkins.

Code

EMBI <- read_excel("EMBI.xlsx") 

embi_2 <- EMBI %>% 
  janitor::clean_names()

colnames(embi_2) <- make.names(colnames(embi_2))

embi_3 <- embi_2 %>% 
  filter(percent_weight > 0) %>% 
  mutate(country_name = countryname(member_companies), 
         country_name = case_when(
           member_companies == "ABU DHABI GOVT" ~ "United Arab Emirates", 
          #  member_companies = "BANK GOSPODARSTW" ~ "Poland", #state-owned bank
            member_companies == "CBB INTL SUK WLL" ~ "Bahrain",
            member_companies == "CBB INTL SUKUK" ~ "Bahrain",
           # member_companies = "CODELCO INC" ~ "Chile", # state-owned company
           member_companies == "DUBAI GOVT INT'L" ~ "United Arab Emirates", 
           member_companies == "FIN DEPT SHARJAH" ~ "United Arab Emirates", 
            member_companies == "LEBANESE REP" ~ "Lebanon", 
            member_companies == "REPUBLIC OF AZER" ~ "Azerbaidjan",
           member_companies == "REPUBLIC OF ECUA" ~ "Ecuador",
            member_companies == "REPUBLIC OF GABO" ~ "Gabon",
           member_companies == "REPUBLIC OF RWAN" ~ "Rwanda",
           member_companies == "SAUDI INT BOND" ~ "Saudi Arabia",
           member_companies == "TURKIYE REP OF" ~ "Turkey",
           member_companies == "UAE INTL BOND" ~ "United Arab Emirates",
          TRUE ~ country_name
         )) %>% 
  filter(!is.na(country_name)) 

countries_embi <- unique(embi_3$country_name) #57 countries
  
ESEB <- read_excel("ESEB.xlsx")

eseb_2 <- ESEB %>% 
  janitor::clean_names()

colnames(eseb_2) <- make.names(colnames(eseb_2))

eseb_3 <- eseb_2 %>% 
  filter(percent_weight > 0) %>% 
  mutate(country_name = countryname(member_companies), 
         country_name = case_when(
           member_companies == "ABU DHABI GOVT" ~ "United Arab Emirates", 
        #  member_companies = "BANK GOSPODARSTW" ~ "Poland", #state-owned bank
        #    member_companies == "CBB INTL SUK WLL" ~ "Bahrain",
        #    member_companies == "CBB INTL SUKUK" ~ "Bahrain",
        #   member_companies = "CODELCO INC" ~ "Chile", # state-owned company
        #   member_companies == "DUBAI GOVT INT'L" ~ "United Arab Emirates", 
        #   member_companies == "FIN DEPT SHARJAH" ~ "United Arab Emirates", 
        #  member_companies == "LEBANESE REP" ~ "Lebanon", 
        #  member_companies == "REPUBLIC OF AZER" ~ "Azerbaidjan",
           member_companies == "REPUBLIC OF ECUA" ~ "Ecuador",
            member_companies == "REPUBLIC OF GABO" ~ "Gabon",
        # member_companies == "REPUBLIC OF RWAN" ~ "Rwanda",
           member_companies == "SAUDI INT BOND" ~ "Saudi Arabia",
           member_companies == "TURKIYE REP OF" ~ "Turkey",
        member_companies == "PNG GOVT INTL BO" ~ "Papua New Guinea",
          TRUE ~ country_name
         )) %>% 
  filter(!is.na(country_name)) 

countries_eseb <- unique(eseb_3$country_name) #42 countries

common_countries <- intersect(countries_embi, countries_eseb)
unique_to_embi <- setdiff(countries_embi, countries_eseb)
unique_to_eseb <- setdiff(countries_eseb, countries_embi)
common_countries
unique_to_embi
unique_to_eseb 
unique_to_embi_df <- data.frame(countries = setdiff(countries_embi, countries_eseb))
write.csv(unique_to_embi, "./processed_01/unique_to_embi.csv")