Using corruption data for ESG investing

Transparency International Corruption Perception Index

This script automatically loads the Corruption Perception Index data published by Transparency International (https://www.transparency.org/en/cpi/2019/index/nzl). The dataset is published as a PDF, and the script transforms it into an easily usable data.frame. In addition, it creates an interactive, customizable world map similar to the one published by Transparency International on its website. The map can be integrated into websites or exported as a high-resolution scalable vector graphic (SVG) using Orca (https://github.com/plotly/orca).

#Load Data Only
#' Load the Transparency International Corruption Perception Index table.
#'
#' Reads the text of the PDF at `file_path`, splits it into lines and peels
#' the right-most whitespace-separated fields off every line to rebuild the
#' table columns. Lines whose Year field is not numeric are dropped.
#'
#' @param file_path Path or URL of the consolidated CPI PDF.
#' @return A data.frame with the columns Country, ISO, Year, CPI_Score,
#'   Rank, Sources and Standard_Error. Year and CPI_Score are numeric, the
#'   remaining columns are character.
corruption_perception_index_data <- function(
    file_path = "https://images.transparencycdn.org/images/Consolidated_Data_CPI_2012-2019.pdf") {
  library(pdftools)
  library(stringr)
  library(plotly)
  library(dplyr)
  library(stringi)
  library(ecm)
  library(countrycode)

  # Last whitespace-delimited token of each line.
  last_field <- function(x) {
    stri_extract_last_regex(x, "\\S+")
  }
  # Remove the trailing token, then any whitespace left on the right edge.
  drop_last_field <- function(x) {
    trimws(str_replace(x, "\\s[^ ]+$", ""), which = "right")
  }
  # Final three characters of each string.
  last_three <- function(x) {
    substr(x, nchar(x) - 2, nchar(x))
  }

  pages <- pdf_text(file_path)
  lines <- unlist(str_split(pages, "[\\r\\n]+"))
  txt <- data.frame(tx2 = lines, stringsAsFactors = FALSE)

  # Peel the columns off from right to left.
  txt$Standard_Error <- last_field(txt$tx2)
  txt$tmp <- drop_last_field(txt$tx2)
  txt$Sources <- last_field(txt$tmp)
  txt$tmp <- drop_last_field(txt$tmp)
  txt$Rank <- last_field(txt$tmp)
  txt$tmp <- drop_last_field(txt$tmp)
  txt$CPI_Score <- last_field(txt$tmp)
  txt$tmp <- drop_last_field(txt$tmp)
  txt$Year <- last_field(txt$tmp)
  txt$tmp <- drop_last_field(txt$tmp)
  # The next field is discarded without being stored.
  txt$tmp <- drop_last_field(txt$tmp)

  # The ISO code is taken from the last three characters of what remains.
  # When those are not all upper-case letters, fall back to the previous
  # line with its spaces removed.
  iso_candidate <- last_three(txt$tmp)
  looks_like_code <- grepl("^[[:upper:]]+$", iso_candidate)
  previous_line <- lagpad(gsub(" ", "", txt$tx2), k = 1)
  txt$ISO <- ifelse(looks_like_code, iso_candidate, previous_line)

  # Non-numeric tokens are expected here (they mark non-data lines), so the
  # coercion warnings are silenced locally instead of through the global
  # `warn` option, which would leak out of this function.
  txt$Year <- suppressWarnings(as.numeric(txt$Year))
  txt <- txt[!is.na(txt$Year), ]
  txt$CPI_Score <- suppressWarnings(as.numeric(txt$CPI_Score))

  # A fallback value longer than three characters cannot be a code; return
  # to the last three characters of the remainder in that case.
  txt$ISO <- ifelse(nchar(txt$ISO) > 3, last_three(txt$tmp), txt$ISO)

  # Codes countrycode cannot match yield NA; the warning is silenced.
  txt$Country <- suppressWarnings(
    countrycode(txt$ISO, origin = "iso3c", destination = "country.name")
  )

  txt <- txt[, c("Country", "ISO", "Year", "CPI_Score", "Rank", "Sources",
                 "Standard_Error")]

  # Individual error adjustment for two mis-parsed codes. `%in%` never
  # returns NA, so rows whose Country is still NA keep their parsed ISO
  # instead of having it overwritten with NA.
  fix_caf <- txt$ISO %in% "ral" & is.na(txt$Country)
  txt$Country[fix_caf] <- "Central African Republic"
  fix_cod <- txt$ISO %in% "SSA" & is.na(txt$Country)
  txt$Country[fix_cod] <- "Democratic Republic of Congo"
  txt$ISO[txt$Country %in% "Central African Republic"] <- "CAF"
  txt$ISO[txt$Country %in% "Democratic Republic of Congo"] <- "COD"

  txt
}


# Download and parse the index with the default PDF, then preview the
# first five rows.
index_data <- corruption_perception_index_data()
head(index_data, 5)

##       Country ISO Year CPI_Score Rank Sources Standard_Error
## 2 New Zealand NZL 2019        87    1       8           2.29
## 3 New Zealand NZL 2018        87    2       8           2.44
## 4 New Zealand NZL 2017        89    1       8           2.40
## 5 New Zealand NZL 2016        90    1       7           2.56
## 6 New Zealand NZL 2015        91    1       7           2.32

Creating a nice worldmap with Plotly

The code below loads the same dataset and creates a nice, interactive world map based on it.

#Load Data from Transparency International and Create Map
#--------------------------------------------------------------------------------------------------------------------------------------------
#' Draw the Corruption Perception Index as an interactive choropleth map.
#'
#' Loads the CPI table with `corruption_perception_index_data()`, keeps the
#' rows of one year and colours every country by its CPI score.
#'
#' @param flex_height Height passed on to `plot_geo()`.
#' @param sel_year Either "last" (the most recent year in the data) or a
#'   single year to display.
#' @param file_path Path or URL of the consolidated CPI PDF.
#' @return A plotly object with the colorbar hidden.
corruption_perception_index_map <- function(
    flex_height = 400,
    sel_year = "last",
    file_path = "https://images.transparencycdn.org/images/Consolidated_Data_CPI_2012-2019.pdf") {
  # Remember the caller's warning level and put it back on exit, including
  # when an error aborts the function half-way.
  old_warn <- getOption("warn")
  on.exit(options(warn = old_warn), add = TRUE)

  library(plotly)

  if (length(sel_year) != 1 || is.na(sel_year)) {
    stop("`sel_year` must be \"last\" or a single year.", call. = FALSE)
  }

  # Reuse the loader instead of keeping a second copy of the PDF parser.
  txt <- suppressWarnings(corruption_perception_index_data(file_path))
  if (nrow(txt) == 0) {
    stop("No index rows could be parsed from `file_path`.", call. = FALSE)
  }

  if (sel_year == "last") {
    sel_year <- max(txt$Year)
  }

  txt_sel <- txt[txt$Year == sel_year, ]
  if (nrow(txt_sel) == 0) {
    stop(
      "No data for year ", sel_year, ". Available years: ",
      paste(sort(unique(txt$Year)), collapse = ", "), ".",
      call. = FALSE
    )
  }

  # Keep printable ASCII only in the hover labels.
  txt_sel$Country <- gsub("[^ -~]", "", txt_sel$Country)

  # Thin dark-navy country boundaries.
  border <- list(color = toRGB("#04103b"), width = 0.2)

  # Map projection/options.
  # NOTE(review): plotly.js documents projection types in lower case
  # ("mercator"); the capitalised value is kept as-is so the rendered map
  # does not change -- confirm which projection is actually applied.
  geo_opts <- list(
    showframe = FALSE,
    showcoastlines = FALSE,
    projection = list(type = 'Mercator')
  )

  p <- suppressWarnings(
    plot_geo(txt_sel, height = flex_height) %>%
      add_trace(
        z = ~CPI_Score, color = ~CPI_Score, colors = c("#dd0400", "#9da8b8"),
        text = ~Country, locations = ~ISO, marker = list(line = border)
      ) %>%
      # The colorbar is hidden below; the title is kept accurate in case it
      # is switched back on.
      colorbar(title = "CPI Score") %>%
      layout(
        title = "",
        geo = geo_opts,
        showlegend = FALSE
      )
  )

  hide_colorbar(p)
}


# Build the map with the default settings and display it.
cpi_map <- corruption_perception_index_map()
cpi_map

# Optional export of the map to a file with orca (left disabled):
# orca(cpi_map, "cpi_map.svg")

Using corruption data for ESG investing

Fabian Scheler

22/01/2021

Transparency International Corruption Perception Index

Creating a nice worldmap with Plotly