This script automatically loads the Corruption Perceptions Index (CPI) data published by Transparency International (https://www.transparency.org/en/cpi/2019/index/nzl). The dataset is published as a PDF, and the script transforms it into an easy-to-use data.frame. In addition, it creates an interactive, customizable world map similar to the one Transparency International publishes on its website. The map can be embedded in websites or exported as a high-resolution scalable vector graphic (SVG) using Orca (https://github.com/plotly/orca).
#Load Data Only
#' Load the Corruption Perceptions Index table from the published PDF
#'
#' Reads the text layer of the PDF, splits it into lines and peels the
#' trailing whitespace-separated tokens off every line to rebuild the table.
#' Lines whose "Year" token is not a number (headers, wrapped text, page
#' furniture) are discarded.
#'
#' @param file_path Path or URL of the consolidated CPI PDF, passed to
#'   `pdftools::pdf_text()`.
#' @return A data.frame with the columns `Country`, `ISO`, `Year`,
#'   `CPI_Score`, `Rank`, `Sources` and `Standard_Error`. `Year` and
#'   `CPI_Score` are numeric; `Rank`, `Sources` and `Standard_Error` are kept
#'   as the character tokens found in the PDF. `Country` is `NA` where the
#'   ISO code could not be translated by `countrycode()`.
corruption_perception_index_data <- function(file_path = "https://images.transparencycdn.org/images/Consolidated_Data_CPI_2012-2019.pdf") {
  # Packages are attached inside the function (as before) so the snippet keeps
  # working when it is pasted on its own.
  library(pdftools)
  library(stringr)
  library(plotly)
  library(dplyr)
  library(stringi)
  library(ecm)
  library(countrycode)

  # Remove the last whitespace-delimited token of each line, then right-trim.
  drop_last_token <- function(x) {
    trimws(str_replace(x, "\\s[^ ]+$", ""), which = "right")
  }

  # Last three characters of each string.
  last_three <- function(x) {
    substr(x, nchar(x) - 2, nchar(x))
  }

  # One element per physical line of the PDF text layer.
  raw_lines <- unlist(str_split(pdf_text(file_path), "[\\r\\n]+"))
  txt <- data.frame(tx2 = raw_lines, stringsAsFactors = FALSE)

  # Peel the table columns off the right-hand side of each line, last column
  # first.
  remaining <- txt$tx2
  trailing_columns <- c("Standard_Error", "Sources", "Rank", "CPI_Score", "Year")
  for (column in trailing_columns) {
    txt[[column]] <- stri_extract_last_regex(remaining, "\\S+")
    remaining <- drop_last_token(remaining)
  }
  # One more token sits between the ISO code and the year (presumably the
  # region column of the PDF - TODO confirm); it is not kept.
  txt$tmp <- drop_last_token(remaining)

  # The ISO code is expected to be the last three characters of what is left.
  # If that is not an all-uppercase string, fall back to the previous line
  # with its spaces removed.
  iso_guess <- last_three(txt$tmp)
  looks_like_code <- grepl("^[[:upper:]]+$", iso_guess)
  previous_line <- lagpad(gsub(" ", "", txt$tx2), k = 1)
  txt$ISO <- ifelse(looks_like_code, iso_guess, previous_line)

  # Non-numeric tokens become NA on purpose; silence only these coercions
  # instead of switching warnings off globally.
  txt$Year <- suppressWarnings(as.numeric(txt$Year))
  txt <- txt[!is.na(txt$Year), ]
  txt$CPI_Score <- suppressWarnings(as.numeric(txt$CPI_Score))

  # A fallback longer than three characters cannot be an ISO code; use the
  # last three characters of the remaining text instead.
  txt$ISO <- ifelse(nchar(txt$ISO) > 3, last_three(txt$tmp), txt$ISO)

  # countrycode() warns about every code it cannot match; those rows simply
  # keep an NA country name.
  txt$Country <- suppressWarnings(
    countrycode(txt$ISO, origin = "iso3c", destination = "country.name")
  )

  txt <- txt[, c("Country", "ISO", "Year", "CPI_Score", "Rank", "Sources", "Standard_Error")]

  # Individual error adjustment for two rows that are parsed incorrectly
  # (presumably because the long country name wraps in the PDF - TODO confirm).
  # `%in%` never yields NA, so rows whose country is still unknown keep their
  # ISO code. The previous `ifelse(Country == ...)` form turned the ISO code of
  # every such row into NA.
  is_car <- txt$ISO %in% "ral" & is.na(txt$Country)
  txt$Country[is_car] <- "Central African Republic"
  is_drc <- txt$ISO %in% "SSA" & is.na(txt$Country)
  txt$Country[is_drc] <- "Democratic Republic of Congo"
  txt$ISO[txt$Country %in% "Central African Republic"] <- "CAF"
  txt$ISO[txt$Country %in% "Democratic Republic of Congo"] <- "COD"

  txt
}
# Fetch the full CPI table and preview its first five rows.
index_data <- corruption_perception_index_data()
head(index_data, n = 5L)
## Country ISO Year CPI_Score Rank Sources Standard_Error
## 2 New Zealand NZL 2019 87 1 8 2.29
## 3 New Zealand NZL 2018 87 2 8 2.44
## 4 New Zealand NZL 2017 89 1 8 2.40
## 5 New Zealand NZL 2016 90 1 7 2.56
## 6 New Zealand NZL 2015 91 1 7 2.32
The code below loads the same dataset and creates an interactive world map from it.
#Load Data from Transparency International and Create Map
#--------------------------------------------------------------------------------------------------------------------------------------------
#' Draw an interactive choropleth world map of the Corruption Perceptions Index
#'
#' Loads the CPI table with `corruption_perception_index_data()`, keeps the
#' rows of one year and colours every country by its CPI score.
#'
#' @param flex_height Height of the plot, passed to `plotly::plot_geo()`.
#' @param sel_year Year to display, or `"last"` (default) for the most recent
#'   year found in the data.
#' @param file_path Path or URL of the consolidated CPI PDF.
#' @return A plotly object with the colour bar hidden.
corruption_perception_index_map <- function(flex_height = 400, sel_year = "last", file_path = "https://images.transparencycdn.org/images/Consolidated_Data_CPI_2012-2019.pdf") {
  library(plotly)

  # Reuse the loader instead of repeating the PDF parsing here. Warnings from
  # loading stay silenced, as they were before, but the global `warn` option
  # is no longer changed by this function.
  txt <- suppressWarnings(corruption_perception_index_data(file_path = file_path))

  # identical() keeps the test a single TRUE/FALSE for any `sel_year` value.
  if (identical(sel_year, "last")) {
    # An empty table yields -Inf and therefore an empty map, as before.
    sel_year <- suppressWarnings(max(txt$Year))
  }
  txt_sel <- txt[txt$Year == sel_year, ]

  # Strip everything outside printable ASCII from the country names used as
  # hover text.
  txt_sel$Country <- gsub("[^ -~]", "", txt_sel$Country)

  # Thin dark-navy country boundaries.
  border <- list(color = toRGB("#04103b"), width = 0.2)

  # Map projection / frame options.
  # NOTE(review): the plotly reference lists projection types in lowercase
  # ("mercator"); confirm whether this capitalised value is honoured before
  # changing it, since that may alter the rendered map.
  geo_options <- list(
    showframe = FALSE,
    showcoastlines = FALSE,
    projection = list(type = 'Mercator')
  )

  p <- plot_geo(txt_sel, height = flex_height) %>%
    add_trace(
      z = ~CPI_Score,
      color = ~CPI_Score,
      colors = c("#dd0400", "#9da8b8"),
      text = ~Country,
      locations = ~ISO,
      marker = list(line = border)
    ) %>%
    # The colour bar is hidden below; the title is still set correctly so it
    # reads sensibly if the bar is ever shown.
    colorbar(title = "CPI Score") %>%
    layout(
      title = "",
      geo = geo_options,
      showlegend = FALSE
    )

  hide_colorbar(p)
}
# Build the map with the default settings (latest year) and display it.
cpi_map <- corruption_perception_index_map()
cpi_map
# Optional: export the map as an SVG with Orca.
#orca(cpi_map, "cpi_map.svg")