library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.1 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.1.0
## ✔ tidyr 1.3.0 ✔ stringr 1.5.0
## ✔ readr 2.1.2 ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(dplyr)
library(ggplot2)
Data on natural resources from ten post Soviet countries (Armenia, Russia, Kazakhstan, Azerbaijan, Ukraine, Georgia, Turkmenistan, Uzbekistan, Belarus and Tajikistan) was gathered from the homepage of the World Bank under: https://databank.worldbank.org/home.aspx
In order to compare the natural richness among these ten countries the indicator “Total natural resources rent (% of GPD) was chosen. Total natural resources rents (% of GDP) represent the sum of oil, gas, coal, mineral, and forest rents, expressed as a percentage of a country’s Gross Domestic Product (GDP). Rents are the difference between the price of a resource and its total cost of production, reflecting the resource’s scarcity and economic value.
ESG data was derived from the Sovereign ESG Data Portal provided by the World Bank under: https://esgdata.worldbank.org/data/download?lang=en
In this dataset, the ESG framework of the World Bank is recorded, which is split into the three pillars Environment (32 indicators), Social (22 indicators) and Governance (18 indicators). In order to perform the analysis, two indicators from each pillar were selected.
The data on natural resources rents was pre-processed on the World Bank data bank homepage, where the ten post Soviet countries as well as a time-frame of the last 25 years was selected (1997-2021). In the first step after reading in the data, the meta data section at the end of the .csv file is removed, as well unnecessary variables like Country.Code and Series.Code removed. Not all years have data recorded, so blank spaces (represented by “..” are replaced with NA values) and the numeric values are mutated from character strings to numeric strings.
In the next step, the dataset is pivoted from wide to long in order to later merge it with the ESG data and to allow for easier visualisation later on. In the last step, the newly created “Year” column is adjusted so that the cells only represent the years instead of “X1997..X1997” etc..
data_natural = read.csv("51845f5f-f8f3-4b62-af84-ff674e7fa13c_Series - Metadata.csv")
data_natural = data_natural[1:20,] # exclude meta data at the end of csv file
#
tibble(data_natural)
data_natural = data_natural %>%
select(- Country.Code, - Series.Code) %>%
filter(Series.Name == "Total natural resources rents (% of GDP)")
data_natural[data_natural == ".."] <- NA
data_natural = data_natural %>%
mutate_at(c(3:27), as.numeric)
data_natural = data_natural %>%
select(- Series.Name) %>%
pivot_longer(!Country.Name, names_to = "Year", values_to = "Value") %>%
mutate(Variable = "Total natural resources rents (% of GDP)")
# Rename Year Variable
data_natural = data_natural %>%
mutate(Year = substr(data_natural$Year, start = 10, stop = 13))
The ESG data has to be transformed in the same way as the natural resources data. Since the ESG data bank does not allow for pre-selection, the whole dataset is read into the R Environment and the first three steps including filtering the countries by their ISO3 code, selecting the years 1997-2020 (there are no entries for 2021 in the natural resources data), as well as selecting the required indicators, which can be identified using the accompanying framework datasheet.
In the next step, the ISO3 codes are renamed to their respective country names, as well as the indicator to their full description. Than the numeric values are gain mutated from character values to numeric and the data is pivoted from wide to long. In the newly created Year column, the X is removed.
Before merging the two data sets via rbind (pasting one data frame below the other), the variables in the natural resources data are reorder so that all columns are in the same order. The final data set is than created and ready for analysis.
data_esg = read.csv("sovereignesg-data_2022-12-12.csv")
# filter countries and years
## countries
data_esg = data_esg %>%
filter(iso3 %in% c("ARM",
"RUS",
"KAZ",
"AZE",
"UKR",
"GEO",
"TKM",
"UZB",
"BLR",
"TJK"))
# Year
data_esg = data_esg %>%
select(iso3, ind,
X1997, X1998, X1999,
X2000, X2001, X2002,
X2003, X2004, X2005,
X2006, X2007, X2008,
X2009, X2010, X2011,
X2012, X2013, X2014,
X2015, X2016, X2017,
X2018, X2019, X2020)
# select ESG variables
data_esg =data_esg %>%
filter(ind %in% c("EN.ATM.CO2E.PC", "EG.ELC.RNEW.ZS",
"SP.DYN.LE00.IN", "SI.POV.NAHC",
"NY.GDP.MKTP.KD.ZG", "CC.EST")) %>%
rename(Country.Name = iso3)
# rename iso3 codes to country names
data_esg[data_esg == "ARM"] <- "Armenia"
data_esg[data_esg == "RUS"] <- "Russian Federation"
data_esg[data_esg == "KAZ"] <- "Kazakhstan"
data_esg[data_esg == "AZE"] <- "Azerbaijan"
data_esg[data_esg == "UKR"] <- "Ukraine"
data_esg[data_esg == "GEO"] <- "Georgia"
data_esg[data_esg == "TKM"] <- "Turkmenistan"
data_esg[data_esg == "UZB"] <- "Uzbekistan"
data_esg[data_esg == "BLR"] <- "Belarus"
data_esg[data_esg == "TJK"] <- "Tajikistan"
# rename indicators
data_esg[data_esg == "EN.ATM.CO2E.PC"] <- "CO2 emissions (metric tons per capita)"
data_esg[data_esg == "EG.ELC.RNEW.ZS"] <- "Renewable electricity output (% of total electricity output)"
data_esg[data_esg == "SP.DYN.LE00.IN"] <- "Life expectancy at birth, total (years)"
data_esg[data_esg == "SI.POV.NAHC"] <- "Poverty headcount ratio at national poverty lines (% of population)"
data_esg[data_esg == "NY.GDP.MKTP.KD.ZG"] <- "GDP growth (annual %)"
data_esg[data_esg == "CC.EST"] <- "Control of Corruption: Estimate"
# transform values into numeric
data_esg = data_esg %>%
mutate_at(c(3:26), as.numeric) %>%
rename(Variable = ind)
# pivot long to wide
data_esg = data_esg %>%
pivot_longer(cols = X1997:X2020, names_to = "Year", values_to = "Value")
# remove X from Year column
data_esg = data_esg %>%
mutate(Year = substr(data_esg$Year, start = 2, stop = 5))
# Reorder the columns using dplyr's select() function
data_natural <- data_natural %>%
select(Country.Name, Variable, Year, Value)
# merge together with rbind
data_final = rbind(data_esg, data_natural)
# write to csv
write.csv(data_final, "data_final.csv", row.names=FALSE)