Sustainable Finance Assignment 2

library(tidyverse)

-- Attaching packages --------------------------------------- tidyverse 1.3.1 --

v ggplot2 3.3.5     v purrr   0.3.4
v tibble  3.1.8     v dplyr   1.0.7
v tidyr   1.1.4     v stringr 1.4.0
v readr   2.1.1     v forcats 0.5.1

Warning: package 'tibble' was built under R version 4.1.3

-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()

library(janitor)

Warning: package 'janitor' was built under R version 4.1.3


Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test

library(here)

Warning: package 'here' was built under R version 4.1.3

here() starts at C:/Users/Ryan/Documents/Sustainable Finance Assignment 2

library(readxl)
library(tidyr)
library(dplyr)

library(rnaturalearth)

Warning: package 'rnaturalearth' was built under R version 4.1.3

library(countrycode)

Warning: package 'countrycode' was built under R version 4.1.3

library(wbstats)

Warning: package 'wbstats' was built under R version 4.1.3

Here, I read in data for domestic emissions from 1990 to 2018.

scope_1_domestic <- read_csv("~/Raw Data Finace Project 2/Scope 1 Emissions.csv")

Rows: 193 Columns: 31
-- Column specification --------------------------------------------------------
Delimiter: ","
chr  (3): Country/Region, unit, 1990
dbl (28): 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, ...

i Use `spec()` to retrieve the full column specification for this data.
i Specify the column types or set `show_col_types = FALSE` to quiet this message.

scope_1_domestic

# A tibble: 193 x 31
   `Country/Region` unit   `1990`  `1991` `1992` `1993` `1994` `1995` `1996`
   <chr>            <chr>  <chr>    <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>
 1 China            MtCO2e 2891.73  3039.  3168.  3398.  3557.  3961.  3982.
 2 United States    MtCO2e 5417.32  5372.  5456.  5568.  5662.  5730.  5901 
 3 India            MtCO2e 1002.56  1056.  1081.  1114.  1158.  1224.  1273.
 4 Indonesia        MtCO2e 1226.82  1246.  1267.  1282.  1303.  1339.  1164.
 5 Russia           MtCO2e 2648.36  2585.  2428.  2234.  1996.  1918.  1875.
 6 Brazil           MtCO2e 1638.68  1660.  1670.  1681.  1698.  1725.  1730.
 7 Japan            MtCO2e 1106.26  1122.  1135.  1128.  1186.  1201.  1216.
 8 Iran             MtCO2e 304.22    333.   354.   362.   395.   405.   420.
 9 Canada           MtCO2e 606.91    603.   617.   613.   631.   646.   668.
10 Saudi Arabia     MtCO2e 241.01    274.   292.   290.   289.   289.   304.
# ... with 183 more rows, and 22 more variables: `1997` <dbl>, `1998` <dbl>,
#   `1999` <dbl>, `2000` <dbl>, `2001` <dbl>, `2002` <dbl>, `2003` <dbl>,
#   `2004` <dbl>, `2005` <dbl>, `2006` <dbl>, `2007` <dbl>, `2008` <dbl>,
#   `2009` <dbl>, `2010` <dbl>, `2011` <dbl>, `2012` <dbl>, `2013` <dbl>,
#   `2014` <dbl>, `2015` <dbl>, `2016` <dbl>, `2017` <dbl>, `2018` <dbl>

I omit empty data entries because they have no value at this point.

scope_1_domestic <- scope_1_domestic %>% 
  na.omit()

I change data type so that emissions in the year 1990 are seen as numerical and not categorical. This will allow me to better sort the data later.

scope_1_domestic$"1990" <- as.numeric(as.character(scope_1_domestic$"1990"))  # Convert one variable to numeric

Warning: NAs introduced by coercion

I clean the data so that year is one column and domestic emissions are one column.

scope_1_domestic <- scope_1_domestic %>% 
  pivot_longer("1990":"2018",
    names_to = "year",
    values_to = "domestic_emissions"
  )

I rename the data so that it is standardized.

scope_1_domestic <- scope_1_domestic %>% 
  rename("country_name" = "Country/Region")

Create country code index fun tion.

country_name_regex_to_iso3c <- function(country_name) {
  country_name |>
    countrycode(origin = "country.name", 
                                     destination = "iso3c",
                                     origin_regex = TRUE)
}

Creaste iso3c index function.

iso3c_to_country_name <- function(iso3c) {
  iso3c |>
  countrycode(origin = "iso3c", destination = "country.name")
}

I add a standardized country name and ISO code for each country.

scope_1_domestic <- scope_1_domestic %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country_name)) |>
  mutate(country_name = iso3c_to_country_name(iso3c))

Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: Micronesia

Download data for export emissions for each country from 1995-2018.

scope_1_export <- read_excel("~/Raw Data Finace Project 2/Export Emissions!.xlsx", skip = 6)

I clean the data so that year is one column and export emissions are one column.

 scope_1_export <- scope_1_export %>% 
  pivot_longer("1995":"2018",
    names_to = "year",
    values_to = "export_emissions"
  )

Standardize column names,

scope_1_export <- scope_1_export %>% 
  rename("country_name" = "Country")

Standardize country names and add country code.

scope_1_export <- scope_1_export %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country_name)) |>
  mutate(country_name = iso3c_to_country_name(iso3c))

Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: APEC: Asia-Pacific Economic Cooperation, ASEAN: Association of South East Asian Nations, Data extracted on 12 Mar 2023 22:29 UTC (GMT) from OECD.Stat, EA19: Euro area (19 countries), EASIA: Eastern Asia, EU13: EU28 excluding EU15, EU15: European Union (15 countries), EU27_2020: European Union (27 countries), EU28: European Union (28 countries), G20: Group of Twenty, NONOECD: Non-OECD economies and aggregates, OECD: OECD member countries, ROW: Rest of the World, TUR: Türkiye, ZASI: East and Southeastern Asia, ZEUR: Europe, ZNAM: North America, ZOTH: Other regions, ZSCA: South and Central America

Delete empty data entries.

scope_1_export <- scope_1_export %>% 
  na.omit()

Join domestic emissions and export emissions so that I now have all scope 1 emissions data.

scope_1_and_export <- 
  full_join(scope_1_domestic, scope_1_export, by = c("year" = "year", "country_name" = "country_name", "iso3c" = "iso3c"))

Read in scope 2 emissions info from data base.

scope_2 <- read_excel("~/Raw Data Finace Project 2/Scope 2 Emissions.xlsx", skip = 6)

I clean the data so that year is one column and scope 2 emissions are one column.

scope_2 <- scope_2 %>% 
  pivot_longer("1995":"2018",
    names_to = "year",
    values_to = "scope_2_emissions"
  )

Standardize column and country names and add ISO index.

scope_2 <- scope_2 %>% 
  rename("country_name" = "Country")

scope_2 <- scope_2 %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country_name)) |>
  mutate(country_name = iso3c_to_country_name(iso3c))

Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: APEC: Asia-Pacific Economic Cooperation, ASEAN: Association of South East Asian Nations, Data extracted on 12 Mar 2023 22:34 UTC (GMT) from OECD.Stat, EA19: Euro area (19 countries), EASIA: Eastern Asia, EU13: EU28 excluding EU15, EU15: European Union (15 countries), EU27_2020: European Union (27 countries), EU28: European Union (28 countries), G20: Group of Twenty, NONOECD: Non-OECD economies and aggregates, OECD: OECD member countries, ROW: Rest of the World, TUR: Türkiye

Omit missing data

scope_2 <- scope_2 %>% 
  na.omit()

Download scope 3 emissionds data.

scope_3 <- read_excel("~/Raw Data Finace Project 2/Scope 3 Emissions.xlsx", skip = 6)

rearrange data so that year is in a column and scope 3 emissions are in a column.

scope_3 <- scope_3 %>% 
  pivot_longer("1995":"2018",
    names_to = "year",
    values_to = "scope_3_emissions"
  )

Standardize country names and column names and add ISO index.

scope_3 <- scope_3 %>% 
  rename("country_name" = "Country")

scope_3 <- scope_3 %>% 
  mutate(iso3c = country_name_regex_to_iso3c(country_name)) |>
  mutate(country_name = iso3c_to_country_name(iso3c))

Warning in countrycode_convert(sourcevar = sourcevar, origin = origin, destination = dest, : Some values were not matched unambiguously: APEC: Asia-Pacific Economic Cooperation, ASEAN: Association of South East Asian Nations, EA19: Euro area (19 countries), EASIA: Eastern Asia, EU13: EU28 excluding EU15, EU15: European Union (15 countries), EU27_2020: European Union (27 countries), EU28: European Union (28 countries), G20: Group of Twenty, NONOECD: Non-OECD economies and aggregates, OECD: OECD member countries, ROW: Rest of the World, TUR: Türkiye, ZASI: East and Southeastern Asia, ZEUR: Europe, ZNAM: North America, ZOTH: Other regions, ZSCA: South and Central America

Remove missing data.

scope_3 <- scope_3 %>% 
  na.omit()

combine scope 1 and scope 2 emissions data.

scopes_1_and_2 <- 
  full_join(scope_1_and_export, scope_2, , by = c("year" = "year", "country_name" = "country_name", "iso3c" = "iso3c"))

Combine scope 1, 2 and 3 emissions data into one data base.

all_scopes_all_data <-
    full_join(scopes_1_and_2, scope_3, , by = c("year" = "year", "country_name" = "country_name", "iso3c" = "iso3c"))

Remove data for years that do not have all kinds of emissions

all_scopes_no_na <- all_scopes_all_data %>% 
  na.omit

all_scopes_no_na

# A tibble: 1,512 x 8
   country_name unit   year  domestic_emissions iso3c export_emissions
   <chr>        <chr>  <chr>              <dbl> <chr>            <dbl>
 1 China        MtCO2e 1995               3961. CHN               442.
 2 China        MtCO2e 1996               3982. CHN               459.
 3 China        MtCO2e 1997               3978. CHN               500.
 4 China        MtCO2e 1998               4096. CHN               541.
 5 China        MtCO2e 1999               4029. CHN               509.
 6 China        MtCO2e 2000               4221. CHN               617.
 7 China        MtCO2e 2001               4430. CHN               612.
 8 China        MtCO2e 2002               4737. CHN               700.
 9 China        MtCO2e 2003               5387. CHN               986.
10 China        MtCO2e 2004               6173. CHN              1342.
# ... with 1,502 more rows, and 2 more variables: scope_2_emissions <dbl>,
#   scope_3_emissions <dbl>

Create column with total emissions where I add scope 1, 2, and 3, emissions.

all_scopes_no_na <- all_scopes_no_na %>% 
  mutate("consumption_emissions" = domestic_emissions + scope_2_emissions + scope_3_emissions - export_emissions)

Remove unnecessary columns.

all_scopes_no_na <- 
  select(all_scopes_no_na, -unit)

Download GDP PPP data.

GDP_PPP <- read_excel("~/Raw Data Finace Project 2/GDP_by_PPP.xlsx", skip = 3)

Rearrange data so that year is a column and GDP PPP is a column.

GDP_PPP <- GDP_PPP %>% 
  pivot_longer("1960":"2021",
    names_to = "year",
    values_to = "GDP_PPP_dollars"
  )

Remove missing data.

GDP_PPP <- GDP_PPP %>% 
  na.omit()

Standardize column names and remove unnecessary columns.

GDP_PPP <- GDP_PPP %>% 
  rename("country_name" = "Country Name")

GDP_PPP <- GDP_PPP %>% 
  rename("iso3c" = "Country Code")

GDP_PPP <- 
  select(GDP_PPP, -"Indicator Name")

GDP_PPP <- 
  select(GDP_PPP, -"Indicator Code")

Add GDP PPP to data base with scope 1, 2, and 3 emissions.

all_data_scopes_GDP_PPP <-
   full_join(all_scopes_no_na, GDP_PPP, , by = c("year" = "year", "country_name" = "country_name", "iso3c" = "iso3c"))

Download nominal GDP data.

nom_GDP <- read_excel("~/Raw Data Finace Project 2/Nominal GDP.xlsx", skip = 3)

Rearragne it so that year is a column and nominal GDP is a column

nom_GDP <- nom_GDP %>% 
  pivot_longer("1960":"2021",
    names_to = "year",
    values_to = "nom_GDP_dollars"
  )

Omit missing data

nom_GDP <- nom_GDP %>% 
  na.omit()

Standardize column names and remove unnecessary columns.

nom_GDP <- nom_GDP %>% 
  rename("country_name" = "Country Name")

nom_GDP <- nom_GDP %>% 
  rename("iso3c" = "Country Code")

nom_GDP <- 
  select(nom_GDP, -"Indicator Name")

nom_GDP <- 
  select(nom_GDP, -"Indicator Code")

Add nominal GDP tp data set with scopes 1-3 emissions and GDP PPP.

all_data_scopes_GDP_PPP_and_nom_na <-
   full_join(all_data_scopes_GDP_PPP, nom_GDP, , by = c("year" = "year", "country_name" = "country_name", "iso3c" = "iso3c"))

Remove data for countries that do not have data on scopes 1-3 emissions, nominal GDP, and PPPGDP in a given year. This limits the data to being from 1995-2018.

all_data_no_na <- all_data_scopes_GDP_PPP_and_nom_na %>% 
  na.omit()

debt_pct_gdp <- read_excel("~/Raw Data Finace Project 2/Government Debt.xlsx")

debt_pct_gdp <- debt_pct_gdp %>%
  pivot_longer("1950":"2021",
    names_to = "year",
    values_to = "pct_debt_gdp"
  )

debt_pct_gdp <- debt_pct_gdp %>%
  na.omit()

debt_pct_gdp <- debt_pct_gdp %>%
  rename("country_name" = "Country")


all_data_debt_no_na <-
  full_join(all_data_no_na, debt_pct_gdp, , by = c("year" = "year", "country_name" = "country_name"))

all_data_debt_no_na <- all_data_debt_no_na %>%
  mutate(debt = (pct_debt_gdp/100) * nom_GDP_dollars)

Now I have a clean database with all of the information I need!

all_data_debt_no_na

# A tibble: 3,497 x 12
   country_name year  domestic_emissions iso3c export_emissions scope_2_emissio~
   <chr>        <chr>              <dbl> <chr>            <dbl>            <dbl>
 1 China        1995               3961. CHN               442.             1.09
 2 China        1996               3982. CHN               459.             1.02
 3 China        1997               3978. CHN               500.             1.13
 4 China        1998               4096. CHN               541.             1.08
 5 China        1999               4029. CHN               509.             1.04
 6 China        2000               4221. CHN               617.             1.36
 7 China        2001               4430. CHN               612.             1.42
 8 China        2002               4737. CHN               700.             3.53
 9 China        2003               5387. CHN               986.             4.03
10 China        2004               6173. CHN              1342.             3.81
# ... with 3,487 more rows, and 6 more variables: scope_3_emissions <dbl>,
#   consumption_emissions <dbl>, GDP_PPP_dollars <dbl>, nom_GDP_dollars <dbl>,
#   pct_debt_gdp <dbl>, debt <dbl>