Final Project-data source

Dataset 1

The first database that needs to be prepared is sovereign-level data, including green bond issuance status of each country or region, GHG emission statistics, and GDP data (adjusted by PPP)

library(tidyverse) # because, always
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor) # for clean_names() - makes variable names snake_case

载入程辑包:'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
imf_climate_dashboards_green_debt <- "C:\\Users\\puxin\\OneDrive\\Documents\\00_data_raw\\IMF_Green_Bonds.csv"

green_debt <- imf_climate_dashboards_green_debt |> 
  read_csv() 
Rows: 355 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (30): ObjectId, F1985, F1986, F1987, F1990, F1991, F1992, F1993, F1994, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
green_debt
# A tibble: 355 × 42
   ObjectId Country         ISO2  ISO3  Indicator Unit  Source CTS_Code CTS_Name
      <dbl> <chr>           <chr> <chr> <chr>     <chr> <chr>  <chr>    <chr>   
 1        1 Argentina       AR    ARG   Green Bo… Bill… Refin… ECFFI    Green B…
 2        2 Australia       AU    AUS   Green Bo… Bill… Refin… ECFFI    Green B…
 3        3 Austria         AT    AUT   Green Bo… Bill… Refin… ECFFI    Green B…
 4        4 Austria         AT    AUT   Sovereig… Bill… Refin… ECFF     Green B…
 5        5 Bangladesh      BD    BGD   Green Bo… Bill… Refin… ECFFI    Green B…
 6        6 Belarus, Rep. … BY    BLR   Green Bo… Bill… Refin… ECFFI    Green B…
 7        7 Belarus, Rep. … BY    BLR   Sovereig… Bill… Refin… ECFF     Green B…
 8        8 Belgium         BE    BEL   Green Bo… Bill… Refin… ECFFI    Green B…
 9        9 Belgium         BE    BEL   Sovereig… Bill… Refin… ECFF     Green B…
10       10 Bermuda         BM    BMU   Green Bo… Bill… Refin… ECFFI    Green B…
# ℹ 345 more rows
# ℹ 33 more variables: CTS_Full_Descriptor <chr>, Type_of_Issuer <chr>,
#   Use_of_Proceed <chr>, Principal_Currency <chr>, F1985 <dbl>, F1986 <dbl>,
#   F1987 <dbl>, F1990 <dbl>, F1991 <dbl>, F1992 <dbl>, F1993 <dbl>,
#   F1994 <dbl>, F1999 <dbl>, F2000 <dbl>, F2002 <dbl>, F2003 <dbl>,
#   F2004 <dbl>, F2007 <dbl>, F2008 <dbl>, F2009 <dbl>, F2010 <dbl>,
#   F2011 <dbl>, F2012 <dbl>, F2013 <dbl>, F2014 <dbl>, F2015 <dbl>, …
# we want to compare these two indicators
indicators_we_want <- c("Green Bond Issuances by Country", "Sovereign Green Bond Issuances")

green_debt_subset <- green_debt |> 
  # from the janitor package -- makes variables snake_case so they are easier to work with
  clean_names() |> 
  # filter for the vector of indicators we defined above
  filter(indicator %in% indicators_we_want) |> 
  # "f\\d{4}" is a regular expression (regex) that searches for all columns that are f + four digits.
  # Ask ChatGPT to explain this to you.
  select(country, iso3, indicator, matches("f\\d{4}")) 

green_debt_subset 
# A tibble: 107 × 32
   country iso3  indicator f1985 f1986 f1987 f1990 f1991 f1992 f1993 f1994 f1999
   <chr>   <chr> <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Argent… ARG   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 2 Austra… AUS   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 3 Austria AUT   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 4 Austria AUT   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 5 Bangla… BGD   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 6 Belaru… BLR   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 7 Belaru… BLR   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 8 Belgium BEL   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 9 Belgium BEL   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
10 Bermuda BMU   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
# ℹ 97 more rows
# ℹ 20 more variables: f2000 <dbl>, f2002 <dbl>, f2003 <dbl>, f2004 <dbl>,
#   f2007 <dbl>, f2008 <dbl>, f2009 <dbl>, f2010 <dbl>, f2011 <dbl>,
#   f2012 <dbl>, f2013 <dbl>, f2014 <dbl>, f2015 <dbl>, f2016 <dbl>,
#   f2017 <dbl>, f2018 <dbl>, f2019 <dbl>, f2020 <dbl>, f2021 <dbl>,
#   f2022 <dbl>
green_debt_subset |> 
  pivot_longer(
    cols = matches("f\\d{4}")
  )
# A tibble: 3,103 × 5
   country   iso3  indicator                       name  value
   <chr>     <chr> <chr>                           <chr> <dbl>
 1 Argentina ARG   Green Bond Issuances by Country f1985    NA
 2 Argentina ARG   Green Bond Issuances by Country f1986    NA
 3 Argentina ARG   Green Bond Issuances by Country f1987    NA
 4 Argentina ARG   Green Bond Issuances by Country f1990    NA
 5 Argentina ARG   Green Bond Issuances by Country f1991    NA
 6 Argentina ARG   Green Bond Issuances by Country f1992    NA
 7 Argentina ARG   Green Bond Issuances by Country f1993    NA
 8 Argentina ARG   Green Bond Issuances by Country f1994    NA
 9 Argentina ARG   Green Bond Issuances by Country f1999    NA
10 Argentina ARG   Green Bond Issuances by Country f2000    NA
# ℹ 3,093 more rows
green_bonds_tidy <- green_debt_subset |> 
  pivot_longer(
    # select all coluns with f + 4 numbers 
    cols = matches("f\\d{4}"),
    
    # change from default ("names")
    names_to = "year",
    # same with the values
    values_to = "issuance_bn_usd",
    
    # readr::parse_number is a handy function that changes the character string 
    # "f2222" into the number 2222. Very useful! 
    names_transform = readr::parse_number,
    
    # green bonds are new-ish, we can drop all those NA values in the 80s and 90s for now.
    values_drop_na = TRUE
  )

green_bonds_tidy
# A tibble: 465 × 5
   country   iso3  indicator                        year issuance_bn_usd
   <chr>     <chr> <chr>                           <dbl>           <dbl>
 1 Argentina ARG   Green Bond Issuances by Country  2017          0.974 
 2 Argentina ARG   Green Bond Issuances by Country  2020          0.0500
 3 Argentina ARG   Green Bond Issuances by Country  2021          0.916 
 4 Argentina ARG   Green Bond Issuances by Country  2022          0.207 
 5 Australia AUS   Green Bond Issuances by Country  2014          0.526 
 6 Australia AUS   Green Bond Issuances by Country  2015          0.413 
 7 Australia AUS   Green Bond Issuances by Country  2016          0.531 
 8 Australia AUS   Green Bond Issuances by Country  2017          2.53  
 9 Australia AUS   Green Bond Issuances by Country  2018          2.22  
10 Australia AUS   Green Bond Issuances by Country  2019          1.98  
# ℹ 455 more rows
green_bonds_tidy_cumulative <- green_bonds_tidy |> 
  # we don't need that here. get rid of clutter.
  select(-iso3) |> 
  # when calculating cumulative totals, make sure the years are in order first
  arrange(country, year) |> 
  group_by(country, indicator) |> 
  mutate(cumulative_bn_usd = cumsum(issuance_bn_usd)) |> 
  # when in doubt, always ungroup after group_by() functions.  Will stop weird behavior.
  ungroup()

green_bonds_tidy_cumulative
# A tibble: 465 × 5
   country   indicator                    year issuance_bn_usd cumulative_bn_usd
   <chr>     <chr>                       <dbl>           <dbl>             <dbl>
 1 Argentina Green Bond Issuances by Co…  2017          0.974              0.974
 2 Argentina Green Bond Issuances by Co…  2020          0.0500             1.02 
 3 Argentina Green Bond Issuances by Co…  2021          0.916              1.94 
 4 Argentina Green Bond Issuances by Co…  2022          0.207              2.15 
 5 Australia Green Bond Issuances by Co…  2014          0.526              0.526
 6 Australia Green Bond Issuances by Co…  2015          0.413              0.938
 7 Australia Green Bond Issuances by Co…  2016          0.531              1.47 
 8 Australia Green Bond Issuances by Co…  2017          2.53               4.00 
 9 Australia Green Bond Issuances by Co…  2018          2.22               6.22 
10 Australia Green Bond Issuances by Co…  2019          1.98               8.21 
# ℹ 455 more rows

After processing the green bond data, clean the GHG emission data and GDP data, and merge all data datasets.

library(tidyverse) 
library(janitor) 
PCAF_dashboards <- "C://Users//puxin//OneDrive//Documents//00_data_raw//PCAF-data-final.csv"

GHG_GDP_dataset <-PCAF_dashboards |> 
  read_csv() 
Rows: 19835 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (1): country
dbl (5): year, EDGAR_GHG_value, GDP_adjusted_PPP, GDP_current_dollar, popula...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
GHG_GDP_dataset
# A tibble: 19,835 × 6
   country  year EDGAR_GHG_value GDP_adjusted_PPP GDP_current_dollar population
   <chr>   <dbl>           <dbl>            <dbl>              <dbl>      <dbl>
 1 Aruba    1970            45.2               NA                 NA      59106
 2 Aruba    1971            50.1               NA                 NA      58816
 3 Aruba    1972            60.9               NA                 NA      58855
 4 Aruba    1973            65.8               NA                 NA      59365
 5 Aruba    1974            65.1               NA                 NA      60028
 6 Aruba    1975            79.1               NA                 NA      60715
 7 Aruba    1976            78.2               NA                 NA      61193
 8 Aruba    1977            88.9               NA                 NA      61465
 9 Aruba    1978            93.7               NA                 NA      61738
10 Aruba    1979            97.7               NA                 NA      62006
# ℹ 19,825 more rows
merged_dataset <- left_join(green_bonds_tidy_cumulative, GHG_GDP_dataset, by = c("country", "year"))

# View the first few rows of the merged dataset to verify the merge
head(merged_dataset)
# A tibble: 6 × 9
  country   indicator     year issuance_bn_usd cumulative_bn_usd EDGAR_GHG_value
  <chr>     <chr>        <dbl>           <dbl>             <dbl>           <dbl>
1 Argentina Green Bond …  2017          0.974              0.974         379169.
2 Argentina Green Bond …  2020          0.0500             1.02          359026.
3 Argentina Green Bond …  2021          0.916              1.94          378420.
4 Argentina Green Bond …  2022          0.207              2.15          382992.
5 Australia Green Bond …  2014          0.526              0.526         585442.
6 Australia Green Bond …  2015          0.413              0.938         593561.
# ℹ 3 more variables: GDP_adjusted_PPP <dbl>, GDP_current_dollar <dbl>,
#   population <dbl>
# Rename merged_dataset to final_project_dataset1
final_project_dataset1 <- merged_dataset

output_dir <- "C://Users//puxin//OneDrive//Documents//03_data_processed"
if (!dir.exists(output_dir)) {
  dir.create(output_dir, recursive = TRUE)
}

# Specify the output file path
output_file_path <- file.path(output_dir, "final_project_dataset1.csv")

# Export the dataframe as a CSV file
write_csv(final_project_dataset1, file = output_file_path)

Dataset 2&3

  1. Annual Financial Commitments in Renewable Energy

  2. Public Renewable Energy Finance Flows

These two datasets are non-sovereign-level data and therefore are not merged with dataset1. These two datasets are long panel data that have been processed in advance, and the data types are correct, so they are directly stored in the folder 03_data_processed

# Load necessary libraries
library(readxl)
library(openxlsx)
Warning: 程辑包'openxlsx'是用R版本4.3.3 来建造的
# Define paths
old_path1 <- "C:/Users/puxin/OneDrive/Documents/00_data_raw/2023_IRENA-CPI_RE_Investment_Trends.xlsx"
old_path2 <- "C:/Users/puxin/OneDrive/Documents/00_data_raw/IRENA_RE_Public_Investment_2022.xlsx"

new_path <- "C:/Users/puxin/OneDrive/Documents/03_data_processed"

# Read Excel files
data1 <- read_excel(old_path1)
data2 <- read_excel(old_path2)

# Write the data to new location with specified names
write.xlsx(data1, file.path(new_path, "IRENA-CPI_RE_Investment_Trends.xlsx"))
write.xlsx(data2, file.path(new_path, "IRENA_RE_Public_Investment.xlsx"))