Data Tidying

options(repos = c(CRAN = "https://cran.rstudio.com/"))
#libraries
library(tidyverse) # because, always
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor) # for clean_names() - makes variable names snake_case

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(countrycode)
library(ggthemes)
library(ggsci)
library(gghalves)
#Data
imf_climate_dashboards_green_debt_url <- "https://opendata.arcgis.com/datasets/8e2772e0b65f4e33a80183ce9583d062_0.csv"

green_debt <- imf_climate_dashboards_green_debt_url |> 
  read_csv() 
Rows: 355 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (30): ObjectId, F1985, F1986, F1987, F1990, F1991, F1992, F1993, F1994, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
green_debt
# A tibble: 355 × 42
   ObjectId Country         ISO2  ISO3  Indicator Unit  Source CTS_Code CTS_Name
      <dbl> <chr>           <chr> <chr> <chr>     <chr> <chr>  <chr>    <chr>   
 1        1 Argentina       AR    ARG   Green Bo… Bill… Refin… ECFFI    Green B…
 2        2 Australia       AU    AUS   Green Bo… Bill… Refin… ECFFI    Green B…
 3        3 Austria         AT    AUT   Green Bo… Bill… Refin… ECFFI    Green B…
 4        4 Austria         AT    AUT   Sovereig… Bill… Refin… ECFF     Green B…
 5        5 Bangladesh      BD    BGD   Green Bo… Bill… Refin… ECFFI    Green B…
 6        6 Belarus, Rep. … BY    BLR   Green Bo… Bill… Refin… ECFFI    Green B…
 7        7 Belarus, Rep. … BY    BLR   Sovereig… Bill… Refin… ECFF     Green B…
 8        8 Belgium         BE    BEL   Green Bo… Bill… Refin… ECFFI    Green B…
 9        9 Belgium         BE    BEL   Sovereig… Bill… Refin… ECFF     Green B…
10       10 Bermuda         BM    BMU   Green Bo… Bill… Refin… ECFFI    Green B…
# ℹ 345 more rows
# ℹ 33 more variables: CTS_Full_Descriptor <chr>, Type_of_Issuer <chr>,
#   Use_of_Proceed <chr>, Principal_Currency <chr>, F1985 <dbl>, F1986 <dbl>,
#   F1987 <dbl>, F1990 <dbl>, F1991 <dbl>, F1992 <dbl>, F1993 <dbl>,
#   F1994 <dbl>, F1999 <dbl>, F2000 <dbl>, F2002 <dbl>, F2003 <dbl>,
#   F2004 <dbl>, F2007 <dbl>, F2008 <dbl>, F2009 <dbl>, F2010 <dbl>,
#   F2011 <dbl>, F2012 <dbl>, F2013 <dbl>, F2014 <dbl>, F2015 <dbl>, …
# we want to compare these two indicators
indicators_we_want <- c("Green Bond Issuances by Country", "Sovereign Green Bond Issuances")

green_debt_subset <- green_debt |> 
  # from the janitor package -- makes variables snake_case so they are easier to work with
  clean_names() |> 
  # filter for the vector of indicators we defined above
  filter(indicator %in% indicators_we_want) |> 
  # "f\\d{4}" is a regular expression (regex) that searches for all columns that are f + four digits.
  # Ask ChatGPT to explain this to you.
  select(country, iso3, indicator, matches("f\\d{4}")) 

green_debt_subset 
# A tibble: 107 × 32
   country iso3  indicator f1985 f1986 f1987 f1990 f1991 f1992 f1993 f1994 f1999
   <chr>   <chr> <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Argent… ARG   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 2 Austra… AUS   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 3 Austria AUT   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 4 Austria AUT   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 5 Bangla… BGD   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 6 Belaru… BLR   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 7 Belaru… BLR   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 8 Belgium BEL   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 9 Belgium BEL   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
10 Bermuda BMU   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
# ℹ 97 more rows
# ℹ 20 more variables: f2000 <dbl>, f2002 <dbl>, f2003 <dbl>, f2004 <dbl>,
#   f2007 <dbl>, f2008 <dbl>, f2009 <dbl>, f2010 <dbl>, f2011 <dbl>,
#   f2012 <dbl>, f2013 <dbl>, f2014 <dbl>, f2015 <dbl>, f2016 <dbl>,
#   f2017 <dbl>, f2018 <dbl>, f2019 <dbl>, f2020 <dbl>, f2021 <dbl>,
#   f2022 <dbl>

3.5.0.1 Homework problem 1:

  • Use the countrycode package to add the region as a variable to green_debt_subset.

  • Calculate the cumulative issuance of green bonds by region.

  • Use ggplot2 to make a data visualization comparing regional cumulative issuance.

  • Make your plot look professionally presentable, as we did above.

    #using countrycode package
    green_debt_subset <- green_debt_subset |>
      mutate(region = countrycode::countrycode(iso3, "iso3c", "continent"))
    
    # Calculate cumulative issuance of green bonds by region
    cumulative_by_region <- green_debt_subset |>
      group_by(region) |>
      summarize(cumulative_issuance = sum(across(matches("f\\d{4}")), na.rm = TRUE))
    
    #data visualizatio
    ggplot(cumulative_by_region, aes(x = region, y = cumulative_issuance, fill = region)) + 
      geom_bar(stat = "identity") + 
      labs(x = "Region", y = "Cumulative Issuance", title = "Cumulative Issuance by Region") + 
      geom_text(aes(label = cumulative_issuance), 
                position = position_stack(vjust = 1), 
                size = 2.8, 
                color = "black")

3.5.0.2 Homework problem 2:

  • Use the full green_debt dataset

  • Use clean_names() to make the variable names snake_case

  • Filter out observations where type_of_issuer is “Not Applicable”

  • Use the tools taught in this chapter to provide a compelling data visualization and some repeatable factoids that provide actionable insights about green bond issuers.

    green_debt_filtered_type_of_issuer <- green_debt |>
      clean_names() |>
      filter(type_of_issuer != "Not Applicable")
    
    
    greenbond_long <- pivot_longer(green_debt_filtered_type_of_issuer, cols = f1985:f2022, names_prefix = "f", names_to = "Year")
    
    greenbond_long_sum <- greenbond_long |>
      group_by(type_of_issuer, Year) |> 
      summarize(sum_bn_usd = sum(value, na.rm = TRUE))
    `summarise()` has grouped output by 'type_of_issuer'. You can override using
    the `.groups` argument.
    ggplot(greenbond_long_sum, aes(x = type_of_issuer, y = sum_bn_usd)) +
      geom_boxplot(fill = "#B23333") +
      geom_jitter() +
      labs(x = "Type of Issuer", y = "Sum of Value in USD Billion", title = "Green Bond by Type of Issuer")+
      theme(axis.text.x = element_text(angle = 60, hjust = 1))

3.5.0.3 Homework problem 3:

  • Repeat the process from problem 2 for use_of_proceed and for principal_currency

    • What are green bond proceeds used for?

    • What do we know about the currency of issuance? Is that changing over time?

#use_of_proceed
green_debt_filtered_use_of_proceed <-green_debt|> 
  clean_names() |> 
  filter(use_of_proceed != "Not Applicable")
#examine: what are green bond proceeds used for:
print(unique(green_debt_filtered_use_of_proceed$use_of_proceed))
 [1] "Access to Essential Services"                                                      
 [2] "Acquiring and distribution of vaccine"                                             
 [3] "Acquisition"                                                                       
 [4] "Affordable Basic Infrastructure"                                                   
 [5] "Agriculture"                                                                       
 [6] "Alternative Energy"                                                                
 [7] "Aquatic Biodiversity Conservation"                                                 
 [8] "Capital expenditure"                                                               
 [9] "Capital expenditure/Financing expenses"                                            
[10] "Carbon reduction through reforestation and avoided deforestation"                  
[11] "China Urban Construction"                                                          
[12] "Circular Economy Adapted/Eco-efficient Products, Production Technologies/Processes"
[13] "Clean Transport"                                                                   
[14] "Climate Change Adaptation"                                                         
[15] "E-education programs - Education Projects"                                         
[16] "Economic Development"                                                              
[17] "Electric & Public Power"                                                           
[18] "Eligible Green Projects"                                                           
[19] "Employee stock ownership plan"                                                     
[20] "Energy Efficiency"                                                                 
[21] "Environmental Protection Projects"                                                 
[22] "Environmentally Sustainable Products"                                              
[23] "Equipment Upgrade/Construction"                                                    
[24] "Financing of Subordinated Loan"                                                    
[25] "Funding new technologies to reduce GHS emissions"                                  
[26] "Gas"                                                                               
[27] "General Purpose"                                                                   
[28] "General Purpose/Acquisition"                                                       
[29] "General Purpose/Refinance"                                                         
[30] "General Purpose/Working Capital"                                                   
[31] "Green Construction/Buildings"                                                      
[32] "Higher Education"                                                                  
[33] "Industrial Development"                                                            
[34] "Infrastructure"                                                                    
[35] "Land Preservation"                                                                 
[36] "Merger or Acquisition"                                                             
[37] "Other"                                                                             
[38] "Other Education"                                                                   
[39] "Other Housing"                                                                     
[40] "Other Public Service"                                                              
[41] "Other Transportation"                                                              
[42] "Pandemic"                                                                          
[43] "Pollution Control"                                                                 
[44] "Pollution Prevention & Control"                                                    
[45] "Production/Supply of Cannabis"                                                     
[46] "Project Finance"                                                                   
[47] "Property Expendit (acquisit/development)"                                          
[48] "Redeem Existing Bonds or Securities"                                               
[49] "Refinance/Financing expenses"                                                      
[50] "Renewable Energy Projects"                                                         
[51] "Repay Bank Loan or Bridge Financing"                                               
[52] "Repay Intercompany Debt"                                                           
[53] "Ship finance"                                                                      
[54] "Social Housing/Affordable Housing"                                                 
[55] "Socioeconomic Advancement And Empowerment"                                         
[56] "Solar projects"                                                                    
[57] "Sustainable Development Projects"                                                  
[58] "Sustainable Economic Growth"                                                       
[59] "Sustainable Forestry"                                                              
[60] "Sustainable Management of Land Use"                                                
[61] "Sustainable Management of Living Natural Resources"                                
[62] "Sustainable Transport"                                                             
[63] "Sustainable Water or Wastewater management"                                        
[64] "Terrestrial Biodiversity Conservation"                                             
[65] "The Belt and Road Initiative"                                                      
[66] "Waste Management"                                                                  
[67] "Water & Sewer"                                                                     
[68] "Wind projects"                                                                     
[69] "Working capital"                                                                   
#principal_currency
green_debt_filtered_principal_currency <-green_debt |> 
  clean_names() |> 
  filter(principal_currency != "Not Applicable")

#examine the currency of issuance data
green_debt_filtered_principal_currency |>
  group_by(principal_currency) |>
  summarize(n())
# A tibble: 50 × 2
   principal_currency                        `n()`
   <chr>                                     <int>
 1 Argentinian Peso                              2
 2 Argentinian Unidades de Valor Adquisitivo     2
 3 Australian Dollar                             2
 4 Bangladeshi Taka                              2
 5 Brazilian Real                                2
 6 British Pound                                 2
 7 Canadian Dollar                               2
 8 Chilean Peso                                  2
 9 Chilean Unidad de Fomento                     2
10 Chinese Yuan                                  2
# ℹ 40 more rows