Data Tidying

Author

Ashwini Arulrajhan

options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("janitor")
Installing package into 'C:/Users/ashwi/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'janitor' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ashwi\AppData\Local\Temp\Rtmp2ThF7O\downloaded_packages
library(tidyverse) # because, always
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor) # for clean_names() - makes variable names snake_case

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
imf_climate_dashboards_green_debt_url <- "https://opendata.arcgis.com/datasets/8e2772e0b65f4e33a80183ce9583d062_0.csv"

green_debt <- imf_climate_dashboards_green_debt_url |> 
  read_csv() 
Rows: 355 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (30): ObjectId, F1985, F1986, F1987, F1990, F1991, F1992, F1993, F1994, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
green_debt
# A tibble: 355 × 42
   ObjectId Country         ISO2  ISO3  Indicator Unit  Source CTS_Code CTS_Name
      <dbl> <chr>           <chr> <chr> <chr>     <chr> <chr>  <chr>    <chr>   
 1        1 Argentina       AR    ARG   Green Bo… Bill… Refin… ECFFI    Green B…
 2        2 Australia       AU    AUS   Green Bo… Bill… Refin… ECFFI    Green B…
 3        3 Austria         AT    AUT   Green Bo… Bill… Refin… ECFFI    Green B…
 4        4 Austria         AT    AUT   Sovereig… Bill… Refin… ECFF     Green B…
 5        5 Bangladesh      BD    BGD   Green Bo… Bill… Refin… ECFFI    Green B…
 6        6 Belarus, Rep. … BY    BLR   Green Bo… Bill… Refin… ECFFI    Green B…
 7        7 Belarus, Rep. … BY    BLR   Sovereig… Bill… Refin… ECFF     Green B…
 8        8 Belgium         BE    BEL   Green Bo… Bill… Refin… ECFFI    Green B…
 9        9 Belgium         BE    BEL   Sovereig… Bill… Refin… ECFF     Green B…
10       10 Bermuda         BM    BMU   Green Bo… Bill… Refin… ECFFI    Green B…
# ℹ 345 more rows
# ℹ 33 more variables: CTS_Full_Descriptor <chr>, Type_of_Issuer <chr>,
#   Use_of_Proceed <chr>, Principal_Currency <chr>, F1985 <dbl>, F1986 <dbl>,
#   F1987 <dbl>, F1990 <dbl>, F1991 <dbl>, F1992 <dbl>, F1993 <dbl>,
#   F1994 <dbl>, F1999 <dbl>, F2000 <dbl>, F2002 <dbl>, F2003 <dbl>,
#   F2004 <dbl>, F2007 <dbl>, F2008 <dbl>, F2009 <dbl>, F2010 <dbl>,
#   F2011 <dbl>, F2012 <dbl>, F2013 <dbl>, F2014 <dbl>, F2015 <dbl>, …
# we want to compare these two indicators
indicators_we_want <- c("Green Bond Issuances by Country", "Sovereign Green Bond Issuances")

green_debt_subset <- green_debt |> 
  # from the janitor package -- makes variables snake_case so they are easier to work with
  clean_names() |> 
  # filter for the vector of indicators we defined above
  filter(indicator %in% indicators_we_want) |> 
  # "f\\d{4}" is a regular expression (regex) that searches for all columns that are f + four digits.
  # Ask ChatGPT to explain this to you.
  select(country, iso3, indicator, matches("f\\d{4}")) 

green_debt_subset 
# A tibble: 107 × 32
   country iso3  indicator f1985 f1986 f1987 f1990 f1991 f1992 f1993 f1994 f1999
   <chr>   <chr> <chr>     <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
 1 Argent… ARG   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 2 Austra… AUS   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 3 Austria AUT   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 4 Austria AUT   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 5 Bangla… BGD   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 6 Belaru… BLR   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 7 Belaru… BLR   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 8 Belgium BEL   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
 9 Belgium BEL   Sovereig…    NA    NA    NA    NA    NA    NA    NA    NA    NA
10 Bermuda BMU   Green Bo…    NA    NA    NA    NA    NA    NA    NA    NA    NA
# ℹ 97 more rows
# ℹ 20 more variables: f2000 <dbl>, f2002 <dbl>, f2003 <dbl>, f2004 <dbl>,
#   f2007 <dbl>, f2008 <dbl>, f2009 <dbl>, f2010 <dbl>, f2011 <dbl>,
#   f2012 <dbl>, f2013 <dbl>, f2014 <dbl>, f2015 <dbl>, f2016 <dbl>,
#   f2017 <dbl>, f2018 <dbl>, f2019 <dbl>, f2020 <dbl>, f2021 <dbl>,
#   f2022 <dbl>
green_debt_subset |> 
  pivot_longer(
    cols = matches("f\\d{4}")
  )
# A tibble: 3,103 × 5
   country   iso3  indicator                       name  value
   <chr>     <chr> <chr>                           <chr> <dbl>
 1 Argentina ARG   Green Bond Issuances by Country f1985    NA
 2 Argentina ARG   Green Bond Issuances by Country f1986    NA
 3 Argentina ARG   Green Bond Issuances by Country f1987    NA
 4 Argentina ARG   Green Bond Issuances by Country f1990    NA
 5 Argentina ARG   Green Bond Issuances by Country f1991    NA
 6 Argentina ARG   Green Bond Issuances by Country f1992    NA
 7 Argentina ARG   Green Bond Issuances by Country f1993    NA
 8 Argentina ARG   Green Bond Issuances by Country f1994    NA
 9 Argentina ARG   Green Bond Issuances by Country f1999    NA
10 Argentina ARG   Green Bond Issuances by Country f2000    NA
# ℹ 3,093 more rows
green_bonds_tidy <- green_debt_subset |> 
  pivot_longer(
    # select all coluns with f + 4 numbers 
    cols = matches("f\\d{4}"),
    
    # change from default ("names")
    names_to = "year",
    # same with the values
    values_to = "issuance_bn_usd",
    
    # readr::parse_number is a handy function that changes the character string 
    # "f2222" into the number 2222. Very useful! 
    names_transform = readr::parse_number,
    
    # green bonds are new-ish, we can drop all those NA values in the 80s and 90s for now.
    values_drop_na = TRUE
  )

green_bonds_tidy
# A tibble: 465 × 5
   country   iso3  indicator                        year issuance_bn_usd
   <chr>     <chr> <chr>                           <dbl>           <dbl>
 1 Argentina ARG   Green Bond Issuances by Country  2017          0.974 
 2 Argentina ARG   Green Bond Issuances by Country  2020          0.0500
 3 Argentina ARG   Green Bond Issuances by Country  2021          0.916 
 4 Argentina ARG   Green Bond Issuances by Country  2022          0.207 
 5 Australia AUS   Green Bond Issuances by Country  2014          0.526 
 6 Australia AUS   Green Bond Issuances by Country  2015          0.413 
 7 Australia AUS   Green Bond Issuances by Country  2016          0.531 
 8 Australia AUS   Green Bond Issuances by Country  2017          2.53  
 9 Australia AUS   Green Bond Issuances by Country  2018          2.22  
10 Australia AUS   Green Bond Issuances by Country  2019          1.98  
# ℹ 455 more rows
green_bonds_tidy_cumulative <- green_bonds_tidy |> 
  # we don't need that here. get rid of clutter.
  select(-iso3) |> 
  # when calculating cumulative totals, make sure the years are in order first
  arrange(country, year) |> 
  group_by(country, indicator) |> 
  mutate(cumulative_bn_usd = cumsum(issuance_bn_usd)) |> 
  # when in doubt, always ungroup after group_by() functions.  Will stop weird behavior.
  ungroup()

green_bonds_tidy_cumulative
# A tibble: 465 × 5
   country   indicator                    year issuance_bn_usd cumulative_bn_usd
   <chr>     <chr>                       <dbl>           <dbl>             <dbl>
 1 Argentina Green Bond Issuances by Co…  2017          0.974              0.974
 2 Argentina Green Bond Issuances by Co…  2020          0.0500             1.02 
 3 Argentina Green Bond Issuances by Co…  2021          0.916              1.94 
 4 Argentina Green Bond Issuances by Co…  2022          0.207              2.15 
 5 Australia Green Bond Issuances by Co…  2014          0.526              0.526
 6 Australia Green Bond Issuances by Co…  2015          0.413              0.938
 7 Australia Green Bond Issuances by Co…  2016          0.531              1.47 
 8 Australia Green Bond Issuances by Co…  2017          2.53               4.00 
 9 Australia Green Bond Issuances by Co…  2018          2.22               6.22 
10 Australia Green Bond Issuances by Co…  2019          1.98               8.21 
# ℹ 455 more rows
biggest_green_bond_issuers <- green_bonds_tidy_cumulative |> 
  filter(indicator == "Green Bond Issuances by Country") |> 
  group_by(country) |> 
  slice_max(order_by = year) |> 
  arrange(cumulative_bn_usd|> desc()) |> 
  select(country, cumulative_bn_usd) |> 
  ungroup()

biggest_green_bond_issuers
# A tibble: 79 × 2
   country               cumulative_bn_usd
   <chr>                             <dbl>
 1 China, P.R.: Mainland             325. 
 2 Germany                           253. 
 3 France                            213. 
 4 United States                     172. 
 5 Netherlands, The                  155. 
 6 United Kingdom                     84.0
 7 Sweden                             76.6
 8 Spain                              65.7
 9 Japan                              61.8
10 Italy                              59.9
# ℹ 69 more rows
top_10_chart <- biggest_green_bond_issuers |> 
  
  # take the top 10, ordered by cumulative issuance
  slice_max(order_by = cumulative_bn_usd, n = 10) |> 
  
  ggplot(aes(x = cumulative_bn_usd, 
             # order countries by cumulative issuance
             y = fct_reorder(.f = country, .x = cumulative_bn_usd)
             )) +
  geom_col(fill = "forestgreen") 

top_10_chart

top_10_chart +
  theme_minimal() +
  scale_x_continuous(labels = scales::label_dollar(suffix = " bn"),
                     expand = c(0,0)) +
  labs(title = "Top 10 Green Bond Issuers",
       subtitle = "The Dutch sure do issue a whole lot of green bonds per capita",
       x = "Cumulative Issuance (USD)",
       y = "",
       caption = "Data: IMF Climate Change Dashboard | Insight: Me!")

green_bonds_tidy |> 
  pivot_wider(names_from = indicator,
              values_from = issuance_bn_usd)
# A tibble: 420 × 5
   country   iso3   year Green Bond Issuances by Countr…¹ Sovereign Green Bond…²
   <chr>     <chr> <dbl>                            <dbl>                  <dbl>
 1 Argentina ARG    2017                           0.974                      NA
 2 Argentina ARG    2020                           0.0500                     NA
 3 Argentina ARG    2021                           0.916                      NA
 4 Argentina ARG    2022                           0.207                      NA
 5 Australia AUS    2014                           0.526                      NA
 6 Australia AUS    2015                           0.413                      NA
 7 Australia AUS    2016                           0.531                      NA
 8 Australia AUS    2017                           2.53                       NA
 9 Australia AUS    2018                           2.22                       NA
10 Australia AUS    2019                           1.98                       NA
# ℹ 410 more rows
# ℹ abbreviated names: ¹​`Green Bond Issuances by Country`,
#   ²​`Sovereign Green Bond Issuances`
corporate_vs_sovereign_green_bonds <- green_bonds_tidy |> 
  select(-iso3) |> 
  pivot_wider(names_from = indicator,
              values_from = issuance_bn_usd,
              
              values_fill = 0) |> 
  clean_names() |> 
  mutate(corporate_green_bond_issuances = green_bond_issuances_by_country - sovereign_green_bond_issuances)

corporate_vs_sovereign_green_bonds
# A tibble: 420 × 5
   country    year green_bond_issuances_by_country sovereign_green_bond_issuan…¹
   <chr>     <dbl>                           <dbl>                         <dbl>
 1 Argentina  2017                          0.974                              0
 2 Argentina  2020                          0.0500                             0
 3 Argentina  2021                          0.916                              0
 4 Argentina  2022                          0.207                              0
 5 Australia  2014                          0.526                              0
 6 Australia  2015                          0.413                              0
 7 Australia  2016                          0.531                              0
 8 Australia  2017                          2.53                               0
 9 Australia  2018                          2.22                               0
10 Australia  2019                          1.98                               0
# ℹ 410 more rows
# ℹ abbreviated name: ¹​sovereign_green_bond_issuances
# ℹ 1 more variable: corporate_green_bond_issuances <dbl>
corporate_vs_sovereign_green_bonds |> 
  pivot_longer(cols = contains("green_bond"))
# A tibble: 1,260 × 4
   country    year name                             value
   <chr>     <dbl> <chr>                            <dbl>
 1 Argentina  2017 green_bond_issuances_by_country 0.974 
 2 Argentina  2017 sovereign_green_bond_issuances  0     
 3 Argentina  2017 corporate_green_bond_issuances  0.974 
 4 Argentina  2020 green_bond_issuances_by_country 0.0500
 5 Argentina  2020 sovereign_green_bond_issuances  0     
 6 Argentina  2020 corporate_green_bond_issuances  0.0500
 7 Argentina  2021 green_bond_issuances_by_country 0.916 
 8 Argentina  2021 sovereign_green_bond_issuances  0     
 9 Argentina  2021 corporate_green_bond_issuances  0.916 
10 Argentina  2022 green_bond_issuances_by_country 0.207 
# ℹ 1,250 more rows
snake_case_to_title_case <- function(input_string) {
  
  input_string |> 
    # Replace underscores with spaces
    str_replace_all(pattern = "_", replacement = " ") |>
    # change capitalization to Title Case
    str_to_title()
}

corporate_vs_sovereign_green_bonds_long <- corporate_vs_sovereign_green_bonds |> 
  pivot_longer(cols = contains("green_bond"),
               names_to = "indicator",
               values_to = "issuance_bn_usd",
               names_transform = snake_case_to_title_case) 

corporate_vs_sovereign_green_bonds_long
# A tibble: 1,260 × 4
   country    year indicator                       issuance_bn_usd
   <chr>     <dbl> <chr>                                     <dbl>
 1 Argentina  2017 Green Bond Issuances By Country          0.974 
 2 Argentina  2017 Sovereign Green Bond Issuances           0     
 3 Argentina  2017 Corporate Green Bond Issuances           0.974 
 4 Argentina  2020 Green Bond Issuances By Country          0.0500
 5 Argentina  2020 Sovereign Green Bond Issuances           0     
 6 Argentina  2020 Corporate Green Bond Issuances           0.0500
 7 Argentina  2021 Green Bond Issuances By Country          0.916 
 8 Argentina  2021 Sovereign Green Bond Issuances           0     
 9 Argentina  2021 Corporate Green Bond Issuances           0.916 
10 Argentina  2022 Green Bond Issuances By Country          0.207 
# ℹ 1,250 more rows
china_vs_germany_green_bonds <- corporate_vs_sovereign_green_bonds_long |> 
  filter(country %in% c("China, P.R.: Mainland", "Germany")) |> 
  arrange(year) |> 
  pivot_wider(names_from = country,
              values_from = issuance_bn_usd,
              values_fill = 0) |> 
  clean_names() 

china_vs_germany_green_bonds
# A tibble: 48 × 4
    year indicator                       germany china_p_r_mainland
   <dbl> <chr>                             <dbl>              <dbl>
 1  1991 Green Bond Issuances By Country  0.0292                  0
 2  1991 Sovereign Green Bond Issuances   0                       0
 3  1991 Corporate Green Bond Issuances   0.0292                  0
 4  1992 Green Bond Issuances By Country  0.0350                  0
 5  1992 Sovereign Green Bond Issuances   0                       0
 6  1992 Corporate Green Bond Issuances   0.0350                  0
 7  1993 Green Bond Issuances By Country  0.0175                  0
 8  1993 Sovereign Green Bond Issuances   0                       0
 9  1993 Corporate Green Bond Issuances   0.0175                  0
10  2000 Green Bond Issuances By Country  0.0272                  0
# ℹ 38 more rows
china_vs_germany_green_bonds_repeatable_factoids <- china_vs_germany_green_bonds |> 
  mutate(
    
    # multiples:
    # "Germany issued x times more than China"
    germany_x_than_china = germany/china_p_r_mainland,
    # "China issued x times more than Germany"
    china_x_than_germany = china_p_r_mainland/germany,
    
    # percent more:
    # "Germany issued x% more than China"
    germany_pct_more_than_china = (germany/china_p_r_mainland-1) * 100,
    # "China issued x% more than Germany"
    china_pct_more_than_germany = (china_p_r_mainland/germany-1) * 100
    
    # could do absolute difference, etc.... anything that makes intuitive sense.
         
         )

china_vs_germany_green_bonds_repeatable_factoids
# A tibble: 48 × 8
    year indicator               germany china_p_r_mainland germany_x_than_china
   <dbl> <chr>                     <dbl>              <dbl>                <dbl>
 1  1991 Green Bond Issuances B…  0.0292                  0                  Inf
 2  1991 Sovereign Green Bond I…  0                       0                  NaN
 3  1991 Corporate Green Bond I…  0.0292                  0                  Inf
 4  1992 Green Bond Issuances B…  0.0350                  0                  Inf
 5  1992 Sovereign Green Bond I…  0                       0                  NaN
 6  1992 Corporate Green Bond I…  0.0350                  0                  Inf
 7  1993 Green Bond Issuances B…  0.0175                  0                  Inf
 8  1993 Sovereign Green Bond I…  0                       0                  NaN
 9  1993 Corporate Green Bond I…  0.0175                  0                  Inf
10  2000 Green Bond Issuances B…  0.0272                  0                  Inf
# ℹ 38 more rows
# ℹ 3 more variables: china_x_than_germany <dbl>,
#   germany_pct_more_than_china <dbl>, china_pct_more_than_germany <dbl>
factoid_2014 <- china_vs_germany_green_bonds_repeatable_factoids |> 
  filter(year == 2014) |> 
  filter(indicator == "Green Bond Issuances By Country") |> 
  pull(germany_x_than_china) |> 
  round()

factoid_2014
[1] 25
factoid_2022 <- china_vs_germany_green_bonds_repeatable_factoids |> 
  filter(year == 2022) |> 
  filter(indicator == "Green Bond Issuances By Country") |> 
  pull(china_pct_more_than_germany) |> 
  round()

factoid_2022
[1] 19

Homework Problem 1

#installing packages and creating region column
install.packages("countrycode")
Installing package into 'C:/Users/ashwi/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'countrycode' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ashwi\AppData\Local\Temp\Rtmp2ThF7O\downloaded_packages
library(countrycode)

green_debt_subset$region <- countrycode(green_debt_subset$iso3, "iso3c", "region")
library(tidyr)
green_bonds_tidy_region <- green_debt_subset |> 
  pivot_longer(
    # select all coluns with f + 4 numbers 
    cols = matches("f\\d{4}"),
    
    # change from default ("names")
    names_to = "year",
    # same with the values
    values_to = "issuance_bn_usd",
    
    # readr::parse_number is a handy function that changes the character string 
    # "f2222" into the number 2222. Very useful! 
    names_transform = readr::parse_number,
    
    # green bonds are new-ish, we can drop all those NA values in the 80s and 90s for now.
    values_drop_na = TRUE
  )

green_bonds_tidy_region
# A tibble: 465 × 6
   country   iso3  indicator                       region   year issuance_bn_usd
   <chr>     <chr> <chr>                           <chr>   <dbl>           <dbl>
 1 Argentina ARG   Green Bond Issuances by Country Latin …  2017          0.974 
 2 Argentina ARG   Green Bond Issuances by Country Latin …  2020          0.0500
 3 Argentina ARG   Green Bond Issuances by Country Latin …  2021          0.916 
 4 Argentina ARG   Green Bond Issuances by Country Latin …  2022          0.207 
 5 Australia AUS   Green Bond Issuances by Country East A…  2014          0.526 
 6 Australia AUS   Green Bond Issuances by Country East A…  2015          0.413 
 7 Australia AUS   Green Bond Issuances by Country East A…  2016          0.531 
 8 Australia AUS   Green Bond Issuances by Country East A…  2017          2.53  
 9 Australia AUS   Green Bond Issuances by Country East A…  2018          2.22  
10 Australia AUS   Green Bond Issuances by Country East A…  2019          1.98  
# ℹ 455 more rows
library(dplyr)

cumulative_issuance_by_region <- green_bonds_tidy_region %>%
  group_by(region) %>%
  summarize(cumulative_issuance = sum(issuance_bn_usd))

cumulative_issuance_by_region
# A tibble: 7 × 2
  region                     cumulative_issuance
  <chr>                                    <dbl>
1 East Asia & Pacific                      586. 
2 Europe & Central Asia                   1395. 
3 Latin America & Caribbean                 99.8
4 Middle East & North Africa                10.3
5 North America                            239. 
6 South Asia                                15.0
7 Sub-Saharan Africa                        16.2
library(ggplot2)
#Making the plot
cumulative_issuance_by_region <- cumulative_issuance_by_region %>%
  arrange(cumulative_issuance)


# Create a bar plot with dark blue fill and reordered regions
ggplot(cumulative_issuance_by_region, aes(x = cumulative_issuance, y = reorder(region, cumulative_issuance))) +
  geom_bar(stat = "identity", fill = "darkblue") +
  labs(title = "Cumulative Issuance of Green Bonds by Region",
       x = "Cumulative Issuance (in USD bn)",
       y = "Region",
       caption = "Data: IMF Climate Change Dashboard") +
  theme_minimal() +
  scale_x_continuous(labels = scales::label_dollar(suffix = " bn"),
                     expand = c(0, 0))

Homework Problem 2

install.packages("janitor")
Warning: package 'janitor' is in use and will not be installed
library(janitor)
problem2 <- green_debt |> 
  clean_names() |> 
  filter(type_of_issuer != "Not Applicable")  |> 
 pivot_longer(
    # select all coluns with f + 4 numbers 
    cols = matches("f\\d{4}"),
    
    # change from default ("names")
    names_to = "year",
    # same with the values
    values_to = "issuance_bn_usd",
    
    # readr::parse_number is a handy function that changes the character string 
    # "f2222" into the number 2222. Very useful! 
    names_transform = readr::parse_number,
    
    # green bonds are new-ish, we can drop all those NA values in the 80s and 90s for now.
    values_drop_na = TRUE
  )
##Plot for homework 2
library(ggplot2)
custom_colors <- c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3", "#FF7F00", "#FFFF33", "#A65628", "#F781BF")
ggplot(problem2, aes(x = year, y = issuance_bn_usd, color = type_of_issuer)) +
  geom_line() +
  scale_color_manual(values = custom_colors) +
  labs(title = "Green Bond Issuance by Type of Issuer",
       x = "Year",
       y = "Issuance (in USD bn)",
       color = "Type of Issuer") +
  theme_minimal()

Homework Problem 3 Part 1

library(janitor)
library(dplyr)

problem3 <- green_debt |> 
  clean_names() |> 
  filter(use_of_proceed != "Not Applicable")

green_bond_proceeds<- unique(problem3$use_of_proceed)
green_bond_proceeds
 [1] "Access to Essential Services"                                                      
 [2] "Acquiring and distribution of vaccine"                                             
 [3] "Acquisition"                                                                       
 [4] "Affordable Basic Infrastructure"                                                   
 [5] "Agriculture"                                                                       
 [6] "Alternative Energy"                                                                
 [7] "Aquatic Biodiversity Conservation"                                                 
 [8] "Capital expenditure"                                                               
 [9] "Capital expenditure/Financing expenses"                                            
[10] "Carbon reduction through reforestation and avoided deforestation"                  
[11] "China Urban Construction"                                                          
[12] "Circular Economy Adapted/Eco-efficient Products, Production Technologies/Processes"
[13] "Clean Transport"                                                                   
[14] "Climate Change Adaptation"                                                         
[15] "E-education programs - Education Projects"                                         
[16] "Economic Development"                                                              
[17] "Electric & Public Power"                                                           
[18] "Eligible Green Projects"                                                           
[19] "Employee stock ownership plan"                                                     
[20] "Energy Efficiency"                                                                 
[21] "Environmental Protection Projects"                                                 
[22] "Environmentally Sustainable Products"                                              
[23] "Equipment Upgrade/Construction"                                                    
[24] "Financing of Subordinated Loan"                                                    
[25] "Funding new technologies to reduce GHS emissions"                                  
[26] "Gas"                                                                               
[27] "General Purpose"                                                                   
[28] "General Purpose/Acquisition"                                                       
[29] "General Purpose/Refinance"                                                         
[30] "General Purpose/Working Capital"                                                   
[31] "Green Construction/Buildings"                                                      
[32] "Higher Education"                                                                  
[33] "Industrial Development"                                                            
[34] "Infrastructure"                                                                    
[35] "Land Preservation"                                                                 
[36] "Merger or Acquisition"                                                             
[37] "Other"                                                                             
[38] "Other Education"                                                                   
[39] "Other Housing"                                                                     
[40] "Other Public Service"                                                              
[41] "Other Transportation"                                                              
[42] "Pandemic"                                                                          
[43] "Pollution Control"                                                                 
[44] "Pollution Prevention & Control"                                                    
[45] "Production/Supply of Cannabis"                                                     
[46] "Project Finance"                                                                   
[47] "Property Expendit (acquisit/development)"                                          
[48] "Redeem Existing Bonds or Securities"                                               
[49] "Refinance/Financing expenses"                                                      
[50] "Renewable Energy Projects"                                                         
[51] "Repay Bank Loan or Bridge Financing"                                               
[52] "Repay Intercompany Debt"                                                           
[53] "Ship finance"                                                                      
[54] "Social Housing/Affordable Housing"                                                 
[55] "Socioeconomic Advancement And Empowerment"                                         
[56] "Solar projects"                                                                    
[57] "Sustainable Development Projects"                                                  
[58] "Sustainable Economic Growth"                                                       
[59] "Sustainable Forestry"                                                              
[60] "Sustainable Management of Land Use"                                                
[61] "Sustainable Management of Living Natural Resources"                                
[62] "Sustainable Transport"                                                             
[63] "Sustainable Water or Wastewater management"                                        
[64] "Terrestrial Biodiversity Conservation"                                             
[65] "The Belt and Road Initiative"                                                      
[66] "Waste Management"                                                                  
[67] "Water & Sewer"                                                                     
[68] "Wind projects"                                                                     
[69] "Working capital"                                                                   

Homework Problem 3 Part 2

library(dplyr)
library(tidyr)

problem3b <- green_debt |> 
  clean_names() |> 
  filter(principal_currency != "Not Applicable") |> 
 pivot_longer(
    # select all coluns with f + 4 numbers 
    cols = matches("f\\d{4}"),
    
    # change from default ("names")
    names_to = "year",
    # same with the values
    values_to = "issuance_bn_usd",
    names_transform = readr::parse_number,
  
    values_drop_na = TRUE
  )
library(ggplot2)
ggplot(problem3b, aes(x = year, y = principal_currency)) +
  geom_point() +
  labs(x = "Year", y = "Principal Currency") +
  ggtitle("Scatter Plot of Year vs. Principal Currency")

We only have data on cumulative issuance as of 2022. We cannot say anything about the time trend with rest to primary currency.