Final Project Data Tidying

Author

Yuanling Zeng

library(readr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ lubridate 1.9.3     ✔ tibble    3.2.1
✔ purrr     1.0.2     ✔ tidyr     1.3.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(countrycode)

imf_climate_dashboards_green_debt_url <- 
  "https://opendata.arcgis.com/datasets/8e2772e0b65f4e33a80183ce9583d062_0.csv"
green_bond <- imf_climate_dashboards_green_debt_url |> 
  read_csv() 
Rows: 355 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (30): ObjectId, F1985, F1986, F1987, F1990, F1991, F1992, F1993, F1994, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
indicators_we_want <- c("Green Bond Issuances by Country", "Sovereign Green Bond Issuances")

green_bond_subset <- green_bond |> 
  clean_names() |> 
  mutate(region = countrycode(iso2, "iso2c", "region"), 
         .before = iso2) 
green_bond_subset
# A tibble: 355 × 43
   object_id country region iso2  iso3  indicator unit  source cts_code cts_name
       <dbl> <chr>   <chr>  <chr> <chr> <chr>     <chr> <chr>  <chr>    <chr>   
 1         1 Argent… Latin… AR    ARG   Green Bo… Bill… Refin… ECFFI    Green B…
 2         2 Austra… East … AU    AUS   Green Bo… Bill… Refin… ECFFI    Green B…
 3         3 Austria Europ… AT    AUT   Green Bo… Bill… Refin… ECFFI    Green B…
 4         4 Austria Europ… AT    AUT   Sovereig… Bill… Refin… ECFF     Green B…
 5         5 Bangla… South… BD    BGD   Green Bo… Bill… Refin… ECFFI    Green B…
 6         6 Belaru… Europ… BY    BLR   Green Bo… Bill… Refin… ECFFI    Green B…
 7         7 Belaru… Europ… BY    BLR   Sovereig… Bill… Refin… ECFF     Green B…
 8         8 Belgium Europ… BE    BEL   Green Bo… Bill… Refin… ECFFI    Green B…
 9         9 Belgium Europ… BE    BEL   Sovereig… Bill… Refin… ECFF     Green B…
10        10 Bermuda North… BM    BMU   Green Bo… Bill… Refin… ECFFI    Green B…
# ℹ 345 more rows
# ℹ 33 more variables: cts_full_descriptor <chr>, type_of_issuer <chr>,
#   use_of_proceed <chr>, principal_currency <chr>, f1985 <dbl>, f1986 <dbl>,
#   f1987 <dbl>, f1990 <dbl>, f1991 <dbl>, f1992 <dbl>, f1993 <dbl>,
#   f1994 <dbl>, f1999 <dbl>, f2000 <dbl>, f2002 <dbl>, f2003 <dbl>,
#   f2004 <dbl>, f2007 <dbl>, f2008 <dbl>, f2009 <dbl>, f2010 <dbl>,
#   f2011 <dbl>, f2012 <dbl>, f2013 <dbl>, f2014 <dbl>, f2015 <dbl>, …
green_bond_cumulative <- green_bond_subset |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    values_drop_na = TRUE
  ) |>
  filter(!is.na(region)) |>
  group_by(region) |>
  summarize(cumulative_issuance = sum(Issuance_bn_usd)) |>
  ungroup()
green_bond_cumulative
# A tibble: 7 × 2
  region                     cumulative_issuance
  <chr>                                    <dbl>
1 East Asia & Pacific                      586. 
2 Europe & Central Asia                   1395. 
3 Latin America & Caribbean                 99.8
4 Middle East & North Africa                10.3
5 North America                            239. 
6 South Asia                                15.0
7 Sub-Saharan Africa                        16.2
green_bond_region <- green_bond_cumulative |>
  group_by(region) |> 
  slice_max(n = 1, order_by = cumulative_issuance) |> 
  arrange(cumulative_issuance|> desc()) |> 
  select(region, cumulative_issuance) |> 
  ungroup()
green_bond_region
# A tibble: 7 × 2
  region                     cumulative_issuance
  <chr>                                    <dbl>
1 Europe & Central Asia                   1395. 
2 East Asia & Pacific                      586. 
3 North America                            239. 
4 Latin America & Caribbean                 99.8
5 Sub-Saharan Africa                        16.2
6 South Asia                                15.0
7 Middle East & North Africa                10.3
green_bond_issuer <- green_bond |> 
  clean_names() |>
  filter(type_of_issuer != "Not Applicable")

green_bond_issuer <- green_bond_issuer |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    names_transform = readr::parse_number,
    values_drop_na = TRUE
    )

india_green_bonds <- green_bond %>%
  filter(Country == "India")
india_green_bonds
# A tibble: 1 × 42
  ObjectId Country ISO2  ISO3  Indicator          Unit  Source CTS_Code CTS_Name
     <dbl> <chr>   <chr> <chr> <chr>              <chr> <chr>  <chr>    <chr>   
1       45 India   IN    IND   Green Bond Issuan… Bill… Refin… ECFFI    Green B…
# ℹ 33 more variables: CTS_Full_Descriptor <chr>, Type_of_Issuer <chr>,
#   Use_of_Proceed <chr>, Principal_Currency <chr>, F1985 <dbl>, F1986 <dbl>,
#   F1987 <dbl>, F1990 <dbl>, F1991 <dbl>, F1992 <dbl>, F1993 <dbl>,
#   F1994 <dbl>, F1999 <dbl>, F2000 <dbl>, F2002 <dbl>, F2003 <dbl>,
#   F2004 <dbl>, F2007 <dbl>, F2008 <dbl>, F2009 <dbl>, F2010 <dbl>,
#   F2011 <dbl>, F2012 <dbl>, F2013 <dbl>, F2014 <dbl>, F2015 <dbl>,
#   F2016 <dbl>, F2017 <dbl>, F2018 <dbl>, F2019 <dbl>, F2020 <dbl>, …