Data Tidying Practice

Author

Pinandito Wisambudi

Preparation

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(janitor)

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library(dplyr)
library(ggplot2)
options(repos = c(CRAN = "https://cloud.r-project.org/"))

# Now you can safely call install.packages() without the error
install.packages("countrycode")

The downloaded binary packages are in
    /var/folders/7r/x_rm29tn51v44b1wcbyg15km0000gn/T//Rtmpq5fv9r/downloaded_packages

Homework Problem 1

  • Use the countrycode package to add the region as a variable to green_debt_subset.

  • Calculate the cumulative issuance of green bonds by region.

  • Use ggplot2 to make a data visualization comparing regional cumulative issuance.

  • Make your plot look professionally presentable, as we did above.

library(countrycode)

imf_climate_dashboards_green_debt_url <- "https://opendata.arcgis.com/datasets/8e2772e0b65f4e33a80183ce9583d062_0.csv"

green_debt <- imf_climate_dashboards_green_debt_url |> 
  read_csv() 
Rows: 355 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (30): ObjectId, F1985, F1986, F1987, F1990, F1991, F1992, F1993, F1994, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
indicators_we_want <- c("Green Bond Issuances by Country", "Sovereign Green Bond Issuances")

green_debt_subset <- green_debt |> 
  clean_names() |> 
  filter(indicator %in% indicators_we_want) |> 
  select(country, iso3, indicator, matches("f\\d{4}")) 

green_debt_subset$region <- countrycode(green_debt_subset$iso3, "iso3c", "region")

green_bonds_tidy <- green_debt_subset |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "year",
    values_to = "issuance_bn_usd",
    names_transform = readr::parse_number,
    values_drop_na = TRUE
  )

green_bonds_cumulative <- green_bonds_tidy |>
  select(-iso3) |> 
  arrange(region) |> 
  group_by(region) |> 
  mutate(cumulative_bn_usd = cumsum(issuance_bn_usd)) |>
  slice_max(order_by = cumulative_bn_usd) |> 
  arrange(cumulative_bn_usd|> desc()) |> 
  select(region, cumulative_bn_usd) |> 
  ungroup()

green_bonds_cumulative |>
  ggplot(aes(x = cumulative_bn_usd, 
             y = fct_reorder(.f = region, .x = cumulative_bn_usd)
             )) +
  geom_col(fill = "maroon") +
  theme_minimal() +
  scale_x_continuous(labels = scales::label_dollar(suffix = " bn"),
                     expand = c(0,0)) +
  labs(title = "Cumulative Issuance of Green Bonds by Region",
       subtitle = "Europe and Central Asia are responsible for the highest green bond issuance",
       x = "Cumulative Issuance (USD)",
       y = "Region",
       caption = "Data: IMF Climate Change Dashboard | Insight: Pinandito Wisambudi")

Homework Problem 2

  • Use the full green_debt dataset

  • Use clean_names() to make the variable names snake_case

  • Filter out observations where type_of_issuer is “Not Applicable”

  • Use the tools taught in this chapter to provide a compelling data visualization and some repeatable factoids that provide actionable insights about green bond issuers.

library(ggplot2)
library(dplyr)
library(forcats)
library(scales)

Attaching package: 'scales'
The following object is masked from 'package:purrr':

    discard
The following object is masked from 'package:readr':

    col_factor
green_bond_issuer <- green_debt |>
  clean_names() |>
  filter(type_of_issuer != "Not Applicable")

ggplot(green_bond_issuer, aes(x = fct_reorder(.f = type_of_issuer, .x = f2021), weight = f2021)) +
  geom_bar(fill = "pink") +
  scale_y_continuous(labels = label_dollar(suffix = " bn"), expand = c(0, 0)) +
  labs(x = "",
       y = "Issuance (USD)",
       title = "Green Bond Issuance in 2021",
       subtitle = "In 2021, the highest green bond issuers are Nonfinancial Corporations") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Tilting x-axis labels

Homework Problem 3

  • Repeat the process from problem 2 for use_of_proceed and for principal_currency

    • What are green bond proceeds used for?

    • What do we know about the currency of issuance? Is that changing over time?

  1. Use of proceeds
green_debt_uop <- green_debt |>
  clean_names() |>
  filter(use_of_proceed != "Not Applicable") |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    values_drop_na = TRUE
  ) |>
  filter(!grepl("e", Issuance_bn_usd)) |>
  select(use_of_proceed, Year, Issuance_bn_usd)

unique(green_debt_uop$use_of_proceed)
 [1] "Access to Essential Services"                                                      
 [2] "Acquiring and distribution of vaccine"                                             
 [3] "Acquisition"                                                                       
 [4] "Affordable Basic Infrastructure"                                                   
 [5] "Agriculture"                                                                       
 [6] "Alternative Energy"                                                                
 [7] "Aquatic Biodiversity Conservation"                                                 
 [8] "Capital expenditure"                                                               
 [9] "Capital expenditure/Financing expenses"                                            
[10] "Carbon reduction through reforestation and avoided deforestation"                  
[11] "China Urban Construction"                                                          
[12] "Circular Economy Adapted/Eco-efficient Products, Production Technologies/Processes"
[13] "Clean Transport"                                                                   
[14] "Climate Change Adaptation"                                                         
[15] "E-education programs - Education Projects"                                         
[16] "Economic Development"                                                              
[17] "Electric & Public Power"                                                           
[18] "Eligible Green Projects"                                                           
[19] "Employee stock ownership plan"                                                     
[20] "Energy Efficiency"                                                                 
[21] "Environmental Protection Projects"                                                 
[22] "Environmentally Sustainable Products"                                              
[23] "Equipment Upgrade/Construction"                                                    
[24] "Financing of Subordinated Loan"                                                    
[25] "Funding new technologies to reduce GHS emissions"                                  
[26] "Gas"                                                                               
[27] "General Purpose"                                                                   
[28] "General Purpose/Acquisition"                                                       
[29] "General Purpose/Refinance"                                                         
[30] "General Purpose/Working Capital"                                                   
[31] "Green Construction/Buildings"                                                      
[32] "Higher Education"                                                                  
[33] "Industrial Development"                                                            
[34] "Infrastructure"                                                                    
[35] "Land Preservation"                                                                 
[36] "Merger or Acquisition"                                                             
[37] "Other"                                                                             
[38] "Other Education"                                                                   
[39] "Other Housing"                                                                     
[40] "Other Public Service"                                                              
[41] "Other Transportation"                                                              
[42] "Pandemic"                                                                          
[43] "Pollution Control"                                                                 
[44] "Pollution Prevention & Control"                                                    
[45] "Production/Supply of Cannabis"                                                     
[46] "Project Finance"                                                                   
[47] "Property Expendit (acquisit/development)"                                          
[48] "Redeem Existing Bonds or Securities"                                               
[49] "Refinance/Financing expenses"                                                      
[50] "Renewable Energy Projects"                                                         
[51] "Repay Bank Loan or Bridge Financing"                                               
[52] "Repay Intercompany Debt"                                                           
[53] "Ship finance"                                                                      
[54] "Social Housing/Affordable Housing"                                                 
[55] "Socioeconomic Advancement And Empowerment"                                         
[56] "Solar projects"                                                                    
[57] "Sustainable Development Projects"                                                  
[58] "Sustainable Economic Growth"                                                       
[59] "Sustainable Forestry"                                                              
[60] "Sustainable Management of Land Use"                                                
[61] "Sustainable Management of Living Natural Resources"                                
[62] "Sustainable Transport"                                                             
[63] "Sustainable Water or Wastewater management"                                        
[64] "Terrestrial Biodiversity Conservation"                                             
[65] "The Belt and Road Initiative"                                                      
[66] "Waste Management"                                                                  
[67] "Water & Sewer"                                                                     
[68] "Wind projects"                                                                     
[69] "Working capital"                                                                   
  1. Principal Currency
green_debt_pc <- green_debt |>
  clean_names() |>
  filter(principal_currency != "Not Applicable") |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    values_drop_na = TRUE
  ) |>
  select(principal_currency, Year, Issuance_bn_usd) |>
  filter(!grepl("e", Issuance_bn_usd))

unique(green_debt_pc$principal_currency)
 [1] "Argentinian Peso"                         
 [2] "Argentinian Unidades de Valor Adquisitivo"
 [3] "Australian Dollar"                        
 [4] "Bangladeshi Taka"                         
 [5] "Brazilian Real"                           
 [6] "British Pound"                            
 [7] "Canadian Dollar"                          
 [8] "Chilean Peso"                             
 [9] "Chilean Unidad de Fomento"                
[10] "Chinese Yuan"                             
[11] "Colombian Peso"                           
[12] "Czech Koruna"                             
[13] "Danish Krone"                             
[14] "Deutsche Mark"                            
[15] "Euro"                                     
[16] "European Currency Unit"                   
[17] "Fijian Dollar"                            
[18] "Hong Kong Dollar"                         
[19] "Hungarian Forint"                         
[20] "Icelandic Krona"                          
[21] "Indian Rupee"                             
[22] "Indonesian Rupiah"                        
[23] "Japanese Yen"                             
[24] "Kazakhstani Tenge"                        
[25] "Macanese Pataca"                          
[26] "Malaysian Ringgit"                        
[27] "Mexican Peso"                             
[28] "Mexican Unidad de Inversion"              
[29] "Moroccan Dirham"                          
[30] "Namibian Dollar"                          
[31] "New Zealand Dollar"                       
[32] "Nigerian Naira"                           
[33] "Norwegian Krone"                          
[34] "Peruvian Sol"                             
[35] "Philippine Peso"                          
[36] "Polish Zloty"                             
[37] "Romanian Leu"                             
[38] "Russian Ruble"                            
[39] "Singapore Dollar"                         
[40] "South African Rand"                       
[41] "South Korean Won"                         
[42] "Sri Lankan Rupee"                         
[43] "Swedish Krona"                            
[44] "Swiss Franc"                              
[45] "Taiwanese Dollar"                         
[46] "Thai Baht"                                
[47] "Turkish Lira"                             
[48] "Ukrainian Hryvnia"                        
[49] "US Dollar"                                
[50] "Vietnamese Dong"                          
currency_counts <- green_debt_pc %>%
  group_by(principal_currency) %>%
  summarize(total_issuance = sum(Issuance_bn_usd)) %>%
  arrange(desc(total_issuance))

desc_chart <- ggplot(head(currency_counts, 15), aes(x = total_issuance, y = reorder(principal_currency, total_issuance))) +
  geom_col(fill = "magenta") +
  scale_x_continuous(labels = label_dollar(suffix = " bn"), expand = c(0, 0)) +
  labs(x = "Total Issuance in Billion USD", y = "Currency", 
       title = "Green Bond Issuance by Currencies") +
  theme_minimal() 

desc_chart