Data Tidying Practice Problems

Author

Yuanling Zeng

library("tidyverse")
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library("janitor")

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
library("ggplot2")

Problem 1:

  • Use the countrycode package to add the region as a variable to green_debt_subset.

  • Calculate the cumulative issuance of green bonds by region.

  • Use ggplot2 to make a data visualization comparing regional cumulative issuance.

  • Make your plot look professionally presentable, as we did above.

library("countrycode")
imf_climate_dashboards_green_debt_url <- 
  "https://opendata.arcgis.com/datasets/8e2772e0b65f4e33a80183ce9583d062_0.csv"
green_debt <- imf_climate_dashboards_green_debt_url |> 
  read_csv() 
Rows: 355 Columns: 42
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (12): Country, ISO2, ISO3, Indicator, Unit, Source, CTS_Code, CTS_Name, ...
dbl (30): ObjectId, F1985, F1986, F1987, F1990, F1991, F1992, F1993, F1994, ...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
indicators_we_want <- c("Green Bond Issuances by Country", "Sovereign Green Bond Issuances")

green_debt_subset <- green_debt |> 
  clean_names() |> 
  mutate(region = countrycode(iso2, "iso2c", "region"), 
         .before = iso2) 
green_debt_subset
# A tibble: 355 × 43
   object_id country region iso2  iso3  indicator unit  source cts_code cts_name
       <dbl> <chr>   <chr>  <chr> <chr> <chr>     <chr> <chr>  <chr>    <chr>   
 1         1 Argent… Latin… AR    ARG   Green Bo… Bill… Refin… ECFFI    Green B…
 2         2 Austra… East … AU    AUS   Green Bo… Bill… Refin… ECFFI    Green B…
 3         3 Austria Europ… AT    AUT   Green Bo… Bill… Refin… ECFFI    Green B…
 4         4 Austria Europ… AT    AUT   Sovereig… Bill… Refin… ECFF     Green B…
 5         5 Bangla… South… BD    BGD   Green Bo… Bill… Refin… ECFFI    Green B…
 6         6 Belaru… Europ… BY    BLR   Green Bo… Bill… Refin… ECFFI    Green B…
 7         7 Belaru… Europ… BY    BLR   Sovereig… Bill… Refin… ECFF     Green B…
 8         8 Belgium Europ… BE    BEL   Green Bo… Bill… Refin… ECFFI    Green B…
 9         9 Belgium Europ… BE    BEL   Sovereig… Bill… Refin… ECFF     Green B…
10        10 Bermuda North… BM    BMU   Green Bo… Bill… Refin… ECFFI    Green B…
# ℹ 345 more rows
# ℹ 33 more variables: cts_full_descriptor <chr>, type_of_issuer <chr>,
#   use_of_proceed <chr>, principal_currency <chr>, f1985 <dbl>, f1986 <dbl>,
#   f1987 <dbl>, f1990 <dbl>, f1991 <dbl>, f1992 <dbl>, f1993 <dbl>,
#   f1994 <dbl>, f1999 <dbl>, f2000 <dbl>, f2002 <dbl>, f2003 <dbl>,
#   f2004 <dbl>, f2007 <dbl>, f2008 <dbl>, f2009 <dbl>, f2010 <dbl>,
#   f2011 <dbl>, f2012 <dbl>, f2013 <dbl>, f2014 <dbl>, f2015 <dbl>, …
green_debt_cumulative <- green_debt_subset |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    values_drop_na = TRUE
  ) |>
  filter(!is.na(region)) |>
  group_by(region) |>
  summarize(cumulative_issuance = sum(Issuance_bn_usd)) |>
  ungroup()
green_debt_cumulative
# A tibble: 7 × 2
  region                     cumulative_issuance
  <chr>                                    <dbl>
1 East Asia & Pacific                      586. 
2 Europe & Central Asia                   1395. 
3 Latin America & Caribbean                 99.8
4 Middle East & North Africa                10.3
5 North America                            239. 
6 South Asia                                15.0
7 Sub-Saharan Africa                        16.2
green_bond_region <- green_debt_cumulative |>
  group_by(region) |> 
  slice_max(n = 1, order_by = cumulative_issuance) |> 
  arrange(cumulative_issuance|> desc()) |> 
  select(region, cumulative_issuance) |> 
  ungroup()
green_bond_region
# A tibble: 7 × 2
  region                     cumulative_issuance
  <chr>                                    <dbl>
1 Europe & Central Asia                   1395. 
2 East Asia & Pacific                      586. 
3 North America                            239. 
4 Latin America & Caribbean                 99.8
5 Sub-Saharan Africa                        16.2
6 South Asia                                15.0
7 Middle East & North Africa                10.3
regional_cumulative_issuance <- ggplot(green_bond_region, aes(x = cumulative_issuance, y = reorder(region, cumulative_issuance))) +
  geom_col(fill = "forestgreen") +
  theme_minimal() +
  scale_x_continuous(labels = scales::label_dollar(suffix = " BN"), expand = c(0, 0)) +
  labs(x = "Cumulative Issuance", y = "Region", 
       title = "Regional Cumulative Issuance of Green Bonds")
regional_cumulative_issuance

Problem 2:

  • Use the full green_debt dataset

  • Use clean_names() to make the variable names snake_case

  • Filter out observations where type_of_issuer is “Not Applicable”

  • Use the tools taught in this chapter to provide a compelling data visualization and some repeatable factoids that provide actionable insights about green bond issuers.

data visualization

green_debt_issuer <- green_debt |> 
  clean_names() |>
  filter(type_of_issuer != "Not Applicable")

green_debt_issuer <- green_debt_issuer |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    names_transform = readr::parse_number,
    values_drop_na = TRUE
    )
green_debt_issuer|>
  filter(Year>=2010)|>
  ggplot(aes(x = Year, y = Issuance_bn_usd, fill = type_of_issuer)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(
    title = "Green bond Issuance By Type of Issuers After Year 2010", 
    x = "Year", 
    y = "Green Bond Insurance in $BN"
  ) +
theme_minimal() +
  theme(legend.position = "right")

factoid

green_debt_issuer_tidy <- green_debt_issuer |> 
  filter(type_of_issuer %in% c("Banks", "Nonfinancial corporations")) |> 
  group_by(Year, type_of_issuer) |> 
  summarize(Issuance_bn_usd = sum(Issuance_bn_usd, na.rm = TRUE), .groups = 'drop') |> 
  pivot_wider(names_from = type_of_issuer,
              values_from = Issuance_bn_usd,
              values_fill = 0) |> 
  ungroup() |> 
  clean_names()
green_debt_issuer_tidy
# A tibble: 21 × 3
    year  banks nonfinancial_corporations
   <dbl>  <dbl>                     <dbl>
 1  1985 0.03                       0.025
 2  1986 0.3                        0    
 3  1987 0.0751                     0    
 4  1990 0                          0    
 5  1993 0                          0.168
 6  1999 0.0482                     0    
 7  2000 0.0533                     0    
 8  2002 0.148                      0    
 9  2003 0.01                       0.172
10  2004 0.109                      0    
# ℹ 11 more rows
nonfinancial_vs_banks_green_bonds_factoid <- green_debt_issuer_tidy |>
  mutate(
    # Percent more:
    # "Nonfinancial corporations issued x% more than Banks"
    nonfinancial_vs_banks_pct_more = ((nonfinancial_corporations/banks) - 1) * 100
  )
nonfinancial_vs_banks_green_bonds_factoid
# A tibble: 21 × 4
    year  banks nonfinancial_corporations nonfinancial_vs_banks_pct_more
   <dbl>  <dbl>                     <dbl>                          <dbl>
 1  1985 0.03                       0.025                          -16.7
 2  1986 0.3                        0                             -100  
 3  1987 0.0751                     0                             -100  
 4  1990 0                          0                              NaN  
 5  1993 0                          0.168                          Inf  
 6  1999 0.0482                     0                             -100  
 7  2000 0.0533                     0                             -100  
 8  2002 0.148                      0                             -100  
 9  2003 0.01                       0.172                         1616. 
10  2004 0.109                      0                             -100  
# ℹ 11 more rows
factoid_2016 <- nonfinancial_vs_banks_green_bonds_factoid |> 
  filter(year == 2016) |> 
  pull(nonfinancial_vs_banks_pct_more) |> 
  round()
factoid_2016
[1] -33
factoid_2022 <- nonfinancial_vs_banks_green_bonds_factoid |> 
  filter(year == 2022) |> 
  pull(nonfinancial_vs_banks_pct_more) |> 
  round()
factoid_2022
[1] 2

Problem 3:

  • Repeat the process from problem 2 for use_of_proceed and for principal_currency

    • What are green bond proceeds used for?

    • What do we know about the currency of issuance? Is that changing over time?

use_of_proceed

green_debt_use <- green_debt |>
  clean_names() |>
  filter(use_of_proceed != "Not Applicable") |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    values_drop_na = TRUE
  ) |>
  filter(!grepl("e", Issuance_bn_usd)) |>
  select(use_of_proceed, Year, Issuance_bn_usd)

unique(green_debt_use$use_of_proceed)
 [1] "Access to Essential Services"                                                      
 [2] "Acquiring and distribution of vaccine"                                             
 [3] "Acquisition"                                                                       
 [4] "Affordable Basic Infrastructure"                                                   
 [5] "Agriculture"                                                                       
 [6] "Alternative Energy"                                                                
 [7] "Aquatic Biodiversity Conservation"                                                 
 [8] "Capital expenditure"                                                               
 [9] "Capital expenditure/Financing expenses"                                            
[10] "Carbon reduction through reforestation and avoided deforestation"                  
[11] "China Urban Construction"                                                          
[12] "Circular Economy Adapted/Eco-efficient Products, Production Technologies/Processes"
[13] "Clean Transport"                                                                   
[14] "Climate Change Adaptation"                                                         
[15] "E-education programs - Education Projects"                                         
[16] "Economic Development"                                                              
[17] "Electric & Public Power"                                                           
[18] "Eligible Green Projects"                                                           
[19] "Employee stock ownership plan"                                                     
[20] "Energy Efficiency"                                                                 
[21] "Environmental Protection Projects"                                                 
[22] "Environmentally Sustainable Products"                                              
[23] "Equipment Upgrade/Construction"                                                    
[24] "Financing of Subordinated Loan"                                                    
[25] "Funding new technologies to reduce GHS emissions"                                  
[26] "Gas"                                                                               
[27] "General Purpose"                                                                   
[28] "General Purpose/Acquisition"                                                       
[29] "General Purpose/Refinance"                                                         
[30] "General Purpose/Working Capital"                                                   
[31] "Green Construction/Buildings"                                                      
[32] "Higher Education"                                                                  
[33] "Industrial Development"                                                            
[34] "Infrastructure"                                                                    
[35] "Land Preservation"                                                                 
[36] "Merger or Acquisition"                                                             
[37] "Other"                                                                             
[38] "Other Education"                                                                   
[39] "Other Housing"                                                                     
[40] "Other Public Service"                                                              
[41] "Other Transportation"                                                              
[42] "Pandemic"                                                                          
[43] "Pollution Control"                                                                 
[44] "Pollution Prevention & Control"                                                    
[45] "Production/Supply of Cannabis"                                                     
[46] "Project Finance"                                                                   
[47] "Property Expendit (acquisit/development)"                                          
[48] "Redeem Existing Bonds or Securities"                                               
[49] "Refinance/Financing expenses"                                                      
[50] "Renewable Energy Projects"                                                         
[51] "Repay Bank Loan or Bridge Financing"                                               
[52] "Repay Intercompany Debt"                                                           
[53] "Ship finance"                                                                      
[54] "Social Housing/Affordable Housing"                                                 
[55] "Socioeconomic Advancement And Empowerment"                                         
[56] "Solar projects"                                                                    
[57] "Sustainable Development Projects"                                                  
[58] "Sustainable Economic Growth"                                                       
[59] "Sustainable Forestry"                                                              
[60] "Sustainable Management of Land Use"                                                
[61] "Sustainable Management of Living Natural Resources"                                
[62] "Sustainable Transport"                                                             
[63] "Sustainable Water or Wastewater management"                                        
[64] "Terrestrial Biodiversity Conservation"                                             
[65] "The Belt and Road Initiative"                                                      
[66] "Waste Management"                                                                  
[67] "Water & Sewer"                                                                     
[68] "Wind projects"                                                                     
[69] "Working capital"                                                                   

principal_currency

green_debt_currency <- green_debt |>
  clean_names() |>
  filter(principal_currency != "Not Applicable") |>
  pivot_longer(
    cols = matches("f\\d{4}"),
    names_to = "Year", 
    values_to = "Issuance_bn_usd",
    values_drop_na = TRUE
  ) |>
  select(principal_currency, Year, Issuance_bn_usd) |>
  filter(!grepl("e", Issuance_bn_usd))

unique(green_debt_currency$principal_currency)
 [1] "Argentinian Peso"                         
 [2] "Argentinian Unidades de Valor Adquisitivo"
 [3] "Australian Dollar"                        
 [4] "Bangladeshi Taka"                         
 [5] "Brazilian Real"                           
 [6] "British Pound"                            
 [7] "Canadian Dollar"                          
 [8] "Chilean Peso"                             
 [9] "Chilean Unidad de Fomento"                
[10] "Chinese Yuan"                             
[11] "Colombian Peso"                           
[12] "Czech Koruna"                             
[13] "Danish Krone"                             
[14] "Deutsche Mark"                            
[15] "Euro"                                     
[16] "European Currency Unit"                   
[17] "Fijian Dollar"                            
[18] "Hong Kong Dollar"                         
[19] "Hungarian Forint"                         
[20] "Icelandic Krona"                          
[21] "Indian Rupee"                             
[22] "Indonesian Rupiah"                        
[23] "Japanese Yen"                             
[24] "Kazakhstani Tenge"                        
[25] "Macanese Pataca"                          
[26] "Malaysian Ringgit"                        
[27] "Mexican Peso"                             
[28] "Mexican Unidad de Inversion"              
[29] "Moroccan Dirham"                          
[30] "Namibian Dollar"                          
[31] "New Zealand Dollar"                       
[32] "Nigerian Naira"                           
[33] "Norwegian Krone"                          
[34] "Peruvian Sol"                             
[35] "Philippine Peso"                          
[36] "Polish Zloty"                             
[37] "Romanian Leu"                             
[38] "Russian Ruble"                            
[39] "Singapore Dollar"                         
[40] "South African Rand"                       
[41] "South Korean Won"                         
[42] "Sri Lankan Rupee"                         
[43] "Swedish Krona"                            
[44] "Swiss Franc"                              
[45] "Taiwanese Dollar"                         
[46] "Thai Baht"                                
[47] "Turkish Lira"                             
[48] "Ukrainian Hryvnia"                        
[49] "US Dollar"                                
[50] "Vietnamese Dong"                          
currency_counts <- green_debt_currency %>%
  group_by(principal_currency) %>%
  summarize(total_issuance = sum(Issuance_bn_usd)) %>%
  arrange(desc(total_issuance))

top_10_chart <- ggplot(head(currency_counts, 10), aes(x = total_issuance, y = reorder(principal_currency, total_issuance))) +
  geom_col(fill = "forestgreen") +
  labs(x = "Total Issuance in $BN", y = "Currency", 
       title = "Top 10 Green Bond Currencies") +
  theme_minimal() 
top_10_chart