Tidying plan: 1.preprocess data by using Excel (remove duplicates,format data and correct Spanish typso in cells) 2.load datasets 3.filter and select data of interest 4.export data
Step1: finished in Excel Step2:loading the data
# Load necessary libraries
library(tidyverse)
library(readxl)
#load datasets
bonds_new <- read_csv("/Users/xingyuning/Desktop/susfin/final project/00_data_raw/bonds.csv")
Rows: 166 Columns: 19── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (14): link, handle, ISIN, CUSIP, FIGI, ticker, bond_name, issuer_name, issuer_type, i...
dbl (4): issuance_year, volume_usd_billion, volume_usd, volume_bond_currency
lgl (1): fund
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
measurements_new <- read_excel("/Users/xingyuning/Desktop/susfin/final project/00_data_raw/measurements.xlsx")
Warning: Expecting logical in N2972 / R2972C14: got 'ACM0002'Warning: Expecting logical in N2973 / R2973C14: got 'ACM0002'Warning: Expecting logical in N2974 / R2974C14: got 'ACM0002'Warning: Expecting logical in N2975 / R2975C14: got 'ACM0002'Warning: Expecting logical in N3044 / R3044C14: got 'ACM0002'Warning: Expecting logical in N3045 / R3045C14: got 'ACM0002'Warning: Expecting logical in N3046 / R3046C14: got 'ACM0002'Warning: Expecting logical in N3047 / R3047C14: got 'ACM0002'Warning: Expecting logical in N3048 / R3048C14: got 'ACM0002'Warning: Expecting logical in N3049 / R3049C14: got 'ACM0002'
tranches_new <- read_csv("/Users/xingyuning/Desktop/susfin/final project/00_data_raw/tranches.csv")
Rows: 190 Columns: 10── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (6): bond_handle, bond_name, market, stock_exchange, tranche_currency, bond_currency
dbl (4): volume_offered_tranche_currency, volume_raised_tranche_currency, volume_raised_b...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
projects_new <- read_csv("/Users/xingyuning/Desktop/susfin/final project/00_data_raw/projects.csv")
Rows: 1400 Columns: 15── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (10): bond_handle, bond_name, nomenclature, category, subcategory, project, parent_pr...
dbl (4): latitude, longitude, cofinancing_percentage, represented_projects
lgl (1): refinancing
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
allocations_new <- read_csv("/Users/xingyuning/Desktop/susfin/final project/00_data_raw/allocations.csv")
Rows: 2278 Columns: 17── Column specification ─────────────────────────────────────────────────────────────────────
Delimiter: ","
chr (11): bond_handle, bond_name, nomenclature, category, subcategory, project, location,...
dbl (5): allocated_allocation_currency, disbursed_allocation_currency, disbursed_bond_cu...
lgl (1): refinancing
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
global_greenbonds <- read_excel("/Users/xingyuning/Desktop/susfin/final project/00_data_raw/Sustainable_debt_gloabl.xls")
Step2:Examine the data structure
glimpse(bonds_new)
Rows: 166
Columns: 19
$ link <chr> "https://www.greenbondtransparency.com/bond-info/?handle=826fb…
$ handle <chr> "826fb91751eb4970ac578c6e8f0929fc", "270dc78108554e578876030bd…
$ ISIN <chr> "US00105DAG07", "BRVERTCRA2K6", "US210314AB60", "BRRANIDBS031"…
$ CUSIP <chr> "00105DAG0", NA, "210314AB6", NA, NA, NA, "AM3592905", NA, NA,…
$ FIGI <chr> "BBG00QGRGP11", NA, "BBG00NW1PM87", NA, NA, "BBG00ZLGVWW3", "B…
$ ticker <chr> NA, NA, NA, "RANI14", NA, NA, "90_GCDMXCB_16V", NA, NA, NA, NA…
$ bond_name <chr> "AES Gener Green Bond", "CRA ComBio Green Bond 2028", "ISA CTM…
$ issuer_name <chr> "AES Andes", "ComBio Energia", "ISA CTM - Consorcio Transmanta…
$ issuer_type <chr> "Non-Financial Corporate", "Non-Financial Corporate", "Non-Fin…
$ issuer_jurisdiction <chr> "Chile", "Brazil", "Peru", "Brazil", "Colombia", "Chile", "Mex…
$ fund <lgl> FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,…
$ framework_link <chr> NA, NA, "https://www.greenbondtransparency.com/download-docume…
$ issuance_year <dbl> 2019, 2021, 2019, 2021, 2019, 2021, 2016, 2020, 2019, 2019, 20…
$ issuance_date <chr> "2019/10/7 00:00", "2021/11/23 00:00", "2019/4/16 00:00", "202…
$ maturity_date <chr> "2079/10/7 00:00", "2028/11/16 00:00", "2034/4/16 00:00", "202…
$ volume_usd_billion <dbl> 4.50, 0.27, 6.00, 0.11, 2.06, 0.54, 0.49, 2.63, 3.43, 0.82, 1.…
$ volume_usd <dbl> 450000000, 26572188, 600000000, 10531304, 206408402, 54000000,…
$ volume_bond_currency <dbl> 450000000, 150000000, 600000000, 60000000, 657000000000, 54000…
$ bond_currency <chr> "USD", "BRL", "USD", "BRL", "COP", "USD", "MXN", "USD", "BRL",…
glimpse(measurements_new)
Rows: 4,820
Columns: 16
$ bond_handle <chr> "65809b95fbf34e898b30b546b2d2a6a8", "65809b95fbf34e898b30b…
$ bond_name <chr> "AES Argentina Green Bond", "AES Argentina Green Bond", "A…
$ nomenclature <chr> "ISO", "EU Taxonomy Green Bonds Standard", "ICMA Green Bon…
$ category <chr> "Other", "Other", "Renewable energy", "Energy", "Other", "…
$ subcategory <chr> "Unspecified", "Unspecified", "Generation", "Wind", "Unspe…
$ project <chr> "Vientos Neuquinos", "Vientos Neuquinos", "Vientos Neuquin…
$ location <chr> "Argentina", "Argentina", "Argentina", "Argentina", "Chile…
$ indicator <chr> "tCO2e avoided", "tCO2e avoided", "tCO2e avoided", "tCO2e …
$ unit <chr> "metric ton", "metric ton", "metric ton", "metric ton", "m…
$ planned_value <dbl> 67584, 67584, 67584, 67584, 140982, 140982, 140982, 140982…
$ percentage <dbl> 1.0032552, 1.0032552, 1.0032552, 1.0032552, 1.1185825, 1.1…
$ measured_value <dbl> 67804.00, 67804.00, 67804.00, 67804.00, 157700.00, 157700.…
$ can_be_aggregated <chr> "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t", "t"…
$ methodology_code <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
$ measurement_period_start <dttm> 2020-10-01, 2020-10-01, 2020-10-01, 2020-10-01, 2020-01-0…
$ measurement_period_end <dttm> 2020-12-31, 2020-12-31, 2020-12-31, 2020-12-31, 2020-12-3…
glimpse(tranches_new)
Rows: 190
Columns: 10
$ bond_handle <chr> "ee7a68239dbe4664adad596c19403c93", "ee7a68239dbe46…
$ bond_name <chr> "Suzano Papel e Celulose Green Bond 2026", "Suzano …
$ market <chr> "United States", "United States", "Mexico", "Mexico…
$ stock_exchange <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA,…
$ tranche_currency <chr> "USD", "USD", "MXN", "MXN", "USD", "BRL", "USD", "U…
$ volume_offered_tranche_currency <dbl> 500000000, 200000000, 1000000000, 2000000000, 50000…
$ volume_raised_tranche_currency <dbl> 500000000, 200000000, 1000000000, 2000000000, 50000…
$ bond_currency <chr> "USD", "USD", "MXN", "MXN", "USD", "BRL", "USD", "U…
$ volume_raised_bond_currency <dbl> 500000000, 200000000, 1000000000, 2000000000, 50000…
$ volume_raised_usd <dbl> 500000000, 200000000, 49269093, 112585354, 50000000…
glimpse(projects_new)
Rows: 1,400
Columns: 15
$ bond_handle <chr> "9fc875791e3143a1bb3f864a304e8185", "9fc875791e3143a1bb3f864…
$ bond_name <chr> "Green Bond 2017 - Davivienda", "Green Bond 2017 - Daviviend…
$ nomenclature <chr> "ISO", "EU Taxonomy Green Bonds Standard", "ICMA Green Bonds…
$ category <chr> "Other", "Other", "Green buildings", "Buildings", "Renewable…
$ subcategory <chr> "Unspecified", "Unspecified", "Residential", "Residential", …
$ project <chr> "Residential Building #20", "Residential Building #20", "Res…
$ parent_project_handle <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "79d4df39f23b4759851…
$ location <chr> "Colombia", "Colombia", "Colombia", "Colombia", "Chile", "Ch…
$ latitude <dbl> NA, NA, NA, NA, -40.224965, -40.224965, NA, NA, NA, NA, NA, …
$ longitude <dbl> NA, NA, NA, NA, -72.39668, -72.39668, NA, NA, NA, NA, NA, NA…
$ host_organisation <chr> "Anonymous", "Anonymous", "Anonymous", "Anonymous", "Ministr…
$ refinancing <lgl> FALSE, FALSE, FALSE, FALSE, TRUE, TRUE, TRUE, TRUE, TRUE, TR…
$ cofinancing_percentage <dbl> 1.00, 1.00, 1.00, 1.00, 1.00, 1.00, 0.74, 0.74, 0.26, 0.26, …
$ represented_projects <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, …
$ description <chr> "EDGE", "EDGE", "EDGE", "EDGE", "Construction of 151 individ…
glimpse(allocations_new)
Rows: 2,278
Columns: 17
$ bond_handle <chr> "1fd5c178da6148569a42c52f45981714", "1fd5c178da614856…
$ bond_name <chr> "Bancolombia Bono Verde 1", "Bancolombia Bono Verde 1…
$ nomenclature <chr> "Climate Bonds Initiative Project Classification", "I…
$ category <chr> "Energy", "Renewable energy", "Energy", "Renewable en…
$ subcategory <chr> "Hydro", "Generation", "Hydro", "Generation", "Hydro"…
$ project <chr> "GB 1 Proyecto 1", "GB 1 Proyecto 1", "GB 1 Proyecto …
$ location <chr> "Colombia", "Colombia", "Colombia", "Colombia", "Colo…
$ date_allocation <chr> "2016/12/5 00:00", "2016/12/5 00:00", "2016/12/5 00:0…
$ date_disbursement <chr> "2016/12/5 00:00", "2016/12/5 00:00", "2016/12/5 00:0…
$ allocation_currency <chr> "COP", "COP", "COP", "COP", "COP", "COP", "COP", "COP…
$ allocated_allocation_currency <dbl> 5271430000, 5271430000, 2488157000, 2488157000, 79175…
$ disbursed_allocation_currency <dbl> 5271430000, 5271430000, 2488157000, 2488157000, 79175…
$ bond_currency <chr> "COP", "COP", "COP", "COP", "COP", "COP", "COP", "COP…
$ disbursed_bond_currency <dbl> 5271430000, 5271430000, 2488157000, 2488157000, 79175…
$ disbursed_usd <dbl> 1722104, 1722104, 812847, 812847, 25865474, 25865474,…
$ refinancing <lgl> TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE,…
$ cofinancing_percentage <dbl> 0.92, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92, 0.92,…
glimpse(global_greenbonds)
Rows: 44
Columns: 7
$ issuance_year <dbl> 2013, 2013, 2013, 2013, 2014, 2014, 2014, 2014, 2015, 2015, 2015…
$ issuance_date <dttm> 2013-03-31, 2013-06-30, 2013-09-30, 2013-12-31, 2014-03-31, 201…
$ `Mature markets` <dbl> 0.6275551, 1.4372590, 0.3917766, 3.7781835, 2.4387376, 7.3866435…
$ `Emerging markets` <dbl> 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.00000000, 0.13…
$ `Offshore centers` <dbl> 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000, 0.0000000…
$ Supranationals <dbl> 1.0230859, 0.1060526, 5.1475775, 2.1562222, 2.2054462, 4.2860049…
$ Global <dbl> 1.650641, 1.543312, 5.539354, 5.934406, 4.644184, 11.809595, 10.…
Step3:filter and select data of interest
# Create a modified version of the data frame
bonds_new_LAC <- bonds_new %>%
group_by(issuance_year) %>% # Group by year
mutate(LAC_volume_usd = sum(volume_usd_billion, na.rm = TRUE)) %>%
ungroup() # Ungroup to stop
# view processed data
print(bonds_new_LAC)
# Add a new column to the global green bond data
global_greenbonds <- global_greenbonds %>%
group_by(issuance_year) %>% # Group by year
mutate(global_volume_usd = sum(Global, na.rm = TRUE)) %>%
ungroup() # Ungroup to stop the grouping effect
# View the summary
print(global_greenbonds)
# Now merge with your other dataset
comparison <- left_join(bonds_new_LAC, global_greenbonds, by = "issuance_year")
Warning: Detected an unexpected many-to-many relationship between `x` and `y`.
# View the comparison
print(comparison)
Step4: export data
dir.create("/Users/xingyuning/Desktop/susfin/final project/01_data_cleaning", recursive = TRUE)
write.csv(bonds_new_LAC, "/Users/xingyuning/Desktop/susfin/final project/01_data_cleaning/bonds_new_LAC.csv", row.names = FALSE)
write.csv(global_greenbonds, "/Users/xingyuning/Desktop/susfin/final project/01_data_cleaning/global_greenbonds.csv", row.names = FALSE)