library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.0
## ✔ readr   2.1.2     ✔ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(tidyquant)
## 载入需要的程辑包:lubridate
## 
## 载入程辑包:'lubridate'
## 
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
## 
## 载入需要的程辑包:PerformanceAnalytics
## 载入需要的程辑包:xts
## 载入需要的程辑包:zoo
## 
## 载入程辑包:'zoo'
## 
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## 
## 
## 载入程辑包:'xts'
## 
## The following objects are masked from 'package:dplyr':
## 
##     first, last
## 
## 
## 载入程辑包:'PerformanceAnalytics'
## 
## The following object is masked from 'package:graphics':
## 
##     legend
## 
## 载入需要的程辑包:quantmod
## 载入需要的程辑包:TTR
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(lubridate)


library(plotly)
## 
## 载入程辑包:'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(esquisse)
library(here)
## here() starts at /Users/irisyan/Sustainable finance final project Jing Yan
library(janitor)
## 
## 载入程辑包:'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
library(ggthemes)
library(ggrepel)
library(gt)

library(countrycode)
library(wbstats)
# Path helper rooted at the project's raw-data folder:
# folder_path("x.csv") is here("00_data_raw", "x.csv").
folder_path <- purrr::partial(here::here, "00_data_raw")

# List the files in the raw-data folder.
list.files(path = folder_path())
## [1] "country_features_2022-10.csv"    "emissions_dataset_full.csv"     
## [3] "emissions_dataset.csv"           "Green_bond_full_dataset.csv"    
## [5] "imf_weo_by_country_2022_oct.csv"
# Read emissions_dataset.csv from the raw-data folder.
emissions_dataset <- read_csv(
  folder_path("emissions_dataset.csv")
)
## Rows: 2820 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (3): country_name, iso3c, em_dm
## dbl (28): year, gdp_usd_current_prices, gdp_ppp_current_prices, gdp_pc_usd_c...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read emissions_dataset_full.csv from the raw-data folder.
emissions_dataset_full <- read_csv(
  folder_path("emissions_dataset_full.csv")
)
## Rows: 6702 Columns: 31
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (1): iso3c
## dbl (30): year, gdp_usd_current_prices, gdp_ppp_current_prices, gdp_pc_usd_c...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read imf_weo_by_country_2022_oct.csv from the raw-data folder.
imf_weo_by_country_2022 <- read_csv(
  folder_path("imf_weo_by_country_2022_oct.csv")
)
## Rows: 414000 Columns: 9
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (7): country_name, iso3c, short_name_unit, short_name, short_unit, categ...
## dbl (2): year, value
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read country_features_2022-10.csv from the raw-data folder.
country_features_2022 <- read_csv(
  folder_path("country_features_2022-10.csv")
)
## Rows: 217 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (4): country_name, iso3c, wb_income_group, wb_region
## dbl (4): debt_gross_percent_of_gdp, nominal_gdp_bn_ppp, nominal_gdp_per_capi...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Read Green_bond_full_dataset.csv from the raw-data folder.
Green_bond_full_dataset <- read_csv(
  folder_path("Green_bond_full_dataset.csv")
)
## Rows: 5006 Columns: 19
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (17): description, maturity_date, coupon_class, currency, ESG_bond_type,...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#' Convert country names to iso3c country codes
#'
#' Thin wrapper around `countrycode()` that treats the input as
#' "country.name" values, matches them by regular expression
#' (`origin_regex = TRUE`) and returns the "iso3c" destination code.
#'
#' The interactive `?countrycode` help lookup that used to precede this
#' definition was removed so the script no longer opens a help page on every
#' run; call `?countrycode` from the console for the full argument reference.
#'
#' @param country_name Vector of country names to convert.
#' @return Vector of "iso3c" codes as returned by `countrycode()`, one per
#'   element of `country_name`. With the `countrycode()` defaults left
#'   untouched here, unmatched names are expected to come back as `NA`
#'   (see the package documentation).
country_name_regex_to_iso3c <- function(country_name) {
  countrycode(
    sourcevar = country_name,
    origin = "country.name",
    destination = "iso3c",
    origin_regex = TRUE
  )
}
# Add an iso3c column computed from each row's country_of_issue.
Green_bond_full_dataset[["iso3c"]] <-
  country_name_regex_to_iso3c(Green_bond_full_dataset[["country_of_issue"]])

# Print the loaded tables for inspection.
emissions_dataset_full
imf_weo_by_country_2022
country_features_2022
Green_bond_full_dataset
# Rows of the green bond table whose amount_outstanding_usd is missing,
# reduced to the columns of interest and de-duplicated.
missing_green_bond_data <- Green_bond_full_dataset %>%
  select(
    country_of_issue,
    amount_outstanding_usd,
    coupon_class,
    ESG_bond_type,
    issued_amount_usd,
    TRBC_sector,
    iso3c
  ) %>%
  filter(is.na(amount_outstanding_usd)) %>%
  unique()

# Print the result.
missing_green_bond_data
library(visdat)
# Plot the missing-value pattern of the green bond table.
Green_bond_full_dataset %>%
  vis_miss()
## Warning: `gather_()` was deprecated in tidyr 1.2.0.
## Please use `gather()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was generated.

# Plot the missing-value pattern of the full emissions table.
emissions_dataset_full %>%
  vis_miss()

# Plot the missing-value pattern of the country features table.
country_features_2022 %>%
  vis_miss()

# iso3c codes in the full emissions table that have at least one row with a
# missing consumption_co2 value (duplicates collapsed).
missing_emissions_data <- emissions_dataset_full %>%
  select(iso3c, consumption_co2) %>%
  filter(is.na(consumption_co2)) %>%
  unique()

# Print the result.
missing_emissions_data
# Export every table to 03_data_processed/ under its original file name.
#
# Paths are built with here() instead of a hard-coded absolute
# "/Users/..." prefix, so the script runs from any checkout of the project.
# On the original machine here() resolves to the same project root, so the
# files land in exactly the same place as before.
processed_path <- partial(here, "03_data_processed")

# Create the output folder on a fresh checkout; a no-op when it exists.
dir.create(processed_path(), showWarnings = FALSE, recursive = TRUE)

# NOTE(review): write.csv() is left at its default row.names = TRUE, so each
# file starts with an unnamed row-index column. Kept as-is so the files stay
# identical to the ones produced before; switch to row.names = FALSE only
# after checking whatever reads these files.
write.csv(
  emissions_dataset,
  file = processed_path("emissions_dataset.csv")
)
write.csv(
  emissions_dataset_full,
  file = processed_path("emissions_dataset_full.csv")
)
write.csv(
  imf_weo_by_country_2022,
  file = processed_path("imf_weo_by_country_2022_oct.csv")
)
write.csv(
  country_features_2022,
  file = processed_path("country_features_2022-10.csv")
)
write.csv(
  Green_bond_full_dataset,
  file = processed_path("Green_bond_full_dataset.csv")
)