Final Project Exploratory Data Analysis

Author

Yuanling Zeng

library(readr)
library(dplyr)
library(ggplot2)
library(tidyverse)
library(janitor)
library(tidyr)

imf_climate_dashboards_green_debt_url <- 
  "https://opendata.arcgis.com/datasets/8e2772e0b65f4e33a80183ce9583d062_0.csv"
green_bond <- imf_climate_dashboards_green_debt_url |> 
  read_csv() 

indicators_we_want <- c("Green Bond Issuances by Country", "Sovereign Green Bond Issuances")

india_green_bonds <- green_bond %>%
  filter(Country == "India")
india_green_bonds
# A tibble: 1 × 42
  ObjectId Country ISO2  ISO3  Indicator          Unit  Source CTS_Code CTS_Name
     <dbl> <chr>   <chr> <chr> <chr>              <chr> <chr>  <chr>    <chr>   
1       45 India   IN    IND   Green Bond Issuan… Bill… Refin… ECFFI    Green B…
# ℹ 33 more variables: CTS_Full_Descriptor <chr>, Type_of_Issuer <chr>,
#   Use_of_Proceed <chr>, Principal_Currency <chr>, F1985 <dbl>, F1986 <dbl>,
#   F1987 <dbl>, F1990 <dbl>, F1991 <dbl>, F1992 <dbl>, F1993 <dbl>,
#   F1994 <dbl>, F1999 <dbl>, F2000 <dbl>, F2002 <dbl>, F2003 <dbl>,
#   F2004 <dbl>, F2007 <dbl>, F2008 <dbl>, F2009 <dbl>, F2010 <dbl>,
#   F2011 <dbl>, F2012 <dbl>, F2013 <dbl>, F2014 <dbl>, F2015 <dbl>,
#   F2016 <dbl>, F2017 <dbl>, F2018 <dbl>, F2019 <dbl>, F2020 <dbl>, …
india_green_bonds_subset <- india_green_bonds %>%
  select(starts_with("F2015"):ends_with("F2022"))

india_green_bonds_long <- india_green_bonds_subset %>%
  pivot_longer(cols = everything(),
               names_to = "Year",
               values_to = "Insurance")

india_green_bonds_long$Year <- as.numeric(gsub("F", "", india_green_bonds_long$Year))
ggplot(india_green_bonds_long, aes(x = Year, y = Insurance, label = Insurance)) +
  geom_line() +
  geom_text(hjust = -0.2, size = 3, color = "black") +  
  labs(x = "Year", y = "Green Bond Insurance") +
  theme_minimal() +
  scale_x_continuous(breaks = india_green_bonds_long$Year, labels = india_green_bonds_long$Year)
Warning: Removed 1 rows containing missing values (`geom_text()`).

blackrock_etf_screener <- read_csv ("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv")
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
india_etf <- blackrock_etf_screener %>% 
  filter(ticker %in% c("INDY", "INDA", "SMIN"))
india_etf
# A tibble: 3 × 18
  ticker name  incept_date         gross_expense_ratio_…¹ net_expense_ratio_pe…²
  <chr>  <chr> <dttm>                               <dbl>                  <dbl>
1 INDA   iSha… 2012-02-02 00:00:00                   0.64                   0.64
2 INDY   iSha… 2009-11-18 00:00:00                   0.89                   0.89
3 SMIN   iSha… 2012-02-08 00:00:00                   0.74                   0.74
# ℹ abbreviated names: ¹​gross_expense_ratio_percent, ²​net_expense_ratio_percent
# ℹ 13 more variables: net_assets_usd <dbl>, net_assets_as_of <dttm>,
#   asset_class <chr>, sub_asset_class <chr>, region <chr>, market <chr>,
#   location <chr>, investment_style <chr>, msci_esg_fund_rating_aaa_ccc <chr>,
#   msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>,
#   msci_esg_percent_coverage <dbl>, sustainable_classification <chr>
india_etf$incept_date <- as.Date(india_etf$incept_date)

ggplot(data = india_etf, aes(x = incept_date, y = net_assets_usd)) +
  geom_line() +
  labs(x = "Inception Date", y = "Net Assets (USD)") +
  ggtitle("Net Assets over Time")

ggplot(data = india_etf, aes(x = incept_date, y = gross_expense_ratio_percent)) +
  geom_line() +
  labs(x = "Inception Date", y = "Gross Expense Ratio (%)") +
  ggtitle("Gross Expense Ratio over Time")

ggplot(india_etf, aes(x = msci_esg_quality_score_0_10, y = net_assets_usd, fill = msci_esg_quality_score_0_10)) +
  geom_bar(stat = "identity") +
  labs(x = "MSCI ESG Quality Score (0-10)", y = "Net Assets (USD)", fill = "MSCI ESG Quality Score") +
  ggtitle("Comparison of MSCI ESG Quality Score with Net Assets")
Warning: Removed 1 rows containing missing values (`position_stack()`).

ggplot(india_etf, aes(x = msci_esg_quality_score_0_10, y = gross_expense_ratio_percent, fill = msci_esg_quality_score_0_10)) +
  geom_bar(stat = "identity") +
  labs(x = "MSCI ESG Quality Score (0-10)", y = "Gross Expense Ratio (%)", fill = "MSCI ESG Quality Score") +
  ggtitle("Comparison of MSCI ESG Quality Score with Gross Expense Ratio")
Warning: Removed 1 rows containing missing values (`position_stack()`).