2.5.0.1 Homework problem 1

#load the tidyverse 
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
# this will stop R from presenting data in scientific notation, which can be annoying. 
options(scipen = 999)
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
## Rows: 424 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
## dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
## dttm  (2): incept_date, net_assets_as_of
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener
## # A tibble: 424 × 18
##    ticker name                        incept_date         gross_expense_ratio_…¹
##    <chr>  <chr>                       <dttm>                               <dbl>
##  1 IVV    iShares Core S&P 500 ETF    2000-05-15 00:00:00                   0.03
##  2 IEFA   iShares Core MSCI EAFE ETF  2012-10-18 00:00:00                   0.07
##  3 AGG    iShares Core U.S. Aggregat… 2003-09-22 00:00:00                   0.03
##  4 IWF    iShares Russell 1000 Growt… 2000-05-22 00:00:00                   0.19
##  5 IJR    iShares Core S&P Small-Cap… 2000-05-22 00:00:00                   0.06
##  6 IJH    iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00                   0.05
##  7 IEMG   iShares Core MSCI Emerging… 2012-10-18 00:00:00                   0.09
##  8 IWM    iShares Russell 2000 ETF    2000-05-22 00:00:00                   0.19
##  9 IWD    iShares Russell 1000 Value… 2000-05-22 00:00:00                   0.19
## 10 TLT    iShares 20+ Year Treasury … 2002-07-22 00:00:00                   0.15
## # ℹ 414 more rows
## # ℹ abbreviated name: ¹​gross_expense_ratio_percent
## # ℹ 14 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## #   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## #   region <chr>, market <chr>, location <chr>, investment_style <chr>,
## #   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## #   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
blackrock_etf_screener |> 
  glimpse()
## Rows: 424
## Columns: 18
## $ ticker                                                   <chr> "IVV", "IEFA"…
## $ name                                                     <chr> "iShares Core…
## $ incept_date                                              <dttm> 2000-05-15, …
## $ gross_expense_ratio_percent                              <dbl> 0.03, 0.07, 0…
## $ net_expense_ratio_percent                                <dbl> 0.03, 0.07, 0…
## $ net_assets_usd                                           <dbl> 399312542067,…
## $ net_assets_as_of                                         <dttm> 2023-12-27, …
## $ asset_class                                              <chr> "Equity", "Eq…
## $ sub_asset_class                                          <chr> "Large Cap", …
## $ region                                                   <chr> "North Americ…
## $ market                                                   <chr> "Developed", …
## $ location                                                 <chr> "United State…
## $ investment_style                                         <chr> "Index", "Ind…
## $ msci_esg_fund_rating_aaa_ccc                             <chr> "A", "AA", "A…
## $ msci_esg_quality_score_0_10                              <dbl> 6.6845, 7.519…
## $ msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl> 106.80, 102.8…
## $ msci_esg_percent_coverage                                <dbl> 99.05, 99.80,…
## $ sustainable_classification                               <chr> NA, NA, NA, N…

Create a dplyr pipeline to create a tibble fulfills the following parameters:

#only contains ESG funds (no standard funds)

#contains the largest fund from each sustainable classification.

#shows the percent of assets that the top fund comprises for its category (e.g. the top Thematic fund is x% of the total assets of all thematic funds).

#Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.

blackrock_etf_screener_w_new_features <- blackrock_etf_screener |> 
  mutate(
    # if the sustainable_classification column is NA, then the fund is not an ESG fund.
    standard_or_esg = if_else(
      condition = is.na(sustainable_classification),
      true = "Standard",
      false = "ESG"
    )
  )
blackrock_etf_screener_w_new_features 
## # A tibble: 424 × 19
##    ticker name                        incept_date         gross_expense_ratio_…¹
##    <chr>  <chr>                       <dttm>                               <dbl>
##  1 IVV    iShares Core S&P 500 ETF    2000-05-15 00:00:00                   0.03
##  2 IEFA   iShares Core MSCI EAFE ETF  2012-10-18 00:00:00                   0.07
##  3 AGG    iShares Core U.S. Aggregat… 2003-09-22 00:00:00                   0.03
##  4 IWF    iShares Russell 1000 Growt… 2000-05-22 00:00:00                   0.19
##  5 IJR    iShares Core S&P Small-Cap… 2000-05-22 00:00:00                   0.06
##  6 IJH    iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00                   0.05
##  7 IEMG   iShares Core MSCI Emerging… 2012-10-18 00:00:00                   0.09
##  8 IWM    iShares Russell 2000 ETF    2000-05-22 00:00:00                   0.19
##  9 IWD    iShares Russell 1000 Value… 2000-05-22 00:00:00                   0.19
## 10 TLT    iShares 20+ Year Treasury … 2002-07-22 00:00:00                   0.15
## # ℹ 414 more rows
## # ℹ abbreviated name: ¹​gross_expense_ratio_percent
## # ℹ 15 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## #   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## #   region <chr>, market <chr>, location <chr>, investment_style <chr>,
## #   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## #   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
esg_fund_summary <- blackrock_etf_screener_w_new_features |>
  dplyr::filter(standard_or_esg == "ESG") |>
  group_by(sustainable_classification) |>
  summarize(
    largest_fund_assets = max(net_assets_usd),
    total_assets = sum(net_assets_usd)
  ) |>
  mutate(
    percent_of_category_assets = (largest_fund_assets / total_assets) * 100
  ) |>
  ungroup() |>
  arrange(desc(percent_of_category_assets)) |>
  mutate(
    rank = rank(-percent_of_category_assets)
  )

esg_fund_summary
## # A tibble: 4 × 5
##   sustainable_classifi…¹ largest_fund_assets total_assets percent_of_category_…²
##   <chr>                                <dbl>        <dbl>                  <dbl>
## 1 Impact                          335216065.   335216065.                  100  
## 2 Thematic                       3057666513.  3427675675.                   89.2
## 3 Screened                        182340754.   383389770.                   47.6
## 4 Uplift                        13385805595. 51443665637.                   26.0
## # ℹ abbreviated names: ¹​sustainable_classification, ²​percent_of_category_assets
## # ℹ 1 more variable: rank <dbl>

2.5.0.2 Homework problem 2:

#Create a sorted bar chart of the 10 smallest ETFs. # Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.

library(ggplot2)
library(dplyr)
library(forcats)

smallest_etfs <- blackrock_etf_screener_w_new_features |>
  arrange(net_assets_usd) |>
  head(10) |>
  mutate(fund_name_reordered = fct_reorder(name, net_assets_usd))
# Creating the sorted bar chart
ggplot(smallest_etfs, aes(x = fund_name_reordered, y = net_assets_usd)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  coord_flip() + 
  labs(title = "10 Smallest ETFs by Net Assets",
       x = "ETF Name",
       y = "Net Assets (USD)") +
  theme_minimal() 

2.5.0.3 Homework problem 3:

Find the funds in the bottom quintile of the MSCI ESG quality score.

For the funds in the bottom quintile of MSCI ESG quality scores, calculate i) the number of funds, and ii) the total assets, grouped by asset class (e.g. Equity, Fixed Income) and sub-asset class (e.g. Emerging Markets).

bottom_quantile <- blackrock_etf_screener_w_new_features |>
  dplyr::filter(!is.na(msci_esg_quality_score_0_10)) |>
  mutate(quantile_group = ntile(msci_esg_quality_score_0_10,5)) |>
  dplyr::filter(quantile_group==1) |>
  arrange(desc(msci_esg_quality_score_0_10))

bottom_quantile_group <- bottom_quantile |>
  group_by(asset_class, market)|>
  dplyr::summarise(
    number_of_funds=n(),
    total_assets = sum(net_assets_usd)
  )
## `summarise()` has grouped output by 'asset_class'. You can override using the
## `.groups` argument.
print(bottom_quantile)
## # A tibble: 76 × 20
##    ticker name                        incept_date         gross_expense_ratio_…¹
##    <chr>  <chr>                       <dttm>                               <dbl>
##  1 BPAY   BlackRock Future Financial… 2022-08-16 00:00:00                   0.71
##  2 EEM    iShares MSCI Emerging Mark… 2003-04-07 00:00:00                   0.69
##  3 HEEM   iShares Currency Hedged MS… 2014-09-23 00:00:00                   1.47
##  4 REET   iShares Global REIT ETF     2014-07-08 00:00:00                   0.14
##  5 IBRN   iShares Neuroscience and H… 2022-08-24 00:00:00                   0.47
##  6 GHYG   iShares US & Intl High Yie… 2012-04-03 00:00:00                   0.4 
##  7 IJS    iShares S&P Small-Cap 600 … 2000-07-24 00:00:00                   0.18
##  8 IBHJ   iShares® iBonds® 2030 Term… 2023-06-21 00:00:00                   0.35
##  9 EMGF   iShares Emerging Markets E… 2015-12-08 00:00:00                   0.25
## 10 IBHI   iShares® iBonds® 2029 Term… 2022-03-08 00:00:00                   0.35
## # ℹ 66 more rows
## # ℹ abbreviated name: ¹​gross_expense_ratio_percent
## # ℹ 16 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## #   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## #   region <chr>, market <chr>, location <chr>, investment_style <chr>,
## #   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## #   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …

Homework problem 4:

#We only began exploring this data. Use your newfound dplyr skills to uncover actionable insights from the dataset.

#Show your work.

#Present 5 repeatable factoids or data visualizations. As always, make sure your data visualizations have meaningful titles and labels. We will share these in class.

#1: Distribution of Net Assets Across Asset Classes
#Objective: Understand how net assets are distributed across different asset classes.
library(dplyr)
library(ggplot2)

asset_class_distribution <- blackrock_etf_screener_w_new_features |>
  group_by(asset_class) |>
  summarise(total_assets = sum(net_assets_usd, na.rm = TRUE)) |>
  arrange(desc(total_assets))

ggplot(asset_class_distribution, aes(x = reorder(asset_class, total_assets), y = total_assets)) +
  geom_bar(stat = "identity", fill = "orange") +
  coord_flip() +
  labs(title = "Distribution of Net Assets Across Asset Classes",
       x = "Asset Class",
       y = "Total Net Assets (USD)") +
  theme_minimal()

#2:ESG Score Distribution (Density Plot)
#Objective: Visualize the distribution of ESG scores across all funds to highlight areas of concentration.
ggplot(blackrock_etf_screener_w_new_features, aes(x = msci_esg_quality_score_0_10)) +
  geom_density(fill = "skyblue", alpha = 0.7) +
  labs(title = "Distribution of MSCI ESG Quality Scores",
       x = "ESG Score",
       y = "Density") +
  theme_minimal()
## Warning: Removed 44 rows containing non-finite values (`stat_density()`).

#3:Regional Distribution of ESG Fund Assets
#Objective: Visualize the distribution of ESG fund assets across different geographic regions to identify where ESG investing is most concentrated.
library(ggplot2)
library(dplyr)

# Prepare the data
regional_distribution_esg <- blackrock_etf_screener_w_new_features |>
  filter(standard_or_esg == "ESG") |>
  group_by(region) |>
  summarise(total_assets = sum(net_assets_usd, na.rm = TRUE)) |>
  mutate(proportion = total_assets / sum(total_assets)) |>
  arrange(desc(total_assets))

# Convert to factor for ordered plotting
regional_distribution_esg$region <- factor(regional_distribution_esg$region, levels = regional_distribution_esg$region)

# Plot
ggplot(regional_distribution_esg, aes(x = region, y = proportion, fill = region)) +
  geom_col() +
  coord_flip() + # Horizontal layout
  labs(title = "Regional Distribution of ESG Fund Assets", x = "Geographic Region", y = "Proportion of Total Assets") +
  scale_fill_brewer(palette = "Pastel1") +
  theme_minimal() +
  geom_text(aes(label = scales::percent(proportion)), position = position_dodge(width = 0.9), hjust = -0.2, size = 2)

#4:Top 10 Markets by Number of ESG Funds (Bar Chart)
#Objective: Identify the top 10 markets with the highest number of ESG funds.
top_markets_esg <- blackrock_etf_screener_w_new_features |>
  dplyr:: filter(standard_or_esg == "ESG") |>
  count(market, sort = TRUE) |>
  top_n(10, wt = n)

# Bar Chart
ggplot(top_markets_esg, aes(x = reorder(market, n), y = n, fill = market)) +
  geom_col() +
  coord_flip() +
  labs(title = "Top 10 Markets by Number of ESG Funds",
       x = "Market",
       y = "Number of Funds") +
  scale_fill_viridis_d()

# 5:Fund Size vs. ESG Score Correlation
#Objective: Investigate if there's a correlation between the size of the funds (net assets) and their ESG scores.

ggplot(blackrock_etf_screener_w_new_features, aes(x = net_assets_usd, y = msci_esg_quality_score_0_10)) +
  geom_point(alpha = 0.5) +
  geom_smooth(method = "lm", color = "blue", se = FALSE) +
  scale_x_log10(labels = scales::comma) +
  labs(title = "Correlation between Fund Size and ESG Score",
       x = "Net Assets (USD, log scale)",
       y = "MSCI ESG Quality Score") +
  theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 44 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 44 rows containing missing values (`geom_point()`).