HW3

Author

Alyssa Anderson

Sustainable Finance

Set up:

#load the tidyverse 
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.1     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.1     ✔ tibble    3.2.1
✔ lubridate 1.9.2     ✔ tidyr     1.3.0
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Stop R from presenting data in scientific notation 
options(scipen = 999)

# Import Data from Github
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener
# A tibble: 424 × 18
   ticker name                        incept_date         gross_expense_ratio_…¹
   <chr>  <chr>                       <dttm>                               <dbl>
 1 IVV    iShares Core S&P 500 ETF    2000-05-15 00:00:00                   0.03
 2 IEFA   iShares Core MSCI EAFE ETF  2012-10-18 00:00:00                   0.07
 3 AGG    iShares Core U.S. Aggregat… 2003-09-22 00:00:00                   0.03
 4 IWF    iShares Russell 1000 Growt… 2000-05-22 00:00:00                   0.19
 5 IJR    iShares Core S&P Small-Cap… 2000-05-22 00:00:00                   0.06
 6 IJH    iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00                   0.05
 7 IEMG   iShares Core MSCI Emerging… 2012-10-18 00:00:00                   0.09
 8 IWM    iShares Russell 2000 ETF    2000-05-22 00:00:00                   0.19
 9 IWD    iShares Russell 1000 Value… 2000-05-22 00:00:00                   0.19
10 TLT    iShares 20+ Year Treasury … 2002-07-22 00:00:00                   0.15
# ℹ 414 more rows
# ℹ abbreviated name: ¹​gross_expense_ratio_percent
# ℹ 14 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
#   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
#   region <chr>, market <chr>, location <chr>, investment_style <chr>,
#   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …

Homework Problem 1:

# Creating new variable to distinguish ESG funds

blackrock_etf_screener_ESG <- blackrock_etf_screener |> 
  mutate(
    standard_or_esg = if_else(
      condition = is.na(sustainable_classification),
      true = "Standard",
      false = "ESG"
    ), 
    net_assets_bn_usd = net_assets_usd/10^9)

#Look at summary stats

blackrock_etf_screener_ESG |> 
  group_by(standard_or_esg) |> 
  summarize(
    max_size = max(net_assets_bn_usd, na.rm = TRUE),
    min_size = min(net_assets_bn_usd, na.rm = TRUE),
    avg_size = mean(net_assets_bn_usd, na.rm = TRUE),
    sd_size = sd(net_assets_bn_usd, na.rm = TRUE),
    median_size = median(net_assets_bn_usd, na.rm = TRUE),
    percentile_50 = quantile(net_assets_bn_usd, .5, na.rm = TRUE),
    percentile_75 = quantile(net_assets_bn_usd, .75, na.rm = TRUE),
  )
# A tibble: 2 × 8
  standard_or_esg max_size min_size avg_size sd_size median_size percentile_50
  <chr>              <dbl>    <dbl>    <dbl>   <dbl>       <dbl>         <dbl>
1 ESG                 13.4 0.00371      1.39    2.59       0.343         0.343
2 Standard           399.  0.000457     6.63   24.4        0.629         0.629
# ℹ 1 more variable: percentile_75 <dbl>
# % of assets that the top fund comprises for its category 
Perc_rank_ESG_assets <- blackrock_etf_screener_ESG |> 
  filter(standard_or_esg == "ESG") %>%
  group_by(sustainable_classification) %>%
  summarize(
    Total_Assets = sum(net_assets_bn_usd, na.rm = TRUE),
    Max_Assets = max(net_assets_bn_usd, na.rm = TRUE) 
  ) %>%
  mutate(
    Percent_of_Assets = (Max_Assets / Total_Assets) * 100 
  ) %>%
  arrange(desc(Percent_of_Assets)) 

Homework Problem 2:

#loading libraries 

library(dplyr)
library(ggplot2)
library(forcats)

# Sort by the 10 smallest ETFs 
smallest_etfs <- blackrock_etf_screener_ESG |>
  arrange(min_size= min(net_assets_bn_usd, na.rm = TRUE)) |>
  slice_head(n = 10)

# Reorder by name and size
smallest_etfs$etf_name <- fct_reorder(smallest_etfs$name, smallest_etfs$net_assets_bn_usd)

smallest_etf_graph <- smallest_etfs %>%
  mutate(category = fct_reorder(smallest_etfs$name, smallest_etfs$net_assets_bn_usd))

smallest_etf_graph <- smallest_etfs %>%
  mutate(name = fct_reorder(name, net_assets_bn_usd))

# Create bar chart
 
 ggplot(smallest_etf_graph, aes(x = name, y = net_assets_bn_usd, fill = name)) +
  geom_bar(stat = "identity") +
  theme_minimal() +
   theme(axis.text.x = element_blank())+
  labs(x = "ETFs", y = "Net Assets in Billions", title = "10 Smallest ETFs")

Homework Problem 3:

# bottom quintile of the MSCI ESG quality score
msci_low_esg_score <-blackrock_etf_screener_ESG |> 
  arrange(desc(msci_esg_quality_score_0_10)) 

# summary statistics 

 df_msci_low_esg_score <-msci_low_esg_score|>
  group_by(asset_class, sub_asset_class) |> 
  summarize(
    percentile_20 = quantile(net_assets_bn_usd, .2, na.rm = TRUE),
    max_size = max(net_assets_bn_usd, na.rm = TRUE),
    min_size = min(net_assets_bn_usd, na.rm = TRUE),
    avg_size = mean(net_assets_bn_usd, na.rm = TRUE),
    assets_bn = sum(net_assets_bn_usd, na.rm = TRUE),
    n_funds = n()
  )
`summarise()` has grouped output by 'asset_class'. You can override using the
`.groups` argument.
 df_msci_low_esg_score |>
   summarize(
     Total_Assets = sum(assets_bn, na.rm = TRUE),
     Total_No_Funds = sum(n_funds)
   )
# A tibble: 5 × 3
  asset_class  Total_Assets Total_No_Funds
  <chr>               <dbl>          <int>
1 Commodity           40.3               9
2 Equity            1909.              260
3 Fixed Income       633.              127
4 Multi Asset          6.58             19
5 Real Estate         14.1               9

Homework Problem 4:

# 5 repeatable factoids or data visualizations 

# factoid 1: 
  # The U.S., Japan, China, and India have the highest total net assets of all countries in this dataset. 

blackrock_etf_screener_ESG |> 
  group_by(location) |>
  summarize(
    Assets_per_country = sum(net_assets_bn_usd)
  ) |>
  arrange(desc(Assets_per_country))
# A tibble: 41 × 2
   location       Assets_per_country
   <chr>                       <dbl>
 1 United States             1978.  
 2 Broad                      557.  
 3 Japan                       14.5 
 4 China                       10.3 
 5 India                        9.03
 6 Brazil                       6.34
 7 South Korea                  3.99
 8 Taiwan                       3.54
 9 Canada                       3.19
10 United Kingdom               2.76
# ℹ 31 more rows
# factoid 2: 
  # North America has more than 2x the ESG funds than the rest of the world combined. 

blackrock_etf_screener_ESG |> 
  filter(standard_or_esg == "ESG") |> 
  group_by(region) |>
  summarize(
    ESG_per_region = sum(net_assets_bn_usd)
  ) |>
  arrange(desc(ESG_per_region))
# A tibble: 2 × 2
  region        ESG_per_region
  <chr>                  <dbl>
1 North America           38.5
2 Global                  17.1
# data visualization 3: 
  # Large/Mid cap funds have the highest count of ESG funds of any sub asset class

sub_asset_esg <- blackrock_etf_screener_ESG |> 
  group_by(sub_asset_class, standard_or_esg) |> 
  count() |> 
  ungroup()
sub_asset_esg
# A tibble: 31 × 3
   sub_asset_class      standard_or_esg     n
   <chr>                <chr>           <int>
 1 All Cap              ESG                 5
 2 All Cap              Standard          104
 3 Corporates           Standard            1
 4 Credit               ESG                 4
 5 Credit               Standard           26
 6 Flexible             Standard            2
 7 Government           Standard           26
 8 High Yield           ESG                 1
 9 High Yield           Standard           19
10 High Yield Bank Loan Standard            2
# ℹ 21 more rows
sub_asset_esg |> 
  ggplot(aes(x = sub_asset_class, y = n, fill = standard_or_esg)) +
  geom_bar(stat = "identity", position = "stack")+
    theme_minimal() +
  labs(x = "Sub Asset Class", y = "Number of Funds", title = "Number of Funds per Sub Asset Class")+ 
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# data visualization 4: 
  # The majority of net assets in this data set have an A rating. 

library(esquisse)
ggplot(blackrock_etf_screener_ESG) +
  aes(x = msci_esg_fund_rating_aaa_ccc, y = net_assets_bn_usd,
    fill = standard_or_esg
  ) +
  geom_col() +
  scale_fill_hue(direction = 1) +
  labs(
    x = "MSCI Fund Rating ",
    y = "Net Assets (Billions)",
    title = "Net Assets (Billions) per Rating "
  ) +
  theme_minimal()

# data visualization 5: 
  # Equities havce the highest gross expense ratios of any asset class. 

ggplot(blackrock_etf_screener_ESG) +
  aes(x = asset_class, y = gross_expense_ratio_percent) +
  geom_col(fill = "#4682B4") +
  labs(
    x = "Asset Class",
    y = "Gross Expense Ratio",
    title = "Gross Expense Ratio by Asset Class"
  ) +
  theme_classic()