Quarto 3

Author

Haotian Duan

2.5.0.1 Homework problem 1

# Use the package
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener |> 
# Filter only contains ESG funds
  filter(sustainable_classification == "Uplift" | sustainable_classification == "Thematic" | sustainable_classification == "Impact" | sustainable_classification == "Screened") |>
# Keep the largest fund from each sustainable classification
  group_by(sustainable_classification) |>
  mutate(total_assets = sum(net_assets_usd)) |>
  slice_max(net_assets_usd, n = 1) |>
# Calculate the percent of assets that the top fund comprises for its category
  mutate(percent_assets = (net_assets_usd / total_assets) * 100) |>
# Ranks each sustainable classification
  arrange(desc(percent_assets))
# A tibble: 4 × 20
# Groups:   sustainable_classification [4]
  ticker name  incept_date         gross_expense_ratio_…¹ net_expense_ratio_pe…²
  <chr>  <chr> <dttm>                               <dbl>                  <dbl>
1 BGRN   iSha… 2018-11-13 00:00:00                   0.2                    0.2 
2 ICLN   iSha… 2008-06-24 00:00:00                   0.41                   0.41
3 XVV    iSha… 2020-09-22 00:00:00                   0.08                   0.08
4 ESGU   iSha… 2016-12-01 00:00:00                   0.15                   0.15
# ℹ abbreviated names: ¹​gross_expense_ratio_percent, ²​net_expense_ratio_percent
# ℹ 15 more variables: net_assets_usd <dbl>, net_assets_as_of <dttm>,
#   asset_class <chr>, sub_asset_class <chr>, region <chr>, market <chr>,
#   location <chr>, investment_style <chr>, msci_esg_fund_rating_aaa_ccc <chr>,
#   msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>,
#   msci_esg_percent_coverage <dbl>, sustainable_classification <chr>, …

2.5.0.2 Homework problem 2

# Use the package
library(ggplot2)
# Select 10 10 smallest ETFs
blackrock_etf_screener |> 
  slice_min(net_assets_usd, n = 10) |>
  mutate(name = fct_reorder(name, net_assets_usd)) |>
# Plot the sorted bar chart
  ggplot(aes(x = net_assets_usd, y = name)) +
  geom_bar(stat = "identity", fill = "yellow", alpha = .7) +
  scale_x_continuous(labels = scales::label_dollar(scale = 1/10^3, suffix = " tn")) +
  labs(title = "Fund Size of 10 smallest ETFs",
       x = "Assets (USD)",
       y = "Name of EFTs",
       caption = "Haotian Duan") +
  theme_minimal()

2.5.0.3 Homework problem 3

# The funds in the bottom quintile of the MSCI ESG quality score
blackrock_etf_screener |> 
  mutate(score_decile = ntile(msci_esg_quality_score_0_10, n = 5)) |>
  filter(score_decile == 1) |>
# Calculate the number of funds by total assets by asset class
  group_by(asset_class) |>
  summarize(
    n_funds = n(),
    total_assets = sum(net_assets_usd, na.rm = TRUE)
    )
# A tibble: 3 × 3
  asset_class  n_funds  total_assets
  <chr>          <int>         <dbl>
1 Equity            51 327053099481.
2 Fixed Income      21  56249713527.
3 Real Estate        4   7145439523.
# Calculate the number of funds by total assets by sub asset class
blackrock_etf_screener |> 
  mutate(score_decile = ntile(msci_esg_quality_score_0_10, n = 5)) |>
  filter(score_decile == 1) |>
  group_by(sub_asset_class) |>
  summarize(
    n_funds = n(),
    total_assets = sum(net_assets_usd, na.rm = TRUE)
    )
# A tibble: 9 × 3
  sub_asset_class        n_funds  total_assets
  <chr>                    <int>         <dbl>
1 All Cap                     23  95486052602.
2 Corporates                   1     41339182.
3 Credit                       1    408038372.
4 Government                   3  17560711395.
5 High Yield                  16  38239624578.
6 Large Cap                    1   1852506812.
7 Large/Mid Cap               10  42421984678.
8 Real Estate Securities       4   7145439523.
9 Small Cap                   17 187292555389.

2.5.0.4 Homework problem 4

# Comparison Between Index and Active Investment
# Build a Statistic Table on Assets Size
stata_table_assets <- blackrock_etf_screener |> 
  group_by(investment_style) |>
  summarize(
    total_size = sum(net_assets_usd, na.rm = TRUE),
    max_size = max(net_assets_usd, na.rm = TRUE),
    min_size = min(net_assets_usd, na.rm = TRUE),
    avg_size = mean(net_assets_usd, na.rm = TRUE),
    sd_size = sd(net_assets_usd, na.rm = TRUE),
    percentile_25_size = quantile(net_assets_usd, .25, na.rm = TRUE),
    percentile_50_size = quantile(net_assets_usd, .5, na.rm = TRUE),
    percentile_75_size = quantile(net_assets_usd, .75, na.rm = TRUE),
  )
stata_table_assets
# A tibble: 2 × 9
  investment_style total_size      max_size min_size    avg_size      sd_size
  <chr>                 <dbl>         <dbl>    <dbl>       <dbl>        <dbl>
1 Active              1.34e10   6105715713. 2711720.  371926529.  1134097310.
2 Index               2.59e12 399312542067.  456626. 6674284744. 24219988000.
# ℹ 3 more variables: percentile_25_size <dbl>, percentile_50_size <dbl>,
#   percentile_75_size <dbl>
# Comparison of Total Assets Between Different Investment Style
ggplot(stata_table_assets) +
  aes(x = total_size, y = investment_style) +
  geom_bar(stat = "identity", fill = "red", alpha = .7) +
  scale_x_continuous(labels = scales::label_dollar(scale = 1/10^3, suffix = " tn")) +
  labs(title = "Fund Size Between Different Investment Style",
       x = "Assets (USD)",
       y = "Investment Style",
       caption = "Haotian Duan") +
  theme_minimal()

# Display the Distribution of Assets Size in Different Investment Style
ggplot(blackrock_etf_screener) +
  aes(x = net_assets_usd, y = investment_style) +
  geom_boxplot(fill = "red") +
  scale_x_continuous(trans = "log10") +
  labs(
    x = "Log of Assets Size",
    y = "Investment Style",
    title = "Distribution of Assets Size in Different Investment Style ",
    caption = "Haotian Duan"
  ) +
  theme_minimal()

# Build a Statistic Table on MSCI ESG Quality Score
stata_table_score <- blackrock_etf_screener |> 
  group_by(investment_style) |>
  summarize(
    total_score = sum(msci_esg_quality_score_0_10, na.rm = TRUE),
    max_score = max(msci_esg_quality_score_0_10, na.rm = TRUE),
    min_score = min(msci_esg_quality_score_0_10, na.rm = TRUE),
    avg_score = mean(msci_esg_quality_score_0_10, na.rm = TRUE),
    sd_score = sd(msci_esg_quality_score_0_10, na.rm = TRUE),
    percentile_25_score = quantile(msci_esg_quality_score_0_10, .25, na.rm = TRUE),
    percentile_50_score = quantile(msci_esg_quality_score_0_10, .5, na.rm = TRUE),
    percentile_75_score = quantile(msci_esg_quality_score_0_10, .75, na.rm = TRUE),
  )
stata_table_score
# A tibble: 2 × 9
  investment_style total_score max_score min_score avg_score sd_score
  <chr>                  <dbl>     <dbl>     <dbl>     <dbl>    <dbl>
1 Active                  166.      8.01      5.70      6.65    0.463
2 Index                  2262.      8.73      3.17      6.37    0.960
# ℹ 3 more variables: percentile_25_score <dbl>, percentile_50_score <dbl>,
#   percentile_75_score <dbl>
# Comparison of Average MSCI ESG Quality Score Between Different Investment Style
ggplot(stata_table_score) +
  aes(x = avg_score, y = investment_style) +
  geom_bar(stat = "identity", fill = "blue", alpha = .7) +
  labs(title = "Average MSCI ESG Quality Score Between Different Investment Style",
       x = "MSCI ESG Quality Score",
       y = "Investment Style",
       caption = "Haotian Duan") +
  theme_minimal()

# Display the Distribution of MSCI ESG Quality Score in Different Investment Style
ggplot(blackrock_etf_screener) +
  aes(x = msci_esg_quality_score_0_10, y = investment_style) +
  geom_boxplot(fill = "blue") +
  labs(
    x = "MSCI ESG Quality Score",
    y = "Investment Style",
    title = "Distribution of MSCI ESG Quality Score in Different Investment Style ",
    caption = "Haotian Duan"
  ) +
  theme_minimal()
Warning: Removed 44 rows containing non-finite values (`stat_boxplot()`).