Homework - Data Transformation

Author

Lexi Lei

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
options(scipen = 999)

blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

Problem 1

blackrock_etf_screener |>
  filter(!is.na(sustainable_classification)) |>
  group_by(sustainable_classification) |>
  summarize(
    largest_fund = max(net_assets_usd, na.rm = TRUE),
    pct_largest_fund = (largest_fund / sum(net_assets_usd) * 100)
    ) |>
  mutate(pct_rank = rank(pct_largest_fund)) |>
  arrange(desc(pct_rank))
# A tibble: 4 × 4
  sustainable_classification largest_fund pct_largest_fund pct_rank
  <chr>                             <dbl>            <dbl>    <dbl>
1 Impact                       335216065.            100          4
2 Thematic                    3057666513.             89.2        3
3 Screened                     182340754.             47.6        2
4 Uplift                     13385805595.             26.0        1

Problem 2

smallest_etf <- blackrock_etf_screener |>
  slice_min(order_by = net_assets_usd, n = 10) |>
  mutate(name = fct_reorder(name, net_assets_usd))

ggplot(
  data = smallest_etf, 
  aes(x = name, y = net_assets_usd)) +
  geom_bar(stat = "identity") +
  scale_y_continuous(labels = scales::label_dollar(scale = 1/10^6, suffix = " mn")) +
  labs(title = "10 Smallest ETFs",
    x = "Fund",
    y = "Net Assets (USD)",
    caption = "Lexi") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

Problem 3

bottom_quintile_esg <- blackrock_etf_screener |>
  filter(msci_esg_quality_score_0_10 <= quantile(msci_esg_quality_score_0_10, 0.2, na.rm = TRUE))

bottom_quintile_esg |>
  count()
# A tibble: 1 × 1
      n
  <int>
1    76
bottom_quintile_esg |>
  group_by(asset_class, market) |>
  summarize(total_asset_bn = sum(net_assets_usd / 10^9))
`summarise()` has grouped output by 'asset_class'. You can override using the
`.groups` argument.
# A tibble: 5 × 3
# Groups:   asset_class [3]
  asset_class  market    total_asset_bn
  <chr>        <chr>              <dbl>
1 Equity       Developed         203.  
2 Equity       Emerging          124.  
3 Fixed Income Developed          38.6 
4 Fixed Income Emerging           17.7 
5 Real Estate  Developed           7.15

Problem 4

  1. Among the 53 iShares funds in emerging markets, only 4 are related to ESG.
emerging_etf <- blackrock_etf_screener |>
  filter(market == "Emerging")

emerging_etf |>
  group_by(sustainable_classification) |> 
  count(sort = TRUE)
# A tibble: 3 × 2
# Groups:   sustainable_classification [3]
  sustainable_classification     n
  <chr>                      <int>
1 <NA>                          49
2 Uplift                         3
3 Thematic                       1
  1. ETFs, mostly non-ESG funds, concentrate in developed markets. Few ESG funds are set up in emerging markets.
standard_vs_esg <- blackrock_etf_screener |> 
  filter(market == "Emerging" | market == "Developed") |>
  mutate(
    standard_or_esg = if_else(
      condition = is.na(sustainable_classification),
      true = "Standard",
      false = "ESG")
    )
  
standard_vs_esg |>
  ggplot(aes(x = sustainable_classification, fill = market)
  ) +
  geom_bar(position = "stack") +
  labs(title = "ETFs in Emerging Markets vs Developed Markets",
    subtitle = "ETFs Concentrate in Developed Markets",
    x = "ESG and Non-ESG",
    y = "Number of Funds",
    caption = "Lexi") +
  theme_minimal()

  1. Geographically, ETFs mostly concentrate in the United States where the number of funds is much more higher than the sum of all other country-specific ETFs. Second-tier countries include China, Japan, India, Brazil, Germany, and United Kingdom.
blackrock_etf_screener |>
  group_by(location) |> 
  count(sort = TRUE)
# A tibble: 41 × 2
# Groups:   location [41]
   location           n
   <chr>          <int>
 1 United States    233
 2 Broad            139
 3 China              5
 4 Japan              5
 5 India              3
 6 Brazil             2
 7 Germany            2
 8 United Kingdom     2
 9 Australia          1
10 Austria            1
# ℹ 31 more rows
  1. In Asia, China and Japan have more ETFs than others. Emerging markets have a strong presence in the ETFs landscape in Asia, showing significant potentials to grow in the future.
asia_etf <- blackrock_etf_screener |>
  filter(region == "Asia Pacific",
         location != "Broad", 
         location != "Australia", 
         location != "New Zealand")

asia_etf |>
  ggplot(aes(x = location, fill = market)) +
  geom_bar() +
  labs(
    title = "ETFs Landscape in Asia",
    subtitle = "China and Japan have more ETFs than others",
    x = "Country",
    y = "Number of Funds",
    caption = "Lexi") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

  1. Among all A-level ETFs, active investments are much less than index investments and are only present in North American and global funds.
blackrock_etf_screener %>%
 filter(msci_esg_fund_rating_aaa_ccc %in% c("A", "AA", "AAA") | is.na(msci_esg_fund_rating_aaa_ccc)) %>%
 ggplot() +
  aes(x = sustainable_classification, fill = investment_style) +
  geom_bar() +
  scale_fill_hue(direction = 1) +
  labs(
    x = "ESG vs Non-ESG",
    y = "Number of Funds",
    title = "Investment Styles of A-Level ETFs Worldwide",
    subtitle = "Active Investment Is Only Present in North American and Global Funds",
    caption = "Lexi",
    fill = "Investment Style"
  ) +
  theme_minimal() +
  facet_wrap(vars(region), scales = "free_y", ncol = 4L)