Week3 HW_Linyi Zheng

Practice Problem

set up

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
options(scipen = 999)
blackrock_etf_screener <- read_csv ("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv")
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener
# A tibble: 424 × 18
   ticker name                        incept_date         gross_expense_ratio_…¹
   <chr>  <chr>                       <dttm>                               <dbl>
 1 IVV    iShares Core S&P 500 ETF    2000-05-15 00:00:00                   0.03
 2 IEFA   iShares Core MSCI EAFE ETF  2012-10-18 00:00:00                   0.07
 3 AGG    iShares Core U.S. Aggregat… 2003-09-22 00:00:00                   0.03
 4 IWF    iShares Russell 1000 Growt… 2000-05-22 00:00:00                   0.19
 5 IJR    iShares Core S&P Small-Cap… 2000-05-22 00:00:00                   0.06
 6 IJH    iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00                   0.05
 7 IEMG   iShares Core MSCI Emerging… 2012-10-18 00:00:00                   0.09
 8 IWM    iShares Russell 2000 ETF    2000-05-22 00:00:00                   0.19
 9 IWD    iShares Russell 1000 Value… 2000-05-22 00:00:00                   0.19
10 TLT    iShares 20+ Year Treasury … 2002-07-22 00:00:00                   0.15
# ℹ 414 more rows
# ℹ abbreviated name: ¹​gross_expense_ratio_percent
# ℹ 14 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
#   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
#   region <chr>, market <chr>, location <chr>, investment_style <chr>,
#   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …

Problem1

Create a dplyr pipeline to create a tibble fulfills the following parameters:

  • only contains ESG funds (no standard funds)

  • contains the largest fund from each sustainable classification.

  • shows the percent of assets that the top fund comprises for its category (e.g. the top Thematic fund is x% of the total assets of all thematic funds).

  • Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.

library(dplyr)

result <- blackrock_etf_screener %>%
  filter(!is.na(sustainable_classification) & sustainable_classification != "") %>%
  group_by(sustainable_classification) %>%
  mutate(Total_Assets = sum(net_assets_usd)) %>%
  slice(which.max(net_assets_usd)) %>%
  mutate(Percent_of_Total_Assets = net_assets_usd / Total_Assets * 100) %>%
  arrange(desc(Percent_of_Total_Assets))

print (result)
# A tibble: 4 × 20
# Groups:   sustainable_classification [4]
  ticker name  incept_date         gross_expense_ratio_…¹ net_expense_ratio_pe…²
  <chr>  <chr> <dttm>                               <dbl>                  <dbl>
1 BGRN   iSha… 2018-11-13 00:00:00                   0.2                    0.2 
2 ICLN   iSha… 2008-06-24 00:00:00                   0.41                   0.41
3 XVV    iSha… 2020-09-22 00:00:00                   0.08                   0.08
4 ESGU   iSha… 2016-12-01 00:00:00                   0.15                   0.15
# ℹ abbreviated names: ¹​gross_expense_ratio_percent, ²​net_expense_ratio_percent
# ℹ 15 more variables: net_assets_usd <dbl>, net_assets_as_of <dttm>,
#   asset_class <chr>, sub_asset_class <chr>, region <chr>, market <chr>,
#   location <chr>, investment_style <chr>, msci_esg_fund_rating_aaa_ccc <chr>,
#   msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>,
#   msci_esg_percent_coverage <dbl>, sustainable_classification <chr>, …

Problem 2

Create a sorted bar chart of the 10 smallest ETFs.

Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.

library (ggplot2)
library(forcats)

# Step 1: Select the 10 smallest ETFs based on assets under management
smallest_etfs <- blackrock_etf_screener %>%
  arrange(net_assets_usd) %>%
  head(10)

# Step 2: Sort the ETFs based on their assets under management
smallest_etfs <- smallest_etfs %>%
  mutate(name = fct_reorder(name, net_assets_usd))

# Step 3: Create a bar chart
ggplot(smallest_etfs, aes(x = name, y = net_assets_usd)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Top 10 Smallest ETFs",
       x = "ETF Name",
       y = "Net Assets ($)") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  coord_flip()  # Flip the coordinates for horizontal bars

Problem 3

  • Find the funds in the bottom quintile of the MSCI ESG quality score.

  • calculate the number of funds by total assets by asset class and sub asset class.

    # Step 1: Identify the bottom quintile of funds based on the MSCI ESG quality score
    bottom_quintile <- blackrock_etf_screener %>%
      filter(!is.na(msci_esg_quality_score_0_10)) %>%
      arrange(msci_esg_quality_score_0_10) %>%
      slice(1:ceiling(n() * 0.2))  
    
    # Step 2: Group the data by asset class and sub-asset class
    grouped_data <- bottom_quintile %>%
      group_by(asset_class, sub_asset_class) %>%
      summarize(total_assets = sum(net_assets_usd))
    `summarise()` has grouped output by 'asset_class'. You can override using the
    `.groups` argument.
    # Step 3: Display the number of funds by total assets by asset class and sub-asset class
    print(grouped_data)
    # A tibble: 9 × 3
    # Groups:   asset_class [3]
      asset_class  sub_asset_class         total_assets
      <chr>        <chr>                          <dbl>
    1 Equity       All Cap                 95486052602.
    2 Equity       Large Cap                1852506812.
    3 Equity       Large/Mid Cap           42421984678.
    4 Equity       Small Cap              187292555389.
    5 Fixed Income Corporates                 41339182.
    6 Fixed Income Credit                    408038372.
    7 Fixed Income Government              17560711395.
    8 Fixed Income High Yield              38239624578.
    9 Real Estate  Real Estate Securities   7145439523.

Problem 4

1,average net expense ratio by region

factoid1 <- blackrock_etf_screener %>%
  group_by(region) %>%
  summarise(Avg_Expense_Ratio = mean(net_expense_ratio_percent)) %>%
  ggplot(aes(x = region, y = Avg_Expense_Ratio)) +
  geom_bar(stat = "identity", fill = "skyblue") +
  labs(title = "Average Expense Ratio by region",
       x = "region",
       y = "Average Expense Ratio")
print (factoid1)

2.distribution of fund sizes

factoid2 <- ggplot(blackrock_etf_screener, aes(x = net_assets_usd)) +
  geom_histogram(fill = "skyblue", color = "black") +
  labs(title = "Distribution of Fund Sizes",
       x = "Net assets USD",
       y = "Frequency")
print (factoid2)
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.