Homework 3

Author

Sophia Wang

Homework 3

Setup working directory

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(forcats)
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
glimpse(blackrock_etf_screener)
Rows: 424
Columns: 18
$ ticker                                                   <chr> "IVV", "IEFA"…
$ name                                                     <chr> "iShares Core…
$ incept_date                                              <dttm> 2000-05-15, …
$ gross_expense_ratio_percent                              <dbl> 0.03, 0.07, 0…
$ net_expense_ratio_percent                                <dbl> 0.03, 0.07, 0…
$ net_assets_usd                                           <dbl> 399312542067,…
$ net_assets_as_of                                         <dttm> 2023-12-27, …
$ asset_class                                              <chr> "Equity", "Eq…
$ sub_asset_class                                          <chr> "Large Cap", …
$ region                                                   <chr> "North Americ…
$ market                                                   <chr> "Developed", …
$ location                                                 <chr> "United State…
$ investment_style                                         <chr> "Index", "Ind…
$ msci_esg_fund_rating_aaa_ccc                             <chr> "A", "AA", "A…
$ msci_esg_quality_score_0_10                              <dbl> 6.6845, 7.519…
$ msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl> 106.80, 102.8…
$ msci_esg_percent_coverage                                <dbl> 99.05, 99.80,…
$ sustainable_classification                               <chr> NA, NA, NA, N…

Problem 1

blackrock_etf_screener |>
  filter(!is.na(sustainable_classification)) |>
  group_by(sustainable_classification) |>
  summarize(
    largest = max(net_assets_usd, na.rm = TRUE),
    total = sum(net_assets_usd, na.rm = TRUE),
    top_company = name[which.max(net_assets_usd)]
  ) |>
  mutate(
    percent = (largest / total) * 100
  ) |>
  ungroup() |>
  arrange(desc(percent))
# A tibble: 4 × 5
  sustainable_classification      largest        total top_company       percent
  <chr>                             <dbl>        <dbl> <chr>               <dbl>
1 Impact                       335216065.   335216065. iShares USD Gree…   100  
2 Thematic                    3057666513.  3427675675. iShares Global C…    89.2
3 Screened                     182340754.   383389770. iShares ESG Scre…    47.6
4 Uplift                     13385805595. 51443665637. iShares ESG Awar…    26.0

Problem 2

smallest_10 <- blackrock_etf_screener |>
  arrange(net_assets_usd) |>
  slice_head(n = 10) |>
   mutate(ticker = fct_reorder(ticker, net_assets_usd))

ggplot(smallest_10, aes(x = ticker, y = net_assets_usd, fill = ticker)) +
  geom_bar(stat = "identity") +
  labs(
    title = "10 Smallest ETFs by Net Assets",
    caption = "Sophia Wang",
    x = "Company",
    y = "Net Assets (USD)",
    fill = "Ticker"
  ) +
  theme_minimal() 

Problem 3

# Calculate the bottom quintile score
bottom_score <- blackrock_etf_screener |>
  summarize(
    bottom_quintile = quantile(msci_esg_quality_score_0_10, 0.2, na.rm = TRUE),
  ) 

# Filter the funds
bottom_funds <- blackrock_etf_screener |>
  filter(msci_esg_quality_score_0_10 <= bottom_score)
Warning: Using one column matrices in `filter()` was deprecated in dplyr 1.1.0.
ℹ Please use one dimensional logical vectors instead.
# Calculate the number of funds and the total assets by asset class and sub-asset class
summary_by_class <- bottom_funds |>
  group_by(asset_class, sub_asset_class) |>
  summarise(
    number_of_funds = n(),
    total_assets = sum(net_assets_usd, na.rm = TRUE),
    .groups = 'drop' 
  )

Problem 4

# Top5 Regions by Total Net Assets
blackrock_etf_screener |>
  group_by(region) |>
  summarise(total_net_assets_usd = sum(net_assets_usd, na.rm = TRUE)) |>
  slice_head(n = 5) |>
  arrange(desc(total_net_assets_usd))
# A tibble: 5 × 2
  region        total_net_assets_usd
  <chr>                        <dbl>
1 Global               531638155429.
2 Asia Pacific          53570660782.
3 Europe                23366693723.
4 Latin America         11036232024.
5 Kuwait                   56785533.
# Average Net Assets by Region
avg_assets_region <- blackrock_etf_screener |>
  group_by(region) |>
  summarise(average_net_assets_usd = mean(net_assets_usd, na.rm = TRUE))

ggplot(avg_assets_region, aes(x = reorder(region, average_net_assets_usd), y = average_net_assets_usd)) +
  geom_bar(stat = "identity", fill = "lightpink") +
  labs(title = "Average Net Assets by Region",
       caption = "Sophia Wang",
       x = "Region",
       y = "Average Net Assets (USD)") +
  theme_minimal()

# Top5 Asset Classes by Total Net Assets
blackrock_etf_screener |>
  group_by(asset_class) |>
  summarise(total_net_assets_usd = sum(net_assets_usd, na.rm = TRUE)) |>
  slice_head(n = 5) |>
  arrange(desc(total_net_assets_usd))
# A tibble: 5 × 2
  asset_class  total_net_assets_usd
  <chr>                       <dbl>
1 Equity                    1.91e12
2 Fixed Income              6.33e11
3 Commodity                 4.03e10
4 Real Estate               1.41e10
5 Multi Asset               6.58e 9
# Average Net Assets by Asset Class
avg_assets_by_class <- blackrock_etf_screener |>
  group_by(asset_class) |>
  summarise(average_net_assets_usd = mean(net_assets_usd, na.rm = TRUE))

ggplot(avg_assets_by_class, aes(x = reorder(asset_class, average_net_assets_usd), y = average_net_assets_usd)) +
  geom_bar(stat = "identity", fill = "lightblue") +
  labs(title = "Average Net Assets by Asset Class",
       caption = "Sophia Wang",
       x = "Asset Class",
       y = "Average Net Assets (USD)") +
  theme_minimal()

# Score Distribution of ESG Funds
esg_fund <- blackrock_etf_screener |>
  filter(!is.na(msci_esg_quality_score_0_10))

ggplot(esg_fund, aes(x = msci_esg_quality_score_0_10)) +
  geom_histogram(binwidth = 0.5) +
  labs(title = "Distribution of MSCI ESG Quality Scores",
       x = "ESG Quality Score",
       y = "Number of Funds") +
  theme_minimal()