Untitled

HW-Data Transformation

Q1

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener |>
  filter(sustainable_classification!="NA")
# A tibble: 40 × 18
   ticker name                        incept_date         gross_expense_ratio_…¹
   <chr>  <chr>                       <dttm>                               <dbl>
 1 ESGU   iShares ESG Aware MSCI USA… 2016-12-01 00:00:00                   0.15
 2 ESGD   iShares ESG Aware MSCI EAF… 2016-06-28 00:00:00                   0.2 
 3 SUSA   iShares MSCI USA ESG Selec… 2005-01-24 00:00:00                   0.25
 4 DSI    iShares MSCI KLD 400 Socia… 2006-11-14 00:00:00                   0.25
 5 ESGE   iShares ESG Aware MSCI EM … 2016-06-28 00:00:00                   0.25
 6 EAGG   iShares ESG Aware U.S. Agg… 2018-10-18 00:00:00                   0.11
 7 ICLN   iShares Global Clean Energ… 2008-06-24 00:00:00                   0.41
 8 USCL   iShares Climate Conscious … 2023-06-06 00:00:00                   0.08
 9 PABU   iShares Paris-Aligned Clim… 2022-02-08 00:00:00                   0.1 
10 LCTU   BlackRock U.S. Carbon Tran… 2021-04-06 00:00:00                   0.29
# ℹ 30 more rows
# ℹ abbreviated name: ¹​gross_expense_ratio_percent
# ℹ 14 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
#   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
#   region <chr>, market <chr>, location <chr>, investment_style <chr>,
#   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
blackrock_etf_screener |>
  group_by(sustainable_classification) |>
  summarize(
    largest_fund=max(net_assets_usd)
  )
# A tibble: 5 × 2
  sustainable_classification  largest_fund
  <chr>                              <dbl>
1 Impact                        335216065.
2 Screened                      182340754.
3 Thematic                     3057666513.
4 Uplift                      13385805595.
5 <NA>                       399312542067.
blackrock_etf_screener |>
  group_by(sustainable_classification) |>
  summarize(
    largest_fund=max(net_assets_usd),
    pcg_top_fund=largest_fund/sum(net_assets_usd)*100
  )
# A tibble: 5 × 3
  sustainable_classification  largest_fund pcg_top_fund
  <chr>                              <dbl>        <dbl>
1 Impact                        335216065.        100  
2 Screened                      182340754.         47.6
3 Thematic                     3057666513.         89.2
4 Uplift                      13385805595.         26.0
5 <NA>                       399312542067.         15.7
blackrock_etf_screener |>
  group_by(sustainable_classification) |>
  summarize(
    largest_fund=max(net_assets_usd),
    pcg_top_fund=largest_fund/sum(net_assets_usd)*100,
  ) |>
  mutate(
    top_fund_rank=rank(desc(pcg_top_fund))
    ) |>
  arrange(top_fund_rank |> desc())
# A tibble: 5 × 4
  sustainable_classification  largest_fund pcg_top_fund top_fund_rank
  <chr>                              <dbl>        <dbl>         <dbl>
1 <NA>                       399312542067.         15.7             5
2 Uplift                      13385805595.         26.0             4
3 Screened                      182340754.         47.6             3
4 Thematic                     3057666513.         89.2             2
5 Impact                        335216065.        100               1

Q2

ten_min <- blackrock_etf_screener |>
  slice_min(net_assets_usd, n=10) |>
  arrange(desc(net_assets_usd))
ggplot(ten_min, aes(x=net_assets_usd, y=reorder(ticker, -net_assets_usd)))+
  geom_bar(stat = "identity")+
  scale_x_continuous() +
  labs(title= "10 Smallest ETFs")

Q3

bottom_msci <- blackrock_etf_screener |>
  filter(
    msci_esg_quality_score_0_10 <= quantile(msci_esg_quality_score_0_10, 0.25, 
                                            na.rm = TRUE))
count(bottom_msci)
# A tibble: 1 × 1
      n
  <int>
1   104
total_assets <- bottom_msci |>
  group_by(asset_class, sub_asset_class) |>
  summarize(total_assets = sum(net_assets_usd, na.rm = TRUE))
`summarise()` has grouped output by 'asset_class'. You can override using the
`.groups` argument.

Q4

#Top sub-asset classes with largest amount of fund
top_sub_asset_classes <- blackrock_etf_screener |>
  group_by(sub_asset_class) |>
  summarise(
    total_funds = sum(net_assets_usd, na.rm = TRUE)) |>
  top_n(5, total_funds)

ggplot(top_sub_asset_classes, aes(x = reorder(sub_asset_class, total_funds), 
                                  y = total_funds)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 5 Sub-Asset Classes by Total Funds",
       x = "Sub-Asset Class",
       y = "Total Funds")

#Top asset class with largest amount of funds
top_asset_classes <- blackrock_etf_screener |>
  group_by(asset_class) |>
  summarise(
    total_fundss = sum(net_assets_usd, na.rm = TRUE)) |>
  top_n(5, total_fundss)
ggplot(top_asset_classes, aes(x = reorder(asset_class, total_fundss), y = total_fundss)) +
  geom_bar(stat = "identity") +
  labs(
    title = "Top 5 Asset Classes by Total Funds",
    x = "Asset Class",
    y = "Total Funds")

#Distribution of fund size
ggplot(blackrock_etf_screener, aes(x = net_assets_usd)) +
  geom_histogram() + 
  labs(title = "Distribution of Net Assets (USD)",
       x = "Net Assets (USD)",
       y = "Count")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

#Distribution of MSCI score
ggplot(blackrock_etf_screener, aes(x = msci_esg_quality_score_0_10)) +
  geom_histogram() +
  labs(
    title = "Distribution of MSCI ESG Quality Scores",
    x = "MSCI ESG Quality Score",
    y = "Count")
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
Warning: Removed 44 rows containing non-finite values (`stat_bin()`).

#Distribution of Asset Class
asset_class_distribution <- blackrock_etf_screener |>
  group_by(asset_class) |>
  summarise(
    count = n())

ggplot(asset_class_distribution, aes(x = asset_class, y = count, 
                                     fill = asset_class)) +
  geom_bar(stat = "identity") +
  labs(title = "Distribution of ETFs Across Asset Classes",
       x = "Asset Class",
       y = "Count")