#load the tidyverse
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# this will stop R from presenting data in scientific notation, which can be annoying.
options(scipen = 999)
#load data
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv")
## Rows: 424 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
## dbl (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
## dttm (2): incept_date, net_assets_as_of
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener
## # A tibble: 424 × 18
## ticker name incept_date gross_expense_ratio_…¹
## <chr> <chr> <dttm> <dbl>
## 1 IVV iShares Core S&P 500 ETF 2000-05-15 00:00:00 0.03
## 2 IEFA iShares Core MSCI EAFE ETF 2012-10-18 00:00:00 0.07
## 3 AGG iShares Core U.S. Aggregat… 2003-09-22 00:00:00 0.03
## 4 IWF iShares Russell 1000 Growt… 2000-05-22 00:00:00 0.19
## 5 IJR iShares Core S&P Small-Cap… 2000-05-22 00:00:00 0.06
## 6 IJH iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00 0.05
## 7 IEMG iShares Core MSCI Emerging… 2012-10-18 00:00:00 0.09
## 8 IWM iShares Russell 2000 ETF 2000-05-22 00:00:00 0.19
## 9 IWD iShares Russell 1000 Value… 2000-05-22 00:00:00 0.19
## 10 TLT iShares 20+ Year Treasury … 2002-07-22 00:00:00 0.15
## # ℹ 414 more rows
## # ℹ abbreviated name: ¹​gross_expense_ratio_percent
## # ℹ 14 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## # net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## # region <chr>, market <chr>, location <chr>, investment_style <chr>,
## # msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## # msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
blackrock_etf_screener |>
group_by(sustainable_classification) |>
summarize(
# how many funds?
n_funds = n(),
# how much money?
assets_usd = sum(net_assets_usd, na.rm = TRUE)
) |>
# arrange in descending order
arrange(assets_usd |> desc())
## # A tibble: 5 × 3
## sustainable_classification n_funds assets_usd
## <chr> <int> <dbl>
## 1 <NA> 384 2.55e12
## 2 Uplift 30 5.14e10
## 3 Thematic 6 3.43e 9
## 4 Screened 3 3.83e 8
## 5 Impact 1 3.35e 8
blackrock_etf_screener_w_new_features <- blackrock_etf_screener |>
mutate(
# if the sustainable_classification column is NA, then the fund is not an ESG fund.
standard_or_esg = if_else(
condition = is.na(sustainable_classification),
true = "Standard",
false = "ESG"
),
# lubridate::year() extracts the year from a date
inception_year = lubridate::year(incept_date),
# Change to a meaningful magnitude for the data. In asset management, billions is a good default.
net_assets_bn_usd = net_assets_usd/10^9,
# let's put our new variables at the front so we can see them easily.
.before = everything()
)
blackrock_etf_screener_w_new_features
## # A tibble: 424 × 21
## standard_or_esg inception_year net_assets_bn_usd ticker name
## <chr> <dbl> <dbl> <chr> <chr>
## 1 Standard 2000 399. IVV iShares Core S&P 500…
## 2 Standard 2012 107. IEFA iShares Core MSCI EA…
## 3 Standard 2003 101. AGG iShares Core U.S. Ag…
## 4 Standard 2000 82.1 IWF iShares Russell 1000…
## 5 Standard 2000 78.2 IJR iShares Core S&P Sma…
## 6 Standard 2000 77.1 IJH iShares Core S&P Mid…
## 7 Standard 2012 73.9 IEMG iShares Core MSCI Em…
## 8 Standard 2000 67.7 IWM iShares Russell 2000…
## 9 Standard 2000 55.4 IWD iShares Russell 1000…
## 10 Standard 2002 52.2 TLT iShares 20+ Year Tre…
## # ℹ 414 more rows
## # ℹ 16 more variables: incept_date <dttm>, gross_expense_ratio_percent <dbl>,
## # net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## # net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## # region <chr>, market <chr>, location <chr>, investment_style <chr>,
## # msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## # msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
#Practice Problem 1
top_funds <- blackrock_etf_screener_w_new_features %>%
#keep ESG funds
filter(standard_or_esg == "ESG") %>%
select(ticker, name, net_assets_bn_usd, sustainable_classification) %>%
#group by sustainable classification and calculate stats
group_by(sustainable_classification) %>%
mutate(
fund_rank = rank(net_assets_bn_usd * -1),
fund_percentile = percent_rank(net_assets_bn_usd),
fund_decile = ntile(net_assets_bn_usd, n = 10),
pct_of_total = net_assets_bn_usd/sum(net_assets_bn_usd, na.rm = TRUE) * 100,
pct_of_total_cumulative = cumsum(pct_of_total)
) %>%
#keep top-ranking funds; rank these funds by concentration, format for presentation
filter(fund_rank == 1) %>%
ungroup() %>%
mutate(
fund_concentration_rank = rank(pct_of_total * -1)
) %>%
select(fund_concentration_rank,ticker, name, net_assets_bn_usd, sustainable_classification,pct_of_total) %>%
arrange(fund_concentration_rank)
print(top_funds)
## # A tibble: 4 × 6
## fund_concentration_rank ticker name net_assets_bn_usd sustainable_classifi…¹
## <dbl> <chr> <chr> <dbl> <chr>
## 1 1 BGRN iShar… 0.335 Impact
## 2 2 ICLN iShar… 3.06 Thematic
## 3 3 XVV iShar… 0.182 Screened
## 4 4 ESGU iShar… 13.4 Uplift
## # ℹ abbreviated name: ¹​sustainable_classification
## # ℹ 1 more variable: pct_of_total <dbl>
#Practice Problem 2
# Filter for ESG funds and select relevant columns
smallest_etfs <- blackrock_etf_screener_w_new_features %>%
filter(standard_or_esg == "ESG") %>%
select(ticker, name, net_assets_bn_usd) %>%
# Sort by net assets in ascending order and take the top 10 smallest ETFs
arrange(net_assets_bn_usd) %>%
head(10)
# Create a sorted bar chart
ggplot(smallest_etfs, aes(x = fct_reorder(name, net_assets_bn_usd), y = net_assets_bn_usd)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() + # Horizontal bars
labs(title = "Top 10 Smallest ETFs",
x = "ETF Name",
y = "Net Assets (in billion USD)",
caption = "Source: Your Data Source") +
theme_minimal()
#Practice Problem 3
# Step 1: Find the bottom quintile of MSCI ESG quality score
bottom_quintile <- blackrock_etf_screener_w_new_features %>%
filter(!is.na(msci_esg_quality_score_0_10)) %>%
mutate(quantile_group = ntile(msci_esg_quality_score_0_10, 5)) %>%
filter(quantile_group == 1) # Selecting the bottom quintile
# Step 2: Group by asset class and sub-asset class, calculate number of funds and total assets
bottom_quintile_summary <- bottom_quintile %>%
group_by(asset_class, sub_asset_class) %>%
summarise(
num_funds = n(), # Number of funds
total_assets = sum(net_assets_bn_usd) # Total assets
)
## `summarise()` has grouped output by 'asset_class'. You can override using the
## `.groups` argument.
# View the summary
print(bottom_quintile)
## # A tibble: 76 × 22
## standard_or_esg inception_year net_assets_bn_usd ticker name
## <chr> <dbl> <dbl> <chr> <chr>
## 1 Standard 2000 78.2 IJR iShares Core S&P Sma…
## 2 Standard 2012 73.9 IEMG iShares Core MSCI Em…
## 3 Standard 2000 67.7 IWM iShares Russell 2000…
## 4 Standard 2007 18.8 HYG iShares iBoxx $ High…
## 5 Standard 2003 17.6 EEM iShares MSCI Emergin…
## 6 Standard 2007 16.4 EMB iShares J.P. Morgan …
## 7 Standard 2007 13.7 PFF iShares Preferred an…
## 8 Standard 2000 12.6 IWN iShares Russell 2000…
## 9 Standard 2017 12.0 USHY iShares Broad USD Hi…
## 10 Standard 2000 11.1 IWO iShares Russell 2000…
## # ℹ 66 more rows
## # ℹ 17 more variables: incept_date <dttm>, gross_expense_ratio_percent <dbl>,
## # net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## # net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## # region <chr>, market <chr>, location <chr>, investment_style <chr>,
## # msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## # msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
print(bottom_quintile_summary)
## # A tibble: 9 × 4
## # Groups: asset_class [3]
## asset_class sub_asset_class num_funds total_assets
## <chr> <chr> <int> <dbl>
## 1 Equity All Cap 23 95.5
## 2 Equity Large Cap 1 1.85
## 3 Equity Large/Mid Cap 10 42.4
## 4 Equity Small Cap 17 187.
## 5 Fixed Income Corporates 1 0.0413
## 6 Fixed Income Credit 1 0.408
## 7 Fixed Income Government 3 17.6
## 8 Fixed Income High Yield 16 38.2
## 9 Real Estate Real Estate Securities 4 7.15
#Problem 4
# Sort the dataset by "msci_weighted_average_carbon_intensity_tons_co2e_m_sales" in descending order,
# select the top 5 most carbon-intense ESG ETFs
top_5_etfs <- blackrock_etf_screener_w_new_features %>%
filter(standard_or_esg == "ESG") %>%
filter(!is.na(msci_weighted_average_carbon_intensity_tons_co2e_m_sales)) %>%
arrange(desc(msci_weighted_average_carbon_intensity_tons_co2e_m_sales)) %>%
head(5)
# Calculate average carbon intensity for non-ESG ETFs
non_esg_average <- blackrock_etf_screener_w_new_features %>%
filter(standard_or_esg != "ESG" & !is.na(msci_weighted_average_carbon_intensity_tons_co2e_m_sales)) %>%
summarise(average_carbon_intensity = mean(msci_weighted_average_carbon_intensity_tons_co2e_m_sales))
# Create a data frame for plotting
plot_data <- data.frame(
ETF = gsub(" ETF$", "", c(top_5_etfs$name, "Average Non-ESG ETF")), # Remove "ETF" suffix
Carbon_Intensity = c(top_5_etfs$msci_weighted_average_carbon_intensity_tons_co2e_m_sales, non_esg_average$average_carbon_intensity),
Type = c(rep("ESG", 5), "Non-ESG")
)
# Create the grouped bar plot
ggplot(plot_data, aes(x = ETF, y = Carbon_Intensity, fill = Type)) +
geom_bar(stat = "identity", position = "dodge", width = 0.7) +
geom_text(aes(label = round(Carbon_Intensity, 2)), vjust = -0.5, position = position_dodge(width = 0.7), size = 3) +
labs(title = "Top 5 Most Carbon-Intense ESG ETFs vs. Average Non-ESG",
x = "",
y = "MSCI Weighted Average Carbon Intensity (tons CO2e/m sales)",
fill = "Type") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, hjust = 1),
axis.title.y = element_text(margin = margin(t = 0, r = 10, b = 0, l = 0))) +
ylim(0, max(plot_data$Carbon_Intensity) * 1.1) # Adjust y-axis limits for better readability
Note that the echo = FALSE parameter was added to the
code chunk to prevent printing of the R code that generated the
plot.