#load the tidyverse
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.3 ✔ readr 2.1.4
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.4 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.0
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
# this will stop R from presenting data in scientific notation, which can be annoying.
options(scipen = 999)
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv")
## Rows: 424 Columns: 18
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
## dbl (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
## dttm (2): incept_date, net_assets_as_of
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener
## # A tibble: 424 × 18
## ticker name incept_date gross_expense_ratio_…¹
## <chr> <chr> <dttm> <dbl>
## 1 IVV iShares Core S&P 500 ETF 2000-05-15 00:00:00 0.03
## 2 IEFA iShares Core MSCI EAFE ETF 2012-10-18 00:00:00 0.07
## 3 AGG iShares Core U.S. Aggregat… 2003-09-22 00:00:00 0.03
## 4 IWF iShares Russell 1000 Growt… 2000-05-22 00:00:00 0.19
## 5 IJR iShares Core S&P Small-Cap… 2000-05-22 00:00:00 0.06
## 6 IJH iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00 0.05
## 7 IEMG iShares Core MSCI Emerging… 2012-10-18 00:00:00 0.09
## 8 IWM iShares Russell 2000 ETF 2000-05-22 00:00:00 0.19
## 9 IWD iShares Russell 1000 Value… 2000-05-22 00:00:00 0.19
## 10 TLT iShares 20+ Year Treasury … 2002-07-22 00:00:00 0.15
## # ℹ 414 more rows
## # ℹ abbreviated name: ¹​gross_expense_ratio_percent
## # ℹ 14 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## # net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## # region <chr>, market <chr>, location <chr>, investment_style <chr>,
## # msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## # msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
blackrock_etf_screener |>
glimpse()
## Rows: 424
## Columns: 18
## $ ticker <chr> "IVV", "IEFA"…
## $ name <chr> "iShares Core…
## $ incept_date <dttm> 2000-05-15, …
## $ gross_expense_ratio_percent <dbl> 0.03, 0.07, 0…
## $ net_expense_ratio_percent <dbl> 0.03, 0.07, 0…
## $ net_assets_usd <dbl> 399312542067,…
## $ net_assets_as_of <dttm> 2023-12-27, …
## $ asset_class <chr> "Equity", "Eq…
## $ sub_asset_class <chr> "Large Cap", …
## $ region <chr> "North Americ…
## $ market <chr> "Developed", …
## $ location <chr> "United State…
## $ investment_style <chr> "Index", "Ind…
## $ msci_esg_fund_rating_aaa_ccc <chr> "A", "AA", "A…
## $ msci_esg_quality_score_0_10 <dbl> 6.6845, 7.519…
## $ msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl> 106.80, 102.8…
## $ msci_esg_percent_coverage <dbl> 99.05, 99.80,…
## $ sustainable_classification <chr> NA, NA, NA, N…
#only contains ESG funds (no standard funds)
#contains the largest fund from each sustainable classification.
#shows the percent of assets that the top fund comprises for its category (e.g. the top Thematic fund is x% of the total assets of all thematic funds).
#Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.
blackrock_etf_screener_w_new_features <- blackrock_etf_screener |>
mutate(
# if the sustainable_classification column is NA, then the fund is not an ESG fund.
standard_or_esg = if_else(
condition = is.na(sustainable_classification),
true = "Standard",
false = "ESG"
)
)
blackrock_etf_screener_w_new_features
## # A tibble: 424 × 19
## ticker name incept_date gross_expense_ratio_…¹
## <chr> <chr> <dttm> <dbl>
## 1 IVV iShares Core S&P 500 ETF 2000-05-15 00:00:00 0.03
## 2 IEFA iShares Core MSCI EAFE ETF 2012-10-18 00:00:00 0.07
## 3 AGG iShares Core U.S. Aggregat… 2003-09-22 00:00:00 0.03
## 4 IWF iShares Russell 1000 Growt… 2000-05-22 00:00:00 0.19
## 5 IJR iShares Core S&P Small-Cap… 2000-05-22 00:00:00 0.06
## 6 IJH iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00 0.05
## 7 IEMG iShares Core MSCI Emerging… 2012-10-18 00:00:00 0.09
## 8 IWM iShares Russell 2000 ETF 2000-05-22 00:00:00 0.19
## 9 IWD iShares Russell 1000 Value… 2000-05-22 00:00:00 0.19
## 10 TLT iShares 20+ Year Treasury … 2002-07-22 00:00:00 0.15
## # ℹ 414 more rows
## # ℹ abbreviated name: ¹​gross_expense_ratio_percent
## # ℹ 15 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## # net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## # region <chr>, market <chr>, location <chr>, investment_style <chr>,
## # msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## # msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
esg_fund_summary <- blackrock_etf_screener_w_new_features |>
dplyr::filter(standard_or_esg == "ESG") |>
group_by(sustainable_classification) |>
summarize(
largest_fund_assets = max(net_assets_usd),
total_assets = sum(net_assets_usd)
) |>
mutate(
percent_of_category_assets = (largest_fund_assets / total_assets) * 100
) |>
ungroup() |>
arrange(desc(percent_of_category_assets)) |>
mutate(
rank = rank(-percent_of_category_assets)
)
esg_fund_summary
## # A tibble: 4 × 5
## sustainable_classifi…¹ largest_fund_assets total_assets percent_of_category_…²
## <chr> <dbl> <dbl> <dbl>
## 1 Impact 335216065. 335216065. 100
## 2 Thematic 3057666513. 3427675675. 89.2
## 3 Screened 182340754. 383389770. 47.6
## 4 Uplift 13385805595. 51443665637. 26.0
## # ℹ abbreviated names: ¹​sustainable_classification, ²​percent_of_category_assets
## # ℹ 1 more variable: rank <dbl>
#Create a sorted bar chart of the 10 smallest ETFs. # Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.
library(ggplot2)
library(dplyr)
library(forcats)
smallest_etfs <- blackrock_etf_screener_w_new_features |>
arrange(net_assets_usd) |>
head(10) |>
mutate(fund_name_reordered = fct_reorder(name, net_assets_usd))
# Creating the sorted bar chart
ggplot(smallest_etfs, aes(x = fund_name_reordered, y = net_assets_usd)) +
geom_bar(stat = "identity", fill = "skyblue") +
coord_flip() +
labs(title = "10 Smallest ETFs by Net Assets",
x = "ETF Name",
y = "Net Assets (USD)") +
theme_minimal()
bottom_quantile <- blackrock_etf_screener_w_new_features |>
dplyr::filter(!is.na(msci_esg_quality_score_0_10)) |>
mutate(quantile_group = ntile(msci_esg_quality_score_0_10,5)) |>
dplyr::filter(quantile_group==1) |>
arrange(desc(msci_esg_quality_score_0_10))
bottom_quantile_group <- bottom_quantile |>
group_by(asset_class, market)|>
dplyr::summarise(
number_of_funds=n(),
total_assets = sum(net_assets_usd)
)
## `summarise()` has grouped output by 'asset_class'. You can override using the
## `.groups` argument.
print(bottom_quantile)
## # A tibble: 76 × 20
## ticker name incept_date gross_expense_ratio_…¹
## <chr> <chr> <dttm> <dbl>
## 1 BPAY BlackRock Future Financial… 2022-08-16 00:00:00 0.71
## 2 EEM iShares MSCI Emerging Mark… 2003-04-07 00:00:00 0.69
## 3 HEEM iShares Currency Hedged MS… 2014-09-23 00:00:00 1.47
## 4 REET iShares Global REIT ETF 2014-07-08 00:00:00 0.14
## 5 IBRN iShares Neuroscience and H… 2022-08-24 00:00:00 0.47
## 6 GHYG iShares US & Intl High Yie… 2012-04-03 00:00:00 0.4
## 7 IJS iShares S&P Small-Cap 600 … 2000-07-24 00:00:00 0.18
## 8 IBHJ iShares® iBonds® 2030 Term… 2023-06-21 00:00:00 0.35
## 9 EMGF iShares Emerging Markets E… 2015-12-08 00:00:00 0.25
## 10 IBHI iShares® iBonds® 2029 Term… 2022-03-08 00:00:00 0.35
## # ℹ 66 more rows
## # ℹ abbreviated name: ¹​gross_expense_ratio_percent
## # ℹ 16 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
## # net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
## # region <chr>, market <chr>, location <chr>, investment_style <chr>,
## # msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
## # msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
#We only began exploring this data. Use your newfound dplyr skills to uncover actionable insights from the dataset.
#Show your work.
#Present 5 repeatable factoids or data visualizations. As always, make sure your data visualizations have meaningful titles and labels. We will share these in class.
#1: Distribution of Net Assets Across Asset Classes
#Objective: Understand how net assets are distributed across different asset classes.
library(dplyr)
library(ggplot2)
asset_class_distribution <- blackrock_etf_screener_w_new_features |>
group_by(asset_class) |>
summarise(total_assets = sum(net_assets_usd, na.rm = TRUE)) |>
arrange(desc(total_assets))
ggplot(asset_class_distribution, aes(x = reorder(asset_class, total_assets), y = total_assets)) +
geom_bar(stat = "identity", fill = "orange") +
coord_flip() +
labs(title = "Distribution of Net Assets Across Asset Classes",
x = "Asset Class",
y = "Total Net Assets (USD)") +
theme_minimal()
#2:ESG Score Distribution (Density Plot)
#Objective: Visualize the distribution of ESG scores across all funds to highlight areas of concentration.
ggplot(blackrock_etf_screener_w_new_features, aes(x = msci_esg_quality_score_0_10)) +
geom_density(fill = "skyblue", alpha = 0.7) +
labs(title = "Distribution of MSCI ESG Quality Scores",
x = "ESG Score",
y = "Density") +
theme_minimal()
## Warning: Removed 44 rows containing non-finite values (`stat_density()`).
#3:Regional Distribution of ESG Fund Assets
#Objective: Visualize the distribution of ESG fund assets across different geographic regions to identify where ESG investing is most concentrated.
library(ggplot2)
library(dplyr)
# Prepare the data
regional_distribution_esg <- blackrock_etf_screener_w_new_features |>
filter(standard_or_esg == "ESG") |>
group_by(region) |>
summarise(total_assets = sum(net_assets_usd, na.rm = TRUE)) |>
mutate(proportion = total_assets / sum(total_assets)) |>
arrange(desc(total_assets))
# Convert to factor for ordered plotting
regional_distribution_esg$region <- factor(regional_distribution_esg$region, levels = regional_distribution_esg$region)
# Plot
ggplot(regional_distribution_esg, aes(x = region, y = proportion, fill = region)) +
geom_col() +
coord_flip() + # Horizontal layout
labs(title = "Regional Distribution of ESG Fund Assets", x = "Geographic Region", y = "Proportion of Total Assets") +
scale_fill_brewer(palette = "Pastel1") +
theme_minimal() +
geom_text(aes(label = scales::percent(proportion)), position = position_dodge(width = 0.9), hjust = -0.2, size = 2)
#4:Top 10 Markets by Number of ESG Funds (Bar Chart)
#Objective: Identify the top 10 markets with the highest number of ESG funds.
top_markets_esg <- blackrock_etf_screener_w_new_features |>
dplyr:: filter(standard_or_esg == "ESG") |>
count(market, sort = TRUE) |>
top_n(10, wt = n)
# Bar Chart
ggplot(top_markets_esg, aes(x = reorder(market, n), y = n, fill = market)) +
geom_col() +
coord_flip() +
labs(title = "Top 10 Markets by Number of ESG Funds",
x = "Market",
y = "Number of Funds") +
scale_fill_viridis_d()
# 5:Fund Size vs. ESG Score Correlation
#Objective: Investigate if there's a correlation between the size of the funds (net assets) and their ESG scores.
ggplot(blackrock_etf_screener_w_new_features, aes(x = net_assets_usd, y = msci_esg_quality_score_0_10)) +
geom_point(alpha = 0.5) +
geom_smooth(method = "lm", color = "blue", se = FALSE) +
scale_x_log10(labels = scales::comma) +
labs(title = "Correlation between Fund Size and ESG Score",
x = "Net Assets (USD, log scale)",
y = "MSCI ESG Quality Score") +
theme_minimal()
## `geom_smooth()` using formula = 'y ~ x'
## Warning: Removed 44 rows containing non-finite values (`stat_smooth()`).
## Warning: Removed 44 rows containing missing values (`geom_point()`).