Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm (2): incept_date, net_assets_as_of
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener |>group_by(sustainable_classification) |>summarize(# how many funds?n_funds =n(),# how much money?assets_usd =sum(net_assets_usd, na.rm =TRUE) ) |># arrange in descending orderarrange(assets_usd |>desc())
blackrock_etf_screener_w_new_features <- blackrock_etf_screener |>mutate(# if the sustainable_classification column is NA, then the fund is not an ESG fund.standard_or_esg =if_else(condition =is.na(sustainable_classification),true ="Standard",false ="ESG" ),# lubridate::year() extracts the year from a dateinception_year = lubridate::year(incept_date),# Change to a meaningful magnitude for the data. In asset management, billions is a good default. net_assets_bn_usd = net_assets_usd/10^9,# let's put our new variables at the front so we can see them easily..before =everything() )blackrock_etf_screener_w_new_features
# A tibble: 424 × 21
standard_or_esg inception_year net_assets_bn_usd ticker name
<chr> <dbl> <dbl> <chr> <chr>
1 Standard 2000 399. IVV iShares Core S&P 500…
2 Standard 2012 107. IEFA iShares Core MSCI EA…
3 Standard 2003 101. AGG iShares Core U.S. Ag…
4 Standard 2000 82.1 IWF iShares Russell 1000…
5 Standard 2000 78.2 IJR iShares Core S&P Sma…
6 Standard 2000 77.1 IJH iShares Core S&P Mid…
7 Standard 2012 73.9 IEMG iShares Core MSCI Em…
8 Standard 2000 67.7 IWM iShares Russell 2000…
9 Standard 2000 55.4 IWD iShares Russell 1000…
10 Standard 2002 52.2 TLT iShares 20+ Year Tre…
# ℹ 414 more rows
# ℹ 16 more variables: incept_date <dttm>, gross_expense_ratio_percent <dbl>,
# net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
# net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
# region <chr>, market <chr>, location <chr>, investment_style <chr>,
# msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
# msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
Homework Problem 1
Create a dplyr pipeline to create a tibble fulfills the following parameters:
only contains ESG funds (no standard funds)
contains the largest fund from each sustainable classification.
shows the percent of assets that the top fund comprises for its category (e.g. the top Thematic fund is x% of the total assets of all thematic funds).
Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.
largest_funds <- blackrock_etf_screener_w_new_features %>%#Filter ESG fundsfilter(standard_or_esg =="ESG") %>%select(ticker, name, net_assets_bn_usd, sustainable_classification) %>%#Group by sustainable classification and calculate statsgroup_by(sustainable_classification) %>%mutate(fund_rank =rank(net_assets_bn_usd *-1),fund_percentile =percent_rank(net_assets_bn_usd),fund_decile =ntile(net_assets_bn_usd, n =10),pct_of_total = net_assets_bn_usd/sum(net_assets_bn_usd, na.rm =TRUE) *100,pct_of_total_cumulative =cumsum(pct_of_total) ) %>%#Filter top-ranking funds; rank these funds by concentration, format for presentationfilter(fund_rank ==1) %>%ungroup() %>%mutate(fund_concentration_rank =rank(pct_of_total *-1) ) %>%select(fund_concentration_rank,ticker, name, net_assets_bn_usd, sustainable_classification,pct_of_total) %>%arrange(fund_concentration_rank)print(largest_funds)
Create a sorted bar chart of the 10 smallest ETFs.
Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.
smallest_etfs <- blackrock_etf_screener_w_new_features %>%filter(standard_or_esg =="ESG") %>%select(ticker, name, net_assets_bn_usd) %>%arrange(net_assets_bn_usd) %>%head(10)bar_chart_smallest_etfs <-ggplot(smallest_etfs,aes(x=net_assets_bn_usd, y=fct_reorder(name,net_assets_bn_usd)))+geom_bar(stat="identity",fill="navy", width=0.5)+labs(title="10 Smallest ETFs by Net Assets",x="Net Assets (in billion USD)",y="ETF Name")+theme_classic()print(bar_chart_smallest_etfs)
Homework Problem 3
Find the funds in the bottom quintile of the MSCI ESG quality score.
For the funds in the bottom quintile of MSCI ESG quality scores, calculate i) the number of funds, and ii) the total assets, grouped by asset class (e.g. Equity, Fixed Income) and sub-asset class (e.g. Emerging Markets).
`summarise()` has grouped output by 'asset_class'. You can override using the
`.groups` argument.
print(bottom_quantile)
# A tibble: 76 × 22
standard_or_esg inception_year net_assets_bn_usd ticker name
<chr> <dbl> <dbl> <chr> <chr>
1 Standard 2022 0.00459 BPAY BlackRock Future Fin…
2 Standard 2003 17.6 EEM iShares MSCI Emergin…
3 Standard 2014 0.129 HEEM iShares Currency Hed…
4 Standard 2014 3.48 REET iShares Global REIT …
5 Standard 2022 0.00400 IBRN iShares Neuroscience…
6 Standard 2012 0.128 GHYG iShares US & Intl Hi…
7 Standard 2000 8.10 IJS iShares S&P Small-Ca…
8 Standard 2023 0.0130 IBHJ iShares® iBonds® 203…
9 Standard 2015 0.554 EMGF iShares Emerging Mar…
10 Standard 2022 0.0162 IBHI iShares® iBonds® 202…
# ℹ 66 more rows
# ℹ 17 more variables: incept_date <dttm>, gross_expense_ratio_percent <dbl>,
# net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
# net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
# region <chr>, market <chr>, location <chr>, investment_style <chr>,
# msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
# msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
print(bottom_quantile_group)
# A tibble: 5 × 4
# Groups: asset_class [3]
asset_class market number_of_funds total_assets
<chr> <chr> <int> <dbl>
1 Equity Developed 22 203.
2 Equity Emerging 29 124.
3 Fixed Income Developed 16 38.6
4 Fixed Income Emerging 5 17.7
5 Real Estate Developed 4 7.15
Homework Problem 4
We only began exploring this data. Use your newfound dplyr skills to uncover actionable insights from the dataset.
Show your work.
Present 5 repeatable factoids or data visualizations. As always, make sure your data visualizations have meaningful titles and labels. We will share these in class.
# A tibble: 4 × 3
asset_class net_assets_bn_usd percentage
<chr> <dbl> <dbl>
1 Equity 48.8 87.9
2 Fixed Income 6.68 12
3 Multi Asset 0.0574 0.1
4 Real Estate 0.00939 0
ESG Fund by Asset Class (Pie Chart)
esg_by_asset_class <- blackrock_etf_screener_w_new_features %>%filter(standard_or_esg =="ESG") %>%group_by(asset_class) %>%summarize(net_assets_bn_usd =sum(net_assets_bn_usd, na.rm =TRUE)) %>%mutate(percentage =round((net_assets_bn_usd /sum(net_assets_bn_usd)) *100, 1)) %>%arrange(desc(net_assets_bn_usd))pie_chart <-ggplot(esg_by_asset_class, aes(x="", y=net_assets_bn_usd, fill=asset_class)) +geom_col() +coord_polar("y", start=0) +labs(title="ESG Fund by Asset Class",subtitle="The majority of the ESG funds are in Equity",fill="Asset Class") +geom_text(aes(label =paste0(percentage, "%")), position =position_stack(vjust =0.5)) +theme_minimal()pie_chart
Net Assets by Region (Bar Chart)
net_assets_by_region <- blackrock_etf_screener_w_new_features %>%group_by(region) %>%summarize(net_assets_usd =sum(net_assets_usd, na.rm =TRUE)/1000000000) %>%arrange(desc(net_assets_usd)) # Arrange by net assetsggplot(net_assets_by_region, aes(x =reorder(region, -net_assets_usd), y = net_assets_usd)) +geom_bar(stat ="identity", fill ="pink", color ="black") +labs(x ="Region", y ="Net Assets (billion USD)", title ="Net Assets by Region",subtitle ="North America ") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))
Carbon Intensity vs Net Assets (Scatter Plot)
library(dplyr)# Filter out rows with missing values in MSCI weighted average carbon intensity or net assetsfiltered_funds <- blackrock_etf_screener_w_new_features %>%filter(!is.na(msci_weighted_average_carbon_intensity_tons_co2e_m_sales) &!is.na(net_assets_bn_usd)) %>%filter(msci_weighted_average_carbon_intensity_tons_co2e_m_sales <=1000,net_assets_bn_usd <=200)# Create a new column to categorize ESG and non-ESG fundsfiltered_funds <- filtered_funds %>%mutate(standard_or_esg =ifelse(standard_or_esg =="ESG", "ESG", "Standard"))# Create a scatter plotggplot(filtered_funds, aes(x = (net_assets_bn_usd), y =(msci_weighted_average_carbon_intensity_tons_co2e_m_sales), color = standard_or_esg)) +geom_point() +geom_smooth(method="lm", color="red")+scale_color_manual(values =c("ESG"="darkgreen", "Standard"="orange")) +labs(title ="Correlation between Carbon Intensity and Net Assets",subtitle ="Negative correlation: Funds with higher carbon intensity tend to have lower net assets",x ="Net Assets (USD)", y ="MSCI Weighted Average Carbon Intensity (tCO2e/$M sales)", color ="Fund Type") +theme_minimal()
`geom_smooth()` using formula = 'y ~ x'
The regression line in this scatter plot shows a negative slope, which means there is a negative correlation between Net Assets and MSCI Weighted Average Carbon Intensity. As the value of the Net Assets increases, the carbon intensity tend to decreases. The outcome could relate to investor’s preference to choose lower carbon intensity assets.