# assign the url to `github_raw_csv_url`github_raw_csv_url <-"https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/blackrock_etf_screener_2022-08-30.csv"# read in the data, and assign it to the object `blackrock_etf_data`blackrock_etf_data <-read_csv(github_raw_csv_url)
Rows: 393 Columns: 22
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (14): ticker, name, incept_date, net_assets_as_of, asset_class, sub_asse...
dbl (8): gross_expense_ratio_percent, net_expense_ratio_percent, net_assets...
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# print out the tibble to take a look at the datablackrock_etf_data
blackrock_etf_data <- blackrock_etf_data |># we are transforming both date columns (currently character strings) into date objects# so we can work with them.# this syntax is a bit confusing, but selects all columns containing `date` and applies# lubridate::mdy() function to them to turn them into date objects. mutate(across(contains("date"), lubridate::mdy)) |># Billions is a more useful magnitude than millions, so we'll create a column with # the assets in billions by dividing by `net_assets_millions` by 1,000 (10^3)# If we wanted trillions, we could divide by 1,000,000 (10^6)mutate(net_assets_usd_bn = net_assets_usd_mn/10^3) |># this column doesn't add anything to our analysis - it says that the data is from 8/30/22select(-net_assets_as_of)mini_blackrock_data <- blackrock_etf_data |># group by whether the fund is an ESG fund or notgroup_by(is_esg) |># take the top 5 from each group, by net assetsslice_max(order_by = net_assets_usd_bn, n =5) |># select the following columns select(ticker, fund_name = name_wo_ishares_etf, asset_class, sub_asset_class, region, incept_date, net_assets_usd_bn, msci_weighted_average_carbon_intensity_tons_co2e_m_sales) |># rename to `co2_intensity` because the full name is a mouthful, if descriptive.rename(co2_intensity = msci_weighted_average_carbon_intensity_tons_co2e_m_sales) |># always good to ungroup() if you've used a group_by(). We'll discuss later.ungroup()
Adding missing grouping variables: `is_esg`
mini_blackrock_data
# A tibble: 10 × 9
is_esg ticker fund_…¹ asset…² sub_a…³ region incept_d…⁴ net_a…⁵ co2_i…⁶
<chr> <chr> <chr> <chr> <chr> <chr> <date> <dbl> <dbl>
1 ESG Fund ESGU ESG Aw… Equity Large/… North… 2016-12-01 22.4 104.
2 ESG Fund ESGD ESG Aw… Equity Large/… Global 2016-06-28 6.43 104.
3 ESG Fund ICLN Global… Equity All Cap Global 2008-06-24 5.63 266.
4 ESG Fund ESGE ESG Aw… Equity Large/… Global 2016-06-28 4.23 168.
5 ESG Fund DSI MSCI K… Equity Large/… North… 2006-11-14 3.69 72.7
6 Regular Fund IVV Core S… Equity Large … North… 2000-05-15 298. 148.
7 Regular Fund IEFA Core M… Equity All Cap Global 2012-10-18 84.2 127.
8 Regular Fund AGG Core U… Fixed … Multi … North… 2003-09-22 82.3 283.
9 Regular Fund IJR Core S… Equity Small … North… 2000-05-22 66.5 133.
10 Regular Fund IEMG Core M… Equity All Cap Global 2012-10-18 64.9 369.
# … with abbreviated variable names ¹fund_name, ²asset_class, ³sub_asset_class,
# ⁴incept_date, ⁵net_assets_usd_bn, ⁶co2_intensity
blackrock_etf_data |># the column you want to examineselect(asset_class) |># selects the unique valuesdistinct()
# A tibble: 5 × 1
asset_class
<chr>
1 Equity
2 Fixed Income
3 Commodity
4 Real Estate
5 Multi Asset
blackrock_etf_data |># select all character columns and find the number of distinct values in eachsummarize(across(where(is.character), n_distinct)) |># rename this column because the original name is unweildy and makes all the rest of the formatting awkwardrename(msci_esg_rating = sustainability_characteristics_msci_esg_fund_ratings_msci_esg_fund_rating_aaa_ccc) |># pivot into long format to make it easier to work with.pivot_longer(cols =everything()) |># arrange from highest to lowest (descending order)arrange(value |>desc())
# A tibble: 12 × 2
name value
<chr> <int>
1 ticker 393
2 name 393
3 name_wo_ishares_etf 393
4 location 41
5 sub_asset_class 18
6 region 7
7 msci_esg_rating 7
8 asset_class 5
9 sustainable_classification 5
10 market 3
11 investment_style 2
12 is_esg 2
blackrock_etf_data |># select columns that are character data types. Take out the columns discussed above.select(where(is.character), -ticker, -name, -name_wo_ishares_etf) |># for each column, find the unique values (we'll cover map more later in the course)map(unique)
blackrock_etf_data |>ggplot(aes(x = net_assets_usd_bn, fill = is_esg)) +geom_histogram()
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
blackrock_etf_data |>ggplot(aes(x = net_assets_usd_bn, fill = is_esg)) +# alpha (0 to 1) makes the fill color translucent so you can see both# position = "identity" makes it so the two overlap, unlike the default of "stack"geom_histogram(binwidth =5, alpha = .6, position ="identity")
blackrock_etf_data |>ggplot(aes(x = net_assets_usd_bn)) +geom_histogram(binwidth =3) +# stick a ~ in front of the variable name you want to facet by. #ncol = 1 makes it 1 column with the two charts stacked on top of each otherfacet_wrap(~is_esg, ncol =1)
Graph 1 : density plot for CO 2 helps us understand the following :
The median (50th percentile) firm has a carbon intensity of 200 tonnes of C02
The 25th percentile firm has a carbon intensity of 150 tonnes of C02
The 75th percentile firm has a carbon intensity of 300 tonnes of C02
However, they are quite few outliers with higher co2 intensity
Graph 2: Compares the net assets across different markets. As seen below, it is clear the most of the ESG assets are concentrated in the developed markets and is negligible in the emerging markets.