W-3

library(tidyverse)

── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.3     ✔ forcats 1.0.0
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()

# assign the url to `github_raw_csv_url`
github_raw_csv_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/blackrock_etf_screener_2022-08-30.csv"

# read in the data, and assign it to the object `blackrock_etf_data`
blackrock_etf_data <- read_csv(github_raw_csv_url)

Rows: 393 Columns: 22
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (14): ticker, name, incept_date, net_assets_as_of, asset_class, sub_asse...
dbl  (8): gross_expense_ratio_percent, net_expense_ratio_percent, net_assets...

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

# print out the tibble to take a look at the data
blackrock_etf_data

# A tibble: 393 × 22
   ticker name    incep…¹ gross…² net_e…³ net_a…⁴ net_a…⁵ asset…⁶ sub_a…⁷ region
   <chr>  <chr>   <chr>     <dbl>   <dbl>   <dbl> <chr>   <chr>   <chr>   <chr> 
 1 IVV    iShare… 5/15/00    0.03    0.03 297663. 8/30/22 Equity  Large … North…
 2 IEFA   iShare… 10/18/…    0.07    0.07  84222. 8/30/22 Equity  All Cap Global
 3 AGG    iShare… 9/22/03    0.04    0.03  82344. 8/30/22 Fixed … Multi … North…
 4 IJR    iShare… 5/22/00    0.06    0.06  66533. 8/30/22 Equity  Small … North…
 5 IEMG   iShare… 10/18/…    0.09    0.09  64920. 8/30/22 Equity  All Cap Global
 6 IWF    iShare… 5/22/00    0.18    0.18  61831. 8/30/22 Equity  Large/… North…
 7 IJH    iShare… 5/22/00    0.05    0.05  61424. 8/30/22 Equity  Mid Cap North…
 8 IWM    iShare… 5/22/00    0.19    0.19  53048. 8/30/22 Equity  Small … North…
 9 IWD    iShare… 5/22/00    0.18    0.18  51913. 8/30/22 Equity  Large/… North…
10 EFA    iShare… 8/14/01    0.32    0.32  44144. 8/30/22 Equity  Large/… Global
# … with 383 more rows, 12 more variables: market <chr>, location <chr>,
#   investment_style <chr>,
#   sustainability_characteristics_msci_esg_fund_ratings_msci_esg_fund_rating_aaa_ccc <chr>,
#   msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>,
#   msci_esg_percent_coverage <dbl>, sustainable_classification <chr>,
#   name_wo_ishares_etf <chr>, is_esg <chr>, years_from_inception <dbl>, …

blackrock_etf_data <- blackrock_etf_data |> 
  # we are transforming both date columns (currently character strings) into date objects
  # so we can work with them.
  # this syntax is a bit confusing, but selects all columns containing `date` and applies
  # lubridate::mdy() function to them to turn them into date objects. 
  mutate(across(contains("date"), lubridate::mdy)) |>
  # Billions is a more useful magnitude than millions, so we'll create a column with 
  # the assets in billions by dividing by `net_assets_millions` by 1,000 (10^3)
  # If we wanted trillions, we could divide by 1,000,000 (10^6)
  mutate(net_assets_usd_bn = net_assets_usd_mn/10^3) |> 
  # this column doesn't add anything to our analysis - it says that the data is from 8/30/22
  select(-net_assets_as_of)
mini_blackrock_data <- blackrock_etf_data |> 
  # group by whether the fund is an ESG fund or not
  group_by(is_esg) |> 
  # take the top 5 from each group, by net assets
  slice_max(order_by = net_assets_usd_bn, n = 5) |> 
  # select the following columns 
  select(ticker, fund_name = name_wo_ishares_etf, asset_class, sub_asset_class, region, incept_date, net_assets_usd_bn,
         msci_weighted_average_carbon_intensity_tons_co2e_m_sales) |> 
  # rename to `co2_intensity` because the full name is a mouthful, if descriptive.
  rename(co2_intensity = msci_weighted_average_carbon_intensity_tons_co2e_m_sales) |> 
  # always good to ungroup() if you've used a group_by().  We'll discuss later.
  ungroup()

Adding missing grouping variables: `is_esg`

mini_blackrock_data

# A tibble: 10 × 9
   is_esg       ticker fund_…¹ asset…² sub_a…³ region incept_d…⁴ net_a…⁵ co2_i…⁶
   <chr>        <chr>  <chr>   <chr>   <chr>   <chr>  <date>       <dbl>   <dbl>
 1 ESG Fund     ESGU   ESG Aw… Equity  Large/… North… 2016-12-01   22.4    104. 
 2 ESG Fund     ESGD   ESG Aw… Equity  Large/… Global 2016-06-28    6.43   104. 
 3 ESG Fund     ICLN   Global… Equity  All Cap Global 2008-06-24    5.63   266. 
 4 ESG Fund     ESGE   ESG Aw… Equity  Large/… Global 2016-06-28    4.23   168. 
 5 ESG Fund     DSI    MSCI K… Equity  Large/… North… 2006-11-14    3.69    72.7
 6 Regular Fund IVV    Core S… Equity  Large … North… 2000-05-15  298.     148. 
 7 Regular Fund IEFA   Core M… Equity  All Cap Global 2012-10-18   84.2    127. 
 8 Regular Fund AGG    Core U… Fixed … Multi … North… 2003-09-22   82.3    283. 
 9 Regular Fund IJR    Core S… Equity  Small … North… 2000-05-22   66.5    133. 
10 Regular Fund IEMG   Core M… Equity  All Cap Global 2012-10-18   64.9    369. 
# … with abbreviated variable names ¹fund_name, ²asset_class, ³sub_asset_class,
#   ⁴incept_date, ⁵net_assets_usd_bn, ⁶co2_intensity

blackrock_etf_data |> 
  glimpse()

Rows: 393
Columns: 22
$ ticker                                                                            <chr> …
$ name                                                                              <chr> …
$ incept_date                                                                       <date> …
$ gross_expense_ratio_percent                                                       <dbl> …
$ net_expense_ratio_percent                                                         <dbl> …
$ net_assets_usd_mn                                                                 <dbl> …
$ asset_class                                                                       <chr> …
$ sub_asset_class                                                                   <chr> …
$ region                                                                            <chr> …
$ market                                                                            <chr> …
$ location                                                                          <chr> …
$ investment_style                                                                  <chr> …
$ sustainability_characteristics_msci_esg_fund_ratings_msci_esg_fund_rating_aaa_ccc <chr> …
$ msci_esg_quality_score_0_10                                                       <dbl> …
$ msci_weighted_average_carbon_intensity_tons_co2e_m_sales                          <dbl> …
$ msci_esg_percent_coverage                                                         <dbl> …
$ sustainable_classification                                                        <chr> …
$ name_wo_ishares_etf                                                               <chr> …
$ is_esg                                                                            <chr> …
$ years_from_inception                                                              <dbl> …
$ year_launched                                                                     <dbl> …
$ net_assets_usd_bn                                                                 <dbl> …

mini_blackrock_data |> 
  glimpse()

Rows: 10
Columns: 9
$ is_esg            <chr> "ESG Fund", "ESG Fund", "ESG Fund", "ESG Fund", "ESG…
$ ticker            <chr> "ESGU", "ESGD", "ICLN", "ESGE", "DSI", "IVV", "IEFA"…
$ fund_name         <chr> "ESG Aware MSCI USA", "ESG Aware MSCI EAFE", "Global…
$ asset_class       <chr> "Equity", "Equity", "Equity", "Equity", "Equity", "E…
$ sub_asset_class   <chr> "Large/Mid Cap", "Large/Mid Cap", "All Cap", "Large/…
$ region            <chr> "North America", "Global", "Global", "Global", "Nort…
$ incept_date       <date> 2016-12-01, 2016-06-28, 2008-06-24, 2016-06-28, 200…
$ net_assets_usd_bn <dbl> 22.376688, 6.425673, 5.628011, 4.233544, 3.688930, 2…
$ co2_intensity     <dbl> 103.60, 103.83, 265.82, 167.71, 72.73, 148.34, 126.6…

blackrock_etf_data |>
  # the column you want to examine
  select(asset_class) |> 
  # selects the unique values
  distinct()

# A tibble: 5 × 1
  asset_class 
  <chr>       
1 Equity      
2 Fixed Income
3 Commodity   
4 Real Estate 
5 Multi Asset

blackrock_etf_data |> 
  # select all character columns and find the number of distinct values in each
  summarize(across(where(is.character), n_distinct)) |> 
  # rename this column because the original name is unweildy and makes all the rest of the formatting awkward
  rename(msci_esg_rating = sustainability_characteristics_msci_esg_fund_ratings_msci_esg_fund_rating_aaa_ccc) |> 
  # pivot into long format to make it easier to work with.
  pivot_longer(cols = everything()) |> 
  # arrange from highest to lowest (descending order)
  arrange(value |> desc())

# A tibble: 12 × 2
   name                       value
   <chr>                      <int>
 1 ticker                       393
 2 name                         393
 3 name_wo_ishares_etf          393
 4 location                      41
 5 sub_asset_class               18
 6 region                         7
 7 msci_esg_rating                7
 8 asset_class                    5
 9 sustainable_classification     5
10 market                         3
11 investment_style               2
12 is_esg                         2

blackrock_etf_data |> 
  # select columns that are character data types. Take out the columns discussed above.
  select(where(is.character), -ticker, -name, -name_wo_ishares_etf) |> 
  # for each column, find the unique values (we'll cover map more later in the course)
  map(unique)

$asset_class
[1] "Equity"       "Fixed Income" "Commodity"    "Real Estate"  "Multi Asset" 

$sub_asset_class
 [1] "Large Cap"              "All Cap"                "Multi Sectors"         
 [4] "Small Cap"              "Large/Mid Cap"          "Mid Cap"               
 [7] "Credit"                 "Inflation"              "Municipals"            
[10] "Precious Metals"        "Government"             "Mortgages"             
[13] "High Yield"             "Real Estate Securities" "Multi Commodity"       
[16] "Static Allocation"      "Mid/Small Cap"          "Multi Strategy"        

$region
[1] "North America"          "Global"                 "Asia Pacific"          
[4] "Latin America"          "Europe"                 "Middle East and Africa"
[7] "Kuwait"                

$market
[1] "Developed" "Emerging"  "Frontier" 

$location
 [1] "United States"        "Broad"                "Japan"               
 [4] "China"                "Brazil"               "India"               
 [7] "Taiwan"               "Canada"               "United Kingdom"      
[10] "South Korea"          "Australia"            "Switzerland"         
[13] "Germany"              "Saudi Arabia"         "Mexico"              
[16] "France"               "Hong Kong"            "Singapore"           
[19] "Chile"                "Indonesia"            "Spain"               
[22] "Sweden"               "Thailand"             "Netherlands"         
[25] "South Africa"         "Turkey"               "Malaysia"            
[28] "Italy"                "Denmark"              "Israel"              
[31] "Poland"               "New Zealand"          "Philippines"         
[34] "Qatar"                "Austria"              "United Arab Emirates"
[37] "Norway"               "Kuwait"               "Finland"             
[40] "Belgium"              "Russia"              

$investment_style
[1] "Index"  "Active"

$sustainability_characteristics_msci_esg_fund_ratings_msci_esg_fund_rating_aaa_ccc
[1] "AA"  "AAA" "A"   "BBB" NA    "BB"  "B"  

$sustainable_classification
[1] NA             "Broad ESG"    "Thematic ESG" "Impact"       "Screened"    

$is_esg
[1] "Regular Fund" "ESG Fund"

blackrock_etf_data |> 
  ggplot(aes(x = net_assets_usd_bn)) + 
  geom_histogram()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

blackrock_etf_data |> 
  ggplot(aes(x = net_assets_usd_bn)) + 
  geom_histogram(binwidth = 3)

blackrock_etf_data |> 
  ggplot(aes(x = net_assets_usd_bn)) + 
  geom_density()

blackrock_etf_data |> 
  ggplot(aes(y = net_assets_usd_bn)) +
  geom_boxplot()

mtcars |> 
  ggplot(aes(y = mpg)) + 
  geom_boxplot()

blackrock_etf_data |> 
  ggplot(aes(x = net_assets_usd_bn, fill = is_esg)) + 
  geom_histogram()

`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

blackrock_etf_data |> 
  ggplot(aes(x = net_assets_usd_bn, fill = is_esg)) + 
  # alpha (0 to 1) makes the fill color translucent so you can see both
  # position = "identity" makes it so the two overlap, unlike the default of "stack"
  geom_histogram(binwidth = 5, alpha = .6, position = "identity")

blackrock_etf_data |> 
  ggplot(aes(x = net_assets_usd_bn)) + 
  geom_histogram(binwidth = 3) +
  # stick a ~ in front of the variable name you want to facet by. 
  #ncol = 1 makes it 1 column with the two charts stacked on top of each other
  facet_wrap(~is_esg, ncol = 1)

Graph 1 : density plot for CO 2 helps us understand the following :

The median (50th percentile) firm has a carbon intensity of 200 tonnes of C02
The 25th percentile firm has a carbon intensity of 150 tonnes of C02
The 75th percentile firm has a carbon intensity of 300 tonnes of C02
However, they are quite few outliers with higher co2 intensity

library(tidyverse) 
blackrock_etf_data |> 
  glimpse()

Rows: 393
Columns: 22
$ ticker                                                                            <chr> …
$ name                                                                              <chr> …
$ incept_date                                                                       <date> …
$ gross_expense_ratio_percent                                                       <dbl> …
$ net_expense_ratio_percent                                                         <dbl> …
$ net_assets_usd_mn                                                                 <dbl> …
$ asset_class                                                                       <chr> …
$ sub_asset_class                                                                   <chr> …
$ region                                                                            <chr> …
$ market                                                                            <chr> …
$ location                                                                          <chr> …
$ investment_style                                                                  <chr> …
$ sustainability_characteristics_msci_esg_fund_ratings_msci_esg_fund_rating_aaa_ccc <chr> …
$ msci_esg_quality_score_0_10                                                       <dbl> …
$ msci_weighted_average_carbon_intensity_tons_co2e_m_sales                          <dbl> …
$ msci_esg_percent_coverage                                                         <dbl> …
$ sustainable_classification                                                        <chr> …
$ name_wo_ishares_etf                                                               <chr> …
$ is_esg                                                                            <chr> …
$ years_from_inception                                                              <dbl> …
$ year_launched                                                                     <dbl> …
$ net_assets_usd_bn                                                                 <dbl> …

blackrock_etf_data|> 
  ggplot(aes(y =msci_weighted_average_carbon_intensity_tons_co2e_m_sales )) + 
  geom_boxplot()

Warning: Removed 62 rows containing non-finite values (`stat_boxplot()`).

Graph 2: Compares the net assets across different markets. As seen below, it is clear the most of the ESG assets are concentrated in the developed markets and is negligible in the emerging markets.

library(tidyverse) 
blackrock_etf_data |> 
  filter(is_esg == "ESG Fund") %>%
  ggplot(aes(x =net_assets_usd_bn)) + 
  geom_histogram(binwidth = 3) +
  facet_wrap(~market, ncol = 1)

Graph 3: This graph show which asset class has the most CO2 intensity

library(tidyverse) 
blackrock_etf_data |> 
  ggplot(aes(x = msci_weighted_average_carbon_intensity_tons_co2e_m_sales )) + 
  geom_histogram(binwidth = 3) +
  facet_wrap(~ asset_class, ncol = 1)

Warning: Removed 62 rows containing non-finite values (`stat_bin()`).