Ticha HW 3

Getting Started

Loading relevant libraries.

# loading relevant libraries
library(tidyverse)
Warning: package 'tidyr' was built under R version 4.2.3
Warning: package 'readr' was built under R version 4.2.3
Warning: package 'dplyr' was built under R version 4.2.3
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(forcats)
library(gplots)
Warning: package 'gplots' was built under R version 4.2.3

Attaching package: 'gplots'

The following object is masked from 'package:stats':

    lowess
library(reshape)

Attaching package: 'reshape'

The following object is masked from 'package:lubridate':

    stamp

The following object is masked from 'package:dplyr':

    rename

The following objects are masked from 'package:tidyr':

    expand, smiths
library(GGally)
Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

Reading the Data

blackrock_etf_screener<- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv")
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

2.5.0.1 Homework problem 1:

Create a dplyr pipeline to create a tibble fulfills the following parameters:

  • only contains ESG funds (no standard funds)

  • contains the largest fund from each sustainable classification.

  • shows the percent of assets that the top fund comprises for its category (e.g. the top Thematic fund is x% of the total assets of all thematic funds).

  • Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.

blackrock_etf_screener %>%
  group_by(sustainable_classification) %>% 
  count(sort = TRUE)
# A tibble: 5 × 2
# Groups:   sustainable_classification [5]
  sustainable_classification     n
  <chr>                      <int>
1 <NA>                         384
2 Uplift                        30
3 Thematic                       6
4 Screened                       3
5 Impact                         1
blackrock_esg <- blackrock_etf_screener %>%
  filter(!is.na(sustainable_classification)) %>%
  group_by(sustainable_classification) %>%
  arrange(desc(net_assets_usd)) %>%
  summarize(
    Top_Fund_Name = first(name),  # Gets the first fund name after arranging by net asset
    Top_Fund_assets = first(net_assets_usd),  # Gets the net asset of the first, hence largest, fund
    Total_assets_in_Category = sum(net_assets_usd),
    Percent_of_Category = (first(net_assets_usd) / sum(net_assets_usd)) * 100,
    .groups = "drop"
  ) %>%
  mutate(Rank = rank(-Percent_of_Category)) %>%
  arrange(desc(Percent_of_Category))
# To view the resulting tibble
print(blackrock_esg)
# A tibble: 4 × 6
  sustainable_classificat…¹ Top_Fund_Name Top_Fund_assets Total_assets_in_Cate…²
  <chr>                     <chr>                   <dbl>                  <dbl>
1 Impact                    iShares USD …      335216065.             335216065.
2 Thematic                  iShares Glob…     3057666513.            3427675675.
3 Screened                  iShares ESG …      182340754.             383389770.
4 Uplift                    iShares ESG …    13385805595.           51443665637.
# ℹ abbreviated names: ¹​sustainable_classification, ²​Total_assets_in_Category
# ℹ 2 more variables: Percent_of_Category <dbl>, Rank <dbl>

2.5.0.2 Homework problem 2:

Create a sorted bar chart of the 10 smallest ETFs.

Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.

# Step 1: Filter to select the 10 smallest ETFs
smallest_etfs <- blackrock_etf_screener %>%
  arrange(net_assets_usd) %>%
  top_n(-10, net_assets_usd)

# Step 2: Reorder the etf_name factor based on net assets
smallest_etfs$name <- fct_reorder(smallest_etfs$name, smallest_etfs$net_assets_usd)

# Step 3: Create the bar chart
ggplot(smallest_etfs, aes(x = name, y = net_assets_usd, fill = name)) +
  geom_bar(stat = "identity") +
  coord_flip() +  # Flip coordinates to make it a horizontal bar chart
  labs(title = "10 Smallest ETFs by Net Assets",
       x = "ETF Name",
       y = "Assets Under Management",
       fill = "ETF Name") +
  theme_minimal() +
  scale_y_continuous(labels = scales::dollar) 

2.5.0.3 Homework problem 3:

  • Find the funds in the bottom quintile of the MSCI ESG quality score.

  • calculate the number of funds by total assets by asset class and sub asset class.

# Step 1: Calculate the bottom quintile for the MSCI ESG quality scores
bottom_quintile_funds <- blackrock_etf_screener %>%
  filter(msci_esg_quality_score_0_10 <= quantile(msci_esg_quality_score_0_10, 0.2, na.rm = TRUE))

# Step 2: Group by asset class and sub-asset class, then summarize
summary_by_class <- bottom_quintile_funds %>%
  group_by(asset_class, sub_asset_class) %>%
  summarize(
    Number_of_Funds = n(),
    Total_Assets = sum(net_assets_usd, na.rm = TRUE),
    .groups = "drop"  # Drop the grouping structure after summarizing
  )

# View the result
print(summary_by_class)
# A tibble: 9 × 4
  asset_class  sub_asset_class        Number_of_Funds  Total_Assets
  <chr>        <chr>                            <int>         <dbl>
1 Equity       All Cap                             23  95486052602.
2 Equity       Large Cap                            1   1852506812.
3 Equity       Large/Mid Cap                       10  42421984678.
4 Equity       Small Cap                           17 187292555389.
5 Fixed Income Corporates                           1     41339182.
6 Fixed Income Credit                               1    408038372.
7 Fixed Income Government                           3  17560711395.
8 Fixed Income High Yield                          16  38239624578.
9 Real Estate  Real Estate Securities               4   7145439523.

2.5.0.4 Homework problem 4:

We only began exploring this data. Use your newfound dplyr skills to uncover actionable insights from the dataset.

  • Show your work.

  • Present 5 repeatable factoids or data visualizations. As always, make sure your data visualizations have meaningful titles and labels. We will share these in class.

# Present 4 Factoids/ Data Visualizations
# Separate ESG and non-ESG funds
esg_etf <- blackrock_etf_screener %>%
  filter(!is.na(sustainable_classification))%>%
  mutate(sustainable_classification = 1) # ESG firms have 1

standard_etf <- blackrock_etf_screener %>%
  filter(is.na(sustainable_classification))%>%
  mutate(sustainable_classification = 0) # standard firms have 0

standard_vs_esg_etf <- bind_rows(esg_etf, standard_etf)
key_metrics <- standard_vs_esg_etf%>%
  select(net_assets_usd,
         sustainable_classification,
         gross_expense_ratio_percent,
         net_expense_ratio_percent, 
         msci_esg_quality_score_0_10, 
         msci_weighted_average_carbon_intensity_tons_co2e_m_sales, 
         msci_esg_percent_coverage,)

cor.mat <- round(cor(key_metrics),2)
cor.mat
                                                         net_assets_usd
net_assets_usd                                                     1.00
sustainable_classification                                        -0.07
gross_expense_ratio_percent                                       -0.19
net_expense_ratio_percent                                         -0.20
msci_esg_quality_score_0_10                                          NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales             NA
msci_esg_percent_coverage                                            NA
                                                         sustainable_classification
net_assets_usd                                                                -0.07
sustainable_classification                                                     1.00
gross_expense_ratio_percent                                                   -0.12
net_expense_ratio_percent                                                     -0.14
msci_esg_quality_score_0_10                                                      NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                         NA
msci_esg_percent_coverage                                                        NA
                                                         gross_expense_ratio_percent
net_assets_usd                                                                 -0.19
sustainable_classification                                                     -0.12
gross_expense_ratio_percent                                                     1.00
net_expense_ratio_percent                                                       0.92
msci_esg_quality_score_0_10                                                       NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                          NA
msci_esg_percent_coverage                                                         NA
                                                         net_expense_ratio_percent
net_assets_usd                                                               -0.20
sustainable_classification                                                   -0.14
gross_expense_ratio_percent                                                   0.92
net_expense_ratio_percent                                                     1.00
msci_esg_quality_score_0_10                                                     NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                        NA
msci_esg_percent_coverage                                                       NA
                                                         msci_esg_quality_score_0_10
net_assets_usd                                                                    NA
sustainable_classification                                                        NA
gross_expense_ratio_percent                                                       NA
net_expense_ratio_percent                                                         NA
msci_esg_quality_score_0_10                                                        1
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                          NA
msci_esg_percent_coverage                                                         NA
                                                         msci_weighted_average_carbon_intensity_tons_co2e_m_sales
net_assets_usd                                                                                                 NA
sustainable_classification                                                                                     NA
gross_expense_ratio_percent                                                                                    NA
net_expense_ratio_percent                                                                                      NA
msci_esg_quality_score_0_10                                                                                    NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                                                        1
msci_esg_percent_coverage                                                                                      NA
                                                         msci_esg_percent_coverage
net_assets_usd                                                                  NA
sustainable_classification                                                      NA
gross_expense_ratio_percent                                                     NA
net_expense_ratio_percent                                                       NA
msci_esg_quality_score_0_10                                                     NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                        NA
msci_esg_percent_coverage                                                        1
heatmap.2(cor(key_metrics), Rowv = FALSE, Colv = FALSE, dendrogram = "none", 
          cellnote = round(cor(key_metrics),2), 
          notecol = "black", key = FALSE, trace = 'none', margins = c(10,10))

hist(key_metrics$net_assets_usd, xlab = "Net Asset Value")

plot(key_metrics$net_assets_usd~ key_metrics$sustainable_classification, 
     xlab = "Sustainable Classification", ylab = "Net Assets")

boxplot(key_metrics$net_assets_usd)