Ticha HW 3

Getting Started

Loading relevant libraries.

# loading relevant libraries
library(tidyverse)

Warning: package 'tidyr' was built under R version 4.2.3

Warning: package 'readr' was built under R version 4.2.3

Warning: package 'dplyr' was built under R version 4.2.3

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.1     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(dplyr)
library(ggplot2)
library(forcats)
library(gplots)

Warning: package 'gplots' was built under R version 4.2.3


Attaching package: 'gplots'

The following object is masked from 'package:stats':

    lowess

library(reshape)


Attaching package: 'reshape'

The following object is masked from 'package:lubridate':

    stamp

The following object is masked from 'package:dplyr':

    rename

The following objects are masked from 'package:tidyr':

    expand, smiths

library(GGally)

Registered S3 method overwritten by 'GGally':
  method from   
  +.gg   ggplot2

library(scales)

Warning: package 'scales' was built under R version 4.2.3


Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor

Reading the Data

blackrock_etf_screener<- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv")

Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

2.5.0.1 Homework problem 1:

Create a dplyr pipeline to create a tibble fulfills the following parameters:

only contains ESG funds (no standard funds)
contains the largest fund from each sustainable classification.
shows the percent of assets that the top fund comprises for its category (e.g. the top Thematic fund is x% of the total assets of all thematic funds).
Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.

blackrock_etf_screener %>%
  group_by(sustainable_classification) %>% 
  count(sort = TRUE)

# A tibble: 5 × 2
# Groups:   sustainable_classification [5]
  sustainable_classification     n
  <chr>                      <int>
1 <NA>                         384
2 Uplift                        30
3 Thematic                       6
4 Screened                       3
5 Impact                         1

blackrock_esg <- blackrock_etf_screener %>%
  filter(!is.na(sustainable_classification)) %>%
  group_by(sustainable_classification) %>%
  arrange(desc(net_assets_usd)) %>%
  summarize(
    Top_Fund_Name = first(name),  # Gets the first fund name after arranging by net asset
    Top_Fund_assets = first(net_assets_usd),  # Gets the net asset of the first, hence largest, fund
    Total_assets_in_Category = sum(net_assets_usd),
    Percent_of_Category = (first(net_assets_usd) / sum(net_assets_usd)) * 100,
    .groups = "drop"
  ) %>%
  mutate(Rank = rank(-Percent_of_Category)) %>%
  arrange(desc(Percent_of_Category))
# To view the resulting tibble
print(blackrock_esg)

# A tibble: 4 × 6
  sustainable_classificat…¹ Top_Fund_Name Top_Fund_assets Total_assets_in_Cate…²
  <chr>                     <chr>                   <dbl>                  <dbl>
1 Impact                    iShares USD …      335216065.             335216065.
2 Thematic                  iShares Glob…     3057666513.            3427675675.
3 Screened                  iShares ESG …      182340754.             383389770.
4 Uplift                    iShares ESG …    13385805595.           51443665637.
# ℹ abbreviated names: ¹sustainable_classification, ²Total_assets_in_Category
# ℹ 2 more variables: Percent_of_Category <dbl>, Rank <dbl>

2.5.0.2 Homework problem 2:

Create a sorted bar chart of the 10 smallest ETFs.

Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.

# Step 1: Filter to select the 10 smallest ETFs
smallest_etfs <- blackrock_etf_screener %>%
  arrange(net_assets_usd) %>%
  top_n(-10, net_assets_usd)

# Step 2: Reorder the etf_name factor based on net assets
smallest_etfs$name <- fct_reorder(smallest_etfs$name, smallest_etfs$net_assets_usd)

# Step 3: Create the bar chart
ggplot(smallest_etfs, aes(x = name, y = net_assets_usd, fill = name)) +
  geom_bar(stat = "identity") +
  coord_flip() +  # Flip coordinates to make it a horizontal bar chart
  labs(title = "10 Smallest ETFs by Net Assets",
       x = "ETF Name",
       y = "Assets Under Management",
       fill = "ETF Name") +
  scale_y_continuous(labels = label_number(scale = 1e-6, suffix = "M"))+
  theme(legend.position = "none")

2.5.0.3 Homework problem 3:

Find the funds in the bottom quintile of the MSCI ESG quality score.
calculate the number of funds by total assets by asset class and sub asset class.

# Step 1: Calculate the bottom quintile for the MSCI ESG quality scores
bottom_quintile_funds <- blackrock_etf_screener %>%
  filter(msci_esg_quality_score_0_10 <= quantile(msci_esg_quality_score_0_10, 0.2, na.rm = TRUE))

# Step 2: Group by asset class and sub-asset class, then summarize
summary_by_class <- bottom_quintile_funds %>%
  group_by(asset_class, sub_asset_class) %>%
  summarize(
    Number_of_Funds = n(),
    Total_Assets = sum(net_assets_usd, na.rm = TRUE),
    .groups = "drop"  # Drop the grouping structure after summarizing
  )

# View the result
print(summary_by_class)

# A tibble: 9 × 4
  asset_class  sub_asset_class        Number_of_Funds  Total_Assets
  <chr>        <chr>                            <int>         <dbl>
1 Equity       All Cap                             23  95486052602.
2 Equity       Large Cap                            1   1852506812.
3 Equity       Large/Mid Cap                       10  42421984678.
4 Equity       Small Cap                           17 187292555389.
5 Fixed Income Corporates                           1     41339182.
6 Fixed Income Credit                               1    408038372.
7 Fixed Income Government                           3  17560711395.
8 Fixed Income High Yield                          16  38239624578.
9 Real Estate  Real Estate Securities               4   7145439523.

2.5.0.4 Homework problem 4:

We only began exploring this data. Use your newfound dplyr skills to uncover actionable insights from the dataset.

Show your work.
Present 5 repeatable factoids or data visualizations. As always, make sure your data visualizations have meaningful titles and labels. We will share these in class.

# Present 4 Factoids/ Data Visualizations
# Separate ESG and non-ESG funds
esg_etf <- blackrock_etf_screener %>%
  filter(!is.na(sustainable_classification))%>%
  mutate(sustainable_classification = 1) # ESG firms have 1

standard_etf <- blackrock_etf_screener %>%
  filter(is.na(sustainable_classification))%>%
  mutate(sustainable_classification = 0) # standard firms have 0

standard_vs_esg_etf <- bind_rows(esg_etf, standard_etf)
key_metrics <- standard_vs_esg_etf%>%
  select(net_assets_usd,
         sustainable_classification,
         gross_expense_ratio_percent,
         net_expense_ratio_percent, 
         msci_esg_quality_score_0_10, 
         msci_weighted_average_carbon_intensity_tons_co2e_m_sales, 
         msci_esg_percent_coverage,)

cor.mat <- round(cor(key_metrics),2)
cor.mat

                                                         net_assets_usd
net_assets_usd                                                     1.00
sustainable_classification                                        -0.07
gross_expense_ratio_percent                                       -0.19
net_expense_ratio_percent                                         -0.20
msci_esg_quality_score_0_10                                          NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales             NA
msci_esg_percent_coverage                                            NA
                                                         sustainable_classification
net_assets_usd                                                                -0.07
sustainable_classification                                                     1.00
gross_expense_ratio_percent                                                   -0.12
net_expense_ratio_percent                                                     -0.14
msci_esg_quality_score_0_10                                                      NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                         NA
msci_esg_percent_coverage                                                        NA
                                                         gross_expense_ratio_percent
net_assets_usd                                                                 -0.19
sustainable_classification                                                     -0.12
gross_expense_ratio_percent                                                     1.00
net_expense_ratio_percent                                                       0.92
msci_esg_quality_score_0_10                                                       NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                          NA
msci_esg_percent_coverage                                                         NA
                                                         net_expense_ratio_percent
net_assets_usd                                                               -0.20
sustainable_classification                                                   -0.14
gross_expense_ratio_percent                                                   0.92
net_expense_ratio_percent                                                     1.00
msci_esg_quality_score_0_10                                                     NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                        NA
msci_esg_percent_coverage                                                       NA
                                                         msci_esg_quality_score_0_10
net_assets_usd                                                                    NA
sustainable_classification                                                        NA
gross_expense_ratio_percent                                                       NA
net_expense_ratio_percent                                                         NA
msci_esg_quality_score_0_10                                                        1
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                          NA
msci_esg_percent_coverage                                                         NA
                                                         msci_weighted_average_carbon_intensity_tons_co2e_m_sales
net_assets_usd                                                                                                 NA
sustainable_classification                                                                                     NA
gross_expense_ratio_percent                                                                                    NA
net_expense_ratio_percent                                                                                      NA
msci_esg_quality_score_0_10                                                                                    NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                                                        1
msci_esg_percent_coverage                                                                                      NA
                                                         msci_esg_percent_coverage
net_assets_usd                                                                  NA
sustainable_classification                                                      NA
gross_expense_ratio_percent                                                     NA
net_expense_ratio_percent                                                       NA
msci_esg_quality_score_0_10                                                     NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales                        NA
msci_esg_percent_coverage                                                        1

ggplot(key_metrics, aes(y = msci_esg_quality_score_0_10, x = "")) +
  geom_boxplot(fill = "lightblue", color = "black") +
  labs(y = "MSCI ESG Quality Score", title = "Boxplot of MSCI ESG Quality Score") +
  theme_minimal()

Warning: Removed 44 rows containing non-finite values (`stat_boxplot()`).

heatmap.2(cor(key_metrics), Rowv = FALSE, Colv = FALSE, dendrogram = "none", 
          cellnote = round(cor(key_metrics),2), 
          notecol = "black", key = FALSE, trace = 'none', margins = c(5,5))

ggplot(key_metrics, aes(x = net_assets_usd)) +
  geom_histogram(binwidth = 1e8, fill = "blue", color = "black") + # Adjust binwidth as needed
  labs(x = "Net Asset Value", y = "Frequency", title = "Histogram of Net Asset Value") +
  scale_x_continuous(labels = label_number(scale = 1e-9, suffix = "B"))+
  theme_minimal()

ggplot(key_metrics, aes(x = sustainable_classification, y = net_assets_usd)) +
  geom_point() +
  labs(x = "Sustainable Classification", y = "Net Assets", title = "Net Assets by Sustainable Classification") +
  scale_y_continuous(labels = label_number(scale = 1e-9, suffix = "B"))+
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) # Improve label readability