Susfin-homework-w3

HW Question 1

#load the tidyverse 

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readr)

options(scipen = 999)
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener_w_new_features <- blackrock_etf_screener |> 
  mutate(
    # if the sustainable_classification column is NA, then the fund is not an ESG fund.
    standard_or_esg = if_else(
      condition = is.na(sustainable_classification),
      true = "Standard",
      false = "ESG"
    ),
    # lubridate::year() extracts the year from a date
    inception_year = lubridate::year(incept_date),
    
    # Change to a meaningful magnitude for the data. In asset management, billions is a good default. 
    net_assets_bn_usd = net_assets_usd/10^9,
    
    # let's put our new variables at the front so we can see them easily.
    .before = everything()
  )

largest_funds_by_category <- blackrock_etf_screener_w_new_features |>
  filter(standard_or_esg == "ESG") |>
group_by(sustainable_classification) |> 
  summarize(
    LargestFund = first(ticker[which.max(net_assets_bn_usd)]),
    LargestFundAssets=
      max(net_assets_bn_usd),
     TotalCategoryAssets = sum(net_assets_bn_usd)
  ) |> 
  ungroup() |> 
  mutate(PercentOfCategoryAssets = (LargestFundAssets / TotalCategoryAssets) * 100) |> 
  arrange(desc(PercentOfCategoryAssets))

largest_funds_by_category
# A tibble: 4 × 5
  sustainable_classification LargestFund LargestFundAssets TotalCategoryAssets
  <chr>                      <chr>                   <dbl>               <dbl>
1 Impact                     BGRN                    0.335               0.335
2 Thematic                   ICLN                    3.06                3.43 
3 Screened                   XVV                     0.182               0.383
4 Uplift                     ESGU                   13.4                51.4  
# ℹ 1 more variable: PercentOfCategoryAssets <dbl>

HW Quesiton 2

library(tidyverse)
library(readr)
library(ggplot2)

# Filtering the 10 smallest ETFs by net assets
smallest_etfs <- blackrock_etf_screener %>%
  arrange(net_assets_usd) %>%
  slice_head(n = 10)

# Using fct_reorder() to sort the ETFs by net_assets_usd for plotting
smallest_etfs$ticker <- fct_reorder(smallest_etfs$ticker, smallest_etfs$net_assets_usd)

# Creating the bar chart
ggplot(smallest_etfs, aes(x = ticker, y = net_assets_usd)) +
  geom_bar(stat = "identity", fill = "blue") +
  coord_flip() + # Flipping coordinates to make the bar chart horizontal
  labs(title = "10 Smallest ETFs by Net Assets",
       x = "ETF Ticker",
       y = "Net Assets (USD)",
       caption = "Data source: BlackRock ETF Screener,2023-12-27") +
  theme_minimal() +
  scale_y_continuous(labels = scales::dollar_format(scale = 1e-6, suffix = "M")) # Assuming net_assets_usd is in USD, converting to millions for readability

HW Question 3

library(tidyverse)
library(readr)

  bottom_quintile_threshold <- quantile(blackrock_etf_screener$msci_esg_quality_score_0_10, 0.2, na.rm = TRUE)

funds_in_bottom_quintile <-blackrock_etf_screener |>
  filter(msci_esg_quality_score_0_10 <= bottom_quintile_threshold)
funds_summary <- funds_in_bottom_quintile |>
  group_by(asset_class, sub_asset_class) |>
  summarize(
    NumberOfFunds = n(),
    TotalAssets = sum(net_assets_usd, na.rm = TRUE), .groups = 'drop'
     ) |>
  arrange(asset_class, sub_asset_class)

print(funds_summary)
# A tibble: 9 × 4
  asset_class  sub_asset_class        NumberOfFunds   TotalAssets
  <chr>        <chr>                          <int>         <dbl>
1 Equity       All Cap                           23  95486052602.
2 Equity       Large Cap                          1   1852506812.
3 Equity       Large/Mid Cap                     10  42421984678.
4 Equity       Small Cap                         17 187292555389.
5 Fixed Income Corporates                         1     41339182.
6 Fixed Income Credit                             1    408038372.
7 Fixed Income Government                         3  17560711395.
8 Fixed Income High Yield                        16  38239624578.
9 Real Estate  Real Estate Securities             4   7145439523.

HW Question 4

Net Assets and ESG Coverage

library(dplyr)
library(ggplot2)


Q4q1 <- blackrock_etf_screener_w_new_features %>%
  mutate(
    standard_or_esg = ifelse(is.na(sustainable_classification), "Standard", "ESG"),
    size_category = cut(net_assets_usd,
                        breaks = c(-Inf, 1e8, 1e9, Inf),
                        labels = c("Small", "Medium", "Large"))
  ) %>%
  filter(standard_or_esg == "ESG")

# Calculate summary statistics for ESG percent coverage within each size category
df_summary <- Q4q1 %>%
  group_by(size_category) %>%
  summarise(
    mean_coverage = mean(msci_esg_percent_coverage, na.rm = TRUE),
    median_coverage = median(msci_esg_percent_coverage, na.rm = TRUE),
    coverage_range = max(msci_esg_percent_coverage, na.rm = TRUE) - min(msci_esg_percent_coverage, na.rm = TRUE)
  )

ggplot(Q4q1, aes(x = net_assets_bn_usd, y = msci_esg_percent_coverage, color = size_category)) +
  geom_point(alpha = 0.7) +
  scale_x_log10(labels = scales::comma) + # Apply logarithmic scale for net assets
  geom_hline(data = df_summary, aes(yintercept = mean_coverage, color = size_category), linetype = "dashed") +
  labs(title = "Net Assets vs. ESG Percent Coverage by Size",
       x = "Net Assets (USD)-Log",
       y = "ESG Percent Coverage",caption = "The dashed lines representing the mean ESG coverage for each size category",
       color = "Fund Size") +
  theme_minimal()

Carbon Intensity Across Regions

library(dplyr)
library(ggplot2)

# Correcting the summarization to focus on region_category (Developed vs. Emerging)
df_summary_corrected <- blackrock_etf_screener_w_new_features %>%
  mutate(
    standard_or_esg = ifelse(is.na(sustainable_classification), "Standard", "ESG"),
    region_category = case_when(
      region %in% c("North America", "Europe", "Japan", "Australia") ~ "Developed",
      TRUE ~ "Emerging"
    )
  ) %>%
  filter(standard_or_esg == "ESG") %>%
  group_by(region_category) %>%
  summarise(
    average_intensity = mean(msci_weighted_average_carbon_intensity_tons_co2e_m_sales, na.rm = TRUE),
    median_intensity = median(msci_weighted_average_carbon_intensity_tons_co2e_m_sales, na.rm = TRUE),
    intensity_range = max(msci_weighted_average_carbon_intensity_tons_co2e_m_sales, na.rm = TRUE) - min(msci_weighted_average_carbon_intensity_tons_co2e_m_sales
, na.rm = TRUE),
count = n() # Count of ESG funds in each region category for potential weighting or further analysis
)

ggplot(df_summary_corrected, aes(x = region_category, y = average_intensity, fill = region_category)) +
  geom_bar(stat = "identity", width = 0.7) +
  geom_errorbar(aes(ymin = average_intensity - intensity_range/2, ymax = average_intensity + intensity_range/2), width = 0.2) +
  geom_text(aes(label = count, y = average_intensity + intensity_range/2), vjust = 1.5, size = 3) +
  labs(title = "Average Carbon Intensity in Developed vs. Emerging Regions for ESG Funds",
       x = "Region Category",
       y = "Average Carbon Intensity (tons CO2e/M$ sales)") +
  theme_minimal() +
  coord_flip()  # Enhance readability by flipping the coordinates

Average ESG Quality Score by Asset Class

library(dplyr)
library(ggplot2)

Q4q3 <- blackrock_etf_screener_w_new_features %>%
  mutate(
    standard_or_esg = ifelse(is.na(sustainable_classification), "Standard", "ESG")
  ) %>%
  filter(standard_or_esg == "ESG") %>%
  group_by(asset_class) %>%
  summarise(
    average_esg_score = mean(msci_esg_quality_score_0_10, na.rm = TRUE),
    median_esg_score = median(msci_esg_quality_score_0_10, na.rm = TRUE),
    sd_esg_score = sd(msci_esg_quality_score_0_10, na.rm = TRUE),
    .groups = 'drop' 
  ) %>%
  mutate(
    rank = rank(-average_esg_score) # Rank asset classes by average ESG score
  ) %>%
  arrange(rank) # Sort by rank for visual clarity

ggplot(Q4q3, aes(x = reorder(asset_class, average_esg_score), y = average_esg_score, fill = asset_class)) +
  geom_bar(stat = "identity", position = "dodge") +
  geom_errorbar(aes(ymin = average_esg_score - sd_esg_score, ymax = average_esg_score + sd_esg_score), width = 0.2) +
  geom_text(aes(label = sprintf("Median: %.2f", median_esg_score), y = average_esg_score + sd_esg_score), hjust = 1.1, size = 3, angle = 45) +
  labs(title = "Average ESG Quality Score by Asset Class for ESG Funds",
       x = "Asset Class",
       y = "Average ESG Quality Score",
       fill = "Asset Class") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning: Removed 1 rows containing missing values (`geom_text()`).

Proportion of ESG vs. Standard Funds

library(dplyr)
library(ggplot2)

Q4q4 <- blackrock_etf_screener_w_new_features |>
  mutate(
    standard_or_esg = ifelse(is.na(sustainable_classification), "Standard", "ESG"),
    # Optional: Categorize asset classes into broader groups if applicable
    asset_group = case_when(
      asset_class %in% c("Equity", "Fixed Income") ~ asset_class,
      TRUE ~ "Others"
    )
  ) |>
  # Calculate the count of ESG vs. Standard funds overall and within each asset group
  group_by(asset_group, standard_or_esg) |>
  summarise(count = n(), .groups = 'drop') |>
  # Calculate the proportion of each type within each asset group
  mutate(proportion = count / sum(count) * 100) %>%
  # For a clearer comparison, arrange by asset group and then by standard_or_esg classification
  arrange(asset_group, standard_or_esg)

ggplot(Q4q4, aes(x = asset_group, y = proportion, fill = standard_or_esg)) +
  geom_bar(stat = "identity", position = "fill") +
  scale_y_continuous(labels = scales::percent_format()) +
  facet_wrap(~asset_group, scales = "free_x") +
  labs(title = "Proportion of ESG vs. Standard Funds by Asset Group",
       x = "Asset Group",
       y = "Proportion (%)",
       fill = "Fund Classification") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ESG Fund Ratings Distribution

library(dplyr)
library(ggplot2)

# Assuming the dataframe is named blackrock_etf_screener_w_new_features
Q4q5 <- blackrock_etf_screener_w_new_features %>%
  mutate(
    standard_or_esg = ifelse(is.na(sustainable_classification), "Standard", "ESG"),
    # Additional categorization for more insights, e.g., by asset class or region
    asset_group = case_when(
      asset_class %in% c("Equity", "Fixed Income") ~ asset_class,
      TRUE ~ "Others"
    )
  ) %>%
  filter(standard_or_esg == "ESG") %>%
  # Group by additional dimensions and ESG ratings
  group_by(asset_group, msci_esg_fund_rating_aaa_ccc) %>%
  summarise(count = n(), .groups = 'drop') %>%
  # Calculate the proportion of each ESG rating within each asset group
  mutate(proportion = count / sum(count) * 100) %>%
  arrange(asset_group, desc(msci_esg_fund_rating_aaa_ccc))

ggplot(Q4q5, aes(x = asset_group, y = count, fill = msci_esg_fund_rating_aaa_ccc)) +
  geom_bar(stat = "identity", position = "stack") +
  scale_fill_brewer(palette = "Spectral") +
  labs(title = "Distribution of MSCI ESG Fund Ratings by Asset Group",
       x = "Asset Group",
       y = "Count of Funds",
       fill = "ESG Rating") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))