Warning: package 'gplots' was built under R version 4.2.3
Attaching package: 'gplots'
The following object is masked from 'package:stats':
lowess
library(reshape)
Attaching package: 'reshape'
The following object is masked from 'package:lubridate':
stamp
The following object is masked from 'package:dplyr':
rename
The following objects are masked from 'package:tidyr':
expand, smiths
library(GGally)
Registered S3 method overwritten by 'GGally':
method from
+.gg ggplot2
library(scales)
Warning: package 'scales' was built under R version 4.2.3
Attaching package: 'scales'
The following object is masked from 'package:purrr':
discard
The following object is masked from 'package:readr':
col_factor
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm (2): incept_date, net_assets_as_of
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
2.5.0.1 Homework problem 1:
Create a dplyr pipeline to create a tibble fulfills the following parameters:
only contains ESG funds (no standard funds)
contains the largest fund from each sustainable classification.
shows the percent of assets that the top fund comprises for its category (e.g. the top Thematic fund is x% of the total assets of all thematic funds).
Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.
blackrock_esg <- blackrock_etf_screener %>%filter(!is.na(sustainable_classification)) %>%group_by(sustainable_classification) %>%arrange(desc(net_assets_usd)) %>%summarize(Top_Fund_Name =first(name), # Gets the first fund name after arranging by net assetTop_Fund_assets =first(net_assets_usd), # Gets the net asset of the first, hence largest, fundTotal_assets_in_Category =sum(net_assets_usd),Percent_of_Category = (first(net_assets_usd) /sum(net_assets_usd)) *100,.groups ="drop" ) %>%mutate(Rank =rank(-Percent_of_Category)) %>%arrange(desc(Percent_of_Category))# To view the resulting tibbleprint(blackrock_esg)
Create a sorted bar chart of the 10 smallest ETFs.
Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.
# Step 1: Filter to select the 10 smallest ETFssmallest_etfs <- blackrock_etf_screener %>%arrange(net_assets_usd) %>%top_n(-10, net_assets_usd)# Step 2: Reorder the etf_name factor based on net assetssmallest_etfs$name <-fct_reorder(smallest_etfs$name, smallest_etfs$net_assets_usd)# Step 3: Create the bar chartggplot(smallest_etfs, aes(x = name, y = net_assets_usd, fill = name)) +geom_bar(stat ="identity") +coord_flip() +# Flip coordinates to make it a horizontal bar chartlabs(title ="10 Smallest ETFs by Net Assets",x ="ETF Name",y ="Assets Under Management",fill ="ETF Name") +scale_y_continuous(labels =label_number(scale =1e-6, suffix ="M"))+theme(legend.position ="none")
2.5.0.3 Homework problem 3:
Find the funds in the bottom quintile of the MSCI ESG quality score.
calculate the number of funds by total assets by asset class and sub asset class.
# Step 1: Calculate the bottom quintile for the MSCI ESG quality scoresbottom_quintile_funds <- blackrock_etf_screener %>%filter(msci_esg_quality_score_0_10 <=quantile(msci_esg_quality_score_0_10, 0.2, na.rm =TRUE))# Step 2: Group by asset class and sub-asset class, then summarizesummary_by_class <- bottom_quintile_funds %>%group_by(asset_class, sub_asset_class) %>%summarize(Number_of_Funds =n(),Total_Assets =sum(net_assets_usd, na.rm =TRUE),.groups ="drop"# Drop the grouping structure after summarizing )# View the resultprint(summary_by_class)
# A tibble: 9 × 4
asset_class sub_asset_class Number_of_Funds Total_Assets
<chr> <chr> <int> <dbl>
1 Equity All Cap 23 95486052602.
2 Equity Large Cap 1 1852506812.
3 Equity Large/Mid Cap 10 42421984678.
4 Equity Small Cap 17 187292555389.
5 Fixed Income Corporates 1 41339182.
6 Fixed Income Credit 1 408038372.
7 Fixed Income Government 3 17560711395.
8 Fixed Income High Yield 16 38239624578.
9 Real Estate Real Estate Securities 4 7145439523.
2.5.0.4 Homework problem 4:
We only began exploring this data. Use your newfound dplyr skills to uncover actionable insights from the dataset.
Show your work.
Present 5 repeatable factoids or data visualizations. As always, make sure your data visualizations have meaningful titles and labels. We will share these in class.
# Present 4 Factoids/ Data Visualizations# Separate ESG and non-ESG fundsesg_etf <- blackrock_etf_screener %>%filter(!is.na(sustainable_classification))%>%mutate(sustainable_classification =1) # ESG firms have 1standard_etf <- blackrock_etf_screener %>%filter(is.na(sustainable_classification))%>%mutate(sustainable_classification =0) # standard firms have 0standard_vs_esg_etf <-bind_rows(esg_etf, standard_etf)key_metrics <- standard_vs_esg_etf%>%select(net_assets_usd, sustainable_classification, gross_expense_ratio_percent, net_expense_ratio_percent, msci_esg_quality_score_0_10, msci_weighted_average_carbon_intensity_tons_co2e_m_sales, msci_esg_percent_coverage,)cor.mat <-round(cor(key_metrics),2)cor.mat
net_assets_usd
net_assets_usd 1.00
sustainable_classification -0.07
gross_expense_ratio_percent -0.19
net_expense_ratio_percent -0.20
msci_esg_quality_score_0_10 NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales NA
msci_esg_percent_coverage NA
sustainable_classification
net_assets_usd -0.07
sustainable_classification 1.00
gross_expense_ratio_percent -0.12
net_expense_ratio_percent -0.14
msci_esg_quality_score_0_10 NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales NA
msci_esg_percent_coverage NA
gross_expense_ratio_percent
net_assets_usd -0.19
sustainable_classification -0.12
gross_expense_ratio_percent 1.00
net_expense_ratio_percent 0.92
msci_esg_quality_score_0_10 NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales NA
msci_esg_percent_coverage NA
net_expense_ratio_percent
net_assets_usd -0.20
sustainable_classification -0.14
gross_expense_ratio_percent 0.92
net_expense_ratio_percent 1.00
msci_esg_quality_score_0_10 NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales NA
msci_esg_percent_coverage NA
msci_esg_quality_score_0_10
net_assets_usd NA
sustainable_classification NA
gross_expense_ratio_percent NA
net_expense_ratio_percent NA
msci_esg_quality_score_0_10 1
msci_weighted_average_carbon_intensity_tons_co2e_m_sales NA
msci_esg_percent_coverage NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales
net_assets_usd NA
sustainable_classification NA
gross_expense_ratio_percent NA
net_expense_ratio_percent NA
msci_esg_quality_score_0_10 NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales 1
msci_esg_percent_coverage NA
msci_esg_percent_coverage
net_assets_usd NA
sustainable_classification NA
gross_expense_ratio_percent NA
net_expense_ratio_percent NA
msci_esg_quality_score_0_10 NA
msci_weighted_average_carbon_intensity_tons_co2e_m_sales NA
msci_esg_percent_coverage 1
ggplot(key_metrics, aes(y = msci_esg_quality_score_0_10, x ="")) +geom_boxplot(fill ="lightblue", color ="black") +labs(y ="MSCI ESG Quality Score", title ="Boxplot of MSCI ESG Quality Score") +theme_minimal()
ggplot(key_metrics, aes(x = net_assets_usd)) +geom_histogram(binwidth =1e8, fill ="blue", color ="black") +# Adjust binwidth as neededlabs(x ="Net Asset Value", y ="Frequency", title ="Histogram of Net Asset Value") +scale_x_continuous(labels =label_number(scale =1e-9, suffix ="B"))+theme_minimal()
ggplot(key_metrics, aes(x = sustainable_classification, y = net_assets_usd)) +geom_point() +labs(x ="Sustainable Classification", y ="Net Assets", title ="Net Assets by Sustainable Classification") +scale_y_continuous(labels =label_number(scale =1e-9, suffix ="B"))+theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1)) # Improve label readability