── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Load the datasetesg_data <-read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv")
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm (2): incept_date, net_assets_as_of
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Process the dataesg_processed <- esg_data %>%mutate(standard_or_esg =if_else(is.na(sustainable_classification), "Standard", "ESG")) %>%filter(standard_or_esg =="ESG") %>%group_by(sustainable_classification) %>%summarise(Top_Fund_Assets =max(net_assets_usd, na.rm =TRUE),Total_Assets =sum(net_assets_usd, na.rm =TRUE) ) %>%mutate(Percent_of_Assets = (Top_Fund_Assets / Total_Assets) *100) %>%arrange(desc(Percent_of_Assets))#hw2# Filter for the 10 smallest ETFssmallest_etfs <- esg_data %>%arrange(net_assets_usd) %>%slice_head(n =10)# Create a sorted bar chartggplot(smallest_etfs, aes(x =fct_reorder(name, net_assets_usd), y = net_assets_usd)) +geom_bar(stat ="identity") +coord_flip() +# Flips the chart to make it horizontallabs(title ="10 Smallest ETFs by Net Assets", x ="Fund Name", y ="Net Assets (USD)") +theme_minimal()
#hw3# Find the threshold for the bottom quintile of MSCI ESG quality scoresbottom_quintile_threshold <-quantile(esg_data$msci_esg_quality_score_0_10, 0.2, na.rm =TRUE)# Filter the funds in the bottom quintilebottom_quintile_funds <- esg_data %>%filter(msci_esg_quality_score_0_10 <= bottom_quintile_threshold)# Calculate the number of funds and total assets, grouped by asset class and sub-asset classbottom_quintile_summary <- bottom_quintile_funds %>%group_by(asset_class, sub_asset_class) %>%summarise(Number_of_Funds =n(),Total_Assets =sum(net_assets_usd, na.rm =TRUE),.groups ='drop' )#hw4library(ggplot2)top_5_esg <- esg_data %>%arrange(desc(net_assets_usd)) %>%top_n(5, net_assets_usd)ggplot(top_5_esg, aes(x =reorder(name, net_assets_usd), y = net_assets_usd)) +geom_bar(stat ="identity") +labs(title ="Top 5 ESG Funds by Net Assets", x ="Fund Name", y ="Net Assets (USD)") +coord_flip()
ggplot(esg_data, aes(x = msci_esg_quality_score_0_10)) +geom_histogram(binwidth =0.5, fill ="blue", color ="black") +labs(title ="Distribution of MSCI ESG Quality Scores", x ="ESG Score", y ="Count")
esg_data %>%group_by(asset_class) %>%summarise(Total_Assets =sum(net_assets_usd, na.rm =TRUE)) %>%ggplot(aes(x ="", y = Total_Assets, fill = asset_class)) +geom_bar(width =1, stat ="identity") +coord_polar("y") +labs(title ="Asset Class Breakdown of ESG Funds", x ="", y ="Total Assets (USD)")
ggplot(esg_data, aes(x = asset_class, y = msci_esg_quality_score_0_10)) +geom_boxplot() +labs(title ="Comparison of Average ESG Scores by Asset Class", x ="Asset Class", y ="ESG Score")
esg_data %>%group_by(incept_date) %>%summarise(Average_Size =mean(net_assets_usd, na.rm =TRUE)) %>%ggplot(aes(x = incept_date, y = Average_Size)) +geom_line() +labs(title ="Trend of Average ESG Fund Sizes Over Time", x ="Date", y ="Average Fund Size (USD)")