Data Transformation Homework

Author

Grace Huang

Data Transformation Homework

options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("tidyverse")
Installing package into 'C:/Users/Shiya (Grace) Huang/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'tidyverse' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\Shiya (Grace) Huang\AppData\Local\Temp\RtmpQRlu3X\downloaded_packages
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
options(scipen = 999)
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
blackrock_etf_screener
# A tibble: 424 × 18
   ticker name                        incept_date         gross_expense_ratio_…¹
   <chr>  <chr>                       <dttm>                               <dbl>
 1 IVV    iShares Core S&P 500 ETF    2000-05-15 00:00:00                   0.03
 2 IEFA   iShares Core MSCI EAFE ETF  2012-10-18 00:00:00                   0.07
 3 AGG    iShares Core U.S. Aggregat… 2003-09-22 00:00:00                   0.03
 4 IWF    iShares Russell 1000 Growt… 2000-05-22 00:00:00                   0.19
 5 IJR    iShares Core S&P Small-Cap… 2000-05-22 00:00:00                   0.06
 6 IJH    iShares Core S&P Mid-Cap E… 2000-05-22 00:00:00                   0.05
 7 IEMG   iShares Core MSCI Emerging… 2012-10-18 00:00:00                   0.09
 8 IWM    iShares Russell 2000 ETF    2000-05-22 00:00:00                   0.19
 9 IWD    iShares Russell 1000 Value… 2000-05-22 00:00:00                   0.19
10 TLT    iShares 20+ Year Treasury … 2002-07-22 00:00:00                   0.15
# ℹ 414 more rows
# ℹ abbreviated name: ¹​gross_expense_ratio_percent
# ℹ 14 more variables: net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
#   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
#   region <chr>, market <chr>, location <chr>, investment_style <chr>,
#   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
blackrock_etf_screener |> 
  glimpse()
Rows: 424
Columns: 18
$ ticker                                                   <chr> "IVV", "IEFA"…
$ name                                                     <chr> "iShares Core…
$ incept_date                                              <dttm> 2000-05-15, …
$ gross_expense_ratio_percent                              <dbl> 0.03, 0.07, 0…
$ net_expense_ratio_percent                                <dbl> 0.03, 0.07, 0…
$ net_assets_usd                                           <dbl> 399312542067,…
$ net_assets_as_of                                         <dttm> 2023-12-27, …
$ asset_class                                              <chr> "Equity", "Eq…
$ sub_asset_class                                          <chr> "Large Cap", …
$ region                                                   <chr> "North Americ…
$ market                                                   <chr> "Developed", …
$ location                                                 <chr> "United State…
$ investment_style                                         <chr> "Index", "Ind…
$ msci_esg_fund_rating_aaa_ccc                             <chr> "A", "AA", "A…
$ msci_esg_quality_score_0_10                              <dbl> 6.6845, 7.519…
$ msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl> 106.80, 102.8…
$ msci_esg_percent_coverage                                <dbl> 99.05, 99.80,…
$ sustainable_classification                               <chr> NA, NA, NA, N…
blackrock_etf_screener |> 
  distinct(sustainable_classification)
# A tibble: 5 × 1
  sustainable_classification
  <chr>                     
1 <NA>                      
2 Uplift                    
3 Thematic                  
4 Impact                    
5 Screened                  
blackrock_etf_screener |> 
  group_by(sustainable_classification) |> 
  count(sort = TRUE)
# A tibble: 5 × 2
# Groups:   sustainable_classification [5]
  sustainable_classification     n
  <chr>                      <int>
1 <NA>                         384
2 Uplift                        30
3 Thematic                       6
4 Screened                       3
5 Impact                         1
blackrock_etf_screener |> 
  group_by(sustainable_classification) |> 
  summarize(
    # how many funds?
    n_funds = n(),
    # how much money?
    assets_usd = sum(net_assets_usd, na.rm = TRUE)
  ) |> 
  # arrange in descending order
  arrange(assets_usd |> desc())
# A tibble: 5 × 3
  sustainable_classification n_funds assets_usd
  <chr>                        <int>      <dbl>
1 <NA>                           384    2.55e12
2 Uplift                          30    5.14e10
3 Thematic                         6    3.43e 9
4 Screened                         3    3.83e 8
5 Impact                           1    3.35e 8
 #
blackrock_etf_screener_w_new_features <- blackrock_etf_screener |> 
  mutate(
    # if the sustainable_classification column is NA, then the fund is not an ESG fund.
    standard_or_esg = if_else(
      condition = is.na(sustainable_classification),
      true = "Standard",
      false = "ESG"
    ),
    # lubridate::year() extracts the year from a date
    inception_year = lubridate::year(incept_date),
    
    # Change to a meaningful magnitude for the data. In asset management, billions is a good default. 
    net_assets_bn_usd = net_assets_usd/10^9,
    
    # let's put our new variables at the front so we can see them easily.
    .before = everything()
  )

blackrock_etf_screener_w_new_features
# A tibble: 424 × 21
   standard_or_esg inception_year net_assets_bn_usd ticker name                 
   <chr>                    <dbl>             <dbl> <chr>  <chr>                
 1 Standard                  2000             399.  IVV    iShares Core S&P 500…
 2 Standard                  2012             107.  IEFA   iShares Core MSCI EA…
 3 Standard                  2003             101.  AGG    iShares Core U.S. Ag…
 4 Standard                  2000              82.1 IWF    iShares Russell 1000…
 5 Standard                  2000              78.2 IJR    iShares Core S&P Sma…
 6 Standard                  2000              77.1 IJH    iShares Core S&P Mid…
 7 Standard                  2012              73.9 IEMG   iShares Core MSCI Em…
 8 Standard                  2000              67.7 IWM    iShares Russell 2000…
 9 Standard                  2000              55.4 IWD    iShares Russell 1000…
10 Standard                  2002              52.2 TLT    iShares 20+ Year Tre…
# ℹ 414 more rows
# ℹ 16 more variables: incept_date <dttm>, gross_expense_ratio_percent <dbl>,
#   net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
#   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
#   region <chr>, market <chr>, location <chr>, investment_style <chr>,
#   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
blackrock_etf_screener_w_new_features |> 
  summarize(total_assets_bn = sum(net_assets_bn_usd, na.rm = TRUE))
# A tibble: 1 × 1
  total_assets_bn
            <dbl>
1           2603.
# 
assets_esg_vs_standard <- blackrock_etf_screener_w_new_features |> 
  group_by(standard_or_esg) |> 
  summarize(
    assets_bn = sum(net_assets_bn_usd, na.rm = TRUE),
    n_funds = n()
  )

assets_esg_vs_standard
# A tibble: 2 × 3
  standard_or_esg assets_bn n_funds
  <chr>               <dbl>   <int>
1 ESG                  55.6      40
2 Standard           2547.      384
assets_esg_vs_standard |> 
  ggplot(aes(x = assets_bn, y = standard_or_esg)) +
  geom_bar(stat = "identity")

assets_esg_vs_standard |> 
  mutate(assets_pct = assets_bn/sum(assets_bn) * 100,
         .after = standard_or_esg)
# A tibble: 2 × 4
  standard_or_esg assets_pct assets_bn n_funds
  <chr>                <dbl>     <dbl>   <int>
1 ESG                   2.14      55.6      40
2 Standard             97.9     2547.      384
assets_esg_vs_standard |> 
  ggplot(aes(x = assets_bn, y = standard_or_esg)) +
  geom_bar(stat = "identity", fill = "grey40", alpha = .7) +
  
  # make it prettier
  # scales::label_* funcitons provide useful options. Check them out!
  scale_x_continuous(labels = scales::label_dollar(scale = 1/10^3, suffix = " tn")) +
  labs(title = "Total Assets: iShares ESG vs. Standard Funds",
       subtitle = "ESG comprise a small percentage of total assets.",
       x = "Total Assets (USD)",
       y = "Fund Type",
       # you can use \n to create a line break
       caption = "I made this.\nData as of xyz") +
  theme_minimal()

etf_formation_esg_vs_standard <- blackrock_etf_screener_w_new_features |> 
  group_by(standard_or_esg, inception_year) |> 
  count() |> 
  ungroup()

etf_formation_esg_vs_standard
# A tibble: 37 × 3
   standard_or_esg inception_year     n
   <chr>                    <dbl> <int>
 1 ESG                       2005     1
 2 ESG                       2006     1
 3 ESG                       2008     1
 4 ESG                       2014     1
 5 ESG                       2016     5
 6 ESG                       2017     2
 7 ESG                       2018     3
 8 ESG                       2019     1
 9 ESG                       2020    12
10 ESG                       2021     5
# ℹ 27 more rows
etf_formation_esg_vs_standard |> 
  ggplot(aes(x = inception_year, y = n, fill = standard_or_esg)) +
  geom_bar(stat = "identity", position = "stack")

etf_formation_esg_vs_standard |> 
  ggplot(aes(x = inception_year, y = n, color = standard_or_esg)) +
  geom_line()

etf_formation_esg_vs_standard_cumulative <- etf_formation_esg_vs_standard |> 
  arrange(standard_or_esg, inception_year) |> 
  group_by(standard_or_esg) |> 
  mutate(cumulative_n = cumsum(n)) |> 
  ungroup()

etf_formation_esg_vs_standard_cumulative 
# A tibble: 37 × 4
   standard_or_esg inception_year     n cumulative_n
   <chr>                    <dbl> <int>        <int>
 1 ESG                       2005     1            1
 2 ESG                       2006     1            2
 3 ESG                       2008     1            3
 4 ESG                       2014     1            4
 5 ESG                       2016     5            9
 6 ESG                       2017     2           11
 7 ESG                       2018     3           14
 8 ESG                       2019     1           15
 9 ESG                       2020    12           27
10 ESG                       2021     5           32
# ℹ 27 more rows
etf_formation_esg_vs_standard_cumulative |> 
  ggplot(aes(x = inception_year, y = cumulative_n, color = standard_or_esg)) +
  geom_line()

blackrock_etf_screener_w_new_features |> 
  summarize(
    max_size = max(net_assets_bn_usd, na.rm = TRUE),
    min_size = min(net_assets_bn_usd, na.rm = TRUE),
    avg_size = mean(net_assets_bn_usd, na.rm = TRUE),
    sd_size = sd(net_assets_bn_usd, na.rm = TRUE),
    median_size = median(net_assets_bn_usd, na.rm = TRUE),
    # same as median, calculated a different way for illustration
    percentile_50 = quantile(net_assets_bn_usd, .5, na.rm = TRUE),
    percentile_75 = quantile(net_assets_bn_usd, .75, na.rm = TRUE),
  )
# A tibble: 1 × 7
  max_size min_size avg_size sd_size median_size percentile_50 percentile_75
     <dbl>    <dbl>    <dbl>   <dbl>       <dbl>         <dbl>         <dbl>
1     399. 0.000457     6.14    23.2       0.606         0.606          3.57
blackrock_etf_screener_w_new_features |> 
  group_by(standard_or_esg) |> 
  summarize(
    max_size = max(net_assets_bn_usd, na.rm = TRUE),
    min_size = min(net_assets_bn_usd, na.rm = TRUE),
    avg_size = mean(net_assets_bn_usd, na.rm = TRUE),
    sd_size = sd(net_assets_bn_usd, na.rm = TRUE),
    median_size = median(net_assets_bn_usd, na.rm = TRUE),
    # same as median, calculated a different way for illustration
    percentile_50 = quantile(net_assets_bn_usd, .5, na.rm = TRUE),
    percentile_75 = quantile(net_assets_bn_usd, .75, na.rm = TRUE),
  )
# A tibble: 2 × 8
  standard_or_esg max_size min_size avg_size sd_size median_size percentile_50
  <chr>              <dbl>    <dbl>    <dbl>   <dbl>       <dbl>         <dbl>
1 ESG                 13.4 0.00371      1.39    2.59       0.343         0.343
2 Standard           399.  0.000457     6.63   24.4        0.629         0.629
# ℹ 1 more variable: percentile_75 <dbl>
blackrock_etf_screener_w_new_features |> 
  group_by(standard_or_esg, asset_class) |> 
  summarize(
    max_size = max(net_assets_bn_usd, na.rm = TRUE),
    min_size = min(net_assets_bn_usd, na.rm = TRUE),
    avg_size = mean(net_assets_bn_usd, na.rm = TRUE),
    sd_size = sd(net_assets_bn_usd, na.rm = TRUE),
    median_size = median(net_assets_bn_usd, na.rm = TRUE),
    # same as median, calculated a different way for illustration
    percentile_50 = quantile(net_assets_bn_usd, .5, na.rm = TRUE),
    percentile_75 = quantile(net_assets_bn_usd, .75, na.rm = TRUE),
  )
`summarise()` has grouped output by 'standard_or_esg'. You can override using
the `.groups` argument.
# A tibble: 9 × 9
# Groups:   standard_or_esg [2]
  standard_or_esg asset_class   max_size min_size avg_size  sd_size median_size
  <chr>           <chr>            <dbl>    <dbl>    <dbl>    <dbl>       <dbl>
1 ESG             Equity        13.4     0.00371   1.74     2.97        0.501  
2 ESG             Fixed Income   3.56    0.0125    0.954    1.22        0.586  
3 ESG             Multi Asset    0.0257  0.00527   0.0144   0.00966     0.0132 
4 ESG             Real Estate    0.00939 0.00939   0.00939 NA           0.00939
5 Standard        Commodity     26.5     0.0130    4.48     8.92        0.659  
6 Standard        Equity       399.      0.000457  8.02    29.9         0.817  
7 Standard        Fixed Income 101.      0.00247   5.22    12.6         0.423  
8 Standard        Multi Asset    2.25    0.00271   0.435    0.782       0.00383
9 Standard        Real Estate    4.60    0.0403    1.76     1.67        1.41   
# ℹ 2 more variables: percentile_50 <dbl>, percentile_75 <dbl>
blackrock_etf_screener_w_new_features |> 
  filter(standard_or_esg == "ESG") |> 
  select(ticker, name, net_assets_bn_usd) |> 
  mutate(
    fund_rank = rank(net_assets_bn_usd),
    fund_percentile = percent_rank(net_assets_bn_usd),
    fund_decile = ntile(net_assets_bn_usd, n = 10),
    
    pct_of_total = net_assets_bn_usd/sum(net_assets_bn_usd, na.rm = TRUE) * 100,
    pct_of_total_cumulative = cumsum(pct_of_total)
    
  )
# A tibble: 40 × 8
   ticker name           net_assets_bn_usd fund_rank fund_percentile fund_decile
   <chr>  <chr>                      <dbl>     <dbl>           <dbl>       <int>
 1 ESGU   iShares ESG A…             13.4         40           1              10
 2 ESGD   iShares ESG A…              7.64        39           0.974          10
 3 SUSA   iShares MSCI …              5.37        38           0.949          10
 4 DSI    iShares MSCI …              4.08        37           0.923          10
 5 ESGE   iShares ESG A…              4.02        36           0.897           9
 6 EAGG   iShares ESG A…              3.56        35           0.872           9
 7 ICLN   iShares Globa…              3.06        34           0.846           9
 8 USCL   iShares Clima…              2.09        33           0.821           9
 9 PABU   iShares Paris…              1.54        32           0.795           8
10 LCTU   BlackRock U.S…              1.51        31           0.769           8
# ℹ 30 more rows
# ℹ 2 more variables: pct_of_total <dbl>, pct_of_total_cumulative <dbl>
tibble(value = 1:10) |> 
  mutate(rank = rank(value),
         percentile = percent_rank(value),
         decile = ntile(value, n = 10))
# A tibble: 10 × 4
   value  rank percentile decile
   <int> <dbl>      <dbl>  <int>
 1     1     1      0          1
 2     2     2      0.111      2
 3     3     3      0.222      3
 4     4     4      0.333      4
 5     5     5      0.444      5
 6     6     6      0.556      6
 7     7     7      0.667      7
 8     8     8      0.778      8
 9     9     9      0.889      9
10    10    10      1         10
blackrock_etf_screener_w_new_features |> 
  filter(standard_or_esg == "ESG") |> 
  select(ticker, name, net_assets_bn_usd) |> 
  mutate(
    fund_rank = rank(net_assets_bn_usd * -1),
    fund_percentile = percent_rank(net_assets_bn_usd * -1),
    fund_decile = ntile(net_assets_bn_usd * -1, n = 10),
    
    pct_of_total = net_assets_bn_usd/sum(net_assets_bn_usd, na.rm = TRUE) * 100,
    pct_of_total_cumulative = cumsum(pct_of_total)
  )
# A tibble: 40 × 8
   ticker name           net_assets_bn_usd fund_rank fund_percentile fund_decile
   <chr>  <chr>                      <dbl>     <dbl>           <dbl>       <int>
 1 ESGU   iShares ESG A…             13.4          1          0                1
 2 ESGD   iShares ESG A…              7.64         2          0.0256           1
 3 SUSA   iShares MSCI …              5.37         3          0.0513           1
 4 DSI    iShares MSCI …              4.08         4          0.0769           1
 5 ESGE   iShares ESG A…              4.02         5          0.103            2
 6 EAGG   iShares ESG A…              3.56         6          0.128            2
 7 ICLN   iShares Globa…              3.06         7          0.154            2
 8 USCL   iShares Clima…              2.09         8          0.179            2
 9 PABU   iShares Paris…              1.54         9          0.205            3
10 LCTU   BlackRock U.S…              1.51        10          0.231            3
# ℹ 30 more rows
# ℹ 2 more variables: pct_of_total <dbl>, pct_of_total_cumulative <dbl>
###### 2.5.0.1 Homework Problem 1
 # Filter for ESG funds only
blackrock_etf_screener_w_new_features |> 
  filter(standard_or_esg == "ESG")
# A tibble: 40 × 21
   standard_or_esg inception_year net_assets_bn_usd ticker name                 
   <chr>                    <dbl>             <dbl> <chr>  <chr>                
 1 ESG                       2016             13.4  ESGU   iShares ESG Aware MS…
 2 ESG                       2016              7.64 ESGD   iShares ESG Aware MS…
 3 ESG                       2005              5.37 SUSA   iShares MSCI USA ESG…
 4 ESG                       2006              4.08 DSI    iShares MSCI KLD 400…
 5 ESG                       2016              4.02 ESGE   iShares ESG Aware MS…
 6 ESG                       2018              3.56 EAGG   iShares ESG Aware U.…
 7 ESG                       2008              3.06 ICLN   iShares Global Clean…
 8 ESG                       2023              2.09 USCL   iShares Climate Cons…
 9 ESG                       2022              1.54 PABU   iShares Paris-Aligne…
10 ESG                       2021              1.51 LCTU   BlackRock U.S. Carbo…
# ℹ 30 more rows
# ℹ 16 more variables: incept_date <dttm>, gross_expense_ratio_percent <dbl>,
#   net_expense_ratio_percent <dbl>, net_assets_usd <dbl>,
#   net_assets_as_of <dttm>, asset_class <chr>, sub_asset_class <chr>,
#   region <chr>, market <chr>, location <chr>, investment_style <chr>,
#   msci_esg_fund_rating_aaa_ccc <chr>, msci_esg_quality_score_0_10 <dbl>,
#   msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>, …
 # contains the largest fund from each sustainable classification.
largest_funds_by_classification <- blackrock_etf_screener_w_new_features |> 
  group_by(sustainable_classification) |>  
  summarise(
    Largest_Fund = name[which.max(net_assets_usd)],
    Max_Assets = max(net_assets_usd),
    Total_Assets = sum(net_assets_usd, na.rm = TRUE),
    .groups = 'drop'
  ) |>
  mutate(
    Percent_Of_Category_Assets = (Max_Assets / Total_Assets) * 100
  )
 # shows the percent of assets that the top fund comprises for its category
 # Ranks each sustainable classification by the % of assets in its largest fund, and arranges them in descending order.
ranked_classifications <- largest_funds_by_classification  |>
  mutate(
    Rank = rank(-Percent_Of_Category_Assets)
  )  |>
  arrange(desc(Percent_Of_Category_Assets))
###### 2.5.0.2 Homework Problem 2
# Filter for the 10 smallest ETFs by net assets
smallest_etfs <- blackrock_etf_screener_w_new_features |>
  arrange(net_assets_usd) |>
  slice_head(n = 10)
# Create a sorted bar chart
smallest_etfs_chart <- smallest_etfs %>%
  ggplot(aes(x = fct_reorder(ticker, net_assets_usd), y = net_assets_usd)) +
  geom_bar(stat = "identity") +
  coord_flip() +  # Flip the chart for better readability of the tickers
  labs(title = "10 Smallest ETFs by Net Assets",
       x = "Ticker",
       y = "Net Assets (USD)",
       caption = "Data sourced from BlackRock ETF Screener") +
  scale_y_continuous(labels = scales::label_dollar()) # Use dollar format for y-axis
smallest_etfs_chart

###### 2.5.0.3 Homework Problem 3
bottom_quintile_threshold <- quantile(blackrock_etf_screener_w_new_features$msci_esg_quality_score_0_10, 0.2, na.rm = TRUE)
bottom_quintile_funds <- blackrock_etf_screener_w_new_features |>
  filter(msci_esg_quality_score_0_10 <= bottom_quintile_threshold)
bottom_quintile_summary <- bottom_quintile_funds  |>
  group_by(asset_class, sub_asset_class)  |>
  summarise(
    Number_of_Funds = n(),
    Total_Assets = sum(net_assets_usd, na.rm = TRUE),
    .groups = 'drop'
  )
print(bottom_quintile_summary)
# A tibble: 9 × 4
  asset_class  sub_asset_class        Number_of_Funds  Total_Assets
  <chr>        <chr>                            <int>         <dbl>
1 Equity       All Cap                             23  95486052602.
2 Equity       Large Cap                            1   1852506812.
3 Equity       Large/Mid Cap                       10  42421984678.
4 Equity       Small Cap                           17 187292555389.
5 Fixed Income Corporates                           1     41339182.
6 Fixed Income Credit                               1    408038372.
7 Fixed Income Government                           3  17560711395.
8 Fixed Income High Yield                          16  38239624578.
9 Real Estate  Real Estate Securities               4   7145439523.
###### 2.5.0.4 Homework Problem 4
library(ggplot2)
 # Distribution of MSCI ESG Quality Scores
ggplot(blackrock_etf_screener_w_new_features, aes(x = msci_esg_quality_score_0_10)) +
  geom_histogram(binwidth = 0.1, fill = "skyblue", color = "black") +
  labs(title = "Distribution of MSCI ESG Quality Scores",
       x = "ESG Quality Score",
       y = "Number of Funds") +
  theme_minimal()
Warning: Removed 44 rows containing non-finite values (`stat_bin()`).

# Pie Chart of Total Assets by Asset Class for Bottom Quintile
library(ggplot2)
bottom_quintile_summary |>
  ggplot(aes(x = "", y = Total_Assets, fill = asset_class)) +
  geom_bar(width = 1, stat = "identity") +
  coord_polar("y") +
  labs(title = "Total Assets by Asset Class for Bottom Quintile Funds")

# Bar Chart of Funds by Asset Class in Bottom Quintile
library(ggplot2)
ggplot(bottom_quintile_summary, aes(x = asset_class, y = Number_of_Funds, fill = asset_class)) +
  geom_bar(stat = "identity") +
  labs(title = "Funds in Bottom Quintile by Asset Class", x = "Asset Class", y = "Number of Funds") +
  theme_minimal()

# Line Graph Showing Cumulative Number of New ESG Funds Over Time
ggplot(etf_formation_esg_vs_standard_cumulative, aes(x = inception_year, y = cumulative_n, color = standard_or_esg)) +
  geom_line() +
  labs(title = "Cumulative Number of New ESG Funds Over Time", x = "Year", y = "Cumulative Number of Funds")