here("00_data", "day_1", "etf_screener_processed.csv")
## [1] "/cloud/project/00_data/day_1/etf_screener_processed.csv"
etf_screener <- here("00_data", "day_1", "etf_screener_processed.csv") %>%
read_csv()
## Rows: 388 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): ticker, name, asset_class, sub_asset_class, region, market, locat...
## dbl (8): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
## date (2): incept_date, net_assets_as_of
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
etf_screener
## # A tibble: 388 × 22
## ticker name incept_date gross_expense_r… net_expense_rat… net_assets_usd_…
## <chr> <chr> <date> <dbl> <dbl> <dbl>
## 1 IVV iShare… 2000-05-15 0.03 0.03 306960.
## 2 IEFA iShare… 2012-10-18 0.07 0.07 101197.
## 3 AGG iShare… 2003-09-22 0.05 0.04 90788.
## 4 IEMG iShare… 2012-10-18 0.11 0.11 76712.
## 5 IJR iShare… 2000-05-22 0.06 0.06 69622.
## 6 IWF iShare… 2000-05-22 0.19 0.19 68177.
## 7 IJH iShare… 2000-05-22 0.05 0.05 62850.
## 8 IWM iShare… 2000-05-22 0.19 0.19 58692.
## 9 IWD iShare… 2000-05-22 0.19 0.19 56864.
## 10 EFA iShare… 2001-08-14 0.32 0.32 55356.
## # … with 378 more rows, and 16 more variables: net_assets_as_of <date>,
## # asset_class <chr>, sub_asset_class <chr>, region <chr>, market <chr>,
## # location <chr>, investment_style <chr>,
## # sustainability_characteristics_msci_esg_fund_ratings_msci_esg_fund_rating_aaa_ccc <chr>,
## # msci_esg_quality_score_0_10 <dbl>,
## # msci_weighted_average_carbon_intensity_tons_co2e_m_sales <dbl>,
## # msci_esg_percent_coverage <dbl>, sustainable_classification <chr>, …
First, let’s take a look at the size of ESG ETF.
etf_screener %>%
# calculate the percentage of total AuM of a given fund
mutate(pct_of_total_aum = net_assets_usd_mn/sum(net_assets_usd_mn)) %>%
# group by ESG Fund/Regular Fund
group_by(is_esg) %>%
# calculate summary statistics on each group
summarize(percent_of_total_aum = sum(pct_of_total_aum) * 100,
# change magnitude from mn to bn
aum_bn = sum(net_assets_usd_mn)/10^3,
funds_n = n())
## # A tibble: 2 × 4
## is_esg percent_of_total_aum aum_bn funds_n
## <chr> <dbl> <dbl> <int>
## 1 ESG Fund 2.69 62.7 27
## 2 Regular Fund 97.3 2270. 361
The net asset use of the current ESG ETF is $62.66million,compared with regular fund of $2270million
```r
Prop <- c(62.66063, 2270.39234)
pie(Prop , labels = c('ESG Fund\n$63 million','Regular Fund\n$2270 million'))
Up to 2022, there are 27 ESG ETFs around the world.
Prop <- c(27, 361)
pie(Prop , labels = c('ESG Fund\n27','Regular Fund\n361'))
They take up 2.69% market share among the whole ETF market.
Prop <- c(2.685778, 97.314222)
pie(Prop , labels = c('ESG Fund\n2.69%','Regular Fund\n97.31%'))
Among all the ESG ETFs, broad ESG is the most popular type.
aum_bn = sum(etf_screener$net_assets_usd_mn)/10^3
#onlyetf<-etf_screener[which(sustainable_classification!=NA)]
test<-etf_screener
test<-na.omit(test)
test_aumbn= sum(test$net_assets_usd_mn)/10^3
aumbn_etf=test_aumbn
test %>%
ggplot(aes(x = sustainable_classification, y = aumbn_etf)) +
# for geom_bar, if you want it to use the y value you provide, use `stat = "identity"`
geom_bar(stat = "identity")
ESG ETFs are located in different places.
test %>%
group_by(location) %>%
summarize(location_n = n())
## # A tibble: 2 × 2
## location location_n
## <chr> <int>
## 1 Broad 8
## 2 United States 19
There are 8 ESG ETFs abroad, while 19 ETFs are located in US.
Prop1<- c(8, 19)
pie(Prop1 , labels = c('Broad\n8','United States\n19'))
Among 27 ESG ETFs, 24 of them target on developed countries, while only 3 of them focus on emerging countries.
test %>%
group_by(market) %>%
summarize(Market_n = n())
## # A tibble: 2 × 2
## market Market_n
## <chr> <int>
## 1 Developed 24
## 2 Emerging 3
Prop1<- c(24, 3)
pie(Prop1 , labels = c('Developed\n24','Emerging\n3'))
Most of them focus on equity investment, while some of them focus on fixed income and multi asset investment.
test %>%
ggplot(aes(x = asset_class, y = aum_bn)) +
# for geom_bar, if you want it to use the y value you provide, use `stat = "identity"`
geom_bar(stat = "identity")
This market is growing very quickly.
etf_launches_by_year <- etf_screener %>%
group_by(is_esg, year_launched) %>%
count() %>%
ungroup() %>%
group_by(is_esg) %>%
mutate(cumulative_funds = cumsum(n)) %>%
ungroup()
etf_launches_by_year
## # A tibble: 32 × 4
## is_esg year_launched n cumulative_funds
## <chr> <dbl> <int> <int>
## 1 ESG Fund 2005 1 1
## 2 ESG Fund 2006 1 2
## 3 ESG Fund 2008 1 3
## 4 ESG Fund 2014 1 4
## 5 ESG Fund 2016 4 8
## 6 ESG Fund 2018 2 10
## 7 ESG Fund 2019 1 11
## 8 ESG Fund 2020 12 23
## 9 ESG Fund 2021 4 27
## 10 Regular Fund 1996 17 17
## # … with 22 more rows
etf_launches_by_year %>%
# fct_reorder2() makes the legend correspond with the order of the colors in the chart
ggplot(aes(x = year_launched, y = n,
fill = fct_reorder2(.f = is_esg, .x = year_launched,
.y = n))) +
geom_bar(stat = "identity") +
labs(fill = "")
As you can see from this chart, ESG funds has emerged since 2005, then it experienced a sharp increase in the past 3 years.
etf_launches_by_year %>%
# fct_reorder2() makes the legend correspond with the order of the colors in the chart
ggplot(aes(x = year_launched, y = cumulative_funds,
color = fct_reorder2(.f = is_esg, .x = year_launched,
.y = cumulative_funds))) +
geom_line() +
labs(color = "")
Therefore, ESG ETF is a market with great potential.