ESG ETF Cross Comparisons US-Japan

Author

Okung Obang

ESG ETF Cross Comparison Visualizations

Code
install.packages('plotly', repos = "http://cran.us.r-project.org")
Installing package into 'C:/Users/okung/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
package 'plotly' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\okung\AppData\Local\Temp\RtmpCuuDDX\downloaded_packages
Code
# Load necessary library
library(tidyverse) 
Warning: package 'tidyverse' was built under R version 4.2.2
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.2     ✔ forcats 0.5.2
Warning: package 'ggplot2' was built under R version 4.2.2
Warning: package 'tidyr' was built under R version 4.2.2
Warning: package 'purrr' was built under R version 4.2.2
Warning: package 'dplyr' was built under R version 4.2.2
Warning: package 'stringr' was built under R version 4.2.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Code
library(janitor)
Warning: package 'janitor' was built under R version 4.2.2

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
Code
library(here)
Warning: package 'here' was built under R version 4.2.3
here() starts at C:/Users/okung/OneDrive/Documents
Code
library(readxl)
Warning: package 'readxl' was built under R version 4.2.2
Code
library(plotly)
Warning: package 'plotly' was built under R version 4.2.3

Attaching package: 'plotly'

The following object is masked from 'package:ggplot2':

    last_plot

The following object is masked from 'package:stats':

    filter

The following object is masked from 'package:graphics':

    layout
Code
# Open processed data

etf <- read.csv("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/etfcrosscomparison.csv")
Code
glimpse(etf)
Rows: 23,474
Columns: 57
$ X                      <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, …
$ Name                   <chr> "Median", "Median", "Median", "Median", "Median…
$ Ticker                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "SP…
$ allocation             <chr> "Materials", "Comm", "Cons Cyclical", "Cons Non…
$ percentage             <dbl> 0.0468, 0.0778, 0.1040, 0.1905, 0.0023, 0.0509,…
$ Class.Assets..MLN.USD. <dbl> 113.89, 113.89, 113.89, 113.89, 113.89, 113.89,…
$ Fund.Assets..MLN.USD.  <dbl> 112.81, 112.81, 112.81, 112.81, 112.81, 112.81,…
$ Holdings               <int> 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 505…
$ Primary                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Y"…
$ Cross                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "N"…
$ Expense.Ratio          <dbl> 5e-03, 5e-03, 5e-03, 5e-03, 5e-03, 5e-03, 5e-03…
$ Fund.Mgr.Stated.Fee    <dbl> 0.0049, 0.0049, 0.0049, 0.0049, 0.0049, 0.0049,…
$ Avg.Bid.Ask.Spread     <dbl> 0.0023, 0.0023, 0.0023, 0.0023, 0.0023, 0.0023,…
$ X1.Yr.NAV.Trk.Error    <dbl> 0.005821, 0.005821, 0.005821, 0.005821, 0.00582…
$ Premium                <dbl> 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00…
$ X52W.Avg.Prem          <dbl> 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00…
$ X1D.Flow               <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,…
$ X1D.Flow..M.USD.       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 196…
$ X1W.Flow               <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,…
$ X1W.Flow..M.USD.       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 129…
$ X1M.Flow               <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,…
$ X1M.Flow..M.USD.       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 757…
$ YTD.Flow               <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00,…
$ YTD.Flow..M.USD.       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -73…
$ X1Y.Flow               <dbl> 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.52,…
$ X1Y.Flow..M.USD.       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -22…
$ X3Y.Flow               <dbl> 28.15, 28.15, 28.15, 28.15, 28.15, 28.15, 28.15…
$ X3Y.Flow..M.USD.       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 369…
$ Fund.Type              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "ET…
$ Structure              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Un…
$ Index.Weight           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "Ma…
$ SFDR.Class.            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "--…
$ Use.Derivative         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "N"…
$ Tax.Form               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "10…
$ UCITS                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "N"…
$ UK.Reporting           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "--…
$ SFC                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "N"…
$ China                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "N"…
$ Leverage               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "N"…
$ Inception.Date         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "19…
$ X1D.Return             <dbl> 0.0043, 0.0043, 0.0043, 0.0043, 0.0043, 0.0043,…
$ MTD.Return             <dbl> 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.0014,…
$ YTD.Return             <dbl> 0.0545, 0.0545, 0.0545, 0.0545, 0.0545, 0.0545,…
$ X1.Yr.Return           <dbl> -0.0420, -0.0420, -0.0420, -0.0420, -0.0420, -0…
$ X3.Yr.Return           <dbl> 0.1393, 0.1393, 0.1393, 0.1393, 0.1393, 0.1393,…
$ X5.Yr.Return           <dbl> 0.0607, 0.0607, 0.0607, 0.0607, 0.0607, 0.0607,…
$ X10.Yr.Return          <dbl> 0.0859, 0.0859, 0.0859, 0.0859, 0.0859, 0.0859,…
$ X12M.Yld               <dbl> 0.0168, 0.0168, 0.0168, 0.0168, 0.0168, 0.0168,…
$ X1D.Vol                <int> 8750, 8750, 8750, 8750, 8750, 8750, 8750, 8750,…
$ X30D.Vol               <int> 15302, 15302, 15302, 15302, 15302, 15302, 15302…
$ Implied.Liquidity      <dbl> 4831274, 4831274, 4831274, 4831274, 4831274, 48…
$ Bid.Ask.Spread         <dbl> 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.07,…
$ Short.Interest.        <dbl> 0.36, 0.36, 0.36, 0.36, 0.36, 0.36, 0.36, 0.36,…
$ Open.Interest          <int> 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 204…
$ location               <chr> "Japan", "Japan", "Japan", "Japan", "Japan", "J…
$ ESG                    <chr> "ESG", "ESG", "ESG", "ESG", "ESG", "ESG", "ESG"…
$ inception_year         <int> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 199…

Holdings (Japan vs US)

Code
# Look at holdings per market and ESG
hold_etf <- etf |>
  group_by(location, ESG) |>
  summarize(avg_hold = mean(Holdings, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
hold_etf
# A tibble: 4 × 3
  location      ESG     avg_hold
  <chr>         <chr>      <dbl>
1 Japan         ESG         283.
2 Japan         non-ESG     413.
3 United States ESG         311.
4 United States non-ESG     285.
Code
ggplot(hold_etf, aes(x = location, y = avg_hold, fill = ESG)) +
  geom_bar(stat = "identity", position = "dodge") +
  labs(title = "Holdings per Market and ESG",
       x = "Location",
       y = "Average Holdings",
       fill = "ESG") +
  theme_minimal()

Average and Total Assets (Japan vs. US)

Code
# Let's compare total asset size and average asset of the non-ESG and ESG ETFs in each market
asset_etf <- etf |>
  select(location, ESG, Class.Assets..MLN.USD., Fund.Assets..MLN.USD., inception_year) |>
  group_by(location, ESG)|>
  summarize(count = n(),
            tot_assets = sum(Fund.Assets..MLN.USD., na.rm = TRUE),
            avg_assets = mean(Fund.Assets..MLN.USD., na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
asset_etf
# A tibble: 4 × 5
  location      ESG     count tot_assets avg_assets
  <chr>         <chr>   <int>      <dbl>      <dbl>
1 Japan         ESG       407    272126.       669.
2 Japan         non-ESG  1496   4657117.      3207.
3 United States ESG      2068    949032.       459.
4 United States non-ESG 19503  56719124.      2910.

Number of ETFs introduced each year (Japan vs US)

Code
year_etf <- etf |>
  group_by(inception_year, location, ESG)|>
  summarize(count=n())|>
  ungroup()
`summarise()` has grouped output by 'inception_year', 'location'. You can
override using the `.groups` argument.
Code
year_etf
# A tibble: 75 × 4
   inception_year location      ESG     count
            <int> <chr>         <chr>   <int>
 1           1993 United States non-ESG    11
 2           1995 Japan         non-ESG    11
 3           1995 United States non-ESG    11
 4           1996 United States non-ESG   187
 5           1998 United States non-ESG   121
 6           1999 United States non-ESG    11
 7           2000 United States non-ESG   484
 8           2001 Japan         non-ESG    66
 9           2001 United States ESG        11
10           2001 United States non-ESG   198
# … with 65 more rows
Code
year_etf |>
  ggplot(mapping = aes(x=inception_year, y = count, fill = ESG)) + geom_col() + facet_wrap(~location)
Warning: Removed 1 rows containing missing values (`position_stack()`).

Performance (1,3,5,& 10 year returns)

Code
# Let's compare average returns over 1yr, 3yrs, 5yrs, and 10yrs
returns_etf <-etf |>
  group_by(location, ESG) |>
  summarize(yr_avg_return = mean(X1.Yr.Return, na.rm = TRUE),
            three_yr_avg_return = mean(X3.Yr.Return, na.rm = TRUE),
            five_yr_avg_return = mean(X5.Yr.Return, na.rm = TRUE),
            ten_yr_avg_return = mean(X10.Yr.Return, na.rm = TRUE)
            ) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
returns_etf
# A tibble: 4 × 6
  location      ESG     yr_avg_return three_yr_avg_return five_yr_avg_…¹ ten_y…²
  <chr>         <chr>           <dbl>               <dbl>          <dbl>   <dbl>
1 Japan         ESG            0.0746               0.152         0.0600  0.0859
2 Japan         non-ESG        0.0468               0.158         0.0552  0.0776
3 United States ESG           -0.0655               0.147         0.0825  0.114 
4 United States non-ESG       -0.0587               0.137         0.0519  0.0713
# … with abbreviated variable names ¹​five_yr_avg_return, ²​ten_yr_avg_return
Code
returns_etf_long <- returns_etf %>%
  gather(key = "return_period", value = "avg_return", -location, -ESG) %>%
  mutate(return_period = factor(return_period, levels = c("yr_avg_return", "three_yr_avg_return", "five_yr_avg_return", "ten_yr_avg_return"),
                               labels = c("1 Year", "3 Years", "5 Years", "10 Years")))

ggplot(returns_etf_long, aes(x = ESG, y = avg_return, fill = return_period)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8) +
  geom_text(aes(label = round(avg_return, 2)), position = position_dodge(width = 0.8), vjust = -0.5, size = 3) +
  facet_wrap(~ location, scales = "free_x") +
  scale_fill_manual(values = c("#E41A1C", "#377EB8", "#4DAF4A", "#984EA3")) +
  theme_minimal() +
  labs(x = "ESG", y = "Average Return") +
  ggtitle("Average Returns by ESG and Return Period")

Average and Total Flows (Japan & the US)

Code
# Let's compare the average flows and total flow into each market
flow_etf <-etf |>
  group_by(ESG) |>
  summarize(avg_yr_flow = mean(X1Y.Flow, na.rm = TRUE),
            avg_three_yr_flow = mean(X3Y.Flow, na.rm = TRUE),
            tot_yr_flow = sum(X1Y.Flow, na.rm = TRUE),
            tot_three_yr_flow = sum(X3Y.Flow, na.rm = TRUE),
            avg_open_int = mean(Open.Interest, na.rm = TRUE),
            ) |>
  ungroup()
flow_etf
# A tibble: 2 × 6
  ESG     avg_yr_flow avg_three_yr_flow tot_yr_flow tot_three_yr_flow avg_open…¹
  <chr>         <dbl>             <dbl>       <dbl>             <dbl>      <dbl>
1 ESG           -32.9              260.     -81436.           644042.      5990.
2 non-ESG       149.               667.    3119139.         14013056.     94655.
# … with abbreviated variable name ¹​avg_open_int
Code
flow_loc_etf <- etf |>
  group_by(location,ESG) |>
  summarize(avg_yr_flow = mean(X1Y.Flow, na.rm = TRUE),
            avg_three_yr_flow = mean(X3Y.Flow, na.rm = TRUE),
            tot_yr_flow = sum(X1Y.Flow, na.rm = TRUE),
            tot_three_yr_flow = sum(X3Y.Flow, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
flow_loc_etf
# A tibble: 4 × 6
  location      ESG     avg_yr_flow avg_three_yr_flow tot_yr_flow tot_three_yr…¹
  <chr>         <chr>         <dbl>             <dbl>       <dbl>          <dbl>
1 Japan         ESG            3.23              88.8       1316.         36159.
2 Japan         non-ESG       -9.66             406.      -14456.        607736.
3 United States ESG          -40.0              294.      -82753.        607883.
4 United States non-ESG      161.               687.     3133595.      13405320.
# … with abbreviated variable name ¹​tot_three_yr_flow
Code
ggplot(flow_loc_etf, aes(x = location, y = avg_yr_flow, fill = ESG)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8, color = "black") +
  geom_text(aes(label = round(avg_yr_flow, 2)), position = position_dodge(width = 0.8), vjust = -0.5, size = 3) +
  labs(x = "Location", y = "Average Yearly Flow") +
  ggtitle("Average Yearly Flow by Location and ESG") +
  facet_wrap(~ ESG, ncol = 2) +
  theme_minimal() +
  scale_fill_manual(values = c("orange", "blue"))

Code
ggplot(flow_loc_etf, aes(x = location, y = tot_yr_flow, fill = ESG)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8, color = "black") +
  geom_text(aes(label = round(tot_yr_flow, 2)), position = position_dodge(width = 0.8), vjust = -0.8, size = 3) +
  labs(x = "Location", y = "Total Yearly Flow") +
  ggtitle("Total Yearly Flow by Location and ESG") +
  facet_wrap(~ ESG, ncol = 2) +
  theme_minimal() +
  scale_fill_manual(values = c("orange", "blue"))

Expense Ratios (Japan vs. US)

Code
#Might see differences in expense ratios in each market?
expense_etf <- etf |>
  group_by(location, ESG) |>
  summarize(avg_exp= mean(Expense.Ratio, na.rm = TRUE)*100) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
expense_etf
# A tibble: 4 × 3
  location      ESG     avg_exp
  <chr>         <chr>     <dbl>
1 Japan         ESG       0.238
2 Japan         non-ESG   0.370
3 United States ESG       0.435
4 United States non-ESG   0.528
Code
expense_etf |>
  ggplot(mapping = aes(x = ESG, y = avg_exp, fill = ESG)) + 
  geom_bar(stat = "identity") + facet_wrap(~location) + theme_minimal()

Code
# Premiums?
premium_etf <-etf |>
  group_by(location, ESG) |>
  summarize(avg_premium = mean(X52W.Avg.Prem, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
premium_etf
# A tibble: 4 × 3
  location      ESG     avg_premium
  <chr>         <chr>         <dbl>
1 Japan         ESG       -0.000127
2 Japan         non-ESG   -0.00169 
3 United States ESG        0.000528
4 United States non-ESG   -0.000135
Code
premium_etf |>
  ggplot(mapping = aes(x=ESG, y= avg_premium, fill = ESG)) +
  geom_bar(stat = "identity") +
  facet_wrap(~location) +
  theme_minimal()

Code
# How is the distribution of funds within ETFs in each market?

allo_etf <- etf |>
  group_by(location, ESG, allocation) |>
  summarize(avg_perc = mean(percentage, na.rm = TRUE),
            avg_fund = mean(Fund.Assets..MLN.USD., na.rm = TRUE)) |>
  ungroup() |>
  mutate(allocation_fund = avg_fund * (avg_perc))
`summarise()` has grouped output by 'location', 'ESG'. You can override using
the `.groups` argument.
Code
allo_etf
# A tibble: 44 × 6
   location ESG   allocation    avg_perc avg_fund allocation_fund
   <chr>    <chr> <chr>            <dbl>    <dbl>           <dbl>
 1 Japan    ESG   Comm            0.0878     669.          58.7  
 2 Japan    ESG   Cons Cyclical   0.203      669.         136.   
 3 Japan    ESG   Cons Non-Cycl   0.189      669.         126.   
 4 Japan    ESG   Divsf           0.0023     669.           1.54 
 5 Japan    ESG   Energy          0.0117     669.           7.85 
 6 Japan    ESG   Fin             0.120      669.          80.2  
 7 Japan    ESG   Govt            0.0004     669.           0.267
 8 Japan    ESG   Ind             0.235      669.         157.   
 9 Japan    ESG   Materials       0.0405     669.          27.1  
10 Japan    ESG   Tech            0.124      669.          82.8  
# … with 34 more rows
Code
allo_etf |>
  ggplot(mapping=aes(x= allocation, y=avg_perc, fill = ESG)) +
  geom_bar(position ="dodge", stat = "identity", width = 0.8) +
  facet_wrap(~location) +
  theme(axis.text.x= element_text(angle = 90, vjust = -0.5, hjust =1)) +
  geom_text(aes(label = round(avg_perc,2)), position = position_dodge(width=0.4), vjust = 2, hjust = 0.5, size = 2)

Code
# Create a bar plot of fund distribution by allocation, location, and type of fund
ggplot(allo_etf, aes(x = allocation, y = allocation_fund, fill = ESG)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~location) +
  labs(x = "Allocation", y = "Average Fund Assets (Millions of USD)",
       fill = "Type of Fund") +
  ggtitle("Fund Distribution by Allocation, Location, and Type of Fund") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.3, hjust = 1),
        axis.text.y = element_text(size = 8)) +
  geom_text(aes(label = scales::comma(round(allocation_fund)), y = allocation_fund),
            position = position_dodge(width = 0.9), vjust = -1.0, size = 2)

Code
# NAV Trk Error across location and ESG
nav_etf <- etf |>
  group_by(location, ESG) |>
  summarize(avg_nav = mean(X1.Yr.NAV.Trk.Error, na.rm = TRUE),
            avg_bid = mean(Avg.Bid.Ask.Spread, na.rm = TRUE))|>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
nav_etf
# A tibble: 4 × 4
  location      ESG     avg_nav avg_bid
  <chr>         <chr>     <dbl>   <dbl>
1 Japan         ESG      0.0283 0.00382
2 Japan         non-ESG  0.0740 0.00382
3 United States ESG      0.0204 0.00674
4 United States non-ESG  0.0320 0.00687
Code
ggplot(nav_etf, aes(x = location, y = avg_nav, fill = ESG)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8, color = "black") +
  geom_text(aes(label = round(avg_nav, 2)), position = position_dodge(width = 0.8), vjust = -0.5, size = 3) +
  labs(x = "Location", y = "Average NAV") +
  ggtitle("Average NAV across Location and ESG") +
  facet_wrap(~ ESG, ncol = 2) +
  theme_minimal() +
  scale_fill_manual(values = c("orange", "blue"))

Code
ggplot(nav_etf, aes(x = location, y = avg_bid, fill = ESG)) +
  geom_bar(stat = "identity", position = "dodge", width = 0.8, color = "black") +
  geom_text(aes(label = round(avg_bid, 4)), position = position_dodge(width = 0.8), vjust = -0.5, size = 3) +
  labs(x = "Location", y = "Average BID") +
  ggtitle("Average BID across Location and ESG") +
  facet_wrap(~ ESG, ncol = 2) +
  theme_minimal() +
  scale_fill_manual(values = c("orange", "blue"))

Code
# Volatility of each?
vol_etf <-etf |>
  group_by(location, ESG) |>
  summarize(avg_30D_vol = mean(X30D.Vol, na.rm = TRUE),
            avg_imp_liq = mean(Implied.Liquidity, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
vol_etf
# A tibble: 4 × 4
  location      ESG     avg_30D_vol avg_imp_liq
  <chr>         <chr>         <dbl>       <dbl>
1 Japan         ESG           1696.   30913708.
2 Japan         non-ESG     124320.   81134419.
3 United States ESG          79537.   63980414.
4 United States non-ESG     594414.   27183970.
Code
# Look at the bid-ask-spread as it indicates risk of the funds in the market
bas_etf <- etf |>
  group_by(location,ESG) |>
  summarize(avg_avg_bas = mean(Avg.Bid.Ask.Spread, na.rm = TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
bas_etf
# A tibble: 4 × 3
  location      ESG     avg_avg_bas
  <chr>         <chr>         <dbl>
1 Japan         ESG         0.00382
2 Japan         non-ESG     0.00382
3 United States ESG         0.00674
4 United States non-ESG     0.00687
Code
vol_etf_plot <- ggplot(vol_etf, aes(x = ESG, y = avg_imp_liq)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  facet_wrap(~ location) +
  labs(x = "Location", y = "Average Implied Liquidity",
       title = "Comparison of Average Implied Liquidity",
       subtitle = "Data Source: etf") +
  theme_minimal()

vol_etf_plot

Code
# Look at correlation between returns and costs? Volatility and premium? Holdings and returns? Allocations and returns?

cor_etf <- etf |>
  group_by(location,ESG) |>
  summarize(avg_yr_rtn = mean(X1.Yr.Return, na.rm = TRUE),
            avg_thryr_rtn = mean(X3.Yr.Return, na.rm = TRUE),
            avg_fivyr_rtn = mean(X5.Yr.Return, na.rm = TRUE),
            avg_tenyr_rtn = mean(X10.Yr.Return, na.rm = TRUE),
            avg_exp = mean(Expense.Ratio, na.rm =TRUE)) |>
  ungroup()
`summarise()` has grouped output by 'location'. You can override using the
`.groups` argument.
Code
cor_etf
# A tibble: 4 × 7
  location      ESG     avg_yr_rtn avg_thryr_rtn avg_fivyr_rtn avg_ten…¹ avg_exp
  <chr>         <chr>        <dbl>         <dbl>         <dbl>     <dbl>   <dbl>
1 Japan         ESG         0.0746         0.152        0.0600    0.0859 0.00238
2 Japan         non-ESG     0.0468         0.158        0.0552    0.0776 0.00370
3 United States ESG        -0.0655         0.147        0.0825    0.114  0.00435
4 United States non-ESG    -0.0587         0.137        0.0519    0.0713 0.00528
# … with abbreviated variable name ¹​avg_tenyr_rtn
Code
ggplot(cor_etf, aes(x = avg_exp, y = avg_yr_rtn, color = location, shape = ESG)) +
  geom_point(size = 3) +
  geom_text(aes(label = location), hjust = -0.2, vjust = -0.2, size = 3) +
  labs(x = "Average Expense Ratio", y = "Average 1-Year Return") +
  ggtitle("Average Year Return and Cost across Location and ESG") +
  scale_color_manual(values = c("blue", "red", "green")) +
  scale_shape_manual(values = c(19, 17)) +
  theme_minimal()

Code
library(ggplot2)


# Convert cor_etf data frame to a plotly object
cor_etf_plot <- ggplot(cor_etf, aes(x = avg_exp, y = avg_yr_rtn)) +
  geom_point(aes(text = paste("Location: ", location, "<br>",
                             "ESG: ", ESG, "<br>",
                             "Average 1 Yr Return: ", avg_yr_rtn, "<br>",
                             "Average Expense Ratio: ", avg_exp, "<br>"))) +
  labs(x = "Average Expense Ratio", y = "Average 1 Year Return",
       title = "Average Returns against Expense Ratio",
       subtitle = "Data Source: etf") +
  theme_minimal()
Warning in geom_point(aes(text = paste("Location: ", location, "<br>", "ESG: ",
: Ignoring unknown aesthetics: text
Code
# Convert cor_etf data frame to a plotly object
cor_etf_plot <- ggplot(cor_etf, aes(x = avg_exp, y = avg_yr_rtn)) +
  geom_point(aes(text = paste("Location: ", location, "<br>",
                             "ESG: ", ESG, "<br>",
                             "Average Yr Return: ", avg_yr_rtn, "<br>",
                             "Average Expense Ratio: ", avg_exp, "<br>"))) +
  labs(x = "Average Expense Ratio", y = "Average Year Return",
       title = "Average Returns against Expense Ratio",
       subtitle = "Data Source: etf") +
  theme_minimal()
Warning in geom_point(aes(text = paste("Location: ", location, "<br>", "ESG: ",
: Ignoring unknown aesthetics: text
Code
# Convert ggplot object to plotly object
cor_etf_plotly <- ggplotly(cor_etf_plot, dynamicTicks = TRUE) |>
  layout(
    xaxis = list(title = "Average Expense Ratio"),
    yaxis = list(title = "Average Year Return"),
    updatemenus = list(
      list(
        buttons = list(
          list(method = "restyle",
               args = list("y", list(cor_etf$avg_yr_rtn)),
               label = "Average 1 Year Return"),
          list(method = "restyle",
               args = list("y", list(cor_etf$avg_thryr_rtn)),
               label = "Average 3 Year Return"),
          list(method = "restyle",
               args = list("y", list(cor_etf$avg_fivyr_rtn)),
               label = "Average 5 Year Return"),
          list(method = "restyle",
               args = list("y", list(cor_etf$avg_tenyr_rtn)),
               label = "Average 10 Year Return")
        ),
        x = 0.95,
        xanchor = "right",
        y = 1.1,
        yanchor = "top"
      )
    )
  )

# Display the interactive plotly object
cor_etf_plotly
Code
etf |>
  ggplot(mapping = aes(x= Holdings, y = Expense.Ratio, color = location, shape = ESG, size = Fund.Assets..MLN.USD.)) + geom_point(alpha=0.3)+ scale_y_continuous() + scale_x_continuous()
Warning: Removed 440 rows containing missing values (`geom_point()`).

Code
ggplot(etf, aes(x = Holdings, y = Expense.Ratio, color = location, shape = ESG, size = Fund.Assets..MLN.USD.)) +
  geom_point(alpha = 0.3) +
  scale_x_continuous(labels = scales::comma) +
  scale_y_continuous(labels = scales::percent) +
  labs(x = "Holdings", y = "Expense Ratio", size = "Fund Assets (Million USD)") +
  ggtitle("Holdings vs. Expense Ratio by Funds") +
  scale_color_manual(values = c("blue", "red", "green")) +
  scale_shape_manual(values = c(19, 17)) +
  theme_minimal()
Warning: Removed 440 rows containing missing values (`geom_point()`).

Code
ggplot(etf, aes(x = Holdings, y = Expense.Ratio, color = location, shape = ESG)) +
  geom_point(alpha = 0.3) +
  scale_x_continuous(labels = scales::comma, expand = c(0.05, 0)) +
  scale_y_continuous(labels = scales::percent, expand = c(0, 0.05)) +
  labs(x = "Holdings", y = "Expense Ratio", size = "Fund Assets (Million USD)") +
  ggtitle("Holdings vs. Expense Ratio by Funds") +
  scale_color_manual(values = c("blue", "red", "green")) +
  scale_shape_manual(values = c(19, 17)) +
  theme_minimal()
Warning: Removed 429 rows containing missing values (`geom_point()`).