Cleaning ETF Data

Author

Okung Obang

Import Data

Code
# Install necessary packages
install.packages('here', repos = "http://cran.us.r-project.org")
Installing package into 'C:/Users/okung/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
package 'here' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\okung\AppData\Local\Temp\RtmpCWxDIw\downloaded_packages
Code
install.packages('readxl1', repos = "http://cran.us.r-project.org")
Installing package into 'C:/Users/okung/AppData/Local/R/win-library/4.2'
(as 'lib' is unspecified)
Warning: package 'readxl1' is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
Code
# Load necessary library
library(tidyverse) 
Warning: package 'tidyverse' was built under R version 4.2.2
── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
✔ ggplot2 3.4.0     ✔ purrr   1.0.1
✔ tibble  3.1.8     ✔ dplyr   1.1.0
✔ tidyr   1.3.0     ✔ stringr 1.5.0
✔ readr   2.1.2     ✔ forcats 0.5.2
Warning: package 'ggplot2' was built under R version 4.2.2
Warning: package 'tidyr' was built under R version 4.2.2
Warning: package 'purrr' was built under R version 4.2.2
Warning: package 'dplyr' was built under R version 4.2.2
Warning: package 'stringr' was built under R version 4.2.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
Code
library(janitor)
Warning: package 'janitor' was built under R version 4.2.2

Attaching package: 'janitor'

The following objects are masked from 'package:stats':

    chisq.test, fisher.test
Code
library(here)
Warning: package 'here' was built under R version 4.2.3
here() starts at C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11
Code
library(readxl)
Warning: package 'readxl' was built under R version 4.2.2
Code
library(lubridate)
Warning: package 'lubridate' was built under R version 4.2.2

Attaching package: 'lubridate'

The following objects are masked from 'package:base':

    date, intersect, setdiff, union
Code
# Import Bloomberg Data

# Bloomberg Data on ETF allocation
etf_al <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/allocation.xlsx")

# Bloomberg data on ETF assets
etf_asset <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/assets.xlsx")
Warning: Expecting numeric in D2131 / R2131C4: got '--'
Warning: Expecting numeric in D2132 / R2132C4: got '--'
Warning: Expecting numeric in D2133 / R2133C4: got '--'
Warning: Expecting numeric in D2134 / R2134C4: got '--'
Warning: Expecting numeric in D2135 / R2135C4: got '--'
Code
# Bloomberg data on ETF costs
etf_cost <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/costs.xlsx")

# Don't need this data (already in the other excel sheets)
      #Bloomberg data on volume
      #etf_etf <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/ETF1.xlsx")
        #Rename columns for clarity
       # colnames(etf_etf)[8] = "YT Class Flow (M USD)"
        # colnames(etf_etf)[10] = "1M Flow (M USD)"
       # colnames(etf_etf)[11] = ""
        #etf_etf <-etf_etf[, -c(9,12)]

# Bloomberg data on ETF flows
etf_flow <-read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/flows.xlsx")
New names:
• `(M USD)` -> `(M USD)...4`
• `` -> `...5`
• `(M USD)` -> `(M USD)...7`
• `` -> `...8`
• `(M USD)` -> `(M USD)...10`
• `` -> `...11`
• `(M USD)` -> `(M USD)...13`
• `` -> `...14`
• `(M USD)` -> `(M USD)...16`
• `` -> `...17`
• `(M USD)` -> `(M USD)...19`
Code
# Clean up ETF flows as some columns have same title
  etf_flow <- etf_flow[, -c(5,8,11,14,17)]
# Change column names
  colnames(etf_flow)[4] = "1D Flow (M USD)" 
  colnames(etf_flow)[6] = "1W Flow (M USD)"
  colnames(etf_flow)[8] = "1M Flow (M USD)"
  colnames(etf_flow)[10] = "YTD Flow (M USD)"
  colnames(etf_flow)[12] = "1Y Flow (M USD)"
  colnames(etf_flow)[14] = "3Y Flow (M USD)"

# Bloomberg data on ETF historical performance
etf_hist <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/hist.xlsx")

# Bloomberg data on ETF returns
etf_return <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/Returns.xlsx")

# Don't need this as it repeats existing data.
    # Bloomberg data summary on ETF
    #etf_summary <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/summary.xlsx")

# Bloomberg data on ETF volatility
etf_volt <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ETF comp/volatility.xlsx")
Warning: Expecting numeric in F1587 / R1587C6: got '--'
Warning: Expecting numeric in F2130 / R2130C6: got '--'
Code
# Pivot data for easier analysis
etf_al <- etf_al |>
  pivot_longer(cols = 3:13,
               names_to = "allocation",
               values_to = "percentage")
etf_al
# A tibble: 23,474 × 4
   Name   Ticker allocation    percentage
   <chr>  <chr>  <chr>         <chr>     
 1 Median <NA>   Materials     +4.68%    
 2 Median <NA>   Comm          +7.78%    
 3 Median <NA>   Cons Cyclical +10.40%   
 4 Median <NA>   Cons Non-Cycl +19.05%   
 5 Median <NA>   Divsf         +0.23%    
 6 Median <NA>   Energy        +5.09%    
 7 Median <NA>   Fin           +16.04%   
 8 Median <NA>   Ind           +10.68%   
 9 Median <NA>   Tech          +12.20%   
10 Median <NA>   Utils         +2.98%    
# … with 23,464 more rows

Clean ETF data

Code
view(etf_asset)
Code
# Create a list of all excel sheets to merge
etf_list = list(etf_al, etf_asset, etf_cost, etf_flow, etf_hist, etf_return, etf_volt)

# Merge data by Name and Ticker to avoid overlap
etf_data <- etf_list |>
  reduce(full_join, by = c('Name', 'Ticker'))
etf_data
# A tibble: 23,474 × 53
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>     <chr>   <chr>     <dbl>   <dbl> <chr>   <chr> <chr>  
 1 Median <NA>   Materials +4.68%  113.89     113.      84 <NA>    <NA>  0.50%  
 2 Median <NA>   Comm      +7.78%  113.89     113.      84 <NA>    <NA>  0.50%  
 3 Median <NA>   Cons Cyc… +10.40% 113.89     113.      84 <NA>    <NA>  0.50%  
 4 Median <NA>   Cons Non… +19.05% 113.89     113.      84 <NA>    <NA>  0.50%  
 5 Median <NA>   Divsf     +0.23%  113.89     113.      84 <NA>    <NA>  0.50%  
 6 Median <NA>   Energy    +5.09%  113.89     113.      84 <NA>    <NA>  0.50%  
 7 Median <NA>   Fin       +16.04% 113.89     113.      84 <NA>    <NA>  0.50%  
 8 Median <NA>   Ind       +10.68% 113.89     113.      84 <NA>    <NA>  0.50%  
 9 Median <NA>   Tech      +12.20% 113.89     113.      84 <NA>    <NA>  0.50%  
10 Median <NA>   Utils     +2.98%  113.89     113.      84 <NA>    <NA>  0.50%  
# … with 23,464 more rows, 43 more variables: `Fund Mgr Stated Fee` <chr>,
#   `Avg Bid Ask Spread` <chr>, `1 Yr NAV Trk Error` <chr>, Premium <chr>,
#   `52W Avg Prem` <chr>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
view(etf_data)
Code
#Create a column for US or JP market.
etf_data <- etf_data |>
 mutate(location = if_else(grepl("\\US",Ticker),"United States", "Japan"))
#Do not need this line, as many Japanese 'ESG' ETFs are NOT labeled as ESG in the market, so need to indicate that through alternative way (maybe the ESG ETF only data from Bloomberg)
#mutate(esg = if_else(grepl("\\ESG", Name), "ESG", "non-ESG"))
etf_data
# A tibble: 23,474 × 54
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>     <chr>   <chr>     <dbl>   <dbl> <chr>   <chr> <chr>  
 1 Median <NA>   Materials +4.68%  113.89     113.      84 <NA>    <NA>  0.50%  
 2 Median <NA>   Comm      +7.78%  113.89     113.      84 <NA>    <NA>  0.50%  
 3 Median <NA>   Cons Cyc… +10.40% 113.89     113.      84 <NA>    <NA>  0.50%  
 4 Median <NA>   Cons Non… +19.05% 113.89     113.      84 <NA>    <NA>  0.50%  
 5 Median <NA>   Divsf     +0.23%  113.89     113.      84 <NA>    <NA>  0.50%  
 6 Median <NA>   Energy    +5.09%  113.89     113.      84 <NA>    <NA>  0.50%  
 7 Median <NA>   Fin       +16.04% 113.89     113.      84 <NA>    <NA>  0.50%  
 8 Median <NA>   Ind       +10.68% 113.89     113.      84 <NA>    <NA>  0.50%  
 9 Median <NA>   Tech      +12.20% 113.89     113.      84 <NA>    <NA>  0.50%  
10 Median <NA>   Utils     +2.98%  113.89     113.      84 <NA>    <NA>  0.50%  
# … with 23,464 more rows, 44 more variables: `Fund Mgr Stated Fee` <chr>,
#   `Avg Bid Ask Spread` <chr>, `1 Yr NAV Trk Error` <chr>, Premium <chr>,
#   `52W Avg Prem` <chr>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
print(etf_data)
# A tibble: 23,474 × 54
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>     <chr>   <chr>     <dbl>   <dbl> <chr>   <chr> <chr>  
 1 Median <NA>   Materials +4.68%  113.89     113.      84 <NA>    <NA>  0.50%  
 2 Median <NA>   Comm      +7.78%  113.89     113.      84 <NA>    <NA>  0.50%  
 3 Median <NA>   Cons Cyc… +10.40% 113.89     113.      84 <NA>    <NA>  0.50%  
 4 Median <NA>   Cons Non… +19.05% 113.89     113.      84 <NA>    <NA>  0.50%  
 5 Median <NA>   Divsf     +0.23%  113.89     113.      84 <NA>    <NA>  0.50%  
 6 Median <NA>   Energy    +5.09%  113.89     113.      84 <NA>    <NA>  0.50%  
 7 Median <NA>   Fin       +16.04% 113.89     113.      84 <NA>    <NA>  0.50%  
 8 Median <NA>   Ind       +10.68% 113.89     113.      84 <NA>    <NA>  0.50%  
 9 Median <NA>   Tech      +12.20% 113.89     113.      84 <NA>    <NA>  0.50%  
10 Median <NA>   Utils     +2.98%  113.89     113.      84 <NA>    <NA>  0.50%  
# … with 23,464 more rows, 44 more variables: `Fund Mgr Stated Fee` <chr>,
#   `Avg Bid Ask Spread` <chr>, `1 Yr NAV Trk Error` <chr>, Premium <chr>,
#   `52W Avg Prem` <chr>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
glimpse(etf_data)
Rows: 23,474
Columns: 54
$ Name                     <chr> "Median", "Median", "Median", "Median", "Medi…
$ Ticker                   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ allocation               <chr> "Materials", "Comm", "Cons Cyclical", "Cons N…
$ percentage               <chr> "+4.68%", "+7.78%", "+10.40%", "+19.05%", "+0…
$ `Class Assets (MLN USD)` <chr> "113.89", "113.89", "113.89", "113.89", "113.…
$ `Fund Assets (MLN USD)`  <dbl> 112.81, 112.81, 112.81, 112.81, 112.81, 112.8…
$ Holdings                 <dbl> 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 5…
$ Primary                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Cross                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Expense Ratio`          <chr> "0.50%", "0.50%", "0.50%", "0.50%", "0.50%", …
$ `Fund Mgr Stated Fee`    <chr> "0.49%", "0.49%", "0.49%", "0.49%", "0.49%", …
$ `Avg Bid Ask Spread`     <chr> "0.23%", "0.23%", "0.23%", "0.23%", "0.23%", …
$ `1 Yr NAV Trk Error`     <chr> "0.5821%", "0.5821%", "0.5821%", "0.5821%", "…
$ Premium                  <chr> "0%", "0%", "0%", "0%", "0%", "0%", "0%", "0%…
$ `52W Avg Prem`           <chr> "+0.00%", "+0.00%", "+0.00%", "+0.00%", "+0.0…
$ `1D Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1D Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1…
$ `1W Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1W Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1…
$ `1M Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1M Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7…
$ `YTD Flow`               <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `YTD Flow (M USD)`       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -…
$ `1Y Flow`                <dbl> 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.5…
$ `1Y Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -…
$ `3Y Flow`                <dbl> 28.15, 28.15, 28.15, 28.15, 28.15, 28.15, 28.…
$ `3Y Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3…
$ `Fund Type`              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Structure                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Index Weight`           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `SFDR Class.`            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Use Derivative`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Tax Form`               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ UCITS                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `UK Reporting`           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ SFC                      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ China                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Leverage                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Inception Date`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `1D Return`              <chr> "+0.43%", "+0.43%", "+0.43%", "+0.43%", "+0.4…
$ `MTD Return`             <chr> "+0.14%", "+0.14%", "+0.14%", "+0.14%", "+0.1…
$ `YTD Return`             <chr> "+5.45%", "+5.45%", "+5.45%", "+5.45%", "+5.4…
$ `1 Yr Return`            <chr> "-4.20%", "-4.20%", "-4.20%", "-4.20%", "-4.2…
$ `3 Yr Return`            <chr> "+13.93%", "+13.93%", "+13.93%", "+13.93%", "…
$ `5 Yr Return`            <chr> "+6.07%", "+6.07%", "+6.07%", "+6.07%", "+6.0…
$ `10 Yr Return`           <chr> "+8.59%", "+8.59%", "+8.59%", "+8.59%", "+8.5…
$ `12M Yld`                <chr> "+1.68%", "+1.68%", "+1.68%", "+1.68%", "+1.6…
$ `1D Vol`                 <dbl> 8750, 8750, 8750, 8750, 8750, 8750, 8750, 875…
$ `30D Vol`                <chr> "15302", "15302", "15302", "15302", "15302", …
$ `Implied Liquidity`      <chr> "4831273.5", "4831273.5", "4831273.5", "48312…
$ `Bid Ask Spread`         <dbl> 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.0…
$ `Short Interest%`        <chr> "0.36", "0.36", "0.36", "0.36", "0.36", "0.36…
$ `Open Interest`          <chr> "61", "61", "61", "61", "61", "61", "61", "61…
$ location                 <chr> "Japan", "Japan", "Japan", "Japan", "Japan", …
Code
# Import ESG only Bloomberg data
esg_etf <- read_excel("C:/Users/okung/OneDrive/Johns Hopkins - SAIS/2022-2023/SP23/Sustainable Finance/Week 11/ESG/esg assets.xlsx")

glimpse(esg_etf)
Rows: 225
Columns: 7
$ Name                     <chr> "Median", "iShares ESG Aware MSCI USA ETF", "…
$ Ticker                   <chr> NA, "ESGU US", "1591 JP", "ESGD US", "ESGV US…
$ `Class Assets (MLN USD)` <dbl> 39.74, 14076.15, 9011.94, 7251.67, 6094.92, 4…
$ `Fund Assets (MLN USD)`  <dbl> 39.74, 14076.15, 9011.94, 7251.67, 6094.92, 4…
$ Holdings                 <dbl> 142, 321, 401, 435, 1499, 104, 402, 333, 403,…
$ Primary                  <chr> NA, "Y", "Y", "Y", "Y", "Y", "Y", "Y", "Y", "…
$ Cross                    <chr> NA, "N", "N", "N", "N", "N", "N", "N", "N", "…
Code
# Create column for ESG, (note that all these ETFs are ESG from Bloomberg Data!)
esg_etf <- esg_etf |>
  mutate(ESG = paste0('ESG'))
esg_etf
# A tibble: 225 × 8
   Name                       Ticker Class…¹ Fund …² Holdi…³ Primary Cross ESG  
   <chr>                      <chr>    <dbl>   <dbl>   <dbl> <chr>   <chr> <chr>
 1 Median                     <NA>      39.7    39.7     142 <NA>    <NA>  ESG  
 2 iShares ESG Aware MSCI US… ESGU … 14076.  14076.      321 Y       N     ESG  
 3 NEXT FUNDS JPX-Nikkei Ind… 1591 …  9012.   9012.      401 Y       N     ESG  
 4 iShares Trust iShares ESG… ESGD …  7252.   7252.      435 Y       N     ESG  
 5 Vanguard ESG US Stock ETF  ESGV …  6095.   6095.     1499 Y       N     ESG  
 6 iShares Global Clean Ener… ICLN …  4783.   4783.      104 Y       N     ESG  
 7 MAXIS JPX-Nikkei Index 40… 1593 …  4571.   4571.      402 Y       N     ESG  
 8 iShares Inc iShares ESG A… ESGE …  4380.   4380.      333 Y       N     ESG  
 9 iShares Trust - iShares M… DSI US  3516.   3516.      403 Y       N     ESG  
10 Vanguard ESG Internationa… VSGX …  3385.   3385.     5841 Y       N     ESG  
# … with 215 more rows, and abbreviated variable names
#   ¹​`Class Assets (MLN USD)`, ²​`Fund Assets (MLN USD)`, ³​Holdings
Code
#Drop columns already in etf_data frame
esg_etf <- esg_etf[, -c(3,4,5,6,7)]
esg_etf
# A tibble: 225 × 3
   Name                                                 Ticker  ESG  
   <chr>                                                <chr>   <chr>
 1 Median                                               <NA>    ESG  
 2 iShares ESG Aware MSCI USA ETF                       ESGU US ESG  
 3 NEXT FUNDS JPX-Nikkei Index 400 Exchange Traded Fund 1591 JP ESG  
 4 iShares Trust iShares ESG Aware MSCI EAFE ETF        ESGD US ESG  
 5 Vanguard ESG US Stock ETF                            ESGV US ESG  
 6 iShares Global Clean Energy ETF                      ICLN US ESG  
 7 MAXIS JPX-Nikkei Index 400 ETF                       1593 JP ESG  
 8 iShares Inc iShares ESG Aware MSCI EM ETF            ESGE US ESG  
 9 iShares Trust - iShares MSCI KLD 400 Social ETF      DSI US  ESG  
10 Vanguard ESG International Stock ETF                 VSGX US ESG  
# … with 215 more rows
Code
#Merge this ESG data with the etf_data dataframe
etf_esg_data <- etf_data |>
  full_join(esg_etf, by=c('Name','Ticker'))
etf_esg_data
# A tibble: 23,474 × 55
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>     <chr>   <chr>     <dbl>   <dbl> <chr>   <chr> <chr>  
 1 Median <NA>   Materials +4.68%  113.89     113.      84 <NA>    <NA>  0.50%  
 2 Median <NA>   Comm      +7.78%  113.89     113.      84 <NA>    <NA>  0.50%  
 3 Median <NA>   Cons Cyc… +10.40% 113.89     113.      84 <NA>    <NA>  0.50%  
 4 Median <NA>   Cons Non… +19.05% 113.89     113.      84 <NA>    <NA>  0.50%  
 5 Median <NA>   Divsf     +0.23%  113.89     113.      84 <NA>    <NA>  0.50%  
 6 Median <NA>   Energy    +5.09%  113.89     113.      84 <NA>    <NA>  0.50%  
 7 Median <NA>   Fin       +16.04% 113.89     113.      84 <NA>    <NA>  0.50%  
 8 Median <NA>   Ind       +10.68% 113.89     113.      84 <NA>    <NA>  0.50%  
 9 Median <NA>   Tech      +12.20% 113.89     113.      84 <NA>    <NA>  0.50%  
10 Median <NA>   Utils     +2.98%  113.89     113.      84 <NA>    <NA>  0.50%  
# … with 23,464 more rows, 45 more variables: `Fund Mgr Stated Fee` <chr>,
#   `Avg Bid Ask Spread` <chr>, `1 Yr NAV Trk Error` <chr>, Premium <chr>,
#   `52W Avg Prem` <chr>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
# Replace NA in ESG column with non-ESG
etf_esg_data <- etf_esg_data |>
  mutate(ESG = ifelse(is.na(ESG), "non-ESG", ESG))
etf_esg_data
# A tibble: 23,474 × 55
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>     <chr>   <chr>     <dbl>   <dbl> <chr>   <chr> <chr>  
 1 Median <NA>   Materials +4.68%  113.89     113.      84 <NA>    <NA>  0.50%  
 2 Median <NA>   Comm      +7.78%  113.89     113.      84 <NA>    <NA>  0.50%  
 3 Median <NA>   Cons Cyc… +10.40% 113.89     113.      84 <NA>    <NA>  0.50%  
 4 Median <NA>   Cons Non… +19.05% 113.89     113.      84 <NA>    <NA>  0.50%  
 5 Median <NA>   Divsf     +0.23%  113.89     113.      84 <NA>    <NA>  0.50%  
 6 Median <NA>   Energy    +5.09%  113.89     113.      84 <NA>    <NA>  0.50%  
 7 Median <NA>   Fin       +16.04% 113.89     113.      84 <NA>    <NA>  0.50%  
 8 Median <NA>   Ind       +10.68% 113.89     113.      84 <NA>    <NA>  0.50%  
 9 Median <NA>   Tech      +12.20% 113.89     113.      84 <NA>    <NA>  0.50%  
10 Median <NA>   Utils     +2.98%  113.89     113.      84 <NA>    <NA>  0.50%  
# … with 23,464 more rows, 45 more variables: `Fund Mgr Stated Fee` <chr>,
#   `Avg Bid Ask Spread` <chr>, `1 Yr NAV Trk Error` <chr>, Premium <chr>,
#   `52W Avg Prem` <chr>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
glimpse(etf_esg_data)
Rows: 23,474
Columns: 55
$ Name                     <chr> "Median", "Median", "Median", "Median", "Medi…
$ Ticker                   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ allocation               <chr> "Materials", "Comm", "Cons Cyclical", "Cons N…
$ percentage               <chr> "+4.68%", "+7.78%", "+10.40%", "+19.05%", "+0…
$ `Class Assets (MLN USD)` <chr> "113.89", "113.89", "113.89", "113.89", "113.…
$ `Fund Assets (MLN USD)`  <dbl> 112.81, 112.81, 112.81, 112.81, 112.81, 112.8…
$ Holdings                 <dbl> 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 5…
$ Primary                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Cross                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Expense Ratio`          <chr> "0.50%", "0.50%", "0.50%", "0.50%", "0.50%", …
$ `Fund Mgr Stated Fee`    <chr> "0.49%", "0.49%", "0.49%", "0.49%", "0.49%", …
$ `Avg Bid Ask Spread`     <chr> "0.23%", "0.23%", "0.23%", "0.23%", "0.23%", …
$ `1 Yr NAV Trk Error`     <chr> "0.5821%", "0.5821%", "0.5821%", "0.5821%", "…
$ Premium                  <chr> "0%", "0%", "0%", "0%", "0%", "0%", "0%", "0%…
$ `52W Avg Prem`           <chr> "+0.00%", "+0.00%", "+0.00%", "+0.00%", "+0.0…
$ `1D Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1D Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1…
$ `1W Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1W Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1…
$ `1M Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1M Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7…
$ `YTD Flow`               <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `YTD Flow (M USD)`       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -…
$ `1Y Flow`                <dbl> 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.5…
$ `1Y Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -…
$ `3Y Flow`                <dbl> 28.15, 28.15, 28.15, 28.15, 28.15, 28.15, 28.…
$ `3Y Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3…
$ `Fund Type`              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Structure                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Index Weight`           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `SFDR Class.`            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Use Derivative`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Tax Form`               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ UCITS                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `UK Reporting`           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ SFC                      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ China                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Leverage                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Inception Date`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `1D Return`              <chr> "+0.43%", "+0.43%", "+0.43%", "+0.43%", "+0.4…
$ `MTD Return`             <chr> "+0.14%", "+0.14%", "+0.14%", "+0.14%", "+0.1…
$ `YTD Return`             <chr> "+5.45%", "+5.45%", "+5.45%", "+5.45%", "+5.4…
$ `1 Yr Return`            <chr> "-4.20%", "-4.20%", "-4.20%", "-4.20%", "-4.2…
$ `3 Yr Return`            <chr> "+13.93%", "+13.93%", "+13.93%", "+13.93%", "…
$ `5 Yr Return`            <chr> "+6.07%", "+6.07%", "+6.07%", "+6.07%", "+6.0…
$ `10 Yr Return`           <chr> "+8.59%", "+8.59%", "+8.59%", "+8.59%", "+8.5…
$ `12M Yld`                <chr> "+1.68%", "+1.68%", "+1.68%", "+1.68%", "+1.6…
$ `1D Vol`                 <dbl> 8750, 8750, 8750, 8750, 8750, 8750, 8750, 875…
$ `30D Vol`                <chr> "15302", "15302", "15302", "15302", "15302", …
$ `Implied Liquidity`      <chr> "4831273.5", "4831273.5", "4831273.5", "48312…
$ `Bid Ask Spread`         <dbl> 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.0…
$ `Short Interest%`        <chr> "0.36", "0.36", "0.36", "0.36", "0.36", "0.36…
$ `Open Interest`          <chr> "61", "61", "61", "61", "61", "61", "61", "61…
$ location                 <chr> "Japan", "Japan", "Japan", "Japan", "Japan", …
$ ESG                      <chr> "ESG", "ESG", "ESG", "ESG", "ESG", "ESG", "ES…
Code
# Not all columns with values are numerics, need to change them.

# Create a list of columns that need to be changed to numerics
# First list is character values to numerics
num <-c(5,49:53)
etf_esg_data[num]<- lapply(etf_esg_data[num], as.numeric)
Warning in lapply(etf_esg_data[num], as.numeric): NAs introduced by coercion

Warning in lapply(etf_esg_data[num], as.numeric): NAs introduced by coercion

Warning in lapply(etf_esg_data[num], as.numeric): NAs introduced by coercion

Warning in lapply(etf_esg_data[num], as.numeric): NAs introduced by coercion

Warning in lapply(etf_esg_data[num], as.numeric): NAs introduced by coercion
Code
etf_esg_data
# A tibble: 23,474 × 55
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>     <chr>     <dbl>   <dbl>   <dbl> <chr>   <chr> <chr>  
 1 Median <NA>   Materials +4.68%     114.    113.      84 <NA>    <NA>  0.50%  
 2 Median <NA>   Comm      +7.78%     114.    113.      84 <NA>    <NA>  0.50%  
 3 Median <NA>   Cons Cyc… +10.40%    114.    113.      84 <NA>    <NA>  0.50%  
 4 Median <NA>   Cons Non… +19.05%    114.    113.      84 <NA>    <NA>  0.50%  
 5 Median <NA>   Divsf     +0.23%     114.    113.      84 <NA>    <NA>  0.50%  
 6 Median <NA>   Energy    +5.09%     114.    113.      84 <NA>    <NA>  0.50%  
 7 Median <NA>   Fin       +16.04%    114.    113.      84 <NA>    <NA>  0.50%  
 8 Median <NA>   Ind       +10.68%    114.    113.      84 <NA>    <NA>  0.50%  
 9 Median <NA>   Tech      +12.20%    114.    113.      84 <NA>    <NA>  0.50%  
10 Median <NA>   Utils     +2.98%     114.    113.      84 <NA>    <NA>  0.50%  
# … with 23,464 more rows, 45 more variables: `Fund Mgr Stated Fee` <chr>,
#   `Avg Bid Ask Spread` <chr>, `1 Yr NAV Trk Error` <chr>, Premium <chr>,
#   `52W Avg Prem` <chr>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
# Second list is for percentages to numerics
perc <- c(4,10:15,40:47)
#Divide by 100 to indicate percentage
etf_esg_data[perc] <- lapply(etf_esg_data[perc], function(x) as.numeric(gsub("%", "", x))/100)
Warning in FUN(X[[i]], ...): NAs introduced by coercion
Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion

Warning in FUN(X[[i]], ...): NAs introduced by coercion
Code
etf_esg_data
# A tibble: 23,474 × 55
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>       <dbl>   <dbl>   <dbl>   <dbl> <chr>   <chr>   <dbl>
 1 Median <NA>   Materials  0.0468    114.    113.      84 <NA>    <NA>    0.005
 2 Median <NA>   Comm       0.0778    114.    113.      84 <NA>    <NA>    0.005
 3 Median <NA>   Cons Cyc…  0.104     114.    113.      84 <NA>    <NA>    0.005
 4 Median <NA>   Cons Non…  0.190     114.    113.      84 <NA>    <NA>    0.005
 5 Median <NA>   Divsf      0.0023    114.    113.      84 <NA>    <NA>    0.005
 6 Median <NA>   Energy     0.0509    114.    113.      84 <NA>    <NA>    0.005
 7 Median <NA>   Fin        0.160     114.    113.      84 <NA>    <NA>    0.005
 8 Median <NA>   Ind        0.107     114.    113.      84 <NA>    <NA>    0.005
 9 Median <NA>   Tech       0.122     114.    113.      84 <NA>    <NA>    0.005
10 Median <NA>   Utils      0.0298    114.    113.      84 <NA>    <NA>    0.005
# … with 23,464 more rows, 45 more variables: `Fund Mgr Stated Fee` <dbl>,
#   `Avg Bid Ask Spread` <dbl>, `1 Yr NAV Trk Error` <dbl>, Premium <dbl>,
#   `52W Avg Prem` <dbl>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
glimpse(etf_esg_data)
Rows: 23,474
Columns: 55
$ Name                     <chr> "Median", "Median", "Median", "Median", "Medi…
$ Ticker                   <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ allocation               <chr> "Materials", "Comm", "Cons Cyclical", "Cons N…
$ percentage               <dbl> 0.0468, 0.0778, 0.1040, 0.1905, 0.0023, 0.050…
$ `Class Assets (MLN USD)` <dbl> 113.89, 113.89, 113.89, 113.89, 113.89, 113.8…
$ `Fund Assets (MLN USD)`  <dbl> 112.81, 112.81, 112.81, 112.81, 112.81, 112.8…
$ Holdings                 <dbl> 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 84, 5…
$ Primary                  <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Cross                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Expense Ratio`          <dbl> 5e-03, 5e-03, 5e-03, 5e-03, 5e-03, 5e-03, 5e-…
$ `Fund Mgr Stated Fee`    <dbl> 0.0049, 0.0049, 0.0049, 0.0049, 0.0049, 0.004…
$ `Avg Bid Ask Spread`     <dbl> 0.0023, 0.0023, 0.0023, 0.0023, 0.0023, 0.002…
$ `1 Yr NAV Trk Error`     <dbl> 0.005821, 0.005821, 0.005821, 0.005821, 0.005…
$ Premium                  <dbl> 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+…
$ `52W Avg Prem`           <dbl> 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+00, 0e+…
$ `1D Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1D Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1…
$ `1W Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1W Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1…
$ `1M Flow`                <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `1M Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 7…
$ `YTD Flow`               <dbl> 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.00, 0.0…
$ `YTD Flow (M USD)`       <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -…
$ `1Y Flow`                <dbl> 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.52, 1.5…
$ `1Y Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, -…
$ `3Y Flow`                <dbl> 28.15, 28.15, 28.15, 28.15, 28.15, 28.15, 28.…
$ `3Y Flow (M USD)`        <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 3…
$ `Fund Type`              <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Structure                <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Index Weight`           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `SFDR Class.`            <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Use Derivative`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Tax Form`               <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ UCITS                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `UK Reporting`           <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ SFC                      <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ China                    <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ Leverage                 <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `Inception Date`         <chr> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, "…
$ `1D Return`              <dbl> 0.0043, 0.0043, 0.0043, 0.0043, 0.0043, 0.004…
$ `MTD Return`             <dbl> 0.0014, 0.0014, 0.0014, 0.0014, 0.0014, 0.001…
$ `YTD Return`             <dbl> 0.0545, 0.0545, 0.0545, 0.0545, 0.0545, 0.054…
$ `1 Yr Return`            <dbl> -0.0420, -0.0420, -0.0420, -0.0420, -0.0420, …
$ `3 Yr Return`            <dbl> 0.1393, 0.1393, 0.1393, 0.1393, 0.1393, 0.139…
$ `5 Yr Return`            <dbl> 0.0607, 0.0607, 0.0607, 0.0607, 0.0607, 0.060…
$ `10 Yr Return`           <dbl> 0.0859, 0.0859, 0.0859, 0.0859, 0.0859, 0.085…
$ `12M Yld`                <dbl> 0.0168, 0.0168, 0.0168, 0.0168, 0.0168, 0.016…
$ `1D Vol`                 <dbl> 8750, 8750, 8750, 8750, 8750, 8750, 8750, 875…
$ `30D Vol`                <dbl> 15302, 15302, 15302, 15302, 15302, 15302, 153…
$ `Implied Liquidity`      <dbl> 4831274, 4831274, 4831274, 4831274, 4831274, …
$ `Bid Ask Spread`         <dbl> 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.07, 0.0…
$ `Short Interest%`        <dbl> 0.36, 0.36, 0.36, 0.36, 0.36, 0.36, 0.36, 0.3…
$ `Open Interest`          <dbl> 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 61, 2…
$ location                 <chr> "Japan", "Japan", "Japan", "Japan", "Japan", …
$ ESG                      <chr> "ESG", "ESG", "ESG", "ESG", "ESG", "ESG", "ES…
Code
# Change date column from character to actual dates

etf_esg_data$`Inception Date` <- mdy(etf_esg_data$`Inception Date`)
etf_esg_data <- etf_esg_data |>
  mutate(inception_year = year(`Inception Date`))
etf_esg_data
# A tibble: 23,474 × 56
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>       <dbl>   <dbl>   <dbl>   <dbl> <chr>   <chr>   <dbl>
 1 Median <NA>   Materials  0.0468    114.    113.      84 <NA>    <NA>    0.005
 2 Median <NA>   Comm       0.0778    114.    113.      84 <NA>    <NA>    0.005
 3 Median <NA>   Cons Cyc…  0.104     114.    113.      84 <NA>    <NA>    0.005
 4 Median <NA>   Cons Non…  0.190     114.    113.      84 <NA>    <NA>    0.005
 5 Median <NA>   Divsf      0.0023    114.    113.      84 <NA>    <NA>    0.005
 6 Median <NA>   Energy     0.0509    114.    113.      84 <NA>    <NA>    0.005
 7 Median <NA>   Fin        0.160     114.    113.      84 <NA>    <NA>    0.005
 8 Median <NA>   Ind        0.107     114.    113.      84 <NA>    <NA>    0.005
 9 Median <NA>   Tech       0.122     114.    113.      84 <NA>    <NA>    0.005
10 Median <NA>   Utils      0.0298    114.    113.      84 <NA>    <NA>    0.005
# … with 23,464 more rows, 46 more variables: `Fund Mgr Stated Fee` <dbl>,
#   `Avg Bid Ask Spread` <dbl>, `1 Yr NAV Trk Error` <dbl>, Premium <dbl>,
#   `52W Avg Prem` <dbl>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
print(etf_esg_data)
# A tibble: 23,474 × 56
   Name   Ticker allocat…¹ perce…² Class…³ Fund …⁴ Holdi…⁵ Primary Cross Expen…⁶
   <chr>  <chr>  <chr>       <dbl>   <dbl>   <dbl>   <dbl> <chr>   <chr>   <dbl>
 1 Median <NA>   Materials  0.0468    114.    113.      84 <NA>    <NA>    0.005
 2 Median <NA>   Comm       0.0778    114.    113.      84 <NA>    <NA>    0.005
 3 Median <NA>   Cons Cyc…  0.104     114.    113.      84 <NA>    <NA>    0.005
 4 Median <NA>   Cons Non…  0.190     114.    113.      84 <NA>    <NA>    0.005
 5 Median <NA>   Divsf      0.0023    114.    113.      84 <NA>    <NA>    0.005
 6 Median <NA>   Energy     0.0509    114.    113.      84 <NA>    <NA>    0.005
 7 Median <NA>   Fin        0.160     114.    113.      84 <NA>    <NA>    0.005
 8 Median <NA>   Ind        0.107     114.    113.      84 <NA>    <NA>    0.005
 9 Median <NA>   Tech       0.122     114.    113.      84 <NA>    <NA>    0.005
10 Median <NA>   Utils      0.0298    114.    113.      84 <NA>    <NA>    0.005
# … with 23,464 more rows, 46 more variables: `Fund Mgr Stated Fee` <dbl>,
#   `Avg Bid Ask Spread` <dbl>, `1 Yr NAV Trk Error` <dbl>, Premium <dbl>,
#   `52W Avg Prem` <dbl>, `1D Flow` <dbl>, `1D Flow (M USD)` <dbl>,
#   `1W Flow` <dbl>, `1W Flow (M USD)` <dbl>, `1M Flow` <dbl>,
#   `1M Flow (M USD)` <dbl>, `YTD Flow` <dbl>, `YTD Flow (M USD)` <dbl>,
#   `1Y Flow` <dbl>, `1Y Flow (M USD)` <dbl>, `3Y Flow` <dbl>,
#   `3Y Flow (M USD)` <dbl>, `Fund Type` <chr>, Structure <chr>, …
Code
# Save data in csv

write.csv(etf_esg_data, here("etf cross comparison.csv"))