HW3_Data transformation_NadiaXING

Problem1

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
options(scipen = 999) 
blackrock_etf_screener <- read_csv("https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/ishares_etf_screener_as_of_2023-12-27.csv") 
Rows: 424 Columns: 18
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr  (10): ticker, name, asset_class, sub_asset_class, region, market, locat...
dbl   (6): gross_expense_ratio_percent, net_expense_ratio_percent, net_asset...
dttm  (2): incept_date, net_assets_as_of

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
esg_funds_tibble <- blackrock_etf_screener %>%
  filter(sustainable_classification %in% c("Uplift", "Thematic", "Screened", "Impact")) %>%
  group_by(sustainable_classification) %>%
  summarize(total_assets = sum(net_assets_usd, na.rm = TRUE), 
            largest_fund_assets = max(net_assets_usd, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(percent_share = (largest_fund_assets / total_assets) * 100,
         rank = rank(-percent_share)) %>%
  arrange(desc(percent_share))

esg_funds_tibble
# A tibble: 4 × 5
  sustainable_classificat…¹ total_assets largest_fund_assets percent_share  rank
  <chr>                            <dbl>               <dbl>         <dbl> <dbl>
1 Impact                      335216065.          335216065.         100       1
2 Thematic                   3427675675.         3057666513.          89.2     2
3 Screened                    383389770.          182340754.          47.6     3
4 Uplift                    51443665637.        13385805595.          26.0     4
# ℹ abbreviated name: ¹​sustainable_classification

Problem2

Create a sorted bar chart of the 10 smallest ETFs.

Hint: for sorting the bar chart, look up fct_reorder() . Make sure your chart has meaningful titles and labels, including numbers of an appropriate magnitude.

library(tidyverse)
smallest_etfs <- blackrock_etf_screener %>%
  arrange(net_assets_usd) %>%
  slice_head(n = 10) %>%
  mutate(name = fct_reorder(name, net_assets_usd)) 
ggplot(smallest_etfs, aes(x = name, y = net_assets_usd)) +
  geom_bar(stat = "identity", alpha=0.5) +
  coord_flip() + # Flips the axes to make the chart horizontal
  labs(
    title = "Top 10 Smallest ETFs by Net Assets",
    x = "ETF Name",
    y = "Net Assets (USD)",
    caption = "Data source: BlackRock ETF Screener,Made by NadiaXING"
  ) +
  theme_minimal() +
  scale_y_continuous(labels = scales::label_dollar(scale=1/10^6,suffix = "M")) # Format the net assets as currency

Problem3

library(dplyr)
esg_threshold <- quantile(blackrock_etf_screener$msci_esg_quality_score_0_10, 0.2, na.rm = TRUE)
bottom_quintile_funds <- blackrock_etf_screener %>%
  filter(msci_esg_quality_score_0_10 <= esg_threshold)
print(bottom_quintile_funds$name)
 [1] "iShares Core S&P Small-Cap ETF"                         
 [2] "iShares Core MSCI Emerging Markets ETF"                 
 [3] "iShares Russell 2000 ETF"                               
 [4] "iShares iBoxx $ High Yield Corporate Bond ETF"          
 [5] "iShares MSCI Emerging Markets ETF"                      
 [6] "iShares J.P. Morgan USD Emerging Markets Bond ETF"      
 [7] "iShares Preferred and Income Securities ETF"            
 [8] "iShares Russell 2000 Value ETF"                         
 [9] "iShares Broad USD High Yield Corporate Bond ETF"        
[10] "iShares Russell 2000 Growth ETF"                        
[11] "iShares S&P Small-Cap 600 Value ETF"                    
[12] "iShares MSCI India ETF"                                 
[13] "iShares MSCI Brazil ETF"                                
[14] "iShares MSCI China ETF"                                 
[15] "iShares S&P Small-Cap 600 Growth ETF"                   
[16] "iShares 0-5 Year High Yield Corporate Bond ETF"         
[17] "iShares MSCI Emerging Markets Min Vol Factor ETF"       
[18] "iShares Global REIT ETF"                                
[19] "iShares Core U.S. REIT ETF"                             
[20] "iShares MSCI Mexico ETF"                                
[21] "iShares U.S. Financials ETF"                            
[22] "iShares Latin America 40 ETF"                           
[23] "iShares Micro-Cap ETF"                                  
[24] "iShares MSCI USA Small-Cap Min Vol Factor ETF"          
[25] "iShares MSCI Saudi Arabia ETF"                          
[26] "iShares Emerging Markets Dividend ETF"                  
[27] "iShares Agency Bond ETF"                                
[28] "iShares Residential and Multisector Real Estate ETF"    
[29] "iShares Mortgage Real Estate ETF"                       
[30] "iShares Emerging Markets Equity Factor ETF"             
[31] "iShares Morningstar Small-Cap Growth ETF"               
[32] "iShares J.P. Morgan EM Local Currency Bond ETF"         
[33] "iShares® iBonds® 2024 Term High Yield and Income ETF"   
[34] "iShares U.S. Insurance ETF"                             
[35] "iShares Morningstar Small-Cap Value ETF"                
[36] "iShares MSCI Indonesia ETF"                             
[37] "iShares J.P. Morgan EM Corporate Bond ETF"              
[38] "iShares MSCI Emerging Markets Small-Cap ETF"            
[39] "iShares High Yield Bond Factor ETF"                     
[40] "iShares J.P. Morgan EM High Yield Bond ETF"             
[41] "iShares® iBonds® 2025 Term High Yield and Income ETF"   
[42] "iShares Global Comm Services ETF"                       
[43] "iShares MSCI Brazil Small-Cap ETF"                      
[44] "iShares MSCI Poland ETF"                                
[45] "iShares® iBonds® 2026 Term High Yield and Income ETF"   
[46] "iShares Morningstar Small-Cap ETF"                      
[47] "iShares MSCI China A ETF"                               
[48] "iShares MSCI Global Silver and Metals Miners ETF"       
[49] "iShares Interest Rate Hedged High Yield Bond ETF"       
[50] "iShares MSCI Turkey ETF"                                
[51] "iShares Genomics Immunology and Healthcare ETF"         
[52] "iShares Currency Hedged MSCI Emerging Markets ETF"      
[53] "iShares US & Intl High Yield Corp Bond ETF"             
[54] "iShares MSCI Japan Small-Cap ETF"                       
[55] "iShares MSCI Peru and Global Exposure ETF"              
[56] "iShares MSCI Philippines ETF"                           
[57] "iShares US Small Cap Value Factor ETF"                  
[58] "iShares MSCI Qatar ETF"                                 
[59] "iShares® iBonds® 2027 Term High Yield and Income ETF"   
[60] "iShares MSCI BIC ETF"                                   
[61] "iShares ESG Screened S&P Small-Cap ETF"                 
[62] "iShares MSCI Kuwait ETF"                                
[63] "iShares MSCI China Small-Cap ETF"                       
[64] "iShares High Yield Corporate Bond BuyWrite Strategy ETF"
[65] "iShares® iBonds® 2028 Term High Yield and Income ETF"   
[66] "iShares J.P. Morgan Broad USD Emerging Markets Bond ETF"
[67] "iShares MSCI UAE ETF"                                   
[68] "iShares Emerging Markets Infrastructure ETF"            
[69] "iShares Blockchain and Tech ETF"                        
[70] "iShares® iBonds® 2029 Term High Yield and Income ETF"   
[71] "iShares® iBonds® 2030 Term High Yield and Income ETF"   
[72] "iShares MSCI China Multisector Tech ETF"                
[73] "BlackRock Future Financial and Technology ETF"          
[74] "iShares Neuroscience and Healthcare ETF"                
[75] "iShares Lithium Miners and Producers ETF"               
[76] "iShares Inflation Hedged High Yield Bond ETF"           
summary_by_class <- bottom_quintile_funds %>%
  group_by(asset_class, sub_asset_class) %>%
  summarise(
    Number_of_Funds = n(),
    total_assets = sum(net_assets_usd, na.rm = TRUE)
  ) %>%
  ungroup()
`summarise()` has grouped output by 'asset_class'. You can override using the
`.groups` argument.
print(summary_by_class)
# A tibble: 9 × 4
  asset_class  sub_asset_class        Number_of_Funds  total_assets
  <chr>        <chr>                            <int>         <dbl>
1 Equity       All Cap                             23  95486052602.
2 Equity       Large Cap                            1   1852506812.
3 Equity       Large/Mid Cap                       10  42421984678.
4 Equity       Small Cap                           17 187292555389.
5 Fixed Income Corporates                           1     41339182.
6 Fixed Income Credit                               1    408038372.
7 Fixed Income Government                           3  17560711395.
8 Fixed Income High Yield                          16  38239624578.
9 Real Estate  Real Estate Securities               4   7145439523.

Problem4

first data visualization:

library(dplyr)
library(ggplot2)
options(repos = c(CRAN = "https://cloud.r-project.org/"))
install.packages("plotly")

The downloaded binary packages are in
    /var/folders/pz/gf8r5yfj7r93ln6kz_gbkjtc0000gn/T//Rtmp4nhco5/downloaded_packages
library("plotly")

Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':

    last_plot
The following object is masked from 'package:stats':

    filter
The following object is masked from 'package:graphics':

    layout
region_summary_data <- blackrock_etf_screener |> 
  count(region,sustainable_classification)
p<- ggplot(region_summary_data, aes(x = region , y = n, fill = sustainable_classification)) +
  geom_bar(stat = "identity", position = "stack") +
  labs(title = "number of funds by region and sustainable classification",
       x = "Region",
       y = "number of funds") +
  theme(axis.text.x = element_text(size = 10, angle = 45, hjust = 1)) 
p_interactive <- ggplotly(p)
p_interactive

second data visualization

region_score_data <- blackrock_etf_screener |> 
  group_by(region) |> 
  summarize(average_score = mean(msci_esg_quality_score_0_10, na.rm = TRUE)) |> 
  ungroup() 
ggplot(region_score_data, aes(x = region, y = average_score)) +
geom_bar (stat = "identity", position = "dodge") +
  labs(title = "average esg quality score by region ",
       x = "Region",
       y = "Average Esg Quality Score") +
  theme(axis.text.x = element_text(size = 12, angle = 45, hjust = 1)) 

third data visualization

library(dplyr)
library(ggplot2)
fund_score_data <- blackrock_etf_screener |> 
  group_by(region) |> 
  summarize(
    average_score = mean(msci_esg_quality_score_0_10, na.rm = TRUE),
    number_of_funds = n()
  ) |> 
  ungroup()
ggplot(fund_score_data, aes(y = average_score, x = number_of_funds, group = 1)) +
  geom_line(aes(color = region), size = 1) +   
  geom_point(aes(color = region), size = 3) +  
  labs(
    title = "Relationship between Average ESG Score and Number of Funds by Region",
    x = "Number of Funds",
    y = "Average ESG Score"
  ) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1), legend.title = element_blank()) 
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

It seems there is no clear linear relationship between the average ESG score and the number of funds across region.

fourth data visualization

library(dplyr)
ESG_rating_percentage <- blackrock_etf_screener |> 
  group_by (msci_esg_fund_rating_aaa_ccc) |> 
  summarize(count =n()) |> 
  ungroup() |> 
mutate(total_count = sum (count)) |> 
mutate(rating_percentage = (count/total_count)*100)
library(ggplot2)

ggplot(ESG_rating_percentage, aes(x = "", y = rating_percentage, fill = msci_esg_fund_rating_aaa_ccc)) +
  geom_bar(width = 1, stat = "identity", color = "white") +
  coord_polar(theta = "y") + 
  geom_text(aes(label = paste0 (round(rating_percentage,1),"%")),
            position = position_stack(vjust = 0.5),
            color = "black")+
  labs(fill = "ESG Rating", 
       title = "ESG Rating Percentage") +
  theme_void() +
  theme(legend.title = element_text(size = 12), legend.position = "right") 

fifth data visualization

library(dplyr)
asset_class_percentage <- blackrock_etf_screener |> 
  group_by(asset_class) |> 
  summarise(total_net_assets_usd = sum(net_assets_usd, na.rm = TRUE)) |> 
  ungroup() |> 
mutate(total_assets = sum(total_net_assets_usd),
       asset_class_percentage = (total_net_assets_usd/total_assets)*100) 
  
ggplot(asset_class_percentage, aes(x = "", y ="total_net_assets_usd" , fill = asset_class)) +
  geom_bar(width = 1, stat = "identity", color = "pink") +
  coord_polar(theta = "y") + 
  geom_text(aes(label = paste0 (round(asset_class_percentage,1),"%")),
            position = position_stack(vjust = 0.5),
            color = "white")+
  labs(fill = "asset_class", 
       title = "Asset class Percentage") +
  theme_void() +
  theme(legend.title = element_text(size = 12), legend.position = "right")