title: "Assignment 3"

format: html

editor: visual


Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto, see <https://quarto.org>.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

You can add options to executable code like this:

library(tidyverse) 
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0      ✔ purrr   1.0.1 
## ✔ tibble  3.1.8      ✔ dplyr   1.0.10
## ✔ tidyr   1.3.0      ✔ stringr 1.5.0 
## ✔ readr   2.1.3      ✔ forcats 0.5.2 
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# Location of the raw CSV (BlackRock ETF screener snapshot) on GitHub.
github_raw_csv_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/blackrock_etf_screener_2022-08-30.csv"

# Download and parse the CSV; the result is stored as `blackrock_etf_data`.
# Column types are guessed by readr, which prints the guessed specification.
blackrock_etf_data <- readr::read_csv(file = github_raw_csv_url)
## Rows: 393 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (14): ticker, name, incept_date, net_assets_as_of, asset_class, sub_asse...
## dbl  (8): gross_expense_ratio_percent, net_expense_ratio_percent, net_assets...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Clean the raw screener data:
#   1. parse every column whose name contains "date" (read in as character
#      strings) into Date objects with lubridate::mdy(), i.e. month-day-year;
#   2. add `net_assets_usd_bn`, net assets in USD billions, by dividing
#      `net_assets_usd_mn` (USD millions) by 1,000 -- dividing by 1,000,000
#      would give trillions instead;
#   3. drop `net_assets_as_of`, which only records the snapshot date
#      (8/30/22) and adds nothing to the analysis.
blackrock_etf_data <- blackrock_etf_data |>
  mutate(
    across(contains("date"), lubridate::mdy),
    net_assets_usd_bn = net_assets_usd_mn / 1000
  ) |>
  select(!net_assets_as_of)
# Build a small comparison table: the largest funds by net assets within
# each `is_esg` group (ESG funds vs. all other funds).
mini_blackrock_data <- blackrock_etf_data |>
  # group by whether the fund is an ESG fund or not
  group_by(is_esg) |>
  # keep the 5 largest funds per group, ranked by net assets.
  # slice_max() keeps ties by default, so a group can return more than 5 rows.
  slice_max(order_by = net_assets_usd_bn, n = 5) |>
  # `is_esg` is selected explicitly and placed first. select() on grouped data
  # always keeps the grouping column; leaving it out makes dplyr re-add it
  # silently and print "Adding missing grouping variables: `is_esg`".
  # The long carbon-intensity column is renamed to `co2_intensity` here,
  # because the full name is a mouthful, if descriptive.
  select(
    is_esg,
    ticker,
    fund_name = name_wo_ishares_etf,
    asset_class,
    sub_asset_class,
    region,
    incept_date,
    net_assets_usd_bn,
    co2_intensity = msci_weighted_average_carbon_intensity_tons_co2e_m_sales
  ) |>
  # drop the grouping so later verbs operate on the whole table again
  ungroup()
library(ggplot2)
# Average gross expense ratio (GER) by region. GER is the annual cost of
# investing in the ETF.
#
# A plain geom_col() stacks one segment per fund, so each bar would show the
# *sum* of the ratios in a region and mostly reflect how many funds that
# region has. Plotting the per-region mean keeps regions with different
# numbers of funds comparable.
# NOTE(review): the earlier reading ("North America is the most expensive
# region, the Middle East the cheapest") was based on the stacked sums --
# re-check it against this chart before relying on it.

ggplot(
  data = blackrock_etf_data,
  aes(x = region, y = gross_expense_ratio_percent)
) +
  stat_summary(fun = mean, geom = "col", color = "black") +
  labs(
    x = "Region",
    y = "Mean gross expense ratio (%)",
    title = "Average gross expense ratio by region"
  )

# Column chart of the MSCI weighted-average carbon intensity by sustainable
# classification, with one panel per value of `is_esg` (ESG funds vs. regular
# funds).
# geom_col() stacks one segment per fund, so each bar's height is the sum of
# the values of all funds in that classification.
# NOTE(review): consider plotting a mean or median if a per-fund comparison
# is what is intended.

ggplot(
  data = blackrock_etf_data,
  aes(
    x = sustainable_classification,
    y = msci_weighted_average_carbon_intensity_tons_co2e_m_sales
  )
) +
  # `linewidth` sets the outline width; using `size` for this was deprecated
  # in ggplot2 3.4.0 and triggers a warning.
  geom_col(color = "darkgreen", linewidth = 1) +
  labs(
    x = "sustainable classification",
    y = "MSCI co2",
    title = "BlackRock ETF msci co2"
  ) +
  facet_wrap(~is_esg, ncol = 1)
## Warning: Removed 62 rows containing missing values (`position_stack()`).

# Histogram of fund size: the number of ETFs falling into each
# 3-billion-USD-wide bin of net assets.

blackrock_etf_data |>
  ggplot(mapping = aes(x = net_assets_usd_bn)) +
  geom_histogram(fill = "blue", color = "black", binwidth = 3) +
  labs(
    x = "Net Assets (USD Bn)",
    y = "Count",
    title = "Net Assets Distribution"
  ) +
  theme_minimal()