W2 HW

HW2

1.5.1.2

  1. Import blackrock_esg_vs_non_esg_etf
library(readr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(esquisse)
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  1. Look at the screenshot above. Assign your variables to the correct aesthetics, and choose the correct chart type.

  2. Explore the menus at the bottom of the screen.

  • Change the x and y axis to log10.

  • Add a smoother (there is no 45 degree line option in esquisse).

  • Add descriptive titles and labels. Put your name in the caption.

  1. Change the color palette to a different color palette of your choice.
library(ggplot2)
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(method = "loess", se = FALSE, color = "darkgrey") +
  scale_color_brewer(palette = "Set3", direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(
    x = "ESG ETF (ESGU) [log scale]",
    y = "Standard ETF (IVV) [log scale]",
    title = "Large Cap American Equities ETFs: ESG vs. Non-ESG",
    subtitle = "A comparison of the holdings of BlackRock iShares ESGU and IVV",
    caption = "Linyi Zheng"
  ) +
  theme_minimal() +
  facet_wrap(vars(sector), ncol = 3L)
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

  scale_color_brewer(palette = "Set2")  
<ggproto object: Class ScaleDiscrete, Scale, gg>
    aesthetics: colour
    axis_order: function
    break_info: function
    break_positions: function
    breaks: waiver
    call: call
    clone: function
    dimension: function
    drop: TRUE
    expand: waiver
    get_breaks: function
    get_breaks_minor: function
    get_labels: function
    get_limits: function
    guide: legend
    is_discrete: function
    is_empty: function
    labels: waiver
    limits: NULL
    make_sec_title: function
    make_title: function
    map: function
    map_df: function
    n.breaks.cache: NULL
    na.translate: TRUE
    na.value: NA
    name: waiver
    palette: function
    palette.cache: NULL
    position: left
    range: environment
    rescale: function
    reset: function
    scale_name: brewer
    train: function
    train_df: function
    transform: function
    transform_df: function
    super:  <ggproto object: Class ScaleDiscrete, Scale, gg>

1.5.1.3

blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |> 
  pivot_longer(cols = contains("etf"), names_to = "fund_type", values_to = "weight") |> 
  mutate(fund_type = case_when(fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
                               fund_type == "standard_etf" ~ "Standard ETF (IVV)"))
blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
library(dplyr)
library(ggplot2)
blackrock_esg_vs_non_esg_etf_long %>%
 filter(weight >= 1 & weight <= 100) %>%
 ggplot() +
 aes(x = weight, y = company_name, colour = fund_type, size = weight) +
 geom_point(shape = "circle") +
 scale_color_manual(values = c(`ESG ETF (ESGU)` = "green", `Standard ETF (IVV)` = "gray"
)) +
 labs(x = "Weight", y = "Company Name", title = "Comparation of Weight in ESG and non-ESG", caption = "Linyi Zheng") +
 theme_minimal()

short reflection: As most data points fall on the lower end of the weight scale, outliers become more noticeable. There are two outliers with extreme weights of approximately 5.8% and 6.9%, which are included in both ESG and non-ESG ETFs. This suggests that the majority of these companies give equal importance to both types of ETFs.

1.5.1.4

library(ggplot2)


etf_summary <-  blackrock_esg_vs_non_esg_etf_long %>%
  group_by(sector, fund_type) %>%
  summarise(total_weight = sum(weight)) %>%
  group_by(sector) %>%
  mutate(proportion_esg = sum(ifelse(fund_type == "ESG ETF (ESGU)", total_weight, 0)) / sum(total_weight),
         proportion_non_esg = sum(ifelse(fund_type != "ESG ETF (ESGU)", total_weight, 0)) / sum(total_weight))
`summarise()` has grouped output by 'sector'. You can override using the
`.groups` argument.
total_counts <- blackrock_esg_vs_non_esg_etf_long %>%
  group_by(sector) %>%
  summarise(total_count = n())

etf_summary <- merge(total_counts, etf_summary, by = "sector")


ggplot(etf_summary, aes(x = sector)) +
  geom_bar(aes(y = total_count), stat = "identity", fill = "grey") +
  geom_line(aes(y = proportion_esg * max(total_count)), color = "green", size = 1.5) +
  geom_line(aes(y = proportion_non_esg * max(total_count)), color = "red", size = 1.5) +
  geom_point(aes(y = proportion_esg * max(total_count)), color = "green", size = 4) +
  geom_point(aes(y = proportion_non_esg * max(total_count)), color = "red", size = 4) +
  scale_y_continuous(sec.axis = sec_axis(~ . / max(etf_summary$total_count), name = "Proportion")) +
  labs(title = "Total Count of ETFs and Proportion of ESG/Non-ESG ETFs Across Sectors", 
       x = "Sector", y = "Total Count of ETFs") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

This chart shows the amount of ETF across sectors. IT has the most ETFs, and Energy has the least. And within each sector, the proportions of ESG/Non-ESG ETFs across sectors are basically the same.

c2 <- blackrock_esg_vs_non_esg_etf |> 
    ggplot(aes(x = esg_etf, y = standard_etf, color = sector)) +
    geom_point(size = 3) +  
    scale_color_manual(values = c("red", "blue", "green", "orange","purple","cyan", "magenta", "yellow","black", "gray", "brown")) + 
    scale_x_log10() +
    scale_y_log10()+
    coord_flip() +
    theme_minimal() + 
    labs(title = "Comparison of Standard ETFs vs. ESG ETFs",
       x = "Standard ETF",
       y = "ESG ETF",
       color = "Sector") 

c2
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis

The graph shows the relationship between ESG ETF and standard ETF among different sectors. There is a subtle linear relationship.

1.5.2.1

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  geom_point(mapping = aes(color = sector, shape = "circle")) +
  geom_smooth(span = 0.75)+
  scale_color_hue(direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(title = "Chart 1") +
  theme_grey()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_color_hue(direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(title = "Chart 2") +
  theme_grey()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  geom_point(shape = "circle", size = 1.5, colour = "purple") +
  geom_smooth(span = 0.75, color = "yellow") +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(x = "ESG ETF", y = "Standard ETF") +
  theme_grey()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

1.5.3.1

blackrock_esg_vs_non_esg_etf_long %>%
  filter(weight >= 1 & weight <= 100) %>%
  ggplot(aes(x = reorder(company_name, -weight), y = weight, fill = fund_type)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  scale_fill_manual(values = c(`ESG ETF (ESGU)` = "green", `Standard ETF (IVV)` = "red")) +
  labs(x = "Company Name", y = "Weight", 
       title = "Comparison of Company Weight in ESG vs. Non-ESG ETFs",
       subtitle = "Bar chart showing the weight distribution across companies", 
       caption = "Data visualization by Nadia Xing") +
  theme_grey() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

The chart shows the weight distribution in ESG and non-ESG ETFs across companies. We can see different companies have different distributions of ETFs.

1.5.3.2

options(repos = c(CRAN = "https://cloud.r-project.org/"))
install.packages("ggbeeswarm")

The downloaded binary packages are in
    /var/folders/65/g9h63hfx5m3c0vvggdnknh440000gn/T//RtmpFrSTBX/downloaded_packages
library(ggbeeswarm)
ggplot(blackrock_esg_vs_non_esg_etf_long, aes(x=sector, y=weight, color=fund_type)) +
  geom_quasirandom(dodge.width=1)+ scale_y_continuous(trans = "log10") + 
  labs(title = "Violin Plot of ETF Weights Allocation")+
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Removed 261 rows containing missing values (`geom_point()`).

This shows the weights allocated to each sector, respectively for each fund type.