Homework - Week 2

Author

Lexi Lei

Problem 1

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 1L) +
  scale_color_viridis_d(option = "inferno", direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(
    x = "Weight in ESG ETF (ESGU)",
    y = "Weight in Standard ETF (IVV)",
    title = "Large Cap American Equities ETFs: ESG vs. Non-ESG",
    subtitle = "A comparison of the holdings of BlackRock iShares ESGU and IVV",
    caption = "Lexi Lei"
  ) +
  theme_minimal() +
  theme(legend.position = "none") +
  facet_wrap(vars(sector), ncol = 4L)
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Problem 2

library(tidyverse)
  
# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# change data from wide to long
blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |> 
  pivot_longer(cols = contains("etf"), names_to = "fund_type", values_to = "weight") |>
  mutate(fund_type = case_when(fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
                               fund_type == "standard_etf" ~ "Standard ETF (IVV)"))

blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
# visulazation
blackrock_esg_vs_non_esg_etf_long %>%
 filter(weight >= 1L & weight <= 7L) %>%
 ggplot() +
  aes(
    x = weight,
    y = company_name,
    colour = fund_type,
    size = weight
  ) +
  geom_point(shape = "circle") +
  scale_color_manual(
    values = c(`ESG ETF (ESGU)` = "#63C32D",
    `Standard ETF (IVV)` = "#B0ACAE")
  ) +
  labs(
    x = "Weight",
    y = "Company Name",
    title = "Top Tier Investees:  ESG vs. Non-ESG",
    subtitle = "A comparison of the holdings of BlackRock iShares ESGU and IVV",
    caption = "Lexi Lei"
  ) +
  theme_minimal()

Big Tech tends to be heavily held by both ESG and Non-ESG funds, as those companies are leading the socioeconomic transition, with both strong ESG performance and considerable returns. In contrary, asset-heavy companies in traditional industries tend to be invested less by ESG funds.

Problem 3

library(tidyverse)

# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# visulization
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = sector, weight = esg_etf) +
  geom_bar(fill = "#3E5F9B") +
  labs(
    x = "Sector",
    y = "Weight",
    title = "Large Cap American Equities ETFs: ESG Investment by Sector",
    subtitle = "A comparison based on the holdings of BlackRock iShares ESGU",
    caption = "Lexi Lei"
  ) +
  coord_flip() +
  theme_minimal()

Information technology gets the most ESG investment. In contrary, energy sector tends to be largely under-invested given its relative importance in environment conservation.

library(tidyverse)

# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# filter companies without ESG investment
blackrock_zero_esg <- blackrock_esg_vs_non_esg_etf %>%
  filter(esg_etf < 0.01)

# visualization
ggplot(blackrock_zero_esg) +
  aes(x = sector) +
  geom_bar(fill = "#3E5F9B") +
  labs(
    x = "Sector",
    y = "Count",
    title = "Large Cap American Equities ETFs: 
    Low ESG Investment by Sector",
    subtitle = "A comparison based on the holdings of BlackRock iShares ESGU",
    caption = "Lexi Lei"
  ) +
  coord_flip() +
  theme_minimal()

It shows the number of companies who do not get ESG investment in each sector. Many companies in the industrial, financial, and consumer discretionary sectors are not able to attract ESG investment, while it is easier for the energy sector to access ESG funding.

Problem 4

Chart 1

library(tidyverse)

# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# visulization
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  geom_point(mapping = aes (shape = "circle", colour = sector)) +
  geom_smooth() +
  scale_x_log10() + 
  scale_y_log10() +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Make color by sector specific to geom_point

Chart 2

library(tidyverse)

# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# visulization
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle") +
  geom_smooth() +
  scale_x_log10() + 
  scale_y_log10() +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Add color by sector to the whole plot

Chart 3

library(tidyverse)

# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# visulization
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  geom_point(shape = "circle", colour = "purple") +
  geom_smooth(colour = "yellow") +
  scale_x_log10() + 
  scale_y_log10() +
  theme_gray() +
  theme(legend.position = "none")
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Remove aes() in geom_point and geom_smooth, add colors directly instead, drop legend

Problem 5

Bubble plot from R Graph Gallery

library(tidyverse)

# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# visulization
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = sector, y = esg_etf, colour = sector) +
  geom_boxplot(fill = "#112446") +
  scale_color_hue(direction = 1) +
  theme_minimal()

  labs(
    x = "Sector",
    y = "ESG",
    title = "Large Cap American Equities ETFs: ESG Investment by Sector",
    subtitle = "A comparison based on the holdings of BlackRock iShares ESGU",
    caption = "Lexi Lei"
  ) +
  theme_minimal()
NULL

Healthcare gets the most ESG investment. Tech giants get enormous amount of ESG investment. Averagely, it is easier for energy sector to attract ESG investment, while consumer discretionary, real estate, utilities get the least.

Problem 6

Extension ggsci

library(tidyverse)
library(ggsci)

# import data
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# visulization
p6 <- ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = sector, y = esg_etf, colour = sector) +
  geom_boxplot() +
  theme_minimal()
labs(
  x = "Sector",
  y = "ESG",
  title = "Large Cap American Equities ETFs: ESG Investment by Sector",
  subtitle = "A comparison based on the holdings of BlackRock iShares ESGU",
  caption = "Lexi Lei"
) +
  theme_minimal()
NULL
p6 + scale_color_aaas()
Warning: This manual palette can handle a maximum of 10 values. You have
supplied 11

Color set changed compared to the previous one in Problem 5.