Quarto 2

Author

Haotian Duan

Homework #2

1.5.1.2 Homework problem 1: Recreate the chart above in Esquisse

# Load the tidyverse
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Read the ETF comparison CSV from GitHub and keep only the columns from
# company_name through standard_etf
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"
blackrock_esg_vs_non_esg_etf <- select(
  read_csv(github_url),
  company_name:standard_etf
)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Scatter plot of ESG ETF weight against standard ETF weight, one point per
# company, coloured by sector
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  scale_colour_hue(direction = 1) +
  theme_minimal()

# Options explored through the esquisse menus: add a smoother per sector,
# switch both axes to a log10 scale, and label the chart
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_colour_hue(direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Comparison Between 2 Funds",
    subtitle = "ESG vs. Non-ESG",
    x = "Weight in ESG ETF (ESGU)",
    y = "Weight in Standard ETF (IVV)",
    caption = "Haotian Duan"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

# Same chart as before, with the sector colours taken from the viridis
# "magma" palette instead of the default hue scale
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_colour_viridis_d(option = "magma", direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Comparison Between 2 Funds",
    subtitle = "ESG vs. Non-ESG",
    x = "Weight in ESG ETF (ESGU)",
    y = "Weight in Standard ETF (IVV)",
    caption = "Haotian Duan"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

# Split the chart into one panel per sector
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_colour_viridis_d(option = "magma", direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Comparison Between ESG vs. Non-ESG",
    x = "Weight in ESG ETF (ESGU)",
    y = "Weight in Standard ETF (IVV)",
    caption = "Haotian Duan"
  ) +
  theme_minimal() +
  facet_wrap(~sector)
Warning: Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

1.5.1.3 Homework problem 2: exploring the outliers

# Reshape to long format: one row per company and fund, with the fund held in
# fund_type and its value in weight
blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |>
  pivot_longer(
    cols = contains("etf"),
    names_to = "fund_type",
    values_to = "weight"
  ) |>
  # Replace the original column names with readable fund labels
  mutate(
    fund_type = case_when(
      fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
      fund_type == "standard_etf" ~ "Standard ETF (IVV)"
    )
  )

blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
# Start from the long-format data and keep only rows whose weight is at
# least 1
blackrock_esg_vs_non_esg_etf_long |>
  filter(weight >= 1) |>
  # Weight on the x-axis, company on the y-axis; colour distinguishes the two
  # funds and point size also encodes the weight
  ggplot(
    aes(x = weight, y = company_name, colour = fund_type, size = weight)
  ) +
  geom_point(shape = "circle") +
  # Fixed colour for each fund
  scale_colour_manual(
    values = c(
      "ESG ETF (ESGU)" = "#04ED7E",
      "Standard ETF (IVV)" = "#C1BEC0"
    )
  ) +
  # Title, axis labels and caption
  labs(
    title = "Weight Comparison of Companies in ESGU vs. IVV ETFs",
    x = "Weight",
    y = "Company",
    caption = "Haotian Duan"
  ) +
  theme_minimal()

This graph compares the weightings of companies in two different ETFs: an ESG ETF (ESGU) and a Standard ETF (IVV). The X-axis represents the percentage of each fund’s total assets invested in a company, and the Y-axis lists the company names. The graph shows only the holdings whose weight is at least 1%, which can be considered outliers. We can see that most of these companies have similar weights in the ESG and non-ESG ETFs.

1.5.1.4 Homework problem 3: Make your own charts with esquisse

# Topic 1
# Read the same CSV again, this time keeping the columns from company_name
# through esg_tilt
blackrock_esg_etf_vs_esg_tilt <- select(
  read_csv(github_url),
  company_name:esg_tilt
)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Restrict both variables to a narrower range to zoom in on the bulk of the
# data and leave out the outliers
blackrock_esg_etf_vs_esg_tilt |>
  filter(
    between(esg_etf, 0, 2.6),
    between(esg_tilt, -0.6, 0.44)
  ) |>
  # ESG ETF weight against ESG tilt, coloured by sector, with a smoother to
  # show the pattern
  ggplot(aes(x = esg_etf, y = esg_tilt, colour = sector)) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_colour_hue(direction = 1) +
  # Title, axis labels and caption
  labs(
    title = "ESG ETF Weights and Company ESG Tilt Analysis",
    x = "ESG ETF Weights",
    y = "ESG Tilt",
    caption = "Haotian Duan"
  ) +
  theme_minimal()
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.0031653
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.11494
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.012493
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
-0.0031653
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
0.11494
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 0
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 0.012493

“ESG Tilt” is an investment strategy that overweights ESG assets in a portfolio, skewing investments towards companies that score higher on ESG criteria. I therefore assumed that a higher weight in the ESG ETF indicates greater attention to ESG, which would lead to a higher ESG tilt. However, our graph shows only a very weak positive relationship between the two variables. To find out the reason, we move on to our second topic.

# Topic 2

# Repeat the Topic 1 chart with one panel per sector so that each sector can
# be studied separately
blackrock_esg_etf_vs_esg_tilt |>
  filter(
    between(esg_etf, 0, 2.6),
    between(esg_tilt, -0.6, 0.44)
  ) |>
  ggplot(aes(x = esg_etf, y = esg_tilt, colour = sector)) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_colour_viridis_d(option = "inferno", direction = 1) +
  labs(
    title = "Relationship between ESG ETF Weights and Company ESG Tilt in Different Sector",
    x = "ESG ETF Weights",
    y = "ESG Tilt",
    caption = "Haotian Duan"
  ) +
  # theme() must come after theme_minimal() so the legend position is kept
  theme_minimal() +
  theme(legend.position = "left") +
  facet_wrap(~sector)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.0031653
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.11494
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.012493
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
-0.0031653
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
0.11494
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 0
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 0.012493

We can see that the relationship between ESG ETF weights and company ESG tilt differs considerably across sectors. In most sectors, ESG tilt initially rises as the ESG ETF weight grows, but when ESG is pursued excessively the effect reverses, as in health care and consumer discretionary. By contrast, in some sectors, such as materials, the higher the ESG ETF weight, the better, almost without exception.

1.5.2.1 Homework problem 4: Understanding aes()

# Chart 1
# Colour is mapped to sector inside geom_point() only, so the points are
# coloured by sector while geom_smooth() fits a single line to all the data
ggplot(blackrock_esg_vs_non_esg_etf) +
  # Position aesthetics shared by every layer
  aes(x = esg_etf, y = standard_etf) +
  # shape is a fixed setting, so it belongs outside aes(); inside aes() the
  # string "circle" is treated as data and adds a spurious shape legend
  geom_point(mapping = aes(color = sector), shape = "circle") +
  # Linear fit; span is omitted because it only applies to loess smoothers
  geom_smooth(method = "lm") +
  scale_color_hue(direction = 1) +
  # log10 scale on both axes
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(title = "Chart 1") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

# Chart 2
# Colour is mapped to sector for the whole plot, so both the points and the
# smoother are drawn separately for each sector
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  # Smoother using the default method
  geom_smooth(span = 0.75) +
  scale_colour_hue(direction = 1) +
  # log10 scale on both axes
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "Chart 2") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

# Chart 3
# No sector mapping at all: every point gets the same fixed colour and a
# single smoother is fitted to all the data
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf)
) +
  geom_point(shape = "circle", size = 1.5, colour = "#AB4FE0") +
  # Smoother using the default method
  geom_smooth(span = 0.75) +
  # log10 scale on both axes
  scale_x_log10() +
  scale_y_log10() +
  labs(title = "Chart 3") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

The grammatical difference is that in Chart 1, sector is not a colour variable for the plot as a whole; it only determines the colours of the points, because the x and y variables are set up first and the sector information is supplied afterwards inside geom_point(). In Chart 2, sector is set as the colour variable for the whole plot. This means that Chart 1 treats the different sectors as a whole, whereas Chart 2 treats each sector separately. Chart 3 is similar to Chart 1; the difference is that it excludes all information about sectors.