SusFin

Author

Wendy Lei

1.5.1.1 Setup Esquisse and read the documents

1.5.1.2 Homework problem 1: Recreate the chart above in Esquisse

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.3     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.0
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(esquisse)
library(ggplot2)
# Location of the ETF comparison dataset (raw CSV hosted on GitHub)
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Download the CSV, then keep only the contiguous run of columns from
# company_name through standard_etf -- the columns used in this analysis
blackrock_esg_vs_non_esg_etf <- select(
  read_csv(github_url),
  company_name:standard_etf
)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#Use esquisse 
#esquisser(blackrock_esg_vs_non_esg_etf)
library(ggplot2)
# Homework 1: scatter plot of each company's weight in the ESG fund (x) against
# its weight in the standard fund (y). Points and loess smoothers are coloured
# by sector, and each sector gets its own facet. Both axes are log10-scaled.
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(method = "loess", formula = "y ~ x", span = 0.75) +
  scale_color_brewer(palette = "Set3", direction = 1) +
  # scale_*_log10() is the log10 shorthand already used elsewhere in this
  # file; it avoids the `trans` argument, which newer ggplot2 releases rename
  # to `transform`
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ESG ETF (ESGU)",
    y = "Standard ETF (IVV)",
    title = "We made this chart using Esquisse!",
    # One literal with an explicit "\n": breaking the string across source
    # lines would leak the source indentation into the rendered subtitle
    subtitle = "It's a great tool for learning ggplot2,\neven if it has its limitations.",
    caption = "Wendy Lei"
  ) +
  theme_minimal() +
  facet_wrap(vars(sector))
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

1.5.1.3 Homework problem 2: exploring the outliers

# Reshape to long format: every column whose name contains "etf" is stacked
# into a fund_type / weight pair, giving one row per company and fund.
blackrock_esg_vs_non_esg_etf_long <- pivot_longer(
  blackrock_esg_vs_non_esg_etf,
  cols = contains("etf"),
  names_to = "fund_type",
  values_to = "weight"
) |>
  # Swap the raw column names for readable fund labels; case_when() works
  # like a chain of "if / else if" tests evaluated row by row
  mutate(
    fund_type = case_when(
      fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
      fund_type == "standard_etf" ~ "Standard ETF (IVV)"
    )
  )

# Print the reshaped tibble for inspection
blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
library(dplyr)
library(ggplot2)

# Homework 2: bubble chart of the largest holdings. Colour identifies the fund;
# x-position and bubble size both encode the holding's weight.
blackrock_esg_vs_non_esg_etf_long |>
  # Keep holdings with a weight between 1 and 7 inclusive
  # (weights appear to be in percent -- confirm against the data source)
  filter(weight >= 1, weight <= 7) |>
  # Point chart with the variables assigned to aesthetics
  ggplot(
    aes(x = weight, y = company_name, colour = fund_type, size = weight)
  ) +
  geom_point(shape = "circle") +
  scale_color_manual(
    values = c(
      "ESG ETF (ESGU)" = "#66FF00",
      "Standard ETF (IVV)" = "#666666"
    )
  ) +
  # Title, subtitle, and author caption
  labs(
    title = "ESG fund V.S. Non- ESG Fund",
    subtitle = "Weight",
    caption = "Wendy Lei"
  ) +
  theme_minimal()

#esquisser(blackrock_esg_vs_non_esg_etf_long)

Reflection to interpret this chart: This chart compares the weights of various companies in an ESG fund versus a standard ETF. Each bubble represents a company's holding in one of the two funds: its colour shows which fund it belongs to, and both its position on the x-axis and its size show the company's weight in that fund. Companies with larger bubbles further to the right are the outliers, meaning they have a significantly higher weight in that fund than the other companies. For instance, a large ESG ETF bubble suggests that the company has a substantial weight in the ESG fund, potentially due to its strong ESG credentials.

1.5.1.4 Homework problem 3: Make your own charts with esquisse

# Chart 1: ESG vs. standard ETF weights on log10 axes. Points are coloured by
# sector; a single black loess curve summarises the overall trend.
ggplot(blackrock_esg_vs_non_esg_etf, aes(x = esg_etf, y = standard_etf)) +
  geom_point(aes(colour = sector), size = 3) +
  geom_smooth(
    method = "loess",
    formula = "y ~ x",
    span = 0.75,
    colour = "black"
  ) +
  # Log-scale both axes
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "Scatter Plot of ESG vs. Standard ETFs with Trend Line",
    x = "ESG ETF Proportion",
    y = "Standard ETF Proportion",
    colour = "Sector",
    caption = "Wendy Lei"
  ) +
  theme_minimal() +
  # Legend goes underneath the plot
  theme(legend.position = "bottom")
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

# This code creates a scatter plot with 'esg_etf' on the x-axis and 'standard_etf' on the y-axis.
# Each point is colored by 'sector'.
# A loess trend line (geom_smooth) is added to show the overall relationship between the two variables.

Reflection to interpret this chart: The upward-sloping trend line indicates a positive linear relationship between the ESG ETF and standard ETF proportions. This suggests that companies with higher inclusion in ESG ETFs tend to also have higher inclusion in standard ETFs.

#Chart 2
library(dplyr)
library(ggplot2)

# Chart 2: jittered strip plot of holding weights for each fund type,
# coloured by sector and flipped so the fund types sit on the vertical axis.
blackrock_esg_vs_non_esg_etf_long |>
  # Keep weights from 0 up to and including 3.65
  filter(weight >= 0, weight <= 3.65) |>
  ggplot(aes(x = fund_type, y = weight, colour = sector)) +
  geom_jitter(size = 1.2) +
  scale_colour_hue(direction = 1) +
  coord_flip() +
  labs(
    title = "Distribution of Weights",
    caption = "Wendy Lei"
  ) +
  theme_minimal()

Reflection to interpret this chart: Most companies have lower weights, as indicated by the dense clustering near the origin. The clear horizontal separation between the two fund types indicates the difference in weights between ESG and Standard ETFs for companies.

1.5.2 Understanding ggplot2 grammar

1.5.2.1 Homework problem 4: Understanding aes()

# chart 1
# Both variables are transformed with log10(value + 1) inside aes(), so zero
# weights map to 0 instead of -Inf and no scale transformation is needed.
ggplot(blackrock_esg_vs_non_esg_etf) +
  # x and y are set for the whole plot; colour is mapped only in the point
  # layer, so the smoother below is fitted once to all of the data
  aes(x = log10(esg_etf + 1), y = log10(standard_etf + 1)) +
  geom_point(aes(colour = sector), shape = "circle", size = 1.5) +
  # Cubic polynomial fit. `span` is omitted because it only affects the loess
  # smoother and is ignored when method = "lm"
  geom_smooth(method = "lm", formula = y ~ poly(x, 3), colour = "blue") +
  theme_gray()

# chart 2
# colour = sector is mapped at the plot level, so it is inherited by both the
# points and the smoother: one loess curve is drawn per sector.
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  # Per-sector smooth lines
  geom_smooth(method = "loess", formula = "y ~ x", span = 0.75) +
  # Log-scale both axes
  scale_x_log10() +
  scale_y_log10() +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

# chart 3
# Colours are fixed constants set outside aes(), so nothing is mapped to a
# grouping variable and there is no legend.
ggplot(blackrock_esg_vs_non_esg_etf) +
  # set x and y for the whole plot
  aes(x = esg_etf, y = standard_etf) +
  geom_point(shape = "circle", size = 1.5, colour = "purple") +
  # Single (ungrouped) cubic polynomial fit. `span` is omitted because it only
  # affects the loess smoother and is ignored when method = "lm"
  geom_smooth(method = "lm", formula = y ~ poly(x, 3), colour = "yellow") +
  # Log-scale both axes
  scale_x_log10() +
  scale_y_log10() +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

1.5.3 Resources for learning to make new charts

1.5.3.1 Homework problem 5: make a new chart from the R Graph Gallery

# example from R Graph Gallery
#install.packages("hrbrthemes")
#install.packages("viridis")
# Libraries
library(tidyverse)
library(hrbrthemes)
NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
      Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
      if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(viridis)
Loading required package: viridisLite
# Simulated dataset: four named groups of normally distributed values.
# Group "B" is built from two batches of 500 draws (means 13 and 18).
data <- data.frame(
  name = rep(c("A", "B", "C", "D"), times = c(500, 1000, 20, 100)),
  value = c(
    rnorm(500, 10, 5),
    rnorm(500, 13, 1),
    rnorm(500, 18, 1),
    rnorm(20, 25, 4),
    rnorm(100, 12, 1)
  )
)

# Boxplot per group with the individual observations jittered on top
ggplot(data, aes(x = name, y = value, fill = name)) +
  geom_boxplot() +
  geom_jitter(color = "black", size = 0.4, alpha = 0.9) +
  scale_fill_viridis(discrete = TRUE, alpha = 0.6) +
  labs(title = "A boxplot with jitter", x = "") +
  theme_ipsum() +
  # The x-axis already names each group, so the fill legend is hidden
  theme(
    legend.position = "none",
    plot.title = element_text(size = 11)
  )

# use our data
library(ggplot2)
library(tidyr)
library(scales) 

Attaching package: 'scales'
The following object is masked from 'package:viridis':

    viridis_pal
The following object is masked from 'package:purrr':

    discard
The following object is masked from 'package:readr':

    col_factor
# Reshape to long format (one row per company and ETF) so that the ETF type
# can be placed on the x-axis of the boxplot.
# NOTE: this reuses the name of the earlier long table but with different
# column names (ETF_Type / Value instead of fund_type / weight).
blackrock_esg_vs_non_esg_etf_long <- pivot_longer(
  blackrock_esg_vs_non_esg_etf,
  cols = c("esg_etf", "standard_etf"),
  names_to = "ETF_Type",
  values_to = "Value"
)

# Boxplots of holding weights by ETF type, one box per sector
ggplot(
  blackrock_esg_vs_non_esg_etf_long,
  aes(x = ETF_Type, y = Value, fill = sector)
) +
  geom_boxplot() +
  # "Set3" supplies up to 12 colours; "Pastel1" stops at 9, which is fewer
  # than the number of sectors and leaves some boxes without a fill colour
  scale_fill_brewer(palette = "Set3") +
  theme_light() +
  labs(
    title = "ESG vs. Non-ESG ETFs across Sectors",
    x = "ETF Type",
    y = "Value",
    fill = "Sector"
  ) +
  # Zoom the y-axis to the 5th-95th percentile range. coord_cartesian() only
  # changes the visible window; ylim() would discard out-of-range rows before
  # the boxplot statistics are computed and so distort the boxes.
  coord_cartesian(
    ylim = quantile(
      blackrock_esg_vs_non_esg_etf_long$Value,
      probs = c(0.05, 0.95),
      na.rm = TRUE,
      names = FALSE
    )
  )
Warning: Removed 54 rows containing non-finite values (`stat_boxplot()`).
Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Pastel1 is 9
Returning the palette you asked for with that many colors

Reflection to interpret this chart: The median values for ESG ETFs and standard ETFs are quite similar across most sectors. No one sector appears to dominate in terms of higher or lower values within ESG or non-ESG ETFs consistently. There is a notable amount of variation (spread) within each type of ETF, as indicated by the length of the boxes and whiskers.