DATA VISUALIZATION

Author

NIKI LINGANUR

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#> ── Attaching core tidyverse packages ───────────────────── tidyverse 2.0.0 ──
#> ✔ dplyr     1.1.4     ✔ readr     2.1.5
#> ✔ forcats   1.0.0     ✔ stringr   1.5.1
#> ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
#> ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
#> ✔ purrr     1.0.2     
#> ── Conflicts ─────────────────────────────────────── tidyverse_conflicts() ──
#> ✖ dplyr::filter() masks stats::filter()
#> ✖ dplyr::lag()    masks stats::lag()
#> ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become error
# install.packages("devtools")
devtools::install_github("r-lib/conflicted")
Skipping install of 'conflicted' from a github remote, the SHA1 (321d77ce) has not changed since last install.
  Use `force = TRUE` to force installation
conflicted::conflicts_prefer(dplyr::filter)
[conflicted] Will prefer dplyr::filter over any other package.
library(conflicted)
library(dplyr)

filter(mtcars, cyl == 8)
                     mpg cyl  disp  hp drat    wt  qsec vs am gear carb
Hornet Sportabout   18.7   8 360.0 175 3.15 3.440 17.02  0  0    3    2
Duster 360          14.3   8 360.0 245 3.21 3.570 15.84  0  0    3    4
Merc 450SE          16.4   8 275.8 180 3.07 4.070 17.40  0  0    3    3
Merc 450SL          17.3   8 275.8 180 3.07 3.730 17.60  0  0    3    3
Merc 450SLC         15.2   8 275.8 180 3.07 3.780 18.00  0  0    3    3
Cadillac Fleetwood  10.4   8 472.0 205 2.93 5.250 17.98  0  0    3    4
Lincoln Continental 10.4   8 460.0 215 3.00 5.424 17.82  0  0    3    4
Chrysler Imperial   14.7   8 440.0 230 3.23 5.345 17.42  0  0    3    4
Dodge Challenger    15.5   8 318.0 150 2.76 3.520 16.87  0  0    3    2
AMC Javelin         15.2   8 304.0 150 3.15 3.435 17.30  0  0    3    2
Camaro Z28          13.3   8 350.0 245 3.73 3.840 15.41  0  0    3    4
Pontiac Firebird    19.2   8 400.0 175 3.08 3.845 17.05  0  0    3    2
Ford Pantera L      15.8   8 351.0 264 4.22 3.170 14.50  0  1    5    4
Maserati Bora       15.0   8 301.0 335 3.54 3.570 14.60  0  1    5    8
# the URL of our data on GitHub
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"


# read the data from GitHub
blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  # select the four columns we will use in our anlaysis here
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# use dplyr::glimpse() to get an overview of our data
blackrock_esg_vs_non_esg_etf |> 
  glimpse()
Rows: 537
Columns: 4
$ company_name <chr> "PRUDENTIAL FINANCIAL INC", "GENERAL MILLS INC", "KELLOGG…
$ sector       <chr> "Financials", "Consumer Staples", "Consumer Staples", "In…
$ esg_etf      <dbl> 0.5366803, 0.5522180, 0.4534279, 0.6486836, 0.4407025, 0.…
$ standard_etf <dbl> 0.10574313, 0.15134370, 0.05920732, 0.31168123, 0.1184507…

HOMEWORK EXCERCISES

a) Recreation of Chart in Equisse (Code below)

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_color_viridis_d(option = "viridis", direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(
    x = "ESG ETF'S",
    y = "TRADITIONAL ETF'S (NON ESG) ",
    title = "ESG Funds vs. Traditional (Non ESG) Funds ",
    caption = "Niki Linganur "
  ) +
  theme_minimal() +
  theme(plot.caption = element_text(size = 12L)) +
  facet_wrap(vars(sector))
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

b) Exploring the outliers

blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |> 
  # we'll learn a lot more about long data & pivot_longer() in future weeks. 
  pivot_longer(cols = contains("etf"), names_to = "fund_type", values_to = "weight") |> 
  # case_when() is like an extended "if else"
  mutate(fund_type = case_when(fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
                               fund_type == "standard_etf" ~ "Standard ETF (IVV)"))

blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
blackrock_esg_vs_non_esg_etf_long %>%
  #sets the threshold of companies to those with 1% or greater weight
 filter(weight >= 1L & weight <= 7L) %>%
 ggplot() +
  aes(
    #sets x axis as weight of fund and y axis as the company name
    #creates a legend for the colour and the size 
    x = weight,
    y = company_name,
    colour = fund_type,
    size = weight
  ) +
  geom_point(shape = "circle") +
  scale_color_manual(
    values = c(`ESG ETF (ESGU)` = "#2BD160",
    `Standard ETF (IVV)` = "#646974")
  ) +
  #adds the titles for the axes and the title/author 
  labs(
    x = "Weight of Fund",
    y = "Company Name ",
    title = "ESG Funds vs. Traditional Funds (Non ESG) ",
    caption = "Niki Linganur "
  ) +
  #changes theme colour to green/blue
  theme_minimal()

Through the chart we created, we can observe that in most cases the traditional funds are weighted more than or very close to the ESG funds for the same company.  The instances where the ESG fund seems to be weighted more, seem to occur in the case where no traditional fund is present (potentially indicating missing data for that company). 

c) Making own charts with Esquisse

Chart 1

ggplot(blackrock_esg_vs_non_esg_etf) +
  #sets the x value as the sector of the fund and y as the number of ESG ETFS 
  aes(x = sector, y = esg_etf) +
  #changes the colour to purple 
  geom_col(fill = "#440154") +
  labs(
  #Renames the axises and adds title/author 
    x = "Sector ",
    y = "Number of ESG ETF's ",
    title = "Number of ESG ETF's in Each Sector",
    caption = "Niki Linganur "
  ) +
  #changes the theme 
  ggthemes::theme_pander()

This chart attempts to view which sectors tend to have the highest concentration of ESG ETFs. Through looking at this chart we can gather that Information Technology (IT) has the greatest ESG-oriented ETFs and that the materials sector has the least.  

Chart 2

ggplot(blackrock_esg_vs_non_esg_etf) +
    #sets the x value as the sector of the fund and y as the number of traditional ETFS 
  aes(x = sector, y = standard_etf) +
  #Changes colour to red 
  geom_col(fill = "#B22222") +
  labs(
     #Renames the axises and adds title/author 
    x = "Sector ",
    y = "Number of Traditional EFT's ",
    title = "Number of Traditional EFT's in Each Sector",
    caption = "NIki Linganur "
  ) +
  #changes the theme 
  theme_gray()

This chart attempts to answer the inverse question, of which sectors tend to have the highest EFTS (to see if the materials sector just tends to have fewer ETFs than other sectors). Through this chart, we can confirm that materials do indeed have the least ETFs, leading us to conclude that perhaps the  ESG EFTs are not allocated by sector.  

d) Understanding aes()

Chart 1

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  geom_point(aes(colour = sector), shape = "circle", size = 1.5) +
  geom_smooth(span = 0.91) +
  scale_color_hue(direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

There is an additional shape line added to this ggplot2 code to allow us to see each point with better precision.

Chart 2

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.91) +
  scale_color_hue(direction = 1) +
  # format the axes to make the units clear
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  #change the theme 
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

The span of the line is changed here to ensure better clarity of the point readability.

Chart 3

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  #change the size and colour
  geom_point(shape = "circle", size = 1.5, colour = "#7B1494") +
    #change the size and colour
  geom_smooth(span = 0.75, color = "yellow") +
   # format the axes to make the units clear
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  #label the axes to clarify x and y titles 
  labs(x = "ESG ETF", y = "Standard ETF") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Here, both the span and the shape are specified to increase the precision of the orginal scatterplot.

e) New chart from R Graph Gallery

options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages('devtools')

The downloaded binary packages are in
    /var/folders/yd/c2zm5hrx3z9f72991hsqsbqh0000gn/T//RtmpSnqMlr/downloaded_packages
# library
library(ggplot2)

# plot and read data 
ggplot(data=blackrock_esg_vs_non_esg_etf, aes(x = esg_etf, y = standard_etf, color = sector)) +
  geom_point() +
  geom_rug(col="steelblue", alpha=0.8, size=6.5)
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.

This chart tells us how concentrated the data is in terms of sector. That is, it tells us how many ETF’s (both ESG and Traditional) are from which sector.

f) New Chart from Extensions Gallery

options(repos = c(CRAN = "https://cloud.r-project.org"))
install.packages("ggbeeswarm")

The downloaded binary packages are in
    /var/folders/yd/c2zm5hrx3z9f72991hsqsbqh0000gn/T//RtmpSnqMlr/downloaded_packages
install.packages("readr")  # Add this line to install the readr package

The downloaded binary packages are in
    /var/folders/yd/c2zm5hrx3z9f72991hsqsbqh0000gn/T//RtmpSnqMlr/downloaded_packages
library(ggbeeswarm)
library(ggplot2)
library(readr)  # Add this line to load the readr package

github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Read the data from GitHub
blackrock_esg_vs_non_esg_etf <- read_csv(github_url)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Create the ggplot
ggplot(blackrock_esg_vs_non_esg_etf, aes(esg_etf, sector)) +
  geom_quasirandom(groupOnX = FALSE)
Orientation inferred to be along y-axis; override with
`position_quasirandom(orientation = 'x')`

This chart tells us which sectors the ESG ETF’s are enclosed in. This data is helpful to see what the correlation might be between sectors and ESG ETF prevalence to see if one type of sector might be more conducive to ESG ETF creation.