Data Visualisation

Author

Ashwini Arulrajhan

# Set the CRAN repository used by install.packages() later in this document
options(repos = c(CRAN = "https://cran.rstudio.com/"))
# Attach the core tidyverse packages (the startup message lists which ones)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(dplyr)
# Location of the ETF comparison dataset (raw CSV hosted on GitHub)
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Download the CSV, then keep only the run of columns from company_name
# through standard_etf, which is all this analysis uses
blackrock_esg_vs_non_esg_etf <- read_csv(github_url) |>
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Install esquisse only when it is not already available, so re-rendering the
# document does not download and reinstall the package every time.
if (!requireNamespace("esquisse", quietly = TRUE)) {
  install.packages("esquisse")
}
Installing package into 'C:/Users/ashwi/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'esquisse' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\ashwi\AppData\Local\Temp\RtmpGgOhe3\downloaded_packages

Homework Question 1

library(ggplot2)

# Scatter of each company's ESG ETF weight against its Standard ETF weight,
# one panel per sector, with a smoother overlaid in each panel.
# Both axes are log10-scaled; ggplot2 warns that this transformation
# introduces infinite values for some rows.
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_colour_viridis_d(option = "viridis", direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ESG ETF (ESGU)",
    y = "Standard ETF (IVV)",
    caption = "Ashwini Arulrajhan"
  ) +
  theme_minimal() +
  facet_wrap(~sector)
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Homework Question 2

# Reshape to long format: the two fund columns (those whose names contain
# "etf") are stacked into a fund_type / weight pair, giving one row per
# company per fund.
blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |>
  pivot_longer(
    cols = contains("etf"),
    names_to = "fund_type",
    values_to = "weight"
  ) |>
  # Swap the raw column names for display labels used in the charts
  mutate(
    fund_type = case_match(
      fund_type,
      "esg_etf" ~ "ESG ETF (ESGU)",
      "standard_etf" ~ "Standard ETF (IVV)"
    )
  )

blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
# Dot plot of the holdings whose weight lies between 1 and 7 (inclusive),
# one row per company; colour separates the two funds and point size
# follows the weight.
blackrock_esg_vs_non_esg_etf_long |>
  filter(weight >= 1, weight <= 7) |>
  ggplot(
    aes(x = weight, y = company_name, colour = fund_type, size = weight)
  ) +
  geom_point(shape = "circle") +
  scale_colour_manual(
    values = c(
      "ESG ETF (ESGU)" = "#1BC250",
      "Standard ETF (IVV)" = "#878E8F"
    )
  ) +
  labs(x = "Weight (%)", y = "Company Name", caption = "Ashwini Arulrajhan") +
  theme_minimal()

The above chart shows that some companies, such as Apple, Alphabet and Microsoft, have large weights in both the ESG and Standard ETFs. This blurs the line between the two investments, as both may have similar profiles given that their most heavily weighted stocks are the same.

Homework Problem 3

library(ggplot2)

# Weight of every holding plotted by sector, coloured by fund
ggplot(
  blackrock_esg_vs_non_esg_etf_long,
  aes(x = sector, y = weight, colour = fund_type)
) +
  geom_point(shape = "circle", size = 1.5) +
  scale_colour_hue(direction = 1) +
  theme_minimal()

This chart confirms that the outliers are mostly in the Information Technology sector.

library(ggplot2)

# Column chart of weight by sector, filled by fund, one panel per fund
ggplot(
  blackrock_esg_vs_non_esg_etf_long,
  aes(x = sector, y = weight, fill = fund_type)
) +
  geom_col() +
  scale_fill_hue(direction = 1) +
  theme_minimal() +
  facet_wrap(~fund_type)

The sector weights of both the ESG ETF and Standard ETF look similar.

Homework problem 4 (Chart 1)

# Chart 1: ESG ETF weight against Standard ETF weight on log10 axes, with
# points coloured by sector and a LOESS smoother drawn in blue.
blackrock_esg_vs_non_esg_etf %>%
  ggplot(aes(x = esg_etf, y = standard_etf, color = sector)) +
  geom_point() + # Scatter plot
  scale_x_log10() + # Set the x axis to a logarithmic scale
  scale_y_log10() + # Set the y axis to a logarithmic scale
  # Add a LOESS (local regression) smoother with its confidence band
  geom_smooth(method = "loess", se = TRUE, colour = "blue") +
  labs(x = "ESG ETF", y = "Standard ETF", color = "Sector") + # Add labels
  theme_minimal() + # Use a minimal theme
  theme(legend.position = "right") # Position the legend on the right
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Homework problem 4 (Chart 2)

# Chart 2: log-log scatter of ESG ETF weight against Standard ETF weight,
# with points and a LOESS smoother both coloured by sector
ggplot(blackrock_esg_vs_non_esg_etf, aes(x = esg_etf, y = standard_etf)) +
  geom_point(aes(colour = sector)) +
  geom_smooth(aes(colour = sector), method = "loess", se = TRUE) +
  scale_x_log10() +
  scale_y_log10() +
  labs(x = "esg_etf", y = "standard_etf", color = "Sector") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Homework problem 4 (Chart 3)

# Chart 3: purple log-log scatter of ESG ETF weight against Standard ETF
# weight, with a yellow LOESS smoother and its confidence band
ggplot(blackrock_esg_vs_non_esg_etf, aes(x = esg_etf, y = standard_etf)) +
  geom_point(colour = "purple") +
  geom_smooth(method = "loess", se = TRUE, colour = "yellow") +
  scale_x_log10() +
  scale_y_log10() +
  labs(x = "esg_etf", y = "standard_etf") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Homework problem 5

# Side-by-side (dodged) bars of weight by sector, filled by fund; the x-axis
# labels are rotated 90 degrees so the sector names do not overlap
ggplot(
  blackrock_esg_vs_non_esg_etf_long,
  aes(x = sector, y = weight, fill = fund_type)
) +
  geom_col(position = "dodge") +
  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))