Data Visualization Practice

Author

Pinandito Wisambudi

Preparation

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(esquisse)

# Location of the ETF comparison dataset (CSV) on GitHub
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Read the data from GitHub and keep only the four columns used in this
# analysis. The columns are listed by name (instead of the positional range
# company_name:standard_etf) so the selection does not silently change if the
# column order of the CSV ever changes.
blackrock_esg_vs_non_esg_etf <- github_url |>
  read_csv() |>
  select(company_name, sector, esg_etf, standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print a compact overview of the data: column names, types and first values
glimpse(blackrock_esg_vs_non_esg_etf)
Rows: 537
Columns: 4
$ company_name <chr> "PRUDENTIAL FINANCIAL INC", "GENERAL MILLS INC", "KELLOGG…
$ sector       <chr> "Financials", "Consumer Staples", "Consumer Staples", "In…
$ esg_etf      <dbl> 0.5366803, 0.5522180, 0.4534279, 0.6486836, 0.4407025, 0.…
$ standard_etf <dbl> 0.10574313, 0.15134370, 0.05920732, 0.31168123, 0.1184507…

Homework Problem 1

library(ggplot2)

# Scatter plot of the standard ETF values against the ESG ETF values, coloured
# by sector and drawn in one panel per sector, with a smoother on top.
ggplot(
  data = blackrock_esg_vs_non_esg_etf,
  mapping = aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.7) +
  scale_colour_viridis_d(option = "viridis", direction = 1) +
  # Both axes use a log10 scale
  scale_x_log10() +
  scale_y_log10() +
  facet_wrap(~sector) +
  labs(
    title = "We made this chart using Esquisse!",
    subtitle = "It's a great tool for learning ggplot2. Even if it has limitations.",
    x = "ESG ETF (ESGU)",
    y = "Standard ETF (IVV)",
    caption = "Pinandito Wisambudi"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Homework Problem 2

# Reshape to long format: one row per company and fund, with the source column
# name stored in `fund_type` and its value stored in `weight`.
blackrock_esg_vs_non_esg_etf_long <- pivot_longer(
  blackrock_esg_vs_non_esg_etf,
  cols = c(esg_etf, standard_etf),
  names_to = "fund_type",
  values_to = "weight"
) |>
  # Replace the raw column names with readable fund labels
  mutate(
    fund_type = case_match(
      fund_type,
      "esg_etf" ~ "ESG ETF (ESGU)",
      "standard_etf" ~ "Standard ETF (IVV)"
    )
  )

# Print the reshaped tibble
blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
# Dot plot of the rows whose weight lies between 1 and 7 (inclusive): one
# point per company and fund, coloured by fund type and sized by weight.
blackrock_esg_vs_non_esg_etf_long |>
  filter(between(weight, 1, 7)) |>
  ggplot(
    aes(
      x = weight,
      y = company_name,
      colour = fund_type,
      size = weight
    )
  ) +
  geom_point(shape = "circle") +
  # Fixed colours: green for the ESG fund, grey for the standard fund
  scale_colour_manual(
    values = c(
      "ESG ETF (ESGU)" = "#37DA21",
      "Standard ETF (IVV)" = "#9C9C9C"
    )
  ) +
  labs(
    title = "ESG Fund vs Non-ESG Fund",
    x = "Value",
    y = "Company",
    caption = "Pinandito Wisambudi"
  ) +
  theme_minimal()

The chart compares the weights of companies in the ESG fund and the non-ESG fund. Each point represents a company: its horizontal position and size are determined by the company's weight, and its color indicates the type of fund (ESG or non-ESG). The large outliers in the data set, such as APPLE INC and MICROSOFT CORP, carry a much larger weight in both the ESG and the non-ESG fund than the other companies.

Homework Problem 3

Chart 1

# Stacked bar chart of weight by sector, with the bars filled by fund type
blackrock_esg_vs_non_esg_etf_long |>
  ggplot(aes(x = sector, y = weight, fill = fund_type)) +
  geom_col() +
  scale_fill_hue(direction = 1) +
  # Title, axis labels, legend title and author caption
  labs(
    title = "Distribution of ESG fund and non-ESG fund by Sector",
    x = "Sector",
    y = "Weight",
    fill = "Fund Type",
    caption = "Pinandito Wisambudi"
  ) +
  theme_minimal()

Chart 2

# Scatter plot of the ESG ETF values against the standard ETF values,
# coloured by sector, with both axes on a log10 scale.
blackrock_esg_vs_non_esg_etf |>
  ggplot(aes(x = standard_etf, y = esg_etf, colour = sector)) +
  geom_point(shape = "circle", size = 1.5) +
  scale_colour_hue(direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  # Title, axis labels, legend title and author caption
  labs(
    title = "Correlation between ESG fund and Non-ESG Fund by Sector",
    x = "Non-ESG Fund",
    y = "ESG Fund",
    color = "Sector",
    caption = "Pinandito Wisambudi"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis

Homework Problem 4

Chart 1

# Scatter plot of the standard ETF values against the ESG ETF values, coloured
# by sector, with a blue LOESS smoother (span 0.71) and log10 axes.
blackrock_esg_vs_non_esg_etf |>
  ggplot(aes(x = esg_etf, y = standard_etf, colour = sector)) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(method = "loess", span = 0.71, color = "blue") +
  scale_colour_hue(direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  # Title, axis labels, legend title and author caption
  labs(
    title = "Correlation between ESG Fund and Non-ESG Fund by Sector",
    x = "ESG Fund",
    y = "Non-ESG Fund",
    color = "Sector",
    caption = "Pinandito Wisambudi"
  ) +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

The difference in Chart 1 lies in the line: geom_smooth(method = "loess", span = 0.71, color = "blue").

Chart 2

# Scatter plot of the standard ETF values against the ESG ETF values, coloured
# by sector, with a smoother (span 0.75) and both axes on a log10 scale.
blackrock_esg_vs_non_esg_etf |>
  ggplot(aes(x = esg_etf, y = standard_etf, colour = sector)) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_colour_hue(direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

The difference in Chart 2 lies in the line: geom_smooth(span = 0.75).

Chart 3

# Scatter plot of the standard ETF values against the ESG ETF values. No
# colour aesthetic is mapped here: every point is drawn in a fixed purple and
# the LOESS smoother (span 0.71) in blue. Both axes use a log10 scale.
blackrock_esg_vs_non_esg_etf |>
  ggplot(aes(x = esg_etf, y = standard_etf)) +
  geom_point(shape = "circle", size = 1.5, colour = "#8D4BB5") +
  geom_smooth(method = "loess", span = 0.71, color = "blue") +
  scale_x_log10() +
  scale_y_log10() +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

The differences in Chart 3 lie in the lines:

  • geom_point(shape = "circle", size = 1.5, colour = "#8D4BB5")

  • geom_smooth(method = "loess", span = 0.71, color = "blue")

Homework Problem 5

Example from R graph gallery

# Libraries
library(ggplot2)
library(dplyr)

# Download the example dataset from GitHub; the first line of the file holds
# the column names
data <- read.table(
  "https://raw.githubusercontent.com/holtzy/data_to_viz/master/Example_dataset/1_OneNum.csv",
  header = TRUE
)

# Draw a density curve of `price`, keeping only the rows with price below 300
ggplot(filter(data, price < 300), aes(x = price)) +
  geom_density(fill = "#69b3a2", color = "#e9ecef", alpha = 0.8)

My interpretation:

# Density curves of the ESG ETF values, one curve per sector (fill and outline
# both coloured by sector), drawn on a log10 x-axis.
blackrock_esg_vs_non_esg_etf |>
  ggplot(
    aes(x = esg_etf, fill = sector, colour = sector, group = sector)
  ) +
  geom_density(adjust = 1) +
  scale_fill_hue(direction = 1) +
  scale_colour_hue(direction = 1) +
  scale_x_log10() +
  # Title, axis labels and author caption
  labs(
    title = "Distribution of ESG Fund by Sector",
    x = "ESG Fund",
    y = "Density",
    caption = "Pinandito Wisambudi"
  ) +
  theme_gray()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Removed 226 rows containing non-finite values (`stat_density()`).

This density plot visualizes the distribution of ESG fund weights across different sectors. The log10 scale helps visualize the data because most of the values are small. The chart also highlights the differences and similarities in the ESG fund distribution between sectors.

Homework Problem 6

I installed the patchwork package.

library(ggplot2)
library(patchwork)

# Build a per-sector density plot of one column on a log10 x-axis.
#
# `data` is the data frame to plot (it must contain a `sector` column) and
# `weight_col` is the unquoted name of the column whose density is drawn.
# Returns the ggplot object. Having a single helper keeps the two panels
# below identical apart from the plotted column.
plot_sector_density <- function(data, weight_col) {
  ggplot(data) +
    aes(
      x = {{ weight_col }},
      fill = sector,
      colour = sector,
      group = sector
    ) +
    geom_density(adjust = 1L) +
    scale_fill_hue(direction = 1) +
    scale_color_hue(direction = 1) +
    scale_x_continuous(trans = "log10") +
    theme_gray()
}

p1 <- plot_sector_density(blackrock_esg_vs_non_esg_etf, esg_etf)
p2 <- plot_sector_density(blackrock_esg_vs_non_esg_etf, standard_etf)

# Place the two plots side by side. Both use the same sector legend, so it is
# collected into a single shared legend instead of being drawn twice.
p1 + p2 + plot_layout(guides = "collect")
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Removed 226 rows containing non-finite values (`stat_density()`).
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Removed 35 rows containing non-finite values (`stat_density()`).

I used the patchwork package to combine two charts that show the density of the ESG fund and the non-ESG fund. By placing the two charts side by side, we can compare the density distributions of the ESG and non-ESG funds within each sector.