Homework2

Author

Sophia Wang

Set up the working environment (load packages)

library(esquisse)
library(readr)
library(dplyr)

Attaching package: 'dplyr'
The following objects are masked from 'package:stats':

    filter, lag
The following objects are masked from 'package:base':

    intersect, setdiff, setequal, union
library(ggplot2)
library(tidyr)
library(ggiraph)

Homework problem 1: Recreate the chart above in Esquisse

# Raw-file URL of the ETF comparison CSV hosted on GitHub.
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Download and parse the CSV, then keep only the contiguous run of columns
# from company_name through standard_etf for the analysis below.
blackrock_esg_vs_non_esg_etf <- select(
  read_csv(github_url),
  company_name:standard_etf
)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Print the tibble to check the selected columns and the first rows.
blackrock_esg_vs_non_esg_etf
# A tibble: 537 × 4
   company_name                       sector                esg_etf standard_etf
   <chr>                              <chr>                   <dbl>        <dbl>
 1 PRUDENTIAL FINANCIAL INC           Financials              0.537       0.106 
 2 GENERAL MILLS INC                  Consumer Staples        0.552       0.151 
 3 KELLOGG                            Consumer Staples        0.453       0.0592
 4 AUTOMATIC DATA PROCESSING INC      Information Technolo…   0.649       0.312 
 5 ECOLAB INC                         Materials               0.441       0.118 
 6 JOHNSON CONTROLS INTERNATIONAL PLC Industrials             0.416       0.112 
 7 EVERSOURCE ENERGY                  Utilities               0.392       0.0896
 8 PUBLIC SERVICE ENTERPRISE GROUP IN Utilities               0.376       0.0929
 9 RAYTHEON TECHNOLOGIES CORP         Industrials             0.677       0.401 
10 CHENIERE ENERGY INC                Energy                  0.274       0     
# ℹ 527 more rows
# Scatter plot of each company's weight in the ESG ETF against its weight in
# the standard ETF, coloured and faceted by sector, with one smoother per
# sector. Both axes are log10-scaled, so zero weights become non-finite:
# ggplot2 warns about them and stat_smooth() drops those rows.
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_color_viridis_d(option = "plasma", direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  # Axis titles: no stray leading space, and a space before the ticker.
  labs(
    x = "ESG ETF (ESGU)",
    y = "Standard ETF (IVV)",
    title = "ESG ETF vs. Standard ETF",
    caption = "Sophia Wang"
  ) +
  theme_minimal() +
  facet_wrap(vars(sector))
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Homework problem 2: Exploring the outliers

# Reshape to long format: the two "*etf" weight columns become one row per
# company and fund, with the fund in `fund_type` and its value in `weight`.
blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |>
  pivot_longer(
    cols = contains("etf"),
    names_to = "fund_type",
    values_to = "weight"
  ) |>
  # Swap the original column names for display labels via a named lookup;
  # any name not listed here would become NA.
  mutate(
    fund_type = unname(
      c(
        esg_etf = "ESG ETF (ESGU)",
        standard_etf = "Standard ETF (IVV)"
      )[fund_type]
    )
  )

# Print the long-format tibble (two rows per company, one per fund).
blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
# Bubble chart of the largest holdings: one point per company and fund,
# positioned at the holding's weight and sized by that same weight.
blackrock_esg_vs_non_esg_etf_long |>
  # Keep only holdings whose weight lies between 1 and 7 (inclusive).
  filter(between(weight, 1, 7)) |>
  ggplot(
    aes(
      x = weight,
      y = company_name,
      colour = fund_type,
      size = weight
    )
  ) +
  geom_point(shape = "circle") +
  # Fixed colours per fund: green for the ESG fund, grey for the standard one.
  scale_color_manual(
    values = c(
      "ESG ETF (ESGU)" = "#6DF874",
      "Standard ETF (IVV)" = "#A19D9F"
    )
  ) +
  # Axis titles, plot title and author caption.
  labs(
    x = "weight",
    y = "Company name",
    title = "Weight of the Fund",
    caption = "Sophia Wang"
  ) +
  theme_minimal()

The graph is a bubble chart showing the weights of companies in two different funds, the ESG ETF and the Standard ETF; the size of each bubble corresponds to the weight of the company in the ETF. In the graph we can see that there are some large outliers, such as Apple INC and Microsoft INC, and these outliers indicate that these companies have a significant weight within the ETFs.

Homework problem 3: Make your own charts with Esquisse

# Box plots of holding weight for each fund type, grouped and outlined by
# sector (colour = sector); all boxes share the same dark fill.
ggplot(blackrock_esg_vs_non_esg_etf_long) +
  aes(x = fund_type, y = weight, colour = sector) +
  geom_boxplot(fill = "#112446") +
  scale_color_hue(direction = 1) +
  # Axis titles must follow the aes() mapping: fund type is on x, weight on y.
  labs(
    x = "Fund Type",
    y = "Weight",
    title = "Distribution of Weights in ETF by Sector",
    caption = "Sophia Wang"
  ) +
  theme_minimal()

The purpose of this boxplot is to show how weight is distributed by fund type and sector. The plot shows which sectors have a wider range of weights versus those that are more uniform. From the plot we can see that in most industries the weight in the ESG ETF is higher than in the Standard ETF.

# Overall relationship between a company's ESG ETF weight and its standard
# ETF weight: dark-blue points plus a single smoother, on log10 axes.
# Zero weights become non-finite on the log scale and trigger warnings.
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf)
) +
  geom_point(shape = "circle", size = 1.5, colour = "#112446") +
  geom_smooth(span = 0.75) +
  scale_x_log10() +
  scale_y_log10() +
  labs(
    x = "ESG ETF",
    y = "Standard ETF",
    title = "ESG ETF vs. Standard ETF",
    caption = "Sophia Wang"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

This plot is a scatter plot comparing companies’ weights in an ESG ETF versus a Standard ETF. From this plot, we can infer a positive relationship between the weights in the ESG ETF and the Standard ETF. As the weight in one ETF increases, the weight in the other tends to increase.

Homework problem 4: Understanding aes()

# Sector-coloured points with ONE overall trend line: colour is mapped inside
# geom_point() only, so geom_smooth() does not inherit it and fits a single
# curve to all companies.
ggplot(blackrock_esg_vs_non_esg_etf, aes(x = esg_etf, y = standard_etf)) +
  # Points: colour varies by sector (layer-level mapping).
  geom_point(aes(colour = sector), shape = "circle", size = 1.5) +
  # Trend line: uses only the x/y mapping from ggplot().
  geom_smooth(span = 0.75) +
  scale_color_hue(direction = 1) +
  # Log10 axes; zero weights become non-finite and are reported as warnings.
  scale_x_log10() +
  scale_y_log10() +
  # Minimal theme for a clean appearance.
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

In this plot we add aes(colour = sector) inside geom_point(), so the mapping colours only the points and we get a single smooth line for the whole plot.

  # Specifying the dataset and aesthetic mappings
# Sector-coloured points with one trend line PER sector: colour is mapped in
# the plot-level aes(), so both geom_point() and geom_smooth() inherit it.
ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  # Points, coloured by the inherited sector mapping.
  geom_point(shape = "circle", size = 1.5) +
  # Trend lines, one per sector because colour is inherited here too.
  geom_smooth(span = 0.75) +
  scale_color_hue(direction = 1) +
  # Log10 axes; zero weights become non-finite and are reported as warnings.
  scale_x_log10() +
  scale_y_log10() +
  # Minimal theme for a clean appearance.
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

In this plot we put colour = sector in the top-level aes() together with the x and y mappings, so every layer inherits it and we get a separate smooth line for each sector.

# Fixed colours instead of mapped ones: colour is given as a constant outside
# aes(), so all points are purple and the single trend line is yellow.
ggplot(blackrock_esg_vs_non_esg_etf, aes(x = esg_etf, y = standard_etf)) +
  # Points in one constant colour.
  geom_point(shape = "circle", size = 1.5, colour = "purple") +
  # One trend line for all companies, in its own constant colour.
  geom_smooth(span = 0.75, colour = "yellow") +
  # Log10 axes; zero weights become non-finite and are reported as warnings.
  scale_x_log10() +
  scale_y_log10() +
  # Minimal theme for a clean appearance.
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

In this plot we set colour to a fixed value outside aes(), once in geom_point() and once in geom_smooth(), so the points and the smooth line each get a single constant colour.