HW2

Question 1

library(ggplot2)
library(ggthemes)
library(tidyverse)
library(dplyr)
library(pkgdown)
library(esquisse)

import the data

# Location of the ETF comparison dataset (CSV hosted on GitHub).
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Download the CSV and keep the columns from company_name through standard_etf.
blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)

# esquisser() opens an interactive Shiny add-in, so only launch it from an
# interactive session. Called unconditionally, it gets in the way of running
# the script with Rscript or knitting it.
if (interactive()) {
  esquisser()
}
library(ggplot2)

# Q1: standard ETF value against ESG ETF value, coloured by sector, with one
# panel per sector, a smoothed trend in each panel, and log10 axes.
ggplot(
  data = blackrock_esg_vs_non_esg_etf,
  mapping = aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(size = 1.65, shape = "bullet") +
  geom_smooth(span = 0.75) +
  scale_colour_viridis_d(option = "viridis", direction = 1) +
  # scale_*_log10() is the shorthand for a continuous scale with a log10
  # transformation.
  scale_x_log10() +
  scale_y_log10() +
  labs(
    title = "HW2_Q1",
    x = "ESG ETF (ESGU)",
    y = "Standard ETF (IVV)",
    caption = "Xiaorui Zhang"
  ) +
  theme_minimal() +
  facet_wrap(~ sector)

Question 2


# Reshape to long format: every column whose name contains "etf" becomes a
# (fund_type, weight) pair, giving one row per company and fund.
blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |>
  pivot_longer(
    cols = contains("etf"),
    names_to = "fund_type",
    values_to = "weight"
  ) |>
  # Swap the raw column names for display labels through a named lookup.
  # A name without an entry becomes NA, exactly as an unmatched case_when()
  # with no default would.
  mutate(
    fund_type = unname(
      c(
        esg_etf = "ESG ETF (ESGU)",
        standard_etf = "Standard ETF (IVV)"
      )[fund_type]
    )
  )

blackrock_esg_vs_non_esg_etf_long


library(dplyr)
library(ggplot2)

# Q2: compare each company's weight in the ESG fund and the standard fund.
blackrock_esg_vs_non_esg_etf_long %>%
  # Keep only rows with a weight of at least 1%. The earlier upper bound
  # (weight <= 7) was not part of the stated intent and could silently drop
  # the largest holdings, so it has been removed.
  filter(weight >= 1) %>%
  ggplot() +
  # Point chart: weight on x, company on y, colour by fund, size by weight.
  aes(x = weight, y = company_name, colour = fund_type, size = weight) +
  geom_point(shape = "circle") +
  # ESG fund points in green, standard fund points in grey.
  scale_color_manual(values = c(
    `ESG ETF (ESGU)` = "#14D029",
    `Standard ETF (IVV)` = "#B6B7B6"
  )) +
  # The y axis lists company names, so it is labelled "Company" rather than
  # "Funds Name".
  labs(
    x = "Weight",
    y = "Company",
    title = "Comparison between Funds of different weight in ESG/Standard ETF",
    caption = "Xiaorui Zhang",
    size = "weight"
  ) +
  theme_minimal()


The chart shows that certain companies have a much larger weight than others. For example, technology companies like Apple and Microsoft carry large weights in the ETFs, while traditional companies like Exxon or Home Depot carry comparatively smaller weights.

Question 3

# esquisser() opens an interactive Shiny add-in; skip it outside interactive
# sessions so the script can also be run non-interactively (Rscript / knit).
if (interactive()) {
  esquisser()
}

library(ggplot2)
library(ggplot2)

# Q3 (chart 1): distribution of the esg_etf values within each sector, drawn
# as horizontal boxplots on a log10 x axis.
ggplot(
  data = blackrock_esg_vs_non_esg_etf,
  mapping = aes(x = esg_etf, y = sector, colour = sector)
) +
  # Fixed dark fill for every box; only the outline colour varies by sector.
  geom_boxplot(fill = "#112446") +
  scale_colour_hue(direction = -1) +
  scale_x_log10() +
  labs(
    title = "Preference for ESG ETF Investment for Different Sectors",
    x = "ESG ETF",
    y = "Sector",
    caption = "Xiaorui Zhang"
  ) +
  theme_minimal()

# Q3 (chart 2): ESG vs. standard ETF values for the companies whose value is
# at least 1 in both funds (within the upper bounds below), on log10 axes.
blackrock_esg_vs_non_esg_etf %>%
  filter(
    esg_etf >= 1, esg_etf <= 7,
    standard_etf >= 1, standard_etf <= 6.95
  ) %>%
  ggplot() +
  # NOTE(review): size is mapped to a discrete variable (company_name), which
  # ggplot2 warns about; consider a fixed size or a numeric mapping instead.
  aes(x = esg_etf, y = standard_etf, colour = company_name, size = company_name) +
  geom_point(shape = "circle") +
  scale_color_hue(direction = -1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  # The axis labels were swapped: x carries esg_etf and y carries standard_etf.
  labs(
    x = "ESG ETF",
    y = "Standard ETF",
    title = "Preference for ESG ETF Investment for Leading Companies",
    caption = "Xiaorui Zhang"
  ) +
  theme_minimal()

The first chart gives a good picture of how investment in ESG ETFs is distributed across sectors. Although the analysis below shows that large IT companies are enthusiastic about investing in ESG ETFs, this chart shows that investment in ESG ETFs is fairly evenly spread across sectors, with higher averages in the Utilities and Financials sectors.

In the second chart, we focus on the large companies. We can see a clear preference for ESG investment among IT companies like Apple and Microsoft, and less support from traditional companies like Exxon.

Question 4

library(ggplot2)

# p1: scatter coloured by sector, with a single blue loess smooth fitted
# across all of the data, on log10 axes.
p1 <- ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(size = 1.5, shape = "circle") +
  # group = 1 overrides the per-sector grouping implied by the colour mapping,
  # so only one smooth line is drawn.
  geom_smooth(
    mapping = aes(group = 1),
    method = "loess",
    span = 0.75,
    se = TRUE,
    colour = "blue"
  ) +
  scale_colour_hue(direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal()

print(p1)

# The ggplot functions that make this work are geom_point() and geom_smooth(). Passing aes(group = 1) to geom_smooth() overrides the grouping implied by colour = sector, so only one smooth line is fitted to all of the data.
library(ggplot2)

# p2: scatter coloured by sector; the smooth layer inherits the colour
# mapping, so one smooth line is drawn per sector. Both axes are log10.
p2 <- ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, colour = sector)
) +
  geom_point(size = 1.5, shape = "circle") +
  geom_smooth(span = 0.75) +
  scale_colour_hue(direction = 1) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal()

print(p2)

# Compared with the chart above, aes(group = 1) is gone, so geom_smooth() inherits colour = sector and one smooth line is created per sector.
library(ggplot2)

# p3: no sector mapping at all. Points and smooth line each get one fixed
# colour (purple points, yellow line), on log10 axes.
p3 <- ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf)
) +
  geom_point(colour = "purple", size = 1.5, shape = "circle") +
  geom_smooth(colour = "yellow", span = 0.75) +
  scale_x_log10() +
  scale_y_log10() +
  theme_minimal()

print(p3)

# Lastly, the points and the smooth line are not separated by sector; each layer is given a single fixed colour instead (colour = "...").

Question 5


# ggplot2 scatterplot
library(ggplot2)
# Install hrbrthemes only when it is missing. An unconditional
# install.packages() re-installs the package on every run and needs a
# configured CRAN mirror, which non-interactive sessions often lack.
if (!requireNamespace("hrbrthemes", quietly = TRUE)) {
  install.packages("hrbrthemes")
}
library(hrbrthemes)

# Q5: ESG vs. standard ETF values for the companies inside the filter bounds
# below, coloured by sector and styled with the hrbrthemes ipsum theme.
Q5 <- blackrock_esg_vs_non_esg_etf %>%
  filter(esg_etf >= 1L & esg_etf <= 7L) %>%
  filter(standard_etf >= 1 & standard_etf <= 6.95) %>%
  ggplot(aes(x = esg_etf, y = standard_etf, color = sector)) +
  geom_point(size = 2) +
  labs(
    x = "ESG ETF",
    y = "Standard ETF",
    title = "IT Companies leading the ETF Investment",
    caption = "Xiaorui Zhang"
  ) +
  theme_ipsum()

print(Q5)

This chart shows the same result as in Q3, but more clearly. We can now see that, among the large enterprises, IT companies are leading the ETF investment.

Question 6

# Install geomtextpath (and remotes, which is needed to fetch it from GitHub)
# only when missing, instead of re-installing on every run.
if (!requireNamespace("geomtextpath", quietly = TRUE)) {
  if (!requireNamespace("remotes", quietly = TRUE)) {
    install.packages("remotes")
  }
  remotes::install_github("AllanCameron/geomtextpath")
}
library(geomtextpath)

# Curved text, from the ggplot2 extensions gallery.
# Q6: faint scatter coloured by sector, with a labelled loess smooth per
# sector that carries the sector name along the line.
Q6 <- ggplot(
  blackrock_esg_vs_non_esg_etf,
  aes(x = esg_etf, y = standard_etf, color = sector)
) +
  geom_point(alpha = 0.2) +
  geom_labelsmooth(
    aes(label = sector),
    text_smoothing = 30, fill = "#F6F6FF",
    method = "loess", formula = y ~ x,
    size = 2, linewidth = 1, boxlinewidth = 0.3
  ) +
  labs(
    x = "ESG ETF",
    y = "Standard ETF",
    title = "IT Companies leading the ETF Investment",
    caption = "Xiaorui Zhang"
  ) +
  # The `+` before theme() was missing, so the legend was never hidden and
  # theme() ran as a separate statement. The on-line labels make the legend
  # redundant.
  theme(legend.position = "none")

print(Q6)

This chart shows the same result as above: generally speaking, IT companies are leading the ETF investment. The curved-text extension also lets us place each sector name on its smooth line. However, because investment tendencies converge across sectors, many of the text labels overlap, so this chart needs further tuning.