HW2_Data visualization_Nadia XING

Author

Nadia XING

Question1

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(esquisse)
library(ggplot2)
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = 19, size = 2) +  
  geom_smooth(method = "loess", se = FALSE, color = "darkgrey") +
  scale_x_continuous(trans = "log10", labels = scales::comma) +  
  scale_y_continuous(trans = "log10", labels = scales::comma) +
 
  labs(
    x = "Weight in ESG ETF (ESGU) [Log Scale]",
    y = "Weight in Standard ETF (IVV) [Log Scale]",
    title = "Large Cap American Equities ETFs: ESG vs. Non-ESG",
    subtitle = "A comparison of the holdings of BlackRock iShares ESGU and IVV",
    caption = "Visualization by Nadia Xing"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom") +  
  facet_wrap(vars(sector), ncol = 3) +   
  scale_color_brewer(palette = "Set2")  
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).
Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set2 is 8
Returning the palette you asked for with that many colors
Warning: Removed 93 rows containing missing values (`geom_point()`).

Question2

blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |> 
  pivot_longer(cols = contains("etf"), names_to = "fund_type", values_to = "weight") |> 
  mutate(fund_type = case_when(fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
                               fund_type == "standard_etf" ~ "Standard ETF (IVV)"))
blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
library(dplyr)
library(ggplot2)
blackrock_esg_vs_non_esg_etf_long %>%
 filter(weight >= 1 & weight <= 100) %>%
 ggplot() +
 aes(x = weight, y = company_name, colour = fund_type, size = weight) +
 geom_point(shape = "circle") +
 scale_color_manual(values = c(`ESG ETF (ESGU)` = "green", `Standard ETF (IVV)` = "gray"
)) +
 labs(x = "Weight", y = "Company Name", title = "Comparation of Weight in ESG and non-ESG", caption = "Nadia Xing") +
 theme_minimal()

Short Reflection: This chart shows different companies’ weight on ESG ETF and Standard ETF. We cans see that a majority of those companies put similar weight on this two kinds of ETF.

Question3

ggplot(blackrock_esg_vs_non_esg_etf) +
aes(x = esg_etf, y = sector, colour = sector) +
  geom_boxplot(fill = "#B22222") +
  scale_color_manual(
    values = c(Communication = "#F8766D",
    `Consumer Discretionary` = "#DA8C15",
    `Consumer Staples` = "#ACA000",
    Energy = "#58B016",
    Financials = "#00BB4C",
    `Health Care` = "#00C19F",
    Industrials = "#00BAD5",
    `Information Technology` = "#3AA7F3",
    Materials = "#918BFD",
    `Real Estate` = "#E26EEF",
    Utilities = "#FF61C3")
  ) +
  scale_x_continuous(trans = "log10") +
  labs(caption = "Nadia Xing") +
  coord_flip() +
  theme_minimal() +
  theme(legend.position = "bottom")
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Removed 226 rows containing non-finite values (`stat_boxplot()`).

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(
    x = standard_etf,
    y = esg_etf,
    colour = sector,
    size = sector
  ) +
  geom_tile() +
  scale_color_hue(direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  coord_flip() +
  theme_light() +
  ylim(0, 1.2)
Scale for y is already present.
Adding another scale for y, which will replace the existing scale.
Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
ℹ Please use `linewidth` instead.
Warning: Using size for a discrete variable is not advised.
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Removed 232 rows containing missing values (`geom_tile()`).

Question4

chart1

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  geom_point(mapping = aes(color = sector, shape = "circle")) +
  geom_smooth(span = 0.75, method = "lm")+
  scale_color_hue(direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(title = "Chart 1") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

chart 2

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_color_hue(direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(title = "Chart 2") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

chart 3

ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf) +
  #change the size and colour
  geom_point(shape = "circle", size = 1.5, colour = "green") +
    #change the size and colour
  geom_smooth(span = 0.75, color = "purple") +
   # format the axes to make the units clear
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  #label the axes to clarify x and y titles 
  labs(x = "ESG ETF", y = "Standard ETF") +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

Question5

library(dplyr)
library(ggplot2)
blackrock_esg_vs_non_esg_etf_long %>%
  filter(weight >= 1 & weight <= 100) %>%
  ggplot(aes(x = reorder(company_name, -weight), y = weight, fill = fund_type)) +
  geom_bar(stat = "identity", position = position_dodge(), width = 0.7) +
  scale_fill_manual(values = c(`ESG ETF (ESGU)` = "green", `Standard ETF (IVV)` = "darkgray")) +
  labs(x = "Company Name", y = "Weight", 
       title = "Comparison of Company Weight in ESG vs. Non-ESG ETFs",
       subtitle = "Bar chart showing the weight distribution across companies", 
       caption = "Data visualization by Nadia Xing") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Question6

options(repos = c(CRAN = "https://cran.rstudio.com/"))
install.packages("devtools")

The downloaded binary packages are in
    /var/folders/pz/gf8r5yfj7r93ln6kz_gbkjtc0000gn/T//RtmpgoqRVd/downloaded_packages
devtools::install_github("thomasp85/patchwork")
Skipping install of 'patchwork' from a github remote, the SHA1 (d9437579) has not changed since last install.
  Use `force = TRUE` to force installation
library(ggplot2)
library(patchwork)
# 从GitHub读取CSV文件
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"
blackrock_esg_vs_non_esg_etf <- read_csv(github_url, show_col_types = FALSE)
# 图1: 使用 geom_density
p1 <- ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, fill = sector, colour = sector, group = sector) +
  geom_density(adjust = 1) +
  scale_fill_viridis_d() +  # 使用 viridis 颜色方案
  scale_color_viridis_d() +
  scale_x_continuous(trans = "log10") +
  theme_gray()

# 图2: 添加了几何对象geom_density以修正问题
p2 <- ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = standard_etf, fill = sector, group = sector, color = sector) +
  geom_density(adjust = 1) +  # 添加了geom_density
  scale_color_brewer(palette = "Set1") +  # 使用 Brewer 颜色方案
  theme_gray()

# 使用 patchwork 显示图形
p1 + p2 + plot_layout(ncol = 2)  # 明确指定布局为两列
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Removed 226 rows containing non-finite values (`stat_density()`).
Warning in RColorBrewer::brewer.pal(n, pal): n too large, allowed maximum for palette Set1 is 9
Returning the palette you asked for with that many colors