# Install esquisse from CRAN only when it is not already available.
# `repos` must be the base URL of a CRAN-style package repository; it was
# previously set to the URL of the data CSV, so the install always failed with
# "unable to access index for repository".
# The hard-coded setwd() was dropped: the path exists on one machine only, and
# the visible script reads its data from a URL rather than a relative path.
if (!requireNamespace("esquisse", quietly = TRUE)) {
  install.packages("esquisse", repos = "https://cloud.r-project.org")
}
Warning: unable to access index for repository https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/src/contrib:
  cannot open URL 'https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/src/contrib/PACKAGES'
Warning: package 'esquisse' is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
Warning: unable to access index for repository https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/bin/macosx/big-sur-x86_64/contrib/4.3:
  cannot open URL 'https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/bin/macosx/big-sur-x86_64/contrib/4.3/PACKAGES'
library(esquisse)
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Location of the ETF comparison CSV hosted on GitHub.
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Download the CSV and keep only the contiguous column range from
# company_name through standard_etf (the four columns used in the analysis).
blackrock_esg_vs_non_esg_etf <- select(
  read_csv(github_url),
  company_name:standard_etf
)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(ggplot2)

# Log-log scatter of each holding's ESG-fund weight against its standard-fund
# weight, one panel per sector, with a loess trend line in every panel.
#
# A weight of 0 becomes -Inf under log10. That stacked points on the panel
# edges and raised "Transformation introduced infinite values" warnings, so
# only holdings with a positive weight in both funds are plotted. stat_smooth()
# was already discarding the non-finite rows, so the fitted curves are based on
# the same observations as before.
blackrock_esg_vs_non_esg_etf |>
  filter(esg_etf > 0, standard_etf > 0) |>
  ggplot() +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "circle", size = 1.5) +
  # method/formula are spelled out to match what ggplot2 picked automatically.
  geom_smooth(method = "loess", formula = y ~ x, span = 0.75) +
  scale_color_viridis_d(option = "plasma", direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(
    title = "esg vs non esg etf",
    caption = "Enming Liang"
  ) +
  theme_minimal() +
  facet_wrap(vars(sector))
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).

# Reshape to long form: every column whose name contains "etf" is stacked into
# a `fund_type` label column plus a `weight` value column, and the raw column
# names are then replaced by readable fund labels.
blackrock_esg_vs_non_esg_etf_long <- mutate(
  pivot_longer(
    blackrock_esg_vs_non_esg_etf,
    cols = contains("etf"),
    names_to = "fund_type",
    values_to = "weight"
  ),
  # case_when() works like an extended if/else; names not listed become NA.
  fund_type = case_when(
    fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
    fund_type == "standard_etf" ~ "Standard ETF (IVV)"
  )
)

# Print the reshaped tibble.
blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows
library(dplyr)
library(ggplot2)

# Dot plot of the larger holdings: keep rows whose weight lies in the closed
# interval [1, 7] and show each company's weight, coloured by fund and sized
# by weight.
#
# Fixes relative to the earlier version:
#   * the title read "Standard ESG" although the second fund is the standard
#     ETF;
#   * the bounds were integer literals (1L, 7L) compared against a double
#     column; between() states the inclusive range directly.
blackrock_esg_vs_non_esg_etf_long |>
  filter(between(weight, 1, 7)) |>
  ggplot() +
  aes(x = weight, y = company_name, colour = fund_type, size = weight) +
  geom_point(shape = "circle") +
  scale_color_manual(
    values = c(
      `ESG ETF (ESGU)` = "#2FDD0F",
      `Standard ETF (IVV)` = "#808080"
    )
  ) +
  labs(title = "ESG and Standard ETF", caption = "Enming Liang") +
  theme_minimal()

# Large outliers in the dataset, particularly those with significantly higher
# weights, indicate companies that have a substantial impact on the performance
# of the ETF. These outliers could represent companies that are heavily focused
# on ESG criteria or on non-ESG criteria, depending on the fund type.
# Location of the ETF comparison CSV hosted on GitHub.
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# Read the CSV again and keep only the contiguous column range from
# esg_tilt_rank through esg_tilt_quantile_5, which the tilt plot uses.
blackrock_esg_vs_non_esg_etf_p3 <- select(
  read_csv(github_url),
  esg_tilt_rank:esg_tilt_quantile_5
)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
library(ggplot2)

# Tile plot of esg_tilt_percentile (y) against esg_tilt_quantile_5 (x), with
# esg_tilt_rank mapped to a continuous viridis colour scale.
# NOTE(review): in geom_tile() the `colour` aesthetic styles the tile outline;
# the interior uses `fill`, which is not mapped here. Confirm this is intended.
ggplot(
  data = blackrock_esg_vs_non_esg_etf_p3,
  mapping = aes(
    x = esg_tilt_quantile_5,
    y = esg_tilt_percentile,
    colour = esg_tilt_rank
  )
) +
  geom_tile() +
  scale_colour_viridis_c(option = "viridis", direction = 1) +
  ggtitle("QUANTILE VS PERCENTILE") +
  theme_minimal()

# In the chart, the colour shifts from purple toward green and yellow as the ESG tilt rank gets higher.
library(ggplot2)

# Scatter of ESG-fund weight (x) against standard-fund weight (y, log10 scale),
# points coloured by sector, with a quadratic linear-model trend line.
#
# Holdings with standard_etf == 0 are removed first: log10(0) is -Inf, which
# pinned those points to the bottom edge and raised "Transformation introduced
# infinite values" warnings. stat_smooth() was already dropping those rows, so
# the fitted curve uses the same observations as before.
blackrock_esg_vs_non_esg_etf |>
  filter(standard_etf > 0) |>
  ggplot(aes(x = esg_etf, y = standard_etf, color = sector)) +
  geom_point() +
  # The fixed colour replaces the sector mapping on this layer only.
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), color = "light blue") +
  scale_y_log10() +
  labs(
    title = "Scatter plot of ESG vs Standard ETFs by Sector",
    x = "ESG ETF",
    y = "Standard ETF",
    color = "Sector"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Warning: Removed 35 rows containing non-finite values (`stat_smooth()`).

library(ggplot2)

# Scatter of ESG-fund weight (x) against standard-fund weight (y, log10 scale),
# coloured by sector, with a separate quadratic linear-model trend per sector.
#
# Holdings with standard_etf == 0 are removed first: log10(0) is -Inf, which
# pinned those points to the bottom edge and raised "Transformation introduced
# infinite values" warnings. stat_smooth() was already dropping those rows, so
# the fitted curves use the same observations as before.
blackrock_esg_vs_non_esg_etf |>
  filter(standard_etf > 0) |>
  ggplot(aes(x = esg_etf, y = standard_etf, color = sector)) +
  geom_point() +
  # Explicit grouping: one fitted curve per sector.
  geom_smooth(aes(group = sector), method = "lm", formula = y ~ poly(x, 2)) +
  scale_y_log10() +
  labs(
    title = "Scatter plot with separate smooth lines for each sector",
    x = "ESG ETF",
    y = "Standard ETF",
    color = "Sector"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Warning: Removed 35 rows containing non-finite values (`stat_smooth()`).

library(ggplot2)

# Scatter of ESG-fund weight (x) against standard-fund weight (y, log10 scale)
# with no sector mapping: all points are purple and one quadratic linear-model
# trend line (yellow) is fitted to the whole data set.
#
# Holdings with standard_etf == 0 are removed first: log10(0) is -Inf, which
# pinned those points to the bottom edge and raised "Transformation introduced
# infinite values" warnings. stat_smooth() was already dropping those rows, so
# the fitted curve uses the same observations as before.
blackrock_esg_vs_non_esg_etf |>
  filter(standard_etf > 0) |>
  ggplot(aes(x = esg_etf, y = standard_etf)) +
  geom_point(color = "purple") +
  geom_smooth(method = "lm", formula = y ~ poly(x, 2), color = "yellow") +
  scale_y_log10() +
  labs(
    title = "Scatter plot with a single smooth line for all sectors",
    x = "ESG ETF",
    y = "Standard ETF"
  ) +
  theme_minimal()
Warning: Transformation introduced infinite values in continuous y-axis
Transformation introduced infinite values in continuous y-axis
Warning: Removed 35 rows containing non-finite values (`stat_smooth()`).

# No difference between sectors.
library(ggplot2)


# Violin plot of the esg_etf values within each sector, one filled violin per
# sector; trim = FALSE lets the tails extend past the observed range.
ggplot(
  data = blackrock_esg_vs_non_esg_etf,
  mapping = aes(x = sector, y = esg_etf, fill = sector)
) +
  geom_violin(trim = FALSE) +
  # theme_light() has to come first so the axis-text tweak is not overwritten.
  theme_light() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(
    title = "Violin plot of ESG ETF by Sector",
    x = "Sector",
    y = "ESG ETF Value"
  )

# Load necessary libraries
library(ggplot2)
library(patchwork)

# Your plotting code here...

# Install devtools from CRAN only when it is not already available.
# `repos` must be the base URL of a CRAN-style package repository; it was
# previously set to the URL of the data CSV, so the install always failed with
# "unable to access index for repository".
if (!requireNamespace("devtools", quietly = TRUE)) {
  install.packages("devtools", repos = "https://cloud.r-project.org")
}
Warning: unable to access index for repository https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/src/contrib:
  cannot open URL 'https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/src/contrib/PACKAGES'
Warning: package 'devtools' is not available for this version of R

A version of this package for your version of R might be available elsewhere,
see the ideas at
https://cran.r-project.org/doc/manuals/r-patched/R-admin.html#Installing-packages
Warning: unable to access index for repository https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/bin/macosx/big-sur-x86_64/contrib/4.3:
  cannot open URL 'https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv/bin/macosx/big-sur-x86_64/contrib/4.3/PACKAGES'
library(ggplot2)
library(patchwork)

# p1: ESG-fund weight of every holding, one row of points per sector.
p1 <- ggplot(blackrock_esg_vs_non_esg_etf) +
  geom_point(aes(esg_etf, sector))

# p2: distribution of standard-fund weights, one horizontal box per sector.
# The earlier `group = standard_etf` made every distinct weight its own group,
# which prevented a per-sector box from being drawn; the discrete y axis now
# defines the groups.
p2 <- ggplot(blackrock_esg_vs_non_esg_etf) +
  geom_boxplot(aes(standard_etf, sector))

# Side-by-side layout of the first two panels.
p1 + p2

# p3: smoother of esg_etf over sector.
# NOTE(review): sector is a discrete x variable, so a loess smoother has no
# continuous range to fit over. Confirm what this panel is meant to show.
p3 <- ggplot(blackrock_esg_vs_non_esg_etf) +
  geom_smooth(aes(sector, esg_etf))

# p4: distribution of standard-fund weights. geom_bar() counts rows per
# distinct x value, which is not meaningful for a continuous weight, so the
# values are binned with a histogram instead.
p4 <- ggplot(blackrock_esg_vs_non_esg_etf) +
  geom_histogram(aes(standard_etf), bins = 30)

# Three panels on the top row, the histogram spanning the bottom row.
(p1 | p2 | p3) /
  p4
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'