Susfin-homework-w2

HW Problem1

library(ggplot2)
library(dplyr)

# Assuming blackrock_esg_vs_non_esg_etf is your original dataset
blackrock_esg_vs_non_esg_etf_filtered <- blackrock_esg_vs_non_esg_etf %>%
  filter(esg_etf > 0, standard_etf > 0)

ggplot(blackrock_esg_vs_non_esg_etf_filtered) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(size = 1.5) +  
  geom_smooth(span = 0.75) +  
  scale_color_viridis_d(option = "viridis", direction = 1) +  
  scale_x_continuous(trans = "log10") + 
  scale_y_continuous(trans = "log10") +  
  labs(
    x = "ESG ETF (ESGU)",
    y = "Standard ETF (IVV)",
    title = "We made this chart using Esquisse!",
    subtitle = "It's a great tool for learning ggplot2. Even if it has limitations",
    caption = "Xingyu Pu"
  ) +
  theme_minimal() +
  facet_wrap(~sector) 
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

HW Problem2

blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |> 
  # we'll learn a lot more about long data & pivot_longer() in future weeks. 
  pivot_longer(cols = contains("etf"), names_to = "fund_type", values_to = "weight") |> 
  # case_when() is like an extended "if else"
  mutate(fund_type = case_when(fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
                               fund_type == "standard_etf" ~ "Standard ETF (IVV)"))

blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
   company_name                  sector                 fund_type         weight
   <chr>                         <chr>                  <chr>              <dbl>
 1 PRUDENTIAL FINANCIAL INC      Financials             ESG ETF (ESGU)    0.537 
 2 PRUDENTIAL FINANCIAL INC      Financials             Standard ETF (IV… 0.106 
 3 GENERAL MILLS INC             Consumer Staples       ESG ETF (ESGU)    0.552 
 4 GENERAL MILLS INC             Consumer Staples       Standard ETF (IV… 0.151 
 5 KELLOGG                       Consumer Staples       ESG ETF (ESGU)    0.453 
 6 KELLOGG                       Consumer Staples       Standard ETF (IV… 0.0592
 7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU)    0.649 
 8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312 
 9 ECOLAB INC                    Materials              ESG ETF (ESGU)    0.441 
10 ECOLAB INC                    Materials              Standard ETF (IV… 0.118 
# ℹ 1,064 more rows

Visualization: Comparison of Company Weight in ESG/Standard ETF

library(ggplot2)
library(dplyr)

blackrock_esg_vs_non_esg_etf_long %>%
  # Filter rows where the 'weight' column is between 1 and 7 inclusive
  filter(weight >= 1L & weight <= 7L) %>%
  # Initialize a ggplot object without specifying data aesthetics yet
  ggplot() +
  # Define aesthetics for the plot:
  # x-axis will represent 'weight',
  # y-axis will show 'company_name' reordered by 'weight' for clarity,
  # 'colour' is mapped to 'fund_type' to differentiate ESG from Standard ETFs,
  # 'size' of the points is determined by 'weight' to reflect the magnitude
  aes(
    x = weight,
    y = reorder(company_name, weight), # Reorder company names based on weight
    colour = fund_type, # Color code points by the type of fund
    size = weight # Size of points reflects their weight
  ) +
  # Add points to the plot with default shape (circle)
  geom_point() +
  # Manually set the colors for different types of funds
  scale_color_manual(
    values = c(
      `ESG ETF (ESGU)` = "#49D21E", # Assign green color to ESG ETF
      `Standard ETF (IVV)` = "#646367" # Assign grey color to Standard ETF
    )
  ) +
  # Define labels for axes, plot title, subtitle, and legend
  labs(
    x = "Weight", # Label for the x-axis
    y = "Company Name", # Label for the y-axis
    title = "Comparison of Company Weight in ESG/Standard ETF", # Main title of the plot
    subtitle = "Exclude companies whose weight are below 1%", # Subtitle for additional information
    caption = "Xingyu Pu", # Caption for credit or source
    color = "Fund Type", # Legend title for 'colour' aesthetic
    size = "Weight" # Legend title for 'size' aesthetic
  ) +
  # Use a minimal theme for a cleaner look
  theme_minimal()

Interpretation of the chart: This analysis highlights the dominance of technology companies, with Apple Inc and Microsoft Corp leading in both ETFs,but their weight are slightly higher in ESG ETFs compared to standard ETFs.Moreover, for the same company, its weightings in the two ETFs are often relatively close.

HW Problem 3

###Plot 1

library(tidyverse)

blackrock_long <- blackrock_esg_vs_non_esg_etf %>%
  pivot_longer(cols = esg_etf:standard_etf, names_to = "ETF_Type", values_to = "Weight") %>%
  mutate(ETF_Type = recode(ETF_Type, esg_etf = "ESG", standard_etf = "Standard")) %>%
  select(-company_name) # Assuming we don't need the company name for the average calculation

# Step 2: Calculate Averages
average_weights <- blackrock_long %>%
  group_by(sector, ETF_Type) %>%
  summarise(Average_Weight = mean(Weight, na.rm = TRUE), .groups = 'drop')

# Step 3: Data Visualization
ggplot(average_weights, aes(x = sector, y = Average_Weight, fill = ETF_Type)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(title = "Average Weight Comparison between ESG and Standard ETFs",
       x = "Sector", y = "Average Weight",caption = "Xingyu Pu") +
  scale_fill_brewer(palette = "Set1")

These findings suggest that while some sectors like Information Technology, Industrials, Real Estate, and Materials may be more prevalent in ESG ETFs, others such as Communication and Utilities are more heavily weighted in standard ETFs. This could reflect the emphasis on certain industries within ESG investment strategies that align with sustainability and governance criteria not as emphasized in standard ETF allocations.

Plot 2

library(tidyverse)
# the URL of our data on GitHub
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# read the data from GitHub
esg_tilt_compar <- github_url |> 
  read_csv(show_col_types = FALSE) |> 
  # select the four columns we will use in our anlaysis here
  select(company_name, esg_tilt, in_esg_only, in_standard_only) |>
  na.omit() # Remove rows with NA values in 'esg_tilt'

# Top 10 Companies by ESG Tilt
top_10 <- esg_tilt_compar %>%
  arrange(desc(esg_tilt)) %>%
  head(10)

# Bottom 10 Companies by ESG Tilt
bottom_10 <- esg_tilt_compar %>%
  arrange(esg_tilt) %>%
  head(10)

# Plot for Top 10 Companies by ESG Tilt with gradient color
print(
  ggplot(top_10, aes(x = reorder(company_name, esg_tilt), y = esg_tilt, fill = esg_tilt)) +
    geom_col() +
    scale_fill_gradient(low = "lightblue", high = "darkblue") +
    coord_flip() + # Flip coordinates to have company names on y-axis
    labs(title = "Top 10 Companies by ESG Tilt", x = "ESG Tilt", y = "Company") +
    theme_minimal()
)

# Plot for Bottom 10 Companies by ESG Tilt with gradient color
print(
  ggplot(bottom_10, aes(x = reorder(company_name, esg_tilt), y = esg_tilt, fill = esg_tilt)) +
    geom_col() +
    scale_fill_gradient(low = "lightblue", high = "darkblue") +
    coord_flip() + # Flip coordinates to have company names on y-axis
    labs(title = "Bottom 10 Companies by ESG Tilt", x = "ESG Tilt", y = "Company") +
    theme_minimal()
)

The Top 10 Companies by ESG Tilt graph indicates those companies that are more heavily favored in ESG ETFs compared to their presence in standard ETFs. A higher ESG tilt means these companies are significantly represented in ESG-focused investments, likely due to their strong environmental, social, and governance practices.

The Bottom 10 Companies by ESG Tilt graph, conversely, shows those companies with the lowest ESG tilt. These companies are either less favored or underrepresented in ESG ETFs compared to standard ETFs, which could be due to weaker ESG practices or other factors making them less attractive for sustainable investment portfolios.

HW Problem 4

library(ggplot2)
library(dplyr)

blackrock_esg_vs_non_esg_etf_filtered <- blackrock_esg_vs_non_esg_etf %>%
  filter(esg_etf > 0, standard_etf > 0)
  ggplot(blackrock_esg_vs_non_esg_etf_filtered,aes(x = esg_etf, y = standard_etf))+scale_x_log10() +
  scale_y_log10()+
  geom_point(aes(color = sector)) +
  geom_smooth(method = "loess")
`geom_smooth()` using formula = 'y ~ x'

Points are colored by the “sector” variable, and the loess regression line has default colors.

library(ggplot2)
library(dplyr)

blackrock_esg_vs_non_esg_etf_filtered <- blackrock_esg_vs_non_esg_etf %>%
  filter(esg_etf > 0, standard_etf > 0)
  ggplot(blackrock_esg_vs_non_esg_etf_filtered,aes(x = esg_etf, y = standard_etf,color = sector))+scale_x_log10() +
  scale_y_log10()+
  geom_point() +
  geom_smooth(method = "loess")
`geom_smooth()` using formula = 'y ~ x'

Both points and the loess regression line are colored by the “sector” variable.

library(ggplot2)
library(dplyr)

blackrock_esg_vs_non_esg_etf_filtered <- blackrock_esg_vs_non_esg_etf %>%
  filter(esg_etf > 0, standard_etf > 0)
  ggplot(blackrock_esg_vs_non_esg_etf_filtered,aes(x = esg_etf, y = standard_etf))+scale_x_log10() +
  scale_y_log10()+
  geom_point(color = "purple") +
  geom_smooth(method = "loess",color = "yellow")
`geom_smooth()` using formula = 'y ~ x'

Points are explicitly set to be purple in color, and the loess regression line is explicitly set to be yellow in color.

HW Prolem 5

library(tidyverse)
# the URL of our data on GitHub
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"

# read the data from GitHub
data5 <- github_url |> 
  read_csv(show_col_types = FALSE) |> 
  # select the four columns we will use in our anlaysis here
  select(company_name:standard_etf,esg_tilt)

# Filter for specific sectors and remove rows where esg_etf or standard_etf are zero
filtered_data <- data5 %>%
  filter(sector %in% c("Information Technology", "Communication", "Health Care")) %>%
  filter(esg_etf > 0, standard_etf > 0) %>%
  mutate(esg_tilt_size = esg_tilt) %>%
  # Ensure that larger circles are drawn first (behind smaller ones)
  arrange(desc(esg_tilt_size))

# Bubble chart with enhanced features
ggplot(filtered_data, aes(x = esg_etf, y = standard_etf, color = sector, size = esg_tilt_size)) +
  geom_point(alpha = 0.7) + # Set transparency to make overlaps visible
  scale_size(range = c(3, 15), name = "ESG Tilt Size") + # More pronounced size range
  theme_minimal() +
  labs(title = "Companies from 3 sectors with the highest average weights",subtitle = "Only include companies exist in ESG/Standard ETFs simultaneously",
       x = "ESG ETF (log10)",
       y = "Standard ETF (log10)") +
  scale_color_brewer(palette = "Set1") + # Use a distinct color palette
  theme(legend.position = "right") +
  scale_x_log10() + 
  scale_y_log10() + 
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray") 

As can be seen from this bubble chart, companies with a high proportion of both ESG/Standard ETFs tend to have relatively higher weights in ESG ETFs. Among them, information technology companies have the highest weighting in both ETFs. The communication sector accounts for a relatively high proportion in standard ETFs, and there are a few outliers that account for a very high proportion in both ETFs.

HW Problem6

library(ggplot2)
library(ggiraph)
library(dplyr)
library(readr)
github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"
data6 <- read_csv(github_url, show_col_types = FALSE) %>%
  select(company_name, sector, esg_uw_ow) %>%
  mutate(esg_uw_ow = toupper(esg_uw_ow)) # Ensure case consistency

# Ensure esg_uw_ow contains only 'Underweight' and 'Overweight'
data6 <- data6 %>%
  filter(esg_uw_ow %in% c("UNDERWEIGHT", "OVERWEIGHT"))

# Prepare the distribution data
sector_distribution <- data6 %>%
  count(sector, esg_uw_ow) %>%
  mutate(tooltip_text = paste("Sector:", sector, "\nStatus:", esg_uw_ow, "\nCount:", n)) # Add a tooltip text

# Create the interactive stacked bar chart
gg <- ggplot(sector_distribution, aes(x = sector, y = n, fill = esg_uw_ow)) +
  geom_bar_interactive(aes(tooltip = tooltip_text, data_id = sector), 
                       stat = "identity", position = "stack") +
  scale_fill_manual(values = c("UNDERWEIGHT" = "red", "OVERWEIGHT" = "green")) +
  labs(title = "Distribution of Companies by ESG UW/OW Status and Sector",
       x = "Sector", y = "Count of Companies") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Display the interactive chart
girafe(ggobj = gg)

This interactive chart uses {ggiraph} to show the number and proportion of companies in each sector that are underweighted or overweighted in the ESG ETF compared to the standard ETF.This visualization provides insights into how different sectors are represented in terms of their alignment with ESG investment criteria.Sectors with More Underweighted Companies may face greater challenges in meeting ESG criteria or could be in industries with higher environmental impacts or social concerns, leading to a more cautious approach by ESG investors.

This analysis can help investors understand which sectors are leading in sustainability efforts and are therefore prioritized in ESG investments, as well as which sectors may need to improve their ESG practices to become more attractive to sustainable finance initiatives.