library(ggplot2)library(dplyr)# Assuming blackrock_esg_vs_non_esg_etf is your original datasetblackrock_esg_vs_non_esg_etf_filtered <- blackrock_esg_vs_non_esg_etf %>%filter(esg_etf >0, standard_etf >0)ggplot(blackrock_esg_vs_non_esg_etf_filtered) +aes(x = esg_etf, y = standard_etf, colour = sector) +geom_point(size =1.5) +geom_smooth(span =0.75) +scale_color_viridis_d(option ="viridis", direction =1) +scale_x_continuous(trans ="log10") +scale_y_continuous(trans ="log10") +labs(x ="ESG ETF (ESGU)",y ="Standard ETF (IVV)",title ="We made this chart using Esquisse!",subtitle ="It's a great tool for learning ggplot2. Even if it has limitations",caption ="Xingyu Pu" ) +theme_minimal() +facet_wrap(~sector)
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
HW Problem2
blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |># we'll learn a lot more about long data & pivot_longer() in future weeks. pivot_longer(cols =contains("etf"), names_to ="fund_type", values_to ="weight") |># case_when() is like an extended "if else"mutate(fund_type =case_when(fund_type =="esg_etf"~"ESG ETF (ESGU)", fund_type =="standard_etf"~"Standard ETF (IVV)"))blackrock_esg_vs_non_esg_etf_long
# A tibble: 1,074 × 4
company_name sector fund_type weight
<chr> <chr> <chr> <dbl>
1 PRUDENTIAL FINANCIAL INC Financials ESG ETF (ESGU) 0.537
2 PRUDENTIAL FINANCIAL INC Financials Standard ETF (IV… 0.106
3 GENERAL MILLS INC Consumer Staples ESG ETF (ESGU) 0.552
4 GENERAL MILLS INC Consumer Staples Standard ETF (IV… 0.151
5 KELLOGG Consumer Staples ESG ETF (ESGU) 0.453
6 KELLOGG Consumer Staples Standard ETF (IV… 0.0592
7 AUTOMATIC DATA PROCESSING INC Information Technology ESG ETF (ESGU) 0.649
8 AUTOMATIC DATA PROCESSING INC Information Technology Standard ETF (IV… 0.312
9 ECOLAB INC Materials ESG ETF (ESGU) 0.441
10 ECOLAB INC Materials Standard ETF (IV… 0.118
# ℹ 1,064 more rows
Visualization: Comparison of Company Weight in ESG/Standard ETF
library(ggplot2)library(dplyr)blackrock_esg_vs_non_esg_etf_long %>%# Filter rows where the 'weight' column is between 1 and 7 inclusivefilter(weight >=1L & weight <=7L) %>%# Initialize a ggplot object without specifying data aesthetics yetggplot() +# Define aesthetics for the plot:# x-axis will represent 'weight',# y-axis will show 'company_name' reordered by 'weight' for clarity,# 'colour' is mapped to 'fund_type' to differentiate ESG from Standard ETFs,# 'size' of the points is determined by 'weight' to reflect the magnitudeaes(x = weight,y =reorder(company_name, weight), # Reorder company names based on weightcolour = fund_type, # Color code points by the type of fundsize = weight # Size of points reflects their weight ) +# Add points to the plot with default shape (circle)geom_point() +# Manually set the colors for different types of fundsscale_color_manual(values =c(`ESG ETF (ESGU)`="#49D21E", # Assign green color to ESG ETF`Standard ETF (IVV)`="#646367"# Assign grey color to Standard ETF ) ) +# Define labels for axes, plot title, subtitle, and legendlabs(x ="Weight", # Label for the x-axisy ="Company Name", # Label for the y-axistitle ="Comparison of Company Weight in ESG/Standard ETF", # Main title of the plotsubtitle ="Exclude companies whose weight are below 1%", # Subtitle for additional informationcaption ="Xingyu Pu", # Caption for credit or sourcecolor ="Fund Type", # Legend title for 'colour' aestheticsize ="Weight"# Legend title for 'size' aesthetic ) +# Use a minimal theme for a cleaner looktheme_minimal()
Interpretation of the chart: This analysis highlights the dominance of technology companies, with Apple Inc and Microsoft Corp leading in both ETFs,but their weight are slightly higher in ESG ETFs compared to standard ETFs.Moreover, for the same company, its weightings in the two ETFs are often relatively close.
HW Problem 3
###Plot 1
library(tidyverse)blackrock_long <- blackrock_esg_vs_non_esg_etf %>%pivot_longer(cols = esg_etf:standard_etf, names_to ="ETF_Type", values_to ="Weight") %>%mutate(ETF_Type =recode(ETF_Type, esg_etf ="ESG", standard_etf ="Standard")) %>%select(-company_name) # Assuming we don't need the company name for the average calculation# Step 2: Calculate Averagesaverage_weights <- blackrock_long %>%group_by(sector, ETF_Type) %>%summarise(Average_Weight =mean(Weight, na.rm =TRUE), .groups ='drop')# Step 3: Data Visualizationggplot(average_weights, aes(x = sector, y = Average_Weight, fill = ETF_Type)) +geom_bar(stat ="identity", position ="dodge") +theme(axis.text.x =element_text(angle =45, hjust =1)) +labs(title ="Average Weight Comparison between ESG and Standard ETFs",x ="Sector", y ="Average Weight",caption ="Xingyu Pu") +scale_fill_brewer(palette ="Set1")
These findings suggest that while some sectors like Information Technology, Industrials, Real Estate, and Materials may be more prevalent in ESG ETFs, others such as Communication and Utilities are more heavily weighted in standard ETFs. This could reflect the emphasis on certain industries within ESG investment strategies that align with sustainability and governance criteria not as emphasized in standard ETF allocations.
Plot 2
library(tidyverse)# the URL of our data on GitHubgithub_url <-"https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"# read the data from GitHubesg_tilt_compar <- github_url |>read_csv(show_col_types =FALSE) |># select the four columns we will use in our anlaysis hereselect(company_name, esg_tilt, in_esg_only, in_standard_only) |>na.omit() # Remove rows with NA values in 'esg_tilt'# Top 10 Companies by ESG Tilttop_10 <- esg_tilt_compar %>%arrange(desc(esg_tilt)) %>%head(10)# Bottom 10 Companies by ESG Tiltbottom_10 <- esg_tilt_compar %>%arrange(esg_tilt) %>%head(10)# Plot for Top 10 Companies by ESG Tilt with gradient colorprint(ggplot(top_10, aes(x =reorder(company_name, esg_tilt), y = esg_tilt, fill = esg_tilt)) +geom_col() +scale_fill_gradient(low ="lightblue", high ="darkblue") +coord_flip() +# Flip coordinates to have company names on y-axislabs(title ="Top 10 Companies by ESG Tilt", x ="ESG Tilt", y ="Company") +theme_minimal())
# Plot for Bottom 10 Companies by ESG Tilt with gradient colorprint(ggplot(bottom_10, aes(x =reorder(company_name, esg_tilt), y = esg_tilt, fill = esg_tilt)) +geom_col() +scale_fill_gradient(low ="lightblue", high ="darkblue") +coord_flip() +# Flip coordinates to have company names on y-axislabs(title ="Bottom 10 Companies by ESG Tilt", x ="ESG Tilt", y ="Company") +theme_minimal())
The Top 10 Companies by ESG Tilt graph indicates those companies that are more heavily favored in ESG ETFs compared to their presence in standard ETFs. A higher ESG tilt means these companies are significantly represented in ESG-focused investments, likely due to their strong environmental, social, and governance practices.
The Bottom 10 Companies by ESG Tilt graph, conversely, shows those companies with the lowest ESG tilt. These companies are either less favored or underrepresented in ESG ETFs compared to standard ETFs, which could be due to weaker ESG practices or other factors making them less attractive for sustainable investment portfolios.
Points are explicitly set to be purple in color, and the loess regression line is explicitly set to be yellow in color.
HW Prolem 5
library(tidyverse)# the URL of our data on GitHubgithub_url <-"https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"# read the data from GitHubdata5 <- github_url |>read_csv(show_col_types =FALSE) |># select the four columns we will use in our anlaysis hereselect(company_name:standard_etf,esg_tilt)# Filter for specific sectors and remove rows where esg_etf or standard_etf are zerofiltered_data <- data5 %>%filter(sector %in%c("Information Technology", "Communication", "Health Care")) %>%filter(esg_etf >0, standard_etf >0) %>%mutate(esg_tilt_size = esg_tilt) %>%# Ensure that larger circles are drawn first (behind smaller ones)arrange(desc(esg_tilt_size))# Bubble chart with enhanced featuresggplot(filtered_data, aes(x = esg_etf, y = standard_etf, color = sector, size = esg_tilt_size)) +geom_point(alpha =0.7) +# Set transparency to make overlaps visiblescale_size(range =c(3, 15), name ="ESG Tilt Size") +# More pronounced size rangetheme_minimal() +labs(title ="Companies from 3 sectors with the highest average weights",subtitle ="Only include companies exist in ESG/Standard ETFs simultaneously",x ="ESG ETF (log10)",y ="Standard ETF (log10)") +scale_color_brewer(palette ="Set1") +# Use a distinct color palettetheme(legend.position ="right") +scale_x_log10() +scale_y_log10() +geom_abline(intercept =0, slope =1, linetype ="dashed", color ="gray")
As can be seen from this bubble chart, companies with a high proportion of both ESG/Standard ETFs tend to have relatively higher weights in ESG ETFs. Among them, information technology companies have the highest weighting in both ETFs. The communication sector accounts for a relatively high proportion in standard ETFs, and there are a few outliers that account for a very high proportion in both ETFs.
HW Problem6
library(ggplot2)library(ggiraph)library(dplyr)library(readr)github_url <-"https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"data6 <-read_csv(github_url, show_col_types =FALSE) %>%select(company_name, sector, esg_uw_ow) %>%mutate(esg_uw_ow =toupper(esg_uw_ow)) # Ensure case consistency# Ensure esg_uw_ow contains only 'Underweight' and 'Overweight'data6 <- data6 %>%filter(esg_uw_ow %in%c("UNDERWEIGHT", "OVERWEIGHT"))# Prepare the distribution datasector_distribution <- data6 %>%count(sector, esg_uw_ow) %>%mutate(tooltip_text =paste("Sector:", sector, "\nStatus:", esg_uw_ow, "\nCount:", n)) # Add a tooltip text# Create the interactive stacked bar chartgg <-ggplot(sector_distribution, aes(x = sector, y = n, fill = esg_uw_ow)) +geom_bar_interactive(aes(tooltip = tooltip_text, data_id = sector), stat ="identity", position ="stack") +scale_fill_manual(values =c("UNDERWEIGHT"="red", "OVERWEIGHT"="green")) +labs(title ="Distribution of Companies by ESG UW/OW Status and Sector",x ="Sector", y ="Count of Companies") +theme_minimal() +theme(axis.text.x =element_text(angle =45, hjust =1))# Display the interactive chartgirafe(ggobj = gg)
This interactive chart uses {ggiraph} to show the number and proportion of companies in each sector that are underweighted or overweighted in the ESG ETF compared to the standard ETF.This visualization provides insights into how different sectors are represented in terms of their alignment with ESG investment criteria.Sectors with More Underweighted Companies may face greater challenges in meeting ESG criteria or could be in industries with higher environmental impacts or social concerns, leading to a more cautious approach by ESG investors.
This analysis can help investors understand which sectors are leading in sustainability efforts and are therefore prioritized in ESG investments, as well as which sectors may need to improve their ESG practices to become more attractive to sustainable finance initiatives.