Data Visualization - Edition

Author

Grace Huang

Data Visulization

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

options(repos = c(CRAN = "https://cran.rstudio.com/"))
#####1.5.1.1
install.packages("pacman")
Installing package into 'C:/Users/Shiya (Grace) Huang/AppData/Local/R/win-library/4.3'
(as 'lib' is unspecified)
package 'pacman' successfully unpacked and MD5 sums checked

The downloaded binary packages are in
    C:\Users\Shiya (Grace) Huang\AppData\Local\Temp\RtmpiWYG2y\downloaded_packages
pacman::p_load(tidyverse, data.table, janitor,ggplot2,esquisse,datamods)

github_url <- "https://raw.githubusercontent.com/t-emery/sais-susfin_data/main/datasets/etf_comparison-2022-10-03.csv"
blackrock_esg_vs_non_esg_etf <- github_url |> 
  read_csv() |> 
  select(company_name:standard_etf)
Rows: 537 Columns: 14
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): ticker, company_name, sector, esg_uw_ow
dbl (7): esg_etf, standard_etf, esg_tilt, esg_tilt_z_score, esg_tilt_rank, e...
lgl (3): in_esg_only, in_standard_only, in_on_index_only

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
#### 1.5.1.2
ggplot(blackrock_esg_vs_non_esg_etf) +
  aes(x = esg_etf, y = standard_etf, colour = sector) +
  geom_point(shape = "square", size = 0.75) +
  geom_smooth(span = 0.45) +
  scale_color_brewer(palette = "Spectral", direction = 1) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  labs(title = "1.5.1.1", caption = "Grace Huang") +
  theme_minimal() +
  facet_wrap(vars(sector))
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
Warning: Transformation introduced infinite values in continuous x-axis
Warning: Transformation introduced infinite values in continuous y-axis
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 261 rows containing non-finite values (`stat_smooth()`).
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: span too small.  fewer data values than degrees of freedom.
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: pseudoinverse used at -0.83043
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: neighborhood radius 0.21739
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: reciprocal condition number 0
Warning in simpleLoess(y, x, w, span, degree = degree, parametric = parametric,
: There are other near singularities as well. 0.052415
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : span too small.  fewer
data values than degrees of freedom.
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : pseudoinverse used at
-0.83043
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : neighborhood radius
0.21739
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : reciprocal condition
number 0
Warning in predLoess(object$y, object$x, newx = if (is.null(newdata)) object$x
else if (is.data.frame(newdata))
as.matrix(model.frame(delete.response(terms(object)), : There are other near
singularities as well. 0.052415
Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
-Inf

#####1.5.1.3
blackrock_esg_vs_non_esg_etf_long <- blackrock_esg_vs_non_esg_etf |> 
  # we'll learn a lot more about long data & pivot_longer() in future weeks. 
  pivot_longer(cols = contains("etf"), names_to = "fund_type", values_to = "weight") |> 
  # case_when() is like an extended "if else"
  mutate(fund_type = case_when(fund_type == "esg_etf" ~ "ESG ETF (ESGU)",
                               fund_type == "standard_etf" ~ "Standard ETF (IVV)"))

blackrock_esg_vs_non_esg_etf_long %>%
  filter(weight >= 1L & weight <= 7L) %>%
  ggplot() +
  aes(
    x = weight,
    y = company_name,
    colour = fund_type,
    size = weight
  ) +
  geom_point(shape = "circle") +
  scale_color_manual(
    values = c(`ESG ETF (ESGU)` = "#35FF00",
               `Standard ETF (IVV)` = "#FF1B00")
  ) +
  labs(
    x = "Weight",
    y = "Company Name",
    title = "1.5.1.2",
    caption = "Grace Huang"
  ) +
  theme_minimal()