Data Importing Practice

Author

Pinandito Wisambudi

Preparation

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
library(here)
here() starts at /Users/pinanditowisambudi/Sustainable Finance Class
library(ggplot2)
library(dplyr)

path_to_sheet <- here("data-raw", "CM_Data_Explorer.xlsx")

path_to_sheet
[1] "/Users/pinanditowisambudi/Sustainable Finance Class/data-raw/CM_Data_Explorer.xlsx"

Homework Problem 1

Your boss knows that they will be asking her about how the implications of the electric vehicle (EV) market and policy choices on the demand for copper and cobalt.

  1. Use the workflow developed in this chapter to import and tidy worksheet 2.3 EV from the dataset.
EV_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet,
  sheet = "2.3 EV",
  col_names = FALSE
)

sheet_header <- EV_sheet(range = "A4:W5")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
sheet_header
# A tibble: 2 × 23
  ...1   ...2 ...3  ...4    ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12 ...13
  <lgl> <dbl> <lgl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA       NA NA    State…    NA    NA    NA    NA    NA NA    Anno…    NA    NA
2 NA     2022 NA    2025    2030  2035  2040  2045  2050 NA    2025   2030  2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
#   ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
#   ...23 <dbl>
sheet_header_processed <- sheet_header |> 
  t() |>
  as_tibble() |>
  rename(scenario = V1, year = V2) |>
  fill(scenario) |>
  replace_na(list(scenario = "Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed
# A tibble: 23 × 2
   scenario                 year 
   <chr>                    <chr>
 1 Current Year             <NA> 
 2 Current Year             2022 
 3 Current Year             <NA> 
 4 Stated policies scenario 2025 
 5 Stated policies scenario 2030 
 6 Stated policies scenario 2035 
 7 Stated policies scenario 2040 
 8 Stated policies scenario 2045 
 9 Stated policies scenario 2050 
10 Stated policies scenario <NA> 
# ℹ 13 more rows
EV_name <- EV_sheet(range = "A7") |> 
  pull()
New names:
• `` -> `...1`
EV_name
[1] "Constrained nickel supply"
EV_info <- EV_sheet(range = "A8:W19")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
EV_info
# A tibble: 12 × 23
   ...1         ...2 ...3     ...4    ...5    ...6    ...7    ...8    ...9 ...10
   <chr>       <dbl> <lgl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <lgl>
 1 Copper    3.81e+2 NA    6.17e+2 1.39e+3 1.74e+3 2.23e+3 2.42e+3 2.31e+3 NA   
 2 Cobalt    6.35e+1 NA    6.12e+1 3.80e+1 2.91e+1 3.54e+1 4.09e+1 4.74e+1 NA   
 3 Graphite  5.57e+2 NA    9.36e+2 1.59e+3 1.78e+3 1.69e+3 1.49e+3 1.08e+3 NA   
 4 Lithium   6.96e+1 NA    1.23e+2 2.19e+2 3.07e+2 4.07e+2 4.50e+2 4.13e+2 NA   
 5 Manganese 7.45e+1 NA    5.39e+1 1.56e+2 3.36e+2 5.33e+2 7.12e+2 8.58e+2 NA   
 6 Nickel    3.13e+2 NA    5.54e+2 5.13e+2 5.89e+2 6.30e+2 6.92e+2 6.88e+2 NA   
 7 Silicon   8.70e+0 NA    4.05e+1 1.39e+2 2.24e+2 3.52e+2 3.74e+2 3.73e+2 NA   
 8 Neodymium 3.96e+0 NA    7.25e+0 1.22e+1 1.51e+1 1.88e+1 2.18e+1 2.29e+1 NA   
 9 Dysprosi… 4.13e-1 NA    7.41e-1 1.21e+0 1.48e+0 1.84e+0 2.13e+0 2.23e+0 NA   
10 Praseody… 5.94e-1 NA    1.09e+0 1.83e+0 2.27e+0 2.83e+0 3.27e+0 3.43e+0 NA   
11 Terbium   8.06e-2 NA    1.48e-1 2.52e-1 3.14e-1 3.91e-1 4.52e-1 4.74e-1 NA   
12 Total EV  1.47e+3 NA    2.39e+3 4.06e+3 5.02e+3 5.91e+3 6.21e+3 5.80e+3 NA   
# ℹ 13 more variables: ...11 <dbl>, ...12 <dbl>, ...13 <dbl>, ...14 <dbl>,
#   ...15 <dbl>, ...16 <dbl>, ...17 <lgl>, ...18 <dbl>, ...19 <dbl>,
#   ...20 <dbl>, ...21 <dbl>, ...22 <dbl>, ...23 <dbl>
EV_info_col_names <- names(EV_info)

sheet_headers_and_col_names <- sheet_header_processed |> 
  add_column(EV_info_col_names = EV_info_col_names)

EV_info_long <- EV_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "EV_info_col_names") |> 
  add_column(EV_name)

combined_data <- EV_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(EV_info_col_names)) |> 
  filter(!is.na(year)) |> 
  mutate(unit = "kiloton",
  year = as.integer(year)
  ) |> 
  select(EV_name, indicator, scenario, unit, year, value)

combined_data
# A tibble: 228 × 6
   EV_name                   indicator scenario                unit   year value
   <chr>                     <chr>     <chr>                   <chr> <int> <dbl>
 1 Constrained nickel supply Copper    Current Year            kilo…  2022  381.
 2 Constrained nickel supply Copper    Stated policies scenar… kilo…  2025  617.
 3 Constrained nickel supply Copper    Stated policies scenar… kilo…  2030 1389.
 4 Constrained nickel supply Copper    Stated policies scenar… kilo…  2035 1736.
 5 Constrained nickel supply Copper    Stated policies scenar… kilo…  2040 2233.
 6 Constrained nickel supply Copper    Stated policies scenar… kilo…  2045 2418.
 7 Constrained nickel supply Copper    Stated policies scenar… kilo…  2050 2313.
 8 Constrained nickel supply Copper    Announced pledges scen… kilo…  2025  732.
 9 Constrained nickel supply Copper    Announced pledges scen… kilo…  2030 2113.
10 Constrained nickel supply Copper    Announced pledges scen… kilo…  2035 3587.
# ℹ 218 more rows
read_EV_table <-
  function(EV_name_range, EV_info_range) {
    EV_name <-
      EV_sheet(range = EV_name_range) |>
      pull()
    
    EV_info <- EV_sheet(range = EV_info_range)
    EV_info_col_names <- names(EV_info)
    EV_info_long <- EV_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "EV_info_col_names") |>
      add_column(EV_name)
    
    combined_data <- EV_info_long |>
      left_join(sheet_headers_and_col_names, by = join_by(EV_info_col_names)) |>
      filter(!is.na(year)) |>
      mutate(
        year = as.integer(year)
      ) |>
      select(EV_name, indicator, scenario, year, value)
    combined_data
  }

nickel_supply_table <- read_EV_table(
  EV_name_range = "A7",
  EV_info_range = "A8:W19"
)
New names:
New names:
• `` -> `...1`
Wider_use_of_silicon_rich_anodes_table <- read_EV_table(
  EV_name_range = "A22",
  EV_info_range = "A23:W34"
)
New names:
New names:
• `` -> `...1`
Faster_uptake_of_solid_state_batteries_table <- read_EV_table(
  EV_name_range = "A37",
  EV_info_range = "A38:W49"
)
New names:
New names:
• `` -> `...1`
Lower_battery_sizes_table <- read_EV_table(
  EV_name_range = "A52",
  EV_info_range = "A53:W64"
)
New names:
New names:
• `` -> `...1`
Limited_battery_size_reduction_table <- read_EV_table(
  EV_name_range = "A67",
  EV_info_range = "A68:W79"
)
New names:
New names:
• `` -> `...1`
STEPS_Base_case_table <- read_EV_table(
  EV_name_range = "A82",
  EV_info_range = "A83:W94"
)
New names:
New names:
• `` -> `...1`
final_EV_table <- nickel_supply_table |> 
  bind_rows(Wider_use_of_silicon_rich_anodes_table) |>
  bind_rows(Faster_uptake_of_solid_state_batteries_table) |>
  bind_rows(Lower_battery_sizes_table) |>
  bind_rows(Limited_battery_size_reduction_table) |>
  bind_rows(STEPS_Base_case_table)

final_EV_table
# A tibble: 1,368 × 5
   EV_name                   indicator scenario                    year value
   <chr>                     <chr>     <chr>                      <int> <dbl>
 1 Constrained nickel supply Copper    Current Year                2022  381.
 2 Constrained nickel supply Copper    Stated policies scenario    2025  617.
 3 Constrained nickel supply Copper    Stated policies scenario    2030 1389.
 4 Constrained nickel supply Copper    Stated policies scenario    2035 1736.
 5 Constrained nickel supply Copper    Stated policies scenario    2040 2233.
 6 Constrained nickel supply Copper    Stated policies scenario    2045 2418.
 7 Constrained nickel supply Copper    Stated policies scenario    2050 2313.
 8 Constrained nickel supply Copper    Announced pledges scenario  2025  732.
 9 Constrained nickel supply Copper    Announced pledges scenario  2030 2113.
10 Constrained nickel supply Copper    Announced pledges scenario  2035 3587.
# ℹ 1,358 more rows
write_csv(final_EV_table,here("data", "iea_mineral_demand_for_EV.csv"))

cleaned_data <- here("data", "iea_mineral_demand_for_EV.csv") |> 
  read_csv()
Rows: 1368 Columns: 5
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (3): EV_name, indicator, scenario
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
  1. Use the tidied dataset to come up with 5 compelling data visualizations that illustrate key actionable insights about how policy scenarios, and technological scenarios will impact demand for copper and cobalt.

Data Visualization 1

final_EV_table %>%
  filter(EV_name %in% "Constrained nickel supply") %>%
  filter(indicator %in% c("Cobalt", "Copper", "Nickel")) %>%
  filter(scenario %in% "Net Zero Emissions by 2050 scenario") %>%
  ggplot() +
  aes(x = year, y = value, fill = EV_name) +
  geom_area() +
  scale_fill_brewer(palette = "Accent", direction = 1) +
  labs(
    x = "Year",
    y = "Demand (kt)",
    title = "Cobalt and Copper Demand in the Net Zero 2050 Scenario with the Constrained Nickel Supply",
    subtitle = "Copper demands remains the highest under the constrained nickel supply condition",
    fill = "Assumptions"
  ) +
  ggthemes::theme_base() +
  theme(
    legend.position = "none",
    plot.title = element_text(face = "bold", size = 10), # Adjust title size
    plot.subtitle = element_text(face = "italic", size = 8), # Adjust subtitle size
    axis.title.y = element_text(face = "bold", size = 8), # Adjust y-axis title size
    axis.title.x = element_text(face = "bold", size = 8), # Adjust x-axis title size
    axis.text.x = element_text(size = 8), # Adjust x-axis text size
    axis.text.y = element_text(size = 8), # Adjust y-axis text size
    strip.text = element_text(size = 8) # Adjust facet label size if needed
  ) +
  facet_wrap(vars(indicator))

Data Visualization 2

final_EV_table %>%
 filter(indicator %in% c("Cobalt", "Copper")) %>%
 filter(!(scenario %in% "Current Year")) %>%
 filter(!is.na(value)) %>%
 ggplot() +
  aes(
    x = year,
    y = value,
    fill = indicator,
    colour = indicator
  ) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.75) +
  scale_fill_brewer(palette = "Accent", direction = 1) +
  scale_color_brewer(palette = "Accent", direction = 1) +
  labs(
    x = "Year",
    y = "Demand (kt)",
    title = "Cobalt and Copper Demand between Scenarios",
    subtitle = "Copper demand is projected to increase significantly in all scenarios,
    while Cobalt demands relatively flat"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold")
  ) +
  facet_wrap(vars(scenario))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'

Data Visualization 3

cleaned_data %>%
 filter(indicator %in% c("Cobalt", "Copper")) %>%
 filter(scenario %in% "Net Zero Emissions by 2050 scenario") %>%
 ggplot() +
  aes(x = EV_name, fill = indicator, weight = value) +
  geom_bar() +
  scale_fill_hue(direction = 1) +
  labs(
    x = "Conditions",
    y = "Demand (kt)",
    title = "Annual Cobalt and Copper Demand by Conditions",
    fill = "Minerals"
  ) +
  coord_flip() +
  theme_minimal() +
  facet_wrap(vars(year))

Data Visualization 4

filtered_data <- cleaned_data %>%
  filter(scenario == "Net Zero Emissions by 2050 scenario",
         EV_name == "constrained nickel supply")

cobalt_data <- cleaned_data[cleaned_data$indicator == "Cobalt", ]
copper_data <- cleaned_data[cleaned_data$indicator == "Copper", ]
graphite_data <- cleaned_data[cleaned_data$indicator == "Graphite", ]
lithium_data <- cleaned_data[cleaned_data$indicator == "Lithium", ]
manganese_data <- cleaned_data[cleaned_data$indicator == "Manganese", ]
nickel_data <- cleaned_data[cleaned_data$indicator == "Nickel", ]
silicon_data <- cleaned_data[cleaned_data$indicator == "Silicon", ]
neodymium_data <- cleaned_data[cleaned_data$indicator == "Neodymium", ]
dysprosium_data <- cleaned_data[cleaned_data$indicator == "Dysprosium", ]
praseodymium_data <- cleaned_data[cleaned_data$indicator == "Praseodymium", ]
terbium_data <- cleaned_data[cleaned_data$indicator == "Terbium", ]


mineral_stacked_bar <- ggplot() +
  geom_bar(data = cobalt_data, aes(x = factor(year), y = value), stat = "identity", fill = "maroon") +
  geom_bar(data = copper_data, aes(x = factor(year), y = value), stat = "identity", fill = "pink") +
  geom_bar(data = graphite_data, aes(x = factor(year), y = value), stat = "identity", fill = "magenta") +
  geom_bar(data = lithium_data, aes(x = factor(year), y = value), stat = "identity", fill = "purple") +
  geom_bar(data = manganese_data, aes(x = factor(year), y = value), stat = "identity", fill = "navy") +
  geom_bar(data = nickel_data, aes(x = factor(year), y = value), stat = "identity", fill = "lightblue") +
  geom_bar(data = silicon_data, aes(x = factor(year), y = value), stat = "identity", fill = "lightgreen") +
  geom_bar(data = neodymium_data, aes(x = factor(year), y = value), stat = "identity", fill = "darkgreen") +
  geom_bar(data = dysprosium_data, aes(x = factor(year), y = value), stat = "identity", fill = "yellow") +
  geom_bar(data = praseodymium_data, aes(x = factor(year), y = value), stat = "identity", fill = "orange") +
  geom_bar(data = terbium_data, aes(x = factor(year), y = value), stat = "identity", fill = "red") +
  labs(
    title = "Mineral demand for EV in the Net Zero Emissions by 2050 Scenario, Constrained nickel supply", 
    x = "Year", 
    y = "Demand (kt)", 
    fill = "Scenario") +
    theme(
    plot.title = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold")
  ) +
  theme_minimal()

mineral_stacked_bar
Warning: Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).
Removed 12 rows containing missing values (`position_stack()`).

Data Visualization 5

copper_bar <- ggplot() +
  geom_bar(data = copper_data, aes(x = factor(year), y = value), stat = "identity", fill = "pink") +
  labs(
    title = "Copper demand for EV in the Net Zero Emissions by 2050 Scenario, Constrained nickel supply", 
    x = "Year", 
    y = "Demand (kt)", 
    fill = "Scenario") +
    theme(
    plot.title = element_text(face = "bold"),
    axis.title.y = element_text(face = "bold"),
    axis.title.x = element_text(face = "bold")
  ) +
  theme_minimal()

copper_bar
Warning: Removed 12 rows containing missing values (`position_stack()`).