DataImport

4.5.0.1 Homework problem 1:

Your boss knows that they will be asking her about how the implications of the electric vehicle (EV) market and policy choices on the demand for copper and cobalt.

Use the workflow developed in this chapter to import and tidy worksheet 2.3 EV from the dataset.

Use the tidied dataset to come up with 5 compelling data visualizations that illustrate key actionable insights about how policy scenarios, and technological scenarios will impact demand for copper and cobalt.

Data import is time consuming when you’re first learning, so we’re limiting this to one practice question. Make your insights awesome.

library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.5.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl)
#install.packages("here") 
library(here) 
here() starts at /Users/leiyuhan/Desktop/Sus_Finance/DataImport
path_to_sheet <- "/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data-raw/CM_Data_Explorer.xlsx"

path_to_sheet
[1] "/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data-raw/CM_Data_Explorer.xlsx"
read_key_minerals_sheet <- partial(
  # provide the function, in this case readxl::read_excel()
  .f = read_excel,
  # provide any arguments you want filled in
  path = path_to_sheet,
  sheet = "2.3 EV",
  col_names = FALSE
)

# now all we have to do is provide the range
sheet_header <- read_key_minerals_sheet(range = "A4:W5")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
sheet_header
# A tibble: 2 × 23
  ...1   ...2 ...3  ...4    ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12 ...13
  <lgl> <dbl> <lgl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA       NA NA    State…    NA    NA    NA    NA    NA NA    Anno…    NA    NA
2 NA     2022 NA    2025    2030  2035  2040  2045  2050 NA    2025   2030  2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
#   ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
#   ...23 <dbl>
sheet_header_processed <- sheet_header |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(scenario = V1, year = V2) |>
  # fill scenario down
  fill(scenario) |>
  #insert "Current" at top
  replace_na(list(scenario = "Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed
# A tibble: 23 × 2
   scenario                 year 
   <chr>                    <chr>
 1 Current Year             <NA> 
 2 Current Year             2022 
 3 Current Year             <NA> 
 4 Stated policies scenario 2025 
 5 Stated policies scenario 2030 
 6 Stated policies scenario 2035 
 7 Stated policies scenario 2040 
 8 Stated policies scenario 2045 
 9 Stated policies scenario 2050 
10 Stated policies scenario <NA> 
# ℹ 13 more rows
mineral_name <- read_key_minerals_sheet(range = "A7") |> 
  pull()
New names:
• `` -> `...1`
mineral_name
[1] "Constrained nickel supply"
mineral_info <- read_key_minerals_sheet(range = "A8:W18")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
mineral_info
# A tibble: 11 × 23
   ...1         ...2 ...3     ...4    ...5    ...6    ...7    ...8    ...9 ...10
   <chr>       <dbl> <lgl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <lgl>
 1 Copper    3.81e+2 NA    617.    1.39e+3 1.74e+3 2.23e+3 2.42e+3 2.31e+3 NA   
 2 Cobalt    6.35e+1 NA     61.2   3.80e+1 2.91e+1 3.54e+1 4.09e+1 4.74e+1 NA   
 3 Graphite  5.57e+2 NA    936.    1.59e+3 1.78e+3 1.69e+3 1.49e+3 1.08e+3 NA   
 4 Lithium   6.96e+1 NA    123.    2.19e+2 3.07e+2 4.07e+2 4.50e+2 4.13e+2 NA   
 5 Manganese 7.45e+1 NA     53.9   1.56e+2 3.36e+2 5.33e+2 7.12e+2 8.58e+2 NA   
 6 Nickel    3.13e+2 NA    554.    5.13e+2 5.89e+2 6.30e+2 6.92e+2 6.88e+2 NA   
 7 Silicon   8.70e+0 NA     40.5   1.39e+2 2.24e+2 3.52e+2 3.74e+2 3.73e+2 NA   
 8 Neodymium 3.96e+0 NA      7.25  1.22e+1 1.51e+1 1.88e+1 2.18e+1 2.29e+1 NA   
 9 Dysprosi… 4.13e-1 NA      0.741 1.21e+0 1.48e+0 1.84e+0 2.13e+0 2.23e+0 NA   
10 Praseody… 5.94e-1 NA      1.09  1.83e+0 2.27e+0 2.83e+0 3.27e+0 3.43e+0 NA   
11 Terbium   8.06e-2 NA      0.148 2.52e-1 3.14e-1 3.91e-1 4.52e-1 4.74e-1 NA   
# ℹ 13 more variables: ...11 <dbl>, ...12 <dbl>, ...13 <dbl>, ...14 <dbl>,
#   ...15 <dbl>, ...16 <dbl>, ...17 <lgl>, ...18 <dbl>, ...19 <dbl>,
#   ...20 <dbl>, ...21 <dbl>, ...22 <dbl>, ...23 <dbl>
mineral_info_col_names <- names(mineral_info)

mineral_info_col_names
 [1] "...1"  "...2"  "...3"  "...4"  "...5"  "...6"  "...7"  "...8"  "...9" 
[10] "...10" "...11" "...12" "...13" "...14" "...15" "...16" "...17" "...18"
[19] "...19" "...20" "...21" "...22" "...23"
sheet_headers_and_col_names <- sheet_header_processed |> 
  add_column(mineral_info_col_names = mineral_info_col_names)

sheet_headers_and_col_names
# A tibble: 23 × 3
   scenario                 year  mineral_info_col_names
   <chr>                    <chr> <chr>                 
 1 Current Year             <NA>  ...1                  
 2 Current Year             2022  ...2                  
 3 Current Year             <NA>  ...3                  
 4 Stated policies scenario 2025  ...4                  
 5 Stated policies scenario 2030  ...5                  
 6 Stated policies scenario 2035  ...6                  
 7 Stated policies scenario 2040  ...7                  
 8 Stated policies scenario 2045  ...8                  
 9 Stated policies scenario 2050  ...9                  
10 Stated policies scenario <NA>  ...10                 
# ℹ 13 more rows
mineral_info_long <- mineral_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "mineral_info_col_names") |> 
  add_column(mineral_name)

mineral_info_long
# A tibble: 242 × 4
   indicator mineral_info_col_names value mineral_name             
   <chr>     <chr>                  <dbl> <chr>                    
 1 Copper    ...2                    381. Constrained nickel supply
 2 Copper    ...3                     NA  Constrained nickel supply
 3 Copper    ...4                    617. Constrained nickel supply
 4 Copper    ...5                   1389. Constrained nickel supply
 5 Copper    ...6                   1736. Constrained nickel supply
 6 Copper    ...7                   2233. Constrained nickel supply
 7 Copper    ...8                   2418. Constrained nickel supply
 8 Copper    ...9                   2313. Constrained nickel supply
 9 Copper    ...10                    NA  Constrained nickel supply
10 Copper    ...11                   732. Constrained nickel supply
# ℹ 232 more rows
combined_data <- mineral_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(mineral_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
  # case_when is supercharged if else
  mutate(unit = case_when(
    indicator == "Share of clean technologies in total demand" ~ "Percent",
    .default = "kiloton"
    ),
    # convert the year column from character to numeric
    year = as.integer(year)
  ) |> 
  select(mineral_name, indicator, scenario, unit, year, value)

combined_data
# A tibble: 209 × 6
   mineral_name              indicator scenario                unit   year value
   <chr>                     <chr>     <chr>                   <chr> <int> <dbl>
 1 Constrained nickel supply Copper    Current Year            kilo…  2022  381.
 2 Constrained nickel supply Copper    Stated policies scenar… kilo…  2025  617.
 3 Constrained nickel supply Copper    Stated policies scenar… kilo…  2030 1389.
 4 Constrained nickel supply Copper    Stated policies scenar… kilo…  2035 1736.
 5 Constrained nickel supply Copper    Stated policies scenar… kilo…  2040 2233.
 6 Constrained nickel supply Copper    Stated policies scenar… kilo…  2045 2418.
 7 Constrained nickel supply Copper    Stated policies scenar… kilo…  2050 2313.
 8 Constrained nickel supply Copper    Announced pledges scen… kilo…  2025  732.
 9 Constrained nickel supply Copper    Announced pledges scen… kilo…  2030 2113.
10 Constrained nickel supply Copper    Announced pledges scen… kilo…  2035 3587.
# ℹ 199 more rows
mineral_name_range <- "A7"
mineral_info_range <- "A8:W18"


mineral_name <- read_key_minerals_sheet(range = mineral_name_range) |> 
  pull()
New names:
• `` -> `...1`
mineral_info <- read_key_minerals_sheet(range = mineral_info_range)
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
mineral_info_col_names <- names(mineral_info)

mineral_info_long <- mineral_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "mineral_info_col_names") |> 
  add_column(mineral_name)

combined_data <- mineral_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(mineral_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
  # case_when is supercharged if else
  mutate(unit = case_when(
    indicator == "Share of clean technologies in total demand" ~ "Percent",
    .default = "kiloton"
    ),
    # convert the year column from character to numeric
    year = as.integer(year)
  ) |> 
  select(mineral_name, indicator, scenario, unit, year, value)

combined_data
# A tibble: 209 × 6
   mineral_name              indicator scenario                unit   year value
   <chr>                     <chr>     <chr>                   <chr> <int> <dbl>
 1 Constrained nickel supply Copper    Current Year            kilo…  2022  381.
 2 Constrained nickel supply Copper    Stated policies scenar… kilo…  2025  617.
 3 Constrained nickel supply Copper    Stated policies scenar… kilo…  2030 1389.
 4 Constrained nickel supply Copper    Stated policies scenar… kilo…  2035 1736.
 5 Constrained nickel supply Copper    Stated policies scenar… kilo…  2040 2233.
 6 Constrained nickel supply Copper    Stated policies scenar… kilo…  2045 2418.
 7 Constrained nickel supply Copper    Stated policies scenar… kilo…  2050 2313.
 8 Constrained nickel supply Copper    Announced pledges scen… kilo…  2025  732.
 9 Constrained nickel supply Copper    Announced pledges scen… kilo…  2030 2113.
10 Constrained nickel supply Copper    Announced pledges scen… kilo…  2035 3587.
# ℹ 199 more rows
read_iea_mineral_table <-
  function(mineral_name_range, mineral_info_range) {
    mineral_name <-
      read_key_minerals_sheet(range = mineral_name_range) |>
      pull()
    
    mineral_info <- read_key_minerals_sheet(range = mineral_info_range)
    
    mineral_info_col_names <- names(mineral_info)
    
    mineral_info_long <- mineral_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "mineral_info_col_names") |>
      add_column(mineral_name)
    
    combined_data <- mineral_info_long |>
      left_join(sheet_headers_and_col_names, by = join_by(mineral_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        unit = case_when(
          indicator == "Share of clean technologies in total demand" ~ "Percent",
          .default = "kiloton"
        ),
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(mineral_name, indicator, scenario, unit, year, value)
    
    combined_data
  }
Constrained_nickel_supply <- read_iea_mineral_table(
  mineral_name_range = "A7",
  mineral_info_range = "A8:W19"
)
New names:
New names:
• `` -> `...1`
Wider_use_of_silicon_rich_anodes <- read_iea_mineral_table(
  mineral_name_range = "A22",
  mineral_info_range = "A23:W34"
)
New names:
New names:
• `` -> `...1`
Faster_uptake_of_solid_state_batteries <- read_iea_mineral_table(
  mineral_name_range = "A37",
  mineral_info_range = "A38:W49"
)
New names:
New names:
• `` -> `...1`
Lower_battery_sizes <- read_iea_mineral_table(
  mineral_name_range = "A52",
  mineral_info_range = "A53:W64"
)
New names:
New names:
• `` -> `...1`
Limited_battery_size_reduction <- read_iea_mineral_table(
  mineral_name_range = "A67",
  mineral_info_range = "A68:W79"
)
New names:
New names:
• `` -> `...1`
STEPS_Base_case <- read_iea_mineral_table(
  mineral_name_range = "A82",
  mineral_info_range = "A83:W94"
)
New names:
New names:
• `` -> `...1`
final_iea_ev_table <- STEPS_Base_case |> 
  bind_rows(Constrained_nickel_supply) |> 
  bind_rows(Wider_use_of_silicon_rich_anodes) |>
  bind_rows(Faster_uptake_of_solid_state_batteries) |>
  bind_rows(Lower_battery_sizes) |> 
  bind_rows(Limited_battery_size_reduction) |> 
  bind_rows(STEPS_Base_case) 

final_iea_ev_table
# A tibble: 1,596 × 6
   mineral_name      indicator scenario                   unit     year value
   <chr>             <chr>     <chr>                      <chr>   <int> <dbl>
 1 STEPS - Base case Copper    Current Year               kiloton  2022  373.
 2 STEPS - Base case Copper    Stated policies scenario   kiloton  2025  580.
 3 STEPS - Base case Copper    Stated policies scenario   kiloton  2030 1044.
 4 STEPS - Base case Copper    Stated policies scenario   kiloton  2035 1219.
 5 STEPS - Base case Copper    Stated policies scenario   kiloton  2040 1421.
 6 STEPS - Base case Copper    Stated policies scenario   kiloton  2045 1458.
 7 STEPS - Base case Copper    Stated policies scenario   kiloton  2050 1346.
 8 STEPS - Base case Copper    Announced pledges scenario kiloton  2025  689.
 9 STEPS - Base case Copper    Announced pledges scenario kiloton  2030 1592.
10 STEPS - Base case Copper    Announced pledges scenario kiloton  2035 2570.
# ℹ 1,586 more rows
#Write the data
write_csv(final_iea_ev_table, "/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data/EV.csv")
 read_csv("/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data/EV.csv")
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# A tibble: 1,596 × 6
   mineral_name      indicator scenario                   unit     year value
   <chr>             <chr>     <chr>                      <chr>   <dbl> <dbl>
 1 STEPS - Base case Copper    Current Year               kiloton  2022  373.
 2 STEPS - Base case Copper    Stated policies scenario   kiloton  2025  580.
 3 STEPS - Base case Copper    Stated policies scenario   kiloton  2030 1044.
 4 STEPS - Base case Copper    Stated policies scenario   kiloton  2035 1219.
 5 STEPS - Base case Copper    Stated policies scenario   kiloton  2040 1421.
 6 STEPS - Base case Copper    Stated policies scenario   kiloton  2045 1458.
 7 STEPS - Base case Copper    Stated policies scenario   kiloton  2050 1346.
 8 STEPS - Base case Copper    Announced pledges scenario kiloton  2025  689.
 9 STEPS - Base case Copper    Announced pledges scenario kiloton  2030 1592.
10 STEPS - Base case Copper    Announced pledges scenario kiloton  2035 2570.
# ℹ 1,586 more rows
clean_data <- read_csv("/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data/EV.csv")
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.

1. Trend of Copper and Cobalt Demand Over Time Under Different Scenarios

library(ggplot2)
library(dplyr)
library(readr)

ev_data <- read_csv("/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data/EV.csv")
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ggplot(ev_data %>% filter(indicator %in% c('Copper', 'Cobalt')), 
       aes(x = year, y = value, color = scenario, group = interaction(scenario, indicator))) +
  geom_line() +
  facet_wrap(~indicator, scales = "free_y") +
  labs(title = "Demand Trend for Copper and Cobalt Over Time", x = "Year", y = "Demand (kiloton)", color = "Scenario") +
  theme_minimal()

2. Comparison of Copper and Cobalt Demand in Key Years Under Different Scenarios

ev_selected_years <- ev_data %>% 
  filter(year %in% c(2025, 2030, 2035, 2040), indicator %in% c('Copper', 'Cobalt'))

ggplot(ev_selected_years, aes(x = factor(year), y = value, fill = scenario)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  facet_wrap(~indicator) +
  labs(title = "Copper and Cobalt Demand Comparison in Key Years", x = "Year", y = "Demand (kiloton)", fill = "Scenario") +
  theme_minimal()
Warning: Removed 16 rows containing missing values or values outside the scale range
(`geom_bar()`).

3.Annual Demand Trend for Copper and Cobalt

library(ggplot2)
library(dplyr)
library(readr)

ev_data <- read_csv("/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data/EV.csv")
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Annual Demand Trend
ggplot(ev_data %>% filter(indicator %in% c('Copper', 'Cobalt')), aes(x = year, y = value, group = indicator, color = indicator)) +
  geom_line() +
  labs(title = "Annual Demand Trend for Copper and Cobalt", x = "Year", y = "Demand (kiloton)") +
  theme_minimal()

4. Scatter Plot of Copper and Cobalt Demand with Scenario Highlight

ggplot(ev_data %>% filter(indicator %in% c('Copper', 'Cobalt')), aes(x = year, y = value, color = indicator, size = value)) +
  geom_point(alpha = 0.6) + # Adjust transparency with alpha
  facet_wrap(~scenario) +
  labs(title = "Scatter Plot of Copper and Cobalt Demand (Size Indicates Magnitude)", x = "Year", y = "Demand (kiloton)", color = "Mineral") +
  theme_minimal()
Warning: Removed 24 rows containing missing values or values outside the scale range
(`geom_point()`).

5.Box Plot: Annual Demand Variability for Copper and Cobalt

ev_data %>%
  filter(indicator %in% c('Copper', 'Cobalt')) %>%
  ggplot(aes(x = indicator, y = value, fill = indicator)) +
  geom_boxplot() +
  labs(title = "Annual Demand Variability for Copper and Cobalt", x = "Mineral", y = "Demand (kiloton)") +
  theme_minimal()
Warning: Removed 24 rows containing non-finite outside the scale range
(`stat_boxplot()`).