HW Data Import

Q1 Data Importing and Tidying

library(tidyverse) # pretty much always...
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for reading excel files
# install.packages("here") # install the package if you haven't yet
library(here) # for using relative file paths
here() starts at /Users/ciel.wang/Desktop/JH/R

read excel:

setwd("/Users/ciel.wang/Desktop/JH/R/data-raw")
path_to_sheet <- here("data-raw", "CM_Data_Explorer.xlsx")
EV <- read_excel(path_to_sheet, sheet = "2.3 EV") 
New names:
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
read_EV_sheet <- partial(
  # provide the function, in this case readxl::read_excel()
  .f = read_excel,
  # provide any arguments you want filled in
  path = path_to_sheet,
  sheet = "2.3 EV",
  col_names = FALSE
)

Start with a simple case:

sheet_header <- read_EV_sheet(range = "A4:W5")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
sheet_header
# A tibble: 2 × 23
  ...1   ...2 ...3  ...4    ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12 ...13
  <lgl> <dbl> <lgl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA       NA NA    State…    NA    NA    NA    NA    NA NA    Anno…    NA    NA
2 NA     2022 NA    2025    2030  2035  2040  2045  2050 NA    2025   2030  2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
#   ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
#   ...23 <dbl>
sheet_header_processed <- sheet_header |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(scenario = V1, year = V2) |>
  # fill scenario down
  fill(scenario) |>
  #insert "Current" at top
  replace_na(list(scenario = "Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed
# A tibble: 23 × 2
   scenario                 year 
   <chr>                    <chr>
 1 Current Year             <NA> 
 2 Current Year             2022 
 3 Current Year             <NA> 
 4 Stated policies scenario 2025 
 5 Stated policies scenario 2030 
 6 Stated policies scenario 2035 
 7 Stated policies scenario 2040 
 8 Stated policies scenario 2045 
 9 Stated policies scenario 2050 
10 Stated policies scenario <NA> 
# ℹ 13 more rows
market_trend <- read_EV_sheet(range = "A7") |> 
  pull()
New names:
• `` -> `...1`
market_trend
[1] "Constrained nickel supply"
market_implication_info <- read_EV_sheet(range = "A8:W9") 
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
market_implication_info
# A tibble: 2 × 23
  ...1    ...2 ...3   ...4   ...5   ...6   ...7   ...8   ...9 ...10 ...11  ...12
  <chr>  <dbl> <lgl> <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl> <lgl> <dbl>  <dbl>
1 Copper 381.  NA    617.  1389.  1736.  2233.  2418.  2313.  NA    732.  2113. 
2 Cobalt  63.5 NA     61.2   38.0   29.1   35.4   40.9   47.4 NA     71.2   58.4
# ℹ 11 more variables: ...13 <dbl>, ...14 <dbl>, ...15 <dbl>, ...16 <dbl>,
#   ...17 <lgl>, ...18 <dbl>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>,
#   ...22 <dbl>, ...23 <dbl>
#put them together
info_col_names <- names(market_implication_info)
info_col_names
 [1] "...1"  "...2"  "...3"  "...4"  "...5"  "...6"  "...7"  "...8"  "...9" 
[10] "...10" "...11" "...12" "...13" "...14" "...15" "...16" "...17" "...18"
[19] "...19" "...20" "...21" "...22" "...23"
sheet_headers_and_col_names <- sheet_header_processed |> 
  add_column(info_col_names = info_col_names)
sheet_headers_and_col_names
# A tibble: 23 × 3
   scenario                 year  info_col_names
   <chr>                    <chr> <chr>         
 1 Current Year             <NA>  ...1          
 2 Current Year             2022  ...2          
 3 Current Year             <NA>  ...3          
 4 Stated policies scenario 2025  ...4          
 5 Stated policies scenario 2030  ...5          
 6 Stated policies scenario 2035  ...6          
 7 Stated policies scenario 2040  ...7          
 8 Stated policies scenario 2045  ...8          
 9 Stated policies scenario 2050  ...9          
10 Stated policies scenario <NA>  ...10         
# ℹ 13 more rows
implication_info_long <- market_implication_info |> 
  rename(mineral = `...1`) |> 
  pivot_longer(cols = -mineral,
               names_to = "info_col_names") |> 
  add_column(market_trend)
implication_info_long
# A tibble: 44 × 4
   mineral info_col_names value market_trend             
   <chr>   <chr>          <dbl> <chr>                    
 1 Copper  ...2            381. Constrained nickel supply
 2 Copper  ...3             NA  Constrained nickel supply
 3 Copper  ...4            617. Constrained nickel supply
 4 Copper  ...5           1389. Constrained nickel supply
 5 Copper  ...6           1736. Constrained nickel supply
 6 Copper  ...7           2233. Constrained nickel supply
 7 Copper  ...8           2418. Constrained nickel supply
 8 Copper  ...9           2313. Constrained nickel supply
 9 Copper  ...10            NA  Constrained nickel supply
10 Copper  ...11           732. Constrained nickel supply
# ℹ 34 more rows
#combine the tidied header with the tidied table
combined_data <- implication_info_long |>
  left_join(sheet_headers_and_col_names, by = join_by(info_col_names)) |>
  filter(!is.na(year)) |>
  mutate(year = as.integer(year),
         unit='kilotone') |>
  select(market_trend, mineral, scenario, year, value, unit)
combined_data
# A tibble: 38 × 6
   market_trend              mineral scenario                   year value unit 
   <chr>                     <chr>   <chr>                     <int> <dbl> <chr>
 1 Constrained nickel supply Copper  Current Year               2022  381. kilo…
 2 Constrained nickel supply Copper  Stated policies scenario   2025  617. kilo…
 3 Constrained nickel supply Copper  Stated policies scenario   2030 1389. kilo…
 4 Constrained nickel supply Copper  Stated policies scenario   2035 1736. kilo…
 5 Constrained nickel supply Copper  Stated policies scenario   2040 2233. kilo…
 6 Constrained nickel supply Copper  Stated policies scenario   2045 2418. kilo…
 7 Constrained nickel supply Copper  Stated policies scenario   2050 2313. kilo…
 8 Constrained nickel supply Copper  Announced pledges scenar…  2025  732. kilo…
 9 Constrained nickel supply Copper  Announced pledges scenar…  2030 2113. kilo…
10 Constrained nickel supply Copper  Announced pledges scenar…  2035 3587. kilo…
# ℹ 28 more rows

Now, write replicatable functions for all kinds of market development:

#In the first case:
market_trend_range <- "A7"
market_implication_info_range <- "A8:W9"



market_trend <- read_EV_sheet(range = market_trend_range) |> 
  pull()
New names:
• `` -> `...1`
market_implication_info <- read_EV_sheet(range = market_implication_info_range) 
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
info_col_names <- names(market_implication_info)

implication_info_long <- market_implication_info |> 
  rename(mineral = `...1`) |> 
  pivot_longer(cols = -mineral,
               names_to = "info_col_names") |> 
  add_column(market_trend)

combined_data <- implication_info_long |>
  left_join(sheet_headers_and_col_names, by = join_by(info_col_names)) |>
  filter(!is.na(year)) |>
  mutate(year = as.integer(year),
         unit='kilotone') |>
  select(market_trend, mineral, scenario, year, value, unit)
combined_data
# A tibble: 38 × 6
   market_trend              mineral scenario                   year value unit 
   <chr>                     <chr>   <chr>                     <int> <dbl> <chr>
 1 Constrained nickel supply Copper  Current Year               2022  381. kilo…
 2 Constrained nickel supply Copper  Stated policies scenario   2025  617. kilo…
 3 Constrained nickel supply Copper  Stated policies scenario   2030 1389. kilo…
 4 Constrained nickel supply Copper  Stated policies scenario   2035 1736. kilo…
 5 Constrained nickel supply Copper  Stated policies scenario   2040 2233. kilo…
 6 Constrained nickel supply Copper  Stated policies scenario   2045 2418. kilo…
 7 Constrained nickel supply Copper  Stated policies scenario   2050 2313. kilo…
 8 Constrained nickel supply Copper  Announced pledges scenar…  2025  732. kilo…
 9 Constrained nickel supply Copper  Announced pledges scenar…  2030 2113. kilo…
10 Constrained nickel supply Copper  Announced pledges scenar…  2035 3587. kilo…
# ℹ 28 more rows
#for all market trends: 
read_EV_table <-
  function(market_trend_range, market_implication_info_range) {
    market_trend <- read_EV_sheet(range = market_trend_range) |> 
      pull()
    
    market_implication_info <- read_EV_sheet(range = market_implication_info_range) 
    
    info_col_names <- names(market_implication_info)
    
    implication_info_long <- market_implication_info |> 
      rename(mineral = `...1`) |> 
      pivot_longer(cols = -mineral,
                   names_to = "info_col_names") |> 
      add_column(market_trend)
    
    combined_data <- implication_info_long |>
      left_join(sheet_headers_and_col_names, by = join_by(info_col_names)) |>
      filter(!is.na(year)) |>
      mutate(year = as.integer(year),
             unit='kilotone') |>
      select(market_trend, mineral, scenario, year, value, unit)
    combined_data
  }


#read in all of the tables at the same time and bind them together as one dataset with just a few lines of code.

nickel_supply_table <- read_EV_table(
  market_trend_range = "A7",
  market_implication_info_range = "A8:W9"
)
New names:
New names:
• `` -> `...1`
anodes_use_table <- read_EV_table(
  market_trend_range = "A22",
  market_implication_info_range = "A23:W24"
)
New names:
New names:
• `` -> `...1`
battery_upstake_table <- read_EV_table(
  market_trend_range = "A37",
  market_implication_info_range = "A38:W39"
)
New names:
New names:
• `` -> `...1`
battery_size <- read_EV_table(
  market_trend_range = "A52",
  market_implication_info_range = "A53:W54"
)
New names:
New names:
• `` -> `...1`
battery_size_reduction <- read_EV_table(
  market_trend_range = "A67",
  market_implication_info_range = "A68:W69"
)
New names:
New names:
• `` -> `...1`
base_case <- read_EV_table(
  market_trend_range = "A82",
  market_implication_info_range = "A83:W84"
)
New names:
New names:
• `` -> `...1`
final_EV_table <- nickel_supply_table |> 
  bind_rows(anodes_use_table) |> 
  bind_rows(battery_upstake_table) |>
  bind_rows(battery_size) |>
  bind_rows(battery_size_reduction) |>
  bind_rows(base_case)

final_EV_table
# A tibble: 228 × 6
   market_trend              mineral scenario                   year value unit 
   <chr>                     <chr>   <chr>                     <int> <dbl> <chr>
 1 Constrained nickel supply Copper  Current Year               2022  381. kilo…
 2 Constrained nickel supply Copper  Stated policies scenario   2025  617. kilo…
 3 Constrained nickel supply Copper  Stated policies scenario   2030 1389. kilo…
 4 Constrained nickel supply Copper  Stated policies scenario   2035 1736. kilo…
 5 Constrained nickel supply Copper  Stated policies scenario   2040 2233. kilo…
 6 Constrained nickel supply Copper  Stated policies scenario   2045 2418. kilo…
 7 Constrained nickel supply Copper  Stated policies scenario   2050 2313. kilo…
 8 Constrained nickel supply Copper  Announced pledges scenar…  2025  732. kilo…
 9 Constrained nickel supply Copper  Announced pledges scenar…  2030 2113. kilo…
10 Constrained nickel supply Copper  Announced pledges scenar…  2035 3587. kilo…
# ℹ 218 more rows

Q2 Data Visualization

#visualization1
#general demand trend under each policy scenarios
ggplot(final_EV_table,  aes(
  x = year, y = value, color = scenario, group = scenario)) +
  geom_point() +
  facet_wrap(~mineral, scales = 'free_y') +
  labs(title = "Mineral Demand Trends for EVs", 
       y = "Demand (kt)", 
       x = "Year", 
       color = "Scenario") +
  theme_minimal()
Warning: Removed 24 rows containing missing values (`geom_point()`).

#visualization 2
#Overall demand trend under each market advancement
ggplot(final_EV_table, aes(x = year, y = value, fill = market_trend)) +
  geom_bar(stat = "identity", position = position_dodge(width = 0.7)) +
  facet_wrap(~mineral, scales = 'free_y') +
  labs(title = "Impact of Technology on Copper and Cobalt Demand",
       subtitle = "Overall Demand under different technological advancements",
       y = "Demand (kt)",
       x = "Year") +
  scale_fill_brewer(palette = "Set2") + 
  theme_minimal() +
  theme(legend.position = "bottom")
Warning: Removed 24 rows containing missing values (`geom_bar()`).

#3
ggplot(final_EV_table, aes(x = market_trend, y = scenario, fill = value)) +
  geom_tile() +
  facet_wrap(~mineral, scales = 'free_y') +
  scale_fill_gradient(low = "blue", high = "red") +
  labs(title = "Demand Heatmap for Copper and Cobalt",
       subtitle = "Interplay between policy and technological scenarios",
       y = "Policy Scenario",
       x = "Market Trend") +
  theme_minimal()

#4 Compare the demand trend for copper and cobalt using grouped bar chart
  ggplot(final_EV_table, aes(x = factor(year), y = value, fill = mineral)) +
    geom_bar(stat = "identity", position = position_dodge()) +
    scale_fill_manual(values = c("Copper" = "orange", "Cobalt" = "blue")) +
    labs(title = "Comparative Demand for Copper and Cobalt",
         y = "Demand (kt)",
         x = "Year",
         fill = "Mineral") +
    theme_minimal()
Warning: Removed 24 rows containing missing values (`geom_bar()`).

#5 Compare the disribution of demand trend with lines
  ggplot() +
    geom_line(data = final_EV_table |>
                filter(mineral == "Copper"),
              aes(x = market_trend, y = value, color = "Copper",group=scenario)) +
    geom_line(data = final_EV_table |>
                filter(mineral == "Cobalt"),
              aes(x = market_trend, y = value, color = "Cobalt", group=scenario)) +
    scale_y_continuous(name = "Copper Demand (kt)",
                       sec.axis = sec_axis(~ . / 10, name = "Cobalt Demand (kt)")) + 
    scale_color_manual(values = c("Copper" = "orange", "Cobalt" = "blue")) +
    labs(title = "Copper vs Cobalt Demand Over Time",
         x = "Year") +
    theme_minimal()