data_importing_HW

library(tidyverse) # pretty much always...
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for reading excel files
library(here) # for using relative file paths
here() starts at /Users/xingyuning/Desktop/susfin/IEA
path_to_sheet <- here("data-raw", "CM_Data_Explorer.xlsx")

path_to_sheet
[1] "/Users/xingyuning/Desktop/susfin/IEA/data-raw/CM_Data_Explorer.xlsx"
read_minerals_demand_forEV_sheet <- partial(.f = read_excel, 
                                   path = path_to_sheet, 
                                   sheet = "2.3 EV",
                                   col_names = FALSE)

sheet_header_EV <- read_minerals_demand_forEV_sheet(range = "A4:W5")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
sheet_header_EV
# A tibble: 2 × 23
  ...1   ...2 ...3  ...4    ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12 ...13
  <lgl> <dbl> <lgl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA       NA NA    State…    NA    NA    NA    NA    NA NA    Anno…    NA    NA
2 NA     2022 NA    2025    2030  2035  2040  2045  2050 NA    2025   2030  2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
#   ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
#   ...23 <dbl>
#tidying the headers
# transpose the data from two rows into two columns using t()
sheet_header_processed_EV <- sheet_header_EV |> 
  t() |> 
  as_tibble() |> 
  rename(senario = V1, year = V2) |> 
  fill(senario) |> 
  replace_na(list(senario = "Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed_EV
# A tibble: 23 × 2
   senario                  year 
   <chr>                    <chr>
 1 Current Year             <NA> 
 2 Current Year             2022 
 3 Current Year             <NA> 
 4 Stated policies scenario 2025 
 5 Stated policies scenario 2030 
 6 Stated policies scenario 2035 
 7 Stated policies scenario 2040 
 8 Stated policies scenario 2045 
 9 Stated policies scenario 2050 
10 Stated policies scenario <NA> 
# ℹ 13 more rows
demand_name <- read_minerals_demand_forEV_sheet(range = "A7") |> 
  pull()
New names:
• `` -> `...1`
demand_name
[1] "Constrained nickel supply"
demand_mineral_info <-read_minerals_demand_forEV_sheet(range = "A8:W19")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
demand_mineral_info
# A tibble: 12 × 23
   ...1         ...2 ...3     ...4    ...5    ...6    ...7    ...8    ...9 ...10
   <chr>       <dbl> <lgl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <lgl>
 1 Copper    3.81e+2 NA    6.17e+2 1.39e+3 1.74e+3 2.23e+3 2.42e+3 2.31e+3 NA   
 2 Cobalt    6.35e+1 NA    6.12e+1 3.80e+1 2.91e+1 3.54e+1 4.09e+1 4.74e+1 NA   
 3 Graphite  5.57e+2 NA    9.36e+2 1.59e+3 1.78e+3 1.69e+3 1.49e+3 1.08e+3 NA   
 4 Lithium   6.96e+1 NA    1.23e+2 2.19e+2 3.07e+2 4.07e+2 4.50e+2 4.13e+2 NA   
 5 Manganese 7.45e+1 NA    5.39e+1 1.56e+2 3.36e+2 5.33e+2 7.12e+2 8.58e+2 NA   
 6 Nickel    3.13e+2 NA    5.54e+2 5.13e+2 5.89e+2 6.30e+2 6.92e+2 6.88e+2 NA   
 7 Silicon   8.70e+0 NA    4.05e+1 1.39e+2 2.24e+2 3.52e+2 3.74e+2 3.73e+2 NA   
 8 Neodymium 3.96e+0 NA    7.25e+0 1.22e+1 1.51e+1 1.88e+1 2.18e+1 2.29e+1 NA   
 9 Dysprosi… 4.13e-1 NA    7.41e-1 1.21e+0 1.48e+0 1.84e+0 2.13e+0 2.23e+0 NA   
10 Praseody… 5.94e-1 NA    1.09e+0 1.83e+0 2.27e+0 2.83e+0 3.27e+0 3.43e+0 NA   
11 Terbium   8.06e-2 NA    1.48e-1 2.52e-1 3.14e-1 3.91e-1 4.52e-1 4.74e-1 NA   
12 Total EV  1.47e+3 NA    2.39e+3 4.06e+3 5.02e+3 5.91e+3 6.21e+3 5.80e+3 NA   
# ℹ 13 more variables: ...11 <dbl>, ...12 <dbl>, ...13 <dbl>, ...14 <dbl>,
#   ...15 <dbl>, ...16 <dbl>, ...17 <lgl>, ...18 <dbl>, ...19 <dbl>,
#   ...20 <dbl>, ...21 <dbl>, ...22 <dbl>, ...23 <dbl>
demand_mineral_info_col_names <- names(demand_mineral_info)
demand_mineral_info_col_names
 [1] "...1"  "...2"  "...3"  "...4"  "...5"  "...6"  "...7"  "...8"  "...9" 
[10] "...10" "...11" "...12" "...13" "...14" "...15" "...16" "...17" "...18"
[19] "...19" "...20" "...21" "...22" "...23"
 sheet_headers_and_col_names_EV <-sheet_header_processed_EV |> 
  add_column(demand_mineral_info_col_names = demand_mineral_info_col_names)
 sheet_headers_and_col_names_EV
# A tibble: 23 × 3
   senario                  year  demand_mineral_info_col_names
   <chr>                    <chr> <chr>                        
 1 Current Year             <NA>  ...1                         
 2 Current Year             2022  ...2                         
 3 Current Year             <NA>  ...3                         
 4 Stated policies scenario 2025  ...4                         
 5 Stated policies scenario 2030  ...5                         
 6 Stated policies scenario 2035  ...6                         
 7 Stated policies scenario 2040  ...7                         
 8 Stated policies scenario 2045  ...8                         
 9 Stated policies scenario 2050  ...9                         
10 Stated policies scenario <NA>  ...10                        
# ℹ 13 more rows
demand_mineral_info_long <- demand_mineral_info |> 
  rename(mineral = `...1`) |> 
  pivot_longer(cols = -mineral,
               names_to = "demand_mineral_info_col_names") |> 
    add_column(demand_name)

demand_mineral_info_long
# A tibble: 264 × 4
   mineral demand_mineral_info_col_names value demand_name              
   <chr>   <chr>                         <dbl> <chr>                    
 1 Copper  ...2                           381. Constrained nickel supply
 2 Copper  ...3                            NA  Constrained nickel supply
 3 Copper  ...4                           617. Constrained nickel supply
 4 Copper  ...5                          1389. Constrained nickel supply
 5 Copper  ...6                          1736. Constrained nickel supply
 6 Copper  ...7                          2233. Constrained nickel supply
 7 Copper  ...8                          2418. Constrained nickel supply
 8 Copper  ...9                          2313. Constrained nickel supply
 9 Copper  ...10                           NA  Constrained nickel supply
10 Copper  ...11                          732. Constrained nickel supply
# ℹ 254 more rows
combined_data_EV <- demand_mineral_info_long |> 
  left_join(sheet_headers_and_col_names_EV, by = join_by(demand_mineral_info_col_names)) |> 
  filter(!is.na(year)) |> 
  mutate(
    unit = "kiloton",
    year = as.integer(year)
  ) |> 
  select(demand_name, mineral, senario, unit, year, value)

combined_data_EV
# A tibble: 228 × 6
   demand_name               mineral senario                   unit   year value
   <chr>                     <chr>   <chr>                     <chr> <int> <dbl>
 1 Constrained nickel supply Copper  Current Year              kilo…  2022  381.
 2 Constrained nickel supply Copper  Stated policies scenario  kilo…  2025  617.
 3 Constrained nickel supply Copper  Stated policies scenario  kilo…  2030 1389.
 4 Constrained nickel supply Copper  Stated policies scenario  kilo…  2035 1736.
 5 Constrained nickel supply Copper  Stated policies scenario  kilo…  2040 2233.
 6 Constrained nickel supply Copper  Stated policies scenario  kilo…  2045 2418.
 7 Constrained nickel supply Copper  Stated policies scenario  kilo…  2050 2313.
 8 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2025  732.
 9 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2030 2113.
10 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2035 3587.
# ℹ 218 more rows
demand_name_range <- "A7"
demand_info_range <- "A8:W19"

demand_name <- read_minerals_demand_forEV_sheet (range = demand_name_range) |> 
pull()
New names:
• `` -> `...1`
demand_info <- read_minerals_demand_forEV_sheet(range = demand_info_range)
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
demand_mineral_info_col_names <-names(demand_info)

demand_info_long <- demand_info |> 
  rename(mineral = '...1') |> 
  pivot_longer(cols = - mineral,
               names_to = "demand_mineral_info_col_names") |> 
  add_column(demand_name)

combined_data_EV<- demand_mineral_info_long |> 
  left_join(sheet_headers_and_col_names_EV, by = join_by(demand_mineral_info_col_names)) |> 
  filter(!is.na(year)) |> 
  mutate(unit = "kiloton", year = as.integer(year)) |> 
  select(demand_name, mineral,senario,unit,year,value)

combined_data_EV
# A tibble: 228 × 6
   demand_name               mineral senario                   unit   year value
   <chr>                     <chr>   <chr>                     <chr> <int> <dbl>
 1 Constrained nickel supply Copper  Current Year              kilo…  2022  381.
 2 Constrained nickel supply Copper  Stated policies scenario  kilo…  2025  617.
 3 Constrained nickel supply Copper  Stated policies scenario  kilo…  2030 1389.
 4 Constrained nickel supply Copper  Stated policies scenario  kilo…  2035 1736.
 5 Constrained nickel supply Copper  Stated policies scenario  kilo…  2040 2233.
 6 Constrained nickel supply Copper  Stated policies scenario  kilo…  2045 2418.
 7 Constrained nickel supply Copper  Stated policies scenario  kilo…  2050 2313.
 8 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2025  732.
 9 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2030 2113.
10 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2035 3587.
# ℹ 218 more rows
read_iea_EV_table <-
  function(demand_name_range, demand_info_range) {
    demand_name <-
      read_minerals_demand_forEV_sheet(range = demand_name_range) |>
      pull()
    
    demand_info <- read_minerals_demand_forEV_sheet(range = demand_info_range)
    
    demand_mineral_info_col_names <- names(demand_info)
    
    demand_mineral_info_long <- demand_mineral_info |>
      rename(mineral = `...1`) |>
      pivot_longer(cols = -mineral,
                   names_to = "demand_mineral_info_col_names") |>
      add_column(demand_name)
    
    combined_data_EV <- demand_mineral_info_long |>
      left_join(sheet_headers_and_col_names_EV, by = join_by(demand_mineral_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        unit = "kiloton",
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(demand_name, mineral, senario, unit, year, value)
    
    combined_data_EV
  }
Constrained_nickel_supply_table <- read_iea_EV_table (
  demand_name_range = "A7",
  demand_info_range = "A8:W19"
)
New names:
New names:
• `` -> `...1`
Wider_use_of_silicon_rich_anodes_table <- read_iea_EV_table(
  demand_name_range = "A22",
  demand_info_range = "A23:W34"
)
New names:
New names:
• `` -> `...1`
Faster_uptake_of_solid_state_batteries_table <- read_iea_EV_table(
  demand_name_range = "A37",
  demand_info_range = "A38:W49"
)
New names:
New names:
• `` -> `...1`
low_batteries_size_table <- read_iea_EV_table(
  demand_name_range = "A52",
  demand_info_range = "A53:W64"
)
New names:
New names:
• `` -> `...1`
Limited_battery_size_reduction_table <- read_iea_EV_table(
  demand_name_range = "A67",
  demand_info_range = "A68:W79"
)
New names:
New names:
• `` -> `...1`
STEPS_Base_case_table <- read_iea_EV_table(
  demand_name_range = "A82",
  demand_info_range = "A83:W94"
)
New names:
New names:
• `` -> `...1`
final_EV_demand_table <-Constrained_nickel_supply_table  |> 
  bind_rows(Wider_use_of_silicon_rich_anodes_table) |> 
  bind_rows(Faster_uptake_of_solid_state_batteries_table) |>
  bind_rows(low_batteries_size_table) |>
  bind_rows(Limited_battery_size_reduction_table) |> 
  bind_rows(STEPS_Base_case_table)

final_EV_demand_table
# A tibble: 1,368 × 6
   demand_name               mineral senario                   unit   year value
   <chr>                     <chr>   <chr>                     <chr> <int> <dbl>
 1 Constrained nickel supply Copper  Current Year              kilo…  2022  381.
 2 Constrained nickel supply Copper  Stated policies scenario  kilo…  2025  617.
 3 Constrained nickel supply Copper  Stated policies scenario  kilo…  2030 1389.
 4 Constrained nickel supply Copper  Stated policies scenario  kilo…  2035 1736.
 5 Constrained nickel supply Copper  Stated policies scenario  kilo…  2040 2233.
 6 Constrained nickel supply Copper  Stated policies scenario  kilo…  2045 2418.
 7 Constrained nickel supply Copper  Stated policies scenario  kilo…  2050 2313.
 8 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2025  732.
 9 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2030 2113.
10 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2035 3587.
# ℹ 1,358 more rows
write_csv(final_EV_demand_table,here("data", "iea_EV_demand_for_critical_minerals.csv"))
our_cleaned_data_EV <- here("data", "iea_EV_demand_for_critical_minerals.csv") |> 
  read_csv()
Rows: 1368 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): demand_name, mineral, senario, unit
dbl (2): year, value

ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
our_cleaned_data_EV
# A tibble: 1,368 × 6
   demand_name               mineral senario                   unit   year value
   <chr>                     <chr>   <chr>                     <chr> <dbl> <dbl>
 1 Constrained nickel supply Copper  Current Year              kilo…  2022  381.
 2 Constrained nickel supply Copper  Stated policies scenario  kilo…  2025  617.
 3 Constrained nickel supply Copper  Stated policies scenario  kilo…  2030 1389.
 4 Constrained nickel supply Copper  Stated policies scenario  kilo…  2035 1736.
 5 Constrained nickel supply Copper  Stated policies scenario  kilo…  2040 2233.
 6 Constrained nickel supply Copper  Stated policies scenario  kilo…  2045 2418.
 7 Constrained nickel supply Copper  Stated policies scenario  kilo…  2050 2313.
 8 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2025  732.
 9 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2030 2113.
10 Constrained nickel supply Copper  Announced pledges scenar… kilo…  2035 3587.
# ℹ 1,358 more rows

Problem2

trend_of_demand <- read.csv("/Users/xingyuning/Desktop/susfin/IEA/data/iea_EV_demand_for_critical_minerals.csv")
copper_cobalt_demand_trend <-trend_of_demand |> filter(mineral %in% c("Copper","Cobalt"))

ggplot(copper_cobalt_demand_trend, aes(x = year, y = value, color = mineral, linetype = senario)) +
  geom_line() +
  labs(title = "Demand Trend for Copper and Cobalt Over Time",
       x = "Year",
       y = "Demand (kiloton)",
       color = "Mineral",
       linetype = "Scenario") +
  theme_minimal()

Insight: This visualization can show the overall growth in demand for copper and cobalt. An upward trend would indicate increasing demand of copper in 2025-2040 under announced pledges scenario, net zero emissions by 2050 scenario and stated policies scenario. But the demand of cobalt would be basically on flat.

comparision_of_demand <- read.csv("/Users/xingyuning/Desktop/susfin/IEA/data/iea_EV_demand_for_critical_minerals.csv") 
  comparision_of_demand_1 <- comparision_of_demand |> filter(year %in% c(2025, 2030, 2035, 2040)) |> 
    filter(mineral != "Total EV")
  
  ggplot(comparision_of_demand_1, aes(x = factor(year), y = value, fill = senario)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~mineral) +
  labs(title = "Demand Comparison Under Different Scenarios",
       x = "Year",
       y = "Demand (kiloton)",
       fill = "Scenario") +
  theme_minimal()+
     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))+
scale_y_continuous(limits = c(0, 6000))

Insight: Copper,Graphite,Nickel and Lithium would have increasing higher demand under theses three scenarios.

yearly_increase_in_demand  <- read.csv("/Users/xingyuning/Desktop/susfin/IEA/data/iea_EV_demand_for_critical_minerals.csv") 
yearly_increase_in_demand_1 <- yearly_increase_in_demand |> group_by(mineral, senario) |> 
  arrange(year) |> 
  mutate(increase = value - lag(value)) |> 
 filter(mineral != "Total EV")
  ggplot(yearly_increase_in_demand_1, aes(x = year, y = increase, color = senario)) +
  geom_line() +
  facet_wrap(~mineral) +
  labs(title = "Yearly Increase in Demand for Copper and Cobalt",
       x = "Year",
       y = "Increase in Demand (kiloton)",
       color = "Scenario") +
  theme_minimal() +
     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
Warning: Removed 4 rows containing missing values (`geom_line()`).

Insight:

Overall,2030-2040 have higher increase in demand under all scenarios.

policy_and_other_scenario_impact <- read.csv("/Users/xingyuning/Desktop/susfin/IEA/data/iea_EV_demand_for_critical_minerals.csv") 
 policy_and_other_scenario_impact_1<- policy_and_other_scenario_impact |> filter(senario %in% c("Stated policies scenario", "Announced pledges scenario")) 
  ggplot(policy_and_other_scenario_impact_1, aes(x = year, y = value, color = senario)) +
  geom_line() +
  facet_wrap(~mineral) +
  labs(title = "Policy vs. Announced pledges scenario Impact on Demand",
       x = "Year",
       y = "Demand (kiloton)",
       color = "Scenario") +
  theme_minimal()+
     theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))

Insight:

Announced pledges scenario has more huge impact on demand than stated policies scenario.

scenario_impact <- read.csv("/Users/xingyuning/Desktop/susfin/IEA/data/iea_EV_demand_for_critical_minerals.csv") 
scenario_impact_1 <- scenario_impact |> 
  group_by(year, senario) |> 
  summarise(total_demand = sum(value)) |> 
  ungroup()
`summarise()` has grouped output by 'year'. You can override using the
`.groups` argument.
  ggplot(scenario_impact_1,aes(x = year, y = total_demand, fill = senario)) +
  geom_area(position = 'stack') +
  labs(title = "Cumulative Scenario Impact on Mineral Demand",
       x = "Year",
       y = "Total Demand (kiloton)",
       fill = "Scenario") +
  theme_minimal()

Insight:

This visualization can illustrate the cumulative impact of different scenarios over time, providing a long-term view of demand changes. It could imply that emphasizing the importance of long-term planning in both policy formulation and supply chain management.