library(tidyverse) # pretty much always...
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.5
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for reading excel files
library(here) # for using relative file paths
here() starts at /Users/enmingliang/Desktop/data-raw
library(ggplot2)

path_to_sheet <- here("CM_Data_Explorer.xlsx")

path_to_sheet
[1] "/Users/enmingliang/Desktop/data-raw/CM_Data_Explorer.xlsx"
read_key_minerals_sheet <- partial(
  .f = read_excel,
  path = path_to_sheet,
  sheet = "1 Total demand for key minerals",
  col_names = FALSE
)

sheet_header <- read_key_minerals_sheet(range = "A4:W5")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
sheet_header
# A tibble: 2 × 23
  ...1   ...2 ...3  ...4    ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12 ...13
  <lgl> <dbl> <lgl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA       NA NA    State…    NA    NA    NA    NA    NA NA    Anno…    NA    NA
2 NA     2022 NA    2025    2030  2035  2040  2045  2050 NA    2025   2030  2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
#   ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
#   ...23 <dbl>
sheet_header_processed <- sheet_header |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(scenario = V1, year = V2) |>
  # fill scenario down
  fill(scenario) |>
  #insert "Current" at top
  replace_na(list(scenario = "Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed
# A tibble: 23 × 2
   scenario                 year 
   <chr>                    <chr>
 1 Current Year             <NA> 
 2 Current Year             2022 
 3 Current Year             <NA> 
 4 Stated policies scenario 2025 
 5 Stated policies scenario 2030 
 6 Stated policies scenario 2035 
 7 Stated policies scenario 2040 
 8 Stated policies scenario 2045 
 9 Stated policies scenario 2050 
10 Stated policies scenario <NA> 
# ℹ 13 more rows
mineral_name <- read_key_minerals_sheet(range = "A7") |> 
  pull()
New names:
• `` -> `...1`
mineral_name
[1] "Copper"
mineral_info <- read_key_minerals_sheet(range = "A8:W18")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
mineral_info
# A tibble: 11 × 23
   ...1         ...2 ...3     ...4    ...5    ...6    ...7    ...8    ...9 ...10
   <chr>       <dbl> <lgl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <lgl>
 1 Solar PV  6.82e+2 NA    7.79e+2 9.07e+2 9.25e+2 9.59e+2 1.12e+3 1.26e+3 NA   
 2 Wind      3.94e+2 NA    4.28e+2 6.46e+2 5.95e+2 5.32e+2 6.39e+2 7.21e+2 NA   
 3 Other lo… 8.50e+1 NA    6.90e+1 1.08e+2 8.69e+1 8.47e+1 1.10e+2 1.17e+2 NA   
 4 Electric… 3.73e+2 NA    5.80e+2 1.04e+3 1.22e+3 1.42e+3 1.46e+3 1.35e+3 NA   
 5 Grid bat… 2.04e+1 NA    3.84e+1 8.27e+1 1.54e+2 2.25e+2 2.38e+2 2.20e+2 NA   
 6 Electric… 4.18e+3 NA    4.58e+3 6.51e+3 6.51e+3 6.58e+3 7.11e+3 6.98e+3 NA   
 7 Hydrogen… 3.20e-3 NA    3.88e-3 6.76e-3 6.53e-3 6.99e-3 8.90e-3 1.45e-2 NA   
 8 Total cl… 5.74e+3 NA    6.47e+3 9.30e+3 9.49e+3 9.80e+3 1.07e+4 1.06e+4 NA   
 9 Other us… 1.98e+4 NA    2.03e+4 2.15e+4 2.20e+4 2.34e+4 2.51e+4 2.57e+4 NA   
10 Total de… 2.55e+4 NA    2.68e+4 3.08e+4 3.15e+4 3.32e+4 3.58e+4 3.64e+4 NA   
11 Share of… 2.25e-1 NA    2.42e-1 3.01e-1 3.01e-1 2.96e-1 2.98e-1 2.93e-1 NA   
# ℹ 13 more variables: ...11 <dbl>, ...12 <dbl>, ...13 <dbl>, ...14 <dbl>,
#   ...15 <dbl>, ...16 <dbl>, ...17 <lgl>, ...18 <dbl>, ...19 <dbl>,
#   ...20 <dbl>, ...21 <dbl>, ...22 <dbl>, ...23 <dbl>
mineral_info_col_names <- names(mineral_info)

mineral_info_col_names
 [1] "...1"  "...2"  "...3"  "...4"  "...5"  "...6"  "...7"  "...8"  "...9" 
[10] "...10" "...11" "...12" "...13" "...14" "...15" "...16" "...17" "...18"
[19] "...19" "...20" "...21" "...22" "...23"
sheet_headers_and_col_names <- sheet_header_processed |> 
  add_column(mineral_info_col_names = mineral_info_col_names)

sheet_headers_and_col_names
# A tibble: 23 × 3
   scenario                 year  mineral_info_col_names
   <chr>                    <chr> <chr>                 
 1 Current Year             <NA>  ...1                  
 2 Current Year             2022  ...2                  
 3 Current Year             <NA>  ...3                  
 4 Stated policies scenario 2025  ...4                  
 5 Stated policies scenario 2030  ...5                  
 6 Stated policies scenario 2035  ...6                  
 7 Stated policies scenario 2040  ...7                  
 8 Stated policies scenario 2045  ...8                  
 9 Stated policies scenario 2050  ...9                  
10 Stated policies scenario <NA>  ...10                 
# ℹ 13 more rows
read_iea_mineral_table <-
  function(mineral_name_range, mineral_info_range) {
    mineral_name <-
      read_key_minerals_sheet(range = mineral_name_range) |>
      pull()
    
    mineral_info <- read_key_minerals_sheet(range = mineral_info_range)
    
    mineral_info_col_names <- names(mineral_info)
    
    mineral_info_long <- mineral_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "mineral_info_col_names") |>
      add_column(mineral_name)
    
    combined_data <- mineral_info_long |>
      left_join(sheet_headers_and_col_names, by = join_by(mineral_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      # case_when is supercharged if else
      mutate(
        unit = case_when(
          indicator == "Share of clean technologies in total demand" ~ "Percent",
          .default = "kiloton"
        ),
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(mineral_name, indicator, scenario, unit, year, value)
    
    combined_data
  }

copper_table <- read_iea_mineral_table(
  mineral_name_range = "A7",
  mineral_info_range = "A8:W18"
)
New names:
New names:
• `` -> `...1`
cobalt_table <- read_iea_mineral_table(
  mineral_name_range = "A21",
  mineral_info_range = "A22:W29"
)
New names:
New names:
• `` -> `...1`
final_iea_minerals_table <- copper_table |> 
  bind_rows(cobalt_table)

final_iea_minerals_table
# A tibble: 361 × 6
   mineral_name indicator scenario                   unit     year value
   <chr>        <chr>     <chr>                      <chr>   <int> <dbl>
 1 Copper       Solar PV  Current Year               kiloton  2022  682.
 2 Copper       Solar PV  Stated policies scenario   kiloton  2025  779.
 3 Copper       Solar PV  Stated policies scenario   kiloton  2030  907.
 4 Copper       Solar PV  Stated policies scenario   kiloton  2035  925.
 5 Copper       Solar PV  Stated policies scenario   kiloton  2040  959.
 6 Copper       Solar PV  Stated policies scenario   kiloton  2045 1122.
 7 Copper       Solar PV  Stated policies scenario   kiloton  2050 1262.
 8 Copper       Solar PV  Announced pledges scenario kiloton  2025  923.
 9 Copper       Solar PV  Announced pledges scenario kiloton  2030 1177.
10 Copper       Solar PV  Announced pledges scenario kiloton  2035 1369.
# ℹ 351 more rows
write_csv(final_iea_minerals_table,here("iea_total_demand_for_critical_minerals.csv"))
library(ggplot2)
library(dplyr)

# Load the dataset
data <- read.csv("/Users/enmingliang/Desktop/data-raw/iea_total_demand_for_critical_minerals.csv")

# Ensure your dataset has 'mineral_name', 'scenario', 'year', and 'value' columns
filtered_data <- data %>%
  filter(mineral_name %in% c("Copper", "Cobalt"))
  # Optionally, you can add more filters for specific scenarios or years

# Creating the line chart correctly
ggplot(filtered_data, aes(x = year, y = value, color = scenario, group = scenario)) + # Note the comma here
  geom_line() +
  facet_wrap(~mineral_name, scales = "free_y") + # Creates separate plots for Copper and Cobalt
  theme_minimal() +
  labs(title = "Demand Trends for Copper and Cobalt Under Different Scenarios",
       x = "Year",
       y = "Demand (metric tons)",
       color = "Scenario")

#Visualization 1: Scenario Comparison
#The model uses a line chart to visualize how the demand for copper and cobalt changes over time under different future scenarios. Each line represents a scenario, showing how policy and technology might influence mineral demand.
#Lines: Each line tracks demand for a mineral under a specific future scenario.
#Purpose: Identify how different scenarios affect mineral demand.
copper_cobalt_data <- data %>% filter(mineral_name %in% c("Copper", "Cobalt"))

# Aggregate data to get average demand per scenario for each mineral
avg_demand_by_scenario <- copper_cobalt_data %>%
  group_by(mineral_name, scenario) %>%
  summarise(avg_demand = mean(value))
`summarise()` has grouped output by 'mineral_name'. You can override using the
`.groups` argument.
# Plotting the average demand by scenario
ggplot(avg_demand_by_scenario, aes(x = scenario, y = avg_demand, fill = mineral_name)) +
  geom_bar(stat = "identity", position = "dodge") +
  theme_minimal() +
  labs(title = "Average Demand by Scenario",
       x = "Scenario",
       y = "Average Demand (kiloton)",
       fill = "Mineral") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Visualization 2: Scenario Comparison
#Purpose: To compare the impact of different policy and technological scenarios on the average demand for copper and cobalt.
#Explanation: By calculating the average demand under each scenario and plotting these averages in a bar chart, this visualization highlights which scenarios lead to higher or lower demand for each mineral.
#Comments: This comparison provides a straightforward way to assess the relative impact of scenarios, helping stakeholders understand which conditions could lead to increased demand and, by extension, potential supply challenges or investment opportunities.
# Plotting the demand by application
ggplot(copper_cobalt_data, aes(x = indicator, y = value, fill = scenario)) +
  geom_bar(stat = "identity", position = "dodge") +
  facet_wrap(~mineral_name, scales = "free_y") +
  theme_minimal() +
  labs(title = "Demand for Copper and Cobalt by Application",
       x = "Application",
       y = "Demand (kiloton)",
       fill = "Scenario") +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

#Visualization 3: Demand by Application
#Purpose: To identify which technologies or applications drive the demand for copper and cobalt.
#Explanation: This bar chart breaks down demand by application (e.g., Solar PV, Wind, EVs) for each mineral, showing how different uses contribute to overall demand under various scenarios.
#Comments: Understanding demand drivers is essential for pinpointing where investments in technology or infrastructure might be most needed or most impactful. This insight helps in aligning strategy with future market conditions.
# Comparing the demand for Copper and Cobalt
ggplot(copper_cobalt_data, aes(x = year, y = value, color = mineral_name)) +
  geom_line() +
  facet_wrap(~scenario, scales = "free_y") +
  theme_minimal() +
  labs(title = "Copper vs Cobalt: Demand Comparison Across Scenarios",
       x = "Year",
       y = "Demand (kiloton)",
       color = "Mineral")

#Visualization 4: Mineral Comparison
#Purpose: To directly compare the demand trajectories for copper and cobalt across different scenarios.
#Explanation: This line plot juxtaposes the demand for copper against cobalt over the years, within each scenario. It visually represents how each mineral's demand responds to policy and technological developments.
#Comments: Comparing these minerals side by side allows for a nuanced understanding of their market dynamics, highlighting potential areas of concern or opportunity in raw material supply chains.
# Calculating yearly growth rates
copper_cobalt_growth <- copper_cobalt_data %>%
  arrange(mineral_name, scenario, year) %>%
  group_by(mineral_name, scenario) %>%
  mutate(growth_rate = (value / lag(value) - 1) * 100) %>%
  # Removing the first year for each scenario since it won't have a growth rate
  filter(!is.na(growth_rate))

# Plotting the growth rates
ggplot(copper_cobalt_growth, aes(x = year, y = growth_rate, color = scenario)) +
  geom_line() +
  facet_wrap(~mineral_name, scales = "free_y") +
  theme_minimal() +
  labs(title = "Yearly Growth Rates in Demand for Copper and Cobalt",
       x = "Year",
       y = "Growth Rate (%)",
       color = "Scenario")

#Visualization 5: Yearly Growth Rates
#Purpose: To analyze the annual changes in demand for copper and cobalt, indicating acceleration or deceleration in growth.
#Explanation: By calculating and plotting the yearly growth rates, this visualization shows how demand growth varies year over year, offering insights into demand momentum under different scenarios.
#Comments: Growth rates are a key indicator of market health and can signal impending shifts in demand. This analysis is crucial for timing investments, expansions, or policy interventions.