Your boss knows that they will be asking her about how the implications of the electric vehicle (EV) market and policy choices on the demand for copper and cobalt.
Use the workflow developed in this chapter to import and tidy worksheet 2.3 EV from the dataset.
Use the tidied dataset to come up with 5 compelling data visualizations that illustrate key actionable insights about how policy scenarios, and technological scenarios will impact demand for copper and cobalt.
Data import is time consuming when you’re first learning, so we’re limiting this to one practice question. Make your insights awesome.
library(tidyverse)
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr 1.1.4 ✔ readr 2.1.5
✔ forcats 1.0.0 ✔ stringr 1.5.1
✔ ggplot2 3.5.0 ✔ tibble 3.2.1
✔ lubridate 1.9.3 ✔ tidyr 1.3.1
✔ purrr 1.0.2
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag() masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
read_key_minerals_sheet <-partial(# provide the function, in this case readxl::read_excel().f = read_excel,# provide any arguments you want filled inpath = path_to_sheet,sheet ="2.3 EV",col_names =FALSE)# now all we have to do is provide the rangesheet_header <-read_key_minerals_sheet(range ="A4:W5")
# A tibble: 2 × 23
...1 ...2 ...3 ...4 ...5 ...6 ...7 ...8 ...9 ...10 ...11 ...12 ...13
<lgl> <dbl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA NA NA State… NA NA NA NA NA NA Anno… NA NA
2 NA 2022 NA 2025 2030 2035 2040 2045 2050 NA 2025 2030 2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
# ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
# ...23 <dbl>
sheet_header_processed <- sheet_header |># transpose the datat() |># turn it back into a tibbleas_tibble() |># make them meaningfulrename(scenario = V1, year = V2) |># fill scenario downfill(scenario) |>#insert "Current" at topreplace_na(list(scenario ="Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed
# A tibble: 23 × 2
scenario year
<chr> <chr>
1 Current Year <NA>
2 Current Year 2022
3 Current Year <NA>
4 Stated policies scenario 2025
5 Stated policies scenario 2030
6 Stated policies scenario 2035
7 Stated policies scenario 2040
8 Stated policies scenario 2045
9 Stated policies scenario 2050
10 Stated policies scenario <NA>
# ℹ 13 more rows
combined_data <- mineral_info_long |>left_join(sheet_headers_and_col_names, by =join_by(mineral_info_col_names)) |># filter out what were empty columns (where years are NA) filter(!is.na(year)) |># case_when is supercharged if elsemutate(unit =case_when( indicator =="Share of clean technologies in total demand"~"Percent",.default ="kiloton" ),# convert the year column from character to numericyear =as.integer(year) ) |>select(mineral_name, indicator, scenario, unit, year, value)combined_data
mineral_info_col_names <-names(mineral_info)mineral_info_long <- mineral_info |>rename(indicator =`...1`) |>pivot_longer(cols =-indicator,names_to ="mineral_info_col_names") |>add_column(mineral_name)combined_data <- mineral_info_long |>left_join(sheet_headers_and_col_names, by =join_by(mineral_info_col_names)) |># filter out what were empty columns (where years are NA) filter(!is.na(year)) |># case_when is supercharged if elsemutate(unit =case_when( indicator =="Share of clean technologies in total demand"~"Percent",.default ="kiloton" ),# convert the year column from character to numericyear =as.integer(year) ) |>select(mineral_name, indicator, scenario, unit, year, value)combined_data
read_iea_mineral_table <-function(mineral_name_range, mineral_info_range) { mineral_name <-read_key_minerals_sheet(range = mineral_name_range) |>pull() mineral_info <-read_key_minerals_sheet(range = mineral_info_range) mineral_info_col_names <-names(mineral_info) mineral_info_long <- mineral_info |>rename(indicator =`...1`) |>pivot_longer(cols =-indicator,names_to ="mineral_info_col_names") |>add_column(mineral_name) combined_data <- mineral_info_long |>left_join(sheet_headers_and_col_names, by =join_by(mineral_info_col_names)) |># filter out what were empty columns (where years are NA)filter(!is.na(year)) |># case_when is supercharged if elsemutate(unit =case_when( indicator =="Share of clean technologies in total demand"~"Percent",.default ="kiloton" ),# convert the year column from character to numericyear =as.integer(year) ) |>select(mineral_name, indicator, scenario, unit, year, value) combined_data }
# A tibble: 1,596 × 6
mineral_name indicator scenario unit year value
<chr> <chr> <chr> <chr> <int> <dbl>
1 STEPS - Base case Copper Current Year kiloton 2022 373.
2 STEPS - Base case Copper Stated policies scenario kiloton 2025 580.
3 STEPS - Base case Copper Stated policies scenario kiloton 2030 1044.
4 STEPS - Base case Copper Stated policies scenario kiloton 2035 1219.
5 STEPS - Base case Copper Stated policies scenario kiloton 2040 1421.
6 STEPS - Base case Copper Stated policies scenario kiloton 2045 1458.
7 STEPS - Base case Copper Stated policies scenario kiloton 2050 1346.
8 STEPS - Base case Copper Announced pledges scenario kiloton 2025 689.
9 STEPS - Base case Copper Announced pledges scenario kiloton 2030 1592.
10 STEPS - Base case Copper Announced pledges scenario kiloton 2035 2570.
# ℹ 1,586 more rows
#Write the datawrite_csv(final_iea_ev_table, "/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data/EV.csv")read_csv("/Users/leiyuhan/Desktop/Sus_Finance/DataImport/data/EV.csv")
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# A tibble: 1,596 × 6
mineral_name indicator scenario unit year value
<chr> <chr> <chr> <chr> <dbl> <dbl>
1 STEPS - Base case Copper Current Year kiloton 2022 373.
2 STEPS - Base case Copper Stated policies scenario kiloton 2025 580.
3 STEPS - Base case Copper Stated policies scenario kiloton 2030 1044.
4 STEPS - Base case Copper Stated policies scenario kiloton 2035 1219.
5 STEPS - Base case Copper Stated policies scenario kiloton 2040 1421.
6 STEPS - Base case Copper Stated policies scenario kiloton 2045 1458.
7 STEPS - Base case Copper Stated policies scenario kiloton 2050 1346.
8 STEPS - Base case Copper Announced pledges scenario kiloton 2025 689.
9 STEPS - Base case Copper Announced pledges scenario kiloton 2030 1592.
10 STEPS - Base case Copper Announced pledges scenario kiloton 2035 2570.
# ℹ 1,586 more rows
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
1. Trend of Copper and Cobalt Demand Over Time Under Different Scenarios
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
ggplot(ev_data %>%filter(indicator %in%c('Copper', 'Cobalt')), aes(x = year, y = value, color = scenario, group =interaction(scenario, indicator))) +geom_line() +facet_wrap(~indicator, scales ="free_y") +labs(title ="Demand Trend for Copper and Cobalt Over Time", x ="Year", y ="Demand (kiloton)", color ="Scenario") +theme_minimal()
2. Comparison of Copper and Cobalt Demand in Key Years Under Different Scenarios
ev_selected_years <- ev_data %>%filter(year %in%c(2025, 2030, 2035, 2040), indicator %in%c('Copper', 'Cobalt'))ggplot(ev_selected_years, aes(x =factor(year), y = value, fill = scenario)) +geom_bar(stat ="identity", position =position_dodge()) +facet_wrap(~indicator) +labs(title ="Copper and Cobalt Demand Comparison in Key Years", x ="Year", y ="Demand (kiloton)", fill ="Scenario") +theme_minimal()
Warning: Removed 16 rows containing missing values or values outside the scale range
(`geom_bar()`).
Rows: 1596 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Annual Demand Trendggplot(ev_data %>%filter(indicator %in%c('Copper', 'Cobalt')), aes(x = year, y = value, group = indicator, color = indicator)) +geom_line() +labs(title ="Annual Demand Trend for Copper and Cobalt", x ="Year", y ="Demand (kiloton)") +theme_minimal()
4. Scatter Plot of Copper and Cobalt Demand with Scenario Highlight
ggplot(ev_data %>%filter(indicator %in%c('Copper', 'Cobalt')), aes(x = year, y = value, color = indicator, size = value)) +geom_point(alpha =0.6) +# Adjust transparency with alphafacet_wrap(~scenario) +labs(title ="Scatter Plot of Copper and Cobalt Demand (Size Indicates Magnitude)", x ="Year", y ="Demand (kiloton)", color ="Mineral") +theme_minimal()
Warning: Removed 24 rows containing missing values or values outside the scale range
(`geom_point()`).
5.Box Plot: Annual Demand Variability for Copper and Cobalt
ev_data %>%filter(indicator %in%c('Copper', 'Cobalt')) %>%ggplot(aes(x = indicator, y = value, fill = indicator)) +geom_boxplot() +labs(title ="Annual Demand Variability for Copper and Cobalt", x ="Mineral", y ="Demand (kiloton)") +theme_minimal()
Warning: Removed 24 rows containing non-finite outside the scale range
(`stat_boxplot()`).