read_key_minerals_sheet <-partial(.f = read_excel,path = path_to_sheet,sheet ="1 Total demand for key minerals",col_names =FALSE)# now all we have to do is provide the rangesheet_header <-read_key_minerals_sheet(range ="A4:W5")
# A tibble: 2 × 23
...1 ...2 ...3 ...4 ...5 ...6 ...7 ...8 ...9 ...10 ...11 ...12 ...13
<lgl> <dbl> <lgl> <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA NA NA State… NA NA NA NA NA NA Anno… NA NA
2 NA 2022 NA 2025 2030 2035 2040 2045 2050 NA 2025 2030 2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
# ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
# ...23 <dbl>
sheet_header_processed <- sheet_header |># transpose the datat() |># turn it back into a tibbleas_tibble() |># make them meaningfulrename(scenario = V1, year = V2) |># fill scenario downfill(scenario) |>#insert "Current" at topreplace_na(list(scenario ="Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed
# A tibble: 23 × 2
scenario year
<chr> <chr>
1 Current Year <NA>
2 Current Year 2022
3 Current Year <NA>
4 Stated policies scenario 2025
5 Stated policies scenario 2030
6 Stated policies scenario 2035
7 Stated policies scenario 2040
8 Stated policies scenario 2045
9 Stated policies scenario 2050
10 Stated policies scenario <NA>
# ℹ 13 more rows
# A tibble: 242 × 4
indicator mineral_info_col_names value mineral_name
<chr> <chr> <dbl> <chr>
1 Solar PV ...2 682. Copper
2 Solar PV ...3 NA Copper
3 Solar PV ...4 779. Copper
4 Solar PV ...5 907. Copper
5 Solar PV ...6 925. Copper
6 Solar PV ...7 959. Copper
7 Solar PV ...8 1122. Copper
8 Solar PV ...9 1262. Copper
9 Solar PV ...10 NA Copper
10 Solar PV ...11 923. Copper
# ℹ 232 more rows
combined_data <- mineral_info_long |>left_join(sheet_headers_and_col_names, by =join_by(mineral_info_col_names)) |># filter out what were empty columns (where years are NA) filter(!is.na(year)) |># case_when is supercharged if elsemutate(unit =case_when( indicator =="Share of clean technologies in total demand"~"Percent",.default ="kiloton" ),# convert the year column from character to numericyear =as.integer(year) ) |>select(mineral_name, indicator, scenario, unit, year, value)combined_data
# A tibble: 209 × 6
mineral_name indicator scenario unit year value
<chr> <chr> <chr> <chr> <int> <dbl>
1 Copper Solar PV Current Year kiloton 2022 682.
2 Copper Solar PV Stated policies scenario kiloton 2025 779.
3 Copper Solar PV Stated policies scenario kiloton 2030 907.
4 Copper Solar PV Stated policies scenario kiloton 2035 925.
5 Copper Solar PV Stated policies scenario kiloton 2040 959.
6 Copper Solar PV Stated policies scenario kiloton 2045 1122.
7 Copper Solar PV Stated policies scenario kiloton 2050 1262.
8 Copper Solar PV Announced pledges scenario kiloton 2025 923.
9 Copper Solar PV Announced pledges scenario kiloton 2030 1177.
10 Copper Solar PV Announced pledges scenario kiloton 2035 1369.
# ℹ 199 more rows
read_iea_mineral_table <-function(mineral_name_range, mineral_info_range) { mineral_name <-read_key_minerals_sheet(range = mineral_name_range) |>pull() mineral_info <-read_key_minerals_sheet(range = mineral_info_range) mineral_info_col_names <-names(mineral_info) mineral_info_long <- mineral_info |>rename(indicator =`...1`) |>pivot_longer(cols =-indicator,names_to ="mineral_info_col_names") |>add_column(mineral_name) combined_data <- mineral_info_long |>left_join(sheet_headers_and_col_names, by =join_by(mineral_info_col_names)) |># filter out what were empty columns (where years are NA)filter(!is.na(year)) |># case_when is supercharged if elsemutate(unit =case_when( indicator =="Share of clean technologies in total demand"~"Percent",.default ="kiloton" ),# convert the year column from character to numericyear =as.integer(year) ) |>select(mineral_name, indicator, scenario, unit, year, value) combined_data }
Rows: 779 Columns: 6
── Column specification ────────────────────────────────────────────────────────
Delimiter: ","
chr (4): mineral_name, indicator, scenario, unit
dbl (2): year, value
ℹ Use `spec()` to retrieve the full column specification for this data.
ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
cleaned_total_demand
# A tibble: 779 × 6
mineral_name indicator scenario unit year value
<chr> <chr> <chr> <chr> <dbl> <dbl>
1 Copper Solar PV Current Year kiloton 2022 682.
2 Copper Solar PV Stated policies scenario kiloton 2025 779.
3 Copper Solar PV Stated policies scenario kiloton 2030 907.
4 Copper Solar PV Stated policies scenario kiloton 2035 925.
5 Copper Solar PV Stated policies scenario kiloton 2040 959.
6 Copper Solar PV Stated policies scenario kiloton 2045 1122.
7 Copper Solar PV Stated policies scenario kiloton 2050 1262.
8 Copper Solar PV Announced pledges scenario kiloton 2025 923.
9 Copper Solar PV Announced pledges scenario kiloton 2030 1177.
10 Copper Solar PV Announced pledges scenario kiloton 2035 1369.
# ℹ 769 more rows
The echo: false option disables the printing of code (only output is displayed).