Data Import

Quarto

Quarto enables you to weave together content and executable code into a finished document. To learn more about Quarto see https://quarto.org.

Running Code

When you click the Render button a document will be generated that includes both content and the output of embedded code. You can embed code like this:

options(repos = c(CRAN = "https://cran.rstudio.com/"))
library(tidyverse) # pretty much always...
── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ readr     2.1.4
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   3.4.4     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.0
✔ purrr     1.0.2     
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(readxl) # for reading excel files
library(here) # for using relative file paths
here() starts at C:/Users/rachelz/OneDrive/Desktop/Graduate/Spring 2024/Sustainable Finance/Assignmengts/Week 5 data import

You can add options to executable code like this

path_to_sheet <- here("data-raw", "CM_Data_Explorer.xlsx")
path_to_sheet
[1] "C:/Users/rachelz/OneDrive/Desktop/Graduate/Spring 2024/Sustainable Finance/Assignmengts/Week 5 data import/data-raw/CM_Data_Explorer.xlsx"
read_EV_sheet <- partial(
  # provide the function, in this case readxl::read_excel()
  .f = read_excel,
  # provide any arguments you want filled in
  path = path_to_sheet,
  sheet = "2.3 EV",
  col_names = FALSE
)

sheet_header <- read_EV_sheet(range = "A4:W5")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
sheet_header
# A tibble: 2 × 23
  ...1   ...2 ...3  ...4    ...5  ...6  ...7  ...8  ...9 ...10 ...11 ...12 ...13
  <lgl> <dbl> <lgl> <chr>  <dbl> <dbl> <dbl> <dbl> <dbl> <lgl> <chr> <dbl> <dbl>
1 NA       NA NA    State…    NA    NA    NA    NA    NA NA    Anno…    NA    NA
2 NA     2022 NA    2025    2030  2035  2040  2045  2050 NA    2025   2030  2035
# ℹ 10 more variables: ...14 <dbl>, ...15 <dbl>, ...16 <dbl>, ...17 <lgl>,
#   ...18 <chr>, ...19 <dbl>, ...20 <dbl>, ...21 <dbl>, ...22 <dbl>,
#   ...23 <dbl>
mineral_info <- read_EV_sheet(range = "A8:W18")
New names:
• `` -> `...1`
• `` -> `...2`
• `` -> `...3`
• `` -> `...4`
• `` -> `...5`
• `` -> `...6`
• `` -> `...7`
• `` -> `...8`
• `` -> `...9`
• `` -> `...10`
• `` -> `...11`
• `` -> `...12`
• `` -> `...13`
• `` -> `...14`
• `` -> `...15`
• `` -> `...16`
• `` -> `...17`
• `` -> `...18`
• `` -> `...19`
• `` -> `...20`
• `` -> `...21`
• `` -> `...22`
• `` -> `...23`
mineral_info
# A tibble: 11 × 23
   ...1         ...2 ...3     ...4    ...5    ...6    ...7    ...8    ...9 ...10
   <chr>       <dbl> <lgl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl>   <dbl> <lgl>
 1 Copper    3.81e+2 NA    617.    1.39e+3 1.74e+3 2.23e+3 2.42e+3 2.31e+3 NA   
 2 Cobalt    6.35e+1 NA     61.2   3.80e+1 2.91e+1 3.54e+1 4.09e+1 4.74e+1 NA   
 3 Graphite  5.57e+2 NA    936.    1.59e+3 1.78e+3 1.69e+3 1.49e+3 1.08e+3 NA   
 4 Lithium   6.96e+1 NA    123.    2.19e+2 3.07e+2 4.07e+2 4.50e+2 4.13e+2 NA   
 5 Manganese 7.45e+1 NA     53.9   1.56e+2 3.36e+2 5.33e+2 7.12e+2 8.58e+2 NA   
 6 Nickel    3.13e+2 NA    554.    5.13e+2 5.89e+2 6.30e+2 6.92e+2 6.88e+2 NA   
 7 Silicon   8.70e+0 NA     40.5   1.39e+2 2.24e+2 3.52e+2 3.74e+2 3.73e+2 NA   
 8 Neodymium 3.96e+0 NA      7.25  1.22e+1 1.51e+1 1.88e+1 2.18e+1 2.29e+1 NA   
 9 Dysprosi… 4.13e-1 NA      0.741 1.21e+0 1.48e+0 1.84e+0 2.13e+0 2.23e+0 NA   
10 Praseody… 5.94e-1 NA      1.09  1.83e+0 2.27e+0 2.83e+0 3.27e+0 3.43e+0 NA   
11 Terbium   8.06e-2 NA      0.148 2.52e-1 3.14e-1 3.91e-1 4.52e-1 4.74e-1 NA   
# ℹ 13 more variables: ...11 <dbl>, ...12 <dbl>, ...13 <dbl>, ...14 <dbl>,
#   ...15 <dbl>, ...16 <dbl>, ...17 <lgl>, ...18 <dbl>, ...19 <dbl>,
#   ...20 <dbl>, ...21 <dbl>, ...22 <dbl>, ...23 <dbl>
mineral_info_col_names <- names(mineral_info)
mineral_info_col_names
 [1] "...1"  "...2"  "...3"  "...4"  "...5"  "...6"  "...7"  "...8"  "...9" 
[10] "...10" "...11" "...12" "...13" "...14" "...15" "...16" "...17" "...18"
[19] "...19" "...20" "...21" "...22" "...23"
issue <- read_EV_sheet(range = "A7") |>    
  pull()  
New names:
• `` -> `...1`
issue
[1] "Constrained nickel supply"
sheet_header_processed <- sheet_header |> 
  # transpose the data
  t() |>
  # turn it back into a tibble
  as_tibble() |>
  # make them meaningful
  rename(scenario = V1, year = V2) |>
  # fill scenario down
  fill(scenario) |>
  #insert "Current" at top
  replace_na(list(scenario = "Current Year"))
Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
sheet_header_processed
# A tibble: 23 × 2
   scenario                 year 
   <chr>                    <chr>
 1 Current Year             <NA> 
 2 Current Year             2022 
 3 Current Year             <NA> 
 4 Stated policies scenario 2025 
 5 Stated policies scenario 2030 
 6 Stated policies scenario 2035 
 7 Stated policies scenario 2040 
 8 Stated policies scenario 2045 
 9 Stated policies scenario 2050 
10 Stated policies scenario <NA> 
# ℹ 13 more rows
#-----------
sheet_headers_and_col_names <- sheet_header_processed |> 
  add_column(mineral_info_col_names = mineral_info_col_names)

sheet_headers_and_col_names
# A tibble: 23 × 3
   scenario                 year  mineral_info_col_names
   <chr>                    <chr> <chr>                 
 1 Current Year             <NA>  ...1                  
 2 Current Year             2022  ...2                  
 3 Current Year             <NA>  ...3                  
 4 Stated policies scenario 2025  ...4                  
 5 Stated policies scenario 2030  ...5                  
 6 Stated policies scenario 2035  ...6                  
 7 Stated policies scenario 2040  ...7                  
 8 Stated policies scenario 2045  ...8                  
 9 Stated policies scenario 2050  ...9                  
10 Stated policies scenario <NA>  ...10                 
# ℹ 13 more rows
#___________

mineral_info_long <- mineral_info |> 
  rename(indicator = `...1`) |> 
  pivot_longer(cols = -indicator,
               names_to = "mineral_info_col_names") |> 
  add_column(issue)

mineral_info_long
# A tibble: 242 × 4
   indicator mineral_info_col_names value issue                    
   <chr>     <chr>                  <dbl> <chr>                    
 1 Copper    ...2                    381. Constrained nickel supply
 2 Copper    ...3                     NA  Constrained nickel supply
 3 Copper    ...4                    617. Constrained nickel supply
 4 Copper    ...5                   1389. Constrained nickel supply
 5 Copper    ...6                   1736. Constrained nickel supply
 6 Copper    ...7                   2233. Constrained nickel supply
 7 Copper    ...8                   2418. Constrained nickel supply
 8 Copper    ...9                   2313. Constrained nickel supply
 9 Copper    ...10                    NA  Constrained nickel supply
10 Copper    ...11                   732. Constrained nickel supply
# ℹ 232 more rows
#-----------
combined_data <- mineral_info_long |> 
  left_join(sheet_headers_and_col_names, by = join_by(mineral_info_col_names)) |> 
  # filter out what were empty columns (where years are NA) 
  filter(!is.na(year)) |> 
  # case_when is supercharged if else
  mutate(unit = case_when(
    indicator == "Share of clean technologies in total demand" ~ "Percent",
    .default = "kiloton"
    ),
    # convert the year column from character to numeric
    year = as.integer(year)
  ) |> 
  select(issue, indicator, scenario, unit, year, value)

combined_data
# A tibble: 209 × 6
   issue                     indicator scenario                unit   year value
   <chr>                     <chr>     <chr>                   <chr> <int> <dbl>
 1 Constrained nickel supply Copper    Current Year            kilo…  2022  381.
 2 Constrained nickel supply Copper    Stated policies scenar… kilo…  2025  617.
 3 Constrained nickel supply Copper    Stated policies scenar… kilo…  2030 1389.
 4 Constrained nickel supply Copper    Stated policies scenar… kilo…  2035 1736.
 5 Constrained nickel supply Copper    Stated policies scenar… kilo…  2040 2233.
 6 Constrained nickel supply Copper    Stated policies scenar… kilo…  2045 2418.
 7 Constrained nickel supply Copper    Stated policies scenar… kilo…  2050 2313.
 8 Constrained nickel supply Copper    Announced pledges scen… kilo…  2025  732.
 9 Constrained nickel supply Copper    Announced pledges scen… kilo…  2030 2113.
10 Constrained nickel supply Copper    Announced pledges scen… kilo…  2035 3587.
# ℹ 199 more rows
read_iea_EV_table <-
  function(mineral_name_range, mineral_info_range) {
    mineral_name <-
      read_EV_sheet(range = mineral_name_range) |>
      pull()
    
    mineral_info <- read_EV_sheet(range = mineral_info_range)
    
    mineral_info_col_names <- names(mineral_info)
    
    mineral_info_long <- mineral_info |>
      rename(indicator = `...1`) |>
      pivot_longer(cols = -indicator,
                   names_to = "mineral_info_col_names") |>
      add_column(mineral_name)
    
    combined_data <- mineral_info_long |>
      left_join(sheet_headers_and_col_names, by = join_by(mineral_info_col_names)) |>
      # filter out what were empty columns (where years are NA)
      filter(!is.na(year)) |>
      rename(issue = mineral_name) |>
      # case_when is supercharged if else
      mutate(
        unit = case_when(
          indicator == "Share of clean technologies in total demand" ~ "Percent",
          .default = "kiloton"
        ),
        # convert the year column from character to numeric
        year = as.integer(year)
      ) |>
      select(issue, indicator, scenario, unit, year, value)
    
    combined_data
  }
ConstrainedNickelSupply_table <- read_iea_EV_table(
  mineral_name_range = "A7",
  mineral_info_range = "A8:W19"
)
New names:
New names:
• `` -> `...1`
WiderUseofSiliconRichAnodes_table <- read_iea_EV_table(
  mineral_name_range = "A22",
  mineral_info_range = "A23:W34"
)
New names:
New names:
• `` -> `...1`
FasterUptakeofSolidStateBatteries_table <- read_iea_EV_table(
  mineral_name_range = "A37",
  mineral_info_range = "A38:W49"
)
New names:
New names:
• `` -> `...1`
LowerBatterySizes_table <- read_iea_EV_table(
  mineral_name_range = "A52",
  mineral_info_range = "A53:W64"
)
New names:
New names:
• `` -> `...1`
LimitedBatterySizeReduction_table <- read_iea_EV_table(
  mineral_name_range = "A67",
  mineral_info_range = "A68:W79"
)
New names:
New names:
• `` -> `...1`
final_iea_EV_table <- ConstrainedNickelSupply_table |> 
  bind_rows(WiderUseofSiliconRichAnodes_table) |> 
  bind_rows(FasterUptakeofSolidStateBatteries_table) |>
  bind_rows(LowerBatterySizes_table) |>
  bind_rows(LimitedBatterySizeReduction_table)

final_iea_EV_table
# A tibble: 1,140 × 6
   issue                     indicator scenario                unit   year value
   <chr>                     <chr>     <chr>                   <chr> <int> <dbl>
 1 Constrained nickel supply Copper    Current Year            kilo…  2022  381.
 2 Constrained nickel supply Copper    Stated policies scenar… kilo…  2025  617.
 3 Constrained nickel supply Copper    Stated policies scenar… kilo…  2030 1389.
 4 Constrained nickel supply Copper    Stated policies scenar… kilo…  2035 1736.
 5 Constrained nickel supply Copper    Stated policies scenar… kilo…  2040 2233.
 6 Constrained nickel supply Copper    Stated policies scenar… kilo…  2045 2418.
 7 Constrained nickel supply Copper    Stated policies scenar… kilo…  2050 2313.
 8 Constrained nickel supply Copper    Announced pledges scen… kilo…  2025  732.
 9 Constrained nickel supply Copper    Announced pledges scen… kilo…  2030 2113.
10 Constrained nickel supply Copper    Announced pledges scen… kilo…  2035 3587.
# ℹ 1,130 more rows

Visualizations

library(ggplot2)

stated_policy <- final_iea_EV_table |>
  filter(scenario == "Stated policies scenario") |>
  filter(indicator == "Copper" | indicator == "Cobalt") 
  
ggplot(stated_policy)+
  aes(x= year, y = value , color = indicator) +
  geom_point(shape = "circle", size = 1.5) +
  geom_smooth(span = 0.8) +
  labs(x = "Year", y = "Demand for electric vehicles (kt)", title = "Mineral demand for electric vehicles (kt) for Copper and Cobalt ", subtitle = "Stated Policies Scenario, 2022-2050") +
  scale_color_hue(direction = 2) +
  scale_x_continuous(trans = "log10") +
  scale_y_continuous(trans = "log10") +
  theme_minimal() +
  theme(plot.title = element_text(size = 12))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 12 rows containing non-finite values (`stat_smooth()`).
Warning: Removed 12 rows containing missing values (`geom_point()`).

Copper_Cobalt <- final_iea_EV_table |>
  filter(indicator == "Copper" | indicator == "Cobalt")

ggplot(Copper_Cobalt, aes(x = scenario, y = value, fill = factor(indicator))) +
  geom_boxplot(fill = "skyblue", color = "black") + 
  labs(x = "Scenario", y = "Value", title = "Boxplot of Copper and Cobalt by Scenario") +
  facet_wrap(~factor(indicator)) + 
  theme(axis.text.x = element_text(angle = 30, hjust = 1)) 
Warning: Removed 24 rows containing non-finite values (`stat_boxplot()`).

Copper_futureScenario <- final_iea_EV_table |>
  filter(indicator == "Copper") |>
  filter(scenario != "Current Year")

ggplot(Copper_futureScenario, aes(x = year, y = value, color = scenario)) +
  geom_smooth(alpha = 0.5, span = 0.8) +
  labs(x = "Year", y = "Copper Demand", title = "Copper Demand Trend") +
  facet_wrap(~scenario, scales = "free_y", ncol = 1) +  
  theme(axis.text.x = element_text(angle = 30, hjust = 1),
        plot.title = element_text(size = 12),
        strip.text = element_text(size = 8))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 12 rows containing non-finite values (`stat_smooth()`).

Cobalt_futureScenario <- final_iea_EV_table |>
  filter(indicator == "Cobalt") |>
  filter(scenario != "Current Year")

ggplot(Cobalt_futureScenario, aes(x = year, y = value, color = scenario)) +
  geom_smooth(alpha = 0.5, span = 0.8) +
  labs(x = "Year", y = "Cobalt Demand", title = "Cobalt Demand Trend") +
  facet_wrap(~scenario, scales = "free_y", ncol = 1) +  
  theme(axis.text.x = element_text(angle = 30, hjust = 1),
        plot.title = element_text(size = 12),
        strip.text = element_text(size = 8))
`geom_smooth()` using method = 'loess' and formula = 'y ~ x'
Warning: Removed 12 rows containing non-finite values (`stat_smooth()`).

Copper <- final_iea_EV_table |>
  filter(indicator == "Copper")

ggplot(Copper, aes(x = year, y = value, color = scenario)) +
  geom_point(alpha = 0.6) +
  geom_line() +
  labs(x = "Year", y = "Copper Demand", title = "Copper Demand by Scenario") +
  facet_wrap(~scenario, scales = "free_y", ncol = 2) +  
  theme(axis.text.x = element_text(angle = 30, hjust = 1),
        plot.title = element_text(size = 12),
        strip.text = element_text(size = 8))  
Warning: Removed 12 rows containing missing values (`geom_point()`).
Warning: Removed 2 rows containing missing values (`geom_line()`).