Data 608 Story 7

Where Do Strategic Minerals Come From?

Author

Yana Rabkova

Instructions

The US Geological Survey publishes a list of Strategic Minerals ( https://www.usgs.gov/news/national-news-release/us-geological-survey-releases-2022-list-critical-minerals ). Having a secure supply of these minerals is essential to our security and economic prosperity. However, many of these minerals are sourced from outside of the US. This assignment is to develop a reference catalog of the source or sources of each of these minerals and a judgement on the reliability of each source under stressed circumstances (e.g., war, economic crisis, etc.).

Notes:

You will need to identify a source or sources for each of the minerals in the 2022 List of Critical Minerals
You will need to categorize each source country as an ally, a competitor or a neutral party.
You will need to develop data visualizations that tell the story of source dependency and shortfall impact.
This assignment is due at the end of week fourteen of the semester

2025 List: https://www.usgs.gov/programs/mineral-resources-program/science/about-2025-list-critical-minerals

#load libraries
library(readr)
library(tidyverse)

── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
✔ dplyr     1.1.4     ✔ purrr     1.0.2
✔ forcats   1.0.0     ✔ stringr   1.5.1
✔ ggplot2   4.0.0     ✔ tibble    3.2.1
✔ lubridate 1.9.3     ✔ tidyr     1.3.1
── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
✖ dplyr::filter() masks stats::filter()
✖ dplyr::lag()    masks stats::lag()
ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

library(ggplot2)
library(scales)


Attaching package: 'scales'

The following object is masked from 'package:purrr':

    discard

The following object is masked from 'package:readr':

    col_factor

library(RColorBrewer)
library(gridExtra)


Attaching package: 'gridExtra'

The following object is masked from 'package:dplyr':

    combine

# 1. LOAD ALL DATASETS
# Each CSV is read with base read.csv(); text columns are kept as character
# rather than being converted to factors.

# Dataset 1: Net Import Reliance
net_import <- read.csv(
  file = "MCS2025_Fig2_Net_Import_Reliance.csv",
  stringsAsFactors = FALSE
)

# Dataset 2: Major Import Sources by Country
import_sources_country <- read.csv(
  file = "MCS2025_Fig3_Major_Import_Sources.csv",
  stringsAsFactors = FALSE
)

# Dataset 3: Critical Minerals Salient Statistics
critical_salient <- read.csv(
  file = "MCS2025_T5_Critical_Minerals_Salient.csv",
  stringsAsFactors = FALSE
)

# Dataset 4: End Use Applications
end_use <- read.csv(
  file = "MCS2025_T4_Critical_Minerals_End_Use.csv",
  stringsAsFactors = FALSE
)



# Function to parse comma-separated countries
#
# Splits one comma-separated string of country names into a character vector
# of clean names.
#
# country_string: a single string such as "China, Canada". NULL, NA and ""
#   all yield character(0). Only the first element is parsed if a longer
#   vector is supplied.
# Returns: a character vector of country names in their original order, with
#   surrounding whitespace removed, trailing footnote digits stripped
#   (e.g. "China3" becomes "China") and empty entries dropped.
parse_countries <- function(country_string) {
  if (length(country_string) == 0L) {
    return(character(0))
  }

  value <- as.character(country_string)[1L]
  if (is.na(value) || !nzchar(value)) {
    return(character(0))
  }

  countries <- strsplit(value, ",", fixed = TRUE)[[1L]]
  # "[\\h\\v]" also trims Unicode spaces (e.g. non-breaking spaces from a CSV).
  countries <- trimws(countries, whitespace = "[\\h\\v]")
  # Footnote markers glued to a name would otherwise split one country across
  # two labels in every count and plot downstream.
  countries <- trimws(sub("[0-9]+$", "", countries), whitespace = "[\\h\\v]")
  # A stray or trailing comma must not produce a blank "country".
  countries[nzchar(countries)]
}

# Create expanded dataset with one row per mineral-country pair

# Convert the net-import-reliance codes to numbers.
#
# reliance: vector of reliance codes, e.g. "100", ">95", "<25", "E", "62".
# Returns: a numeric vector of the same length. Range codes map to a fixed
#   representative value, "E" maps to NA, and anything else is read as a plain
#   number (NA when it is not one).
#
# Only values that are not a known code are passed to as.numeric(), so the
# expected range codes no longer raise "NAs introduced by coercion" warnings.
parse_import_reliance <- function(reliance) {
  if (is.numeric(reliance)) {
    return(as.numeric(reliance))
  }

  reliance <- as.character(reliance)
  code_values <- c(
    "100" = 100,
    ">95" = 97.5,
    ">75" = 80,
    ">50" = 60,
    "<50" = 40,
    "<25" = 20
  )

  parsed <- unname(code_values[reliance])
  needs_parse <- is.na(parsed) & !is.na(reliance) & reliance != "E"
  # Unparseable leftovers become NA, exactly as before, but quietly.
  parsed[needs_parse] <- suppressWarnings(as.numeric(reliance[needs_parse]))
  parsed
}

mineral_country_pairs <- net_import %>%
  # One list element of country names per commodity row.
  mutate(
    country = lapply(Major_Import_Sources_2020_2023, parse_countries)
  ) %>%
  # Rows with no listed source countries are dropped here.
  unnest(country) %>%
  group_by(Commodity) %>%
  mutate(
    import_rank = row_number()  # 1 = primary source, 2 = secondary, etc.
  ) %>%
  ungroup() %>%
  # The reliance code is a property of the row, not the group, so it is
  # converted once on the ungrouped data.
  mutate(
    import_reliance_numeric = parse_import_reliance(
      Net_Import_Reliance_pct_2024
    )
  )

Warning: There were 15 warnings in `mutate()`.
The first warning was:
ℹ In argument: `import_reliance_numeric = case_when(...)`.
ℹ In group 1: `Commodity = "ABRASIVES, fused aluminum oxide"`.
Caused by warning:
! NAs introduced by coercion
ℹ Run `dplyr::last_dplyr_warnings()` to see the 14 remaining warnings.

# STEP 1: CLEAN DATA - FILTER TO CRITICAL MINERALS ONLY


# Commodity labels to keep (based on the 2025 critical minerals list).
# Labels are compared verbatim against the Commodity column.
# NOTE(review): "CHROMIUM, all forms " carries a trailing space, presumably to
# match the raw CSV value - confirm against the data.
critical_minerals_keep <- c(
  "ALUMINUM", "ALUMINA",
  "ANTIMONY, metal and oxide",
  "ARSENIC, all forms",
  "BARITE",
  "BISMUTH, metal, alloys, and scrap",
  "CESIUM",
  "CHROMIUM, all forms ",
  "COBALT, metal, oxides, and salts",
  "FLUORSPAR",
  "GALLIUM, metal",
  "GERMANIUM",
  "GRAPHITE (NATURAL)",
  "INDIUM", "LITHIUM",
  "MAGNESIUM COMPOUNDS", "MAGNESIUM METAL",
  "MANGANESE", "NICKEL",
  "NIOBIUM (COLUMBIUM)",
  "PALLADIUM", "PLATINUM",
  "RARE EARTHS, compounds and metals",
  "RUBIDIUM", "SCANDIUM", "TANTALUM", "TELLURIUM",
  "TIN, refined",
  "TITANIUM MINERAL CONCENTRATES",
  "TITANIUM, sponge metal",
  "TUNGSTEN", "VANADIUM",
  "YTTRIUM, compounds",
  "ZINC, refined",
  "ZIRCONIUM, ores and concentrates"
)

# Restrict both tables to the critical minerals
net_import <- filter(net_import, Commodity %in% critical_minerals_keep)

mineral_country_pairs <- filter(
  mineral_country_pairs,
  Commodity %in% critical_minerals_keep
)

# Report how many distinct commodities survived the filter
cat("Filtered to", n_distinct(net_import$Commodity), "critical minerals\n\n")

Filtered to 35 critical minerals

# =============================================================================
# STEP 2: CLEAN MINERAL NAMES FOR DISPLAY
# =============================================================================

# Turn a raw commodity label into a short display name.
#
# name: character vector of commodity labels.
# Returns: the text before the first comma, with the first parenthesised
#   qualifier removed, surrounding whitespace trimmed, in title case.
clean_mineral_name <- function(name) {
  before_comma <- str_remove(name, ",.*$")
  without_parens <- str_remove(before_comma, "\\(.*\\)")
  str_to_title(str_trim(without_parens))
}

# Add cleaned names to datasets
mineral_country_pairs <- mineral_country_pairs %>%
  mutate(Commodity_clean = clean_mineral_name(Commodity))

# =============================================================================
# STEP 3: COUNTRY CLASSIFICATION
# =============================================================================

# Classify source countries as "Ally", "Competitor" or "Neutral".
#
# country: character vector of country names (a single name also works).
#   Surrounding whitespace is ignored.
# Returns: an unnamed character vector, the same length as `country`. Names
#   found in neither list, including NA, are "Neutral"; empty input gives
#   character(0).
#
# The function is vectorised, so it can be called directly on a column as well
# as element by element.
classify_country <- function(country) {
  allies <- c(
    "Albania", "Belgium", "Bulgaria", "Canada", "Croatia", "Czechia",
    "Denmark", "Estonia", "Finland", "France", "Germany", "Greece",
    "Iceland", "Italy", "Latvia", "Lithuania", "Luxembourg",
    "Netherlands", "Norway", "Poland", "Portugal", "Romania",
    "Slovenia", "Spain", "Sweden", "Turkey", "United Kingdom",
    "Australia", "Japan", "Republic of Korea", "New Zealand", "Israel",
    "Argentina", "Bahrain", "Brazil", "Chile", "Egypt", "Jordan",
    "Mexico", "Morocco", "Peru", "Philippines", "Jamaica",
    "Austria", "Ireland", "Senegal", "Madagascar"
  )

  # NOTE(review): "China3" looks like "China" with a footnote marker from the
  # raw data; it is kept so those rows are classified like China.
  competitors <- c(
    "China", "Russia", "Iran", "North Korea", "Venezuela", "Cuba",
    "Belarus", "Syria", "Myanmar", "Zimbabwe", "China3"
  )

  # "[\\h\\v]" also trims Unicode spaces such as non-breaking spaces.
  country_clean <- trimws(as.character(country), whitespace = "[\\h\\v]")

  classification <- rep("Neutral", length(country_clean))
  classification[country_clean %in% competitors] <- "Competitor"
  # Assigned last so an ally match takes precedence, as in the original
  # if/else order.
  classification[country_clean %in% allies] <- "Ally"
  classification
}

# Add classification
# vapply() always yields a plain, unnamed character vector; sapply() would
# attach the country names as element names and return a list for zero rows.
mineral_country_pairs <- mineral_country_pairs %>%
  mutate(
    classification = vapply(
      country, classify_country, character(1), USE.NAMES = FALSE
    )
  )

# Mineral summary: one row per cleaned mineral name and reliance value, with
# source counts by classification and the first-listed source.
mineral_summary <- summarise(
  group_by(mineral_country_pairs, Commodity_clean, import_reliance_numeric),
  n_allies = sum(classification == "Ally"),
  n_competitors = sum(classification == "Competitor"),
  n_neutrals = sum(classification == "Neutral"),
  total_sources = n(),
  # Rows are in import-rank order, so the first row is the primary source.
  primary_source = first(country),
  primary_classification = first(classification),
  .groups = "drop"
)

# Score rises with competitor/neutral sources and import reliance, and falls
# with the number of allied sources (0.1 avoids division by zero).
mineral_summary <- mutate(
  mineral_summary,
  vulnerability_score =
    (n_competitors * 3 + n_neutrals * 1.5) /
      (n_allies + 0.1) * (import_reliance_numeric / 100)
)

# PLOT 1: Import Reliance Distribution
# One row per mineral with a known reliance value, bucketed into reliance
# bands and counted; bands with no minerals are kept with a count of zero.
import_categories <- mineral_country_pairs %>%
  distinct(Commodity_clean, import_reliance_numeric) %>%
  filter(!is.na(import_reliance_numeric)) %>%
  mutate(
    reliance_category = factor(
      case_when(
        import_reliance_numeric == 100 ~ "100%",
        import_reliance_numeric >= 75 ~ "75-99%",
        import_reliance_numeric >= 50 ~ "50-74%",
        import_reliance_numeric >= 25 ~ "25-49%",
        TRUE ~ "<25%"
      ),
      levels = c("100%", "75-99%", "50-74%", "25-49%", "<25%")
    )
  ) %>%
  count(reliance_category) %>%
  complete(reliance_category, fill = list(n = 0))

# Bar chart of minerals per band; only the fully import-dependent band is
# highlighted in red.
plot1 <- import_categories %>%
  ggplot(aes(x = reliance_category, y = n, fill = reliance_category)) +
  geom_col(width = 0.65, show.legend = FALSE) +
  geom_text(aes(label = n), vjust = -0.3, size = 5, fontface = "bold") +
  scale_fill_manual(
    values = c(
      "100%" = "red",
      "75-99%" = "grey",
      "50-74%" = "grey",
      "25-49%" = "grey",
      "<25%" = "grey"
    )
  ) +
  # Extra headroom so the count labels above the bars are not clipped.
  scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +
  labs(
    title = "U.S. Import Dependence for Critical Minerals",
    x = "Import Reliance",
    y = "Number of Minerals"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(
      face = "bold", size = 15, hjust = 0.5, margin = margin(b = 20)
    ),
    axis.title = element_text(size = 12),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

print(plot1)

# PLOT 2: Source Country Classification
# Number (and share) of minerals whose first-listed source falls in each
# classification.
source_classification <- mineral_summary %>%
  count(primary_classification) %>%
  mutate(
    percentage = n / sum(n) * 100,
    primary_classification = factor(
      primary_classification,
      levels = c("Ally", "Neutral", "Competitor")
    )
  )

plot2 <- source_classification %>%
  ggplot(
    aes(x = primary_classification, y = n, fill = primary_classification)
  ) +
  geom_col(width = 0.6, show.legend = FALSE) +
  # Two-line label: count on top, rounded percentage underneath.
  geom_text(
    aes(label = paste0(n, "\n(", round(percentage, 0), "%)")),
    vjust = -0.3, size = 3, fontface = "bold"
  ) +
  scale_fill_manual(
    values = c(
      "Ally" = "darkgreen",
      "Neutral" = "#fee08b",
      "Competitor" = "#d73027"
    )
  ) +
  # Headroom above the tallest bar for the two-line labels.
  scale_y_continuous(expand = expansion(mult = c(0, 0.15))) +
  labs(
    title = "Allies are Primary Import Sources",
    subtitle = "This is based on the data for 35 critical minerals",
    x = "Country Classification",
    y = "Number of Minerals"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(
      face = "bold", size = 15, hjust = 0.5, margin = margin(b = 20)
    ),
    plot.subtitle = element_text(color = "darkgrey"),
    axis.title = element_text(size = 12),
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank()
  )

print(plot2)

# PLOT 3: Top Source Countries
# The ten countries that appear most often as a listed import source, ordered
# by that count for plotting.
top_sources <- mineral_country_pairs %>%
  count(country, classification, sort = TRUE) %>%
  slice_head(n = 10) %>%
  mutate(country = fct_reorder(country, n))

plot3 <- top_sources %>%
  ggplot(aes(x = country, y = n, fill = classification)) +
  geom_col(width = 0.7) +
  geom_text(aes(label = n), hjust = -0.2, size = 4, fontface = "bold") +
  # Horizontal bars keep the country names readable.
  coord_flip() +
  scale_fill_manual(
    values = c(
      "Ally" = "darkgreen",
      "Neutral" = "#fee08b",
      "Competitor" = "#d73027"
    ),
    name = NULL
  ) +
  # Room past the longest bar for its count label.
  scale_y_continuous(expand = expansion(mult = c(0, 0.12))) +
  labs(
    title = "Top 10 Source Countries for Critical Minerals",
    x = NULL,
    y = "Number of Minerals Supplied"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(
      face = "bold", size = 15, hjust = 0.5, margin = margin(b = 20)
    ),
    axis.title.x = element_text(size = 12),
    legend.position = "bottom",
    legend.text = element_text(size = 11),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank()
  )

print(plot3)

# PLOT 4 China Dependency WITH Applications
# Shows import reliance AND what each mineral is used for

# Get China-dependent minerals: China is the first-listed (rank 1) source.
# "China3" is matched as well so footnote-marked rows count as China.
# At most the 15 minerals with the highest import reliance are kept.
china_minerals <- mineral_country_pairs %>%
  filter(country %in% c("China", "China3"), import_rank == 1) %>%
  select(Commodity_clean, import_reliance_numeric) %>%
  distinct() %>%
  arrange(desc(import_reliance_numeric)) %>%
  head(15)

# Add applications from end_use
china_with_apps <- china_minerals %>%
  left_join(
    end_use %>% select(Critical.Mineral, Primary.Applications),
    by = c("Commodity_clean" = "Critical.Mineral")
  ) %>%
  mutate(
    # Prefer the joined application text; fall back to hand-written
    # descriptions for minerals the join did not match.
    Applications = case_when(
      !is.na(Primary.Applications) ~ Primary.Applications,
      Commodity_clean == "Rare Earths" ~ "Batteries, magnets, defense",
      # The cleaned commodity names are "Magnesium Metal" and
      # "Magnesium Compounds"; a bare "Magnesium" never occurs, so the metal
      # is matched explicitly (the bare name is kept for compatibility).
      Commodity_clean %in% c("Magnesium", "Magnesium Metal") ~
        "Metallurgy, alloys",
      Commodity_clean == "Yttrium" ~ "Phosphors, lasers",
      TRUE ~ "Multiple uses"
    ),
    # Shorten applications for display (35 characters at most)
    Apps_short = case_when(
      str_length(Applications) > 35 ~
        paste0(str_sub(Applications, 1, 32), "..."),
      TRUE ~ Applications
    ),
    Commodity_clean = fct_reorder(Commodity_clean, import_reliance_numeric)
  )

# Create plot with applications labeled
plot4_with_apps <- ggplot(
  china_with_apps,
  aes(x = Commodity_clean, y = import_reliance_numeric)
) +
  geom_col(width = 0.7, fill = "#fee5d9") +
  # Percentage label, right-aligned just inside the end of each bar
  geom_text(
    aes(label = paste0(round(import_reliance_numeric, 0), "%")),
    hjust = 1.1, size = 3.5, fontface = "bold", color = "black"
  ) +
  # Application label anchored at y = 0, i.e. drawn over the start of the bar
  geom_text(
    aes(label = Apps_short, y = 0),
    hjust = -0.05, size = 3, color = "gray30"
  ) +
  coord_flip() +
  scale_y_continuous(
    labels = percent_format(scale = 1),
    limits = c(0, 105),  # a little headroom beyond 100%
    expand = c(0, 0)
  ) +
  labs(
    title = "U.S. Dependency on China as Primary Source",
    subtitle = "Import reliance and primary applications for each mineral",
    x = NULL,
    y = "U.S. Import Reliance"
  ) +
  theme_minimal(base_size = 13) +
  theme(
    plot.title = element_text(
      face = "bold", size = 15, hjust = 0.5, margin = margin(b = 5)
    ),
    plot.subtitle = element_text(
      size = 11, hjust = 0.5, margin = margin(b = 20)
    ),
    panel.grid.major.y = element_blank(),
    panel.grid.minor = element_blank(),
    axis.text.y = element_text(size = 11),
    axis.title.x = element_text(size = 12)
  )

print(plot4_with_apps)

I examined 35 critical minerals (which is only half of the critical minerals) and found serious problems with America’s supply chains. We are completely dependent on imports for 15 of these minerals, and China is our main supplier for most of them. This is a big problem because these are not just random materials; they are essential for our military equipment, computer chips, batteries, and advanced technology.