TidyTuesday: National Science Foundation Grant Terminations under the Trump Administration

Author

Joseph Buoro

Published

May 8, 2025

0.1 Project Questions

The questions I sought to answer using the data on National Science Foundation Grant Terminations under the Trump Administration include:

How many grants, and how much money, were terminated by state or congressional district? What institutions? How can you present these on a map?
Grants from what directorates, divisions, or programs made up most of the projects terminated?
What topics or terms are most common in project titles or abstracts?

Code

library(tidytuesdayR)
library(tidyverse)
library(tmap)
library(gt)
library(sf)
library(patchwork)
library(thematic)
thematic_on()

Loading and cleaning data

Code

tt_data <- tt_load(last_tuesday())
raw_nsf_terminations <- tt_data$nsf_terminations

nsf_terminations <- raw_nsf_terminations |> 
  janitor::clean_names() |> 
  mutate(usaspending_obligated = stringi::stri_replace_first_fixed(usaspending_obligated, "$", "") |> 
           readr::parse_number()) |> 
  mutate(in_cruz_list = !is.na(in_cruz_list)) |> 
  mutate(grant_number = as.character(grant_number))

nsf_terminations <- nsf_terminations |> 
  select(project_title, termination_letter_date,
         org_name, org_city, org_state, org_district,
         org_zip, usaspending_obligated, award_type,
         directorate_abbrev, directorate,
         division, nsf_program_name, nsf_startdate,
         nsf_expected_end_date, in_cruz_list)

Checking for missing values

Code

nsf_NAs <- nsf_terminations |> 
  map_df(is.na) |> 
  map_df(sum) |> 
  pivot_longer(
    cols = everything(),
    names_to = 'var',
    values_to = 'mising'
  )
gt(nsf_NAs) |> 
  cols_label(
    var = 'Variables',
    mising = 'Number of Missing Values'
  ) |> 
  tab_header(
    title = 'Variables with Missing Values'
  ) |> 
  opt_interactive(
    page_size_default = 5
  )

Variables with Missing Values

Five variables had missing values, which will be dealt with on a case-by-case basis.

0.1.1 Number of Grants Terminated and Amount Involved

A total of 1041 were terminated, involving a total grant amount of $613,263,199. Note that the amounts involved in two of the terminated grants were missing in the data.

Code

daily_amounts <- nsf_terminations |> 
  filter(!is.na(usaspending_obligated)) |> 
  group_by(termination_letter_date) |> 
  summarise(num_of_grants = n(),
            amounts = sum(usaspending_obligated))

gt(daily_amounts) |> 
  cols_label(
    termination_letter_date = 'Dates of Termination',
    num_of_grants = 'Number of Grants',
    amounts = 'Amounts'
  ) |> 
  tab_header(
    title = md('**Dates and Total Amounts of Granted Terminated**')
  ) |> 
  fmt_currency(
    columns = amounts
  ) |> 
  grand_summary_rows(
    columns = c(amounts, num_of_grants),
    fns = list(
      list(label = md('**Total**'), fn = 'sum')
    ),
    fmt = list(amounts ~ fmt_currency(., use_seps = TRUE),
               num_of_grants ~ fmt_number(., use_seps = TRUE, 
                                          decimals = 0))
  ) |> 
  cols_align(
    align = 'left',
    columns = everything()
  )

Dates and Total Amounts of Granted Terminated
	Dates of Termination	Number of Grants	Amounts
	2025-04-18	388	$236,197,023.00
	2025-04-21	1	$641,684.00
	2025-04-22	1	$599,999.00
	2025-04-25	649	$375,824,493.00
Total	—	1,039	613,263,199

0.1.2 States and Locations of Institutions Affected

Code

us_states <- read_sf('US_states.shp') |> 
  filter(!STUSPS %in% c('RI', 'PR', 'AK', 'HI')) |> 
  select(org_state = STUSPS, states = NAME, geometry)|> 
  left_join(nsf_terminations |> 
  group_by(org_state) |> 
  summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)))

tm_shape(us_states)+
  tm_polygons('Amounts')+
  tm_layout(legend.position = c('right', 'bottom'),
            main.title = "Grants Terminations by States",
            main.title.fontface = 'bold')+
  tm_text('org_state')

0.1.3 Grants Terminations by Award Type

Code

award1 <- nsf_terminations |> 
  group_by(award_type) |> 
  summarise(num_of_grants = n()) |> 
  ggplot(aes(x = reorder(award_type, num_of_grants), y = num_of_grants)) +
  geom_col(fill = 'darkgreen') +
  labs(x = "", y = "", title = "Number of Grants Termination by Award Type") +
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

award2 <- nsf_terminations |> 
  group_by(award_type) |> 
  summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)) |> 
  ggplot(aes(x = reorder(award_type, Amounts), y = Amounts)) +
  geom_col(fill = 'darkgreen') +
  labs(x = "", y = "", title = "Grants Amounts Terminated by Award Type") +
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, 
                                                  suffix = "M"))+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

award1/award2

0.1.4 Grants Terminations by Directorates

Code

dir1 <- nsf_terminations |> 
  filter(!is.na(directorate_abbrev)) |> 
  group_by(directorate_abbrev) |> 
  summarise(num_of_grants = n()) |> 
  ggplot(aes(x = reorder(directorate_abbrev, num_of_grants), y = num_of_grants)) +
  geom_col(fill = 'darkblue') +
  labs(x = "", y = "", title = "Number of Grants Termination by Directorates") +
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

dir2 <- nsf_terminations |> 
  filter(!is.na(directorate_abbrev)) |> 
  group_by(directorate_abbrev) |> 
  summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)) |> 
  ggplot(aes(x = reorder(directorate_abbrev, Amounts), y = Amounts)) +
  geom_col(fill = 'darkblue') +
  labs(x = "", y = "", title = "Grants Amounts Terminated by Directorates") +
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, 
                                                  suffix = "M"))+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

dir1/dir2

0.1.5 Grant Terminations by Programmes

Code

prog1 <- nsf_terminations |> 
  filter(!is.na(nsf_program_name)) |> 
  group_by(nsf_program_name) |> 
  summarise(num_of_prog = n()) |> 
  arrange(desc(num_of_prog)) |> 
  head(10) |> 
  ggplot(aes(x = reorder(nsf_program_name, num_of_prog), y = num_of_prog))+
  geom_col(fill = 'darkorange')+
  labs(x = '', y = '', title = 'Top 10 Most Affected Programs by Numbers')+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

prog2 <- nsf_terminations |> 
  filter(!is.na(nsf_program_name)) |> 
  group_by(nsf_program_name) |> 
  summarise(Amounts = sum(usaspending_obligated)) |> 
  arrange(desc(Amounts)) |> 
  head(10) |> 
  ggplot(aes(x = reorder(nsf_program_name, Amounts), y = Amounts))+
  geom_col(fill = 'darkorange')+
  labs(x = '', y = '', title = 'Top 10 Most Affected Programs by Grants Amount Terminated')+
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, 
                                                  suffix = "M"))+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')
  

prog1/prog2

0.1.6 Most Mentioned Terms In Titles of Terminated Projects

Code

library(tm)
library(wordcloud)

proj <- Corpus(VectorSource(nsf_terminations$project_title)) |> 
  tm_map(content_transformer(tolower)) |> 
  tm_map(removeNumbers) |> 
  tm_map(removeWords, stopwords('english')) |> 
  tm_map(removePunctuation) |> 
  tm_map(stripWhitespace)

wordcloud(proj,
       scale=c(3,0.5),     # Set min and max scale
       max.words=100,       # Set top n words
       random.order=FALSE, # Words in decreasing freq
       rot.per=0.4,       # % of vertical words
       use.r.layout=TRUE, # Use C++ collision detection
       colors=brewer.pal(8,"Spectral")) #colours