TidyTuesday: National Science Foundation Grant Terminations under the Trump Administration

Author

Joseph Buoro

Published

May 8, 2025

0.1 Project Questions

The questions I sought to answer using the data on National Science Foundation Grant Terminations under the Trump Administration include:

  • How many grants, and how much money, were terminated by state or congressional district? What institutions? How can you present these on a map?

  • Grants from what directorates, divisions, or programs made up most of the projects terminated?

  • What topics or terms are most common in project titles or abstracts?

Code
library(tidytuesdayR)
library(tidyverse)
library(tmap)
library(gt)
library(sf)
library(patchwork)
library(thematic)
thematic_on()

Loading and cleaning data

Code
tt_data <- tt_load(last_tuesday())
raw_nsf_terminations <- tt_data$nsf_terminations

nsf_terminations <- raw_nsf_terminations |> 
  janitor::clean_names() |> 
  mutate(usaspending_obligated = stringi::stri_replace_first_fixed(usaspending_obligated, "$", "") |> 
           readr::parse_number()) |> 
  mutate(in_cruz_list = !is.na(in_cruz_list)) |> 
  mutate(grant_number = as.character(grant_number))

nsf_terminations <- nsf_terminations |> 
  select(project_title, termination_letter_date,
         org_name, org_city, org_state, org_district,
         org_zip, usaspending_obligated, award_type,
         directorate_abbrev, directorate,
         division, nsf_program_name, nsf_startdate,
         nsf_expected_end_date, in_cruz_list)

Checking for missing values

Code
nsf_NAs <- nsf_terminations |> 
  map_df(is.na) |> 
  map_df(sum) |> 
  pivot_longer(
    cols = everything(),
    names_to = 'var',
    values_to = 'mising'
  )
gt(nsf_NAs) |> 
  cols_label(
    var = 'Variables',
    mising = 'Number of Missing Values'
  ) |> 
  tab_header(
    title = 'Variables with Missing Values'
  ) |> 
  opt_interactive(
    page_size_default = 5
  )
Variables with Missing Values

Five variables had missing values, which will be dealt with on a case-by-case basis.

0.1.1 Number of Grants Terminated and Amount Involved

A total of 1041 were terminated, involving a total grant amount of $613,263,199. Note that the amounts involved in two of the terminated grants were missing in the data.

Code
daily_amounts <- nsf_terminations |> 
  filter(!is.na(usaspending_obligated)) |> 
  group_by(termination_letter_date) |> 
  summarise(num_of_grants = n(),
            amounts = sum(usaspending_obligated))

gt(daily_amounts) |> 
  cols_label(
    termination_letter_date = 'Dates of Termination',
    num_of_grants = 'Number of Grants',
    amounts = 'Amounts'
  ) |> 
  tab_header(
    title = md('**Dates and Total Amounts of Granted Terminated**')
  ) |> 
  fmt_currency(
    columns = amounts
  ) |> 
  grand_summary_rows(
    columns = c(amounts, num_of_grants),
    fns = list(
      list(label = md('**Total**'), fn = 'sum')
    ),
    fmt = list(amounts ~ fmt_currency(., use_seps = TRUE),
               num_of_grants ~ fmt_number(., use_seps = TRUE, 
                                          decimals = 0))
  ) |> 
  cols_align(
    align = 'left',
    columns = everything()
  )
Dates and Total Amounts of Granted Terminated
Dates of Termination Number of Grants Amounts
2025-04-18 388 $236,197,023.00
2025-04-21 1 $641,684.00
2025-04-22 1 $599,999.00
2025-04-25 649 $375,824,493.00
Total 1,039 613,263,199

0.1.2 States and Locations of Institutions Affected

Code
us_states <- read_sf('US_states.shp') |> 
  filter(!STUSPS %in% c('RI', 'PR', 'AK', 'HI')) |> 
  select(org_state = STUSPS, states = NAME, geometry)|> 
  left_join(nsf_terminations |> 
  group_by(org_state) |> 
  summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)))

tm_shape(us_states)+
  tm_polygons('Amounts')+
  tm_layout(legend.position = c('right', 'bottom'),
            main.title = "Grants Terminations by States",
            main.title.fontface = 'bold')+
  tm_text('org_state')

0.1.3 Grants Terminations by Award Type

Code
award1 <- nsf_terminations |> 
  group_by(award_type) |> 
  summarise(num_of_grants = n()) |> 
  ggplot(aes(x = reorder(award_type, num_of_grants), y = num_of_grants)) +
  geom_col(fill = 'darkgreen') +
  labs(x = "", y = "", title = "Number of Grants Termination by Award Type") +
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

award2 <- nsf_terminations |> 
  group_by(award_type) |> 
  summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)) |> 
  ggplot(aes(x = reorder(award_type, Amounts), y = Amounts)) +
  geom_col(fill = 'darkgreen') +
  labs(x = "", y = "", title = "Grants Amounts Terminated by Award Type") +
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, 
                                                  suffix = "M"))+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

award1/award2

0.1.4 Grants Terminations by Directorates

Code
dir1 <- nsf_terminations |> 
  filter(!is.na(directorate_abbrev)) |> 
  group_by(directorate_abbrev) |> 
  summarise(num_of_grants = n()) |> 
  ggplot(aes(x = reorder(directorate_abbrev, num_of_grants), y = num_of_grants)) +
  geom_col(fill = 'darkblue') +
  labs(x = "", y = "", title = "Number of Grants Termination by Directorates") +
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

dir2 <- nsf_terminations |> 
  filter(!is.na(directorate_abbrev)) |> 
  group_by(directorate_abbrev) |> 
  summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)) |> 
  ggplot(aes(x = reorder(directorate_abbrev, Amounts), y = Amounts)) +
  geom_col(fill = 'darkblue') +
  labs(x = "", y = "", title = "Grants Amounts Terminated by Directorates") +
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, 
                                                  suffix = "M"))+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

dir1/dir2

0.1.5 Grant Terminations by Programmes

Code
prog1 <- nsf_terminations |> 
  filter(!is.na(nsf_program_name)) |> 
  group_by(nsf_program_name) |> 
  summarise(num_of_prog = n()) |> 
  arrange(desc(num_of_prog)) |> 
  head(10) |> 
  ggplot(aes(x = reorder(nsf_program_name, num_of_prog), y = num_of_prog))+
  geom_col(fill = 'darkorange')+
  labs(x = '', y = '', title = 'Top 10 Most Affected Programs by Numbers')+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')

prog2 <- nsf_terminations |> 
  filter(!is.na(nsf_program_name)) |> 
  group_by(nsf_program_name) |> 
  summarise(Amounts = sum(usaspending_obligated)) |> 
  arrange(desc(Amounts)) |> 
  head(10) |> 
  ggplot(aes(x = reorder(nsf_program_name, Amounts), y = Amounts))+
  geom_col(fill = 'darkorange')+
  labs(x = '', y = '', title = 'Top 10 Most Affected Programs by Grants Amount Terminated')+
  scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6, 
                                                  suffix = "M"))+
  coord_flip()+
  theme_minimal()+
  theme(plot.title.position = 'plot')
  

prog1/prog2

0.1.6 Most Mentioned Terms In Titles of Terminated Projects

Code
library(tm)
library(wordcloud)

proj <- Corpus(VectorSource(nsf_terminations$project_title)) |> 
  tm_map(content_transformer(tolower)) |> 
  tm_map(removeNumbers) |> 
  tm_map(removeWords, stopwords('english')) |> 
  tm_map(removePunctuation) |> 
  tm_map(stripWhitespace)

wordcloud(proj,
       scale=c(3,0.5),     # Set min and max scale
       max.words=100,       # Set top n words
       random.order=FALSE, # Words in decreasing freq
       rot.per=0.4,       # % of vertical words
       use.r.layout=TRUE, # Use C++ collision detection
       colors=brewer.pal(8,"Spectral")) #colours