Code
library(tidytuesdayR)
library(tidyverse)
library(tmap)
library(gt)
library(sf)
library(patchwork)
library(thematic)
thematic_on()The questions I sought to answer using the data on National Science Foundation Grant Terminations under the Trump Administration include:
How many grants, and how much money, were terminated by state or congressional district? What institutions? How can you present these on a map?
Grants from what directorates, divisions, or programs made up most of the projects terminated?
What topics or terms are most common in project titles or abstracts?
library(tidytuesdayR)
library(tidyverse)
library(tmap)
library(gt)
library(sf)
library(patchwork)
library(thematic)
thematic_on()Loading and cleaning data
tt_data <- tt_load(last_tuesday())
raw_nsf_terminations <- tt_data$nsf_terminations
nsf_terminations <- raw_nsf_terminations |>
janitor::clean_names() |>
mutate(usaspending_obligated = stringi::stri_replace_first_fixed(usaspending_obligated, "$", "") |>
readr::parse_number()) |>
mutate(in_cruz_list = !is.na(in_cruz_list)) |>
mutate(grant_number = as.character(grant_number))
nsf_terminations <- nsf_terminations |>
select(project_title, termination_letter_date,
org_name, org_city, org_state, org_district,
org_zip, usaspending_obligated, award_type,
directorate_abbrev, directorate,
division, nsf_program_name, nsf_startdate,
nsf_expected_end_date, in_cruz_list)Checking for missing values
nsf_NAs <- nsf_terminations |>
map_df(is.na) |>
map_df(sum) |>
pivot_longer(
cols = everything(),
names_to = 'var',
values_to = 'mising'
)
gt(nsf_NAs) |>
cols_label(
var = 'Variables',
mising = 'Number of Missing Values'
) |>
tab_header(
title = 'Variables with Missing Values'
) |>
opt_interactive(
page_size_default = 5
)Five variables had missing values, which will be dealt with on a case-by-case basis.
A total of 1041 were terminated, involving a total grant amount of $613,263,199. Note that the amounts involved in two of the terminated grants were missing in the data.
daily_amounts <- nsf_terminations |>
filter(!is.na(usaspending_obligated)) |>
group_by(termination_letter_date) |>
summarise(num_of_grants = n(),
amounts = sum(usaspending_obligated))
gt(daily_amounts) |>
cols_label(
termination_letter_date = 'Dates of Termination',
num_of_grants = 'Number of Grants',
amounts = 'Amounts'
) |>
tab_header(
title = md('**Dates and Total Amounts of Granted Terminated**')
) |>
fmt_currency(
columns = amounts
) |>
grand_summary_rows(
columns = c(amounts, num_of_grants),
fns = list(
list(label = md('**Total**'), fn = 'sum')
),
fmt = list(amounts ~ fmt_currency(., use_seps = TRUE),
num_of_grants ~ fmt_number(., use_seps = TRUE,
decimals = 0))
) |>
cols_align(
align = 'left',
columns = everything()
)| Dates and Total Amounts of Granted Terminated | |||
|---|---|---|---|
| Dates of Termination | Number of Grants | Amounts | |
| 2025-04-18 | 388 | $236,197,023.00 | |
| 2025-04-21 | 1 | $641,684.00 | |
| 2025-04-22 | 1 | $599,999.00 | |
| 2025-04-25 | 649 | $375,824,493.00 | |
| Total | — | 1,039 | 613,263,199 |
us_states <- read_sf('US_states.shp') |>
filter(!STUSPS %in% c('RI', 'PR', 'AK', 'HI')) |>
select(org_state = STUSPS, states = NAME, geometry)|>
left_join(nsf_terminations |>
group_by(org_state) |>
summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)))
tm_shape(us_states)+
tm_polygons('Amounts')+
tm_layout(legend.position = c('right', 'bottom'),
main.title = "Grants Terminations by States",
main.title.fontface = 'bold')+
tm_text('org_state')award1 <- nsf_terminations |>
group_by(award_type) |>
summarise(num_of_grants = n()) |>
ggplot(aes(x = reorder(award_type, num_of_grants), y = num_of_grants)) +
geom_col(fill = 'darkgreen') +
labs(x = "", y = "", title = "Number of Grants Termination by Award Type") +
coord_flip()+
theme_minimal()+
theme(plot.title.position = 'plot')
award2 <- nsf_terminations |>
group_by(award_type) |>
summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)) |>
ggplot(aes(x = reorder(award_type, Amounts), y = Amounts)) +
geom_col(fill = 'darkgreen') +
labs(x = "", y = "", title = "Grants Amounts Terminated by Award Type") +
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6,
suffix = "M"))+
coord_flip()+
theme_minimal()+
theme(plot.title.position = 'plot')
award1/award2dir1 <- nsf_terminations |>
filter(!is.na(directorate_abbrev)) |>
group_by(directorate_abbrev) |>
summarise(num_of_grants = n()) |>
ggplot(aes(x = reorder(directorate_abbrev, num_of_grants), y = num_of_grants)) +
geom_col(fill = 'darkblue') +
labs(x = "", y = "", title = "Number of Grants Termination by Directorates") +
coord_flip()+
theme_minimal()+
theme(plot.title.position = 'plot')
dir2 <- nsf_terminations |>
filter(!is.na(directorate_abbrev)) |>
group_by(directorate_abbrev) |>
summarise(Amounts = sum(usaspending_obligated, na.rm = TRUE)) |>
ggplot(aes(x = reorder(directorate_abbrev, Amounts), y = Amounts)) +
geom_col(fill = 'darkblue') +
labs(x = "", y = "", title = "Grants Amounts Terminated by Directorates") +
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6,
suffix = "M"))+
coord_flip()+
theme_minimal()+
theme(plot.title.position = 'plot')
dir1/dir2prog1 <- nsf_terminations |>
filter(!is.na(nsf_program_name)) |>
group_by(nsf_program_name) |>
summarise(num_of_prog = n()) |>
arrange(desc(num_of_prog)) |>
head(10) |>
ggplot(aes(x = reorder(nsf_program_name, num_of_prog), y = num_of_prog))+
geom_col(fill = 'darkorange')+
labs(x = '', y = '', title = 'Top 10 Most Affected Programs by Numbers')+
coord_flip()+
theme_minimal()+
theme(plot.title.position = 'plot')
prog2 <- nsf_terminations |>
filter(!is.na(nsf_program_name)) |>
group_by(nsf_program_name) |>
summarise(Amounts = sum(usaspending_obligated)) |>
arrange(desc(Amounts)) |>
head(10) |>
ggplot(aes(x = reorder(nsf_program_name, Amounts), y = Amounts))+
geom_col(fill = 'darkorange')+
labs(x = '', y = '', title = 'Top 10 Most Affected Programs by Grants Amount Terminated')+
scale_y_continuous(labels = scales::dollar_format(scale = 1/1e6,
suffix = "M"))+
coord_flip()+
theme_minimal()+
theme(plot.title.position = 'plot')
prog1/prog2library(tm)
library(wordcloud)
proj <- Corpus(VectorSource(nsf_terminations$project_title)) |>
tm_map(content_transformer(tolower)) |>
tm_map(removeNumbers) |>
tm_map(removeWords, stopwords('english')) |>
tm_map(removePunctuation) |>
tm_map(stripWhitespace)
wordcloud(proj,
scale=c(3,0.5), # Set min and max scale
max.words=100, # Set top n words
random.order=FALSE, # Words in decreasing freq
rot.per=0.4, # % of vertical words
use.r.layout=TRUE, # Use C++ collision detection
colors=brewer.pal(8,"Spectral")) #colours