This document retrieves data from the Nobel Prize API and transforms it into tidy dataframes for exploratory analysis. By examining laureate-level information—including birth countries, prize categories, and award years—we aim to uncover patterns in Nobel recognition across time, geography, and collaboration. The analysis is structured around four key questions to guide interpretation and visualization.
# Packages this document relies on (database access, data wrangling, web/API
# access, text analysis, and reporting/plotting helpers).
req_packages <- c(
  "DBI", "RMySQL", "dplyr", "dbplyr", "knitr", "tidyr", "readr", "stringr",
  "tibble", "rmarkdown", "purrr", "lubridate", "here", "httr2", "httr",
  "RCurl", "rvest", "xml2", "jsonlite", "kableExtra", "tidytext", "geniusr",
  "sentimentr", "syuzhet", "ggplot2", "tidyverse", "DT"
)

# Install whichever packages are missing, then attach every package.
for (pkg in req_packages) {
  # requireNamespace() only checks availability; unlike require() it does not
  # attach the package or emit a warning when the package is absent.
  if (!requireNamespace(pkg, quietly = TRUE)) {
    message("Installing package: ", pkg)
    install.packages(pkg, dependencies = TRUE)
  } else {
    message(pkg, " already installed.")
  }
  # library() errors if the package is still unavailable (e.g. a failed
  # install), so a missing dependency is reported here rather than later.
  library(pkg, character.only = TRUE)
}
We retrieve the Nobel Prize laureate data from the API and load it into a dataframe.
# Laureates endpoint of the Nobel Prize API (version 2.1).
# NOTE(review): the recorded output of this chunk shows 25 rows, so this
# request appears to return only one page of results -- confirm whether the
# full laureate list is intended.
url <- "https://api.nobelprize.org/2.1/laureates"

# Download and parse the JSON response; flatten = TRUE asks jsonlite to
# flatten nested data frames into a single non-nested one.
raw_data <- fromJSON(url, flatten = TRUE)

# Store the laureate records as a tibble, then print a column-wise overview.
laureates_df <- as_tibble(raw_data[["laureates"]])
laureates_df %>%
  glimpse()
## Rows: 25
## Columns: 69
## $ id <chr> "745", "102", "779", "259", "1004", "…
## $ fileName <chr> "spence", "bohr", "ciechanover", "klu…
## $ gender <chr> "male", "male", "male", "male", "male…
## $ sameAs <list> <"https://www.wikidata.org/wiki/Q157…
## $ links <list> [<data.frame[2 x 6]>], [<data.frame[…
## $ nobelPrizes <list> [<data.frame[1 x 17]>], [<data.frame…
## $ knownName.en <chr> "A. Michael Spence", "Aage N. Bohr", …
## $ knownName.se <chr> "A. Michael Spence", "Aage N. Bohr", …
## $ givenName.en <chr> "A. Michael", "Aage N.", "Aaron", "Aa…
## $ givenName.se <chr> "A. Michael", "Aage N.", "Aaron", "Aa…
## $ familyName.en <chr> "Spence", "Bohr", "Ciechanover", "Klu…
## $ familyName.se <chr> "Spence", "Bohr", "Ciechanover", "Klu…
## $ fullName.en <chr> "A. Michael Spence", "Aage Niels Bohr…
## $ fullName.se <chr> "A. Michael Spence", "Aage Niels Bohr…
## $ birth.date <chr> "1943-00-00", "1922-06-19", "1947-10-…
## $ birth.place.city.en <chr> "Montclair, NJ", "Copenhagen", "Haifa…
## $ birth.place.city.no <chr> "Montclair, NJ", "København", "Haifa"…
## $ birth.place.city.se <chr> "Montclair, NJ", "Köpenhamn", "Haifa"…
## $ birth.place.country.en <chr> "USA", "Denmark", "British Protectora…
## $ birth.place.country.no <chr> "USA", "Danmark", "British i Palestin…
## $ birth.place.country.se <chr> "USA", "Danmark", "British Protectora…
## $ birth.place.cityNow.en <chr> "Montclair, NJ", "Copenhagen", "Haifa…
## $ birth.place.cityNow.no <chr> "Montclair, NJ", "København", "Haifa"…
## $ birth.place.cityNow.se <chr> "Montclair, NJ", "Köpenhamn", "Haifa"…
## $ birth.place.cityNow.sameAs <list> <"https://www.wikidata.org/wiki/Q678…
## $ birth.place.cityNow.latitude <chr> "40.825930", "55.678127", "32.794421"…
## $ birth.place.cityNow.longitude <chr> "-74.209030", "12.572532", "34.990340…
## $ birth.place.countryNow.en <chr> "USA", "Denmark", "Israel", "Lithuani…
## $ birth.place.countryNow.no <chr> "USA", "Danmark", "Israel", "Litauen"…
## $ birth.place.countryNow.se <chr> "USA", "Danmark", "Israel", "Litauen"…
## $ birth.place.countryNow.sameAs <list> "https://www.wikidata.org/wiki/Q30",…
## $ birth.place.countryNow.latitude <chr> "39.828175", "56.000000", "31.000000"…
## $ birth.place.countryNow.longitude <chr> "-98.579500", "10.000000", "35.000000…
## $ birth.place.continent.en <chr> "North America", "Europe", "Asia", "E…
## $ birth.place.continent.no <chr> "Nord-Amerika", "Europa", "Asia", "Eu…
## $ birth.place.continent.se <chr> "Nordamerika", "Europa", "Asien", "Eu…
## $ birth.place.locationString.en <chr> "Montclair, NJ, USA", "Copenhagen, De…
## $ birth.place.locationString.no <chr> "Montclair, NJ, USA", "København, Dan…
## $ birth.place.locationString.se <chr> "Montclair, NJ, USA", "Köpenhamn, Dan…
## $ wikipedia.slug <chr> "Michael_Spence", "Aage_Bohr", "Aaron…
## $ wikipedia.english <chr> "https://en.wikipedia.org/wiki/Michae…
## $ wikidata.id <chr> "Q157245", "Q103854", "Q233205", "Q19…
## $ wikidata.url <chr> "https://www.wikidata.org/wiki/Q15724…
## $ death.date <chr> NA, "2009-09-08", NA, "2018-11-20", N…
## $ death.place.city.en <chr> NA, "Copenhagen", NA, NA, NA, "Oxford…
## $ death.place.city.no <chr> NA, "København", NA, NA, NA, "Oxford"…
## $ death.place.city.se <chr> NA, "Köpenhamn", NA, NA, NA, "Oxford"…
## $ death.place.country.en <chr> NA, "Denmark", NA, NA, NA, "United Ki…
## $ death.place.country.no <chr> NA, "Danmark", NA, NA, NA, "Storbrita…
## $ death.place.country.se <chr> NA, "Danmark", NA, NA, NA, "Storbrita…
## $ death.place.country.sameAs <chr> NA, "https://www.wikidata.org/wiki/Q3…
## $ death.place.cityNow.en <chr> NA, "Copenhagen", NA, NA, NA, "Oxford…
## $ death.place.cityNow.no <chr> NA, "København", NA, NA, NA, "Oxford"…
## $ death.place.cityNow.se <chr> NA, "Köpenhamn", NA, NA, NA, "Oxford"…
## $ death.place.cityNow.sameAs <list> <NULL>, <"https://www.wikidata.org/w…
## $ death.place.cityNow.latitude <chr> NA, "55.678127", NA, NA, NA, "51.7533…
## $ death.place.cityNow.longitude <chr> NA, "12.572532", NA, NA, NA, "-1.2609…
## $ death.place.countryNow.en <chr> NA, "Denmark", NA, NA, NA, "United Ki…
## $ death.place.countryNow.no <chr> NA, "Danmark", NA, NA, NA, "Storbrita…
## $ death.place.countryNow.se <chr> NA, "Danmark", NA, NA, NA, "Storbrita…
## $ death.place.countryNow.sameAs <list> <NULL>, "https://www.wikidata.org/wi…
## $ death.place.countryNow.latitude <chr> NA, "56.000000", NA, NA, NA, "54.6000…
## $ death.place.countryNow.longitude <chr> NA, "10.000000", NA, NA, NA, "-2.0000…
## $ death.place.continent.en <chr> NA, "Europe", NA, NA, NA, "Europe", N…
## $ death.place.continent.no <chr> NA, "Europa", NA, NA, NA, "Europa", N…
## $ death.place.continent.se <chr> NA, "Europa", NA, NA, NA, "Europa", N…
## $ death.place.locationString.en <chr> NA, "Copenhagen, Denmark", NA, "", NA…
## $ death.place.locationString.no <chr> NA, "København, Danmark", NA, "", NA,…
## $ death.place.locationString.se <chr> NA, "Köpenhamn, Danmark", NA, "", NA,…
# Questions:
# Tally prizes per category: expand the nested `nobelPrizes` list-column to
# one row per prize (prefixing its columns with "nobelPrizes_"), count the
# rows in each English category name, and order from most to fewest.
category_counts <- laureates_df %>%
  unnest(cols = nobelPrizes, names_sep = "_") %>%
  count(nobelPrizes_category.en) %>%
  arrange(desc(n))

# Horizontal bar chart of the tallies; categories are ordered by their count.
category_counts %>%
  ggplot(mapping = aes(x = reorder(nobelPrizes_category.en, n), y = n)) +
  labs(
    title = "Number of Laureates by Prize Category",
    x = "Category",
    y = "Number of Laureates"
  ) +
  geom_col(fill = "steelblue") +
  coord_flip()
# Tally prizes per award year: expand `nobelPrizes` to one row per prize and
# count the rows for each award year, converted to an integer.
yearly_counts <- laureates_df %>%
  unnest(cols = nobelPrizes, names_sep = "_") %>%
  count(year = as.integer(nobelPrizes_awardYear))

# Line chart of the yearly tallies.
yearly_counts %>%
  ggplot(mapping = aes(x = year, y = n)) +
  labs(
    title = "Number of Laureates Over Time",
    x = "Year",
    y = "Count"
  ) +
  geom_line(color = "darkgreen")
# Tally laureates per present-day birth country: drop rows with no recorded
# birth country, count the remaining rows per country (largest first), and
# keep the ten highest counts (slice_max() keeps ties by default).
country_counts <- laureates_df %>%
  drop_na(birth.place.countryNow.en) %>%
  count(birthCountry = birth.place.countryNow.en, sort = TRUE) %>%
  slice_max(order_by = n, n = 10)

# Horizontal bar chart of the tallies; countries are ordered by their count.
country_counts %>%
  ggplot(mapping = aes(x = reorder(birthCountry, n), y = n)) +
  labs(
    title = "Top Countries by Nobel Laureates",
    x = "Country",
    y = "Number of Laureates"
  ) +
  geom_col(fill = "tomato") +
  coord_flip()
In this project, we used data from the Nobel Prize website to explore who wins Nobel Prizes, where they’re from, and how those prizes are shared. We cleaned up the data so it was easier to work with, then answered four big questions covering prize categories, trends over time, laureates’ birth countries, and how prizes are shared.
From what we found, Chemistry had the most winners, and shared prizes were more common than solo ones. The United States had the highest number of laureates in our sample of 25 laureates, and recent years showed more variation in how many people win each year. Because the sample is small, these findings are illustrative rather than conclusive. This kind of analysis shows how public data can help us tell stories and spot patterns. If we wanted to go further, we could look at gender, age, or even how laureates moved between countries during their lives.