Assignment 10B More JSON Practice
Working with JSON data available through either of the APIs at https://www.nobelprize.org/about/developer-zone-2/
Ask and answer 4 interesting questions
Load required Libraries.
Fetch All Laureates from Nobel API, Extract Personal data, Prizes, category, organisations, affliation country etc. and summarize that data.
Which country has recieved the most Nobel Prizes?
USA
`%||%` <- function(x, y) if (is.null(x) || length(x) == 0) y else x
get_in <- function(x, path) {
for (k in path) {
if (is.null(x) || is.null(x[[k]])) return(NA_character_)
x <- x[[k]]
}
if (is.list(x)) return(NA_character_) else return(as.character(x))
}
# --- Fetch All Laureates from Nobel API ---
fetch_all_laureates <- function(limit = 100) {
offset <- 0
all_items <- list()
repeat {
url <- sprintf("https://api.nobelprize.org/2.1/laureates?limit=%d&offset=%d", limit, offset)
resp <- fromJSON(url, simplifyVector = FALSE)
items <- resp$laureates
if (is.null(items) || length(items) == 0) break
all_items <- c(all_items, items)
if (length(items) < limit) break
offset <- offset + limit
Sys.sleep(0.2)
}
all_items
}
# Expand and Summarize Data ---
laureates <- fetch_all_laureates(limit = 100)
rows <- map_dfr(laureates, function(l) {
# Extract name
name <- get_in(l, c("knownName", "en")) %||% get_in(l, c("orgName", "en"))
# Extract personal information
gender <- get_in(l, c("gender"))
birth_date <- get_in(l, c("birth", "date"))
birth_city <- get_in(l, c("birth", "place", "city", "en"))
birth_country <- get_in(l, c("birth", "place", "country", "en"))
birth_country_now <- get_in(l, c("birth", "place", "countryNow", "en"))
# For organizations
org_country <- get_in(l, c("founded", "place", "countryNow", "en")) %||%
get_in(l, c("founded", "place", "country", "en"))
# Primary country (for counting awards)
person_country <- birth_country_now %||% birth_country
country <- person_country %||% org_country
prizes <- l$nobelPrizes
if (is.null(prizes) || length(prizes) == 0) {
return(tibble(
name = name,
gender = gender,
birth_date = birth_date,
birth_city = birth_city,
birth_country = birth_country,
birth_country_now = birth_country_now,
country = country,
category = NA_character_,
year = NA_integer_,
affiliation_country = NA_character_,
residence_country = NA_character_
))
}
map_dfr(prizes, function(p) {
year <- suppressWarnings(as.integer(p$awardYear))
category <- get_in(p, c("category", "en"))
# Extract affiliation and residence countries
affiliation_country <- NA_character_
residence_country <- NA_character_
if (!is.null(p$affiliations) && length(p$affiliations) > 0) {
affiliation_country <- get_in(p$affiliations[[1]], c("country", "en"))
}
if (!is.null(p$residences) && length(p$residences) > 0) {
residence_country <- get_in(p$residences[[1]], c("country", "en"))
}
tibble(
name = name,
gender = gender,
birth_date = birth_date,
birth_city = birth_city,
birth_country = birth_country,
birth_country_now = birth_country_now,
country = country,
year = year,
category = tolower(category),
affiliation_country = affiliation_country,
residence_country = residence_country
)
})
})
# Summary by primary country
award_summary <- rows %>%
filter(!is.na(country), country != "") %>%
count(country, sort = TRUE, name = "award_count")
top_n <- 10
kable(
head(award_summary, top_n),
caption = paste("Top", top_n, "Countries by Nobel Awards"),
col.names = c("Country", "Number of Awards"),
align = "l"
) %>%
kable_styling()
| Country | Number of Awards |
|---|---|
| USA | 298 |
| United Kingdom | 95 |
| Germany | 84 |
| France | 63 |
| Japan | 30 |
| Sweden | 30 |
| Poland | 29 |
| Russia | 29 |
| Canada | 22 |
| Italy | 20 |
How many and organisations have won a Nobel Prize?
# Count the total number of people/rows in the original data
total_award_winners <- nrow(rows)
print( total_award_winners)
## [1] 1026
Plot of top 20 countries that have recieved nobel Prizes.
top_n <- 20
award_top <- award_summary %>%
slice_head(n = top_n)
# Bar plot
ggplot(award_top, aes(x = reorder(country, award_count), y = award_count, fill = country)) +
geom_bar(stat = "identity") +
geom_text(aes(label = award_count), hjust = -0.1, size = 3) +
coord_flip() + # horizontal bars
labs(
title = paste("Top", top_n, "Countries by Nobel Awards"),
x = "Country",
y = "Number of Nobel Awards"
) +
theme_minimal() +
theme(legend.position = "none")
Who were the first 20 people to receive awards and in what
categories?
# Get first 20 laureates with name, birth country, and affiliation country
first_20 <- rows %>%
filter(!is.na(name)) %>%
select(name, birth_country, affiliation_country, year, category) %>%
arrange(year) %>%
head(20)
# Create kable table
first_20 %>%
kable(caption = "First 20 Nobel Prize Winners: Name, Birth Country, and Award Country",
col.names = c("Name", "Birth Country", "Award Country", "Year", "Category")) %>%
kable_styling()
| Name | Birth Country | Award Country | Year | Category |
|---|---|---|---|---|
| Emil von Behring | Prussia | Germany | 1901 | physiology or medicine |
| Frédéric Passy | France | NA | 1901 | peace |
| Henry Dunant | Switzerland | NA | 1901 | peace |
| Jacobus H. van ’t Hoff | the Netherlands | Germany | 1901 | chemistry |
| Sully Prudhomme | France | NA | 1901 | literature |
| Wilhelm Conrad Röntgen | Prussia | Germany | 1901 | physics |
| Albert Gobat | Switzerland | NA | 1902 | peace |
| Élie Ducommun | Switzerland | NA | 1902 | peace |
| Emil Fischer | Prussia | Germany | 1902 | chemistry |
| Hendrik A. Lorentz | the Netherlands | the Netherlands | 1902 | physics |
| Pieter Zeeman | the Netherlands | the Netherlands | 1902 | physics |
| Ronald Ross | India | United Kingdom | 1902 | physiology or medicine |
| Theodor Mommsen | Schleswig | NA | 1902 | literature |
| Bjørnstjerne Bjørnson | Norway | NA | 1903 | literature |
| Henri Becquerel | France | France | 1903 | physics |
| Marie Curie | Russian Empire | NA | 1903 | physics |
| Niels Ryberg Finsen | Faroe Islands (Denmark) | Denmark | 1903 | physiology or medicine |
| Pierre Curie | France | France | 1903 | physics |
| Randal Cremer | United Kingdom | NA | 1903 | peace |
| Svante Arrhenius | Sweden | Sweden | 1903 | chemistry |
Who are the most recent 20 people to receive awards and in what categories?
# Get latest 20 laureates with all requested fields
latest_20_detailed <- rows %>%
filter(!is.na(name), !is.na(year)) %>%
select(name, gender, birth_date, birth_country,
category, year, affiliation_country) %>%
arrange(desc(year)) %>% # Changed to descending order
head(20)
# Create kable table
latest_20_detailed %>%
kable(caption = "Latest 20 Nobel Prize Winners: Detailed Information",
col.names = c("Name", "Gender", "Birth Date",
"Birth Country", "Category", "Award Year", "Award Country")) %>%
kable_styling()
| Name | Gender | Birth Date | Birth Country | Category | Award Year | Award Country |
|---|---|---|---|---|---|---|
| Fred Ramsdell | male | 1960-12-04 | USA | physiology or medicine | 2025 | USA |
| Joel Mokyr | male | 1946-07-26 | the Netherlands | economic sciences | 2025 | USA |
| John Clarke | male | 1942-00-00 | United Kingdom | physics | 2025 | USA |
| John M. Martinis | male | 1958-00-00 | NA | physics | 2025 | USA |
| László Krasznahorkai | male | 1954-01-05 | Hungary | literature | 2025 | NA |
| Maria Corina Machado | female | 1967-00-00 | Venezuela | peace | 2025 | NA |
| Mary E. Brunkow | female | 1961-00-00 | NA | physiology or medicine | 2025 | USA |
| Michel H. Devoret | male | 1953-00-00 | France | physics | 2025 | USA |
| Omar M. Yaghi | male | 1965-02-09 | Jordan | chemistry | 2025 | USA |
| Peter Howitt | male | 1946-05-31 | Canada | economic sciences | 2025 | USA |
| Philippe Aghion | male | 1956-08-17 | France | economic sciences | 2025 | France |
| Richard Robson | male | 1937-06-04 | United Kingdom | chemistry | 2025 | Australia |
| Shimon Sakaguchi | male | 1951-01-19 | Japan | physiology or medicine | 2025 | Japan |
| Susumu Kitagawa | male | 1951-07-04 | Japan | chemistry | 2025 | Japan |
| Daron Acemoglu | male | 1967-09-03 | Turkey | economic sciences | 2024 | USA |
| David Baker | male | 1962-00-00 | USA | chemistry | 2024 | USA |
| Demis Hassabis | male | 1976-07-27 | United Kingdom | chemistry | 2024 | United Kingdom |
| Gary Ruvkun | male | 1952-00-00 | USA | physiology or medicine | 2024 | USA |
| Geoffrey Hinton | male | 1947-12-06 | United Kingdom | physics | 2024 | Canada |
| Han Kang | female | 1970-11-27 | South Korea | literature | 2024 | NA |
What is the Average age of Noble Prize winners?
# Convert the laureates list to a dataframe first
laureates_df <- map_dfr(laureates, function(l) {
name <- get_in(l, c("knownName", "en")) %||% get_in(l, c("orgName", "en"))
birth_date <- get_in(l, c("birth", "date"))
prizes <- l$nobelPrizes
if (is.null(prizes) || length(prizes) == 0) return(NULL)
map_dfr(prizes, function(p) {
tibble(
name = name,
birth_date = birth_date,
award_year = get_in(p, c("awardYear"))
)
})
})
# Create age_df from rows - INCLUDE category column
age_df <- rows %>%
filter(!is.na(year), !is.na(birth_date), birth_date != "") %>%
mutate(
birth_dt = parse_date_time(birth_date, orders = c("Ymd", "Y-m-d", "Y-m", "Y"), quiet = TRUE),
award_dt = make_date(year, 12, 10),
age_years = time_length(interval(birth_dt, award_dt), "years")
) %>%
filter(!is.na(age_years), age_years >= 0) %>%
select(name, category, year, birth_date, age_years, everything()) # Make sure category is included
# Overall average age
overall_avg_age <- mean(age_df$age_years, na.rm = TRUE)
cat(sprintf("Overall average age at award: %.2f years (N = %d laureate–prize records)\n",
overall_avg_age, nrow(age_df)))
## Overall average age at award: 60.77 years (N = 971 laureate–prize records)
Which categories are winning the most awards. And at what Average age are the winners?
# Average age by category
avg_by_category <- age_df %>%
group_by(category) %>%
summarise(
n = n(),
avg_age = mean(age_years, na.rm = TRUE),
median_age = median(age_years, na.rm = TRUE)
) %>%
arrange(desc(n))
#print(avg_by_category %>% mutate(avg_age = round(avg_age, 2), median_age = round(median_age, 2)))
avg_by_category %>%
mutate(avg_age = round(avg_age, 2),
median_age = round(median_age, 2)) %>%
kable(caption = "Average and Median Age by Category") %>%
kable_styling()
| category | n | avg_age | median_age |
|---|---|---|---|
| physiology or medicine | 228 | 59.24 | 58.49 |
| physics | 226 | 57.95 | 56.20 |
| chemistry | 193 | 59.48 | 58.20 |
| literature | 121 | 65.30 | 67.16 |
| peace | 110 | 61.21 | 62.02 |
| economic sciences | 93 | 67.58 | 67.48 |
Are women winning Awards too? And what is the Average age by gender?
avg_by_gender <- age_df %>%
filter(!is.na(gender) & gender != "") %>%
group_by(gender) %>%
summarise(n = n(), avg_age = mean(age_years, na.rm = TRUE)) %>%
arrange(desc(n)) %>%
mutate(avg_age = round(avg_age, 2))
avg_by_gender %>%
kable(col.names = c("Gender", "Number of Laureates", "Average Age (Years)"),
caption = "Average Age of Nobel Laureates by Gender",
align = c("l", "c", "r")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "left")
| Gender | Number of Laureates | Average Age (Years) |
|---|---|---|
| male | 906 | 60.94 |
| female | 65 | 58.37 |
Who are the Youngest 10 prize winners and in what categories?
youngest <- age_df %>%
arrange(age_years) %>%
select(name, category, birth_date, age_years, year) %>%
head(10) %>%
mutate(age_years = round(age_years, 2))
youngest %>%
kable(col.names = c("Name", "Category", "Birth Date", "Age (Years)", "Award Year"),
caption = "Youngest 10 Nobel Laureate–Prize Observations",
align = c("l", "l", "c", "c", "r")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "left")
| Name | Category | Birth Date | Age (Years) | Award Year |
|---|---|---|---|---|
| Malala Yousafzai | peace | 1997-07-12 | 17.41 | 2014 |
| Lawrence Bragg | physics | 1890-03-31 | 25.69 | 1915 |
| Nadia Murad | peace | 1993-03-10 | 25.75 | 2018 |
| Werner Heisenberg | physics | 1901-12-05 | 31.01 | 1932 |
| Tsung-Dao Lee | physics | 1926-11-24 | 31.04 | 1957 |
| Carl D. Anderson | physics | 1905-09-03 | 31.27 | 1936 |
| Paul A.M. Dirac | physics | 1902-08-08 | 31.34 | 1933 |
| Frederick G. Banting | physiology or medicine | 1891-11-14 | 32.07 | 1923 |
| Tawakkol Karman | peace | 1979-02-07 | 32.84 | 2011 |
| Rudolf Mössbauer | physics | 1929-01-31 | 32.86 | 1961 |
Who are the Oldest 10 prize winners?
oldest <- age_df %>%
arrange(desc(age_years)) %>%
select(name, category, year, birth_date, age_years) %>%
head(10)%>%
mutate(age_years = round(age_years, 2))
oldest %>%
kable(col.names = c("Name", "Category", "Award Year", "Birth Date", "Age (Years)"),
caption = "Oldest 10 Nobel Laureate–Prize Observations",
align = c("l", "l", "c", "c", "r")) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE,
position = "left")
| Name | Category | Award Year | Birth Date | Age (Years) |
|---|---|---|---|---|
| John B. Goodenough | chemistry | 2019 | 1922-07-25 | 97.38 |
| Arthur Ashkin | physics | 2018 | 1922-09-02 | 96.27 |
| John J. Hopfield | physics | 2024 | 1933-07-15 | 91.41 |
| Leonid Hurwicz | economic sciences | 2007 | 1917-08-21 | 90.30 |
| Syukuro Manabe | physics | 2021 | 1931-09-21 | 90.22 |
| Klaus Hasselmann | physics | 2021 | 1931-10-25 | 90.13 |
| Lloyd S. Shapley | economic sciences | 2012 | 1923-06-02 | 89.52 |
| Roger Penrose | physics | 2020 | 1931-08-08 | 89.34 |
| Richard Robson | chemistry | 2025 | 1937-06-04 | 88.52 |
| Raymond Davis Jr. | physics | 2002 | 1914-10-14 | 88.16 |
What category has won the most prizes?
ggplot(avg_by_category, aes(x = reorder(category, -n), y = n, fill = category)) +
geom_bar(stat = "identity") +
geom_text(aes(label = n), vjust = -0.3, size = 3.5) +
labs(
title = "Number of Nobel Laureates by Category",
x = "Category",
y = "Number of Laureates"
) +
theme_minimal() +
theme(legend.position = "none")
Which Organisations have won a prize?
# Create dataframe and identify organizations
laureates_df <- data.frame(
id = sapply(laureates, function(x) x$id %||% NA),
name = sapply(laureates, function(x) get_in(x, c("knownName", "en"))),
org_name = sapply(laureates, function(x) get_in(x, c("orgName", "en"))),
gender = sapply(laureates, function(x) x$gender %||% NA),
birth_date = sapply(laureates, function(x) x$birth$date %||% NA),
founded_date = sapply(laureates, function(x) x$founded$date %||% NA),
stringsAsFactors = FALSE
)
# Filter to see only organizations
organizations_df <- laureates_df %>%
filter(!is.na(org_name) | is.na(gender))
organizations_df %>%
arrange(org_name) %>%
select(org_name, founded_date, id) %>%
kable(caption = "Nobel Prize Winning Organizations") %>%
kable_styling()
| org_name | founded_date | id |
|---|---|---|
| American Friends Service Committee | 1917-00-00 | 509 |
| Amnesty International | 1961-00-00 | 537 |
| Center for Civil Liberties | 2007-00-00 | 1020 |
| Doctors Without Borders | 1971-00-00 | 568 |
| European Union | 1952-00-00 | 881 |
| Friends Service Council | 1647-00-00 | 508 |
| Grameen Bank | 1976-00-00 | 810 |
| Institute of International Law | 1873-00-00 | 467 |
| Intergovernmental Panel on Climate Change | 1988-00-00 | 818 |
| International Atomic Energy Agency | 1957-00-00 | 797 |
| International Campaign to Abolish Nuclear Weapons | 2007-00-00 | 948 |
| International Campaign to Ban Landmines | 1992-00-00 | 564 |
| International Committee of the Red Cross | 1863-00-00 | 482 |
| International Labour Organization | 1919-00-00 | 527 |
| International Physicians for the Prevention of Nuclear War | 1980-00-00 | 547 |
| League of Red Cross Societies | 1919-00-00 | 523 |
| Memorial | 1987-00-00 | 1019 |
| Nansen International Office for Refugees | 1921-00-00 | 503 |
| National Dialogue Quartet | NA | 925 |
| Nihon Hidankyo | 1956-08-10 | 1043 |
| Office of the United Nations High Commissioner for Refugees | 1950-12-14 | 515 |
| Organisation for the Prohibition of Chemical Weapons | 1997-00-00 | 893 |
| Permanent International Peace Bureau | 1891-00-00 | 477 |
| Pugwash Conferences on Science and World Affairs | 1957-00-00 | 561 |
| United Nations | 1945-00-00 | 748 |
| United Nations Children’s Fund | 1946-00-00 | 525 |
| United Nations Peacekeeping Forces | 1948-00-00 | 550 |
| World Food Programme | 1961-00-00 | 994 |
Which country “lost” the most nobel laureates (who were born there but received their Nobel prize as a citizen of a different country)?
Filter for cases where birth country is not the same as affiliation country.
# Filter laureates whose birth and affiliation countries differ
different_countries <- rows %>%
filter(!is.na(birth_country),
!is.na(affiliation_country),
birth_country != affiliation_country) %>%
select(name, year, category, birth_country, affiliation_country)
# Count occurrences of each birth country
birth_country_counts <- different_countries %>%
count(birth_country, sort = TRUE, name = "count")
# Display full results as kable
birth_country_counts %>%
kable(
caption = "All Birth Countries of Laureates Awarded in a Different Country",
col.names = c("Birth Country", "Count"),
align = "l"
) %>%
kable_styling(
full_width = FALSE,
bootstrap_options = c("striped", "hover", "condensed", "responsive")
)
| Birth Country | Count |
|---|---|
| Germany | 26 |
| United Kingdom | 24 |
| Canada | 15 |
| France | 12 |
| Austria-Hungary | 11 |
| Prussia | 11 |
| Russia | 10 |
| Russian Empire | 10 |
| Scotland | 9 |
| the Netherlands | 9 |
| Italy | 8 |
| Austria | 7 |
| China | 7 |
| Hungary | 7 |
| India | 7 |
| Japan | 7 |
| Australia | 6 |
| West Germany | 5 |
| British Mandate of Palestine | 4 |
| Poland | 4 |
| South Africa | 4 |
| USA | 4 |
| New Zealand | 3 |
| Norway | 3 |
| Sweden | 3 |
| Switzerland | 3 |
| USSR | 3 |
| Austrian Empire | 2 |
| Egypt | 2 |
| Luxembourg | 2 |
| Romania | 2 |
| Turkey | 2 |
| Argentina | 1 |
| Bavaria | 1 |
| Belgium | 1 |
| Brazil | 1 |
| British Protectorate of Palestine | 1 |
| British West Indies | 1 |
| Cyprus | 1 |
| Czechoslovakia | 1 |
| Faroe Islands (Denmark) | 1 |
| Finland | 1 |
| French Algeria | 1 |
| French protectorate of Tunisia | 1 |
| German-occupied Poland | 1 |
| Hesse-Kassel | 1 |
| Ireland | 1 |
| Java, Dutch East Indies | 1 |
| Jordan | 1 |
| Korea | 1 |
| Lebanon | 1 |
| Lithuania | 1 |
| Mecklenburg | 1 |
| Mexico | 1 |
| Morocco | 1 |
| Schleswig | 1 |
| Spain | 1 |
| Taiwan | 1 |
| Venezuela | 1 |
| Württemberg | 1 |
Show an example of birth country and affiliation country?
different_countries[1:10, ] %>%
kable(
caption = "Nobel Laureates with Birth Country Different from Affiliation Country",
col.names = c("Name", "Year", "Category", "Birth Country", "Affiliation Country"),
align = "l"
) %>%
kable_styling()
| Name | Year | Category | Birth Country | Affiliation Country |
|---|---|---|---|---|
| Aaron Ciechanover | 2004 | chemistry | British Protectorate of Palestine | Israel |
| Aaron Klug | 1982 | chemistry | Lithuania | United Kingdom |
| Abdus Salam | 1979 | physics | India | Italy |
| Abhijit Banerjee | 2019 | economic sciences | India | USA |
| Ada E. Yonath | 2009 | chemistry | British Mandate of Palestine | Israel |
| Adolf von Baeyer | 1905 | chemistry | Prussia | Germany |
| Ahmed Zewail | 1999 | chemistry | Egypt | USA |
| Alan MacDiarmid | 2000 | chemistry | New Zealand | USA |
| Albert A. Michelson | 1907 | physics | Prussia | USA |
| Albert Szent-Györgyi | 1937 | physiology or medicine | Austria-Hungary | Hungary |
The average age to win a nobel prize is 61 years old and most winners are in the psyiology and medicine categories. Women have only won 65 prizes out if 971.The rest have been won by Organisations.