Assignment 10B More JSON Practice

Working with JSON data available through either of the APIs at https://www.nobelprize.org/about/developer-zone-2/

Ask and answer 4 interesting questions

Load required Libraries.

Fetch All Laureates from Nobel API, Extract Personal data, Prizes, category, organisations, affliation country etc. and summarize that data.

Which country has recieved the most Nobel Prizes?

USA

`%||%` <- function(x, y) if (is.null(x) || length(x) == 0) y else x

get_in <- function(x, path) {
  for (k in path) {
    if (is.null(x) || is.null(x[[k]])) return(NA_character_)
    x <- x[[k]]
  }
  if (is.list(x)) return(NA_character_) else return(as.character(x))
}

# --- Fetch All Laureates from Nobel API ---
fetch_all_laureates <- function(limit = 100) {
  offset <- 0
  all_items <- list()
  
  repeat {
    url <- sprintf("https://api.nobelprize.org/2.1/laureates?limit=%d&offset=%d", limit, offset)
    resp <- fromJSON(url, simplifyVector = FALSE)
    items <- resp$laureates
    if (is.null(items) || length(items) == 0) break
    all_items <- c(all_items, items)
    if (length(items) < limit) break
    offset <- offset + limit
    Sys.sleep(0.2)
  }
  
  all_items
}

# Expand and Summarize Data ---

laureates <- fetch_all_laureates(limit = 100)
rows <- map_dfr(laureates, function(l) {
  # Extract name
  name <- get_in(l, c("knownName", "en")) %||% get_in(l, c("orgName", "en"))
  
  # Extract personal information
  gender <- get_in(l, c("gender"))
  birth_date <- get_in(l, c("birth", "date"))
  birth_city <- get_in(l, c("birth", "place", "city", "en"))
  birth_country <- get_in(l, c("birth", "place", "country", "en"))
  birth_country_now <- get_in(l, c("birth", "place", "countryNow", "en"))
  
  # For organizations
  org_country <- get_in(l, c("founded", "place", "countryNow", "en")) %||%
                 get_in(l, c("founded", "place", "country", "en"))
  
  # Primary country (for counting awards)
  person_country <- birth_country_now %||% birth_country
  country <- person_country %||% org_country
  
  prizes <- l$nobelPrizes
  if (is.null(prizes) || length(prizes) == 0) {
    return(tibble(
      name = name,
      gender = gender,
      birth_date = birth_date,
      birth_city = birth_city,
      birth_country = birth_country,
      birth_country_now = birth_country_now,
      country = country,
      category = NA_character_,
      year = NA_integer_,
      affiliation_country = NA_character_,
      residence_country = NA_character_
    ))
  }
  
  map_dfr(prizes, function(p) {
    year <- suppressWarnings(as.integer(p$awardYear))
    category <- get_in(p, c("category", "en"))
    
    # Extract affiliation and residence countries
    affiliation_country <- NA_character_
    residence_country <- NA_character_
    
    if (!is.null(p$affiliations) && length(p$affiliations) > 0) {
      affiliation_country <- get_in(p$affiliations[[1]], c("country", "en"))
    }
    
    if (!is.null(p$residences) && length(p$residences) > 0) {
      residence_country <- get_in(p$residences[[1]], c("country", "en"))
    }
    
    tibble(
      name = name,
      gender = gender,
      birth_date = birth_date,
      birth_city = birth_city,
      birth_country = birth_country,
      birth_country_now = birth_country_now,
      country = country,
      year = year,
      category = tolower(category),
      affiliation_country = affiliation_country,
      residence_country = residence_country
    )
  })
})

# Summary by primary country
award_summary <- rows %>%
  filter(!is.na(country), country != "") %>%
  count(country, sort = TRUE, name = "award_count")

top_n <- 10
kable(
  head(award_summary, top_n),
  caption = paste("Top", top_n, "Countries by Nobel Awards"),
  col.names = c("Country", "Number of Awards"),
  align = "l"
) %>%
  kable_styling()
Top 10 Countries by Nobel Awards
Country Number of Awards
USA 298
United Kingdom 95
Germany 84
France 63
Japan 30
Sweden 30
Poland 29
Russia 29
Canada 22
Italy 20

How many and organisations have won a Nobel Prize?

# Count the total number of people/rows in the original data
total_award_winners <- nrow(rows)
print( total_award_winners)
## [1] 1026

Plot of top 20 countries that have recieved nobel Prizes.

top_n <- 20
award_top <- award_summary %>% 
  slice_head(n = top_n)

# Bar plot
ggplot(award_top, aes(x = reorder(country, award_count), y = award_count, fill = country)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = award_count), hjust = -0.1, size = 3) +
  coord_flip() +  # horizontal bars
  labs(
    title = paste("Top", top_n, "Countries by Nobel Awards"),
    x = "Country",
    y = "Number of Nobel Awards"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Who were the first 20 people to receive awards and in what categories?

# Get first 20 laureates with name, birth country, and affiliation country
first_20 <- rows %>%
  filter(!is.na(name)) %>%
  select(name, birth_country, affiliation_country, year, category) %>%
  arrange(year) %>%
  head(20)

# Create kable table
first_20 %>%
  kable(caption = "First 20 Nobel Prize Winners: Name, Birth Country, and Award Country",
        col.names = c("Name", "Birth Country", "Award Country", "Year", "Category")) %>%
  kable_styling()
First 20 Nobel Prize Winners: Name, Birth Country, and Award Country
Name Birth Country Award Country Year Category
Emil von Behring Prussia Germany 1901 physiology or medicine
Frédéric Passy France NA 1901 peace
Henry Dunant Switzerland NA 1901 peace
Jacobus H. van ’t Hoff the Netherlands Germany 1901 chemistry
Sully Prudhomme France NA 1901 literature
Wilhelm Conrad Röntgen Prussia Germany 1901 physics
Albert Gobat Switzerland NA 1902 peace
Élie Ducommun Switzerland NA 1902 peace
Emil Fischer Prussia Germany 1902 chemistry
Hendrik A. Lorentz the Netherlands the Netherlands 1902 physics
Pieter Zeeman the Netherlands the Netherlands 1902 physics
Ronald Ross India United Kingdom 1902 physiology or medicine
Theodor Mommsen Schleswig NA 1902 literature
Bjørnstjerne Bjørnson Norway NA 1903 literature
Henri Becquerel France France 1903 physics
Marie Curie Russian Empire NA 1903 physics
Niels Ryberg Finsen Faroe Islands (Denmark) Denmark 1903 physiology or medicine
Pierre Curie France France 1903 physics
Randal Cremer United Kingdom NA 1903 peace
Svante Arrhenius Sweden Sweden 1903 chemistry

Who are the most recent 20 people to receive awards and in what categories?

# Get latest 20 laureates with all requested fields
latest_20_detailed <- rows %>%
  filter(!is.na(name), !is.na(year)) %>%
  select(name, gender, birth_date, birth_country, 
         category, year, affiliation_country) %>%
  arrange(desc(year)) %>%  # Changed to descending order
  head(20)

# Create kable table
latest_20_detailed %>%
  kable(caption = "Latest 20 Nobel Prize Winners: Detailed Information",
        col.names = c("Name", "Gender", "Birth Date", 
                      "Birth Country", "Category", "Award Year", "Award Country")) %>%
  kable_styling()
Latest 20 Nobel Prize Winners: Detailed Information
Name Gender Birth Date Birth Country Category Award Year Award Country
Fred Ramsdell male 1960-12-04 USA physiology or medicine 2025 USA
Joel Mokyr male 1946-07-26 the Netherlands economic sciences 2025 USA
John Clarke male 1942-00-00 United Kingdom physics 2025 USA
John M. Martinis male 1958-00-00 NA physics 2025 USA
László Krasznahorkai male 1954-01-05 Hungary literature 2025 NA
Maria Corina Machado female 1967-00-00 Venezuela peace 2025 NA
Mary E. Brunkow female 1961-00-00 NA physiology or medicine 2025 USA
Michel H. Devoret male 1953-00-00 France physics 2025 USA
Omar M. Yaghi male 1965-02-09 Jordan chemistry 2025 USA
Peter Howitt male 1946-05-31 Canada economic sciences 2025 USA
Philippe Aghion male 1956-08-17 France economic sciences 2025 France
Richard Robson male 1937-06-04 United Kingdom chemistry 2025 Australia
Shimon Sakaguchi male 1951-01-19 Japan physiology or medicine 2025 Japan
Susumu Kitagawa male 1951-07-04 Japan chemistry 2025 Japan
Daron Acemoglu male 1967-09-03 Turkey economic sciences 2024 USA
David Baker male 1962-00-00 USA chemistry 2024 USA
Demis Hassabis male 1976-07-27 United Kingdom chemistry 2024 United Kingdom
Gary Ruvkun male 1952-00-00 USA physiology or medicine 2024 USA
Geoffrey Hinton male 1947-12-06 United Kingdom physics 2024 Canada
Han Kang female 1970-11-27 South Korea literature 2024 NA

What is the Average age of Noble Prize winners?

# Convert the laureates list to a dataframe first
laureates_df <- map_dfr(laureates, function(l) {
  name <- get_in(l, c("knownName", "en")) %||% get_in(l, c("orgName", "en"))
  birth_date <- get_in(l, c("birth", "date"))
  
  prizes <- l$nobelPrizes
  if (is.null(prizes) || length(prizes) == 0) return(NULL)
  
  map_dfr(prizes, function(p) {
    tibble(
      name = name,
      birth_date = birth_date,
      award_year = get_in(p, c("awardYear"))
    )
  })
})

# Create age_df from rows - INCLUDE category column
age_df <- rows %>%
  filter(!is.na(year), !is.na(birth_date), birth_date != "") %>%
  mutate(
    birth_dt = parse_date_time(birth_date, orders = c("Ymd", "Y-m-d", "Y-m", "Y"), quiet = TRUE),
    award_dt = make_date(year, 12, 10),
    age_years = time_length(interval(birth_dt, award_dt), "years")
  ) %>%
  filter(!is.na(age_years), age_years >= 0) %>%
  select(name, category, year, birth_date, age_years, everything())  # Make sure category is included

# Overall average age
overall_avg_age <- mean(age_df$age_years, na.rm = TRUE)

cat(sprintf("Overall average age at award: %.2f years (N = %d laureate–prize records)\n",
overall_avg_age, nrow(age_df)))
## Overall average age at award: 60.77 years (N = 971 laureate–prize records)

Which categories are winning the most awards. And at what Average age are the winners?

# Average age by category
avg_by_category <- age_df %>%
  group_by(category) %>%
  summarise(
    n = n(),
    avg_age = mean(age_years, na.rm = TRUE),
    median_age = median(age_years, na.rm = TRUE)
  ) %>%
  arrange(desc(n))

#print(avg_by_category %>% mutate(avg_age = round(avg_age, 2), median_age = round(median_age, 2)))
avg_by_category %>%
  mutate(avg_age = round(avg_age, 2), 
         median_age = round(median_age, 2)) %>%
  kable(caption = "Average and Median Age by Category") %>%
  kable_styling()
Average and Median Age by Category
category n avg_age median_age
physiology or medicine 228 59.24 58.49
physics 226 57.95 56.20
chemistry 193 59.48 58.20
literature 121 65.30 67.16
peace 110 61.21 62.02
economic sciences 93 67.58 67.48

Are women winning Awards too? And what is the Average age by gender?

avg_by_gender <- age_df %>%
  filter(!is.na(gender) & gender != "") %>%
  group_by(gender) %>%
  summarise(n = n(), avg_age = mean(age_years, na.rm = TRUE)) %>%
  arrange(desc(n)) %>%
  mutate(avg_age = round(avg_age, 2))

avg_by_gender %>%
  kable(col.names = c("Gender", "Number of Laureates", "Average Age (Years)"),
        caption = "Average Age of Nobel Laureates by Gender",
        align = c("l", "c", "r")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE,
                position = "left")
Average Age of Nobel Laureates by Gender
Gender Number of Laureates Average Age (Years)
male 906 60.94
female 65 58.37

Who are the Youngest 10 prize winners and in what categories?

youngest <- age_df %>%
  arrange(age_years) %>%
  select(name, category, birth_date, age_years, year) %>%
  head(10) %>%
  mutate(age_years = round(age_years, 2))

youngest %>%
  kable(col.names = c("Name", "Category", "Birth Date", "Age (Years)", "Award Year"),
        caption = "Youngest 10 Nobel Laureate–Prize Observations",
        align = c("l", "l", "c", "c", "r")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE,
                position = "left")
Youngest 10 Nobel Laureate–Prize Observations
Name Category Birth Date Age (Years) Award Year
Malala Yousafzai peace 1997-07-12 17.41 2014
Lawrence Bragg physics 1890-03-31 25.69 1915
Nadia Murad peace 1993-03-10 25.75 2018
Werner Heisenberg physics 1901-12-05 31.01 1932
Tsung-Dao Lee physics 1926-11-24 31.04 1957
Carl D. Anderson physics 1905-09-03 31.27 1936
Paul A.M. Dirac physics 1902-08-08 31.34 1933
Frederick G. Banting physiology or medicine 1891-11-14 32.07 1923
Tawakkol Karman peace 1979-02-07 32.84 2011
Rudolf Mössbauer physics 1929-01-31 32.86 1961

Who are the Oldest 10 prize winners?

oldest <- age_df %>%
  arrange(desc(age_years)) %>%
  select(name, category, year, birth_date, age_years) %>%
  head(10)%>%
  mutate(age_years = round(age_years, 2))


oldest  %>%
  kable(col.names = c("Name", "Category", "Award Year", "Birth Date", "Age (Years)"),
        caption = "Oldest 10 Nobel Laureate–Prize Observations",
        align = c("l", "l", "c", "c", "r")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE,
                position = "left")
Oldest 10 Nobel Laureate–Prize Observations
Name Category Award Year Birth Date Age (Years)
John B. Goodenough chemistry 2019 1922-07-25 97.38
Arthur Ashkin physics 2018 1922-09-02 96.27
John J. Hopfield physics 2024 1933-07-15 91.41
Leonid Hurwicz economic sciences 2007 1917-08-21 90.30
Syukuro Manabe physics 2021 1931-09-21 90.22
Klaus Hasselmann physics 2021 1931-10-25 90.13
Lloyd S. Shapley economic sciences 2012 1923-06-02 89.52
Roger Penrose physics 2020 1931-08-08 89.34
Richard Robson chemistry 2025 1937-06-04 88.52
Raymond Davis Jr.  physics 2002 1914-10-14 88.16

What category has won the most prizes?

ggplot(avg_by_category, aes(x = reorder(category, -n), y = n, fill = category)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = n), vjust = -0.3, size = 3.5) +
  labs(
    title = "Number of Nobel Laureates by Category",
    x = "Category",
    y = "Number of Laureates"
  ) +
  theme_minimal() +
  theme(legend.position = "none")

Which Organisations have won a prize?

# Create dataframe and identify organizations
laureates_df <- data.frame(
  id = sapply(laureates, function(x) x$id %||% NA),
  name = sapply(laureates, function(x) get_in(x, c("knownName", "en"))),
  org_name = sapply(laureates, function(x) get_in(x, c("orgName", "en"))),
  gender = sapply(laureates, function(x) x$gender %||% NA),
  birth_date = sapply(laureates, function(x) x$birth$date %||% NA),
  founded_date = sapply(laureates, function(x) x$founded$date %||% NA),
  stringsAsFactors = FALSE
)

# Filter to see only organizations
organizations_df <- laureates_df %>%
  filter(!is.na(org_name) | is.na(gender))

organizations_df %>%
  arrange(org_name) %>%
  select(org_name, founded_date, id) %>%
  kable(caption = "Nobel Prize Winning Organizations") %>%
  kable_styling()
Nobel Prize Winning Organizations
org_name founded_date id
American Friends Service Committee 1917-00-00 509
Amnesty International 1961-00-00 537
Center for Civil Liberties 2007-00-00 1020
Doctors Without Borders 1971-00-00 568
European Union 1952-00-00 881
Friends Service Council 1647-00-00 508
Grameen Bank 1976-00-00 810
Institute of International Law 1873-00-00 467
Intergovernmental Panel on Climate Change 1988-00-00 818
International Atomic Energy Agency 1957-00-00 797
International Campaign to Abolish Nuclear Weapons 2007-00-00 948
International Campaign to Ban Landmines 1992-00-00 564
International Committee of the Red Cross 1863-00-00 482
International Labour Organization 1919-00-00 527
International Physicians for the Prevention of Nuclear War 1980-00-00 547
League of Red Cross Societies 1919-00-00 523
Memorial 1987-00-00 1019
Nansen International Office for Refugees 1921-00-00 503
National Dialogue Quartet NA 925
Nihon Hidankyo 1956-08-10 1043
Office of the United Nations High Commissioner for Refugees 1950-12-14 515
Organisation for the Prohibition of Chemical Weapons 1997-00-00 893
Permanent International Peace Bureau 1891-00-00 477
Pugwash Conferences on Science and World Affairs 1957-00-00 561
United Nations 1945-00-00 748
United Nations Children’s Fund 1946-00-00 525
United Nations Peacekeeping Forces 1948-00-00 550
World Food Programme 1961-00-00 994

Which country “lost” the most nobel laureates (who were born there but received their Nobel prize as a citizen of a different country)?

Filter for cases where birth country is not the same as affiliation country.

# Filter laureates whose birth and affiliation countries differ
different_countries <- rows %>%
  filter(!is.na(birth_country), 
         !is.na(affiliation_country),
         birth_country != affiliation_country) %>%
  select(name, year, category, birth_country, affiliation_country)

# Count occurrences of each birth country
birth_country_counts <- different_countries %>%
  count(birth_country, sort = TRUE, name = "count")

# Display full results as kable
birth_country_counts %>%
  kable(
    caption = "All Birth Countries of Laureates Awarded in a Different Country",
    col.names = c("Birth Country", "Count"),
    align = "l"
  ) %>%
  kable_styling(
    full_width = FALSE,
    bootstrap_options = c("striped", "hover", "condensed", "responsive")
  )
All Birth Countries of Laureates Awarded in a Different Country
Birth Country Count
Germany 26
United Kingdom 24
Canada 15
France 12
Austria-Hungary 11
Prussia 11
Russia 10
Russian Empire 10
Scotland 9
the Netherlands 9
Italy 8
Austria 7
China 7
Hungary 7
India 7
Japan 7
Australia 6
West Germany 5
British Mandate of Palestine 4
Poland 4
South Africa 4
USA 4
New Zealand 3
Norway 3
Sweden 3
Switzerland 3
USSR 3
Austrian Empire 2
Egypt 2
Luxembourg 2
Romania 2
Turkey 2
Argentina 1
Bavaria 1
Belgium 1
Brazil 1
British Protectorate of Palestine 1
British West Indies 1
Cyprus 1
Czechoslovakia 1
Faroe Islands (Denmark) 1
Finland 1
French Algeria 1
French protectorate of Tunisia 1
German-occupied Poland 1
Hesse-Kassel 1
Ireland 1
Java, Dutch East Indies 1
Jordan 1
Korea 1
Lebanon 1
Lithuania 1
Mecklenburg 1
Mexico 1
Morocco 1
Schleswig 1
Spain 1
Taiwan 1
Venezuela 1
Württemberg 1

Show an example of birth country and affiliation country?

different_countries[1:10, ] %>%
  kable(
    caption = "Nobel Laureates with Birth Country Different from Affiliation Country",
    col.names = c("Name", "Year", "Category", "Birth Country", "Affiliation Country"),
    align = "l"
  ) %>%
  kable_styling()
Nobel Laureates with Birth Country Different from Affiliation Country
Name Year Category Birth Country Affiliation Country
Aaron Ciechanover 2004 chemistry British Protectorate of Palestine Israel
Aaron Klug 1982 chemistry Lithuania United Kingdom
Abdus Salam 1979 physics India Italy
Abhijit Banerjee 2019 economic sciences India USA
Ada E. Yonath 2009 chemistry British Mandate of Palestine Israel
Adolf von Baeyer 1905 chemistry Prussia Germany
Ahmed Zewail 1999 chemistry Egypt USA
Alan MacDiarmid 2000 chemistry New Zealand USA
Albert A. Michelson 1907 physics Prussia USA
Albert Szent-Györgyi 1937 physiology or medicine Austria-Hungary Hungary

The average age to win a nobel prize is 61 years old and most winners are in the psyiology and medicine categories. Women have only won 65 prizes out if 971.The rest have been won by Organisations.