## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   4.0.0     ✔ tibble    3.3.0
## ✔ lubridate 1.9.4     ✔ tidyr     1.3.1
## ✔ purrr     1.1.0     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter()  masks stats::filter()
## ✖ purrr::flatten() masks jsonlite::flatten()
## ✖ dplyr::lag()     masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
API_URL <- "https://api.nobelprize.org/2.1/laureates"
# Initialize
all_laureates <- list()
offset <- 0
limit <- 100  # Fetch 100 at a time
repeat {
  # Build URL with pagination
  url <- paste0(API_URL, "?offset=", offset, "&limit=", limit)
  
  response <- GET(url)
  
  if (status_code(response) != 200) {
    stop(paste("API Error: Status code", status_code(response)))
  }
  
  data <- content(response, as = "text", encoding = "UTF-8")
  laureates_json <- fromJSON(data, flatten = TRUE)
  
  # Get laureates from this batch
  batch <- laureates_json$laureates
  
  if (is.null(batch) || nrow(batch) == 0) {
    break  # No more data
  }
  
  all_laureates[[length(all_laureates) + 1]] <- batch
  
  print(paste("Fetched", nrow(batch), "laureates. Total so far:", 
              sum(sapply(all_laureates, nrow))))
  
  # Check if we got less than limit (last page)
  if (nrow(batch) < limit) {
    break
  }
  
  offset <- offset + limit
}
## [1] "Fetched 100 laureates. Total so far: 100"
## [1] "Fetched 100 laureates. Total so far: 200"
## [1] "Fetched 100 laureates. Total so far: 300"
## [1] "Fetched 100 laureates. Total so far: 400"
## [1] "Fetched 100 laureates. Total so far: 500"
## [1] "Fetched 100 laureates. Total so far: 600"
## [1] "Fetched 100 laureates. Total so far: 700"
## [1] "Fetched 100 laureates. Total so far: 800"
## [1] "Fetched 100 laureates. Total so far: 900"
## [1] "Fetched 100 laureates. Total so far: 1000"
## [1] "Fetched 18 laureates. Total so far: 1018"
# Combine all batches
laureates <- bind_rows(all_laureates)
print(paste(" Total laureates fetched:", nrow(laureates)))
## [1] " Total laureates fetched: 1018"
# View structure
#glimpse(laureates)
# Check column names
colnames(laureates)
##   [1] "id"                               "fileName"                        
##   [3] "gender"                           "sameAs"                          
##   [5] "links"                            "nobelPrizes"                     
##   [7] "acronym"                          "nativeName"                      
##   [9] "penName"                          "knownName.en"                    
##  [11] "knownName.se"                     "knownName.no"                    
##  [13] "givenName.en"                     "givenName.se"                    
##  [15] "givenName.no"                     "familyName.en"                   
##  [17] "familyName.se"                    "familyName.no"                   
##  [19] "fullName.en"                      "fullName.se"                     
##  [21] "fullName.no"                      "birth.date"                      
##  [23] "birth.place.city.en"              "birth.place.city.no"             
##  [25] "birth.place.city.se"              "birth.place.country.en"          
##  [27] "birth.place.country.no"           "birth.place.country.se"          
##  [29] "birth.place.cityNow.en"           "birth.place.cityNow.no"          
##  [31] "birth.place.cityNow.se"           "birth.place.cityNow.sameAs"      
##  [33] "birth.place.cityNow.latitude"     "birth.place.cityNow.longitude"   
##  [35] "birth.place.countryNow.en"        "birth.place.countryNow.no"       
##  [37] "birth.place.countryNow.se"        "birth.place.countryNow.sameAs"   
##  [39] "birth.place.countryNow.latitude"  "birth.place.countryNow.longitude"
##  [41] "birth.place.continent.en"         "birth.place.continent.no"        
##  [43] "birth.place.continent.se"         "birth.place.locationString.en"   
##  [45] "birth.place.locationString.no"    "birth.place.locationString.se"   
##  [47] "wikipedia.slug"                   "wikipedia.english"               
##  [49] "wikidata.id"                      "wikidata.url"                    
##  [51] "death.date"                       "death.place.city.en"             
##  [53] "death.place.city.no"              "death.place.city.se"             
##  [55] "death.place.country.en"           "death.place.country.no"          
##  [57] "death.place.country.se"           "death.place.country.sameAs"      
##  [59] "death.place.cityNow.en"           "death.place.cityNow.no"          
##  [61] "death.place.cityNow.se"           "death.place.cityNow.sameAs"      
##  [63] "death.place.cityNow.latitude"     "death.place.cityNow.longitude"   
##  [65] "death.place.countryNow.en"        "death.place.countryNow.no"       
##  [67] "death.place.countryNow.se"        "death.place.countryNow.sameAs"   
##  [69] "death.place.countryNow.latitude"  "death.place.countryNow.longitude"
##  [71] "death.place.continent.en"         "death.place.continent.no"        
##  [73] "death.place.continent.se"         "death.place.locationString.en"   
##  [75] "death.place.locationString.no"    "death.place.locationString.se"   
##  [77] "orgName.en"                       "orgName.no"                      
##  [79] "orgName.se"                       "founded.date"                    
##  [81] "founded.place.city.en"            "founded.place.city.no"           
##  [83] "founded.place.city.se"            "founded.place.country.en"        
##  [85] "founded.place.country.no"         "founded.place.country.se"        
##  [87] "founded.place.country.sameAs"     "founded.place.cityNow.en"        
##  [89] "founded.place.cityNow.no"         "founded.place.cityNow.se"        
##  [91] "founded.place.cityNow.sameAs"     "founded.place.countryNow.en"     
##  [93] "founded.place.countryNow.no"      "founded.place.countryNow.se"     
##  [95] "founded.place.countryNow.sameAs"  "founded.place.continent.en"      
##  [97] "founded.place.continent.no"       "founded.place.continent.se"      
##  [99] "founded.place.locationString.en"  "founded.place.locationString.no" 
## [101] "founded.place.locationString.se"  "penNameOf.fullName"              
## [103] "foundedCountry.en"                "foundedCountry.no"               
## [105] "foundedCountry.se"                "foundedCountryNow.en"            
## [107] "foundedCountryNow.no"             "foundedCountryNow.se"            
## [109] "foundedContinent.en"
# View first laureate's name
laureates$knownName.en[1]
## [1] "A. Michael Spence"
# Nobel prizes are nested - let's look at one
laureates$nobelPrizes[[1]]
##   awardYear sortOrder portion dateAwarded prizeStatus prizeAmount
## 1      2001         2     1/3  2001-10-10    received    10000000
##   prizeAmountAdjusted
## 1            15547541
##                                                                                                                                                                                                                                                                                                                                                                                                                                                 affiliations
## 1 Stanford University, Stanford University, Stanford University, Stanford University, Stanford, CA, Stanford, CA, Stanford, CA, USA, USA, USA, Stanford, CA, Stanford, CA, Stanford, CA, https://www.wikidata.org/wiki/Q173813, https://www.wikipedia.org/wiki/Stanford,_California, 37.424734, -122.163858, USA, USA, USA, https://www.wikidata.org/wiki/Q30, 39.828175, -98.579500, North America, Stanford, CA, USA, Stanford, CA, USA, Stanford, CA, USA
##                                                                                                                                                                                                                                                                                                                                                                                                                               links
## 1 nobelPrize, external, external, https://api.nobelprize.org/2/nobelPrize/eco/2001, https://www.nobelprize.org/prizes/economic-sciences/2001/spence/facts/, https://www.nobelprize.org/prizes/economic-sciences/2001/summary/, GET, GET, GET, application/json, text/html, text/html, NA, A. Michael Spence - Facts, The Sveriges Riksbank Prize in Economic Sciences in Memory of Alfred Nobel 2001, laureate facts, prize summary
##         category.en category.no category.se
## 1 Economic Sciences     Økonomi     Ekonomi
##                                                          categoryFullName.en
## 1 The Sveriges Riksbank Prize in Economic Sciences in Memory of Alfred Nobel
##                                                       categoryFullName.no
## 1 Sveriges Riksbanks pris i økonomisk vitenskap til minne om Alfred Nobel
##                                                      categoryFullName.se
## 1 Sveriges Riksbanks pris i ekonomisk vetenskap till Alfred Nobels minne
##                                               motivation.en
## 1 for their analyses of markets with asymmetric information
##                                                motivation.se
## 1 för deras analys av marknader med assymetrisk informations
## [1] "Total laureate-prize records: 1026"
## [1] "Records with missing continent: 23"
## [1] "Continental Distribution by Decade:"
## # A tibble: 26 × 4
##    decade    birth_continent count percentage
##    <chr>     <chr>           <int>      <dbl>
##  1 1975-1984 Europe             49      49.5 
##  2 1975-1984 North America      39      39.4 
##  3 1975-1984 South America       4       4.04
##  4 1975-1984 Africa              3       3.03
##  5 1975-1984 Asia                3       3.03
##  6 1975-1984 Oceania             1       1.01
##  7 1985-1994 North America      46      47.9 
##  8 1985-1994 Europe             35      36.5 
##  9 1985-1994 Asia                8       8.33
## 10 1985-1994 Africa              7       7.29
## # ℹ 16 more rows
## [1] "\nSummary Table (Wide Format):"
## # A tibble: 6 × 6
##   birth_continent `1975-1984` `1985-1994` `1995-2004` `2005-2014` `2015-2024`
##   <chr>                 <int>       <int>       <int>       <int>       <int>
## 1 Africa                    3           7           6           5           3
## 2 Asia                      3           8          11          23          19
## 3 Europe                   49          35          40          34          57
## 4 North America            39          46          55          48          46
## 5 Oceania                   1           0           4           3           0
## 6 South America             4           0           0           1           2
viz3 <- ggplot(continental_distribution, 
               aes(x = decade, y = percentage, color = birth_continent, 
                   group = birth_continent)) +
  geom_line(linewidth = 1.2, alpha = 0.9) +
  geom_point(size = 3, alpha = 0.9) +
  scale_color_manual(values = pastel_colors) +
  labs(
    title = "Continental Trends in Nobel Prize Winners (1975-2024)",
    subtitle = "Asia shows strongest upward trajectory while Europe declines proportionally",
    x = "Decade",
    y = "Percentage of Laureates (%)",
    color = "Continent of Birth",
    caption = "Source: Nobel Prize API"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 15, face = "bold", hjust = 0),
    plot.subtitle = element_text(size = 11, color = "gray40", hjust = 0),
    plot.caption = element_text(size = 9, color = "gray50", hjust = 1),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10),
    legend.position = "right",
    legend.title = element_text(size = 11, face = "bold"),
    panel.grid.minor = element_blank()
  )
print(viz3)
# Save the plot
ggsave("nobel_continental_trend_lines.png", viz3, width = 12, height = 7, dpi = 300)
### Median age at award by category and decade
# Use the more precise prize date when available; otherwise default to Dec 10 of the award year
age_by_category_decade <- laureates_expanded %>%
  filter(!is.na(nobelPrizes_awardYear)) %>%
  mutate(
    award_year = as.integer(nobelPrizes_awardYear),
    # Prefer exact date if present (e.g., "YYYY-MM-DD"); else assume Nobel ceremony date (Dec 10)
    award_date_exact = suppressWarnings(lubridate::ymd(nobelPrizes_dateAwarded)),
    award_date_fallback = lubridate::ymd(paste0(award_year, "-12-10")),
    award_date = dplyr::coalesce(award_date_exact, award_date_fallback),
    birth_date = suppressWarnings(lubridate::ymd(birth.date)),
    age_at_award = as.numeric(lubridate::interval(birth_date, award_date) / lubridate::years(1)),
    category = nobelPrizes_category.en,
    decade = dplyr::case_when(
      award_year >= 1975 & award_year < 1985 ~ "1975-1984",
      award_year >= 1985 & award_year < 1995 ~ "1985-1994",
      award_year >= 1995 & award_year < 2005 ~ "1995-2004",
      award_year >= 2005 & award_year < 2015 ~ "2005-2014",
      award_year >= 2015 & award_year <= 2025 ~ "2015-2024",
      TRUE ~ NA_character_
    )
  ) %>%
  # Keep sensible records
  filter(!is.na(birth_date), !is.na(award_date), !is.na(age_at_award),
         age_at_award > 0, !is.na(category), !is.na(decade))
# Summary table: median age by category & decade (+ count for context)
median_age_tbl <- age_by_category_decade %>%
  group_by(category, decade) %>%
  summarise(
    median_age = round(stats::median(age_at_award, na.rm = TRUE), 1),
    n = dplyr::n(),
    .groups = "drop"
  ) %>%
  arrange(category, decade)
print(median_age_tbl)
## # A tibble: 30 × 4
##    category          decade    median_age     n
##    <chr>             <chr>          <dbl> <int>
##  1 Chemistry         1975-1984       60.7    15
##  2 Chemistry         1985-1994       57.7    19
##  3 Chemistry         1995-2004       63.6    26
##  4 Chemistry         2005-2014       69.3    22
##  5 Chemistry         2015-2024       71.7    25
##  6 Economic Sciences 1975-1984       64.7    13
##  7 Economic Sciences 1985-1994       67      15
##  8 Economic Sciences 1995-2004       61.3    17
##  9 Economic Sciences 2005-2014       69.7    20
## 10 Economic Sciences 2015-2024       67.5    19
## # ℹ 20 more rows
# Optional: wide-format table (categories as rows, decades as columns)
median_age_wide <- median_age_tbl %>%
  select(category, decade, median_age) %>%
  tidyr::pivot_wider(names_from = decade, values_from = median_age) %>%
  arrange(category)
print(median_age_wide)
## # A tibble: 6 × 6
##   category           `1975-1984` `1985-1994` `1995-2004` `2005-2014` `2015-2024`
##   <chr>                    <dbl>       <dbl>       <dbl>       <dbl>       <dbl>
## 1 Chemistry                 60.7        57.7        63.6        69.3        71.7
## 2 Economic Sciences         64.7        67          61.3        69.7        67.5
## 3 Literature                73.2        65.3        70.3        71.9        69.6
## 4 Peace                     54.4        58          61.7        59.5        59  
## 5 Physics                   53.8        61          64.1        68.5        77.1
## 6 Physiology or Med…        55.6        58.8        60.5        62.9        70.2
# Visualization: faceted lines by category across decades
viz_median_age <- ggplot(median_age_tbl,
                         aes(x = decade, y = median_age, group = category)) +
  geom_line(linewidth = 1.1) +
  geom_point(size = 3) +
  labs(
    title = "Median Age at Nobel Prize Award by Category and Decade",
    subtitle = "Based on Nobel Prize API data; exact award date used when available",
    x = "Decade",
    y = "Median age (years)",
    caption = "Source: Nobel Prize API (laureates endpoint)"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 15, face = "bold", hjust = 0),
    plot.subtitle = element_text(size = 11, color = "gray40", hjust = 0),
    axis.text.x = element_text(angle = 45, hjust = 1),
    panel.grid.minor = element_blank()
  ) +
  facet_wrap(~ category, scales = "free_y")
print(viz_median_age)
ggsave("median_age_by_category_decade.png", viz_median_age,
       width = 12, height = 8, dpi = 300)
# ----------------------------------------------------------------------------
# IMPORTANT NOTE ABOUT METHODOLOGY
# ----------------------------------------------------------------------------
# This analysis counts institutional affiliations at time of award (where 
# laureates were working when they received the prize), NOT where they:
# - Earned their PhD
# - Conducted their prize-winning research
# - Were educated
# 
# Different methodologies will produce different rankings. Internet sources
# may use different criteria (e.g., counting by PhD institution or research 
# institution rather than affiliation at award time).
# ----------------------------------------------------------------------------
library(tidyverse)
# ----------------------------------------------------------------------------
# Extract Institution Data
# ----------------------------------------------------------------------------
# Get institutions from affiliations
institutions_simple <- laureates_expanded %>%
  filter(!is.na(nobelPrizes_awardYear)) %>%
  mutate(award_year = as.numeric(nobelPrizes_awardYear)) %>%
  filter(award_year >= 1975 & award_year <= 2025) %>%
  unnest(nobelPrizes_affiliations, names_sep = "_", keep_empty = FALSE) %>%
  filter(!is.na(nobelPrizes_affiliations_name.en)) %>%
  select(
    laureate_id = id,
    award_year,
    institution = nobelPrizes_affiliations_name.en,
    country = nobelPrizes_affiliations_country.en
  )
# Count laureates per institution
top_institutions <- institutions_simple %>%
  group_by(institution, country) %>%
  summarise(laureates = n_distinct(laureate_id), .groups = "drop") %>%
  arrange(desc(laureates)) %>%
  head(15)
print("Top 15 Institutions:")
## [1] "Top 15 Institutions:"
print(top_institutions)
## # A tibble: 15 × 3
##    institution                                  country        laureates
##    <chr>                                        <chr>              <int>
##  1 University of California                     USA                   32
##  2 Massachusetts Institute of Technology (MIT)  USA                   22
##  3 Stanford University                          USA                   17
##  4 University of Chicago                        USA                   17
##  5 Howard Hughes Medical Institute              USA                   16
##  6 Princeton University                         USA                   16
##  7 Harvard University                           USA                   14
##  8 Columbia University                          USA                   11
##  9 California Institute of Technology (Caltech) USA                   10
## 10 Yale University                              USA                    8
## 11 MRC Laboratory of Molecular Biology          United Kingdom         7
## 12 Rockefeller University                       USA                    7
## 13 Harvard Medical School                       USA                    6
## 14 University of Cambridge                      United Kingdom         6
## 15 Bell Laboratories                            USA                    5
# ----------------------------------------------------------------------------
# Bar chart
# ----------------------------------------------------------------------------
viz_simple <- ggplot(top_institutions, 
                     aes(x = reorder(institution, laureates),  
                         y = laureates)) +
  geom_col(fill = "#FF5F1F", alpha = 0.85, width = 0.7) +
  geom_text(aes(label = laureates), 
            hjust = 1.2,
            size = 4, 
            fontface = "bold",
            color = "navy") +
  coord_flip() +
  labs(
    title = "Top 15 Institutions \n Producing Nobel Laureates\n (1975-2024)",
    x = NULL,
    y = "Number of Nobel Laureates",
    caption = "Source: Nobel Prize API"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 15, face = "bold"),
    axis.text.y = element_text(size = 10, face = "bold"),  # Smaller text for long names
    panel.grid.major.y = element_blank(),
    plot.margin = margin(10, 10, 10, 80)  # Extra left margin for long names
  )
print(viz_simple)
ggsave("nobel_institutions_simple.png", viz_simple, 
       width = 14, height = 8, dpi = 300)  # Wider to accommodate long names
# Quick summary
cat("\nQuick Summary:\n")
## 
## Quick Summary:
cat("Top Institution:", top_institutions$institution[1], 
    "with", top_institutions$laureates[1], "laureates\n")
## Top Institution: University of California with 32 laureates
cat("US institutions in top 15:", sum(top_institutions$country == "USA", na.rm = TRUE), "\n")
## US institutions in top 15: 13
library(tidyverse)
# ============================================================================
# Question 5: Youngest Nobel Prize Winners by Region (1985-2025)
# ============================================================================
# Calculate age at award
youngest_by_region <- nobel_geographic %>%
  filter(award_year >= 1985 & award_year <= 2025) %>%
  filter(!is.na(birth_continent) & !is.na(birth_country)) %>%
  left_join(
    laureates %>% select(id, birth.date),
    by = c("laureate_id" = "id")
  ) %>%
  mutate(
    birth_year = as.numeric(str_sub(birth.date, 1, 4)),
    age_at_award = award_year - birth_year
  ) %>%
  filter(!is.na(age_at_award) & age_at_award > 0) %>%
  group_by(birth_continent) %>%
  arrange(age_at_award) %>%
  slice(1) %>%
  ungroup() %>%
  arrange(age_at_award) %>%
  select(name, birth_continent, birth_country, category, award_year, age_at_award)
# Print results
cat("\n", strrep("=", 80), "\n")
## 
##  ================================================================================
cat("YOUNGEST NOBEL PRIZE WINNERS BY REGION AT TIME OF AWARD (1985-2025)\n")
## YOUNGEST NOBEL PRIZE WINNERS BY REGION AT TIME OF AWARD (1985-2025)
cat(strrep("=", 80), "\n\n")
## ================================================================================
for(i in 1:nrow(youngest_by_region)) {
  cat(sprintf("%s: %s (%s) - Age %d - %s %d\n",
              youngest_by_region$birth_continent[i],
              youngest_by_region$name[i],
              youngest_by_region$birth_country[i],
              youngest_by_region$age_at_award[i],
              youngest_by_region$category[i],
              youngest_by_region$award_year[i]))
}
## Asia: Malala Yousafzai (Pakistan) - Age 17 - Peace 2014
## North America: Rigoberta Menchú Tum (Guatemala) - Age 33 - Peace 1992
## Europe: Konstantin Novoselov (Russia) - Age 36 - Physics 2010
## Africa: Leymah Gbowee (Liberia) - Age 39 - Peace 2011
## Oceania: José Ramos-Horta (East Timor) - Age 47 - Peace 1996
## South America: Maria Corina Machado (Venezuela) - Age 58 - Peace 2025
cat("\n", strrep("=", 80), "\n")
## 
##  ================================================================================
print(youngest_by_region)
## # A tibble: 6 × 6
##   name            birth_continent birth_country category award_year age_at_award
##   <chr>           <chr>           <chr>         <chr>         <dbl>        <dbl>
## 1 Malala Yousafz… Asia            Pakistan      Peace          2014           17
## 2 Rigoberta Menc… North America   Guatemala     Peace          1992           33
## 3 Konstantin Nov… Europe          Russia        Physics        2010           36
## 4 Leymah Gbowee   Africa          Liberia       Peace          2011           39
## 5 José Ramos-Hor… Oceania         East Timor    Peace          1996           47
## 6 Maria Corina M… South America   Venezuela     Peace          2025           58
library(tidyverse)
# Count prizes by category and decade
category_by_decade <- nobel_geographic %>%
  filter(award_year >= 1975 & award_year <= 2025) %>%
  filter(!is.na(category)) %>%
  mutate(
    decade = case_when(
      award_year >= 1975 & award_year < 1985 ~ "1975-1984",
      award_year >= 1985 & award_year < 1995 ~ "1985-1994",
      award_year >= 1995 & award_year < 2005 ~ "1995-2004",
      award_year >= 2005 & award_year < 2015 ~ "2005-2014",
      award_year >= 2015 & award_year <= 2025 ~ "2015-2024",
      TRUE ~ "Other"
    )
  ) %>%
  filter(decade != "Other") %>%
  group_by(decade, category) %>%
  summarise(prizes = n(), .groups = "drop")
# Overall totals by category
category_totals <- category_by_decade %>%
  group_by(category) %>%
  summarise(total_prizes = sum(prizes), .groups = "drop") %>%
  arrange(desc(total_prizes))
# Print results
cat("\n", strrep("=", 80), "\n")
## 
##  ================================================================================
cat("NOBEL PRIZES BY CATEGORY (1975-2025)\n")
## NOBEL PRIZES BY CATEGORY (1975-2025)
cat(strrep("=", 80), "\n\n")
## ================================================================================
cat("Overall Totals:\n")
## Overall Totals:
for(i in 1:nrow(category_totals)) {
  cat(sprintf("%d. %s: %d prizes\n",
              i,
              category_totals$category[i],
              category_totals$total_prizes[i]))
}
## 1. Physics: 129 prizes
## 2. Physiology or Medicine: 119 prizes
## 3. Chemistry: 114 prizes
## 4. Economic Sciences: 90 prizes
## 5. Peace: 72 prizes
## 6. Literature: 51 prizes
cat("\n\nBreakdown by Decade:\n")
## 
## 
## Breakdown by Decade:
print(category_by_decade %>% 
        pivot_wider(names_from = decade, values_from = prizes, values_fill = 0))
## # A tibble: 6 × 6
##   category           `1975-1984` `1985-1994` `1995-2004` `2005-2014` `2015-2024`
##   <chr>                    <int>       <int>       <int>       <int>       <int>
## 1 Chemistry                   15          19          26          23          31
## 2 Economic Sciences           13          15          18          20          24
## 3 Literature                  10          10          10          10          11
## 4 Peace                       13          13          15          16          15
## 5 Physics                     24          21          28          25          31
## 6 Physiology or Med…          26          20          23          25          25
# ============================================================================
# Visualization: Stacked Bar Chart by Decade
# ============================================================================
# Pastel colors for categories
category_colors <- c(
  "Physics" = "#B4A7D6",              # Pastel purple
  "Chemistry" = "#A8D8EA",            # Pastel blue
  "Physiology or Medicine" = "#CD5B45", # Pastel coral
  "Literature" = "#014D4E",           # Pastel teal
  "Peace" = "#F7B7A3",                # Pastel peach
  "Economic Sciences" = "#FFF1A8"     # Pastel yellow
)
viz_categories <- ggplot(category_by_decade, 
                         aes(x = decade, y = prizes, fill = category)) +
  geom_col(alpha = 0.85, width = 0.7) +
  geom_text(aes(label = prizes), 
            position = position_stack(vjust = 0.5),
            size = 3.5, 
            fontface = "bold",
            color = "white") +
  scale_fill_manual(values = category_colors) +
  labs(
    title = "Nobel Prize Distribution by Category\nAcross Five Decades (1975-2024)",
    x = "Decade",
    y = "Number of Prizes Awarded",
    fill = "Category",
    caption = "Source: Nobel Prize API"
  ) +
  theme_minimal(base_size = 12) +
  theme(
    plot.title = element_text(size = 15, face = "bold", hjust = 0.5),
    axis.text.x = element_text(angle = 45, hjust = 1, size = 10, face = "bold"),
    axis.text.y = element_text(size = 10),
    legend.position = "bottom",
    legend.title = element_text(face = "bold"),
    panel.grid.major.x = element_blank()
  ) +
  guides(fill = guide_legend(nrow = 2))
print(viz_categories)
ggsave("nobel_categories_by_decade.png", viz_categories, 
       width = 12, height = 8, dpi = 300)
Overall Prize Distribution:
Decade-by-Decade Trends: