Introduction.

For this assignment, we will work with two JSON files available through the API at nobelprize.org, ask four interesting questions, and answer them using data analysis.

Load Packages Required for the Project

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(jsonlite)
## 
## Attaching package: 'jsonlite'
## 
## The following object is masked from 'package:purrr':
## 
##     flatten
library(httr2)

Load Json files into R

I will retrieve the Prize and Laureate files.

# API endpoints for the two Nobel Prize JSON documents
nobel_prize <- "http://api.nobelprize.org/v1/prize.json"
nobel_laureate <- "http://api.nobelprize.org/v1/laureate.json"
# Download each document and parse it into an R object
nobelPrize <- jsonlite::fromJSON(txt = nobel_prize)
nobelLaureate <- jsonlite::fromJSON(txt = nobel_laureate)
# Top-level key of each parsed document
names(nobelPrize)
## [1] "prizes"
names(nobelLaureate)
## [1] "laureates"
# Fields available inside each top-level element
names(nobelPrize[["prizes"]])
## [1] "year"              "category"          "laureates"        
## [4] "overallMotivation"
names(nobelLaureate[["laureates"]])
##  [1] "id"              "firstname"       "surname"         "born"           
##  [5] "died"            "bornCountry"     "bornCountryCode" "bornCity"       
##  [9] "diedCountry"     "diedCountryCode" "diedCity"        "gender"         
## [13] "prizes"
##  [1] "id"              "firstname"       "surname"         "born"           
##  [5] "died"            "bornCountry"     "bornCountryCode" "bornCity"       
##  [9] "diedCountry"     "diedCountryCode" "diedCity"        "gender"         
## [13] "prizes"

Start cleaning and tidying the data, and create a data frame.

# Flatten the prize table so that each laureate of a prize gets its own row.
# unnest_wider() turns the fields of the nested `laureates` column into
# list-columns; unnest() then expands those list-columns into rows in parallel.
# The columns are wrapped in c(): passing them as separate arguments is the
# deprecated unnest() interface and raises a warning.
nobelPrize <- nobelPrize$prizes %>%
  unnest_wider(laureates) %>%
  unnest(c(id, firstname, surname, motivation, share))
# Create Data Frames with JSON Data
df_nobelPrize <- bind_rows(nobelPrize)
df_nobelLaureate <- bind_rows(nobelLaureate)

# Combine the two data frames by id to start asking questions
join_df <- inner_join(df_nobelPrize, df_nobelLaureate, by = "id") %>%
  select(-c("firstname.y", "surname.y", "prizes")) %>%
  as.data.frame()

head(join_df)
##   year   category   id firstname.x surname.x
## 1 2023  chemistry 1029      Moungi   Bawendi
## 2 2023  chemistry 1030       Louis      Brus
## 3 2023  chemistry 1031     Aleksey   Yekimov
## 4 2023  economics 1034     Claudia    Goldin
## 5 2023 literature 1032         Jon     Fosse
## 6 2023      peace 1033      Narges Mohammadi
##                                                                                                          motivation
## 1                                                                 "for the discovery and synthesis of quantum dots"
## 2                                                                 "for the discovery and synthesis of quantum dots"
## 3                                                                 "for the discovery and synthesis of quantum dots"
## 4                                         "for having advanced our understanding of women’s labour market outcomes"
## 5                                            "for his innovative plays and prose which give voice to the unsayable"
## 6 "for her fight against the oppression of women in Iran and her fight to promote human rights and freedom for all"
##   share overallMotivation       born       died       bornCountry
## 1     3              <NA> 1961-00-00 0000-00-00            France
## 2     3              <NA> 1943-00-00 0000-00-00               USA
## 3     3              <NA> 1945-00-00 0000-00-00 USSR (now Russia)
## 4     1              <NA> 1946-00-00 0000-00-00               USA
## 5     1              <NA> 1959-09-29 0000-00-00            Norway
## 6     1              <NA> 1972-04-21 0000-00-00              Iran
##   bornCountryCode                       bornCity diedCountry diedCountryCode
## 1              FR                          Paris        <NA>            <NA>
## 2              US                  Cleveland, OH        <NA>            <NA>
## 3              RU Leningrad (now St. Petersburg)        <NA>            <NA>
## 4              US                   New York, NY        <NA>            <NA>
## 5              NO                      Haugesund        <NA>            <NA>
## 6              IR                         Zanjan        <NA>            <NA>
##   diedCity gender
## 1     <NA>   male
## 2     <NA>   male
## 3     <NA>   male
## 4     <NA> female
## 5     <NA>   male
## 6     <NA> female

Question 1: Which country has the most Nobel Prize winners?

# Count awards per birth country and plot the leading countries.
# Each row of join_df is one award to one laureate, so the row count per
# country is that country's number of awards.
# Earlier version: top_n(15) on grouped data without `wt` ranked rows by the
# last column (gender) and dropped rows before counting, distorting the bars.
join_df %>%
  filter(!is.na(bornCountry)) %>%
  count(bornCountry, name = "winners") %>%
  # keep only countries with more than 10 awards, then the 15 largest
  # (slice_max keeps ties, so slightly more than 15 rows are possible)
  filter(winners > 10) %>%
  slice_max(winners, n = 15) %>%
  ggplot() +
  # counts are precomputed, so use geom_col(); order bars from most to fewest
  geom_col(aes(x = reorder(bornCountry, -winners), y = winners,
               fill = bornCountry)) +
  ggtitle("Countries with most Nobel Prize Winners") +
  ylab("Numbers of Winners") +
  xlab("Birth Country") +
  theme(plot.title = element_text(hjust = 0.3),
        plot.subtitle = element_text(hjust = 0.3),
        legend.title = element_blank(),
        legend.position = "none",
        axis.text.x = element_text(angle = 85, hjust = 1))

Based on the data analysis and graph, the country with the most awards (by laureates' country of birth) is the USA, with over two hundred awards.

Question 2: How many awards went to male laureates compared with female laureates?

# Tally the awards per gender value rather than printing every row of the
# column; the values present are "female", "male" and "org" (organisations).
join_df %>%
  count(gender)
##   gender   n
## 1 female  65
## 2   male 905
## 3    org  30
# Bar chart of the number of awards per gender value.
# geom_bar() counts the rows for each x value itself, so the data is passed
# ungrouped; the earlier group_by() had no effect on the plot and was never
# ungrouped. position = "dodge" was likewise a no-op with one fill per bar.
ggplot(join_df) +
  geom_bar(aes(x = gender, fill = gender)) +
  ggtitle("Nobel Prizes by Gender") +
  xlab("Awards by gender") +
  ylab("Count") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1)) +
  labs(fill = "Gender")

Based on the graph above, there is a huge difference in the number of awards between males and females: over 800 awards went to males, against fewer than 100 to females.

Question 3: How many awards has the USA won per category in the 21st century?

# Awards to US-born laureates per category for award years 2000-2023.
# Changes from the earlier version:
#   * category is on the x axis (bornCountry is constant after the filter,
#     so it carried no information there);
#   * the label now says 2000-2023 instead of "the last century", matching
#     the filter actually applied;
#   * year is converted explicitly before the range check instead of relying
#     on implicit coercion inside %in% (year appears to arrive as text from
#     the JSON -- TODO confirm).
join_df %>%
  filter(bornCountry %in% "USA",
         between(as.integer(year), 2000L, 2023L)) %>%
  ggplot() +
  # fill duplicates the x axis, so the legend is suppressed
  geom_bar(aes(x = category, fill = category), show.legend = FALSE) +
  labs(title = "Nobel Prizes by Category",
       subtitle = "Awards to US-born laureates, 2000-2023",
       x = "Category",
       y = "Count") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 85, hjust = 1))

The category in which the US won the most awards in the 21st century is Economics, with 35.

Question 4: How many US-born women were awarded per category in the 21st century?

# Awards to US-born female laureates per category for award years 2000-2023.
# Changes from the earlier version:
#   * category is on the x axis (gender is constant after the filter);
#   * the y axis is labelled "Count" -- it shows a count, not a category;
#   * the title now says 2000-2023 instead of "last century", matching the
#     filter actually applied;
#   * year is converted explicitly before the range check (it appears to
#     arrive as text from the JSON -- TODO confirm).
join_df %>%
  filter(bornCountry %in% "USA",
         gender %in% "female",
         between(as.integer(year), 2000L, 2023L)) %>%
  ggplot() +
  # fill duplicates the x axis, so the legend is suppressed
  geom_bar(aes(x = category, fill = category), show.legend = FALSE) +
  labs(title = "Female Laureates by Category",
       subtitle = "US-born women, 2000-2023",
       x = "Category",
       y = "Count") +
  theme(plot.title = element_text(hjust = 0.5),
        plot.subtitle = element_text(hjust = 0.5),
        axis.text.x = element_text(angle = 45, hjust = 1))

A total of 9 US-born women were awarded the Nobel Prize in the 21st century.

Conclusion:

I was able to successfully load the JSON files, clean and organize the data, and analyze it in order to answer the questions.