We use the Nobel Prize API to fetch laureate data in JSON format.
Purpose: This data seek to analyse the different parameter in the Nobel Prize API above. This capture Nobel Prize winner from 1975
url <- "http://api.nobelprize.org/v1/laureate.json"
nobelprz <- fromJSON(url)
names(nobelprz)
## [1] "laureates"
names(nobelprz$laureates)
## [1] "id" "firstname" "surname" "born"
## [5] "died" "bornCountry" "bornCountryCode" "bornCity"
## [9] "diedCountry" "diedCountryCode" "diedCity" "gender"
## [13] "prizes"
names(nobelprz$laureates$prizes[[1]])
## [1] "year" "category" "share" "motivation" "affiliations"
gender <- as.data.frame(table(nobelprz$laureates$gender), stringsAsFactors = FALSE)
ggplot(gender) + geom_bar(aes(Var1, Freq), stat = "identity", fill = "skyblue3") + theme_bw() + labs(x = "gender", y = "count", title = "All Nobel Prizes by Gender")
## Comparing the Nobel prizes won by Gender and their various categories
More male has won the nobel prize than female in all categories
cnt <- sapply(nobelprz$laureates$prizes, function(x) nrow(x))
prizes <- ldply(nobelprz$laureates$prizes, as.data.frame)
prizes$id <- rep(nobelprz$laureates$id, cnt)
prizes$gender <- rep(nobelprz$laureates$gender, cnt)
pg <- as.data.frame(table(prizes$category, prizes$gender), stringsAsFactors = FALSE)
ggplot(pg, aes(Var2, Freq)) + geom_bar(aes(fill = Var2), stat = "identity") + theme_bw() + facet_grid(. ~ Var1) + labs(x = "gender", y = "count", title = "All Nobel Prizes by Gender and Category") + scale_fill_manual(values = c("darkorange", "skyblue3", "grey"), name = NULL) + theme(axis.text.x = element_blank())
multi <- which(sapply(nobelprz$laureates$prizes, function(x) nrow(x)) > 1)
winners <- nobelprz$laureates[multi, c("firstname", "surname", "born", "bornCountry")]
print(kable(winners), type = "html", comment = FALSE, include.rownames = FALSE)
##
##
## | |firstname |surname |born |bornCountry |
## |:---|:-----------------------------------------------------------|:-------|:----------|:---------------------------|
## |6 |Marie |Curie |1867-11-07 |Russian Empire (now Poland) |
## |65 |John |Bardeen |1908-05-23 |USA |
## |213 |Linus |Pauling |1901-02-28 |USA |
## |218 |Frederick |Sanger |1918-08-13 |United Kingdom |
## |476 |International Committee of the Red Cross |NA |1863-00-00 |NA |
## |508 |Office of the United Nations High Commissioner for Refugees |NA |1950-12-14 |NA |
prizes$born <- rep(nobelprz$laureates$born, cnt)
prizes$age <- as.Date(paste(prizes$year, "12-31", sep = "-"), "%Y-%m-%d") - as.Date(prizes$born, "%Y-%m-%d")
ggplot(prizes[!is.na(prizes$category), ]) + geom_boxplot(aes(category, as.numeric(age) / 365), fill = "red") + theme_bw() + stat_summary(aes(category, as.numeric(age) / 365), fun.y = "median", geom = "point") + labs(x = "category", y = "age (years)", title = "Age Distribution of Nobel Laureates by Category")
## Warning: `fun.y` is deprecated. Use `fun` instead.
## Warning: Removed 39 rows containing non-finite values (stat_boxplot).
## Warning: Removed 39 rows containing non-finite values (stat_summary).
```