library("jsonlite")
library("plyr")
library("dplyr")
library("httr")
library("purrr")
In this assignment, Nobel Prize data in JSON format was pulled using the Developer Zone API described at https://www.nobelprize.org/about/developer-zone-2/. The data is requested with the GET function, which returns a list containing all of the information returned by the API server.
# Request the laureates endpoint of the Nobel Prize API.
resp <- GET("https://api.nobelprize.org/2.1/laureates?_ga=2.235250305.1628049656.1634086931-1839215677.1633638535")
# Stop right away on an HTTP error status so that a failed request is not
# silently passed on to the JSON parsing steps below.
stop_for_status(resp)
Printing the response gives a summary that includes a “Status” code, a number that indicates the success or failure of the API request. The desired status, 200, corresponds to a successful request. The content type indicates what form the data takes; this response carries its data in JSON format.
# Show the summary of the response object (URL, date, status, content type, size).
print(resp)
## Response [https://api.nobelprize.org/2.1/laureates?_ga=2.235250305.1628049656.1634086931-1839215677.1633638535]
## Date: 2021-10-16 21:17
## Status: 200
## Content-Type: application/json;charset=utf-8
## Size: 86.9 kB
The raw bytes of the response body are converted into a character string that holds the JSON text.
# Decode the response body as text. The encoding is stated explicitly because
# the server declares charset=utf-8; rawToChar() would leave the string's
# encoding unmarked, which can garble non-ASCII names in non-UTF-8 locales.
content(resp, as = "text", encoding = "UTF-8")
The character string is then converted into a list data structure using fromJSON(). The resulting list, stored in the variable `res`, consists of three elements, one of which is the ‘laureates’ data set.
# Parse the JSON body into an R list. The body is decoded explicitly as UTF-8
# (the charset declared by the server) so non-ASCII text is not mangled.
res <- fromJSON(content(resp, as = "text", encoding = "UTF-8"))
# Top-level elements of the parsed response.
names(res)
## [1] "laureates" "meta" "links"
This analysis was developed by individually extracting list elements from the ‘laureates’ data frame using ‘[ ]’ and tidyverse functions.
# Laureate ids as a one-column data frame labelled "Laureates.Id".
res1 <- setNames(as.data.frame(res$laureates$id), "Laureates.Id")
head(res1, n = 4)
## Laureates.Id
## 1 745
## 2 102
## 3 779
## 4 259
# Known name: keep only the first column of the nested knownName data frame
# and label it "KnownName". Selecting by position (instead of dropping `se`
# by name) keeps the result to a single column even if the set of sibling
# columns returned by the API changes.
res2 <- as.data.frame(res$laureates$knownName)[1]
colnames(res2) <- "KnownName"
head(res2, 4)
## KnownName
## 1 A. Michael Spence
## 2 Aage N. Bohr
## 3 Aaron Ciechanover
## 4 Aaron Klug
# Given name: keep only the first column of the nested givenName data frame
# and label it "GivenName". Selecting by position (instead of dropping `se`
# by name) keeps the result to a single column even if the set of sibling
# columns returned by the API changes.
res3 <- as.data.frame(res$laureates$givenName)[1]
colnames(res3) <- "GivenName"
head(res3, 4)
## GivenName
## 1 A. Michael
## 2 Aage N.
## 3 Aaron
## 4 Aaron
# Family name: keep only the first column of the nested familyName data frame
# and label it "FamilyName". Selecting by position (instead of dropping `se`
# by name) keeps the result to a single column even if the set of sibling
# columns returned by the API changes.
res4 <- as.data.frame(res$laureates$familyName)[1]
colnames(res4) <- "FamilyName"
head(res4, 4)
## FamilyName
## 1 Spence
## 2 Bohr
## 3 Ciechanover
## 4 Klug
# Full name: keep only the first column of the nested fullName data frame
# and label it "FullName". Selecting by position (instead of dropping `se`
# by name) keeps the result to a single column even if the set of sibling
# columns returned by the API changes.
res5 <- as.data.frame(res$laureates$fullName)[1]
colnames(res5) <- "FullName"
head(res5, 4)
## FullName
## 1 A. Michael Spence
## 2 Aage Niels Bohr
## 3 Aaron Ciechanover
## 4 Aaron Klug
# File name of each laureate as a one-column data frame labelled "FileName".
res6 <- as.data.frame(res$laureates$fileName)
names(res6)[1] <- "FileName"
head(res6, n = 4)
## FileName
## 1 spence
## 2 bohr
## 3 ciechanover
## 4 klug
# Gender of each laureate as a one-column data frame labelled "Gender".
res7 <- setNames(as.data.frame(res$laureates$gender), "Gender")
head(res7, n = 4)
## Gender
## 1 male
## 2 male
## 3 male
## 4 male
# Birth date: keep only the first column of the nested birth data frame and
# label it "BirthDate". Selecting by position (instead of dropping `place`
# by name) keeps the result to a single column even if the set of sibling
# columns returned by the API changes.
res8 <- as.data.frame(res$laureates$birth)[1]
colnames(res8) <- "BirthDate"
head(res8, 4)
## BirthDate
## 1 1943-00-00
## 2 1922-06-19
## 3 1947-10-01
## 4 1926-08-11
# Birth city: keep only the first column of the nested city data frame and
# label it "Birth.PlaceCity". Selecting by position (instead of dropping
# `no` and `se` by name) keeps the result to a single column even if the set
# of sibling columns returned by the API changes.
res8.1 <- as.data.frame(res$laureates$birth$place$city)[1]
colnames(res8.1) <- "Birth.PlaceCity"
head(res8.1, 4)
## Birth.PlaceCity
## 1 Montclair, NJ
## 2 Copenhagen
## 3 Haifa
## 4 Zelvas
# Birth country: keep only the first column of the nested country data frame
# and label it "Birth.PlaceCountry". Selecting by position (instead of
# dropping `no` and `se` by name) keeps the result to a single column even if
# the set of sibling columns returned by the API changes.
res8.2 <- as.data.frame(res$laureates$birth$place$country)[1]
colnames(res8.2) <- "Birth.PlaceCountry"
head(res8.2, 4)
## Birth.PlaceCountry
## 1 USA
## 2 Denmark
## 3 British Protectorate of Palestine
## 4 Lithuania
# Present-day birth city: keep only the first column of the nested cityNow
# data frame and label it "Birth.PlaceCityNow". Selecting by position
# (instead of dropping `no`, `se` and `sameAs` by name) keeps the result to a
# single column even if the set of sibling columns returned by the API changes.
res8.3 <- as.data.frame(res$laureates$birth$place$cityNow)[1]
colnames(res8.3) <- "Birth.PlaceCityNow"
head(res8.3, 4)
## Birth.PlaceCityNow
## 1 Montclair, NJ
## 2 Copenhagen
## 3 Haifa
## 4 Zelvas
# Present-day birth country: keep only the first column of the nested
# countryNow data frame and label it "Birth.PlaceCountryNow". Selecting by
# position (instead of dropping `no`, `se` and `sameAs` by name) keeps the
# result to a single column even if the set of sibling columns returned by
# the API changes.
res8.4 <- as.data.frame(res$laureates$birth$place$countryNow)[1]
colnames(res8.4) <- "Birth.PlaceCountryNow"
head(res8.4, 4)
## Birth.PlaceCountryNow
## 1 USA
## 2 Denmark
## 3 Israel
## 4 Lithuania
# Birth continent: keep only the first column of the nested continent data
# frame and label it "Birth.PlaceContinent". Selecting by position (instead
# of dropping `no` and `se` by name) keeps the result to a single column even
# if the set of sibling columns returned by the API changes.
res8.5 <- as.data.frame(res$laureates$birth$place$continent)[1]
colnames(res8.5) <- "Birth.PlaceContinent"
head(res8.5, 4)
## Birth.PlaceContinent
## 1 North America
## 2 Europe
## 3 Asia
## 4 Europe
# Birth location string: keep only the first column of the nested
# locationString data frame and label it "Birth.PlaceLocationString".
# Selecting by position (instead of dropping `no` and `se` by name) keeps the
# result to a single column even if the set of sibling columns returned by
# the API changes.
res8.6 <- as.data.frame(res$laureates$birth$place$locationString)[1]
colnames(res8.6) <- "Birth.PlaceLocationString"
head(res8.6, 4)
## Birth.PlaceLocationString
## 1 Montclair, NJ, USA
## 2 Copenhagen, Denmark
## 3 Haifa, British Protectorate of Palestine (now Israel)
## 4 Zelvas, Lithuania
# Death record: keep only the first column of the nested death data frame
# (the dates shown in the echoed output) and label it "Birth.Death"; the
# label is kept unchanged so the combined table keeps its column names.
# Selecting by position (instead of dropping `place` by name) keeps the
# result to a single column even if the set of sibling columns returned by
# the API changes.
res9 <- as.data.frame(res$laureates$death)[1]
colnames(res9) <- "Birth.Death"
head(res9, 4)
## Birth.Death
## 1 <NA>
## 2 2009-09-08
## 3 <NA>
## 4 2018-11-20
The laureates data set contains only 25 records, as shown in the embedded table created with the reactable function.
# Bind the individual one-column data frames side by side into one table.
res_cbind <- do.call(
  cbind.data.frame,
  list(
    res1, res2, res3, res4, res5, res6, res7,
    res8, res8.1, res8.2, res8.3, res8.4, res8.5, res8.6,
    res9
  )
)
# head(res_cbind, 5)
# Render the combined table as an interactive widget.
library(reactable)
reactable(res_cbind)
library(ggplot2)
# Tally laureates by gender. The column is referenced by its bare name so the
# result column is called "Gender" rather than the literal "res_cbind$Gender".
# dplyr::count is spelled out because plyr, which is also attached, exports a
# different count().
country_gender <- res_cbind %>%
  dplyr::count(Gender)
country_gender
## Gender n
## 1 female 1
## 2 male 24
# Bar chart of laureate counts per birth continent, bars filled by gender.
ggplot(res_cbind, mapping = aes(x = Birth.PlaceContinent)) +
  stat_count(mapping = aes(fill = Gender), width = 0.5) +
  labs(
    title = "Nobel Prizes by Continent and Gender",
    x = "Continent",
    y = "Count"
  )
# Number of laureates per birth continent.
country_plot <- count(res_cbind, Birth.PlaceContinent)
print(country_plot)
## Birth.PlaceContinent n
## 1 Africa 3
## 2 Asia 6
## 3 Europe 9
## 4 North America 4
## 5 Oceania 1
## 6 South America 1
## 7 <NA> 1
# Horizontal bar chart of laureate counts per birth continent, bars filled by
# birth country.
ggplot(res_cbind, mapping = aes(x = Birth.PlaceContinent)) +
  stat_count(mapping = aes(fill = Birth.PlaceCountry), width = 0.7) +
  labs(
    title = "Nobel Prizes by Continent and Birth Place",
    x = "Continent",
    y = "Count"
  ) +
  coord_flip()
library("stringr")
# The first four characters of each birth date string are the year.
birth_year <- str_sub(res_cbind$BirthDate, start = 1, end = 4)
print(birth_year)
## [1] "1943" "1922" "1947" "1926" "1948" "1926" "1961" "1976" "1939" "1969"
## [11] "1903" "1835" "1876" "1931" "1946" "1930" "1948" "1948" "1936" "1914"
## [21] "1927" "1852" "1913" "1898" "1879"
# Scatter plot of birth year against birth continent. birth_year holds
# character strings, which ggplot2 would place on a discrete axis with evenly
# spaced categories; converting to integer gives a true numeric year axis.
ggplot(
  data = res_cbind,
  aes(x = Birth.PlaceContinent, y = as.integer(birth_year))
) +
  labs(
    title = "Nobel Prizes by Birth Place Continent and Birth Year",
    x = "Birth Place Continent",
    y = "Birth Year"
  ) +
  geom_point(colour = "blue", size = 3)