More Practice with JSON

Load packages

library("jsonlite")
library("plyr")
library("dplyr")
library("httr")
library("purrr")

Make a request to the API Server

In this assignment, JSON data about the Nobel Prize was pulled through the Developer Zone API described at https://www.nobelprize.org/about/developer-zone-2/. The request is made with the GET function, which returns a list containing all of the information sent back by the API server.

# Request the laureates endpoint of the Nobel Prize API (version 2.1).
# NOTE(review): the `_ga=...` query parameter looks like a tracking token
# copied from a browser URL; presumably the API ignores it -- confirm before
# removing it.
resp <- GET("https://api.nobelprize.org/2.1/laureates?_ga=2.235250305.1628049656.1634086931-1839215677.1633638535")

# Fail fast on an HTTP error status instead of letting a failed request
# surface later as a confusing JSON parsing error.
stop_for_status(resp)

A summary of the response reports a “Status” code, a number that indicates whether the API request succeeded or failed; the desired value, 200, corresponds to a successful request. The content type indicates what form the data takes, and this response carries its data in JSON format.

# Show the response summary (URL, date, status, content type, size).
print(resp)

## Response [https://api.nobelprize.org/2.1/laureates?_ga=2.235250305.1628049656.1634086931-1839215677.1633638535]
##   Date: 2021-10-16 21:17
##   Status: 200
##   Content-Type: application/json;charset=utf-8
##   Size: 86.9 kB

The response body is stored as raw bytes; rawToChar() converts it into a character string containing the JSON text.

# Decode the raw response body into a character string and display it.
rawToChar(resp[["content"]])

The character string is then converted into a list data structure using fromJSON(). The result is stored in the variable `res`, a list of three elements, one of which is the ‘laureates’ data set.

# Decode the raw body to text, then parse the JSON text into an R list.
res <- resp$content %>%
  rawToChar() %>%
  fromJSON()

# List the top-level elements of the parsed response.
names(res)

## [1] "laureates" "meta"      "links"

Parse and generate data

This analysis was developed by individually extracting list elements from the ‘laureates’ data frame using ‘[ ]’ and tidyverse functions.

# Laureate id: wrap the element in a data frame and name its first column.
res1 <- res$laureates$id %>%
  as.data.frame() %>%
  dplyr::rename(Laureates.Id = 1)
head(res1, n = 4)

##   Laureates.Id
## 1          745
## 2          102
## 3          779
## 4          259

# Known name: the first column becomes `KnownName`; the `se` column is dropped.
res2 <- res$laureates$knownName %>%
  as.data.frame() %>%
  dplyr::rename(KnownName = 1) %>%
  dplyr::select(-se)

head(res2, n = 4)

##           KnownName
## 1 A. Michael Spence
## 2      Aage N. Bohr
## 3 Aaron Ciechanover
## 4        Aaron Klug

# Given name: the first column becomes `GivenName`; the `se` column is dropped.
res3 <- res$laureates$givenName %>%
  as.data.frame() %>%
  dplyr::rename(GivenName = 1) %>%
  dplyr::select(-se)

head(res3, n = 4)

##    GivenName
## 1 A. Michael
## 2    Aage N.
## 3      Aaron
## 4      Aaron

# Family name: the first column becomes `FamilyName`; the `se` column is dropped.
res4 <- res$laureates$familyName %>%
  as.data.frame() %>%
  dplyr::rename(FamilyName = 1) %>%
  dplyr::select(-se)

head(res4, n = 4)

##    FamilyName
## 1      Spence
## 2        Bohr
## 3 Ciechanover
## 4        Klug

# Full name: the first column becomes `FullName`; the `se` column is dropped.
res5 <- res$laureates$fullName %>%
  as.data.frame() %>%
  dplyr::rename(FullName = 1) %>%
  dplyr::select(-se)

head(res5, n = 4)

##            FullName
## 1 A. Michael Spence
## 2   Aage Niels Bohr
## 3 Aaron Ciechanover
## 4        Aaron Klug

# File name: wrap the element in a data frame and name its first column.
res6 <- res$laureates$fileName %>%
  as.data.frame() %>%
  dplyr::rename(FileName = 1)

head(res6, n = 4)

##      FileName
## 1      spence
## 2        bohr
## 3 ciechanover
## 4        klug

# Gender: wrap the element in a data frame and name its first column.
res7 <- res$laureates$gender %>%
  as.data.frame() %>%
  dplyr::rename(Gender = 1)

head(res7, n = 4)

##   Gender
## 1   male
## 2   male
## 3   male
## 4   male

# Birth date: the first column of `birth` becomes `BirthDate`; the `place`
# column is dropped here and unpacked separately below.
res8 <- res$laureates$birth %>%
  as.data.frame() %>%
  dplyr::rename(BirthDate = 1) %>%
  dplyr::select(-place)

head(res8, n = 4)

##    BirthDate
## 1 1943-00-00
## 2 1922-06-19
## 3 1947-10-01
## 4 1926-08-11

# Birth city: the first column becomes `Birth.PlaceCity`; the `no` and `se`
# columns are dropped.
res8.1 <- res$laureates$birth$place$city %>%
  as.data.frame() %>%
  dplyr::rename(Birth.PlaceCity = 1) %>%
  dplyr::select(-no, -se)

head(res8.1, n = 4)

##   Birth.PlaceCity
## 1   Montclair, NJ
## 2      Copenhagen
## 3           Haifa
## 4          Zelvas

# Birth country: the first column becomes `Birth.PlaceCountry`; the `no` and
# `se` columns are dropped.
res8.2 <- res$laureates$birth$place$country %>%
  as.data.frame() %>%
  dplyr::rename(Birth.PlaceCountry = 1) %>%
  dplyr::select(-no, -se)

head(res8.2, n = 4)

##                  Birth.PlaceCountry
## 1                               USA
## 2                           Denmark
## 3 British Protectorate of Palestine
## 4                         Lithuania

# Present-day birth city: the first column becomes `Birth.PlaceCityNow`; the
# `no`, `se` and `sameAs` columns are dropped.
res8.3 <- res$laureates$birth$place$cityNow %>%
  as.data.frame() %>%
  dplyr::rename(Birth.PlaceCityNow = 1) %>%
  dplyr::select(-no, -se, -sameAs)

head(res8.3, n = 4)

##   Birth.PlaceCityNow
## 1      Montclair, NJ
## 2         Copenhagen
## 3              Haifa
## 4             Zelvas

# Present-day birth country: the first column becomes `Birth.PlaceCountryNow`;
# the `no`, `se` and `sameAs` columns are dropped.
res8.4 <- res$laureates$birth$place$countryNow %>%
  as.data.frame() %>%
  dplyr::rename(Birth.PlaceCountryNow = 1) %>%
  dplyr::select(-no, -se, -sameAs)

head(res8.4, n = 4)

##   Birth.PlaceCountryNow
## 1                   USA
## 2               Denmark
## 3                Israel
## 4             Lithuania

# Birth continent: the first column becomes `Birth.PlaceContinent`; the `no`
# and `se` columns are dropped.
res8.5 <- res$laureates$birth$place$continent %>%
  as.data.frame() %>%
  dplyr::rename(Birth.PlaceContinent = 1) %>%
  dplyr::select(-no, -se)

head(res8.5, n = 4)

##   Birth.PlaceContinent
## 1        North America
## 2               Europe
## 3                 Asia
## 4               Europe

# Birth location string: the first column becomes `Birth.PlaceLocationString`;
# the `no` and `se` columns are dropped.
res8.6 <- res$laureates$birth$place$locationString %>%
  as.data.frame() %>%
  dplyr::rename(Birth.PlaceLocationString = 1) %>%
  dplyr::select(-no, -se)

head(res8.6, n = 4)

##                               Birth.PlaceLocationString
## 1                                    Montclair, NJ, USA
## 2                                   Copenhagen, Denmark
## 3 Haifa, British Protectorate of Palestine (now Israel)
## 4                                     Zelvas, Lithuania

# Death record: the first column of `death` becomes `Birth.Death` (it comes
# from the `death` element despite the "Birth." prefix); the `place` column
# is dropped.
res9 <- res$laureates$death %>%
  as.data.frame() %>%
  dplyr::rename(Birth.Death = 1) %>%
  dplyr::select(-place)

head(res9, n = 4)

##   Birth.Death
## 1        <NA>
## 2  2009-09-08
## 3        <NA>
## 4  2018-11-20

The laureates data set retrieved here contains only 25 records, shown below in an embedded interactive table built with the reactable function.

# Bind the single-purpose data frames side by side into one wide table,
# one row per laureate.
res_cbind <- cbind.data.frame(
  res1, res2, res3, res4, res5, res6, res7,
  res8, res8.1, res8.2, res8.3, res8.4, res8.5, res8.6,
  res9
)
# Quick check (disabled): head(res_cbind, 5)

# reactable provides the interactive table widget used below.
library("reactable")

# Render the combined laureates table as an interactive widget.
res_cbind %>%
  reactable()

Plot

library(ggplot2)

Gender Distribution

How many female and male laureates received Nobel Prizes in this dataset? One female and 24 male laureates.

# Tally laureates by gender.
# The column is referenced by its bare name (not `res_cbind$Gender`) so the
# result column is called `Gender`. `count` is namespace-qualified because
# both plyr and dplyr export a function of that name.
country_gender <- res_cbind %>%
  dplyr::count(Gender)

country_gender

##   Gender  n
## 1 female  1
## 2   male 24

# Stacked bar chart: number of laureates per birth continent, filled by gender.
res_cbind %>%
  ggplot(aes(x = Birth.PlaceContinent)) +
  stat_count(aes(fill = Gender), width = 0.5) +
  xlab("Continent") +
  ylab("Count") +
  ggtitle("Nobel Prizes by Continent and Gender")

Continent

On which continent were these laureates born?

# Number of laureates per birth continent (NA is counted as its own group).
country_plot <- count(res_cbind, Birth.PlaceContinent)

print(country_plot)

##   Birth.PlaceContinent n
## 1               Africa 3
## 2                 Asia 6
## 3               Europe 9
## 4        North America 4
## 5              Oceania 1
## 6        South America 1
## 7                 <NA> 1

# Horizontal stacked bar chart: laureates per birth continent, filled by
# birth country.
res_cbind %>%
  ggplot(aes(x = Birth.PlaceContinent)) +
  stat_count(aes(fill = Birth.PlaceCountry), width = 0.7) +
  xlab("Continent") +
  ylab("Count") +
  ggtitle("Nobel Prizes by Continent and Birth Place") +
  coord_flip()

Birth Year

In what years were the laureates in this dataset born?

library(stringr)

# The first four characters of each birth date string are the year.
birth_year <- str_sub(res_cbind[["BirthDate"]], start = 1, end = 4)

print(birth_year)

##  [1] "1943" "1922" "1947" "1926" "1948" "1926" "1961" "1976" "1939" "1969"
## [11] "1903" "1835" "1876" "1931" "1946" "1930" "1948" "1948" "1936" "1914"
## [21] "1927" "1852" "1913" "1898" "1879"

# Scatter plot of birth year against birth continent, one point per laureate.
# birth_year is a character vector; converting it to integer gives a
# continuous y axis, so years are spaced proportionally instead of being
# drawn as equally spaced category labels.
ggplot(
  data = res_cbind,
  aes(x = Birth.PlaceContinent, y = as.integer(birth_year))
) +
  geom_point(colour = "blue", size = 3) +
  labs(
    x = "Continent",
    y = "Birth year",
    title = "Nobel Prizes by Birth Place Continent and Birth Year"
  )