The New York Times website provides a rich set of APIs, as described here: NY Times APIs. This notebook connects to the Article Search API, reads in the JSON data, and transforms it into a data frame.
#libraries
library(jsonlite)
library(dplyr)
# Retrieve articles in which John Grisham is mentioned from the NYT Article
# Search API.
# The API key is read from the NYT_API_KEY environment variable (for example,
# set in ~/.Renviron) so the credential is never stored in the notebook itself.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable to your NY Times API key.",
    call. = FALSE
  )
}
# Build the request URL over HTTPS so the key is not sent in cleartext;
# URLencode() percent-encodes the space in the search phrase.
search_url <- paste0(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json",
  "?q=", URLencode("john grisham", reserved = TRUE),
  "&api-key=", URLencode(nyt_api_key, reserved = TRUE)
)
# flatten = TRUE turns nested JSON objects into dotted column names; the
# parsed list is then combined into one data frame whose columns carry the
# list-path prefix (e.g. response.docs.source).
articles <- fromJSON(search_url, flatten = TRUE) %>%
  data.frame()
dim(articles)
## [1] 10 33
# Keep only a handful of fields of interest, giving each one a shorter name
articles <- select(
  articles,
  source = response.docs.source,
  doc_type = response.docs.document_type,
  purpose = response.docs.subsection_name,
  headline = response.docs.headline.main,
  article_author = response.docs.byline.original
)
# Show the first row of the trimmed data frame
head(articles, n = 1)
## source doc_type purpose
## 1 The New York Times article Book Review
## headline article_author
## 1 The Strange Things You Find on Authors’ Websites By Tina Jordan