library(RJSONIO)
library (RCurl)
## Loading required package: bitops
library(knitr)
First, let’s set the terms of our API call. We’ll do a search for “Zika Virus” among articles published by the New York Times. To keep things simple initially, we’ll do only the first page of results. There are ten results per page, and the first page is numbered 0.
# API key for the New York Times Article Search API. Set the NYT_API_KEY
# environment variable to use your own key without editing this file; the
# literal below is only a fallback used when that variable is unset.
api <- Sys.getenv(
  "NYT_API_KEY",
  unset = "1b2a171910f0db22167c58a348785f0b:2:74811169"
)
query <- "Zika+virus" # Query string, use + instead of space
Now, let’s string together the URL required to make the request.
The format is as follows:
http://api.nytimes.com/svc/search/v2/articlesearch.response-format?[q=search term&fq=filter-field:(filter-term)&additional-params=values]&api-key=####
Source: http://developer.nytimes.com/docs/read/article_search_api_v2#examples
We want our response in JSON, so we set the response format to json. We then fetch the contents of the URL with the function getURL, which returns the raw JSON text, and convert that text into R objects with the function fromJSON.
But, the search results will have lots of fields unless we limit them. Let’s peruse the list of fields available and pick a few: The headline, the author aka byline, the news desk, the word count and the publication date.
Now we have the first page of the most relevant search results for our query.
# Assemble the request URL from its parts (response format, search term,
# requested fields, API key), download it, and parse the JSON reply.
response_format <- "json"
URL <- paste0(
  "http://api.nytimes.com/svc/search/v2/articlesearch.", response_format,
  "?", "q=", query,
  "&fl=headline,byline,pub_date,news_desk,word_count",
  "&api-key=", api
)
gotURL <- getURL(URL) # raw JSON text of the response
RRaw <- fromJSON(gotURL) # JSON text converted to nested R objects
The search results are in the form of lists, under $response$docs. We’ll practice drawing out the field values from these results.
# Flatten the first search result into a named vector so that nested fields
# can be addressed by dotted names such as "headline.main".
RRawi <- unlist(RRaw$response$docs[[1]])
RRawi["headline.main"] # the headline
## headline.main
## "Zika Study Could Help Overcome an Obstacle to Vaccine Research"
RRawi["news_desk"] # news desk
## news_desk
## "Science"
RRawi["word_count"] # word count
## word_count
## "308"
RRawi["pub_date"] # the date
## pub_date
## "2016-03-29T00:00:00Z"
# The byline is spread over several name-part fields; join the parts that are
# present into one string. A part missing from the record indexes as NA and
# is dropped here, because paste() would otherwise insert the text "NA".
byline_parts <- RRawi[c(
  "byline.person.firstname", "byline.person.middlename",
  "byline.person.lastname", "byline.person.qualifier"
)]
byline <- paste(byline_parts[!is.na(byline_parts)], collapse = " ")
byline
## [1] "Donald G. McNEIL Jr"
Let’s now collect these fields for every result on the page into a data frame, with one column per field and one row per article.
# Build a data frame with one row per article in the parsed response.
#
# Reads:  RRaw (parsed API response; articles are under RRaw$response$docs).
# Writes: headlines, bylines, desks, wordcounts, dates (parallel vectors),
#         RDF (all articles) and RDFLatest (the same rows, newest first).

# Flatten every article that was actually returned rather than assuming
# there are exactly ten, so a short or empty page does not fail with
# "subscript out of bounds".
records <- lapply(RRaw$response$docs, unlist)

# Pull one named field out of every record; NA where a record lacks it.
field_of <- function(name) {
  vapply(records, function(rec) as.character(rec[name])[1], character(1))
}

headlines <- field_of("headline.main")
desks <- field_of("news_desk")
wordcounts <- field_of("word_count")
dates <- field_of("pub_date")

# Join the name parts that are present; missing parts are dropped instead of
# being pasted in as the literal text "NA".
byline_fields <- c(
  "byline.person.firstname", "byline.person.middlename",
  "byline.person.lastname", "byline.person.qualifier"
)
bylines <- vapply(records, function(rec) {
  parts <- rec[byline_fields]
  paste(parts[!is.na(parts)], collapse = " ")
}, character(1))

# Only the calendar date is kept (the time part of pub_date is ignored), so
# store it as Date rather than as a date-time.
dates <- as.Date(dates, format = "%Y-%m-%d")

RDF <- data.frame(
  headlines, bylines, desks, wordcounts, dates,
  stringsAsFactors = FALSE
)
RDFLatest <- RDF[order(RDF$dates, decreasing = TRUE), ]

# View() opens a viewer window, so only call it in an interactive session;
# this keeps the script usable when knitted or run with Rscript.
if (interactive()) {
  View(RDFLatest)
}
kable(RDFLatest)
| | headlines | bylines | desks | wordcounts | dates |
|---|---|---|---|---|---|
| 2 | In Miami, Facing Risk of Zika With Resolve but Limited Resources | Sabrina NA TAVERNISE NA | National | 1319 | 2016-04-02 |
| 5 | 5 Reasons to Think the Zika Virus Causes Microcephaly | Donald G. McNEIL Jr | Health | NA | 2016-04-01 |
| 1 | Zika Study Could Help Overcome an Obstacle to Vaccine Research | Donald G. McNEIL Jr | Science | 308 | 2016-03-29 |
| 10 | Study of Zika Outbreak Estimates 1 in 100 Risk of Microcephaly | Catherine Saint LOUIS NA | Foreign | 497 | 2016-03-16 |
| 4 | New Zika Notice Says Higher-Altitude Areas Can Be Safe for Pregnant Women | Donald G. McNEIL Jr | Foreign | 167 | 2016-03-15 |
| 6 | A Trail of Zika-Borne Anguish | Photographs Mauricio LIMA NA | World | NA | 2016-03-03 |
| 3 | Zika Virus Test Is ‘Weeks, Not Years’ Away, W.H.O. Says | Sewell NA CHAN NA | National | 825 | 2016-02-13 |
| 7 | Air Travel Between U.S. and Zika-Affected Areas | Sergio NA PEÇANHA NA | Science | NA | 2016-02-06 |
| 8 | Senate Democrats Urge Obama to Form a Response Plan to Zika Virus | Jennifer NA STEINHAUER NA | Politics | 374 | 2016-02-05 |
| 9 | US Zika Case Sparks Questions About Sex and Mosquito Germs | NA NA NA NA | None | 883 | 2016-02-03 |
The obvious limitation here is the number of search results, which is 10 (a single page). Let’s add an option for the user to choose how many pages of results to fetch.
# Fetch several pages of search results and combine them into one data frame
# sorted newest-first.
#
# Settings: api, query, pages, enddate, response_format (below).
# Writes:   headlines, bylines, desks, wordcounts, dates (parallel vectors)
#           and RDFbulk (one row per article, newest first).

# Set the NYT_API_KEY environment variable to use your own key; the literal
# is only a fallback used when that variable is unset.
api <- Sys.getenv(
  "NYT_API_KEY",
  unset = "1b2a171910f0db22167c58a348785f0b:2:74811169"
)
query <- "Zika+virus" # Query string, use + instead of space
pages <- 2 # how many pages of results do we want?
# today's date, can be changed by user; format YYYYMMDD
enddate <- format(Sys.Date(), "%Y%m%d")
response_format <- "json"

# One slot per requested page; each slot receives that page's flattened
# articles. Pages are numbered from 0 by the API.
page_docs <- vector("list", pages)
for (p in seq_len(pages) - 1L) {
  URL <- paste0(
    "http://api.nytimes.com/svc/search/v2/articlesearch.", response_format,
    "?", "q=", query,
    "&fl=headline,byline,pub_date,news_desk,word_count",
    "&page=", p,
    "&api-key=", api,
    # The Article Search API names this parameter end_date; the previous
    # spelling "enddate" is not a recognised parameter.
    "&end_date=", enddate
  )
  gotURL <- getURL(URL)
  RRaw <- fromJSON(gotURL)
  docs <- RRaw$response$docs
  # A page may hold fewer than ten articles, or none at all once the results
  # are exhausted; stop requesting further pages in the latter case.
  if (length(docs) == 0L) {
    warning("No articles returned for page ", p, "; stopping early.",
            call. = FALSE)
    break
  }
  page_docs[[p + 1L]] <- lapply(docs, unlist)
}
# Concatenate the per-page lists into one flat list of article records.
records <- do.call(c, page_docs)

# Pull one named field out of every record; NA where a record lacks it.
field_of <- function(name) {
  vapply(records, function(rec) as.character(rec[name])[1], character(1))
}

headlines <- field_of("headline.main")
desks <- field_of("news_desk")
wordcounts <- field_of("word_count")
dates <- field_of("pub_date")

# Join the name parts that are present; missing parts are dropped instead of
# being pasted in as the literal text "NA".
byline_fields <- c(
  "byline.person.firstname", "byline.person.middlename",
  "byline.person.lastname", "byline.person.qualifier"
)
bylines <- vapply(records, function(rec) {
  parts <- rec[byline_fields]
  paste(parts[!is.na(parts)], collapse = " ")
}, character(1))

# Only the calendar date is kept (the time part of pub_date is ignored), so
# store it as Date rather than as a date-time.
dates <- as.Date(dates, format = "%Y-%m-%d")

RDFbulk <- data.frame(
  headlines, bylines, desks, wordcounts, dates,
  stringsAsFactors = FALSE
)
RDFbulk <- RDFbulk[order(RDFbulk$dates, decreasing = TRUE), ]

# View() opens a viewer window, so only call it in an interactive session;
# this keeps the script usable when knitted or run with Rscript.
if (interactive()) {
  View(RDFbulk)
}
kable(RDFbulk)
| | headlines | bylines | desks | wordcounts | dates |
|---|---|---|---|---|---|
| 2 | In Miami, Facing Risk of Zika With Resolve but Limited Resources | Sabrina NA TAVERNISE NA | National | 1319 | 2016-04-02 |
| 6 | 5 Reasons to Think the Zika Virus Causes Microcephaly | Donald G. McNEIL Jr | Health | NA | 2016-04-01 |
| 20 | Team of Rival Scientists Comes Together to Fight Zika | Amy NA HARMON NA | National | 2434 | 2016-03-31 |
| 1 | Zika Study Could Help Overcome an Obstacle to Vaccine Research | Donald G. McNEIL Jr | Science | 308 | 2016-03-29 |
| 10 | One Traveler May Have Brought Zika to the Americas in 2013 | Donald G. McNEIL Jr | Foreign | 853 | 2016-03-25 |
| 11 | One Traveler May Have Brought Zika to the Americas in 2013 | Donald G. McNEIL Jr | Foreign | 853 | 2016-03-25 |
| 18 | Puerto Rico Braces for Its Own Zika Epidemic | Donald G. McNEIL Jr | National | 1935 | 2016-03-20 |
| 8 | Study of Zika Outbreak Estimates 1 in 100 Risk of Microcephaly | Catherine Saint LOUIS NA | Foreign | 497 | 2016-03-16 |
| 4 | New Zika Notice Says Higher-Altitude Areas Can Be Safe for Pregnant Women | Donald G. McNEIL Jr | Foreign | 167 | 2016-03-15 |
| 16 | Test of Zika-Fighting Genetically Engineered Mosquitoes Gets Tentative F.D.A. Approval | Andrew NA POLLACK NA | Business | 999 | 2016-03-12 |
| 5 | A Trail of Zika-Borne Anguish | Photographs Mauricio LIMA NA | World | NA | 2016-03-03 |
| 17 | Officials Try to Ease Concerns Facing Rio Games | Rebecca R. RUIZ NA | Sports | 1286 | 2016-03-03 |
| 14 | Obama on Vaccine for Zika | NA NA NA NA | U.S. | 25 | 2016-02-25 |
| 3 | Zika Virus Test Is ‘Weeks, Not Years’ Away, W.H.O. Says | Sewell NA CHAN NA | National | 825 | 2016-02-13 |
| 7 | Air Travel Between U.S. and Zika-Affected Areas | Sergio NA PEÇANHA NA | Science | NA | 2016-02-06 |
| 9 | Senate Democrats Urge Obama to Form a Response Plan to Zika Virus | Jennifer NA STEINHAUER NA | Politics | 374 | 2016-02-05 |
| 15 | Scientists Find Zika in Saliva and Urine | NA NA NA NA | World / Americas | 34 | 2016-02-05 |
| 13 | Latin America Scrambles to Squash Zika-Spreading Mosquito | NA NA NA NA | None | 1198 | 2016-02-04 |
| 12 | Understanding Zika | Aaron NA BYRD NA | World / Americas | 11 | 2016-02-03 |
| 19 | Latin America to Unite Against Zika | NA NA NA NA | World / Americas | 30 | 2016-01-28 |