New York Times Article Search API

Task: Make an API call and convert the result to a data frame

library(XML)
library(stringr)
library(jsonlite)
library(dplyr)
library(tidyr)
# Endpoint of the NYT Article Search API (v2, JSON)
baseurl <- "https://api.nytimes.com/svc/search/v2/articlesearch.json"
# Page numbers 0 through 9; only the first one is requested below
pagelist <- seq(0, 9, 1)

# The API key is read from the "nytimesid" option. Fail early with a clear
# message: paste0() silently drops a NULL, which would send a keyless request.
apikey <- getOption("nytimesid")
if (is.null(apikey) || identical(apikey, "")) {
  stop("Set the API key first: options(nytimesid = \"<your key>\")",
       call. = FALSE)
}

# Assemble the request URL. The search term is percent-encoded so that terms
# containing spaces or reserved characters still produce a valid URL.
requesturl <- paste0(
  baseurl,
  "?q=", URLencode("election", reserved = TRUE),
  "&page=", pagelist[1],
  "&begin_date=20160101&end_date=20160930",
  "&api-key=", apikey
)
api <- fromJSON(requesturl)

# Flatten the response element of the parsed JSON into one data frame
apidf <- as.data.frame(api$response)
# The columns have many different classes
lapply(apidf, class)
## $meta.hits
## [1] "integer"
## 
## $meta.time
## [1] "integer"
## 
## $meta.offset
## [1] "integer"
## 
## $docs.web_url
## [1] "character"
## 
## $docs.snippet
## [1] "character"
## 
## $docs.lead_paragraph
## [1] "character"
## 
## $docs.abstract
## [1] "character"
## 
## $docs.print_page
## [1] "character"
## 
## $docs.blog
## [1] "list"
## 
## $docs.source
## [1] "character"
## 
## $docs.multimedia
## [1] "list"
## 
## $docs.headline
## [1] "data.frame"
## 
## $docs.keywords
## [1] "list"
## 
## $docs.pub_date
## [1] "character"
## 
## $docs.document_type
## [1] "character"
## 
## $docs.news_desk
## [1] "character"
## 
## $docs.section_name
## [1] "character"
## 
## $docs.subsection_name
## [1] "character"
## 
## $docs.byline
## [1] "data.frame"
## 
## $docs.type_of_material
## [1] "character"
## 
## $docs._id
## [1] "character"
## 
## $docs.word_count
## [1] "character"
## 
## $docs.slideshow_credits
## [1] "logical"

The columns of this data frame have quite a few different classes, so the data needs some further processing. Below, we separate the simple columns (character, integer, logical) from the more complicated nested ones (lists and data frames) and handle each of them in turn.

# Pull the nested headline column out of apidf into its own object
headline <- select(apidf, contains("docs.headline"))
names(headline)
## [1] "docs.headline"
# Remove the nested column from apidf
apidf$docs.headline <- NULL

# Data frame of headlines. The columns are named inside cbind(), so each label
# is bound to its field rather than to its position (V1..V4); a field that is
# missing from the response is simply left out instead of breaking a rename().
headlineparts <- headline$docs.headline
headline <- as.data.frame(cbind(
  main = headlineparts$main,
  print = headlineparts$print_headline,
  content_kicker = headlineparts$content_kicker,
  kicker = headlineparts$kicker
))

# Bylines: pull the nested byline column out of apidf into its own object
byline <- select(apidf, contains("docs.byline"))
apidf$docs.byline <- NULL
names(byline)
## [1] "docs.byline"
# Flatten the nested byline fields into one column each. Every column is named
# where it is bound, so the labels do not depend on the order in which the API
# returns the fields (rather than being taken positionally from
# names(api$response$docs$byline)).
bylineparts <- byline$docs.byline
byline <- as.data.frame(cbind(
  contributor = bylineparts$contributor,
  person = bylineparts$person,
  original = bylineparts$original,
  organization = bylineparts$organization
))
# Drop the person column; contributor, original and organization are kept
byline$person <- NULL

# Keywords: move the nested keywords column into its own data frame and
# record each row's name in `num` so the rows can be identified later
keywords <- select(apidf, contains("docs.keywords"))
keywords[["num"]] <- rownames(keywords)
apidf[["docs.keywords"]] <- NULL
# Unfinished: sketch for expanding the keywords of a single article (row 3)
#d<-as.data.frame(keywords$docs.keywords[3])
#d$num<-keywords$num[3]


# Blog and multimedia columns are not transformed; they are dropped from apidf
apidf[["docs.blog"]] <- NULL
apidf[["docs.multimedia"]] <- NULL

# Put the pieces back together: headline and byline columns side by side,
# then appended to the right of the remaining article columns
diff <- headline %>% bind_cols(byline)
finaldf <- apidf %>% bind_cols(diff)
# Confirm that the combined object is a data frame
is.data.frame(finaldf)
## [1] TRUE
# Class of every column in the combined data frame
finaldf %>% lapply(class)
## $meta.hits
## [1] "integer"
## 
## $meta.time
## [1] "integer"
## 
## $meta.offset
## [1] "integer"
## 
## $docs.web_url
## [1] "character"
## 
## $docs.snippet
## [1] "character"
## 
## $docs.lead_paragraph
## [1] "character"
## 
## $docs.abstract
## [1] "character"
## 
## $docs.print_page
## [1] "character"
## 
## $docs.source
## [1] "character"
## 
## $docs.pub_date
## [1] "character"
## 
## $docs.document_type
## [1] "character"
## 
## $docs.news_desk
## [1] "character"
## 
## $docs.section_name
## [1] "character"
## 
## $docs.subsection_name
## [1] "character"
## 
## $docs.type_of_material
## [1] "character"
## 
## $docs._id
## [1] "character"
## 
## $docs.word_count
## [1] "character"
## 
## $docs.slideshow_credits
## [1] "logical"
## 
## $main
## [1] "factor"
## 
## $print
## [1] "factor"
## 
## $content_kicker
## [1] "factor"
## 
## $kicker
## [1] "factor"
## 
## $contributor
## [1] "list"
## 
## $original
## [1] "list"
## 
## $organization
## [1] "list"
# Preview the first six rows of the combined data frame
head(finaldf, n = 6L)
##   meta.hits meta.time meta.offset
## 1     18975        19           0
## 2     18975        19           0
## 3     18975        19           0
## 4     18975        19           0
## 5     18975        19           0
## 6     18975        19           0
##                                                                                                      docs.web_url
## 1 http://www.nytimes.com/2016/09/19/theater/review-in-what-did-you-expect-a-potluck-of-election-year-anxiety.html
## 2                                         http://www.nytimes.com/2016/09/24/opinion/the-feckless-fec-rebuked.html
## 3                http://takingnote.blogs.nytimes.com/2016/09/29/donald-trump-proves-perversely-good-for-the-peso/
## 4    http://www.nytimes.com/interactive/2016/08/09/us/elections/Bush-Rubio-and-Kasich-Donors-give-to-Clinton.html
## 5                                    http://www.nytimes.com/2016/07/01/movies/the-purge-election-year-review.html
## 6                                                 http://www.nytimes.com/2016/09/13/opinion/election-choices.html
##                                                                                                                                                docs.snippet
## 1             Richard Nelson portrays life in a political season as a bewildering mirage for an extended family that finds itself in reduced circumstances.
## 2                                         A federal judge’s ruling is further evidence that the F.E.C. is paralyzed and should be replaced by a new agency.
## 3                                                                    The currency turns out to be inversely tied to the fortunes of the Republican nominee.
## 4 People who donated to establishment Republican candidates in the primary season are more likely to give money to Hillary Clinton than to Donald J. Trump.
## 5         In the third chapter of James DeMonaco’s dystopian series, a senator joins insurgents who oppose the yearly overnight killing spree in the title.
## 6                                                 Here are some recommendations in Tuesday’s Democratic primary races for New York State legislative seats.
##                                                                                                                                         docs.lead_paragraph
## 1             Richard Nelson portrays life in a political season as a bewildering mirage for an extended family that finds itself in reduced circumstances.
## 2                                         A federal judge’s ruling is further evidence that the F.E.C. is paralyzed and should be replaced by a new agency.
## 3                                                                                                                                                      <NA>
## 4 People who donated to establishment Republican candidates in the primary season are more likely to give money to Hillary Clinton than to Donald J. Trump.
## 5         In the third chapter of James DeMonaco’s dystopian series, a senator joins insurgents who oppose the yearly overnight killing spree in the title.
## 6                                                 Here are some recommendations in Tuesday’s Democratic primary races for New York State legislative seats.
##                                                                                docs.abstract
## 1                                                                                       <NA>
## 2                                                                                       <NA>
## 3     The currency turns out to be inversely tied to the fortunes of the Republican nominee.
## 4                                                                                       <NA>
## 5 A O Scott reviews movie The Purge: Election Year, written and directed by James DeMonaco. 
## 6                                                                                       <NA>
##   docs.print_page        docs.source        docs.pub_date
## 1               3                    2016-09-19T04:00:00Z
## 2              20                    2016-09-24T04:00:00Z
## 3            <NA> The New York Times 2016-09-29T21:52:06Z
## 4            <NA> The New York Times 2016-08-09T04:00:00Z
## 5               7 The New York Times 2016-07-01T00:00:00Z
## 6              26                    2016-09-13T04:00:00Z
##   docs.document_type       docs.news_desk docs.section_name
## 1            article              Culture           Theater
## 2            article            Editorial           Opinion
## 3           blogpost                 OpEd           Opinion
## 4         multimedia U.S. / Election 2016              U.S.
## 5            article              Weekend            Movies
## 6            article            Editorial           Opinion
##   docs.subsection_name docs.type_of_material                 docs._id
## 1                 <NA>                Review 57defe6a7c459f227dbabb59
## 2                 <NA>             Editorial 57e5cd1d7c459f227dbad274
## 3                 <NA>                  Blog 57ed8dc07c459f6063986b3e
## 4        Election 2016   Interactive Feature 57ad65b87c459f21c999469e
## 5                 <NA>                Review 5775907e38f0d87c9d83794b
## 6                 <NA>             Editorial 57e2d90a7c459f227dbac5cc
##   docs.word_count docs.slideshow_credits
## 1            <NA>                     NA
## 2            <NA>                     NA
## 3             379                     NA
## 4            <NA>                     NA
## 5             707                     NA
## 6            <NA>                     NA
##                                                                             main
## 1          Review: In ‘What Did You Expect?,’ a Potluck of Election-Year Anxiety
## 2                                                   The Feckless F.E.C., Rebuked
## 3                               Donald Trump Proves Perversely Good for The Peso
## 4 Donors for Bush, Kasich and Christie Are Turning to Clinton More Than to Trump
## 5    Review: ‘The Purge: Election Year’ Offers a Campaign Platform of Blood Lust
## 6                                                               Election Choices
##                                                             print
## 1 A Potluck of Anxiety  as a Family Gathers in an Election Season
## 2                                    The Feckless F.E.C., Rebuked
## 3                                                            <NA>
## 4                                                            <NA>
## 5                               A Campaign Platform of Blood Lust
## 6                                                Election Choices
##   content_kicker      kicker contributor               original
## 1           <NA>        <NA>                    By BEN BRANTLEY
## 2      Editorial   Editorial          NA By THE EDITORIAL BOARD
## 3           <NA> Taking Note          NA   By FRANCIS X. CLINES
## 4           <NA>        <NA>          NA         By ADAM PEARCE
## 5           <NA>        <NA>          NA         By A. O. SCOTT
## 6      Editorial   Editorial          NA By THE EDITORIAL BOARD
##          organization
## 1                  NA
## 2 THE EDITORIAL BOARD
## 3                  NA
## 4                  NA
## 5                  NA
## 6 THE EDITORIAL BOARD