The New York Times web site provides a rich set of APIs, as described here: http://developer.nytimes.com/docs
Your task is to choose one of the New York Times APIs, construct an interface in R to read in the JSON data, and transform it to an R dataframe.
The API key is defined in a hidden chunk of code, and we are extracting Food article metadata from the New York Times Top Stories API as of October 29, 2017.
#library(httr)
#library(dplyr)
#library(tidyr)
#library(jsonlite)
# Fetch the Food section of the NYT Top Stories API and flatten the JSON
# response into a data frame. `key` (the API key) must already be defined.
# HTTPS is used so the key in the query string is not sent in clear text.
url <- paste0(
  "https://api.nytimes.com/svc/topstories/v2/food.json",
  "?api-key=", key
)
food <- GET(url)
# Fail fast on an HTTP error (e.g. invalid key, rate limit) instead of
# handing an error body to the JSON parser.
stop_for_status(food)
# State the encoding explicitly so httr does not have to guess it.
food_text <- content(food, "text", encoding = "UTF-8")
food_df <- data.frame(fromJSON(food_text))
# Display only the article metadata columns of interest.
food_df %>%
  select(one_of(c(
    "section", "results.title", "results.abstract",
    "results.url", "results.byline"
  )))
#library(tm)
# Build a corpus from the article abstracts and clean it one step at a time:
# lower-case, drop numbers, drop English stop words, drop punctuation, and
# finally collapse runs of whitespace.
clean_food <- Corpus(VectorSource(food_df$results.abstract))
clean_food <- tm_map(clean_food, content_transformer(tolower))
clean_food <- tm_map(clean_food, removeNumbers)
clean_food <- tm_map(clean_food, removeWords, stopwords("english"))
clean_food <- tm_map(clean_food, removePunctuation)
clean_food <- tm_map(clean_food, stripWhitespace)

# Term-document matrix of the cleaned abstracts, converted to a plain matrix.
food_data <- TermDocumentMatrix(clean_food)
m <- as.matrix(food_data)

# Sum each matrix row to get a total per term, largest totals first.
v <- sort(rowSums(m), decreasing = TRUE)
d <- data.frame(word = names(v), freq = v)

# Show the terms whose total frequency is greater than one.
d %>%
  filter(freq > 1)