The New York Times web site provides a rich set of APIs, as described here: [https://developer.nytimes.com/apis](https://developer.nytimes.com/apis).
You’ll need to start by signing up for an API key.
Your task is to choose one of the New York Times APIs, construct an interface in R to read in the JSON data, and transform it into an R data frame.
library(tidyverse)
library(httr)
library(jsonlite)
library(stringr)
Search for articles from the Climate news desk published in the first five months of 2019 (January 1 through May 31).
# Build the Article Search request URL: articles whose news desk is "Climate",
# published from 2019-01-01 through 2019-05-31.
# `api_key` must already hold an NYT API key; it is not defined in this part
# of the document.
url <- str_c(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json",
  '?fq=news_desk:("Climate")',
  "&begin_date=", "20190101",
  "&end_date=", "20190531",
  "&api-key=", api_key
)
# Show the assembled URL for inspection. Note that it includes the API key.
url
## [1] "https://api.nytimes.com/svc/search/v2/articlesearch.json?fq=news_desk:(\"Climate\")&begin_date=20190101&end_date=20190531&api-key=I005E9HTTpYWKlx2Vztc1DiCS01pll1G"
# Send the GET request to the Article Search endpoint and keep the response.
data <- httr::GET(url = url)
# Show the response summary (URL, date, status, content type, size).
print(data)
## Response [https://api.nytimes.com/svc/search/v2/articlesearch.json?fq=news_desk:("Climate")&begin_date=20190101&end_date=20190531&api-key=I005E9HTTpYWKlx2Vztc1DiCS01pll1G]
## Date: 2019-10-28 01:58
## Status: 200
## Content-Type: application/json;charset=UTF-8
## Size: 222 kB
# Report the category, reason and message for the response's status code.
httr::http_status(data)
## $category
## [1] "Success"
##
## $reason
## [1] "OK"
##
## $message
## [1] "Success: (200) OK"
# Abort with an error here if the request did not succeed.
httr::stop_for_status(data)
# Turn the JSON body of the response already fetched into `data` into a data
# frame with one row per article.
#
# The body is parsed from `data` rather than by calling fromJSON(url) again,
# so the API is hit only once and the parsed data is the same response whose
# status was checked. The article records are looked up by their exact field
# name, `docs`, instead of relying on `$` partial matching of `doc`.
climate_df <- httr::content(data, as = "text", encoding = "UTF-8") %>%
  fromJSON() %>%
  pluck("response", "docs") %>%
  mutate(
    # `headline` and `byline` are nested data frames; keep one field of each.
    headline = headline$main,
    date = as.Date(pub_date),
    # Strip the leading "By " so only the author name(s) remain.
    author = str_remove(byline$original, "^By ")
  ) %>%
  select(
    date,
    type_of_material,
    headline,
    abstract,
    author,
    web_url
  )
# Display the resulting data frame.
climate_df