Useful Resources

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(httr)
library(tidyjson)
## 
## Attaching package: 'tidyjson'
## 
## The following object is masked from 'package:stats':
## 
##     filter
library(ggplot2)
library(urltools)
library(nytimes) # see https://github.com/mkearney/nytimes for installation

First, get your own New York Times API key from https://developer.nytimes.com/apis

# Read the key from the NYTIMES_KEY environment variable (for example, set in
# ~/.Renviron) so the secret does not have to be written into this script.
# When the variable is not set, fall back to the placeholder below, which you
# can replace with your own key.
nytimes_key <- Sys.getenv("NYTIMES_KEY", unset = "<YOUR_API_KEY>")

The easiest approach is to use a wrapper package:

library(nytimes)

# Hand the key to the wrapper through the NYTIMES_KEY environment variable.
Sys.setenv(NYTIMES_KEY = nytimes_key)

# Search for "gamergate" with n = 100; the result is a list of HTTP responses.
nytsearch <- nyt_search("gamergate", n = 100)

# Show the first six responses.
# NOTE: each printed request URL includes the api-key query parameter, so
# avoid sharing this output verbatim.
head(nytsearch, n = 6L)
## [[1]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&sort=newest&api-key=<YOUR_API_KEY>]
##   Date: 2022-10-13 18:02
##   Status: 200
##   Content-Type: application/json
##   Size: 229 kB
## 
## 
## [[2]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=1&sort=newest&api-key=<YOUR_API_KEY>]
##   Date: 2022-10-13 18:02
##   Status: 200
##   Content-Type: application/json
##   Size: 196 kB
## 
## 
## [[3]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=2&sort=newest&api-key=<YOUR_API_KEY>]
##   Date: 2022-10-13 18:02
##   Status: 200
##   Content-Type: application/json
##   Size: 196 kB
## 
## 
## [[4]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=3&sort=newest&api-key=<YOUR_API_KEY>]
##   Date: 2022-10-13 18:02
##   Status: 200
##   Content-Type: application/json
##   Size: 192 kB
## 
## 
## [[5]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=4&sort=newest&api-key=<YOUR_API_KEY>]
##   Date: 2022-10-13 18:02
##   Status: 200
##   Content-Type: application/json
##   Size: 212 kB
## 
## 
## [[6]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=5&sort=newest&api-key=<YOUR_API_KEY>]
##   Date: 2022-10-13 18:03
##   Status: 200
##   Content-Type: application/json
##   Size: 215 kB
# Pause for one second before the next request, for rate-limiting.
Sys.sleep(time = 1)

Let’s check out the article search API and construct a URL: https://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&api-key=yourkey

# Build the request URL for the Article Search endpoint.
# Use HTTPS: the API key is sent in the query string, so a plain-HTTP request
# would transmit it in cleartext.
url <- "https://api.nytimes.com/svc/search/v2/articlesearch.json"
# Percent-encode each value before attaching it as a query parameter.
url <- param_set(url, "q", url_encode("gamergate"))
url <- param_set(url, "api-key", url_encode(nytimes_key))
# Print the finished URL (key shown here as a placeholder).
url
## [1] "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&api-key=<YOUR_API_KEY>"

Next, let’s retrieve the data from the web API

# Send the GET request and keep the response object.
resp <- GET(url)
# Fail here on an HTTP error status (for example, a rejected API key) rather
# than letting the JSON-parsing steps run on an error body.
stop_for_status(resp)
# For rate-limiting
Sys.sleep(1)

Finally, let’s deconstruct the JSON. Look at what happens at each step.

# Extract the response body as text. The encoding is given explicitly so httr
# does not have to guess it (and print a "defaulting to UTF-8" message).
js <- content(resp, as = "text", encoding = "UTF-8")
# Step into the top-level "response" object ...
js2 <- enter_object(js, "response")
# ... and then into its "docs" member.
js3 <- enter_object(js2, "docs")
# One row per element of the "docs" array.
df <- gather_array(js3)
# One row per key of each document; shown to illustrate the intermediate step
# (df3 below is built from df, not df2).
df2 <- gather_object(df)
# Pull the selected string fields of each document into their own columns.
df3 <- spread_values(
  df,
  abstract = jstring("abstract"),
  web_url = jstring("web_url"),
  snippet = jstring("snippet")
)

This can be done much more cleanly using piping:

# Same extraction as a single pipeline: body text -> "response" -> "docs" ->
# one row per array element -> selected string fields as columns.
# The encoding is given explicitly so httr does not have to guess it.
final_df <-
  content(resp, as = "text", encoding = "UTF-8") %>%
  enter_object("response") %>%
  enter_object("docs") %>%
  gather_array() %>%
  spread_values(
    abstract = jstring("abstract"),
    web_url = jstring("web_url"),
    snippet = jstring("snippet")
  )
# Show the first rows of the result.
head(final_df)
## # A tbl_json: 6 x 6 tibble with a "JSON" attribute
##   ..JSON                  document.id array.index abstract       web_url snippet
##   <chr>                         <int>       <int> <chr>          <chr>   <chr>  
## 1 "{\"abstract\":\"He..."           1           1 Here’s what y… https:… Here’s…
## 2 "{\"abstract\":\"In..."           1           2 Intel’s decis… https:… Intel’…
## 3 "{\"abstract\":\"Th..."           1           3 The atmospher… https:… The at…
## 4 "{\"abstract\":\"Th..."           1           4 The precursor… https:… The pr…
## 5 "{\"abstract\":\"Th..."           1           5 The legacy of… https:… The le…
## 6 "{\"abstract\":\"Ho..."           1           6 How online mo… https:… How on…