library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(httr)
library(tidyjson)
##
## Attaching package: 'tidyjson'
##
## The following object is masked from 'package:stats':
##
## filter
library(ggplot2)
library(urltools)
library(nytimes) # see https://github.com/mkearney/nytimes for installation
First, get your own New York Times API key from https://developer.nytimes.com/apis.
# Placeholder; replace with the key issued by the NYT developer portal.
nytimes_key <- "<YOUR_API_KEY>"
The easiest approach is to use a wrapper package:
# Attach the wrapper package (harmless if it is already attached).
library(nytimes)
# Publish the key to this session via the NYTIMES_KEY environment variable.
Sys.setenv(NYTIMES_KEY = nytimes_key)
# Query the wrapper for "gamergate" with n = 100.
nytsearch <- nyt_search("gamergate", n = 100)
# Preview the first few elements of the result.
head(nytsearch)
## [[1]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&sort=newest&api-key=<YOUR_API_KEY>]
## Date: 2022-10-13 18:02
## Status: 200
## Content-Type: application/json
## Size: 229 kB
##
##
## [[2]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=1&sort=newest&api-key=<YOUR_API_KEY>]
## Date: 2022-10-13 18:02
## Status: 200
## Content-Type: application/json
## Size: 196 kB
##
##
## [[3]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=2&sort=newest&api-key=<YOUR_API_KEY>]
## Date: 2022-10-13 18:02
## Status: 200
## Content-Type: application/json
## Size: 196 kB
##
##
## [[4]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=3&sort=newest&api-key=<YOUR_API_KEY>]
## Date: 2022-10-13 18:02
## Status: 200
## Content-Type: application/json
## Size: 192 kB
##
##
## [[5]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=4&sort=newest&api-key=<YOUR_API_KEY>]
## Date: 2022-10-13 18:02
## Status: 200
## Content-Type: application/json
## Size: 212 kB
##
##
## [[6]]
## Response [http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&page=5&sort=newest&api-key=<YOUR_API_KEY>]
## Date: 2022-10-13 18:03
## Status: 200
## Content-Type: application/json
## Size: 215 kB
# Pause for one second before the next request (rate limiting).
Sys.sleep(time = 1)
Let’s check out the article search API and construct a URL: https://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&api-key=yourkey
# Build the request URL for the Article Search endpoint.
# Use HTTPS so the API key carried in the query string is not sent in
# clear text.
url <- "https://api.nytimes.com/svc/search/v2/articlesearch.json"
# Attach the search term and the API key as URL-encoded query parameters.
url <- param_set(url, key = "q", value = url_encode("gamergate"))
url <- param_set(url, key = "api-key", value = url_encode(nytimes_key))
# Print the finished URL (note: this echoes the API key to the console).
url
## [1] "http://api.nytimes.com/svc/search/v2/articlesearch.json?q=gamergate&api-key=<YOUR_API_KEY>"
Next, let’s retrieve the data from the web API
# Send the GET request to the URL built above.
resp <- GET(url)
# Fail fast on an HTTP error status instead of letting an error payload
# flow into the JSON parsing steps that follow.
stop_for_status(resp)
# For rate-limiting
Sys.sleep(1)
Finally, let’s deconstruct the JSON. Look at what happens each step of the way.
# Extract the response body as text. Passing the encoding explicitly avoids
# httr's "No encoding supplied: defaulting to UTF-8." message.
js <- content(resp, as = "text", encoding = "UTF-8")
# Step into the top-level "response" object, then into its "docs" member.
js2 <- enter_object(js, "response")
js3 <- enter_object(js2, "docs")
# Expand the "docs" array with gather_array().
df <- gather_array(js3)
# Expand each element's keys with gather_object() (kept for inspection only;
# df2 is not used below).
df2 <- gather_object(df)
# Pull three string fields of each element out into named columns.
df3 <- spread_values(
  df,
  abstract = jstring("abstract"),
  web_url = jstring("web_url"),
  snippet = jstring("snippet")
)
This can be done much more cleanly using piping:
# Same extraction as a single pipeline: response text -> "response" ->
# "docs" -> gather_array() -> three string columns.
# The explicit encoding avoids httr's "No encoding supplied" message.
final_df <-
  content(resp, as = "text", encoding = "UTF-8") %>%
  enter_object("response") %>%
  enter_object("docs") %>%
  gather_array() %>%
  spread_values(
    abstract = jstring("abstract"),
    web_url = jstring("web_url"),
    snippet = jstring("snippet")
  )
# Preview the first rows of the result.
head(final_df)
## # A tbl_json: 6 x 6 tibble with a "JSON" attribute
## ..JSON document.id array.index abstract web_url snippet
## <chr> <int> <int> <chr> <chr> <chr>
## 1 "{\"abstract\":\"He..." 1 1 Here’s what y… https:… Here’s…
## 2 "{\"abstract\":\"In..." 1 2 Intel’s decis… https:… Intel’…
## 3 "{\"abstract\":\"Th..." 1 3 The atmospher… https:… The at…
## 4 "{\"abstract\":\"Th..." 1 4 The precursor… https:… The pr…
## 5 "{\"abstract\":\"Th..." 1 5 The legacy of… https:… The le…
## 6 "{\"abstract\":\"Ho..." 1 6 How online mo… https:… How on…