The New York Times web site provides a rich set of APIs, as described here: http://developer.nytimes.com/docs
You’ll need to start by signing up for an API key. Your task is to choose one of the New York Times APIs, construct an interface in R to read in the JSON data, and transform it into an R data frame.
library(tidyverse)
library(jsonlite)
library(tidytext)
library(wordcloud)
# Read the NYT API key from the environment instead of embedding the secret in
# the script. Set NYT_API_KEY (for example in ~/.Renviron) before running.
my_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(my_key)) {
  stop("Set the NYT_API_KEY environment variable to your NYT API key.",
       call. = FALSE)
}

# Query the movie-reviews `all.json` endpoint and coerce the parsed JSON into a
# data frame; response-level fields (status, copyright, ...) end up repeated on
# every review row.
reviews_url <- paste0(
  "https://api.nytimes.com/svc/movies/v2/reviews/all.json?api-key=", my_key
)
nyt_mr_df <- jsonlite::fromJSON(txt = reviews_url) %>%
  data.frame()

# Preview the first few reviews.
nyt_mr_df %>%
  head()
## status
## 1 OK
## 2 OK
## 3 OK
## 4 OK
## 5 OK
## 6 OK
## copyright
## 1 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 2 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 3 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 4 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 5 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 6 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## has_more num_results results.display_title results.mpaa_rating
## 1 TRUE 20 The Legend of Cocaine Island
## 2 TRUE 20 The Burial Of Kojo
## 3 TRUE 20 Relaxer
## 4 TRUE 20 Diane
## 5 TRUE 20 Super Deluxe
## 6 TRUE 20 A Vigilante R
## results.critics_pick results.byline
## 1 0 ELISABETH VINCENTELLI
## 2 1 GLENN KENNY
## 3 1 JEANNETTE CATSOULIS
## 4 1 JEANNETTE CATSOULIS
## 5 0 RACHEL SALTZ
## 6 0 MANOHLA DARGIS
## results.headline
## 1 The Legend of Cocaine Island Review: Florida Mans Silly Odyssey
## 2 The Burial of Kojo Review: A Dazzling Modern Fable
## 3 Relaxer Review: Help! Hes Sitting and He Cant Get Up
## 4 Diane Review: A Vibrant Tale of Love, Sisterhood and Decline
## 5 Super Deluxe Review: A Tamil Film, With a Cosmic Indie Vibe
## 6 A Vigilante Review: Vengeance Is Hers
## results.summary_short
## 1 A harebrained scheme ends in comic confusion in Theo Loves goofy documentary.
## 2 The Ghana-born musician Blitz Bazawule makes a stunning feature directing debut that comes alive with rhythm and color.
## 3 Sad and strange and defiantly gross, Joel Potrykuss Relaxer is a compassionate survival tale swirling with childhood trauma.
## 4 Mary Kay Place is astonishing as a weary widow dealing with guilt, heartache and a diminishing circle of friends.
## 5 Thiagarajan Kumararajas second feature, with multiple spiraling plots, cant sustain its raffish, oddball tone for three hours.
## 6 Olivia Wilde stars as an avenger of domestic-abuse victims in Sarah Daggar-Nicksons satisfyingly lean, mean genre movie.
## results.publication_date results.opening_date results.date_updated
## 1 2019-03-29 2019-03-29 2019-03-29 12:04:03
## 2 2019-03-28 2019-03-29 2019-03-29 16:44:19
## 3 2019-03-28 <NA> 2019-03-28 11:04:06
## 4 2019-03-28 2019-03-29 2019-03-29 16:44:18
## 5 2019-03-28 2019-03-28 2019-03-29 16:44:20
## 6 2019-03-28 2019-03-29 2019-03-29 16:44:20
## results.link.type
## 1 article
## 2 article
## 3 article
## 4 article
## 5 article
## 6 article
## results.link.url
## 1 http://www.nytimes.com/2019/03/29/movies/the-legend-of-cocaine-island-review.html
## 2 http://www.nytimes.com/2019/03/28/movies/the-burial-of-kojo-review.html
## 3 http://www.nytimes.com/2019/03/28/movies/relaxer-review.html
## 4 http://www.nytimes.com/2019/03/28/movies/diane-review.html
## 5 http://www.nytimes.com/2019/03/28/movies/super-deluxe-review.html
## 6 http://www.nytimes.com/2019/03/28/movies/a-vigilante-review.html
## results.link.suggested_link_text
## 1 Read the New York Times Review of The Legend of Cocaine Island
## 2 Read the New York Times Review of The Burial Of Kojo
## 3 Read the New York Times Review of Relaxer
## 4 Read the New York Times Review of Diane
## 5 Read the New York Times Review of Super Deluxe
## 6 Read the New York Times Review of A Vigilante
## results.multimedia.type
## 1 mediumThreeByTwo210
## 2 mediumThreeByTwo210
## 3 mediumThreeByTwo210
## 4 mediumThreeByTwo210
## 5 mediumThreeByTwo210
## 6 mediumThreeByTwo210
## results.multimedia.src
## 1 https://static01.nyt.com/images/2019/03/28/arts/legend1/legend1-mediumThreeByTwo210.jpg
## 2 https://static01.nyt.com/images/2019/03/29/arts/burial1/merlin_152483154_8fdbc98a-e1cb-4e33-b792-2d17ed9573f7-mediumThreeByTwo210.jpg
## 3 https://static01.nyt.com/images/2019/03/29/arts/28relaxer1/relaxer1-mediumThreeByTwo210.jpg
## 4 https://static01.nyt.com/images/2019/03/29/arts/29diane1/diane1-mediumThreeByTwo210.jpg
## 5 https://static01.nyt.com/images/2019/03/29/arts/28superdeluxe/28superdeluxe-mediumThreeByTwo210.jpg
## 6 https://static01.nyt.com/images/2019/03/29/arts/28vigilante-1/28vigilante-1-mediumThreeByTwo210.jpg
## results.multimedia.width results.multimedia.height
## 1 210 140
## 2 210 140
## 3 210 140
## 4 210 140
## 5 210 140
## 6 210 140
# Search the movie-reviews endpoint for a title keyword. The term is
# percent-encoded so queries containing spaces or reserved characters
# (e.g. "&", "#") still produce a valid URL.
movie <- "frozen"
search_url <- paste0(
  "https://api.nytimes.com/svc/movies/v2/reviews/search.json?query=",
  utils::URLencode(movie, reserved = TRUE),
  "&api-key=", my_key
)
frozen_review <- jsonlite::fromJSON(txt = search_url) %>%
  data.frame()
frozen_review
## status
## 1 OK
## 2 OK
## 3 OK
## 4 OK
## 5 OK
## 6 OK
## 7 OK
## 8 OK
## copyright
## 1 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 2 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 3 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 4 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 5 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 6 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 7 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## 8 Copyright (c) 2019 The New York Times Company. All Rights Reserved.
## has_more num_results results.display_title results.mpaa_rating
## 1 FALSE 8 Dawson City: Frozen Time
## 2 FALSE 8 Frozen PG
## 3 FALSE 8 The Frozen Ground R
## 4 FALSE 8 Frozen R
## 5 FALSE 8 Frozen River R
## 6 FALSE 8 Frozen
## 7 FALSE 8 Frozen Assets PG-13
## 8 FALSE 8 Frozen Justice
## results.critics_pick results.byline
## 1 1 GLENN KENNY
## 2 1 STEPHEN HOLDEN
## 3 0 STEPHEN HOLDEN
## 4 1 JEANNETTE CATSOULIS
## 5 1 STEPHEN HOLDEN
## 6 0 Anita Gates
## 7 0 Stephen Holden
## 8 0 MORDAUNT HALL.
## results.headline
## 1 Review: In Dawson City: Frozen Time, Early Movies Lost and Found
## 2 From the Heat of Royal Passion, Poof! Its Permafrost
## 3 People as the Big Game in an Alaskan Hunt
## 4 A Nightmare on a Ski Lift
## 5 Only a Few More Smuggling Days Left Before Christmas? Its Not a Wonderful Life
## 6 Frozen (Movie)
## 7 FROZEN ASSETS (MOVIE)
## 8 Frozen Justice
## results.summary_short
## 1 A lot of future moguls got their start in one remote Yukon town. And an amazing amount of film history was left behind.
## 2 Frozen, from Disney, departs a bit from the companys traditional princess formula.
## 3 The Frozen Ground, starring Nicolas Cage, is a film about a serial killer and the investigator whose pursuit eventually pays off.
## 4 A minimalist setup delivers maximum fright in Frozen, a nifty little chiller that balances its cold terrain with an unexpectedly warm heart.
## 5 Courtney Hunts somber film Frozen River ventures deep into the trenches where hard-working Americans struggle to put food on the table.
## 6
## 7 Smirky sperm-bank farce. Get the hammer.
## 8
## results.publication_date results.opening_date results.date_updated
## 1 2017-06-08 <NA> 2017-11-02 04:16:44
## 2 2013-11-26 2013-11-27 2017-11-02 04:18:18
## 3 2013-08-22 <NA> 2017-11-02 04:16:36
## 4 2010-02-04 <NA> 2017-11-02 04:16:31
## 5 2008-08-01 2008-09-05 2017-11-02 04:18:08
## 6 1998-03-25 1997-00-00 2017-11-02 04:17:51
## 7 1992-10-24 1992-10-23 2017-11-02 04:17:44
## 8 1929-10-26 1929-10-13 2017-11-02 04:16:49
## results.link.type
## 1 article
## 2 article
## 3 article
## 4 article
## 5 article
## 6 article
## 7 article
## 8 article
## results.link.url
## 1 http://www.nytimes.com/2017/06/08/movies/dawson-city-frozen-time-review.html
## 2 http://www.nytimes.com/2013/11/27/movies/disneys-frozen-a-makeover-of-the-snow-queen.html
## 3 http://www.nytimes.com/2013/08/23/movies/the-frozen-ground-stars-nicolas-cage.html
## 4 http://www.nytimes.com/2010/02/05/movies/05frozen.html
## 5 http://www.nytimes.com/2008/08/01/movies/01froz.html
## 6 http://www.nytimes.com/1998/03/25/movies/film-review-young-disaffected-and-constantly-tired-after-tiananmen-square.html
## 7 http://www.nytimes.com/1992/10/24/movies/review-film-when-a-bank-opens-a-branch-for-sperm.html
## 8 http://www.nytimes.com/1929/10/26/archives/the-screen.html
## results.link.suggested_link_text
## 1 Read the New York Times Review of Dawson City: Frozen Time
## 2 Read the New York Times Review of Frozen
## 3 Read the New York Times Review of The Frozen Ground
## 4 Read the New York Times Review of Frozen
## 5 Read the New York Times Review of Frozen River
## 6 Read the New York Times Review of Frozen
## 7 Read the New York Times Review of Frozen Assets
## 8 Read the New York Times Review of Frozen Justice
## results.multimedia.type
## 1 mediumThreeByTwo210
## 2 mediumThreeByTwo210
## 3 mediumThreeByTwo210
## 4 <NA>
## 5 <NA>
## 6 <NA>
## 7 <NA>
## 8 <NA>
## results.multimedia.src
## 1 https://static01.nyt.com/images/2017/05/26/movies/video-dawson-city-frozen-time/video-dawson-city-frozen-time-mediumThreeByTwo210.jpg
## 2 https://static01.nyt.com/images/2013/11/27/arts/FROZEN/FROZEN-mediumThreeByTwo210.jpg
## 3 https://static01.nyt.com/images/2013/08/23/arts/23FROZEN_SPAN/0823FROZEN_SPAN-mediumThreeByTwo210.jpg
## 4 <NA>
## 5 <NA>
## 6 <NA>
## 7 <NA>
## 8 <NA>
## results.multimedia.width results.multimedia.height
## 1 210 140
## 2 210 140
## 3 210 140
## 4 NA NA
## 5 NA NA
## 6 NA NA
## 7 NA NA
## 8 NA NA
library(devtools)
# Install the nytimes package from GitHub only when it is not already
# available, so the script does not contact GitHub on every run.
if (!requireNamespace("nytimes", quietly = TRUE)) {
  devtools::install_github("mkearney/nytimes")
}
library(nytimes)
## Skipping install of 'nytimes' from a github remote, the SHA1 (29db6ff9) has not changed since last install.
## Use `force = TRUE` to force installation
# Search the Article Search API for "Trump" over the past 7 days and collect
# the result pages into `pages` (one data frame per page).
search <- "Trump"

# Dates are sent as YYYYMMDD (no separators).
begin_date <- format(Sys.Date() - 7, "%Y%m%d")
end_date <- format(Sys.Date(), "%Y%m%d")

# Use HTTPS so the API key is not sent in clear text, and percent-encode the
# search term so multi-word or punctuated queries form a valid URL.
url <- paste0(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=",
  utils::URLencode(search, reserved = TRUE),
  "&begin_date=", begin_date, "&end_date=", end_date,
  "&facet_filter=true&api-key=", my_key
)
initialQuery <- jsonlite::fromJSON(txt = url, flatten = TRUE) %>% data.frame()

# Zero-based index of the last result page, assuming 10 results per page.
# ceiling() keeps a partially filled final page (round() could drop it).
max_page <- ceiling(initialQuery$response.meta.hits[1] / 10) - 1

# Rate limiting (HTTP 429) prevents fetching every page in one run, so cap the
# last page index at 10, but never request pages beyond what the query has.
page_cap <- 10
last_page <- max(0, min(max_page, page_cap, na.rm = TRUE))

# NOTE(review): a 1-second pause was observed to trigger HTTP 429. The NYT
# developer FAQ describes a per-minute request cap; confirm the current limit
# and tune this value accordingly.
pause_secs <- 6

pages <- vector("list", last_page + 1)
for (i in seq.int(0, last_page)) {
  # Log before the request so a failing page is identifiable in the output.
  message("Retrieving page ", i)
  nytSearch <- jsonlite::fromJSON(paste0(url, "&page=", i), flatten = TRUE) %>%
    data.frame()
  pages[[i + 1]] <- nytSearch
  Sys.sleep(pause_secs) # pause between requests to stay under the rate limit
}
## Retrieving page 0
## Retrieving page 1
## Retrieving page 2
## Retrieving page 3
## Retrieving page 4
## Retrieving page 5
## Retrieving page 6
## Retrieving page 7
## Retrieving page 8
## Retrieving page 9
## Retrieving page 10
# Requests frequently fail with 'Error in open.connection(con, "rb") : HTTP error 429' (too many requests).
# There are too many result pages to retrieve in a single run without hitting the rate limit,
# so the maximum page index is manually set to 10.
# Stack the per-page data frames into a single data frame and report how many
# articles were retrieved for the search term.
combined <- rbind_pages(pages)
print(paste0(
  "we have successfully scraped ", nrow(combined),
  " articles about ", search, " in the past 7 days"
))
## [1] "we have successfully scrapped 110 articles about Trump in the past 7 days"
# Tokenize the main headlines from the past 7 days into one word per row,
# dropping stop words and the search subject itself.
custom_stop_words <- c("donald", "trump", "trumps")

headline <- data.frame(
  text = as.character(combined$response.docs.headline.main),
  stringsAsFactors = FALSE
) %>%
  tidytext::unnest_tokens(output = word, input = text, token = "words") %>%
  # Normalize typographic apostrophes so contractions can match the stop-word
  # lexicon, which is written with straight apostrophes.
  dplyr::mutate(word = str_replace_all(word, "[\u2018\u2019]", "'")) %>%
  # Remove stop words BEFORE stripping punctuation; otherwise "don't" becomes
  # "dont" and slips past the filter.
  dplyr::filter(!word %in% stop_words$word) %>%
  dplyr::mutate(word = str_replace_all(word, pattern = "[[:punct:]]", "")) %>%
  # Filter again after stripping: possessives such as "trump's" are now
  # "trumps", and tokens made only of punctuation are now empty.
  dplyr::filter(
    word != "",
    !word %in% c(stop_words$word, custom_stop_words)
  ) %>%
  arrange(word)
# Visualize headline words as a word cloud, showing only words that appear at
# least 3 times across all headlines from the past 7 days.
set.seed(1234) # fix the RNG seed so repeated runs draw the same cloud

# Count each word once (most frequent first) instead of recomputing the
# frequency table for every argument.
word_freq <- dplyr::count(headline, word, sort = TRUE)

wordcloud(
  words = word_freq$word,
  freq = word_freq$n,
  min.freq = 3,
  colors = brewer.pal(8, "RdBu"),
  scale = c(4, .1),
  rot.per = .2
)