library(jsonlite)
library(stringr)
library(knitr)
library(tidyr)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

Tasks:

1. Choose one of the New York Times APIs

The first step in this assignment was to sign up for an NYT API key at http://developer.nytimes.com/. After receiving it, I used the same website to select a New York Times API to work with. I chose the Article Search API and decided to retrieve articles published on or after February 1, 2015 that include the keyword “Trump”, sorted from newest to oldest.

# Base query URL for the Article Search API: keyword "Trump", begin date
# 2015-02-01, sorted newest first. The API key is read from the NYT_API_KEY
# environment variable (for example, set in ~/.Renviron) instead of being
# written into the document, so the credential is not published with it.
api_key = Sys.getenv("NYT_API_KEY")
if (!nzchar(api_key)) {
  # Warn instead of stopping so `archive` is still defined; without a key the
  # API requests made with this URL are expected to fail.
  warning("NYT_API_KEY is not set; NYT API requests are expected to fail.",
          call. = FALSE)
}
archive = paste0(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json",
  "?api-key=", api_key,
  "&q=Trump&begin_date=20150201&sort=newest"
)

2. Construct an interface in R to read in the JSON data

Because the API returns only ten articles per page, I created a for loop to gather more than one page. However, because there were more than 40k hits, which is more than 4k pages, I selected the first 16 pages (pages 0 through 15) to work with. Inside the loop, each page was read using fromJSON() and then made into a dataframe. From there, columns that were either empty or held non-essential information were dropped. Using the loop, this process was repeated so that each newly read page was bound to the previous ones. Afterwards, the column names were changed for clarity and the publication date and time were separated into their own columns.

# Number of result pages to request. Pages are numbered from 0 here, so this
# covers pages 0 through 15.
n_pages = 16

# One slot per page. Collecting the pages in a preallocated list and binding
# them once avoids re-copying a growing data frame with rbind() on every
# iteration.
pages = vector("list", n_pages)

# Pages for loop
for (i in seq_len(n_pages)) {
  url = paste0(archive, "&page=", i - 1)
  each = fromJSON(url)
  # Flatten nested data-frame columns so every field is a top-level column.
  allarti = jsonlite::flatten(data.frame(each))
  # Keep the eight columns that are renamed further below. They are picked by
  # position, which assumes the column order of the flattened response never
  # changes -- TODO confirm; selecting by name would be more robust.
  pages[[i]] = allarti[, c(4, 6, 10, 13:17)]
}

# Stack all pages into one data frame.
articles = do.call(rbind, pages)

# Changing column names
colnames(articles) = c("URL", "Paragraph", "Source", "Published", "Type", "Desk", "Section", "Subsection")

# Replacing the letter T for easier column separation: separate() splits on
# runs of non-alphanumeric characters by default, and "T" is alphanumeric.
articles$Published = str_replace_all(articles$Published, "T", "-")

# Separating dates and times. The timestamp splits into more than five pieces,
# so extra = "drop" states explicitly that everything after the minute is
# discarded, instead of relying on a "Too many values" warning for every row.
articles = separate(
  articles, Published,
  c("Year", "Month", "Day", "Hour", "Minute"),
  extra = "drop"
)

3. Transform into an R dataframe

Because of the for loop above, each page is read in as a dataframe and appended to the previous pages, creating one big dataframe.

# Render the first three rows of the combined dataframe as a table
articles %>%
  head(3) %>%
  kable()
URL Paragraph Source Year Month Day Hour Minute Type Desk Section Subsection
https://www.nytimes.com/aponline/2017/04/01/us/ap-us-trump-protester-lawsuit.html A federal judge has rejected President Donald Trump’s free speech defense against a lawsuit accusing him of inciting violence against protesters during his campaign. AP 2017 04 01 21 42 article None U.S. NA
https://www.nytimes.com/aponline/2017/04/01/us/politics/ap-us-trump-aide-freedom-caucus.html A top adviser to President Donald Trump on Saturday urged the defeat of a Michigan congressman and member of a conservative group of U.S. House lawmakers who derailed the White House on legislation to repeal and replace the Obama-era health care law. AP 2017 04 01 21 13 article None U.S. Politics
https://www.nytimes.com/interactive/2017/04/01/us/politics/how-much-people-in-the-trump-administration-are-worth-financial-disclosure.html A look at the personal wealth of top officials working for President Trump. The New York Times 2017 04 01 20 43 multimedia U.S. U.S. Politics

If we are interested only in Trump news that involves foreign affairs, we can filter the data by desk and then read the lead paragraphs or follow the links provided.

# Keep only the rows whose Desk is "Foreign", then show the first three
Foreign = articles %>%
  filter(Desk == "Foreign")
Foreign %>%
  head(3) %>%
  kable()
URL Paragraph Source Year Month Day Hour Minute Type Desk Section Subsection
https://www.nytimes.com/2017/04/01/us/politics/isis-iraq-syria-civilians-casualties.html The worries about civilian casualties have grown as Iraqi forces push to take western Mosul from the Islamic State with the help of American and allied air power. The New York Times 2017 04 01 19 57 article Foreign U.S. Politics
https://www.nytimes.com/2017/04/01/world/europe/brexit-scotland-independence-vote.html Scotland’s Nicola Sturgeon is moving, post-Brexit, toward a second independence referendum against the wishes of British Prime Minister Theresa May. The New York Times 2017 04 01 15 38 article Foreign World Europe
https://www.nytimes.com/2017/03/31/us/politics/trump-bashar-assad-syria.html Remarks by the White House press secretary made it clear that President Trump has abandoned the goal of forcing President Bashar al-Assad of Syria from office. The New York Times 2017 04 01 01 17 article Foreign U.S. Politics