library(jsonlite)
library(stringr)
library(knitr)
library(tidyr)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
The first step in this assignment was to sign up for an NYT API key (from http://developer.nytimes.com/). After receiving it, I used the same website to select a New York Times API to work with. I chose the Article Search API and decided to retrieve the articles published on or after February 1, 2015 that include the keyword “Trump”, sorted from newest to oldest.
# Read the NYT API key from the NYT_API_KEY environment variable (for example
# set in ~/.Renviron) so that the secret is not stored in the source file.
api_key = Sys.getenv("NYT_API_KEY")
if (!nzchar(api_key)) {
  warning("NYT_API_KEY is not set; the request URL will not contain an API key.",
          call. = FALSE)
}
# Base Article Search request: keyword "Trump", articles from 2015-02-01
# onward, sorted newest first. A "&page=" suffix is appended per request.
archive = paste0(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json",
  "?api-key=", api_key,
  "&q=Trump&begin_date=20150201&sort=newest"
)
Because the API returns only ten articles per page, I used a for loop to gather more than one page. However, because there were more than 40k hits, which is more than 4k pages, I selected only the first 16 pages (pages 0 through 15, i.e. 160 articles) to work with. Inside the loop, each page was read using fromJSON() and then made into a data frame. From there, columns that were either empty or held non-essential information were dropped. This process was repeated for every page, and the resulting pages were bound together into a single data frame. Column names were then changed for clarity. Finally, the publication timestamp was split into separate date and time columns.
# Page numbers to request (the first page is page 0)
page_numbers = 0:15
# Pre-allocate one slot per page so that the result is not grown with rbind()
# on every iteration
page_list = vector("list", length(page_numbers))
# Pages for loop
for (idx in seq_along(page_numbers)) {
  url = paste0(archive, "&page=", page_numbers[idx])
  each = fromJSON(url)
  # Qualified call so that a flatten() from another attached package cannot
  # be picked up by mistake
  allarti = jsonlite::flatten(data.frame(each))
  # Keep eight columns by position; they are renamed later in the script.
  # NOTE(review): positional selection assumes the response layout is stable
  # across pages - confirm against the API response.
  page_list[[idx]] = allarti[, c(4, 6, 10, 13:17)]
}
# Bind all pages into one data frame in a single step
articles = do.call(rbind, page_list)
# Changing column names
colnames(articles) = c("URL", "Paragraph", "Source", "Published", "Type", "Desk", "Section", "Subsection")
# Replacing the letter T for easier column separation
articles$Published = str_replace_all(articles$Published, "T", "-")
# Separating dates and times. The timestamp splits into more than five pieces
# (the original run warned "Too many values at 160 locations"), so everything
# after the minute is dropped explicitly with extra = "drop" instead of
# relying on the default, which discards the same pieces with a warning.
articles = separate(
  articles, Published,
  into = c("Year", "Month", "Day", "Hour", "Minute"),
  extra = "drop"
)
Because of the loop above, every page is read in as a data frame and all of the pages are combined into one big data frame.
# Render the first three rows of the combined data frame as a table
articles %>%
  head(3) %>%
  kable()
| URL | Paragraph | Source | Year | Month | Day | Hour | Minute | Type | Desk | Section | Subsection |
|---|---|---|---|---|---|---|---|---|---|---|---|
| https://www.nytimes.com/aponline/2017/04/01/us/ap-us-trump-protester-lawsuit.html | A federal judge has rejected President Donald Trump’s free speech defense against a lawsuit accusing him of inciting violence against protesters during his campaign. | AP | 2017 | 04 | 01 | 21 | 42 | article | None | U.S. | NA |
| https://www.nytimes.com/aponline/2017/04/01/us/politics/ap-us-trump-aide-freedom-caucus.html | A top adviser to President Donald Trump on Saturday urged the defeat of a Michigan congressman and member of a conservative group of U.S. House lawmakers who derailed the White House on legislation to repeal and replace the Obama-era health care law. | AP | 2017 | 04 | 01 | 21 | 13 | article | None | U.S. | Politics |
| https://www.nytimes.com/interactive/2017/04/01/us/politics/how-much-people-in-the-trump-administration-are-worth-financial-disclosure.html | A look at the personal wealth of top officials working for President Trump. | The New York Times | 2017 | 04 | 01 | 20 | 43 | multimedia | U.S. | U.S. | Politics |
If we are interested only in news about Trump that involves foreign affairs, we can filter the data by desk and then read the article paragraphs or follow the links provided.
# Keep only the rows whose Desk is "Foreign"
Foreign = articles %>%
  filter(Desk == "Foreign")
# Render the first three matching rows as a table
Foreign %>%
  head(3) %>%
  kable()
| URL | Paragraph | Source | Year | Month | Day | Hour | Minute | Type | Desk | Section | Subsection |
|---|---|---|---|---|---|---|---|---|---|---|---|
| https://www.nytimes.com/2017/04/01/us/politics/isis-iraq-syria-civilians-casualties.html | The worries about civilian casualties have grown as Iraqi forces push to take western Mosul from the Islamic State with the help of American and allied air power. | The New York Times | 2017 | 04 | 01 | 19 | 57 | article | Foreign | U.S. | Politics |
| https://www.nytimes.com/2017/04/01/world/europe/brexit-scotland-independence-vote.html | Scotlands Nicola Sturgeon is moving, post-Brexit, toward a second independence referendum against the wishes of British Prime Minister Theresa May. | The New York Times | 2017 | 04 | 01 | 15 | 38 | article | Foreign | World | Europe |
| https://www.nytimes.com/2017/03/31/us/politics/trump-bashar-assad-syria.html | Remarks by the White House press secretary made it clear that President Trump has abandoned the goal of forcing President Bashar al-Assad of Syria from office. | The New York Times | 2017 | 04 | 01 | 01 | 17 | article | Foreign | U.S. | Politics |