# Start from a clean workspace: removes every object that ls() lists in the
# environment this line runs in.
# NOTE(review): this also removes an NYTIMES_KEY object defined earlier in the
# session, and the queries further down read NYTIMES_KEY -- confirm where the
# key is meant to be set.
rm(list=ls())
#install.packages("devtools")
#devtools::install_github("mkearney/nytimes")
library(data.table)
library(plyr)
library(tidyverse)
library(splitstackshape)
library(magrittr)
library(rlang)
library(gridExtra)
library(knitr)
library(kableExtra)
library(data.table)
library(ggplot2)
library(RCurl)
library(httr)
library(rtimes)
library(jsonlite)
library(tidyverse)
##
## Attaching package: 'shiny'
## The following object is masked from 'package:jsonlite':
##
## validate
##
## Listening on http://127.0.0.1:5372
# Create query term
# First Article Search request: a filter query (fq) on the article body,
# restricted to a publication-date window.
#
# The API key is taken from an existing NYTIMES_KEY object when there is one;
# otherwise it is read from the NYTIMES_KEY environment variable, so the
# script can run from a fresh session without the key being hard-coded here.
if (!exists("NYTIMES_KEY")) {
  NYTIMES_KEY <- Sys.getenv("NYTIMES_KEY")
}
if (!nzchar(NYTIMES_KEY)) {
  stop(
    "NYTIMES_KEY is empty: define it or set the NYTIMES_KEY environment variable.",
    call. = FALSE
  )
}
term <- "domestic+terrorist"
begin_date <- "20000420"
end_date <- "20160428"
# paste0() never inserts a separator, so no `sep` argument is needed.
# NOTE(review): the Article Search documentation writes filter queries as
# `field:value` (e.g. fq=body:("...")); `fq=body=` may not filter on the body
# as intended -- confirm against the API reference before relying on the hits.
baseurl <- paste0(
  "http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=body=",
  term, "&begin_date=", begin_date, "&end_date=", end_date,
  "&facet_filter=true&api-key=", NYTIMES_KEY
)
initialQuery <- RJSONIO::fromJSON(baseurl)
# Second entry of the third top-level element of the parsed reply; the
# recorded output below shows this is `$meta` (hits, offset, time).
initialQuery[[3]][2]
## $meta
## hits offset time
## 229435 0 29
# Wait a second before the next request; the response headers recorded below
# report a limit of one request per second.
Sys.sleep(1)
# Another way: hand httr the endpoint plus a named list of query parameters
# instead of pasting the URL together by hand.
new_search <- "\"domestic terrorist\""
articleSearchURL <- "http://api.nytimes.com/svc/search/v2/articlesearch.json"
APIquery <- list(
  `api-key` = NYTIMES_KEY,
  q = new_search
)
rawArticle <- httr::GET(url = articleSearchURL, query = APIquery)
# Third element of the response object; the recorded output below shows it
# holds the response headers (content type, rate-limit counters, ...).
rawArticle[[3]]
## $`content-type`
## [1] "application/json;charset=UTF-8"
##
## $date
## [1] "Tue, 16 Oct 2018 23:10:36 GMT"
##
## $server
## [1] "nginx/1.12.2"
##
## $via
## [1] "kong/0.9.5"
##
## $`x-kong-proxy-latency`
## [1] "6"
##
## $`x-kong-upstream-latency`
## [1] "50"
##
## $`x-ratelimit-limit-day`
## [1] "1000"
##
## $`x-ratelimit-limit-second`
## [1] "1"
##
## $`x-ratelimit-remaining-day`
## [1] "643"
##
## $`x-ratelimit-remaining-second`
## [1] "0"
##
## $`transfer-encoding`
## [1] "chunked"
##
## $connection
## [1] "keep-alive"
##
## attr(,"class")
## [1] "insensitive" "list"
Sys.sleep(1)
# Third variant: exact-phrase search through the `q` parameter. The phrase
# contains double quotes and a space, so the assembled URL is passed through
# URLencode() before it is requested.
orig_url <- "http://api.nytimes.com/svc/search/v2/articlesearch.json?api-key="
term <- "\"domestic terrorist\""
# paste0() never inserts a separator, so no `sep` argument is needed.
baseurl <- paste0(orig_url, NYTIMES_KEY, "&q=", term, "&facet_filter=true")
baseurl <- URLencode(baseurl)
initialQuery <- RJSONIO::fromJSON(baseurl)
Sys.sleep(1)
initialQuery[[3]][2]
## $meta
## hits offset time
## 373 0 11
total_meta_hits <- initialQuery[[3]][2]
# Index of the last page to download. Pages are requested from 0 and the code
# assumes 10 documents per page, so the last page holding any document is
# ceiling(hits / 10) - 1:
#   * ceiling() keeps a partly filled final page (373 hits -> page 37);
#     rounding hits / 10 - 1 stopped at page 36 and left 3 documents out;
#   * min(50, ...) keeps the original cap on the number of requests;
#   * max(0, ...) avoids a negative index when there are no hits.
# The `## [1] 36` recorded right after this block was produced by the earlier
# rounding formula.
total_meta_hits <- max(0, min(50, ceiling(total_meta_hits$meta[[1]] / 10) - 1))
total_meta_hits
## [1] 36
# Download result pages 0 .. total_meta_hits one at a time, turn each parsed
# reply into a data frame, and stack all pages into a single data frame.
domestic_terroirst <- list()
for (i in 0:total_meta_hits) {
  # flatten = TRUE asks jsonlite to flatten nested data frames in the reply.
  nytSearch <- data.frame(
    jsonlite::fromJSON(paste0(baseurl, "&page=", i), flatten = TRUE)
  )
  # Pages are numbered from 0, list slots from 1.
  domestic_terroirst[[i + 1]] <- nytSearch
  # One-second pause between consecutive requests.
  Sys.sleep(1)
}
domestic_terroirst_df <- jsonlite::rbind_pages(domestic_terroirst)
# The per-page list is no longer needed once the pages are combined.
rm(domestic_terroirst)
# Frequency table for one column of the article data.
#
# Arguments:
#   question  Name of the column to tabulate, as a single string.
#   data      Data frame to tabulate. Defaults to the global
#             `domestic_terroirst_df`, which is the only table this function
#             used before the argument existed, so existing calls such as
#             chooseOne("col") behave as before.
#
# Returns a tibble with one row per distinct value of the column, a `count`
# column, and a `percent` column (share of the retained rows), sorted by
# descending count. Rows where the column is "" are dropped; rows where it is
# NA are dropped as well, because filter() only keeps rows whose condition is
# TRUE.
chooseOne <- function(question, data = domestic_terroirst_df) {
  stopifnot(is.character(question), length(question) == 1)
  data %>%
    # `.data[[name]]` looks a column up by string; it replaces the deprecated
    # UQ(sym(name)) form.
    dplyr::filter(.data[[question]] != "") %>%
    # group_by() with an injected symbol replaces the deprecated group_by_().
    dplyr::group_by(!!rlang::sym(question)) %>%
    dplyr::summarise(count = n()) %>%
    dplyr::mutate(percent = (count / sum(count)) * 100) %>%
    dplyr::arrange(desc(count))
}
# Tabulate a hand-picked set of columns, selected by position.
# NOTE(review): the positions depend on the column order of the downloaded
# data; the recorded output below shows which columns they mapped to when the
# script was last run.
my_names <- colnames(domestic_terroirst_df)
the_names <- my_names[c(2, 3, 10, 11, 12, 15, 19, 20, 25)]
# One frequency table per selected column; the resulting list is printed.
lapply(the_names, chooseOne)
## [[1]]
## # A tibble: 1 x 3
## copyright count percent
## <fct> <int> <dbl>
## 1 Copyright (c) 2018 The New York Times Company. All Rights~ 370 100
##
## [[2]]
## # A tibble: 360 x 3
## response.docs.web_url count percent
## <chr> <int> <dbl>
## 1 https://artsbeat.blogs.nytimes.com/2010/05/03/the-return~ 2 0.541
## 2 https://cityroom.blogs.nytimes.com/2011/09/29/a-tribute-~ 2 0.541
## 3 https://learning.blogs.nytimes.com/2002/06/19/reign-of-t~ 2 0.541
## 4 https://learning.blogs.nytimes.com/2003/09/25/balanced-d~ 2 0.541
## 5 https://thecaucus.blogs.nytimes.com/2011/10/05/romney-on~ 2 0.541
## 6 https://www.nytimes.com/2009/12/20/opinion/20rich.html 2 0.541
## 7 https://www.nytimes.com/2012/05/18/world/europe/italy-st~ 2 0.541
## 8 https://www.nytimes.com/2012/05/19/opinion/a-new-attack-~ 2 0.541
## 9 https://www.nytimes.com/2012/05/20/us/3-in-chicago-face-~ 2 0.541
## 10 https://www.nytimes.com/2012/05/21/world/europe/italian-~ 2 0.541
## # ... with 350 more rows
##
## [[3]]
## # A tibble: 40 x 3
## response.docs.news_desk count percent
## <chr> <int> <dbl>
## 1 National Desk 66 20.2
## 2 National 46 14.1
## 3 Foreign 33 10.1
## 4 OpEd 25 7.67
## 5 Editorial Desk 22 6.75
## 6 Foreign Desk 22 6.75
## 7 Metropolitan Desk 19 5.83
## 8 Editorial 9 2.76
## 9 Metro 9 2.76
## 10 BookReview 8 2.45
## # ... with 30 more rows
##
## [[4]]
## # A tibble: 20 x 3
## response.docs.type_of_material count percent
## <chr> <int> <dbl>
## 1 News 224 60.5
## 2 Blog 47 12.7
## 3 Op-Ed 37 10
## 4 Review 16 4.32
## 5 Editorial 13 3.51
## 6 Summary 10 2.70
## 7 Letter 5 1.35
## 8 An Analysis; News Analysis 3 0.811
## 9 Biography 3 0.811
## 10 Chronology 2 0.541
## 11 Article 1 0.270
## 12 Brief 1 0.270
## 13 Front Page 1 0.270
## 14 Interactive Feature 1 0.270
## 15 List 1 0.270
## 16 Obituary (Obit) 1 0.270
## 17 Obituary; Biography 1 0.270
## 18 Review; Biography 1 0.270
## 19 Special Report 1 0.270
## 20 Text 1 0.270
##
## [[5]]
## # A tibble: 370 x 3
## response.docs._id count percent
## <chr> <int> <dbl>
## 1 4fc49bb945c1498b0da8db1a 1 0.270
## 2 4fc4a2a245c1498b0daac6ec 1 0.270
## 3 4fd115358eb7c8105d5cb186 1 0.270
## 4 4fd1170a8eb7c8105d5ce428 1 0.270
## 5 4fd12a228eb7c8105d5f2670 1 0.270
## 6 4fd136a28eb7c8105d608803 1 0.270
## 7 4fd149fd8eb7c8105d6273c6 1 0.270
## 8 4fd14fd88eb7c8105d630bf1 1 0.270
## 9 4fd169ff8eb7c8105d65a2b2 1 0.270
## 10 4fd1720e8eb7c8105d665789 1 0.270
## # ... with 360 more rows
##
## [[6]]
## # A tibble: 61 x 3
## response.docs.uri count percent
## <chr> <int> <dbl>
## 1 nyt://article/01ed2997-5dfd-5f21-875a-4ae589ca913d 1 1.64
## 2 nyt://article/035125d8-6829-5fdf-b24e-f02728536abf 1 1.64
## 3 nyt://article/0854e100-f844-52f7-8fd7-ffd74d0469d2 1 1.64
## 4 nyt://article/0a7a086d-1dcd-5f97-9cc0-58cb30de48b5 1 1.64
## 5 nyt://article/1562239c-63d0-5468-a28a-e2d7e3999445 1 1.64
## 6 nyt://article/1d9a8eb2-68f1-5f91-b2e1-a9856ec732dd 1 1.64
## 7 nyt://article/1d9daa2f-5caa-5bd0-9869-069dd7688de0 1 1.64
## 8 nyt://article/1dda5a39-9ef4-5e7d-afab-27cb34acb154 1 1.64
## 9 nyt://article/293a0f00-2efe-56aa-a7b2-20318b0136e7 1 1.64
## 10 nyt://article/30a5a253-4ed1-51f3-9192-b1e80c157b1f 1 1.64
## # ... with 51 more rows
##
## [[7]]
## # A tibble: 351 x 3
## response.docs.headline.main count percent
## <chr> <int> <dbl>
## 1 NEWS SUMMARY 6 1.62
## 2 Caught in the Cross-Fire 5 1.35
## 3 A New Attack on the Constitution 2 0.541
## 4 A Rally Cry Echoes Online 2 0.541
## 5 A Tribute to a Reporter With Legendary Drive and Swagger 2 0.541
## 6 Balanced Diets 2 0.541
## 7 Fatal Bombing at Italian School Is Thought to Be the Wor~ 2 0.541
## 8 Italy Steps Up Security Forces 2 0.541
## 9 Leftward, Ho? 2 0.541
## 10 Reign of Terror? 2 0.541
## # ... with 341 more rows
##
## [[8]]
## # A tibble: 105 x 3
## response.docs.headline.kicker count percent
## <chr> <int> <dbl>
## 1 The Caucus 11 7.19
## 2 Op-Ed Columnist 8 5.23
## 3 Op-Ed Contributor 6 3.92
## 4 Editorial 4 2.61
## 5 Opinionator 4 2.61
## 6 The Learning Network 4 2.61
## 7 The Lede 4 2.61
## 8 City Room 3 1.96
## 9 Contributing Op-Ed Writer 3 1.96
## 10 THREATS AND RESPONSES: DOMESTIC SECURITY 3 1.96
## # ... with 95 more rows
##
## [[9]]
## # A tibble: 6 x 3
## response.docs.headline.sub count percent
## <chr> <int> <dbl>
## 1 (7 Letters) 1 16.7
## 2 6 Arrested in Canada Raid Attended the Same Mosque 1 16.7
## 3 A Plague of Stunt Casting Is Being Spread Over the Airwav~ 1 16.7
## 4 Bank Data Is Sifted by U.S. in Secret to Block Terror 1 16.7
## 5 Lawmaker Will Begin Look Into Homegrown Islamic Terrorism; 1 16.7
## 6 Shoot-Outs Rise as Gang Activity Spreads From Cities to S~ 1 16.7
# Horizontal bar chart: percentage of articles per section, sections ordered
# by article count. Rows with an empty section name are left out (rows with a
# missing one are dropped by filter() as well).
domestic_terroirst_df %>%
  # The column name is fixed, so it is referenced directly instead of going
  # through the deprecated UQ(sym("...")) form.
  dplyr::filter(response.docs.section_name != "") %>%
  dplyr::group_by(response.docs.section_name) %>%
  dplyr::summarize(count = n()) %>%
  dplyr::mutate(percent = (count / sum(count)) * 100) %>%
  ggplot() +
  geom_bar(
    aes(
      y = percent,
      x = reorder(response.docs.section_name, count),
      fill = response.docs.section_name
    ),
    stat = "identity"
  ) +
  # Flip the axes so the section names are listed along the vertical axis.
  coord_flip() +
  # The fill colour repeats the axis label, so the legend is hidden.
  theme(legend.position = "none")
# Horizontal bar chart: percentage of articles per type of material, ordered
# by article count. No rows are filtered out here.
domestic_terroirst_df %>%
  dplyr::group_by(response.docs.type_of_material) %>%
  dplyr::summarize(count = n()) %>%
  dplyr::mutate(percent = (count / sum(count)) * 100) %>%
  ggplot() +
  # geom_col() draws bars whose heights are the values in the data, i.e. the
  # same thing as geom_bar(stat = "identity").
  geom_col(
    aes(
      x = reorder(response.docs.type_of_material, count),
      y = percent,
      fill = response.docs.type_of_material
    )
  ) +
  coord_flip() +
  theme(legend.position = "none")
# Strip everything from the first "T" onwards in the publication timestamp
# and convert what is left to Date.
domestic_terroirst_df[["response.docs.pub_date"]] <- as.Date(
  gsub("T.*", "", domestic_terroirst_df[["response.docs.pub_date"]])
)
# Running total of articles over publication date: bin the dates with a bin
# width of 1 and plot the cumulative sum of the bin counts.
ggplot(domestic_terroirst_df, aes(x = response.docs.pub_date)) +
  stat_bin(aes(y = cumsum(..count..)), binwidth = 1)
# Same phrase search as before, but asking the API (via `fl`) to return only
# a short list of fields.
cleaner_domestic_terroirst <- list()
fields <- c(
  "headline", "web_url", "abstract", "news_desk", "word_count", "pub_date"
)
orig_url <- "http://api.nytimes.com/svc/search/v2/articlesearch.json?api-key="
term <- "\"domestic terrorist\""
# The field names are joined with commas to form the `fl` value.
baseurl <- paste0(
  orig_url, NYTIMES_KEY, "&q=", term, "&fl=", paste(fields, collapse = ",")
)
## Test Run
baseurl <- URLencode(baseurl)
df_2 <- jsonlite::fromJSON(baseurl, flatten = TRUE)
# The documents of the reply sit under response -> docs.
my_df <- df_2[["response"]][["docs"]]
dim(my_df)
## [1] 10 13
my_df
## web_url
## 1 https://www.nytimes.com/aponline/2018/10/08/us/ap-us-limousine-crash-fbi-informant.html
## 2 https://www.nytimes.com/2018/07/11/us/politics/manafort-trump-russia-jail-transfer.html
## 3 https://www.nytimes.com/2018/05/31/world/americas/what-is-terrorism.html
## 4 https://www.nytimes.com/2016/02/28/books/review/a-doubters-almanac-by-ethan-canin.html
## 5 https://www.nytimes.com/2018/05/16/world/asia/indonesia-swords-terrorism-sumatra.html
## 6 https://www.nytimes.com/2018/04/18/us/kansas-militia-somali-trial-verdict.html
## 7 https://www.nytimes.com/2018/04/01/us/atf-bomb-lab-beltsville.html
## 8 https://www.nytimes.com/2017/08/13/us/politics/charlottesville-sessions-justice-department.html
## 9 https://www.nytimes.com/2017/08/15/us/politics/right-wing-extremism-charlottesville.html
## 10 https://www.nytimes.com/2017/10/19/opinion/columnists/fbi-blacks-civil-rights.html
## pub_date news_desk word_count score
## 1 2018-10-09T00:50:32+0000 None 495 86.11455
## 2 2018-07-12T02:38:30+0000 Washington 562 63.71361
## 3 2018-05-31T20:27:28+0000 Foreign 1303 60.86338
## 4 2016-02-28T00:00:00Z BookReview 1263 44.53065
## 5 2018-05-16T06:44:56+0000 Foreign 1035 41.57404
## 6 2018-04-18T19:07:01+0000 National 1040 41.47201
## 7 2018-04-01T14:24:59+0000 Washington 996 39.93658
## 8 2017-08-13T22:34:17+0000 National 983 37.14599
## 9 2017-08-15T21:30:26+0000 Washington 1180 33.09294
## 10 2017-10-19T08:55:01+0000 OpEd 753 32.86577
## abstract
## 1 <NA>
## 2 <NA>
## 3 <NA>
## 4 Ellen Ullman reviews novel A Doubter's Almanac by Ethan Canin.
## 5 <NA>
## 6 <NA>
## 7 <NA>
## 8 <NA>
## 9 <NA>
## 10 <NA>
## headline.main
## 1 Limo Company Operator Has History as FBI Informant
## 2 Manafort Is Transferred From Jail Where He Was Treated Like a V.I.P.
## 3 What Is Terrorism? Attacks in Canada and Belgium Reflect Uncertain Definition
## 4 A Doubters Almanac, by Ethan Canin
## 5 Indonesia Sword Attack on Police Follows String of Deadly Bombings
## 6 Kansas Trio Convicted in Plot to Bomb Somali Immigrants
## 7 Anatomy of a Bomb Investigation: Inside an A.T.F. Lab
## 8 A Hate Crime? How the Charlottesville Car Attack May Become a Federal Case
## 9 Revocation of Grants to Help Fight Hate Under New Scrutiny After Charlottesville
## 10 The F.B.I.s Black Phantom Menace
## headline.kicker headline.content_kicker
## 1 <NA> <NA>
## 2 <NA> <NA>
## 3 <NA> <NA>
## 4 <NA> <NA>
## 5 <NA> <NA>
## 6 <NA> <NA>
## 7 <NA> <NA>
## 8
## 9 <NA> <NA>
## 10 Op-Ed Columnist Op-Ed Columnist
## headline.print_headline
## 1 Limo Company Operator Has History as FBI Informant
## 2
## 3 Attacks in Canada and Belgium Reflect Fuzzy Definition of Terrorism
## 4 Domestic Terrorist
## 5 4 Men Wielding Swords Attack a Police Station in Indonesia, Then Are Killed
## 6 Verdict Is Guilty for Men In Plot to Bomb Migrants
## 7 Anatomy of a Bomb Investigation: Dissecting Devices in an A.T.F. Lab
## 8 Was the Car Attack a Hate Crime? How the Justice Dept. May View It
## 9 U.S. Rescinded Grant To Combat Extremism Of Right-Wing Groups
## 10
## headline.name headline.seo headline.sub
## 1 NA NA NA
## 2 NA NA NA
## 3 NA NA NA
## 4 NA NA NA
## 5 NA NA NA
## 6 NA NA NA
## 7 NA NA NA
## 8 NA NA NA
## 9 NA NA NA
## 10 NA NA NA