# Remove all (non-hidden) objects from the environment this is run in, so the
# analysis does not pick up leftovers from earlier work.
# NOTE(review): this only deletes objects; attached packages and options() are
# untouched, so it is not equivalent to starting a fresh R session.
rm(list=ls())
# One-time setup, left commented out: install devtools, then install the
# mkearney/nytimes package from GitHub.
#install.packages("devtools")
#devtools::install_github("mkearney/nytimes")
library(data.table)
library(plyr)
library(tidyverse)
library(splitstackshape)
library(magrittr)
library(rlang)
library(gridExtra)
library(knitr)
library(kableExtra)
library(data.table)
library(ggplot2)
library(RCurl)
library(httr)
library(rtimes)
library(jsonlite)
library(tidyverse)

Introduction

Set Up a Times Key (Shiny App)

## 
## Attaching package: 'shiny'
## The following object is masked from 'package:jsonlite':
## 
##     validate
## 
## Listening on http://127.0.0.1:5372

Initial Attempt

  • This attempt was a failure in that it returned too many results
# First attempt: search the article body for the term within a date range.
term <- "domestic+terrorist"
begin_date <- "20000420"
end_date <- "20160428"

# Join the URL pieces with no separator between them.
baseurl <- paste0(
  "http://api.nytimes.com/svc/search/v2/articlesearch.json?fq=body=",
  term,
  "&begin_date=", begin_date,
  "&end_date=", end_date,
  "&facet_filter=true&api-key=",
  NYTIMES_KEY
)
initialQuery <- RJSONIO::fromJSON(baseurl)
# Show the second entry of the third top-level element, i.e. the `meta` block
# holding the hit count.
initialQuery[[3]][2]
## $meta
##   hits offset   time 
## 229435      0     29
# Pause for one second before the next request.
Sys.sleep(1)
  • 200k+ responses; this is not the correct way to search for the phrase

Second Attempt

  • Correctly identify how to search for multi-word phrases
  • The GET request's response headers give me an idea of my query limits
# Second approach: let httr build the query string from a named list instead
# of pasting the URL together by hand.
new_search <- '"domestic terrorist"'
articleSearchURL <- "http://api.nytimes.com/svc/search/v2/articlesearch.json"
APIquery <- list(
  `api-key` = NYTIMES_KEY,
  q = new_search
)
rawArticle <- httr::GET(url = articleSearchURL, query = APIquery)
# Print the third element of the response object (the headers shown below).
rawArticle[[3]]
## $`content-type`
## [1] "application/json;charset=UTF-8"
## 
## $date
## [1] "Tue, 16 Oct 2018 23:10:36 GMT"
## 
## $server
## [1] "nginx/1.12.2"
## 
## $via
## [1] "kong/0.9.5"
## 
## $`x-kong-proxy-latency`
## [1] "6"
## 
## $`x-kong-upstream-latency`
## [1] "50"
## 
## $`x-ratelimit-limit-day`
## [1] "1000"
## 
## $`x-ratelimit-limit-second`
## [1] "1"
## 
## $`x-ratelimit-remaining-day`
## [1] "643"
## 
## $`x-ratelimit-remaining-second`
## [1] "0"
## 
## $`transfer-encoding`
## [1] "chunked"
## 
## $connection
## [1] "keep-alive"
## 
## attr(,"class")
## [1] "insensitive" "list"
# Pause for one second before the next request -- presumably to stay within
# the one-request-per-second limit reported in the response headers above.
Sys.sleep(1)

Initial API Request With Correct Search Terms

  • Encode the URL because the term contains a space
# Query for the quoted phrase "domestic terrorist". The term contains double
# quotes and a space, so the finished URL is passed through URLencode() before
# it is requested.
orig_url <- "http://api.nytimes.com/svc/search/v2/articlesearch.json?api-key="
term <- "\"domestic terrorist\""
baseurl <- paste0(orig_url, NYTIMES_KEY, "&q=", term, "&facet_filter=true")
baseurl <- URLencode(baseurl)
initialQuery <- RJSONIO::fromJSON(baseurl)
# Pause for one second before any further request.
Sys.sleep(1)
# Show the `meta` block (hits / offset / time) of the response.
initialQuery[[3]][2]
## $meta
##   hits offset   time 
##    373      0     11
# Index of the last result page to request. Pages are 0-based and hold 10
# results each, so the last page is ceiling(hits / 10) - 1. Rounding
# hits / 10 - 1 instead would drop a final partial page (373 hits -> page 36,
# leaving the last 3 results unfetched). The index is capped at 50 and never
# allowed below 0, so a query with no hits still requests only page 0.
total_meta_hits <- initialQuery[[3]][2]
total_meta_hits <- max(0, min(50, ceiling(total_meta_hits$meta[[1]] / 10) - 1))
total_meta_hits
## [1] 37
  • This worked, and my query shows me that there are 350+ responses
    • The total_meta_hits variable is created from the response, as the hit count updates frequently (capped at 500)
  • In the next section I will loop through the API requests

Loop Through All Metadata

  • I took the loop from the walkthrough I posted earlier
# Download every result page (page indices 0 through total_meta_hits) and keep
# each page as a flattened data frame in a list, waiting one second after
# every request.
domestic_terroirst <- vector("list", length(0:total_meta_hits))
for (i in 0:total_meta_hits) {
  nytSearch <- data.frame(
    jsonlite::fromJSON(paste0(baseurl, "&page=", i), flatten = TRUE)
  )
  # Page indices start at 0, list positions at 1.
  domestic_terroirst[[i + 1]] <- nytSearch
  Sys.sleep(1)
}

Build DF From Loop

  • Display column names
# Stack the per-page data frames into a single data frame, then remove the
# list of pages from the workspace.
domestic_terroirst_df <- jsonlite::rbind_pages(pages = domestic_terroirst)
rm(list = "domestic_terroirst")

Exploratory Analysis

  • I love this chooseOne function
    • Used with lapply for summary analysis
# Tabulate how often each non-empty value of one column occurs.
#
# Args:
#   question: Name of the column to summarise, given as a single string.
#   df: Data frame to summarise. Defaults to `domestic_terroirst_df`, which is
#     looked up when the function is called, so one-argument calls keep
#     working as before.
#
# Returns:
#   A tibble with one row per distinct value: the value itself (in a column
#   named after `question`), `count` (number of rows with that value) and
#   `percent` (share of the counted rows, on a 0-100 scale), sorted by
#   descending `count`.
chooseOne <- function(question, df = domestic_terroirst_df) {
  stopifnot(is.character(question), length(question) == 1)
  # Turn the column name into a symbol so the grouping column keeps its name.
  column <- rlang::sym(question)
  df %>%
    # Drops rows whose value is ""; rows with NA are dropped as well, because
    # filter() keeps only rows where the condition is TRUE.
    dplyr::filter(.data[[question]] != "") %>%
    dplyr::group_by(!!column) %>%
    dplyr::summarise(count = dplyr::n()) %>%
    dplyr::mutate(percent = (count / sum(count)) * 100) %>%
    dplyr::arrange(dplyr::desc(count))
}
# Pick the columns to tabulate by their position in the data frame.
my_names <- names(domestic_terroirst_df)
the_names <- my_names[c(2, 3, 10, 11, 12, 15, 19, 20, 25)]
# Build one frequency table per selected column; the resulting list is
# printed.
lapply(the_names, chooseOne)
## [[1]]
## # A tibble: 1 x 3
##   copyright                                                  count percent
##   <fct>                                                      <int>   <dbl>
## 1 Copyright (c) 2018 The New York Times Company. All Rights~   370     100
## 
## [[2]]
## # A tibble: 360 x 3
##    response.docs.web_url                                     count percent
##    <chr>                                                     <int>   <dbl>
##  1 https://artsbeat.blogs.nytimes.com/2010/05/03/the-return~     2   0.541
##  2 https://cityroom.blogs.nytimes.com/2011/09/29/a-tribute-~     2   0.541
##  3 https://learning.blogs.nytimes.com/2002/06/19/reign-of-t~     2   0.541
##  4 https://learning.blogs.nytimes.com/2003/09/25/balanced-d~     2   0.541
##  5 https://thecaucus.blogs.nytimes.com/2011/10/05/romney-on~     2   0.541
##  6 https://www.nytimes.com/2009/12/20/opinion/20rich.html        2   0.541
##  7 https://www.nytimes.com/2012/05/18/world/europe/italy-st~     2   0.541
##  8 https://www.nytimes.com/2012/05/19/opinion/a-new-attack-~     2   0.541
##  9 https://www.nytimes.com/2012/05/20/us/3-in-chicago-face-~     2   0.541
## 10 https://www.nytimes.com/2012/05/21/world/europe/italian-~     2   0.541
## # ... with 350 more rows
## 
## [[3]]
## # A tibble: 40 x 3
##    response.docs.news_desk count percent
##    <chr>                   <int>   <dbl>
##  1 National Desk              66   20.2 
##  2 National                   46   14.1 
##  3 Foreign                    33   10.1 
##  4 OpEd                       25    7.67
##  5 Editorial Desk             22    6.75
##  6 Foreign Desk               22    6.75
##  7 Metropolitan Desk          19    5.83
##  8 Editorial                   9    2.76
##  9 Metro                       9    2.76
## 10 BookReview                  8    2.45
## # ... with 30 more rows
## 
## [[4]]
## # A tibble: 20 x 3
##    response.docs.type_of_material count percent
##    <chr>                          <int>   <dbl>
##  1 News                             224  60.5  
##  2 Blog                              47  12.7  
##  3 Op-Ed                             37  10    
##  4 Review                            16   4.32 
##  5 Editorial                         13   3.51 
##  6 Summary                           10   2.70 
##  7 Letter                             5   1.35 
##  8 An Analysis; News Analysis         3   0.811
##  9 Biography                          3   0.811
## 10 Chronology                         2   0.541
## 11 Article                            1   0.270
## 12 Brief                              1   0.270
## 13 Front Page                         1   0.270
## 14 Interactive Feature                1   0.270
## 15 List                               1   0.270
## 16 Obituary (Obit)                    1   0.270
## 17 Obituary; Biography                1   0.270
## 18 Review; Biography                  1   0.270
## 19 Special Report                     1   0.270
## 20 Text                               1   0.270
## 
## [[5]]
## # A tibble: 370 x 3
##    response.docs._id        count percent
##    <chr>                    <int>   <dbl>
##  1 4fc49bb945c1498b0da8db1a     1   0.270
##  2 4fc4a2a245c1498b0daac6ec     1   0.270
##  3 4fd115358eb7c8105d5cb186     1   0.270
##  4 4fd1170a8eb7c8105d5ce428     1   0.270
##  5 4fd12a228eb7c8105d5f2670     1   0.270
##  6 4fd136a28eb7c8105d608803     1   0.270
##  7 4fd149fd8eb7c8105d6273c6     1   0.270
##  8 4fd14fd88eb7c8105d630bf1     1   0.270
##  9 4fd169ff8eb7c8105d65a2b2     1   0.270
## 10 4fd1720e8eb7c8105d665789     1   0.270
## # ... with 360 more rows
## 
## [[6]]
## # A tibble: 61 x 3
##    response.docs.uri                                  count percent
##    <chr>                                              <int>   <dbl>
##  1 nyt://article/01ed2997-5dfd-5f21-875a-4ae589ca913d     1    1.64
##  2 nyt://article/035125d8-6829-5fdf-b24e-f02728536abf     1    1.64
##  3 nyt://article/0854e100-f844-52f7-8fd7-ffd74d0469d2     1    1.64
##  4 nyt://article/0a7a086d-1dcd-5f97-9cc0-58cb30de48b5     1    1.64
##  5 nyt://article/1562239c-63d0-5468-a28a-e2d7e3999445     1    1.64
##  6 nyt://article/1d9a8eb2-68f1-5f91-b2e1-a9856ec732dd     1    1.64
##  7 nyt://article/1d9daa2f-5caa-5bd0-9869-069dd7688de0     1    1.64
##  8 nyt://article/1dda5a39-9ef4-5e7d-afab-27cb34acb154     1    1.64
##  9 nyt://article/293a0f00-2efe-56aa-a7b2-20318b0136e7     1    1.64
## 10 nyt://article/30a5a253-4ed1-51f3-9192-b1e80c157b1f     1    1.64
## # ... with 51 more rows
## 
## [[7]]
## # A tibble: 351 x 3
##    response.docs.headline.main                               count percent
##    <chr>                                                     <int>   <dbl>
##  1 NEWS SUMMARY                                                  6   1.62 
##  2 Caught in the Cross-Fire                                      5   1.35 
##  3 A New Attack on the Constitution                              2   0.541
##  4 A Rally Cry Echoes Online                                     2   0.541
##  5 A Tribute to a Reporter With Legendary Drive and Swagger      2   0.541
##  6 Balanced Diets                                                2   0.541
##  7 Fatal Bombing at Italian School Is Thought to Be the Wor~     2   0.541
##  8 Italy Steps Up Security Forces                                2   0.541
##  9 Leftward, Ho?                                                 2   0.541
## 10 Reign of Terror?                                              2   0.541
## # ... with 341 more rows
## 
## [[8]]
## # A tibble: 105 x 3
##    response.docs.headline.kicker            count percent
##    <chr>                                    <int>   <dbl>
##  1 The Caucus                                  11    7.19
##  2 Op-Ed Columnist                              8    5.23
##  3 Op-Ed Contributor                            6    3.92
##  4 Editorial                                    4    2.61
##  5 Opinionator                                  4    2.61
##  6 The Learning Network                         4    2.61
##  7 The Lede                                     4    2.61
##  8 City Room                                    3    1.96
##  9 Contributing Op-Ed Writer                    3    1.96
## 10 THREATS AND RESPONSES: DOMESTIC SECURITY     3    1.96
## # ... with 95 more rows
## 
## [[9]]
## # A tibble: 6 x 3
##   response.docs.headline.sub                                 count percent
##   <chr>                                                      <int>   <dbl>
## 1 (7 Letters)                                                    1    16.7
## 2 6 Arrested in Canada Raid Attended the Same Mosque             1    16.7
## 3 A Plague of Stunt Casting Is Being Spread Over the Airwav~     1    16.7
## 4 Bank Data Is Sifted by U.S. in Secret to Block Terror          1    16.7
## 5 Lawmaker Will Begin Look Into Homegrown Islamic Terrorism;     1    16.7
## 6 Shoot-Outs Rise as Gang Activity Spreads From Cities to S~     1    16.7

Create Visual Displays

# Horizontal bar chart: percentage of articles per section (rows without a
# section name are left out), bars ordered by article count, legend hidden.
domestic_terroirst_df %>%
  dplyr::filter(response.docs.section_name != "") %>%
  dplyr::group_by(response.docs.section_name) %>%
  dplyr::summarise(count = dplyr::n()) %>%
  dplyr::mutate(percent = (count / sum(count)) * 100) %>%
  ggplot(aes(
    x = reorder(response.docs.section_name, count),
    y = percent,
    fill = response.docs.section_name
  )) +
  geom_col() +
  coord_flip() +
  theme(legend.position = "none")

# Horizontal bar chart: percentage of articles per type of material, bars
# ordered by article count, legend hidden.
domestic_terroirst_df %>%
  dplyr::group_by(response.docs.type_of_material) %>%
  dplyr::summarise(count = dplyr::n()) %>%
  dplyr::mutate(percent = (count / sum(count)) * 100) %>%
  ggplot(aes(
    x = reorder(response.docs.type_of_material, count),
    y = percent,
    fill = response.docs.type_of_material
  )) +
  geom_col() +
  coord_flip() +
  theme(legend.position = "none")

Author Article Counts

# Frequency table of the byline column: article count and share per byline.
chooseOne("response.docs.byline.original")
## # A tibble: 246 x 3
##    response.docs.byline.original     count percent
##    <chr>                             <int>   <dbl>
##  1 By FRANK RICH                         8   2.37 
##  2 By PHILIP SHENON                      8   2.37 
##  3 By ADAM NAGOURNEY                     5   1.48 
##  4 By JIM YARDLEY                        5   1.48 
##  5 By RICHARD G. JONES                   5   1.48 
##  6 By TIMOTHY EGAN                       5   1.48 
##  7 By ERIC LICHTBLAU                     4   1.19 
##  8 By ERIK ECKHOLM                       4   1.19 
##  9 By ROBERT MACKEY                      4   1.19 
## 10 By ADAM NAGOURNEY and JANET ELDER     3   0.890
## # ... with 236 more rows

Timeseries Results Graph

# Reduce each publication timestamp to its date part (everything from the
# first "T" onward is removed) and store the column as Date.
domestic_terroirst_df[["response.docs.pub_date"]] <- as.Date(
  sub("T.*", "", domestic_terroirst_df[["response.docs.pub_date"]])
)

# Running total of articles over time, binned with a width of 1 on the date
# axis.
ggplot(domestic_terroirst_df, aes(x = response.docs.pub_date)) +
  stat_bin(aes(y = cumsum(..count..)), binwidth = 1)

Specified Search Example

  • Just a test run
# Same phrase query, but ask the API for a fixed set of fields through the
# `fl` parameter (comma-separated field names).
cleaner_domestic_terroirst <- list()
fields <- c(
  "headline", "web_url", "abstract",
  "news_desk", "word_count", "pub_date"
)
orig_url <- "http://api.nytimes.com/svc/search/v2/articlesearch.json?api-key="
term <- "\"domestic terrorist\""
baseurl <- paste0(
  orig_url, NYTIMES_KEY,
  "&q=", term,
  "&fl=", paste(fields, collapse = ",")
)
## Test Run
baseurl <- URLencode(baseurl)
df_2 <- jsonlite::fromJSON(baseurl, flatten = TRUE)
# The articles themselves are in the `docs` element of `response`.
my_df <- df_2[["response"]][["docs"]]
dim(my_df)
## [1] 10 13
my_df
##                                                                                            web_url
## 1          https://www.nytimes.com/aponline/2018/10/08/us/ap-us-limousine-crash-fbi-informant.html
## 2          https://www.nytimes.com/2018/07/11/us/politics/manafort-trump-russia-jail-transfer.html
## 3                         https://www.nytimes.com/2018/05/31/world/americas/what-is-terrorism.html
## 4           https://www.nytimes.com/2016/02/28/books/review/a-doubters-almanac-by-ethan-canin.html
## 5            https://www.nytimes.com/2018/05/16/world/asia/indonesia-swords-terrorism-sumatra.html
## 6                   https://www.nytimes.com/2018/04/18/us/kansas-militia-somali-trial-verdict.html
## 7                               https://www.nytimes.com/2018/04/01/us/atf-bomb-lab-beltsville.html
## 8  https://www.nytimes.com/2017/08/13/us/politics/charlottesville-sessions-justice-department.html
## 9         https://www.nytimes.com/2017/08/15/us/politics/right-wing-extremism-charlottesville.html
## 10              https://www.nytimes.com/2017/10/19/opinion/columnists/fbi-blacks-civil-rights.html
##                    pub_date  news_desk word_count    score
## 1  2018-10-09T00:50:32+0000       None        495 86.11455
## 2  2018-07-12T02:38:30+0000 Washington        562 63.71361
## 3  2018-05-31T20:27:28+0000    Foreign       1303 60.86338
## 4      2016-02-28T00:00:00Z BookReview       1263 44.53065
## 5  2018-05-16T06:44:56+0000    Foreign       1035 41.57404
## 6  2018-04-18T19:07:01+0000   National       1040 41.47201
## 7  2018-04-01T14:24:59+0000 Washington        996 39.93658
## 8  2017-08-13T22:34:17+0000   National        983 37.14599
## 9  2017-08-15T21:30:26+0000 Washington       1180 33.09294
## 10 2017-10-19T08:55:01+0000       OpEd        753 32.86577
##                                                           abstract
## 1                                                             <NA>
## 2                                                             <NA>
## 3                                                             <NA>
## 4  Ellen Ullman reviews novel A Doubter's Almanac by Ethan Canin. 
## 5                                                             <NA>
## 6                                                             <NA>
## 7                                                             <NA>
## 8                                                             <NA>
## 9                                                             <NA>
## 10                                                            <NA>
##                                                                       headline.main
## 1                                Limo Company Operator Has History as FBI Informant
## 2            Manafort Is Transferred From Jail Where He Was Treated Like a ‘V.I.P.’
## 3     What Is Terrorism? Attacks in Canada and Belgium Reflect Uncertain Definition
## 4                                             ‘A Doubter’s Almanac,’ by Ethan Canin
## 5                Indonesia Sword Attack on Police Follows String of Deadly Bombings
## 6                           Kansas Trio Convicted in Plot to Bomb Somali Immigrants
## 7                             Anatomy of a Bomb Investigation: Inside an A.T.F. Lab
## 8        A Hate Crime? How the Charlottesville Car Attack May Become a Federal Case
## 9  Revocation of Grants to Help Fight Hate Under New Scrutiny After Charlottesville
## 10                                                The F.B.I.’s Black Phantom Menace
##    headline.kicker headline.content_kicker
## 1             <NA>                    <NA>
## 2             <NA>                    <NA>
## 3             <NA>                    <NA>
## 4             <NA>                    <NA>
## 5             <NA>                    <NA>
## 6             <NA>                    <NA>
## 7             <NA>                    <NA>
## 8                                         
## 9             <NA>                    <NA>
## 10 Op-Ed Columnist         Op-Ed Columnist
##                                                        headline.print_headline
## 1                           Limo Company Operator Has History as FBI Informant
## 2                                                                             
## 3          Attacks in Canada and Belgium Reflect Fuzzy Definition of Terrorism
## 4                                                           Domestic Terrorist
## 5  4 Men Wielding Swords Attack a Police Station in Indonesia, Then Are Killed
## 6                           Verdict Is Guilty for Men In Plot to Bomb Migrants
## 7         Anatomy of a Bomb Investigation: Dissecting Devices in an A.T.F. Lab
## 8           Was the Car Attack a Hate Crime? How the Justice Dept. May View It
## 9                U.S. Rescinded Grant To Combat Extremism Of Right-Wing Groups
## 10                                                                            
##    headline.name headline.seo headline.sub
## 1             NA           NA           NA
## 2             NA           NA           NA
## 3             NA           NA           NA
## 4             NA           NA           NA
## 5             NA           NA           NA
## 6             NA           NA           NA
## 7             NA           NA           NA
## 8             NA           NA           NA
## 9             NA           NA           NA
## 10            NA           NA           NA

Conclusion

  • I really wanted to go further with this, and may do so at a later time.
    • My idea here was to produce some sort of modern-day "Manufacturing Consent"-type analysis of the NY Times
    • Combining this data with sentiment analysis could lead to some understanding of the bias presented in one of the most respected newspapers in the world