JMSC 6116 Lecture 3: Learning R’s List, Loops, and User-defined Functions: Using NYT API

First of all, we install and load the three required libraries: RJSONIO, RCurl, and plotly.

#install.packages("RCurl", dependencies = TRUE)
#install.packages("RJSONIO", dependencies = TRUE)
#install.packages("plotly", dependencies = TRUE)

# Attach the packages used below. library() stops with an error when a package
# is not installed, whereas require() only warns and returns FALSE, which lets
# the script continue and fail later with a less helpful message.
library(RJSONIO)  # fromJSON()
library(RCurl)    # getURL()
library(plotly)   # plot_ly(), add_trace(), layout(), api_create()

Please register and obtain your own API key here http://developer.nytimes.com/ and put it into the following code.

#### API key here
# Personal NYT developer API key: replace the placeholder with your own key.
# It is appended to each request URL in this script as the `api-key` query
# parameter.
api <- "YOUR KEY HERE"

Let’s do a trial run to search “Hong Kong” in the New York Times 2019 archive. The variable testing is a list.

# Trial query: look up one quoted phrase for a single calendar year.
year <- 2019
search_q <- URLencode("'Hong Kong'")
url <- paste0(
  'http://api.nytimes.com/svc/search/v2/articlesearch.json?q=', search_q,
  '&begin_date=', year, '0101&end_date=', year, '1231&api-key=', api
)
testing <- fromJSON(getURL(url))  # download the reply and parse the JSON into an R object
class(testing)

## [1] "list"

str(testing$response$meta)  # inspect the structure of the metadata element

##  Named num [1:3] 1767 0 150
##  - attr(*, "names")= chr [1:3] "hits" "offset" "time"

print(testing$response$meta["hits"])  # number of matching articles in 2019

## hits 
## 1767

Sys.sleep(5)  # pause 5 seconds before the next API call to respect the rate limit

Next, define your variables: the search term search_q, the time range year_range, and an empty variable nyt_china for storing the results.

# Query settings for the yearly article counts.
search_q <- URLencode("'China'")  # quoted search phrase, URL-encoded for the query string
year_range <- 1990:2019           # years to query (one API call per year)
# Empty result table. The year column is typed integer to match the values of
# year_range that get stored in it (it was previously declared as character).
nyt_china <- data.frame(year = integer(0), hits = numeric(0))

Getting each year’s article count via a for loop

# Fetch the number of NYT articles matching search_q for every year in
# year_range (one API call per year) and store the result in nyt_china.
# The counts are collected in a preallocated vector and the data frame is
# built once at the end, instead of growing it with rbind() on every pass.
hit_counts <- rep(NA_real_, length(year_range))
for (i in seq_along(year_range)) {
  year <- year_range[i]
  url <- paste0(
    'http://api.nytimes.com/svc/search/v2/articlesearch.json?q=', search_q,
    '&begin_date=', year, '0101&end_date=', year, '1231&api-key=', api
  )
  nyt_robj <- fromJSON(getURL(url))  # convert the JSON reply to an R object
  # [[ ]] extracts the bare number; [ ] kept the "hits" name, which ended up
  # in the row names of the result (hits, hits1, hits2, ...).
  hits <- nyt_robj$response$meta[["hits"]]
  hit_counts[i] <- hits
  print(paste(year, hits, sep = ":"))  # progress output: "<year>:<hits>"
  Sys.sleep(5)  # pause between calls to respect the API rate limit
}
nyt_china <- data.frame(year = year_range, hits = hit_counts)

## [1] "1990:2188"
## [1] "1991:2080"
## [1] "1992:2426"
## [1] "1993:2593"
## [1] "1994:2837"
## [1] "1995:2938"
## [1] "1996:3190"
## [1] "1997:3327"
## [1] "1998:3475"
## [1] "1999:3716"
## [1] "2000:4000"
## [1] "2001:4759"
## [1] "2002:3804"
## [1] "2003:4642"
## [1] "2004:4484"
## [1] "2005:6987"
## [1] "2006:11619"
## [1] "2007:8974"
## [1] "2008:12140"
## [1] "2009:7926"
## [1] "2010:6103"
## [1] "2011:5604"
## [1] "2012:6375"
## [1] "2013:5522"
## [1] "2014:6310"
## [1] "2015:5780"
## [1] "2016:4710"
## [1] "2017:4767"
## [1] "2018:5879"
## [1] "2019:5866"

We then repeat the search for “Japan”. The data are stored in the variable nyt_japan.

# Repeat the yearly search for "Japan" and store the result in nyt_japan.
search_q <- URLencode("'Japan'")

# The counts are collected in a preallocated vector and the data frame is
# built once at the end, instead of growing it with rbind() on every pass.
hit_counts <- rep(NA_real_, length(year_range))
for (i in seq_along(year_range)) {
  year <- year_range[i]
  url <- paste0(
    'http://api.nytimes.com/svc/search/v2/articlesearch.json?q=', search_q,
    '&begin_date=', year, '0101&end_date=', year, '1231&api-key=', api
  )
  nyt_robj <- fromJSON(getURL(url))  # convert the JSON reply to an R object
  # [[ ]] extracts the bare number; [ ] kept the "hits" name, which ended up
  # in the row names of the result (hits, hits1, hits2, ...).
  hits <- nyt_robj$response$meta[["hits"]]
  hit_counts[i] <- hits
  print(paste(year, hits, sep = ":"))  # progress output: "<year>:<hits>"
  Sys.sleep(5)  # pause between calls to respect the API rate limit
}
nyt_japan <- data.frame(year = year_range, hits = hit_counts)

## [1] "1990:3835"
## [1] "1991:3600"
## [1] "1992:4024"
## [1] "1993:3675"
## [1] "1994:3413"
## [1] "1995:3895"
## [1] "1996:3196"
## [1] "1997:3292"
## [1] "1998:4066"
## [1] "1999:3405"
## [1] "2000:3713"
## [1] "2001:4823"
## [1] "2002:4361"
## [1] "2003:4157"
## [1] "2004:3583"
## [1] "2005:4696"
## [1] "2006:6910"
## [1] "2007:4950"
## [1] "2008:6254"
## [1] "2009:4619"
## [1] "2010:3156"
## [1] "2011:3634"
## [1] "2012:2899"
## [1] "2013:2867"
## [1] "2014:2877"
## [1] "2015:2499"
## [1] "2016:2025"
## [1] "2017:2265"
## [1] "2018:2269"
## [1] "2019:2278"

A function named SearchNYT is defined to search for a keyword in the NYT archive, so there is no need to duplicate the code again.

# SearchNYT: yearly article counts for one query from the NYT Article Search API.
#
# Arguments:
#   sq      - search term, already URL-encoded by the caller
#             (e.g. URLencode("'Hong Kong'")).
#   years   - years to query, one request per year; defaults to the global
#             year_range, which is what the function always used before.
#   api_key - NYT API key; defaults to the global api.
#   pause   - seconds to wait after each request (rate limiting); default 5.
#   fetch   - function(url) returning the parsed API reply. The default
#             downloads with getURL() and parses with fromJSON(); a
#             replacement can be supplied to run the function offline.
#
# Returns a data.frame with the columns year and hits, one row per element of
# years. If a request fails or its reply carries no hit count, hits is NA for
# that year and a warning is raised, so one bad reply no longer aborts the
# whole run and discards the years already fetched.
SearchNYT <- function(sq, years = year_range, api_key = api, pause = 5,
                      fetch = function(u) fromJSON(getURL(u))) {
  # Preallocate the counts instead of growing a data frame with rbind().
  hit_counts <- rep(NA_real_, length(years))
  for (i in seq_along(years)) {
    year <- years[i]
    url <- paste0(
      'http://api.nytimes.com/svc/search/v2/articlesearch.json?q=', sq,
      '&begin_date=', year, '0101&end_date=', year, '1231&api-key=', api_key
    )
    hits <- tryCatch(
      {
        nyt_robj <- fetch(url)  # parsed API reply
        # [[ ]] drops the "hits" name so it cannot leak into the row names.
        value <- as.numeric(nyt_robj$response$meta[["hits"]])
        if (length(value) != 1L) {
          stop("no hit count in the API reply")
        }
        value
      },
      error = function(e) {
        warning("SearchNYT: no result for ", year, ": ",
                conditionMessage(e), call. = FALSE)
        NA_real_
      }
    )
    hit_counts[i] <- hits
    print(paste(year, hits, sep = ":"))  # progress output: "<year>:<hits>"
    Sys.sleep(pause)  # wait between calls to respect the API rate limit
  }
  data.frame(year = years, hits = hit_counts)
}

We search for “North Korea,” “Syria,” and “Hong Kong” by calling the defined function SearchNYT.

# URL-encode the quoted phrase 'North Korea', then pass it to SearchNYT and
# keep the returned data frame in nyt_nk.
search_q <- URLencode("'North Korea'")
nyt_nk <- SearchNYT(search_q)

## [1] "1990:189"
## [1] "1991:223"
## [1] "1992:284"
## [1] "1993:405"
## [1] "1994:811"
## [1] "1995:280"
## [1] "1996:354"
## [1] "1997:319"
## [1] "1998:353"
## [1] "1999:544"
## [1] "2000:684"
## [1] "2001:795"
## [1] "2002:1170"
## [1] "2003:1721"
## [1] "2004:933"
## [1] "2005:1259"
## [1] "2006:2245"
## [1] "2007:1338"
## [1] "2008:1160"
## [1] "2009:1161"
## [1] "2010:733"
## [1] "2011:442"
## [1] "2012:550"
## [1] "2013:801"
## [1] "2014:635"
## [1] "2015:526"
## [1] "2016:682"
## [1] "2017:2105"
## [1] "2018:1976"
## [1] "2019:1001"

# URL-encode the quoted phrase 'Syria', then pass it to SearchNYT and keep the
# returned data frame in nyt_sy.
search_q <- URLencode("'Syria'")
nyt_sy <- SearchNYT(search_q)

## [1] "1990:509"
## [1] "1991:888"
## [1] "1992:395"
## [1] "1993:398"
## [1] "1994:391"
## [1] "1995:285"
## [1] "1996:415"
## [1] "1997:219"
## [1] "1998:190"
## [1] "1999:387"
## [1] "2000:494"
## [1] "2001:490"
## [1] "2002:509"
## [1] "2003:982"
## [1] "2004:554"
## [1] "2005:1253"
## [1] "2006:2550"
## [1] "2007:1469"
## [1] "2008:1098"
## [1] "2009:564"
## [1] "2010:329"
## [1] "2011:1163"
## [1] "2012:2032"
## [1] "2013:2418"
## [1] "2014:2811"
## [1] "2015:3127"
## [1] "2016:2487"
## [1] "2017:2180"
## [1] "2018:1638"
## [1] "2019:1569"

# URL-encode the quoted phrase 'Hong Kong', then pass it to SearchNYT and keep
# the returned data frame in nyt_hk.
search_q <- URLencode("'Hong Kong'")
nyt_hk <- SearchNYT(search_q)

## [1] "1990:646"
## [1] "1991:708"
## [1] "1992:868"
## [1] "1993:973"
## [1] "1994:1009"
## [1] "1995:1036"
## [1] "1996:1062"
## [1] "1997:1620"
## [1] "1998:1403"
## [1] "1999:1097"
## [1] "2000:1192"
## [1] "2001:1355"
## [1] "2002:1213"
## [1] "2003:1610"
## [1] "2004:1063"
## [1] "2005:1845"
## [1] "2006:2605"
## [1] "2007:2228"
## [1] "2008:3219"
## [1] "2009:2293"
## [1] "2010:1415"
## [1] "2011:1251"
## [1] "2012:1557"
## [1] "2013:1529"
## [1] "2014:1906"
## [1] "2015:1436"
## [1] "2016:1176"
## [1] "2017:1002"
## [1] "2018:1153"
## [1] "2019:1767"

Last, all five time series are plotted by lines.

# Line chart of the five yearly hit-count series. The x values (years) are
# given once in plot_ly(); the added traces only supply their own y values.
p <- plot_ly(x = nyt_china$year, y = nyt_china$hits, name = "China", type = 'scatter', mode = 'lines')
p <- add_trace(p, y = nyt_japan$hits, name = "Japan")
p <- add_trace(p, y = nyt_nk$hits, name = "North Korea")
p <- add_trace(p, y = nyt_sy$hits, name = "Syria")
p <- add_trace(p, y = nyt_hk$hits, name = "Hong Kong")
# layout() returns a modified copy of the plot. Its result must be assigned
# back to p; otherwise the title and axis labels are only shown once here and
# are missing from p when it is used again later (e.g. for uploading).
p <- layout(p, title = "Country's Names mentioned in New York Times (1990 to 2019)", xaxis = list(title = "Year"), yaxis = list(title = "Number of hits"))
p  # display the finished figure

Finally, we upload the plot to the plotly server. You should first create an account here (https://plot.ly/) and obtain the username and the API key.

#### Upload to plotly server
# The plotly account name and API key are supplied through environment
# variables, set here in a single call before the upload.
Sys.setenv(
  plotly_username = "YOUR PLOTLY ACCOUNT NAME",
  plotly_api_key = "YOUR PLOTLY KEY HERE"
)
api_create(p, filename = "lecture3")

JMSC 6116 Lecture 3: Learning R’s List, Loops, and User-defined Functions: Using NYT API

King-wa Fu

February 14, 2020