JMSC 6116 Lecture 3: Learning R’s List, Loops, and User-defined Functions: Using NYT API

First of all, we install and load the three required libraries: RJSONIO, RCurl, and plotly.

if (!require("RCurl")) install.packages("RCurl", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
if (!require("RJSONIO")) install.packages("RJSONIO", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
if (!require("plotly")) install.packages("plotly", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)

library(RJSONIO)
library(RCurl)
library(plotly)

Please register and obtain your own API key here http://developer.nytimes.com/ and put it into the following code.

#### API key here
# Replace the placeholder with your own key; it is appended to every request URL as the `api-key` parameter.
api <- "YOUR KEY HERE"

Let’s do a trial run to search “South Korea” in the New York Times 2017 archive. The variable testing is a list.

# Trial query: ask the Article Search API for 2017 articles matching 'South Korea'.
year <- 2017
search_q <- URLencode("'South Korea'")  # percent-encode the space so the term is URL-safe
# Assemble the request URL piece by piece; the date range spans all of `year`.
# The endpoint is addressed over HTTPS.
url <- paste0(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json",
  "?q=", search_q,
  "&begin_date=", year, "0101",
  "&end_date=", year, "1231",
  "&api-key=", api
)
testing <- fromJSON(getURL(url))  ### Convert the API output to R Object
class(testing)

## [1] "list"

str(testing$response$meta)  # Show the structure of the response's `meta` element

##  Named num [1:3] 1724 0 103
##  - attr(*, "names")= chr [1:3] "hits" "offset" "time"

print(testing$response$meta["hits"]) ### Display the "hits" entry of meta: the number of matching articles in 2017

## hits 
## 1724

Sys.sleep(5) ### Pause 5 seconds before the next request to stay within the API call-rate limit

Next, define your variables: the search term search_q, the time span year_range, and an empty data frame nyt_china for storing the results.

# Search term, year span, and an empty results table for the China query.
search_q <- URLencode("'China'")
year_range <- 1990:2017
# Zero-row template with the two result columns. `year` is declared as integer
# (not character) so it matches the integer years drawn from `year_range`.
nyt_china <- data.frame(year = integer(0), hits = numeric(0))

Getting each year’s article count via a for loop

# Query the article count for every year in `year_range` and append one
# (year, hits) row per year to `nyt_china`.
for (year in year_range) {
  # Same request as the trial run, restricted to the current year (HTTPS endpoint).
  url <- paste0(
    "https://api.nytimes.com/svc/search/v2/articlesearch.json",
    "?q=", search_q,
    "&begin_date=", year, "0101",
    "&end_date=", year, "1231",
    "&api-key=", api
  )
  nyt_robj <- fromJSON(getURL(url))  ## Convert from JSON to R Object
  # `[[` extracts the bare number. `[` would keep the "hits" element name,
  # which data.frame() then carries into the row names of the result.
  hits <- nyt_robj$response$meta[["hits"]]  ## save the article hits
  nyt_china <- rbind(nyt_china, data.frame(year = year, hits = hits))  ## Add a new row to nyt_china
  print(paste(year, hits, sep = ":"))  ### Print the new row contents
  Sys.sleep(5)  ### Pause 5 seconds between requests to respect the API rate limit
}

## [1] "1990:1930"
## [1] "1991:1676"
## [1] "1992:1785"
## [1] "1993:1711"
## [1] "1994:1872"
## [1] "1995:1991"
## [1] "1996:2305"
## [1] "1997:2594"
## [1] "1998:2690"
## [1] "1999:2932"
## [1] "2000:3011"
## [1] "2001:2958"
## [1] "2002:2520"
## [1] "2003:3014"
## [1] "2004:2942"
## [1] "2005:3312"
## [1] "2006:4622"
## [1] "2007:5550"
## [1] "2008:6337"
## [1] "2009:6313"
## [1] "2010:7224"
## [1] "2011:6877"
## [1] "2012:7389"
## [1] "2013:5673"
## [1] "2014:6291"
## [1] "2015:5818"
## [1] "2016:7413"
## [1] "2017:4496"

We then repeat the search for “Japan”. The data are stored in the variable nyt_japan.

# Switch the search term to Japan and start a fresh, empty results table.
search_q <- URLencode("'Japan'")
# `year` is declared as integer (not character) to match the integer years
# that are appended to this table.
nyt_japan <- data.frame(year = integer(0), hits = numeric(0))

# Query the article count for every year in `year_range` and append one
# (year, hits) row per year to `nyt_japan`.
for (year in year_range) {
  # Request restricted to the current year (HTTPS endpoint).
  url <- paste0(
    "https://api.nytimes.com/svc/search/v2/articlesearch.json",
    "?q=", search_q,
    "&begin_date=", year, "0101",
    "&end_date=", year, "1231",
    "&api-key=", api
  )
  nyt_robj <- fromJSON(getURL(url))  ## Convert from JSON to R Object
  # `[[` extracts the bare number. `[` would keep the "hits" element name,
  # which data.frame() then carries into the row names of the result.
  hits <- nyt_robj$response$meta[["hits"]]  ## save the article hits
  nyt_japan <- rbind(nyt_japan, data.frame(year = year, hits = hits))  ## Add a new row to nyt_japan
  print(paste(year, hits, sep = ":"))  ### Print the new row contents
  Sys.sleep(5)  ### Pause 5 seconds between requests to respect the API rate limit
}

## [1] "1990:3304"
## [1] "1991:2809"
## [1] "1992:2849"
## [1] "1993:2487"
## [1] "1994:2268"
## [1] "1995:2609"
## [1] "1996:2230"
## [1] "1997:2410"
## [1] "1998:2836"
## [1] "1999:2404"
## [1] "2000:2410"
## [1] "2001:2672"
## [1] "2002:2638"
## [1] "2003:2646"
## [1] "2004:2345"
## [1] "2005:2185"
## [1] "2006:2795"
## [1] "2007:3018"
## [1] "2008:3027"
## [1] "2009:3447"
## [1] "2010:3674"
## [1] "2011:4483"
## [1] "2012:3402"
## [1] "2013:3036"
## [1] "2014:2956"
## [1] "2015:2571"
## [1] "2016:3421"
## [1] "2017:2167"

A function named SearchNYT is defined to search for a keyword in the NYT archive, so there is no need to duplicate the code again.

#' Count NYT articles matching a search term, one request per year.
#'
#' @param sq URL-encoded search term (e.g. the result of `URLencode()`).
#' @param years Years to query. Defaults to the global `year_range`, so
#'   existing calls of the form `SearchNYT(sq)` behave as before.
#' @param key API key sent as the `api-key` parameter. Defaults to the
#'   global `api`.
#' @param pause Seconds to wait after each request (rate limiting).
#' @return A data frame with columns `year` and `hits`, one row per queried
#'   year (zero rows when `years` is empty).
SearchNYT <- function(sq, years = year_range, key = api, pause = 5) {
  nyt <- data.frame(year = integer(0), hits = numeric(0))
  for (year in years) {
    url <- paste0(
      "https://api.nytimes.com/svc/search/v2/articlesearch.json",
      "?q=", sq,
      "&begin_date=", year, "0101",
      "&end_date=", year, "1231",
      "&api-key=", key
    )
    nyt_robj <- fromJSON(getURL(url))  ## Convert from JSON to R Object
    meta <- nyt_robj$response$meta
    # Fail with a clear message when the response carries no hit count,
    # instead of a cryptic data.frame() error further down.
    if (is.null(meta) || !("hits" %in% names(meta))) {
      stop("No 'hits' field in the API response for year ", year, call. = FALSE)
    }
    # `[[` extracts the bare number; `[` would keep the "hits" element name,
    # which data.frame() then carries into the row names of the result.
    hits <- meta[["hits"]]  ## save the article hits
    nyt <- rbind(nyt, data.frame(year = year, hits = hits))  ## Add a new row
    print(paste(year, hits, sep = ":"))  ### Print the new row contents
    Sys.sleep(pause)  ### Wait between requests to respect the API rate limit
  }
  nyt
}

We search for “North Korea” by calling the defined function SearchNYT.

# Encode the phrase for use in a URL, then collect its yearly hit counts with SearchNYT().
search_q <- URLencode("'North Korea'")
nyt_nk <- SearchNYT(search_q)

## [1] "1990:277"
## [1] "1991:287"
## [1] "1992:331"
## [1] "1993:382"
## [1] "1994:714"
## [1] "1995:295"
## [1] "1996:362"
## [1] "1997:370"
## [1] "1998:346"
## [1] "1999:539"
## [1] "2000:619"
## [1] "2001:602"
## [1] "2002:907"
## [1] "2003:1148"
## [1] "2004:672"
## [1] "2005:663"
## [1] "2006:964"
## [1] "2007:910"
## [1] "2008:722"
## [1] "2009:1105"
## [1] "2010:1063"
## [1] "2011:710"
## [1] "2012:784"
## [1] "2013:964"
## [1] "2014:813"
## [1] "2015:639"
## [1] "2016:1155"
## [1] "2017:2083"

Lastly, all three time series are plotted as lines.

# Draw the three yearly hit-count series as lines; the first trace supplies the year values for x.
p <- plot_ly(x = nyt_china$year, y = nyt_china$hits, name = "China", type = "scatter", mode = "lines")
p <- add_trace(p, y = nyt_japan$hits, name = "Japan")
p <- add_trace(p, y = nyt_nk$hits, name = "North Korea")
# Assign the result of layout() back to `p`: R does not modify `p` in place,
# so without the assignment any later use of `p` would lack the title and axis labels.
p <- layout(p, title = "Country's Names mentioned in New York Times (1990 to 2017)", xaxis = list(title = "Year"), yaxis = list(title = "Number of hits"))
p  # display the finished plot

Finally, we upload the plot to the plotly server. You should first create an account here (https://plot.ly/) and obtain the username and the API key.

#### Upload to plotly server
# Set both plotly credentials in a single call, then publish `p` under the file name "lecture3".
Sys.setenv(
  plotly_username = "YOUR PLOTLY ACCOUNT NAME",
  plotly_api_key = "YOUR PLOTLY KEY HERE"
)
api_create(p, filename = "lecture3")

JMSC 6116 Lecture 3: Learning R’s List, Loops, and User-defined Functions: Using NYT API

King-wa Fu

January 29, 2018