First of all, we install and load the three required libraries: RJSONIO, RCurl, and plotly.
if (!require("RCurl")) install.packages("RCurl", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
if (!require("RJSONIO")) install.packages("RJSONIO", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
if (!require("plotly")) install.packages("plotly", repos="https://cran.cnr.berkeley.edu/", dependencies = TRUE)
library(RJSONIO)
library(RCurl)
library(plotly)
Please register and obtain your own API key here http://developer.nytimes.com/ and put it into the following code.
#### API key here
# Prefer supplying the key through the NYT_API_KEY environment variable so the
# secret does not have to be saved inside the script. When that variable is
# unset, the placeholder below is used instead (replace it with your own key).
api <- Sys.getenv("NYT_API_KEY", unset = "YOUR KEY HERE")
Let’s do a trial run by searching for “South Korea” in the 2017 New York Times archive. The resulting variable testing is a list.
year <- 2017
search_q <- URLencode("'South Korea'")
# Build the Article Search request covering the whole calendar year. HTTPS is
# used so the API key in the query string is not sent in clear text.
url <- paste0("https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", search_q,
              "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", api)
testing <- fromJSON(getURL(url)) ### Convert the API output to R Object
class(testing)
## [1] "list"
str(testing$response$meta) # Show its data structure
## Named num [1:3] 1724 0 103
## - attr(*, "names")= chr [1:3] "hits" "offset" "time"
print(testing$response$meta["hits"]) ### Display the number of hits in 2017
## hits
## 1724
Sys.sleep(5) ### Pause before the next request to stay under the API rate limit
Next, define your variables: the search term search_q, the time span year_range, and an empty variable nyt_china for storing the results.
# Years to cover, the URL-encoded query term, and an empty result table that
# the yearly hit counts for "China" are appended to.
year_range <- seq.int(1990L, 2017L)
search_q <- URLencode("'China'", reserved = FALSE)
nyt_china <- data.frame(year = character(), hits = numeric())
We get each year’s article count with a for loop.
# Query the Article Search API once per year and append the hit count to nyt_china.
for (year in year_range) {
  # HTTPS keeps the API key in the query string from travelling in clear text.
  url <- paste0("https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", search_q,
                "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", api)
  nyt_robj <- fromJSON(getURL(url)) ## Convert from JSON to R Object
  # Drop the "hits" name so it does not leak into the data frame's row names.
  hits <- unname(nyt_robj$response$meta["hits"]) ## save the article hits
  nyt_china <- rbind(nyt_china, data.frame(year = year, hits = hits)) ## Add a new row to nyt_china
  print(paste(year, hits, sep = ":")) ### Print the new row contents
  Sys.sleep(5) ### Pause between requests to stay under the API rate limit
}
## [1] "1990:1930"
## [1] "1991:1676"
## [1] "1992:1785"
## [1] "1993:1711"
## [1] "1994:1872"
## [1] "1995:1991"
## [1] "1996:2305"
## [1] "1997:2594"
## [1] "1998:2690"
## [1] "1999:2932"
## [1] "2000:3011"
## [1] "2001:2958"
## [1] "2002:2520"
## [1] "2003:3014"
## [1] "2004:2942"
## [1] "2005:3312"
## [1] "2006:4622"
## [1] "2007:5550"
## [1] "2008:6337"
## [1] "2009:6313"
## [1] "2010:7224"
## [1] "2011:6877"
## [1] "2012:7389"
## [1] "2013:5673"
## [1] "2014:6291"
## [1] "2015:5818"
## [1] "2016:7413"
## [1] "2017:4496"
We then repeat the search for “Japan”. The data are stored in the variable nyt_japan.
# Repeat the yearly search for "Japan", collecting the counts in nyt_japan.
search_q <- URLencode("'Japan'")
nyt_japan <- data.frame(year = character(0), hits = numeric(0))
for (year in year_range) {
  # HTTPS keeps the API key in the query string from travelling in clear text.
  url <- paste0("https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", search_q,
                "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", api)
  nyt_robj <- fromJSON(getURL(url)) ## Convert from JSON to R Object
  # Drop the "hits" name so it does not leak into the data frame's row names.
  hits <- unname(nyt_robj$response$meta["hits"]) ## save the article hits
  nyt_japan <- rbind(nyt_japan, data.frame(year = year, hits = hits)) ## Add a new row to nyt_japan
  print(paste(year, hits, sep = ":")) ### Print the new row contents
  Sys.sleep(5) ### Pause between requests to stay under the API rate limit
}
## [1] "1990:3304"
## [1] "1991:2809"
## [1] "1992:2849"
## [1] "1993:2487"
## [1] "1994:2268"
## [1] "1995:2609"
## [1] "1996:2230"
## [1] "1997:2410"
## [1] "1998:2836"
## [1] "1999:2404"
## [1] "2000:2410"
## [1] "2001:2672"
## [1] "2002:2638"
## [1] "2003:2646"
## [1] "2004:2345"
## [1] "2005:2185"
## [1] "2006:2795"
## [1] "2007:3018"
## [1] "2008:3027"
## [1] "2009:3447"
## [1] "2010:3674"
## [1] "2011:4483"
## [1] "2012:3402"
## [1] "2013:3036"
## [1] "2014:2956"
## [1] "2015:2571"
## [1] "2016:3421"
## [1] "2017:2167"
A function named SearchNYT is defined to search for a keyword in the NYT archive, so there is no need to duplicate the code again.
#' Count New York Times articles matching a query, year by year.
#'
#' Sends one Article Search request per year, prints "year:hits" as a progress
#' line, and pauses after every request.
#'
#' @param sq Query string, already URL-encoded (it is pasted into the URL as is).
#' @param years Years to query; defaults to the global `year_range`.
#' @param api_key NYT API key; defaults to the global `api`.
#' @param pause Seconds to wait after each request.
#' @return A data frame with one row per year and the columns `year` and
#'   `hits`; `hits` is NA (with a warning) for a year whose response carried no
#'   usable hit count.
SearchNYT <- function(sq, years = year_range, api_key = api, pause = 5) {
  fetch_hits <- function(year) {
    # HTTPS keeps the API key in the query string from travelling in clear text.
    url <- paste0("https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", sq,
                  "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", api_key)
    nyt_robj <- fromJSON(getURL(url)) ## Convert from JSON to R Object
    # Strip names so the count is a plain number regardless of how meta was parsed.
    hits <- as.numeric(unlist(nyt_robj$response$meta["hits"], use.names = FALSE))
    if (length(hits) != 1L || is.na(hits)) {
      # Keep the series aligned with `years` instead of failing part-way through.
      warning("No hit count returned for year ", year, call. = FALSE)
      hits <- NA_real_
    }
    print(paste(year, hits, sep = ":")) ### Print the new row contents
    Sys.sleep(pause) ### Pause between requests to stay under the API rate limit
    hits
  }
  # Collect all counts first, then build the data frame once rather than
  # growing it row by row.
  hits <- vapply(years, fetch_hits, numeric(1))
  data.frame(year = years, hits = hits)
}
We search for “North Korea” by calling the defined function SearchNYT.
# Encode the quoted phrase, then fetch its yearly counts with SearchNYT().
search_q <- URLencode("'North Korea'", reserved = FALSE)
nyt_nk <- SearchNYT(sq = search_q)
## [1] "1990:277"
## [1] "1991:287"
## [1] "1992:331"
## [1] "1993:382"
## [1] "1994:714"
## [1] "1995:295"
## [1] "1996:362"
## [1] "1997:370"
## [1] "1998:346"
## [1] "1999:539"
## [1] "2000:619"
## [1] "2001:602"
## [1] "2002:907"
## [1] "2003:1148"
## [1] "2004:672"
## [1] "2005:663"
## [1] "2006:964"
## [1] "2007:910"
## [1] "2008:722"
## [1] "2009:1105"
## [1] "2010:1063"
## [1] "2011:710"
## [1] "2012:784"
## [1] "2013:964"
## [1] "2014:813"
## [1] "2015:639"
## [1] "2016:1155"
## [1] "2017:2083"
Lastly, all three time series are plotted as lines.
# Draw one line per country; the China trace supplies the year axis and the
# two add_trace() calls are given only y values.
p <- plot_ly(x = nyt_china$year, y = nyt_china$hits, name = "China", type = 'scatter', mode = 'lines')
p <- add_trace(p, y = nyt_japan$hits, name = "Japan")
p <- add_trace(p, y = nyt_nk$hits, name = "North Korea")
# Assign the result of layout() back to p so the title and axis labels are part
# of the object that is displayed here and reused afterwards.
p <- layout(p, title = "Country's Names mentioned in New York Times (1990 to 2017)", xaxis = list(title = "Year"), yaxis = list(title = "Number of hits"))
p # Display the finished chart
Finally, we upload the plot to the plotly server. You should first create an account here (https://plot.ly/) and obtain the username and the API key.
#### Upload to plotly server
# Set the plotly account name and API key as environment variables, then
# publish the figure under the file name "lecture3".
Sys.setenv(
  plotly_username = "YOUR PLOTLY ACCOUNT NAME",
  plotly_api_key = "YOUR PLOTLY KEY HERE"
)
api_create(x = p, filename = "lecture3")