First of all, we install and load the three required libraries: RJSONIO, RCurl, and plotly.
#install.packages("RCurl", dependencies = TRUE)
#install.packages("RJSONIO", dependencies = TRUE)
#install.packages("plotly", dependencies = TRUE)
require(RJSONIO)
require(RCurl)
require(plotly)
Please register and obtain your own API key here http://developer.nytimes.com/ and put it into the following code.
#### API key here
# Prefer supplying the key through the NYT_API_KEY environment variable so the
# secret does not have to be saved in this script. When the variable is not
# set, the placeholder below is used; replace it with your own key.
api <- Sys.getenv("NYT_API_KEY", unset = "YOUR KEY HERE")
Let’s do a trial run to search “Hong Kong” in the New York Times 2019 archive. The variable testing is a list.
# Trial run: count the 2019 articles that match a single phrase.
year <- 2019
# The single quotes are part of the query text; URLencode() escapes the space.
search_q <- URLencode("'Hong Kong'")
# HTTPS keeps the API key in the query string from crossing the network in
# clear text.
url <- paste0(
  "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", search_q,
  "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", api
)
testing <- fromJSON(getURL(url)) ### Convert the API output to R Object
class(testing)
## [1] "list"
# Fail with a clear message when the reply carries no search metadata (for
# example after an invalid key or a rate-limit reply) instead of printing NULL.
if (is.null(testing$response$meta)) {
  stop("NYT API reply has no response$meta; check the API key and rate limit",
       call. = FALSE)
}
str(testing$response$meta) # Show its data structure
## Named num [1:3] 1767 0 150
## - attr(*, "names")= chr [1:3] "hits" "offset" "time"
print(testing$response$meta["hits"]) ### Display the number of hits in 2019
## hits
## 1767
Sys.sleep(5) ### Pause before the next request to stay under the API rate limit
Next, define your variables: the search term search_q, the range of years year_range, and an empty data frame nyt_china for storing the results.
# Search settings for the first query.
search_q <- URLencode("'China'") # the single quotes are part of the query text
year_range <- 1990:2019          # one API request is made for each year
# Empty result table. `year` is declared integer so that it has the same type
# as the values of year_range that are stored in it later.
nyt_china <- data.frame(year = integer(0), hits = numeric(0))
Getting each year’s article count via a for loop
# Query the API once per year. Rows are collected in a preallocated list and
# bound in a single step, rather than growing the data frame on every pass.
yearly_rows <- vector("list", length(year_range))
for (i in seq_along(year_range)) {
  year <- year_range[i]
  # HTTPS keeps the API key in the query string out of clear text.
  url <- paste0(
    "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", search_q,
    "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", api
  )
  nyt_robj <- fromJSON(getURL(url)) ## Convert from JSON to R Object
  meta <- nyt_robj$response$meta
  # Stop with a readable message when the reply has no hit count, instead of
  # failing later inside data.frame() with a row-count mismatch.
  if (is.null(meta) || !("hits" %in% names(meta))) {
    stop("No hit count returned for ", year,
         "; check the API key and rate limit", call. = FALSE)
  }
  hits <- meta[["hits"]] ## save the article hits as a plain, unnamed value
  yearly_rows[[i]] <- data.frame(year = year, hits = hits)
  print(paste(year, hits, sep = ":")) ### Print the new row contents
  Sys.sleep(5) ### Pause between requests to stay under the API rate limit
}
nyt_china <- rbind(nyt_china, do.call(rbind, yearly_rows)) ## Add all new rows
## [1] "1990:2188"
## [1] "1991:2080"
## [1] "1992:2426"
## [1] "1993:2593"
## [1] "1994:2837"
## [1] "1995:2938"
## [1] "1996:3190"
## [1] "1997:3327"
## [1] "1998:3475"
## [1] "1999:3716"
## [1] "2000:4000"
## [1] "2001:4759"
## [1] "2002:3804"
## [1] "2003:4642"
## [1] "2004:4484"
## [1] "2005:6987"
## [1] "2006:11619"
## [1] "2007:8974"
## [1] "2008:12140"
## [1] "2009:7926"
## [1] "2010:6103"
## [1] "2011:5604"
## [1] "2012:6375"
## [1] "2013:5522"
## [1] "2014:6310"
## [1] "2015:5780"
## [1] "2016:4710"
## [1] "2017:4767"
## [1] "2018:5879"
## [1] "2019:5866"
We then repeat the search for “Japan”. The data are stored in the variable nyt_japan.
# Repeat the yearly search for "Japan" and store the counts in nyt_japan.
search_q <- URLencode("'Japan'") # the single quotes are part of the query text
# Empty result table; `year` is integer to match the values of year_range.
nyt_japan <- data.frame(year = integer(0), hits = numeric(0))
# Rows are collected in a preallocated list and bound in a single step, rather
# than growing the data frame on every pass.
yearly_rows <- vector("list", length(year_range))
for (i in seq_along(year_range)) {
  year <- year_range[i]
  # HTTPS keeps the API key in the query string out of clear text.
  url <- paste0(
    "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", search_q,
    "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", api
  )
  nyt_robj <- fromJSON(getURL(url)) ## Convert from JSON to R Object
  meta <- nyt_robj$response$meta
  # Stop with a readable message when the reply has no hit count, instead of
  # failing later inside data.frame() with a row-count mismatch.
  if (is.null(meta) || !("hits" %in% names(meta))) {
    stop("No hit count returned for ", year,
         "; check the API key and rate limit", call. = FALSE)
  }
  hits <- meta[["hits"]] ## save the article hits as a plain, unnamed value
  yearly_rows[[i]] <- data.frame(year = year, hits = hits)
  print(paste(year, hits, sep = ":")) ### Print the new row contents
  Sys.sleep(5) ### Pause between requests to stay under the API rate limit
}
nyt_japan <- rbind(nyt_japan, do.call(rbind, yearly_rows)) ## Add all new rows to nyt_japan
## [1] "1990:3835"
## [1] "1991:3600"
## [1] "1992:4024"
## [1] "1993:3675"
## [1] "1994:3413"
## [1] "1995:3895"
## [1] "1996:3196"
## [1] "1997:3292"
## [1] "1998:4066"
## [1] "1999:3405"
## [1] "2000:3713"
## [1] "2001:4823"
## [1] "2002:4361"
## [1] "2003:4157"
## [1] "2004:3583"
## [1] "2005:4696"
## [1] "2006:6910"
## [1] "2007:4950"
## [1] "2008:6254"
## [1] "2009:4619"
## [1] "2010:3156"
## [1] "2011:3634"
## [1] "2012:2899"
## [1] "2013:2867"
## [1] "2014:2877"
## [1] "2015:2499"
## [1] "2016:2025"
## [1] "2017:2265"
## [1] "2018:2269"
## [1] "2019:2278"
A function named SearchNYT is defined to search for a keyword in the NYT archive, so there is no need to duplicate the code again.
#' Count New York Times articles matching a query, year by year.
#'
#' Sends one Article Search API request per year and reads the `hits` entry of
#' the reply's `response$meta`. Each result is printed as "year:hits".
#'
#' @param sq URL-encoded query string; inserted as-is after `q=` in the URL.
#' @param years Years to query. Defaults to the global `year_range`.
#' @param key API key. Defaults to the global `api`.
#' @param pause Seconds to sleep after each request (5 by default).
#' @return A data frame with one row per year and the columns `year` and
#'   `hits`; it has zero rows when `years` is empty.
SearchNYT <- function(sq, years = year_range, key = api, pause = 5) {
  # Nothing to query: return an empty table with the usual columns.
  if (length(years) == 0L) {
    return(data.frame(year = integer(0), hits = numeric(0)))
  }
  # Preallocate one slot per year instead of growing a data frame with rbind().
  rows <- vector("list", length(years))
  for (i in seq_along(years)) {
    year <- years[i]
    # HTTPS keeps the API key in the query string out of clear text.
    url <- paste0(
      "https://api.nytimes.com/svc/search/v2/articlesearch.json?q=", sq,
      "&begin_date=", year, "0101&end_date=", year, "1231&api-key=", key
    )
    nyt_robj <- fromJSON(getURL(url)) ## Convert from JSON to R Object
    meta <- nyt_robj$response$meta
    # Stop with a readable message when the reply has no hit count, instead of
    # failing later inside data.frame() with a row-count mismatch.
    if (is.null(meta) || !("hits" %in% names(meta))) {
      stop("No hit count returned for ", year,
           "; check the API key and rate limit", call. = FALSE)
    }
    hits <- meta[["hits"]] ## save the article hits as a plain, unnamed value
    rows[[i]] <- data.frame(year = year, hits = hits)
    print(paste(year, hits, sep = ":")) ### Print the new row contents
    Sys.sleep(pause) ### Pause between requests to stay under the API rate limit
  }
  do.call(rbind, rows)
}
We search for “North Korea,” “Syria,” and “Hong Kong” by calling the defined function SearchNYT.
# Yearly hit counts for 'North Korea'. The single quotes are part of the query
# string; SearchNYT() prints one "year:hits" line per request.
search_q <- URLencode("'North Korea'")
nyt_nk <- SearchNYT(search_q)
## [1] "1990:189"
## [1] "1991:223"
## [1] "1992:284"
## [1] "1993:405"
## [1] "1994:811"
## [1] "1995:280"
## [1] "1996:354"
## [1] "1997:319"
## [1] "1998:353"
## [1] "1999:544"
## [1] "2000:684"
## [1] "2001:795"
## [1] "2002:1170"
## [1] "2003:1721"
## [1] "2004:933"
## [1] "2005:1259"
## [1] "2006:2245"
## [1] "2007:1338"
## [1] "2008:1160"
## [1] "2009:1161"
## [1] "2010:733"
## [1] "2011:442"
## [1] "2012:550"
## [1] "2013:801"
## [1] "2014:635"
## [1] "2015:526"
## [1] "2016:682"
## [1] "2017:2105"
## [1] "2018:1976"
## [1] "2019:1001"
# Yearly hit counts for 'Syria', stored in nyt_sy.
search_q <- URLencode("'Syria'")
nyt_sy <- SearchNYT(search_q)
## [1] "1990:509"
## [1] "1991:888"
## [1] "1992:395"
## [1] "1993:398"
## [1] "1994:391"
## [1] "1995:285"
## [1] "1996:415"
## [1] "1997:219"
## [1] "1998:190"
## [1] "1999:387"
## [1] "2000:494"
## [1] "2001:490"
## [1] "2002:509"
## [1] "2003:982"
## [1] "2004:554"
## [1] "2005:1253"
## [1] "2006:2550"
## [1] "2007:1469"
## [1] "2008:1098"
## [1] "2009:564"
## [1] "2010:329"
## [1] "2011:1163"
## [1] "2012:2032"
## [1] "2013:2418"
## [1] "2014:2811"
## [1] "2015:3127"
## [1] "2016:2487"
## [1] "2017:2180"
## [1] "2018:1638"
## [1] "2019:1569"
# Yearly hit counts for 'Hong Kong', stored in nyt_hk.
search_q <- URLencode("'Hong Kong'")
nyt_hk <- SearchNYT(search_q)
## [1] "1990:646"
## [1] "1991:708"
## [1] "1992:868"
## [1] "1993:973"
## [1] "1994:1009"
## [1] "1995:1036"
## [1] "1996:1062"
## [1] "1997:1620"
## [1] "1998:1403"
## [1] "1999:1097"
## [1] "2000:1192"
## [1] "2001:1355"
## [1] "2002:1213"
## [1] "2003:1610"
## [1] "2004:1063"
## [1] "2005:1845"
## [1] "2006:2605"
## [1] "2007:2228"
## [1] "2008:3219"
## [1] "2009:2293"
## [1] "2010:1415"
## [1] "2011:1251"
## [1] "2012:1557"
## [1] "2013:1529"
## [1] "2014:1906"
## [1] "2015:1436"
## [1] "2016:1176"
## [1] "2017:1002"
## [1] "2018:1153"
## [1] "2019:1767"
Last, all five time series are plotted by lines.
# Draw one line per search term. Every trace is given its own year column so
# that each series is plotted against the years it was collected for.
p <- plot_ly(x = nyt_china$year, y = nyt_china$hits, name = "China",
             type = "scatter", mode = "lines")
p <- add_trace(p, x = nyt_japan$year, y = nyt_japan$hits, name = "Japan")
p <- add_trace(p, x = nyt_nk$year, y = nyt_nk$hits, name = "North Korea")
p <- add_trace(p, x = nyt_sy$year, y = nyt_sy$hits, name = "Syria")
p <- add_trace(p, x = nyt_hk$year, y = nyt_hk$hits, name = "Hong Kong")
# Assign the result of layout() back to `p` so that the title and axis labels
# are part of the figure object that is uploaded later, not only of the
# printed copy.
p <- layout(p,
            title = "Country's Names mentioned in New York Times (1990 to 2019)",
            xaxis = list(title = "Year"),
            yaxis = list(title = "Number of hits"))
p # Display the figure
Finally, we upload the plot to the plotly server. You should first create an account here (https://plot.ly/) and obtain the username and the API key.
#### Upload to plotly server
# The plotly credentials are set as environment variables for this session;
# replace both placeholders with your own account name and API key.
Sys.setenv("plotly_username"="YOUR PLOTLY ACCOUNT NAME")
Sys.setenv("plotly_api_key"="YOUR PLOTLY KEY HERE")
# Upload the figure stored in `p` under the file name "lecture3".
api_create(p, filename = "lecture3")