DATA607_WEEK9_Assignment

DATA Acquisition

NYTimes Shiny App Link

Using NYTimes APIs and developer API Key

Per the NYTimes documentation, we need to obtain an API key by registering as a developer on the NYTimes developer website.
Getting Started: The Times Developer Network is our API clearinghouse and community. Here’s how to get started:

Request an API key
Read the API documentation, FAQ and Terms of Use
Use the API Tool associated with each API to experiment without writing code

NYTimes Top Stories

# Fetch the Top Stories feed for the "home" section and parse the JSON reply.
# `nyt.api.key` is defined outside this section; presumably it holds the
# "api-key=..." query-string fragment appended after the trailing "?".
nyt.articles.url <- "https://api.nytimes.com/svc/topstories/v2/home.json?"
nyt.json <- fromJSON(
  getURL(
    paste0(nyt.articles.url, nyt.api.key)
  )
)
# Print the status field reported by the service.
nyt.json[["status"]]

## [1] "OK"

summary(nyt.json)

##              Length Class  Mode     
## status        1     -none- character
## copyright     1     -none- character
## section       1     -none- character
## last_updated  1     -none- character
## num_results   1     -none- numeric  
## results      39     -none- list

length(nyt.json$results)

## [1] 39

names(nyt.json$results[[1]])

##  [1] "section"             "subsection"          "title"              
##  [4] "abstract"            "url"                 "byline"             
##  [7] "item_type"           "updated_date"        "created_date"       
## [10] "published_date"      "material_type_facet" "kicker"             
## [13] "des_facet"           "org_facet"           "per_facet"          
## [16] "geo_facet"           "multimedia"          "short_url"

Now instantiate a data frame to store the JSON response.

# Build a data frame with one row per Top Stories article.
#
# Columns: id (numeric position in the response), section, title, url,
# author (taken from the article's `byline`) and published_date.
#
# The frame is built column-wise instead of growing it with rbind(cbind(...))
# inside a loop, because:
#   * cbind() of mixed values yields a character matrix, so `id` lost its
#     numeric type;
#   * a missing (NULL) field was silently dropped by cbind(), producing a row
#     with too few columns;
#   * `1:length(x)` iterates over c(1, 0) when the response has no results.
# local() keeps the helper names (and names such as `url`) out of the global
# environment.
nyt.data.df <- local({
  results <- nyt.json$results

  # Extract one field from every article as character. An absent or empty
  # field becomes NA so that every column has one value per article.
  field <- function(name) {
    vapply(
      results,
      function(item) {
        value <- item[[name]]
        if (length(value) == 0) NA_character_ else as.character(value[[1]])
      },
      character(1),
      USE.NAMES = FALSE
    )
  }

  data.frame(
    id = as.numeric(seq_along(results)),
    section = field("section"),
    title = field("title"),
    url = field("url"),
    author = field("byline"),
    published_date = field("published_date"),
    stringsAsFactors = FALSE
  )
})

# Show the articles as an interactive table, five rows per page.
DT::datatable(nyt.data.df, options = list(pageLength = 5))

EDA - NYTimes Top Stories

NYTimes Most popular (Most Shared)

# Fetch the Most Popular "most shared" feed (all sections, 30-day window per
# the URL path) and parse the JSON reply.
# `nyt.api.key` is defined outside this section; presumably it holds the
# "api-key=..." query-string fragment appended after the trailing "?".
nyt.mostshsred.url <- "https://api.nytimes.com/svc/mostpopular/v2/mostshared/all-sections/30.json?"

nyt.mostshared.json <- fromJSON(
  getURL(
    paste0(nyt.mostshsred.url, nyt.api.key)
  )
)
# Print the status field reported by the service.
nyt.mostshared.json[["status"]]

## [1] "OK"

# Inspect the Most Shared response. These calls previously inspected
# `nyt.json` (the Top Stories response) by mistake.
# NOTE(review): the echoed output below was captured from `nyt.json`;
# re-run this section to refresh it for `nyt.mostshared.json`.
summary(nyt.mostshared.json)

##              Length Class  Mode     
## status        1     -none- character
## copyright     1     -none- character
## section       1     -none- character
## last_updated  1     -none- character
## num_results   1     -none- numeric  
## results      39     -none- list

length(nyt.mostshared.json$results)

## [1] 39

names(nyt.mostshared.json$results[[1]])

##  [1] "section"             "subsection"          "title"              
##  [4] "abstract"            "url"                 "byline"             
##  [7] "item_type"           "updated_date"        "created_date"       
## [10] "published_date"      "material_type_facet" "kicker"             
## [13] "des_facet"           "org_facet"           "per_facet"          
## [16] "geo_facet"           "multimedia"          "short_url"

# Build a data frame of the Most Shared articles.
#
# Columns: id (numeric position in the response), section, title, url,
# author (taken from `byline`), published_date, total_shares (integer) and
# des_facet.
#
# As in the original loop, an article with several `des_facet` values yields
# one row per facet (the other columns are repeated); an article with no
# facets yields a single row with des_facet = "".
#
# Each article is turned into a typed data frame and all pieces are bound
# once at the end, because the previous rbind(cbind(...)) approach:
#   * coerced `id` and `total_shares` to character via cbind();
#   * dropped missing (NULL) fields, producing rows with too few columns;
#   * used `1:length(x)`, which iterates over c(1, 0) on an empty response;
#   * tested `length(x > 0)` (misplaced parenthesis) instead of
#     `length(x) > 0`.
# local() keeps the helper names out of the global environment.
nyt.mostshared.data.df <- local({
  results <- nyt.mostshared.json$results

  # First value of a field as character; NA when the field is absent/empty.
  first_chr <- function(item, name) {
    value <- item[[name]]
    if (length(value) == 0) NA_character_ else as.character(value[[1]])
  }

  rows <- lapply(seq_along(results), function(i) {
    item <- results[[i]]

    facets <- as.character(unlist(item[["des_facet"]]))
    if (length(facets) == 0) {
      facets <- ""
    }

    shares <- item[["total_shares"]]
    shares <- if (length(shares) == 0) NA_integer_ else as.integer(shares[[1]])

    # Scalar columns are recycled against `facets`: one row per facet.
    data.frame(
      id = as.numeric(i),
      section = first_chr(item, "section"),
      title = first_chr(item, "title"),
      url = first_chr(item, "url"),
      author = first_chr(item, "byline"),
      published_date = first_chr(item, "published_date"),
      total_shares = shares,
      des_facet = facets,
      stringsAsFactors = FALSE
    )
  })

  # Zero-row template so an empty response still yields the expected columns.
  empty <- data.frame(
    id = numeric(),
    section = character(),
    title = character(),
    url = character(),
    author = character(),
    published_date = character(),
    total_shares = integer(),
    des_facet = character(),
    stringsAsFactors = FALSE
  )

  do.call(rbind, c(list(empty), rows))
})

# Order the Most Shared articles from most to least shared.
# `total_shares` may arrive as character (or factor) text, in which case
# desc() would sort lexically ("9" before "10"); converting through
# as.character() and as.numeric() sorts by numeric value regardless of how
# the column is stored.
nyt.mostshared.df <- nyt.mostshared.data.df %>%
  arrange(desc(as.numeric(as.character(total_shares))))

# Show the ordered articles as an interactive table, five rows per page.
DT::datatable(nyt.mostshared.df, options = list(pageLength = 5))

EDA - NYTimes Most popular (Most Shared)

Now, let’s visualize the description facets of articles in the Technology section.