Using an API key for The New York Times, I retrieved data on the most viewed articles on NYTimes.com over the past 30 days:
library(tidyverse)
library(RCurl)
library(rjson)
library(DT)
library(data.table)
library(httr)
library(kableExtra)
# Read the NYT API key from the environment instead of embedding the secret in
# the source. Set NYT_API_KEY (e.g. in ~/.Renviron) before running this script.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable to your New York Times API key.",
    call. = FALSE
  )
}

# Most Popular API endpoint for the most viewed articles (30-day window).
nyt_json_url <- paste0(
  "https://api.nytimes.com/svc/mostpopular/v2/viewed/30.json?api-key=",
  nyt_api_key
)

# Fetch and parse the JSON response; the `results` element is kept as the
# working data frame.
json_df <- jsonlite::fromJSON(nyt_json_url)
nyt_json_df <- json_df$results

# Interactive table with filter boxes at the top of each column.
DT::datatable(nyt_json_df, filter = "top")
class(nyt_json_df)
## [1] "data.frame"
# List the column names of the parsed results data frame.
names(nyt_json_df)
## [1] "uri" "url" "id" "asset_id"
## [5] "source" "published_date" "updated" "section"
## [9] "subsection" "nytdsection" "adx_keywords" "column"
## [13] "byline" "type" "title" "abstract"
## [17] "des_facet" "org_facet" "per_facet" "geo_facet"
## [21] "media" "eta_id"
# Keep only the columns needed for the analysis, exposing `abstract` under the
# name `text`, and sort the rows by `published_date` in descending order.
new_nyt_json_df <- nyt_json_df %>%
  select(
    published_date,
    section,
    subsection,
    byline,
    title,
    text = abstract
  ) %>%
  arrange(desc(published_date))

# Interactive, filterable view of the trimmed table.
DT::datatable(new_nyt_json_df, filter = "top")
# Count the articles in each section and order the sections from the highest
# count to the lowest.
new_nyt_json_df2 <- new_nyt_json_df %>%
  group_by(section) %>%
  tally(name = "count", sort = TRUE)

new_nyt_json_df2
## # A tibble: 11 × 2
## section count
## <chr> <int>
## 1 U.S. 6
## 2 Business 2
## 3 Movies 2
## 4 New York 2
## 5 World 2
## 6 Arts 1
## 7 Health 1
## 8 Opinion 1
## 9 Science 1
## 10 Sports 1
## 11 Well 1
# Bar chart of article counts per section; bars are ordered by descending
# count via reorder(section, -count).
ggplot(
  new_nyt_json_df2,
  aes(x = reorder(section, -count), y = count)
) +
  geom_col(position = "dodge")
The U.S. section accounted for the most articles (6 of 20) among the most viewed articles on The New York Times website in the past 30 days.