library(httr)
library(jsonlite)
library(dplyr)
library(tibble)
From the NYT API page on Books and Book Reviews, it looks like I can access a Best Sellers list by specifying its list name in the request URL:
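(The requests below assume an api_key object defined earlier. To avoid hard-coding the key, one option is to read it from an environment variable; a minimal sketch, assuming the key is saved under the name NYT_API_KEY, e.g. in ~/.Renviron:)
# read the API key from the environment instead of hard-coding it
# (NYT_API_KEY is an assumed variable name; set it in ~/.Renviron)
api_key <- Sys.getenv("NYT_API_KEY")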
# creating my API request url
url1 <- paste0("https://api.nytimes.com/svc/books/v3/lists/current/hardcover-fiction.json?api-key=", api_key)
# making the API request
response1 <- GET(url1)
# Checking the request was successful while also capturing and transforming the data into a df
if (status_code(response1) == 200) {
  data1 <- fromJSON(rawToChar(response1$content), flatten = TRUE)
  books_data1 <- data1$results$books
  books_df1 <- as_tibble(books_data1, .name_repair = "universal") %>%
    select(book_title = title, book_author = author, url = amazon_product_url, isbn13 = primary_isbn13)
  print(head(books_df1))
} else {
  print(paste("Failed to fetch data: Error code", status_code(response1)))
}
## # A tibble: 6 × 4
## book_title book_author url isbn13
## <chr> <chr> <chr> <chr>
## 1 THE WOMEN Kristin Hannah https://www.amazon… 97812…
## 2 FOURTH WING Rebecca Yarros https://www.amazon… 97816…
## 3 IRON FLAME Rebecca Yarros https://www.amazon… 97816…
## 4 EMPIRE OF THE DAMNED Jay Kristoff https://www.amazon… 97812…
## 5 A FATE INKED IN BLOOD Danielle L. Jensen https://www.amazon… 97805…
## 6 THE HEAVEN & EARTH GROCERY STORE James McBride https://www.amazon… 97805…
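httr also has built-in helpers for this kind of check; here is a sketch of the same request using stop_for_status(), which raises a descriptive error on any 4xx/5xx response instead of branching by hand:
response1 <- GET(url1)
stop_for_status(response1)  # errors with the HTTP status if the call failed
data1 <- fromJSON(content(response1, "text", encoding = "UTF-8"), flatten = TRUE)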
names(books_data1)
## [1] "rank" "rank_last_week" "weeks_on_list"
## [4] "asterisk" "dagger" "primary_isbn10"
## [7] "primary_isbn13" "publisher" "description"
## [10] "price" "title" "author"
## [13] "contributor" "contributor_note" "book_image"
## [16] "book_image_width" "book_image_height" "amazon_product_url"
## [19] "age_group" "book_review_link" "first_chapter_link"
## [22] "sunday_review_link" "article_chapter_link" "isbns"
## [25] "buy_links" "book_uri"
It looks like there are 15 top sellers in the hardcover fiction category.
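A quick check of the tibble backs that up:
nrow(books_df1)  # 15 rows, one per best seller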
==========================================
But I wanted more than just these 15 titles, so I explored the full set of NYT book lists via the list-names endpoint:
base_url <- "https://api.nytimes.com/svc/books/v3/lists/names.json"
response2 <- GET(url = base_url, query = list('api-key' = api_key))
content2 <- content(response2, "text", encoding = "UTF-8")
parsed_content2 <- fromJSON(content2)
list_names2 <- parsed_content2$results
head(list_names2)
## list_name display_name
## 1 Combined Print and E-Book Fiction Combined Print & E-Book Fiction
## 2 Combined Print and E-Book Nonfiction Combined Print & E-Book Nonfiction
## 3 Hardcover Fiction Hardcover Fiction
## 4 Hardcover Nonfiction Hardcover Nonfiction
## 5 Trade Fiction Paperback Paperback Trade Fiction
## 6 Mass Market Paperback Paperback Mass-Market Fiction
## list_name_encoded oldest_published_date
## 1 combined-print-and-e-book-fiction 2011-02-13
## 2 combined-print-and-e-book-nonfiction 2011-02-13
## 3 hardcover-fiction 2008-06-08
## 4 hardcover-nonfiction 2008-06-08
## 5 trade-fiction-paperback 2008-06-08
## 6 mass-market-paperback 2008-06-08
## newest_published_date updated
## 1 2024-03-31 WEEKLY
## 2 2024-03-31 WEEKLY
## 3 2024-03-31 WEEKLY
## 4 2024-03-31 WEEKLY
## 5 2024-03-31 WEEKLY
## 6 2017-01-29 WEEKLY
I found that the NYT maintains 59 book lists: 29 of them are updated weekly and the rest monthly. The first six are shown above.
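Both counts are easy to verify from the parsed results (no new assumptions here, just the data frame fetched above):
nrow(list_names2)           # 59 lists in total
table(list_names2$updated)  # split between WEEKLY and MONTHLY updates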
So I looped over every list to collect its books:
book_data_frames <- list()
# fetch and return the books from one list via the current-list endpoint
# (base_url from above ends in names.json, so it can't be reused here)
get_list_details <- function(list_name_encoded) {
  list_url <- paste0("https://api.nytimes.com/svc/books/v3/lists/current/",
                     list_name_encoded, ".json")
  list_response <- GET(url = list_url, query = list(`api-key` = api_key))
  list_content <- content(list_response, "text", encoding = "UTF-8")
  list_data <- fromJSON(list_content)
  return(list_data$results$books)
}
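Before looping, the function can be spot-checked on a single list name (a hypothetical check, assuming a valid key and remaining quota):
peek <- get_list_details("hardcover-nonfiction")
head(peek$title)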
# length() on a data frame counts its columns (6 here), so use nrow() for rows
for (i in seq_len(nrow(list_names2))) {
  list_name_encoded <- list_names2$list_name_encoded[i]
  list_display_name <- list_names2$display_name[i]
  # print out the list name being fetched
  print(paste("Fetching data for:", list_display_name))
  books_data2 <- tryCatch({
    get_list_details(list_name_encoded)
  }, error = function(e) {
    message("Error fetching data for ", list_display_name, ": ", e$message)
    NULL # return NULL if there's an error
  })
  # if books_data2 is NULL, continue to the next iteration of the loop
  if (is.null(books_data2)) {
    message("Received NULL data for ", list_display_name, ". Continuing to the next list.")
    next
  }
  # convert to data frame and store in the list with the display name as the key
  book_data_frames[[list_display_name]] <- as.data.frame(books_data2)
  print(paste("Successfully fetched data for:", list_display_name))
}
## [1] "Fetching data for: Combined Print & E-Book Fiction"
## [1] "Successfully fetched data for: Combined Print & E-Book Fiction"
## [1] "Fetching data for: Combined Print & E-Book Nonfiction"
## [1] "Successfully fetched data for: Combined Print & E-Book Nonfiction"
## [1] "Fetching data for: Hardcover Fiction"
## [1] "Successfully fetched data for: Hardcover Fiction"
## [1] "Fetching data for: Hardcover Nonfiction"
## Received NULL data for Hardcover Nonfiction. Continuing to the next list.
## [1] "Fetching data for: Paperback Trade Fiction"
## Received NULL data for Paperback Trade Fiction. Continuing to the next list.
## [1] "Fetching data for: Paperback Mass-Market Fiction"
## Received NULL data for Paperback Mass-Market Fiction. Continuing to the next list.
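Three of the six requests came back empty, which looks like throttling: the NYT developer docs cite limits of 500 requests per day and 5 per minute, i.e. roughly one call every 12 seconds. A more patient version of the loop simply sleeps between calls; a minimal sketch of just the fetch step:
# same fetch as above, but pausing between calls to respect the rate limit
for (i in seq_len(nrow(list_names2))) {
  Sys.sleep(12)  # stay under 5 requests per minute
  books_data2 <- get_list_details(list_names2$list_name_encoded[i])
  if (!is.null(books_data2)) {
    book_data_frames[[list_names2$display_name[i]]] <- as.data.frame(books_data2)
  }
}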
#------------------------
# combining the data frames into one overarching data frame
combined_book_data <- data.frame()
for (category_name in names(book_data_frames)) {
  category_data <- book_data_frames[[category_name]]
  if (nrow(category_data) > 0) {
    # adding a new column with the category name
    category_data$category <- category_name
    combined_book_data <- rbind(combined_book_data, category_data)
  }
}
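Since dplyr is already loaded, the same combination can be written in one step: bind_rows() stacks the data frames and .id adds a column holding each element's name (assuming the nested list-columns such as isbns and buy_links bind cleanly):
# equivalent one-liner; .id = "category" records each list's display name
combined_book_data <- bind_rows(book_data_frames, .id = "category")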
names(combined_book_data)
## [1] "rank" "rank_last_week" "weeks_on_list"
## [4] "asterisk" "dagger" "primary_isbn10"
## [7] "primary_isbn13" "publisher" "description"
## [10] "price" "title" "author"
## [13] "contributor" "contributor_note" "book_image"
## [16] "book_image_width" "book_image_height" "amazon_product_url"
## [19] "age_group" "book_review_link" "first_chapter_link"
## [22] "sunday_review_link" "article_chapter_link" "isbns"
## [25] "buy_links" "book_uri" "category"
My first version of this loop did not continue through all 59 categories even though I had added the "next" statement after the tryCatch call: I had written the bound as 1:length(list_names2), and length() on a data frame returns the number of columns (six), not the number of rows, which is why only six lists were attempted above. The NULL results for three of those six are a separate problem, consistent with NYT's rate limits on these APIs; pausing between requests, as in the sketch above, avoids most of them.
In any case, I now have a single data frame with 45 observations (three lists of 15 books each) and 27 columns.
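As a quick end-to-end check of the combined frame (a small usage sketch over the columns listed above):
# how many titles ended up in each category
combined_book_data %>% count(category)
# a compact view of the top three books per list
combined_book_data %>%
  select(category, rank, title, author) %>%
  filter(rank <= 3)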