library(httr)
library(jsonlite)
library(dplyr)
library(tibble)
From the NYT API page on Books and Book Reviews, it looks like I can access a Best Sellers list by specifying its list name in the request URL:
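(The requests below assume an api_key object defined earlier. To avoid hard-coding the key, one option is to read it from an environment variable; a minimal sketch, assuming the key is saved under the name NYT_API_KEY, e.g. in ~/.Renviron:)
# read the API key from the environment instead of hard-coding it
# (NYT_API_KEY is an assumed variable name; set it in ~/.Renviron)
api_key <- Sys.getenv("NYT_API_KEY")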
# creating my API request url
url1 <- paste0("https://api.nytimes.com/svc/books/v3/lists/current/hardcover-fiction.json?api-key=", api_key)
# making the API request
response1 <- GET(url1)
# Checking the request was successful while also capturing and transforming the data into a df
if (status_code(response1) == 200) {
  data1 <- fromJSON(rawToChar(response1$content), flatten = TRUE)
  books_data1 <- data1$results$books
  books_df1 <- as_tibble(books_data1, .name_repair = "universal") %>%
    select(book_title = title, book_author = author, url = amazon_product_url, isbn13 = primary_isbn13)
  print(head(books_df1))
} else {
  print(paste("Failed to fetch data: Error code", status_code(response1)))
}
## # A tibble: 6 × 4
## book_title book_author url isbn13
## <chr> <chr> <chr> <chr>
## 1 THE WOMEN Kristin Hannah https://www.amazon… 97812…
## 2 FOURTH WING Rebecca Yarros https://www.amazon… 97816…
## 3 IRON FLAME Rebecca Yarros https://www.amazon… 97816…
## 4 EMPIRE OF THE DAMNED Jay Kristoff https://www.amazon… 97812…
## 5 A FATE INKED IN BLOOD Danielle L. Jensen https://www.amazon… 97805…
## 6 THE HEAVEN & EARTH GROCERY STORE James McBride https://www.amazon… 97805…
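httr also has built-in helpers for this kind of check; here is a sketch of the same request using stop_for_status(), which raises a descriptive error on any 4xx/5xx response instead of branching by hand:
response1 <- GET(url1)
stop_for_status(response1)  # errors with the HTTP status if the call failed
data1 <- fromJSON(content(response1, "text", encoding = "UTF-8"), flatten = TRUE)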
names(books_data1)
## [1] "rank" "rank_last_week" "weeks_on_list"
## [4] "asterisk" "dagger" "primary_isbn10"
## [7] "primary_isbn13" "publisher" "description"
## [10] "price" "title" "author"
## [13] "contributor" "contributor_note" "book_image"
## [16] "book_image_width" "book_image_height" "amazon_product_url"
## [19] "age_group" "book_review_link" "first_chapter_link"
## [22] "sunday_review_link" "article_chapter_link" "isbns"
## [25] "buy_links" "book_uri"
It looks like there are 15 top sellers in the hardcover fiction category.
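A quick check of the tibble backs that up:
nrow(books_df1)  # 15 rows, one per best seller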
==========================================
But I wanted more than just these 15 titles, so I explored the full set of NYT book lists via the list-names endpoint:
base_url <- "https://api.nytimes.com/svc/books/v3/lists/names.json"
response2 <- GET(url = base_url, query = list('api-key' = api_key))
content2 <- content(response2, "text", encoding = "UTF-8")
parsed_content2 <- fromJSON(content2)
list_names2 <- parsed_content2$results
head(list_names2)
## list_name display_name
## 1 Combined Print and E-Book Fiction Combined Print & E-Book Fiction
## 2 Combined Print and E-Book Nonfiction Combined Print & E-Book Nonfiction
## 3 Hardcover Fiction Hardcover Fiction
## 4 Hardcover Nonfiction Hardcover Nonfiction
## 5 Trade Fiction Paperback Paperback Trade Fiction
## 6 Mass Market Paperback Paperback Mass-Market Fiction
## list_name_encoded oldest_published_date
## 1 combined-print-and-e-book-fiction 2011-02-13
## 2 combined-print-and-e-book-nonfiction 2011-02-13
## 3 hardcover-fiction 2008-06-08
## 4 hardcover-nonfiction 2008-06-08
## 5 trade-fiction-paperback 2008-06-08
## 6 mass-market-paperback 2008-06-08
## newest_published_date updated
## 1 2024-03-31 WEEKLY
## 2 2024-03-31 WEEKLY
## 3 2024-03-31 WEEKLY
## 4 2024-03-31 WEEKLY
## 5 2024-03-31 WEEKLY
## 6 2017-01-29 WEEKLY
I found that the NYT maintains 59 book lists: 29 of them are updated weekly and the rest monthly. The first six are shown above.
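Both counts are easy to verify from the parsed results (no new assumptions here, just the data frame fetched above):
nrow(list_names2)           # 59 lists in total
table(list_names2$updated)  # split between WEEKLY and MONTHLY updates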
So I looped over every list to collect its books:
book_data_frames <- list()
# fetch and return the books from one list via the current-list endpoint
# (base_url from above ends in names.json, so it can't be reused here)
get_list_details <- function(list_name_encoded) {
  list_url <- paste0("https://api.nytimes.com/svc/books/v3/lists/current/",
                     list_name_encoded, ".json")
  list_response <- GET(url = list_url, query = list(`api-key` = api_key))
  list_content <- content(list_response, "text", encoding = "UTF-8")
  list_data <- fromJSON(list_content)
  return(list_data$results$books)
}
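Before looping, the function can be spot-checked on a single list name (a hypothetical check, assuming a valid key and remaining quota):
peek <- get_list_details("hardcover-nonfiction")
head(peek$title)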
# length() on a data frame counts its columns (6 here), so use nrow() for rows
for (i in seq_len(nrow(list_names2))) {
  list_name_encoded <- list_names2$list_name_encoded[i]
  list_display_name <- list_names2$display_name[i]
  # print out the list name being fetched
  print(paste("Fetching data for:", list_display_name))
  books_data2 <- tryCatch({
    get_list_details(list_name_encoded)
  }, error = function(e) {
    message("Error fetching data for ", list_display_name, ": ", e$message)
    NULL # return NULL if there's an error
  })
  # if books_data2 is NULL, continue to the next iteration of the loop
  if (is.null(books_data2)) {
    message("Received NULL data for ", list_display_name, ". Continuing to the next list.")
    next
  }
  # convert to data frame and store in the list with the display name as the key
  book_data_frames[[list_display_name]] <- as.data.frame(books_data2)
  print(paste("Successfully fetched data for:", list_display_name))
}
## [1] "Fetching data for: Combined Print & E-Book Fiction"
## [1] "Successfully fetched data for: Combined Print & E-Book Fiction"
## [1] "Fetching data for: Combined Print & E-Book Nonfiction"
## [1] "Successfully fetched data for: Combined Print & E-Book Nonfiction"
## [1] "Fetching data for: Hardcover Fiction"
## [1] "Successfully fetched data for: Hardcover Fiction"
## [1] "Fetching data for: Hardcover Nonfiction"
## Received NULL data for Hardcover Nonfiction. Continuing to the next list.
## [1] "Fetching data for: Paperback Trade Fiction"
## Received NULL data for Paperback Trade Fiction. Continuing to the next list.
## [1] "Fetching data for: Paperback Mass-Market Fiction"
## Received NULL data for Paperback Mass-Market Fiction. Continuing to the next list.
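Three of the six requests came back empty, which looks like throttling: the NYT developer docs cite limits of 500 requests per day and 5 per minute, i.e. roughly one call every 12 seconds. A more patient version of the loop simply sleeps between calls; a minimal sketch of just the fetch step:
# same fetch as above, but pausing between calls to respect the rate limit
for (i in seq_len(nrow(list_names2))) {
  Sys.sleep(12)  # stay under 5 requests per minute
  books_data2 <- get_list_details(list_names2$list_name_encoded[i])
  if (!is.null(books_data2)) {
    book_data_frames[[list_names2$display_name[i]]] <- as.data.frame(books_data2)
  }
}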
#------------------------
# combining the data frames into one overarching data frame
combined_book_data <- data.frame()
for (category_name in names(book_data_frames)) {
  category_data <- book_data_frames[[category_name]]
  if (nrow(category_data) > 0) {
    # adding a new column with the category name
    category_data$category <- category_name
    combined_book_data <- rbind(combined_book_data, category_data)
  }
}
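Since dplyr is already loaded, the same combination can be written in one step: bind_rows() stacks the data frames and .id adds a column holding each element's name (assuming the nested list-columns such as isbns and buy_links bind cleanly):
# equivalent one-liner; .id = "category" records each list's display name
combined_book_data <- bind_rows(book_data_frames, .id = "category")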
names(combined_book_data)
## [1] "rank" "rank_last_week" "weeks_on_list"
## [4] "asterisk" "dagger" "primary_isbn10"
## [7] "primary_isbn13" "publisher" "description"
## [10] "price" "title" "author"
## [13] "contributor" "contributor_note" "book_image"
## [16] "book_image_width" "book_image_height" "amazon_product_url"
## [19] "age_group" "book_review_link" "first_chapter_link"
## [22] "sunday_review_link" "article_chapter_link" "isbns"
## [25] "buy_links" "book_uri" "category"
My first version of this loop did not continue through all 59 categories even though I had added the "next" statement after the tryCatch call: I had written the bound as 1:length(list_names2), and length() on a data frame returns the number of columns (six), not the number of rows, which is why only six lists were attempted above. The NULL results for three of those six are a separate problem, consistent with NYT's rate limits on these APIs; pausing between requests, as in the sketch above, avoids most of them.
In any case, I now have a single data frame with 45 observations (three lists of 15 books each) and 27 columns.
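As a quick end-to-end check of the combined frame (a small usage sketch over the columns listed above):
# how many titles ended up in each category
combined_book_data %>% count(category)
# a compact view of the top three books per list
combined_book_data %>%
  select(category, rank, title, author) %>%
  filter(rank <= 3)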