Data 607 - Week 9

Load the Required Packages:

Below, we load the packages required for collecting, cleaning up, and displaying the data.

library(tidyverse)
library(httr)
library(jsonlite)
library(DT)

Research Question:

Looking at the list of National Book Award winners for both Fiction and Nonfiction from 1984 to present, how many of these books were reviewed in the New York Times (NYT) Book Review? Did the winning authors have other books reviewed by the NYT Book Review over the years?

Load the National Book Award Winners Data Frame:

Below, we load a data frame from a .csv file containing all the National Book Award winners for both Fiction and Nonfiction from 1984 to the present. We then rename its four columns.

# Names applied, in order, to the columns of the downloaded CSV.
cols <- c("award_year", "book_author", "book_title", "category")
my_url <- "https://raw.githubusercontent.com/geedoubledee/data607_week9/main/national_book_award_winners_fic_nonfic_1984-pres.csv"
# Read the CSV from GitHub, coerce the result to a data frame, and rename
# its columns in a single pipeline.
national_book_award_winners <- my_url %>%
    read_csv() %>%
    as.data.frame() %>%
    setNames(cols)

Set Up the NYT Book Review API Call:

Below, we set up a function to call the NYT Book Review API and retrieve reviews by title or author.

# Search terms for the API: every space is swapped for "+". No other
# character is altered before the terms are pasted into the request URL.
titles <- national_book_award_winners$book_title %>%
    str_replace_all(" ", "+")
authors <- national_book_award_winners$book_author %>%
    str_replace_all(" ", "+")
# Column names of the review data frames built below.
cols <- c("url", "publication_dt", "byline", "book_title", "book_author", "summary", "uuid", "uri", "isbn13")
# Zero-row, nine-column data frame that the retrieved reviews are later
# appended to.
all_reviews <- matrix(NA, nrow = 0, ncol = 9) %>%
    as.data.frame() %>%
    setNames(cols)
# Query the NYT Book Review API once per search term and stack the results.
#
# Args:
#   v:  Character vector of search terms (author names or book titles). The
#       terms are pasted into the request URL as-is, so the caller is
#       expected to have replaced spaces with "+" already.
#   by: Field to search on: "author" (the default) or "title". Any other
#       value results in no requests being made.
#
# Returns:
#   A data frame holding every review returned for the terms in `v`. When
#   nothing is returned (empty `v`, unrecognised `by`, or no hits), the
#   result is a zero-row data frame with the nine review columns.
#
# Notes:
#   The API key is read from "config.txt" in the working directory, once
#   per call. A request that comes back with an HTTP error status is
#   reported via warning() and skipped instead of being silently treated
#   as "no reviews".
call_nyt_br_api <- function(v, by = "author"){
    # Defined locally so the function does not depend on a global `cols`
    # that the surrounding script reassigns later on.
    review_cols <- c("url", "publication_dt", "byline", "book_title",
                     "book_author", "summary", "uuid", "uri", "isbn13")
    new_reviews <- as.data.frame(matrix(NA, nrow = 0,
                                        ncol = length(review_cols)))
    colnames(new_reviews) <- review_cols

    # Nothing to look up: return before touching the key file or network.
    if (length(v) == 0 || !isTRUE(by %in% c("author", "title"))){
        return(new_reviews)
    }

    base <- "https://api.nytimes.com/svc/books/v3/reviews.json?"
    api_key <- as.character(read.table("config.txt", header = FALSE))

    # Fetch the reviews for a single term; NULL signals a failed request.
    fetch_reviews <- function(term){
        x_url <- paste0(base, by, "=", term, "&api-key=", api_key)
        response <- GET(x_url)
        if (http_error(response)){
            # The URL is deliberately left out of the message because it
            # contains the API key.
            warning("NYT Book Review request for ", by, " '", term,
                    "' failed with HTTP status ", status_code(response),
                    call. = FALSE)
            return(NULL)
        }
        json <- fromJSON(content(response, as = "text", encoding = "UTF-8"))
        as.data.frame(json$results)
    }

    reviews <- Filter(Negate(is.null), lapply(v, fetch_reviews))
    # Bind everything in one go; the empty template comes first so that it
    # is what gets returned when no request yields any rows.
    do.call(rbind, c(list(new_reviews), reviews))
}

First, we look up reviews for the National Book Award winners by title.

# One API lookup per winning title.
new_reviews1 <- call_nyt_br_api(v = titles, by = "title")

Then, we look up any other reviews these National Book Award winning authors might have received from the NYT Book Review.

# One API lookup per winning author.
new_reviews2 <- call_nyt_br_api(v = authors, by = "author")

We combine all the reviews we retrieved and remove some unnecessary columns.

# Stack the title-based and author-based lookups onto the empty template.
all_reviews <- rbind(all_reviews, new_reviews1, new_reviews2)
# Drop the columns that are not needed downstream. A review of a winning
# book can be returned by both the title lookup and the author lookup, so
# identical rows are collapsed here; otherwise every such review would be
# repeated after the join on book_title.
reviews_final <- all_reviews %>%
    select(-c(byline, summary, uuid, uri, isbn13)) %>%
    distinct()

We join the National Book Award winners data frame to the reviews data frame. Matches are made on book title; where a book has multiple reviews, we keep all of them in the resulting data frame, so each review for a book gets its own row.

# Attach the reviews to the winners by title. Every winner row is kept, and
# a winner with several matching reviews yields one row per review. Both
# inputs carry a book_author column, which is why the code further down
# refers to book_author.x and book_author.y.
national_book_award_winners_final <- left_join(
    national_book_award_winners,
    reviews_final,
    by = "book_title",
    multiple = "all"
)

We now add all the NYT Book Review reviews for books that did not win the National Book Award, but which were written by the National Book Award winning authors in our list.

# Column layout of the joined winners/reviews data frame.
reorder <- c("award_year", "book_author.x", "book_title", "category", "url",
             "publication_dt", "book_author.y")
# Reviews whose title is not among the award-winning titles. Columns are
# renamed and selected by name rather than by position, so a change in the
# column order of reviews_final cannot silently mislabel them. The review's
# author goes into book_author.x; the award-specific columns and
# book_author.y are filled with NA.
non_winner_reviews <- reviews_final %>%
    filter(!book_title %in% national_book_award_winners_final$book_title) %>%
    rename(book_author.x = book_author) %>%
    mutate(award_year = NA, category = NA, book_author.y = NA) %>%
    select(all_of(reorder))
# Append the non-winning books below the winners.
national_book_award_winners_final <- rbind(national_book_award_winners_final,
                                           non_winner_reviews)

We remove some bad matches, which resulted from looking up books by title. We only keep reviews where the title and the author match, not just the title.

# A title-only join can pair a winner with a review of a different book that
# shares its title. Such rows have a review author (book_author.y) that
# differs from the winner's author (book_author.x).
winner_author <- national_book_award_winners_final$book_author.x
review_author <- national_book_award_winners_final$book_author.y
# which() discards NA comparisons, so rows lacking a review author are
# never flagged.
bad_match <- which(!is.na(review_author) & review_author != winner_author)
if (length(bad_match) > 0){
    # Blank out the review-derived columns of the mismatched rows; the
    # winner's own details in those rows stay intact.
    national_book_award_winners_final[
        bad_match, c("url", "publication_dt", "book_author.y")
    ] <- NA
}

# The review author column has served its purpose.
national_book_award_winners_final <- subset(national_book_award_winners_final,
                                            select = -book_author.y)

We can now view all the reviews written in the NYT Book Review that were either for National Book Award winning books from 1984 to present or other books written by those National Book Award winning authors.

# Final column names, now that only one author column is left.
cols <- c("award_year", "book_author", "book_title", "category",
          "url", "publication_dt")
colnames(national_book_award_winners_final) <- cols
# Flag award winners (rows that carry an award year) and sort by author.
# Plain assignment is used instead of magrittr's %<>% operator, which none
# of the packages attached by this script exports.
national_book_award_winners_final <- national_book_award_winners_final %>%
    mutate(award_winner = ifelse(is.na(award_year), "no", "yes")) %>%
    arrange(book_author)

# Keep only the rows that actually have a review to link to.
display <- national_book_award_winners_final %>%
    filter(!is.na(url))

# Show the table without the publication date, dropping the column by name
# rather than by position.
datatable(select(display, -publication_dt),
          options = list(pageLength = 25))

Data 607 - Week 9

Glen Dale Davis

2023-03-24

Load the Required Packages:

Research Question:

Load the National Book Award Winners Data Frame:

Set Up the NYT Book Review API Call: