library(devtools)
## Loading required package: usethis
library(RCurl)
library(plyr)
library(ggplot2)
library(httr)
library(jsonlite)
Preparing Data:
- Choose one of the New York Times APIs. I chose the Movie Reviews API
- Request API key.
- Construct an interface in R that reads in the JSON data. It runs 3 times so that I get 60 entries for my data frame, because the API returns at most 20 results per call.
- Transform the data into an R data frame.
Initialize data frames for use
# Start every working data frame empty (0 rows, 0 columns) so that results can
# be appended to them later.
temp_df <- data.frame()
movie_review_df <- data.frame()
movie_review_df_final <- data.frame()
Issue API Call
- Issue the NYT Movie Reviews API call 3 times so that I can get 60 entries for my data.frame. The max return from the API is 20.
- Check API response status
- Query the NYT Movie Reviews db to get the latest 60 movie reviews
# Fetch NYT "picks" movie reviews for several opening-date windows and append
# them to movie_review_df_final.
#
# One request is issued per opening-date window; each response's "results"
# element is converted to a data frame by proc_results_2_df().
#
# NOTE(review): the three windows below overlap, so the same review may come
# back from more than one request. Confirm whether duplicates are acceptable,
# or whether the API's paging support should be used instead of date windows.

# SECURITY: API keys should not live in source control. Prefer setting the
# NYT_API_KEY environment variable. The literal fallback only keeps the script
# runnable as before; the key should be rotated and the fallback removed.
nyt_api_key <- Sys.getenv(
  "NYT_API_KEY",
  unset = "33nx92050KEuFWADofpqmL4sZW0K4YEd"
)

# Opening-date windows, one request each ("start:end").
opening_date_ranges <- c(
  "2020-02-12:2021-04-10",
  "2020-01-01:2021-02-11",
  "2020-10-01:2020-12-03"
)

# Request one window of reviews and return the parsed "results" list.
#
# date_range: opening-date window as "YYYY-MM-DD:YYYY-MM-DD".
# api_key:    NYT API key sent as the `api-key` query parameter.
# Returns the "results" element of the parsed response body.
# Stops with an HTTP error if the request fails.
fetch_nyt_review_results <- function(date_range, api_key) {
  url <- paste0(
    "https://api.nytimes.com/svc/movies/v2/reviews/picks.json",
    "?opening-date=", date_range,
    "&order=by-opening-date",
    "&api-key=", api_key
  )
  response <- GET(url)
  # Pass the whole response (not just the status code) so the error message
  # carries the request context.
  stop_for_status(
    response,
    task = paste("fetch NYT movie reviews for opening dates", date_range)
  )
  content(response, as = "parsed")[["results"]]
}

# Convert each window's results to a data frame, then bind everything once
# instead of growing the data frame inside a loop.
review_frames <- lapply(
  opening_date_ranges,
  function(date_range) {
    results_list <- fetch_nyt_review_results(date_range, nyt_api_key)
    proc_results_2_df(results_list)
  }
)
movie_review_df_final <- do.call(
  rbind,
  c(list(movie_review_df_final), review_frames)
)

movie_review_df_final
Add a unique ID column, based on the row name of each entry, for a future load into a database table.
# Prepend a unique integer ID column (1..n, one per row) for a later database
# load. seq_len(nrow()) is used rather than parsing row names so the IDs stay
# valid integers even if the row names are not numeric strings.
movie_review_df_final <- cbind(
  data.frame(movie_rev_id = seq_len(nrow(movie_review_df_final))),
  movie_review_df_final
)
movie_review_df_final
Conclusion: I found the NYT APIs easy to figure out. This was one of the easier assignments for me, and I think that is because it is very ETL-like. I chose the Movie Reviews API because one of my earlier assignments was building a database using MySQL, for which I created the Movies DB, and this code would fit nicely into that design.