INTRODUCTION:
I am using the New York Times Top Stories API (movies section) to read JSON data into a data frame and then tidy and transform that data.
Loading the needed libraries
library(jsonlite)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(stringr)
Retrieving the data from the API and reading it into a data frame
# Read the JSON for the "movies" section from the NYT Top Stories API.
# The API key is taken from the NYT_API_KEY environment variable (for example
# set in ~/.Renviron) so that the credential is not stored in the script.
nyt_api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(nyt_api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable to your NYT API key.",
    call. = FALSE
  )
}
json <- fromJSON(paste0(
  "https://api.nytimes.com/svc/topstories/v2/movies.json?api-key=",
  URLencode(nyt_api_key, reserved = TRUE)
))
# Keep the "results" element of the response as the working data frame
movie_reviews <- json$results
# View() opens a data viewer, so only call it in an interactive session
if (interactive()) {
  View(movie_reviews)
}
Transforming and Tidying the data in the data frame
# Keep only the rows whose section is "movies", retain the five columns of
# interest, and give four of them more descriptive names in the same step.
# des_facet keeps its name here.
movie_reviews <- movie_reviews %>%
  filter(section == "movies") %>%
  select(
    review_title = title,
    review_summary = abstract,
    review_url = url,
    `reviewer(s)` = byline,
    des_facet
  )
# Drop articles whose descriptive facets mention documentaries, art, or
# actors and actresses. grepl() coerces each row's facets to a single string
# before matching, so a hit in any facet excludes the row.
movie_reviews <- movie_reviews %>%
  filter(!grepl("Documentary|Art|Actors and Actresses", des_facet))

# Extract the movie title(s) from one row's facets.
#
# facets: the des_facet value for a single article (a character vector, or a
#         single string).
# Returns a length-one character vector: every facet that ends in "(Movie)"
# with that suffix removed, joined by "; "; NA when no facet is a movie title.
#
# Working on the individual facets (instead of on the deparsed 'c("a", "b")'
# string) keeps digits and commas that belong to a title, and does not depend
# on the position of the title within the facet list.
extract_movie_titles <- function(facets) {
  facets <- as.character(unlist(facets))
  # Parentheses are escaped so they match literally rather than as a group
  movie_suffix <- "\\s*\\(Movie\\)\\s*$"
  titles <- facets[grepl(movie_suffix, facets)]
  if (length(titles) == 0L) {
    return(NA_character_)
  }
  paste(trimws(sub(movie_suffix, "", titles)), collapse = "; ")
}

movie_reviews$des_facet <- vapply(
  movie_reviews$des_facet,
  extract_movie_titles,
  character(1),
  USE.NAMES = FALSE
)

# Change the "des_facet" column to "movie_title(s)" for clarity
colnames(movie_reviews)[colnames(movie_reviews) == "des_facet"] <-
  "movie_title(s)"

# View() opens a data viewer, so only call it in an interactive session
if (interactive()) {
  View(movie_reviews)
}