DATA 607 - Homework Assignment # 6

Vladimir Nimchenko

INTRODUCTION:

I am using the New York Times Top Stories API (movies section) to read JSON data into a data frame, and then tidying and transforming that data.

Loading the needed libraries

library(jsonlite)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(stringr)

Retrieving and reading in the data from the API to a data frame

# Read the API key from the environment instead of embedding it in the
# source, so the credential is not published along with the report.
# Set it beforehand, e.g. add a line NYT_API_KEY=<your key> to ~/.Renviron.
api_key <- Sys.getenv("NYT_API_KEY")
if (!nzchar(api_key)) {
  stop(
    "Set the NYT_API_KEY environment variable before running this script.",
    call. = FALSE
  )
}

# Request the Top Stories "movies" feed and parse the JSON response
json <- fromJSON(
  paste0(
    "https://api.nytimes.com/svc/topstories/v2/movies.json?api-key=",
    api_key
  )
)

# Keep the "results" element of the parsed response as the working data frame
movie_reviews <- json$results

# Open the raw data in the viewer for inspection
View(movie_reviews)

Transforming and Tidying the data in the data frame

# Keep only the rows whose "section" value is exactly "movies"
movie_reviews <- filter(movie_reviews, section == "movies")

# Keep the five columns of interest and give four of them clearer names in a
# single step:
#   title    -> review_title
#   abstract -> review_summary
#   url      -> review_url
#   byline   -> reviewer(s)
# "des_facet" keeps its name here.
movie_reviews <- movie_reviews %>%
  select(
    review_title = title,
    review_summary = abstract,
    review_url = url,
    `reviewer(s)` = byline,
    des_facet
  )



# Drop every row whose "des_facet" text contains "Documentary", "Art", or
# "Actors and Actresses". The match is an unanchored substring match, so
# "Art" also matches longer words that contain it.
excluded_facets <- 'Documentary|Art|Actors and Actresses'
movie_reviews <- filter(movie_reviews, !grepl(excluded_facets, des_facet))

# Reduce the "des_facet" column to the movie title text through a series of
# text substitutions.

# Convert the column to its character form once, up front
facet_text <- as.character(movie_reviews$des_facet)

# Remove the "Movies" tag
facet_text <- gsub("Movies", "", facet_text, fixed = TRUE)

# Remove the literal "(Movie)" marker. fixed = TRUE is needed here: read as a
# regular expression, the parentheses form a capture group and the pattern
# would strip the bare word "Movie" wherever it appears.
facet_text <- gsub("(Movie)", "", facet_text, fixed = TRUE)

# Remove parentheses and digits
facet_text <- gsub("[()0-9]", "", facet_text)

# Remove everything up to and including the first comma
facet_text <- gsub("^[^,]*,", "", facet_text)

# Remove all double quotes
facet_text <- gsub('"', "", facet_text, fixed = TRUE)

# Trim the leading/trailing whitespace left behind by the removals above
movie_reviews$des_facet <- trimws(facet_text)


# Give the "des_facet" column its final name, "movie_title(s)"
is_facet_col <- names(movie_reviews) == "des_facet"
names(movie_reviews)[is_facet_col] <- "movie_title(s)"

# Open the finished data frame in the viewer
View(movie_reviews)