For this assignment I decided to create a relational database and populate it on the fly. Movies were loaded using https://www.themoviedb.org API. I used the randomNames package to create reviewers. I then made the reviewers rate each movie with a random value from 1 to 5. The ratings were then loaded into R for basic analysis.
library("httr")
library("DBI")
library("RMySQL")
library("randomNames")
library("tidyverse")## -- Attaching packages --------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1 v purrr 0.2.4
## v tibble 1.4.2 v dplyr 0.7.4
## v tidyr 0.7.2 v stringr 1.2.0
## v readr 1.1.1 v forcats 0.2.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
rmysql.settingsfile <- "config/mylogin.cnf"
rmysql.db <- "movies"
con <- dbConnect(MySQL(), default.file = rmysql.settingsfile, group = rmysql.db, user = NULL, password = NULL)
dbListTables(con)## [1] "movies" "reviewers" "reviews"
link <- "https://api.themoviedb.org/3/discover/movie?api_key=53b3abb279c64aa6b8bd31cedf177293&language=en-US&include_adult=false&primary_release_year=2017&sort_by=vote_average.desc"
num_movies <- 6
request_movies <- GET(link)
request_movies$status_code## [1] 200
my_movies <- content(request_movies, "parsed")
movies <- my_movies$results
if(length(my_movies$results) < num_movies){
message("Movies not available. I suggest using the movies database as is.")
} else {
#Clear the movies table in preparation to new inserts
oldMovies <- dbGetQuery(con, "SELECT group_concat(movies.movie_id SEPARATOR ',') AS listing,
Count(*) AS num_rows FROM movies")
if(oldMovies$num_rows[1] > 0){
ds <- dbSendQuery(con, sprintf("DELETE FROM movies WHERE movies.movie_id IN (%s)",oldMovies[1]$listing))
dbClearResult(ds)
}
a <- 1
while(a <= num_movies){
sql <- sprintf("INSERT INTO movies SET
movie_id=%d,
movie_title='%s',
movie_popularity='%s',
movie_release_date='%s'
ON DUPLICATE KEY UPDATE movie_id='%s'",
movies[[a]]$id, movies[[a]]$title, movies[[a]]$popularity, movies[[a]]$release_date, movies[[a]]$id)
rs <- dbSendQuery(con, sql)
dbClearResult(rs)
a <- a + 1
}
}num_friends = 10
new_friends <- randomNames(num_friends)
# Clear the friends table in preparation to new friends
oldFriends <- dbGetQuery(con, "SELECT group_concat(reviewers.reviewer_id SEPARATOR ',') AS listing,
Count(*) AS num_rows FROM reviewers")
if(oldFriends$num_rows[1] > 0){
ds <- dbSendQuery(con, sprintf("DELETE FROM reviewers WHERE reviewers.reviewer_id IN (%s)", oldFriends[1]$listing))
dbClearResult(ds)
}## [1] TRUE
a <- 1
while(a <= num_friends){
new_friend = strsplit(new_friends[[a]],",")
sql <- sprintf("INSERT INTO reviewers SET
reviewer_id=%d,
reviewer_first_name='%s',
reviewer_last_name='%s'
ON DUPLICATE KEY UPDATE reviewer_first_name='%s', reviewer_last_name='%s'",
a, new_friend[[1]][1], new_friend[[1]][2],new_friend[[1]][1], new_friend[[1]][2])
rs <- dbSendQuery(con, sql)
dbClearResult(rs)
a <- a + 1
} # Remove old reviews
oldReviews <- dbGetQuery(con, "SELECT group_concat(reviews.review_id SEPARATOR ',') AS listing,
Count(*) AS num_rows FROM reviews")
if(oldReviews$num_rows[1] > 0){
ds <- dbSendQuery(con, sprintf("DELETE FROM reviews WHERE reviews.review_id IN (%s)", oldReviews[1]$listing))
dbClearResult(ds)
}
theMovies <- dbGetQuery(con, sprintf("SELECT movie_id FROM movies LIMIT %d", num_movies))
theReviewers <- dbGetQuery(con, sprintf("SELECT reviewer_id FROM reviewers LIMIT %d", num_friends))
for(r in 1:nrow(theReviewers) ){
for(m in 1:nrow(theMovies)){
rating <- floor(runif(1,1,6))
sql <- sprintf("INSERT INTO reviews SET
movie_id=%d,
reviewer_id=%d,
rating=%d
ON DUPLICATE KEY UPDATE movie_id=%d, reviewer_id=%d, rating=%d",
theMovies$movie_id[m], theReviewers$reviewer_id[r], rating, theMovies$movie_id[m], theReviewers$reviewer_id[r], rating)
rs <- dbSendQuery(con, sql)
dbClearResult(rs)
}
}sql <- "SELECT movies.movie_title AS Movie,
reviews.rating AS Rating,
CONCAT(reviewers.reviewer_first_name,' ',reviewers.reviewer_last_name) AS Reviewer
FROM reviews
INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
INNER JOIN reviewers ON (reviewers.reviewer_id = reviews.reviewer_id)"
theReviews <- suppressWarnings(dbGetQuery(con, sql))
print(theReviews)## Movie Rating Reviewer
## 1 Cómo Filmar Una XXX 1 Bailey Stalin
## 2 Animal Crackers 4 Bailey Stalin
## 3 Soldiers Of The Damned 4 Bailey Stalin
## 4 The Jetsons & WWE: Robo-WrestleMania! 4 Bailey Stalin
## 5 Peppa Pig: My First Cinema Experience 1 Bailey Stalin
## 6 Lahoriye 3 Bailey Stalin
## 7 Cómo Filmar Una XXX 1 Cole Fiben
## 8 Animal Crackers 5 Cole Fiben
## 9 Soldiers Of The Damned 4 Cole Fiben
## 10 The Jetsons & WWE: Robo-WrestleMania! 4 Cole Fiben
## 11 Peppa Pig: My First Cinema Experience 2 Cole Fiben
## 12 Lahoriye 5 Cole Fiben
## 13 Cómo Filmar Una XXX 4 Tagawa Tatiana
## 14 Animal Crackers 3 Tagawa Tatiana
## 15 Soldiers Of The Damned 3 Tagawa Tatiana
## 16 The Jetsons & WWE: Robo-WrestleMania! 2 Tagawa Tatiana
## 17 Peppa Pig: My First Cinema Experience 5 Tagawa Tatiana
## 18 Lahoriye 1 Tagawa Tatiana
## 19 Cómo Filmar Una XXX 4 Rodriguez Fatima
## 20 Animal Crackers 4 Rodriguez Fatima
## 21 Soldiers Of The Damned 5 Rodriguez Fatima
## 22 The Jetsons & WWE: Robo-WrestleMania! 1 Rodriguez Fatima
## 23 Peppa Pig: My First Cinema Experience 5 Rodriguez Fatima
## 24 Lahoriye 5 Rodriguez Fatima
## 25 Cómo Filmar Una XXX 3 Folarin Quincy
## 26 Animal Crackers 2 Folarin Quincy
## 27 Soldiers Of The Damned 4 Folarin Quincy
## 28 The Jetsons & WWE: Robo-WrestleMania! 4 Folarin Quincy
## 29 Peppa Pig: My First Cinema Experience 1 Folarin Quincy
## 30 Lahoriye 1 Folarin Quincy
## 31 Cómo Filmar Una XXX 4 Ramirez-Jimenez Mona
## 32 Animal Crackers 5 Ramirez-Jimenez Mona
## 33 Soldiers Of The Damned 1 Ramirez-Jimenez Mona
## 34 The Jetsons & WWE: Robo-WrestleMania! 5 Ramirez-Jimenez Mona
## 35 Peppa Pig: My First Cinema Experience 1 Ramirez-Jimenez Mona
## 36 Lahoriye 5 Ramirez-Jimenez Mona
## 37 Cómo Filmar Una XXX 4 Sysavat Warren
## 38 Animal Crackers 2 Sysavat Warren
## 39 Soldiers Of The Damned 4 Sysavat Warren
## 40 The Jetsons & WWE: Robo-WrestleMania! 1 Sysavat Warren
## 41 Peppa Pig: My First Cinema Experience 4 Sysavat Warren
## 42 Lahoriye 2 Sysavat Warren
## 43 Cómo Filmar Una XXX 3 al-Taheri Zainuddeen
## 44 Animal Crackers 3 al-Taheri Zainuddeen
## 45 Soldiers Of The Damned 1 al-Taheri Zainuddeen
## 46 The Jetsons & WWE: Robo-WrestleMania! 2 al-Taheri Zainuddeen
## 47 Peppa Pig: My First Cinema Experience 4 al-Taheri Zainuddeen
## 48 Lahoriye 3 al-Taheri Zainuddeen
## 49 Cómo Filmar Una XXX 3 Pena Alejandro
## 50 Animal Crackers 5 Pena Alejandro
## 51 Soldiers Of The Damned 1 Pena Alejandro
## 52 The Jetsons & WWE: Robo-WrestleMania! 1 Pena Alejandro
## 53 Peppa Pig: My First Cinema Experience 5 Pena Alejandro
## 54 Lahoriye 3 Pena Alejandro
## 55 Cómo Filmar Una XXX 4 Gayman Amanda
## 56 Animal Crackers 4 Gayman Amanda
## 57 Soldiers Of The Damned 1 Gayman Amanda
## 58 The Jetsons & WWE: Robo-WrestleMania! 5 Gayman Amanda
## 59 Peppa Pig: My First Cinema Experience 1 Gayman Amanda
## 60 Lahoriye 4 Gayman Amanda
sql <- "SELECT movies.movie_title AS Movie,
movies.movie_release_date AS 'Release Date',
AVG(reviews.rating) As Average_Rating
FROM reviews
INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
GROUP BY reviews.movie_id"
theReviews <- suppressWarnings(dbGetQuery(con, sql))
print(theReviews)## Movie Release Date Average_Rating
## 1 Cómo Filmar Una XXX 2017-11-29 3.1
## 2 Animal Crackers 2017-09-01 3.7
## 3 Soldiers Of The Damned 2017-12-07 2.8
## 4 The Jetsons & WWE: Robo-WrestleMania! 2017-02-28 2.9
## 5 Peppa Pig: My First Cinema Experience 2017-04-07 2.9
## 6 Lahoriye 2017-05-12 3.2
highestRated <- theReviews[which.max(theReviews$Average_Rating),]
print(highestRated)## Movie Release Date Average_Rating
## 2 Animal Crackers 2017-09-01 3.7
sql <- "SELECT movies.movie_title AS Movie,
movies.movie_release_date AS 'Release Date',
AVG(reviews.rating) As Average_Rating
FROM reviews
INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
GROUP BY reviews.movie_id
ORDER BY Average_Rating ASC LIMIT 1"
leastLove <- suppressWarnings(dbGetQuery(con, sql))
print(leastLove)## Movie Release Date Average_Rating
## 1 Soldiers Of The Damned 2017-12-07 2.8
sql <- "SELECT CONCAT(reviewers.reviewer_first_name,' ', reviewers.reviewer_last_name) AS Reviewer,
AVG(reviews.rating) AS Average_Rating
FROM reviews
INNER JOIN reviewers ON (reviewers.reviewer_id = reviews.reviewer_id)
GROUP BY reviews.reviewer_id"
theReviewers <- suppressWarnings(dbGetQuery(con, sql))
print(theReviewers)## Reviewer Average_Rating
## 1 Bailey Stalin 2.8333
## 2 Cole Fiben 3.5000
## 3 Tagawa Tatiana 3.0000
## 4 Rodriguez Fatima 4.0000
## 5 Folarin Quincy 2.5000
## 6 Ramirez-Jimenez Mona 3.5000
## 7 Sysavat Warren 2.8333
## 8 al-Taheri Zainuddeen 2.6667
## 9 Pena Alejandro 3.0000
## 10 Gayman Amanda 3.1667
highestRater <- theReviewers[which.max(theReviewers$Average_Rating),]
print(highestRater)## Reviewer Average_Rating
## 4 Rodriguez Fatima 4
lowestRater <- theReviewers[which.min(theReviewers$Average_Rating),]
print(lowestRater)## Reviewer Average_Rating
## 5 Folarin Quincy 2.5
sql <- "SELECT movies.movie_title AS Movie,
reviews.rating AS Rating,
count(rating) as Counts from reviews
INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
GROUP BY reviews.movie_id,reviews.rating"
theRatings <- suppressWarnings(dbGetQuery(con, sql))
theRatings$Rating <- factor(theRatings$Rating, levels = c("One"<-1, "Two"<-2, "Three"<-3, "Four"<-4,"Five"<-5))
ggplot(data = theRatings, aes(x = Movie, y = Counts, fill = Rating)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_brewer(palette = 12) + labs(title = "Distribution of Ratings Per Movie")