Intro

For this assignment I decided to create a relational database and populate it on the fly. Movies were loaded using https://www.themoviedb.org API. I used the randomNames package to create reviewers. I then made the reviewers rate each movie with a random value from 1 to 5. The ratings were then loaded into R for basic analysis.

Load Packages

library("httr")
library("DBI")
library("RMySQL")
library("randomNames")
library("tidyverse")
## -- Attaching packages --------------------------------------------------------------------------------------------------------------------- tidyverse 1.2.1 --
## v ggplot2 2.2.1     v purrr   0.2.4
## v tibble  1.4.2     v dplyr   0.7.4
## v tidyr   0.7.2     v stringr 1.2.0
## v readr   1.1.1     v forcats 0.2.0
## -- Conflicts ------------------------------------------------------------------------------------------------------------------------ tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()


My Schema

Connect to Database

rmysql.settingsfile <- "config/mylogin.cnf"
rmysql.db <- "movies"
con <- dbConnect(MySQL(), default.file = rmysql.settingsfile, group = rmysql.db, user = NULL, password = NULL)
dbListTables(con)
## [1] "movies"    "reviewers" "reviews"


Get Movies

  link <- "https://api.themoviedb.org/3/discover/movie?api_key=53b3abb279c64aa6b8bd31cedf177293&language=en-US&include_adult=false&primary_release_year=2017&sort_by=vote_average.desc"
  num_movies <- 6
  request_movies <- GET(link)
  request_movies$status_code
## [1] 200
  my_movies <- content(request_movies, "parsed")
  movies <- my_movies$results
 
  if(length(my_movies$results) < num_movies){
    message("Movies not available. I suggest using the movies database as is.")
  } else {
    #Clear the movies table in preparation to new inserts
    oldMovies <- dbGetQuery(con, "SELECT group_concat(movies.movie_id SEPARATOR ',') AS listing, 
                                  Count(*) AS num_rows FROM movies")
  
  if(oldMovies$num_rows[1] > 0){
    ds <- dbSendQuery(con, sprintf("DELETE FROM movies WHERE movies.movie_id IN (%s)",oldMovies[1]$listing))
    dbClearResult(ds)
  }
    a <- 1
    while(a <= num_movies){
      sql <- sprintf("INSERT INTO movies SET 
                      movie_id=%d, 
                      movie_title='%s', 
                      movie_popularity='%s', 
                      movie_release_date='%s' 
                      ON DUPLICATE KEY UPDATE movie_id='%s'", 
                      movies[[a]]$id, movies[[a]]$title, movies[[a]]$popularity, movies[[a]]$release_date, movies[[a]]$id)
      
      rs <- dbSendQuery(con, sql)
      dbClearResult(rs)
      a <- a + 1
    }
  }


Generate Some Imaginary Friends

num_friends = 10
new_friends <- randomNames(num_friends)
  # Clear the friends table in preparation to new friends
  oldFriends <- dbGetQuery(con, "SELECT group_concat(reviewers.reviewer_id SEPARATOR ',') AS listing, 
                                Count(*) AS num_rows FROM reviewers")
  
  if(oldFriends$num_rows[1] > 0){
    ds <- dbSendQuery(con, sprintf("DELETE FROM reviewers WHERE reviewers.reviewer_id IN (%s)", oldFriends[1]$listing))
    dbClearResult(ds)
  }
## [1] TRUE
  a <- 1
  while(a <= num_friends){
    new_friend = strsplit(new_friends[[a]],",")
    sql <- sprintf("INSERT INTO reviewers SET 
                    reviewer_id=%d,
                    reviewer_first_name='%s', 
                    reviewer_last_name='%s'  
                    ON DUPLICATE KEY UPDATE   reviewer_first_name='%s', reviewer_last_name='%s'", 
                    a, new_friend[[1]][1], new_friend[[1]][2],new_friend[[1]][1], new_friend[[1]][2])
    
    rs <- dbSendQuery(con, sql)
    dbClearResult(rs)
    a <- a + 1
  }


Let Them Vote

 # Remove old reviews
 oldReviews <- dbGetQuery(con, "SELECT group_concat(reviews.review_id SEPARATOR ',') AS listing, 
                                Count(*) AS num_rows FROM reviews")
  if(oldReviews$num_rows[1] > 0){
    ds <- dbSendQuery(con, sprintf("DELETE FROM reviews WHERE reviews.review_id IN (%s)", oldReviews[1]$listing))
    dbClearResult(ds)
  }

theMovies <- dbGetQuery(con, sprintf("SELECT movie_id FROM movies LIMIT %d", num_movies))
theReviewers <- dbGetQuery(con, sprintf("SELECT reviewer_id FROM reviewers LIMIT %d", num_friends))

for(r in 1:nrow(theReviewers) ){
  for(m in 1:nrow(theMovies)){
    rating <- floor(runif(1,1,6))
    sql <- sprintf("INSERT INTO reviews SET 
                    movie_id=%d, 
                    reviewer_id=%d, 
                    rating=%d  
                    ON DUPLICATE KEY UPDATE movie_id=%d, reviewer_id=%d, rating=%d", 
                    theMovies$movie_id[m], theReviewers$reviewer_id[r], rating, theMovies$movie_id[m], theReviewers$reviewer_id[r], rating)
    rs <- dbSendQuery(con, sql)
    dbClearResult(rs)
  }
}


Load All Reviews

sql <- "SELECT movies.movie_title AS Movie,
        reviews.rating AS Rating, 
        CONCAT(reviewers.reviewer_first_name,' ',reviewers.reviewer_last_name) AS Reviewer
        FROM reviews 
        INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
        INNER JOIN  reviewers ON (reviewers.reviewer_id = reviews.reviewer_id)"

theReviews <- suppressWarnings(dbGetQuery(con, sql))

print(theReviews)
##                                    Movie Rating              Reviewer
## 1                   Cómo Filmar Una XXX      1        Bailey  Stalin
## 2                        Animal Crackers      4        Bailey  Stalin
## 3                 Soldiers Of The Damned      4        Bailey  Stalin
## 4  The Jetsons & WWE: Robo-WrestleMania!      4        Bailey  Stalin
## 5  Peppa Pig: My First Cinema Experience      1        Bailey  Stalin
## 6                               Lahoriye      3        Bailey  Stalin
## 7                   Cómo Filmar Una XXX      1           Cole  Fiben
## 8                        Animal Crackers      5           Cole  Fiben
## 9                 Soldiers Of The Damned      4           Cole  Fiben
## 10 The Jetsons & WWE: Robo-WrestleMania!      4           Cole  Fiben
## 11 Peppa Pig: My First Cinema Experience      2           Cole  Fiben
## 12                              Lahoriye      5           Cole  Fiben
## 13                  Cómo Filmar Una XXX      4       Tagawa  Tatiana
## 14                       Animal Crackers      3       Tagawa  Tatiana
## 15                Soldiers Of The Damned      3       Tagawa  Tatiana
## 16 The Jetsons & WWE: Robo-WrestleMania!      2       Tagawa  Tatiana
## 17 Peppa Pig: My First Cinema Experience      5       Tagawa  Tatiana
## 18                              Lahoriye      1       Tagawa  Tatiana
## 19                  Cómo Filmar Una XXX      4     Rodriguez  Fatima
## 20                       Animal Crackers      4     Rodriguez  Fatima
## 21                Soldiers Of The Damned      5     Rodriguez  Fatima
## 22 The Jetsons & WWE: Robo-WrestleMania!      1     Rodriguez  Fatima
## 23 Peppa Pig: My First Cinema Experience      5     Rodriguez  Fatima
## 24                              Lahoriye      5     Rodriguez  Fatima
## 25                  Cómo Filmar Una XXX      3       Folarin  Quincy
## 26                       Animal Crackers      2       Folarin  Quincy
## 27                Soldiers Of The Damned      4       Folarin  Quincy
## 28 The Jetsons & WWE: Robo-WrestleMania!      4       Folarin  Quincy
## 29 Peppa Pig: My First Cinema Experience      1       Folarin  Quincy
## 30                              Lahoriye      1       Folarin  Quincy
## 31                  Cómo Filmar Una XXX      4 Ramirez-Jimenez  Mona
## 32                       Animal Crackers      5 Ramirez-Jimenez  Mona
## 33                Soldiers Of The Damned      1 Ramirez-Jimenez  Mona
## 34 The Jetsons & WWE: Robo-WrestleMania!      5 Ramirez-Jimenez  Mona
## 35 Peppa Pig: My First Cinema Experience      1 Ramirez-Jimenez  Mona
## 36                              Lahoriye      5 Ramirez-Jimenez  Mona
## 37                  Cómo Filmar Una XXX      4       Sysavat  Warren
## 38                       Animal Crackers      2       Sysavat  Warren
## 39                Soldiers Of The Damned      4       Sysavat  Warren
## 40 The Jetsons & WWE: Robo-WrestleMania!      1       Sysavat  Warren
## 41 Peppa Pig: My First Cinema Experience      4       Sysavat  Warren
## 42                              Lahoriye      2       Sysavat  Warren
## 43                  Cómo Filmar Una XXX      3 al-Taheri  Zainuddeen
## 44                       Animal Crackers      3 al-Taheri  Zainuddeen
## 45                Soldiers Of The Damned      1 al-Taheri  Zainuddeen
## 46 The Jetsons & WWE: Robo-WrestleMania!      2 al-Taheri  Zainuddeen
## 47 Peppa Pig: My First Cinema Experience      4 al-Taheri  Zainuddeen
## 48                              Lahoriye      3 al-Taheri  Zainuddeen
## 49                  Cómo Filmar Una XXX      3       Pena  Alejandro
## 50                       Animal Crackers      5       Pena  Alejandro
## 51                Soldiers Of The Damned      1       Pena  Alejandro
## 52 The Jetsons & WWE: Robo-WrestleMania!      1       Pena  Alejandro
## 53 Peppa Pig: My First Cinema Experience      5       Pena  Alejandro
## 54                              Lahoriye      3       Pena  Alejandro
## 55                  Cómo Filmar Una XXX      4        Gayman  Amanda
## 56                       Animal Crackers      4        Gayman  Amanda
## 57                Soldiers Of The Damned      1        Gayman  Amanda
## 58 The Jetsons & WWE: Robo-WrestleMania!      5        Gayman  Amanda
## 59 Peppa Pig: My First Cinema Experience      1        Gayman  Amanda
## 60                              Lahoriye      4        Gayman  Amanda


Reviews by Movie

sql <- "SELECT movies.movie_title AS Movie,
        movies.movie_release_date AS 'Release Date',
        AVG(reviews.rating) As Average_Rating
        FROM reviews 
        INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
        GROUP BY reviews.movie_id"

theReviews <- suppressWarnings(dbGetQuery(con, sql))

print(theReviews)
##                                   Movie Release Date Average_Rating
## 1                  Cómo Filmar Una XXX   2017-11-29            3.1
## 2                       Animal Crackers   2017-09-01            3.7
## 3                Soldiers Of The Damned   2017-12-07            2.8
## 4 The Jetsons & WWE: Robo-WrestleMania!   2017-02-28            2.9
## 5 Peppa Pig: My First Cinema Experience   2017-04-07            2.9
## 6                              Lahoriye   2017-05-12            3.2


  1. Which movie got the most love?
highestRated <- theReviews[which.max(theReviews$Average_Rating),]
print(highestRated)
##             Movie Release Date Average_Rating
## 2 Animal Crackers   2017-09-01            3.7


  1. Which movie got the least love?
sql <- "SELECT movies.movie_title AS Movie,
        movies.movie_release_date AS 'Release Date',
        AVG(reviews.rating) As Average_Rating
        FROM reviews 
        INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
        GROUP BY reviews.movie_id
        ORDER BY Average_Rating ASC LIMIT 1"

leastLove <- suppressWarnings(dbGetQuery(con, sql))

print(leastLove)
##                    Movie Release Date Average_Rating
## 1 Soldiers Of The Damned   2017-12-07            2.8


Reviews by Reviewer

sql <- "SELECT CONCAT(reviewers.reviewer_first_name,' ', reviewers.reviewer_last_name) AS Reviewer,
        AVG(reviews.rating) AS Average_Rating
        FROM reviews 
        INNER JOIN reviewers ON (reviewers.reviewer_id = reviews.reviewer_id)
        GROUP BY reviews.reviewer_id"

theReviewers <- suppressWarnings(dbGetQuery(con, sql))
print(theReviewers)
##                 Reviewer Average_Rating
## 1         Bailey  Stalin         2.8333
## 2            Cole  Fiben         3.5000
## 3        Tagawa  Tatiana         3.0000
## 4      Rodriguez  Fatima         4.0000
## 5        Folarin  Quincy         2.5000
## 6  Ramirez-Jimenez  Mona         3.5000
## 7        Sysavat  Warren         2.8333
## 8  al-Taheri  Zainuddeen         2.6667
## 9        Pena  Alejandro         3.0000
## 10        Gayman  Amanda         3.1667


  1. Which reviewer gave the most love?
highestRater <- theReviewers[which.max(theReviewers$Average_Rating),]
print(highestRater)
##            Reviewer Average_Rating
## 4 Rodriguez  Fatima              4


  1. Which reviewer gave the least love?
lowestRater <- theReviewers[which.min(theReviewers$Average_Rating),]
print(lowestRater)
##          Reviewer Average_Rating
## 5 Folarin  Quincy            2.5


Summary Plot

sql <- "SELECT movies.movie_title AS Movie,
        reviews.rating AS Rating,
        count(rating) as Counts  from reviews
        INNER JOIN movies ON (movies.movie_id = reviews.movie_id)
        GROUP BY reviews.movie_id,reviews.rating"

theRatings <- suppressWarnings(dbGetQuery(con, sql))
theRatings$Rating <- factor(theRatings$Rating, levels = c("One"<-1, "Two"<-2, "Three"<-3, "Four"<-4,"Five"<-5))

ggplot(data = theRatings, aes(x = Movie, y = Counts, fill = Rating)) + geom_bar(stat = "identity") + coord_flip() + scale_fill_brewer(palette = 12) + labs(title = "Distribution of Ratings Per Movie")