1 Pre-Requistes : Available Libraries

2 Movie Database

2.1 Connect to mySQL

library(RMySQL)
## Loading required package: DBI
library(DBI)
conn <- dbConnect(RMySQL::MySQL()          # Construct SQL driver
                 , dbname = "CUNY_DATA607"
                 , host = "35.231.71.159"
                 , port = 3306
                 , user = "root"
                 , password = "data607")
message("Connected to mysql on ", date())
## Connected to mysql on Sun Feb 10 13:27:43 2019

2.2 List of tables

dbListTables(conn)
## [1] "movie_ratings" "movies"        "persons"

2.3 Tables

2.3.1 Movies

dbReadTable(conn, "movies")
##    movie_id     movie_name     movie_genre
## 1         1        TITANIC         Romance
## 2         2     TERMINATOR Science Fiction
## 3         3      STAR WARS Science Fiction
## 4         4       DIE HARD          Action
## 5         5      GODFATHER           Drama
## 6         6   THE HAUNTING          Horror
## 7         7 BABY'S DAY OUT          Comedy
## 8         8      TOY STORY       Animation
## 9         9       AVENGERS       Superhero
## 10       10     LA LA LAND         Musical

2.3.2 Persons

#dbReadTable(conn, "")
dbGetQuery(conn, "SELECT * FROM persons ORDER BY person_id")
##    person_id person_first_name person_last_name
## 1          1              John              Doe
## 2          2              Jack           Holmes
## 3          3         Elizabeth             Dawn
## 4          4           Jessica           Parker
## 5          5           Emanuel           Banner
## 6          6             Diana           Hayden
## 7          7             Jared             Hill
## 8          8             Chloe              Sue
## 9          9             Shawn         Williams
## 10        10              Adam           Becker
## 11        11            Teresa              May
## 12        12             Aaron           Flinch
## 13        13             Harry          Dickens

2.3.3 Ratings

moviesQuery <-  fetch(dbSendQuery(conn, "SELECT movie_name,movie_genre,person_first_name,person_last_name,movie_rating,(select avg(mr.movie_rating) FROM movie_ratings mr WHERE mr.movie_id=mrs.movie_id) avg_rating FROM movies LEFT JOIN movie_ratings mrs USING (movie_id) LEFT JOIN persons USING(person_id) ORDER BY movie_id,person_id"))
library(DT)
DT::datatable(moviesQuery, options = list(pagelength=5))

3 Visualizations

3.1 Bar Graph

library(ggplot2)
ggplot(moviesQuery,mapping=aes(x=moviesQuery$movie_name,y=moviesQuery$avg_rating, fill= avg_rating)) +
geom_col() +
ggtitle("Ratings on the Movies") +
coord_flip()
## Warning: Removed 4 rows containing missing values (position_stack).

3.2 Box Plot

ggplot(moviesQuery,mapping=aes(x=moviesQuery$movie_name, y=moviesQuery$movie_rating)) +
geom_boxplot() +
coord_flip()
## Warning: Removed 4 rows containing non-finite values (stat_boxplot).

3.3 Scatter PLot

#plot(x=movieRatings$movie_name, y=movieRatings$avg_rating, xlim=c(min(movieRatings$avg_rating, na.rm = TRUE), max(movieRatings$avg_rating, na.rm = TRUE)), type='p', xlab='Movie Name', ylab='Average Rating', main='Movie versus Average Rating')  
plot(x=moviesQuery$movie_name, y=moviesQuery$movie_rating, xlim=c(min(moviesQuery$movie_rating, na.rm = TRUE), max(moviesQuery$movie_rating, na.rm = TRUE)), xlab='Movie Name', ylab='Movie Rating', main='Movie versus Rating') 
## Warning in xy.coords(x, y, xlabel, ylabel, log): NAs introduced by coercion

3.4 Histogram

hist(moviesQuery$movie_rating)

movieAvgRatings <- unique(moviesQuery[c("movie_name", "avg_rating")])
hist(movieAvgRatings$avg_rating)

3.5 Geom Smooth