Data Retrieval
Load R Packages
#library(RMySQL)
#library(getPass)
#library(plotly)
require(RMySQL)
## Loading required package: RMySQL
require(getPass)
## Loading required package: getPass
require(plotly)
## Loading required package: plotly
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
 
Connect to Database
# Uncomment the below commend. Enter user name and password to connect to your database.
#movie_db = dbConnect(MySQL(), user=getPass(msg = 'Enter username'), password= getPass('Enter Password'), dbname='moviereview')
# Load the scripts to create tables(movie_db, review_db) and insert data into the tables.
# https://github.com/raghu74us/607_1/blob/master/moview_db.sql
 
Retrieve Data from Mysql database and Store in R DataFrames
# List the movies and netflix ratings from movie table.
movie_sql = "Select movie_id,title,netflix_rating FROM movie_db;"
movie_df <- dbGetQuery(movie_db, movie_sql)
knitr::kable(movie_df)
| 1 | Sing | 4.3 | 
| 2 | Troll | 3.9 | 
| 3 | Toy story | 3.5 | 
| 4 | Beauty and the Beast | 3.8 | 
| 5 | Storks | 4.1 | 
 
Retrive the individual user ratings for the movies.
rating_sql <- "Select * FROM review_db;"
rating_df <- dbGetQuery(movie_db, rating_sql)
knitr:: kable(rating_df)
| 1 | Albert | 5 | Excellent | 
| 2 | Albert | 4 | Super | 
| 3 | Albert | 5 | Excellent | 
| 4 | Albert | 4 | Super | 
| 5 | Albert | 3 | Wow | 
| 1 | Charlie | 4 | Excellent | 
| 2 | Charlie | 4 | Super | 
| 3 | Charlie | 5 | Excellent | 
| 4 | Charlie | 4 | Super | 
| 5 | Charlie | 3 | Wow | 
| 1 | David | 5 | Excellent | 
| 2 | David | 4 | Super | 
| 3 | David | 3 | Excellent | 
| 4 | David | 4 | Super | 
| 5 | David | 4 | Wow | 
| 1 | Lucy | 5 | Excellent | 
| 2 | Lucy | 4 | Super | 
| 3 | Lucy | 4 | Excellent | 
| 4 | Lucy | 5 | Super | 
| 5 | Lucy | 5 | Wow | 
| 1 | Jenniffer | 5 | Excellent | 
| 2 | Jenniffer | 3 | Super | 
| 3 | Jenniffer | 3 | Excellent | 
| 4 | Jenniffer | 4 | Super | 
| 5 | Jenniffer | 4 | Wow | 
 
Get the Average user rating for the movies.
compare_rating <- "select a.movie_id,a.title,  round( (sum(b.rating)/count(b.rating)),2) Average_rating
from movie_db a,
     review_db b 
where a.movie_id = b.movie_id
group by a.movie_id,a.title ;"
compare_df <- dbGetQuery(movie_db, compare_rating)
knitr:: kable(compare_df)
| 1 | Sing | 4.8 | 
| 2 | Troll | 3.8 | 
| 3 | Toy story | 4.0 | 
| 4 | Beauty and the Beast | 4.2 | 
| 5 | Storks | 3.8 | 
 
Compare the Netflix rating and User ratings for the movies.
ratings_compare <- merge(compare_df,movie_df)
ratings_compare$diff <-  movie_df$netflix_rating - compare_df$Average_rating
knitr:: kable(ratings_compare)
| 1 | Sing | 4.8 | 4.3 | -0.5 | 
| 2 | Troll | 3.8 | 3.9 | 0.1 | 
| 3 | Toy story | 4.0 | 3.5 | -0.5 | 
| 4 | Beauty and the Beast | 4.2 | 3.8 | -0.4 | 
| 5 | Storks | 3.8 | 4.1 | 0.3 | 
 
Plot the Graph for Net flix rating vs user rating.
trace_0 <- ratings_compare$netflix_rating
trace_1 <- ratings_compare$Average_rating
x <- ratings_compare$title
data <- data.frame(x, trace_0, trace_1)
p <- plot_ly(data, x = ~x, y = ~trace_0, name = 'Netflix Rating', type = 'scatter', mode = 'markers') %>%
add_trace(y = ~trace_1, name = 'User Rating', mode = 'markers')
p