Data Retrieval

Load R Packages

#library(RMySQL)
#library(getPass)
#library(plotly)

require(RMySQL)
## Loading required package: RMySQL
require(getPass)
## Loading required package: getPass
require(plotly)
## Loading required package: plotly
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout

Connect to Database

# Uncomment the below commend. Enter user name and password to connect to your database.

#movie_db = dbConnect(MySQL(), user=getPass(msg = 'Enter username'), password= getPass('Enter Password'), dbname='moviereview')

# Load the scripts to create tables(movie_db, review_db) and insert data into the tables.
# https://github.com/raghu74us/607_1/blob/master/moview_db.sql

Retrieve Data from Mysql database and Store in R DataFrames

# List the movies and netflix ratings from movie table.
movie_sql = "Select movie_id,title,netflix_rating FROM movie_db;"
movie_df <- dbGetQuery(movie_db, movie_sql)

knitr::kable(movie_df)
movie_id title netflix_rating
1 Sing 4.3
2 Troll 3.9
3 Toy story 3.5
4 Beauty and the Beast 3.8
5 Storks 4.1

Retrive the individual user ratings for the movies.

rating_sql <- "Select * FROM review_db;"
rating_df <- dbGetQuery(movie_db, rating_sql)
knitr:: kable(rating_df)
movie_id user_name rating review_comments
1 Albert 5 Excellent
2 Albert 4 Super
3 Albert 5 Excellent
4 Albert 4 Super
5 Albert 3 Wow
1 Charlie 4 Excellent
2 Charlie 4 Super
3 Charlie 5 Excellent
4 Charlie 4 Super
5 Charlie 3 Wow
1 David 5 Excellent
2 David 4 Super
3 David 3 Excellent
4 David 4 Super
5 David 4 Wow
1 Lucy 5 Excellent
2 Lucy 4 Super
3 Lucy 4 Excellent
4 Lucy 5 Super
5 Lucy 5 Wow
1 Jenniffer 5 Excellent
2 Jenniffer 3 Super
3 Jenniffer 3 Excellent
4 Jenniffer 4 Super
5 Jenniffer 4 Wow

Get the Average user rating for the movies.

compare_rating <- "select a.movie_id,a.title,  round( (sum(b.rating)/count(b.rating)),2) Average_rating
from movie_db a,
     review_db b 
where a.movie_id = b.movie_id
group by a.movie_id,a.title ;"

compare_df <- dbGetQuery(movie_db, compare_rating)
knitr:: kable(compare_df)
movie_id title Average_rating
1 Sing 4.8
2 Troll 3.8
3 Toy story 4.0
4 Beauty and the Beast 4.2
5 Storks 3.8

Compare the Netflix rating and User ratings for the movies.

ratings_compare <- merge(compare_df,movie_df)
ratings_compare$diff <-  movie_df$netflix_rating - compare_df$Average_rating
knitr:: kable(ratings_compare)
movie_id title Average_rating netflix_rating diff
1 Sing 4.8 4.3 -0.5
2 Troll 3.8 3.9 0.1
3 Toy story 4.0 3.5 -0.5
4 Beauty and the Beast 4.2 3.8 -0.4
5 Storks 3.8 4.1 0.3

Plot the Graph for Net flix rating vs user rating.

trace_0 <- ratings_compare$netflix_rating
trace_1 <- ratings_compare$Average_rating

x <- ratings_compare$title

data <- data.frame(x, trace_0, trace_1)

p <- plot_ly(data, x = ~x, y = ~trace_0, name = 'Netflix Rating', type = 'scatter', mode = 'markers') %>%
add_trace(y = ~trace_1, name = 'User Rating', mode = 'markers')

p