Data Retrieval
Load R Packages
#library(RMySQL)
#library(getPass)
#library(plotly)
require(RMySQL)
## Loading required package: RMySQL
require(getPass)
## Loading required package: getPass
require(plotly)
## Loading required package: plotly
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Connect to Database
# Uncomment the below commend. Enter user name and password to connect to your database.
#movie_db = dbConnect(MySQL(), user=getPass(msg = 'Enter username'), password= getPass('Enter Password'), dbname='moviereview')
# Load the scripts to create tables(movie_db, review_db) and insert data into the tables.
# https://github.com/raghu74us/607_1/blob/master/moview_db.sql
Retrieve Data from Mysql database and Store in R DataFrames
# List the movies and netflix ratings from movie table.
movie_sql = "Select movie_id,title,netflix_rating FROM movie_db;"
movie_df <- dbGetQuery(movie_db, movie_sql)
knitr::kable(movie_df)
| 1 |
Sing |
4.3 |
| 2 |
Troll |
3.9 |
| 3 |
Toy story |
3.5 |
| 4 |
Beauty and the Beast |
3.8 |
| 5 |
Storks |
4.1 |
Retrive the individual user ratings for the movies.
rating_sql <- "Select * FROM review_db;"
rating_df <- dbGetQuery(movie_db, rating_sql)
knitr:: kable(rating_df)
| 1 |
Albert |
5 |
Excellent |
| 2 |
Albert |
4 |
Super |
| 3 |
Albert |
5 |
Excellent |
| 4 |
Albert |
4 |
Super |
| 5 |
Albert |
3 |
Wow |
| 1 |
Charlie |
4 |
Excellent |
| 2 |
Charlie |
4 |
Super |
| 3 |
Charlie |
5 |
Excellent |
| 4 |
Charlie |
4 |
Super |
| 5 |
Charlie |
3 |
Wow |
| 1 |
David |
5 |
Excellent |
| 2 |
David |
4 |
Super |
| 3 |
David |
3 |
Excellent |
| 4 |
David |
4 |
Super |
| 5 |
David |
4 |
Wow |
| 1 |
Lucy |
5 |
Excellent |
| 2 |
Lucy |
4 |
Super |
| 3 |
Lucy |
4 |
Excellent |
| 4 |
Lucy |
5 |
Super |
| 5 |
Lucy |
5 |
Wow |
| 1 |
Jenniffer |
5 |
Excellent |
| 2 |
Jenniffer |
3 |
Super |
| 3 |
Jenniffer |
3 |
Excellent |
| 4 |
Jenniffer |
4 |
Super |
| 5 |
Jenniffer |
4 |
Wow |
Get the Average user rating for the movies.
compare_rating <- "select a.movie_id,a.title, round( (sum(b.rating)/count(b.rating)),2) Average_rating
from movie_db a,
review_db b
where a.movie_id = b.movie_id
group by a.movie_id,a.title ;"
compare_df <- dbGetQuery(movie_db, compare_rating)
knitr:: kable(compare_df)
| 1 |
Sing |
4.8 |
| 2 |
Troll |
3.8 |
| 3 |
Toy story |
4.0 |
| 4 |
Beauty and the Beast |
4.2 |
| 5 |
Storks |
3.8 |
Compare the Netflix rating and User ratings for the movies.
ratings_compare <- merge(compare_df,movie_df)
ratings_compare$diff <- movie_df$netflix_rating - compare_df$Average_rating
knitr:: kable(ratings_compare)
| 1 |
Sing |
4.8 |
4.3 |
-0.5 |
| 2 |
Troll |
3.8 |
3.9 |
0.1 |
| 3 |
Toy story |
4.0 |
3.5 |
-0.5 |
| 4 |
Beauty and the Beast |
4.2 |
3.8 |
-0.4 |
| 5 |
Storks |
3.8 |
4.1 |
0.3 |
Plot the Graph for Net flix rating vs user rating.
trace_0 <- ratings_compare$netflix_rating
trace_1 <- ratings_compare$Average_rating
x <- ratings_compare$title
data <- data.frame(x, trace_0, trace_1)
p <- plot_ly(data, x = ~x, y = ~trace_0, name = 'Netflix Rating', type = 'scatter', mode = 'markers') %>%
add_trace(y = ~trace_1, name = 'User Rating', mode = 'markers')
p