DATA 607 Assignment - SQL and R
Choose six popular movies, which your freinds, co-workers might have watched.
Gather the ratings from your friends and try to analyse the result
Used MySQL database as a system of record for the above purpose.
Created few tables: movies, users and user_ratings to hold the relevant data
Let’s load the required libraries in R for data analysis
library(dplyr)
library(sqldf)
library(RODBC)
library(DBI)
library(odbc)
library(downloader)
library(htmlTable)
Connect to the MySQL database
List the movies
mydb = dbConnect(odbc(), "MySQL")
movies_dt <- dbGetQuery(mydb, "select movie_name from movies")
htmlTable(movies_dt)
|
|
movie_name
|
|
1
|
Toy Story
|
|
2
|
Jumanji
|
|
3
|
Father of the the Bride Part II
|
|
4
|
Heat
|
|
5
|
Sudden Death
|
|
6
|
Golden Eye
|
Join multiple tables to get data in one frame
Filter the rows with missing rating data
ratings_dt <- dbGetQuery(mydb, "select movie_name, username, userrole, rating
from user_ratings a
inner join users b on a.user_id = b.userid
inner join movies c on a.movie_id = c.id
where rating > -1", row.names=FALSE)
htmlTable(head(ratings_dt, 20))
|
|
movie_name
|
username
|
userrole
|
rating
|
|
1
|
Toy Story
|
Ajay
|
Friend
|
4
|
|
2
|
Jumanji
|
Ajay
|
Friend
|
4
|
|
3
|
Father of the the Bride Part II
|
Ajay
|
Friend
|
5
|
|
4
|
Sudden Death
|
Ajay
|
Friend
|
5
|
|
5
|
Golden Eye
|
Ajay
|
Friend
|
4
|
|
6
|
Toy Story
|
Sachin
|
Friend
|
5
|
|
7
|
Jumanji
|
Sachin
|
Friend
|
3
|
|
8
|
Father of the the Bride Part II
|
Sachin
|
Friend
|
4
|
|
9
|
Heat
|
Sachin
|
Friend
|
5
|
|
10
|
Golden Eye
|
Sachin
|
Friend
|
5
|
|
11
|
Toy Story
|
Rob
|
Co-Worker
|
4
|
|
12
|
Jumanji
|
Rob
|
Co-Worker
|
5
|
|
13
|
Father of the the Bride Part II
|
Rob
|
Co-Worker
|
4
|
|
14
|
Heat
|
Rob
|
Co-Worker
|
4
|
|
15
|
Sudden Death
|
Rob
|
Co-Worker
|
5
|
|
16
|
Toy Story
|
Isaac
|
Co-Worker
|
3
|
|
17
|
Jumanji
|
Isaac
|
Co-Worker
|
4
|
|
18
|
Father of the the Bride Part II
|
Isaac
|
Co-Worker
|
5
|
|
19
|
Heat
|
Isaac
|
Co-Worker
|
5
|
|
20
|
Toy Story
|
John
|
Co-Worker
|
4
|
Group and sum the ratings by movie names
library(sqldf)
ratings_groupByMovies <- sqldf( "SELECT movie_name, sum(rating)
FROM ratings_dt
group by movie_name
order by sum(rating) desc", row.names=TRUE)
head(ratings_groupByMovies)
## movie_name sum(rating)
## 1 Father of the the Bride Part II 26
## 2 Jumanji 25
## 3 Heat 23
## 4 Toy Story 20
## 5 Sudden Death 19
## 6 Golden Eye 17
Highest rated movie by friends and co-workers
ratings_groupByRole <- sqldf("SELECT movie_name, userrole, sum(rating)
FROM ratings_dt
group by movie_name, userrole
order by movie_name", row.names=TRUE)
htmlTable(head(ratings_groupByRole, 2))
|
|
movie_name
|
userrole
|
sum(rating)
|
|
1
|
Father of the the Bride Part II
|
Co-Worker
|
17
|
|
2
|
Father of the the Bride Part II
|
Friend
|
9
|
Count the users who rated the movies in the survey based on their role
countByUserRole<- sqldf("SELECT movie_name,
SUM(
CASE WHEN userrole = 'Friend' THEN 1 ELSE 0 END
) As 'Num_Friends_Rated',
SUM(
CASE WHEN userrole = 'Co-Worker' THEN 1 ELSE 0 END
) As 'Num_Co-Workers_Rated'
FROM ratings_dt
group by movie_name")
htmlTable(countByUserRole)
|
|
movie_name
|
Num_Friends_Rated
|
Num_Co-Workers_Rated
|
|
1
|
Father of the the Bride Part II
|
2
|
4
|
|
2
|
Golden Eye
|
2
|
2
|
|
3
|
Heat
|
1
|
4
|
|
4
|
Jumanji
|
2
|
4
|
|
5
|
Sudden Death
|
1
|
3
|
|
6
|
Toy Story
|
2
|
3
|
The users were later asked a survey on various categories about the movies
Get survey data from csv file
## movie_name username Storyline Cinematography Entertainment
## 1 Toy Story Ajay 1 1 1
## 2 Jumanji Ajay 1 1 1
## 3 Father of the Bride Part II Ajay 1 1 1
## 4 Heat Ajay NA 1 1
## 5 Sudden Death Ajay 1 NA 1
## 6 Golden Eye Ajay 1 1 1
## Costume.Direction Music.Special.Effects
## 1 1 NA
## 2 1 1
## 3 1 1
## 4 1 1
## 5 1 1
## 6 NA 1
combined_df <-
inner_join(ratings_dt, movie_user_survey, by=c("movie_name", "username"))
htmlTable(head(arrange(combined_df, movie_name, username), 15))
|
|
movie_name
|
username
|
userrole
|
rating
|
Storyline
|
Cinematography
|
Entertainment
|
Costume.Direction
|
Music.Special.Effects
|
|
1
|
Golden Eye
|
Ajay
|
Friend
|
4
|
1
|
1
|
1
|
|
1
|
|
2
|
Golden Eye
|
John
|
Co-Worker
|
4
|
1
|
1
|
|
|
1
|
|
3
|
Golden Eye
|
Mary
|
Co-Worker
|
4
|
1
|
1
|
1
|
|
1
|
|
4
|
Golden Eye
|
Sachin
|
Friend
|
5
|
1
|
1
|
|
|
1
|
|
5
|
Heat
|
Isaac
|
Co-Worker
|
5
|
|
1
|
1
|
|
|
|
6
|
Heat
|
John
|
Co-Worker
|
4
|
1
|
1
|
1
|
1
|
|
|
7
|
Heat
|
Mary
|
Co-Worker
|
5
|
1
|
1
|
|
1
|
1
|
|
8
|
Heat
|
Rob
|
Co-Worker
|
4
|
1
|
1
|
|
1
|
1
|
|
9
|
Heat
|
Sachin
|
Friend
|
5
|
1
|
1
|
1
|
1
|
|
|
10
|
Jumanji
|
Ajay
|
Friend
|
4
|
1
|
1
|
1
|
1
|
1
|
|
11
|
Jumanji
|
Isaac
|
Co-Worker
|
4
|
1
|
1
|
1
|
1
|
1
|
|
12
|
Jumanji
|
John
|
Co-Worker
|
4
|
1
|
1
|
1
|
1
|
1
|
|
13
|
Jumanji
|
Mary
|
Co-Worker
|
5
|
1
|
1
|
1
|
1
|
1
|
|
14
|
Jumanji
|
Rob
|
Co-Worker
|
5
|
1
|
1
|
1
|
1
|
1
|
|
15
|
Jumanji
|
Sachin
|
Friend
|
3
|
1
|
1
|
1
|
1
|
1
|