In this assignment a database called movie_ratings was created in MySQL. The database contains about recent popular movies choosen at random. People that I know (friends, family members, and myself included) rated each of these movie that on a scale of 1 (not good) to 5 (excellent). For the movies not seen no rate was provided.
The movie_ratings.db can be downloaded from https://github.com/bsosnovski/DATA607/blob/master/Week2Assignment/Week2Assignment_MySQL/movie_ratings_db.sql
Movie_Ratings Schema Diagram
library(knitr)
#library(DBI)
library(RMySQL)
## Loading required package: DBI
con <- dbConnect(MySQL(), user='test_user', password='data607', dbname='movie_ratings', host='localhost')
dbListTables(con)
## [1] "airlines" "airports" "critics" "flights" "movies" "planes"
## [7] "ratings" "weather"
dbListFields(con, "critics")
## [1] "CriticID" "FirstName" "Username"
sql <- "SELECT * FROM critics"
dbGetQuery(con, sql)
## CriticID FirstName Username
## 1 1 Joan jp
## 2 2 Valera vk
## 3 3 Nataliya nk
## 4 4 Bianca bs
## 5 5 Robert rk
## 6 6 Mila mp
## 7 7 Alexander as
## 8 8 Ewa ed
## 9 9 Nika ns
## 10 10 Sasha sg
## 11 11 Irina ig
Movies <- dbReadTable(con, 'movies')
str(Movies)
## 'data.frame': 8 obs. of 2 variables:
## $ MovieID: int 1 2 3 4 5 6 7 8
## $ Title : chr "Three_Billboards_Outside_Ebbing_Missouri" "La_La_Land" "Lady_Bird" "Red_Sparrow" ...
Movies
## MovieID Title
## 1 1 Three_Billboards_Outside_Ebbing_Missouri
## 2 2 La_La_Land
## 3 3 Lady_Bird
## 4 4 Red_Sparrow
## 5 5 Star_Wars_The_Last_Jedi
## 6 6 Coco
## 7 7 Wonder_Woman
## 8 8 Black_Panther
sql <- "SELECT * FROM ratings"
dbGetQuery(con, sql, n=5)
## RatingID Rate CriticID MovieID
## 1 1 5 1 1
## 2 2 4 1 3
## 3 3 5 2 1
## 4 4 4 2 5
## 5 5 4 2 6
sql <- "SELECT c.FirstName, m.Title, r.Rate
FROM movies m JOIN ratings r ON r.MovieID = m.MovieID
JOIN critics c ON r.CriticID = c.CriticID
ORDER BY c.CriticID, m.MovieID;"
result <- dbGetQuery(con, sql)
result
## FirstName Title Rate
## 1 Joan Three_Billboards_Outside_Ebbing_Missouri 5
## 2 Joan Lady_Bird 4
## 3 Valera Three_Billboards_Outside_Ebbing_Missouri 5
## 4 Valera Star_Wars_The_Last_Jedi 4
## 5 Valera Coco 4
## 6 Valera Wonder_Woman 3
## 7 Valera Black_Panther 2
## 8 Nataliya La_La_Land 4
## 9 Nataliya Star_Wars_The_Last_Jedi 5
## 10 Nataliya Coco 4
## 11 Nataliya Wonder_Woman 5
## 12 Nataliya Black_Panther 5
## 13 Bianca Three_Billboards_Outside_Ebbing_Missouri 5
## 14 Bianca La_La_Land 4
## 15 Bianca Lady_Bird 5
## 16 Bianca Red_Sparrow 3
## 17 Bianca Star_Wars_The_Last_Jedi 5
## 18 Bianca Wonder_Woman 4
## 19 Bianca Black_Panther 4
## 20 Robert La_La_Land 4
## 21 Robert Star_Wars_The_Last_Jedi 5
## 22 Mila La_La_Land 5
## 23 Mila Coco 5
## 24 Mila Wonder_Woman 5
## 25 Alexander La_La_Land 3
## 26 Alexander Lady_Bird 4
## 27 Alexander Red_Sparrow 2
## 28 Alexander Star_Wars_The_Last_Jedi 5
## 29 Alexander Wonder_Woman 5
## 30 Alexander Black_Panther 5
## 31 Ewa Lady_Bird 2
## 32 Ewa Red_Sparrow 3
## 33 Ewa Wonder_Woman 4
## 34 Nika La_La_Land 4
## 35 Nika Wonder_Woman 4
## 36 Sasha Wonder_Woman 4
num_rates <- "SELECT m.title, COUNT(*) FROM movies m
JOIN ratings r ON m.movieID = r.MovieID
GROUP BY m.title"
dbGetQuery(con, num_rates)
## title COUNT(*)
## 1 Three_Billboards_Outside_Ebbing_Missouri 3
## 2 La_La_Land 6
## 3 Lady_Bird 4
## 4 Red_Sparrow 3
## 5 Star_Wars_The_Last_Jedi 5
## 6 Coco 3
## 7 Wonder_Woman 8
## 8 Black_Panther 4
avg_ratings <- "SELECT m.title, AVG(Rate)FROM movies m
JOIN ratings r ON m.movieID = r.MovieID
GROUP BY m.title"
dbGetQuery(con,avg_ratings)
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
## title AVG(Rate)
## 1 Three_Billboards_Outside_Ebbing_Missouri 5.0000
## 2 La_La_Land 4.0000
## 3 Lady_Bird 3.7500
## 4 Red_Sparrow 2.6667
## 5 Star_Wars_The_Last_Jedi 4.8000
## 6 Coco 4.3333
## 7 Wonder_Woman 4.2500
## 8 Black_Panther 4.0000
dbDisconnect(con)
## [1] TRUE