SQL and R

In this assignment a database called movie_ratings was created in MySQL. The database contains about recent popular movies choosen at random. People that I know (friends, family members, and myself included) rated each of these movie that on a scale of 1 (not good) to 5 (excellent). For the movies not seen no rate was provided.

The movie_ratings.db can be downloaded from https://github.com/bsosnovski/DATA607/blob/master/Week2Assignment/Week2Assignment_MySQL/movie_ratings_db.sql

Movie_Ratings Schema Diagram

Movie_Ratings Schema Diagram

Load packages

library(knitr)
#library(DBI)
library(RMySQL)
## Loading required package: DBI

Establish Connection

con <- dbConnect(MySQL(), user='test_user', password='data607', dbname='movie_ratings', host='localhost')

Basic Use

dbListTables(con)
## [1] "airlines" "airports" "critics"  "flights"  "movies"   "planes"  
## [7] "ratings"  "weather"
dbListFields(con, "critics")
## [1] "CriticID"  "FirstName" "Username"

SQL Queries

sql <- "SELECT * FROM critics"
dbGetQuery(con, sql)
##    CriticID FirstName Username
## 1         1      Joan       jp
## 2         2    Valera       vk
## 3         3  Nataliya       nk
## 4         4    Bianca       bs
## 5         5    Robert       rk
## 6         6      Mila       mp
## 7         7 Alexander       as
## 8         8       Ewa       ed
## 9         9      Nika       ns
## 10       10     Sasha       sg
## 11       11     Irina       ig
Movies <- dbReadTable(con, 'movies')
str(Movies)
## 'data.frame':    8 obs. of  2 variables:
##  $ MovieID: int  1 2 3 4 5 6 7 8
##  $ Title  : chr  "Three_Billboards_Outside_Ebbing_Missouri" "La_La_Land" "Lady_Bird" "Red_Sparrow" ...
Movies
##   MovieID                                    Title
## 1       1 Three_Billboards_Outside_Ebbing_Missouri
## 2       2                               La_La_Land
## 3       3                                Lady_Bird
## 4       4                              Red_Sparrow
## 5       5                  Star_Wars_The_Last_Jedi
## 6       6                                     Coco
## 7       7                             Wonder_Woman
## 8       8                            Black_Panther
sql <- "SELECT * FROM ratings"
dbGetQuery(con, sql, n=5)
##   RatingID Rate CriticID MovieID
## 1        1    5        1       1
## 2        2    4        1       3
## 3        3    5        2       1
## 4        4    4        2       5
## 5        5    4        2       6
sql <- "SELECT   c.FirstName, m.Title, r.Rate 
        FROM movies m JOIN ratings r ON r.MovieID = m.MovieID 
        JOIN critics c ON r.CriticID = c.CriticID 
        ORDER BY c.CriticID, m.MovieID;"
result <- dbGetQuery(con, sql)
result
##    FirstName                                    Title Rate
## 1       Joan Three_Billboards_Outside_Ebbing_Missouri    5
## 2       Joan                                Lady_Bird    4
## 3     Valera Three_Billboards_Outside_Ebbing_Missouri    5
## 4     Valera                  Star_Wars_The_Last_Jedi    4
## 5     Valera                                     Coco    4
## 6     Valera                             Wonder_Woman    3
## 7     Valera                            Black_Panther    2
## 8   Nataliya                               La_La_Land    4
## 9   Nataliya                  Star_Wars_The_Last_Jedi    5
## 10  Nataliya                                     Coco    4
## 11  Nataliya                             Wonder_Woman    5
## 12  Nataliya                            Black_Panther    5
## 13    Bianca Three_Billboards_Outside_Ebbing_Missouri    5
## 14    Bianca                               La_La_Land    4
## 15    Bianca                                Lady_Bird    5
## 16    Bianca                              Red_Sparrow    3
## 17    Bianca                  Star_Wars_The_Last_Jedi    5
## 18    Bianca                             Wonder_Woman    4
## 19    Bianca                            Black_Panther    4
## 20    Robert                               La_La_Land    4
## 21    Robert                  Star_Wars_The_Last_Jedi    5
## 22      Mila                               La_La_Land    5
## 23      Mila                                     Coco    5
## 24      Mila                             Wonder_Woman    5
## 25 Alexander                               La_La_Land    3
## 26 Alexander                                Lady_Bird    4
## 27 Alexander                              Red_Sparrow    2
## 28 Alexander                  Star_Wars_The_Last_Jedi    5
## 29 Alexander                             Wonder_Woman    5
## 30 Alexander                            Black_Panther    5
## 31       Ewa                                Lady_Bird    2
## 32       Ewa                              Red_Sparrow    3
## 33       Ewa                             Wonder_Woman    4
## 34      Nika                               La_La_Land    4
## 35      Nika                             Wonder_Woman    4
## 36     Sasha                             Wonder_Woman    4

SQL Aggregate functions

num_rates <- "SELECT m.title, COUNT(*) FROM movies m
        JOIN ratings r ON m.movieID = r.MovieID
        GROUP BY m.title"
dbGetQuery(con, num_rates)
##                                      title COUNT(*)
## 1 Three_Billboards_Outside_Ebbing_Missouri        3
## 2                               La_La_Land        6
## 3                                Lady_Bird        4
## 4                              Red_Sparrow        3
## 5                  Star_Wars_The_Last_Jedi        5
## 6                                     Coco        3
## 7                             Wonder_Woman        8
## 8                            Black_Panther        4
avg_ratings <- "SELECT m.title, AVG(Rate)FROM movies m
        JOIN ratings r ON m.movieID = r.MovieID
        GROUP BY m.title"
dbGetQuery(con,avg_ratings)
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
##                                      title AVG(Rate)
## 1 Three_Billboards_Outside_Ebbing_Missouri    5.0000
## 2                               La_La_Land    4.0000
## 3                                Lady_Bird    3.7500
## 4                              Red_Sparrow    2.6667
## 5                  Star_Wars_The_Last_Jedi    4.8000
## 6                                     Coco    4.3333
## 7                             Wonder_Woman    4.2500
## 8                            Black_Panther    4.0000
dbDisconnect(con)
## [1] TRUE