SQL and R

In this assignment a database called movie_ratings was created in SQLite. The database contains about recent popular movies choosen at random. People that I know (friends, family members, and myself included) rated each of these movie that on a scale of 1 (not good) to 5 (excellent). For the movies not seen no rate was provided.

The movie_ratings.db can be downloaded from https://github.com/bsosnovski/DATA607/blob/master/Week2Assignment/movie_ratings.db

Movie_Ratings Schema Diagram

Movie_Ratings Schema Diagram

Load packages

library(knitr)
library(RSQLite)

Establish Connection

con <- dbConnect(RSQLite::SQLite(), "movie_ratings.db")

Basic Use

dbListTables(con)
## [1] "critics"         "movies"          "new_table"       "ratings"        
## [5] "sqlite_sequence"
dbListFields(con, "critics")
## [1] "CriticID"  "FirstName" "Username"

SQL Queries

sql <- "SELECT * FROM critics"
dbGetQuery(con, sql)
##    CriticID FirstName Username
## 1         1      Joan       jp
## 2         2    Valera       vk
## 3         3  Nataliya       nk
## 4         4    Bianca       bs
## 5         5    Robert       rk
## 6         6      Mila       mp
## 7         7 Alexander       as
## 8         8       Ewa       ed
## 9         9      Nika       ns
## 10       10     Sasha       sg
## 11       11       Ira       ig
Movies <- dbReadTable(con, 'movies')
str(Movies)
## 'data.frame':    8 obs. of  2 variables:
##  $ MovieID: int  1 2 3 4 5 6 7 8
##  $ Title  : chr  "Three_Billboards_Outside_Ebbing_Missouri" "La_La_Land" "Lady_Bird" "Red_Sparrow" ...
Movies
##   MovieID                                    Title
## 1       1 Three_Billboards_Outside_Ebbing_Missouri
## 2       2                               La_La_Land
## 3       3                                Lady_Bird
## 4       4                              Red_Sparrow
## 5       5                  Star_Wars_The_Last_Jedi
## 6       6                                     Coco
## 7       7                             Wonder_Woman
## 8       8                            Black_Panther
sql <- "SELECT * FROM ratings"
dbGetQuery(con, sql, n=5)
##   RatingID Rate CriticID MovieID
## 1        1    5        1       1
## 2        2    4        1       3
## 3        3    5        2       1
## 4        4    4        2       5
## 5        5    4        2       6
sql <- "SELECT   c.FirstName, m.Title, r.Rate 
        FROM movies m JOIN ratings r ON r.MovieID = m.MovieID 
        JOIN critics c ON r.CriticID = c.CriticID 
        ORDER BY c.CriticID, m.MovieID;"
result <- dbGetQuery(con, sql)
result
##    FirstName                                    Title Rate
## 1       Joan Three_Billboards_Outside_Ebbing_Missouri    5
## 2       Joan                                Lady_Bird    4
## 3     Valera Three_Billboards_Outside_Ebbing_Missouri    5
## 4     Valera                  Star_Wars_The_Last_Jedi    4
## 5     Valera                                     Coco    4
## 6     Valera                             Wonder_Woman    3
## 7     Valera                            Black_Panther    2
## 8   Nataliya                               La_La_Land    4
## 9   Nataliya                  Star_Wars_The_Last_Jedi    5
## 10  Nataliya                                     Coco    4
## 11  Nataliya                             Wonder_Woman    5
## 12  Nataliya                            Black_Panther    5
## 13    Bianca Three_Billboards_Outside_Ebbing_Missouri    5
## 14    Bianca                               La_La_Land    4
## 15    Bianca                                Lady_Bird    5
## 16    Bianca                              Red_Sparrow    3
## 17    Bianca                  Star_Wars_The_Last_Jedi    5
## 18    Bianca                             Wonder_Woman    4
## 19    Bianca                            Black_Panther    4
## 20    Robert                               La_La_Land    4
## 21    Robert                  Star_Wars_The_Last_Jedi    5
## 22      Mila                               La_La_Land    5
## 23      Mila                                     Coco    5
## 24      Mila                             Wonder_Woman    5
## 25 Alexander                               La_La_Land    3
## 26 Alexander                                Lady_Bird    4
## 27 Alexander                              Red_Sparrow    2
## 28 Alexander                  Star_Wars_The_Last_Jedi    5
## 29 Alexander                             Wonder_Woman    5
## 30 Alexander                            Black_Panther    5
## 31       Ewa                                Lady_Bird    2
## 32       Ewa                              Red_Sparrow    3
## 33       Ewa                             Wonder_Woman    4
## 34      Nika                               La_La_Land    4
## 35      Nika                             Wonder_Woman    4
## 36     Sasha                             Wonder_Woman    4

SQL Aggregate functions

num_rates <- "SELECT m.title, COUNT(*) FROM movies m
        JOIN ratings r ON m.movieID = r.MovieID
        GROUP BY m.title"
dbGetQuery(con, num_rates)
##                                      Title COUNT(*)
## 1                            Black_Panther        4
## 2                                     Coco        3
## 3                               La_La_Land        6
## 4                                Lady_Bird        4
## 5                              Red_Sparrow        3
## 6                  Star_Wars_The_Last_Jedi        5
## 7 Three_Billboards_Outside_Ebbing_Missouri        3
## 8                             Wonder_Woman        8
avg_ratings <- "SELECT m.title, AVG(Rate)FROM movies m
        JOIN ratings r ON m.movieID = r.MovieID
        GROUP BY m.title"
dbGetQuery(con,avg_ratings)
##                                      Title AVG(Rate)
## 1                            Black_Panther  4.000000
## 2                                     Coco  4.333333
## 3                               La_La_Land  4.000000
## 4                                Lady_Bird  3.750000
## 5                              Red_Sparrow  2.666667
## 6                  Star_Wars_The_Last_Jedi  4.800000
## 7 Three_Billboards_Outside_Ebbing_Missouri  5.000000
## 8                             Wonder_Woman  4.250000
dbDisconnect(con)