In this assignment a database called movie_ratings was created in SQLite. The database contains about recent popular movies choosen at random. People that I know (friends, family members, and myself included) rated each of these movie that on a scale of 1 (not good) to 5 (excellent). For the movies not seen no rate was provided.
The movie_ratings.db can be downloaded from https://github.com/bsosnovski/DATA607/blob/master/Week2Assignment/movie_ratings.db
Movie_Ratings Schema Diagram
library(knitr)
library(RSQLite)
con <- dbConnect(RSQLite::SQLite(), "movie_ratings.db")
dbListTables(con)
## [1] "critics" "movies" "new_table" "ratings"
## [5] "sqlite_sequence"
dbListFields(con, "critics")
## [1] "CriticID" "FirstName" "Username"
sql <- "SELECT * FROM critics"
dbGetQuery(con, sql)
## CriticID FirstName Username
## 1 1 Joan jp
## 2 2 Valera vk
## 3 3 Nataliya nk
## 4 4 Bianca bs
## 5 5 Robert rk
## 6 6 Mila mp
## 7 7 Alexander as
## 8 8 Ewa ed
## 9 9 Nika ns
## 10 10 Sasha sg
## 11 11 Ira ig
Movies <- dbReadTable(con, 'movies')
str(Movies)
## 'data.frame': 8 obs. of 2 variables:
## $ MovieID: int 1 2 3 4 5 6 7 8
## $ Title : chr "Three_Billboards_Outside_Ebbing_Missouri" "La_La_Land" "Lady_Bird" "Red_Sparrow" ...
Movies
## MovieID Title
## 1 1 Three_Billboards_Outside_Ebbing_Missouri
## 2 2 La_La_Land
## 3 3 Lady_Bird
## 4 4 Red_Sparrow
## 5 5 Star_Wars_The_Last_Jedi
## 6 6 Coco
## 7 7 Wonder_Woman
## 8 8 Black_Panther
sql <- "SELECT * FROM ratings"
dbGetQuery(con, sql, n=5)
## RatingID Rate CriticID MovieID
## 1 1 5 1 1
## 2 2 4 1 3
## 3 3 5 2 1
## 4 4 4 2 5
## 5 5 4 2 6
sql <- "SELECT c.FirstName, m.Title, r.Rate
FROM movies m JOIN ratings r ON r.MovieID = m.MovieID
JOIN critics c ON r.CriticID = c.CriticID
ORDER BY c.CriticID, m.MovieID;"
result <- dbGetQuery(con, sql)
result
## FirstName Title Rate
## 1 Joan Three_Billboards_Outside_Ebbing_Missouri 5
## 2 Joan Lady_Bird 4
## 3 Valera Three_Billboards_Outside_Ebbing_Missouri 5
## 4 Valera Star_Wars_The_Last_Jedi 4
## 5 Valera Coco 4
## 6 Valera Wonder_Woman 3
## 7 Valera Black_Panther 2
## 8 Nataliya La_La_Land 4
## 9 Nataliya Star_Wars_The_Last_Jedi 5
## 10 Nataliya Coco 4
## 11 Nataliya Wonder_Woman 5
## 12 Nataliya Black_Panther 5
## 13 Bianca Three_Billboards_Outside_Ebbing_Missouri 5
## 14 Bianca La_La_Land 4
## 15 Bianca Lady_Bird 5
## 16 Bianca Red_Sparrow 3
## 17 Bianca Star_Wars_The_Last_Jedi 5
## 18 Bianca Wonder_Woman 4
## 19 Bianca Black_Panther 4
## 20 Robert La_La_Land 4
## 21 Robert Star_Wars_The_Last_Jedi 5
## 22 Mila La_La_Land 5
## 23 Mila Coco 5
## 24 Mila Wonder_Woman 5
## 25 Alexander La_La_Land 3
## 26 Alexander Lady_Bird 4
## 27 Alexander Red_Sparrow 2
## 28 Alexander Star_Wars_The_Last_Jedi 5
## 29 Alexander Wonder_Woman 5
## 30 Alexander Black_Panther 5
## 31 Ewa Lady_Bird 2
## 32 Ewa Red_Sparrow 3
## 33 Ewa Wonder_Woman 4
## 34 Nika La_La_Land 4
## 35 Nika Wonder_Woman 4
## 36 Sasha Wonder_Woman 4
num_rates <- "SELECT m.title, COUNT(*) FROM movies m
JOIN ratings r ON m.movieID = r.MovieID
GROUP BY m.title"
dbGetQuery(con, num_rates)
## Title COUNT(*)
## 1 Black_Panther 4
## 2 Coco 3
## 3 La_La_Land 6
## 4 Lady_Bird 4
## 5 Red_Sparrow 3
## 6 Star_Wars_The_Last_Jedi 5
## 7 Three_Billboards_Outside_Ebbing_Missouri 3
## 8 Wonder_Woman 8
avg_ratings <- "SELECT m.title, AVG(Rate)FROM movies m
JOIN ratings r ON m.movieID = r.MovieID
GROUP BY m.title"
dbGetQuery(con,avg_ratings)
## Title AVG(Rate)
## 1 Black_Panther 4.000000
## 2 Coco 4.333333
## 3 La_La_Land 4.000000
## 4 Lady_Bird 3.750000
## 5 Red_Sparrow 2.666667
## 6 Star_Wars_The_Last_Jedi 4.800000
## 7 Three_Billboards_Outside_Ebbing_Missouri 5.000000
## 8 Wonder_Woman 4.250000
dbDisconnect(con)