The purpose of this asignment is to create a database containing rankings of 6 recent movies by 5 different users, connect to the database from R and load the database into a dataframe. This particular implementation is using Mysql as a database server and is using RMySQL package to istablish the connection to database and retrieve the data.
# DROP DATABASE IF EXISTS moviedb;
# CREATE database moviedb;
# use moviedb;
#
# CREATE TABLE users (
# userID int not null auto_increment,
# fname varchar(20),
# lname varchar(20),
# primary key (userID)
# );
#
# INSERT INTO users (fname, lname) VALUES
# ('Natalia','Lesechko'),
# ('Victoria','Elikishvili'),
# ('Sergey','Vayner'),
# ('Bing','Bong'),
# ('Dmitry','Yuster');
#
# CREATE TABLE movies (
# movieID int not null auto_increment,
# title varchar (30),
# genre varchar(20),
# imdbrating float,
# primary key (movieID)
# );
#
# INSERT INTO movies (title, genre, imdbrating) VALUES
# ('Jason Bourne','Action',6.9),
# ('War Dogs','Comedy',7.3),
# ('Suicide Squad','Action',6.7),
# ('Inside Out','Animation',8.3),
# ('Zootopia','Animation',8.1),
# ('Now you see me 2','Action',6.6);
#
# CREATE TABLE reviews(
# userID int,
# movieID int,
# userrating int
# );
#
# INSERT INTO reviews (userID, movieID, userrating) VALUES
# (1,1,4),(1,2,3),(1,3,2),(1,4,4),(1,5,4),(1,6,4),
# (2,1,2),(2,2,2),(2,3,1),(2,4,5),(2,5,5),(2,6,3),
# (3,1,5),(3,2,5),(3,3,3),(3,4,4),(3,5,3),(3,6,3),
# (4,1,4),(4,2,3),(4,3,4),(4,4,5),(4,5,5),(4,6,3),
# (5,1,5),(5,2,5),(5,3,3),(5,4,4),(5,5,4),(5,6,3);
#
# SELECT users.fname, users.lname, movies.title, movies.genre, movies.imdbrating, reviews.userrating from reviews
# JOIN users on users.userID = reviews.userID
# JOIN movies on reviews.movieID = movies.movieID
# ORDER BY title;
rmysql.settingsfile<-"C:/ProgramData/MySQL/MySQL Server 5.7/my.ini"
con <- dbConnect(RMySQL::MySQL(), default.file=rmysql.settingsfile, dbname = "moviedb", username=NULL, password=NULL)
sql <- "SELECT users.fname, users.lname, movies.title, movies.genre, movies.imdbrating, reviews.userrating from reviews JOIN users on users.userID = reviews.userID JOIN movies on reviews.movieID = movies.movieID ORDER BY title;"
res <- dbGetQuery(con, sql)
res
## fname lname title genre imdbrating userrating
## 1 Sergey Vayner Inside Out Animation 8.3 4
## 2 Victoria Elikishvili Inside Out Animation 8.3 5
## 3 Natalia Lesechko Inside Out Animation 8.3 4
## 4 Dmitry Yuster Inside Out Animation 8.3 4
## 5 Bing Bong Inside Out Animation 8.3 5
## 6 Natalia Lesechko Jason Borurne Action 6.9 4
## 7 Dmitry Yuster Jason Borurne Action 6.9 5
## 8 Bing Bong Jason Borurne Action 6.9 4
## 9 Sergey Vayner Jason Borurne Action 6.9 5
## 10 Victoria Elikishvili Jason Borurne Action 6.9 2
## 11 Bing Bong Now you see me 2 Action 6.6 3
## 12 Sergey Vayner Now you see me 2 Action 6.6 3
## 13 Victoria Elikishvili Now you see me 2 Action 6.6 3
## 14 Natalia Lesechko Now you see me 2 Action 6.6 4
## 15 Dmitry Yuster Now you see me 2 Action 6.6 3
## 16 Victoria Elikishvili Suicide Squad Action 6.7 1
## 17 Natalia Lesechko Suicide Squad Action 6.7 2
## 18 Dmitry Yuster Suicide Squad Action 6.7 3
## 19 Bing Bong Suicide Squad Action 6.7 4
## 20 Sergey Vayner Suicide Squad Action 6.7 3
## 21 Victoria Elikishvili War Dogs Comedy 7.3 2
## 22 Natalia Lesechko War Dogs Comedy 7.3 3
## 23 Dmitry Yuster War Dogs Comedy 7.3 5
## 24 Bing Bong War Dogs Comedy 7.3 3
## 25 Sergey Vayner War Dogs Comedy 7.3 5
## 26 Sergey Vayner Zootopia Animation 8.1 3
## 27 Victoria Elikishvili Zootopia Animation 8.1 5
## 28 Natalia Lesechko Zootopia Animation 8.1 4
## 29 Dmitry Yuster Zootopia Animation 8.1 4
## 30 Bing Bong Zootopia Animation 8.1 5
a=aggregate(res[, 6], list(res$title, res$imdbrating), mean)
colnames(a) <- c("Move_Title", "User_Ranking","IMDB_Ranking")
a
## Move_Title User_Ranking IMDB_Ranking
## 1 Now you see me 2 6.6 3.2
## 2 Suicide Squad 6.7 2.6
## 3 Jason Borurne 6.9 4.0
## 4 War Dogs 7.3 3.6
## 5 Zootopia 8.1 4.2
## 6 Inside Out 8.3 4.4
ggplot(a, aes(x=Move_Title, y=User_Ranking)) + geom_bar(stat="identity")
ggplot(a, aes(x=Move_Title, y=IMDB_Ranking)) + geom_bar(stat="identity")
cor(a$User_Ranking,a$IMDB_Ranking)
## [1] 0.8088351