Assignment - SQL and R

Take the results from survey and store them in a SQL database.

SQL code

table1<-dbGetQuery(conn, 'CREATE TABLE sandbox.movie (
movie_id INT PRIMARY KEY,
movie_name VARCHAR(255) NOT NULL'
);
## Warning in postgresqlQuickSQL(conn, statement, ...): Could not create execute: CREATE TABLE sandbox.movie (
## movie_id INT PRIMARY KEY,
## movie_name VARCHAR(255) NOT NULL
data1<-dbGetQuery(conn,'INSERT INTO sandbox.movie (movie_id, movie_name)
VALUES
(1, "Avengers:Endgame"),
(2, "X-Men: Dark Phenix"),
(3, "Captain Marvel"),
(4, "Terminator: Dark Fate"),
(5, "Godzilla:King of the Monsters"),
(6, "Rambo: Last Blood")'
)
## Warning in postgresqlQuickSQL(conn, statement, ...): Could not create execute: INSERT INTO sandbox.movie (movie_id, movie_name)
## VALUES
## (1, "Avengers:Endgame"),
## (2, "X-Men: Dark Phenix"),
## (3, "Captain Marvel"),
## (4, "Terminator: Dark Fate"),
## (5, "Godzilla:King of the Monsters"),
## (6, "Rambo: Last Blood")
table2<-dbGetQuery(conn, 'CREATE TABLE sandbox.critics (
user_id INT PRIMARY KEY,
user_name VARCHAR(55) NOT NULL'
)
## Warning in postgresqlQuickSQL(conn, statement, ...): Could not create execute: CREATE TABLE sandbox.critics (
## user_id INT PRIMARY KEY,
## user_name VARCHAR(55) NOT NULL
data2<-dbGetQuery(conn,'INSERT INTO sandbox.critics (user_id, user_name)
VALUES
(1, "Ivan"),
(2, "Nick"),
(3, "Victoria"),
(4, "Roman"),
(5, "Arianna")'
)
## Warning in postgresqlQuickSQL(conn, statement, ...): Could not create execute: INSERT INTO sandbox.critics (user_id, user_name)
## VALUES
## (1, "Ivan"),
## (2, "Nick"),
## (3, "Victoria"),
## (4, "Roman"),
## (5, "Arianna")
table3<-dbGetQuery(conn, 'CREATE TABLE sandbox.rate (
user_id INT,
movie_id INT,
rating FLOAT'
);
## Warning in postgresqlQuickSQL(conn, statement, ...): Could not create execute: CREATE TABLE sandbox.rate (
## user_id INT,
## movie_id INT,
## rating FLOAT
table3<-dbGetQuery(conn, 'INSERT INTO sandbox.rate (user_id, movie_id, rating)
VALUES
(1, 1, "4"),
(1, 2, "2"),
(1, 3, "5"),
(1, 4, "5"),
(1, 5, "3"),
(1, 6, "3"),

(2, 1, "5"),
(2, 2, "4"),
(2, 3, "4"),
(2, 4, "5"),
(2, 5, "4"),
(2, 6, "2"),

(3, 1, "4"),
(3, 2, "2"),
(3, 3, "5"),
(3, 4, "3"),
(3, 5, "5"),
(3, 6, "1"),

(4, 1, "5"),
(4, 2, "5"),
(4, 3, "4"),
(4, 4, "2"),
(4, 5, "3"),
(4, 6, "4"),

(5, 1, "1"),
(5, 2, "4"),
(5, 3, "4"),
(5, 4, "3"),
(5, 5, "3"),
(5, 6, "5")'
)
## Warning in postgresqlQuickSQL(conn, statement, ...): Could not create execute: INSERT INTO sandbox.rate (user_id, movie_id, rating)
## VALUES
## (1, 1, "4"),
## (1, 2, "2"),
## (1, 3, "5"),
## (1, 4, "5"),
## (1, 5, "3"),
## (1, 6, "3"),
## 
## (2, 1, "5"),
## (2, 2, "4"),
## (2, 3, "4"),
## (2, 4, "5"),
## (2, 5, "4"),
## (2, 6, "2"),
## 
## (3, 1, "4"),
## (3, 2, "2"),
## (3, 3, "5"),
## (3, 4, "3"),
## (3, 5, "5"),
## (3, 6, "1"),
## 
## (4, 1, "5"),
## (4, 2, "5"),
## (4, 3, "4"),
## (4, 4, "2"),
## (4, 5, "3"),
## (4, 6, "4"),
## 
## (5, 1, "1"),
## (5, 2, "4"),
## (5, 3, "4"),
## (5, 4, "3"),
## (5, 5, "3"),
## (5, 6, "5")

Load the information into an R dataframe.

Movie<- dbGetQuery(conn, 'SELECT*FROM sandbox.movie') 
Critic<- dbGetQuery(conn, 'select * from sandbox.critics') 
Rate<- dbGetQuery(conn, 'select * from sandbox.rate') 
sql1<-dbGetQuery(conn, '
SELECT a.movie_name, r.rating, c.user_name 
                    FROM sandbox.movie a
                     LEFT JOIN sandbox.rate r on a.movie_id = r.movie_id
                     LEFT JOIN sandbox.critics c ON r.user_id = c.user_id
                     ORDER BY movie_name;')
colnames(sql1) <-c("Movie", "Rating","Critic")
kable(sql1) 
Movie Rating Critic
Avengers:Endgame 4 Ivan
Avengers:Endgame 4 Victoria
Avengers:Endgame 1 Arianna
Avengers:Endgame 5 Roman
Avengers:Endgame 5 Nick
Captain Marvel 5 Victoria
Captain Marvel 4 Arianna
Captain Marvel 4 Nick
Captain Marvel 5 Ivan
Captain Marvel 4 Roman
Godzilla:King of the Monsters 4 Nick
Godzilla:King of the Monsters 3 Ivan
Godzilla:King of the Monsters 3 Roman
Godzilla:King of the Monsters 5 Victoria
Godzilla:King of the Monsters 3 Arianna
Rambo: Last Blood 5 Arianna
Rambo: Last Blood 3 Ivan
Rambo: Last Blood 2 Nick
Rambo: Last Blood 1 Victoria
Rambo: Last Blood 4 Roman
Terminator: Dark Fate 2 Roman
Terminator: Dark Fate 5 Nick
Terminator: Dark Fate 3 Arianna
Terminator: Dark Fate 5 Ivan
Terminator: Dark Fate 3 Victoria
X-Men: Dark Phenix 5 Roman
X-Men: Dark Phenix 2 Victoria
X-Men: Dark Phenix 2 Ivan
X-Men: Dark Phenix 4 Arianna
X-Men: Dark Phenix 4 Nick

Let’s see the sum of ratings for each movie.

sql2<-dbGetQuery(conn,'
  SELECT a.movie_name, Sum (r.rating) 
                    FROM sandbox.movie a
                     LEFT JOIN sandbox.rate r on a.movie_id = r.movie_id
                     GROUP BY a.movie_name')

colnames(sql2) <-c("Movie", "Rating")
a<-sql2
a
##                           Movie Rating
## 1 Godzilla:King of the Monsters     18
## 2                Captain Marvel     22
## 3              Avengers:Endgame     19
## 4         Terminator: Dark Fate     18
## 5            X-Men: Dark Phenix     17
## 6             Rambo: Last Blood     15
ggplot(a, aes(x = Rating))+
  geom_bar(aes(fill = Movie))

From the data visualisation we can conclude that the ratings for movie “Terminator: Dark Fate” and “Godzilla:King of the Monsters” are the same. The lowest ratings is for “Rambo: Last Blood”" and the highest is for movie “Captain: MArvel”.