Using Knit with Parameters…
for DB password
params: dbPwd: enter password
install.packages(“RMySQL”);
install.packages(“ggplot2”);
library(RCurl);
library(DBI);
library(RMySQL);
library(ggplot2);
dbDrv <- dbDriver("MySQL");
con <- dbConnect(dbDrv, user="root", password=params$dbPwd, host="localhost", port=3306, dbname="movies");
dbListTables(con);
## [1] "movie" "person" "rating"
dbReadTable(con, "movie");
## id title
## 1 10 Blade Runner 2049
## 2 14 Dunkirk
## 3 11 Jumanji: Welcome to the Jungle
## 4 15 Pirates of the Caribbean: Dead Men Tell No Tales
## 5 13 Star Wars: The Last Jedi
## 6 12 Wonder
sql <- "SELECT
p.name, m.title, IFNULL(r.rank, 'NA') AS rank
FROM
person p
LEFT JOIN
rating r ON p.id = r.person_id
INNER JOIN
movie m ON r.movie_id = m.id;";
res <- dbGetQuery(con, sql);
knitr::kable(
head(res),
caption = "dbGetQuery result set [head]"
)
| name | title | rank |
|---|---|---|
| Benjamin | Blade Runner 2049 | 4 |
| Benjamin | Dunkirk | 2 |
| Benjamin | Jumanji: Welcome to the Jungle | 5 |
| Benjamin | Pirates of the Caribbean: Dead Men Tell No Tales | 5 |
| Benjamin | Star Wars: The Last Jedi | 3 |
| Benjamin | Wonder | 4 |
SELECT
p.name, m.title, IFNULL(r.rank, 'NA') AS rank
FROM
person p
LEFT JOIN
rating r ON p.id = r.person_id
INNER JOIN
movie m ON r.movie_id = m.id;
| name | title | rank |
|---|---|---|
| Benjamin | Blade Runner 2049 | 4 |
| Benjamin | Dunkirk | 2 |
| Benjamin | Jumanji: Welcome to the Jungle | 5 |
| Benjamin | Pirates of the Caribbean: Dead Men Tell No Tales | 5 |
| Benjamin | Star Wars: The Last Jedi | 3 |
| Benjamin | Wonder | 4 |
| Gabriella | Blade Runner 2049 | 5 |
| Gabriella | Dunkirk | 4 |
| Gabriella | Jumanji: Welcome to the Jungle | NA |
| Gabriella | Pirates of the Caribbean: Dead Men Tell No Tales | 3 |
Note: SQL output.var=“res”
SELECT
m.id movie_id, AVG(r.rank) AS avg_rank
FROM
rating r
INNER JOIN
movie m ON r.movie_id = m.id
GROUP BY m.title
ORDER BY AVG(r.rank) DESC;
barplot(height=res$avg_rank, names.arg = res$movie_id, xlab = "Movie ID", main = "Movie Ratings")
| Movie ID | Movie Title |
|---|---|
| 10 | Blade Runner 2049 |
| 11 | Jumanji: Welcome to the Jungle |
| 12 | Wonder |
| 13 | Star Wars: The Last Jedi |
| 14 | Dunkirk |
| 15 | Pirates of the Caribbean: Dead Men Tell No Tales |
dbDisconnect(con)
## [1] TRUE
Below is the scipt for setting up “movies” database
/* These statements should be executed only once */
CREATE DATABASE movies;
USE movies;
/*************************************************/
DROP TABLE IF EXISTS person;
DROP TABLE IF EXISTS movie;
DROP TABLE IF EXISTS rating;
CREATE TABLE person (
id INTEGER NOT NULL PRIMARY KEY,
name VARCHAR(30) NOT NULL UNIQUE
);
LOAD DATA INFILE 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/movies/person.csv'
INTO TABLE person
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
CREATE TABLE movie (
id INTEGER NOT NULL PRIMARY KEY,
title VARCHAR(60) NOT NULL UNIQUE KEY
);
LOAD DATA INFILE 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/movies/movie.csv'
INTO TABLE movie
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;
CREATE TABLE rating (
person_id INTEGER NOT NULL REFERENCES person (id),
movie_id INTEGER NOT NULL REFERENCES movie (id),
rank INTEGER COMMENT 'rating score of each movie by a person on a scale of 1 to 5',
CHECK (IFNULL(rank, 1) BETWEEN 1 AND 5),
CONSTRAINT PRIMARY KEY (person_id , movie_id),
INDEX (person_id , movie_id) COMMENT 'good practice to define an index with first column being a reference to another table (FK)',
INDEX (movie_id , person_id) COMMENT 'index on a foreign key (FK) column'
);
LOAD DATA INFILE 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/movies/rating.csv'
INTO TABLE rating
FIELDS TERMINATED BY ','
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(person_id, movie_id, @rank)
SET rank = nullif(@rank, '');