Using Knit with Parameters…
for DB password
params: dbPwd: enter password


Package Installs (optional)

install.packages(“RMySQL”);
install.packages(“ggplot2”);

Libraries

library(RCurl);
library(DBI);
library(RMySQL);
library(ggplot2);

Getting MySQL Driver and Connection

dbDrv <- dbDriver("MySQL");
con <- dbConnect(dbDrv, user="root", password=params$dbPwd, host="localhost", port=3306, dbname="movies");

Using DBI Functions

dbListTables(con);
## [1] "movie"  "person" "rating"
dbReadTable(con, "movie");
##   id                                            title
## 1 10                                Blade Runner 2049
## 2 14                                          Dunkirk
## 3 11                   Jumanji: Welcome to the Jungle
## 4 15 Pirates of the Caribbean: Dead Men Tell No Tales
## 5 13                         Star Wars: The Last Jedi
## 6 12                                           Wonder
sql <- "SELECT 
     p.name, m.title, IFNULL(r.rank, 'NA') AS rank
 FROM
 person p
 LEFT JOIN
 rating r ON p.id = r.person_id
 INNER JOIN
 movie m ON r.movie_id = m.id;";
res <- dbGetQuery(con, sql);
knitr::kable(
  head(res),
  caption = "dbGetQuery result set [head]"
)
dbGetQuery result set [head]
name title rank
Benjamin Blade Runner 2049 4
Benjamin Dunkirk 2
Benjamin Jumanji: Welcome to the Jungle 5
Benjamin Pirates of the Caribbean: Dead Men Tell No Tales 5
Benjamin Star Wars: The Last Jedi 3
Benjamin Wonder 4

Using {sql} markdown to execute SQL

SELECT 
    p.name, m.title, IFNULL(r.rank, 'NA') AS rank
FROM
    person p
        LEFT JOIN
    rating r ON p.id = r.person_id
        INNER JOIN
    movie m ON r.movie_id = m.id;
Same result as dbGetQuery [showing first 10 rows]
name title rank
Benjamin Blade Runner 2049 4
Benjamin Dunkirk 2
Benjamin Jumanji: Welcome to the Jungle 5
Benjamin Pirates of the Caribbean: Dead Men Tell No Tales 5
Benjamin Star Wars: The Last Jedi 3
Benjamin Wonder 4
Gabriella Blade Runner 2049 5
Gabriella Dunkirk 4
Gabriella Jumanji: Welcome to the Jungle NA
Gabriella Pirates of the Caribbean: Dead Men Tell No Tales 3

Plotting a Bar chart

Note: SQL output.var=“res”

SELECT 
    m.id movie_id, AVG(r.rank) AS avg_rank
FROM
    rating r 
        INNER JOIN
    movie m ON r.movie_id = m.id
GROUP BY m.title
ORDER BY AVG(r.rank) DESC;
barplot(height=res$avg_rank, names.arg = res$movie_id, xlab = "Movie ID", main = "Movie Ratings")
X Axis Legend
Movie ID Movie Title
10 Blade Runner 2049
11 Jumanji: Welcome to the Jungle
12 Wonder
13 Star Wars: The Last Jedi
14 Dunkirk
15 Pirates of the Caribbean: Dead Men Tell No Tales

Close DB Connection

dbDisconnect(con)
## [1] TRUE

Using {sql, eval=FALSE} markdown to only display SQL (with syntax hightlighting)

Below is the scipt for setting up “movies” database

/* These statements should be executed only once */
CREATE DATABASE movies;
USE movies;
/*************************************************/

DROP TABLE IF EXISTS person;
DROP TABLE IF EXISTS movie;
DROP TABLE IF EXISTS rating;

CREATE TABLE person (
    id INTEGER NOT NULL PRIMARY KEY,
    name VARCHAR(30) NOT NULL UNIQUE
);

LOAD DATA INFILE 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/movies/person.csv' 
INTO TABLE person
FIELDS TERMINATED BY ',' 
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

CREATE TABLE movie (
    id INTEGER NOT NULL PRIMARY KEY,
    title VARCHAR(60) NOT NULL UNIQUE KEY
);

LOAD DATA INFILE 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/movies/movie.csv' 
INTO TABLE movie
FIELDS TERMINATED BY ',' 
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS;

CREATE TABLE rating (
    person_id INTEGER NOT NULL REFERENCES person (id),
    movie_id INTEGER NOT NULL REFERENCES movie (id),
    rank INTEGER COMMENT 'rating score of each movie by a person on a scale of 1 to 5',
    CHECK (IFNULL(rank, 1) BETWEEN 1 AND 5),
    CONSTRAINT PRIMARY KEY (person_id , movie_id),
    INDEX (person_id , movie_id) COMMENT 'good practice to define an index with first column being a reference to another table (FK)',
    INDEX (movie_id , person_id) COMMENT 'index on a foreign key (FK) column'
);

LOAD DATA INFILE 'C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/movies/rating.csv' 
INTO TABLE rating
FIELDS TERMINATED BY ',' 
OPTIONALLY ENCLOSED BY '"'
LINES TERMINATED BY '\r\n'
IGNORE 1 ROWS
(person_id, movie_id, @rank)
SET rank = nullif(@rank, '');