I could not find a way to make 100% reproducible mainly due to username/password. My solution was to create a super-user in MySQL who does not have a password.
#install.packages("RMySQL")
#install.packages("dplyr")
library(RMySQL)
## Loading required package: DBI
library(DBI)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
db_user <- 'newuser'
db_password <- NULL
db_name <- NULL
db_host <- NULL
db_port <- 3306
drv<-dbDriver("MySQL")
movies <- dbConnect(drv, user = db_user, password = db_password, dbname = db_name, host = db_host, port = db_port)
Database Movie_ratings is created
dbSendQuery(movies, "CREATE DATABASE movie_ratings;")
## <MySQLResult:-217778552,0,0>
dbSendQuery(movies, "USE movie_ratings")
## <MySQLResult:-217778552,0,1>
Table ratings is created
query<-"CREATE TABLE ratings (Friends TEXT, Movie TEXT, Rating TEXT);"
#Send the query to MySQL for execution
results <- dbSendQuery(movies, query)
dbClearResult(results)
## [1] TRUE
query <- "INSERT INTO
ratings(Friends, Movie, Rating)
VALUES
('Friend1','Avengers: Endgame','5'),
('Friend1','The Lion King','10'),
('Friend1','Star Wars: The Rise of Skywalker',NULL),
('Friend1','Joker',NULL),
('Friend1','Spider-Man: Far From Home',NULL),
('Friend1','Jumanji: The Next Level',NULL),
('Friend2','Avengers: Endgame',NULL),
('Friend2','The Lion King',NULL),
('Friend2','Star Wars: The Rise of Skywalker','8'),
('Friend2','Joker','8'),
('Friend2','Spider-Man: Far From Home','9'),
('Friend2','Jumanji: The Next Level',NULL),
('Friend3','Avengers: Endgame','10'),
('Friend3','The Lion King',NULL),
('Friend3','Star Wars: The Rise of Skywalker',NULL),
('Friend3','Joker',NULL),
('Friend3','Spider-Man: Far From Home',NULL),
('Friend3','Jumanji: The Next Level',NULL),
('Friend4','Avengers: Endgame','5'),
('Friend4','The Lion King','7'),
('Friend4','Star Wars: The Rise of Skywalker','9'),
('Friend4','Joker','9'),
('Friend4','Spider-Man: Far From Home','4'),
('Friend4','Jumanji: The Next Level','6'),
('Friend5','Avengers: Endgame',NULL),
('Friend5','The Lion King',NULL),
('Friend5','Star Wars: The Rise of Skywalker',NULL),
('Friend5','Joker',NULL),
('Friend5','Spider-Man: Far From Home','5'),
('Friend5','Jumanji: The Next Level','9');"
results <- dbSendQuery(movies, query)
dbClearResult(results)
## [1] TRUE
rs = dbSendQuery(movies, "select * from ratings")
datar = fetch(rs, n=-1)
dbClearResult(rs)
## [1] TRUE
head(datar)
## Friends Movie Rating
## 1 Friend1 Avengers: Endgame 5
## 2 Friend1 The Lion King 10
## 3 Friend1 Star Wars: The Rise of Skywalker <NA>
## 4 Friend1 Joker <NA>
## 5 Friend1 Spider-Man: Far From Home <NA>
## 6 Friend1 Jumanji: The Next Level <NA>
You also can find the sum and the percentage of missings in your dataset with the code below:
Number_of_missing_ratings<-sum(is.na(datar))
Pct_missing_ratings<-mean(is.na(datar))
Number_of_missing_ratings
## [1] 16
Pct_missing_ratings
## [1] 0.1777778
na.omit(datar)
## Friends Movie Rating
## 1 Friend1 Avengers: Endgame 5
## 2 Friend1 The Lion King 10
## 9 Friend2 Star Wars: The Rise of Skywalker 8
## 10 Friend2 Joker 8
## 11 Friend2 Spider-Man: Far From Home 9
## 13 Friend3 Avengers: Endgame 10
## 19 Friend4 Avengers: Endgame 5
## 20 Friend4 The Lion King 7
## 21 Friend4 Star Wars: The Rise of Skywalker 9
## 22 Friend4 Joker 9
## 23 Friend4 Spider-Man: Far From Home 4
## 24 Friend4 Jumanji: The Next Level 6
## 29 Friend5 Spider-Man: Far From Home 5
## 30 Friend5 Jumanji: The Next Level 9