Grando 2 Homework
# This is a standard setup I include so that my working
# directory is set correctly whether I work on one of my
# windows or linux machines.
if (Sys.info()["sysname"] == "Windows") {
setwd("~/Masters/DATA607/Week2/Assignment")
} else {
setwd("~/Documents/Masters/DATA607/Week2/Assignment")
}
Show how the .csv files can be downloaded
# I'm just showing how I can load the pre-exported csv files
# since I will be loading the database files directly later
# in the assignment.
surveyee_from_csv <- read.csv(file = "./surveyees.csv", header = FALSE,
sep = ",", stringsAsFactors = FALSE)
head(surveyee_from_csv)
## V1 V2 V3
## 1 1 John Grando
## 2 2 Maggie Grando
## 3 3 Lucy Grando
## 4 4 Jack Grando
## 5 5 Mike Carey
## 6 6 Keeva Carey
surveys_from_csv <- read.csv(file = "./surveys.csv", header = FALSE,
sep = ",", stringsAsFactors = FALSE)
head(surveys_from_csv)
## V1 V2 V3 V4
## 1 1 1 1 5
## 2 2 1 2 5
## 3 3 1 3 4
## 4 4 1 4 2
## 5 5 1 5 5
## 6 6 1 6 5
movies_from_csv <- read.csv(file = "./movies.csv", header = FALSE,
sep = ",", stringsAsFactors = FALSE)
head(movies_from_csv)
## V1 V2
## 1 1 The Little Mermaid
## 2 2 Tangled
## 3 3 Beauty And The Beast
## 4 4 Cinderella
## 5 5 Moana
## 6 6 Hercules
Load the Individual Tables To R Directly From The Database
library(RMySQL)
## Loading required package: DBI
mysqldriver <- dbDriver("MySQL")
db = dbConnect(mysqldriver, user = "data607", password = "password",
dbname = "Movies", host = "127.0.0.1")
surveyee_db <- dbGetQuery(db, "SELECT * FROM surveyees")
head(surveyee_db)
## surveyeeid firstname lastname
## 1 1 John Grando
## 2 2 Maggie Grando
## 3 3 Lucy Grando
## 4 4 Jack Grando
## 5 5 Mike Carey
## 6 6 Keeva Carey
movies_db <- dbGetQuery(db, "SELECT * FROM movies")
head(movies_db)
## movieid title
## 1 1 The Little Mermaid
## 2 2 Tangled
## 3 3 Beauty And The Beast
## 4 4 Cinderella
## 5 5 Moana
## 6 6 Hercules
surveys_db <- dbGetQuery(db, "SELECT * FROM surveys")
head(surveys_db)
## surveyid surveyeeid movieid score
## 1 1 1 1 5
## 2 2 1 2 5
## 3 3 1 3 4
## 4 4 1 4 2
## 5 5 1 5 5
## 6 6 1 6 5
Add a movie and surveys on the movie to show we can edit data from R.
dbGetQuery(db, "INSERT INTO movies (title) VALUES ('Up')")
## data frame with 0 columns and 0 rows
movies_db <- dbGetQuery(db, "SELECT * FROM movies")
print(movies_db)
## movieid title
## 1 1 The Little Mermaid
## 2 2 Tangled
## 3 3 Beauty And The Beast
## 4 4 Cinderella
## 5 5 Moana
## 6 6 Hercules
## 7 7 Up
## 8 8 Up
# Run this to delete the extra row if more than one is added
# during testing
delete_rows <- nrow(movies_db) - 7
if (nrow(movies_db) > 7) {
dbGetQuery(db, sprintf("DELETE FROM movies order by movieid desc limit %s",
delete_rows))
}
## data frame with 0 columns and 0 rows
movies_db <- dbGetQuery(db, "SELECT * FROM movies")
print(movies_db)
## movieid title
## 1 1 The Little Mermaid
## 2 2 Tangled
## 3 3 Beauty And The Beast
## 4 4 Cinderella
## 5 5 Moana
## 6 6 Hercules
## 7 7 Up
# Add survey data
dbGetQuery(db, "INSERT INTO surveys (surveyeeid, movieid, score) VALUES (1, 7, 4)")
## data frame with 0 columns and 0 rows
surveys_db <- dbGetQuery(db, "SELECT * FROM surveys")
tail(surveys_db)
## surveyid surveyeeid movieid score
## 33 33 6 3 3
## 34 34 6 4 3
## 35 35 6 5 3
## 36 36 6 6 3
## 37 37 1 7 4
## 38 38 1 7 4
delete_rows <- nrow(surveys_db) - 37
if (nrow(surveys_db) > 37) {
dbGetQuery(db, sprintf("DELETE FROM surveys ORDER BY surveyid DESC LIMIT %s",
delete_rows))
}
## data frame with 0 columns and 0 rows
surveys_db <- dbGetQuery(db, "SELECT * FROM surveys")
tail(surveys_db)
## surveyid surveyeeid movieid score
## 32 32 6 2 3
## 33 33 6 3 3
## 34 34 6 4 3
## 35 35 6 5 3
## 36 36 6 6 3
## 37 37 1 7 4
Load The Tables Using Advanced Queries
# I find that, for the purposes of SQL, the best way to
# demonstrate knowledge of the language is to chain multiple
# commands into a single query. Therefore, I have created a
# full table that provides user-friendly information by
# joining multiple tables.
full_table <- dbGetQuery(db, "SELECT sye.firstname AS first_name, sye.lastname AS last_name, mv.title as Movie_Title, sy.score as Score FROM surveys AS sy JOIN surveyees AS sye ON sy.surveyeeid=sye.surveyeeid JOIN movies AS mv ON sy.movieid=mv.movieid")
head(full_table)
## first_name last_name Movie_Title Score
## 1 John Grando The Little Mermaid 5
## 2 John Grando Tangled 5
## 3 John Grando Beauty And The Beast 4
## 4 John Grando Cinderella 2
## 5 John Grando Moana 5
## 6 John Grando Hercules 5
# Also, I have created a summary table which returns the
# average survey results.
summary_table <- dbGetQuery(db, "SELECT mv.title as Movie_Title, AVG(sy.score) as AVG_Score FROM surveys as sy JOIN movies AS mv ON sy.movieid=mv.movieid GROUP BY mv.title ORDER BY AVG_Score DESC")
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
print(summary_table)
## Movie_Title AVG_Score
## 1 Up 4.0000
## 2 Moana 3.5000
## 3 Beauty And The Beast 3.3333
## 4 Tangled 3.3333
## 5 The Little Mermaid 3.1667
## 6 Hercules 3.0000
## 7 Cinderella 2.3333