Grando 2 Homework

# This is a standard setup I include so that my working
# directory is set correctly whether I work on one of my
# windows or linux machines.
if (Sys.info()["sysname"] == "Windows") {
    setwd("~/Masters/DATA607/Week2/Assignment")
} else {
    setwd("~/Documents/Masters/DATA607/Week2/Assignment")
}

Show how the .csv files can be downloaded

# I'm just showing how I can load the pre-exported csv files
# since I will be loading the database files directly later
# in the assignment.
surveyee_from_csv <- read.csv(file = "./surveyees.csv", header = FALSE, 
    sep = ",", stringsAsFactors = FALSE)
head(surveyee_from_csv)
##   V1     V2     V3
## 1  1   John Grando
## 2  2 Maggie Grando
## 3  3   Lucy Grando
## 4  4   Jack Grando
## 5  5   Mike  Carey
## 6  6  Keeva  Carey
surveys_from_csv <- read.csv(file = "./surveys.csv", header = FALSE, 
    sep = ",", stringsAsFactors = FALSE)
head(surveys_from_csv)
##   V1 V2 V3 V4
## 1  1  1  1  5
## 2  2  1  2  5
## 3  3  1  3  4
## 4  4  1  4  2
## 5  5  1  5  5
## 6  6  1  6  5
movies_from_csv <- read.csv(file = "./movies.csv", header = FALSE, 
    sep = ",", stringsAsFactors = FALSE)
head(movies_from_csv)
##   V1                   V2
## 1  1   The Little Mermaid
## 2  2              Tangled
## 3  3 Beauty And The Beast
## 4  4           Cinderella
## 5  5                Moana
## 6  6             Hercules

Load the Individual Tables To R Directly From The Database

library(RMySQL)
## Loading required package: DBI
mysqldriver <- dbDriver("MySQL")
db = dbConnect(mysqldriver, user = "data607", password = "password", 
    dbname = "Movies", host = "127.0.0.1")
surveyee_db <- dbGetQuery(db, "SELECT * FROM surveyees")
head(surveyee_db)
##   surveyeeid firstname lastname
## 1          1      John   Grando
## 2          2    Maggie   Grando
## 3          3      Lucy   Grando
## 4          4      Jack   Grando
## 5          5      Mike    Carey
## 6          6     Keeva    Carey
movies_db <- dbGetQuery(db, "SELECT * FROM movies")
head(movies_db)
##   movieid                title
## 1       1   The Little Mermaid
## 2       2              Tangled
## 3       3 Beauty And The Beast
## 4       4           Cinderella
## 5       5                Moana
## 6       6             Hercules
surveys_db <- dbGetQuery(db, "SELECT * FROM surveys")
head(surveys_db)
##   surveyid surveyeeid movieid score
## 1        1          1       1     5
## 2        2          1       2     5
## 3        3          1       3     4
## 4        4          1       4     2
## 5        5          1       5     5
## 6        6          1       6     5

Add a movie and surveys on the movie to show we can edit data from R.

dbGetQuery(db, "INSERT INTO movies (title) VALUES ('Up')")
## data frame with 0 columns and 0 rows
movies_db <- dbGetQuery(db, "SELECT * FROM movies")
print(movies_db)
##   movieid                title
## 1       1   The Little Mermaid
## 2       2              Tangled
## 3       3 Beauty And The Beast
## 4       4           Cinderella
## 5       5                Moana
## 6       6             Hercules
## 7       7                   Up
## 8       8                   Up
# Run this to delete the extra row if more than one is added
# during testing
delete_rows <- nrow(movies_db) - 7
if (nrow(movies_db) > 7) {
    dbGetQuery(db, sprintf("DELETE FROM movies order by movieid desc limit %s", 
        delete_rows))
}
## data frame with 0 columns and 0 rows
movies_db <- dbGetQuery(db, "SELECT * FROM movies")
print(movies_db)
##   movieid                title
## 1       1   The Little Mermaid
## 2       2              Tangled
## 3       3 Beauty And The Beast
## 4       4           Cinderella
## 5       5                Moana
## 6       6             Hercules
## 7       7                   Up
# Add survey data
dbGetQuery(db, "INSERT INTO surveys (surveyeeid, movieid, score) VALUES (1, 7, 4)")
## data frame with 0 columns and 0 rows
surveys_db <- dbGetQuery(db, "SELECT * FROM surveys")
tail(surveys_db)
##    surveyid surveyeeid movieid score
## 33       33          6       3     3
## 34       34          6       4     3
## 35       35          6       5     3
## 36       36          6       6     3
## 37       37          1       7     4
## 38       38          1       7     4
delete_rows <- nrow(surveys_db) - 37
if (nrow(surveys_db) > 37) {
    dbGetQuery(db, sprintf("DELETE FROM surveys ORDER BY surveyid DESC LIMIT %s", 
        delete_rows))
}
## data frame with 0 columns and 0 rows
surveys_db <- dbGetQuery(db, "SELECT * FROM surveys")
tail(surveys_db)
##    surveyid surveyeeid movieid score
## 32       32          6       2     3
## 33       33          6       3     3
## 34       34          6       4     3
## 35       35          6       5     3
## 36       36          6       6     3
## 37       37          1       7     4

Load The Tables Using Advanced Queries

# I find that, for the purposes of SQL, the best way to
# demonstrate knowledge of the language is to chain multiple
# commands into a single query.  Therefore, I have created a
# full table that provides user-friendly information by
# joining multiple tables.
full_table <- dbGetQuery(db, "SELECT sye.firstname AS first_name, sye.lastname AS last_name, mv.title as Movie_Title, sy.score as Score FROM surveys AS sy JOIN surveyees AS sye ON sy.surveyeeid=sye.surveyeeid JOIN movies AS mv ON sy.movieid=mv.movieid")
head(full_table)
##   first_name last_name          Movie_Title Score
## 1       John    Grando   The Little Mermaid     5
## 2       John    Grando              Tangled     5
## 3       John    Grando Beauty And The Beast     4
## 4       John    Grando           Cinderella     2
## 5       John    Grando                Moana     5
## 6       John    Grando             Hercules     5
# Also, I have created a summary table which returns the
# average survey results.
summary_table <- dbGetQuery(db, "SELECT mv.title as Movie_Title, AVG(sy.score) as AVG_Score FROM surveys as sy JOIN movies AS mv ON sy.movieid=mv.movieid GROUP BY mv.title ORDER BY AVG_Score DESC")
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
print(summary_table)
##            Movie_Title AVG_Score
## 1                   Up    4.0000
## 2                Moana    3.5000
## 3 Beauty And The Beast    3.3333
## 4              Tangled    3.3333
## 5   The Little Mermaid    3.1667
## 6             Hercules    3.0000
## 7           Cinderella    2.3333

Data Plots

require(ggplot2)
## Loading required package: ggplot2
ggplot(data = summary_table, aes(x = Movie_Title, y = AVG_Score)) + 
    geom_bar(stat = "identity") + coord_flip()