R Markdown, sql, and CSV files needed to create the database and this html document can be found here: https://github.com/rweberc/Data607_Assignment2
library(dplyr)
library(RMySQL)
library(ggplot2)
password <- read.table("C:/Users/weberr1/Desktop/CUNY/DATA 607/Assignment/password.csv", stringsAsFactors = FALSE)
mydb = dbConnect(MySQL(), user='rweberc', password=password$V1[1], dbname='reviews', host='localhost')Load movie data:
moviesDf <- dbGetQuery(mydb, "SELECT * FROM movies") %>%
mutate(Director = gsub("\r", "", Director)) # remove return character
moviesDf## MovieId Title Director
## 1 1 The Big Sick Michael Showalter
## 2 2 Get Out Jordan Peele
## 3 3 Lady Bird Greta Gerwig
## 4 4 The Shape of Water Guillermo del Toro
## 5 5 Phantom Thread Paul Thomas Anderson
## 6 6 Call Me by Your Name Luca Guadagnino
Load critic data:
criticsDf <- dbGetQuery(mydb, "SELECT * FROM critics") %>%
mutate(LastName = gsub("\r", "", LastName))
criticsDf ## CriticId FirstName LastName
## 1 1 Linda Weber
## 2 2 Fred Weber
## 3 3 Lara Diaz
## 4 4 Henry Steele
## 5 5 Daniel Rhodes
Load review data:
reviewsDf <- dbGetQuery(mydb, "SELECT * FROM reviews")
head(reviewsDf)## ReviewId MovieId CriticId Rating
## 1 1 1 1 5
## 2 2 1 2 3
## 3 3 1 3 4
## 4 4 1 4 1
## 5 5 1 5 2
## 6 6 2 1 5
mergeDf <- left_join(reviewsDf, criticsDf, by=c("CriticId"))
mergeDf <- left_join(mergeDf, moviesDf, by=c("MovieId"))
mergeDf <- mergeDf %>% select(Title, FirstName, LastName, Rating)