Database Creation

First, we create a database named boxoffice that has 3 tables: -movie_goers: ID, first_name, last_name, address, revenue -movie: movie_ID, title, genre -ratings: ID, movie_ID, ratings,comments

we upload data for all the tables.See script in github for sql statements.

Upload libraries

library(RMySQL)
## Loading required package: DBI
library(DBI)

Connecting to the database locally

condb= dbConnect(MySQL(), user='root', password='Clorox1!', dbname='boxofiice')

Query table and create data frame

#user table
user<-dbGetQuery(condb, "select * from movie_goers")
## Warning in .local(conn, statement, ...): Decimal MySQL column 4 imported as
## numeric
#movies table
movies<-dbGetQuery(condb, "select * from movies")

#ratings table
ratings<-dbGetQuery(condb, "select * from ratings")

#Join ratings and movies
movies_ratings<-dbGetQuery(condb, "select a.movie_ID, a.title, b.ratings from movies a
                           join ratings b on a.movie_ID=b.movie_ID")

#Create data frame
mrating_df<-as.data.frame(movies_ratings)

#get first data from data frame
head(mrating_df)

Data Manipulation and summary

#calculating the highest and the lowest rating

queries<-"select a.title, min(ratings) as lowest, max(ratings) as highest from movies a join ratings b on a.movie_ID =b.movie_ID group by title order by title"
hl_ratings<-dbGetQuery(condb,queries)

head(hl_ratings)
#Calculate the average
q<-"select a.title, avg(ratings) as average_ratings from movies a join
ratings b on a.movie_ID=b.movie_ID group by title order by title"

avg_ratings<-dbGetQuery(condb, q)
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
head(avg_ratings)