First, we create a database named boxoffice that has 3 tables: -movie_goers: ID, first_name, last_name, address, revenue -movie: movie_ID, title, genre -ratings: ID, movie_ID, ratings,comments
we upload data for all the tables.See script in github for sql statements.
library(RMySQL)
## Loading required package: DBI
library(DBI)
condb= dbConnect(MySQL(), user='root', password='Clorox1!', dbname='boxofiice')
#user table
user<-dbGetQuery(condb, "select * from movie_goers")
## Warning in .local(conn, statement, ...): Decimal MySQL column 4 imported as
## numeric
#movies table
movies<-dbGetQuery(condb, "select * from movies")
#ratings table
ratings<-dbGetQuery(condb, "select * from ratings")
#Join ratings and movies
movies_ratings<-dbGetQuery(condb, "select a.movie_ID, a.title, b.ratings from movies a
join ratings b on a.movie_ID=b.movie_ID")
#Create data frame
mrating_df<-as.data.frame(movies_ratings)
#get first data from data frame
head(mrating_df)
#calculating the highest and the lowest rating
queries<-"select a.title, min(ratings) as lowest, max(ratings) as highest from movies a join ratings b on a.movie_ID =b.movie_ID group by title order by title"
hl_ratings<-dbGetQuery(condb,queries)
head(hl_ratings)
#Calculate the average
q<-"select a.title, avg(ratings) as average_ratings from movies a join
ratings b on a.movie_ID=b.movie_ID group by title order by title"
avg_ratings<-dbGetQuery(condb, q)
## Warning in .local(conn, statement, ...): Decimal MySQL column 1 imported as
## numeric
head(avg_ratings)