Connect to the movie database and load into a dataframe

Required packages:

RMySQL ggplot2 stringr pander

library(RMySQL)
## Loading required package: DBI
library(ggplot2)
library(stringr)
library(pander)
con <- dbConnect(MySQL(),user = 'movie_guest', password = 'datascience',  dbname = 'movies', host = 'localhost') ###This uses the guest user created by the SQL script with read only access
query <- ' select
              user_name,
              movie_name,
              rating
        from user_rating ra
          left join users u
            on ra.user_id = u.id
          left join movie_names m
            on ra.movie_id = m.id
        order by 1,2'
movie_df <- dbGetQuery(con, query)

Rename the columns and inspect the dataframe

colnames(movie_df) <- c("Name", "Title", "Rating")
class(movie_df) ###we see the object is the correct type
## [1] "data.frame"
pander(head(movie_df))
Name Title Rating
Andrew B Cainso Royal 4
Andrew B Superbad 3
Andrew B The Dark Knight 4
Andrew B The Wolf of Wall Street 4
Andrew B Toy Story 5
Andrew B Wonder Woman 3
###Titles need to be shortened to show properly in the plot
shorten <- function (data){
  str_replace(data, 'The', '')
}
movie_df$title_short <- sapply(movie_df$Title, shorten)

Produce ratings plot

avg_ratings <- aggregate(Rating ~ title_short, movie_df, mean)
ggplot(avg_ratings) + geom_bar(aes(x = title_short, y = Rating, fill = title_short), stat = 'identity') + 
  labs(title = "Average Ratings", x = "Title", y = "Rating", fill = "Title")