RMySQL ggplot2 stringr pander
library(RMySQL)
## Loading required package: DBI
library(ggplot2)
library(stringr)
library(pander)
con <- dbConnect(MySQL(),user = 'movie_guest', password = 'datascience', dbname = 'movies', host = 'localhost') ###This uses the guest user created by the SQL script with read only access
query <- ' select
user_name,
movie_name,
rating
from user_rating ra
left join users u
on ra.user_id = u.id
left join movie_names m
on ra.movie_id = m.id
order by 1,2'
movie_df <- dbGetQuery(con, query)
colnames(movie_df) <- c("Name", "Title", "Rating")
class(movie_df) ###we see the object is the correct type
## [1] "data.frame"
pander(head(movie_df))
| Name | Title | Rating |
|---|---|---|
| Andrew B | Cainso Royal | 4 |
| Andrew B | Superbad | 3 |
| Andrew B | The Dark Knight | 4 |
| Andrew B | The Wolf of Wall Street | 4 |
| Andrew B | Toy Story | 5 |
| Andrew B | Wonder Woman | 3 |
###Titles need to be shortened to show properly in the plot
shorten <- function (data){
str_replace(data, 'The', '')
}
movie_df$title_short <- sapply(movie_df$Title, shorten)
avg_ratings <- aggregate(Rating ~ title_short, movie_df, mean)
ggplot(avg_ratings) + geom_bar(aes(x = title_short, y = Rating, fill = title_short), stat = 'identity') +
labs(title = "Average Ratings", x = "Title", y = "Rating", fill = "Title")