#Run this script in a fresh R session instead of clearing the workspace here.
#Calling rm(list=ls()) would delete the caller's objects whenever the script is sourced,
#and it does not unload packages or reset options, so it does not give a clean environment anyway.

#install and load the rio package for import
#install.packages("rio")
library(rio)
library(bitops)
library(RCurl)
library(ggplot2)

#Local copy of the data (avoids the manual step of uploading the .csv to github)
moviesLocalPath <- "C:/ProgramData/MySQL/MySQL Server 5.7/Uploads/MoviesAndReviews.csv"
#Copy of the data in github
moviesGithubUrl <- "https://raw.githubusercontent.com/excelsiordata/DATA607/master/MoviesAndReviews.csv"

#Use the local file when it exists; otherwise fall back to github so the
#script also runs on machines that do not have the local file
moviesSource <- if (file.exists(moviesLocalPath)) moviesLocalPath else moviesGithubUrl
movies <- import(moviesSource, stringsAsFactors = FALSE)
#Take a peek at the data
head(movies)
##   Movie ID Movie Title Movie Length (in minutes)
## 1        1    Zootopia                       108
## 2        1    Zootopia                       108
## 3        1    Zootopia                       108
## 4        1    Zootopia                       108
## 5        1    Zootopia                       108
## 6        2   Moonlight                       110
##                       Rotten Tomatoes Link Reviewer Name       Rating
## 1 https://www.rottentomatoes.com/m/zootopi          Noah            5
## 2 https://www.rottentomatoes.com/m/zootopi          Emma 4.9000000954
## 3 https://www.rottentomatoes.com/m/zootopi        Olivia            5
## 4 https://www.rottentomatoes.com/m/zootopi          Liam 4.8000001907
## 5 https://www.rottentomatoes.com/m/zootopi        Sophia 4.9000000954
## 6 https://www.rottentomatoes.com/m/moonlig          Noah 4.9000000954
#Turn the rating and the movie length into numbers.
#Going through as.character() first keeps the conversion correct even if a column is a factor.
movies[c("Rating", "Movie Length (in minutes)")] <- lapply(
  movies[c("Rating", "Movie Length (in minutes)")],
  function(column) as.numeric(as.character(column))
)

#Copy the imported data into a data frame; data.frame() rewrites the column
#names into syntactic ones (spaces and parentheses become dots)
movies.df <- data.frame(movies, stringsAsFactors = FALSE)

#Give the movie length column a tidier name
names(movies.df) <- replace(
  names(movies.df),
  names(movies.df) == "Movie.Length..in.minutes.",
  "Movie.Length.In.Minutes"
)

#Show the first rows of the data frame
head(movies.df)
##   Movie.ID Movie.Title Movie.Length.In.Minutes
## 1        1    Zootopia                     108
## 2        1    Zootopia                     108
## 3        1    Zootopia                     108
## 4        1    Zootopia                     108
## 5        1    Zootopia                     108
## 6        2   Moonlight                     110
##                       Rotten.Tomatoes.Link Reviewer.Name Rating
## 1 https://www.rottentomatoes.com/m/zootopi          Noah    5.0
## 2 https://www.rottentomatoes.com/m/zootopi          Emma    4.9
## 3 https://www.rottentomatoes.com/m/zootopi        Olivia    5.0
## 4 https://www.rottentomatoes.com/m/zootopi          Liam    4.8
## 5 https://www.rottentomatoes.com/m/zootopi        Sophia    4.9
## 6 https://www.rottentomatoes.com/m/moonlig          Noah    4.9
#Calculate mean rating by movie title.
#Rating is selected by name so the summary does not depend on column order.
#Only arguments that aggregate() itself understands are passed; anything extra would be handed on to mean().
meanRating <- aggregate(movies.df$Rating, by = list(movies.df$Movie.Title), FUN = mean)
meanRating
##              Group.1    x
## 1            Arrival 4.40
## 2 Hell or High Water 4.70
## 3         La La Land 4.60
## 4          Moonlight 4.82
## 5           Zootopia 4.92
#Replace aggregate()'s default column names with readable ones
names(meanRating)[names(meanRating)=="Group.1"] <- "Movie Title"
names(meanRating)[names(meanRating)=="x"] <- "Average Rating"
print(meanRating)
##          Movie Title Average Rating
## 1            Arrival           4.40
## 2 Hell or High Water           4.70
## 3         La La Land           4.60
## 4          Moonlight           4.82
## 5           Zootopia           4.92
#Calculate mean rating by reviewer.
#Rating is selected by name so the summary does not depend on column order.
meanReviewer <- aggregate(movies.df$Rating, by = list(movies.df$Reviewer.Name), FUN = mean)
print(meanReviewer)
##   Group.1    x
## 1    Emma 4.68
## 2    Liam 4.60
## 3    Noah 4.74
## 4  Olivia 4.70
## 5  Sophia 4.72
#Bar chart of the average rating per movie title, best-rated first.
#The means are computed up front so that each bar is a true average: stacking the raw
#Rating/5 values only equals the mean when every title has exactly five reviews.
ratingByTitle <- aggregate(Rating ~ Movie.Title, data = movies.df, FUN = mean)
#Columns are referenced by bare name inside aes() so ggplot looks them up in the plot data
M1 <- ggplot(ratingByTitle, aes(x = reorder(Movie.Title, -Rating), y = Rating)) +
  geom_bar(stat = "identity", fill = "dodgerblue4")
#Labels are passed as named arguments; recent ggplot2 versions do not unpack a list() given to labs()
M1 <- M1 + labs(
  title = "Average Movie Rating by Title",
  x = "Movie Title", y = "Average Rating")
print(M1)

#Bar chart of the average rating given by each reviewer, most generous first.
#The means are computed up front so that each bar is a true average: stacking the raw
#Rating/5 values only equals the mean when every reviewer has exactly five reviews.
ratingByReviewer <- aggregate(Rating ~ Reviewer.Name, data = movies.df, FUN = mean)
#Columns are referenced by bare name inside aes() so ggplot looks them up in the plot data
M2 <- ggplot(ratingByReviewer, aes(x = reorder(Reviewer.Name, -Rating), y = Rating)) +
  geom_bar(stat = "identity", fill = "dodgerblue4")
#Labels are passed as named arguments; recent ggplot2 versions do not unpack a list() given to labs()
M2 <- M2 + labs(
  title = "Average Score by Reviewer",
  x = "Reviewer", y = "Average Rating")
print(M2)

Conclusions:

The highest rated movie in our study was “Zootopia”, with “Arrival” coming in last.

All of the reviewers’ scores seem to be in the same ballpark with the exception of Liam, who has a noticeably lower average rating than the other reviewers. He seems to be a bit pickier than the others.