# Set the knitr chunk default `echo = TRUE` for the chunks that follow.
knitr::opts_chunk$set(echo = TRUE)
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(readr)
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Warning in doTryCatch(return(expr), name, parentenv, handler): unable to load shared object '/Library/Frameworks/R.framework/Resources/modules//R_X11.so':
##   dlopen(/Library/Frameworks/R.framework/Resources/modules//R_X11.so, 6): Library not loaded: /opt/X11/lib/libSM.6.dylib
##   Referenced from: /Library/Frameworks/R.framework/Versions/4.0/Resources/modules/R_X11.so
##   Reason: image not found
## Could not load tcltk.  Will use slower R code instead.
## Loading required package: RSQLite
library(ggplot2)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ tibble  3.0.3     ✓ stringr 1.4.0
## ✓ tidyr   1.1.1     ✓ forcats 0.5.0
## ✓ purrr   0.3.4
## ── Conflicts ────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## x plyr::arrange()   masks dplyr::arrange()
## x purrr::compact()  masks plyr::compact()
## x plyr::count()     masks dplyr::count()
## x plyr::failwith()  masks dplyr::failwith()
## x dplyr::filter()   masks stats::filter()
## x plyr::id()        masks dplyr::id()
## x dplyr::lag()      masks stats::lag()
## x plyr::mutate()    masks dplyr::mutate()
## x plyr::rename()    masks dplyr::rename()
## x plyr::summarise() masks dplyr::summarise()
## x plyr::summarize() masks dplyr::summarize()
library(tidyr)

# Read the two input tables from disk: box-office figures per film and the
# raw survey responses.
box_office <- read.csv(
  file = '/Users/mustafatelab/Desktop/CUNY SPS MSDS/607 - Data Acquisition & Management/Week 2/Assignment/box_office.csv'
)
survey <- read.csv(
  file = '/Users/mustafatelab/Desktop/CUNY SPS MSDS/607 - Data Acquisition & Management/Week 2/Assignment/Pixar_Survey - Favorite Pixar Film.csv'
)

Below is an analysis of a small survey of my social circle, in which participants were asked to rate a selection of movies from 1 to 10. The results show the movie, the rating, and an id identifying which ratings came from the same participant.

The first goal is to see which movie is the most liked by my social circle.

Secondly, we will find out who is the biggest critic.

Lastly, by integrating box office data, we will check whether the ratings are associated with the money the movies brought in.

# Preview the first six survey rows.
head(x = survey, n = 6L)

The Winner Is: Toy Story

Not So Much: A Bug’s Life unfortunately comes in last.

# Average rating per movie, ordered from highest to lowest average.
avg_movie_rating <- sqldf(
  x = 'select movie, avg(Ratings) from survey group by Movie Order by avg(Ratings) desc'
)
avg_movie_rating

Biggest Critic: Participant number 12! (must not be a huge fan of animated films)

# Average rating given by each participant, harshest rater first
# (ascending average).
avg_user_rating <- sqldf(
  x = 'select userid, avg(Ratings) from survey group by userid Order by avg(Ratings)'
)
avg_user_rating

We also have box office numbers for the associated films. (USA and worldwide do not overlap)

# Display the box-office table loaded from box_office.csv.
box_office

Let's combine the ratings with the box office data.

# Attach the box-office figures to each movie's average rating and add the
# combined gross (USA + rest of world).
# The dplyr verbs are namespace-qualified on purpose: plyr is attached after
# dplyr in this file, so a bare `mutate()` would resolve to plyr::mutate().
# Movies with no match in `box_office` keep their row with NA gross values
# (left join).
rating_box <- avg_movie_rating %>%
  dplyr::left_join(box_office, by = c('Movie' = 'movies')) %>%
  dplyr::mutate(total_gross = usa_box_office + world_box_office)

rating_box

Rating VS Total$$

# Correlation (cor() default: Pearson) between average rating and combined
# box-office gross.
cor(rating_box$`avg(Ratings)`, rating_box$total_gross)
## [1] 0.1451279
# Explicit titles replace the default axis labels, which would otherwise be the
# raw `rating_box$...` expressions.
plot(
  rating_box$`avg(Ratings)`, rating_box$total_gross,
  xlab = "Average rating",
  ylab = "Total box office gross",
  main = "Rating vs. total gross"
)

Rating VS Domestic$$

# Correlation (cor() default: Pearson) between average rating and USA
# box-office gross.
cor(rating_box$`avg(Ratings)`, rating_box$usa_box_office)
## [1] 0.3500506
# Explicit titles replace the default axis labels, which would otherwise be the
# raw `rating_box$...` expressions.
plot(
  rating_box$`avg(Ratings)`, rating_box$usa_box_office,
  xlab = "Average rating",
  ylab = "USA box office gross",
  main = "Rating vs. domestic gross"
)

Rating VS Foreign$$

# Correlation (cor() default: Pearson) between average rating and the
# `world_box_office` column (the non-USA gross, per the report text).
cor(rating_box$`avg(Ratings)`, rating_box$world_box_office)
## [1] 0.0652046
# Explicit titles replace the default axis labels, which would otherwise be the
# raw `rating_box$...` expressions.
plot(
  rating_box$`avg(Ratings)`, rating_box$world_box_office,
  xlab = "Average rating",
  ylab = "Foreign box office gross",
  main = "Rating vs. foreign gross"
)

From the sample that we surveyed, there does not appear to be a strong correlation between box office gross and movie rating.

However, we do see that the more local the box office measure, the higher the correlation: domestic gross is the most correlated with ratings, and the rest of the world is the least correlated.