Each month, the popular Rotten Tomatoes website ranks the 100 best movies on Netflix as ranked by the Tomatometer.
September 2020: See https://editorial.rottentomatoes.com/guide/best-netflix-movies-to-watch-right-now/
An informal survey was created selecting the 5th film from each of set of 10 films among the top 60.
THe Movie Survey is available at https://www.surveymonkey.com/. Note: Not working but can be here.
#
library(RMySQL)
## Loading required package: DBI
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
Prompt user for database username and password then setup database connection.
#prompt for input
#vardb_schema <- readline(prompt = "Enter your DB schema: ")
#vardb_user <- readline(prompt = "Enter your DB username: ")
#vardb_password <- readline(prompt = "Enter your password: ")
vardb_user <- "DATA607"
vardb_password <- "hello"
vardb_schema <- "a02"
cat("Schema=", vardb_schema, " username=", vardb_user, " password=", vardb_password)
## Schema= a02 username= DATA607 password= hello
mydb = dbConnect(RMySQL::MySQL(), user=vardb_user, password=vardb_password, port=3306, dbname=vardb_schema, host='localhost')
summary(mydb)
## <MySQLConnection:0,0>
## User: DATA607
## Host: localhost
## Dbname: a02
## Connection type: localhost via TCP/IP
##
## Results:
The .csv data will be loaded into the
# put code from SQL script here.
pop_movie_df <- dbGetQuery(mydb, "select * from survey_popmovies")
dim(pop_movie_df)
## [1] 6 5
str(pop_movie_df)
## 'data.frame': 6 obs. of 5 variables:
## $ movie_id : int 1 2 3 4 5 6
## $ movie_name : chr "Ladybird" "Homecoming: A Film by Beyonce" "Aquarius" "The Florida Project" ...
## $ movie_year : int 2017 2019 2016 2017 2019 2018
## $ movie_rank : int 5 15 25 35 45 55
## $ movie_reviewsite: chr "https://www.rottentomatoes.com/m/lady_bird/\r" "https://www.rottentomatoes.com/m/homecoming_a_film_by_beyonce\r" "https://www.rottentomatoes.com/m/aquarius/\r" "https://www.rottentomatoes.com/m/the_florida_project/\r" ...
pop_movie_df
## movie_id movie_name movie_year movie_rank
## 1 1 Ladybird 2017 5
## 2 2 Homecoming: A Film by Beyonce 2019 15
## 3 3 Aquarius 2016 25
## 4 4 The Florida Project 2017 35
## 5 5 I Lost My Body 2019 45
## 6 6 The Death of Stalin 2018 55
## movie_reviewsite
## 1 https://www.rottentomatoes.com/m/lady_bird/\r
## 2 https://www.rottentomatoes.com/m/homecoming_a_film_by_beyonce\r
## 3 https://www.rottentomatoes.com/m/aquarius/\r
## 4 https://www.rottentomatoes.com/m/the_florida_project/\r
## 5 https://www.rottentomatoes.com/m/i_lost_my_body/\r
## 6 https://www.rottentomatoes.com/m/the_death_of_stalin/\r
The .csv data will be loaded into the and handle missing data
# put code from SQL script here.
survey_responses_df <- dbGetQuery(mydb, "select * from survey_responses")
dim(survey_responses_df)
## [1] 15 5
str(survey_responses_df)
## 'data.frame': 15 obs. of 5 variables:
## $ survey_obs : int 1 2 3 4 5 6 7 8 9 10 ...
## $ movie_id : int 1 2 3 4 5 1 2 3 4 5 ...
## $ survey_id : int 123 123 123 123 123 234 234 234 234 234 ...
## $ survey_seen_movie: int 1 1 1 1 1 1 0 1 0 1 ...
## $ survey_likert : int 5 4 3 2 1 1 0 3 0 4 ...
survey_responses_df
## survey_obs movie_id survey_id survey_seen_movie survey_likert
## 1 1 1 123 1 5
## 2 2 2 123 1 4
## 3 3 3 123 1 3
## 4 4 4 123 1 2
## 5 5 5 123 1 1
## 6 6 1 234 1 1
## 7 7 2 234 0 0
## 8 8 3 234 1 3
## 9 9 4 234 0 0
## 10 10 5 234 1 4
## 11 11 1 345 1 5
## 12 12 2 345 1 0
## 13 13 3 345 1 3
## 14 14 4 345 0 0
## 15 15 5 345 1 3
add narrative
survey_responses_df <- dbGetQuery(mydb, "select * from survey_responses WHERE survey_seen_movie = 1")
dim(survey_responses_df)
## [1] 12 5
str(survey_responses_df)
## 'data.frame': 12 obs. of 5 variables:
## $ survey_obs : int 1 2 3 4 5 6 8 10 11 12 ...
## $ movie_id : int 1 2 3 4 5 1 3 5 1 2 ...
## $ survey_id : int 123 123 123 123 123 234 234 234 345 345 ...
## $ survey_seen_movie: int 1 1 1 1 1 1 1 1 1 1 ...
## $ survey_likert : int 5 4 3 2 1 1 3 4 5 0 ...
survey_responses_df
## survey_obs movie_id survey_id survey_seen_movie survey_likert
## 1 1 1 123 1 5
## 2 2 2 123 1 4
## 3 3 3 123 1 3
## 4 4 4 123 1 2
## 5 5 5 123 1 1
## 6 6 1 234 1 1
## 7 8 3 234 1 3
## 8 10 5 234 1 4
## 9 11 1 345 1 5
## 10 12 2 345 1 0
## 11 13 3 345 1 3
## 12 15 5 345 1 3
#ggplot(data=survey_responses_df, aes(x=survey_likert, y=movie_id)) + geom_line()
g <- ggplot(survey_responses_df, aes(x = survey_likert, y = movie_id)) + geom_line() + xlab('Likert Rating') + ylab('Popular Movies') + facet_wrap(~movie_id) + labs(title = "Popular Movie Likert Results")
print(g)
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?
narrative