# MySQL code for creating csv file

# CREATE TABLE ratings
#(int NOT NULL PRIMARY KEY,name varchar(30) NOT NULL UNIQUE);

# alter table ratings
# drop update_time;
# LOAD DATA INFILE 'C:/Program Files/MySQL/MySQL Server 8.0/bin/ratings.csv' 
# INTO TABLE ratings 
# FIELDS TERMINATED BY ',' 
# ENCLOSED BY '"'
# LINES TERMINATED BY '\n'
# IGNORE 1 ROWS;

Intro

I solicited movie ratings from my family on a handful of movies I should have known many wouldn’t like. I will therefore get a reasonably good idea of what critically-acclaimed movies were most popular among my family.

library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2     v purrr   0.3.4
## v tibble  3.0.3     v dplyr   1.0.2
## v tidyr   1.1.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(RMySQL)
## Warning: package 'RMySQL' was built under R version 4.0.3
## Loading required package: DBI
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(ggplot2)
library(dplyr)
library(knitr)
library(tinytex)
library(DBI)

## I couldn't connect my R to MySQL and extract the needed data. I just saved my SQL extract down to csv and read it in.

# db <-dbConnect(RMariaDB::MariaDB(), user='root', password='evanpassword', dbname ='labs', host='localhost')

# ratings <- dbGetQuery(db, "SELECT * FROM ratings")

ratings <- read.csv(file = 'Data/ratings1.csv')

glimpse(ratings)
## Rows: 54
## Columns: 3
## $ name   <chr> "The Devil All the Time", "The Florida Project", "Book Smart...
## $ friend <chr> "a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b", ...
## $ rating <int> 2, 3, 4, NA, 5, NA, NA, 3, 3, NA, 4, NA, 4, NA, 3, 2, 4, 2, ...

Dealing with n/a’s in the underlying file

ratings[is.na(ratings)] = 0

Organizing the movie titles by average rating

head(ratings)
##                            name friend rating
## 1        The Devil All the Time      a      2
## 2           The Florida Project      a      3
## 3                    Book Smart      a      4
## 4 I'm Thinking of Ending Things      a      0
## 5                          Roma      a      5
## 6                Marriage Story      a      0
ratings %>%
  group_by(name) %>%
  summarise(avg_rating = mean(rating)) %>%
  arrange(desc(avg_rating))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 2
##   name                          avg_rating
##   <chr>                              <dbl>
## 1 Roma                                4.78
## 2 Book Smart                          4   
## 3 The Florida Project                 3   
## 4 The Devil All the Time              2.22
## 5 Marriage Story                      1.67
## 6 I'm Thinking of Ending Things       1.11

Graphic Representation

ratings %>%
  group_by(name) %>%
  summarise(avg_rating = mean(rating)) %>%
  ggplot(., aes(y = name, x= avg_rating, fill = avg_rating)) + geom_bar(stat = 'identity')
## `summarise()` ungrouping output (override with `.groups` argument)