# MySQL code for creating csv file
# CREATE TABLE ratings
#(int NOT NULL PRIMARY KEY,name varchar(30) NOT NULL UNIQUE);
# alter table ratings
# drop update_time;
# LOAD DATA INFILE 'C:/Program Files/MySQL/MySQL Server 8.0/bin/ratings.csv'
# INTO TABLE ratings
# FIELDS TERMINATED BY ','
# ENCLOSED BY '"'
# LINES TERMINATED BY '\n'
# IGNORE 1 ROWS;
I solicited movie ratings from my family on a handful of movies I should have known many wouldn’t like. I will therefore get a reasonably good idea of what critically-acclaimed movies were most popular among my family.
library(tidyverse)
## -- Attaching packages -------------------------------------------------------------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.2 v purrr 0.3.4
## v tibble 3.0.3 v dplyr 1.0.2
## v tidyr 1.1.2 v stringr 1.4.0
## v readr 1.3.1 v forcats 0.5.0
## -- Conflicts ----------------------------------------------------------------------------------- tidyverse_conflicts() --
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(RMySQL)
## Warning: package 'RMySQL' was built under R version 4.0.3
## Loading required package: DBI
library(openintro)
## Loading required package: airports
## Loading required package: cherryblossom
## Loading required package: usdata
library(ggplot2)
library(dplyr)
library(knitr)
library(tinytex)
library(DBI)
## I couldn't connect my R to MySQL and extract the needed data. I just saved my SQL extract down to csv and read it in.
# db <-dbConnect(RMariaDB::MariaDB(), user='root', password='evanpassword', dbname ='labs', host='localhost')
# ratings <- dbGetQuery(db, "SELECT * FROM ratings")
ratings <- read.csv(file = 'Data/ratings1.csv')
glimpse(ratings)
## Rows: 54
## Columns: 3
## $ name <chr> "The Devil All the Time", "The Florida Project", "Book Smart...
## $ friend <chr> "a", "a", "a", "a", "a", "a", "b", "b", "b", "b", "b", "b", ...
## $ rating <int> 2, 3, 4, NA, 5, NA, NA, 3, 3, NA, 4, NA, 4, NA, 3, 2, 4, 2, ...
ratings[is.na(ratings)] = 0
head(ratings)
## name friend rating
## 1 The Devil All the Time a 2
## 2 The Florida Project a 3
## 3 Book Smart a 4
## 4 I'm Thinking of Ending Things a 0
## 5 Roma a 5
## 6 Marriage Story a 0
ratings %>%
group_by(name) %>%
summarise(avg_rating = mean(rating)) %>%
arrange(desc(avg_rating))
## `summarise()` ungrouping output (override with `.groups` argument)
## # A tibble: 6 x 2
## name avg_rating
## <chr> <dbl>
## 1 Roma 4.78
## 2 Book Smart 4
## 3 The Florida Project 3
## 4 The Devil All the Time 2.22
## 5 Marriage Story 1.67
## 6 I'm Thinking of Ending Things 1.11
ratings %>%
group_by(name) %>%
summarise(avg_rating = mean(rating)) %>%
ggplot(., aes(y = name, x= avg_rating, fill = avg_rating)) + geom_bar(stat = 'identity')
## `summarise()` ungrouping output (override with `.groups` argument)