Introduction

In this assignment, I asked my friends and family to rate 6 movies on a scale of 1 to 5. I took their response and created a table in Below is the link to the Google Form. https://docs.google.com/forms/d/e/1FAIpQLSd3ht1QlE3axt5LtI3KMkc4skkR9r6wn8PqtPYvVQphLhuQug/viewform

Storing Data in My SQL

-- MySQL Script generated by MySQL Workbench
-- Sun Feb 12 23:51:29 2023
-- Model: New Model    Version: 1.0
-- MySQL Workbench Forward Engineering

SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';

-- -----------------------------------------------------
-- Schema mydb
-- -----------------------------------------------------
SHOW WARNINGS;
-- -----------------------------------------------------
-- Schema Movie Rating
-- -----------------------------------------------------
DROP SCHEMA IF EXISTS `Movie Rating` ;

-- -----------------------------------------------------
-- Schema Movie Rating
-- -----------------------------------------------------
CREATE SCHEMA IF NOT EXISTS `Movie Rating` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci ;
SHOW WARNINGS;
USE `Movie Rating` ;

-- -----------------------------------------------------
-- Table `Movie`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `Movie` ;

SHOW WARNINGS;
CREATE TABLE IF NOT EXISTS `Movie` (
  `id Movie` INT NOT NULL,
  `Movie Name` VARCHAR(200) NOT NULL,
  `Genre` VARCHAR(45) NOT NULL,
  `Year` INT NOT NULL,
  PRIMARY KEY (`id Movie`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;

SHOW WARNINGS;

-- -----------------------------------------------------
-- Table `Movie Rating`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `Movie Rating` ;

SHOW WARNINGS;
CREATE TABLE IF NOT EXISTS `Movie Rating` (
  `idRating` INT NOT NULL,
  `Name` VARCHAR(45) NOT NULL,
  `Gender` VARCHAR(45) NOT NULL,
  `Age` INT NOT NULL,
  `Avengers: Endgame` INT NULL DEFAULT NULL,
  `Everything Everywhere All at Once` INT NULL DEFAULT NULL,
  `Black Panther: Wakanda Forever` INT NULL DEFAULT NULL,
  `Scream` INT NULL DEFAULT NULL,
  `Doctor Strange in the Multiverse of Madness` INT NULL DEFAULT NULL,
  `Top Gun: Maverick` INT NULL DEFAULT NULL,
  PRIMARY KEY (`idRating`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;

SHOW WARNINGS;

-- -----------------------------------------------------
-- Table `Person`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `Person` ;

SHOW WARNINGS;
CREATE TABLE IF NOT EXISTS `Person` (
  `idPerson` INT NOT NULL,
  `First Name` VARCHAR(45) NOT NULL,
  `Gender` VARCHAR(45) NOT NULL,
  `Age` VARCHAR(45) NOT NULL,
  PRIMARY KEY (`idPerson`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;

SHOW WARNINGS;

SET SQL_MODE=@OLD_SQL_MODE;
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;

Connect to SQL

library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ dplyr   1.0.9
## ✔ tidyr   1.2.0     ✔ stringr 1.4.1
## ✔ readr   2.1.2     ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library(DT)
library(RMySQL)
## Loading required package: DBI
library(DBI)
dbconnect = dbConnect(RMySQL::MySQL(),
                           dbname='Movie Rating',
                           host='localhost',
                           port=3306,
                           user='root',
                           password = rstudioapi::askForPassword("Database password"))

rating <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.`Movie Rating`")
datatable(rating)

Movie

Below is a table of the movies that each person is rating.

movies <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.Movie;")
datatable(movies, caption = 'Table 1: Movie title and Year')

Reviewer Information

Here is table of first name, age, and gender of each person.

person <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.Person;")
datatable(person, caption = 'Table 2: Information of Reviewers')

Analysis

Here is a table of each person’s rating of each movie. There are missing values for those people who has not seen a movie. 4 people have never seen “Everything Everywhere All at Once” and “Scream”. 2 people has never seen “Top Gun: Maverick”.

rating <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.`Movie Rating`")
datatable(rating, caption = 'Table 3: Ratings of each movie.')

Below is a table of the average rating of each movie. Some people did not see certain movies. So there are missing values. I used “na.rm = TRUE” to deal with missing values. Avengers: Endgame has the highest average rating and Scream has the lowest average rating. Both movies “Everything Everywhere All at Once” and “Scream” had 4 missing values each. Yet, “Everything Everywhere All at Once” had a higher average rating than “Scream” did.

# #new <-rating%>%
#   summarise(Avengers= mean(rating[, 5],na.rm = TRUE),Everything.Everywhere.All.at.Once= mean(rating[, 6],na.rm = TRUE), Black.Panther= mean(rating[, 7],na.rm = TRUE), Scream = mean(rating[, 8],na.rm = TRUE), Doctor.Strange= mean(rating[, 9],na.rm = TRUE), Top.Gun= mean(rating[, 10],na.rm = TRUE))
# 
# datatable(new, caption = 'Table 4: Average rating of each movie.')

Avengers= mean(rating[, 5],na.rm = TRUE)
Everything.Everywhere.All.at.Once= mean(rating[, 6],na.rm = TRUE)
Black.Panther= mean(rating[, 7],na.rm = TRUE)
Scream = mean(rating[, 8],na.rm = TRUE)
Doctor.Strange= mean(rating[, 9],na.rm = TRUE) 
Top.Gun= mean(rating[, 10],na.rm = TRUE)
Movies <- c('Avengers: Endgame','Everything Everywhere All at Once','Black Panther: Wakanda Forever','Scream','Doctor Strange in the Multiverse of Madness','   
Top Gun: Maverick' )
Average_Rating <- c(Avengers,Everything.Everywhere.All.at.Once, Black.Panther, Scream, Doctor.Strange,  Top.Gun)

new1 <- data.frame(Movies, Average_Rating)
datatable(new1, caption = 'Table 4: Average rating of each movie.')
p <- ggplot(new1, aes(x=reorder(Movies,-Average_Rating), y= Average_Rating)) + ggtitle("Movies") + theme(plot.title=element_text(hjust=0.5))+theme(axis.text.x=element_text(angle=45,hjust=1))+ xlab("Movies")+ ylab("Average_Rating")+ 
geom_bar(stat = "identity",fill = "steelblue", color = "black")
p + scale_y_continuous(labels = function(x) format(x, scientific = FALSE))

Do either gender have a movie preference?

The following movies has a higher average rating from females : ‘Avengers: Endgame’, ‘Scream’, and ‘Top Gun: Maverick’.

The following movies has a higher average rating from males : ‘Everything Everywhere All at Once’, and ‘Black Panther: Wakanda Forever’.

names(rating) <- c('id', 'Name', 'Gender', 'Age', 'Avengers', 'Everything.Everywhere.All.at.Once', 'Black.Panther', 'Scream', 'Doctor.Strange', 'Top.Gun')
new2 <-rating%>%
  group_by(Gender)%>%
  summarise(Avengers=mean(Avengers, na.rm = TRUE), Everything.Everywhere.All.at.Once=mean(Everything.Everywhere.All.at.Once, na.rm = TRUE), Black.Panther = mean(Black.Panther, na.rm = TRUE), Scream = mean(Scream, na.rm = TRUE), Doctor.Strange=mean(Doctor.Strange, na.rm = TRUE), Top.Gun = mean(Top.Gun,na.rm = TRUE))

datatable(new2)