In this assignment, I asked my friends and family to rate 6 movies on a scale of 1 to 5. I took their response and created a table in Below is the link to the Google Form. https://docs.google.com/forms/d/e/1FAIpQLSd3ht1QlE3axt5LtI3KMkc4skkR9r6wn8PqtPYvVQphLhuQug/viewform
-- MySQL Script generated by MySQL Workbench
-- Sun Feb 12 23:51:29 2023
-- Model: New Model Version: 1.0
-- MySQL Workbench Forward Engineering
SET @OLD_UNIQUE_CHECKS=@@UNIQUE_CHECKS, UNIQUE_CHECKS=0;
SET @OLD_FOREIGN_KEY_CHECKS=@@FOREIGN_KEY_CHECKS, FOREIGN_KEY_CHECKS=0;
SET @OLD_SQL_MODE=@@SQL_MODE, SQL_MODE='ONLY_FULL_GROUP_BY,STRICT_TRANS_TABLES,NO_ZERO_IN_DATE,NO_ZERO_DATE,ERROR_FOR_DIVISION_BY_ZERO,NO_ENGINE_SUBSTITUTION';
-- -----------------------------------------------------
-- Schema mydb
-- -----------------------------------------------------
SHOW WARNINGS;
-- -----------------------------------------------------
-- Schema Movie Rating
-- -----------------------------------------------------
DROP SCHEMA IF EXISTS `Movie Rating` ;
-- -----------------------------------------------------
-- Schema Movie Rating
-- -----------------------------------------------------
CREATE SCHEMA IF NOT EXISTS `Movie Rating` DEFAULT CHARACTER SET utf8mb4 COLLATE utf8mb4_0900_ai_ci ;
SHOW WARNINGS;
USE `Movie Rating` ;
-- -----------------------------------------------------
-- Table `Movie`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `Movie` ;
SHOW WARNINGS;
CREATE TABLE IF NOT EXISTS `Movie` (
`id Movie` INT NOT NULL,
`Movie Name` VARCHAR(200) NOT NULL,
`Genre` VARCHAR(45) NOT NULL,
`Year` INT NOT NULL,
PRIMARY KEY (`id Movie`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;
SHOW WARNINGS;
-- -----------------------------------------------------
-- Table `Movie Rating`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `Movie Rating` ;
SHOW WARNINGS;
CREATE TABLE IF NOT EXISTS `Movie Rating` (
`idRating` INT NOT NULL,
`Name` VARCHAR(45) NOT NULL,
`Gender` VARCHAR(45) NOT NULL,
`Age` INT NOT NULL,
`Avengers: Endgame` INT NULL DEFAULT NULL,
`Everything Everywhere All at Once` INT NULL DEFAULT NULL,
`Black Panther: Wakanda Forever` INT NULL DEFAULT NULL,
`Scream` INT NULL DEFAULT NULL,
`Doctor Strange in the Multiverse of Madness` INT NULL DEFAULT NULL,
`Top Gun: Maverick` INT NULL DEFAULT NULL,
PRIMARY KEY (`idRating`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;
SHOW WARNINGS;
-- -----------------------------------------------------
-- Table `Person`
-- -----------------------------------------------------
DROP TABLE IF EXISTS `Person` ;
SHOW WARNINGS;
CREATE TABLE IF NOT EXISTS `Person` (
`idPerson` INT NOT NULL,
`First Name` VARCHAR(45) NOT NULL,
`Gender` VARCHAR(45) NOT NULL,
`Age` VARCHAR(45) NOT NULL,
PRIMARY KEY (`idPerson`))
ENGINE = InnoDB
DEFAULT CHARACTER SET = utf8mb4
COLLATE = utf8mb4_0900_ai_ci;
SHOW WARNINGS;
SET SQL_MODE=@OLD_SQL_MODE;
SET FOREIGN_KEY_CHECKS=@OLD_FOREIGN_KEY_CHECKS;
SET UNIQUE_CHECKS=@OLD_UNIQUE_CHECKS;
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ dplyr 1.0.9
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(DT)
library(RMySQL)
## Loading required package: DBI
library(DBI)
dbconnect = dbConnect(RMySQL::MySQL(),
dbname='Movie Rating',
host='localhost',
port=3306,
user='root',
password = rstudioapi::askForPassword("Database password"))
rating <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.`Movie Rating`")
datatable(rating)
Below is a table of the movies that each person is rating.
movies <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.Movie;")
datatable(movies, caption = 'Table 1: Movie title and Year')
Here is table of first name, age, and gender of each person.
person <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.Person;")
datatable(person, caption = 'Table 2: Information of Reviewers')
Here is a table of each person’s rating of each movie. There are missing values for those people who has not seen a movie. 4 people have never seen “Everything Everywhere All at Once” and “Scream”. 2 people has never seen “Top Gun: Maverick”.
rating <- dbGetQuery(dbconnect, "SELECT * FROM `Movie Rating`.`Movie Rating`")
datatable(rating, caption = 'Table 3: Ratings of each movie.')
Below is a table of the average rating of each movie. Some people did not see certain movies. So there are missing values. I used “na.rm = TRUE” to deal with missing values. Avengers: Endgame has the highest average rating and Scream has the lowest average rating. Both movies “Everything Everywhere All at Once” and “Scream” had 4 missing values each. Yet, “Everything Everywhere All at Once” had a higher average rating than “Scream” did.
# #new <-rating%>%
# summarise(Avengers= mean(rating[, 5],na.rm = TRUE),Everything.Everywhere.All.at.Once= mean(rating[, 6],na.rm = TRUE), Black.Panther= mean(rating[, 7],na.rm = TRUE), Scream = mean(rating[, 8],na.rm = TRUE), Doctor.Strange= mean(rating[, 9],na.rm = TRUE), Top.Gun= mean(rating[, 10],na.rm = TRUE))
#
# datatable(new, caption = 'Table 4: Average rating of each movie.')
Avengers= mean(rating[, 5],na.rm = TRUE)
Everything.Everywhere.All.at.Once= mean(rating[, 6],na.rm = TRUE)
Black.Panther= mean(rating[, 7],na.rm = TRUE)
Scream = mean(rating[, 8],na.rm = TRUE)
Doctor.Strange= mean(rating[, 9],na.rm = TRUE)
Top.Gun= mean(rating[, 10],na.rm = TRUE)
Movies <- c('Avengers: Endgame','Everything Everywhere All at Once','Black Panther: Wakanda Forever','Scream','Doctor Strange in the Multiverse of Madness','
Top Gun: Maverick' )
Average_Rating <- c(Avengers,Everything.Everywhere.All.at.Once, Black.Panther, Scream, Doctor.Strange, Top.Gun)
new1 <- data.frame(Movies, Average_Rating)
datatable(new1, caption = 'Table 4: Average rating of each movie.')
p <- ggplot(new1, aes(x=reorder(Movies,-Average_Rating), y= Average_Rating)) + ggtitle("Movies") + theme(plot.title=element_text(hjust=0.5))+theme(axis.text.x=element_text(angle=45,hjust=1))+ xlab("Movies")+ ylab("Average_Rating")+
geom_bar(stat = "identity",fill = "steelblue", color = "black")
p + scale_y_continuous(labels = function(x) format(x, scientific = FALSE))
The following movies has a higher average rating from females : ‘Avengers: Endgame’, ‘Scream’, and ‘Top Gun: Maverick’.
The following movies has a higher average rating from males : ‘Everything Everywhere All at Once’, and ‘Black Panther: Wakanda Forever’.
names(rating) <- c('id', 'Name', 'Gender', 'Age', 'Avengers', 'Everything.Everywhere.All.at.Once', 'Black.Panther', 'Scream', 'Doctor.Strange', 'Top.Gun')
new2 <-rating%>%
group_by(Gender)%>%
summarise(Avengers=mean(Avengers, na.rm = TRUE), Everything.Everywhere.All.at.Once=mean(Everything.Everywhere.All.at.Once, na.rm = TRUE), Black.Panther = mean(Black.Panther, na.rm = TRUE), Scream = mean(Scream, na.rm = TRUE), Doctor.Strange=mean(Doctor.Strange, na.rm = TRUE), Top.Gun = mean(Top.Gun,na.rm = TRUE))
datatable(new2)