# Loading the required packages
# readxl is installed only when it is missing, so re-running the script does
# not download and reinstall the package on every execution.
if (!requireNamespace("readxl", quietly = TRUE)) {
  install.packages("readxl", repos = "http://cran.us.r-project.org")
}
library("readxl")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(ggplot2)
# Getting the working directory and pulling the data from the Excel file
getwd()
## [1] "/Users/ursulapodosenin/Desktop"
# Reads cells A1:G6 of the first sheet; the first row supplies the column
# names. The argument is spelled out as `col_names` instead of relying on
# partial matching of `col_name`.
# NOTE(review): the absolute path below is specific to one machine - confirm
# where MovieRatings.xlsx should live before running elsewhere.
movie_ratings <- read_excel(
  "/Users/ursulapodosenin/Desktop/MovieRatings.xlsx",
  sheet = 1,
  range = "A1:G6",
  col_names = TRUE,
  col_types = NULL
)
movie_ratings
## # A tibble: 5 × 7
## Critic CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2 StarWarsForce
## <chr> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 Burton NA NA NA 4 NA 4
## 2 Charley 4 5 4 3 2 3
## 3 Dan NA 5 NA NA NA 5
## 4 Dieudon… 5 4 NA NA NA 5
## 5 Matt 4 NA 2 NA 2 5
# Keeping only the six movie rating columns; the Critic column is left out
rating_columns <- c(
  "CaptainAmerica",
  "Deadpool",
  "Frozen",
  "JungleBook",
  "PitchPerfect2",
  "StarWarsForce"
)
movieratings <- movie_ratings[rating_columns]
movieratings
## # A tibble: 5 × 6
## CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2 StarWarsForce
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA 4 NA 4
## 2 4 5 4 3 2 3
## 3 NA 5 NA NA NA 5
## 4 5 4 NA NA NA 5
## 5 4 NA 2 NA 2 5
# Getting the average rating of each movie; missing ratings (NA) are ignored.
# vapply() is used instead of sapply() so the column filter is always a
# logical vector, even when the data frame has no columns.
movie_average <- colMeans(
  movieratings[vapply(movieratings, is.numeric, logical(1))],
  na.rm = TRUE
)
movie_average
## CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2
## 4.333333 4.666667 3.000000 3.500000 2.000000
## StarWarsForce
## 4.400000
# Calculating global average rating: the mean of the per-movie averages, so
# every movie carries the same weight regardless of how many critics rated it
global_average <- mean(movie_average)
# Calculating movie biases: how far each movie's average sits from the
# global average
movie_biases <- movie_average - global_average
# Creating a data frame with movie names, average ratings, biases, and
# baseline estimates. The baseline here is global_average + movie_biases,
# which is algebraically the same number as Average_Rating.
movie_data <- data.frame(
  Movie = names(movie_average),
  Average_Rating = movie_average,
  Movie_Bias = movie_biases,
  Baseline_Estimate = global_average + movie_biases
)
# Printing movie data
print(movie_data)
## Movie Average_Rating Movie_Bias Baseline_Estimate
## CaptainAmerica CaptainAmerica 4.333333 0.6833333 4.333333
## Deadpool Deadpool 4.666667 1.0166667 4.666667
## Frozen Frozen 3.000000 -0.6500000 3.000000
## JungleBook JungleBook 3.500000 -0.1500000 3.500000
## PitchPerfect2 PitchPerfect2 2.000000 -1.6500000 2.000000
## StarWarsForce StarWarsForce 4.400000 0.7500000 4.400000
# Adding IMDB ratings for each movie as an extra row.
# The values are listed in the same order as the columns of movieratings.
IMDB <- c(6.9, 8.0, 7.4, 7.6, 6.4, 5.5)
# The bare vector is appended by position, so make sure there is exactly one
# IMDB value per movie column before binding.
stopifnot(length(IMDB) == ncol(movieratings))
movieratings <- rbind(movieratings, IMDB)
movieratings
## # A tibble: 6 × 6
## CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2 StarWarsForce
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 NA NA NA 4 NA 4
## 2 4 5 4 3 2 3
## 3 NA 5 NA NA NA 5
## 4 5 4 NA NA NA 5
## 5 4 NA 2 NA 2 5
## 6 6.9 8 7.4 7.6 6.4 5.5
# Labelling every row with its source: the five critics plus the IMDB row.
Critic <- c("Burton", "Charley", "Dan", "Dieudonne", "Matt", "IMDB")
# The labelled table is stored (instead of being printed and discarded) so it
# can be reused; movieratings itself stays numeric-only.
movieratings_labeled <- cbind(movieratings, Critic)
movieratings_labeled
## CaptainAmerica Deadpool Frozen JungleBook PitchPerfect2 StarWarsForce
## 1 NA NA NA 4.0 NA 4.0
## 2 4.0 5 4.0 3.0 2.0 3.0
## 3 NA 5 NA NA NA 5.0
## 4 5.0 4 NA NA NA 5.0
## 5 4.0 NA 2.0 NA 2.0 5.0
## 6 6.9 8 7.4 7.6 6.4 5.5
## Critic
## 1 Burton
## 2 Charley
## 3 Dan
## 4 Dieudonne
## 5 Matt
## 6 IMDB
# Comparing the average class score for a movie against the IMDB rating.
# The ratings are wrapped in c() because mean() only averages its first
# argument: mean(4, 5, 4) returns 4, not the average of the three ratings.
# Each ratio divides the class average by that movie's IMDB rating.
ca <- mean(c(4, 5, 4))
cac <- ca / 6.9
cac
## [1] 0.6280193
dp <- mean(c(5, 5, 4))
dpc <- dp / 8
dpc
## [1] 0.5833333
fr <- mean(c(4, 2))
frc <- fr / 7.4
frc
## [1] 0.4054054
jb <- mean(c(4, 3))
jbc <- jb / 7.6
jbc
## [1] 0.4605263
sw <- mean(c(4, 3, 5, 5, 5))
swc <- sw / 5.5
swc
## [1] 0.8
p2 <- mean(c(2, 2))
p2c <- p2 / 6.4
p2c
## [1] 0.3125
# Average class-to-IMDB ratio across the six movies (again passed as a single
# vector so that every ratio contributes to the mean).
tc <- mean(c(cac, dpc, frc, jbc, p2c, swc))
tc
## [1] 0.5316307
# Using the ratio above to predict the average class rating for Oppenheimer,
# Barbie, and Interstellar. The ratio of the class rating to the IMDB rating
# is the prediction factor.
IMDB_Oppenheimer <- 8.3
IMDB_Barbie <- 6.8
IMDB_Interstellar <- 8.7
classOppenheimer <- tc * IMDB_Oppenheimer
classOppenheimer
## [1] 4.412535
classBarbie <- tc * IMDB_Barbie
classBarbie
## [1] 3.615089
class_Interstellar <- tc * IMDB_Interstellar
class_Interstellar
## [1] 4.625187
# Creating a data frame that combines the average class rating for the listed
# movies and the predicted movies. The column gets an explicit name instead of
# the deparsed c(...) expression.
average_class_ratings <- data.frame(
  Average_Class_Rating = c(
    ca, dp, fr, jb, p2, sw,
    classOppenheimer, classBarbie, class_Interstellar
  )
)
average_class_ratings
## Average_Class_Rating
## 1 4.333333
## 2 4.666667
## 3 3.000000
## 4 3.500000
## 5 2.000000
## 6 4.400000
## 7 4.412535
## 8 3.615089
## 9 4.625187
# Movie titles in the same order as the ratings above
movies_list <- c(
  "Captain America", "Deadpool", "Frozen", "Jungle Book", "Pitch Perfect 2",
  "Star Wars Force", "Oppenheimer", "Barbie", "Interstellar"
)
cbind(movies_list, average_class_ratings)
## movies_list Average_Class_Rating
## 1 Captain America 4.333333
## 2 Deadpool 4.666667
## 3 Frozen 3.000000
## 4 Jungle Book 3.500000
## 5 Pitch Perfect 2 2.000000
## 6 Star Wars Force 4.400000
## 7 Oppenheimer 4.412535
## 8 Barbie 3.615089
## 9 Interstellar 4.625187