MOVIE RATINGS AVERAGE

Load Libraries

library(RMySQL)
## Loading required package: DBI
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(knitr)

Load Table From Database

This extract the values for our rating by connecting to a local database in my machine, I provided an alternate csv method of loading the movie ratings table through csv, with the csv file.

#Here i will extract results from database 
mysqlconnection = dbConnect(RMySQL::MySQL(), dbname='movie_ratings', host='localhost', port=3306, user='root', password='Uussd0vtusuZ')
dbListTables(mysqlconnection)
## [1] "ratings"
result = dbSendQuery(mysqlconnection, "select * from ratings")
data.frame = fetch(result, n = 10) 
#Below all NA value are displayed
print(data.frame)
##    id barbie past_lives the_holdovers anatomy_of_fall poor_things air     name
## 1   1      3          5             5               1          NA   3 Jennifer
## 2   2      5         NA             2               3           4  NA Jonathan
## 3   3      4          2             3              NA           4   5  daglish
## 4   4      5          4             0               4           2   3   manolo
## 5   5      5          2             3               4           2   1    verde
## 6   6      1          3             5               5          NA   5  juaquin
## 7   7      4         NA            NA               5           4   4  bandido
## 8   8      4          4             4               4           4   5   nelson
## 9   9      3          4             4               4           3   3    aaron
## 10 10      5          5             5              NA           5   5     john

Alterative Load as CSV IF No Connection To DB

As csv we extract null values and must convert them to NA in order for it to function just like reading it from a database would

data.frame <- read_csv("movie_ratings.csv")
## Rows: 10 Columns: 8
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (6): past_lives, the_holdovers, anatomy_of_fall, poor_things, air, name
## dbl (2): id, barbie
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
data.frame[data.frame == 'NULL'] <- NA
data.frame <- data.frame |> mutate_at(c("barbie", "past_lives", "the_holdovers", "anatomy_of_fall", "poor_things", "air"), as.numeric)

print(data.frame[2:7])
## # A tibble: 10 × 6
##    barbie past_lives the_holdovers anatomy_of_fall poor_things   air
##     <dbl>      <dbl>         <dbl>           <dbl>       <dbl> <dbl>
##  1      3          5             5               1          NA     3
##  2      5         NA             2               3           4    NA
##  3      4          2             3              NA           4     5
##  4      5          4             0               4           2     3
##  5      5          2             3               4           2     1
##  6      1          3             5               5          NA     5
##  7      4         NA            NA               5           4     4
##  8      4          4             4               4           4     5
##  9      3          4             4               4           3     3
## 10      5          5             5              NA           5     5

Calculate Average And Compare

In each column we’d like to remove all NA values in order to calculate average rating based on how many people voted for the movie

data.frame.reduced <- data.frame |> select(barbie : air)
df <- tribble(~movie_name, ~average,)
kable(df)
movie_name average
for (i in colnames(data.frame.reduced)){
     df[nrow(df) + 1,] = list(i, mean( na.omit(data.frame[[i]])))
}

ggplot(df, aes(x = reorder(movie_name, -average), y = average, fill = movie_name)) +  geom_bar(stat = "identity") + theme_ipsum()