Load in libraries

# Attach every package used by the analysis. The attach order is kept as-is.
library(tidyverse)
library(kableExtra)
library(knitr)
library(stringr)
library(readr)
# NOTE: plyr is attached after dplyr (tidyverse already attached it), so
# plyr's summarise(), mutate(), arrange(), etc. mask the dplyr versions.
# Qualify calls with dplyr:: wherever the dplyr behaviour is required.
library(plyr)
library(dplyr)
library(data.table)
library(readxl)
# library() stops with an error when a package is missing, whereas the
# previous require(xlsx) would only have returned FALSE and carried on.
library(rJava)
library(xlsx)
# The workspace is intentionally not cleared here: rm(list = ls()) would
# silently delete the objects of whoever runs or sources this file.

Tidy dataset 1: Movies database

Load the data from the xlsx file

# Read the first worksheet of the ratings workbook, using its first row as
# the column names.
movies <- as.data.frame(
    read.xlsx("Movie Ratings.xlsx", sheetIndex = 1, header = TRUE)
)
# Quick inspection helpers, left disabled:
# tail(movies)
# dim(movies)

# Reshape the wide ratings sheet into long form: one row per reviewer/movie
# rating.
#
# The result is assigned back with `<-` rather than magrittr's `%<>%`:
# none of the library() calls in this file attach magrittr, so `%<>%`
# would fail with "could not find function".
movies <- movies %>%
    # Stack columns 3 to 12 into Movies (old column name) / Movie_rating pairs.
    gather(Movies, Movie_rating, 3:12) %>%
    select(
        Timestamp,
        Reviewer = Name,
        Movies,
        Movie_rating,
        Number.of.Movies.Seen
    ) %>%
    # Split the timestamp at the first space into its two parts.
    separate("Timestamp", c("day", "time"), " ") %>%
    # Drop every row that has a missing value in any column.
    na.omit() %>%
    arrange(Reviewer) %>%
    # Turn zeros (in any column) into NA so they are not treated as ratings.
    replace(. == 0, NA) %>%
    # Keep positive ratings only; this also removes the NA ratings made above.
    filter(Movie_rating > 0)

    
# Table of the mean rating per movie, rounded to two decimals.
movies %>%
    dplyr::group_by(Movies) %>%
    dplyr::summarise(
        Movie_rating = round(mean(Movie_rating, na.rm = TRUE), 2)
    ) %>%
    kable()
Movies Movie_rating
Alien..Covenant 2.50
Blade.Runner.2049 4.00
Ghost.in.the.Shell 3.60
Guardians.of.the.Galaxy.2 3.94
Spider.Man..Homecoming 3.73
Star.Wars..The.Last.Jedi 4.22
Thor..Ragnarok 4.50
Valerian.and.the.City.of.a.Thousand.Planets 2.80
War.for.the.Planet.of.the.Apes 4.17
Wonder.Woman 4.26

Data Visualization

# Horizontal bar chart of each reviewer's mean rating, highest at the top.
movies %>%
    dplyr::group_by(Reviewer) %>%
    dplyr::summarise(
        Average_Review = round(mean(Movie_rating, na.rm = TRUE), 2)
    ) %>%
    # ggplot orders a character axis alphabetically and ignores row order, so
    # sorting the rows has no visible effect. Encode the ranking as factor
    # levels instead; ascending levels put the best score on top after
    # coord_flip().
    dplyr::mutate(Reviewer = reorder(Reviewer, Average_Review)) %>%
    ggplot(aes(x = Reviewer, y = Average_Review)) +
    geom_bar(aes(fill = Reviewer), position = "dodge", stat = "identity") +
    coord_flip() +
    labs(title = "Average Rating By User")

# Bar chart of each movie's mean rating, best-rated movie first (left).
movies %>%
    dplyr::group_by(Movies) %>%
    dplyr::summarise(
        Average_Review = round(mean(Movie_rating, na.rm = TRUE), 2)
    ) %>%
    # ggplot orders a character axis alphabetically and ignores row order, so
    # sorting the rows has no visible effect. Encode the descending ranking
    # as factor levels instead.
    dplyr::mutate(Movies = reorder(Movies, -Average_Review)) %>%
    ggplot(aes(x = Movies, y = Average_Review)) +
    geom_bar(aes(fill = Movies), position = "dodge", stat = "identity") +
    # Tilt the long movie names so they do not overlap.
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Average Rating By Movie")

 # Mean rating per reviewer, restricted to rows whose reviewer reports
 # having seen more than five movies.
 movies %>%
    dplyr::group_by(Reviewer) %>%
    filter(Number.of.Movies.Seen > 5) %>%
    dplyr::summarise(
        Average_Review = round(mean(Movie_rating, na.rm = TRUE), 2)
    ) %>%
    ggplot(aes(x = Reviewer, y = Average_Review)) +
    geom_bar(aes(fill = Reviewer), stat = "identity", position = "dodge") +
    # Tilt the reviewer names so they do not overlap.
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    labs(title = "Active Critics")

# Horizontal bar chart of each movie's mean rating, counting only ratings
# from active critics (more than five movies seen), highest at the top.
movies %>%
    # The title promises active critics only. Without this filter the chart
    # was just the all-reviewer per-movie average again.
    filter(Number.of.Movies.Seen > 5) %>%
    dplyr::group_by(Movies) %>%
    dplyr::summarise(
        Average_Review = round(mean(Movie_rating, na.rm = TRUE), 2)
    ) %>%
    # ggplot orders a character axis alphabetically and ignores row order, so
    # encode the ranking as factor levels; ascending levels put the best
    # score on top after coord_flip().
    dplyr::mutate(Movies = reorder(Movies, Average_Review)) %>%
    ggplot(aes(x = Movies, y = Average_Review)) +
    geom_bar(aes(fill = Movies), position = "dodge", stat = "identity") +
    coord_flip() +
    labs(title = "Active Critics Average Rating By Movie")