#problemstatement: "Choose six recent popular movies. Ask at least five people that you know (friends, family, classmates, imaginary friends) to rate each of these movie that they have seen on a scale of 1 to 5. Take the results (observations) and store them in a SQL database. Load the information into an R dataframe. Your deliverables should include your SQL scripts and your R Markdown code, posted to GitHub."
#Import the Required Libraries
library("RMySQL")
## Loading required package: DBI
library("DT")
library(plotrix)
library(data.table)
#I was trying to embed api code to get realtime data on movie but they are al paid Commenting them out in next line.
#devtools::install_github("hrbrmstr/omdbapi")
# I created 2 tables for this task:- 1. Movies containing movies reviewed and 2.Review results
movieconn = dbConnect(MySQL(), user = Sys.getenv("dbloginid"), password = Sys.getenv("dbpw"), dbname='movies', host='localhost')
dbListTables(movieconn)
## [1] "Movies" "Reviews"
# Showing Data in movies table
mvs <- dbSendQuery(movieconn, "select * from Movies;")
movies <- fetch(mvs)
datatable ( movies )
# Showing Data in Reviews table
rw <- dbSendQuery(movieconn, "select * from Reviews;")
rws <- fetch(rw)
datatable ( rws )
#Disconncet from DB
dbDisconnect(movieconn)
## Warning: Closing open result sets
## [1] TRUE
# Adding mean rating for each movie in the dataframe
setDT(rws)[, Mean_Rating := round(mean(Rating),digits = 1), by = Movie_Name]
datatable ( rws )
# Get Unique Rows for Each movie with Rating and selecting only Movie_Name and Mean rating
rws_uniq <- rws[!duplicated(rws[,'Movie_Name']),c(1,5)]
datatable ( rws_uniq )
#Creating Dataframe with all the movies and their mean rating and genre
final_dataset <- merge(rws_uniq, movies, by="Movie_Name")
final_dataset
## Movie_Name Mean_Rating Release_Date Genre
## 1: Den of Thieves 3.8 2018-01-19 Action
## 2: Golden Exits 4.2 2018-02-09 Drama
## 3: Hostiles 3.0 2018-01-19 Drama
## 4: JUMANJI: Welcome To The Jungle 3.5 2017-12-20 Adventure
## 5: Maze Runner: The Death Cure 3.3 2018-01-26 Fiction
## 6: The Post 3.8 2018-01-12 Thriller
final_dataset$Movie_Name
## [1] "Den of Thieves" "Golden Exits"
## [3] "Hostiles" "JUMANJI: Welcome To The Jungle"
## [5] "Maze Runner: The Death Cure" "The Post"
#Making a pie chart of data
slices <- c(final_dataset$Mean_Rating)
lbls <- c(final_dataset$Movie_Name)
mnr <- final_dataset$Mean_Rating # Adding Mean Rating in the label
gnr <- final_dataset$Genre
nl <- "\n"
lbls <- paste(lbls,"(", mnr,")",nl, gnr)
pie(slices, labels = lbls, main="Pie Chart of Movies with ratings and Genre", cex=0.7)
