This is an R Markdown document for providing documentation for performing Data Exploration , Analysis & Recommendation Of A Movie Based On Survey Of Watchlist And Ratings
knitr::opts_chunk$set(message = FALSE, echo = TRUE)
# Libraries Used : DT , RMySQL, dplyr
# Library for data display in tabular format
library(DT)
# Library for db operations
library(RMySQL)
# Library for data structure operations
library(dplyr)mysql_dbconn <- function(schema, uid, pwd) {
rmysql.settingsfile <- "C:/ProgramData/MySQL/MySQL Server 5.6/my.ini"
conn <- dbConnect(RMySQL::MySQL(), dbname = schema, username = uid, password = pwd)
return(conn)
}conn <- mysql_dbconn("entertainment", "root", "WhiteLotus21")
# List the database tables in entertainment schema
entdbtables <- dbListTables(conn)The tables in entertainment schema are :
entdbtables## [1] "certificate" "genre" "movies" "survey"
This details the table columns in the entertainment schema, using RMySQL package functions.
for(i in entdbtables) # for each row
{
cat("\nTable Name : ", i,"\n")
coltab <- dbListFields(conn,i)
cat("Columns : ","\n",coltab,"\n")
if (i == "survey")
{
cat(" We have a SURVEY!!")
cat("\n\n")
querystr<-paste("select count(*) from ",i)
if(isIdCurrent(conn))
{
sres <- dbGetQuery(conn,querystr)
print(sres)
}
}
}##
## Table Name : certificate
## Columns :
## certificateid certificatedesc
##
## Table Name : genre
## Columns :
## genreid genretype
##
## Table Name : movies
## Columns :
## movieid moviename genre yearreleased leadcast dirrectedby countryreleased language watchtimemin certificate awards info
##
## Table Name : survey
## Columns :
## respondentid movieid rating responsedt
## We have a SURVEY!!
##
## count(*)
## 1 90
Loading movie data in data sets based on different genres
if (isIdCurrent(conn)) {
allmoviesdb <- dbGetQuery(conn, "select movieid, moviename, genre, genretype from movies, genre where movies.genre = genre.genreid")
allmovies <- subset(allmoviesdb, select = c(movieid, genre))
action <- dbGetQuery(conn, "select movieid, genre from movies where genre='A'")
horror <- dbGetQuery(conn, "select movieid, genre from movies where genre='H'")
animated <- dbGetQuery(conn, "select movieid, genre from movies where genre='N'")
adventure <- dbGetQuery(conn, "select movieid, genre from movies where genre='V'")
western <- dbGetQuery(conn, "select movieid, genre from movies where genre='W'")
}joinstr <- paste("select respondentid, survey.movieid, moviename, genre, rating from movies, survey where movies.movieid = survey.movieid")
smjoinres <- dbGetQuery(conn, joinstr)ratehi <- subset(smjoinres, rating >= 4)recommendation <- function(respid) {
respwatched <- subset(smjoinres, respondentid == respid, select = c(movieid,
genre))
respliked <- subset(ratehi, respondentid == respid, select = c(movieid, genre))
respnotwatched <- setdiff(allmovies, respwatched)
if (nrow(respliked) == 0) {
werecom.resp <- sample_n(subset(respnotwatched, genre %in% respwatched$genre),
1, replace = TRUE)
} else {
werecom.resp <- sample_n(subset(respnotwatched, genre %in% respliked$genre),
1, replace = FALSE)
}
return(werecom.resp$movieid)
}recomdf <- as.data.frame(matrix(ncol=3, nrow=length(unique(smjoinres$respondentid))))
dim(recomdf)## [1] 9 3
for(i in unique(smjoinres$respondentid)) # for each row
{
#cat("\n***********************************************************\n")
#cat ("Recommended to respondent ", smjoinres$respondentid[i]," : \n" )
recommovie <- recommendation(smjoinres$respondentid[i])
#print(recommovie)
recomdf[i,1] <- smjoinres$respondentid[i]
recomdf[i,2] <- subset(allmoviesdb, select = c(moviename), movieid == recommovie)
recomdf[i,3] <- subset(allmoviesdb, movieid == recommovie, select = genretype)
}Two PopCorns ānā A DVD Entertainment Inc.
datatable(smjoinres, options = list(searching = FALSE, pageLength = 5, lengthMenu = c(5,
10, 15, 20)), rownames = FALSE)colnames(recomdf) <- c("Respondent Id", "We Recommend You Watch", "Genre")
datatable(recomdf, options = list(searching = FALSE, pageLength = 5, lengthMenu = c(5,
10, 15, 20)), rownames = FALSE)dbDisconnect(conn)## [1] TRUE
#####################################################################