DATA 643 - Final Project Part-I Context Based Movie Recommendation System

Description

This assignment leverages the context pre filtering and makes it 2D and applies traditional recommender algorithms.

Data set source: https://github.com/irecsys/CARSKit/tree/master/context-aware_data_sets

Load Libraries

#Step1 - Load libraries
library(dplyr)
library(recommenderlab)
library(reshape2)
library(RCurl)
library(ggplot2)
library(base)

Binarize data.

# Step - Binarize.
binarize <- function(df, ratingFieldName) {
  #If the rating is >= 3, then we treat it as 1, else 0.
  
  df$ratingFieldName = ifelse(as.numeric(df[[ratingFieldName]]) >= 3, 1, 0)
  return (df)
}

Pre-Filtering technique

#3. Pre-Filtering - Prepare the predicates and Pre-Filter the contexts as per the match criteria.
filterByContext <- function (df, contexts, exactMatch){
  predicates <- sapply(contexts, function(context) {
    name  <-  context[1]
    value <-  context[2]
    paste0(name,"=='",value , "'")
  })
  
  #Now combine ALL given predicates
  if (exactMatch) {
    predicates <- do.call(paste, c(as.list(predicates), sep=" & "))  
  }
  else {
    predicates <- do.call(paste, c(as.list(predicates), sep=" | "))  
  }
  
  #Print
  print(predicates)
  
  #Filter Now.
  ( df.filtered <- df %>% filter_( predicates) )
}

Get real ratings matrix

#Get real ratings matrix from the sparse matrix
getRealRatingsMatrix <- function(sparseMatrix) {
  
  sparseMatrix<-as.matrix(sparseMatrix)
  
  #Lest check how sparse this matrix is.
  sparseMatrixTemp <- sparseMatrix
  is.na(sparseMatrixTemp) <- sparseMatrixTemp==0  
  
  print(paste0("Sparsity of the matrix:",      sum(is.na(sparseMatrixTemp))/(nrow(sparseMatrixTemp)*ncol(sparseMatrixTemp))))
  
  #Make it as real ratings matrix.
  sparseMatrix.realRatings <- as(sapply(data.frame(sparseMatrix),as.numeric), "realRatingMatrix")
  
  return(sparseMatrix.realRatings)
}

Split the data frame into train and test

#Split the data frame into train and test.
splitDF <- function(normRatingMat)
{
  ## 75% of the sample size
  smp_size <- floor(0.75 * nrow(normRatingMat))
  set.seed(123)
  train_ind <- sample(seq_len(nrow(normRatingMat)), size = smp_size)
  
  train.RatingMat <- normRatingMat[train_ind, ]
  test.RatingMat <- normRatingMat[-train_ind, ]
  
  
  return(list(trainDF=train.RatingMat, testDF=test.RatingMat))
}

Create the Model

#Create the Model for the rating matrix and method.
createModel <-function (ratingMat,algType)
{
  model <- Recommender(ratingMat, method = algType)
  names(getModel(model))
  getModel(model)$method
  
  getModel(model)$nn
  
  return (model)
}

Recommendations

#Provide the recommendations/predictions.
recommendations <- function(ratingMat, model, userID, n)
{
  ### predict top n recommendations for given user
  topN_recommendList <-predict(model,ratingMat[userID],n=n) 
  topN_recommendList@items[[1]]
  return(topN_recommendList)
}

Data Acquisition and preprocessing

#Data Acquisition and preprocessing
ratingsurl <- getURL("https://raw.githubusercontent.com/srajeev1/MSDA-IS643/master/projects/Project4/ratings.txt")
ratings <- read.csv(text = ratingsurl,header = TRUE, stringsAsFactors = FALSE)
knitr::kable(ratings[10:20,])

	userid	itemid	rating	Time	Location	Companion
10	1090	tt4052394	3	NA	NA	NA
11	1090	tt1596343	2	NA	NA	NA
12	1090	tt2574698	2	NA	NA	NA
13	1090	tt0454848	2	NA	NA	NA
14	1088	tt0405422	4	NA	NA	NA
15	1088	tt0378194	4	NA	NA	NA
16	1088	tt0211915	5	NA	NA	NA
17	1088	tt0169547	5	NA	NA	NA
18	1088	tt0133093	5	NA	NA	NA
19	1110	tt0454876	3	NA	NA	NA
20	1110	tt0181689	3	NA	NA	NA

# Step2. #binarize.
ranking.data <- binarize(ratings, "rating")
ranking.data$rating=ranking.data$ratingFieldName
knitr::kable(ranking.data[10:20,])

	userid	itemid	rating	Time	Location	Companion	ratingFieldName
10	1090	tt4052394	1	NA	NA	NA	1
11	1090	tt1596343	0	NA	NA	NA	0
12	1090	tt2574698	0	NA	NA	NA	0
13	1090	tt0454848	0	NA	NA	NA	0
14	1088	tt0405422	1	NA	NA	NA	1
15	1088	tt0378194	1	NA	NA	NA	1
16	1088	tt0211915	1	NA	NA	NA	1
17	1088	tt0169547	1	NA	NA	NA	1
18	1088	tt0133093	1	NA	NA	NA	1
19	1110	tt0454876	1	NA	NA	NA	1
20	1110	tt0181689	1	NA	NA	NA	1

#3. Initialize the context.
#Pre-filter contexts - sample context we are trying out is the Time as 'Weekend' and Location is 'Home'
contexts <- list(c('Time','Weekend'), c('Location','Home'))
ranking.data.context <- filterByContext(ranking.data, contexts, TRUE)

## [1] "Time=='Weekend' & Location=='Home'"

knitr::kable(ranking.data.context[10:20,])

	userid	itemid	rating	Time	Location	Companion	ratingFieldName
10	1067	tt1099212	0	Weekend	Home	Partner	0
11	1018	tt0110357	1	Weekend	Home	Family	1
12	1018	tt0111161	1	Weekend	Home	Family	1
13	1018	tt0169547	1	Weekend	Home	Family	1
14	1018	tt0367594	1	Weekend	Home	Family	1
15	1018	tt0133093	1	Weekend	Home	Family	1
16	1018	tt0120338	0	Weekend	Home	Alone	0
17	1018	tt1291150	0	Weekend	Home	Alone	0
18	1018	tt1099212	0	Weekend	Home	Alone	0
19	1018	tt0120338	0	Weekend	Home	Family	0
20	1018	tt1291150	0	Weekend	Home	Family	0

#4. Now convert into 2D and apply the traditional recommenders
rating.df <- ranking.data.context %>% select(userid, itemid, rating)
rating.wide <-dcast(rating.df, userid ~ itemid, value.var='rating', fun.aggregate= mean, na.rm=T)
dim(rating.wide)

## [1] 67 79

#5. This is a highly sparse matrix, so, get the real ratings matrix
realrating.matrix <- getRealRatingsMatrix(rating.wide)

## [1] "Sparsity of the matrix:0.890421311165691"

#6. Visualize it!
image(realrating.matrix, main = "Heatmap of the rating matrix")

#7. Split Dataframe
splitData <-splitDF(realrating.matrix)


#8. Prepare models
IBCF.model <- createModel(splitData$trainDF,"IBCF")
UBCF.model <- createModel(splitData$trainDF,"UBCF")

#Let us get the top 4 recommendations for user 1122 for the given context.

userID <- 10
topN <- 10
predict_list <-recommendations(splitData$trainDF, UBCF.model, userID, topN)
ranking.data.context[predict_list@items[[1]],c("itemid", "Time", "Location")]

##       itemid    Time Location
## 24 tt0232500 Weekend     Home
## 66 tt0462538 Weekend     Home
## 69 tt0462538 Weekend     Home
## 63 tt0169547 Weekend     Home
## 17 tt1291150 Weekend     Home
## 26 tt1055369 Weekend     Home
## 70 ttnanana1 Weekend     Home
## 51 tt0133093 Weekend     Home
## 14 tt0367594 Weekend     Home
## 60 tt0382625 Weekend     Home

userID <- 10
topN <- 10
predict_list <-recommendations(splitData$trainDF, IBCF.model, userID, topN)
ranking.data.context[predict_list@items[[1]],c("itemid", "Time", "Location")]

##       itemid    Time Location
## 70 ttnanana1 Weekend     Home
## 71 tt1232829 Weekend     Home
## 4  tt1375666 Weekend     Home
## 5  tt0110475 Weekend     Home
## 14 tt0367594 Weekend     Home
## 15 tt0133093 Weekend     Home
## 21 tt1099212 Weekend     Home
## 22 tt0319262 Weekend     Home
## 23 tt0441773 Weekend     Home
## 26 tt1055369 Weekend     Home

DATA 643 - Final Project Part-I
Context Based Movie Recommendation System

Sreejaya Nair, Suman K Polavarapu

7/18/2016

Description

Load Libraries

Binarize data.

Pre-Filtering technique

Get real ratings matrix

Split the data frame into train and test

Create the Model

Recommendations

Data Acquisition and preprocessing

DATA 643 - Final Project Part-IContext Based Movie Recommendation System

Sreejaya Nair, Suman K Polavarapu

7/18/2016

Description

Load Libraries

Binarize data.

Pre-Filtering technique

Get real ratings matrix

Split the data frame into train and test

Create the Model

Recommendations

Data Acquisition and preprocessing

DATA 643 - Final Project Part-I
Context Based Movie Recommendation System