Building a system that recommends movies to users using collaborative filtering.
Building my own data set.
# Toy ratings matrix: one row per user, one column per movie.
# NA marks a movie the user has not rated yet.
myMovieDataSet <- matrix(
  c( 2, NA,  3,
     5,  2, NA,
     3,  3,  1,
    NA,  2,  2),
  nrow = 4,
  byrow = TRUE,  # spelled out: `T` is an ordinary variable that can be reassigned
  dimnames = list(
    c("U1", "U2", "U3", "U4"),  # Users
    c("M1", "M2", "M3")         # Movies
  )
)
myMovieDataSet
## M1 M2 M3
## U1 2 NA 3
## U2 5 2 NA
## U3 3 3 1
## U4 NA 2 2
# Working copy used by the rest of the script.
movies <- myMovieDataSet
Here NA means that the user has not rated this movie yet, so the missing ratings are what we will predict.
Ratings are assumed to range from 1 to 5.
Writing a user-defined function that computes the similarity between two rating vectors
# Cosine-style similarity between two rating vectors of equal length.
#
# m1, m2: numeric vectors (or one-column/one-row matrices); NA = not rated.
#
# The numerator only counts positions where both ratings are present, while
# each norm in the denominator uses every non-NA entry of its own vector.
# This is the same as treating a missing rating as 0.
#
# Returns a single numeric value; NaN if either vector has no non-zero,
# non-NA entry (0 / 0).
findSimilarity <- function(m1, m2) {
  dot_product <- sum(m1 * m2, na.rm = TRUE)
  norm_product <- sqrt(sum(m1^2, na.rm = TRUE) * sum(m2^2, na.rm = TRUE))
  # Last expression is the value itself (not an assignment), so the result
  # is returned visibly.
  dot_product / norm_product
}
# Movie-to-movie similarity matrix.
#
# The matrix is square with one row and one column per movie, and entry
# [i, j] compares the rating COLUMNS of movie i and movie j. (Comparing
# movies[i, ] with movies[j, ] would compare users, not movies, and would
# need an extra row dropped to become square.)
n_movies <- ncol(movies)

# Building matrix initializing with NAs
weighted_matrix <- data.frame(
  matrix(NA_real_, nrow = n_movies, ncol = n_movies)
)
for (i in seq_len(n_movies)) {
  for (j in seq_len(n_movies)) {
    if (i == j) {
      # Movie with same co-ordinate will be one
      weighted_matrix[i, j] <- 1
    } else {
      weighted_matrix[i, j] <- findSimilarity(movies[, i], movies[, j])
    }
  }
}
# Both dimensions are indexed by movie.
colnames(weighted_matrix) <- colnames(movies)
rownames(weighted_matrix) <- colnames(movies)
weighted_matrix
## M1 M2 M3
## M1 1.0000000 0.7475450 0.3901995
## M2 0.7475450 1.0000000 0.4537426
## M3 0.3901995 0.4537426 1.0000000
# A movie must not contribute to its own prediction, so blank the diagonal.
diag(weighted_matrix) <- NA
# NOTE: results printed later in this document that were produced from the
# earlier user-to-user version of this matrix (e.g. predictions) need to be
# regenerated.
Writing a function that uses the similarity matrix built above to make a prediction
# Predict the rating `user` would give `movie` as a similarity-weighted
# average of that user's existing ratings.
#
# user:  row name (or index) into the global `movies` ratings matrix.
# movie: column name (or index) into the global `weighted_matrix`.
#
# Only movies that the user has rated AND that have a non-NA weight take
# part, in both the numerator and the denominator. Summing every weight in
# the denominator would count movies the user never rated and pull the
# prediction toward 0.
#
# Returns a single numeric value, or NA_real_ when there is no usable
# rating/weight pair to base a prediction on.
predictUserDefined <- function(user, movie) {
  user_ratings <- unlist(movies[user, ], use.names = FALSE)
  movie_weights <- unlist(weighted_matrix[, movie], use.names = FALSE)

  usable <- !is.na(user_ratings) & !is.na(movie_weights)
  if (!any(usable)) {
    return(NA_real_)
  }

  sum(user_ratings[usable] * movie_weights[usable]) / sum(movie_weights[usable])
}
# Example: predicted rating of user U1 for movie M2, which U1 has not rated.
predictUserDefined(user = "U1", movie = "M2")
## [1] 2.634644
Calculating the same prediction with the recommenderlab package
library(recommenderlab)
# Convert the plain ratings matrix into recommenderlab's rating class.
rrm <- as(myMovieDataSet, "realRatingMatrix")
# User-based collaborative filtering (UBCF) with cosine similarity on
# mean-centered ratings. The argument is spelled `parameter` in full rather
# than relying on partial matching of `param`.
recc_model <- Recommender(
  data = rrm,
  method = "UBCF",
  parameter = list(method = "Cosine", normalize = "center")
)
# type = "ratings" asks for predicted ratings rather than a top-N list.
recom <- predict(recc_model, rrm, type = "ratings")
recom <- as(recom, "matrix")
# Predicted rating of user U1 for movie M2.
recom["U1", "M2"]
## [1] 2.42703