DATA 643 Recommender System

This recommender system uses the MovieLense data set from the recommenderlab package. It recommends the top 5 movies to randomly selected users using item-based collaborative filtering.

## Loading required package: Matrix

## Loading required package: arules

## 
## Attaching package: 'arules'

## The following objects are masked from 'package:base':
## 
##     abbreviate, write

## Loading required package: proxy

## 
## Attaching package: 'proxy'

## The following object is masked from 'package:Matrix':
## 
##     as.matrix

## The following objects are masked from 'package:stats':
## 
##     as.dist, dist

## The following object is masked from 'package:base':
## 
##     as.matrix

## Loading required package: registry

## 
## Attaching package: 'dplyr'

## The following objects are masked from 'package:arules':
## 
##     intersect, recode, setdiff, setequal, union

## The following objects are masked from 'package:stats':
## 
##     filter, lag

## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union

## Loading required package: SnowballC

## 
## Attaching package: 'lsa'

## The following object is masked from 'package:dplyr':
## 
##     query

Load data

# Bring the MovieLense ratings object into the session.
data(MovieLense)

# Extract the object's `data` slot and convert it to a plain matrix.
movies <- as.matrix(slot(MovieLense, "data"))

Let’s explore the data

# Report the size of the ratings matrix as (rows, columns).
dim(movies)

## [1]  943 1664

The data contains 943 users with ratings on 1664 movies.

Normalize data

The dataset will be normalized to standard scores (z-scores) using the global mean and standard deviation of all observed ratings.

# Treat zero entries as missing ratings so they are left out of the
# statistics computed below.
movies[movies == 0] <- NA

# Standardize every observed rating with a single mean and standard deviation
# taken over the whole matrix (not per user or per movie).
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
mmean <- mean(movies, na.rm = TRUE)
msd <- sd(movies, na.rm = TRUE)
moviesnorm <- (movies - mmean) / msd

Generate similarity matrix

We will generate an item-to-item similarity matrix using the cosine similarity between the movies' rating columns.

library(lsa)

# Work on a copy in which missing ratings are put back as 0, so the
# similarity computation receives a matrix without NA values.
movietemp <- movies
movietemp[is.na(movietemp)] <- 0

# movietemp is already a plain matrix, so it is passed to cosine() directly;
# the result is a movies x movies similarity matrix.
movie_sim_mtx <- cosine(movietemp)

# Optional visual checks of the similarity matrix (left disabled):
#image(as.matrix(movie_sim_mtx))
#library(corrplot)
#corrplot(as.matrix(movie_sim_mtx), is.corr = FALSE, method = "color")

Create function to return top 5 recommendations for a user

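Predict a rating for every movie the user has not rated (the NA values), then return the five movies with the highest predicted ratings.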

#' Recommend up to five unrated movies for one user (item-based CF).
#'
#' For every movie j the user has not rated, the predicted score is the
#' similarity-weighted average of the user's own normalized ratings:
#'   score(j) = sum_k sim(j, k) * r(u, k) / sum_k sim(j, k)
#' where k runs over the movies the user HAS rated.
#'
#' Reads two globals: `moviesnorm` (users x movies, NA = not rated, with
#' movie names as column names) and `movie_sim_mtx` (movies x movies
#' similarities in the same movie order).
#'
#' @param u Row index of the user in `moviesnorm`.
#' @return Character vector with at most five movie names, best score first.
#'   Empty when the user has rated every movie or none at all.
getTopFive <- function(u) {
  # normalized ratings of THIS user (NA where the user gave no rating)
  user_ratings <- moviesnorm[u, ]
  movie_names <- names(user_ratings)

  movies_user_not_rated <- which(is.na(user_ratings))
  movies_user_rated <- which(!is.na(user_ratings))

  # nothing to recommend, or nothing to base a prediction on
  if (length(movies_user_not_rated) == 0L ||
      length(movies_user_rated) == 0L) {
    return(character(0))
  }

  # Similarities between each unrated movie (rows) and each rated movie
  # (columns). Rows are selected by the movie's own index, not by the
  # position inside the "not rated" list.
  sim_block <- movie_sim_mtx[movies_user_not_rated, movies_user_rated,
                             drop = FALSE]

  # weighted sum of the user's ratings and the matching sum of weights
  weighted_sum <- as.vector(sim_block %*% user_ratings[movies_user_rated])
  weight_total <- rowSums(sim_block)

  predicted <- weighted_sum / weight_total
  # a zero (or undefined) weight total gives Inf/NaN; treat it as "no score"
  # so such movies sort to the end instead of the top
  predicted[!is.finite(predicted)] <- NA_real_

  # highest predicted score first; NA scores are placed last by order()
  ranking <- order(-predicted)
  head(movie_names[movies_user_not_rated][ranking], n = 5)
}

Randomly pick 10 users to display their top 5 movie picks

# Build a table with the top 5 recommendations for 10 randomly chosen users.

# Randomly pick 10 users; the seed makes the selection repeatable.
set.seed(123)
users <- sample(seq_len(nrow(moviesnorm)), 10, replace = FALSE)

# One row per selected user: the user index followed by five movie names.
recommends <- data.frame(matrix(nrow = length(users), ncol = 6))
names(recommends) <- c("User", "Top1", "Top2", "Top3", "Top4", "Top5")

for (i in seq_along(users)) {
  top5 <- getTopFive(users[i])
  # as.list() keeps the numeric user index and the character movie names
  # as separate column values instead of coercing them into one vector
  recommends[i, ] <- c(users[i], as.list(top5))
}

# print out the recommendations
kable(recommends)

User	Top1	Top3	Top3	Top4	Top5
272	Good Morning (1971)	Locusts, The (1997)	Anna (1996)	Jupiter’s Wife (1994)	Return of the Pink Panther, The (1974)
743	Buddy (1997)	Big One, The (1997)	Favor, The (1994)	Prefontaine (1997)	Dadetown (1995)
385	Smilla’s Sense of Snow (1997)	Silence of the Palace, The (Saimt el Qusur) (1994)	Underground (1995)	Price Above Rubies, A (1998)	Jean de Florette (1986)
831	Jury Duty (1995)	American Dream (1990)	Mirage (1995)	Gone with the Wind (1939)	Two Friends (1986)
884	Family Thing, A (1996)	8 Heads in a Duffel Bag (1997)	Collectionneuse, La (1967)	New Age, The (1994)	Hana-bi (1997)
43	Rhyme & Reason (1997)	Hotel de Love (1996)	Vermont Is For Lovers (1992)	Bitter Sugar (Azucar Amargo) (1996)	It Takes Two (1995)
495	Enfer, L’ (1994)	JLG/JLG - autoportrait de decembre (1994)	Boys, Les (1997)	Clean Slate (Coup de Torchon) (1981)	Secret Adventures of Tom Thumb, The (1993)
836	Stranger, The (1994)	Boys, Les (1997)	Condition Red (1995)	Little City (1998)	Willy Wonka and the Chocolate Factory (1971)
516	1-900 (1994)	Dunston Checks In (1996)	Story of Xinghua, The (1993)	Bitter Sugar (Azucar Amargo) (1996)	Nowhere (1997)
427	Kazaam (1996)	Larger Than Life (1996)	Angel Baby (1995)	Mostro, Il (1994)	Hana-bi (1997)

Recommendations from recommenderlab's built-in IBCF recommender

# Movie names in column order, used to translate item indices into titles.
movie_names <- colnames(movies)

# Fit recommenderlab's item-based collaborative filtering model (cosine
# similarity) and ask for 5 recommendations per user.
recsys <- Recommender(data = MovieLense, method = "IBCF",
                      parameter = list(method = "Cosine"))
recsysTop5 <- predict(object = recsys, newdata = MovieLense, n = 5)
top5items <- recsysTop5@items

# Movie names for entry 22 of the prediction list (not used by the table
# built below).
top5builtin <- movie_names[unlist(top5items[22])]

# Table of the built-in top 5 for the same users picked earlier. The column
# names are set on recommends_builtin itself so the printed table has proper
# headers.
recommends_builtin <- data.frame(matrix(nrow = length(users), ncol = 6))
names(recommends_builtin) <- c("User", "Top1", "Top2", "Top3", "Top4", "Top5")

for (i in seq_along(users)) {
  top5 <- movie_names[top5items[[users[i]]]]
  recommends_builtin[i, ] <- c(users[i], as.list(top5))
}

kable(recommends_builtin)

X1	X2	X3	X4	X5	X6
272	Postino, Il (1994)	From Dusk Till Dawn (1996)	Crimson Tide (1995)	Eat Drink Man Woman (1994)	Three Colors: Blue (1993)
743	Usual Suspects, The (1995)	Muppet Treasure Island (1996)	Eat Drink Man Woman (1994)	Hoop Dreams (1994)	Pulp Fiction (1994)
385	Return of the Pink Panther, The (1974)	FairyTale: A True Story (1997)	Midnight in the Garden of Good and Evil (1997)	Hard Rain (1998)	Wonderland (1997)
831	From Dusk Till Dawn (1996)	Rumble in the Bronx (1995)	Batman Forever (1995)	To Wong Foo, Thanks for Everything! Julie Newmar (1995)	Natural Born Killers (1994)
884	GoldenEye (1995)	Richard III (1995)	Mighty Aphrodite (1995)	From Dusk Till Dawn (1996)	Muppet Treasure Island (1996)
43	Mighty Aphrodite (1995)	Crumb (1994)	Haunted World of Edward D. Wood Jr., The (1995)	Supercop (1992)	Sleeper (1973)
495	French Twist (Gazon maudit) (1995)	Crumb (1994)	Desperado (1995)	Faster Pussycat! Kill! Kill! (1965)	Carlito’s Way (1993)
836	GoldenEye (1995)	Four Rooms (1995)	Dead Man Walking (1995)	From Dusk Till Dawn (1996)	Muppet Treasure Island (1996)
516	Four Rooms (1995)	Twelve Monkeys (1995)	Postino, Il (1994)	Batman Forever (1995)	Eat Drink Man Woman (1994)
427	Twelve Monkeys (1995)	Dead Man Walking (1995)	Mr. Holland’s Opus (1995)	Faster Pussycat! Kill! Kill! (1965)	Brother Minister: The Assassination of Malcolm X (1994)

After running both my own recommender algorithm and the built-in one, I found that the built-in version processes the data much faster. Unfortunately, the outputs of the two systems do not match, which tells me that I should improve the system I have created.