Content based and Collaborative filtering

Part 2.1
Part 2.3

library(tidyverse)

## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──

## ✔ ggplot2 3.1.0       ✔ purrr   0.3.0  
## ✔ tibble  2.0.1       ✔ dplyr   0.8.0.1
## ✔ tidyr   0.8.2       ✔ stringr 1.4.0  
## ✔ readr   1.3.1       ✔ forcats 0.4.0

## Warning: package 'tibble' was built under R version 3.5.2

## Warning: package 'purrr' was built under R version 3.5.2

## Warning: package 'dplyr' was built under R version 3.5.2

## Warning: package 'stringr' was built under R version 3.5.2

## Warning: package 'forcats' was built under R version 3.5.2

## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()

library(janitor)

## Warning: package 'janitor' was built under R version 3.5.2

## 
## Attaching package: 'janitor'

## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

library(stringr)
library(stats)
library(crayon)

## 
## Attaching package: 'crayon'

## The following object is masked from 'package:ggplot2':
## 
##     %+%

library(COSINE)

## Loading required package: MASS

## 
## Attaching package: 'MASS'

## The following object is masked from 'package:dplyr':
## 
##     select

## Loading required package: genalg

library(textreuse)

## 
## Attaching package: 'textreuse'

## The following object is masked from 'package:readr':
## 
##     tokenize

library(sqldf)

## Loading required package: gsubfn

## Loading required package: proto

## Loading required package: RSQLite

library(data.table)

## Warning: package 'data.table' was built under R version 3.5.2

## 
## Attaching package: 'data.table'

## The following objects are masked from 'package:dplyr':
## 
##     between, first, last

## The following object is masked from 'package:purrr':
## 
##     transpose

Part 2.1

# Build a corpus of word 5-gram shingles from every file in the corpus folder.
# keep_tokens = TRUE retains the shingles so they can be counted and inspected.
files <- list.files(
  "/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/corpus/",
  full.names = TRUE
)

corpus <- TextReuseCorpus(
  files,
  tokenizer = tokenize_ngrams,
  n = 5,
  keep_tokens = TRUE
)
names(corpus)

##   [1] "g0pA_taska" "g0pA_taskb" "g0pA_taskc" "g0pA_taskd" "g0pA_taske"
##   [6] "g0pB_taska" "g0pB_taskb" "g0pB_taskc" "g0pB_taskd" "g0pB_taske"
##  [11] "g0pC_taska" "g0pC_taskb" "g0pC_taskc" "g0pC_taskd" "g0pC_taske"
##  [16] "g0pD_taska" "g0pD_taskb" "g0pD_taskc" "g0pD_taskd" "g0pD_taske"
##  [21] "g0pE_taska" "g0pE_taskb" "g0pE_taskc" "g0pE_taskd" "g0pE_taske"
##  [26] "g1pA_taska" "g1pA_taskb" "g1pA_taskc" "g1pA_taskd" "g1pA_taske"
##  [31] "g1pB_taska" "g1pB_taskb" "g1pB_taskc" "g1pB_taskd" "g1pB_taske"
##  [36] "g1pD_taska" "g1pD_taskb" "g1pD_taskc" "g1pD_taskd" "g1pD_taske"
##  [41] "g2pA_taska" "g2pA_taskb" "g2pA_taskc" "g2pA_taskd" "g2pA_taske"
##  [46] "g2pB_taska" "g2pB_taskb" "g2pB_taskc" "g2pB_taskd" "g2pB_taske"
##  [51] "g2pC_taska" "g2pC_taskb" "g2pC_taskc" "g2pC_taskd" "g2pC_taske"
##  [56] "g2pE_taska" "g2pE_taskb" "g2pE_taskc" "g2pE_taskd" "g2pE_taske"
##  [61] "g3pA_taska" "g3pA_taskb" "g3pA_taskc" "g3pA_taskd" "g3pA_taske"
##  [66] "g3pB_taska" "g3pB_taskb" "g3pB_taskc" "g3pB_taskd" "g3pB_taske"
##  [71] "g3pC_taska" "g3pC_taskb" "g3pC_taskc" "g3pC_taskd" "g3pC_taske"
##  [76] "g4pB_taska" "g4pB_taskb" "g4pB_taskc" "g4pB_taskd" "g4pB_taske"
##  [81] "g4pC_taska" "g4pC_taskb" "g4pC_taskc" "g4pC_taskd" "g4pC_taske"
##  [86] "g4pD_taska" "g4pD_taskb" "g4pD_taskc" "g4pD_taskd" "g4pD_taske"
##  [91] "g4pE_taska" "g4pE_taskb" "g4pE_taskc" "g4pE_taskd" "g4pE_taske"
##  [96] "orig_taska" "orig_taskb" "orig_taskc" "orig_taskd" "orig_taske"

2.1 a

# 2.1 a: number of shingles per document, then the grand total over the corpus.
total.tokens <- tokens(corpus)
total.len <- vapply(total.tokens, length, integer(1))
sum(total.len)

## [1] 22033

2.1 b

Dimensions of the characteristic matrix: [22033 x 100] [number of shingles x number of documents]

2.1 c

# 2.1 c: pull the document named "orig_taske" out of the corpus and show the
# first five of its shingles.
doc<-corpus[["orig_taske"]]
tokens(doc)[1:5]

## [1] "in mathematics and computer science"     
## [2] "mathematics and computer science dynamic"
## [3] "and computer science dynamic programming"
## [4] "computer science dynamic programming is" 
## [5] "science dynamic programming is a"

2.1 d

The reduction in the size of the problem will be 98.63%.

Part 2.2

User ID: 20395743 mod 671 = 27

Content based filtering

# install.packages("dplyr")
library(dplyr)
library(sqldf)

# The working directory is switched to the data folder; the relative
# read.csv()/write.csv() calls in this section resolve against it.
setwd("/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/ml-latest-small/")
ratings1 <- read.csv("ratings.csv", sep = ",", header = TRUE)
movies1 <- read.csv("movies.csv", sep = ",", header = TRUE)

# Attach title/genres to every rating row.
combine <- left_join(ratings1, movies1, by = "movieId")

head(combine)

# All movies rated by the target user (id 27).
user_27 <- sqldf(x = "select * from combine where userid = '27'")
user_27

# Split the pipe-separated genre string of each rated movie into one column
# per genre position.
genres <- as.data.frame(user_27$genres, stringsAsFactors = FALSE)
genres2 <- as.data.frame(
  tstrsplit(genres[, 1], "[|]", type.convert = TRUE),
  stringsAsFactors = FALSE
)
genre_list <- c(
  "Action", "Adventure", "Animation", "Children", "Comedy", "Crime",
  "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "IMAX",
  "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
  "(no genres listed)"
)

# One row per rated movie plus a header row holding the genre names, and one
# column per genre. Writing the names into row 1 turns the whole matrix into
# character, so the flags end up as "0"/"1".
genre_matrix <- matrix(0, nrow(user_27) + 1, 20)
genre_matrix[1, ] <- genre_list # set first row to genre list
colnames(genre_matrix) <- genre_list # set column names to genre list
for (i in seq_len(nrow(user_27))) {
  # Look up every genre of movie i in one go; entries that are missing or not
  # in genre_list yield NA and are skipped.
  row_genres <- vapply(genres2[i, ], as.character, character(1))
  genmat_col <- match(row_genres, genre_list)
  genre_matrix[i + 1, genmat_col[!is.na(genmat_col)]] <- 1
}
genre_matrix

##       Action   Adventure   Animation   Children   Comedy   Crime  
##  [1,] "Action" "Adventure" "Animation" "Children" "Comedy" "Crime"
##  [2,] "0"      "0"         "0"         "0"        "0"      "1"    
##  [3,] "0"      "0"         "0"         "0"        "0"      "0"    
##  [4,] "0"      "0"         "0"         "0"        "0"      "1"    
##  [5,] "0"      "0"         "0"         "0"        "1"      "1"    
##  [6,] "0"      "0"         "0"         "0"        "0"      "0"    
##  [7,] "0"      "0"         "0"         "0"        "0"      "1"    
##  [8,] "0"      "0"         "0"         "0"        "0"      "1"    
##  [9,] "0"      "0"         "0"         "0"        "0"      "0"    
## [10,] "0"      "0"         "0"         "0"        "0"      "0"    
## [11,] "1"      "1"         "0"         "0"        "0"      "0"    
## [12,] "0"      "0"         "0"         "0"        "1"      "0"    
## [13,] "0"      "0"         "0"         "0"        "0"      "0"    
## [14,] "0"      "0"         "0"         "0"        "0"      "0"    
## [15,] "0"      "0"         "0"         "0"        "0"      "0"    
## [16,] "0"      "0"         "0"         "0"        "1"      "0"    
## [17,] "0"      "0"         "0"         "0"        "0"      "1"    
## [18,] "0"      "0"         "0"         "0"        "1"      "0"    
## [19,] "0"      "0"         "0"         "0"        "0"      "0"    
## [20,] "0"      "0"         "0"         "0"        "1"      "1"    
## [21,] "0"      "0"         "0"         "0"        "0"      "0"    
## [22,] "0"      "0"         "0"         "0"        "0"      "0"    
## [23,] "0"      "0"         "0"         "0"        "0"      "0"    
## [24,] "0"      "0"         "0"         "0"        "0"      "0"    
##       Documentary   Drama   Fantasy   Film-Noir   Horror   IMAX  
##  [1,] "Documentary" "Drama" "Fantasy" "Film-Noir" "Horror" "IMAX"
##  [2,] "0"           "0"     "0"       "0"         "0"      "0"   
##  [3,] "0"           "1"     "0"       "0"         "0"      "0"   
##  [4,] "0"           "1"     "0"       "0"         "0"      "0"   
##  [5,] "0"           "1"     "0"       "0"         "0"      "0"   
##  [6,] "0"           "1"     "0"       "0"         "0"      "0"   
##  [7,] "0"           "0"     "0"       "0"         "1"      "0"   
##  [8,] "0"           "1"     "0"       "0"         "0"      "0"   
##  [9,] "0"           "0"     "0"       "0"         "0"      "0"   
## [10,] "0"           "1"     "0"       "0"         "0"      "0"   
## [11,] "0"           "0"     "0"       "0"         "0"      "0"   
## [12,] "0"           "1"     "0"       "0"         "0"      "0"   
## [13,] "0"           "1"     "0"       "0"         "0"      "0"   
## [14,] "0"           "1"     "0"       "0"         "0"      "0"   
## [15,] "0"           "0"     "0"       "0"         "0"      "0"   
## [16,] "0"           "0"     "0"       "0"         "0"      "0"   
## [17,] "0"           "1"     "0"       "0"         "0"      "0"   
## [18,] "0"           "1"     "0"       "0"         "0"      "0"   
## [19,] "0"           "1"     "0"       "0"         "0"      "0"   
## [20,] "0"           "0"     "0"       "0"         "0"      "0"   
## [21,] "0"           "1"     "0"       "0"         "1"      "0"   
## [22,] "0"           "1"     "0"       "0"         "0"      "0"   
## [23,] "0"           "1"     "0"       "0"         "1"      "0"   
## [24,] "0"           "1"     "0"       "0"         "0"      "0"   
##       Musical   Mystery   Romance   Sci-Fi   Thriller   War   Western  
##  [1,] "Musical" "Mystery" "Romance" "Sci-Fi" "Thriller" "War" "Western"
##  [2,] "0"       "1"       "0"       "0"      "1"        "0"   "0"      
##  [3,] "0"       "0"       "0"       "0"      "0"        "1"   "0"      
##  [4,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
##  [5,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
##  [6,] "0"       "0"       "0"       "0"      "0"        "1"   "0"      
##  [7,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
##  [8,] "0"       "0"       "0"       "0"      "0"        "0"   "0"      
##  [9,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
## [10,] "0"       "0"       "0"       "1"      "0"        "0"   "0"      
## [11,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
## [12,] "0"       "0"       "1"       "0"      "0"        "0"   "0"      
## [13,] "0"       "0"       "1"       "0"      "0"        "0"   "0"      
## [14,] "0"       "0"       "1"       "0"      "0"        "0"   "0"      
## [15,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
## [16,] "0"       "0"       "0"       "0"      "0"        "0"   "0"      
## [17,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
## [18,] "0"       "0"       "1"       "0"      "0"        "0"   "0"      
## [19,] "0"       "0"       "0"       "0"      "0"        "0"   "0"      
## [20,] "0"       "0"       "0"       "0"      "0"        "0"   "0"      
## [21,] "0"       "0"       "0"       "0"      "1"        "0"   "0"      
## [22,] "0"       "1"       "0"       "0"      "1"        "0"   "0"      
## [23,] "0"       "1"       "0"       "0"      "0"        "0"   "0"      
## [24,] "0"       "0"       "1"       "0"      "0"        "0"   "0"      
##       (no genres listed)  
##  [1,] "(no genres listed)"
##  [2,] "0"                 
##  [3,] "0"                 
##  [4,] "0"                 
##  [5,] "0"                 
##  [6,] "0"                 
##  [7,] "0"                 
##  [8,] "0"                 
##  [9,] "0"                 
## [10,] "0"                 
## [11,] "0"                 
## [12,] "0"                 
## [13,] "0"                 
## [14,] "0"                 
## [15,] "0"                 
## [16,] "0"                 
## [17,] "0"                 
## [18,] "0"                 
## [19,] "0"                 
## [20,] "0"                 
## [21,] "0"                 
## [22,] "0"                 
## [23,] "0"                 
## [24,] "0"

# Remove the first row (the genre names). drop = FALSE keeps the result a
# matrix even when only a single movie row remains.
genre_matrix2 <- as.data.frame(
  genre_matrix[-1, , drop = FALSE],
  stringsAsFactors = FALSE
)
# Convert the "0"/"1" strings to integers, column by column.
genre_matrix2[] <- lapply(genre_matrix2, as.integer)
genre_matrix2

# Prefix the genre flags with the movie id, then append a "Sum" row.
# colSums() also totals the movie-id column; that value is meaningless and is
# excluded again below when columns 3:22 are selected.
genre_matrix2 <- data.frame(user_27$movieId, genre_matrix2)
genre_matrix2["Sum", ] <- colSums(genre_matrix2)
options(digits = 2)

# Average genre weight = genre count / number of movies the user rated.
# The divisor is taken from the data instead of being hard-coded.
n_rated <- nrow(user_27)
user.df <- genre_matrix2[nrow(genre_matrix2), ] / n_rated
user.df

genre_matrix2["Average", ] <- user.df
genre_matrix2

# Persist the profile and read it back. In the reloaded table column 1 holds
# the row names written by write.csv(), column 2 the movie id and columns
# 3:22 the twenty genres.
write.csv(genre_matrix2, "userprofile.csv")
userprofile <- read.csv("userprofile.csv", sep = ",", header = TRUE)

# The last row is "Average": keep its genre columns as a named numeric vector.
user <- userprofile[nrow(userprofile), 3:22]
user <- apply(X = user, 2, mean)

## Building MOVIE PROFILE 
library(lsa)

## Loading required package: SnowballC

## Warning: package 'SnowballC' was built under R version 3.5.2

# Draw 10 movies at random as recommendation candidates.
df <- movies1[sample(nrow(movies1), 10), ]
df

# Split the pipe-separated genre strings of the sampled movies.
genres <- as.data.frame(df$genres, stringsAsFactors = FALSE)
library(data.table)
genres2 <- as.data.frame(
  tstrsplit(genres[, 1], "[|]", type.convert = TRUE),
  stringsAsFactors = FALSE
)
genre_list <- c(
  "Action", "Adventure", "Animation", "Children", "Comedy", "Crime",
  "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "IMAX",
  "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
  "(no genres listed)"
)

# Header row with the genre names plus one 0/1 row per sampled movie.
genre_matrix <- matrix(0, nrow(df) + 1, 20)
genre_matrix[1, ] <- genre_list # set first row to genre list
colnames(genre_matrix) <- genre_list # set column names to genre list
for (i in seq_len(nrow(df))) {
  # Entries that are missing or not in genre_list yield NA and are skipped.
  row_genres <- vapply(genres2[i, ], as.character, character(1))
  genmat_col <- match(row_genres, genre_list)
  genre_matrix[i + 1, genmat_col[!is.na(genmat_col)]] <- 1
}

# Remove the header row (drop = FALSE keeps a matrix for a single movie) and
# convert the "0"/"1" strings to integers.
genre_matrix2 <- as.data.frame(
  genre_matrix[-1, , drop = FALSE],
  stringsAsFactors = FALSE
)
genre_matrix2[] <- lapply(genre_matrix2, as.integer)
genre_matrix2 <- data.frame(df$movieId, genre_matrix2)
genre_matrix2

# Persist the movie profile and read it back. In the reloaded table column 1
# holds the row names written by write.csv(), column 2 the movie id and
# columns 3:22 the twenty genre flags.
write.csv(genre_matrix2, "movieprofile.csv")
movieprofile <- read.csv("movieprofile.csv", sep = ",", header = TRUE)
options(digits = 4)
movie <- as.vector(movieprofile[, 3:22])

# Cosine similarity between the user profile and every candidate movie.
# The result holder is sized from the data, one row per movie.
output <- data.frame(similarity = numeric(nrow(movie)))
for (i in seq_len(nrow(movie))) {
  x2 <- as.numeric(unlist(movie[i, ]))
  output[i, 1] <- as.numeric(cosine(user, x2))
}

# Rank the candidates by similarity, best first.
simresult <- data.frame(df$movieId, df$title, output)
colnames(simresult) <- c("MovieId", "Title", "Similarites")
simresult1 <- simresult[order(simresult$Similarites, decreasing = TRUE), ]
# The profile above was built for user 27, so the message reports that id.
cat("user ID 27 has chose the following 10 movies")

## user ID 27 has chose the following 10 movies

# Ids of the randomly sampled candidate movies.
df$movieId

##  [1] 52694  5636  8544  3576  8999  2071  5395  7263  3106  2722

cat("Of these, the following 5 movies are recommended")

## Of these, the following 5 movies are recommended

# simresult1 is sorted by decreasing similarity, so the first five rows are
# the five candidates closest to the user profile.
head(simresult1,5)

Part 2.3

# Reload both MovieLens tables for the collaborative-filtering part and turn
# every column into character, so ids can be used directly as matrix
# row/column names later on.
movies2 <- read.csv(
  "/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/ml-latest-small/movies.csv",
  header = TRUE,
  sep = ","
)
ratings2 <- read.csv(
  "/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/ml-latest-small/ratings.csv",
  header = TRUE,
  sep = ","
)
movies2[] <- lapply(movies2, as.character)
ratings2[] <- lapply(ratings2, as.character)

Part 2.3 a

# Ratings of the target user (191). The first six are kept aside in test.set
# and blanked in u191.
u191 <- subset(ratings2, ratings2$userId == 191)
row.names(u191) <- seq_len(nrow(u191))
test.set <- head(u191)
u191[1:6, 3] <- NA

# Neighbourhood of user 191; ros gives the row order of the utility matrix.
neighbours.191 <- c("513", "317", "415", "64", "556")
ros <- c("191", neighbours.191)
user513 <- subset(ratings2, ratings2$userId == 513)
user317 <- subset(ratings2, ratings2$userId == 317)
user415 <- subset(ratings2, ratings2$userId == 415)
user64 <- subset(ratings2, ratings2$userId == 64)
user556 <- subset(ratings2, ratings2$userId == 556)

# Movies user 191 has in common with at least one neighbour, in order of
# first appearance (neighbours visited as 513, 415, 64, 556, 317).
shared <- lapply(
  list(user513, user415, user64, user556, user317),
  function(nb) intersect(u191$movieId, nb$movieId)
)
mov.list <- unique(unlist(shared))

# Users x movies matrix, sized from the data rather than hard-coded.
utility <- matrix(
  as.numeric(0),
  nrow = length(ros),
  ncol = length(mov.list)
)
colnames(utility) <- mov.list
rownames(utility) <- ros
  # Copy every user's known ratings into the utility matrix. Rows are visited
  # in matrix order (191, 513, 317, 415, 64, 556). Only movies that are
  # columns of the matrix are looked up, so the loop no longer runs past the
  # last column.
  for (i in rownames(utility)) {
    user_id <- subset(ratings2, ratings2$userId == as.numeric(i))
    rated <- intersect(colnames(utility), user_id$movieId)
    # Column 3 of the ratings table is the rating, stored as character.
    utility[i, rated] <-
      as.numeric(user_id[match(rated, user_id$movieId), 3])
  }
# Hide user 191's entries in the first five movie columns, then mark every
# zero cell of the matrix as missing.
utility["191", 1:5] <- as.numeric(0)
utility[utility == 0] <- NA
# Mean-centre each user's ratings: subtract the user's own average (missing
# cells ignored) from every rating that user gave. Missing cells stay NA.
m_util <- utility
means <- vapply(
  1:6,
  function(row_idx) mean(m_util[row_idx, ], na.rm = TRUE),
  numeric(1)
)
# means has one entry per row, so it recycles down the rows of the matrix.
m_util <- m_util - means
  
# Similarity scores between user 191 and the three neighbours used for the
# prediction (columns named by neighbour id).
js <- matrix(NA, nrow = 1, ncol = 3)
colnames(js) <- c("513", "317", "415")
js[1, ] <- c(0.4359, 0.4033, 0.3256)

moviewq <- c("150", "296", "380", "590") # movies to predict
rate <- matrix(NA, nrow = 1, ncol = 4) # predicted ratings, one per movie
colnames(rate) <- moviewq

for (m in moviewq) {
  # Both accumulators restart for every movie. Previously the numerator was
  # only reset when user 513 had rated the movie, so otherwise it carried
  # over from the previous movie.
  ratenum <- 0
  rateden <- 0
  for (u in colnames(js)) {
    # Only neighbours who rated movie m contribute.
    if (!is.na(m_util[u, m])) {
      ratenum <- ratenum + js[1, u] * m_util[u, m]
      rateden <- rateden + js[1, u]
    }
  }

  # Weighted average of the neighbours' mean-centred ratings, shifted back by
  # the mean of the first utility row (user 191).
  rateqq <- (ratenum / rateden) + means[1]
  rate[1, m] <- round(rateqq, 0)
}
# Table of predicted ("p") and actual ("a") ratings with the squared error,
# one row per predicted movie.
errornum <- matrix(
  as.numeric(0),
  nrow = 4,
  ncol = 3,
  dimnames = list(
    c("150", "296", "380", "590"),
    c("p", "a", "square of diff")
  )
)
# Re-read user 191's ratings to get the true values.
u191 <- subset(ratings2, ratings2$userId == 191)
for (mv in colnames(rate)) {
  errornum[mv, "p"] <- rate[1, mv]
  errornum[mv, "a"] <- as.numeric(u191$rating[which(u191$movieId %in% mv)])
  errornum[mv, "square of diff"] <-
    (as.numeric(errornum[mv, "a"]) - as.numeric(errornum[mv, "p"]))^2
}
# Root of the mean squared error over the four movies.
sumee <- sum(errornum[, "square of diff"])
rsme <- round(sqrt(sumee / 4), 3)
errornum

##     p a square of diff
## 150 3 4              1
## 296 2 5              9
## 380 3 3              0
## 590 2 4              4

# Report the user ids that make up the utility matrix rows.
cat("User ID 191, 5 random user IDs: ", as.numeric(rownames(utility)),"\n")

## User ID 191, 5 random user IDs:  191 513 317 415 64 556

cat("Using user-user similarity, User ID 191 will rate the movies as follows:", "\n")

## Using user-user similarity, User ID 191 will rate the movies as follows:

# One line per movie: movie id, then the predicted rating (column 1, "p").
for(i in 1:4){
  cat(rownames(errornum)[i],":", as.numeric(errornum[i,1]),"\n")
}

## 150 : 3 
## 296 : 2 
## 380 : 3 
## 590 : 2

# rsme holds the root of the mean squared error computed above.
cat("RSME:",rsme,"\n")

## RSME: 1.871

Part 2.3 b

# Item-item view of the same data: rows are movies, columns are users
# (column 1 is user 191).
ree <- t(utility)
ree["231", "191"] <- 2
meanree <- apply(ree, 1, function(x) mean(x, na.rm = TRUE))
uti.ree <- ree # uncentred copy, used later for reporting

# Centre every movie on its own mean rating; missing ratings become 0.
ree <- ree - meanree
ree[is.na(ree)] <- 0

# Predicted ("p"), actual ("a") and squared difference ("diff") per movie.
# NOTE(review): the row labels below are 150/296/380/590, but the loop scores
# rows 1-4 of ree against user 191's ratings of movies 110/150/161/208.
# Labels are left as they were - confirm which set is intended.
error_num <- matrix(as.numeric(0), nrow = 4, ncol = 3)
rownames(error_num) <- c("150", "296", "380", "590")
colnames(error_num) <- c("p", "a", "diff")

# One entry per target movie:
#   row        - row of ree holding the target movie
#   movie      - movie id used to look up user 191's actual rating
#   neighbours - the three neighbouring movies chosen from `maaa`
#   zero       - similarity entries forced to 0 before neighbours are ranked
#                (the target row itself; the fourth target also blanks row 5,
#                as before - reason not evident here)
targets <- list(
  list(row = 1L, movie = "110", neighbours = c("595", "10", "34"),
       zero = 1L),
  list(row = 2L, movie = "150", neighbours = c("165", "208", "457"),
       zero = 2L),
  list(row = 3L, movie = "161", neighbours = c("356", "165", "150"),
       zero = 3L),
  list(row = 4L, movie = "208", neighbours = c("150", "165", "457"),
       zero = c(5L, 4L))
)

for (k in seq_along(targets)) {
  tg <- targets[[k]]

  # Cosine similarity of every movie to the target movie, rounded to 4 dp.
  cosine_matrix <- matrix(NA, nrow = nrow(ree), ncol = 1)
  for (h in seq_len(nrow(ree))) {
    cosine_matrix[h, 1] <- round(cosine(ree[h, ], ree[tg$row, ]), 4)
  }
  cosine_matrix[tg$zero] <- 0.0000
  item_sim <- setNames(cosine_matrix[, 1], rownames(ree))

  # Six most similar movies; the neighbour ids above were read off this.
  maaa <- c(tail(sort(item_sim)))
  qwqq <- tg$neighbours

  # Numerator: similarity-weighted centred ratings of user 191 over all three
  # neighbours. Denominator: similarities of those neighbours whose centred
  # rating for user 191 is non-zero.
  rate.num <- 0
  rate.den <- 0
  for (nb in qwqq) {
    rate.num <- rate.num + item_sim[[nb]] * ree[nb, 1]
    if (ree[nb, 1] != 0) {
      rate.den <- rate.den + item_sim[[nb]]
    }
  }

  # Shift the weighted deviation back by the target movie's mean rating.
  rate.qq <- (rate.num / rate.den) + meanree[tg$row]
  error_num[k, "p"] <- round(rate.qq)
  error_num[k, "a"] <-
    as.numeric(u191$rating[which(u191$movieId %in% tg$movie)])
  error_num[k, "diff"] <-
    (as.numeric(error_num[k, "p"]) - as.numeric(error_num[k, "a"]))^2
}

# Root of the mean squared error over the four predictions.
sum_2 <- sum(error_num[, "diff"])
rmse_2 <- round(sqrt(sum_2 / 4), 3)
# Report the user ids that make up the columns of the item-user matrix.
cat("User ID 191, 5 random user IDs:  ", as.numeric(colnames(uti.ree)),"\n")

## User ID 191, 5 random user IDs:   191 513 317 415 64 556

cat("Using item-item similarity, User 191 will rate the movies as follows:" , "\n")

## Using item-item similarity, User 191 will rate the movies as follows:

# One line per row of error_num: row label, then the predicted rating
# (column 1, "p").
for (i in 1:4){
  cat(rownames(error_num)[i],":", as.numeric(error_num[i,1]),"\n")
  
}

## 150 : 4 
## 296 : 3 
## 380 : 5 
## 590 : 3

# rmse_2 holds the root of the mean squared error of the item-item predictions.
cat("RSME",":",rmse_2)

## RSME : 0.866