library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.1.0 ✔ purrr 0.3.0
## ✔ tibble 2.0.1 ✔ dplyr 0.8.0.1
## ✔ tidyr 0.8.2 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## Warning: package 'tibble' was built under R version 3.5.2
## Warning: package 'purrr' was built under R version 3.5.2
## Warning: package 'dplyr' was built under R version 3.5.2
## Warning: package 'stringr' was built under R version 3.5.2
## Warning: package 'forcats' was built under R version 3.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
## Warning: package 'janitor' was built under R version 3.5.2
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(stringr)
library(stats)
library(crayon)
##
## Attaching package: 'crayon'
## The following object is masked from 'package:ggplot2':
##
## %+%
library(COSINE)
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: genalg
library(textreuse)
##
## Attaching package: 'textreuse'
## The following object is masked from 'package:readr':
##
## tokenize
library(sqldf)
## Loading required package: gsubfn
## Loading required package: proto
## Loading required package: RSQLite
library(data.table)
## Warning: package 'data.table' was built under R version 3.5.2
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
## The following object is masked from 'package:purrr':
##
## transpose
# Build a text-reuse corpus from every file in the corpus directory.
# Documents are tokenised into 5-grams, and keep_tokens = TRUE retains the
# tokens so they can be inspected further down.
files <- list.files(
  "/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/corpus/",
  full.names = TRUE
)
corpus <- TextReuseCorpus(
  files,
  tokenizer = tokenize_ngrams,
  n = 5,
  keep_tokens = TRUE
)
# Identifiers of the documents held by the corpus.
names(corpus)
## [1] "g0pA_taska" "g0pA_taskb" "g0pA_taskc" "g0pA_taskd" "g0pA_taske"
## [6] "g0pB_taska" "g0pB_taskb" "g0pB_taskc" "g0pB_taskd" "g0pB_taske"
## [11] "g0pC_taska" "g0pC_taskb" "g0pC_taskc" "g0pC_taskd" "g0pC_taske"
## [16] "g0pD_taska" "g0pD_taskb" "g0pD_taskc" "g0pD_taskd" "g0pD_taske"
## [21] "g0pE_taska" "g0pE_taskb" "g0pE_taskc" "g0pE_taskd" "g0pE_taske"
## [26] "g1pA_taska" "g1pA_taskb" "g1pA_taskc" "g1pA_taskd" "g1pA_taske"
## [31] "g1pB_taska" "g1pB_taskb" "g1pB_taskc" "g1pB_taskd" "g1pB_taske"
## [36] "g1pD_taska" "g1pD_taskb" "g1pD_taskc" "g1pD_taskd" "g1pD_taske"
## [41] "g2pA_taska" "g2pA_taskb" "g2pA_taskc" "g2pA_taskd" "g2pA_taske"
## [46] "g2pB_taska" "g2pB_taskb" "g2pB_taskc" "g2pB_taskd" "g2pB_taske"
## [51] "g2pC_taska" "g2pC_taskb" "g2pC_taskc" "g2pC_taskd" "g2pC_taske"
## [56] "g2pE_taska" "g2pE_taskb" "g2pE_taskc" "g2pE_taskd" "g2pE_taske"
## [61] "g3pA_taska" "g3pA_taskb" "g3pA_taskc" "g3pA_taskd" "g3pA_taske"
## [66] "g3pB_taska" "g3pB_taskb" "g3pB_taskc" "g3pB_taskd" "g3pB_taske"
## [71] "g3pC_taska" "g3pC_taskb" "g3pC_taskc" "g3pC_taskd" "g3pC_taske"
## [76] "g4pB_taska" "g4pB_taskb" "g4pB_taskc" "g4pB_taskd" "g4pB_taske"
## [81] "g4pC_taska" "g4pC_taskb" "g4pC_taskc" "g4pC_taskd" "g4pC_taske"
## [86] "g4pD_taska" "g4pD_taskb" "g4pD_taskc" "g4pD_taskd" "g4pD_taske"
## [91] "g4pE_taska" "g4pE_taskb" "g4pE_taskc" "g4pE_taskd" "g4pE_taske"
## [96] "orig_taska" "orig_taskb" "orig_taskc" "orig_taskd" "orig_taske"
# Tokens retained for each document, and how many each document has.
total.tokens <- tokens(corpus)
total.len <- vapply(total.tokens, length, integer(1))
# Grand total over all documents (a token occurring in several documents is
# counted once per occurrence).
sum(total.len)
## [1] 22033
Dimensions of the characteristic matrix: [22033 x 100] (number of shingles x number of documents)
# Pull the document named "orig_taske" and show its first five tokens.
doc <- corpus[["orig_taske"]]
doc_tokens <- tokens(doc)
doc_tokens[seq_len(5)]
## [1] "in mathematics and computer science"
## [2] "mathematics and computer science dynamic"
## [3] "and computer science dynamic programming"
## [4] "computer science dynamic programming is"
## [5] "science dynamic programming is a"
The reduction in the size of the problem will be 98.63%.
### Part 2.2
User ID: 20395743 mod 671 = 27
Content-based filtering
#install.packages("dplyr")
library(dplyr)
library(sqldf)
# NOTE(review): the working directory set here is also where the profile CSVs
# further down are written and re-read, so it is left exactly as it was.
setwd("/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/ml-latest-small/")
ratings1 <- read.csv("ratings.csv", sep = ",", header = TRUE)
movies1 <- read.csv("movies.csv", sep = ",", header = TRUE)
# Attach the movie columns to every rating row, matched on movieId.
combine <- left_join(ratings1, movies1, by = "movieId")
head(combine)
# All rows of the joined table that belong to user 27.
user_27 <- sqldf(x = "select * from combine where userid = '27'")
user_27
# One-hot encode the genres of every movie rated by user 27.
# `genres2` holds one column per position in the "|"-separated genre string;
# rows with fewer genres are padded with NA by tstrsplit().
genres <- as.data.frame(user_27$genres, stringsAsFactors = FALSE)
genres2 <- as.data.frame(
  tstrsplit(genres[, 1], "[|]", type.convert = TRUE),
  stringsAsFactors = FALSE
)
genre_list <- c(
  "Action", "Adventure", "Animation", "Children", "Comedy", "Crime",
  "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "IMAX",
  "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
  "(no genres listed)"
)
# Character matrix with one row per rated movie plus a header row.
# Row 1 repeats the genre names (it is removed again when the profile is
# built); the width follows genre_list instead of a hard-coded 20.
genre_matrix <- matrix(
  "0",
  nrow = nrow(user_27) + 1,
  ncol = length(genre_list)
)
genre_matrix[1, ] <- genre_list # set first row to genre list
colnames(genre_matrix) <- genre_list # set column names to genre list
# For each genre position, flag (movie row, genre column) wherever the genre
# is in genre_list. NA padding and unknown genres give no match and are
# skipped. The +1 offsets past the header row.
for (pos in seq_len(ncol(genres2))) {
  genre_idx <- match(genres2[[pos]], genre_list)
  movie_rows <- which(!is.na(genre_idx))
  genre_matrix[cbind(movie_rows + 1L, genre_idx[movie_rows])] <- "1"
}
genre_matrix
## Action Adventure Animation Children Comedy Crime
## [1,] "Action" "Adventure" "Animation" "Children" "Comedy" "Crime"
## [2,] "0" "0" "0" "0" "0" "1"
## [3,] "0" "0" "0" "0" "0" "0"
## [4,] "0" "0" "0" "0" "0" "1"
## [5,] "0" "0" "0" "0" "1" "1"
## [6,] "0" "0" "0" "0" "0" "0"
## [7,] "0" "0" "0" "0" "0" "1"
## [8,] "0" "0" "0" "0" "0" "1"
## [9,] "0" "0" "0" "0" "0" "0"
## [10,] "0" "0" "0" "0" "0" "0"
## [11,] "1" "1" "0" "0" "0" "0"
## [12,] "0" "0" "0" "0" "1" "0"
## [13,] "0" "0" "0" "0" "0" "0"
## [14,] "0" "0" "0" "0" "0" "0"
## [15,] "0" "0" "0" "0" "0" "0"
## [16,] "0" "0" "0" "0" "1" "0"
## [17,] "0" "0" "0" "0" "0" "1"
## [18,] "0" "0" "0" "0" "1" "0"
## [19,] "0" "0" "0" "0" "0" "0"
## [20,] "0" "0" "0" "0" "1" "1"
## [21,] "0" "0" "0" "0" "0" "0"
## [22,] "0" "0" "0" "0" "0" "0"
## [23,] "0" "0" "0" "0" "0" "0"
## [24,] "0" "0" "0" "0" "0" "0"
## Documentary Drama Fantasy Film-Noir Horror IMAX
## [1,] "Documentary" "Drama" "Fantasy" "Film-Noir" "Horror" "IMAX"
## [2,] "0" "0" "0" "0" "0" "0"
## [3,] "0" "1" "0" "0" "0" "0"
## [4,] "0" "1" "0" "0" "0" "0"
## [5,] "0" "1" "0" "0" "0" "0"
## [6,] "0" "1" "0" "0" "0" "0"
## [7,] "0" "0" "0" "0" "1" "0"
## [8,] "0" "1" "0" "0" "0" "0"
## [9,] "0" "0" "0" "0" "0" "0"
## [10,] "0" "1" "0" "0" "0" "0"
## [11,] "0" "0" "0" "0" "0" "0"
## [12,] "0" "1" "0" "0" "0" "0"
## [13,] "0" "1" "0" "0" "0" "0"
## [14,] "0" "1" "0" "0" "0" "0"
## [15,] "0" "0" "0" "0" "0" "0"
## [16,] "0" "0" "0" "0" "0" "0"
## [17,] "0" "1" "0" "0" "0" "0"
## [18,] "0" "1" "0" "0" "0" "0"
## [19,] "0" "1" "0" "0" "0" "0"
## [20,] "0" "0" "0" "0" "0" "0"
## [21,] "0" "1" "0" "0" "1" "0"
## [22,] "0" "1" "0" "0" "0" "0"
## [23,] "0" "1" "0" "0" "1" "0"
## [24,] "0" "1" "0" "0" "0" "0"
## Musical Mystery Romance Sci-Fi Thriller War Western
## [1,] "Musical" "Mystery" "Romance" "Sci-Fi" "Thriller" "War" "Western"
## [2,] "0" "1" "0" "0" "1" "0" "0"
## [3,] "0" "0" "0" "0" "0" "1" "0"
## [4,] "0" "0" "0" "0" "1" "0" "0"
## [5,] "0" "0" "0" "0" "1" "0" "0"
## [6,] "0" "0" "0" "0" "0" "1" "0"
## [7,] "0" "0" "0" "0" "1" "0" "0"
## [8,] "0" "0" "0" "0" "0" "0" "0"
## [9,] "0" "0" "0" "0" "1" "0" "0"
## [10,] "0" "0" "0" "1" "0" "0" "0"
## [11,] "0" "0" "0" "0" "1" "0" "0"
## [12,] "0" "0" "1" "0" "0" "0" "0"
## [13,] "0" "0" "1" "0" "0" "0" "0"
## [14,] "0" "0" "1" "0" "0" "0" "0"
## [15,] "0" "0" "0" "0" "1" "0" "0"
## [16,] "0" "0" "0" "0" "0" "0" "0"
## [17,] "0" "0" "0" "0" "1" "0" "0"
## [18,] "0" "0" "1" "0" "0" "0" "0"
## [19,] "0" "0" "0" "0" "0" "0" "0"
## [20,] "0" "0" "0" "0" "0" "0" "0"
## [21,] "0" "0" "0" "0" "1" "0" "0"
## [22,] "0" "1" "0" "0" "1" "0" "0"
## [23,] "0" "1" "0" "0" "0" "0" "0"
## [24,] "0" "0" "1" "0" "0" "0" "0"
## (no genres listed)
## [1,] "(no genres listed)"
## [2,] "0"
## [3,] "0"
## [4,] "0"
## [5,] "0"
## [6,] "0"
## [7,] "0"
## [8,] "0"
## [9,] "0"
## [10,] "0"
## [11,] "0"
## [12,] "0"
## [13,] "0"
## [14,] "0"
## [15,] "0"
## [16,] "0"
## [17,] "0"
## [18,] "0"
## [19,] "0"
## [20,] "0"
## [21,] "0"
## [22,] "0"
## [23,] "0"
## [24,] "0"
# Build user 27's genre profile: for each genre, the share of the user's rated
# movies that carry it.
# drop = FALSE keeps a one-movie matrix from collapsing into a vector.
genre_matrix2 <- as.data.frame(
  genre_matrix[-1, , drop = FALSE], # remove first row, which was the genre list
  stringsAsFactors = FALSE
)
# convert from characters to integers
genre_matrix2[] <- lapply(genre_matrix2, as.integer)
genre_matrix2
genre_matrix2 <- data.frame(user_27$movieId, genre_matrix2)
# Number of rated movies, captured before the summary rows are appended.
# This replaces the previous hard-coded divisor of 23.
n_rated <- nrow(user_27)
# NOTE(review): the movieId column is summed and averaged too. That value is
# meaningless, but the column is dropped again by the 3:22 selection below.
genre_matrix2["Sum", ] <- colSums(genre_matrix2)
options(digits = 2)
user.df <- genre_matrix2[nrow(genre_matrix2), ] / n_rated
user.df
genre_matrix2["Average", ] <- user.df
genre_matrix2
# Round-trip through CSV. After reading back, column 1 holds the row names and
# column 2 the movieId, so columns 3:22 are the twenty genres.
write.csv(genre_matrix2, "userprofile.csv")
userprofile <- read.csv("userprofile.csv", sep = ",", header = TRUE)
# The last row is "Average"; turn it into a named numeric vector.
user <- userprofile[nrow(userprofile), 3:22]
user <- vapply(user, mean, numeric(1))
## Building MOVIE PROFILE
## Building MOVIE PROFILE
library(lsa)
## Loading required package: SnowballC
## Warning: package 'SnowballC' was built under R version 3.5.2
# Draw ten movies at random and one-hot encode their genres.
# NOTE(review): no set.seed() call is visible in this script, so the sample
# presumably differs from run to run.
df <- movies1[sample(nrow(movies1), 10), ]
df
# `genres2` holds one column per position in the "|"-separated genre string;
# rows with fewer genres are padded with NA by tstrsplit().
genres <- as.data.frame(df$genres, stringsAsFactors = FALSE)
library(data.table)
genres2 <- as.data.frame(
  tstrsplit(genres[, 1], "[|]", type.convert = TRUE),
  stringsAsFactors = FALSE
)
genre_list <- c(
  "Action", "Adventure", "Animation", "Children", "Comedy", "Crime",
  "Documentary", "Drama", "Fantasy", "Film-Noir", "Horror", "IMAX",
  "Musical", "Mystery", "Romance", "Sci-Fi", "Thriller", "War", "Western",
  "(no genres listed)"
)
# Character matrix with one row per sampled movie plus a header row.
# The width follows genre_list instead of a hard-coded 20.
genre_matrix <- matrix(
  "0",
  nrow = nrow(df) + 1,
  ncol = length(genre_list)
)
genre_matrix[1, ] <- genre_list # set first row to genre list
colnames(genre_matrix) <- genre_list # set column names to genre list
# For each genre position, flag (movie row, genre column) wherever the genre
# is in genre_list. NA padding and unknown genres give no match and are
# skipped. The +1 offsets past the header row.
for (pos in seq_len(ncol(genres2))) {
  genre_idx <- match(genres2[[pos]], genre_list)
  movie_rows <- which(!is.na(genre_idx))
  genre_matrix[cbind(movie_rows + 1L, genre_idx[movie_rows])] <- "1"
}
# Turn the one-hot matrix of the sampled movies into an integer data frame
# keyed by movieId and save it as the movie profile.
# The first row of genre_matrix is the genre list, so it is removed here.
movie_flags <- as.data.frame(genre_matrix[-1, ], stringsAsFactors = FALSE)
# convert from characters to integers
movie_flags[] <- lapply(movie_flags, as.integer)
genre_matrix2 <- data.frame(df$movieId, movie_flags)
genre_matrix2
write.csv(genre_matrix2, "movieprofile.csv")
# Score each of the ten sampled movies against the user profile with cosine
# similarity, then rank the movies by that score.
movieprofile <- read.csv("movieprofile.csv", sep = ",", header = TRUE)
options(digits = 4)
# Columns 3:22 of the re-read CSV are the twenty genre flags.
movie <- as.vector(movieprofile[, 3:22])
similarity <- vapply(
  1:10,
  function(k) as.numeric(cosine(user, colMeans(movie[k, ]))),
  numeric(1)
)
# Two identical columns, matching the shape the result table is cut from.
output <- data.frame(X10 = similarity, X0 = similarity)
simresult <- data.frame(df$movieId, df$title, output)[, 1:3]
colnames(simresult)[1:3] <- c("MovieId", "Title", "Similarites")
# Highest similarity first.
ranking <- order(simresult$Similarites, decreasing = TRUE)
simresult1 <- simresult[ranking, ]
# Report the sampled movies and the five with the highest similarity.
# The user profile in this section was built for user 27, so the message names
# that ID (it previously said 250).
cat("user ID 27 has chose the following 10 movies")
## user ID 27 has chose the following 10 movies
df$movieId
## [1] 52694 5636 8544 3576 8999 2071 5395 7263 3106 2722
cat("Of these, the following 5 movies are recommended")
## Of these, the following 5 movies are recommended
head(simresult1, 5)
# Reload movies and ratings for the collaborative-filtering part and convert
# every column to character.
movies2 <- read.csv(
  "/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/ml-latest-small/movies.csv",
  header = TRUE, sep = ","
)
ratings2 <- read.csv(
  "/Users/jayavarshini/Desktop/ms/sem1/dmm/Assing_4/ml-latest-small/ratings.csv",
  header = TRUE, sep = ","
)
movies2[] <- lapply(movies2, as.character)
ratings2[] <- lapply(ratings2, as.character)
# Ratings of user 191, renumbered from 1.
u191 <- ratings2[which(ratings2$userId == 191), ]
row.names(u191) <- seq_len(nrow(u191))
# Keep the first six rows as a test set, then blank their rating (column 3).
test.set <- head(u191)
u191[1:6, 3] <- NA
# Set up the user x movie utility matrix for user 191 and five other users.
neighbours.191 <- c("513", "317", "415", "64", "556")
ros <- c("191", neighbours.191)
user513 <- subset(ratings2, ratings2$userId == 513)
user317 <- subset(ratings2, ratings2$userId == 317)
user415 <- subset(ratings2, ratings2$userId == 415)
user64 <- subset(ratings2, ratings2$userId == 64)
user556 <- subset(ratings2, ratings2$userId == 556)
# Movies that user 191 shares with at least one of the other users. The users
# are visited in the same order as before (513, 415, 64, 556, 317), which
# fixes the column order of the matrix.
shared_movies <- lapply(
  list(user513, user415, user64, user556, user317),
  function(other) intersect(u191$movieId, other$movieId)
)
mov.list <- unique(unlist(shared_movies))
# Size the matrix from the data rather than a hard-coded 6 x 27, so the
# dimnames assignments below cannot fail on a length mismatch.
utility <- matrix(
  as.numeric(0),
  nrow = length(ros),
  ncol = length(mov.list)
)
colnames(utility) <- as.character(mov.list)
rownames(utility) <- ros
# Fill the utility matrix: for every user (row) copy that user's rating of each
# movie (column) out of ratings2. Cells for movies a user did not rate keep
# their initial value.
# This replaces six copy-pasted loops that iterated over columns 1:29 although
# the matrix has fewer columns.
for (uid in rownames(utility)) {
  user_ratings <- subset(ratings2, ratings2$userId == uid)
  # Position of each matrix column's movie in this user's ratings (NA = unrated).
  rating_row <- match(colnames(utility), user_ratings$movieId)
  is_rated <- !is.na(rating_row)
  # Column 3 of ratings2 holds the rating.
  utility[uid, is_rated] <- as.numeric(user_ratings[rating_row[is_rated], 3])
}
# Zero out user 191's entries in the first five columns, then turn every zero
# cell of the matrix into NA.
utility["191", 1:5] <- as.numeric(0)
utility[utility == 0] <- NA
# Mean-centre the utility matrix per user: subtract each row's mean (computed
# over the non-NA cells) from that row. NA cells stay NA.
m_util <- utility
means <- vapply(
  1:6,
  function(row) mean(m_util[row, ], na.rm = TRUE),
  numeric(1)
)
m_util <- sweep(m_util, 1, means, FUN = "-")
# User-user prediction: for each movie in `moviewq`, take the similarity-
# weighted average of the mean-centred ratings of the users listed in `js`,
# then add user 191's own mean rating (means[1]) and round.
js <- matrix(NA, nrow = 1, ncol = 3) # similarity score matrix for neighbourhood
colnames(js) <- c("513", "317", "415")
js[1, ] <- c(0.4359, 0.4033, 0.3256) # hard-coded similarity scores between users
moviewq <- c("150", "296", "380", "590") # movie list for running prediction
rate <- matrix(NA, nrow = 1, ncol = 4) # matrix of prediction values
colnames(rate) <- c("150", "296", "380", "590")
for (m in moviewq) {
  centred <- m_util[colnames(js), m]
  rated <- !is.na(centred)
  # Numerator and denominator are rebuilt from scratch for every movie and use
  # the same set of users. Previously the numerator was only reset when user
  # 513 had rated the movie, so a stale value could leak in from the last one.
  ratenum <- sum(js[1, rated] * centred[rated])
  rateden <- sum(js[1, rated])
  if (any(rated)) {
    rateqq <- (ratenum / rateden) + means[1]
  } else {
    # None of the listed users rated this movie: no prediction is possible.
    rateqq <- NA_real_
  }
  rate[1, m] <- round(rateqq, 0)
}
# Compare the predicted ratings ("p") with user 191's actual ratings ("a") and
# derive the root of the mean squared difference.
errornum <- matrix(
  as.numeric(0),
  nrow = 4,
  ncol = 3,
  dimnames = list(
    c("150", "296", "380", "590"),
    c("p", "a", "square of diff")
  )
)
# Fresh copy of user 191's ratings.
u191 <- subset(ratings2, ratings2$userId == 191)
predicted_ids <- colnames(rate)
errornum[predicted_ids, "p"] <- rate[1, predicted_ids]
errornum[predicted_ids, "a"] <- vapply(
  predicted_ids,
  function(id) as.numeric(u191$rating[which(u191$movieId %in% id)]),
  numeric(1)
)
errornum[, "square of diff"] <- (errornum[, "a"] - errornum[, "p"])^2
sumee <- sum(as.numeric(errornum[, "square of diff"]))
rsme <- round((sqrt(sumee / 4)), 3)
errornum
## p a square of diff
## 150 3 4 1
## 296 2 5 9
## 380 3 3 0
## 590 2 4 4
# Print the users involved, the user-user predictions, and the error.
cat("User ID 191, 5 random user IDs: ", as.numeric(rownames(utility)),"\n")
## User ID 191, 5 random user IDs: 191 513 317 415 64 556
cat("Using user-user similarity, User ID 191 will rate the movies as follows:", "\n")
## Using user-user similarity, User ID 191 will rate the movies as follows:
# One line per movie; column 1 of errornum is the predicted rating.
for (movie in rownames(errornum)) {
  cat(movie, ":", as.numeric(errornum[movie, 1]), "\n")
}
## 150 : 3
## 296 : 2
## 380 : 3
## 590 : 2
cat("RSME:",rsme,"\n")
## RSME: 1.871
# Item-item setup: work on the transposed utility matrix (movies as rows,
# users as columns).
ree <- t(utility)
# Hard-coded entry: movie "231" for user "191".
ree["231", "191"] <- 2
# Mean rating per movie over the users who rated it.
meanree <- apply(ree, 1, function(x) mean(x, na.rm = TRUE))
# Keep the un-centred copy.
uti.ree <- ree
# Centre every row on its mean; cells without a rating become 0.
ree <- sweep(ree, 1, meanree, FUN = "-")
ree[is.na(ree)] <- 0
# Item-item prediction for row 1 of `ree`.
# error_num collects predicted ("p"), actual ("a") and squared difference.
# NOTE(review): the rows are labelled 150/296/380/590, but the actual ratings
# stored in this section are looked up for movies 110, 150, 161 and 208.
# Confirm which labels are intended.
error_num <- matrix(
  as.numeric(0),
  nrow = 4,
  ncol = 3,
  dimnames = list(c("150", "296", "380", "590"), c("p", "a", "diff"))
)
# Rounded cosine similarity of every row of `ree` with row 1.
cosine_matrix <- matrix(NA, nrow = 27, ncol = 1)
cosine_matrix[seq_len(nrow(ree)), 1] <- vapply(
  seq_len(nrow(ree)),
  function(h) round(as.numeric(cosine(ree[h, ], ree[1, ])), 4),
  numeric(1)
)
# Similarity of the row with itself is zeroed.
cosine_matrix[1] <- 0.0000
# The similarities become a temporary 7th column of `ree`.
ree <- cbind(ree, cosine_matrix)
# Largest similarities, for inspection.
maaa <- c(tail(sort(ree[, 7])))
qwqq <- c("595", "10", "34")
# Similarity-weighted sum of the centred values in column 1.
rate.num <- sum(ree[qwqq, 7] * ree[qwqq, 1])
# Sum of the similarities of those neighbours whose column-1 value is non-zero.
rate.den <- sum(ree[qwqq, 7][ree[qwqq, 1] != 0])
rate.qq <- (rate.num / rate.den) + meanree[1]
error_num[1, "p"] <- round(rate.qq)
error_num[1, "a"] <- as.numeric(u191$rating[which(u191$movieId %in% "110")])
error_num[1, "diff"] <- (error_num[1, "p"] - error_num[1, "a"])^2
# Drop the temporary similarity column again.
ree <- ree[, -7]
# Item-item prediction for row 2 of `ree`.
# Rounded cosine similarity of every row of `ree` with row 2.
cosine_matrix <- matrix(NA, nrow = 27, ncol = 1)
cosine_matrix[seq_len(nrow(ree)), 1] <- vapply(
  seq_len(nrow(ree)),
  function(h) round(as.numeric(cosine(ree[h, ], ree[2, ])), 4),
  numeric(1)
)
# Similarity of the row with itself is zeroed.
cosine_matrix[2] <- 0.0000
# The similarities become a temporary 7th column of `ree`.
ree <- cbind(ree, cosine_matrix)
maaa <- c(tail(sort(ree[, 7])))
#from maaa we get the neighbouring movies
qwqq <- c("165", "208", "457") # neighbouring movie items
# Similarity-weighted sum of the centred values in column 1.
rate.num <- sum(ree[qwqq, 7] * ree[qwqq, 1])
# Sum of the similarities of those neighbours whose column-1 value is non-zero.
rate.den <- sum(ree[qwqq, 7][ree[qwqq, 1] != 0])
rate.qq <- (rate.num / rate.den) + meanree[2]
error_num[2, "p"] <- round(rate.qq)
error_num[2, "a"] <- as.numeric(u191$rating[which(u191$movieId %in% "150")])
error_num[2, "diff"] <- (error_num[2, "p"] - error_num[2, "a"])^2
# Drop the temporary similarity column again.
ree <- ree[, -7]
# Item-item prediction for row 3 of `ree`.
# Rounded cosine similarity of every row of `ree` with row 3.
cosine_matrix <- matrix(NA, nrow = 27, ncol = 1)
cosine_matrix[seq_len(nrow(ree)), 1] <- vapply(
  seq_len(nrow(ree)),
  function(h) round(as.numeric(cosine(ree[h, ], ree[3, ])), 4),
  numeric(1)
)
# Similarity of the row with itself is zeroed.
cosine_matrix[3] <- 0.0000
# The similarities become a temporary 7th column of `ree`.
ree <- cbind(ree, cosine_matrix)
# Largest similarities, for inspection.
maaa <- c(tail(sort(ree[, 7])))
qwqq <- c("356", "165", "150") # neighbouring movie items
# Similarity-weighted sum of the centred values in column 1.
rate.num <- sum(ree[qwqq, 7] * ree[qwqq, 1])
# Sum of the similarities of those neighbours whose column-1 value is non-zero.
rate.den <- sum(ree[qwqq, 7][ree[qwqq, 1] != 0])
rate.qq <- (rate.num / rate.den) + meanree[3]
error_num[3, "p"] <- round(rate.qq)
error_num[3, "a"] <- as.numeric(u191$rating[which(u191$movieId %in% "161")])
error_num[3, "diff"] <- (error_num[3, "p"] - error_num[3, "a"])^2
# Drop the temporary similarity column again.
ree <- ree[, -7]
# Item-item prediction for row 4 of `ree`.
# Rounded cosine similarity of every row of `ree` with row 4.
cosine_matrix <- matrix(NA, nrow = 27, ncol = 1)
for (h in seq_len(nrow(ree))) {
  cos <- cosine(ree[h, ], ree[4, ])
  cosine_matrix[h, 1] <- round(cos, 4)
}
# NOTE(review): row 5 is zeroed here as well as the row's own entry (row 4).
# Presumably this keeps row 5 out of the neighbour candidates; confirm.
cosine_matrix[5] <- 0.0000
cosine_matrix[4] <- 0.0000
# The similarities become a temporary 7th column of `ree`.
ree <- cbind(ree, cosine_matrix)
# Largest similarities, for inspection.
maaa <- c(tail(sort(ree[, 7])))
qwqq <- c("150", "165", "457") # neighbouring movie items
# Similarity-weighted sum of the centred values in column 1.
rate.num <- sum(
  ree[qwqq[1], 7] * ree[qwqq[1], 1] +
    ree[qwqq[2], 7] * ree[qwqq[2], 1] +
    ree[qwqq[3], 7] * ree[qwqq[3], 1]
)
# Sum of the similarities of those neighbours whose column-1 value is non-zero.
rate.den <- 0
for (u in 1:3) {
  if (ree[qwqq[u], 1] != 0) {
    rate.den <- sum(rate.den, ree[qwqq[u], 7])
  }
}
rate.qq <- (rate.num / rate.den) + meanree[4]
error_num[4, "p"] <- round(rate.qq)
# The actual rating is looked up by movieId. The earlier assignment from
# u191["208", 3] was removed: it indexed by row name rather than movieId,
# wrote a character value that turned the whole numeric matrix into character,
# and was overwritten by this line anyway.
error_num[4, "a"] <- as.numeric(u191$rating[which(u191$movieId %in% "208")])
error_num[4, "diff"] <- (as.numeric(error_num[4, "p"]) - as.numeric(error_num[4, "a"]))^2
# Drop the temporary similarity column again.
ree <- ree[, -7]
# Root of the mean squared difference over the four item-item predictions.
# as.numeric() is kept because error_num may hold its values as character.
sum_2 <- sum(as.numeric(error_num[, "diff"]))
rmse_2 <- round((sqrt(sum_2 / 4)), 3)
# Print the users involved, the item-item predictions, and the error.
cat("User ID 191, 5 random user IDs: ", as.numeric(colnames(uti.ree)),"\n")
## User ID 191, 5 random user IDs: 191 513 317 415 64 556
cat("Using item-item similarity, User 191 will rate the movies as follows:" , "\n")
## Using item-item similarity, User 191 will rate the movies as follows:
# One line per row of error_num; column 1 is the predicted rating.
for (movie in rownames(error_num)) {
  cat(movie, ":", as.numeric(error_num[movie, 1]), "\n")
}
## 150 : 4
## 296 : 3
## 380 : 5
## 590 : 3
cat("RSME",":",rmse_2)
## RSME : 0.866