library(recommenderlab)
library(tm)
data(MovieLense)
#hardcoding rating data
myMovieDataSet <- matrix(c(3, 2, NA, 1, 2, 3, 5, 1, 4, 3, 4, 1, 5, 3, 5, 4, 2, NA, 2, 1, NA, 2, 4, 2, 3, 5, 3, NA, 3, 1, 5, NA, 3, 5, 2, 3, 1, 3, 2, 3, NA, 3), nrow = 6, byrow = T)
# getting movie names from movieLense database
colnames(myMovieDataSet)<-colnames(MovieLense)[1:7]
# writing user names
rownames(myMovieDataSet) <- c("U1", "U2", "U3", "U4", "U5", "U6")
myMovieDataSet
## Toy Story (1995) GoldenEye (1995) Four Rooms (1995) Get Shorty (1995)
## U1 3 2 NA 1
## U2 1 4 3 4
## U3 5 4 2 NA
## U4 2 4 2 3
## U5 3 1 5 NA
## U6 3 1 3 2
## Copycat (1995) Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)
## U1 2 3
## U2 1 5
## U3 2 1
## U4 5 3
## U5 3 5
## U6 3 NA
## Twelve Monkeys (1995)
## U1 5
## U2 3
## U3 NA
## U4 NA
## U5 2
## U6 3
#A matrix containing ratings (typically 1-5 stars, etc.).
ratingMatrix <- as(myMovieDataSet, "realRatingMatrix")
#Dissimilarity and Similarity Calculation Between Rating Data
#method=>similarity measure to use
similarity_rating_matrix <- as.matrix(similarity(ratingMatrix, method = "cosine", which = "items"))
s <- svd(similarity_rating_matrix)
#
s$u #s$u is the matrix whose columns contain the left singular vectors of X
## [,1] [,2] [,3] [,4] [,5] [,6]
## [1,] -0.3770026 0.5554078 -0.21864128 0.1402564 -0.36774684 0.19172555
## [2,] -0.3962431 -0.5311554 -0.12675147 0.5098699 -0.33376133 -0.41640854
## [3,] -0.3862971 -0.2434491 -0.51584529 -0.2377167 0.63757372 0.03676604
## [4,] -0.3562993 0.4002149 -0.25214857 -0.3254674 -0.13035985 -0.39787763
## [5,] -0.3972987 -0.1625243 0.63906292 -0.5314793 -0.08428847 -0.18803891
## [6,] -0.3926946 0.2870112 0.44458107 0.5128880 0.49873172 0.09732846
## [7,] -0.3356046 -0.2848984 -0.02002608 -0.1113474 -0.27209156 0.76508525
## [,7]
## [1,] -0.556703613
## [2,] -0.006276293
## [3,] -0.247082023
## [4,] 0.606688246
## [5,] -0.287062270
## [6,] 0.210957105
## [7,] 0.366079082
V <- s$v
t(V) #s$v is the matrix whose columns contain the right singular vectors of X
## [,1] [,2] [,3] [,4] [,5]
## [1,] -0.3770026 -0.396243078 -0.38629709 -0.3562993 -0.39729870
## [2,] -0.5554078 0.531155412 0.24344906 -0.4002149 0.16252433
## [3,] 0.2186413 0.126751470 0.51584529 0.2521486 -0.63906292
## [4,] -0.1402564 -0.509869911 0.23771670 0.3254674 0.53147930
## [5,] 0.3677468 0.333761334 -0.63757372 0.1303599 0.08428847
## [6,] -0.1917256 0.416408538 -0.03676604 0.3978776 0.18803891
## [7,] 0.5567036 0.006276293 0.24708202 -0.6066882 0.28706227
## [,6] [,7]
## [1,] -0.39269464 -0.33560462
## [2,] -0.28701125 0.28489843
## [3,] -0.44458107 0.02002608
## [4,] -0.51288798 0.11134737
## [5,] -0.49873172 0.27209156
## [6,] -0.09732846 -0.76508525
## [7,] -0.21095710 -0.36607908
s$d #s$d is the vector that contains the singular values of X
## [1] 4.1499588 1.0000000 0.8707070 0.7778677 0.6395443 0.4728235 0.3890163
s$u %*% diag(s$d) %*% t(s$v)
## [,1] [,2] [,3] [,4] [,5]
## [1,] -4.024558e-16 7.930825e-01 7.604337e-01 4.594683e-01 8.265582e-01
## [2,] 7.930825e-01 5.839445e-17 6.859943e-01 7.950464e-01 7.925939e-01
## [3,] 7.604337e-01 6.859943e-01 4.996004e-16 6.135720e-01 7.961540e-01
## [4,] 4.594683e-01 7.950464e-01 6.135720e-01 2.498002e-16 6.835991e-01
## [5,] 8.265582e-01 7.925939e-01 7.961540e-01 6.835991e-01 9.020562e-17
## [6,] 6.378198e-01 7.699747e-01 8.091548e-01 7.033392e-01 7.178641e-01
## [7,] 6.375696e-01 5.359422e-01 5.719052e-01 6.125172e-01 5.663794e-01
## [,6] [,7]
## [1,] 6.378198e-01 6.375696e-01
## [2,] 7.699747e-01 5.359422e-01
## [3,] 8.091548e-01 5.719052e-01
## [4,] 7.033392e-01 6.125172e-01
## [5,] 7.178641e-01 5.663794e-01
## [6,] -4.163336e-17 7.024033e-01
## [7,] 7.024033e-01 -2.151057e-16
plot_summary <- c("A cowboy doll is profoundly threatened and jealous when a new spaceman figure supplants him as top toy in a boy's room",
"James Bond teams up with the lone survivor of a destroyed Russian research center to stop the hijacking of a nuclear space weapon by a fellow agent formerly believed to be dead",
"Four interlocking tales that take place in a fading hotel on New Year's Eve",
"A mobster travels to Hollywood to collect a debt and discovers that the movie business is much the same as his current job",
"An agoraphobic psychologist and a female detective must work together to take down a serial killer who copies serial killers from the past",
"A provincial boy related to a Shanghai crime family is recruited by his uncle into cosmopolitan Shanghai in the 1930s to be a servant to a ganglord's mistress",
"In a future world devastated by disease, a convict is sent back in time to gather information about the man-made virus that wiped out most of the human population on the planet")
movie_corpus <- Corpus(VectorSource(plot_summary))
dtm <- DocumentTermMatrix(movie_corpus, control = list(removePunctuation = TRUE, removeNumbers = TRUE, stopwords = TRUE, tolower = TRUE, weighting = weightTfIdf))
dtm_matrix <- as.matrix(dtm)
dimnames(dtm_matrix)$Docs <- colnames(myMovieDataSet)
movie_svd <- svd(dtm_matrix)
#Sigmak <- movie_svd$d
Uk <- movie_svd$u
#Vk <- t(as.matrix(movie_svd$v))
movie_cat <- as.matrix(t(Uk))
dimnames(movie_cat) <- list(SVs = paste0("sv", 1:7), Movies = colnames(myMovieDataSet))
movies <- as(myMovieDataSet, "realRatingMatrix")
movies_norm <- normalize(movies)
norm_cat <- as.vector(movies_norm@data)
norm_cat <- ifelse(norm_cat == 0, NA, norm_cat)
norm_cat <- matrix(norm_cat, nrow = nrow(movies), ncol = ncol(movies))
user_cat <- matrix(nrow = nrow(myMovieDataSet), ncol = ncol(myMovieDataSet))
for (i in 1:nrow(user_cat)){
for (j in 1:ncol(user_cat)){
user_cat[i,j] <- sum(norm_cat[i,]*movie_cat[j,], na.rm = T)/sum(movie_cat[j,] != 0)
}
}
estimated <- matrix(nrow = nrow(myMovieDataSet), ncol = ncol(myMovieDataSet))
colnames(estimated) = colnames(myMovieDataSet)
rownames(estimated) = rownames(myMovieDataSet)
for (i in 1:nrow(estimated))
{
for(j in 1:ncol(estimated))
{
if(is.na(norm_cat[i,j]))
{
numerator <- 0
denominator <- 1
for(k in 1:nrow(movie_cat))
{
numerator <- sum(user_cat[i,k]*movie_cat[k,j])
denominator <- sqrt(sum(user_cat[i,]^2, na.rm = TRUE))*sqrt(sum(movie_cat[,j]^2, na.rm = TRUE))
}
estimated[i,j] <- numerator/denominator
}
else
{
estimated[i,j] <- NA
}
}
}
estimated
## Toy Story (1995) GoldenEye (1995) Four Rooms (1995) Get Shorty (1995)
## U1 NA NA -2.887986e-16 NA
## U2 NA NA 1.511556e-15 NA
## U3 NA NA NA 6.986927e-17
## U4 NA NA NA NA
## U5 NA NA NA -5.958972e-17
## U6 NA NA NA NA
## Copycat (1995) Shanghai Triad (Yao a yao yao dao waipo qiao) (1995)
## U1 NA NA
## U2 NA NA
## U3 NA NA
## U4 NA NA
## U5 NA NA
## U6 NA 2.270232e-15
## Twelve Monkeys (1995)
## U1 NA
## U2 0
## U3 0
## U4 0
## U5 NA
## U6 NA
Looks like something gone wrong ! values are not as expected. Unfortunately changing the initial value didn’t change result much.
Data Mining Algorithms In R/Dimensionality Reduction/Singular Value Decomposition => https://en.wikibooks.org/wiki/Data_Mining_Algorithms_In_R/Dimensionality_Reduction/Singular_Value_Decomposition
Singular Value Decomposition | Stanford University https://www.youtube.com/watch?v=P5mlg91as1c