install.packages("superml")
install.packages("lsa")
install.packages("kableExtra")
library(dplyr)
library(superml)
library(lsa)
library(kableExtra)
#' Pairwise cosine similarity of matrix rows
#'
#' Treats every row of \code{m} as a vector over the column variables and
#' computes the cosine of the angle between each pair of rows: the dot product
#' of the two rows divided by the product of their Euclidean norms.
#'
#' @param m An \code{N x P} numeric matrix (one observation per row).
#'
#' @return An \code{N x N} matrix whose entry \code{[i, j]} is the cosine
#'   similarity between rows \code{i} and \code{j} of \code{m}. Entries that
#'   involve an all-zero row are \code{NaN} because the norm product is zero.
#'
#' @export
cosine_similarity_matrix <- function(m) {
  # Dot product of every row with every other row.
  dot_products <- m %*% t(m)

  # Squared Euclidean norm of each row.
  squared_norms <- rowSums(m^2)

  # sqrt(|a|^2 * |b|^2) == |a| * |b| for every pair of rows.
  norm_products <- sqrt(squared_norms %*% t(squared_norms))

  dot_products / norm_products
}
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(superml)
## Loading required package: R6
library(lsa)
## Loading required package: SnowballC
library(kableExtra)
##
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
##
## group_rows
# Content-based recommendation demo: rank the first 10 TMDB movies by the
# cosine similarity of their TF-IDF-encoded overviews to one liked movie.

# NOTE(review): setwd() with an absolute, user-specific path makes this script
# non-portable. It is kept so that relative paths used here (and possibly
# later in the file) keep resolving the same way.
setwd("/Users/stepniak/624/Presentation")
movies_tmdb_5000 <- read.csv("tmdb_5000_movies.csv", header = TRUE)

# keep only the title and synopsis of the first 10 movies
movies_tmdb_5000_top <- movies_tmdb_5000 %>%
  select(original_title, overview) %>%
  slice(1:10)

# initialise the TF-IDF vectorizer (stop-word removal and normalisation on)
tfv <- TfIdfVectorizer$new(remove_stopwords = TRUE, norm = TRUE)

# generate the TF-IDF matrix: one row per overview
tf_mat <- tfv$fit_transform(movies_tmdb_5000_top$overview)

# generate the movie-by-movie cosine similarity matrix
cosine_similarity <- cosine_similarity_matrix(tf_mat)

# 1) specify the title of the movie you like, for which you want to generate
#    a list of similar movies
liked_title <- "Avatar"

# 2) determine the position of the liked movie in the similarity matrix
position <- which(movies_tmdb_5000_top$original_title == liked_title)

# Without exactly one match the steps below would silently build a malformed
# table (e.g. the overview column would be renamed "Cosine Similarity").
if (length(position) != 1L) {
  stop(
    "Expected exactly one movie titled \"", liked_title, "\", found ",
    length(position), ".",
    call. = FALSE
  )
}

# pull the column holding the liked movie's similarity scores vs. all movies
similarity_column <- as.matrix(cosine_similarity[, position])

# add the synopsis to the scores from the previous step
similarity_column_df <- as.data.frame(similarity_column)
similarity_column_df_with_overview <- similarity_column_df %>%
  mutate(overview = movies_tmdb_5000_top$overview)

# add titles as row names
rownames(similarity_column_df_with_overview) <-
  movies_tmdb_5000_top$original_title

# order the recommended movie list by cosine similarity (higher is better)
most_similar <- similarity_column_df_with_overview[
  order(-similarity_column), ,
  drop = FALSE
]

# give the similarity score column a readable name
names(most_similar)[1] <- "Cosine Similarity"

# display the ordered recommendation list
# (column 1 of the rendered table is the row names, i.e. the titles)
most_similar %>%
  kbl() %>%
  kable_paper("hover", full_width = FALSE) %>%
  column_spec(1, width = "15em") %>%
  column_spec(2, width = "5em") %>%
  column_spec(3, width = "75em") %>%
  kable_styling(bootstrap_options = "striped", font_size = 12)
| | Cosine Similarity | overview |
|---|---|---|
| Avatar | 1.0000000 | In the 22nd century, a paraplegic Marine is dispatched to the moon Pandora on a unique mission, but becomes torn between following orders and protecting an alien civilization. |
| John Carter | 0.0322705 | John Carter is a war-weary, former military captain who’s inexplicably transported to the mysterious and exotic planet of Barsoom (Mars) and reluctantly becomes embroiled in an epic conflict. It’s a world on the brink of collapse, and Carter rediscovers his humanity when he realizes the survival of Barsoom and its people rests in his hands. |
| Spider-Man 3 | 0.0318960 | The seemingly invincible Spider-Man goes up against an all-new crop of villain – including the shape-shifting Sandman. While Spider-Man’s superpowers are altered by an alien organism, his alter ego, Peter Parker, deals with nemesis Eddie Brock and also gets caught up in a love triangle. |
| Avengers: Age of Ultron | 0.0306817 | When Tony Stark tries to jumpstart a dormant peacekeeping program, things go awry and Earth’s Mightiest Heroes are put to the ultimate test as the fate of the planet hangs in the balance. As the villainous Ultron emerges, it is up to The Avengers to stop him from enacting his terrible plans, and soon uneasy alliances and unexpected action pave the way for an epic and unique global adventure. |
| The Dark Knight Rises | 0.0258603 | Following the death of District Attorney Harvey Dent, Batman assumes responsibility for Dent’s crimes to protect the late attorney’s reputation and is subsequently hunted by the Gotham City Police Department. Eight years later, Batman encounters the mysterious Selina Kyle and the villainous Bane, a new terrorist leader who overwhelms Gotham’s finest. The Dark Knight resurfaces to protect a city that has branded him an enemy. |
| Pirates of the Caribbean: At World’s End | 0.0000000 | Captain Barbossa, long believed to be dead, has come back to life and is headed to the edge of the Earth with Will Turner and Elizabeth Swann. But nothing is quite as it seems. |
| Spectre | 0.0000000 | A cryptic message from Bond’s past sends him on a trail to uncover a sinister organization. While M battles political forces to keep the secret service alive, Bond peels back the layers of deceit to reveal the terrible truth behind SPECTRE. |
| Tangled | 0.0000000 | When the kingdom’s most wanted-and most charming-bandit Flynn Rider hides out in a mysterious tower, he’s taken hostage by Rapunzel, a beautiful and feisty tower-bound teen with 70 feet of magical, golden hair. Flynn’s curious captor, who’s looking for her ticket out of the tower where she’s been locked away for years, strikes a deal with the handsome thief and the unlikely duo sets off on an action-packed escapade, complete with a super-cop horse, an over-protective chameleon and a gruff gang of pub thugs. |
| Harry Potter and the Half-Blood Prince | 0.0000000 | As Harry begins his sixth year at Hogwarts, he discovers an old book marked as ‘Property of the Half-Blood Prince’, and begins to learn more about Lord Voldemort’s dark past. |
| Batman v Superman: Dawn of Justice | 0.0000000 | Fearing the actions of a god-like Super Hero left unchecked, Gotham City’s own formidable, forceful vigilante takes on Metropolis’s most revered, modern-day savior, while the world wrestles with what sort of hero it really needs. And with Batman and Superman at war with one another, a new threat quickly arises, putting mankind in greater danger than it’s ever known before. |