Librerías
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.0 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(recommenderlab)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
##
## Loading required package: arules
##
## Attaching package: 'arules'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following objects are masked from 'package:base':
##
## abbreviate, write
##
## Loading required package: proxy
##
## Attaching package: 'proxy'
##
## The following object is masked from 'package:Matrix':
##
## as.matrix
##
## The following objects are masked from 'package:stats':
##
## as.dist, dist
##
## The following object is masked from 'package:base':
##
## as.matrix
##
## Registered S3 methods overwritten by 'registry':
## method from
## print.registry_field proxy
## print.registry_entry proxy
Matriz de evaluaciones de películas
# Movie rating matrix: one row per person, one column per movie.
# NA marks a movie the person has not rated yet.
movie_ratings_tarea <- data.frame(
  panda     = c(5, 4, 5, 4),
  enredados = c(4, 3, 4, NA),
  UP        = c(4, NA, 4, 5),
  CARS      = c(5, 5, 5, NA),
  Toystory  = c(3, 2, 3, 4),
  shrek     = c(5, 3, 4, 5),
  monja     = c(2, 4, 2, NA),
  # Row labels are given in the same order as the values in each column.
  row.names = c("Luis", "David", "Emilio", "Yo")
)
movie_ratings_tarea
## panda enredados UP CARS Toystory shrek monja
## Luis 5 4 4 5 3 5 2
## David 4 3 NA 5 2 3 4
## Emilio 5 4 4 5 3 4 2
## Yo 4 NA 5 NA 4 5 NA
#' Cosine similarity between two rating vectors
#'
#' Missing ratings (`NA`) are replaced with 0 in both vectors before the
#' computation, so an unrated item adds nothing to the dot product or to
#' either norm.
#'
#' @param vec_1 Numeric vector of ratings.
#' @param vec_2 Numeric vector of ratings, same length as `vec_1`.
#' @return A single number: `sum(vec_1 * vec_2)` divided by the product of
#'   the Euclidean norms of the two vectors. The result is `NaN` when either
#'   vector has zero norm after NA replacement (0 / 0).
cosine_similarity <- function(vec_1, vec_2) {
  # Without this guard a shorter vector would be recycled in the dot product
  # but not in its own norm, producing a value that is not a cosine.
  stopifnot(length(vec_1) == length(vec_2))

  # NA values are replaced with 0
  vec_1[is.na(vec_1)] <- 0
  vec_2[is.na(vec_2)] <- 0

  # Denominator: product of the two Euclidean norms
  denominator <- sqrt(sum(vec_1^2)) * sqrt(sum(vec_2^2))

  # Numerator: dot product
  numerator <- sum(vec_1 * vec_2)

  numerator / denominator
}
# Extract each person's row of ratings as a plain numeric vector
You <- as.numeric(unlist(movie_ratings_tarea["Yo", ], use.names = FALSE))
Luis <- as.numeric(unlist(movie_ratings_tarea["Luis", ], use.names = FALSE))
David <- as.numeric(unlist(movie_ratings_tarea["David", ], use.names = FALSE))
Emilio <- as.numeric(unlist(movie_ratings_tarea["Emilio", ], use.names = FALSE))

# Compute the "distance" from You to each friend using cosine similarity.
# The list order must match the order of `row.names` below.
similarities_tarea <- data.frame(
  cosine_similarity = vapply(
    list(David, Luis, Emilio),
    function(friend_vec) cosine_similarity(You, friend_vec),
    numeric(1)
  ),
  row.names = c("David", "Luis", "Emilio")
)
similarities_tarea
## cosine_similarity
## David 0.4845562
## Luis 0.7762349
## Emilio 0.7546822
#' Similarity-weighted average rating of a movie across a set of friends
#'
#' Each friend's rating is weighted by that friend's similarity score.
#' Friends with no rating (`NA`) for the movie are left out of both the
#' numerator and the denominator.
#'
#' @param movie Column name of the movie in `ratings`.
#' @param friends Character vector of row names to average over.
#' @param ratings Data frame of ratings with people as row names and movies
#'   as columns. Defaults to the global `movie_ratings_tarea`.
#' @param similarities Data frame with people as row names whose first
#'   column holds the similarity weight. Defaults to the global
#'   `similarities_tarea`.
#' @return A single number, the weighted average rating; `NA_real_` when no
#'   listed friend has rated the movie or the usable weights sum to zero.
movie_rating_weighted_average <- function(movie, friends,
                                          ratings = movie_ratings_tarea,
                                          similarities = similarities_tarea) {
  # Explicit row/column indexing: one value per friend, independent of how
  # many columns the similarity table happens to have.
  friend_ratings <- ratings[friends, movie]
  friend_weights <- similarities[friends, 1]

  # Only friends who actually rated the movie take part in the average
  rated <- !is.na(friend_ratings)
  total_weight <- sum(friend_weights[rated])

  # Nothing to average (or zero total weight): avoid returning 0 / 0 = NaN
  if (!any(rated) || isTRUE(total_weight == 0)) {
    return(NA_real_)
  }

  sum(friend_weights[rated] * friend_ratings[rated]) / total_weight
}
# Friends whose ratings feed the prediction, and the movies still unrated
# by "Yo" for which a rating is predicted.
friend_names <- c("Luis", "David", "Emilio")
new_movies <- c("enredados", "CARS", "monja")

# Build the result in one step instead of growing a tibble inside a loop;
# this also yields a well-formed (0-row, 2-column) tibble if `new_movies`
# is empty.
new_movie_predicted_ratings <- tibble(
  movie = new_movies,
  predicted_rating = vapply(
    new_movies,
    movie_rating_weighted_average,
    numeric(1),
    friends = friend_names,
    USE.NAMES = FALSE
  )
)
new_movie_predicted_ratings
## # A tibble: 3 × 2
## movie predicted_rating
## <chr> <dbl>
## 1 enredados 3.76
## 2 CARS 5
## 3 monja 2.48