Recommendation Engines

Professor: Ivan Tarradellas Olmo

Done By: Hani Eid

Lab #2

Recommendation Sports


library(data.table)
library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars
library(knitr)
library(kableExtra)
# Make knitr evaluate every chunk with this folder as the working directory,
# so relative file names used in later chunks resolve against it.
# NOTE(review): this is an absolute, machine-specific path - the document will
# only knit on another machine after it is adjusted.
knitr::opts_knit$set(root.dir = 'C:/Users/Hani/Google Drive/IE/Term 2/5- Recommendation Engines/Assignments/2/Problem')
# Disabled alternative that changes the working directory directly instead of
# going through the knitr option above.
#setwd('C:/Users/Hani/Google Drive/IE/Term 2/5- Recommendation Engines/Assignments/2/Problem')
# Render a data frame as a styled HTML table.
#
# DataFrame: the data frame to display.
#
# Returns the kable object produced by knitr::kable() after styling:
#   - body rows: bold, white text on black, centred
#   - columns 1 and 3: teal background
#   - header row (row 0): bold, centred, 18pt, dark red
drawTable = function(DataFrame){
  styled = kable_styling(kable(DataFrame, "html"))

  # seq_len() instead of 1:nrow(): for a zero-row table 1:0 would expand to
  # c(1, 0) and apply the body styling to the header and a non-existent row.
  body_rows = seq_len(nrow(DataFrame))
  if (length(body_rows) > 0) {
    styled = row_spec(styled, row = body_rows, bold = TRUE, color = "white", background = "#000000", align = "c")
  }

  styled = column_spec(styled, column = c(1,3), background = "#0AA19A")

  # Row 0 addresses the header row; it is styled last so it keeps its own colours.
  row_spec(styled, 0, bold = TRUE, align = "c", font_size = 18, color = "#B60404")
}
# Build the user profile table.
#
# sports: data frame with one column per attribute and one row per item.
# users:  data frame with one column per user and one row per item
#         (same row order as `sports`); NA entries are ignored.
#
# Returns a numeric matrix with one row per user and one column per attribute.
# Each cell is the sum over items of attribute value * user value, skipping
# items where the product is NA.
createProfiles = function(sports, users) {
    n_users = length(users)
    n_attributes = length(sports)

    # Weight of a single attribute column for every user.
    weigh_attribute = function(attribute) {
        vapply(users, function(ratings) sum(attribute * ratings, na.rm = TRUE), numeric(1))
    }

    weights = vapply(sports, weigh_attribute, numeric(n_users))

    # With a single user the result above is a plain vector; rebuilding the
    # matrix keeps the users x attributes shape that rowSums()/t() in the
    # downstream code rely on.
    profiles = matrix(weights, nrow = n_users, ncol = n_attributes,
                      dimnames = list(names(users), names(sports)))
    return(profiles)
}
# Score every item against every user profile with cosine similarity and
# count how many items each user likes, dislikes or is neutral about.
#
# sports:   data frame with one row per item and one column per attribute.
# profiles: matrix or data frame with one row per user and one column per
#           attribute (same column order as `sports`).
#
# Returns a list with four elements:
#   Prediction - data frame (items x users) of cosine similarities; undefined
#                values (e.g. a zero-length profile) are replaced by 0
#   Likes      - per-user count of predictions > 0
#   Dislikes   - per-user count of predictions < 0
#   Neutral    - per-user count of predictions == 0
process = function(sports, profiles) {
    # Transpose so each column holds the attribute vector of one item / user.
    item_vectors = as.data.frame(t(sports))
    user_vectors = as.data.frame(t(profiles))
    n_items = length(item_vectors)
    n_users = length(user_vectors)

    # Numerator: dot product of every item with every profile. sum() is kept
    # (rather than a matrix product) so exact zeros stay exact for the
    # "Neutral" classification below.
    dot_products = vapply(user_vectors, function(profile) {
        vapply(item_vectors, function(item) sum(profile * item), numeric(1))
    }, numeric(n_items))
    # vapply() yields a plain vector for a single item; force items x users.
    numerator = matrix(dot_products, nrow = n_items, ncol = n_users,
                       dimnames = list(names(item_vectors), names(user_vectors)))

    # Denominator: product of the Euclidean norms of item and profile.
    item_norms = sqrt(rowSums(sports * sports, na.rm = TRUE))
    profile_norms = sqrt(rowSums(profiles * profiles, na.rm = TRUE))
    denominator = outer(item_norms, profile_norms)

    prediction_table = as.data.frame(numerator / denominator)
    # 0 / 0 (all-zero profile or item) gives NaN; report it as "no preference".
    prediction_table[is.na(prediction_table)] = 0

    scores = as.matrix(prediction_table)
    like_counts = colSums(data.frame(Likes = scores > 0))
    dislike_counts = colSums(data.frame(Dislikes = scores < 0))
    neutral_counts = colSums(data.frame(Neutral = scores == 0))

    newList <- list("Prediction" = prediction_table, "Likes" = like_counts, "Dislikes" = dislike_counts, "Neutral" = neutral_counts)
    return(newList)
}

CB - Simply Unary:

# Read three column ranges of the "CB - Simply Unary" sheet (rows 2-22, first
# row used as header): columns 2-11, 15-18 and 25-28.
CB_Simply_Unary_Sports <- read.xlsx("exerciseCB.xlsx", sheetName = "CB - Simply Unary", colIndex = 2:11, rowIndex = 2:22, header = TRUE)
CB_Simply_Unary_Users <- read.xlsx("exerciseCB.xlsx", sheetName = "CB - Simply Unary", colIndex = 15:18, rowIndex = 2:22, header = TRUE)
CB_Simply_Unary_Predictions <- read.xlsx("exerciseCB.xlsx", sheetName = "CB - Simply Unary", colIndex = 25:28, rowIndex = 2:22, header = TRUE)

# Coerce every column to numeric.
CB_Simply_Unary_Sports <- as.data.frame(sapply(CB_Simply_Unary_Sports, as.numeric))


# Create User Profiles Table
profiles <- createProfiles(CB_Simply_Unary_Sports, CB_Simply_Unary_Users)
sum(profiles, na.rm = TRUE)
## [1] 2
# Predictions
output <- process(CB_Simply_Unary_Sports, profiles)

Prediction <- output[["Prediction"]]
Likes1 <- output[["Likes"]]
Dislikes1 <- output[["Dislikes"]]
Neutral <- output[["Neutral"]]
# Drop the row names before rendering the table.
rownames(Prediction) <- NULL
drawTable(Prediction)
User.1 User.2 User.3 User.4
0.4364358 0.2773501 -0.6382847 0
0.0000000 0.8807048 -0.5025189 0
0.3299144 0.5241424 -0.7237469 0
0.0629941 0.8006408 -0.6030227 0
0.0824786 0.5765567 -0.3947710 0
0.4454354 0.6227524 -0.7106691 0
0.2070197 0.6139406 -0.6055301 0
0.0000000 0.6864065 -0.4308202 0
0.4454354 0.4529108 -0.7817360 0
-0.0824786 0.5765567 -0.5263614 0
0.1649572 0.5765567 -0.4605662 0
0.4948717 0.3144855 -0.9211324 0
0.1259882 0.7606087 -0.5527708 0
0.0000000 0.8334936 -0.3692745 0
0.2182179 0.4160251 -0.6963106 0
0.6598289 0.3144855 -0.8553372 0
0.0000000 0.8807048 -0.5025189 0
0.2672612 0.7359801 -0.5685352 0
-0.1454786 0.5547002 -0.3481553 0
0.0771517 0.7354355 -0.4923660 0

CB - Unit Weight:

# Read columns 2-11 and 15-18 of the "CB - Unit Weight" sheet (rows 2-22,
# first row used as header).
CB_Unit_Weight <- read.xlsx("exerciseCB.xlsx", sheetName = "CB - Unit Weight", colIndex = 2:11, rowIndex = 2:22, header = TRUE)
CB_Unit_Weight_Users <- read.xlsx("exerciseCB.xlsx", sheetName = "CB - Unit Weight", colIndex = 15:18, rowIndex = 2:22, header = TRUE)

# Divide every row by its own total, then coerce every column to numeric.
CB_Unit_Weight <- CB_Unit_Weight / rowSums(CB_Unit_Weight, na.rm = TRUE)
CB_Unit_Weight <- as.data.frame(sapply(CB_Unit_Weight, as.numeric))

# Create User Profiles Table
profiles2 <- createProfiles(CB_Unit_Weight, CB_Unit_Weight_Users)
sum(profiles2, na.rm = TRUE)
## [1] -5.551115e-17
# Predictions
output2 <- process(CB_Unit_Weight, profiles2)

Prediction2 <- output2[["Prediction"]]
Likes2 <- output2[["Likes"]]
Dislikes2 <- output2[["Dislikes"]]
Neutral2 <- output2[["Neutral"]]

# Drop the row names before rendering the table.
rownames(Prediction2) <- NULL

drawTable(Prediction2)
User.1 User.2 User.3 User.4
0.4279345 -0.2683725 -0.3822354 0
-0.2543633 0.8346834 -0.0569803 0
0.3286794 0.0062994 -0.3618734 0
-0.1635193 0.6437428 -0.2849014 0
-0.0489522 0.1133893 0.1644879 0
0.6594941 0.1002972 -0.3223292 0
-0.1284729 0.2546005 -0.3223292 0
-0.0726752 0.3327823 0.0000000 0
0.4453727 -0.2468854 -0.4432026 0
-0.2727339 0.0818923 0.0000000 0
0.0349659 0.1322876 0.0986928 0
0.5734406 -0.4346591 -0.7566444 0
-0.0908440 0.6055546 -0.1709409 0
-0.0423939 0.5891883 0.1994310 0
0.1211254 0.2891387 -0.6837635 0
0.7972223 -0.4031621 -0.6250541 0
-0.2543633 0.8346834 -0.0569803 0
0.2997701 0.3471825 0.0000000 0
-0.2925122 0.1659030 0.1019294 0
0.0484502 0.3982476 -0.0284901 0

CB - IDF:

# Read columns 2-11 and 15-18 of the "CB - IDF" sheet (rows 2-22, first row
# used as header).
CB_IDF <- read.xlsx("exerciseCB.xlsx", sheetName = "CB - IDF", colIndex = 2:11, rowIndex = 2:22, header = TRUE)
CB_IDF_Users <- read.xlsx("exerciseCB.xlsx", sheetName = "CB - IDF", colIndex = 15:18, rowIndex = 2:22, header = TRUE)
# Column totals taken before the rows are rescaled.
CB_IDF_DF <- colSums(CB_IDF, na.rm = TRUE)
# Divide every row by its own total.
CB_IDF <- CB_IDF / rowSums(CB_IDF)

# Per column: log10(number of rows / number of rows with a positive value).
IDF <- log10(nrow(CB_IDF) / colSums(CB_IDF > 0))
CB_IDF <- as.data.frame(sapply(CB_IDF, as.numeric))

# Create User Profiles Table
profiles3 <- createProfiles(CB_IDF, CB_IDF_Users)
sum(profiles3, na.rm = TRUE)
## [1] -5.551115e-17
# Scale each profile column by the matching IDF value: the transpose puts one
# column per row so that IDF recycles down each of them, then transpose back.
profiles3 <- as.data.frame(t(as.data.frame(t(profiles3)) * IDF))

# Predictions
output3 <- process(CB_IDF, profiles3)

Prediction3 <- output3[["Prediction"]]
Likes3 <- output3[["Likes"]]
Dislikes3 <- output3[["Dislikes"]]
Neutral3 <- output3[["Neutral"]]

# Drop the row names before rendering the table.
rownames(Prediction3) <- NULL

drawTable(Prediction3)
User.1 User.2 User.3 User.4
0.4903091 -0.4363629 -0.4505263 0
-0.2228319 0.6956329 -0.0876160 0
0.2350269 -0.1495089 -0.3400316 0
-0.1375099 0.4901912 -0.2880700 0
-0.0569612 0.1117277 0.1592409 0
0.6591106 -0.1727666 -0.4048738 0
-0.1094526 0.2636744 -0.2971410 0
-0.0601152 0.1385157 -0.0163107 0
0.3607507 -0.2705844 -0.4164519 0
-0.2232022 0.0941731 0.0158313 0
0.0524502 0.0483194 0.0533896 0
0.6220964 -0.5463667 -0.7784262 0
-0.0835215 0.4445096 -0.1946981 0
-0.0554566 0.4265722 0.1533189 0
0.0721562 0.1852638 -0.6287827 0
0.7883375 -0.5162661 -0.6706096 0
-0.2228319 0.6956329 -0.0876160 0
0.1816009 0.1889433 0.0000000 0
-0.2127451 0.1006560 0.0811885 0
0.0298654 0.2211304 -0.0471352 0
# Read columns 2-11 (rows 2-22, first row used as header) of the two hybrid sheets.
Hybrid_Switching <- read.xlsx("exerciseCB.xlsx", sheetName = "Hybrid - Switching", colIndex = 2:11, rowIndex = 2:22, header = TRUE)
Hybrid_Challenge <- read.xlsx("exerciseCB.xlsx", sheetName = "Hybrid - Challenge", colIndex = 2:11, rowIndex = 2:22, header = TRUE)