Recommendation Engines
Professor: Ivan Tarradellas Olmo
Done By: Hani Eid
Lab #2
Recommendation Sports
library(data.table)
library(xlsx)
## Loading required package: rJava
## Loading required package: xlsxjars
library(knitr)
library(kableExtra)
# Set the knitr root directory to the assignment folder. The path is an
# absolute, machine-specific location, so it has to be edited before the
# report is knitted on another computer.
knitr::opts_knit$set(root.dir = 'C:/Users/Hani/Google Drive/IE/Term 2/5- Recommendation Engines/Assignments/2/Problem')
#setwd('C:/Users/Hani/Google Drive/IE/Term 2/5- Recommendation Engines/Assignments/2/Problem')
# Render a data frame as a styled HTML table.
#
# The frame is piped through kable("html") and kable_styling(); every body
# row is then drawn bold, white on black and centred, columns 1 and 3 get a
# teal background, and the header row (row 0) is bold, centred, red and
# 18pt.
#
# DataFrame: the data frame to render.
# Returns whatever the final row_spec() call returns.
drawTable <- function(DataFrame) {
  # seq_len() gives an empty index for a zero-row frame; `1:nrow()` would
  # give c(1, 0) and address both a missing body row and the header.
  body_rows <- seq_len(nrow(DataFrame))

  DataFrame %>%
    kable("html") %>%
    kable_styling() %>%
    row_spec(
      row = body_rows,
      bold = TRUE,
      color = "white",
      background = "#000000",
      align = "c"
    ) %>%
    column_spec(column = c(1, 3), background = "#0AA19A") %>%
    row_spec(0, bold = TRUE, align = "c", font_size = 18, color = "#B60404")
}
# Build the profile table from two column-aligned tables.
#
# For every column of `sports` and every column of `users`, the two columns
# are multiplied element by element and summed, skipping positions where the
# product is NA.
#
# sports: data frame (or list of equal-length numeric vectors), one column
#         per attribute.
# users:  data frame (or list of equal-length numeric vectors), one column
#         per user; must have the same number of rows as `sports`.
# Returns a numeric matrix with one row per column of `users` and one column
# per column of `sports`. With a single user column the result is a named
# vector instead, as it was with the previous sapply() implementation.
createProfiles <- function(sports, users) {
  n_users <- length(users)

  # vapply() pins the result type and shape; TRUE is spelled out because `T`
  # is an ordinary variable that can be reassigned.
  vapply(
    sports,
    function(attribute) {
      vapply(
        users,
        function(rating) sum(attribute * rating, na.rm = TRUE),
        numeric(1)
      )
    },
    numeric(n_users)
  )
}
# Score every row of `sports` against every row of `profiles`.
#
# Each score is the dot product of an item row and a profile row divided by
# the product of their Euclidean norms (cosine similarity). Scores that come
# out NA/NaN (for example an all-zero profile, or an NA in the dot product)
# are replaced by 0.
#
# sports:   data frame with one row per item and one column per attribute.
# profiles: matrix or data frame with one row per user and the same columns
#           as `sports`.
# Returns a list with
#   Prediction: data frame, one row per item and one column per user;
#   Likes:      per-user count of scores > 0;
#   Dislikes:   per-user count of scores < 0;
#   Neutral:    per-user count of scores == 0.
process <- function(sports, profiles) {
  # Transpose so each item / each user becomes one column that vapply() can
  # hand over as a plain vector.
  items_by_col <- as.data.frame(t(sports))
  users_by_col <- as.data.frame(t(profiles))
  n_items <- length(items_by_col)
  n_users <- length(users_by_col)

  # Dot products, items x users. Wrapping in matrix() keeps the result
  # two-dimensional even when there is a single item or a single user.
  dot_products <- matrix(
    vapply(
      users_by_col,
      function(profile) {
        vapply(items_by_col, function(item) sum(profile * item), numeric(1))
      },
      numeric(n_items)
    ),
    nrow = n_items,
    ncol = n_users,
    dimnames = list(names(items_by_col), names(users_by_col))
  )

  # Euclidean norm of every item row and every profile row; missing values
  # are ignored in the norms.
  item_norms <- sqrt(rowSums(sports * sports, na.rm = TRUE))
  profile_norms <- sqrt(rowSums(profiles * profiles, na.rm = TRUE))
  norm_products <- outer(item_norms, profile_norms)

  predictions <- as.data.frame(dot_products / norm_products)
  # 0 / 0 yields NaN, which is.na() also catches.
  predictions[is.na(predictions)] <- 0

  likes <- colSums(data.frame(Likes = predictions > 0))
  dislikes <- colSums(data.frame(Dislikes = predictions < 0))
  neutral <- colSums(data.frame(Neutral = predictions == 0))

  list(
    "Prediction" = predictions,
    "Likes" = likes,
    "Dislikes" = dislikes,
    "Neutral" = neutral
  )
}
CB - Simply Unary:
# Read three cell ranges from the "CB - Simply Unary" sheet (rows 2-22, the
# first of which is used as the header): columns 2-11, 15-18 and 25-28.
CB_Simply_Unary_Sports <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Simply Unary",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)
CB_Simply_Unary_Users <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Simply Unary",
  colIndex = 15:18,
  rowIndex = 2:22,
  header = TRUE
)
CB_Simply_Unary_Predictions <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Simply Unary",
  colIndex = 25:28,
  rowIndex = 2:22,
  header = TRUE
)

# Coerce every column of the first range to numeric.
CB_Simply_Unary_Sports <- as.data.frame(
  sapply(CB_Simply_Unary_Sports, as.numeric)
)

# Build the user profile table and print its grand total.
profiles <- createProfiles(CB_Simply_Unary_Sports, CB_Simply_Unary_Users)
sum(profiles, na.rm = TRUE)
## [1] 2

# Score the items against the profiles and unpack the result list.
output <- process(CB_Simply_Unary_Sports, profiles)
Prediction <- output[["Prediction"]]
Likes1 <- output[["Likes"]]
Dislikes1 <- output[["Dislikes"]]
Neutral <- output[["Neutral"]]

# Drop the row names before rendering the table.
rownames(Prediction) <- NULL
drawTable(Prediction)
| User.1 | User.2 | User.3 | User.4 |
|---|---|---|---|
| 0.4364358 | 0.2773501 | -0.6382847 | 0 |
| 0.0000000 | 0.8807048 | -0.5025189 | 0 |
| 0.3299144 | 0.5241424 | -0.7237469 | 0 |
| 0.0629941 | 0.8006408 | -0.6030227 | 0 |
| 0.0824786 | 0.5765567 | -0.3947710 | 0 |
| 0.4454354 | 0.6227524 | -0.7106691 | 0 |
| 0.2070197 | 0.6139406 | -0.6055301 | 0 |
| 0.0000000 | 0.6864065 | -0.4308202 | 0 |
| 0.4454354 | 0.4529108 | -0.7817360 | 0 |
| -0.0824786 | 0.5765567 | -0.5263614 | 0 |
| 0.1649572 | 0.5765567 | -0.4605662 | 0 |
| 0.4948717 | 0.3144855 | -0.9211324 | 0 |
| 0.1259882 | 0.7606087 | -0.5527708 | 0 |
| 0.0000000 | 0.8334936 | -0.3692745 | 0 |
| 0.2182179 | 0.4160251 | -0.6963106 | 0 |
| 0.6598289 | 0.3144855 | -0.8553372 | 0 |
| 0.0000000 | 0.8807048 | -0.5025189 | 0 |
| 0.2672612 | 0.7359801 | -0.5685352 | 0 |
| -0.1454786 | 0.5547002 | -0.3481553 | 0 |
| 0.0771517 | 0.7354355 | -0.4923660 | 0 |
CB - Unit Weight:
# Read two cell ranges from the "CB - Unit Weight" sheet (rows 2-22, the
# first of which is used as the header): columns 2-11 and 15-18.
CB_Unit_Weight <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Unit Weight",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)
CB_Unit_Weight_Users <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Unit Weight",
  colIndex = 15:18,
  rowIndex = 2:22,
  header = TRUE
)

# Divide every row by its own sum (NAs ignored in the sum), then coerce each
# column to numeric.
# NOTE(review): this scales by the row sum, not by the Euclidean norm;
# confirm that is the normalisation the exercise intends.
CB_Unit_Weight <- CB_Unit_Weight / rowSums(CB_Unit_Weight, na.rm = TRUE)
CB_Unit_Weight <- as.data.frame(sapply(CB_Unit_Weight, as.numeric))

# Build the user profile table and print its grand total.
profiles2 <- createProfiles(CB_Unit_Weight, CB_Unit_Weight_Users)
sum(profiles2, na.rm = TRUE)
## [1] -5.551115e-17

# Score the items against the profiles and unpack the result list.
output2 <- process(CB_Unit_Weight, profiles2)
Prediction2 <- output2[["Prediction"]]
Likes2 <- output2[["Likes"]]
Dislikes2 <- output2[["Dislikes"]]
Neutral2 <- output2[["Neutral"]]

# Drop the row names before rendering the table.
rownames(Prediction2) <- NULL
drawTable(Prediction2)
| User.1 | User.2 | User.3 | User.4 |
|---|---|---|---|
| 0.4279345 | -0.2683725 | -0.3822354 | 0 |
| -0.2543633 | 0.8346834 | -0.0569803 | 0 |
| 0.3286794 | 0.0062994 | -0.3618734 | 0 |
| -0.1635193 | 0.6437428 | -0.2849014 | 0 |
| -0.0489522 | 0.1133893 | 0.1644879 | 0 |
| 0.6594941 | 0.1002972 | -0.3223292 | 0 |
| -0.1284729 | 0.2546005 | -0.3223292 | 0 |
| -0.0726752 | 0.3327823 | 0.0000000 | 0 |
| 0.4453727 | -0.2468854 | -0.4432026 | 0 |
| -0.2727339 | 0.0818923 | 0.0000000 | 0 |
| 0.0349659 | 0.1322876 | 0.0986928 | 0 |
| 0.5734406 | -0.4346591 | -0.7566444 | 0 |
| -0.0908440 | 0.6055546 | -0.1709409 | 0 |
| -0.0423939 | 0.5891883 | 0.1994310 | 0 |
| 0.1211254 | 0.2891387 | -0.6837635 | 0 |
| 0.7972223 | -0.4031621 | -0.6250541 | 0 |
| -0.2543633 | 0.8346834 | -0.0569803 | 0 |
| 0.2997701 | 0.3471825 | 0.0000000 | 0 |
| -0.2925122 | 0.1659030 | 0.1019294 | 0 |
| 0.0484502 | 0.3982476 | -0.0284901 | 0 |
CB - IDF:
# Read two cell ranges from the "CB - IDF" sheet (rows 2-22, the first of
# which is used as the header): columns 2-11 and 15-18.
CB_IDF <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - IDF",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)
CB_IDF_Users <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - IDF",
  colIndex = 15:18,
  rowIndex = 2:22,
  header = TRUE
)

# Column totals of the raw table.
# NOTE(review): CB_IDF_DF is not referenced again in the visible code.
CB_IDF_DF <- colSums(CB_IDF, na.rm = TRUE)

# Divide every row by its own sum.
# NOTE(review): unlike the column totals above, this sum does not pass
# na.rm, so a row containing NA becomes all NA; confirm that is intended.
CB_IDF <- CB_IDF / rowSums(CB_IDF)

# Per column: log10 of (number of rows / number of rows with a value > 0).
IDF <- log10(nrow(CB_IDF) / colSums(CB_IDF > 0))
CB_IDF <- as.data.frame(sapply(CB_IDF, as.numeric))

# Build the user profile table and print its grand total.
profiles3 <- createProfiles(CB_IDF, CB_IDF_Users)
sum(profiles3, na.rm = TRUE)
## [1] -5.551115e-17

# Transpose, multiply by IDF, and transpose back into a data frame.
profiles3 <- as.data.frame(t(as.data.frame(t(profiles3)) * IDF))

# Score the items against the weighted profiles and unpack the result list.
output3 <- process(CB_IDF, profiles3)
Prediction3 <- output3[["Prediction"]]
Likes3 <- output3[["Likes"]]
Dislikes3 <- output3[["Dislikes"]]
Neutral3 <- output3[["Neutral"]]

# Drop the row names before rendering the table.
rownames(Prediction3) <- NULL
drawTable(Prediction3)
| User.1 | User.2 | User.3 | User.4 |
|---|---|---|---|
| 0.4903091 | -0.4363629 | -0.4505263 | 0 |
| -0.2228319 | 0.6956329 | -0.0876160 | 0 |
| 0.2350269 | -0.1495089 | -0.3400316 | 0 |
| -0.1375099 | 0.4901912 | -0.2880700 | 0 |
| -0.0569612 | 0.1117277 | 0.1592409 | 0 |
| 0.6591106 | -0.1727666 | -0.4048738 | 0 |
| -0.1094526 | 0.2636744 | -0.2971410 | 0 |
| -0.0601152 | 0.1385157 | -0.0163107 | 0 |
| 0.3607507 | -0.2705844 | -0.4164519 | 0 |
| -0.2232022 | 0.0941731 | 0.0158313 | 0 |
| 0.0524502 | 0.0483194 | 0.0533896 | 0 |
| 0.6220964 | -0.5463667 | -0.7784262 | 0 |
| -0.0835215 | 0.4445096 | -0.1946981 | 0 |
| -0.0554566 | 0.4265722 | 0.1533189 | 0 |
| 0.0721562 | 0.1852638 | -0.6287827 | 0 |
| 0.7883375 | -0.5162661 | -0.6706096 | 0 |
| -0.2228319 | 0.6956329 | -0.0876160 | 0 |
| 0.1816009 | 0.1889433 | 0.0000000 | 0 |
| -0.2127451 | 0.1006560 | 0.0811885 | 0 |
| 0.0298654 | 0.2211304 | -0.0471352 | 0 |
# Read columns 2-11, rows 2-22 (first row as header) from each of the two
# hybrid sheets.
Hybrid_Switching <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "Hybrid - Switching",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)
Hybrid_Challenge <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "Hybrid - Challenge",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)