Recommendation Engines

Professor: Ivan Tarradellas Olmo

Done By: Hani Eid

Lab #2

Recommendation Sports

library(data.table)
library(xlsx)

## Loading required package: rJava

## Loading required package: xlsxjars

library(knitr)
library(kableExtra)

# Set knitr's root.dir option to the assignment folder; the workbook is read
# below through the relative file name "exerciseCB.xlsx".
knitr::opts_knit$set(
  root.dir = "C:/Users/Hani/Google Drive/IE/Term 2/5- Recommendation Engines/Assignments/2/Problem"
)
# Disabled alternative that changes the working directory directly:
# setwd('C:/Users/Hani/Google Drive/IE/Term 2/5- Recommendation Engines/Assignments/2/Problem')

# Render a data frame as a styled HTML table (kable + kableExtra).
#
# DataFrame: the table to render. Columns 1 and 3 are given a teal
#            background, so the table is expected to have at least three
#            columns.
# Returns the styled kable object produced by the last pipeline step.
drawTable <- function(DataFrame) {
  # seq_len() gives an empty index for a zero-row table; 1:nrow() would give
  # c(1, 0), i.e. restyle the header (row 0) and address a missing row 1.
  body_rows <- seq_len(nrow(DataFrame))

  DataFrame %>%
    kable("html") %>%
    kable_styling() %>%
    # Body rows: bold white text on black, centred.
    row_spec(row = body_rows, bold = TRUE, color = "white", background = "#000000", align = "c") %>%
    # Highlight the first and third columns.
    column_spec(column = c(1, 3), background = "#0AA19A") %>%
    # Header row (index 0): larger, bold, dark red, centred.
    row_spec(0, bold = TRUE, align = "c", font_size = 18, color = "#B60404")
}

# Build user profiles as the dot product of every user column with every
# attribute column.
#
# sports: data frame (or list of equal-length numeric vectors), one column per
#         attribute and one row per item.
# users:  data frame (or list), one column per user and one row per item,
#         aligned row-by-row with `sports`.
# Returns a numeric matrix with one row per user column and one column per
# attribute column; entry [u, a] is sum(users[[u]] * sports[[a]]), where item
# rows whose product is NA are skipped.
createProfiles <- function(sports, users) {
  dot_with_users <- function(attribute) {
    vapply(
      users,
      function(rating) sum(attribute * rating, na.rm = TRUE),
      numeric(1)
    )
  }

  # Always go through a list and an explicit matrix(): nested sapply() would
  # collapse to a bare vector when `users` has a single column, which the
  # row-wise code in process() cannot handle.
  weights <- lapply(sports, dot_with_users)

  matrix(
    unlist(weights, use.names = FALSE),
    nrow = length(users),
    ncol = length(sports),
    dimnames = list(names(users), names(sports))
  )
}

# Score every item against every user profile with cosine similarity and
# count how many items each user is predicted to like, dislike or be neutral
# about.
#
# sports:   numeric data frame or matrix, one row per item and one column per
#           attribute.
# profiles: numeric matrix or data frame, one row per user and one column per
#           attribute (same attribute order as `sports`).
# Returns a list with:
#   Prediction - data frame, items in rows and users in columns; NA/NaN
#                scores (missing attribute values, zero-length vectors) are
#                replaced by 0.
#   Likes      - per-user count of items with a score > 0.
#   Dislikes   - per-user count of items with a score < 0.
#   Neutral    - per-user count of items with a score == 0.
process <- function(sports, profiles) {
  item_matrix <- as.matrix(sports)
  profile_matrix <- as.matrix(profiles)
  stopifnot(ncol(item_matrix) == ncol(profile_matrix))

  # Item-by-user dot products. NA attribute values propagate here on purpose
  # (no na.rm); the affected scores are zeroed further down.
  numerator <- tcrossprod(item_matrix, profile_matrix)

  # Euclidean norms ignore missing values, so they are never NA.
  item_norm <- sqrt(rowSums(item_matrix^2, na.rm = TRUE))
  profile_norm <- sqrt(rowSums(profile_matrix^2, na.rm = TRUE))
  denominator <- outer(item_norm, profile_norm)

  prediction <- as.data.frame(numerator / denominator)
  # Covers both NA (missing input) and NaN (0 / 0 for all-zero vectors).
  prediction[is.na(prediction)] <- 0

  # data.frame(<Label> = <matrix>) prefixes each user column with the label,
  # which keeps result names such as "Likes.User.1".
  likes <- colSums(data.frame(Likes = prediction > 0))
  dislikes <- colSums(data.frame(Dislikes = prediction < 0))
  neutral <- colSums(data.frame(Neutral = prediction == 0))

  list(
    "Prediction" = prediction,
    "Likes" = likes,
    "Dislikes" = dislikes,
    "Neutral" = neutral
  )
}

CB - Simply Unary:

# Read three ranges of the "CB - Simply Unary" worksheet (rows 2-22, first row
# used as header): columns 2-11 -> Sports, 15-18 -> Users, 25-28 -> Predictions.
CB_Simply_Unary_Sports <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Simply Unary",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)
CB_Simply_Unary_Users <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Simply Unary",
  colIndex = 15:18,
  rowIndex = 2:22,
  header = TRUE
)
CB_Simply_Unary_Predictions <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Simply Unary",
  colIndex = 25:28,
  rowIndex = 2:22,
  header = TRUE
)

# Coerce every Sports column to numeric before it is used in arithmetic.
CB_Simply_Unary_Sports <- as.data.frame(
  sapply(CB_Simply_Unary_Sports, as.numeric)
)


# User profiles for the Simply Unary data; the printed total is a quick check
# of the profile table.
profiles <- createProfiles(CB_Simply_Unary_Sports, CB_Simply_Unary_Users)
sum(profiles, na.rm = TRUE)

## [1] 2

# Score every item for every user and unpack the pieces of the result.
output <- process(CB_Simply_Unary_Sports, profiles)

Prediction <- output[["Prediction"]]
Likes1 <- output[["Likes"]]
Dislikes1 <- output[["Dislikes"]]
Neutral <- output[["Neutral"]]

# Drop the row names so the rendered table shows only the user columns.
rownames(Prediction) <- NULL
drawTable(Prediction)

User.1	User.2	User.3
0.4364358	0.2773501	-0.6382847
0.0000000	0.8807048	-0.5025189
0.3299144	0.5241424	-0.7237469
0.0629941	0.8006408	-0.6030227
0.0824786	0.5765567	-0.3947710
0.4454354	0.6227524	-0.7106691
0.2070197	0.6139406	-0.6055301
0.0000000	0.6864065	-0.4308202
0.4454354	0.4529108	-0.7817360
-0.0824786	0.5765567	-0.5263614
0.1649572	0.5765567	-0.4605662
0.4948717	0.3144855	-0.9211324
0.1259882	0.7606087	-0.5527708
0.0000000	0.8334936	-0.3692745
0.2182179	0.4160251	-0.6963106
0.6598289	0.3144855	-0.8553372
0.0000000	0.8807048	-0.5025189
0.2672612	0.7359801	-0.5685352
-0.1454786	0.5547002	-0.3481553
0.0771517	0.7354355	-0.4923660

CB - Unit Weight:

# Read the "CB - Unit Weight" worksheet (rows 2-22, first row used as header):
# columns 2-11 hold the item matrix, columns 15-18 the user columns.
CB_Unit_Weight <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Unit Weight",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)
CB_Unit_Weight_Users <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - Unit Weight",
  colIndex = 15:18,
  rowIndex = 2:22,
  header = TRUE
)

# Divide every row by the sum of its non-missing cells.
# NOTE(review): this divides by the row sum, not by the Euclidean norm
# (sqrt of the row sum of squares) - confirm it matches the worksheet's
# definition of "unit weight".
CB_Unit_Weight <- CB_Unit_Weight / rowSums(CB_Unit_Weight, na.rm = TRUE)
CB_Unit_Weight <- as.data.frame(
  sapply(CB_Unit_Weight, as.numeric)
)

# User profiles for the row-normalised item matrix; the printed total is a
# quick check of the profile table.
profiles2 <- createProfiles(CB_Unit_Weight, CB_Unit_Weight_Users)
sum(profiles2, na.rm = TRUE)

## [1] -5.551115e-17

# Score every item for every user and unpack the pieces of the result.
output2 <- process(CB_Unit_Weight, profiles2)

Prediction2 <- output2[["Prediction"]]
Likes2 <- output2[["Likes"]]
Dislikes2 <- output2[["Dislikes"]]
Neutral2 <- output2[["Neutral"]]

# Drop the row names so the rendered table shows only the user columns.
rownames(Prediction2) <- NULL

drawTable(Prediction2)

User.1	User.2	User.3
0.4279345	-0.2683725	-0.3822354
-0.2543633	0.8346834	-0.0569803
0.3286794	0.0062994	-0.3618734
-0.1635193	0.6437428	-0.2849014
-0.0489522	0.1133893	0.1644879
0.6594941	0.1002972	-0.3223292
-0.1284729	0.2546005	-0.3223292
-0.0726752	0.3327823	0.0000000
0.4453727	-0.2468854	-0.4432026
-0.2727339	0.0818923	0.0000000
0.0349659	0.1322876	0.0986928
0.5734406	-0.4346591	-0.7566444
-0.0908440	0.6055546	-0.1709409
-0.0423939	0.5891883	0.1994310
0.1211254	0.2891387	-0.6837635
0.7972223	-0.4031621	-0.6250541
-0.2543633	0.8346834	-0.0569803
0.2997701	0.3471825	0.0000000
-0.2925122	0.1659030	0.1019294
0.0484502	0.3982476	-0.0284901

CB - IDF:

# Read the "CB - IDF" worksheet (rows 2-22, first row used as header):
# columns 2-11 hold the item matrix, columns 15-18 the user columns.
CB_IDF <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - IDF",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)
CB_IDF_Users <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "CB - IDF",
  colIndex = 15:18,
  rowIndex = 2:22,
  header = TRUE
)

# Column totals of the raw item matrix (missing cells ignored).
CB_IDF_DF <- colSums(CB_IDF, na.rm = TRUE)

# Divide every row by the sum of its non-missing cells. na.rm = TRUE matches
# the Unit Weight section; without it one blank cell turns the whole row NA.
CB_IDF <- CB_IDF / rowSums(CB_IDF, na.rm = TRUE)

# IDF per attribute: log10(number of items / number of items with a positive
# value). Missing cells are not counted as occurrences.
IDF <- log10(nrow(CB_IDF) / colSums(CB_IDF > 0, na.rm = TRUE))
# An attribute carried by no item gives log10(n / 0) = Inf, and Inf times a
# zero profile weight is NaN, which would zero every prediction. Such an
# attribute contributes nothing, so give it weight 0.
IDF[!is.finite(IDF)] <- 0

CB_IDF <- as.data.frame(sapply(CB_IDF, as.numeric))

# User profiles for the row-normalised item matrix; the printed total is a
# quick check of the profile table.
profiles3 <- createProfiles(CB_IDF, CB_IDF_Users)
sum(profiles3, na.rm = TRUE)

## [1] -5.551115e-17

# Weight every attribute of every profile by its IDF. The profile table is
# transposed so that attributes run down the rows, multiplied by the IDF
# vector, and transposed back.
profiles3 <- as.data.frame(
  t(
    as.data.frame(t(profiles3)) * IDF
  )
)

# Score every item for every user and unpack the pieces of the result.
output3 <- process(CB_IDF, profiles3)

Prediction3 <- output3[["Prediction"]]
Likes3 <- output3[["Likes"]]
Dislikes3 <- output3[["Dislikes"]]
Neutral3 <- output3[["Neutral"]]

# Drop the row names so the rendered table shows only the user columns.
rownames(Prediction3) <- NULL

drawTable(Prediction3)

User.1	User.2	User.3
0.4903091	-0.4363629	-0.4505263
-0.2228319	0.6956329	-0.0876160
0.2350269	-0.1495089	-0.3400316
-0.1375099	0.4901912	-0.2880700
-0.0569612	0.1117277	0.1592409
0.6591106	-0.1727666	-0.4048738
-0.1094526	0.2636744	-0.2971410
-0.0601152	0.1385157	-0.0163107
0.3607507	-0.2705844	-0.4164519
-0.2232022	0.0941731	0.0158313
0.0524502	0.0483194	0.0533896
0.6220964	-0.5463667	-0.7784262
-0.0835215	0.4445096	-0.1946981
-0.0554566	0.4265722	0.1533189
0.0721562	0.1852638	-0.6287827
0.7883375	-0.5162661	-0.6706096
-0.2228319	0.6956329	-0.0876160
0.1816009	0.1889433	0.0000000
-0.2127451	0.1006560	0.0811885
0.0298654	0.2211304	-0.0471352

# Read columns 2-11, rows 2-22 (first row used as header) of the two hybrid
# worksheets.
Hybrid_Switching <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "Hybrid - Switching",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)

Hybrid_Challenge <- read.xlsx(
  "exerciseCB.xlsx",
  sheetName = "Hybrid - Challenge",
  colIndex = 2:11,
  rowIndex = 2:22,
  header = TRUE
)