In this section we create a full matrix of items and users by utilizing collaborative filtering technique.
# Install required R packages
library(recommenderlab)
## Loading required package: Matrix
## Loading required package: arules
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
##
## as.matrix
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
## Loading required package: registry
library(reshape2)
library(ggplot2)
library(reshape2)
library(data.table)
##
## Attaching package: 'data.table'
## The following objects are masked from 'package:reshape2':
##
## dcast, melt
##Read the given Online retail Dataset from the saved location
UCIUnique<-read.csv("/Users/uzma/Downloads/ml-latest-small/Online Retail.csv",header=TRUE)
#Summary of the Transactional Data
summary(UCIUnique)
## InvoiceNo StockCode
## 573585 : 1114 85099B : 1157
## 581219 : 749 85123A : 1033
## 581492 : 731 22086 : 995
## 580729 : 721 23203 : 980
## 558475 : 705 22423 : 969
## 579777 : 687 23084 : 886
## (Other):299420 (Other):298107
## Description Quantity
## JUMBO BAG RED RETROSPOT : 1159 Min. :-80995.00
## WHITE HANGING HEART T-LIGHT HOLDER: 1065 1st Qu.: 1.00
## PAPER CHAIN KIT 50'S CHRISTMAS : 995 Median : 4.00
## REGENCY CAKESTAND 3 TIER : 971 Mean : 9.83
## RABBIT NIGHT LIGHT : 871 3rd Qu.: 12.00
## LUNCH BAG RED RETROSPOT : 846 Max. : 80995.00
## (Other) :298220
## InvoiceDate UnitPrice CustomerID
## 10/31/11 14:41: 1114 Min. :-11062.060 Min. :12347
## 12/8/11 9:28 : 749 1st Qu.: 1.250 1st Qu.:13975
## 12/9/11 10:03 : 731 Median : 2.080 Median :15156
## 12/5/11 17:24 : 721 Mean : 4.252 Mean :15288
## 6/29/11 15:58 : 705 3rd Qu.: 4.130 3rd Qu.:16775
## 11/30/11 15:13: 687 Max. : 17836.460 Max. :18287
## (Other) :299420 NA's :33714
## Country
## United Kingdom:276922
## EIRE : 5338
## Germany : 5327
## France : 4979
## Spain : 1386
## Switzerland : 1304
## (Other) : 8871
head(UCIUnique)
## InvoiceNo StockCode Description Quantity
## 1 563756 22440 BALLOON WATER BOMB PACK OF 35 20
## 2 563756 21878 PACK OF 6 SANDCASTLE FLAGS ASSORTED 12
## 3 563756 22607 WOODEN ROUNDERS GARDEN SET 2
## 4 563756 22549 PICTURE DOMINOES 12
## 5 563756 21791 VINTAGE HEADS AND TAILS CARD GAME 12
## 6 563756 21888 BINGO SET 4
## InvoiceDate UnitPrice CustomerID Country
## 1 8/19/11 11:08 0.42 12418 Switzerland
## 2 8/19/11 11:08 0.85 12418 Switzerland
## 3 8/19/11 11:08 9.95 12418 Switzerland
## 4 8/19/11 11:08 1.45 12418 Switzerland
## 5 8/19/11 11:08 1.25 12418 Switzerland
## 6 8/19/11 11:08 3.75 12418 Switzerland
##Create a dataframe that contains calculated utility values along with Item and customer Ids
tmp1<-cbind.data.frame(ItemID=UCIUnique$StockCode,CustomerID=UCIUnique$CustomerID,Utility=UCIUnique$Quantity*UCIUnique$UnitPrice)
##Aggregate Utility values for the same item and customer (repeated observations) because utility is a scalar
tmp11<-aggregate(Utility ~ ItemID + CustomerID,data=tmp1,FUN=sum)
tmp2<-unique(tmp11)
##Create another data frame for profit margin
Profmar<-cbind.data.frame(ItemID=UCIUnique$StockCode,ProfitMargin=UCIUnique$UnitPrice*0.25,Quantity=UCIUnique$Quantity)
Profmar1<-unique(Profmar)
##Create a recommender algorithm for a subset of 26 users and 970 items: created because we picked up first 1500 observations from the dataset
x<-tmp2[1:1500,]
finaltemporary<-acast(x,x$CustomerID ~ x$ItemID,value.var="Utility")
## use collaborative filtering to get full matrix
## Confirm the class
class(finaltemporary)
## [1] "matrix"
# Convert it to matrix
Fnewdata<-as.matrix(finaltemporary)
Fnewdata[is.na(Fnewdata)] = 0
## Rescale each column to range between 0 and 5
Fnewdata<-apply(Fnewdata, MARGIN = 2, FUN = function(X) ((X - min(X))*5)/diff(range(X)))
Fnewdata[Fnewdata==0]<-NA
# Convert this into realRatingMatrix data structure
# This will be a recommenderlab sparse-matrix like data-structure
recomdata1 <- as(Fnewdata, "realRatingMatrix")
##User Based Collaborative Filtering (UBCF)
rec11=Recommender(recomdata1[1:nrow(recomdata1)],method="POPULAR")
##Predict the unknown ratings
recom11 <- predict(rec11, recomdata1[1:nrow(recomdata1)], type="ratings")
# Convert prediction into a complete matrix
#as(recom1, "list")
recomfinal1<-as(recom11, "matrix") # Is full of ratings. NAs disappear
recomfinal1[is.na(recomfinal1)] = 0
##Scale back to 0-5 range
recomfinal1<-apply(recomfinal1, MARGIN = 2, FUN = function(X) ((X - min(X))*5)/diff(range(X)))
##Superimpose existing and predicted to get the full matrix
Fnewdata[is.na(Fnewdata)] = 0
recomfinal1[is.na(recomfinal1)] = 0
for(i in 1:nrow(recomfinal1)){
for (j in 1:ncol(recomfinal1)){
if (recomfinal1[i,j]==0){
recomfinal1[i,j]<-Fnewdata[i,j]
}
}
}
##recomfinal1 is the matrix we will use for greedy algorithm
##Size of the assortment to be tested is denoted by NF
NF<-8
##Create a matrix with total number of rows= customer number in recomfinal1 and columns=alpha value+z value+items #picked+user number+system time
library(reshape2)
library(data.table)
dframeoutput<- data.frame(matrix(ncol = 5+NF, nrow = 0))
#x31 <- c("Replication number","Customer ID", "alpha", "z",as.list("Items") ,"System Time")
x31 <- c("Replication number","Customer ID", "alpha", "z" ,"System Time")
colnames(dframeoutput) <- x31
colnames(dframeoutput[,6:(5+NF)])<-"Items"
BFdataframe<- data.frame(matrix(ncol = 5+NF, nrow = 0))
#x31 <- c("Replication number","Customer ID", "alpha", "z",as.list("Items") ,"System Time")
y31 <- c("Replication number","Customer ID", "alpha", "z" ,"System Time")
colnames(BFdataframe) <- y31
colnames(BFdataframe[,6:(5+NF)])<-"Items"
##Run a loop to pick N items for each user and set five replications
#set.seed(1)
##Alpha values
t1<-c(0.01,0.1,1)
#z values
t2<-c(0,1,4)
#Start replications
##Initialize
s1<-1
u<-1
# r is for replications, l is for users
for(r in 1:2){
for (l in 1:nrow(recomfinal1)){
for(z1 in t2){
for(alpha in t1){
set.seed(r)
testset<-sample(recomfinal1[l,], NF, replace = FALSE, prob = NULL)
##Reshape for this user
long <- melt(testset)
#Rownames are item ids -- create a column for those
setDT(long, keep.rownames = TRUE)[]
##From Profmargin data frame merge the revenue
colnames(long)[1]<-"ItemID"
colnames(long)[2]<-"Utility"
##For loop to identify profit margins for each item
longtemp <- data.frame(matrix(ncol = 3, nrow = 0))
x <- c("ItemID", "Utility", "Margin")
colnames(longtemp) <- x
## Final data set for greedy and bruteforce
longtemp<-merge(long, Profmar1, by="ItemID")
## Calulate mean of all profit margins for the same item because of different countries they are sold in
longtemp1<-aggregate(.~ItemID, data=longtemp, mean)
##sytem time
old <- Sys.time() # get start time
Items = c(1:NF)
Revenue=longtemp1$ProfitMargin
Preference.weight=exp(longtemp1$Utility)
ItemID=longtemp1$ItemID
dataf = data.frame(Items, Revenue, Preference.weight)#,ItemID)
### Complete Code and Output for Greedy Algorithm
#Declare variables
n<-max(dataf$Items, na.rm = TRUE)
Q1<-matrix(0,n,n)
V0<-vector(,n)
item_index<-vector(,n)
f1<-vector(,n+1)
#Compute No-Choice
for (j in 1:n){
V0[j]<-exp((j-z1)*alpha)}
#Start Algorithm A1
k=1
f1[1]<-0
while(k<=n){
for(i in 1:n){
if (k==1){Q1[i,k]<-(dataf$Revenue[i]*dataf$Preference.weight[i])/(dataf$Preference.weight[i]+V0[k])
}
else
{
Q1[i,k]<-((dataf$Revenue[i]*dataf$Preference.weight[i])/(P1f+dataf$Preference.weight[i]+V0[k])
+(P2f/(P1f+dataf$Preference.weight[i]+V0[k]))) }
}
for(z in item_index){Q1[z,k]<-0}
f1[k+1]<-max(Q1[,k],na.rm=FALSE)
#Identify highest expected revenue and corresponding item
if(f1[k+1]>f1[k]){
item_index[k]<-which.max(Q1[,k])
P1=NULL
P2=NULL
P1f=NULL
P2f=NULL
for(j in item_index){ P1[j]<-(dataf$Preference.weight[j])
P2[j]<-(dataf$Preference.weight[j]*dataf$Revenue[j])
}
P1f<-sum(P1,na.rm=TRUE)
P2f<-sum(P2,na.rm=TRUE)
k=k+1 }
else
{#cat("Assortment returned by Algorithm 1 = ", item_index, "\n")
break}
}
# print elapsed time
new <- Sys.time() - old # calculate difference
y<-rownames(recomfinal1)[l]
#y1<-(item_index)
y1<-rep(0,NF)
for(p1 in 1:NF){
if(item_index[p1]>0){y1[p1]<-as.character(longtemp1[item_index[p1],1])}
else{y1[p1]<-0}
}
dframeoutput[s1,5]<-new
dframeoutput[s1,1]<-r
dframeoutput[s1,2]<-y
dframeoutput[s1,3]<-alpha
dframeoutput[s1,4]<-z1
dframeoutput[s1,6:(5+NF)]<-y1
s1<-s1+1
##Brute force algorithm for the same replication for every user
# Brute force--all 2^n assortments
#Use the function to generate all assortments and calculate their expected revenues
# Create a vector to store all revenues
old1 <- Sys.time() # get start time
d=NULL
PW=NULL
s = n;
res <- sapply(0:(s-1),function(x)rep(c(rep(0,2^x),rep(1,2^x)),2^(s-x-1)))
t<-2^n
E=matrix(0,t,n)
for(i in 1:t){
d<-rowSums(res)
for(j in 1:n){
if(res[i,j]>0){
PW[j]<-dataf$Preference.weight[j]
}
else
{PW[j]<-0}
}
PWSum<-sum(PW,na.rm=TRUE)
for (k in 1:n){
if(res[i,k]>0){
E[i,k]<-(dataf$Revenue[k]*dataf$Preference.weight[k])/(PWSum+V0[d[i]])}
else{E[i,k]<-0}
}
}
Final<-rowSums(E)
#max(Final)
y3<-rep(0,NF)
y5<-rep(0,NF)
y4<-as.integer(E[which.max(Final),]>0)
for(p in 1:NF){
if (y4[p]=="1"){y3[p]<-p}
else{y3[p]<-0}
}
for(p2 in 1:NF){
if(y3[p2]>0){y5[p2]<-as.character(longtemp1[y3[p2],1])}
else{y5[p2]<-0}
}
#BFSolution<-which( E[which.max(Final),]>0, arr.ind=TRUE)
#cat("Assortment returned by brute force = ", BFSolution, "\n")
new1 <- Sys.time() - old1 # calculate difference
q<-rownames(recomfinal1)[l]
#y2<-(BFSolution)
BFdataframe[u,5]<-new1
BFdataframe[u,1]<-r
BFdataframe[u,2]<-q
BFdataframe[u,3]<-alpha
BFdataframe[u,4]<-z1
BFdataframe[u,6:(5+NF)]<-y5
u<-u+1
}
}
}
}
## What items were selected
##Brute force
head(BFdataframe)
## Replication number Customer ID alpha z System Time NA NA NA NA NA NA NA
## 1 1 12347 0.01 0 0.05548811 0 0 0 0 0 0 0
## 2 1 12347 0.10 0 0.06352782 0 0 0 0 0 0 0
## 3 1 12347 1.00 0 0.04884505 0 0 0 0 0 0 0
## 4 1 12347 0.01 1 0.06206989 0 0 0 0 0 0 0
## 5 1 12347 0.10 1 0.04942608 0 0 0 0 0 0 0
## 6 1 12347 1.00 1 0.04765201 0 0 0 0 0 0 0
## NA
## 1 84078A
## 2 84078A
## 3 84078A
## 4 84078A
## 5 84078A
## 6 84078A
tail(BFdataframe)
## Replication number Customer ID alpha z System Time NA NA NA NA NA NA
## 481 2 12384 0.01 1 0.04899287 0 0 0 0 0 0
## 482 2 12384 0.10 1 0.05241203 0 0 0 0 0 0
## 483 2 12384 1.00 1 0.04559612 0 0 0 0 0 0
## 484 2 12384 0.01 4 0.04828596 0 0 0 0 0 0
## 485 2 12384 0.10 4 0.05352497 0 0 0 0 0 0
## 486 2 12384 1.00 4 0.05048203 0 0 0 0 0 0
## NA NA
## 481 84078A 0
## 482 84078A 0
## 483 84078A 0
## 484 84078A 0
## 485 84078A 0
## 486 84078A 0
##greedy
head(dframeoutput)
## Replication number Customer ID alpha z System Time NA NA NA NA NA NA
## 1 1 12347 0.01 0 0.001432896 84078A 0 0 0 0 0
## 2 1 12347 0.10 0 0.001550913 84078A 0 0 0 0 0
## 3 1 12347 1.00 0 0.001390934 84078A 0 0 0 0 0
## 4 1 12347 0.01 1 0.001423836 84078A 0 0 0 0 0
## 5 1 12347 0.10 1 0.001372814 84078A 0 0 0 0 0
## 6 1 12347 1.00 1 0.001473904 84078A 0 0 0 0 0
## NA NA
## 1 0 0
## 2 0 0
## 3 0 0
## 4 0 0
## 5 0 0
## 6 0 0
tail(dframeoutput)
## Replication number Customer ID alpha z System Time NA NA NA NA NA
## 481 2 12384 0.01 1 0.001315117 84078A 0 0 0 0
## 482 2 12384 0.10 1 0.001347065 84078A 0 0 0 0
## 483 2 12384 1.00 1 0.001333952 84078A 0 0 0 0
## 484 2 12384 0.01 4 0.001329184 84078A 0 0 0 0
## 485 2 12384 0.10 4 0.001461983 84078A 0 0 0 0
## 486 2 12384 1.00 4 0.001428127 84078A 0 0 0 0
## NA NA NA
## 481 0 0 0
## 482 0 0 0
## 483 0 0 0
## 484 0 0 0
## 485 0 0 0
## 486 0 0 0
#whattoselect<-which(rownames(dataf$ItemID))