Recommendation Systems

Data Mining II

Luigi Ruberto

Hiding function

# Prepare a session log for evaluation by hiding one row per session.
#
# For every session (distinct value of `set$SESSIONID`):
#   * a session with a single row is removed entirely;
#   * otherwise the USERID of the session's LAST row is overwritten with 0,
#     which marks that row as hidden.
# Afterwards every row that still has a missing value in any column is
# dropped, as are rows whose SESSIONID is missing.
#
# Args:
#   set: data frame with at least the columns SESSIONID and USERID.
#
# Returns:
#   The filtered data frame, with original row order and row names kept.
#   SESSIONID keeps its original type (no NaN marker is written into it).
clean <- function(set) {
    ids <- set$SESSIONID
    # Compare sessions by their printed label, so factor, character and
    # numeric IDs are all grouped the same way.
    key <- as.character(ids)
    known <- !is.na(ids)

    is_first <- !duplicated(key)
    is_last <- !duplicated(key, fromLast = TRUE)

    # A row that is both the first and the last of its session is alone in it.
    singleton <- known & is_first & is_last
    # Last row of a session that has at least two rows: this one gets hidden.
    hidden <- known & is_last & !is_first

    set$USERID[hidden] <- 0

    # Use a logical mask instead of overwriting IDs with NaN: NaN turns into
    # the string "NaN" in a character vector and would never be filtered out.
    keep <- complete.cases(set) & known & !singleton
    set[keep, , drop = FALSE]
}

Association Rules

Construction of the model

# Build the association-rule model from columns 2 and 4 of `train`.
# NOTE(review): columns 2 and 4 are presumably SESSIONID and REQUESTEDURL;
# confirm against the layout of `train`.
# NOTE(review): the meaning of `Bas` and `prm` is defined by caren(), which is
# not visible here; verify against its documentation.
rls.prm <- caren(train[, c(2, 4)], Bas = TRUE, prm = TRUE)

Time needed

## Time difference of 2.667 secs

Generation of 10 recommendations per user

# Generate recommendations from the rule model `rls.prm`, using only the rows
# of `test2` whose USERID is not 0 (columns 2 and 4).
# NOTE(review): USERID == 0 presumably marks the rows hidden by clean();
# confirm that `test2` was produced by it.
# `Top = 10` matches the "10 recommendations per user" stated for this step.
pr <- predict.caren(rls.prm, test2[test2$USERID != 0, c(2, 4)], Bas = TRUE, 
    Top = 10)

Time needed

## Time difference of 5.853 secs

1 recommendation

plot of chunk unnamed-chunk-13

Number of guessed recommendations: 22

Average score for good recommendations: 0.76

Average score for bad recommendations: 0.18

2 recommendations

plot of chunk unnamed-chunk-15

Number of guessed recommendations: 68

Average score for good recommendations: 0.66

Average score for bad recommendations: 0.16

5 recommendations

plot of chunk unnamed-chunk-17

Number of guessed recommendations: 99

Average score for good recommendations: 0.57

Average score for bad recommendations: 0.12

10 recommendations

plot of chunk unnamed-chunk-19

Number of guessed recommendations: 153

Average score for good recommendations: 0.42

Average score for bad recommendations: 0.10

Summary of Association Rules

plot of chunk unnamed-chunk-20

Neighborhood-based

Construction of the similarity matrix

# Pair every training session with the URL it requested and hand the
# two-column table to simmatrix() to obtain the similarity matrix.
col1 <- train[["SESSIONID"]]
col2 <- train[["REQUESTEDURL"]]
A <- data.frame(col1 = col1, col2 = col2)
sm <- simmatrix(A)

Time needed

## Time difference of 29.91 mins

Generation of 10 recommendations per user

# Collect the neighborhood-based recommendations for every session in `lev`.
# NOTE(review): `lev` is assumed to hold the session IDs of `test2`; it is
# defined outside this chunk.
# seq_along() makes the loop a no-op when `lev` is empty (1:length(lev) would
# iterate over c(1, 0)), and results are gathered in a preallocated list
# instead of growing a vector on every iteration.
recs <- vector("list", length(lev))
for (i in seq_along(lev)) {
    # URLs of the current session that are still visible (USERID != 0).
    R <- test2[test2$SESSIONID == lev[i] & test2$USERID != 0, ]$REQUESTEDURL
    rec <- topNrec.cf(R, sm, 10, 5)
    # Assigning NULL with [[<- would delete the list slot, so skip it.
    if (!is.null(rec)) {
        recs[[i]] <- rec
    }
}
# Concatenate in session order; c(vector(), ...) keeps the empty-result type
# of the original accumulator.
pr_nb <- c(vector(), unlist(recs, recursive = FALSE))

Time needed

## Time difference of 4.312 hours

1 recommendation

plot of chunk unnamed-chunk-30

Number of guessed recommendations: 623

Average score for good recommendations: 0.54

Average score for bad recommendations: 0.29

2 recommendations

plot of chunk unnamed-chunk-32

Number of guessed recommendations: 792

Average score for good recommendations: 0.51

Average score for bad recommendations: 0.24

5 recommendations

plot of chunk unnamed-chunk-34

Number of guessed recommendations: 957

Average score for good recommendations: 0.47

Average score for bad recommendations: 0.16

10 recommendations

plot of chunk unnamed-chunk-36

Number of guessed recommendations: 1047

Average score for good recommendations: 0.45

Average score for bad recommendations: 0.10

Summary of Neighborhood-based

plot of chunk unnamed-chunk-37