if (!require(mlba)) {
  library(devtools)
  install_github("gedeck/mlba/mlba", force=TRUE)
}

Association Rules

The Process of Rule Selection

Lift

library(arules)
fp.df <- mlba::Faceplate

# remove first column and convert to matrix
fp.mat <- as.matrix(fp.df[, -1])

# convert the binary incidence matrix into a transactions database
fp.trans <- as(fp.mat, "transactions")
inspect(fp.trans)

#>      items                    
#> [1]  {Red, White, Green}      
#> [2]  {White, Orange}          
#> [3]  {White, Blue}            
#> [4]  {Red, White, Orange}     
#> [5]  {Red, Blue}              
#> [6]  {White, Blue}            
#> [7]  {Red, Blue}              
#> [8]  {Red, White, Blue, Green}
#> [9]  {Red, White, Blue}       
#> [10] {Yellow}

## get rules
# when running apriori(), include the minimum support, minimum confidence, and target
# as arguments.
rules <- apriori(fp.trans, parameter = list(supp = 0.2, conf = 0.5, target = "rules"))

#> Apriori
#> 
#> Parameter specification:
#>  confidence minval smax arem  aval originalSupport maxtime support minlen
#>         0.5    0.1    1 none FALSE            TRUE       5     0.2      1
#>  maxlen target  ext
#>      10  rules TRUE
#> 
#> Algorithmic control:
#>  filter tree heap memopt load sort verbose
#>     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
#> 
#> Absolute minimum support count: 2 
#> 
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[6 item(s), 10 transaction(s)] done [0.00s].
#> sorting and recoding items ... [5 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 done [0.00s].
#> writing ... [18 rule(s)] done [0.00s].
#> creating S4 object  ... done [0.00s].

# inspect the first six rules, sorted by their lift
inspect(head(sort(rules, by = "lift"), n = 6))

#>     lhs               rhs     support confidence coverage lift     count
#> [1] {Red, White}   => {Green} 0.2     0.5        0.4      2.500000 2    
#> [2] {Green}        => {Red}   0.2     1.0        0.2      1.666667 2    
#> [3] {White, Green} => {Red}   0.2     1.0        0.2      1.666667 2    
#> [4] {Orange}       => {White} 0.2     1.0        0.2      1.428571 2    
#> [5] {Green}        => {White} 0.2     1.0        0.2      1.428571 2    
#> [6] {Red, Green}   => {White} 0.2     1.0        0.2      1.428571 2

Example 2: Rules for Similar Book Purchases

all.books.df <- mlba::CharlesBookClub

# create a binary incidence matrix
count.books.df <- all.books.df[, 8:18]
incid.books.mat <- as.matrix(count.books.df > 0)

#  convert the binary incidence matrix into a transactions database
books.trans <- as(incid.books.mat, "transactions")
inspect(books.trans[1:10])

#>      items      
#> [1]  {YouthBks, 
#>       CookBks}  
#> [2]  {}         
#> [3]  {ChildBks, 
#>       YouthBks, 
#>       CookBks,  
#>       RefBks,   
#>       GeogBks,  
#>       ItalCook} 
#> [4]  {}         
#> [5]  {}         
#> [6]  {}         
#> [7]  {GeogBks}  
#> [8]  {ChildBks} 
#> [9]  {}         
#> [10] {CookBks}

# plot data
itemFrequencyPlot(books.trans)

# run apriori function
rules <- apriori(books.trans,
    parameter = list(supp= 200/4000, conf = 0.5, target = "rules"))

#> Apriori
#> 
#> Parameter specification:
#>  confidence minval smax arem  aval originalSupport maxtime support minlen
#>         0.5    0.1    1 none FALSE            TRUE       5    0.05      1
#>  maxlen target  ext
#>      10  rules TRUE
#> 
#> Algorithmic control:
#>  filter tree heap memopt load sort verbose
#>     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
#> 
#> Absolute minimum support count: 200 
#> 
#> set item appearances ...[0 item(s)] done [0.00s].
#> set transactions ...[11 item(s), 4000 transaction(s)] done [0.00s].
#> sorting and recoding items ... [9 item(s)] done [0.00s].
#> creating transaction tree ... done [0.00s].
#> checking subsets of size 1 2 3 4 done [0.00s].
#> writing ... [72 rule(s)] done [0.00s].
#> creating S4 object  ... done [0.00s].

# inspect top-30 rules sorted by lift
inspect(head(sort(rules, by = "lift"), n=30))

#>      lhs                               rhs        support confidence coverage
#> [1]  {ChildBks, CookBks, GeogBks}   => {YouthBks} 0.06325 0.5776256  0.10950 
#> [2]  {ChildBks, CookBks, RefBks}    => {DoItYBks} 0.06125 0.5917874  0.10350 
#> [3]  {DoItYBks, GeogBks}            => {YouthBks} 0.05450 0.5396040  0.10100 
#> [4]  {ChildBks, CookBks, RefBks}    => {YouthBks} 0.05525 0.5338164  0.10350 
#> [5]  {ChildBks, CookBks, DoItYBks}  => {YouthBks} 0.06700 0.5244618  0.12775 
#> [6]  {ChildBks, YouthBks, CookBks}  => {DoItYBks} 0.06700 0.5583333  0.12000 
#> [7]  {ChildBks, RefBks}             => {DoItYBks} 0.07100 0.5536062  0.12825 
#> [8]  {ChildBks, CookBks, GeogBks}   => {DoItYBks} 0.06050 0.5525114  0.10950 
#> [9]  {ChildBks, GeogBks}            => {YouthBks} 0.07550 0.5162393  0.14625 
#> [10] {CookBks, GeogBks}             => {YouthBks} 0.08025 0.5136000  0.15625 
#> [11] {ChildBks, YouthBks, RefBks}   => {CookBks}  0.05525 0.8911290  0.06200 
#> [12] {ChildBks, YouthBks}           => {DoItYBks} 0.08025 0.5440678  0.14750 
#> [13] {CookBks, RefBks}              => {DoItYBks} 0.07450 0.5330948  0.13975 
#> [14] {CookBks, DoItYBks, RefBks}    => {ChildBks} 0.06125 0.8221477  0.07450 
#> [15] {ChildBks, DoItYBks, RefBks}   => {CookBks}  0.06125 0.8626761  0.07100 
#> [16] {ChildBks, CookBks}            => {DoItYBks} 0.12775 0.5278926  0.24200 
#> [17] {YouthBks, CookBks, RefBks}    => {ChildBks} 0.05525 0.8095238  0.06825 
#> [18] {YouthBks, GeogBks}            => {DoItYBks} 0.05450 0.5215311  0.10450 
#> [19] {YouthBks, CookBks}            => {DoItYBks} 0.08375 0.5201863  0.16100 
#> [20] {ChildBks, RefBks, GeogBks}    => {CookBks}  0.05025 0.8481013  0.05925 
#> [21] {YouthBks, CookBks, DoItYBks}  => {ChildBks} 0.06700 0.8000000  0.08375 
#> [22] {YouthBks, RefBks}             => {CookBks}  0.06825 0.8400000  0.08125 
#> [23] {ChildBks, YouthBks, GeogBks}  => {CookBks}  0.06325 0.8377483  0.07550 
#> [24] {ChildBks, YouthBks, DoItYBks} => {CookBks}  0.06700 0.8348910  0.08025 
#> [25] {ChildBks, ArtBks}             => {DoItYBks} 0.05375 0.5106888  0.10525 
#> [26] {YouthBks, CookBks, GeogBks}   => {ChildBks} 0.06325 0.7881620  0.08025 
#> [27] {CookBks, DoItYBks, GeogBks}   => {ChildBks} 0.06050 0.7806452  0.07750 
#> [28] {ChildBks, GeogBks}            => {DoItYBks} 0.07375 0.5042735  0.14625 
#> [29] {YouthBks, DoItYBks}           => {GeogBks}  0.05450 0.5278450  0.10325 
#> [30] {CookBks, RefBks, GeogBks}     => {ChildBks} 0.05025 0.7790698  0.06450 
#>      lift     count
#> [1]  2.424452 253  
#> [2]  2.323013 245  
#> [3]  2.264864 218  
#> [4]  2.240573 221  
#> [5]  2.201309 268  
#> [6]  2.191691 268  
#> [7]  2.173135 284  
#> [8]  2.168838 242  
#> [9]  2.166797 302  
#> [10] 2.155719 321  
#> [11] 2.144715 221  
#> [12] 2.135693 321  
#> [13] 2.092619 298  
#> [14] 2.086669 245  
#> [15] 2.076236 245  
#> [16] 2.072198 511  
#> [17] 2.054629 221  
#> [18] 2.047227 218  
#> [19] 2.041948 335  
#> [20] 2.041158 201  
#> [21] 2.030457 268  
#> [22] 2.021661 273  
#> [23] 2.016242 253  
#> [24] 2.009365 268  
#> [25] 2.004667 215  
#> [26] 2.000411 253  
#> [27] 1.981333 242  
#> [28] 1.979484 295  
#> [29] 1.978801 218  
#> [30] 1.977334 201

Collaborative Filtering

Example 4: Predicting Movie Ratings with MovieLens Data

library(recommenderlab)

# download MovieLens data
ratings <- mlba::MovieLensRatings
movies <- mlba::MovieLensMovies

# convert ratings to rating matrix
idxUserId <- sort(unique(ratings$userId))
idxMovieId <- sort(unique(ratings$movieId))
m <- matrix(NA, nrow=length(idxUserId), ncol=length(idxMovieId),
            dimnames=list(
              user=paste("u", 1:length(idxUserId), sep=''),
              item=movies$title[match(idxMovieId, movies$movieId)]
            ))
for (i in 1:nrow(ratings)) {
  rating <- ratings[i,]
  irow <- match(rating$userId, idxUserId)
  icol <- match(rating$movieId, idxMovieId)
  m[irow, icol] <- rating$rating
}
ratingMatrix <- as(m, "realRatingMatrix")

# UBCF model and prediction
recommender <- Recommender(ratingMatrix[-1], method="UBCF")
pred <- predict(recommender, ratingMatrix[1])
as(pred, 'list')

#> $`0`
#>  [1] "Bossa Nova (2000)"           "Maltese Falcon, The (1941)" 
#>  [3] "Michael Clayton (2007)"      "3-Iron (Bin-jip) (2004)"    
#>  [5] "Battlestar Galactica (2003)" "Enemy at the Gates (2001)"  
#>  [7] "Watchmen (2009)"             "Walk to Remember, A (2002)" 
#>  [9] "Tristan & Isolde (2006)"     "Atonement (2007)"

# IBCF model and prediction
recommender <- Recommender(ratingMatrix[-1], method="IBCF")
pred <- predict(recommender, ratingMatrix[1])
as(pred, 'list')

#> $`0`
#>  [1] "NeverEnding Story III, The (1994)"   "Amateur (1994)"                     
#>  [3] "Mute Witness (1994)"                 "My Crazy Life (Mi vida loca) (1993)"
#>  [5] "Miami Rhapsody (1995)"               "Pushing Hands (Tui shou) (1992)"    
#>  [7] "Suture (1993)"                       "Double Happiness (1994)"            
#>  [9] "Being Human (1993)"                  "Lassie (1994)"

set.seed(1)
e <- evaluationScheme(ratingMatrix, method="split", train=0.9, given=10)

r1 <- Recommender(getData(e, "train"), "UBCF")
r2 <- Recommender(getData(e, "train"), "IBCF")
r3 <- Recommender(getData(e, "train"), "RANDOM")

p1 <- predict(r1, getData(e, "known"), type="ratings")
p2 <- predict(r2, getData(e, "known"), type="ratings")
p3 <- predict(r3, getData(e, "known"), type="ratings")
error <- rbind(
  UBCF = calcPredictionAccuracy(p1, getData(e, "unknown")),
  IBCF = calcPredictionAccuracy(p2, getData(e, "unknown")),
  RANDOM = calcPredictionAccuracy(p3, getData(e, "unknown"))
)
error

#>            RMSE      MSE       MAE
#> UBCF   1.102623 1.215777 0.8499722
#> IBCF   1.262599 1.594156 1.0324675
#> RANDOM 1.771871 3.139526 1.4467703

Machine Learning for Business Analytics

Chapter 15: Association Rules and Collaborative Filtering

Association Rules

The Process of Rule Selection

Lift

Example 2: Rules for Similar Book Purchases

Collaborative Filtering

Example 4: Predicting Movie Ratings with MovieLens Data