library(arules)
## Warning: package 'arules' was built under R version 4.0.5
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
## Warning: package 'arulesViz' was built under R version 4.0.5
# Read the comma-separated grocery baskets into an arules transactions object
# and print an overview of it.
# NOTE(review): the path is absolute and machine-specific; adjust it before
# running this script on another computer.
Groceries <- read.transactions(
  file = "C:\\Users\\tariqm\\Documents\\R\\Datasets\\groceries.csv",
  sep = ","
)
summary(Groceries)
## transactions as itemMatrix in sparse format with
##  9835 rows (elements/itemsets/transactions) and
##  169 columns (items) and a density of 0.02609146 
## 
## most frequent items:
##       whole milk other vegetables       rolls/buns             soda 
##             2513             1903             1809             1715 
##           yogurt          (Other) 
##             1372            34055 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55   46 
##   17   18   19   20   21   22   23   24   26   27   28   29   32 
##   29   14   14    9   11    4    6    1    1    1    1    3    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   4.409   6.000  32.000 
## 
## includes extended item information - examples:
##             labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3   baby cosmetics
# Mine association rules with minimum support 0.001, minimum confidence 0.8
# and at most 21 items per rule.
rules <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.8, maxlen = 21)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      21  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 9 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.01s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.02s].
## writing ... [410 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Print the rule count, rule-length distribution and quality-measure summary
# of the mined rule set.
summary(rules)
## set of 410 rules
## 
## rule length distribution (lhs + rhs):sizes
##   3   4   5   6 
##  29 229 140  12 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   4.000   4.000   4.329   5.000   6.000 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001017   Min.   :0.8000   Min.   :0.001017   Min.   : 3.131  
##  1st Qu.:0.001017   1st Qu.:0.8333   1st Qu.:0.001220   1st Qu.: 3.312  
##  Median :0.001220   Median :0.8462   Median :0.001322   Median : 3.588  
##  Mean   :0.001247   Mean   :0.8663   Mean   :0.001449   Mean   : 3.951  
##  3rd Qu.:0.001322   3rd Qu.:0.9091   3rd Qu.:0.001627   3rd Qu.: 4.341  
##  Max.   :0.003152   Max.   :1.0000   Max.   :0.003559   Max.   :11.235  
##      count      
##  Min.   :10.00  
##  1st Qu.:10.00  
##  Median :12.00  
##  Mean   :12.27  
##  3rd Qu.:13.00  
##  Max.   :31.00  
## 
## mining info:
##       data ntransactions support confidence
##  Groceries          9835   0.001        0.8
# Default plot of the full rule set.
plot(x = rules)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Two-key plot of the same rules.
plot(x = rules, method = "two-key plot", jitter = 100)

# Grouped view of the rules.
plot(x = rules, method = "grouped")

# Parallel-coordinates plot, left disabled because it takes a long time to
# render on the full rule set:
# plot(rules, method="paracoord")

# Seed the RNG before drawing the graph
# (presumably to make the graph layout reproducible -- TODO confirm).
set.seed(1234)

plot(x = rules, method = "graph")
## Warning: Too many rules supplied. Only plotting the best 100 rules using lift
## (change control parameter max if needed)

# Interactive graph of the rules without shading.
# arulesViz reports `interactive = TRUE` as deprecated and asks for
# `engine = "interactive"` instead, so the engine is selected explicitly.
plot(rules, method = "graph", engine = "interactive", shading = NA)
## Warning: plot: Too many rules supplied. Only plotting the best 100 rules using
## 'support' (change control parameter max if needed)

# The same graph rendered with the htmlwidget engine.
plot(rules, method = "graph", engine = "htmlwidget")
## Warning: Too many rules supplied. Only plotting the best 100 rules using lift
## (change control parameter max if needed)
# Remove redundant rules and rank the remainder by lift.
#
# Filtering and sorting are done on the exact quality measures. Rounding them
# first would make is.redundant() and sort() work on values truncated to three
# digits (e.g. every support between 0.0005 and 0.0015 collapses to 0.001),
# which can create artificial ties. Rounding is therefore applied last and
# only affects how the measures are displayed from here on.
redundant <- is.redundant(rules, measure = "confidence")
which(redundant)
# NOTE: the recorded output below comes from a run that rounded before
# filtering; the indices may differ slightly when the script is re-run.
##  [1]  54 119 263 268 282 288 300 302 310 346 351 355 399 400 402 403 404 405
rules <- rules[!redundant]

rules <- sort(rules, by = "lift", decreasing = TRUE)

# Round the quality measures to three digits for compact display.
quality(rules) <- round(quality(rules), digits = 3)

summary(rules)
## set of 392 rules
## 
## rule length distribution (lhs + rhs):sizes
##   3   4   5   6 
##  29 227 130   6 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   4.000   4.000   4.288   5.000   6.000 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001000   Min.   :0.8000   Min.   :0.001000   Min.   : 3.131  
##  1st Qu.:0.001000   1st Qu.:0.8330   1st Qu.:0.001000   1st Qu.: 3.312  
##  Median :0.001000   Median :0.8460   Median :0.001000   Median : 3.588  
##  Mean   :0.001158   Mean   :0.8666   Mean   :0.001408   Mean   : 3.959  
##  3rd Qu.:0.001000   3rd Qu.:0.9090   3rd Qu.:0.002000   3rd Qu.: 4.357  
##  Max.   :0.003000   Max.   :1.0000   Max.   :0.004000   Max.   :11.235  
##      count      
##  Min.   :10.00  
##  1st Qu.:10.00  
##  Median :12.00  
##  Mean   :12.33  
##  3rd Qu.:13.00  
##  Max.   :31.00  
## 
## mining info:
##       data ntransactions support confidence
##  Groceries          9835   0.001        0.8
# Show the first ten rules. head() returns at most ten rules, so this does not
# index past the end of the rule set when fewer than ten rules remain.
inspect(head(rules, n = 10))
##      lhs                        rhs               support confidence coverage   lift count
## [1]  {liquor,                                                                             
##       red/blush wine}        => {bottled beer}      0.002      0.905    0.002 11.235    19
## [2]  {citrus fruit,                                                                       
##       fruit/vegetable juice,                                                              
##       other vegetables,                                                                   
##       soda}                  => {root vegetables}   0.001      0.909    0.001  8.340    10
## [3]  {oil,                                                                                
##       other vegetables,                                                                   
##       tropical fruit,                                                                     
##       whole milk,                                                                         
##       yogurt}                => {root vegetables}   0.001      0.909    0.001  8.340    10
## [4]  {citrus fruit,                                                                       
##       fruit/vegetable juice,                                                              
##       grapes}                => {tropical fruit}    0.001      0.846    0.001  8.064    11
## [5]  {other vegetables,                                                                   
##       rice,                                                                               
##       whole milk,                                                                         
##       yogurt}                => {root vegetables}   0.001      0.867    0.002  7.951    13
## [6]  {oil,                                                                                
##       other vegetables,                                                                   
##       tropical fruit,                                                                     
##       whole milk}            => {root vegetables}   0.001      0.867    0.002  7.951    13
## [7]  {ham,                                                                                
##       other vegetables,                                                                   
##       pip fruit,                                                                          
##       yogurt}                => {tropical fruit}    0.001      0.833    0.001  7.942    10
## [8]  {beef,                                                                               
##       citrus fruit,                                                                       
##       other vegetables,                                                                   
##       tropical fruit}        => {root vegetables}   0.001      0.833    0.001  7.645    10
## [9]  {butter,                                                                             
##       cream cheese,                                                                       
##       root vegetables}       => {yogurt}            0.001      0.909    0.001  6.517    10
## [10] {butter,                                                                             
##       sliced cheese,                                                                      
##       tropical fruit,                                                                     
##       whole milk}            => {yogurt}            0.001      0.909    0.001  6.517    10
# Keep the first ten rules for plotting. head() returns at most ten rules
# instead of indexing past the end when fewer than ten are available.
rules.top10 <- head(rules, n = 10)

# Default plot of the ten selected rules.
plot(rules.top10)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

plot(rules.top10, method = "two-key plot", jitter = 100)

plot(rules.top10, method = "grouped")

plot(rules.top10, method = "paracoord")

# Seed the RNG before drawing the graph
# (presumably to make the graph layout reproducible -- TODO confirm).
set.seed(1234)

plot(rules.top10, method = "graph")

# arulesViz reports `interactive = TRUE` as deprecated and asks for
# `engine = "interactive"` instead, so the engine is selected explicitly.
plot(rules.top10, method = "graph", engine = "interactive", shading = NA)

# The same graph rendered with the htmlwidget engine.
plot(rules.top10, method = "graph", engine = "htmlwidget")
# Mine rules whose right-hand side is restricted to "whole milk"; all other
# items may only appear on the left-hand side. Thresholds: minimum support
# 0.001, minimum confidence 0.8, at most 21 items per rule.
# `verbose = FALSE` (not the reassignable shorthand `F`) silences the mining
# progress log.
rules.wholemilk <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.8, maxlen = 21),
  appearance = list(default = "lhs", rhs = "whole milk"),
  control = list(verbose = FALSE)
)
summary(rules.wholemilk)
## set of 252 rules
## 
## rule length distribution (lhs + rhs):sizes
##   3   4   5   6 
##  18 146  81   7 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   4.000   4.000   4.306   5.000   6.000 
## 
## summary of quality measures:
##     support           confidence        coverage             lift      
##  Min.   :0.001017   Min.   :0.8000   Min.   :0.001017   Min.   :3.131  
##  1st Qu.:0.001017   1st Qu.:0.8333   1st Qu.:0.001220   1st Qu.:3.261  
##  Median :0.001220   Median :0.8481   Median :0.001322   Median :3.319  
##  Mean   :0.001256   Mean   :0.8689   Mean   :0.001457   Mean   :3.401  
##  3rd Qu.:0.001322   3rd Qu.:0.9091   3rd Qu.:0.001627   3rd Qu.:3.558  
##  Max.   :0.002847   Max.   :1.0000   Max.   :0.003457   Max.   :3.914  
##      count      
##  Min.   :10.00  
##  1st Qu.:10.00  
##  Median :12.00  
##  Mean   :12.36  
##  3rd Qu.:13.00  
##  Max.   :28.00  
## 
## mining info:
##       data ntransactions support confidence
##  Groceries          9835   0.001        0.8
# Remove redundant "whole milk" rules and rank the remainder by lift.
#
# Filtering and sorting use the exact quality measures; rounding them first
# would make is.redundant() and sort() work on values truncated to three
# digits and can create artificial ties. Rounding is applied last and only
# affects how the measures are displayed from here on.
redundant <- is.redundant(rules.wholemilk, measure = "confidence")
which(redundant)
# NOTE: the recorded output below comes from a run that rounded before
# filtering; the indices may differ slightly when the script is re-run.
##  [1]  33  66 167 170 179 186 216 246 247 248 249
rules.wholemilk <- rules.wholemilk[!redundant]
rules.wholemilk <- sort(rules.wholemilk, by = "lift", decreasing = TRUE)

# Round the quality measures to three digits for compact display.
quality(rules.wholemilk) <- round(quality(rules.wholemilk), digits = 3)
summary(rules.wholemilk)
## set of 241 rules
## 
## rule length distribution (lhs + rhs):sizes
##   3   4   5   6 
##  18 144  76   3 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   3.000   4.000   4.000   4.266   5.000   6.000 
## 
## summary of quality measures:
##     support           confidence        coverage             lift      
##  Min.   :0.001000   Min.   :0.8000   Min.   :0.001000   Min.   :3.131  
##  1st Qu.:0.001000   1st Qu.:0.8330   1st Qu.:0.001000   1st Qu.:3.261  
##  Median :0.001000   Median :0.8570   Median :0.001000   Median :3.355  
##  Mean   :0.001166   Mean   :0.8694   Mean   :0.001402   Mean   :3.403  
##  3rd Qu.:0.001000   3rd Qu.:0.9090   3rd Qu.:0.002000   3rd Qu.:3.558  
##  Max.   :0.003000   Max.   :1.0000   Max.   :0.003000   Max.   :3.914  
##      count     
##  Min.   :10.0  
##  1st Qu.:10.0  
##  Median :12.0  
##  Mean   :12.4  
##  3rd Qu.:13.0  
##  Max.   :28.0  
## 
## mining info:
##       data ntransactions support confidence
##  Groceries          9835   0.001        0.8
# Show the first ten "whole milk" rules. head() returns at most ten rules, so
# this does not index past the end when fewer than ten rules remain.
inspect(head(rules.wholemilk, n = 10))
##      lhs                     rhs          support confidence coverage  lift count
## [1]  {rice,                                                                      
##       sugar}              => {whole milk}   0.001          1    0.001 3.914    12
## [2]  {canned fish,                                                               
##       hygiene articles}   => {whole milk}   0.001          1    0.001 3.914    11
## [3]  {butter,                                                                    
##       rice,                                                                      
##       root vegetables}    => {whole milk}   0.001          1    0.001 3.914    10
## [4]  {flour,                                                                     
##       root vegetables,                                                           
##       whipped/sour cream} => {whole milk}   0.002          1    0.002 3.914    17
## [5]  {butter,                                                                    
##       domestic eggs,                                                             
##       soft cheese}        => {whole milk}   0.001          1    0.001 3.914    10
## [6]  {butter,                                                                    
##       hygiene articles,                                                          
##       pip fruit}          => {whole milk}   0.001          1    0.001 3.914    10
## [7]  {hygiene articles,                                                          
##       root vegetables,                                                           
##       whipped/sour cream} => {whole milk}   0.001          1    0.001 3.914    10
## [8]  {hygiene articles,                                                          
##       pip fruit,                                                                 
##       root vegetables}    => {whole milk}   0.001          1    0.001 3.914    10
## [9]  {cream cheese,                                                              
##       domestic eggs,                                                             
##       sugar}              => {whole milk}   0.001          1    0.001 3.914    11
## [10] {curd,                                                                      
##       domestic eggs,                                                             
##       sugar}              => {whole milk}   0.001          1    0.001 3.914    10
# Keep the first ten "whole milk" rules for plotting. head() returns at most
# ten rules instead of indexing past the end when fewer are available.
rules.wholemilk.top10 <- head(rules.wholemilk, n = 10)

# Graph of the selected rules rendered with the htmlwidget engine.
plot(rules.wholemilk.top10, method = "graph", engine = "htmlwidget")
# Mine rules in which "whole milk" is the only item allowed on the left-hand
# side; all other items may only appear on the right-hand side. Thresholds:
# minimum support 0.001, minimum confidence 0.1, at most 21 items per rule.
# `verbose = FALSE` (not the reassignable shorthand `F`) silences the mining
# progress log.
# NOTE(review): the result also contains length-1 rules (empty left-hand
# side); if those are unwanted, consider adding `minlen = 2` -- confirm intent.
rules.wholemilk2 <- apriori(
  Groceries,
  parameter = list(support = 0.001, confidence = 0.1, maxlen = 21),
  appearance = list(default = "rhs", lhs = "whole milk"),
  control = list(verbose = FALSE)
)
summary(rules.wholemilk2)
## set of 24 rules
## 
## rule length distribution (lhs + rhs):sizes
##  1  2 
##  7 17 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   1.708   2.000   2.000 
## 
## summary of quality measures:
##     support          confidence        coverage           lift       
##  Min.   :0.02613   Min.   :0.1023   Min.   :0.2555   Min.   :0.8991  
##  1st Qu.:0.03007   1st Qu.:0.1101   1st Qu.:0.2555   1st Qu.:1.0000  
##  Median :0.04118   Median :0.1281   Median :0.2555   Median :1.3916  
##  Mean   :0.06927   Mean   :0.1478   Mean   :0.4727   Mean   :1.3628  
##  3rd Qu.:0.10595   3rd Qu.:0.1768   3rd Qu.:1.0000   3rd Qu.:1.5732  
##  Max.   :0.19349   Max.   :0.2929   Max.   :1.0000   Max.   :1.9461  
##      count       
##  Min.   : 257.0  
##  1st Qu.: 295.8  
##  Median : 405.0  
##  Mean   : 681.3  
##  3rd Qu.:1042.0  
##  Max.   :1903.0  
## 
## mining info:
##       data ntransactions support confidence
##  Groceries          9835   0.001        0.1
# Remove redundant rules from the second "whole milk" rule set and rank the
# remainder by lift.
#
# Filtering and sorting use the exact quality measures; rounding them first
# would make is.redundant() and sort() work on values truncated to three
# digits and can create artificial ties. Rounding is applied last and only
# affects how the measures are displayed from here on.
redundant <- is.redundant(rules.wholemilk2, measure = "confidence")
which(redundant)
# NOTE: the recorded output below comes from a run that rounded before
# filtering; the indices may differ when the script is re-run.
## [1] 21
rules.wholemilk2 <- rules.wholemilk2[!redundant]
rules.wholemilk2 <- sort(rules.wholemilk2, by = "lift", decreasing = TRUE)

# Round the quality measures to three digits for compact display.
quality(rules.wholemilk2) <- round(quality(rules.wholemilk2), digits = 3)
summary(rules.wholemilk2)
## set of 23 rules
## 
## rule length distribution (lhs + rhs):sizes
##  1  2 
##  7 16 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   1.696   2.000   2.000 
## 
## summary of quality measures:
##     support          confidence        coverage           lift      
##  Min.   :0.02600   Min.   :0.1020   Min.   :0.2560   Min.   :1.000  
##  1st Qu.:0.03000   1st Qu.:0.1100   1st Qu.:0.2560   1st Qu.:1.000  
##  Median :0.04200   Median :0.1260   Median :0.2560   Median :1.442  
##  Mean   :0.07057   Mean   :0.1474   Mean   :0.4824   Mean   :1.383  
##  3rd Qu.:0.10700   3rd Qu.:0.1790   3rd Qu.:1.0000   3rd Qu.:1.575  
##  Max.   :0.19300   Max.   :0.2930   Max.   :1.0000   Max.   :1.946  
##      count       
##  Min.   : 257.0  
##  1st Qu.: 295.5  
##  Median : 416.0  
##  Mean   : 693.8  
##  3rd Qu.:1052.0  
##  Max.   :1903.0  
## 
## mining info:
##       data ntransactions support confidence
##  Groceries          9835   0.001        0.1
# Show the first ten rules of the second "whole milk" rule set. head() returns
# at most ten rules, so this does not index past the end when fewer remain.
inspect(head(rules.wholemilk2, n = 10))
##      lhs             rhs                  support confidence coverage lift 
## [1]  {whole milk} => {butter}             0.028   0.108      0.256    1.946
## [2]  {whole milk} => {curd}               0.026   0.102      0.256    1.919
## [3]  {whole milk} => {domestic eggs}      0.030   0.117      0.256    1.850
## [4]  {whole milk} => {whipped/sour cream} 0.032   0.126      0.256    1.760
## [5]  {whole milk} => {root vegetables}    0.049   0.191      0.256    1.756
## [6]  {whole milk} => {tropical fruit}     0.042   0.166      0.256    1.578
## [7]  {whole milk} => {yogurt}             0.056   0.219      0.256    1.572
## [8]  {whole milk} => {pip fruit}          0.030   0.118      0.256    1.557
## [9]  {whole milk} => {other vegetables}   0.075   0.293      0.256    1.514
## [10] {whole milk} => {pastry}             0.033   0.130      0.256    1.463
##      count
## [1]  271  
## [2]  257  
## [3]  295  
## [4]  317  
## [5]  481  
## [6]  416  
## [7]  551  
## [8]  296  
## [9]  736  
## [10] 327
# Keep the first ten rules of the second "whole milk" rule set for plotting.
# head() returns at most ten rules instead of indexing past the end when fewer
# are available.
rules.wholemilk2.top10 <- head(rules.wholemilk2, n = 10)

# Graph of the selected rules rendered with the htmlwidget engine.
plot(rules.wholemilk2.top10, method = "graph", engine = "htmlwidget")