INSPECT DATASET

library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
# Read the retail data set as transactions in "basket" format.
grd <- read.transactions(
  "http://fimi.ua.ac.be/data/retail.dat",
  format = "basket"
)

# Draw one item-frequency plot per minimum-support threshold, from the
# loosest cut-off to the strictest.
for (min_support in c(0.1, 0.2, 0.3, 0.5)) {
  itemFrequencyPlot(grd, support = min_support)
}

# Print the overall summary of the transaction set.
summary(grd)
## transactions as itemMatrix in sparse format with
##  88162 rows (elements/itemsets/transactions) and
##  16470 columns (items) and a density of 0.0006257289 
## 
## most frequent items:
##      39      48      38      32      41 (Other) 
##   50675   42135   15596   15167   14945  770058 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15 
## 3016 5516 6919 7210 6814 6163 5746 5143 4660 4086 3751 3285 2866 2620 2310 
##   16   17   18   19   20   21   22   23   24   25   26   27   28   29   30 
## 2115 1874 1645 1469 1290 1205  981  887  819  684  586  582  472  480  355 
##   31   32   33   34   35   36   37   38   39   40   41   42   43   44   45 
##  310  303  272  234  194  136  153  123  115  112   76   66   71   60   50 
##   46   47   48   49   50   51   52   53   54   55   56   57   58   59   60 
##   44   37   37   33   22   24   21   21   10   11   10    9   11    4    9 
##   61   62   63   64   65   66   67   68   71   73   74   76 
##    7    4    5    2    2    5    3    3    1    1    1    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    1.00    4.00    8.00   10.31   14.00   76.00 
## 
## includes extended item information - examples:
##   labels
## 1      0
## 2      1
## 3     10

RULES AND ANALYSIS

# Mine association rules with a minimum support of 0.05 and a minimum
# confidence of 0.5. The parameter names are written out in full instead of
# the abbreviations supp/conf, so the call does not rely on partial matching.
grdar <- apriori(grd, parameter = list(support = 0.05, confidence = 0.5))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.05      1
##  maxlen target   ext
##      10  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 4408 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[16470 item(s), 88162 transaction(s)] done [0.16s].
## sorting and recoding items ... [6 item(s)] done [0.00s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [15 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
# Print every mined rule together with its support, confidence and lift.
inspect(grdar)
##      lhs        rhs  support    confidence lift     
## [1]  {}      => {39} 0.57479413 0.5747941  1.0000000
## [2]  {38}    => {48} 0.09010685 0.5093614  1.0657723
## [3]  {38}    => {39} 0.11734080 0.6633111  1.1539977
## [4]  {32}    => {48} 0.09112770 0.5297026  1.1083338
## [5]  {32}    => {39} 0.09590300 0.5574603  0.9698434
## [6]  {41}    => {48} 0.10228897 0.6034125  1.2625621
## [7]  {41}    => {39} 0.12946621 0.7637337  1.3287082
## [8]  {48}    => {39} 0.33055058 0.6916340  1.2032726
## [9]  {39}    => {48} 0.33055058 0.5750765  1.2032726
## [10] {38,48} => {39} 0.06921349 0.7681269  1.3363513
## [11] {38,39} => {48} 0.06921349 0.5898502  1.2341847
## [12] {32,48} => {39} 0.06127356 0.6723923  1.1697968
## [13] {32,39} => {48} 0.06127356 0.6389119  1.3368399
## [14] {41,48} => {39} 0.08355074 0.8168108  1.4210493
## [15] {39,41} => {48} 0.08355074 0.6453478  1.3503063
# Rank the mined rules by each quality measure, largest value first.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
grdar_st_su <- sort(grdar, decreasing = TRUE, by = "support")
grdar_st_cf <- sort(grdar, decreasing = TRUE, by = "confidence")
grdar_st_lf <- sort(grdar, decreasing = TRUE, by = "lift")

# Sort on several measures at once: support first, with confidence and lift
# as additional keys (presumably tie-breakers -- confirm against the arules
# sort documentation).
grdar_st_all <- sort(
  grdar,
  decreasing = TRUE,
  by = c("support", "confidence", "lift")
)

# Show the rules ordered by support.
inspect(grdar_st_su)
##      lhs        rhs  support    confidence lift     
## [1]  {}      => {39} 0.57479413 0.5747941  1.0000000
## [2]  {48}    => {39} 0.33055058 0.6916340  1.2032726
## [3]  {39}    => {48} 0.33055058 0.5750765  1.2032726
## [4]  {41}    => {39} 0.12946621 0.7637337  1.3287082
## [5]  {38}    => {39} 0.11734080 0.6633111  1.1539977
## [6]  {41}    => {48} 0.10228897 0.6034125  1.2625621
## [7]  {32}    => {39} 0.09590300 0.5574603  0.9698434
## [8]  {32}    => {48} 0.09112770 0.5297026  1.1083338
## [9]  {38}    => {48} 0.09010685 0.5093614  1.0657723
## [10] {41,48} => {39} 0.08355074 0.8168108  1.4210493
## [11] {39,41} => {48} 0.08355074 0.6453478  1.3503063
## [12] {38,48} => {39} 0.06921349 0.7681269  1.3363513
## [13] {38,39} => {48} 0.06921349 0.5898502  1.2341847
## [14] {32,48} => {39} 0.06127356 0.6723923  1.1697968
## [15] {32,39} => {48} 0.06127356 0.6389119  1.3368399
# Show the rules ordered by confidence, highest first.
inspect(grdar_st_cf)
##      lhs        rhs  support    confidence lift     
## [1]  {41,48} => {39} 0.08355074 0.8168108  1.4210493
## [2]  {38,48} => {39} 0.06921349 0.7681269  1.3363513
## [3]  {41}    => {39} 0.12946621 0.7637337  1.3287082
## [4]  {48}    => {39} 0.33055058 0.6916340  1.2032726
## [5]  {32,48} => {39} 0.06127356 0.6723923  1.1697968
## [6]  {38}    => {39} 0.11734080 0.6633111  1.1539977
## [7]  {39,41} => {48} 0.08355074 0.6453478  1.3503063
## [8]  {32,39} => {48} 0.06127356 0.6389119  1.3368399
## [9]  {41}    => {48} 0.10228897 0.6034125  1.2625621
## [10] {38,39} => {48} 0.06921349 0.5898502  1.2341847
## [11] {39}    => {48} 0.33055058 0.5750765  1.2032726
## [12] {}      => {39} 0.57479413 0.5747941  1.0000000
## [13] {32}    => {39} 0.09590300 0.5574603  0.9698434
## [14] {32}    => {48} 0.09112770 0.5297026  1.1083338
## [15] {38}    => {48} 0.09010685 0.5093614  1.0657723
# Show the rules ordered by lift, highest first.
inspect(grdar_st_lf)
##      lhs        rhs  support    confidence lift     
## [1]  {41,48} => {39} 0.08355074 0.8168108  1.4210493
## [2]  {39,41} => {48} 0.08355074 0.6453478  1.3503063
## [3]  {32,39} => {48} 0.06127356 0.6389119  1.3368399
## [4]  {38,48} => {39} 0.06921349 0.7681269  1.3363513
## [5]  {41}    => {39} 0.12946621 0.7637337  1.3287082
## [6]  {41}    => {48} 0.10228897 0.6034125  1.2625621
## [7]  {38,39} => {48} 0.06921349 0.5898502  1.2341847
## [8]  {39}    => {48} 0.33055058 0.5750765  1.2032726
## [9]  {48}    => {39} 0.33055058 0.6916340  1.2032726
## [10] {32,48} => {39} 0.06127356 0.6723923  1.1697968
## [11] {38}    => {39} 0.11734080 0.6633111  1.1539977
## [12] {32}    => {48} 0.09112770 0.5297026  1.1083338
## [13] {38}    => {48} 0.09010685 0.5093614  1.0657723
## [14] {}      => {39} 0.57479413 0.5747941  1.0000000
## [15] {32}    => {39} 0.09590300 0.5574603  0.9698434
# Show the rules sorted on support, confidence and lift together.
inspect(grdar_st_all)
##      lhs        rhs  support    confidence lift     
## [1]  {}      => {39} 0.57479413 0.5747941  1.0000000
## [2]  {48}    => {39} 0.33055058 0.6916340  1.2032726
## [3]  {39}    => {48} 0.33055058 0.5750765  1.2032726
## [4]  {41}    => {39} 0.12946621 0.7637337  1.3287082
## [5]  {38}    => {39} 0.11734080 0.6633111  1.1539977
## [6]  {41}    => {48} 0.10228897 0.6034125  1.2625621
## [7]  {32}    => {39} 0.09590300 0.5574603  0.9698434
## [8]  {32}    => {48} 0.09112770 0.5297026  1.1083338
## [9]  {38}    => {48} 0.09010685 0.5093614  1.0657723
## [10] {41,48} => {39} 0.08355074 0.8168108  1.4210493
## [11] {39,41} => {48} 0.08355074 0.6453478  1.3503063
## [12] {38,48} => {39} 0.06921349 0.7681269  1.3363513
## [13] {38,39} => {48} 0.06921349 0.5898502  1.2341847
## [14] {32,48} => {39} 0.06127356 0.6723923  1.1697968
## [15] {32,39} => {48} 0.06127356 0.6389119  1.3368399

The highest support belongs to the rule with an empty lhs and a rhs of 39. With no antecedent, the support is simply the overall probability that a basket contains item 39, which is 0.57.

The highest confidence is 0.82, for the rule {41,48} => {39}: when 41 and 48 are both in the basket, the probability of also buying 39 is 0.82. Its support is small (only 0.08, ranking 10th), but it has the highest lift of all the rules (1.42). So this rule does not apply often, but when it does, we can predict that 39 will be bought with a confidence of 0.82.

Sorting by support, then confidence, then lift shows which rules apply most often. Apart from the empty-lhs rule, the top rule is {48} => {39}: 48 is a popular product, and the rule has a support of 0.33 (48 and 39 appear together in about a third of all baskets). People who buy 48 also buy 39 with a confidence of 0.69, which gives a lift of 1.2.

Do Next

  1. The hypothesis I want to test is: there is a high probability of buying 39 if 41 and 48 are both in the basket.

  2. It would be interesting to have sales data as well, which would help us reach a more financially meaningful result.

  3. Predict the probability of buying 39 if 48 is in the basket.

Association Analysis in Application