Introduction

This project applies the Apriori algorithm to a market basket dataset. For more information about these topics, please check the following links:

First it’s important to define the Apriori algorithm, including some statistical concepts (support, confidence, lift and conviction) to select interesting rules.

If you want more information about these measures, please check here.

Loading Data

library(arules)
library(plotly)
library(arulesViz)
library(tidyverse)
library(ggplot2)
library(dplyr)
library(knitr)
library(gridExtra)

DATASET

# Load the basket file directly as an arules `transactions` object.
# Items on a line are separated by commas; rm.duplicates = TRUE drops items
# repeated within the same transaction.
# The earlier read.csv() call was removed: its result was overwritten
# immediately and never used.
dataset <- read.transactions(
  "Market_Basket_Optimisation.csv",
  sep = ",",
  rm.duplicates = TRUE
)
## distribution of transactions with duplicates:
## 1 
## 5

DESCRIPTION OF THE DATASET

Transactions

# Display the transactions object (number of transactions and items)
print(dataset)
## transactions in sparse format with
##  7501 transactions (rows) and
##  119 items (columns)

SUMMARY

# Summarise the transactions: density, most frequent items, basket sizes
summary(object = dataset)
## transactions as itemMatrix in sparse format with
##  7501 rows (elements/itemsets/transactions) and
##  119 columns (items) and a density of 0.03288973 
## 
## most frequent items:
## mineral water          eggs     spaghetti  french fries     chocolate 
##          1788          1348          1306          1282          1229 
##       (Other) 
##         22405 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 1754 1358 1044  816  667  493  391  324  259  139  102   67   40   22   17    4 
##   18   19   20 
##    1    2    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   3.914   5.000  20.000 
## 
## includes extended item information - examples:
##              labels
## 1           almonds
## 2 antioxydant juice
## 3         asparagus

STRUCTURE

# Internal structure of the transactions object (S4 slots)
str(object = dataset)
## Formal class 'transactions' [package "arules"] with 3 slots
##   ..@ data       :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
##   .. .. ..@ i       : int [1:29358] 0 1 3 32 38 47 52 53 59 64 ...
##   .. .. ..@ p       : int [1:7502] 0 20 23 24 26 31 32 34 37 40 ...
##   .. .. ..@ Dim     : int [1:2] 119 7501
##   .. .. ..@ Dimnames:List of 2
##   .. .. .. ..$ : NULL
##   .. .. .. ..$ : NULL
##   .. .. ..@ factors : list()
##   ..@ itemInfo   :'data.frame':  119 obs. of  1 variable:
##   .. ..$ labels: chr [1:119] "almonds" "antioxydant juice" "asparagus" "avocado" ...
##   ..@ itemsetInfo:'data.frame':  0 obs. of  0 variables

GLIMPSE

# Compact overview of the transactions object
dplyr::glimpse(dataset)
## Formal class 'transactions' [package "arules"] with 3 slots
##   ..@ data       :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
##   ..@ itemInfo   :'data.frame':  119 obs. of  1 variable:
##   .. ..$ labels: chr [1:119] "almonds" "antioxydant juice" "asparagus" "avocado" ...
##   ..@ itemsetInfo:'data.frame':  0 obs. of  0 variables

Data analysis

# Bar chart of absolute counts for the 15 most frequent items
itemFrequencyPlot(
  x = dataset,
  type = "absolute",
  topN = 15,
  col = "wheat2",
  main = "Absolute Item Frequency Plot",
  xlab = "Item name",
  ylab = "Frequency (absolute)"
)

# Bar chart of relative frequencies for the 15 most frequent items
itemFrequencyPlot(
  x = dataset,
  type = "relative",
  topN = 15,
  col = "lightcyan2",
  main = "Relative Item Frequency Plot",
  xlab = "Item name",
  ylab = "Frequency (relative)"
)

# Apriori algorithm

## Choice of support and confidence

In the following graphs we can see the number of rules generated with support levels of 10%, 5%, 1% and 0.5%.

# Mine association rules with minimum support 0.003 and minimum confidence 0.2
rules <- apriori(
  data = dataset,
  parameter = list(confidence = 0.2, support = 0.003)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.003      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 22 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 7501 transaction(s)] done [0.00s].
## sorting and recoding items ... [115 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 done [0.01s].
## writing ... [1348 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Support and confidence values to compare
supportLevels <- c(0.1, 0.05, 0.01, 0.005)
confidenceLevels <- c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1)

# Count the rules apriori() finds on `dataset` at one support level, once per
# entry of `confidenceLevels`. Returns an integer vector with the same length
# as `confidenceLevels` (previously the length 9 was hard-coded).
count_rules <- function(support) {
  vapply(
    confidenceLevels,
    function(confidence) {
      found <- apriori(
        dataset,
        parameter = list(
          support = support,
          confidence = confidence,
          target = "rules"
        ),
        # Silence the per-run progress log; only the rule count is needed.
        control = list(verbose = FALSE)
      )
      as.integer(length(found))
    },
    integer(1)
  )
}

# Number of rules per confidence level, one vector per support level.
# These were previously preallocated but never filled.
rules_sup10 <- count_rules(supportLevels[1])
rules_sup5 <- count_rules(supportLevels[2])
rules_sup1 <- count_rules(supportLevels[3])
rules_sup0.5 <- count_rules(supportLevels[4])

# Line-and-point chart of rule counts against confidence for one support level.
plot_rule_counts <- function(rule_counts, support) {
  ggplot(
    data.frame(confidence = confidenceLevels, n_rules = rule_counts),
    aes(x = confidence, y = n_rules)
  ) +
    geom_line() +
    geom_point() +
    labs(
      x = "Confidence level",
      y = "Number of rules found",
      title = paste0(
        "Apriori with a support level of ", format(100 * support), "%"
      )
    ) +
    theme_bw()
}

# plot1..plot4 were referenced below without being defined in this script.
plot1 <- plot_rule_counts(rules_sup10, supportLevels[1])
plot2 <- plot_rule_counts(rules_sup5, supportLevels[2])
plot3 <- plot_rule_counts(rules_sup1, supportLevels[3])
plot4 <- plot_rule_counts(rules_sup0.5, supportLevels[4])

# Subplot: arrange the four charts in a 2 x 2 grid
grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)

Output

# Show the 10 rules with the highest lift.
# head(..., by = ) returns at most n rules, so it does not index past the end
# when fewer than 10 rules were mined (sort(...)[1:10] would).
inspect(head(rules, n = 10, by = "lift"))
##      lhs                                       rhs             support    
## [1]  {mineral water,whole wheat pasta}      => {olive oil}     0.003866151
## [2]  {frozen vegetables,milk,mineral water} => {soup}          0.003066258
## [3]  {fromage blanc}                        => {honey}         0.003332889
## [4]  {spaghetti,tomato sauce}               => {ground beef}   0.003066258
## [5]  {light cream}                          => {chicken}       0.004532729
## [6]  {pasta}                                => {escalope}      0.005865885
## [7]  {french fries,herb & pepper}           => {ground beef}   0.003199573
## [8]  {cereals,spaghetti}                    => {ground beef}   0.003066258
## [9]  {frozen vegetables,mineral water,soup} => {milk}          0.003066258
## [10] {french fries,ground beef}             => {herb & pepper} 0.003199573
##      confidence coverage    lift     count
## [1]  0.4027778  0.009598720 6.115863 29   
## [2]  0.2771084  0.011065191 5.484407 23   
## [3]  0.2450980  0.013598187 5.164271 25   
## [4]  0.4893617  0.006265831 4.980600 23   
## [5]  0.2905983  0.015597920 4.843951 34   
## [6]  0.3728814  0.015731236 4.700812 44   
## [7]  0.4615385  0.006932409 4.697422 24   
## [8]  0.4600000  0.006665778 4.681764 23   
## [9]  0.6052632  0.005065991 4.670863 23   
## [10] 0.2307692  0.013864818 4.665768 24

Data Visualization (rules)

# Interactive scatter plot of all rules.
# plotly_arules() is deprecated in arulesViz; the plotly engine of plot()
# is its replacement.
plot(rules, engine = "plotly")

### Head of rules

# First rows of the quality measures attached to the mined rules
head(x = quality(rules))
##       support confidence    coverage     lift count
## 1 0.238368218  0.2383682 1.000000000 1.000000  1788
## 2 0.003732836  0.4827586 0.007732302 2.772720    28
## 3 0.003199573  0.4137931 0.007732302 1.735941    24
## 4 0.003066258  0.3382353 0.009065458 2.560144    23
## 5 0.003066258  0.3239437 0.009465405 1.860568    23
## 6 0.003466205  0.3661972 0.009465405 1.536267    26
# Scatter plot of all rules: support on x, lift on y, shaded by confidence.
# The argument is `measure`; the previous `interestMeasure` is not a known
# parameter, so it was ignored with an "Unknown control parameters" warning.
plot(
  rules,
  measure = c("support", "lift"),
  shading = "confidence",
  jitter = 0
)
## Warning: Unknown control parameters: interestMeasure
## Available control parameters (with default values):
## main  =  Scatter plot for 1348 rules
## engine    =  default
## pch   =  19
## cex   =  0.5
## xlim  =  NULL
## ylim  =  NULL
## zlim  =  NULL
## alpha     =  NULL
## col   =  c("#EE0000FF", "#EE0303FF", "#EE0606FF", "#EE0909FF", "#EE0C0CFF", "#EE0F0FFF", "#EE1212FF", "#EE1515FF", "#EE1818FF", "#EE1B1BFF", "#EE1E1EFF", "#EE2222FF", "#EE2525FF", "#EE2828FF", "#EE2B2BFF", "#EE2E2EFF", "#EE3131FF", "#EE3434FF", "#EE3737FF", "#EE3A3AFF", "#EE3D3DFF", "#EE4040FF", "#EE4444FF", "#EE4747FF", "#EE4A4AFF", "#EE4D4DFF", "#EE5050FF", "#EE5353FF", "#EE5656FF", "#EE5959FF", "#EE5C5CFF", "#EE5F5FFF", "#EE6262FF", "#EE6666FF", "#EE6969FF", "#EE6C6CFF", "#EE6F6FFF", "#EE7272FF", "#EE7575FF",  "#EE7878FF", "#EE7B7BFF", "#EE7E7EFF", "#EE8181FF", "#EE8484FF", "#EE8888FF", "#EE8B8BFF", "#EE8E8EFF", "#EE9191FF", "#EE9494FF", "#EE9797FF", "#EE9999FF", "#EE9B9BFF", "#EE9D9DFF", "#EE9F9FFF", "#EEA0A0FF", "#EEA2A2FF", "#EEA4A4FF", "#EEA5A5FF", "#EEA7A7FF", "#EEA9A9FF", "#EEABABFF", "#EEACACFF", "#EEAEAEFF", "#EEB0B0FF", "#EEB1B1FF", "#EEB3B3FF", "#EEB5B5FF", "#EEB7B7FF", "#EEB8B8FF", "#EEBABAFF", "#EEBCBCFF", "#EEBDBDFF", "#EEBFBFFF", "#EEC1C1FF", "#EEC3C3FF", "#EEC4C4FF", "#EEC6C6FF", "#EEC8C8FF",  "#EEC9C9FF", "#EECBCBFF", "#EECDCDFF", "#EECFCFFF", "#EED0D0FF", "#EED2D2FF", "#EED4D4FF", "#EED5D5FF", "#EED7D7FF", "#EED9D9FF", "#EEDBDBFF", "#EEDCDCFF", "#EEDEDEFF", "#EEE0E0FF", "#EEE1E1FF", "#EEE3E3FF", "#EEE5E5FF", "#EEE7E7FF", "#EEE8E8FF", "#EEEAEAFF", "#EEECECFF", "#EEEEEEFF")
## newpage   =  TRUE
## jitter    =  NA
## verbose   =  FALSE

# Keep the 10 rules with the highest lift for the detailed plots
subrules2 <- head(x = rules, by = "lift", n = 10)

Data visual using arulesViz

Data Visualization

Graph

# Network graph of the selected rules, laid out on a circle
plot(
  x = subrules2,
  method = "graph",
  control = list(layout = igraph::in_circle())
)

Parallel coordinates plot

# Parallel coordinates view of the selected rules, with reordering enabled
plot(
  x = subrules2,
  method = "paracoord",
  control = list(reorder = TRUE)
)

Grouped matrix plot

# Grouped matrix view of the selected rules
plot(x = subrules2, method = "grouped")

Scatter plot

# Scatter plot of the selected rules: support vs. lift, shaded by confidence
plot(
  x = subrules2,
  measure = c("support", "lift"),
  shading = "confidence",
  jitter = 0
)

OUTPUT

# Output: the 50 rules with the highest lift.
# head(..., by = ) returns at most n rules, so it does not index past the end
# when fewer than 50 rules were mined (sort(...)[1:50] would).
inspect(head(rules, n = 50, by = "lift"))
##      lhs                       rhs                     support confidence    coverage     lift count
## [1]  {mineral water,                                                                                
##       whole wheat pasta}    => {olive oil}         0.003866151  0.4027778 0.009598720 6.115863    29
## [2]  {frozen vegetables,                                                                            
##       milk,                                                                                         
##       mineral water}        => {soup}              0.003066258  0.2771084 0.011065191 5.484407    23
## [3]  {fromage blanc}        => {honey}             0.003332889  0.2450980 0.013598187 5.164271    25
## [4]  {spaghetti,                                                                                    
##       tomato sauce}         => {ground beef}       0.003066258  0.4893617 0.006265831 4.980600    23
## [5]  {light cream}          => {chicken}           0.004532729  0.2905983 0.015597920 4.843951    34
## [6]  {pasta}                => {escalope}          0.005865885  0.3728814 0.015731236 4.700812    44
## [7]  {french fries,                                                                                 
##       herb & pepper}        => {ground beef}       0.003199573  0.4615385 0.006932409 4.697422    24
## [8]  {cereals,                                                                                      
##       spaghetti}            => {ground beef}       0.003066258  0.4600000 0.006665778 4.681764    23
## [9]  {frozen vegetables,                                                                            
##       mineral water,                                                                                
##       soup}                 => {milk}              0.003066258  0.6052632 0.005065991 4.670863    23
## [10] {french fries,                                                                                 
##       ground beef}          => {herb & pepper}     0.003199573  0.2307692 0.013864818 4.665768    24
## [11] {chocolate,                                                                                    
##       frozen vegetables,                                                                            
##       mineral water}        => {shrimp}            0.003199573  0.3287671 0.009732036 4.600900    24
## [12] {frozen vegetables,                                                                            
##       milk,                                                                                         
##       mineral water}        => {olive oil}         0.003332889  0.3012048 0.011065191 4.573557    25
## [13] {pasta}                => {shrimp}            0.005065991  0.3220339 0.015731236 4.506672    38
## [14] {chocolate,                                                                                    
##       herb & pepper}        => {ground beef}       0.003999467  0.4411765 0.009065458 4.490183    30
## [15] {chocolate,                                                                                    
##       mineral water,                                                                                
##       shrimp}               => {frozen vegetables} 0.003199573  0.4210526 0.007598987 4.417225    24
## [16] {cake,                                                                                         
##       frozen vegetables}    => {tomatoes}          0.003066258  0.2987013 0.010265298 4.367560    23
## [17] {milk,                                                                                         
##       tomatoes}             => {soup}              0.003066258  0.2190476 0.013998134 4.335293    23
## [18] {eggs,                                                                                         
##       ground beef}          => {herb & pepper}     0.004132782  0.2066667 0.019997334 4.178455    31
## [19] {milk,                                                                                         
##       olive oil}            => {soup}              0.003599520  0.2109375 0.017064391 4.174781    27
## [20] {whole wheat pasta}    => {olive oil}         0.007998933  0.2714932 0.029462738 4.122410    60
## [21] {milk,                                                                                         
##       mineral water,                                                                                
##       olive oil}            => {frozen vegetables} 0.003332889  0.3906250 0.008532196 4.098011    25
## [22] {mineral water,                                                                                
##       shrimp,                                                                                       
##       spaghetti}            => {frozen vegetables} 0.003332889  0.3906250 0.008532196 4.098011    25
## [23] {herb & pepper,                                                                                
##       spaghetti}            => {ground beef}       0.006399147  0.3934426 0.016264498 4.004360    48
## [24] {herb & pepper,                                                                                
##       milk}                 => {ground beef}       0.003599520  0.3913043 0.009198773 3.982597    27
## [25] {herb & pepper,                                                                                
##       mineral water}        => {ground beef}       0.006665778  0.3906250 0.017064391 3.975683    50
## [26] {chocolate,                                                                                    
##       frozen vegetables,                                                                            
##       spaghetti}            => {ground beef}       0.003066258  0.3898305 0.007865618 3.967597    23
## [27] {frozen vegetables,                                                                            
##       mineral water,                                                                                
##       olive oil}            => {milk}              0.003332889  0.5102041 0.006532462 3.937285    25
## [28] {frozen vegetables,                                                                            
##       mineral water,                                                                                
##       spaghetti}            => {shrimp}            0.003332889  0.2777778 0.011998400 3.887334    25
## [29] {cereals,                                                                                      
##       ground beef}          => {spaghetti}         0.003066258  0.6764706 0.004532729 3.885303    23
## [30] {frozen vegetables,                                                                            
##       soup}                 => {milk}              0.003999467  0.5000000 0.007998933 3.858539    30
## [31] {chicken,                                                                                      
##       olive oil}            => {milk}              0.003599520  0.5000000 0.007199040 3.858539    27
## [32] {tomato sauce}         => {ground beef}       0.005332622  0.3773585 0.014131449 3.840659    40
## [33] {cake,                                                                                         
##       tomatoes}             => {frozen vegetables} 0.003066258  0.3650794 0.008398880 3.830014    23
## [34] {mushroom cream sauce} => {escalope}          0.005732569  0.3006993 0.019064125 3.790833    43
## [35] {frozen vegetables,                                                                            
##       milk,                                                                                         
##       spaghetti}            => {ground beef}       0.003066258  0.3709677 0.008265565 3.775616    23
## [36] {milk,                                                                                         
##       mineral water,                                                                                
##       soup}                 => {frozen vegetables} 0.003066258  0.3593750 0.008532196 3.770170    23
## [37] {shrimp,                                                                                       
##       tomatoes}             => {frozen vegetables} 0.003999467  0.3571429 0.011198507 3.746753    30
## [38] {frozen vegetables,                                                                            
##       mineral water,                                                                                
##       spaghetti}            => {tomatoes}          0.003066258  0.2555556 0.011998400 3.736690    23
## [39] {frozen vegetables,                                                                            
##       mineral water,                                                                                
##       spaghetti}            => {ground beef}       0.004399413  0.3666667 0.011998400 3.731841    33
## [40] {chocolate,                                                                                    
##       mineral water,                                                                                
##       spaghetti}            => {olive oil}         0.003866151  0.2436975 0.015864551 3.700354    29
## [41] {chicken,                                                                                      
##       milk}                 => {olive oil}         0.003599520  0.2432432 0.014798027 3.693457    27
## [42] {frozen smoothie,                                                                              
##       mineral water,                                                                                
##       spaghetti}            => {milk}              0.003199573  0.4705882 0.006799093 3.631566    24
## [43] {milk,                                                                                         
##       soup}                 => {olive oil}         0.003599520  0.2368421 0.015197974 3.596260    27
## [44] {ground beef,                                                                                  
##       milk,                                                                                         
##       mineral water}        => {frozen vegetables} 0.003732836  0.3373494 0.011065191 3.539102    28
## [45] {milk,                                                                                         
##       olive oil}            => {chicken}           0.003599520  0.2109375 0.017064391 3.516094    27
## [46] {olive oil,                                                                                    
##       tomatoes}             => {spaghetti}         0.004399413  0.6111111 0.007199040 3.509912    33
## [47] {frozen vegetables,                                                                            
##       shrimp}               => {tomatoes}          0.003999467  0.2400000 0.016664445 3.509240    30
## [48] {spaghetti,                                                                                    
##       whole wheat pasta}    => {milk}              0.003999467  0.4545455 0.008798827 3.507763    30
## [49] {frozen vegetables,                                                                            
##       spaghetti}            => {tomatoes}          0.006665778  0.2392344 0.027862952 3.498046    50
## [50] {chocolate,                                                                                    
##       ground beef,                                                                                  
##       spaghetti}            => {frozen vegetables} 0.003066258  0.3333333 0.009198773 3.496970    23