Association analysis

The aim of this analysis is to find associations between the items in the transactions below, in order to optimize supermarket purchases.

# Loading the dataset for association

library(arules)
## Loading required package: Matrix
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
# Location of the supermarket basket data (comma-separated items).
path_trans <- "http://bit.ly/SupermarketDatasetII"

# Parse the file into an arules transactions object.
assoc_df <- read.transactions(file = path_trans, format = "basket", sep = ",")
## Warning in asMethod(object): removing duplicated items in transactions
# Show the dimensions of the parsed transactions object.
print(assoc_df)
## transactions in sparse format with
##  7501 transactions (rows) and
##  119 items (columns)
# Quick summary of the dataset: most frequent items and basket sizes.
print(summary(assoc_df))
## transactions as itemMatrix in sparse format with
##  7501 rows (elements/itemsets/transactions) and
##  119 columns (items) and a density of 0.03288973 
## 
## most frequent items:
## mineral water          eggs     spaghetti  french fries     chocolate 
##          1788          1348          1306          1282          1229 
##       (Other) 
##         22405 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 1754 1358 1044  816  667  493  391  324  259  139  102   67   40   22   17    4 
##   18   19   20 
##    1    2    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   3.914   5.000  20.000 
## 
## includes extended item information - examples:
##              labels
## 1           almonds
## 2 antioxydant juice
## 3         asparagus
# Mineral water, eggs, spaghetti, french fries and chocolate are the most popular individual items.

The most frequent items are mineral water (bought 1788 times), followed by eggs (1348), spaghetti (1306), french fries (1282) and chocolate (1229); all other items combined account for 22405 purchases.

# Plot item frequencies side by side: the 20 most frequent items (left) and
# every item whose support is at least 0.09 (right).
old_par <- par(mfcol = c(1, 2))
itemFrequencyPlot(
  assoc_df,
  topN = 20,
  col = "blue",
  ylab = "Item frequency",
  main = " Item Frequency Plots"
)
# The axis label states the threshold that is actually applied (0.09).
itemFrequencyPlot(
  assoc_df,
  support = 0.09,
  col = "darkblue",
  ylab = "Frequency (support >= 0.09)"
)
# Restore the previous layout so later base plots use the full device.
par(old_par)

# Mineral water is still taking the lead even with the minimum support at 0.09.

Apriori algorithm to build association rules

# First rule set: minimum support 0.001 and minimum confidence 0.8.
rule1 <- apriori(
  data = assoc_df,
  parameter = list(support = 0.001, confidence = 0.8)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.8    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 7 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 7501 transaction(s)] done [0.02s].
## sorting and recoding items ... [116 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 done [0.03s].
## writing ... [74 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
# Report how many rules were generated.
print(rule1)
## set of 74 rules
# With a minimum support of 0.001 (freq(X) / total transactions) and a minimum confidence of 80%, the algorithm produced 74 rules. We seem to have lost important rules, as 74 is very few to work with.
# Visualizing the rules with the arulesViz package
#install.packages("arulesViz")
library(arulesViz)
## Loading required package: grid
## Registered S3 method overwritten by 'seriation':
##   method         from 
##   reorder.hclust gclus
# Scatter plot of the rules: support against confidence, shaded by lift.
# plot() for rules selects the visualization through `method`; the earlier
# `type` arguments were not recognized and only triggered an
# "Unknown control parameters" warning before falling back to this plot.
plot(
  rule1,
  method = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift"
)
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Although confidence and support appear positively correlated, a few rules have low support but high confidence, meaning those items are bought in conjunction with other items (they are less likely to be bought alone).
# Inspect the first ten rules together with their quality measures.
inspect(rule1[seq_len(10)])
##      lhs                             rhs             support     confidence
## [1]  {frozen smoothie,spinach}    => {mineral water} 0.001066524 0.8888889 
## [2]  {bacon,pancakes}             => {spaghetti}     0.001733102 0.8125000 
## [3]  {nonfat milk,turkey}         => {mineral water} 0.001199840 0.8181818 
## [4]  {ground beef,nonfat milk}    => {mineral water} 0.001599787 0.8571429 
## [5]  {mushroom cream sauce,pasta} => {escalope}      0.002532996 0.9500000 
## [6]  {milk,pasta}                 => {shrimp}        0.001599787 0.8571429 
## [7]  {cooking oil,fromage blanc}  => {mineral water} 0.001199840 0.8181818 
## [8]  {black tea,salmon}           => {mineral water} 0.001066524 0.8000000 
## [9]  {black tea,frozen smoothie}  => {milk}          0.001199840 0.8181818 
## [10] {red wine,tomato sauce}      => {chocolate}     0.001066524 0.8000000 
##      coverage    lift      count
## [1]  0.001199840  3.729058  8   
## [2]  0.002133049  4.666587 13   
## [3]  0.001466471  3.432428  9   
## [4]  0.001866418  3.595877 12   
## [5]  0.002666311 11.976387 19   
## [6]  0.001866418 11.995203 12   
## [7]  0.001466471  3.432428  9   
## [8]  0.001333156  3.356152  8   
## [9]  0.001466471  6.313973  9   
## [10] 0.001333156  4.882669  8
# Rule 5 has a confidence of 0.95: 95% of the transactions containing {mushroom cream sauce, pasta} also contain escalope.
# Second rule set: same minimum support (0.001), but the minimum confidence
# is lowered from 0.8 to 0.75 to obtain more rules.
rule2 <- apriori(
  data = assoc_df,
  parameter = list(support = 0.001, confidence = 0.75)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##        0.75    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 7 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 7501 transaction(s)] done [0.01s].
## sorting and recoding items ... [116 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 done [0.02s].
## writing ... [110 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
# Report how many rules the relaxed confidence threshold produced.
print(rule2)
## set of 110 rules
# Inspect the first twenty rules of the second rule set.
inspect(rule2[seq_len(20)])
##      lhs                               rhs             support     confidence
## [1]  {frozen smoothie,spinach}      => {mineral water} 0.001066524 0.8888889 
## [2]  {blueberries,eggs}             => {mineral water} 0.001599787 0.7500000 
## [3]  {bacon,pancakes}               => {spaghetti}     0.001733102 0.8125000 
## [4]  {nonfat milk,turkey}           => {mineral water} 0.001199840 0.8181818 
## [5]  {ground beef,nonfat milk}      => {mineral water} 0.001599787 0.8571429 
## [6]  {barbecue sauce,chocolate}     => {mineral water} 0.001333156 0.7692308 
## [7]  {mushroom cream sauce,pasta}   => {escalope}      0.002532996 0.9500000 
## [8]  {milk,pasta}                   => {shrimp}        0.001599787 0.8571429 
## [9]  {mineral water,pasta}          => {shrimp}        0.001599787 0.7500000 
## [10] {cooking oil,fromage blanc}    => {mineral water} 0.001199840 0.8181818 
## [11] {black tea,salmon}             => {mineral water} 0.001066524 0.8000000 
## [12] {black tea,frozen smoothie}    => {milk}          0.001199840 0.8181818 
## [13] {red wine,tomato sauce}        => {chocolate}     0.001066524 0.8000000 
## [14] {shrimp,tomato sauce}          => {spaghetti}     0.001199840 0.7500000 
## [15] {pancakes,tomato sauce}        => {mineral water} 0.001066524 0.8000000 
## [16] {chicken,protein bar}          => {spaghetti}     0.001199840 0.8181818 
## [17] {meatballs,whole wheat pasta}  => {milk}          0.001333156 0.8333333 
## [18] {red wine,soup}                => {mineral water} 0.001866418 0.9333333 
## [19] {turkey,whole wheat pasta}     => {mineral water} 0.001466471 0.8461538 
## [20] {milk,spaghetti,strong cheese} => {mineral water} 0.001066524 0.8000000 
##      coverage    lift      count
## [1]  0.001199840  3.729058  8   
## [2]  0.002133049  3.146393 12   
## [3]  0.002133049  4.666587 13   
## [4]  0.001466471  3.432428  9   
## [5]  0.001866418  3.595877 12   
## [6]  0.001733102  3.227069 10   
## [7]  0.002666311 11.976387 19   
## [8]  0.001866418 11.995203 12   
## [9]  0.002133049 10.495802 12   
## [10] 0.001466471  3.432428  9   
## [11] 0.001333156  3.356152  8   
## [12] 0.001466471  6.313973  9   
## [13] 0.001333156  4.882669  8   
## [14] 0.001599787  4.307619  9   
## [15] 0.001333156  3.356152  8   
## [16] 0.001466471  4.699220  9   
## [17] 0.001599787  6.430898 10   
## [18] 0.001999733  3.915511 14   
## [19] 0.001733102  3.549776 11   
## [20] 0.001333156  3.356152  8
# Mineral water stands out as a popular item. It is useful to know which items are bought together with mineral water so that management can target discounts on those products, such as ground beef, light cream and olive oil.
# Rank the rules from highest to lowest confidence and show the top ten.
sorted <- sort(x = rule2, by = "confidence", decreasing = TRUE)
inspect(sorted[seq_len(10)])
##      lhs                       rhs                 support confidence    coverage      lift count
## [1]  {french fries,                                                                              
##       mushroom cream sauce,                                                                      
##       pasta}                => {escalope}      0.001066524  1.0000000 0.001066524 12.606723     8
## [2]  {ground beef,                                                                               
##       light cream,                                                                               
##       olive oil}            => {mineral water} 0.001199840  1.0000000 0.001199840  4.195190     9
## [3]  {cake,                                                                                      
##       meatballs,                                                                                 
##       mineral water}        => {milk}          0.001066524  1.0000000 0.001066524  7.717078     8
## [4]  {cake,                                                                                      
##       olive oil,                                                                                 
##       shrimp}               => {mineral water} 0.001199840  1.0000000 0.001199840  4.195190     9
## [5]  {mushroom cream sauce,                                                                      
##       pasta}                => {escalope}      0.002532996  0.9500000 0.002666311 11.976387    19
## [6]  {red wine,                                                                                  
##       soup}                 => {mineral water} 0.001866418  0.9333333 0.001999733  3.915511    14
## [7]  {eggs,                                                                                      
##       mineral water,                                                                             
##       pasta}                => {shrimp}        0.001333156  0.9090909 0.001466471 12.722185    10
## [8]  {herb & pepper,                                                                             
##       mineral water,                                                                             
##       rice}                 => {ground beef}   0.001333156  0.9090909 0.001466471  9.252498    10
## [9]  {ground beef,                                                                               
##       pancakes,                                                                                  
##       whole wheat rice}     => {mineral water} 0.001333156  0.9090909 0.001466471  3.813809    10
## [10] {frozen vegetables,                                                                         
##       milk,                                                                                      
##       spaghetti,                                                                                 
##       turkey}               => {mineral water} 0.001199840  0.9000000 0.001333156  3.775671     9
# Keep the rules whose consequent (rhs) is exactly the item "mineral water".
# %in% matches the full item label, whereas %pin% would also accept any
# label that merely contains the text.
mineral <- subset(rule2, subset = rhs %in% "mineral water")
# Rank those rules from highest to lowest confidence.
sorted_mineral <- sort(mineral, by = "confidence", decreasing = TRUE)
# Show up to ten rules; head() does not fail when fewer than ten match.
inspect(head(sorted_mineral, n = 10))
##      lhs                    rhs                 support confidence    coverage     lift count
## [1]  {ground beef,                                                                           
##       light cream,                                                                           
##       olive oil}         => {mineral water} 0.001199840  1.0000000 0.001199840 4.195190     9
## [2]  {cake,                                                                                  
##       olive oil,                                                                             
##       shrimp}            => {mineral water} 0.001199840  1.0000000 0.001199840 4.195190     9
## [3]  {red wine,                                                                              
##       soup}              => {mineral water} 0.001866418  0.9333333 0.001999733 3.915511    14
## [4]  {ground beef,                                                                           
##       pancakes,                                                                              
##       whole wheat rice}  => {mineral water} 0.001333156  0.9090909 0.001466471 3.813809    10
## [5]  {frozen vegetables,                                                                     
##       milk,                                                                                  
##       spaghetti,                                                                             
##       turkey}            => {mineral water} 0.001199840  0.9000000 0.001333156 3.775671     9
## [6]  {chocolate,                                                                             
##       frozen vegetables,                                                                     
##       olive oil,                                                                             
##       shrimp}            => {mineral water} 0.001199840  0.9000000 0.001333156 3.775671     9
## [7]  {frozen smoothie,                                                                       
##       spinach}           => {mineral water} 0.001066524  0.8888889 0.001199840 3.729058     8
## [8]  {cake,                                                                                  
##       meatballs,                                                                             
##       milk}              => {mineral water} 0.001066524  0.8888889 0.001199840 3.729058     8
## [9]  {cake,                                                                                  
##       olive oil,                                                                             
##       whole wheat pasta} => {mineral water} 0.001066524  0.8888889 0.001199840 3.729058     8
## [10] {brownies,                                                                              
##       eggs,                                                                                  
##       ground beef}       => {mineral water} 0.001066524  0.8888889 0.001199840 3.729058     8
# Ground beef, olive oil and cake stand out among the items bought together with mineral water.
# Keep the rules whose antecedent (lhs) contains the item "eggs". Basket data
# carries no purchase order, so these are items bought together with eggs.
# %in% matches the full item label rather than a partial string.
eggs <- subset(rule2, subset = lhs %in% "eggs")
# Rank those rules from highest to lowest confidence.
sorted_eggs <- sort(eggs, by = "confidence", decreasing = TRUE)
# Show up to ten rules; head() does not fail when fewer than ten match.
inspect(head(sorted_eggs, n = 10))
##      lhs                    rhs                 support confidence    coverage      lift count
## [1]  {eggs,                                                                                   
##       mineral water,                                                                          
##       pasta}             => {shrimp}        0.001333156  0.9090909 0.001466471 12.722185    10
## [2]  {brownies,                                                                               
##       eggs,                                                                                   
##       ground beef}       => {mineral water} 0.001066524  0.8888889 0.001199840  3.729058     8
## [3]  {chocolate,                                                                              
##       eggs,                                                                                   
##       frozen vegetables,                                                                      
##       ground beef}       => {mineral water} 0.001466471  0.8461538 0.001733102  3.549776    11
## [4]  {chocolate,                                                                              
##       eggs,                                                                                   
##       olive oil,                                                                              
##       spaghetti}         => {mineral water} 0.001199840  0.8181818 0.001466471  3.432428     9
## [5]  {cooking oil,                                                                            
##       eggs,                                                                                   
##       olive oil}         => {mineral water} 0.001066524  0.8000000 0.001333156  3.356152     8
## [6]  {cake,                                                                                   
##       eggs,                                                                                   
##       milk,                                                                                   
##       turkey}            => {mineral water} 0.001066524  0.8000000 0.001333156  3.356152     8
## [7]  {chocolate,                                                                              
##       eggs,                                                                                   
##       milk,                                                                                   
##       olive oil}         => {mineral water} 0.001066524  0.8000000 0.001333156  3.356152     8
## [8]  {eggs,                                                                                   
##       olive oil,                                                                              
##       soup}              => {mineral water} 0.001466471  0.7857143 0.001866418  3.296221    11
## [9]  {blueberries,                                                                            
##       eggs}              => {mineral water} 0.001599787  0.7500000 0.002133049  3.146393    12
## [10] {eggs,                                                                                   
##       frozen vegetables,                                                                      
##       milk,                                                                                   
##       olive oil}         => {mineral water} 0.001199840  0.7500000 0.001599787  3.146393     9
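# Among the top 10 rules with eggs in the antecedent, mineral water is the consequent in 9, so it is the item most often bought along with baskets that contain eggs.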