624_hw10

Your assignment is to use R to mine the data for association rules. You should report the support, confidence, and lift of the rules, along with your top 10 rules ranked by lift.

library(arules)
library(arulesViz)

# Load the grocery baskets as a transactions object: "basket" format means
# each line of the CSV is one transaction, with items separated by commas.
tr <- read.transactions(
  "GroceryDataSet.csv",
  format = "basket",
  sep = ","
)
# Auto-print the object to show the number of transactions and items.
tr

## transactions in sparse format with
##  9835 transactions (rows) and
##  169 items (columns)

# Summarise the transaction set: matrix density, the most frequent items,
# and the distribution of basket sizes (items per transaction).
summary(tr)

## transactions as itemMatrix in sparse format with
##  9835 rows (elements/itemsets/transactions) and
##  169 columns (items) and a density of 0.02609146 
## 
## most frequent items:
##       whole milk other vegetables       rolls/buns             soda 
##             2513             1903             1809             1715 
##           yogurt          (Other) 
##             1372            34055 
## 
## element (itemset/transaction) length distribution:
## sizes
##    1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16 
## 2159 1643 1299 1005  855  645  545  438  350  246  182  117   78   77   55   46 
##   17   18   19   20   21   22   23   24   26   27   28   29   32 
##   29   14   14    9   11    4    6    1    1    1    1    3    1 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   2.000   3.000   4.409   6.000  32.000 
## 
## includes extended item information - examples:
##             labels
## 1 abrasive cleaner
## 2 artif. sweetener
## 3   baby cosmetics

# Bar chart of the 20 most frequent items, using relative frequency
# (share of transactions containing the item) rather than raw counts.
itemFrequencyPlot(
  tr,
  topN = 20L,
  type = "relative",
  main = "Top 20 Frequency Plot"
)

Apriori to find rules

# Mine association rules with Apriori.
#   support    = 0.001 : a rule must cover at least 0.1% of transactions
#                        (an absolute count of 9 out of 9835 baskets).
#   confidence = 0.6   : minimum confidence for a rule to be kept.
#   maxlen     = 20    : upper bound on the number of items per rule
#                        (lhs + rhs combined).
# Parameter names are spelled out in full rather than abbreviated
# (supp / conf) so the call does not depend on partial name matching.
rules <- apriori(
  tr,
  parameter = list(support = 0.001, confidence = 0.6, maxlen = 20)
)

## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.6    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target   ext
##      20  rules FALSE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 9 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [2918 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

# Summarise the mined rule set: number of rules, rule-length distribution,
# and the range of the quality measures (support, confidence, lift, count).
summary(rules)

## set of 2918 rules
## 
## rule length distribution (lhs + rhs):sizes
##    2    3    4    5    6 
##    3  490 1765  626   34 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   4.000   4.000   4.068   4.000   6.000 
## 
## summary of quality measures:
##     support           confidence          lift            count      
##  Min.   :0.001017   Min.   :0.6000   Min.   : 2.348   Min.   :10.00  
##  1st Qu.:0.001118   1st Qu.:0.6316   1st Qu.: 2.668   1st Qu.:11.00  
##  Median :0.001220   Median :0.6818   Median : 3.168   Median :12.00  
##  Mean   :0.001480   Mean   :0.7028   Mean   : 3.450   Mean   :14.55  
##  3rd Qu.:0.001525   3rd Qu.:0.7500   3rd Qu.: 3.692   3rd Qu.:15.00  
##  Max.   :0.009354   Max.   :1.0000   Max.   :18.996   Max.   :92.00  
## 
## mining info:
##  data ntransactions support confidence
##    tr          9835   0.001        0.6

# Default plot of the full rule set; jitter spreads out overlapping points.
plot(rules, jitter = 1)

# Two-key plot of the same rules, again with jitter applied.
plot(rules, jitter = 1, method = "two-key plot")

Top 10 rules by lift

# Keep the ten rules with the highest lift (sorted in decreasing order,
# which is the default for head() with `by`), then print them.
top10rules <- head(rules, n = 10L, by = "lift", decreasing = TRUE)
inspect(top10rules)

##      lhs                        rhs                      support confidence      lift count
## [1]  {Instant food products,                                                               
##       soda}                  => {hamburger meat}     0.001220132  0.6315789 18.995654    12
## [2]  {popcorn,                                                                             
##       soda}                  => {salty snack}        0.001220132  0.6315789 16.697793    12
## [3]  {ham,                                                                                 
##       processed cheese}      => {white bread}        0.001931876  0.6333333 15.045491    19
## [4]  {other vegetables,                                                                    
##       tropical fruit,                                                                      
##       white bread,                                                                         
##       yogurt}                => {butter}             0.001016777  0.6666667 12.030581    10
## [5]  {hamburger meat,                                                                      
##       whipped/sour cream,                                                                  
##       yogurt}                => {butter}             0.001016777  0.6250000 11.278670    10
## [6]  {domestic eggs,                                                                       
##       other vegetables,                                                                    
##       tropical fruit,                                                                      
##       whole milk,                                                                          
##       yogurt}                => {butter}             0.001016777  0.6250000 11.278670    10
## [7]  {liquor,                                                                              
##       red/blush wine}        => {bottled beer}       0.001931876  0.9047619 11.235269    19
## [8]  {butter,                                                                              
##       other vegetables,                                                                    
##       sugar}                 => {whipped/sour cream} 0.001016777  0.7142857  9.964539    10
## [9]  {butter,                                                                              
##       hard cheese,                                                                         
##       whole milk}            => {whipped/sour cream} 0.001423488  0.6666667  9.300236    14
## [10] {butter,                                                                              
##       fruit/vegetable juice,                                                               
##       other vegetables,                                                                    
##       tropical fruit}        => {whipped/sour cream} 0.001016777  0.6666667  9.300236    10

# Draw the top-10 rules as a graph, rendered with the htmlwidget engine
# so the result is an interactive widget in HTML output.
plot(top10rules, method = "graph",  engine = "htmlwidget")

624_hw10

Albina Gallyavova

5/3/2020

Apriori to find rules

Top 10 rules by lift