1. 19,000 members
2. 540 Zip Codes
3. 440 Cities
4. 35 States
5. 119 different food items
6. 1,927 different retail items
7. To identify useful and/or hidden information in the data collected by the Zoo.
8. To study the buying and/or visiting behaviour of Zoo members.
9. A minimum support of 0.3% and a minimum confidence of 50% were set. There were 19,076 transactions. Transactions were defined by combining customer name with date of purchase.
library(readxl)
library(factoextra)
## Loading required package: ggplot2
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
library(MASS)
library(readxl)
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Loading required package: grid
library(knitr)
# Load the food purchase table and drop its first column
# (presumably a row identifier -- TODO confirm against the CSV).
TransFood <- read.csv(
  "https://xiaoruizhu.github.io/Data-Mining-R/data/food_4_association.csv"
)
TransFood <- TransFood[, -1]

# Find out elements that are not equal to 0 or 1 and change them to 1.
# The matrix view is built once and reused for both comparisons; which()
# ignores NA comparisons, so missing cells are left as they are.
food_matrix <- as.matrix(TransFood)
Others <- which(!(food_matrix == 1 | food_matrix == 0), arr.ind = TRUE)
TransFood[Others] <- 1

# Coerce the cleaned 0/1 table into an arules "transactions" object.
TransFood <- as(as.matrix(TransFood), "transactions")

# Bar chart of the items whose support is at least 10%.
itemFrequencyPlot(TransFood, support = 0.1, cex.names = 0.8)

# Mine association rules with minimum support 0.3% and minimum confidence 50%.
# Parameter names are spelled out in full instead of relying on partial
# matching of `sup` / `conf`.
basket_rules <- apriori(
  TransFood,
  parameter = list(support = 0.003, confidence = 0.5, target = "rules")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.003 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 57
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[115 item(s), 19076 transaction(s)] done [0.01s].
## sorting and recoding items ... [74 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [42 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
Now we run apriori with a minimum confidence of 50% and a minimum support of 0.3%, and inspect the resulting association rules:
# Preview the first six mined rules together with their quality measures.
inspect(head(basket_rules, n = 6L))
## lhs rhs support confidence coverage lift count
## [1] {Small.Pink.LemonadeFood} => {Chicken.Nugget.BasketFood} 0.003355001 0.5925926 0.005661564 16.034463 64
## [2] {Grilled.Chicken.SandwichFood} => {French.Fries.BasketFood} 0.003721954 0.6698113 0.005556720 6.862149 71
## [3] {FloatFood} => {Ice.Cream.ConeFood} 0.007024533 0.7089947 0.009907737 6.355631 134
## [4] {Side.of.CheeseFood} => {Cheese.ConeyFood} 0.004665548 0.6846154 0.006814846 25.912149 89
## [5] {Side.of.CheeseFood} => {Hot.DogFood} 0.006290627 0.9230769 0.006814846 21.605663 120
## [6] {BurgerFood} => {French.Fries.BasketFood} 0.004613126 0.6616541 0.006972112 6.778579 88
Inspecting the rules whose size is greater than 3 (i.e., rules containing four items in total):
# Show only the rules whose size (as reported by size()) is greater than 3.
inspect(subset(basket_rules, subset = size(basket_rules) > 3))
## lhs rhs support confidence coverage lift count
## [1] {Krazy.KritterFood,
## Medium.DrinkFood,
## Slice.of.PeppFood} => {Slice.of.CheeseFood} 0.003250157 0.5535714 0.005871252 3.437477 62
## [2] {Medium.DrinkFood,
## Slice.of.PeppFood,
## Small.DrinkFood} => {Slice.of.CheeseFood} 0.003145313 0.6000000 0.005242189 3.725781 60
## [3] {Medium.DrinkFood,
## Slice.of.CheeseFood,
## Small.DrinkFood} => {Slice.of.PeppFood} 0.003145313 0.5172414 0.006080939 4.191545 60
We look for the strongest relationships, defined as rules with both high lift (> 10) and high confidence (> 0.6). The resulting rules show that when the LHS items are purchased, there is a high chance that the RHS item is purchased as well.
### Strongest relationships: lift above 10 and confidence above 0.6
inspect(subset(basket_rules, subset = lift > 10 & confidence > 0.6))
## lhs rhs support
## [1] {Side.of.CheeseFood} => {Cheese.ConeyFood} 0.004665548
## [2] {Side.of.CheeseFood} => {Hot.DogFood} 0.006290627
## [3] {Cheese.ConeyFood,Side.of.CheeseFood} => {Hot.DogFood} 0.004351017
## [4] {Hot.DogFood,Side.of.CheeseFood} => {Cheese.ConeyFood} 0.004351017
## confidence coverage lift count
## [1] 0.6846154 0.006814846 25.91215 89
## [2] 0.9230769 0.006814846 21.60566 120
## [3] 0.9325843 0.004665548 21.82819 83
## [4] 0.6916667 0.006290627 26.17903 83
# Rules whose support is greater than 0.006 (0.6% of all transactions).
inspect(subset(basket_rules, subset = support > 0.006))
## lhs rhs support confidence coverage lift count
## [1] {FloatFood} => {Ice.Cream.ConeFood} 0.007024533 0.7089947 0.009907737 6.355631 134
## [2] {Side.of.CheeseFood} => {Hot.DogFood} 0.006290627 0.9230769 0.006814846 21.605663 120
## [3] {SandwichFood} => {French.Fries.BasketFood} 0.007653596 0.6822430 0.011218285 6.989510 146
## [4] {Hot.Chocolate.Souvenir.RefillFood} => {Hot.Chocolate.SouvenirFood} 0.014992661 0.5596869 0.026787586 13.180972 286
## [5] {ToppingFood} => {Ice.Cream.ConeFood} 0.028569931 0.9981685 0.028622353 8.947868 545
## [6] {Add.CheeseFood} => {Soft.Pretzel..3_39Food} 0.019133990 0.6965649 0.027469071 7.601643 365
## [7] {Chicken.TendersFood} => {French.Fries.BasketFood} 0.017299224 0.7586207 0.022803523 7.771992 330
## [8] {CheeseburgerFood} => {French.Fries.BasketFood} 0.016879849 0.7931034 0.021283288 8.125264 322
## [9] {ChipsFood,
## Slice.of.PeppFood} => {Slice.of.CheeseFood} 0.008282659 0.5808824 0.014258754 3.607068 158
## [10] {GatoradeFood,
## Slice.of.PeppFood} => {Slice.of.CheeseFood} 0.010117425 0.5830816 0.017351646 3.620724 193
## [11] {Slice.of.PeppFood,
## Souvenir.DrinkFood} => {Slice.of.CheeseFood} 0.008125393 0.5032468 0.016145943 3.124979 155
## [12] {Medium.DrinkFood,
## Slice.of.PeppFood} => {Slice.of.CheeseFood} 0.013629692 0.5273834 0.025843992 3.274858 260
## [13] {Bottled.WaterFood,
## Slice.of.PeppFood} => {Slice.of.CheeseFood} 0.010694066 0.5151515 0.020759069 3.198903 204
# Rules whose confidence is greater than 0.6.
inspect(subset(basket_rules, subset = confidence > 0.6))
## lhs rhs support confidence coverage lift count
## [1] {Grilled.Chicken.SandwichFood} => {French.Fries.BasketFood} 0.003721954 0.6698113 0.005556720 6.862149 71
## [2] {FloatFood} => {Ice.Cream.ConeFood} 0.007024533 0.7089947 0.009907737 6.355631 134
## [3] {Side.of.CheeseFood} => {Cheese.ConeyFood} 0.004665548 0.6846154 0.006814846 25.912149 89
## [4] {Side.of.CheeseFood} => {Hot.DogFood} 0.006290627 0.9230769 0.006814846 21.605663 120
## [5] {BurgerFood} => {French.Fries.BasketFood} 0.004613126 0.6616541 0.006972112 6.778579 88
## [6] {SandwichFood} => {French.Fries.BasketFood} 0.007653596 0.6822430 0.011218285 6.989510 146
## [7] {ToppingFood} => {Ice.Cream.ConeFood} 0.028569931 0.9981685 0.028622353 8.947868 545
## [8] {Add.CheeseFood} => {Soft.Pretzel..3_39Food} 0.019133990 0.6965649 0.027469071 7.601643 365
## [9] {Chicken.TendersFood} => {French.Fries.BasketFood} 0.017299224 0.7586207 0.022803523 7.771992 330
## [10] {CheeseburgerFood} => {French.Fries.BasketFood} 0.016879849 0.7931034 0.021283288 8.125264 322
## [11] {Cheese.ConeyFood,
## Side.of.CheeseFood} => {Hot.DogFood} 0.004351017 0.9325843 0.004665548 21.828193 83
## [12] {Hot.DogFood,
## Side.of.CheeseFood} => {Cheese.ConeyFood} 0.004351017 0.6916667 0.006290627 26.179034 83
## [13] {Bottled.WaterFood,
## ToppingFood} => {Ice.Cream.ConeFood} 0.004036486 1.0000000 0.004036486 8.964286 77
## [14] {Add.CheeseFood,
## Bottled.WaterFood} => {Soft.Pretzel..3_39Food} 0.003826798 0.8021978 0.004770392 8.754419 73
## [15] {CheeseburgerFood,
## Chicken.TendersFood} => {French.Fries.BasketFood} 0.003931642 0.9615385 0.004088908 9.850863 75
## [16] {Chicken.TendersFood,
## Souvenir.DrinkFood} => {French.Fries.BasketFood} 0.003197735 0.7922078 0.004036486 8.116088 61
## [17] {Chicken.TendersFood,
## Krazy.KritterFood} => {French.Fries.BasketFood} 0.005661564 0.9557522 0.005923674 9.791584 108
## [18] {Chicken.TendersFood,
## Slice.of.PeppFood} => {French.Fries.BasketFood} 0.003669532 0.9210526 0.003984064 9.436090 70
## [19] {Chicken.TendersFood,
## Small.DrinkFood} => {French.Fries.BasketFood} 0.004822814 0.8214286 0.005871252 8.415452 92
## [20] {Chicken.TendersFood,
## Medium.DrinkFood} => {French.Fries.BasketFood} 0.004141329 0.8144330 0.005084923 8.343783 79
## [21] {Bottled.WaterFood,
## Chicken.TendersFood} => {French.Fries.BasketFood} 0.003459845 0.7586207 0.004560705 7.771992 66
## [22] {Chicken.TendersFood,
## Slice.of.CheeseFood} => {French.Fries.BasketFood} 0.005399455 0.8728814 0.006185783 8.942580 103
## [23] {CheeseburgerFood,
## Souvenir.DrinkFood} => {French.Fries.BasketFood} 0.003250157 0.9117647 0.003564689 9.340936 62
## [24] {CheeseburgerFood,
## Krazy.KritterFood} => {French.Fries.BasketFood} 0.005451877 0.8813559 0.006185783 9.029402 104
## [25] {CheeseburgerFood,
## Slice.of.PeppFood} => {French.Fries.BasketFood} 0.003721954 0.8658537 0.004298595 8.870582 71
## [26] {CheeseburgerFood,
## Small.DrinkFood} => {French.Fries.BasketFood} 0.004141329 0.8315789 0.004980080 8.519441 79
## [27] {CheeseburgerFood,
## Medium.DrinkFood} => {French.Fries.BasketFood} 0.005189767 0.8761062 0.005923674 8.975619 99
## [28] {Bottled.WaterFood,
## CheeseburgerFood} => {French.Fries.BasketFood} 0.003092892 0.7662338 0.004036486 7.849987 59
## [29] {CheeseburgerFood,
## Slice.of.CheeseFood} => {French.Fries.BasketFood} 0.005242189 0.8695652 0.006028518 8.908607 100
## [30] {Hot.DogFood,
## Krazy.KritterFood} => {French.Fries.BasketFood} 0.003669532 0.6140351 0.005976096 6.290727 70
##### Bottled Water and ToppingFood
# Keep the rules whose LHS contains Bottled.WaterFood or ToppingFood (either
# item is enough for %in% to match) and whose lift is greater than 5.
BottledWater2.lhs <- subset(
  basket_rules,
  subset = (lhs %in% c("Bottled.WaterFood", "ToppingFood")) & (lift > 5)
)
inspect(BottledWater2.lhs)
## lhs rhs support confidence coverage lift count
## [1] {ToppingFood} => {Ice.Cream.ConeFood} 0.028569931 0.9981685 0.028622353 8.947868 545
## [2] {Bottled.WaterFood,
## ToppingFood} => {Ice.Cream.ConeFood} 0.004036486 1.0000000 0.004036486 8.964286 77
## [3] {Add.CheeseFood,
## Bottled.WaterFood} => {Soft.Pretzel..3_39Food} 0.003826798 0.8021978 0.004770392 8.754419 73
## [4] {Bottled.WaterFood,
## Chicken.TendersFood} => {French.Fries.BasketFood} 0.003459845 0.7586207 0.004560705 7.771992 66
## [5] {Bottled.WaterFood,
## CheeseburgerFood} => {French.Fries.BasketFood} 0.003092892 0.7662338 0.004036486 7.849987 59
#### Slice of CheeseFood on the LHS
# Keep the rules whose LHS contains Slice.of.CheeseFood and whose lift is
# greater than 5.
Slice.Cheese.lhs <- subset(
  basket_rules,
  subset = (lhs %in% "Slice.of.CheeseFood") & (lift > 5)
)
inspect(Slice.Cheese.lhs)
## lhs rhs support confidence coverage lift count
## [1] {Chicken.TendersFood,
## Slice.of.CheeseFood} => {French.Fries.BasketFood} 0.005399455 0.8728814 0.006185783 8.942580 103
## [2] {CheeseburgerFood,
## Slice.of.CheeseFood} => {French.Fries.BasketFood} 0.005242189 0.8695652 0.006028518 8.908607 100
# Slice of CheeseFood on the RHS
# Keep the rules whose RHS is Slice.of.CheeseFood and whose lift is greater
# than 5.
# NOTE(review): the rendered report shows no output after this call, which
# suggests no rule satisfies both conditions -- confirm.
Slice.Cheese.rhs <- subset(
  basket_rules,
  subset = (rhs %in% "Slice.of.CheeseFood") & (lift > 5)
)
inspect(Slice.Cheese.rhs)
It can be observed that there is very high confidence (essentially 100%) that a transaction containing Bottled.WaterFood and ToppingFood also contains Ice.Cream.ConeFood.
A similar analysis can be done for other products, based on confidence, lift and support.
There are 42 rules that satisfy the above support and confidence criteria. The rule size varies from 2 to a maximum of 4 items. The confidence vs. support plot is shown below.
# Plots
library("arulesViz")

# Default plot of all mined rules.
plot(basket_rules)

# Take the first ten rules after sorting by lift, then draw the same subset
# twice: once as a graph and once as a grouped matrix.
top_lift_rules <- head(sort(basket_rules, by = "lift"), 10)
plot(top_lift_rules, method = "graph")
plot(top_lift_rules, method = "grouped")