I am a data analyst at Carrefour Kenya and am currently undertaking a project that will inform the marketing department on the most relevant marketing strategies that will result in the highest number of sales (total price including tax).
I am expected to find out the associations between products.
Link to the dataset: http://bit.ly/SupermarketDatasetII
# Installing the necessary packages, but only the ones that are not already
# installed, so re-running the script does not re-download them every time
required_pkgs <- c("arules", "tidyverse")
missing_pkgs <- setdiff(required_pkgs, rownames(installed.packages()))
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
# Loading the libraries
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(tidyverse)
## -- Attaching packages --------------------------------------- tidyverse 1.3.1 --
## v ggplot2 3.3.5 v purrr 0.3.4
## v tibble 3.1.6 v dplyr 1.0.8
## v tidyr 1.2.0 v stringr 1.4.0
## v readr 2.1.2 v forcats 0.5.1
## -- Conflicts ------------------------------------------ tidyverse_conflicts() --
## x tidyr::expand() masks Matrix::expand()
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
## x tidyr::pack() masks Matrix::pack()
## x dplyr::recode() masks arules::recode()
## x tidyr::unpack() masks Matrix::unpack()
# Reading the dataset as transactions; items on a line are separated by commas
assos <- read.transactions(
  file = "C:/Users/user/Downloads/Supermarket_Sales_Dataset II Par 3.csv",
  sep = ",",
  rm.duplicates = TRUE # drop items repeated within the same transaction
)
## distribution of transactions with duplicates:
## 1
## 5
# Printing the transactions object to see its dimensions
print(assos)
## transactions in sparse format with
## 7501 transactions (rows) and
## 119 items (columns)
# Confirming that the data was read in as a "transactions" object
print(class(assos))
## [1] "transactions"
## attr(,"package")
## [1] "arules"
# Listing the items contained in each of the first 5 transactions
inspect(assos[seq_len(5)])
## items
## [1] {almonds,
## antioxydant juice,
## avocado,
## cottage cheese,
## energy drink,
## frozen smoothie,
## green grapes,
## green tea,
## honey,
## low fat yogurt,
## mineral water,
## olive oil,
## salad,
## salmon,
## shrimp,
## spinach,
## tomato juice,
## vegetables mix,
## whole weat flour,
## yams}
## [2] {burgers,
## eggs,
## meatballs}
## [3] {chutney}
## [4] {avocado,
## turkey}
## [5] {energy bar,
## green tea,
## milk,
## mineral water,
## whole wheat rice}
# Summarising the transactions: density, most frequent items and the
# distribution of transaction sizes
print(summary(assos))
## transactions as itemMatrix in sparse format with
## 7501 rows (elements/itemsets/transactions) and
## 119 columns (items) and a density of 0.03288973
##
## most frequent items:
## mineral water eggs spaghetti french fries chocolate
## 1788 1348 1306 1282 1229
## (Other)
## 22405
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 1754 1358 1044 816 667 493 391 324 259 139 102 67 40 22 17 4
## 18 19 20
## 1 2 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.914 5.000 20.000
##
## includes extended item information - examples:
## labels
## 1 almonds
## 2 antioxydant juice
## 3 asparagus
# Plotting the most frequent items side by side: the 10 most frequent items
# (left) and every item whose support is at least 0.1 (right)
options(repr.plot.width = 15, repr.plot.height = 10)
# par() returns the previous settings; keep them so the 1 x 2 layout can be
# undone and does not leak into plots drawn later in the session
old_par <- par(mfrow = c(1, 2))
itemFrequencyPlot(
  assos,
  topN = 10, col = "lightblue", main = "Frequency plot (default)",
  cex = 1.5, cex.main = 1.75, cex.lab = 1.5, cex.axis = 1.2
)
itemFrequencyPlot(
  assos,
  support = 0.1, col = "orange", main = "Frequency plot(supp=0.1)",
  cex = 1.5, cex.main = 1.75, cex.lab = 1.5, cex.axis = 1.2
)
par(old_par)
# Mining association rules with the apriori algorithm
# minimum support = 0.001, minimum confidence = 0.8
rules <- apriori(
  assos,
  parameter = list(support = 0.001, confidence = 0.8)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 7
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 7501 transaction(s)] done [0.00s].
## sorting and recoding items ... [116 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [74 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Showing how many rules were generated
print(rules)
## set of 74 rules
# Mining association rules again with a higher minimum support
# minimum support = 0.002, minimum confidence = 0.8
rules1 <- apriori(
  assos,
  parameter = list(support = 0.002, confidence = 0.8)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.002 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 15
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 7501 transaction(s)] done [0.00s].
## sorting and recoding items ... [115 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [2 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Showing how many rules were generated with the higher support threshold
print(rules1)
## set of 2 rules
# Mining association rules again with a lower minimum confidence
# minimum support = 0.001, minimum confidence = 0.6
rules2 <- apriori(
  assos,
  parameter = list(support = 0.001, confidence = 0.6)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.6 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 7
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 7501 transaction(s)] done [0.00s].
## sorting and recoding items ... [116 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.00s].
## writing ... [545 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Showing how many rules were generated with the lower confidence threshold
print(rules2)
## set of 545 rules
# Looking at the first 10 rules of the first model (supp = 0.001, conf = 0.8)
inspect(rules[seq_len(10)])
## lhs rhs support confidence
## [1] {frozen smoothie, spinach} => {mineral water} 0.001066524 0.8888889
## [2] {bacon, pancakes} => {spaghetti} 0.001733102 0.8125000
## [3] {nonfat milk, turkey} => {mineral water} 0.001199840 0.8181818
## [4] {ground beef, nonfat milk} => {mineral water} 0.001599787 0.8571429
## [5] {mushroom cream sauce, pasta} => {escalope} 0.002532996 0.9500000
## [6] {milk, pasta} => {shrimp} 0.001599787 0.8571429
## [7] {cooking oil, fromage blanc} => {mineral water} 0.001199840 0.8181818
## [8] {black tea, salmon} => {mineral water} 0.001066524 0.8000000
## [9] {black tea, frozen smoothie} => {milk} 0.001199840 0.8181818
## [10] {red wine, tomato sauce} => {chocolate} 0.001066524 0.8000000
## coverage lift count
## [1] 0.001199840 3.729058 8
## [2] 0.002133049 4.666587 13
## [3] 0.001466471 3.432428 9
## [4] 0.001866418 3.595877 12
## [5] 0.002666311 11.976387 19
## [6] 0.001866418 11.995203 12
## [7] 0.001466471 3.432428 9
## [8] 0.001333156 3.356152 8
## [9] 0.001466471 6.313973 9
## [10] 0.001333156 4.882669 8
# Inspecting the 5 rules with the highest lift (sorted from highest to lowest)
inspect(
  head(rules, n = 5, by = "lift", decreasing = TRUE)
)
## lhs rhs support confidence coverage lift count
## [1] {eggs,
## mineral water,
## pasta} => {shrimp} 0.001333156 0.9090909 0.001466471 12.722185 10
## [2] {french fries,
## mushroom cream sauce,
## pasta} => {escalope} 0.001066524 1.0000000 0.001066524 12.606723 8
## [3] {milk,
## pasta} => {shrimp} 0.001599787 0.8571429 0.001866418 11.995203 12
## [4] {mushroom cream sauce,
## pasta} => {escalope} 0.002532996 0.9500000 0.002666311 11.976387 19
## [5] {chocolate,
## ground beef,
## milk,
## mineral water,
## spaghetti} => {frozen vegetables} 0.001066524 0.8888889 0.001199840 9.325253 8
# Inspecting the 5 rules with the highest confidence (sorted from highest to
# lowest)
inspect(
  head(rules, n = 5, by = "confidence", decreasing = TRUE)
)
## lhs rhs support confidence coverage lift count
## [1] {french fries,
## mushroom cream sauce,
## pasta} => {escalope} 0.001066524 1.00 0.001066524 12.606723 8
## [2] {ground beef,
## light cream,
## olive oil} => {mineral water} 0.001199840 1.00 0.001199840 4.195190 9
## [3] {cake,
## meatballs,
## mineral water} => {milk} 0.001066524 1.00 0.001066524 7.717078 8
## [4] {cake,
## olive oil,
## shrimp} => {mineral water} 0.001199840 1.00 0.001199840 4.195190 9
## [5] {mushroom cream sauce,
## pasta} => {escalope} 0.002532996 0.95 0.002666311 11.976387 19
# Looking at the least popular items: relative frequency of every item,
# sorted in ascending order so that the rarest items come first
itm <- itemFrequency(assos, type = "relative")
head(sort(itm, decreasing = FALSE), n = 10)
## water spray napkins cream bramble tea
## 0.0003999467 0.0006665778 0.0009332089 0.0018664178 0.0038661512
## chutney mashed potato chocolate bread dessert wine ketchup
## 0.0041327823 0.0041327823 0.0042660979 0.0043994134 0.0043994134
# We may want to make a promotion to increase the sale of tea
# Let us look at what people buy after buying tea
# %pin% matches item labels partially, so this keeps every rule whose
# left-hand side has an item containing "tea" (e.g. "black tea", "green tea")
# NOTE(review): these rules have tea on the left-hand side; to find what leads
# customers to buy tea, filter on rhs instead - confirm the intent
tea <- subset(rules, subset = lhs %pin% "tea")
# Then order by confidence
tea <- sort(tea, by = "confidence", decreasing = TRUE)
# head() returns at most 5 rules, so this also works when fewer than 5 match
inspect(head(tea, n = 5))
## lhs rhs support
## [1] {black tea, spaghetti, turkey} => {eggs} 0.001066524
## [2] {green tea, ground beef, tomato sauce} => {spaghetti} 0.001333156
## [3] {black tea, frozen smoothie} => {milk} 0.001199840
## [4] {black tea, salmon} => {mineral water} 0.001066524
## [5] {cookies, green tea, milk} => {french fries} 0.001066524
## confidence coverage lift count
## [1] 0.8888889 0.001199840 4.946258 8
## [2] 0.8333333 0.001599787 4.786243 10
## [3] 0.8181818 0.001466471 6.313973 9
## [4] 0.8000000 0.001333156 3.356152 8
## [5] 0.8000000 0.001333156 4.680811 8
# We may want to make a promotion to increase the sale of ground beef
# Let us look at what people buy after buying ground beef
# (rules whose left-hand side has an item containing "ground beef")
beef <- subset(rules, subset = lhs %pin% "ground beef")
beef
## set of 12 rules
# Then order by confidence
beef <- sort(beef, by = "confidence", decreasing = TRUE)
# head() returns at most 5 rules, so this also works when fewer than 5 match
inspect(head(beef, n = 5))
## lhs rhs support confidence coverage lift count
## [1] {ground beef,
## light cream,
## olive oil} => {mineral water} 0.001199840 1.0000000 0.001199840 4.195190 9
## [2] {ground beef,
## pancakes,
## whole wheat rice} => {mineral water} 0.001333156 0.9090909 0.001466471 3.813809 10
## [3] {brownies,
## eggs,
## ground beef} => {mineral water} 0.001066524 0.8888889 0.001199840 3.729058 8
## [4] {ground beef,
## salmon,
## shrimp} => {spaghetti} 0.001066524 0.8888889 0.001199840 5.105326 8
## [5] {chocolate,
## ground beef,
## milk,
## mineral water,
## spaghetti} => {frozen vegetables} 0.001066524 0.8888889 0.001199840 9.325253 8