# Load the raw basket file as a plain data frame (header = FALSE, so the first
# line is treated as data rather than column names) and preview the first rows.
df <- read.csv(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 5 - Association Rule Learning\\Section 28 - Apriori\\Apriori\\Market_Basket_Optimisation.csv",
  header = FALSE
)
head(df)
# Build the sparse matrix
# install.packages("arules")
library(arules)
# Re-read the same file as an arules transactions object: comma-separated
# items in basket format, with items repeated inside a single basket removed
# (rm.duplicates = TRUE).
# Note: this overwrites whatever was previously stored in `df`.
df <- read.transactions(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 5 - Association Rule Learning\\Section 28 - Apriori\\Apriori\\Market_Basket_Optimisation.csv",
  format = "basket",
  sep = ",",
  rm.duplicates = TRUE
)
distribution of transactions with duplicates:
1
5
# Summarise the transactions object (dimensions, density, most frequent items,
# basket-size distribution).
summary(object = df)
transactions as itemMatrix in sparse format with
7501 rows (elements/itemsets/transactions) and
119 columns (items) and a density of 0.03288973
most frequent items:
mineral water eggs spaghetti french fries chocolate (Other)
1788 1348 1306 1282 1229 22405
element (itemset/transaction) length distribution:
sizes
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 18 19 20
1754 1358 1044 816 667 493 391 324 259 139 102 67 40 22 17 4 1 2 1
Min. 1st Qu. Median Mean 3rd Qu. Max.
1.000 2.000 3.000 3.914 5.000 20.000
includes extended item information - examples:
labels
1 almonds
2 antioxydant juice
3 asparagus
# Item frequency plot restricted to the 10 most frequent items
itemFrequencyPlot(x = df, topN = 10)
# Minimum support: keep items purchased about 3 times a day over the whole
# week, i.e. 3 * 7 / 7500 ~= 0.003.
# Minimum confidence: 0.2, lowered from the arules default of 0.8.
rules <- apriori(
  data = df,
  parameter = list(support = 0.003, confidence = 0.2)
)
Apriori
Parameter specification:
confidence minval smax arem aval originalSupport maxtime support minlen maxlen target ext
0.2 0.1 1 none FALSE TRUE 5 0.003 1 10 rules FALSE
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE
Absolute minimum support count: 22
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[119 item(s), 7501 transaction(s)] done [0.01s].
sorting and recoding items ... [115 item(s)] done [0.00s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 4 5 done [0.02s].
writing ... [1348 rule(s)] done [0.00s].
creating S4 object ... done [0.01s].
# Visualize the results
# Show the top 20 rules sorted by lift (highest first).
# head() is used instead of [1:20] so that the call still works, returning
# every available rule, when fewer than 20 rules were mined.
inspect(head(sort(rules, by = "lift"), n = 20))
lhs rhs support confidence lift
[1] {mineral water,whole wheat pasta} => {olive oil} 0.003866151 0.4027778 6.115863
[2] {frozen vegetables,milk,mineral water} => {soup} 0.003066258 0.2771084 5.484407
[3] {fromage blanc} => {honey} 0.003332889 0.2450980 5.164271
[4] {spaghetti,tomato sauce} => {ground beef} 0.003066258 0.4893617 4.980600
[5] {light cream} => {chicken} 0.004532729 0.2905983 4.843951
[6] {pasta} => {escalope} 0.005865885 0.3728814 4.700812
[7] {french fries,herb & pepper} => {ground beef} 0.003199573 0.4615385 4.697422
[8] {cereals,spaghetti} => {ground beef} 0.003066258 0.4600000 4.681764
[9] {frozen vegetables,mineral water,soup} => {milk} 0.003066258 0.6052632 4.670863
[10] {french fries,ground beef} => {herb & pepper} 0.003199573 0.2307692 4.665768
[11] {chocolate,frozen vegetables,mineral water} => {shrimp} 0.003199573 0.3287671 4.600900
[12] {frozen vegetables,milk,mineral water} => {olive oil} 0.003332889 0.3012048 4.573557
[13] {pasta} => {shrimp} 0.005065991 0.3220339 4.506672
[14] {chocolate,herb & pepper} => {ground beef} 0.003999467 0.4411765 4.490183
[15] {chocolate,mineral water,shrimp} => {frozen vegetables} 0.003199573 0.4210526 4.417225
[16] {cake,frozen vegetables} => {tomatoes} 0.003066258 0.2987013 4.367560
[17] {milk,tomatoes} => {soup} 0.003066258 0.2190476 4.335293
[18] {eggs,ground beef} => {herb & pepper} 0.004132782 0.2066667 4.178455
[19] {milk,olive oil} => {soup} 0.003599520 0.2109375 4.174781
[20] {whole wheat pasta} => {olive oil} 0.007998933 0.2714932 4.122410
# Minimum support: keep items purchased about 4 times a day over the whole
# week, i.e. 4 * 7 / 7500 ~= 0.004.
# Minimum confidence: 0.2, lowered from the arules default of 0.8.
rules <- apriori(
  data = df,
  parameter = list(support = 0.004, confidence = 0.2)
)
Apriori
Parameter specification:
confidence minval smax arem aval originalSupport maxtime support minlen maxlen target ext
0.2 0.1 1 none FALSE TRUE 5 0.004 1 10 rules FALSE
Algorithmic control:
filter tree heap memopt load sort verbose
0.1 TRUE TRUE FALSE TRUE 2 TRUE
Absolute minimum support count: 30
set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[119 item(s), 7501 transaction(s)] done [0.01s].
sorting and recoding items ... [114 item(s)] done [0.00s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 4 done [0.01s].
writing ... [811 rule(s)] done [0.00s].
creating S4 object ... done [0.01s].
# Show the top 20 rules sorted by lift (highest first).
# head() is used instead of [1:20] so that the call still works, returning
# every available rule, when fewer than 20 rules were mined.
inspect(head(sort(rules, by = "lift"), n = 20))
lhs rhs support confidence lift
[1] {light cream} => {chicken} 0.004532729 0.2905983 4.843951
[2] {pasta} => {escalope} 0.005865885 0.3728814 4.700812
[3] {pasta} => {shrimp} 0.005065991 0.3220339 4.506672
[4] {eggs,ground beef} => {herb & pepper} 0.004132782 0.2066667 4.178455
[5] {whole wheat pasta} => {olive oil} 0.007998933 0.2714932 4.122410
[6] {herb & pepper,spaghetti} => {ground beef} 0.006399147 0.3934426 4.004360
[7] {herb & pepper,mineral water} => {ground beef} 0.006665778 0.3906250 3.975683
[8] {tomato sauce} => {ground beef} 0.005332622 0.3773585 3.840659
[9] {mushroom cream sauce} => {escalope} 0.005732569 0.3006993 3.790833
[10] {frozen vegetables,mineral water,spaghetti} => {ground beef} 0.004399413 0.3666667 3.731841
[11] {olive oil,tomatoes} => {spaghetti} 0.004399413 0.6111111 3.509912
[12] {frozen vegetables,spaghetti} => {tomatoes} 0.006665778 0.2392344 3.498046
[13] {mineral water,soup} => {olive oil} 0.005199307 0.2254335 3.423030
[14] {ground beef,milk} => {olive oil} 0.004932676 0.2242424 3.404944
[15] {eggs,herb & pepper} => {ground beef} 0.004132782 0.3297872 3.356491
[16] {spaghetti,tomatoes} => {frozen vegetables} 0.006665778 0.3184713 3.341054
[17] {herb & pepper} => {ground beef} 0.015997867 0.3234501 3.291994
[18] {grated cheese,spaghetti} => {ground beef} 0.005332622 0.3225806 3.283144
[19] {cooking oil,ground beef} => {spaghetti} 0.004799360 0.5714286 3.281995
[20] {frozen vegetables,olive oil} => {milk} 0.004799360 0.4235294 3.268410