Section 24 Apriori Association Rule Learning

# Load the raw basket file as an ordinary data frame for a first look.
# header = FALSE: the first line of the CSV is read as data, not as column names.
df <- read.csv(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 5 - Association Rule Learning\\Section 28 - Apriori\\Apriori\\Market_Basket_Optimisation.csv",
  header = FALSE
)
head(df)

Build the sparse matrix

# Requires the arules package; install it once if it is missing:
# install.packages("arules")
library(arules)

# Re-read the same CSV as an arules transactions object, with items
# separated by commas. rm.duplicates = TRUE drops items repeated within a
# single transaction. Note: this rebinds df, replacing whatever it held before.
df <- read.transactions(
  file = "G:\\RStudio\\udemy\\ml\\Machine Learning AZ\\Part 5 - Association Rule Learning\\Section 28 - Apriori\\Apriori\\Market_Basket_Optimisation.csv",
  sep = ",",
  rm.duplicates = TRUE
)
distribution of transactions with duplicates:
1 
5 
# Summarise the transactions object: dimensions and density, the most
# frequent items, and the distribution of transaction lengths.
summary(df)
transactions as itemMatrix in sparse format with
 7501 rows (elements/itemsets/transactions) and
 119 columns (items) and a density of 0.03288973 

most frequent items:
mineral water          eggs     spaghetti  french fries     chocolate       (Other) 
         1788          1348          1306          1282          1229         22405 

element (itemset/transaction) length distribution:
sizes
   1    2    3    4    5    6    7    8    9   10   11   12   13   14   15   16   18   19   20 
1754 1358 1044  816  667  493  391  324  259  139  102   67   40   22   17    4    1    2    1 

   Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
  1.000   2.000   3.000   3.914   5.000  20.000 

includes extended item information - examples:
             labels
1           almonds
2 antioxydant juice
3         asparagus
# Plot the item frequencies of the 10 most frequent items.
itemFrequencyPlot(df, topN = 10)

Training Apriori on the dataset

# Minimum support: item sets purchased about 3 times a day over a whole week,
# i.e. 7 * 3 / 7500 ~= 0.003 of all transactions.
# Minimum confidence: lowered to 0.2 (the arules default is 0.8).
rules <- apriori(
  data = df,
  parameter = list(support = 0.003, confidence = 0.2)
)
Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen target   ext
        0.2    0.1    1 none FALSE            TRUE       5   0.003      1     10  rules FALSE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 22 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[119 item(s), 7501 transaction(s)] done [0.01s].
sorting and recoding items ... [115 item(s)] done [0.00s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 4 5 done [0.02s].
writing ... [1348 rule(s)] done [0.00s].
creating S4 object  ... done [0.01s].

Visualize the results

# Show the top 20 rules ranked by lift.
# head() is used instead of [1:20] so that a rule set with fewer than
# 20 rules is still displayed instead of indexing past its end.
inspect(head(sort(rules, by = "lift"), n = 20))
     lhs                                            rhs                 support     confidence lift    
[1]  {mineral water,whole wheat pasta}           => {olive oil}         0.003866151 0.4027778  6.115863
[2]  {frozen vegetables,milk,mineral water}      => {soup}              0.003066258 0.2771084  5.484407
[3]  {fromage blanc}                             => {honey}             0.003332889 0.2450980  5.164271
[4]  {spaghetti,tomato sauce}                    => {ground beef}       0.003066258 0.4893617  4.980600
[5]  {light cream}                               => {chicken}           0.004532729 0.2905983  4.843951
[6]  {pasta}                                     => {escalope}          0.005865885 0.3728814  4.700812
[7]  {french fries,herb & pepper}                => {ground beef}       0.003199573 0.4615385  4.697422
[8]  {cereals,spaghetti}                         => {ground beef}       0.003066258 0.4600000  4.681764
[9]  {frozen vegetables,mineral water,soup}      => {milk}              0.003066258 0.6052632  4.670863
[10] {french fries,ground beef}                  => {herb & pepper}     0.003199573 0.2307692  4.665768
[11] {chocolate,frozen vegetables,mineral water} => {shrimp}            0.003199573 0.3287671  4.600900
[12] {frozen vegetables,milk,mineral water}      => {olive oil}         0.003332889 0.3012048  4.573557
[13] {pasta}                                     => {shrimp}            0.005065991 0.3220339  4.506672
[14] {chocolate,herb & pepper}                   => {ground beef}       0.003999467 0.4411765  4.490183
[15] {chocolate,mineral water,shrimp}            => {frozen vegetables} 0.003199573 0.4210526  4.417225
[16] {cake,frozen vegetables}                    => {tomatoes}          0.003066258 0.2987013  4.367560
[17] {milk,tomatoes}                             => {soup}              0.003066258 0.2190476  4.335293
[18] {eggs,ground beef}                          => {herb & pepper}     0.004132782 0.2066667  4.178455
[19] {milk,olive oil}                            => {soup}              0.003599520 0.2109375  4.174781
[20] {whole wheat pasta}                         => {olive oil}         0.007998933 0.2714932  4.122410
# Minimum support: item sets purchased about 4 times a day over a whole week,
# i.e. 7 * 4 / 7500 ~= 0.004 of all transactions.
# Minimum confidence: lowered to 0.2 (the arules default is 0.8).
rules <- apriori(
  data = df,
  parameter = list(support = 0.004, confidence = 0.2)
)
Apriori

Parameter specification:
 confidence minval smax arem  aval originalSupport maxtime support minlen maxlen target   ext
        0.2    0.1    1 none FALSE            TRUE       5   0.004      1     10  rules FALSE

Algorithmic control:
 filter tree heap memopt load sort verbose
    0.1 TRUE TRUE  FALSE TRUE    2    TRUE

Absolute minimum support count: 30 

set item appearances ...[0 item(s)] done [0.00s].
set transactions ...[119 item(s), 7501 transaction(s)] done [0.01s].
sorting and recoding items ... [114 item(s)] done [0.00s].
creating transaction tree ... done [0.01s].
checking subsets of size 1 2 3 4 done [0.01s].
writing ... [811 rule(s)] done [0.00s].
creating S4 object  ... done [0.01s].
# Show the top 20 rules ranked by lift.
# head() is used instead of [1:20] so that a rule set with fewer than
# 20 rules is still displayed instead of indexing past its end.
inspect(head(sort(rules, by = "lift"), n = 20))
     lhs                                            rhs                 support     confidence lift    
[1]  {light cream}                               => {chicken}           0.004532729 0.2905983  4.843951
[2]  {pasta}                                     => {escalope}          0.005865885 0.3728814  4.700812
[3]  {pasta}                                     => {shrimp}            0.005065991 0.3220339  4.506672
[4]  {eggs,ground beef}                          => {herb & pepper}     0.004132782 0.2066667  4.178455
[5]  {whole wheat pasta}                         => {olive oil}         0.007998933 0.2714932  4.122410
[6]  {herb & pepper,spaghetti}                   => {ground beef}       0.006399147 0.3934426  4.004360
[7]  {herb & pepper,mineral water}               => {ground beef}       0.006665778 0.3906250  3.975683
[8]  {tomato sauce}                              => {ground beef}       0.005332622 0.3773585  3.840659
[9]  {mushroom cream sauce}                      => {escalope}          0.005732569 0.3006993  3.790833
[10] {frozen vegetables,mineral water,spaghetti} => {ground beef}       0.004399413 0.3666667  3.731841
[11] {olive oil,tomatoes}                        => {spaghetti}         0.004399413 0.6111111  3.509912
[12] {frozen vegetables,spaghetti}               => {tomatoes}          0.006665778 0.2392344  3.498046
[13] {mineral water,soup}                        => {olive oil}         0.005199307 0.2254335  3.423030
[14] {ground beef,milk}                          => {olive oil}         0.004932676 0.2242424  3.404944
[15] {eggs,herb & pepper}                        => {ground beef}       0.004132782 0.3297872  3.356491
[16] {spaghetti,tomatoes}                        => {frozen vegetables} 0.006665778 0.3184713  3.341054
[17] {herb & pepper}                             => {ground beef}       0.015997867 0.3234501  3.291994
[18] {grated cheese,spaghetti}                   => {ground beef}       0.005332622 0.3225806  3.283144
[19] {cooking oil,ground beef}                   => {spaghetti}         0.004799360 0.5714286  3.281995
[20] {frozen vegetables,olive oil}               => {milk}              0.004799360 0.4235294  3.268410
LS0tDQp0aXRsZTogIk1MIFVzaW5nIFIgU2VjdGlvbiAyNCBBcHJpb3JpIEFzc29jaWF0aW9uIFJ1bGUgbGVhcm5pbmciDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCiMgU2VjdGlvbiAyNSBIaWVyYXJjaGljYWwgQ2x1c3RlcmluZw0KDQpgYGB7cn0NCmRmIDwtICByZWFkLmNzdigiRzpcXFJTdHVkaW9cXHVkZW15XFxtbFxcTWFjaGluZSBMZWFybmluZyBBWlxcUGFydCA1IC0gQXNzb2NpYXRpb24gUnVsZSBMZWFybmluZ1xcU2VjdGlvbiAyOCAtIEFwcmlvcmlcXEFwcmlvcmlcXE1hcmtldF9CYXNrZXRfT3B0aW1pc2F0aW9uLmNzdiIsIGhlYWRlciA9IEZBTFNFKQ0KaGVhZChkZikNCmBgYA0KDQpCdWlsZCB0aGUgc3BhcnNlIG1hdHJpeA0KYGBge3J9DQojIGluc3RhbGwucGFja2FnZXMoImFydWxlcyIpDQpsaWJyYXJ5KGFydWxlcykNCmRmIDwtIHJlYWQudHJhbnNhY3Rpb25zKCJHOlxcUlN0dWRpb1xcdWRlbXlcXG1sXFxNYWNoaW5lIExlYXJuaW5nIEFaXFxQYXJ0IDUgLSBBc3NvY2lhdGlvbiBSdWxlIExlYXJuaW5nXFxTZWN0aW9uIDI4IC0gQXByaW9yaVxcQXByaW9yaVxcTWFya2V0X0Jhc2tldF9PcHRpbWlzYXRpb24uY3N2Iiwgc2VwID0nLCcsIHJtLmR1cGxpY2F0ZXMgPSBUUlVFKQ0Kc3VtbWFyeShkZikNCmBgYA0KDQpgYGB7cn0NCiMgZnJlcXVlbmN5IHBsb3QNCml0ZW1GcmVxdWVuY3lQbG90KGRmLCB0b3BOPTEwKQ0KYGBgDQojIFRyYWluaW5nIEFwcmlvcmkgb24gdGhlIGRhdGFzZXQNCg0KYGBge3J9DQojIHNldCBzdXBwb3J0IHRvIGl0ZW1zIHRoYXQgYXJlIHB1cmNoYXNlZCAzIHRpbWVzIGEgZGF5IG92ZXIgdGhlIHdob2xlIHdlZWsgNyozLzc1MDANCiMgc2V0IGNvbmZpZGVuY2UgdG8gZGVmYXVsdCBvZiAwLjggIA0KcnVsZXMgPC0gIGFwcmlvcmkoZGF0YSA9IGRmLCBwYXJhbWV0ZXIgPSBsaXN0KHN1cHBvcnQgPSAwLjAwMyAsIGNvbmZpZGVuY2UgPSAwLjIpICkNCmBgYA0KDQpWaXN1YWxpemUgdGhlIHJlc3VsdHMNCg0KYGBge3J9DQojIHNob3cgdGhlIHRvcCAyMCBydWxlcyBzb3J0ZWQgYnkgbGlmdA0KaW5zcGVjdChzb3J0KHJ1bGVzLCBieSA9ICdsaWZ0JylbMToyMF0pDQpgYGANCg0KYGBge3J9DQojIHNldCBzdXBwb3J0IHRvIGl0ZW1zIHRoYXQgYXJlIHB1cmNoYXNlZCA0IHRpbWVzIGEgZGF5IG92ZXIgdGhlIHdob2xlIHdlZWsgNyo0Lzc1MDANCiMgc2V0IGNvbmZpZGVuY2UgdG8gZGVmYXVsdCBvZiAwLjggIA0KcnVsZXMgPC0gIGFwcmlvcmkoZGF0YSA9IGRmLCBwYXJhbWV0ZXIgPSBsaXN0KHN1cHBvcnQgPSAwLjAwNCAsIGNvbmZpZGVuY2UgPSAwLjIpICkNCmBgYA0KDQpgYGB7cn0NCiMgc2hvdyB0aGUgdG9wIDIwIHJ1bGVzIHNvcnRlZCBieSBsaWZ0DQppbnNwZWN0KHNvcnQocnVsZXMsIGJ5ID0gJ2xpZnQnKVsxOjIwXSkNCmBgYA0KDQo=