library(tidyverse)
## ── Attaching packages ────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 3.2.0 ✔ purrr 0.3.2
## ✔ tibble 2.1.3 ✔ dplyr 0.8.3
## ✔ tidyr 0.8.3 ✔ stringr 1.4.0
## ✔ readr 1.3.1 ✔ forcats 0.4.0
## ── Conflicts ───────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
## The following object is masked from 'package:tidyr':
##
## expand
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
## Loading required package: grid
## Registered S3 method overwritten by 'seriation':
## method from
## reorder.hclust gclus
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(treemap)
Market Basket Analysis (MBA) on Instacart Data
# Load the six Instacart CSV files. They all live in one directory, so the
# location is defined once; set the INSTACART_DATA_DIR environment variable
# to point at a different copy of the data set.
data_dir <- Sys.getenv(
  "INSTACART_DATA_DIR",
  unset = "/Users/srikant/Downloads/instacart_2017_05_01"
)

aisles <- read.csv(file.path(data_dir, "aisles.csv"))
departments <- read.csv(file.path(data_dir, "departments.csv"))
prior <- read.csv(file.path(data_dir, "order_products__prior.csv"))
train <- read.csv(file.path(data_dir, "order_products__train.csv"))
orders <- read.csv(file.path(data_dir, "orders.csv"))
products <- read.csv(file.path(data_dir, "products.csv"))
Which is the most-bought product at Instacart?
# Thirty most frequently purchased products in the training orders.
# `count` is the number of order lines per product name. `percentage` is
# computed after the top-30 cut, so it is each product's share of the
# top-30 total, not of all purchases.
top_30 <- train %>%
  left_join(products) %>%
  group_by(product_name) %>%
  summarise(count = n()) %>%
  top_n(n = 30, wt = count) %>%
  mutate(percentage = count / sum(count)) %>%
  arrange(desc(percentage))
## Joining, by = "product_id"
# Bar chart of the top-30 products; bars are ordered by ascending purchase
# count and the product names are rotated so they stay readable.
ggplot(top_30, aes(x = reorder(product_name, count), y = percentage)) +
  geom_col() +
  labs(title = "Top 30 products", y = "Percentage") +
  theme(
    axis.title.x = element_blank(),
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5)
  )
# Number of orders for each value of days_since_prior_order. Rows where the
# value is missing are dropped by ggplot, which reports them in a warning.
ggplot(orders, aes(x=days_since_prior_order)) +
geom_bar()
## Warning: Removed 206209 rows containing non-finite values (stat_count).
The most purchased product is bananas, and the top 30 consist largely of fruits and vegetables.
head(orders)

# Attach the product attributes to every order line of the training set.
# No grouping is needed for a plain join, so none is applied.
tmp <- train %>%
  left_join(products, by = "product_id")

# Round-trip through a CSV file so arules can parse it in "single" format
# (one line per transaction/item pair).
write.csv(tmp, file = "transactions.csv")

# write.csv() prepends a row-name column, so in the file column 2 holds the
# transaction (order) id and column 6 the product name.
# skip = 1 drops the header line; without it the header itself is read as
# one extra transaction containing one extra item.
transactions <- read.transactions(
  "transactions.csv",
  format = "single",
  sep = ",",
  cols = c(2, 6),
  skip = 1
)
summary(transactions)
## transactions as itemMatrix in sparse format with
## 131210 rows (elements/itemsets/transactions) and
## 39124 columns (items) and a density of 0.0002697241
##
## most frequent items:
## Banana Bag of Organic Bananas Organic Strawberries
## 18726 15480 10894
## Organic Baby Spinach Large Lemon (Other)
## 9784 8135 1321599
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 6846 7368 8033 8218 8895 8708 8541 7983 7217 6553 6034 5383 4843 4394 3831
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 3522 3108 2719 2473 2102 1857 1681 1462 1292 1079 986 860 679 634 553
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## 446 403 346 315 280 210 193 178 142 99 90 88 75 79 64
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 48 49 32 26 31 24 23 18 15 12 10 6 5 4 8
## 61 62 63 64 65 66 67 68 70 72 74 75 76 77 80
## 3 3 5 4 3 2 1 2 4 2 2 1 2 1 2
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.00 5.00 9.00 10.55 14.00 80.00
##
## includes extended item information - examples:
## labels
## 1 \\"Constant Comment\\" Black Tea
## 2 \\"Constant Comment\\" Decaffeinated Black Tea Blend
## 3 \\"Darn Good\\" Chili Mix
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 100000
## 3 1000008
# Print the first few transactions with their items and transaction ids.
inspect(head(transactions))
## items transactionID
## [1] {Bag of Organic Bananas,
## Bulgarian Yogurt,
## Cucumber Kirby,
## Lightly Smoked Sardines in Olive Oil,
## Organic 4% Milk Fat Whole Milk Cottage Cheese,
## Organic Celery Hearts,
## Organic Hass Avocado,
## Organic Whole String Cheese} 1
## [2] {Corn Tortillas,
## Extra Virgin Olive Oil,
## Gala Apples,
## Garnet Sweet Potato (Yam),
## Ground Cumin,
## I Heart Baby Kale,
## No Salt Added Black Beans,
## Organic Baby Carrots,
## Organic Baby Spinach,
## Organic Yellow Onion,
## Original Hummus,
## Snack Sticks Chicken & Rice Recipe Dog Treats,
## Total 2% All Natural Plain Greek Yogurt,
## Unscented Long Lasting Stick Deodorant,
## Wheat Sandwich Thins} 100000
## [3] {Daily Moisture Shampoo,
## DeTox Caffeine Free Organic Herbal Tea Bags,
## Ensure Plus Milk Chocolate Nutrition Shake,
## G Series Perform Frost Glacier Cherry Sports Drink,
## Original No Pulp 100% Florida Orange Juice,
## Triple Chocolate Ripple,
## ZzzQuil Liquid Warming Berry Flavor Sleep-Aid} 1000008
## [4] {Almond Chia Granola Clusters,
## Boneless Skinless Chicken Breasts,
## Broccoli Crown,
## Fresh Cauliflower,
## Orange Bell Pepper,
## Organic Gala Apples,
## Organic Red Onion,
## Veggie Chips} 1000029
## [5] {Cereal,
## Organic Salted Butter} 100003
## [6] {2% Reduced Fat DHA Omega-3 Reduced Fat Milk,
## All-Natural Autumn Apple Fruit Strip,
## All-Natural Harvest Grape Fruit Strip,
## All-Natural Orchard Cherry Fruit Strip,
## Apples, Pumpkin & Carrots Organic Baby Food,
## Baby Food Pears Squash,
## Bag of Organic Bananas,
## Bananas, Raspberries & Oats Organic Baby Food,
## Brussels Sprouts,
## Cucumber Kirby,
## Diced Tomatoes,
## Electrolyte Enhanced Water,
## Ground Turkey Breast,
## Jalapeno Peppers,
## Large Grapefruit,
## Organic Avocado,
## Organic Chicken Broth,
## Organic Chicken Stock,
## Organic Chocolate Chip Chewy Granola Bars,
## Organic Grade A Large Brown Eggs,
## Organic Red Bell Pepper,
## Organic Spring Mix,
## Organic Sweet Mini Peppers,
## Organic Whole Kernel Sweet Corn No Salt Added,
## Organic Yellow Onion,
## Parsley, Italian (Flat), New England Grown,
## Ripened Raspberry Fruit Leather,
## Saffron Threads,
## Shredded Mild Cheddar Cheese,
## Stage 2 Pear Pumpkin Baby Food,
## Summer Strawberry Fruit Strip,
## Total 2% with Strawberry Lowfat Greek Strained Yogurt} 1000046
# Mine frequent itemsets with Eclat: keep itemsets present in at least 0.7%
# of the transactions and containing at most 15 items.
frequentItems <- eclat(
  transactions,
  parameter = list(support = 0.007, maxlen = 15)
)
## Eclat
##
## parameter specification:
## tidLists support minlen maxlen target ext
## FALSE 0.007 1 15 frequent itemsets FALSE
##
## algorithmic control:
## sparse sort verbose
## 7 -2 TRUE
##
## Absolute minimum support count: 918
##
## create itemset ...
## set transactions ...[39124 item(s), 131210 transaction(s)] done [0.48s].
## sorting and recoding items ... [163 item(s)] done [0.01s].
## creating sparse bit matrix ... [163 row(s), 131210 column(s)] done [0.01s].
## writing ... [211 set(s)] done [0.19s].
## Creating S4 object ... done [0.00s].
# List every frequent itemset together with its support and absolute count.
inspect(frequentItems)
## items support count
## [1] {Bag of Organic Bananas,
## Organic Large Extra Fancy Fuji Apple} 0.007415593 973
## [2] {Banana,
## Broccoli Crown} 0.007049768 925
## [3] {Banana,
## Organic Fuji Apple} 0.009221858 1210
## [4] {Banana,
## Honeycrisp Apple} 0.009381907 1231
## [5] {Bag of Organic Bananas,
## Organic Lemon} 0.008132002 1067
## [6] {Banana,
## Yellow Onions} 0.008162488 1071
## [7] {Limes,
## Organic Cilantro} 0.007674720 1007
## [8] {Banana,
## Seedless Red Grapes} 0.008856032 1162
## [9] {Banana,
## Organic Whole Milk} 0.007956711 1044
## [10] {Bag of Organic Bananas,
## Organic Whole Milk} 0.008520692 1118
## [11] {Organic Strawberries,
## Organic Whole Milk} 0.007270787 954
## [12] {Bag of Organic Bananas,
## Organic Yellow Onion} 0.007621370 1000
## [13] {Banana,
## Organic Blueberries} 0.007049768 925
## [14] {Bag of Organic Bananas,
## Organic Blueberries} 0.008665498 1137
## [15] {Organic Blueberries,
## Organic Strawberries} 0.009671519 1269
## [16] {Banana,
## Organic Zucchini} 0.007156467 939
## [17] {Bag of Organic Bananas,
## Organic Zucchini} 0.007933847 1041
## [18] {Organic Baby Spinach,
## Organic Zucchini} 0.007240302 950
## [19] {Bag of Organic Bananas,
## Organic Cucumber} 0.009663898 1268
## [20] {Organic Cucumber,
## Organic Strawberries} 0.007865254 1032
## [21] {Organic Baby Spinach,
## Organic Cucumber} 0.007110739 933
## [22] {Banana,
## Strawberries} 0.014846429 1948
## [23] {Bag of Organic Bananas,
## Organic Raspberries} 0.013566039 1780
## [24] {Organic Raspberries,
## Organic Strawberries} 0.012727688 1670
## [25] {Organic Hass Avocado,
## Organic Raspberries} 0.007766176 1019
## [26] {Banana,
## Limes} 0.010144044 1331
## [27] {Bag of Organic Bananas,
## Limes} 0.007103117 932
## [28] {Limes,
## Organic Baby Spinach} 0.007156467 939
## [29] {Large Lemon,
## Limes} 0.012156086 1595
## [30] {Limes,
## Organic Avocado} 0.007903361 1037
## [31] {Banana,
## Organic Avocado} 0.016888957 2216
## [32] {Bag of Organic Bananas,
## Organic Avocado} 0.008154866 1070
## [33] {Organic Avocado,
## Organic Strawberries} 0.008292051 1088
## [34] {Organic Avocado,
## Organic Baby Spinach} 0.010685161 1402
## [35] {Large Lemon,
## Organic Avocado} 0.010281229 1349
## [36] {Banana,
## Organic Hass Avocado} 0.007217438 947
## [37] {Bag of Organic Bananas,
## Organic Hass Avocado} 0.018443716 2420
## [38] {Organic Hass Avocado,
## Organic Strawberries} 0.011729289 1539
## [39] {Organic Baby Spinach,
## Organic Hass Avocado} 0.009541956 1252
## [40] {Banana,
## Large Lemon} 0.016446917 2158
## [41] {Bag of Organic Bananas,
## Large Lemon} 0.008055788 1057
## [42] {Large Lemon,
## Organic Strawberries} 0.007750934 1017
## [43] {Large Lemon,
## Organic Baby Spinach} 0.009435256 1238
## [44] {Banana,
## Organic Baby Spinach} 0.015242741 2000
## [45] {Bag of Organic Bananas,
## Organic Baby Spinach} 0.017041384 2236
## [46] {Organic Baby Spinach,
## Organic Strawberries} 0.012491426 1639
## [47] {Banana,
## Organic Strawberries} 0.016568859 2174
## [48] {Bag of Organic Bananas,
## Organic Strawberries} 0.023428092 3074
## [49] {Banana} 0.142717781 18726
## [50] {Bag of Organic Bananas} 0.117978813 15480
## [51] {Organic Strawberries} 0.083027208 10894
## [52] {Organic Baby Spinach} 0.074567487 9784
## [53] {Large Lemon} 0.061999848 8135
## [54] {Organic Hass Avocado} 0.055582654 7293
## [55] {Organic Avocado} 0.056466733 7409
## [56] {Limes} 0.045979727 6033
## [57] {Organic Raspberries} 0.042268120 5546
## [58] {Strawberries} 0.049493179 6494
## [59] {Organic Cucumber} 0.035157381 4613
## [60] {Organic Zucchini} 0.034974468 4589
## [61] {Organic Blueberries} 0.037847725 4966
## [62] {Organic Yellow Onion} 0.032695679 4290
## [63] {Organic Whole Milk} 0.037405686 4908
## [64] {Organic Garlic} 0.031689658 4158
## [65] {Organic Red Onion} 0.029098392 3818
## [66] {Organic Grape Tomatoes} 0.029136499 3823
## [67] {Seedless Red Grapes} 0.030935142 4059
## [68] {Asparagus} 0.029479460 3868
## [69] {Organic Cilantro} 0.026872952 3526
## [70] {Yellow Onions} 0.028671595 3762
## [71] {Organic Lemon} 0.026712903 3505
## [72] {Organic Baby Carrots} 0.027414069 3597
## [73] {Honeycrisp Apple} 0.027063486 3551
## [74] {Organic Fuji Apple} 0.024822803 3257
## [75] {Small Hass Avocado} 0.023649112 3103
## [76] {Red Peppers} 0.022140081 2905
## [77] {Sparkling Water Grapefruit} 0.025600183 3359
## [78] {Broccoli Crown} 0.022345858 2932
## [79] {Organic Large Extra Fancy Fuji Apple} 0.022033382 2891
## [80] {Organic Baby Arugula} 0.022277265 2923
## [81] {Fresh Cauliflower} 0.020204253 2651
## [82] {Original Hummus} 0.021781876 2858
## [83] {Organic Gala Apples} 0.021408429 2809
## [84] {Organic Small Bunch Celery} 0.019990854 2623
## [85] {Organic Blackberries} 0.021667556 2843
## [86] {Organic Garnet Sweet Potato (Yam)} 0.019571679 2568
## [87] {Green Bell Pepper} 0.019213475 2521
## [88] {Raspberries} 0.024990473 3279
## [89] {Organic Tomato Cluster} 0.019343038 2538
## [90] {Organic Red Bell Pepper} 0.018237939 2393
## [91] {Michigan Organic Kale} 0.020021340 2627
## [92] {Carrots} 0.019030562 2497
## [93] {Organic Peeled Whole Baby Carrots} 0.018748571 2460
## [94] {Organic Italian Parsley Bunch} 0.018291289 2400
## [95] {Organic Granny Smith Apple} 0.017811142 2337
## [96] {Organic Half & Half} 0.020166146 2646
## [97] {100% Whole Wheat Bread} 0.017513909 2298
## [98] {Cucumber Kirby} 0.018390367 2413
## [99] {Apple Honeycrisp Organic} 0.017163326 2252
## [100] {Blueberries} 0.017704443 2323
## [101] {Organic Whole String Cheese} 0.015189391 1993
## [102] {Red Vine Tomato} 0.017026141 2234
## [103] {Half & Half} 0.018474202 2424
## [104] {Organic Banana} 0.017773036 2332
## [105] {Organic D'Anjou Pears} 0.014427254 1893
## [106] {Organic Kiwi} 0.014328176 1880
## [107] {Organic Ginger Root} 0.015296090 2007
## [108] {Orange Bell Pepper} 0.014206234 1864
## [109] {Organic Navel Orange} 0.015090313 1980
## [110] {Unsweetened Almondmilk} 0.016530752 2169
## [111] {Extra Virgin Olive Oil} 0.015760994 2068
## [112] {Jalapeno Peppers} 0.014472982 1899
## [113] {Bunched Cilantro} 0.014633031 1920
## [114] {Roma Tomato} 0.014884536 1953
## [115] {Garlic} 0.014633031 1920
## [116] {Organic Unsweetened Almond Milk} 0.016584102 2176
## [117] {Uncured Genoa Salami} 0.013627010 1788
## [118] {Organic Carrot Bunch} 0.013276427 1742
## [119] {Boneless Skinless Chicken Breasts} 0.015913421 2088
## [120] {Organic Lacinato (Dinosaur) Kale} 0.013421233 1761
## [121] {Grated Parmesan} 0.012910601 1694
## [122] {Large Alfresco Eggs} 0.014465361 1898
## [123] {Lime Sparkling Water} 0.014983614 1966
## [124] {Organic Black Beans} 0.012011280 1576
## [125] {Organic Grade A Free Range Large Brown Eggs} 0.013466961 1767
## [126] {Organic Broccoli} 0.012598125 1653
## [127] {Hass Avocado} 0.012445698 1633
## [128] {Hass Avocados} 0.017346239 2276
## [129] {Grape White/Green Seedless} 0.011637832 1527
## [130] {Green Onions} 0.011012880 1445
## [131] {Red Raspberries} 0.011378706 1493
## [132] {Feta Cheese Crumbles} 0.011043366 1449
## [133] {Sparkling Natural Mineral Water} 0.013741331 1803
## [134] {Organic Sticks Low Moisture Part Skim Mozzarella String Cheese} 0.010227879 1342
## [135] {Spring Water} 0.016957549 2225
## [136] {Red Onion} 0.011188172 1468
## [137] {Clementines, Bag} 0.011264385 1478
## [138] {Russet Potato} 0.010807103 1418
## [139] {Unsalted Butter} 0.010936666 1435
## [140] {Shredded Parmesan} 0.010090694 1324
## [141] {Organic Baby Broccoli} 0.010296471 1351
## [142] {Cantaloupe} 0.009259965 1215
## [143] {Organic Reduced Fat 2% Milk} 0.011698803 1535
## [144] {Honey Nut Cheerios} 0.009282829 1218
## [145] {Organic Whole Strawberries} 0.009244722 1213
## [146] {Sharp Cheddar Cheese} 0.009618169 1262
## [147] {2% Reduced Fat Milk} 0.011736910 1540
## [148] {Unsweetened Original Almond Breeze Almond Milk} 0.011165308 1465
## [149] {Organic Broccoli Florets} 0.010372685 1361
## [150] {Pure Irish Butter} 0.011249143 1476
## [151] {Sparkling Lemon Water} 0.010845210 1423
## [152] {Fresh Ginger Root} 0.009785839 1284
## [153] {Organic Basil} 0.009313315 1222
## [154] {No Salt Added Black Beans} 0.009526713 1250
## [155] {Yellow Bell Pepper} 0.008886518 1166
## [156] {Organic Roasted Turkey Breast} 0.008558799 1123
## [157] {Granny Smith Apples} 0.010509870 1379
## [158] {Organic Extra Firm Tofu} 0.009038945 1186
## [159] {Organic Garbanzo Beans} 0.008695984 1141
## [160] {Organic Russet Potato} 0.008947489 1174
## [161] {Bartlett Pears} 0.009198994 1207
## [162] {Green Beans} 0.008840790 1160
## [163] {Organic Roma Tomato} 0.009305693 1221
## [164] {Organic Romaine Lettuce} 0.008795061 1154
## [165] {Organic Romaine} 0.008779819 1152
## [166] {100% Raw Coconut Water} 0.009892539 1298
## [167] {Whipped Cream Cheese} 0.008215837 1078
## [168] {Organic Spring Mix} 0.010380306 1362
## [169] {Frozen Organic Wild Blueberries} 0.008116759 1065
## [170] {Organic Red Radish, Bunch} 0.007933847 1041
## [171] {Organic Reduced Fat Milk} 0.009191373 1206
## [172] {Organic Celery Hearts} 0.008093895 1062
## [173] {Baby Spinach} 0.008741712 1147
## [174] {Unsweetened Vanilla Almond Milk} 0.009511470 1248
## [175] {Vanilla Almond Breeze Almond Milk} 0.009732490 1277
## [176] {Lightly Salted Baked Snap Pea Crisps} 0.007552778 991
## [177] {100% Recycled Paper Towels} 0.009016081 1183
## [178] {Shredded Mild Cheddar Cheese} 0.007895740 1036
## [179] {Organic Sour Cream} 0.007979575 1047
## [180] {Organic Bell Pepper} 0.007186952 943
## [181] {Organic Large Brown Grade AA Cage Free Eggs} 0.008665498 1137
## [182] {Cherubs Heavenly Salad Tomatoes} 0.007804283 1024
## [183] {Organic Coconut Milk} 0.007964332 1045
## [184] {Organic Bartlett Pear} 0.007468943 980
## [185] {Whole Milk} 0.010174529 1335
## [186] {Pineapple Chunks} 0.008208216 1077
## [187] {Brussels Sprouts} 0.008756955 1149
## [188] {Sour Cream} 0.007789040 1022
## [189] {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.007415593 973
## [190] {Organic Hothouse Cucumbers} 0.007057389 926
## [191] {Fat Free Milk} 0.009298072 1220
## [192] {Ground Turkey Breast} 0.007301273 958
## [193] {Organic Large Grade AA Brown Eggs} 0.008017682 1052
## [194] {Shredded Mozzarella} 0.007194574 944
## [195] {Italian Sparkling Mineral Water} 0.008627391 1132
## [196] {Pure Sparkling Water} 0.008574042 1125
## [197] {Original Orange Juice} 0.008032924 1054
## [198] {Roasted Turkey Breast} 0.007072632 928
## [199] {Total 0% Greek Yogurt} 0.007971953 1046
## [200] {Organic Bread with 21 Whole Grains} 0.007148845 938
## [201] {Total Greek Strained Yogurt} 0.007247923 951
## [202] {Boneless Skinless Chicken Breast} 0.007186952 943
## [203] {Creamy Peanut Butter} 0.007552778 991
## [204] {Total 0% Nonfat Greek Yogurt} 0.007568021 993
## [205] {Gala Apples} 0.007308894 959
## [206] {Romaine Hearts} 0.007529914 988
## [207] {Organic 2% Reduced Fat Milk} 0.007095496 931
## [208] {Blackberries} 0.008208216 1077
## [209] {Soda} 0.011485405 1507
## [210] {Clementines} 0.010296471 1351
## [211] {Natural Spring Water} 0.007240302 950
# Mine association rules with Apriori: a rule must cover at least 0.1% of
# the transactions and hold with at least 50% confidence.
rules <- apriori(
  transactions,
  parameter = list(support = 0.001, confidence = 0.5)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 131
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[39124 item(s), 131210 transaction(s)] done [0.47s].
## sorting and recoding items ... [1865 item(s)] done [0.02s].
## creating transaction tree ... done [0.06s].
## checking subsets of size 1 2 3 4 done [0.05s].
## writing ... [11 rule(s)] done [0.01s].
## creating S4 object ... done [0.05s].
# Rank the rules from highest to lowest confidence and show the top ones.
rules_conf <- sort(rules, by = "confidence", decreasing = TRUE)
inspect(head(rules_conf))
## lhs rhs support confidence lift count
## [1] {Organic Hass Avocado,
## Organic Raspberries,
## Organic Strawberries} => {Bag of Organic Bananas} 0.001737672 0.5984252 5.072311 228
## [2] {Organic Cucumber,
## Organic Hass Avocado,
## Organic Strawberries} => {Bag of Organic Bananas} 0.001066992 0.5468750 4.635366 140
## [3] {Organic Hass Avocado,
## Organic Kiwi} => {Bag of Organic Bananas} 0.001448060 0.5459770 4.627755 190
## [4] {Organic Navel Orange,
## Organic Raspberries} => {Bag of Organic Bananas} 0.001150827 0.5412186 4.587422 151
## [5] {Strawberries,
## Yellow Onions} => {Banana} 0.001143206 0.5357143 3.753662 150
## [6] {Organic Hass Avocado,
## Organic Whole String Cheese} => {Bag of Organic Bananas} 0.001158448 0.5314685 4.504779 152
# Draw the mined rules as an interactive (htmlwidget) graph.
plot(rules, method = "graph", engine = "htmlwidget")
# Number of purchased order lines per department, plus each department's
# share of all order lines in the training set.
tmp <- train %>%
  left_join(products, by = "product_id") %>%
  group_by(department_id) %>%
  summarise(count = n()) %>%
  left_join(departments, by = "department_id") %>%
  mutate(percentage = count / sum(count))

# The y axis is labelled "Percentage", so plot the percentage column rather
# than the raw count. Ordering the bars by count gives the same order.
ggplot(tmp, aes(x = reorder(department, count), y = percentage)) +
  geom_col() +
  ggtitle("Top Departments") +
  ylab("Percentage") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5),
    axis.title.x = element_blank()
  )
#
# left_join(products,by="department_id") %>%
# group_by("department_id")
# Number of catalogue products per (department, aisle), with the department
# and aisle lookup tables joined on.
tmp <- products %>%
  group_by(department_id, aisle_id) %>%
  summarize(n = n()) %>%
  left_join(departments, by = "department_id") %>%
  left_join(aisles, by = "aisle_id")

# Purchased order lines per (department, aisle) in the training set.
# `onesize` is a constant so every aisle can be drawn with the same area.
tmp2 <- train %>%
  group_by(product_id) %>%
  summarize(count = n()) %>%
  left_join(products, by = "product_id") %>%
  group_by(department_id, aisle_id) %>%
  summarize(sumcount = sum(count)) %>%
  left_join(tmp, by = c("department_id", "aisle_id")) %>%
  mutate(onesize = 1)

# Treemap 1: equally sized aisle tiles, coloured by department.
treemap(
  tmp2,
  index = c("department", "aisle"),
  vSize = "onesize",
  vColor = "department",
  palette = "Set3",
  title = "",
  sortID = "-sumcount",
  border.col = "#FFFFFF",
  type = "categorical",
  fontsize.legend = 0,
  bg.labels = "#FFFFFF"
)

# Treemap 2: tile area proportional to the number of purchased order lines.
treemap(
  tmp2,
  index = c("department", "aisle"),
  vSize = "sumcount",
  title = "",
  palette = "Set3",
  border.col = "#FFFFFF"
)
Apriori without Bananas
# Rebuild the transactions after removing three product ids (per the section
# title, the banana products) so they no longer dominate the mined rules.
# NOTE(review): "Organic Banana" still appears among the frequent items
# below, so it is presumably a separate product id; confirm whether it
# should be excluded as well.
tmp <- train %>%
  filter(!product_id %in% c(39276, 13176, 24852)) %>%
  left_join(products, by = "product_id")

# Round-trip through a CSV file so arules can parse it in "single" format
# (one line per transaction/item pair).
write.csv(tmp, file = "transactions.csv")

# write.csv() prepends a row-name column, so in the file column 2 holds the
# transaction (order) id and column 6 the product name.
# skip = 1 drops the header line; without it the header itself is read as
# one extra transaction containing one extra item.
transactions <- read.transactions(
  "transactions.csv",
  format = "single",
  sep = ",",
  cols = c(2, 6),
  skip = 1
)
summary(transactions)
## transactions as itemMatrix in sparse format with
## 131023 rows (elements/itemsets/transactions) and
## 39121 columns (items) and a density of 0.0002633862
##
## most frequent items:
## Organic Strawberries Organic Baby Spinach Large Lemon
## 10894 9784 8135
## Organic Avocado Organic Hass Avocado (Other)
## 7409 7293 1306537
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
## 7098 7632 8298 8593 9073 9040 8531 7980 7256 6529 5944 5298 4730 4333 3720
## 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
## 3424 2996 2639 2343 2008 1762 1622 1415 1189 988 965 783 632 616 484
## 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45
## 421 389 324 291 234 184 206 158 112 95 88 85 79 58 58
## 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60
## 45 48 22 31 27 26 17 20 9 9 9 7 5 4 6
## 61 62 63 64 65 66 67 68 69 70 72 73 75 76 77
## 3 5 4 4 1 2 1 1 3 1 2 2 2 1 1
## 79 80
## 1 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 5.0 8.0 10.3 14.0 80.0
##
## includes extended item information - examples:
## labels
## 1 \\"Constant Comment\\" Black Tea
## 2 \\"Constant Comment\\" Decaffeinated Black Tea Blend
## 3 \\"Darn Good\\" Chili Mix
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 100000
## 3 1000008
# Print the first few transactions of the rebuilt (filtered) transaction set.
inspect(head(transactions))
## items transactionID
## [1] {Bulgarian Yogurt,
## Cucumber Kirby,
## Lightly Smoked Sardines in Olive Oil,
## Organic 4% Milk Fat Whole Milk Cottage Cheese,
## Organic Celery Hearts,
## Organic Hass Avocado,
## Organic Whole String Cheese} 1
## [2] {Corn Tortillas,
## Extra Virgin Olive Oil,
## Gala Apples,
## Garnet Sweet Potato (Yam),
## Ground Cumin,
## I Heart Baby Kale,
## No Salt Added Black Beans,
## Organic Baby Carrots,
## Organic Baby Spinach,
## Organic Yellow Onion,
## Original Hummus,
## Snack Sticks Chicken & Rice Recipe Dog Treats,
## Total 2% All Natural Plain Greek Yogurt,
## Unscented Long Lasting Stick Deodorant,
## Wheat Sandwich Thins} 100000
## [3] {Daily Moisture Shampoo,
## DeTox Caffeine Free Organic Herbal Tea Bags,
## Ensure Plus Milk Chocolate Nutrition Shake,
## G Series Perform Frost Glacier Cherry Sports Drink,
## Original No Pulp 100% Florida Orange Juice,
## Triple Chocolate Ripple,
## ZzzQuil Liquid Warming Berry Flavor Sleep-Aid} 1000008
## [4] {Almond Chia Granola Clusters,
## Boneless Skinless Chicken Breasts,
## Broccoli Crown,
## Fresh Cauliflower,
## Orange Bell Pepper,
## Organic Gala Apples,
## Organic Red Onion,
## Veggie Chips} 1000029
## [5] {Cereal,
## Organic Salted Butter} 100003
## [6] {2% Reduced Fat DHA Omega-3 Reduced Fat Milk,
## All-Natural Autumn Apple Fruit Strip,
## All-Natural Harvest Grape Fruit Strip,
## All-Natural Orchard Cherry Fruit Strip,
## Apples, Pumpkin & Carrots Organic Baby Food,
## Baby Food Pears Squash,
## Bananas, Raspberries & Oats Organic Baby Food,
## Brussels Sprouts,
## Cucumber Kirby,
## Diced Tomatoes,
## Electrolyte Enhanced Water,
## Ground Turkey Breast,
## Jalapeno Peppers,
## Large Grapefruit,
## Organic Avocado,
## Organic Chicken Broth,
## Organic Chicken Stock,
## Organic Chocolate Chip Chewy Granola Bars,
## Organic Grade A Large Brown Eggs,
## Organic Red Bell Pepper,
## Organic Spring Mix,
## Organic Sweet Mini Peppers,
## Organic Whole Kernel Sweet Corn No Salt Added,
## Organic Yellow Onion,
## Parsley, Italian (Flat), New England Grown,
## Ripened Raspberry Fruit Leather,
## Saffron Threads,
## Shredded Mild Cheddar Cheese,
## Stage 2 Pear Pumpkin Baby Food,
## Summer Strawberry Fruit Strip,
## Total 2% with Strawberry Lowfat Greek Strained Yogurt} 1000046
# Re-run Eclat on the filtered transactions with the same thresholds as
# before: at least 0.7% support and at most 15 items per itemset.
frequentItems <- eclat(
  transactions,
  parameter = list(support = 0.007, maxlen = 15)
)
## Eclat
##
## parameter specification:
## tidLists support minlen maxlen target ext
## FALSE 0.007 1 15 frequent itemsets FALSE
##
## algorithmic control:
## sparse sort verbose
## 7 -2 TRUE
##
## Absolute minimum support count: 917
##
## create itemset ...
## set transactions ...[39121 item(s), 131023 transaction(s)] done [0.46s].
## sorting and recoding items ... [161 item(s)] done [0.01s].
## creating sparse bit matrix ... [161 row(s), 131023 column(s)] done [0.01s].
## writing ... [180 set(s)] done [0.18s].
## Creating S4 object ... done [0.00s].
# List the frequent itemsets found in the filtered transactions, with their
# support and absolute count.
inspect(frequentItems)
## items support count
## [1] {Limes,
## Organic Cilantro} 0.007685674 1007
## [2] {Organic Strawberries,
## Organic Whole Milk} 0.007281164 954
## [3] {Organic Blueberries,
## Organic Strawberries} 0.009685322 1269
## [4] {Organic Baby Spinach,
## Organic Zucchini} 0.007250635 950
## [5] {Organic Cucumber,
## Organic Strawberries} 0.007876480 1032
## [6] {Organic Baby Spinach,
## Organic Cucumber} 0.007120887 933
## [7] {Organic Raspberries,
## Organic Strawberries} 0.012745854 1670
## [8] {Organic Hass Avocado,
## Organic Raspberries} 0.007777260 1019
## [9] {Limes,
## Organic Baby Spinach} 0.007166681 939
## [10] {Large Lemon,
## Limes} 0.012173435 1595
## [11] {Limes,
## Organic Avocado} 0.007914641 1037
## [12] {Organic Avocado,
## Organic Strawberries} 0.008303886 1088
## [13] {Organic Avocado,
## Organic Baby Spinach} 0.010700411 1402
## [14] {Large Lemon,
## Organic Avocado} 0.010295902 1349
## [15] {Organic Hass Avocado,
## Organic Strawberries} 0.011746029 1539
## [16] {Organic Baby Spinach,
## Organic Hass Avocado} 0.009555574 1252
## [17] {Large Lemon,
## Organic Strawberries} 0.007761996 1017
## [18] {Large Lemon,
## Organic Baby Spinach} 0.009448723 1238
## [19] {Organic Baby Spinach,
## Organic Strawberries} 0.012509254 1639
## [20] {Organic Strawberries} 0.083145707 10894
## [21] {Organic Baby Spinach} 0.074673912 9784
## [22] {Large Lemon} 0.062088336 8135
## [23] {Organic Hass Avocado} 0.055661983 7293
## [24] {Organic Avocado} 0.056547324 7409
## [25] {Limes} 0.046045351 6033
## [26] {Organic Raspberries} 0.042328446 5546
## [27] {Strawberries} 0.049563817 6494
## [28] {Organic Cucumber} 0.035207559 4613
## [29] {Organic Zucchini} 0.035024385 4589
## [30] {Organic Blueberries} 0.037901742 4966
## [31] {Organic Yellow Onion} 0.032742343 4290
## [32] {Organic Whole Milk} 0.037459072 4908
## [33] {Organic Garlic} 0.031734886 4158
## [34] {Organic Red Onion} 0.029139922 3818
## [35] {Organic Grape Tomatoes} 0.029178083 3823
## [36] {Seedless Red Grapes} 0.030979294 4059
## [37] {Asparagus} 0.029521534 3868
## [38] {Organic Cilantro} 0.026911306 3526
## [39] {Yellow Onions} 0.028712516 3762
## [40] {Organic Lemon} 0.026751028 3505
## [41] {Organic Baby Carrots} 0.027453195 3597
## [42] {Honeycrisp Apple} 0.027102112 3551
## [43] {Red Peppers} 0.022171680 2905
## [44] {Organic Fuji Apple} 0.024858231 3257
## [45] {Sparkling Water Grapefruit} 0.025636720 3359
## [46] {Small Hass Avocado} 0.023682865 3103
## [47] {Broccoli Crown} 0.022377750 2932
## [48] {Organic Large Extra Fancy Fuji Apple} 0.022064828 2891
## [49] {Organic Baby Arugula} 0.022309060 2923
## [50] {Fresh Cauliflower} 0.020233089 2651
## [51] {Original Hummus} 0.021812964 2858
## [52] {Organic Small Bunch Celery} 0.020019386 2623
## [53] {Organic Gala Apples} 0.021438984 2809
## [54] {Organic Blackberries} 0.021698480 2843
## [55] {Organic Garnet Sweet Potato (Yam)} 0.019599612 2568
## [56] {Green Bell Pepper} 0.019240897 2521
## [57] {Organic Tomato Cluster} 0.019370645 2538
## [58] {Raspberries} 0.025026140 3279
## [59] {Organic Red Bell Pepper} 0.018263969 2393
## [60] {Michigan Organic Kale} 0.020049915 2627
## [61] {Carrots} 0.019057723 2497
## [62] {Organic Peeled Whole Baby Carrots} 0.018775330 2460
## [63] {Organic Italian Parsley Bunch} 0.018317395 2400
## [64] {Organic Granny Smith Apple} 0.017836563 2337
## [65] {Organic Half & Half} 0.020194928 2646
## [66] {100% Whole Wheat Bread} 0.017538905 2298
## [67] {Cucumber Kirby} 0.018416614 2413
## [68] {Blueberries} 0.017729712 2323
## [69] {Apple Honeycrisp Organic} 0.017187822 2252
## [70] {Organic Banana} 0.017798402 2332
## [71] {Organic Whole String Cheese} 0.015211070 1993
## [72] {Red Vine Tomato} 0.017050442 2234
## [73] {Half & Half} 0.018500569 2424
## [74] {Organic D'Anjou Pears} 0.014447845 1893
## [75] {Organic Kiwi} 0.014348626 1880
## [76] {Organic Ginger Root} 0.015317921 2007
## [77] {Orange Bell Pepper} 0.014226510 1864
## [78] {Organic Navel Orange} 0.015111851 1980
## [79] {Unsweetened Almondmilk} 0.016554345 2169
## [80] {Extra Virgin Olive Oil} 0.015783488 2068
## [81] {Jalapeno Peppers} 0.014493639 1899
## [82] {Bunched Cilantro} 0.014653916 1920
## [83] {Roma Tomato} 0.014905780 1953
## [84] {Garlic} 0.014653916 1920
## [85] {Uncured Genoa Salami} 0.013646459 1788
## [86] {Organic Carrot Bunch} 0.013295376 1742
## [87] {Organic Unsweetened Almond Milk} 0.016607771 2176
## [88] {Boneless Skinless Chicken Breasts} 0.015936133 2088
## [89] {Organic Lacinato (Dinosaur) Kale} 0.013440388 1761
## [90] {Grated Parmesan} 0.012929028 1694
## [91] {Large Alfresco Eggs} 0.014486006 1898
## [92] {Lime Sparkling Water} 0.015004999 1966
## [93] {Organic Black Beans} 0.012028422 1576
## [94] {Organic Grade A Free Range Large Brown Eggs} 0.013486182 1767
## [95] {Organic Broccoli} 0.012616106 1653
## [96] {Hass Avocado} 0.012463461 1633
## [97] {Hass Avocados} 0.017370996 2276
## [98] {Green Onions} 0.011028598 1445
## [99] {Grape White/Green Seedless} 0.011654442 1527
## [100] {Feta Cheese Crumbles} 0.011059127 1449
## [101] {Sparkling Natural Mineral Water} 0.013760943 1803
## [102] {Red Raspberries} 0.011394946 1493
## [103] {Organic Sticks Low Moisture Part Skim Mozzarella String Cheese} 0.010242477 1342
## [104] {Red Onion} 0.011204140 1468
## [105] {Spring Water} 0.016981751 2225
## [106] {Clementines, Bag} 0.011280462 1478
## [107] {Russet Potato} 0.010822527 1418
## [108] {Unsalted Butter} 0.010952276 1435
## [109] {Shredded Parmesan} 0.010105096 1324
## [110] {Organic Baby Broccoli} 0.010311167 1351
## [111] {Cantaloupe} 0.009273181 1215
## [112] {Organic Reduced Fat 2% Milk} 0.011715500 1535
## [113] {Honey Nut Cheerios} 0.009296078 1218
## [114] {Organic Whole Strawberries} 0.009257917 1213
## [115] {Sharp Cheddar Cheese} 0.009631897 1262
## [116] {2% Reduced Fat Milk} 0.011753662 1540
## [117] {Unsweetened Original Almond Breeze Almond Milk} 0.011181243 1465
## [118] {Pure Irish Butter} 0.011265198 1476
## [119] {Fresh Ginger Root} 0.009799806 1284
## [120] {Sparkling Lemon Water} 0.010860689 1423
## [121] {Organic Broccoli Florets} 0.010387489 1361
## [122] {Organic Basil} 0.009326607 1222
## [123] {No Salt Added Black Beans} 0.009540310 1250
## [124] {Yellow Bell Pepper} 0.008899201 1166
## [125] {Organic Roasted Turkey Breast} 0.008571014 1123
## [126] {Granny Smith Apples} 0.010524870 1379
## [127] {Organic Garbanzo Beans} 0.008708395 1141
## [128] {Organic Extra Firm Tofu} 0.009051846 1186
## [129] {Organic Russet Potato} 0.008960259 1174
## [130] {Green Beans} 0.008853407 1160
## [131] {Bartlett Pears} 0.009212123 1207
## [132] {Organic Roma Tomato} 0.009318975 1221
## [133] {Organic Romaine Lettuce} 0.008807614 1154
## [134] {Organic Romaine} 0.008792349 1152
## [135] {100% Raw Coconut Water} 0.009906658 1298
## [136] {Whipped Cream Cheese} 0.008227563 1078
## [137] {Organic Spring Mix} 0.010395121 1362
## [138] {Frozen Organic Wild Blueberries} 0.008128344 1065
## [139] {Organic Red Radish, Bunch} 0.007945170 1041
## [140] {Organic Celery Hearts} 0.008105447 1062
## [141] {Organic Reduced Fat Milk} 0.009204491 1206
## [142] {Baby Spinach} 0.008754188 1147
## [143] {Unsweetened Vanilla Almond Milk} 0.009525045 1248
## [144] {Vanilla Almond Breeze Almond Milk} 0.009746380 1277
## [145] {Lightly Salted Baked Snap Pea Crisps} 0.007563558 991
## [146] {Shredded Mild Cheddar Cheese} 0.007907009 1036
## [147] {Organic Sour Cream} 0.007990963 1047
## [148] {100% Recycled Paper Towels} 0.009028949 1183
## [149] {Cherubs Heavenly Salad Tomatoes} 0.007815422 1024
## [150] {Organic Bell Pepper} 0.007197210 943
## [151] {Organic Large Brown Grade AA Cage Free Eggs} 0.008677866 1137
## [152] {Organic Coconut Milk} 0.007975699 1045
## [153] {Whole Milk} 0.010189051 1335
## [154] {Organic Bartlett Pear} 0.007479603 980
## [155] {Sour Cream} 0.007800157 1022
## [156] {Pineapple Chunks} 0.008219931 1077
## [157] {Brussels Sprouts} 0.008769453 1149
## [158] {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.007426177 973
## [159] {Organic Hothouse Cucumbers} 0.007067461 926
## [160] {Fat Free Milk} 0.009311342 1220
## [161] {Ground Turkey Breast} 0.007311693 958
## [162] {Organic Large Grade AA Brown Eggs} 0.008029125 1052
## [163] {Shredded Mozzarella} 0.007204842 944
## [164] {Italian Sparkling Mineral Water} 0.008639704 1132
## [165] {Pure Sparkling Water} 0.008586279 1125
## [166] {Original Orange Juice} 0.008044389 1054
## [167] {Roasted Turkey Breast} 0.007082726 928
## [168] {Total 0% Greek Yogurt} 0.007983331 1046
## [169] {Total Greek Strained Yogurt} 0.007258268 951
## [170] {Organic Bread with 21 Whole Grains} 0.007159048 938
## [171] {Boneless Skinless Chicken Breast} 0.007197210 943
## [172] {Creamy Peanut Butter} 0.007563558 991
## [173] {Total 0% Nonfat Greek Yogurt} 0.007578822 993
## [174] {Romaine Hearts} 0.007540661 988
## [175] {Gala Apples} 0.007319326 959
## [176] {Organic 2% Reduced Fat Milk} 0.007105623 931
## [177] {Blackberries} 0.008219931 1077
## [178] {Soda} 0.011501797 1507
## [179] {Clementines} 0.010311167 1351
## [180] {Natural Spring Water} 0.007250635 950
# Mine association rules from `transactions`, keeping only rules with
# support >= 0.0005 and confidence >= 0.5.
rules <- apriori(
  transactions,
  parameter = list(support = 0.0005, confidence = 0.5)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 5e-04 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 65
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[39121 item(s), 131023 transaction(s)] done [0.45s].
## sorting and recoding items ... [3619 item(s)] done [0.02s].
## creating transaction tree ... done [0.06s].
## checking subsets of size 1 2 3 4 done [0.19s].
## writing ... [7 rule(s)] done [0.02s].
## creating S4 object ... done [0.04s].
# Order the rules from highest to lowest confidence, then print the first few.
rules_conf <- rules %>%
  sort(by = "confidence", decreasing = TRUE)
rules_conf %>%
  head() %>%
  inspect()
## lhs rhs support confidence lift count
## [1] {Lime Sparkling Water,
## Peach Pear Flavored Sparkling Water} => {Sparkling Water Grapefruit} 0.0005876831 0.5968992 23.282979 77
## [2] {Peach Pear Flavored Sparkling Water,
## Sparkling Water Berry} => {Sparkling Water Grapefruit} 0.0005495218 0.5454545 21.276300 72
## [3] {Pure Sparkling Water,
## Sparkling Lemon Water} => {Sparkling Water Grapefruit} 0.0005342573 0.5426357 21.166345 70
## [4] {Cantaloupe,
## Organic Raspberries} => {Organic Strawberries} 0.0006182121 0.5400000 6.494623 81
## [5] {Non Fat Raspberry Yogurt,
## Nonfat Icelandic Style Strawberry Yogurt} => {Icelandic Style Skyr Blueberry Non-fat Yogurt} 0.0005037284 0.5196850 96.582543 66
## [6] {Lime Sparkling Water,
## Sparkling Water Berry} => {Sparkling Water Grapefruit} 0.0005266251 0.5187970 20.236481 69
# Draw the mined rules as a graph using the htmlwidget engine.
plot(
  rules,
  method = "graph",
  engine = "htmlwidget"
)
Rules without produce
# Build the order lines for every department except department_id 4
# (produce, per the section heading) and convert them to arules transactions.
# `products` is joined exactly once with an explicit key, so the result has a
# single `product_name` column and no `.x`/`.y` duplicates, and the table is
# left ungrouped.
tmp <- train %>%
  left_join(products, by = "product_id") %>%
  filter(department_id != 4)

# row.names = FALSE keeps R's row index out of the file, so the CSV columns
# are exactly the columns of `tmp`.
write.csv(tmp, file = "transactions.csv", row.names = FALSE)

# Columns are selected by name: with character `cols`, read.transactions()
# treats the first line of the file as a header, so the header row is not
# imported as a spurious transaction and the call does not depend on the
# position of the columns in the file.
transactions <- read.transactions(
  "transactions.csv",
  format = "single",
  sep = ",",
  cols = c("order_id", "product_name")
)
summary(transactions)
## transactions as itemMatrix in sparse format with
## 127803 rows (elements/itemsets/transactions) and
## 37591 columns (items) and a density of 0.0002030561
##
## most frequent items:
## Organic Whole Milk Sparkling Water Grapefruit
## 4908 3359
## Original Hummus Organic Half & Half
## 2858 2646
## Half & Half (Other)
## 2424 959336
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12
## 10874 11457 12017 11925 11321 10305 9318 7832 6573 5824 4756 4057
## 13 14 15 16 17 18 19 20 21 22 23 24
## 3462 2933 2468 1997 1712 1404 1218 1065 836 732 584 491
## 25 26 27 28 29 30 31 32 33 34 35 36
## 430 352 288 232 203 194 133 123 93 81 77 76
## 37 38 39 40 41 42 43 44 45 46 47 48
## 49 47 38 37 40 24 15 14 12 15 16 6
## 49 50 51 52 53 55 56 57 58 59 60 61
## 5 10 1 4 3 5 3 3 1 2 1 2
## 62 65 67 71 72 73
## 1 2 1 1 1 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 3.000 6.000 7.633 10.000 73.000
##
## includes extended item information - examples:
## labels
## 1 \\"Constant Comment\\" Black Tea
## 2 \\"Constant Comment\\" Decaffeinated Black Tea Blend
## 3 \\"Darn Good\\" Chili Mix
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 100000
## 3 1000008
# Print the first few transactions as a quick check of the import.
transactions %>%
  head() %>%
  inspect()
## items transactionID
## [1] {Bulgarian Yogurt,
## Lightly Smoked Sardines in Olive Oil,
## Organic 4% Milk Fat Whole Milk Cottage Cheese,
## Organic Whole String Cheese} 1
## [2] {Corn Tortillas,
## Extra Virgin Olive Oil,
## Ground Cumin,
## No Salt Added Black Beans,
## Original Hummus,
## Snack Sticks Chicken & Rice Recipe Dog Treats,
## Total 2% All Natural Plain Greek Yogurt,
## Unscented Long Lasting Stick Deodorant,
## Wheat Sandwich Thins} 100000
## [3] {Daily Moisture Shampoo,
## DeTox Caffeine Free Organic Herbal Tea Bags,
## Ensure Plus Milk Chocolate Nutrition Shake,
## G Series Perform Frost Glacier Cherry Sports Drink,
## Original No Pulp 100% Florida Orange Juice,
## Triple Chocolate Ripple,
## ZzzQuil Liquid Warming Berry Flavor Sleep-Aid} 1000008
## [4] {Almond Chia Granola Clusters,
## Boneless Skinless Chicken Breasts,
## Veggie Chips} 1000029
## [5] {Cereal,
## Organic Salted Butter} 100003
## [6] {2% Reduced Fat DHA Omega-3 Reduced Fat Milk,
## All-Natural Autumn Apple Fruit Strip,
## All-Natural Harvest Grape Fruit Strip,
## All-Natural Orchard Cherry Fruit Strip,
## Apples, Pumpkin & Carrots Organic Baby Food,
## Baby Food Pears Squash,
## Bananas, Raspberries & Oats Organic Baby Food,
## Diced Tomatoes,
## Electrolyte Enhanced Water,
## Ground Turkey Breast,
## Organic Chicken Broth,
## Organic Chicken Stock,
## Organic Chocolate Chip Chewy Granola Bars,
## Organic Grade A Large Brown Eggs,
## Organic Whole Kernel Sweet Corn No Salt Added,
## Ripened Raspberry Fruit Leather,
## Saffron Threads,
## Shredded Mild Cheddar Cheese,
## Stage 2 Pear Pumpkin Baby Food,
## Summer Strawberry Fruit Strip,
## Total 2% with Strawberry Lowfat Greek Strained Yogurt} 1000046
# Find frequent itemsets (at most 15 items each) whose support is at least
# 0.007.
frequentItems <- eclat(
  transactions,
  parameter = list(support = 0.007, maxlen = 15)
)
## Eclat
##
## parameter specification:
## tidLists support minlen maxlen target ext
## FALSE 0.007 1 15 frequent itemsets FALSE
##
## algorithmic control:
## sparse sort verbose
## 7 -2 TRUE
##
## Absolute minimum support count: 894
##
## create itemset ...
## set transactions ...[37591 item(s), 127803 transaction(s)] done [0.35s].
## sorting and recoding items ... [72 item(s)] done [0.01s].
## creating sparse bit matrix ... [72 row(s), 127803 column(s)] done [0.01s].
## writing ... [72 set(s)] done [0.03s].
## Creating S4 object ... done [0.00s].
# List every frequent itemset together with its support and count.
frequentItems %>%
  inspect()
## items support count
## [1] {Organic Whole Milk} 0.038402854 4908
## [2] {Sparkling Water Grapefruit} 0.026282638 3359
## [3] {Original Hummus} 0.022362542 2858
## [4] {100% Whole Wheat Bread} 0.017980799 2298
## [5] {Organic Half & Half} 0.020703739 2646
## [6] {Blueberries} 0.018176412 2323
## [7] {Half & Half} 0.018966691 2424
## [8] {Organic Whole String Cheese} 0.015594313 1993
## [9] {Uncured Genoa Salami} 0.013990282 1788
## [10] {Unsweetened Almondmilk} 0.016971433 2169
## [11] {Extra Virgin Olive Oil} 0.016181154 2068
## [12] {Lime Sparkling Water} 0.015383050 1966
## [13] {Organic Unsweetened Almond Milk} 0.017026204 2176
## [14] {Grated Parmesan} 0.013254775 1694
## [15] {Boneless Skinless Chicken Breasts} 0.016337645 2088
## [16] {Large Alfresco Eggs} 0.014850982 1898
## [17] {Organic Black Beans} 0.012331479 1576
## [18] {Organic Grade A Free Range Large Brown Eggs} 0.013825967 1767
## [19] {Sparkling Natural Mineral Water} 0.014107650 1803
## [20] {Organic Sticks Low Moisture Part Skim Mozzarella String Cheese} 0.010500536 1342
## [21] {2% Reduced Fat Milk} 0.012049795 1540
## [22] {Spring Water} 0.017409607 2225
## [23] {Honey Nut Cheerios} 0.009530293 1218
## [24] {Sparkling Lemon Water} 0.011134324 1423
## [25] {Organic Reduced Fat 2% Milk} 0.012010673 1535
## [26] {Unsalted Butter} 0.011228218 1435
## [27] {Sharp Cheddar Cheese} 0.009874573 1262
## [28] {Shredded Parmesan} 0.010359694 1324
## [29] {Pure Irish Butter} 0.011549025 1476
## [30] {Unsweetened Original Almond Breeze Almond Milk} 0.011462955 1465
## [31] {Feta Cheese Crumbles} 0.011337762 1449
## [32] {Organic Broccoli Florets} 0.010649202 1361
## [33] {Organic Whole Strawberries} 0.009491170 1213
## [34] {Organic Roasted Turkey Breast} 0.008786961 1123
## [35] {Lightly Salted Baked Snap Pea Crisps} 0.007754122 991
## [36] {100% Raw Coconut Water} 0.010156256 1298
## [37] {Whipped Cream Cheese} 0.008434857 1078
## [38] {Whole Milk} 0.010445764 1335
## [39] {No Salt Added Black Beans} 0.009780678 1250
## [40] {Vanilla Almond Breeze Almond Milk} 0.009991941 1277
## [41] {Unsweetened Vanilla Almond Milk} 0.009765029 1248
## [42] {Organic Garbanzo Beans} 0.008927803 1141
## [43] {Organic Extra Firm Tofu} 0.009279907 1186
## [44] {100% Recycled Paper Towels} 0.009256434 1183
## [45] {Sour Cream} 0.007996682 1022
## [46] {Shredded Mild Cheddar Cheese} 0.008106226 1036
## [47] {Frozen Organic Wild Blueberries} 0.008333138 1065
## [48] {Organic Reduced Fat Milk} 0.009436398 1206
## [49] {Fat Free Milk} 0.009545942 1220
## [50] {Creamy Peanut Butter} 0.007754122 991
## [51] {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.007613280 973
## [52] {Organic Large Brown Grade AA Cage Free Eggs} 0.008896505 1137
## [53] {Organic Sour Cream} 0.008192296 1047
## [54] {Original Orange Juice} 0.008247068 1054
## [55] {Oven Roasted Turkey Breast} 0.007018615 897
## [56] {Pineapple Chunks} 0.008427032 1077
## [57] {Pure Sparkling Water} 0.008802610 1125
## [58] {Organic Large Grade AA Brown Eggs} 0.008231419 1052
## [59] {Organic Coconut Milk} 0.008176647 1045
## [60] {Roasted Turkey Breast} 0.007261175 928
## [61] {Shredded Mozzarella} 0.007386368 944
## [62] {Organic Bread with 21 Whole Grains} 0.007339421 938
## [63] {Organic 2% Reduced Fat Milk} 0.007284649 931
## [64] {Italian Sparkling Mineral Water} 0.008857382 1132
## [65] {Ground Turkey Breast} 0.007495912 958
## [66] {Total 0% Greek Yogurt} 0.008184471 1046
## [67] {Peach Pear Flavored Sparkling Water} 0.007034264 899
## [68] {Boneless Skinless Chicken Breast} 0.007378544 943
## [69] {Total Greek Strained Yogurt} 0.007441140 951
## [70] {Total 0% Nonfat Greek Yogurt} 0.007769771 993
## [71] {Soda} 0.011791585 1507
## [72] {Natural Spring Water} 0.007433315 950
# Mine association rules from `transactions`, keeping only rules with
# support >= 0.001 and confidence >= 0.4.
rules <- apriori(
  transactions,
  parameter = list(support = 0.001, confidence = 0.4)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.4 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 127
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[37591 item(s), 127803 transaction(s)] done [0.33s].
## sorting and recoding items ... [1540 item(s)] done [0.01s].
## creating transaction tree ... done [0.05s].
## checking subsets of size 1 2 3 done [0.02s].
## writing ... [4 rule(s)] done [0.00s].
## creating S4 object ... done [0.03s].
# Order the rules from highest to lowest confidence, then print the first few.
rules_conf <- rules %>%
  sort(by = "confidence", decreasing = TRUE)
rules_conf %>%
  head() %>%
  inspect()
## lhs rhs support confidence lift count
## [1] {Lime Sparkling Water,
## Sparkling Lemon Water} => {Sparkling Water Grapefruit} 0.001158032 0.4512195 17.16797 148
## [2] {Sparkling Lemon Water,
## Sparkling Water Grapefruit} => {Lime Sparkling Water} 0.001158032 0.4498480 29.24310 148
## [3] {Nonfat Icelandic Style Strawberry Yogurt} => {Icelandic Style Skyr Blueberry Non-fat Yogurt} 0.001197155 0.4226519 76.61870 153
## [4] {Non Fat Acai & Mixed Berries Yogurt} => {Icelandic Style Skyr Blueberry Non-fat Yogurt} 0.001322348 0.4023810 72.94396 169
# Draw the mined rules as a graph using the htmlwidget engine.
plot(
  rules,
  method = "graph",
  engine = "htmlwidget"
)
# Restrict the order lines to department_id 16 and convert them to arules
# transactions.
# NOTE(review): the items printed below look like dairy and eggs -- confirm
# the meaning of department 16 against departments.csv.
# No grouping is applied: nothing here aggregates, and an ungrouped `tmp`
# cannot silently change the behaviour of later dplyr verbs.
tmp <- train %>%
  left_join(products, by = "product_id") %>%
  filter(department_id == 16)

# row.names = FALSE keeps R's row index out of the file, so the CSV columns
# are exactly the columns of `tmp`.
write.csv(tmp, file = "transactions.csv", row.names = FALSE)

# Columns are selected by name: with character `cols`, read.transactions()
# treats the first line of the file as a header, so the header row is not
# imported as a spurious transaction and the call does not depend on the
# position of the columns in the file.
transactions <- read.transactions(
  "transactions.csv",
  format = "single",
  sep = ",",
  cols = c("order_id", "product_name")
)
summary(transactions)
## transactions as itemMatrix in sparse format with
## 87401 rows (elements/itemsets/transactions) and
## 3034 columns (items) and a density of 0.0008185247
##
## most frequent items:
## Organic Whole Milk Organic Half & Half
## 4908 2646
## Half & Half Organic Unsweetened Almond Milk
## 2424 2176
## Unsweetened Almondmilk (Other)
## 2169 202729
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12
## 32679 22193 13857 8082 4731 2637 1469 785 437 233 136 58
## 13 14 15 16 17 18 19 20 21 22 23 24
## 38 23 13 8 3 4 4 2 2 1 2 1
## 25 27 29
## 1 1 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 2.483 3.000 29.000
##
## includes extended item information - examples:
## labels
## 1 0% Fat Black Cherry Greek Yogurt y
## 2 0% Fat Blueberry Greek Yogurt
## 3 0% Fat Free Organic Milk
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 100000
## 3 100003
# Print the first few transactions as a quick check of the import.
transactions %>%
  head() %>%
  inspect()
## items transactionID
## [1] {Bulgarian Yogurt,
## Organic 4% Milk Fat Whole Milk Cottage Cheese,
## Organic Whole String Cheese} 1
## [2] {Total 2% All Natural Plain Greek Yogurt} 100000
## [3] {Organic Salted Butter} 100003
## [4] {2% Reduced Fat DHA Omega-3 Reduced Fat Milk,
## Organic Grade A Large Brown Eggs,
## Shredded Mild Cheddar Cheese,
## Total 2% with Strawberry Lowfat Greek Strained Yogurt} 1000046
## [5] {2% Reduced Fat Milk,
## Natural Bliss Hazelnut Coffee Creamer,
## Total 2% Greek Strained Yogurt with Cherry 5.3 oz,
## Total 2% with Strawberry Lowfat Greek Strained Yogurt} 1000162
## [6] {Milk, Vitamin D} 1000209
# Find frequent itemsets (at most 15 items each) whose support is at least
# 0.007.
frequentItems <- eclat(
  transactions,
  parameter = list(support = 0.007, maxlen = 15)
)
## Eclat
##
## parameter specification:
## tidLists support minlen maxlen target ext
## FALSE 0.007 1 15 frequent itemsets FALSE
##
## algorithmic control:
## sparse sort verbose
## 7 -2 TRUE
##
## Absolute minimum support count: 611
##
## create itemset ...
## set transactions ...[3034 item(s), 87401 transaction(s)] done [0.06s].
## sorting and recoding items ... [64 item(s)] done [0.00s].
## creating sparse bit matrix ... [64 row(s), 87401 column(s)] done [0.01s].
## writing ... [64 set(s)] done [0.02s].
## Creating S4 object ... done [0.00s].
# List every frequent itemset together with its support and count.
frequentItems %>%
  inspect()
## items support count
## [1] {Organic Whole Milk} 0.056154964 4908
## [2] {Organic Half & Half} 0.030274253 2646
## [3] {Half & Half} 0.027734236 2424
## [4] {Organic Whole String Cheese} 0.022802943 1993
## [5] {Unsweetened Almondmilk} 0.024816650 2169
## [6] {Grated Parmesan} 0.019381929 1694
## [7] {Large Alfresco Eggs} 0.021715999 1898
## [8] {Organic Unsweetened Almond Milk} 0.024896740 2176
## [9] {Organic Grade A Free Range Large Brown Eggs} 0.020217160 1767
## [10] {Organic Sticks Low Moisture Part Skim Mozzarella String Cheese} 0.015354515 1342
## [11] {Organic Reduced Fat 2% Milk} 0.017562728 1535
## [12] {Unsalted Butter} 0.016418576 1435
## [13] {Shredded Parmesan} 0.015148568 1324
## [14] {Feta Cheese Crumbles} 0.016578758 1449
## [15] {Pure Irish Butter} 0.016887679 1476
## [16] {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.011132596 973
## [17] {Sharp Cheddar Cheese} 0.014439194 1262
## [18] {2% Reduced Fat Milk} 0.017619936 1540
## [19] {Whipped Cream Cheese} 0.012333955 1078
## [20] {Unsweetened Original Almond Breeze Almond Milk} 0.016761822 1465
## [21] {Organic Reduced Fat Milk} 0.013798469 1206
## [22] {Organic Sour Cream} 0.011979268 1047
## [23] {Shredded Mild Cheddar Cheese} 0.011853411 1036
## [24] {Organic Large Brown Grade AA Cage Free Eggs} 0.013009004 1137
## [25] {Unsweetened Vanilla Almond Milk} 0.014279013 1248
## [26] {Vanilla Almond Breeze Almond Milk} 0.014610817 1277
## [27] {Whole Milk} 0.015274425 1335
## [28] {Sour Cream} 0.011693230 1022
## [29] {Organic Large Grade AA Brown Eggs} 0.012036476 1052
## [30] {Total 2% All Natural Greek Strained Yogurt with Honey} 0.009267629 810
## [31] {Fat Free Milk} 0.013958650 1220
## [32] {Shredded Mozzarella} 0.010800792 944
## [33] {Total Greek Strained Yogurt} 0.010880882 951
## [34] {Icelandic Style Skyr Blueberry Non-fat Yogurt} 0.008066269 705
## [35] {Total 2% Greek Strained Yogurt with Cherry 5.3 oz} 0.007414103 648
## [36] {Total 2% Lowfat Greek Strained Yogurt With Blueberry} 0.007402661 647
## [37] {Total 2% Lowfat Greek Strained Yogurt with Peach} 0.007368337 644
## [38] {Organic Coconut Milk} 0.011956385 1045
## [39] {Total 0% Nonfat Greek Yogurt} 0.011361426 993
## [40] {Vanilla Skyr Nonfat Yogurt} 0.008661228 757
## [41] {Total 0% Greek Yogurt} 0.011967826 1046
## [42] {Salted Butter} 0.009244745 808
## [43] {Organic Heavy Whipping Cream} 0.009908353 866
## [44] {Organic 2% Reduced Fat Milk} 0.010652052 931
## [45] {YoKids Squeezers Organic Low-Fat Yogurt, Strawberry} 0.008386632 733
## [46] {Organic Fat Free Milk} 0.009279070 811
## [47] {Organic Milk} 0.009816821 858
## [48] {Heavy Whipping Cream} 0.010045652 878
## [49] {Organic American Cheese Singles} 0.007963296 696
## [50] {Organic Unsalted Butter} 0.008501047 743
## [51] {Organic Grade A Large Brown Eggs} 0.009999886 874
## [52] {Organic Mexican Blend Finely Shredded Cheese} 0.007597167 664
## [53] {Organic Large Grade A Brown Eggs} 0.008798526 769
## [54] {Organic Egg Whites} 0.009542225 834
## [55] {Unsalted Pure Irish Butter} 0.008420956 736
## [56] {2% Reduced Fat Organic Milk} 0.007848880 686
## [57] {Total 2% All Natural Plain Greek Yogurt} 0.007299688 638
## [58] {Cage Free Brown Eggs-Large, Grade A} 0.007894647 690
## [59] {Total 2% Lowfat Plain Greek Yogurt} 0.007185272 628
## [60] {Organic Large Brown Eggs} 0.007082299 619
## [61] {Grade A Large White Eggs} 0.007402661 647
## [62] {Total 0% Nonfat Plain Greek Yogurt} 0.007551401 660
## [63] {Toasted Coconut Almondmilk Blend} 0.007070857 618
## [64] {Reduced Fat 2% Milk} 0.008729877 763
# Mine association rules from `transactions`, keeping only rules with
# support >= 0.001 and confidence >= 0.5.
rules <- apriori(
  transactions,
  parameter = list(support = 0.001, confidence = 0.5)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 87
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[3034 item(s), 87401 transaction(s)] done [0.06s].
## sorting and recoding items ... [523 item(s)] done [0.00s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [1 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
# Order the rules from highest to lowest confidence, then print the first few.
rules_conf <- rules %>%
  sort(by = "confidence", decreasing = TRUE)
rules_conf %>%
  head() %>%
  inspect()
## lhs rhs support confidence lift count
## [1] {Total 2% Lowfat Greek Strained Yogurt With Blueberry,
## Total 2% Lowfat Greek Strained Yogurt with Peach} => {Total 2% with Strawberry Lowfat Greek Strained Yogurt} 0.001041178 0.5083799 45.66589 91
# Draw the mined rules as a graph using the htmlwidget engine.
plot(
  rules,
  method = "graph",
  engine = "htmlwidget"
)