This project applies the Apriori algorithm to a market-basket dataset. For more information about these topics, please check the following links:
First it’s important to define the Apriori algorithm, including some statistical concepts (support, confidence, lift and conviction) to select interesting rules.
If you want more information about these measures, please check the documentation of the arules package (see `?interestMeasure`).
library(arules)
library(plotly)
library(arulesViz)
library(tidyverse)
library(ggplot2)
library(dplyr)
library(knitr)
library(gridExtra)
# Load the baskets directly as an arules `transactions` object (items within a
# basket are separated by commas). `rm.duplicates = TRUE` drops items that are
# repeated within the same basket.
# The former `read.csv()` call was removed: its result was overwritten by this
# assignment before ever being used.
dataset <- read.transactions(
  "Market_Basket_Optimisation.csv",
  sep = ",",
  rm.duplicates = TRUE
)
## distribution of transactions with duplicates:
## 1
## 5
# Show the transactions object (number of transactions and items)
print(dataset)
## transactions in sparse format with
## 7501 transactions (rows) and
## 119 items (columns)
# Summary: density, most frequent items and basket-size distribution
dataset %>% summary()
## transactions as itemMatrix in sparse format with
## 7501 rows (elements/itemsets/transactions) and
## 119 columns (items) and a density of 0.03288973
##
## most frequent items:
## mineral water eggs spaghetti french fries chocolate
## 1788 1348 1306 1282 1229
## (Other)
## 22405
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 1754 1358 1044 816 667 493 391 324 259 139 102 67 40 22 17 4
## 18 19 20
## 1 2 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 3.914 5.000 20.000
##
## includes extended item information - examples:
## labels
## 1 almonds
## 2 antioxydant juice
## 3 asparagus
# Internal structure of the transactions object (S4 slots)
str(object = dataset)
## Formal class 'transactions' [package "arules"] with 3 slots
## ..@ data :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
## .. .. ..@ i : int [1:29358] 0 1 3 32 38 47 52 53 59 64 ...
## .. .. ..@ p : int [1:7502] 0 20 23 24 26 31 32 34 37 40 ...
## .. .. ..@ Dim : int [1:2] 119 7501
## .. .. ..@ Dimnames:List of 2
## .. .. .. ..$ : NULL
## .. .. .. ..$ : NULL
## .. .. ..@ factors : list()
## ..@ itemInfo :'data.frame': 119 obs. of 1 variable:
## .. ..$ labels: chr [1:119] "almonds" "antioxydant juice" "asparagus" "avocado" ...
## ..@ itemsetInfo:'data.frame': 0 obs. of 0 variables
# Compact, top-level view of the same structure
dataset %>% glimpse()
## Formal class 'transactions' [package "arules"] with 3 slots
## ..@ data :Formal class 'ngCMatrix' [package "Matrix"] with 5 slots
## ..@ itemInfo :'data.frame': 119 obs. of 1 variable:
## .. ..$ labels: chr [1:119] "almonds" "antioxydant juice" "asparagus" "avocado" ...
## ..@ itemsetInfo:'data.frame': 0 obs. of 0 variables
# Bar chart of the 15 most frequent items, as absolute counts
itemFrequencyPlot(
  dataset,
  type = "absolute",
  topN = 15,
  main = "Absolute Item Frequency Plot",
  xlab = "Item name",
  ylab = "Frequency (absolute)",
  col = "wheat2"
)
# Bar chart of the 15 most frequent items, as a share of all transactions
itemFrequencyPlot(
  dataset,
  type = "relative",
  topN = 15,
  main = "Relative Item Frequency Plot",
  xlab = "Item name",
  ylab = "Frequency (relative)",
  col = "lightcyan2"
)
# Apriori algorithm: choice of support and confidence. The following graphs show the number of rules generated with support levels of 10%, 5%, 1% and 0.5%.
# Mine association rules with a minimum support of 0.3% and a minimum
# confidence of 20%.
rules <- apriori(
  dataset,
  parameter = list(
    support = 0.003,
    confidence = 0.2
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.003 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 22
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[119 item(s), 7501 transaction(s)] done [0.00s].
## sorting and recoding items ... [115 item(s)] done [0.00s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 done [0.01s].
## writing ... [1348 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Support and confidence values
supportLevels <- c(0.1, 0.05, 0.01, 0.005)
confidenceLevels <- c(0.9, 0.8, 0.7, 0.6, 0.5, 0.4, 0.3, 0.2, 0.1)

# Count the rules apriori finds on `dataset` at one support level, once for
# every confidence level. Returns an integer vector aligned with
# `confidence_levels`.
count_rules <- function(support_level, confidence_levels = confidenceLevels) {
  vapply(
    confidence_levels,
    function(confidence_level) {
      mined <- apriori(
        dataset,
        parameter = list(
          support = support_level,
          confidence = confidence_level,
          target = "rules"
        ),
        # Silence the per-run progress log; only the rule count is needed.
        control = list(verbose = FALSE)
      )
      as.integer(length(mined))
    },
    integer(1)
  )
}

# Number of rules per confidence level, one vector per support level
rules_sup10 <- count_rules(supportLevels[1])
rules_sup5 <- count_rules(supportLevels[2])
rules_sup1 <- count_rules(supportLevels[3])
rules_sup0.5 <- count_rules(supportLevels[4])

# Line chart of rule count against confidence level for one support level.
plot_rule_counts <- function(rule_counts, support_label,
                             confidence_levels = confidenceLevels) {
  ggplot(
    data.frame(confidence = confidence_levels, rules = rule_counts),
    aes(x = confidence, y = rules)
  ) +
    geom_point() +
    geom_line() +
    labs(
      x = "Confidence level",
      y = "Number of rules found",
      title = paste("Apriori with a support level of", support_label)
    ) +
    theme_bw()
}

# These four plots were previously passed to grid.arrange() without ever being
# created, which stopped the script with "object 'plot1' not found".
plot1 <- plot_rule_counts(rules_sup10, "10%")
plot2 <- plot_rule_counts(rules_sup5, "5%")
plot3 <- plot_rule_counts(rules_sup1, "1%")
plot4 <- plot_rule_counts(rules_sup0.5, "0.5%")

# Subplot
grid.arrange(plot1, plot2, plot3, plot4, ncol = 2)
# Ten rules with the highest lift. head() with `by` sorts in decreasing order
# and, unlike `[1:10]`, does not fail when fewer than ten rules were mined.
inspect(head(rules, n = 10, by = "lift"))
## lhs rhs support
## [1] {mineral water,whole wheat pasta} => {olive oil} 0.003866151
## [2] {frozen vegetables,milk,mineral water} => {soup} 0.003066258
## [3] {fromage blanc} => {honey} 0.003332889
## [4] {spaghetti,tomato sauce} => {ground beef} 0.003066258
## [5] {light cream} => {chicken} 0.004532729
## [6] {pasta} => {escalope} 0.005865885
## [7] {french fries,herb & pepper} => {ground beef} 0.003199573
## [8] {cereals,spaghetti} => {ground beef} 0.003066258
## [9] {frozen vegetables,mineral water,soup} => {milk} 0.003066258
## [10] {french fries,ground beef} => {herb & pepper} 0.003199573
## confidence coverage lift count
## [1] 0.4027778 0.009598720 6.115863 29
## [2] 0.2771084 0.011065191 5.484407 23
## [3] 0.2450980 0.013598187 5.164271 25
## [4] 0.4893617 0.006265831 4.980600 23
## [5] 0.2905983 0.015597920 4.843951 34
## [6] 0.3728814 0.015731236 4.700812 44
## [7] 0.4615385 0.006932409 4.697422 24
## [8] 0.4600000 0.006665778 4.681764 23
## [9] 0.6052632 0.005065991 4.670863 23
## [10] 0.2307692 0.013864818 4.665768 24
# Interactive scatter plot of all rules.
# plotly_arules() is deprecated in arulesViz; the plotly engine of plot() is
# its documented replacement.
plot(rules, engine = "plotly")
### First rows of the quality measures stored with the rules
rules %>%
  quality() %>%
  head()
## support confidence coverage lift count
## 1 0.238368218 0.2383682 1.000000000 1.000000 1788
## 2 0.003732836 0.4827586 0.007732302 2.772720 28
## 3 0.003199573 0.4137931 0.007732302 1.735941 24
## 4 0.003066258 0.3382353 0.009065458 2.560144 23
## 5 0.003066258 0.3239437 0.009465405 1.860568 23
## 6 0.003466205 0.3661972 0.009465405 1.536267 26
# Scatter plot of all rules: support and lift on the axes, confidence as the
# shading.
# The axes are selected with `measure`. The previous `interestMeasure` argument
# was not recognised by arulesViz (it was reported as an unknown control
# parameter), so the requested axes were never applied.
plot(rules, measure = c("support", "lift"), shading = "confidence", jitter = 0)
# Keep the ten rules with the highest lift for the detailed visualisations
subrules2 <- head(rules, n = 10, by = "lift")

# Graph view, nodes arranged on a circle
plot(
  subrules2,
  method = "graph",
  control = list(layout = igraph::in_circle())
)

# Parallel coordinates view
plot(
  subrules2,
  method = "paracoord",
  control = list(reorder = TRUE)
)

# Grouped matrix view
plot(subrules2, method = "grouped")

# Scatter view: support and lift as measures, confidence as shading
plot(
  subrules2,
  measure = c("support", "lift"),
  shading = "confidence",
  jitter = 0
)
# Output: the fifty rules with the highest lift. head() with `by` sorts in
# decreasing order and, unlike `[1:50]`, does not fail when fewer than fifty
# rules were mined.
inspect(head(rules, n = 50, by = "lift"))
## lhs rhs support confidence coverage lift count
## [1] {mineral water,
## whole wheat pasta} => {olive oil} 0.003866151 0.4027778 0.009598720 6.115863 29
## [2] {frozen vegetables,
## milk,
## mineral water} => {soup} 0.003066258 0.2771084 0.011065191 5.484407 23
## [3] {fromage blanc} => {honey} 0.003332889 0.2450980 0.013598187 5.164271 25
## [4] {spaghetti,
## tomato sauce} => {ground beef} 0.003066258 0.4893617 0.006265831 4.980600 23
## [5] {light cream} => {chicken} 0.004532729 0.2905983 0.015597920 4.843951 34
## [6] {pasta} => {escalope} 0.005865885 0.3728814 0.015731236 4.700812 44
## [7] {french fries,
## herb & pepper} => {ground beef} 0.003199573 0.4615385 0.006932409 4.697422 24
## [8] {cereals,
## spaghetti} => {ground beef} 0.003066258 0.4600000 0.006665778 4.681764 23
## [9] {frozen vegetables,
## mineral water,
## soup} => {milk} 0.003066258 0.6052632 0.005065991 4.670863 23
## [10] {french fries,
## ground beef} => {herb & pepper} 0.003199573 0.2307692 0.013864818 4.665768 24
## [11] {chocolate,
## frozen vegetables,
## mineral water} => {shrimp} 0.003199573 0.3287671 0.009732036 4.600900 24
## [12] {frozen vegetables,
## milk,
## mineral water} => {olive oil} 0.003332889 0.3012048 0.011065191 4.573557 25
## [13] {pasta} => {shrimp} 0.005065991 0.3220339 0.015731236 4.506672 38
## [14] {chocolate,
## herb & pepper} => {ground beef} 0.003999467 0.4411765 0.009065458 4.490183 30
## [15] {chocolate,
## mineral water,
## shrimp} => {frozen vegetables} 0.003199573 0.4210526 0.007598987 4.417225 24
## [16] {cake,
## frozen vegetables} => {tomatoes} 0.003066258 0.2987013 0.010265298 4.367560 23
## [17] {milk,
## tomatoes} => {soup} 0.003066258 0.2190476 0.013998134 4.335293 23
## [18] {eggs,
## ground beef} => {herb & pepper} 0.004132782 0.2066667 0.019997334 4.178455 31
## [19] {milk,
## olive oil} => {soup} 0.003599520 0.2109375 0.017064391 4.174781 27
## [20] {whole wheat pasta} => {olive oil} 0.007998933 0.2714932 0.029462738 4.122410 60
## [21] {milk,
## mineral water,
## olive oil} => {frozen vegetables} 0.003332889 0.3906250 0.008532196 4.098011 25
## [22] {mineral water,
## shrimp,
## spaghetti} => {frozen vegetables} 0.003332889 0.3906250 0.008532196 4.098011 25
## [23] {herb & pepper,
## spaghetti} => {ground beef} 0.006399147 0.3934426 0.016264498 4.004360 48
## [24] {herb & pepper,
## milk} => {ground beef} 0.003599520 0.3913043 0.009198773 3.982597 27
## [25] {herb & pepper,
## mineral water} => {ground beef} 0.006665778 0.3906250 0.017064391 3.975683 50
## [26] {chocolate,
## frozen vegetables,
## spaghetti} => {ground beef} 0.003066258 0.3898305 0.007865618 3.967597 23
## [27] {frozen vegetables,
## mineral water,
## olive oil} => {milk} 0.003332889 0.5102041 0.006532462 3.937285 25
## [28] {frozen vegetables,
## mineral water,
## spaghetti} => {shrimp} 0.003332889 0.2777778 0.011998400 3.887334 25
## [29] {cereals,
## ground beef} => {spaghetti} 0.003066258 0.6764706 0.004532729 3.885303 23
## [30] {frozen vegetables,
## soup} => {milk} 0.003999467 0.5000000 0.007998933 3.858539 30
## [31] {chicken,
## olive oil} => {milk} 0.003599520 0.5000000 0.007199040 3.858539 27
## [32] {tomato sauce} => {ground beef} 0.005332622 0.3773585 0.014131449 3.840659 40
## [33] {cake,
## tomatoes} => {frozen vegetables} 0.003066258 0.3650794 0.008398880 3.830014 23
## [34] {mushroom cream sauce} => {escalope} 0.005732569 0.3006993 0.019064125 3.790833 43
## [35] {frozen vegetables,
## milk,
## spaghetti} => {ground beef} 0.003066258 0.3709677 0.008265565 3.775616 23
## [36] {milk,
## mineral water,
## soup} => {frozen vegetables} 0.003066258 0.3593750 0.008532196 3.770170 23
## [37] {shrimp,
## tomatoes} => {frozen vegetables} 0.003999467 0.3571429 0.011198507 3.746753 30
## [38] {frozen vegetables,
## mineral water,
## spaghetti} => {tomatoes} 0.003066258 0.2555556 0.011998400 3.736690 23
## [39] {frozen vegetables,
## mineral water,
## spaghetti} => {ground beef} 0.004399413 0.3666667 0.011998400 3.731841 33
## [40] {chocolate,
## mineral water,
## spaghetti} => {olive oil} 0.003866151 0.2436975 0.015864551 3.700354 29
## [41] {chicken,
## milk} => {olive oil} 0.003599520 0.2432432 0.014798027 3.693457 27
## [42] {frozen smoothie,
## mineral water,
## spaghetti} => {milk} 0.003199573 0.4705882 0.006799093 3.631566 24
## [43] {milk,
## soup} => {olive oil} 0.003599520 0.2368421 0.015197974 3.596260 27
## [44] {ground beef,
## milk,
## mineral water} => {frozen vegetables} 0.003732836 0.3373494 0.011065191 3.539102 28
## [45] {milk,
## olive oil} => {chicken} 0.003599520 0.2109375 0.017064391 3.516094 27
## [46] {olive oil,
## tomatoes} => {spaghetti} 0.004399413 0.6111111 0.007199040 3.509912 33
## [47] {frozen vegetables,
## shrimp} => {tomatoes} 0.003999467 0.2400000 0.016664445 3.509240 30
## [48] {spaghetti,
## whole wheat pasta} => {milk} 0.003999467 0.4545455 0.008798827 3.507763 30
## [49] {frozen vegetables,
## spaghetti} => {tomatoes} 0.006665778 0.2392344 0.027862952 3.498046 50
## [50] {chocolate,
## ground beef,
## spaghetti} => {frozen vegetables} 0.003066258 0.3333333 0.009198773 3.496970 23