# Load the necessary libraries
library('arules')
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library('arulesViz')
# Load the Groceries transactions and print their dimensions.
data(Groceries)
print(Groceries)
## transactions in sparse format with
## 9835 transactions (rows) and
## 169 items (columns)
# Summarise the dataset: density, most frequent items, and the
# distribution of transaction sizes.
summary(object = Groceries)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55 46
## 17 18 19 20 21 22 23 24 26 27 28 29 32
## 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels level2 level1
## 1 frankfurter sausage meat and sausage
## 2 sausage sausage meat and sausage
## 3 liver loaf sausage meat and sausage
# Display the first 10 transactions of the Groceries dataset as comma-separated item labels.
apply(
  Groceries@data[, 1:10],
  MARGIN = 2,
  FUN = function(in_basket) {
    # `in_basket` is one column of the data matrix; it selects the rows of
    # the item table whose labels are joined into a single string.
    basket_items <- Groceries@itemInfo[in_basket, "labels"]
    paste(basket_items, collapse = ", ")
  }
)
## [1] "citrus fruit, semi-finished bread, margarine, ready soups"
## [2] "tropical fruit, yogurt, coffee"
## [3] "whole milk"
## [4] "pip fruit, yogurt, cream cheese , meat spreads"
## [5] "other vegetables, whole milk, condensed milk, long life bakery product"
## [6] "whole milk, butter, yogurt, rice, abrasive cleaner"
## [7] "rolls/buns"
## [8] "other vegetables, UHT-milk, rolls/buns, bottled beer, liquor (appetizer)"
## [9] "pot plants"
## [10] "whole milk, cereals"
# Bar chart of absolute counts for the 10 most frequently purchased items.
itemFrequencyPlot(
  Groceries,
  topN = 10,
  type = "absolute",
  # five colours are supplied for the ten bars
  col = c("red4", "yellow4", "magenta", "tan", "green4"),
  main = "Top 10 Most Frequently Purchased Items"
)

# Display the top 10 itemsets with the highest support
itemsets <- apriori(
  data = Groceries,
  parameter = list(
    minlen = 1,
    support = 0.02, # minimum relative support for an itemset to be kept
    target = "frequent itemsets"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.02 1
## maxlen target ext
## 10 frequent itemsets TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 196
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [59 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 done [0.00s].
## sorting transactions ... done [0.00s].
## writing ... [122 set(s)] done [0.00s].
## creating S4 object ... done [0.00s].
inspect(
  head(
    sort(itemsets, by = "support", decreasing = TRUE),
    n = 10
  )
)
## items support count
## [1] {whole milk} 0.25551601 2513
## [2] {other vegetables} 0.19349263 1903
## [3] {rolls/buns} 0.18393493 1809
## [4] {soda} 0.17437722 1715
## [5] {yogurt} 0.13950178 1372
## [6] {bottled water} 0.11052364 1087
## [7] {root vegetables} 0.10899847 1072
## [8] {tropical fruit} 0.10493137 1032
## [9] {shopping bags} 0.09852567 969
## [10] {sausage} 0.09395018 924
# Rule generation and visualization: mine association rules with
# minimum support 0.001 and minimum confidence 0.6.
rules <- apriori(
  data = Groceries,
  parameter = list(
    support = 0.001,
    confidence = 0.6,
    target = "rules"
  )
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.6 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [2918 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Summarise the mined rules: rule-length distribution and the
# quality measures (support, confidence, coverage, lift, count).
summary(object = rules)
## set of 2918 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4 5 6
## 3 490 1765 626 34
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 4.000 4.000 4.068 4.000 6.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.6000 Min. :0.001017 Min. : 2.348
## 1st Qu.:0.001118 1st Qu.:0.6316 1st Qu.:0.001525 1st Qu.: 2.668
## Median :0.001220 Median :0.6818 Median :0.001830 Median : 3.168
## Mean :0.001480 Mean :0.7028 Mean :0.002157 Mean : 3.450
## 3rd Qu.:0.001525 3rd Qu.:0.7500 3rd Qu.:0.002339 3rd Qu.: 3.692
## Max. :0.009354 Max. :1.0000 Max. :0.014642 Max. :18.996
## count
## Min. :10.00
## 1st Qu.:11.00
## Median :12.00
## Mean :14.55
## 3rd Qu.:15.00
## Max. :92.00
##
## mining info:
## data ntransactions support confidence
## Groceries 9835 0.001 0.6
## call
## apriori(data = Groceries, parameter = list(support = 0.001, confidence = 0.6, target = "rules"))
# Scatter plot of all mined rules (minimum support 0.001, minimum
# confidence 0.6). The rule count in the title is taken from length(rules)
# so the title stays correct if the mining thresholds are changed.
plot(rules, main = paste("Scatter Plot For", length(rules), "rules"))
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

# Scatterplot matrix of the support, confidence, and lift of the mined rules.
# Columns are selected by name rather than by position: in the quality table
# `coverage` sits between `confidence` and `lift`, so a positional index
# silently picks the wrong measure if the column layout differs.
plot(quality(rules)[, c("support", "confidence", "lift")], col = "red4")

# lift / confidence equals 1 / support(Y) for each rule; round to two
# decimals and sort ascending.
slope <- with(quality(rules), sort(round(lift / confidence, digits = 2)))
# Count how many rules share each distinct slope value.
lengths(split(slope, slope))
## 3.91 5.17 5.44 5.73 7.17 9.05 9.17 9.53 10.64 12.08 12.42 13.22 13.83
## 1585 940 12 7 188 1 102 55 1 4 1 5 2
## 13.95 18.05 23.76 26.44 30.08
## 9 3 1 1 1
# Display some of the generated rules, sorted by lift, to see which items appear in the strongest rules
inspect(
  head(
    sort(rules, by = "lift", decreasing = TRUE),
    n = 10
  )
)
## lhs rhs support confidence coverage lift count
## [1] {Instant food products,
## soda} => {hamburger meat} 0.001220132 0.6315789 0.001931876 18.995654 12
## [2] {soda,
## popcorn} => {salty snack} 0.001220132 0.6315789 0.001931876 16.697793 12
## [3] {ham,
## processed cheese} => {white bread} 0.001931876 0.6333333 0.003050330 15.045491 19
## [4] {tropical fruit,
## other vegetables,
## yogurt,
## white bread} => {butter} 0.001016777 0.6666667 0.001525165 12.030581 10
## [5] {hamburger meat,
## yogurt,
## whipped/sour cream} => {butter} 0.001016777 0.6250000 0.001626843 11.278670 10
## [6] {tropical fruit,
## other vegetables,
## whole milk,
## yogurt,
## domestic eggs} => {butter} 0.001016777 0.6250000 0.001626843 11.278670 10
## [7] {liquor,
## red/blush wine} => {bottled beer} 0.001931876 0.9047619 0.002135231 11.235269 19
## [8] {other vegetables,
## butter,
## sugar} => {whipped/sour cream} 0.001016777 0.7142857 0.001423488 9.964539 10
## [9] {whole milk,
## butter,
## hard cheese} => {whipped/sour cream} 0.001423488 0.6666667 0.002135231 9.300236 14
## [10] {tropical fruit,
## other vegetables,
## butter,
## fruit/vegetable juice} => {whipped/sour cream} 0.001016777 0.6666667 0.001525165 9.300236 10
# Fetch the rules whose confidence is above 0.9
confidentRules <- subset(rules, subset = confidence > 0.9)
inspect(
  head(
    sort(confidentRules, by = "confidence", decreasing = TRUE),
    n = 10
  )
)
## lhs rhs support confidence coverage lift count
## [1] {rice,
## sugar} => {whole milk} 0.001220132 1 0.001220132 3.913649 12
## [2] {canned fish,
## hygiene articles} => {whole milk} 0.001118454 1 0.001118454 3.913649 11
## [3] {root vegetables,
## butter,
## rice} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [4] {root vegetables,
## whipped/sour cream,
## flour} => {whole milk} 0.001728521 1 0.001728521 3.913649 17
## [5] {butter,
## soft cheese,
## domestic eggs} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [6] {citrus fruit,
## root vegetables,
## soft cheese} => {other vegetables} 0.001016777 1 0.001016777 5.168156 10
## [7] {pip fruit,
## butter,
## hygiene articles} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [8] {root vegetables,
## whipped/sour cream,
## hygiene articles} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [9] {pip fruit,
## root vegetables,
## hygiene articles} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [10] {cream cheese ,
## domestic eggs,
## sugar} => {whole milk} 0.001118454 1 0.001118454 3.913649 11
# Graph visualization of the five rules with the highest lift.
# No `control = list(type = "items")` is passed: the installed arulesViz
# graph method does not recognise `type` and only answers it with an
# "Unknown control parameters: type" warning.
top_5 <- head(sort(rules, by = "lift"), 5)
plot(top_5, method = "graph")
