library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
library(ggplot2)
#There are 7 distinct items: paper, pencils, pens, cards, ruler, calculator, folder.
#The following transactions were given and I am creating a vector for each transaction and presenting them as a list.
supply_transactions <- list(
c("paper", "pens", "pencils"),
c("pens", "ruler"),
c("pencils", "cards", "ruler"),
c("paper", "cards"),
c("pens", "pencils", "cards"),
c("paper", "ruler", "calculator"),
c("paper", "pens", "cards", "folder"),
c("pencils", "ruler", "folder"),
c("paper", "pencils", "cards", "ruler"),
c("pens", "cards")
)
#The as() function is converting the data to a transaction class object
receipts <- as(supply_transactions, "transactions")
receipts
## transactions in sparse format with
## 10 transactions (rows) and
## 7 items (columns)
itemFrequencyPlot(receipts, topN = 7, support=0.25,cex.names=0.8)

#This generates all frequent itemsets, with support greater than 0.25. There are 9.
itemsets <- apriori(receipts, parameter = list(supp = 0.25, target = "frequent itemsets"))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.25 1
## maxlen target ext
## 10 frequent itemsets TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 2
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[7 item(s), 10 transaction(s)] done [0.00s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## sorting transactions ... done [0.00s].
## writing ... [9 set(s)] done [0.00s].
## creating S4 object ... done [0.00s].
itemsets
## set of 9 itemsets
inspect(itemsets)
## items support count
## [1] {pens} 0.5 5
## [2] {ruler} 0.5 5
## [3] {pencils} 0.5 5
## [4] {paper} 0.5 5
## [5] {cards} 0.6 6
## [6] {cards, pens} 0.3 3
## [7] {pencils, ruler} 0.3 3
## [8] {cards, pencils} 0.3 3
## [9] {cards, paper} 0.3 3
#This generates all the rules with support greater than 0.25, confidence at least 0.50 and the lift values.
rules <- apriori(receipts, parameter = list(supp = 0.25, conf = 0.5, target = "rules"))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.5 0.1 1 none FALSE TRUE 5 0.25 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 2
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[7 item(s), 10 transaction(s)] done [0.00s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [13 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
rules
## set of 13 rules
summary(rules)
## set of 13 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2
## 5 8
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 1.000 2.000 1.615 2.000 2.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.3000 Min. :0.5000 Min. :0.5000 Min. :1.000
## 1st Qu.:0.3000 1st Qu.:0.5000 1st Qu.:0.5000 1st Qu.:1.000
## Median :0.3000 Median :0.5000 Median :0.6000 Median :1.000
## Mean :0.3846 Mean :0.5462 Mean :0.7154 Mean :1.031
## 3rd Qu.:0.5000 3rd Qu.:0.6000 3rd Qu.:1.0000 3rd Qu.:1.000
## Max. :0.6000 Max. :0.6000 Max. :1.0000 Max. :1.200
## count
## Min. :3.000
## 1st Qu.:3.000
## Median :3.000
## Mean :3.846
## 3rd Qu.:5.000
## Max. :6.000
##
## mining info:
## data ntransactions support confidence
## receipts 10 0.25 0.5
## call
## apriori(data = receipts, parameter = list(supp = 0.25, conf = 0.5, target = "rules"))
#Sort the lift values
rules_by_lift <- sort(rules, by = "lift", decreasing = TRUE)
rules_by_lift
## set of 13 rules
#Display the actual rules sorted by lift. Here there are 13.
inspect(rules_by_lift)
## lhs rhs support confidence coverage lift count
## [1] {ruler} => {pencils} 0.3 0.6 0.5 1.2 3
## [2] {pencils} => {ruler} 0.3 0.6 0.5 1.2 3
## [3] {} => {pens} 0.5 0.5 1.0 1.0 5
## [4] {} => {ruler} 0.5 0.5 1.0 1.0 5
## [5] {} => {pencils} 0.5 0.5 1.0 1.0 5
## [6] {} => {paper} 0.5 0.5 1.0 1.0 5
## [7] {} => {cards} 0.6 0.6 1.0 1.0 6
## [8] {pens} => {cards} 0.3 0.6 0.5 1.0 3
## [9] {cards} => {pens} 0.3 0.5 0.6 1.0 3
## [10] {pencils} => {cards} 0.3 0.6 0.5 1.0 3
## [11] {cards} => {pencils} 0.3 0.5 0.6 1.0 3
## [12] {paper} => {cards} 0.3 0.6 0.5 1.0 3
## [13] {cards} => {paper} 0.3 0.5 0.6 1.0 3
#Creates dataframe with all rules in original itemset order
df_rules <- as(rules, "data.frame")
df_rules
## rules support confidence coverage lift count
## 1 {} => {pens} 0.5 0.5 1.0 1.0 5
## 2 {} => {ruler} 0.5 0.5 1.0 1.0 5
## 3 {} => {pencils} 0.5 0.5 1.0 1.0 5
## 4 {} => {paper} 0.5 0.5 1.0 1.0 5
## 5 {} => {cards} 0.6 0.6 1.0 1.0 6
## 6 {pens} => {cards} 0.3 0.6 0.5 1.0 3
## 7 {cards} => {pens} 0.3 0.5 0.6 1.0 3
## 8 {ruler} => {pencils} 0.3 0.6 0.5 1.2 3
## 9 {pencils} => {ruler} 0.3 0.6 0.5 1.2 3
## 10 {pencils} => {cards} 0.3 0.6 0.5 1.0 3
## 11 {cards} => {pencils} 0.3 0.5 0.6 1.0 3
## 12 {paper} => {cards} 0.3 0.6 0.5 1.0 3
## 13 {cards} => {paper} 0.3 0.5 0.6 1.0 3
# Sort dataframe descending by lift
df_sorted <- df_rules[order(df_rules$lift, decreasing = TRUE), ]
# Base scatterplot object
p <- plot(
rules,
measure = c("support", "confidence"), # support on x, confidence on y
shading = "lift",
jitter = TRUE
)
# Add custom axis breaks (adjust 'by' to suit your data)
# For a y-axis step of 0.1 (or change to 1)
p +
scale_y_continuous(breaks = seq(0, 1, by = 0.1)) +
scale_x_continuous(breaks = seq(0, max(quality(rules)$support), by = 0.05))

#Visuals of the metrics; graphs used in previous project
#jitter = 0 rather than jitter = TRUE spreads the data points so they are distinguishable, rather than on top of one another
plot(rules, shading = "lift", jitter = 0)

plot(rules, method = "two-key plot", shading = "order")
## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

plot(rules, method = "graph", control = list(type = "items"))
## Warning: Unknown control parameters: type
## Available control parameters (with default values):
## layout = stress
## circular = FALSE
## ggraphdots = NULL
## edges = <environment>
## nodes = <environment>
## nodetext = <environment>
## colors = c("#EE0000FF", "#EEEEEEFF")
## engine = ggplot2
## max = 100
## verbose = FALSE
