Association Rule Mining - Apriori Algorithm

library(arules)

## Loading required package: Matrix

## 
## Attaching package: 'arules'

## The following objects are masked from 'package:base':
## 
##     abbreviate, write

library(arulesViz)
library(ggplot2)


#There are 7 distinct items: paper, pencils, pens, cards, ruler, calculator, folder.
#The following transactions were given and I am creating a vector for each transaction and presenting them as a list.

supply_transactions <- list(
  c("paper", "pens", "pencils"),
  c("pens", "ruler"),
  c("pencils", "cards", "ruler"),
  c("paper", "cards"),
  c("pens", "pencils", "cards"),
  c("paper", "ruler", "calculator"),
  c("paper", "pens", "cards", "folder"),
  c("pencils", "ruler", "folder"),
  c("paper", "pencils", "cards", "ruler"),
  c("pens", "cards")
)

#The as() function is converting the data to a transaction class object 

receipts <- as(supply_transactions, "transactions")
receipts

## transactions in sparse format with
##  10 transactions (rows) and
##  7 items (columns)

itemFrequencyPlot(receipts, topN = 7, support=0.25,cex.names=0.8)

#This generates all frequent itemsets, with support greater than 0.25. There are 9.
itemsets <- apriori(receipts, parameter = list(supp = 0.25, target = "frequent itemsets"))

## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##          NA    0.1    1 none FALSE            TRUE       5    0.25      1
##  maxlen            target  ext
##      10 frequent itemsets TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 2 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[7 item(s), 10 transaction(s)] done [0.00s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## sorting transactions ... done [0.00s].
## writing ... [9 set(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

itemsets

## set of 9 itemsets

inspect(itemsets)

##     items            support count
## [1] {pens}           0.5     5    
## [2] {ruler}          0.5     5    
## [3] {pencils}        0.5     5    
## [4] {paper}          0.5     5    
## [5] {cards}          0.6     6    
## [6] {cards, pens}    0.3     3    
## [7] {pencils, ruler} 0.3     3    
## [8] {cards, pencils} 0.3     3    
## [9] {cards, paper}   0.3     3

#This generates all the rules with support greater than 0.25, confidence at least 0.50 and the lift values.
rules <- apriori(receipts, parameter = list(supp = 0.25, conf = 0.5, target = "rules"))

## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.5    0.1    1 none FALSE            TRUE       5    0.25      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 2 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[7 item(s), 10 transaction(s)] done [0.00s].
## sorting and recoding items ... [5 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 done [0.00s].
## writing ... [13 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].

rules

## set of 13 rules

summary(rules)

## set of 13 rules
## 
## rule length distribution (lhs + rhs):sizes
## 1 2 
## 5 8 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   2.000   1.615   2.000   2.000 
## 
## summary of quality measures:
##     support         confidence        coverage           lift      
##  Min.   :0.3000   Min.   :0.5000   Min.   :0.5000   Min.   :1.000  
##  1st Qu.:0.3000   1st Qu.:0.5000   1st Qu.:0.5000   1st Qu.:1.000  
##  Median :0.3000   Median :0.5000   Median :0.6000   Median :1.000  
##  Mean   :0.3846   Mean   :0.5462   Mean   :0.7154   Mean   :1.031  
##  3rd Qu.:0.5000   3rd Qu.:0.6000   3rd Qu.:1.0000   3rd Qu.:1.000  
##  Max.   :0.6000   Max.   :0.6000   Max.   :1.0000   Max.   :1.200  
##      count      
##  Min.   :3.000  
##  1st Qu.:3.000  
##  Median :3.000  
##  Mean   :3.846  
##  3rd Qu.:5.000  
##  Max.   :6.000  
## 
## mining info:
##      data ntransactions support confidence
##  receipts            10    0.25        0.5
##                                                                                   call
##  apriori(data = receipts, parameter = list(supp = 0.25, conf = 0.5, target = "rules"))

#Sort the lift values
rules_by_lift <- sort(rules, by = "lift", decreasing = TRUE)
rules_by_lift

## set of 13 rules

#Display the actual rules sorted by lift. Here there are 13.
inspect(rules_by_lift)

##      lhs          rhs       support confidence coverage lift count
## [1]  {ruler}   => {pencils} 0.3     0.6        0.5      1.2  3    
## [2]  {pencils} => {ruler}   0.3     0.6        0.5      1.2  3    
## [3]  {}        => {pens}    0.5     0.5        1.0      1.0  5    
## [4]  {}        => {ruler}   0.5     0.5        1.0      1.0  5    
## [5]  {}        => {pencils} 0.5     0.5        1.0      1.0  5    
## [6]  {}        => {paper}   0.5     0.5        1.0      1.0  5    
## [7]  {}        => {cards}   0.6     0.6        1.0      1.0  6    
## [8]  {pens}    => {cards}   0.3     0.6        0.5      1.0  3    
## [9]  {cards}   => {pens}    0.3     0.5        0.6      1.0  3    
## [10] {pencils} => {cards}   0.3     0.6        0.5      1.0  3    
## [11] {cards}   => {pencils} 0.3     0.5        0.6      1.0  3    
## [12] {paper}   => {cards}   0.3     0.6        0.5      1.0  3    
## [13] {cards}   => {paper}   0.3     0.5        0.6      1.0  3

#Creates dataframe with all rules in original itemset order
df_rules <- as(rules, "data.frame")
df_rules

##                   rules support confidence coverage lift count
## 1          {} => {pens}     0.5        0.5      1.0  1.0     5
## 2         {} => {ruler}     0.5        0.5      1.0  1.0     5
## 3       {} => {pencils}     0.5        0.5      1.0  1.0     5
## 4         {} => {paper}     0.5        0.5      1.0  1.0     5
## 5         {} => {cards}     0.6        0.6      1.0  1.0     6
## 6     {pens} => {cards}     0.3        0.6      0.5  1.0     3
## 7     {cards} => {pens}     0.3        0.5      0.6  1.0     3
## 8  {ruler} => {pencils}     0.3        0.6      0.5  1.2     3
## 9  {pencils} => {ruler}     0.3        0.6      0.5  1.2     3
## 10 {pencils} => {cards}     0.3        0.6      0.5  1.0     3
## 11 {cards} => {pencils}     0.3        0.5      0.6  1.0     3
## 12   {paper} => {cards}     0.3        0.6      0.5  1.0     3
## 13   {cards} => {paper}     0.3        0.5      0.6  1.0     3

# Sort dataframe descending by lift
df_sorted <- df_rules[order(df_rules$lift, decreasing = TRUE), ]

# Base scatterplot object
p <- plot(
  rules,
  measure = c("support", "confidence"),  # support on x, confidence on y
  shading = "lift",
  jitter = TRUE
)

# Add custom axis breaks (adjust 'by' to suit your data)
# For a y-axis step of 0.1 (or change to 1)
p + 
  scale_y_continuous(breaks = seq(0, 1, by = 0.1)) +
  scale_x_continuous(breaks = seq(0, max(quality(rules)$support), by = 0.05))

#Visuals of the metrics; graphs used in previous project
#jitter = 0 rather than jitter = TRUE spreads the data points so they are distinguishable, rather than on top of one another
plot(rules, shading = "lift", jitter = 0)

plot(rules, method = "two-key plot", shading = "order")

## To reduce overplotting, jitter is added! Use jitter = 0 to prevent jitter.

plot(rules, method = "graph", control = list(type = "items"))

## Warning: Unknown control parameters: type

## Available control parameters (with default values):
## layout    =  stress
## circular  =  FALSE
## ggraphdots    =  NULL
## edges     =  <environment>
## nodes     =  <environment>
## nodetext  =  <environment>
## colors    =  c("#EE0000FF", "#EEEEEEFF")
## engine    =  ggplot2
## max   =  100
## verbose   =  FALSE

Association Rule Mining - Apriori Algorithm

LLJ

2025-07-03