library(arulesViz)  # For visualization
# Define transaction data
transactions <- list(
  c("paper", "pens", "pencils"),
  c("pens", "ruler"),
  c("pencils", "cards", "ruler"),
  c("paper", "cards"),
  c("pens", "pencils", "cards"),
  c("paper", "ruler", "calculator"),
  c("paper", "pens", "cards", "folder", "pencils", "ruler", "folder"),
  c("paper", "pencils", "cards", "ruler", "pens", "cards")
)
# Convert to transactions class that arules requires
trans <- as(transactions, "transactions")
# Run Apriori algorithm to find frequent itemsets
# min_support = 0.25 (25%)
frequent_itemsets <- apriori(trans, 
                             parameter = list(support = 0.25, 
                                              target = "frequent itemsets",
                                              minlen = 1))
# Print frequent itemsets
cat("Frequent Itemsets:\n")
inspect(frequent_itemsets)
# Generate association rules
# min_confidence = 0.7 (70%)
rules <- apriori(trans, 
                 parameter = list(support = 0.25, 
                                  confidence = 0.7,
                                  minlen = 2))  # minlen=2 ensures we don't get rules with empty antecedent
# Print the rules
cat("\nAssociation Rules:\n")
inspect(rules)

# Sort rules by confidence
rules_by_confidence <- sort(rules, by = "confidence", decreasing = TRUE)
cat("\nRules Sorted by Confidence:\n")
inspect(head(rules_by_confidence, 10))  # Top 10 rules by confidence
# Sort rules by lift
rules_by_lift <- sort(rules, by = "lift", decreasing = TRUE)
cat("\nRules Sorted by Lift:\n")
inspect(head(rules_by_lift, 10))  # Top 10 rules by lift

# Calculate additional metrics for the rules
rules_metrics <- data.frame(
  lhs = labels(lhs(rules)),
  rhs = labels(rhs(rules)),
  support = quality(rules)$support,
  confidence = quality(rules)$confidence,
  lift = quality(rules)$lift,
  count = quality(rules)$count
)

# Sort by lift
rules_metrics <- rules_metrics[order(-rules_metrics$lift), ]

# Print metrics table
cat("\nDetailed Rules Metrics:\n")
print(rules_metrics)

# Filter strong rules (high confidence and lift)
strong_rules <- subset(rules, confidence > 0.8 & lift > 1.2)
cat("\nStrong Rules (Confidence > 0.8 and Lift > 1.2):\n")
inspect(strong_rules)

# Find redundant rules
redundant <- is.redundant(rules)
cat("\nRedundant Rules:\n")
inspect(rules[redundant])


# Interpretation of rules
cat("\nInterpretation of Top Rules:\n")

# Function to get readable interpretation of a rule
interpret_rule <- function(rule) {
  lhs_items <- labels(lhs(rule))
  rhs_items <- labels(rhs(rule))
  support <- round(quality(rule)$support[1] * 100, 2)
  confidence <- round(quality(rule)$confidence[1] * 100, 2)
  lift <- round(quality(rule)$lift[1], 2)
  
  interpretation <- paste0(
    "Rule: {", lhs_items, "} => {", rhs_items, "}\n",
    "  - This rule appears in ", support, "% of all transactions\n",
    "  - When customers buy ", lhs_items, ", they also buy ", rhs_items, " ", confidence, "% of the time\n",
    "  - Customers who buy ", lhs_items, " are ", lift, " times more likely to buy ", rhs_items, " than the average customer\n",
    "  - Business value: "
  )
  
  # Add business interpretation based on lift
  if (lift > 2) {
    interpretation <- paste0(interpretation, "Very strong association, consider bundling these items together for promotions")
  } else if (lift > 1.5) {
    interpretation <- paste0(interpretation, "Strong association, place these items nearby in store layout")
  } else if (lift > 1) {
    interpretation <- paste0(interpretation, "Positive association, may suggest these items together")
  } else {
    interpretation <- paste0(interpretation, "No positive association, these items may be substitutes rather than complements")
  }
  
  return(interpretation)
}

# Interpret top 5 rules by lift
top_rules_by_lift <- head(rules_by_lift, 5)
for (i in 1:length(top_rules_by_lift)) {
  cat(interpret_rule(top_rules_by_lift[i]), "\n\n")
}

R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

```# Load required libraries library(arules)

Define transaction data

transactions <- list( c(“paper”, “pens”, “pencils”), c(“pens”, “ruler”), c(“pencils”, “cards”, “ruler”), c(“paper”, “cards”), c(“pens”, “pencils”, “cards”), c(“paper”, “ruler”, “calculator”), c(“paper”, “pens”, “cards”, “folder”, “pencils”, “ruler”, “folder”), c(“paper”, “pencils”, “cards”, “ruler”, “pens”, “cards”) )

Set minimum support threshold

min_support <- 0.25

Number of transactions

num_transactions <- length(transactions)

Function to calculate support for itemsets

calculate_support <- function(itemset, transactions) { count <- sum(sapply(transactions, function(t) all(itemset %in% t))) return(count / length(transactions)) }

Generate candidate 1-itemsets

all_items <- unique(unlist(transactions)) candidate_1_itemsets <- lapply(all_items, function(item) c(item))

Calculate support for 1-itemsets

itemset_1_support <- sapply(candidate_1_itemsets, calculate_support, transactions) names(itemset_1_support) <- all_items

Display candidate 1-itemsets with support

cat(“Candidate 1-itemsets with support:”) itemset_1_df <- data.frame( Item = names(itemset_1_support), Support = itemset_1_support, stringsAsFactors = FALSE ) print(itemset_1_df)

Pruning 1-itemsets

frequent_1_itemsets <- candidate_1_itemsets[itemset_1_support >= min_support] frequent_1_items <- sapply(frequent_1_itemsets, function(x) x[1])

cat(“-itemsets (after pruning with min_support =”, min_support, “):”) frequent_1_df <- data.frame( Item = sapply(frequent_1_itemsets, paste, collapse=“,”), Support = itemset_1_support[itemset_1_support >= min_support], stringsAsFactors = FALSE ) print(frequent_1_df)

Generate candidate 2-itemsets

candidate_2_itemsets <- combn(frequent_1_items, 2, simplify = FALSE)

Calculate support for 2-itemsets

itemset_2_support <- sapply(candidate_2_itemsets, calculate_support, transactions) names(itemset_2_support) <- sapply(candidate_2_itemsets, paste, collapse=“,”)

Display candidate 2-itemsets with support

cat(“-itemsets with support:”) itemset_2_df <- data.frame( Itemset = names(itemset_2_support), Support = itemset_2_support, stringsAsFactors = FALSE ) print(itemset_2_df)

Pruning 2-itemsets

frequent_2_itemsets <- candidate_2_itemsets[itemset_2_support >= min_support]

cat(“-itemsets (after pruning with min_support =”, min_support, “):”) frequent_2_df <- data.frame( Itemset = sapply(frequent_2_itemsets, paste, collapse=“,”), Support = itemset_2_support[itemset_2_support >= min_support], stringsAsFactors = FALSE ) print(frequent_2_df)

Generate candidate 3-itemsets (using the Apriori principle)

frequent_2_items_list <- lapply(frequent_2_itemsets, function(x) sort(x)) candidate_3_itemsets <- list()

for (i in 1:(length(frequent_2_items_list) - 1)) { for (j in (i+1):length(frequent_2_items_list)) { if (identical(frequent_2_items_list[[i]][1], frequent_2_items_list[[j]][1])) { new_itemset <- unique(c(frequent_2_items_list[[i]], frequent_2_items_list[[j]])) if (length(new_itemset) == 3) { # Check if all subsets are frequent (Apriori principle) all_subsets_frequent <- TRUE subsets <- combn(new_itemset, 2, simplify = FALSE) for (subset in subsets) { if (!any(sapply(frequent_2_itemsets, function(x) setequal(x, subset)))) { all_subsets_frequent <- FALSE break } } if (all_subsets_frequent) { candidate_3_itemsets <- c(candidate_3_itemsets, list(new_itemset)) } } } } }

Ensure unique candidate 3-itemsets

candidate_3_itemsets <- unique(candidate_3_itemsets)

Calculate support for 3-itemsets

itemset_3_support <- sapply(candidate_3_itemsets, calculate_support, transactions) names(itemset_3_support) <- sapply(candidate_3_itemsets, paste, collapse=“,”)

Display candidate 3-itemsets with support

cat(“-itemsets with support:”) itemset_3_df <- data.frame( Itemset = names(itemset_3_support), Support = itemset_3_support, stringsAsFactors = FALSE ) print(itemset_3_df)

Pruning 3-itemsets

frequent_3_itemsets <- candidate_3_itemsets[itemset_3_support >= min_support]

cat(“-itemsets (after pruning with min_support =”, min_support, “):”) frequent_3_df <- data.frame( Itemset = sapply(frequent_3_itemsets, paste, collapse=“,”), Support = itemset_3_support[itemset_3_support >= min_support], stringsAsFactors = FALSE ) print(frequent_3_df)

Continue for 4-itemsets following the same pattern…

Generate candidate 4-itemsets

if (length(frequent_3_itemsets) > 1) { candidate_4_itemsets <- list() for (i in 1:(length(frequent_3_itemsets) - 1)) { for (j in (i+1):length(frequent_3_itemsets)) { common_items <- intersect(frequent_3_itemsets[[i]], frequent_3_itemsets[[j]]) if (length(common_items) == 2) { new_itemset <- unique(c(frequent_3_itemsets[[i]], frequent_3_itemsets[[j]])) if (length(new_itemset) == 4) { # Check if all subsets are frequent (Apriori principle) all_subsets <- combn(new_itemset, 3, simplify = FALSE) all_freq <- TRUE for (subset in all_subsets) { if (!any(sapply(frequent_3_itemsets, function(x) setequal(x, subset)))) { all_freq <- FALSE break } } if (all_freq) { candidate_4_itemsets <- c(candidate_4_itemsets, list(new_itemset)) } } } } }

if (length(candidate_4_itemsets) > 0) { # Calculate support for 4-itemsets itemset_4_support <- sapply(candidate_4_itemsets, calculate_support, transactions) names(itemset_4_support) <- sapply(candidate_4_itemsets, paste, collapse=“,”)

cat("\nCandidate 4-itemsets with support:\n")
itemset_4_df <- data.frame(
  Itemset = names(itemset_4_support),
  Support = itemset_4_support,
  stringsAsFactors = FALSE
)
print(itemset_4_df)

# Pruning 4-itemsets
frequent_4_itemsets <- candidate_4_itemsets[itemset_4_support >= min_support]

if (length(frequent_4_itemsets) > 0) {
  cat("\nFrequent 4-itemsets (after pruning with min_support =", min_support, "):\n")
  frequent_4_df <- data.frame(
    Itemset = sapply(frequent_4_itemsets, paste, collapse=","),
    Support = itemset_4_support[itemset_4_support >= min_support],
    stringsAsFactors = FALSE
  )
  print(frequent_4_df)
} else {
  cat("\nNo frequent 4-itemsets found with min_support =", min_support, "\n")
}

} else { cat(“candidate 4-itemsets generated”) } } else { cat(“frequent 3-itemsets to generate 4-itemsets”) }

Summary of all frequent itemsets

cat(“OF ALL FREQUENT ITEMSETS (min_support =”, min_support, “):”) cat(“Frequent 1-itemsets:”, length(frequent_1_itemsets), “”) cat(“Frequent 2-itemsets:”, length(frequent_2_itemsets), “”) cat(“Frequent 3-itemsets:”, length(frequent_3_itemsets), “”) if(exists(“frequent_4_itemsets”)) { cat(“Frequent 4-itemsets:”, length(frequent_4_itemsets), “”) } ```

Dr. Wu

2025-05-14