library(arulesViz) # For visualization
# Define transaction data
transactions <- list(
c("paper", "pens", "pencils"),
c("pens", "ruler"),
c("pencils", "cards", "ruler"),
c("paper", "cards"),
c("pens", "pencils", "cards"),
c("paper", "ruler", "calculator"),
c("paper", "pens", "cards", "folder", "pencils", "ruler", "folder"),
c("paper", "pencils", "cards", "ruler", "pens", "cards")
)
# Convert to transactions class that arules requires
trans <- as(transactions, "transactions")
# Run Apriori algorithm to find frequent itemsets
# min_support = 0.25 (25%)
frequent_itemsets <- apriori(trans,
parameter = list(support = 0.25,
target = "frequent itemsets",
minlen = 1))
# Print frequent itemsets
cat("Frequent Itemsets:\n")
inspect(frequent_itemsets)
# Generate association rules
# min_confidence = 0.7 (70%)
rules <- apriori(trans,
parameter = list(support = 0.25,
confidence = 0.7,
minlen = 2)) # minlen=2 ensures we don't get rules with empty antecedent
# Print the rules
cat("\nAssociation Rules:\n")
inspect(rules)
# Sort rules by confidence
rules_by_confidence <- sort(rules, by = "confidence", decreasing = TRUE)
cat("\nRules Sorted by Confidence:\n")
inspect(head(rules_by_confidence, 10)) # Top 10 rules by confidence
# Sort rules by lift
rules_by_lift <- sort(rules, by = "lift", decreasing = TRUE)
cat("\nRules Sorted by Lift:\n")
inspect(head(rules_by_lift, 10)) # Top 10 rules by lift
# Calculate additional metrics for the rules
rules_metrics <- data.frame(
lhs = labels(lhs(rules)),
rhs = labels(rhs(rules)),
support = quality(rules)$support,
confidence = quality(rules)$confidence,
lift = quality(rules)$lift,
count = quality(rules)$count
)
# Sort by lift
rules_metrics <- rules_metrics[order(-rules_metrics$lift), ]
# Print metrics table
cat("\nDetailed Rules Metrics:\n")
print(rules_metrics)
# Filter strong rules (high confidence and lift)
strong_rules <- subset(rules, confidence > 0.8 & lift > 1.2)
cat("\nStrong Rules (Confidence > 0.8 and Lift > 1.2):\n")
inspect(strong_rules)
# Find redundant rules
redundant <- is.redundant(rules)
cat("\nRedundant Rules:\n")
inspect(rules[redundant])
# Interpretation of rules
cat("\nInterpretation of Top Rules:\n")
# Function to get readable interpretation of a rule
interpret_rule <- function(rule) {
lhs_items <- labels(lhs(rule))
rhs_items <- labels(rhs(rule))
support <- round(quality(rule)$support[1] * 100, 2)
confidence <- round(quality(rule)$confidence[1] * 100, 2)
lift <- round(quality(rule)$lift[1], 2)
interpretation <- paste0(
"Rule: {", lhs_items, "} => {", rhs_items, "}\n",
" - This rule appears in ", support, "% of all transactions\n",
" - When customers buy ", lhs_items, ", they also buy ", rhs_items, " ", confidence, "% of the time\n",
" - Customers who buy ", lhs_items, " are ", lift, " times more likely to buy ", rhs_items, " than the average customer\n",
" - Business value: "
)
# Add business interpretation based on lift
if (lift > 2) {
interpretation <- paste0(interpretation, "Very strong association, consider bundling these items together for promotions")
} else if (lift > 1.5) {
interpretation <- paste0(interpretation, "Strong association, place these items nearby in store layout")
} else if (lift > 1) {
interpretation <- paste0(interpretation, "Positive association, may suggest these items together")
} else {
interpretation <- paste0(interpretation, "No positive association, these items may be substitutes rather than complements")
}
return(interpretation)
}
# Interpret top 5 rules by lift
top_rules_by_lift <- head(rules_by_lift, 5)
for (i in 1:length(top_rules_by_lift)) {
cat(interpret_rule(top_rules_by_lift[i]), "\n\n")
}
This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.
When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:
```# Load required libraries library(arules)
transactions <- list( c(“paper”, “pens”, “pencils”), c(“pens”, “ruler”), c(“pencils”, “cards”, “ruler”), c(“paper”, “cards”), c(“pens”, “pencils”, “cards”), c(“paper”, “ruler”, “calculator”), c(“paper”, “pens”, “cards”, “folder”, “pencils”, “ruler”, “folder”), c(“paper”, “pencils”, “cards”, “ruler”, “pens”, “cards”) )
min_support <- 0.25
num_transactions <- length(transactions)
calculate_support <- function(itemset, transactions) { count <- sum(sapply(transactions, function(t) all(itemset %in% t))) return(count / length(transactions)) }
all_items <- unique(unlist(transactions)) candidate_1_itemsets <- lapply(all_items, function(item) c(item))
itemset_1_support <- sapply(candidate_1_itemsets, calculate_support, transactions) names(itemset_1_support) <- all_items
cat(“Candidate 1-itemsets with support:”) itemset_1_df <- data.frame( Item = names(itemset_1_support), Support = itemset_1_support, stringsAsFactors = FALSE ) print(itemset_1_df)
frequent_1_itemsets <- candidate_1_itemsets[itemset_1_support >= min_support] frequent_1_items <- sapply(frequent_1_itemsets, function(x) x[1])
cat(“-itemsets (after pruning with min_support =”, min_support, “):”) frequent_1_df <- data.frame( Item = sapply(frequent_1_itemsets, paste, collapse=“,”), Support = itemset_1_support[itemset_1_support >= min_support], stringsAsFactors = FALSE ) print(frequent_1_df)
candidate_2_itemsets <- combn(frequent_1_items, 2, simplify = FALSE)
itemset_2_support <- sapply(candidate_2_itemsets, calculate_support, transactions) names(itemset_2_support) <- sapply(candidate_2_itemsets, paste, collapse=“,”)
cat(“-itemsets with support:”) itemset_2_df <- data.frame( Itemset = names(itemset_2_support), Support = itemset_2_support, stringsAsFactors = FALSE ) print(itemset_2_df)
frequent_2_itemsets <- candidate_2_itemsets[itemset_2_support >= min_support]
cat(“-itemsets (after pruning with min_support =”, min_support, “):”) frequent_2_df <- data.frame( Itemset = sapply(frequent_2_itemsets, paste, collapse=“,”), Support = itemset_2_support[itemset_2_support >= min_support], stringsAsFactors = FALSE ) print(frequent_2_df)
frequent_2_items_list <- lapply(frequent_2_itemsets, function(x) sort(x)) candidate_3_itemsets <- list()
for (i in 1:(length(frequent_2_items_list) - 1)) { for (j in (i+1):length(frequent_2_items_list)) { if (identical(frequent_2_items_list[[i]][1], frequent_2_items_list[[j]][1])) { new_itemset <- unique(c(frequent_2_items_list[[i]], frequent_2_items_list[[j]])) if (length(new_itemset) == 3) { # Check if all subsets are frequent (Apriori principle) all_subsets_frequent <- TRUE subsets <- combn(new_itemset, 2, simplify = FALSE) for (subset in subsets) { if (!any(sapply(frequent_2_itemsets, function(x) setequal(x, subset)))) { all_subsets_frequent <- FALSE break } } if (all_subsets_frequent) { candidate_3_itemsets <- c(candidate_3_itemsets, list(new_itemset)) } } } } }
candidate_3_itemsets <- unique(candidate_3_itemsets)
itemset_3_support <- sapply(candidate_3_itemsets, calculate_support, transactions) names(itemset_3_support) <- sapply(candidate_3_itemsets, paste, collapse=“,”)
cat(“-itemsets with support:”) itemset_3_df <- data.frame( Itemset = names(itemset_3_support), Support = itemset_3_support, stringsAsFactors = FALSE ) print(itemset_3_df)
frequent_3_itemsets <- candidate_3_itemsets[itemset_3_support >= min_support]
cat(“-itemsets (after pruning with min_support =”, min_support, “):”) frequent_3_df <- data.frame( Itemset = sapply(frequent_3_itemsets, paste, collapse=“,”), Support = itemset_3_support[itemset_3_support >= min_support], stringsAsFactors = FALSE ) print(frequent_3_df)
if (length(frequent_3_itemsets) > 1) { candidate_4_itemsets <- list() for (i in 1:(length(frequent_3_itemsets) - 1)) { for (j in (i+1):length(frequent_3_itemsets)) { common_items <- intersect(frequent_3_itemsets[[i]], frequent_3_itemsets[[j]]) if (length(common_items) == 2) { new_itemset <- unique(c(frequent_3_itemsets[[i]], frequent_3_itemsets[[j]])) if (length(new_itemset) == 4) { # Check if all subsets are frequent (Apriori principle) all_subsets <- combn(new_itemset, 3, simplify = FALSE) all_freq <- TRUE for (subset in all_subsets) { if (!any(sapply(frequent_3_itemsets, function(x) setequal(x, subset)))) { all_freq <- FALSE break } } if (all_freq) { candidate_4_itemsets <- c(candidate_4_itemsets, list(new_itemset)) } } } } }
if (length(candidate_4_itemsets) > 0) { # Calculate support for 4-itemsets itemset_4_support <- sapply(candidate_4_itemsets, calculate_support, transactions) names(itemset_4_support) <- sapply(candidate_4_itemsets, paste, collapse=“,”)
cat("\nCandidate 4-itemsets with support:\n")
itemset_4_df <- data.frame(
Itemset = names(itemset_4_support),
Support = itemset_4_support,
stringsAsFactors = FALSE
)
print(itemset_4_df)
# Pruning 4-itemsets
frequent_4_itemsets <- candidate_4_itemsets[itemset_4_support >= min_support]
if (length(frequent_4_itemsets) > 0) {
cat("\nFrequent 4-itemsets (after pruning with min_support =", min_support, "):\n")
frequent_4_df <- data.frame(
Itemset = sapply(frequent_4_itemsets, paste, collapse=","),
Support = itemset_4_support[itemset_4_support >= min_support],
stringsAsFactors = FALSE
)
print(frequent_4_df)
} else {
cat("\nNo frequent 4-itemsets found with min_support =", min_support, "\n")
}
} else { cat(“candidate 4-itemsets generated”) } } else { cat(“frequent 3-itemsets to generate 4-itemsets”) }
cat(“OF ALL FREQUENT ITEMSETS (min_support =”, min_support, “):”) cat(“Frequent 1-itemsets:”, length(frequent_1_itemsets), “”) cat(“Frequent 2-itemsets:”, length(frequent_2_itemsets), “”) cat(“Frequent 3-itemsets:”, length(frequent_3_itemsets), “”) if(exists(“frequent_4_itemsets”)) { cat(“Frequent 4-itemsets:”, length(frequent_4_itemsets), “”) } ```