# Instala (una sola vez) si no los tienes
# install.packages(c("arules", "arulesViz", "tidyverse"))
library(arules)
library(arulesViz) # gráficos y exploración
library(tidyverse) # utilidades opcionales
Útil para validar el pipeline de punta a punta sin preparar datos.
# Load the Groceries example data set.
data("Groceries") # example transactions (class transactions)
# Print the object to see how many transactions and items it holds.
Groceries
## transactions in sparse format with
## 9835 transactions (rows) and
## 169 items (columns)
# Summarise density, most frequent items and the transaction length distribution.
summary(Groceries)
## transactions as itemMatrix in sparse format with
## 9835 rows (elements/itemsets/transactions) and
## 169 columns (items) and a density of 0.02609146
##
## most frequent items:
## whole milk other vegetables rolls/buns soda
## 2513 1903 1809 1715
## yogurt (Other)
## 1372 34055
##
## element (itemset/transaction) length distribution:
## sizes
## 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16
## 2159 1643 1299 1005 855 645 545 438 350 246 182 117 78 77 55 46
## 17 18 19 20 21 22 23 24 26 27 28 29 32
## 29 14 14 9 11 4 6 1 1 1 1 3 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
##
## includes extended item information - examples:
## labels level2 level1
## 1 frankfurter sausage meat and sausage
## 2 sausage sausage meat and sausage
## 3 liver loaf sausage meat and sausage
# Display the items contained in the first transactions.
inspect(head(Groceries, 3)) # show the first 3 transactions
## items
## [1] {citrus fruit,
## semi-finished bread,
## margarine,
## ready soups}
## [2] {tropical fruit,
## yogurt,
## coffee}
## [3] {whole milk}
Usa esta opción si tienes un archivo .csv de estilo “basket”: una transacción por fila e ítems separados por coma (o por el separador que uses).
# --- Set this path to your own file ---
# Expected "basket" layout: one transaction per line, items separated by
# `separador`; an example line is "Pan,Leche,Huevos".
ruta_csv <- "data/mi_basket.csv"
separador <- "," # change this if your file uses another separator

# Fail early with an explicit message instead of a generic connection error.
if (!file.exists(ruta_csv)) {
  stop("No se encontró el archivo: ", ruta_csv, call. = FALSE)
}

# Read the raw lines (each line = one transaction) and skip blank ones so they
# do not become empty transactions that would distort support values.
lineas <- readLines(ruta_csv, encoding = "UTF-8")
lineas <- lineas[nzchar(trimws(lineas))]

# Split each line into a vector of trimmed items.
# - fixed = TRUE treats the separator literally, so separators such as "|" or
#   "." are not interpreted as regular expressions.
# - Empty items (e.g. from "a,,b") are dropped.
# - Repeated items within a line are collapsed with unique(); depending on the
#   arules version, duplicated items make the coercion below fail or warn.
split_items <- strsplit(lineas, split = separador, fixed = TRUE)
split_items <- lapply(split_items, function(x) {
  x <- trimws(x)
  unique(x[nzchar(x)])
})
# Lines that contained only separators/whitespace leave no items: discard them.
split_items <- split_items[lengths(split_items) > 0L]

# Build the object of class 'transactions'
trans <- as(split_items, "transactions")
trans
summary(trans)
inspect(head(trans, 3))
⚠️ El resto del análisis funcionará con el objeto transacciones.
Si usas Groceries, asigna:
# Choose the transaction set analysed from here on: Groceries or your own
# 'trans' object.
transacciones <- Groceries
# transacciones <- trans # <- uncomment if you loaded your own data

# Most frequent items: bar chart of the top 15, using absolute counts
itemFrequencyPlot(
  transacciones,
  type = "absolute",
  topN = 15,
  main = "Top 15 ítems más frecuentes"
)

# Transaction length distribution (how many items per ticket)
size_tab <- size(transacciones)
summary(size_tab)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 3.000 4.409 6.000 32.000
hist(
  size_tab,
  breaks = 20,
  main = "Tamaño de transacción",
  xlab = "nº de items"
)
Ajusta support y confidence a la densidad de tus datos.
En Groceries, valores típicos de demo: supp = 0.001, conf = 0.1–0.3.
# Mine association rules from `transacciones` with Apriori.
# supp / conf are the minimum support and confidence thresholds;
# minlen / maxlen bound the rule length (lhs + rhs) to between 2 and 4 items.
# verbose = TRUE prints the algorithm's progress log.
reglas <- apriori(
transacciones,
parameter = list(supp = 0.001, conf = 0.2, minlen = 2, maxlen = 4),
control = list(verbose = TRUE)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 4 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4
## done [0.01s].
## writing ... [19781 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Print the rule set (shows how many rules were found).
reglas
## set of 19781 rules
# Summarise the rule length distribution and the quality measures.
summary(reglas)
## set of 19781 rules
##
## rule length distribution (lhs + rhs):sizes
## 2 3 4
## 620 9337 9824
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 3.000 3.000 3.465 4.000 4.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.2000 Min. :0.001017 Min. : 0.8028
## 1st Qu.:0.001118 1st Qu.:0.2581 1st Qu.:0.002949 1st Qu.: 2.0673
## Median :0.001423 Median :0.3438 Median :0.004372 Median : 2.6850
## Mean :0.002013 Mean :0.3849 Mean :0.006127 Mean : 2.9321
## 3rd Qu.:0.002034 3rd Qu.:0.4808 3rd Qu.:0.006406 3rd Qu.: 3.4827
## Max. :0.074835 Max. :1.0000 Max. :0.255516 Max. :35.7158
## count
## Min. : 10.0
## 1st Qu.: 11.0
## Median : 14.0
## Mean : 19.8
## 3rd Qu.: 20.0
## Max. :736.0
##
## mining info:
## data ntransactions support confidence
## transacciones 9835 0.001 0.2
## call
## apriori(data = transacciones, parameter = list(supp = 0.001, conf = 0.2, minlen = 2, maxlen = 4), control = list(verbose = TRUE))
# Drop redundant rules: keep only those that is.redundant() does not flag
reglas <- reglas[!is.redundant(reglas)]

# Rank the remaining rules by lift (association stronger than chance),
# highest first, and display the top 10
reglas_lift <- sort(reglas, decreasing = TRUE, by = "lift")
reglas_lift %>%
  head(n = 10) %>%
  inspect()
## lhs rhs support
## [1] {bottled beer, red/blush wine} => {liquor} 0.001931876
## [2] {hamburger meat, soda} => {Instant food products} 0.001220132
## [3] {ham, white bread} => {processed cheese} 0.001931876
## [4] {bottled beer, liquor} => {red/blush wine} 0.001931876
## [5] {Instant food products, soda} => {hamburger meat} 0.001220132
## [6] {curd, sugar} => {flour} 0.001118454
## [7] {sugar, baking powder} => {flour} 0.001016777
## [8] {processed cheese, white bread} => {ham} 0.001931876
## [9] {ham, fruit/vegetable juice} => {processed cheese} 0.001118454
## [10] {margarine, sugar} => {flour} 0.001626843
## confidence coverage lift count
## [1] 0.3958333 0.004880529 35.71579 19
## [2] 0.2105263 0.005795628 26.20919 12
## [3] 0.3800000 0.005083884 22.92822 19
## [4] 0.4130435 0.004677173 21.49356 19
## [5] 0.6315789 0.001931876 18.99565 12
## [6] 0.3235294 0.003457041 18.60767 11
## [7] 0.3125000 0.003253686 17.97332 10
## [8] 0.4634146 0.004168785 17.80345 19
## [9] 0.2894737 0.003863752 17.46610 11
## [10] 0.2962963 0.005490595 17.04137 16
# You can also keep only the rules whose RHS (consequent) is a given item.
# %in% matches item labels exactly; %pin% performs partial matching and would
# also select any other item whose label merely contains the text.
reglas_leche <- subset(reglas_lift, rhs %in% "whole milk")
inspect(head(reglas_leche, 10))
## lhs rhs support confidence coverage lift count
## [1] {rice,
## sugar} => {whole milk} 0.001220132 1 0.001220132 3.913649 12
## [2] {canned fish,
## hygiene articles} => {whole milk} 0.001118454 1 0.001118454 3.913649 11
## [3] {root vegetables,
## butter,
## rice} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [4] {root vegetables,
## whipped/sour cream,
## flour} => {whole milk} 0.001728521 1 0.001728521 3.913649 17
## [5] {butter,
## soft cheese,
## domestic eggs} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [6] {pip fruit,
## butter,
## hygiene articles} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [7] {root vegetables,
## whipped/sour cream,
## hygiene articles} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [8] {pip fruit,
## root vegetables,
## hygiene articles} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
## [9] {cream cheese ,
## domestic eggs,
## sugar} => {whole milk} 0.001118454 1 0.001118454 3.913649 11
## [10] {curd,
## domestic eggs,
## sugar} => {whole milk} 0.001016777 1 0.001016777 3.913649 10
# Basic scatter plot (support vs. confidence; colour by lift)
plot(
  reglas_lift,
  method = "scatterplot",
  measure = c("support", "confidence"),
  shading = "lift"
)

# Heat-map style matrix (groups by items)
plot(reglas_lift, method = "grouped")

# Graph (note: limit to n rules for readability)
plot(
  head(reglas_lift, 50),
  method = "graph",
  engine = "htmlwidget"
)
# Convert the lift-sorted rules to a data.frame and preview the first rows
df_reglas <- as(object = reglas_lift, Class = "data.frame")
head(df_reglas)
## rules support confidence
## 633 {bottled beer,red/blush wine} => {liquor} 0.001931876 0.3958333
## 696 {hamburger meat,soda} => {Instant food products} 0.001220132 0.2105263
## 1489 {ham,white bread} => {processed cheese} 0.001931876 0.3800000
## 632 {bottled beer,liquor} => {red/blush wine} 0.001931876 0.4130435
## 695 {Instant food products,soda} => {hamburger meat} 0.001220132 0.6315789
## 2022 {curd,sugar} => {flour} 0.001118454 0.3235294
## coverage lift count
## 633 0.004880529 35.71579 19
## 696 0.005795628 26.20919 12
## 1489 0.005083884 22.92822 19
## 632 0.004677173 21.49356 19
## 695 0.001931876 18.99565 12
## 2022 0.003457041 18.60767 11
# Export the rules table as CSV
readr::write_csv(df_reglas, "reglas_apriori.csv")

# Save the rules object in .rds format so it can be reused later
saveRDS(object = reglas_lift, file = "reglas_apriori.rds")
# Simple parameter grid to compare the number of rules and their lift.
# expand.grid() produces every supp/conf combination, one per row.
grid <- expand.grid(
  supp = c(0.005, 0.003, 0.001),
  conf = c(0.3, 0.2, 0.15)
)

# For each (supp, conf) pair: mine rules, drop the redundant ones and return a
# one-row tibble; map2_dfr() row-binds those rows into `resumen`.
# The second lambda argument is named `k` so that it does not shadow base::c().
resumen <- purrr::map2_dfr(grid$supp, grid$conf, function(s, k) {
  # No maxlen is passed here, so apriori() applies its default maximum length.
  r <- apriori(transacciones, parameter = list(supp = s, conf = k, minlen = 2))
  r <- r[!is.redundant(r)]
  # Scalar condition, so plain if/else instead of the vectorised ifelse().
  # NA when no rule survives, since the mean of an empty set is undefined.
  lift_mean <- if (length(r) > 0) mean(quality(r)$lift) else NA_real_
  # `lift_med` holds the mean (not the median) lift of the retained rules.
  tibble(supp = s, conf = k, n_rules = length(r), lift_med = lift_mean)
})
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.005 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 49
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [120 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [482 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.003 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 29
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [136 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [1361 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.3 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [13770 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.005 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 49
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [120 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [872 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.003 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 29
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [136 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [2245 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [21633 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.15 0.1 1 none FALSE TRUE 5 0.005 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 49
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [120 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [1158 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.15 0.1 1 none FALSE TRUE 5 0.003 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 29
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [136 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 done [0.00s].
## writing ... [2958 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.15 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [26820 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
# Print the comparison table (one row per supp/conf combination).
resumen
## # A tibble: 9 × 4
## supp conf n_rules lift_med
## <dbl> <dbl> <int> <dbl>
## 1 0.005 0.3 460 2.17
## 2 0.003 0.3 1276 2.39
## 3 0.001 0.3 12030 3.25
## 4 0.005 0.2 828 2.05
## 5 0.003 0.2 2089 2.30
## 6 0.001 0.2 18642 3.16
## 7 0.005 0.15 1100 2.00
## 8 0.003 0.15 2744 2.25
## 9 0.001 0.15 22991 3.08
# Example: rules that have 'yogurt' in the RHS (consequent).
# The appearance list restricts the RHS to "yogurt"; default = "lhs" places
# every other item on the LHS only.
reglas_yogurt <- apriori(
transacciones,
parameter = list(supp = 0.001, conf = 0.2, minlen = 2),
appearance = list(rhs = "yogurt", default = "lhs")
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 9
##
## set item appearances ...[1 item(s)] done [0.00s].
## set transactions ...[169 item(s), 9835 transaction(s)] done [0.00s].
## sorting and recoding items ... [157 item(s)] done [0.00s].
## creating transaction tree ... done [0.00s].
## checking subsets of size 1 2 3 4 5 6 done [0.01s].
## writing ... [2328 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
# Rank the yogurt rules by lift, highest first, and display the top 10
reglas_yogurt <- sort(reglas_yogurt, decreasing = TRUE, by = "lift")
reglas_yogurt %>%
  head(n = 10) %>%
  inspect()
## lhs rhs support confidence coverage lift count
## [1] {root vegetables,
## butter,
## cream cheese } => {yogurt} 0.001016777 0.9090909 0.001118454 6.516698 10
## [2] {tropical fruit,
## whole milk,
## butter,
## sliced cheese} => {yogurt} 0.001016777 0.9090909 0.001118454 6.516698 10
## [3] {other vegetables,
## curd,
## whipped/sour cream,
## cream cheese } => {yogurt} 0.001016777 0.9090909 0.001118454 6.516698 10
## [4] {tropical fruit,
## other vegetables,
## butter,
## white bread} => {yogurt} 0.001016777 0.9090909 0.001118454 6.516698 10
## [5] {sausage,
## pip fruit,
## sliced cheese} => {yogurt} 0.001220132 0.8571429 0.001423488 6.144315 12
## [6] {tropical fruit,
## whole milk,
## butter,
## curd} => {yogurt} 0.001220132 0.8571429 0.001423488 6.144315 12
## [7] {tropical fruit,
## butter,
## white bread} => {yogurt} 0.001118454 0.8461538 0.001321810 6.065542 11
## [8] {tropical fruit,
## butter,
## margarine} => {yogurt} 0.001118454 0.8461538 0.001321810 6.065542 11
## [9] {whole milk,
## curd,
## whipped/sour cream,
## cream cheese } => {yogurt} 0.001118454 0.8461538 0.001321810 6.065542 11
## [10] {whipped/sour cream,
## cream cheese ,
## margarine} => {yogurt} 0.001016777 0.8333333 0.001220132 5.973639 10
Asegúrate de que tus datos estén en formato transactions.
Ajusta supp en función de la escasez de tus datos (menos denso ⇒ supp más bajo).
Elimina reglas redundantes (is.redundant) y limita maxlen para evitar reglas triviales.