Librerías

library(dplyr)
library(tidyverse)
library(janitor)
library(tidyr)
library(lubridate)
library(Matrix)
library(arules)
library(arulesViz)
library(datasets)
library(plyr)

Desarrollo del Market Basket Analysis

Ordenar de menor a mayor los tickets

df_limpia<-read.csv("C:\\Users\\javaw\\OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey\\7mo Semestre\\Modulo 3\\abarrotes_bd_limpia.csv")
df_limpia <-df_limpia[order(df_limpia$F.Ticket),]
head(df_limpia)
##   ï..vcClaveTienda  DescGiro Fecha Hora                      Marca
## 1            MX001 Abarrotes    NA    8                NUTRI LECHE
## 2            MX001 Abarrotes    NA    8                     DAN UP
## 3            MX001 Abarrotes    NA    8                      BIMBO
## 4            MX001 Abarrotes    NA    8                      PEPSI
## 5            MX001 Abarrotes    NA    8 BLANCA NIEVES (DETERGENTE)
## 6            MX001 Abarrotes    NA    8                      FLASH
##                   Fabricante                           Producto Precio
## 1                    MEXILAC                Nutri Leche 1 Litro   16.0
## 2           DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 3                GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 4        PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 5 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
## 6                       ALEN      Flash Xtra Brisa Marina 500Ml    9.5
##   Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 1     12.31        1        1          Abarrotes Lacteos y Refrigerados
## 2     14.00        1        2          Abarrotes Lacteos y Refrigerados
## 3      5.00        1        3          Abarrotes         Pan y Tortilla
## 4      8.00        1        3          Abarrotes                Bebidas
## 5     15.00        1        4          Abarrotes     Limpieza del Hogar
## 6      7.31        1        4          Abarrotes     Limpieza del Hogar
##              NombreCategoria      Estado Mts.2 Tipo.ubicación      Giro
## 1                      Leche Nuevo León    60         Esquina Abarrotes
## 2                     Yogurt Nuevo León    60         Esquina Abarrotes
## 3      Pan Dulce Empaquetado Nuevo León    60         Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León    60         Esquina Abarrotes
## 5                Lavandería Nuevo León    60         Esquina Abarrotes
## 6      Limpiadores Líquidos Nuevo León    60         Esquina Abarrotes
##   Hora.inicio Hora.cierre Dia_de_la_semana Subtotal Utilidad
## 1       08:00       22:00               NA     16.0     3.69
## 2       08:00       22:00               NA     14.0     0.00
## 3       08:00       22:00               NA      5.0     0.00
## 4       08:00       22:00               NA      8.0     0.00
## 5       08:00       22:00               NA     19.5     4.50
## 6       08:00       22:00               NA      9.5     2.19
  tail(df_limpia)
##        ï..vcClaveTienda    DescGiro Fecha Hora          Marca
## 107394            MX004 Carnicería    NA   11         YEMINA
## 167771            MX004 Carnicería    NA   11     DEL FUERTE
## 149429            MX004 Carnicería    NA   11 COCA COLA ZERO
## 168750            MX004 Carnicería    NA   11       DIAMANTE
## 161193            MX004 Carnicería    NA   12          PEPSI
## 112970            MX004 Carnicería    NA   12      COCA COLA
##                  Fabricante                       Producto Precio Ult.Costo
## 107394               HERDEZ    PASTA SPAGHETTI YEMINA 200G      7      5.38
## 167771 ALIMENTOS DEL FUERTE PURE DE TOMATE DEL FUERTE 345G     12      9.23
## 149429            COCA COLA           COCA COLA ZERO 600ML     15     11.54
## 168750           EMPACADOS              ARROZ DIAMANTE225G     11      8.46
## 161193  PEPSI-COLA MEXICANA              PEPSI N. R. 500ML     10      7.69
## 112970            COCA COLA     COCA COLA RETORNABLE 500ML     10      7.69
##        Unidades F.Ticket NombreDepartamento        NombreFamilia
## 107394        2   450032          Abarrotes       Sopas y Pastas
## 167771        1   450032          Abarrotes Salsas y Sazonadores
## 149429        2   450034          Abarrotes              Bebidas
## 168750        1   450037          Abarrotes    Granos y Semillas
## 161193        1   450039          Abarrotes              Bebidas
## 112970        8   450040          Abarrotes              Bebidas
##                      NombreCategoria  Estado Mts.2 Tipo.ubicación      Giro
## 107394 Fideos, Spaguetti, Tallarines Sinaloa    53         Esquina Abarrotes
## 167771          Salsa para Spaguetti Sinaloa    53         Esquina Abarrotes
## 149429         Refrescos Retornables Sinaloa    53         Esquina Abarrotes
## 168750                         Arroz Sinaloa    53         Esquina Abarrotes
## 161193    Refrescos Plástico (N.R.) Sinaloa    53         Esquina Abarrotes
## 112970         Refrescos Retornables Sinaloa    53         Esquina Abarrotes
##        Hora.inicio Hora.cierre Dia_de_la_semana Subtotal Utilidad
## 107394       07:00       23:00               NA       14     1.62
## 167771       07:00       23:00               NA       12     2.77
## 149429       07:00       23:00               NA       30     3.46
## 168750       07:00       23:00               NA       11     2.54
## 161193       07:00       23:00               NA       10     2.31
## 112970       07:00       23:00               NA       80     2.31

Generar basket

basket<-ddply(df_limpia,c("F.Ticket"),function(df_limpia)paste(df_limpia$Marca, collapse = ","))

Eliminar numero de ticket

basket$F.Ticket<-NULL

Renombramos el nombre de la columna

colnames(basket) <- c("Marca")

Exportar basket

write.csv(basket, file = "basket.csv", quote = FALSE, row.names = FALSE)

Importar transacciones

tr<- read.transactions("C:\\Users\\javaw\\OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey\\7mo Semestre\\Modulo 3\\basket.csv", format = "basket", sep = ",")

reglas.asociacion<-apriori(tr,parameter=list(supp=0.001,conf=0.2,maxlen=10))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 115 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115111 transaction(s)] done [0.04s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
summary(reglas.asociacion)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001016   Min.   :0.2069   Min.   :0.003562   Min.   : 1.325  
##  1st Qu.:0.001103   1st Qu.:0.2356   1st Qu.:0.004504   1st Qu.: 1.787  
##  Median :0.001416   Median :0.2442   Median :0.005803   Median : 3.972  
##  Mean   :0.001519   Mean   :0.2536   Mean   :0.006054   Mean   :17.563  
##  3rd Qu.:0.001651   3rd Qu.:0.2685   3rd Qu.:0.006893   3rd Qu.:21.798  
##  Max.   :0.002745   Max.   :0.3098   Max.   :0.010503   Max.   :65.908  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##    tr        115111   0.001        0.2
##                                                                         call
##  apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas.asociacion)
##      lhs                  rhs         support     confidence coverage   
## [1]  {FANTA}           => {COCA COLA} 0.001051159 0.2439516  0.004308884
## [2]  {SALVO}           => {FABULOSO}  0.001103283 0.3097561  0.003561779
## [3]  {FABULOSO}        => {SALVO}     0.001103283 0.2347505  0.004699811
## [4]  {COCA COLA ZERO}  => {COCA COLA} 0.001416025 0.2969035  0.004769310
## [5]  {SPRITE}          => {COCA COLA} 0.001346526 0.2069426  0.006506763
## [6]  {PINOL}           => {CLORALEX}  0.001016410 0.2363636  0.004300197
## [7]  {BLUE HOUSE}      => {BIMBO}     0.001711392 0.2720994  0.006289581
## [8]  {HELLMANN´S}     => {BIMBO}     0.001537646 0.2649701  0.005803094
## [9]  {REYMA}           => {CONVERMEX} 0.002093631 0.2441743  0.008574333
## [10] {FUD}             => {BIMBO}     0.001589770 0.2183771  0.007279930
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730  0.010502906
##      lift      count
## [1]   1.561906 121  
## [2]  65.908196 127  
## [3]  65.908196 127  
## [4]   1.900932 163  
## [5]   1.324955 155  
## [6]  25.030409 117  
## [7]   4.078870 197  
## [8]   3.971997 177  
## [9]  18.564824 241  
## [10]  3.273552 183  
## [11]  1.673447 316
reglas.asociacion<-sort(reglas.asociacion, by='confidence', decreasing = TRUE)
summary(reglas.asociacion)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001016   Min.   :0.2069   Min.   :0.003562   Min.   : 1.325  
##  1st Qu.:0.001103   1st Qu.:0.2356   1st Qu.:0.004504   1st Qu.: 1.787  
##  Median :0.001416   Median :0.2442   Median :0.005803   Median : 3.972  
##  Mean   :0.001519   Mean   :0.2536   Mean   :0.006054   Mean   :17.563  
##  3rd Qu.:0.001651   3rd Qu.:0.2685   3rd Qu.:0.006893   3rd Qu.:21.798  
##  Max.   :0.002745   Max.   :0.3098   Max.   :0.010503   Max.   :65.908  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##    tr        115111   0.001        0.2
##                                                                         call
##  apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas.asociacion)
##      lhs                  rhs         support     confidence coverage   
## [1]  {SALVO}           => {FABULOSO}  0.001103283 0.3097561  0.003561779
## [2]  {COCA COLA ZERO}  => {COCA COLA} 0.001416025 0.2969035  0.004769310
## [3]  {BLUE HOUSE}      => {BIMBO}     0.001711392 0.2720994  0.006289581
## [4]  {HELLMANN´S}     => {BIMBO}     0.001537646 0.2649701  0.005803094
## [5]  {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730  0.010502906
## [6]  {REYMA}           => {CONVERMEX} 0.002093631 0.2441743  0.008574333
## [7]  {FANTA}           => {COCA COLA} 0.001051159 0.2439516  0.004308884
## [8]  {PINOL}           => {CLORALEX}  0.001016410 0.2363636  0.004300197
## [9]  {FABULOSO}        => {SALVO}     0.001103283 0.2347505  0.004699811
## [10] {FUD}             => {BIMBO}     0.001589770 0.2183771  0.007279930
## [11] {SPRITE}          => {COCA COLA} 0.001346526 0.2069426  0.006506763
##      lift      count
## [1]  65.908196 127  
## [2]   1.900932 163  
## [3]   4.078870 197  
## [4]   3.971997 177  
## [5]   1.673447 316  
## [6]  18.564824 241  
## [7]   1.561906 121  
## [8]  25.030409 117  
## [9]  65.908196 127  
## [10]  3.273552 183  
## [11]  1.324955 155

Visualización del Basket

top10reglas<-head(reglas.asociacion, n=10, by= "confidence")
plot(top10reglas,method="graph", engine="htmlwidget")
LS0tDQp0aXRsZTogIk1hcmtldCBCYXNrZXQgQW5hbHlzaXMiDQphdXRob3I6ICJKYXZpZXIgQXlhbGEiDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB0cnVlDQogICAgdG9jX2Zsb2F0OiB0cnVlDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KLS0tDQo8Y2VudGVyPg0KPGltZyBzcmM9IkM6Ly9Vc2Vycy8vamF2YXcvL0Rvd25sb2Fkcy8vbWFya2V0IGJhc2tldC5qZmlmIj4NCjwvY2VudGVyPg0KDQpgYGB7Y3NzLCBlY2hvPUZBTFNFfQ0KaDEsIGg0IHsNCiAgdGV4dC1hbGlnbjogY2VudGVyOw0KfQ0KDQpoMSwgaDIsIGgzIHsNCiAgY29sb3I6ICNFQzA5MDk7DQp9DQpgYGANCg0KDQojIyBMaWJyZXLDrWFzDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeSh0aWR5cikNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeShNYXRyaXgpDQpsaWJyYXJ5KGFydWxlcykNCmxpYnJhcnkoYXJ1bGVzVml6KQ0KbGlicmFyeShkYXRhc2V0cykNCmxpYnJhcnkocGx5cikNCmBgYA0KDQojIyBEZXNhcnJvbGxvIGRlbCBNYXJrZXQgQmFza2V0IEFuYWx5c2lzDQoNCiMjIyBPcmRlbmFyIGRlIG1lbm9yIGEgbWF5b3IgbG9zIHRpY2tldHMNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpkZl9saW1waWE8LXJlYWQuY3N2KCJDOlxcVXNlcnNcXGphdmF3XFxPbmVEcml2ZSAtIEluc3RpdHV0byBUZWNub2xvZ2ljbyB5IGRlIEVzdHVkaW9zIFN1cGVyaW9yZXMgZGUgTW9udGVycmV5XFw3bW8gU2VtZXN0cmVcXE1vZHVsbyAzXFxhYmFycm90ZXNfYmRfbGltcGlhLmNzdiIpDQpkZl9saW1waWEgPC1kZl9saW1waWFbb3JkZXIoZGZfbGltcGlhJEYuVGlja2V0KSxdDQpoZWFkKGRmX2xpbXBpYSkNCiAgdGFpbChkZl9saW1waWEpDQpgYGANCg0KIyMjIEdlbmVyYXIgYmFza2V0DQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KYmFza2V0PC1kZHBseShkZl9saW1waWEsYygiRi5UaWNrZXQiKSxmdW5jdGlvbihkZl9saW1waWEpcGFzdGUoZGZfbGltcGlhJE1hcmNhLCBjb2xsYXBzZSA9ICIsIikpDQoNCmBgYA0KDQojIyMgRWxpbWluYXIgbnVtZXJvIGRlIHRpY2tldA0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmJhc2tldCRGLlRpY2tldDwtTlVMTA0KDQpgYGANCg0KIyMjIFJlbm9tYnJhbW9zIGVsIG5vbWJyZSBkZSBsYSBjb2x1bW5hDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KY29sbmFtZXMoYmFza2V0KSA8LSBjKCJNYXJjYSIpDQoNCmBgYA0KDQojIyMgRXhwb3J0YXIgYmFza2V0DQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0Kd3JpdGUuY3N2KGJhc2tldCwgZmlsZSA9ICJiYXNrZXQuY3N2IiwgcXVvdGUgPSBGQUxTRSwgcm93Lm5hbWVzID0gRkFMU0UpDQoNCmBgYA0KDQojIyMgSW1wb3J0YXIgdHJhbnNhY2Npb25lcw0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCg0KdHI8LSByZWFkLnRyYW5zYWN0aW9ucygiQzpcXFVzZXJzXFxqYXZhd1xcT25lRHJpdmUgLSBJbnN0aXR1dG8gVGVjbm9sb2dpY28geSBkZSBFc3R1ZGlvcyBTdXBlcmlvcmVzIGRlIE1vbnRlcnJleVxcN21vIFNlbWVzdHJlXFxNb2R1bG8gM1xcYmFza2V0LmNzdiIsIGZvcm1hdCA9ICJiYXNrZXQiLCBzZXAgPSAiLCIpDQoNCnJlZ2xhcy5hc29jaWFjaW9uPC1hcHJpb3JpKHRyLHBhcmFtZXRlcj1saXN0KHN1cHA9MC4wMDEsY29uZj0wLjIsbWF4bGVuPTEwKSkNCnN1bW1hcnkocmVnbGFzLmFzb2NpYWNpb24pDQppbnNwZWN0KHJlZ2xhcy5hc29jaWFjaW9uKQ0KYGBgDQoNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpyZWdsYXMuYXNvY2lhY2lvbjwtc29ydChyZWdsYXMuYXNvY2lhY2lvbiwgYnk9J2NvbmZpZGVuY2UnLCBkZWNyZWFzaW5nID0gVFJVRSkNCnN1bW1hcnkocmVnbGFzLmFzb2NpYWNpb24pDQppbnNwZWN0KHJlZ2xhcy5hc29jaWFjaW9uKQ0KYGBgDQoNCiMjIFZpc3VhbGl6YWNpw7NuIGRlbCBCYXNrZXQNCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCnRvcDEwcmVnbGFzPC1oZWFkKHJlZ2xhcy5hc29jaWFjaW9uLCBuPTEwLCBieT0gImNvbmZpZGVuY2UiKQ0KcGxvdCh0b3AxMHJlZ2xhcyxtZXRob2Q9ImdyYXBoIiwgZW5naW5lPSJodG1sd2lkZ2V0IikNCmBgYA0KDQo=