Ordenar de menor a mayor los tickets
df_limpia<-read.csv("C:\\Users\\javaw\\OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey\\7mo Semestre\\Modulo 3\\abarrotes_bd_limpia.csv")
df_limpia <-df_limpia[order(df_limpia$F.Ticket),]
head(df_limpia)
## ï..vcClaveTienda DescGiro Fecha Hora Marca
## 1 MX001 Abarrotes NA 8 NUTRI LECHE
## 2 MX001 Abarrotes NA 8 DAN UP
## 3 MX001 Abarrotes NA 8 BIMBO
## 4 MX001 Abarrotes NA 8 PEPSI
## 5 MX001 Abarrotes NA 8 BLANCA NIEVES (DETERGENTE)
## 6 MX001 Abarrotes NA 8 FLASH
## Fabricante Producto Precio
## 1 MEXILAC Nutri Leche 1 Litro 16.0
## 2 DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL 14.0
## 3 GRUPO BIMBO Rebanadas Bimbo 2Pz 5.0
## 4 PEPSI-COLA MEXICANA Pepsi N.R. 400Ml 8.0
## 5 FABRICA DE JABON LA CORONA Detergente Blanca Nieves 500G 19.5
## 6 ALEN Flash Xtra Brisa Marina 500Ml 9.5
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 1 12.31 1 1 Abarrotes Lacteos y Refrigerados
## 2 14.00 1 2 Abarrotes Lacteos y Refrigerados
## 3 5.00 1 3 Abarrotes Pan y Tortilla
## 4 8.00 1 3 Abarrotes Bebidas
## 5 15.00 1 4 Abarrotes Limpieza del Hogar
## 6 7.31 1 4 Abarrotes Limpieza del Hogar
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 1 Leche Nuevo León 60 Esquina Abarrotes
## 2 Yogurt Nuevo León 60 Esquina Abarrotes
## 3 Pan Dulce Empaquetado Nuevo León 60 Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León 60 Esquina Abarrotes
## 5 LavanderÃa Nuevo León 60 Esquina Abarrotes
## 6 Limpiadores LÃquidos Nuevo León 60 Esquina Abarrotes
## Hora.inicio Hora.cierre Dia_de_la_semana Subtotal Utilidad
## 1 08:00 22:00 NA 16.0 3.69
## 2 08:00 22:00 NA 14.0 0.00
## 3 08:00 22:00 NA 5.0 0.00
## 4 08:00 22:00 NA 8.0 0.00
## 5 08:00 22:00 NA 19.5 4.50
## 6 08:00 22:00 NA 9.5 2.19
tail(df_limpia)
## ï..vcClaveTienda DescGiro Fecha Hora Marca
## 107394 MX004 CarnicerÃa NA 11 YEMINA
## 167771 MX004 CarnicerÃa NA 11 DEL FUERTE
## 149429 MX004 CarnicerÃa NA 11 COCA COLA ZERO
## 168750 MX004 CarnicerÃa NA 11 DIAMANTE
## 161193 MX004 CarnicerÃa NA 12 PEPSI
## 112970 MX004 CarnicerÃa NA 12 COCA COLA
## Fabricante Producto Precio Ult.Costo
## 107394 HERDEZ PASTA SPAGHETTI YEMINA 200G 7 5.38
## 167771 ALIMENTOS DEL FUERTE PURE DE TOMATE DEL FUERTE 345G 12 9.23
## 149429 COCA COLA COCA COLA ZERO 600ML 15 11.54
## 168750 EMPACADOS ARROZ DIAMANTE225G 11 8.46
## 161193 PEPSI-COLA MEXICANA PEPSI N. R. 500ML 10 7.69
## 112970 COCA COLA COCA COLA RETORNABLE 500ML 10 7.69
## Unidades F.Ticket NombreDepartamento NombreFamilia
## 107394 2 450032 Abarrotes Sopas y Pastas
## 167771 1 450032 Abarrotes Salsas y Sazonadores
## 149429 2 450034 Abarrotes Bebidas
## 168750 1 450037 Abarrotes Granos y Semillas
## 161193 1 450039 Abarrotes Bebidas
## 112970 8 450040 Abarrotes Bebidas
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 107394 Fideos, Spaguetti, Tallarines Sinaloa 53 Esquina Abarrotes
## 167771 Salsa para Spaguetti Sinaloa 53 Esquina Abarrotes
## 149429 Refrescos Retornables Sinaloa 53 Esquina Abarrotes
## 168750 Arroz Sinaloa 53 Esquina Abarrotes
## 161193 Refrescos Plástico (N.R.) Sinaloa 53 Esquina Abarrotes
## 112970 Refrescos Retornables Sinaloa 53 Esquina Abarrotes
## Hora.inicio Hora.cierre Dia_de_la_semana Subtotal Utilidad
## 107394 07:00 23:00 NA 14 1.62
## 167771 07:00 23:00 NA 12 2.77
## 149429 07:00 23:00 NA 30 3.46
## 168750 07:00 23:00 NA 11 2.54
## 161193 07:00 23:00 NA 10 2.31
## 112970 07:00 23:00 NA 80 2.31
Generar basket
basket<-ddply(df_limpia,c("F.Ticket"),function(df_limpia)paste(df_limpia$Marca, collapse = ","))
Importar transacciones
tr<- read.transactions("C:\\Users\\javaw\\OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey\\7mo Semestre\\Modulo 3\\basket.csv", format = "basket", sep = ",")
reglas.asociacion<-apriori(tr,parameter=list(supp=0.001,conf=0.2,maxlen=10))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 115
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115111 transaction(s)] done [0.04s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object ... done [0.00s].
summary(reglas.asociacion)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001016 Min. :0.2069 Min. :0.003562 Min. : 1.325
## 1st Qu.:0.001103 1st Qu.:0.2356 1st Qu.:0.004504 1st Qu.: 1.787
## Median :0.001416 Median :0.2442 Median :0.005803 Median : 3.972
## Mean :0.001519 Mean :0.2536 Mean :0.006054 Mean :17.563
## 3rd Qu.:0.001651 3rd Qu.:0.2685 3rd Qu.:0.006893 3rd Qu.:21.798
## Max. :0.002745 Max. :0.3098 Max. :0.010503 Max. :65.908
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tr 115111 0.001 0.2
## call
## apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {FANTA} => {COCA COLA} 0.001051159 0.2439516 0.004308884
## [2] {SALVO} => {FABULOSO} 0.001103283 0.3097561 0.003561779
## [3] {FABULOSO} => {SALVO} 0.001103283 0.2347505 0.004699811
## [4] {COCA COLA ZERO} => {COCA COLA} 0.001416025 0.2969035 0.004769310
## [5] {SPRITE} => {COCA COLA} 0.001346526 0.2069426 0.006506763
## [6] {PINOL} => {CLORALEX} 0.001016410 0.2363636 0.004300197
## [7] {BLUE HOUSE} => {BIMBO} 0.001711392 0.2720994 0.006289581
## [8] {HELLMANN´S} => {BIMBO} 0.001537646 0.2649701 0.005803094
## [9] {REYMA} => {CONVERMEX} 0.002093631 0.2441743 0.008574333
## [10] {FUD} => {BIMBO} 0.001589770 0.2183771 0.007279930
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730 0.010502906
## lift count
## [1] 1.561906 121
## [2] 65.908196 127
## [3] 65.908196 127
## [4] 1.900932 163
## [5] 1.324955 155
## [6] 25.030409 117
## [7] 4.078870 197
## [8] 3.971997 177
## [9] 18.564824 241
## [10] 3.273552 183
## [11] 1.673447 316
reglas.asociacion<-sort(reglas.asociacion, by='confidence', decreasing = TRUE)
summary(reglas.asociacion)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001016 Min. :0.2069 Min. :0.003562 Min. : 1.325
## 1st Qu.:0.001103 1st Qu.:0.2356 1st Qu.:0.004504 1st Qu.: 1.787
## Median :0.001416 Median :0.2442 Median :0.005803 Median : 3.972
## Mean :0.001519 Mean :0.2536 Mean :0.006054 Mean :17.563
## 3rd Qu.:0.001651 3rd Qu.:0.2685 3rd Qu.:0.006893 3rd Qu.:21.798
## Max. :0.002745 Max. :0.3098 Max. :0.010503 Max. :65.908
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tr 115111 0.001 0.2
## call
## apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {SALVO} => {FABULOSO} 0.001103283 0.3097561 0.003561779
## [2] {COCA COLA ZERO} => {COCA COLA} 0.001416025 0.2969035 0.004769310
## [3] {BLUE HOUSE} => {BIMBO} 0.001711392 0.2720994 0.006289581
## [4] {HELLMANN´S} => {BIMBO} 0.001537646 0.2649701 0.005803094
## [5] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730 0.010502906
## [6] {REYMA} => {CONVERMEX} 0.002093631 0.2441743 0.008574333
## [7] {FANTA} => {COCA COLA} 0.001051159 0.2439516 0.004308884
## [8] {PINOL} => {CLORALEX} 0.001016410 0.2363636 0.004300197
## [9] {FABULOSO} => {SALVO} 0.001103283 0.2347505 0.004699811
## [10] {FUD} => {BIMBO} 0.001589770 0.2183771 0.007279930
## [11] {SPRITE} => {COCA COLA} 0.001346526 0.2069426 0.006506763
## lift count
## [1] 65.908196 127
## [2] 1.900932 163
## [3] 4.078870 197
## [4] 3.971997 177
## [5] 1.673447 316
## [6] 18.564824 241
## [7] 1.561906 121
## [8] 25.030409 117
## [9] 65.908196 127
## [10] 3.273552 183
## [11] 1.324955 155