Librerías
library(dplyr)
library(tidyverse)
library(janitor)
library(tidyr)
library(lubridate)
library(Matrix)
library(arules)
library(arulesViz)
library(datasets)
library(plyr)
Importar base de datos
setwd("C:\\Users\\javaw\\OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey\\7mo Semestre\\Modulo 3")
df<-read.csv("ventas_bd_limpia.csv")
Ordenar de menor a mayor por BillNo
df_limpia <-df[order(df$BillNo),]
head(df_limpia)
## BillNo Itemname Quantity Date Hour Price
## 1 536365 WHITE HANGING HEART T-LIGHT HOLDER 6 2010-01-12 8 2.55
## 2 536365 WHITE METAL LANTERN 6 2010-01-12 8 3.39
## 3 536365 CREAM CUPID HEARTS COAT HANGER 8 2010-01-12 8 2.75
## 4 536365 KNITTED UNION FLAG HOT WATER BOTTLE 6 2010-01-12 8 3.39
## 5 536365 RED WOOLLY HOTTIE WHITE HEART 6 2010-01-12 8 3.39
## 6 536365 SET 7 BABUSHKA NESTING BOXES 2 2010-01-12 8 7.65
## CustomerID Country
## 1 17850 United Kingdom
## 2 17850 United Kingdom
## 3 17850 United Kingdom
## 4 17850 United Kingdom
## 5 17850 United Kingdom
## 6 17850 United Kingdom
tail(df_limpia)
## BillNo Itemname Quantity Date Hour Price
## 511097 581587 CHILDRENS CUTLERY SPACEBOY 4 2011-09-12 12 4.15
## 511098 581587 PACK OF 20 SPACEBOY NAPKINS 12 2011-09-12 12 0.85
## 511099 581587 CHILDREN'S APRON DOLLY GIRL 6 2011-09-12 12 2.10
## 511100 581587 CHILDRENS CUTLERY DOLLY GIRL 4 2011-09-12 12 4.15
## 511101 581587 CHILDRENS CUTLERY CIRCUS PARADE 4 2011-09-12 12 4.15
## 511102 581587 BAKING SET 9 PIECE RETROSPOT 3 2011-09-12 12 4.95
## CustomerID Country
## 511097 12680 France
## 511098 12680 France
## 511099 12680 France
## 511100 12680 France
## 511101 12680 France
## 511102 12680 France
Generar basket
basket<-ddply(df_limpia,c("BillNo"),function(df_limpia)paste(df_limpia$Itemname, collapse = ","))
Eliminar numero de ticket
basket$BillNo<-NULL
Renombramos el nombre de la columna
colnames(basket) <- c("Itemname")
Exportar basket
write.csv(basket, file = "basket2ventas.csv", quote = FALSE, row.names = FALSE)
Importar transacciones
tr<- read.transactions("C:\\Users\\javaw\\OneDrive - Instituto Tecnologico y de Estudios Superiores de Monterrey\\7mo Semestre\\Modulo 3\\basket2ventas.csv", format = "basket", sep = ",")
reglas.asociacion<-apriori(tr,parameter=list(supp=0.001,conf=0.8,maxlen=10))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 19
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[8530 item(s), 19522 transaction(s)] done [0.23s].
## sorting and recoding items ... [2597 item(s)] done [0.01s].
## creating transaction tree ... done [0.01s].
## checking subsets of size 1 2 3 4 5 6 7 8 9 10 done [4.98s].
## writing ... [6156989 rule(s)] done [2.18s].
## creating S4 object ... done [4.19s].
#summary(reglas.asociacion)
#inspect(reglas.asociacion)
reglas.asociacion<-sort(reglas.asociacion, by='confidence', decreasing = TRUE)
#summary(reglas.asociacion)
#inspect(reglas.asociacion)
top10reglas<-head(reglas.asociacion, n=10, by= "confidence")
inspect(top10reglas)
## lhs rhs support confidence coverage lift count
## [1] {SILVER MINI TAPE MEASURE} => {JUMBO BAG PINK VINTAGE PAISLEY} 0.001126934 1 0.001126934 27.18942 22
## [2] {SILVER MINI TAPE MEASURE} => {STRAWBERRY CHARLOTTE BAG} 0.001126934 1 0.001126934 32.26777 22
## [3] {SILVER MINI TAPE MEASURE} => {LUNCH BAG CARS BLUE} 0.001126934 1 0.001126934 20.20911 22
## [4] {SILVER MINI TAPE MEASURE} => {WOODLAND CHARLOTTE BAG} 0.001126934 1 0.001126934 28.79351 22
## [5] {SILVER MINI TAPE MEASURE} => {RED RETROSPOT CHARLOTTE BAG} 0.001126934 1 0.001126934 22.94007 22
## [6] {PINK POLKADOT BOWL,
## SET/20 FRUIT SALAD PAPER NAPKINS} => {STRAWBERRY CHARLOTTE BAG} 0.001024485 1 0.001024485 32.26777 20
## [7] {PINK POLKADOT BOWL,
## SET/20 FRUIT SALAD PAPER NAPKINS} => {LUNCH BAG CARS BLUE} 0.001024485 1 0.001024485 20.20911 20
## [8] {PINK POLKADOT BOWL,
## SET/20 FRUIT SALAD PAPER NAPKINS} => {WOODLAND CHARLOTTE BAG} 0.001024485 1 0.001024485 28.79351 20
## [9] {PINK POLKADOT BOWL,
## SET/20 FRUIT SALAD PAPER NAPKINS} => {RED RETROSPOT CHARLOTTE BAG} 0.001024485 1 0.001024485 22.94007 20
## [10] {SET/20 FRUIT SALAD PAPER NAPKINS,
## STRAWBERRY CHARLOTTE BAG} => {LUNCH BAG CARS BLUE} 0.001075709 1 0.001075709 20.20911 21
plot(top10reglas,method="graph", engine="htmlwidget")
LS0tDQp0aXRsZTogIk1hcmtldCBCYXNrZXQgQW5hbHlzaXMiDQphdXRob3I6ICJKYXZpZXIgQXlhbGEiDQpkYXRlOiAiYHIgU3lzLkRhdGUoKWAiDQpvdXRwdXQ6DQogIGh0bWxfZG9jdW1lbnQ6DQogICAgdG9jOiB0cnVlDQogICAgdG9jX2Zsb2F0OiB0cnVlDQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQ0KLS0tDQpgYGB7Y3NzLCBlY2hvPUZBTFNFfQ0KaDEsIGg0IHsNCiAgdGV4dC1hbGlnbjogY2VudGVyOw0KfQ0KDQpoMSwgaDIsIGgzIHsNCiAgY29sb3I6ICNFQzA5MDk7DQp9DQpgYGANCg0KDQojIyBMaWJyZXLDrWFzDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KbGlicmFyeShkcGx5cikNCmxpYnJhcnkodGlkeXZlcnNlKQ0KbGlicmFyeShqYW5pdG9yKQ0KbGlicmFyeSh0aWR5cikNCmxpYnJhcnkobHVicmlkYXRlKQ0KbGlicmFyeShNYXRyaXgpDQpsaWJyYXJ5KGFydWxlcykNCmxpYnJhcnkoYXJ1bGVzVml6KQ0KbGlicmFyeShkYXRhc2V0cykNCmxpYnJhcnkocGx5cikNCmBgYA0KDQojIyBJbXBvcnRhciBiYXNlIGRlIGRhdG9zDQpgYGB7cn0NCnNldHdkKCJDOlxcVXNlcnNcXGphdmF3XFxPbmVEcml2ZSAtIEluc3RpdHV0byBUZWNub2xvZ2ljbyB5IGRlIEVzdHVkaW9zIFN1cGVyaW9yZXMgZGUgTW9udGVycmV5XFw3bW8gU2VtZXN0cmVcXE1vZHVsbyAzIikNCmRmPC1yZWFkLmNzdigidmVudGFzX2JkX2xpbXBpYS5jc3YiKQ0KYGBgDQoNCiMjIE9yZGVuYXIgZGUgbWVub3IgYSBtYXlvciBwb3IgQmlsbE5vICANCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQpkZl9saW1waWEgPC1kZltvcmRlcihkZiRCaWxsTm8pLF0NCmhlYWQoZGZfbGltcGlhKQ0KICB0YWlsKGRmX2xpbXBpYSkNCmBgYA0KDQoNCiMjIEdlbmVyYXIgYmFza2V0DQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KYmFza2V0PC1kZHBseShkZl9saW1waWEsYygiQmlsbE5vIiksZnVuY3Rpb24oZGZfbGltcGlhKXBhc3RlKGRmX2xpbXBpYSRJdGVtbmFtZSwgY29sbGFwc2UgPSAiLCIpKQ0KDQpgYGANCg0KIyMgRWxpbWluYXIgbnVtZXJvIGRlIHRpY2tldA0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCmJhc2tldCRCaWxsTm88LU5VTEwNCg0KYGBgDQoNCiMjIFJlbm9tYnJhbW9zIGVsIG5vbWJyZSBkZSBsYSBjb2x1bW5hDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KY29sbmFtZXMoYmFza2V0KSA8LSBjKCJJdGVtbmFtZSIpDQoNCmBgYA0KDQojIyBFeHBvcnRhciBiYXNrZXQNCmBgYHtyIG1lc3NhZ2U9RkFMU0UsIHdhcm5pbmc9RkFMU0V9DQp3cml0ZS5jc3YoYmFza2V0LCBmaWxlID0gImJhc2tldDJ2ZW50YXMuY3N2IiwgcXVvdGUgPSBGQUxTRSwgcm93Lm5hbWVzID0gRkFMU0UpDQoNCmBgYA0KDQojIyBJbXBvcnRhciB0cmFuc2FjY2lvbmVzDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KDQp0cjwtIHJlYWQudHJhbnNhY3Rpb25zKCJDOlxcVXNlcnNcXGphdmF3XFxPbmVEcml2ZSAtIEluc3RpdHV0byBUZWNub2xvZ2ljbyB5IGRlIEVzdHVkaW9zIFN1cGVyaW9yZXMgZGUgTW9udGVycmV5XFw3bW8gU2VtZXN0cmVcXE1vZHVsbyAzXFxiYXNrZXQydmVudGFzLmNzdiIsIGZvcm1hdCA9ICJiYXNrZXQiLCBzZXAgPSAiLCIpDQoNCnJlZ2xhcy5hc29jaWFjaW9uPC1hcHJpb3JpKHRyLHBhcmFtZXRlcj1saXN0KHN1cHA9MC4wMDEsY29uZj0wLjgsbWF4bGVuPTEwKSkNCiNzdW1tYXJ5KHJlZ2xhcy5hc29jaWFjaW9uKQ0KI2luc3BlY3QocmVnbGFzLmFzb2NpYWNpb24pDQpgYGANCg0KYGBge3IgbWVzc2FnZT1GQUxTRSwgd2FybmluZz1GQUxTRX0NCnJlZ2xhcy5hc29jaWFjaW9uPC1zb3J0KHJlZ2xhcy5hc29jaWFjaW9uLCBieT0nY29uZmlkZW5jZScsIGRlY3JlYXNpbmcgPSBUUlVFKQ0KI3N1bW1hcnkocmVnbGFzLmFzb2NpYWNpb24pDQojaW5zcGVjdChyZWdsYXMuYXNvY2lhY2lvbikNCmBgYA0KDQpgYGB7ciBtZXNzYWdlPUZBTFNFLCB3YXJuaW5nPUZBTFNFfQ0KdG9wMTByZWdsYXM8LWhlYWQocmVnbGFzLmFzb2NpYWNpb24sIG49MTAsIGJ5PSAiY29uZmlkZW5jZSIpDQppbnNwZWN0KHRvcDEwcmVnbGFzKQ0KcGxvdCh0b3AxMHJlZ2xhcyxtZXRob2Q9ImdyYXBoIiwgZW5naW5lPSJodG1sd2lkZ2V0IikNCmBgYA==