
Instalar paquetes y llamar
librerías
#install.packages("tidyverse") # Paquete global para manipulación y análisis de datos.
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("dplyr") #Para filtrar bases de datos
library(dplyr)
#install.packages("janitor") # Examinar y limpiar bases de datos sucias
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
#install.packages("Matrix") # Para trabajar con matrices
library(Matrix)
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# install.packages("arules") # Generar reglas de asociación
library(arules)
##
## Attaching package: 'arules'
##
## The following object is masked from 'package:dplyr':
##
## recode
##
## The following objects are masked from 'package:base':
##
## abbreviate, write
# install.packages("arulesViz") # Visualizar las reglas de asociación
library(arulesViz)
# install.packages("datasets")
library(datasets)
# install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
##
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
##
## The following object is masked from 'package:purrr':
##
## compact
<span style="color: red;">Importar la base de
datos</span>
# Uncomment to pick the CSV interactively and obtain its path.
# file.choose()
# Load the abarrotes.csv dataset into a data frame.
# NOTE(review): hard-coded, machine-specific absolute path; confirm or adjust
# before running on another computer.
df <- read.csv("/Users/estebanloyo/Desktop/Codes/RStudio/Bootcamp/abarrotes.csv")
<span style="color: red;">Análisis descriptivo</span>
# Per-column overview of the raw data: quartiles and NA counts for numeric
# columns, length and class for character columns.
summary(df)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.501e+12 Median : 1.00
## Mean :5.950e+12 Mean : 2.11
## 3rd Qu.:7.501e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199188
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Compact structure listing: dimensions plus each column's type and first values.
str(df)
## 'data.frame': 200625 obs. of 22 variables:
## $ vcClaveTienda : chr "MX001" "MX001" "MX001" "MX001" ...
## $ DescGiro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Codigo.Barras : num 7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
## $ PLU : int NA NA NA NA NA NA NA NA NA NA ...
## $ Fecha : chr "19/06/2020" "19/06/2020" "19/06/2020" "19/06/2020" ...
## $ Hora : chr "08:16:21" "08:23:33" "08:24:33" "08:24:33" ...
## $ Marca : chr "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
## $ Fabricante : chr "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
## $ Producto : chr "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
## $ Precio : num 16 14 5 8 19.5 16 14 5 8 19.5 ...
## $ Ult.Costo : num 12.3 14 5 8 15 ...
## $ Unidades : num 1 1 1 1 1 1 1 1 1 1 ...
## $ F.Ticket : int 1 2 3 3 4 1 2 3 3 4 ...
## $ NombreDepartamento: chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ NombreFamilia : chr "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
## $ NombreCategoria : chr "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
## $ Estado : chr "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
## $ Mts.2 : int 60 60 60 60 60 60 60 60 60 60 ...
## $ Tipo.ubicación : chr "Esquina" "Esquina" "Esquina" "Esquina" ...
## $ Giro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Hora.inicio : chr "08:00" "08:00" "08:00" "08:00" ...
## $ Hora.cierre : chr "22:00" "22:00" "22:00" "22:00" ...
# Optional frequency tables per categorical column (kept commented out).
# plyr is attached after dplyr and masks count(), so the dplyr:: prefix is
# used here to reach the dplyr version, which is the one that accepts `sort`.
# dplyr::count(df, vcClaveTienda, sort = TRUE)
# dplyr::count(df, DescGiro, sort = TRUE)
# dplyr::count(df, Fecha, sort = TRUE)
# dplyr::count(df, Hora, sort = TRUE)
# dplyr::count(df, Marca, sort = TRUE)
# dplyr::count(df, Fabricante, sort = TRUE)
# dplyr::count(df, Producto, sort = TRUE)
# dplyr::count(df, NombreDepartamento, sort = TRUE)
# dplyr::count(df, NombreFamilia, sort = TRUE)
# dplyr::count(df, NombreCategoria, sort = TRUE)
# dplyr::count(df, Estado, sort = TRUE)
# dplyr::count(df, Tipo.ubicación, sort = TRUE)
# dplyr::count(df, Giro, sort = TRUE)
# dplyr::count(df, Hora.inicio, sort = TRUE)
# dplyr::count(df, Hora.cierre, sort = TRUE)
# Show the first 10 rows of the raw data.
head(df, 10)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 1 MX001 Abarrotes 7.501021e+12 NA 19/06/2020 08:16:21
## 2 MX001 Abarrotes 7.501032e+12 NA 19/06/2020 08:23:33
## 3 MX001 Abarrotes 7.501000e+12 NA 19/06/2020 08:24:33
## 4 MX001 Abarrotes 7.501031e+12 NA 19/06/2020 08:24:33
## 5 MX001 Abarrotes 7.501026e+12 NA 19/06/2020 08:26:28
## 6 MX001 Abarrotes 7.501021e+12 NA 19/06/2020 08:16:21
## 7 MX001 Abarrotes 7.501032e+12 NA 19/06/2020 08:23:33
## 8 MX001 Abarrotes 7.501000e+12 NA 19/06/2020 08:24:33
## 9 MX001 Abarrotes 7.501031e+12 NA 19/06/2020 08:24:33
## 10 MX001 Abarrotes 7.501026e+12 NA 19/06/2020 08:26:28
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 NUTRI LECHE MEXILAC
## 7 DAN UP DANONE DE MEXICO
## 8 BIMBO GRUPO BIMBO
## 9 PEPSI PEPSI-COLA MEXICANA
## 10 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## Producto Precio Ult.Costo Unidades F.Ticket
## 1 Nutri Leche 1 Litro 16.0 12.31 1 1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 1 3
## 4 Pepsi N.R. 400Ml 8.0 8.00 1 3
## 5 Detergente Blanca Nieves 500G 19.5 15.00 1 4
## 6 Nutri Leche 1 Litro 16.0 12.31 1 1
## 7 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## 8 Rebanadas Bimbo 2Pz 5.0 5.00 1 3
## 9 Pepsi N.R. 400Ml 8.0 8.00 1 3
## 10 Detergente Blanca Nieves 500G 19.5 15.00 1 4
## NombreDepartamento NombreFamilia NombreCategoria
## 1 Abarrotes Lacteos y Refrigerados Leche
## 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 Abarrotes Limpieza del Hogar Lavandería
## 6 Abarrotes Lacteos y Refrigerados Leche
## 7 Abarrotes Lacteos y Refrigerados Yogurt
## 8 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 9 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 10 Abarrotes Limpieza del Hogar Lavandería
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre
## 1 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 2 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 3 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 4 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 5 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 6 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 7 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 8 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 9 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 10 Nuevo León 60 Esquina Abarrotes 08:00 22:00
# Show the last 10 rows of the raw data.
tail(df, 10)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 200616 MX005 Depósito 7.62221e+12 NA 07/08/2020 19:30:13
## 200617 MX005 Depósito 7.62221e+12 NA 25/07/2020 18:42:24
## 200618 MX005 Depósito 7.62221e+12 NA 18/07/2020 22:45:58
## 200619 MX005 Depósito 7.62221e+12 NA 12/07/2020 00:36:34
## 200620 MX005 Depósito 7.62221e+12 NA 12/07/2020 01:08:25
## 200621 MX005 Depósito 7.62221e+12 NA 23/10/2020 22:17:37
## 200622 MX005 Depósito 7.62221e+12 NA 10/10/2020 20:30:20
## 200623 MX005 Depósito 7.62221e+12 NA 10/10/2020 22:40:43
## 200624 MX005 Depósito 7.62221e+12 NA 27/06/2020 22:30:19
## 200625 MX005 Depósito 7.62221e+12 NA 26/06/2020 23:43:34
## Marca Fabricante Producto Precio
## 200616 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200617 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200618 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200619 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200620 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200621 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200622 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200623 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200624 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200625 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 200616 6.92 1 106411 Abarrotes Dulcería
## 200617 6.92 1 104693 Abarrotes Dulcería
## 200618 6.92 1 103856 Abarrotes Dulcería
## 200619 6.92 1 103087 Abarrotes Dulcería
## 200620 6.92 1 103100 Abarrotes Dulcería
## 200621 6.92 1 116598 Abarrotes Dulcería
## 200622 6.92 1 114886 Abarrotes Dulcería
## 200623 6.92 1 114955 Abarrotes Dulcería
## 200624 6.92 1 101121 Abarrotes Dulcería
## 200625 6.92 1 100879 Abarrotes Dulcería
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro Hora.inicio
## 200616 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200617 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200618 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200619 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200620 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200621 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200622 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200623 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200624 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## 200625 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 08:00
## Hora.cierre
## 200616 21:00
## 200617 21:00
## 200618 21:00
## 200619 21:00
## 200620 21:00
## 200621 21:00
## 200622 21:00
## 200623 21:00
## 200624 21:00
## 200625 21:00
<span style="color: red;">Tablas</span>
# Two-way frequency table: one row per store (vcClaveTienda), one column per
# department (NombreDepartamento), cells holding row counts.
df %>%
  tabyl(vcClaveTienda, NombreDepartamento)
## vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
## MX001 95415 515 1 147 245 28
## MX002 6590 21 0 4 10 0
## MX003 4026 15 0 2 8 0
## MX004 82234 932 0 102 114 16
## MX005 10014 0 0 0 0 0
## Papelería Productos a Eliminar Vinos y Licores
## 35 3 80
## 0 0 4
## 0 0 0
## 32 5 20
## 7 0 0
# Two-way frequency table: one row per state (Estado), one column per opening
# time (Hora.inicio), cells holding row counts.
df %>%
  tabyl(Estado, Hora.inicio)
## Estado 07:00 08:00 09:00
## Chiapas 4051 0 0
## Jalisco 0 0 6629
## Nuevo León 0 96469 0
## Quintana Roo 0 10021 0
## Sinaloa 83455 0 0
<span style="color: red;">Limpieza de Datos</span>
<span style="color: red;">Técnica 1. Eliminar valores
irrelevantes</span>
# Remove the PLU column, which is not used by the rest of the analysis.
df$PLU <- NULL
# Keep only rows with a strictly positive price (the raw data contains
# negative prices). which() turns the logical mask into row positions, so a
# missing price is dropped instead of producing an all-NA row, which is what
# plain logical indexing does when the comparison evaluates to NA.
df <- df[which(df$Precio > 0), ]
<span style="color: red;">Técnica 2. Eliminar valores
repetidos</span>
# Remove fully duplicated rows, keeping the first occurrence of each.
df <- distinct(df)
<span style="color: red;">Técnica 3. Corregir errores
tipográficos y similares</span>
# Round fractional unit counts up to the next whole number (e.g. 0.2 -> 1).
df$Unidades <- ceiling(df$Unidades)
# Re-check the column summaries after the cleaning steps so far.
summary(df)
## vcClaveTienda DescGiro Codigo.Barras Fecha
## Length:200473 Length:200473 Min. :8.347e+05 Length:200473
## Class :character Class :character 1st Qu.:7.501e+12 Class :character
## Mode :character Mode :character Median :7.501e+12 Mode :character
## Mean :5.950e+12
## 3rd Qu.:7.501e+12
## Max. :1.750e+13
## Hora Marca Fabricante Producto
## Length:200473 Length:200473 Length:200473 Length:200473
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Precio Ult.Costo Unidades F.Ticket
## Min. : 0.50 Min. : 0.38 Min. : 1.000 Min. : 1
## 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33978
## Median : 16.00 Median : 12.31 Median : 1.000 Median :106035
## Mean : 19.45 Mean : 15.31 Mean : 1.261 Mean :194101
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383065
## Max. :1000.00 Max. :769.23 Max. :96.000 Max. :450040
## NombreDepartamento NombreFamilia NombreCategoria Estado
## Length:200473 Length:200473 Length:200473 Length:200473
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Mts.2 Tipo.ubicación Giro Hora.inicio
## Min. :47.0 Length:200473 Length:200473 Length:200473
## 1st Qu.:53.0 Class :character Class :character Class :character
## Median :60.0 Mode :character Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.cierre
## Length:200473
## Class :character
## Mode :character
##
##
##
<span style="color: red;">Técnica 4. Convertir tipos de
datos</span>
# Convert Fecha from character ("day/month/year", e.g. "19/06/2020") to Date.
df$Fecha <- as.Date(df$Fecha, format = "%d/%m/%Y")
# Confirm the column is now of class Date.
str(df$Fecha)
## Date[1:200473], format: "2020-06-19" "2020-06-19" "2020-06-19" "2020-06-19" "2020-06-19" ...
# Date range and quartiles of the converted Fecha column.
summary(df$Fecha)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## "2020-05-01" "2020-06-06" "2020-07-11" "2020-07-18" "2020-08-29" "2020-11-11"
<span style="color: red;">Técnica 5. Tratar los valores
faltantes</span>
# Borrar todos los NAs
# df <- na.omit(df)
# Reemplazar los NAs con CEROS
# df[is.na(df)] <- 0
# Reemplazar los NAs con PROMEDIO
# df$altura[is.na(df$altura)] <- mean(df$altura, na.rm = TRUE)
<span style="color: red;">Técnica 6. Herramientas
Estadísticas</span>
# Horizontal boxplot of prices to eyeball outliers.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
boxplot(df$Precio, horizontal = TRUE)

# Horizontal boxplot of units per line item.
boxplot(df$Unidades, horizontal = TRUE)

<span style="color: red;">Generar basket</span>
# Sort rows by ticket number (F.Ticket) in ascending order.
# NOTE(review): the basket step groups by F.Ticket alone; this assumes ticket
# numbers are unique across stores - confirm against the data.
df <- df[order(df$F.Ticket), ]
# Show the first rows after sorting.
head(df)
## vcClaveTienda DescGiro Codigo.Barras Fecha Hora
## 1 MX001 Abarrotes 7.501021e+12 2020-06-19 08:16:21
## 2 MX001 Abarrotes 7.501032e+12 2020-06-19 08:23:33
## 3 MX001 Abarrotes 7.501000e+12 2020-06-19 08:24:33
## 4 MX001 Abarrotes 7.501031e+12 2020-06-19 08:24:33
## 5 MX001 Abarrotes 7.501026e+12 2020-06-19 08:26:28
## 6 MX001 Abarrotes 7.501025e+12 2020-06-19 08:26:28
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 FLASH ALEN
## Producto Precio Ult.Costo Unidades F.Ticket
## 1 Nutri Leche 1 Litro 16.0 12.31 1 1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 1 3
## 4 Pepsi N.R. 400Ml 8.0 8.00 1 3
## 5 Detergente Blanca Nieves 500G 19.5 15.00 1 4
## 6 Flash Xtra Brisa Marina 500Ml 9.5 7.31 1 4
## NombreDepartamento NombreFamilia NombreCategoria
## 1 Abarrotes Lacteos y Refrigerados Leche
## 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 Abarrotes Limpieza del Hogar Lavandería
## 6 Abarrotes Limpieza del Hogar Limpiadores Líquidos
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre
## 1 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 2 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 3 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 4 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 5 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 6 Nuevo León 60 Esquina Abarrotes 08:00 22:00
# Show the last rows after sorting (highest ticket numbers).
tail(df)
## vcClaveTienda DescGiro Codigo.Barras Fecha Hora
## 107247 MX004 Carnicería 1.024877e+10 2020-10-15 11:51:40
## 167624 MX004 Carnicería 7.501080e+12 2020-10-15 11:51:40
## 149282 MX004 Carnicería 7.501055e+12 2020-10-15 11:54:37
## 168603 MX004 Carnicería 7.501214e+12 2020-10-15 11:56:52
## 161046 MX004 Carnicería 7.501031e+12 2020-10-15 12:01:54
## 112823 MX004 Carnicería 7.500470e+07 2020-10-15 12:02:36
## Marca Fabricante Producto
## 107247 YEMINA HERDEZ PASTA SPAGHETTI YEMINA 200G
## 167624 DEL FUERTE ALIMENTOS DEL FUERTE PURE DE TOMATE DEL FUERTE 345G
## 149282 COCA COLA ZERO COCA COLA COCA COLA ZERO 600ML
## 168603 DIAMANTE EMPACADOS ARROZ DIAMANTE225G
## 161046 PEPSI PEPSI-COLA MEXICANA PEPSI N. R. 500ML
## 112823 COCA COLA COCA COLA COCA COLA RETORNABLE 500ML
## Precio Ult.Costo Unidades F.Ticket NombreDepartamento
## 107247 7 5.38 2 450032 Abarrotes
## 167624 12 9.23 1 450032 Abarrotes
## 149282 15 11.54 2 450034 Abarrotes
## 168603 11 8.46 1 450037 Abarrotes
## 161046 10 7.69 1 450039 Abarrotes
## 112823 10 7.69 8 450040 Abarrotes
## NombreFamilia NombreCategoria Estado Mts.2
## 107247 Sopas y Pastas Fideos, Spaguetti, Tallarines Sinaloa 53
## 167624 Salsas y Sazonadores Salsa para Spaguetti Sinaloa 53
## 149282 Bebidas Refrescos Retornables Sinaloa 53
## 168603 Granos y Semillas Arroz Sinaloa 53
## 161046 Bebidas Refrescos Plástico (N.R.) Sinaloa 53
## 112823 Bebidas Refrescos Retornables Sinaloa 53
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## 107247 Esquina Abarrotes 07:00 23:00
## 167624 Esquina Abarrotes 07:00 23:00
## 149282 Esquina Abarrotes 07:00 23:00
## 168603 Esquina Abarrotes 07:00 23:00
## 161046 Esquina Abarrotes 07:00 23:00
## 112823 Esquina Abarrotes 07:00 23:00
# Build one basket per ticket: ddply() splits df by F.Ticket and collapses the
# brands (Marca) of each ticket into a single comma-separated string, which it
# returns in a column named V1. The callback argument is named ticket_rows so
# it does not shadow the global df.
basket <- ddply(
  df,
  "F.Ticket",
  function(ticket_rows) paste(ticket_rows$Marca, collapse = ",")
)
# Drop the ticket number; only the brand list is exported.
basket$F.Ticket <- NULL
# Rename the remaining V1 column to Marca.
colnames(basket) <- "Marca"
# Export the baskets, one ticket per line, unquoted so each brand is a
# separate comma-delimited field. write.csv() always writes the column header,
# so the first line of basket.csv is the literal text "Marca".
# NOTE(review): with quoting disabled, a brand name that itself contains a
# comma would be split into two items - confirm none do.
write.csv(basket, "basket.csv", quote = FALSE, row.names = FALSE)
<span style="color: red;">Market Basket Analysis</span>
# Uncomment to locate basket.csv interactively.
# file.choose()
# Load the exported baskets as arules transactions: one ticket per line,
# brands separated by commas. skip = 1 discards the "Marca" header line that
# write.csv() puts at the top of the file; without it the header is read as an
# extra one-item transaction, inflating the transaction and item counts by one
# (output rendered before this fix includes that extra transaction).
# NOTE(review): hard-coded absolute path; it must point at the basket.csv
# produced by the export step - confirm it matches the working directory.
tr <- read.transactions(
  "/Users/estebanloyo/Desktop/Codes/RStudio/Bootcamp/basket.csv",
  format = "basket",
  sep = ",",
  skip = 1
)
# Mine association rules with apriori: minimum support 0.001, minimum
# confidence 0.2, at most 10 items per rule.
reglas.asociacion <- apriori(
  tr,
  parameter = list(supp = 0.001, conf = 0.2, maxlen = 10)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 115
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115031 transaction(s)] done [0.01s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
# Overview of the mined rules: rule lengths and quality-measure distributions.
summary(reglas.asociacion)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.2069 Min. :0.003564 Min. : 1.326
## 1st Qu.:0.001104 1st Qu.:0.2358 1st Qu.:0.004507 1st Qu.: 1.789
## Median :0.001417 Median :0.2442 Median :0.005807 Median : 3.972
## Mean :0.001521 Mean :0.2537 Mean :0.006056 Mean :17.558
## 3rd Qu.:0.001652 3rd Qu.:0.2685 3rd Qu.:0.006894 3rd Qu.:21.808
## Max. :0.002747 Max. :0.3098 Max. :0.010502 Max. :65.862
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tr 115031 0.001 0.2
## call
## apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
# List every rule with its support, confidence, coverage, lift and count.
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {FANTA} => {COCA COLA} 0.001051890 0.2439516 0.004311881
## [2] {SALVO} => {FABULOSO} 0.001104050 0.3097561 0.003564257
## [3] {FABULOSO} => {SALVO} 0.001104050 0.2347505 0.004703080
## [4] {COCA COLA ZERO} => {COCA COLA} 0.001417009 0.2969035 0.004772627
## [5] {SPRITE} => {COCA COLA} 0.001347463 0.2069426 0.006511288
## [6] {PINOL} => {CLORALEX} 0.001017117 0.2368421 0.004294495
## [7] {BLUE HOUSE} => {BIMBO} 0.001712582 0.2720994 0.006293956
## [8] {HELLMANN´S} => {BIMBO} 0.001538716 0.2649701 0.005807130
## [9] {REYMA} => {CONVERMEX} 0.002095087 0.2441743 0.008580296
## [10] {FUD} => {BIMBO} 0.001590876 0.2186380 0.007276299
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894 0.010501517
## lift count
## [1] 1.562646 121
## [2] 65.862391 127
## [3] 65.862391 127
## [4] 1.901832 163
## [5] 1.325583 155
## [6] 25.063647 117
## [7] 4.078691 197
## [8] 3.971823 177
## [9] 18.551922 241
## [10] 3.277319 183
## [11] 1.675626 316
# Reorder the rules from highest to lowest confidence.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
reglas.asociacion <- sort(
  reglas.asociacion,
  by = "confidence",
  decreasing = TRUE
)
# Same summary as before sorting; ordering does not change the statistics.
summary(reglas.asociacion)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.2069 Min. :0.003564 Min. : 1.326
## 1st Qu.:0.001104 1st Qu.:0.2358 1st Qu.:0.004507 1st Qu.: 1.789
## Median :0.001417 Median :0.2442 Median :0.005807 Median : 3.972
## Mean :0.001521 Mean :0.2537 Mean :0.006056 Mean :17.558
## 3rd Qu.:0.001652 3rd Qu.:0.2685 3rd Qu.:0.006894 3rd Qu.:21.808
## Max. :0.002747 Max. :0.3098 Max. :0.010502 Max. :65.862
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tr 115031 0.001 0.2
## call
## apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
# List the rules again, now ordered by decreasing confidence.
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {SALVO} => {FABULOSO} 0.001104050 0.3097561 0.003564257
## [2] {COCA COLA ZERO} => {COCA COLA} 0.001417009 0.2969035 0.004772627
## [3] {BLUE HOUSE} => {BIMBO} 0.001712582 0.2720994 0.006293956
## [4] {HELLMANN´S} => {BIMBO} 0.001538716 0.2649701 0.005807130
## [5] {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894 0.010501517
## [6] {REYMA} => {CONVERMEX} 0.002095087 0.2441743 0.008580296
## [7] {FANTA} => {COCA COLA} 0.001051890 0.2439516 0.004311881
## [8] {PINOL} => {CLORALEX} 0.001017117 0.2368421 0.004294495
## [9] {FABULOSO} => {SALVO} 0.001104050 0.2347505 0.004703080
## [10] {FUD} => {BIMBO} 0.001590876 0.2186380 0.007276299
## [11] {SPRITE} => {COCA COLA} 0.001347463 0.2069426 0.006511288
## lift count
## [1] 65.862391 127
## [2] 1.901832 163
## [3] 4.078691 197
## [4] 3.971823 177
## [5] 1.675626 316
## [6] 18.551922 241
## [7] 1.562646 121
## [8] 25.063647 117
## [9] 65.862391 127
## [10] 3.277319 183
## [11] 1.325583 155
# Keep the ten highest-confidence rules.
top10reglas <- head(reglas.asociacion, n=10, by= "confidence")
# Draw the selected rules as a graph, rendered as an interactive htmlwidget.
plot(top10reglas, method = "graph", engine = "htmlwidget")
LS0tCnRpdGxlOiAiQWJhcnJvdGVzIgphdXRob3I6ICJFc3RlYmFuIExveW8gLSBBMDA4Mzc3MjUiCmRhdGU6ICIyMDI0LTA5LTEwIgpvdXRwdXQ6ICAgICAKICBodG1sX2RvY3VtZW50OgogICAgdG9jOiBUcnVlCiAgICB0b2NfZmxvYXQ6IFRydWUKICAgIGNvZGVfZG93bmxvYWQ6IFRydWUKICAgIHRoZW1lOiBjb3NtbwplZGl0b3Jfb3B0aW9uczogCiAgY2h1bmtfb3V0cHV0X3R5cGU6IGNvbnNvbGUKLS0tCgohW10oL1VzZXJzL2VzdGViYW5sb3lvL0Rlc2t0b3AvQ29kZXMvUlN0dWRpby9Cb290Y2FtcC81OWY1MGRfZDVkOTg0ZTM2MTRjNDA0MmI5YTBkOWMzMWQ5ZGE5YjN+bXYyLmdpZikKCiMgPHNwYW4gc3R5bGU9ICJjb2xvcjogcmVkOyI+IEluc3RhbGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXM8L3NwYW4+CmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygidGlkeXZlcnNlIikgIyBQYXF1ZXRlIGdsb2JhbCBwYXJhIG1hbnVwdWxhY2nDs24geSBhbsOhbGlzaXMgZGUgZGF0b3MuCmxpYnJhcnkodGlkeXZlcnNlKQoKI2luc3RhbGwucGFja2FnZXMoImRwbHlyIikgI1BhcmEgZmlsdHJhciBiYXNlcyBkZSBkYXRvcwpsaWJyYXJ5KGRwbHlyKQoKI2luc3RhbGwucGFja2FnZXMoImphbml0b3IiKSAjIEV4YW1pbmFyIHkgbGltcGlhciBiYXNlcyBkZSBkYXRvcyBzdWNpYXMKbGlicmFyeShqYW5pdG9yKQoKI2luc3RhbGwucGFja2FnZXMoIk1hdHJpeCIpICMgUGFyYSB0cmFiYWphciBjb24gbWF0cmljZXMKbGlicmFyeShNYXRyaXgpCgojIGluc3RhbGwucGFja2FnZXMoImFydWxlcyIpICMgR2VuZXJhciByZWdsYXMgZGUgYXNvY2lhY2nDs24KbGlicmFyeShhcnVsZXMpCgojIGluc3RhbGwucGFja2FnZXMoImFydWxlc1ZpeiIpICMgVml6dWFsaXphcyBsYXMgcmVnbGFzIGRlIGFzb2NpYWNpw7NuCmxpYnJhcnkoYXJ1bGVzVml6KQoKIyBpbnN0YWxsLnBhY2thZ2VzKCJkYXRhc2V0cyIpCmxpYnJhcnkoZGF0YXNldHMpCgojIGluc3RhbGwucGFja2FnZXMoInBseXIiKQpsaWJyYXJ5KHBseXIpCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgPkltcG9ydGFyIGxhIGJhc2UgZGUgZGF0b3M8L3NwYW4+CmBgYHtyfQojIGZpbGUuY2hvb3NlKCkKZGYgPC0gcmVhZC5jc3YoIi9Vc2Vycy9lc3RlYmFubG95by9EZXNrdG9wL0NvZGVzL1JTdHVkaW8vQm9vdGNhbXAvYWJhcnJvdGVzLmNzdiIpCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgPkFuw6FsaXNpcyBkZXNjcmlwdGl2bzwvc3Bhbj4KYGBge3J9CnN1bW1hcnkoZGYpCnN0cihkZikKCiNjb3VudChkZiwgdmNDbGF2ZVRpZW5kYSwgc29ydD1UKQojY291bnQoZGYsIERlc2NHaXJvLCBzb3J0PVQpCiNjb3VudChkZiwgRmVjaGEsIHNvcnQ9VCkKI2NvdW50KGRmLCBIb3JhLCBzb3J0PVQpCiNjb3VudChkZiwgTWFyY2EsIHNvcnQ9VCkKI2NvdW50KGRmLCBGYWJyaWNhbnRlLCBzb3J0PVQpCiNjb3VudChkZiwgUHJvZHVjdG8sIHNvcnQ9VCkKI2NvdW50KGRmLCBOb21i
cmVEZXBhcnRhbWVudG8sIHNvcnQ9VCkKI2NvdW50KGRmLCBOb21icmVGYW1pbGlhLCBzb3J0PVQpCiNjb3VudChkZiwgTm9tYnJlQ2F0ZWdvcmlhLCBzb3J0PVQpCiNjb3VudChkZiwgRXN0YWRvLCBzb3J0PVQpCiNjb3VudChkZiwgVGlwby51YmljYWNpw7NuLCBzb3J0PVQpCiNjb3VudChkZiwgR2lybywgc29ydD1UKQojY291bnQoZGYsIEhvcmEuaW5pY2lvLCBzb3J0PVQpCiNjb3VudChkZiwgSG9yYS5jaWVycmUsIHNvcnQ9VCkKCmhlYWQoZGYsIDEwKQp0YWlsKGRmLCAxMCkKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgPlRhYmxhczwvc3Bhbj4KYGBge3J9CiMgVGFibGFzIGRlIFRpZW5kYSB5IERlcGFydGFtZW50b3MKdGFieWwoZGYsIHZjQ2xhdmVUaWVuZGEsIE5vbWJyZURlcGFydGFtZW50bykKIyBUYWJsYSBkZSBFc3RhZG8geSBIb3JhIGRlIEluaWNpbwp0YWJ5bChkZiwgRXN0YWRvLCBIb3JhLmluaWNpbykKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjogcmVkOyA+TGltcGllemEgZGUgRGF0b3M8L3NwYW4+CgojIyA8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgPlTDqWNuaWNhIDEuIEVsaW1pbmFyIHZhbG9yZXMgaXJyZWxldmFudGVzPC9zcGFuPgpgYGB7cn0KIyBFbGltaW5hciBjb2x1bW5hcwpkZiA8LSBzdWJzZXQoZGYsIHNlbGVjdCA9IC1jKFBMVSkpCgojIEVsaW1pbmFyIHJlbmdsb25lcwpkZiA8LSBkZltkZiRQcmVjaW8gPjAsIF0KYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgPlTDqWNuaWNhIDIuIEVsaW1pbmFyIHZhbG9yZXMgcmVwZXRpZG9zPC9zcGFuPgpgYGB7cn0KZGYgPC0gZGlzdGluY3QoZGYpCmBgYAoKIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiByZWQ7ID5Uw6ljbmljYSAzLiBDb3JyZ2lyIGVycm9yZXMgdGlwb2dyw6FmaWNvcyB5IHNpbWlsYXJlczwvc3Bhbj4KYGBge3J9CmRmJFVuaWRhZGVzIDwtIGNlaWxpbmcoZGYkVW5pZGFkZXMpCnN1bW1hcnkoZGYpCmBgYAoKIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiByZWQ7ID5Uw6ljbmljYSA0LiBDb252ZXJ0aXIgdGlwb3MgZGUgZGF0b3M8L3NwYW4+CmBgYHtyfQojIENvbnZlcnRpciBkZSBjYXJhY3RlciBhIGZlY2hhCmRmJEZlY2hhIDwtIGFzLkRhdGUoZGYkRmVjaGEsIGZvcm1hdCA9ICIlZC8lbS8lWSIpCnN0cihkZiRGZWNoYSkKc3VtbWFyeShkZiRGZWNoYSkKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgPlTDqWNuaWNhIDUuIFRyYXRhciBsb3MgdmFsb3JlcyBmYWx0YW50ZXM8L3NwYW4+CmBgYHtyfQojIEJvcnJhciB0b2RvcyBsb3MgTkFzCiMgZGYgPC0gbmEub21pdChkZikKCiMgUmVlbXBsYXphciBsb3MgTkFzIGNvbiBDRVJPUwojIGRmW2lzLm5hKGRmKV0gPC0gMAoKIyBSZWVtcGxhemFyIGxvcyBOQXMgY29uIFBST01FRElPCiMgZGYkYWx0dXJhW2lzLm5hKGRmKV0gPC0gbWVhbihkZiRhbHR1cmEsIG5hLnJtPVQpCmBgYAoKIyMgPHNwYW4gc3R5bGU9ImNvbG9yOiByZWQ7ID5Uw6ljbmljYSA2LiBIZXJyYW1p
ZW50YXMgRXN0YWTDrXN0aWNhczwvc3Bhbj4KYGBge3J9CmJveHBsb3QoZGYkUHJlY2lvLCBob3Jpem9udGFsID0gVCkKYm94cGxvdChkZiRVbmlkYWRlcywgaG9yaXpvbnRhbCA9IFQpCmBgYAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6IHJlZDsgPkdlbmVyYXIgYmFza2V0PC9zcGFuPgpgYGB7cn0KIyBPcmRlbmFyIGRlIG1lbm9yIGEgbWF5b3IgbGEgY29sdW1uYSBkZSBUaWNrZXQKZGYgPC0gZGZbb3JkZXIoZGYkRi5UaWNrZXQpLCBdCmhlYWQoZGYpCnRhaWwoZGYpCgojIEdlbmVyYXIgZWwgYmFza2V0CmJhc2tldCA8LSBkZHBseShkZixjKCJGLlRpY2tldCIpLCBmdW5jdGlvbihkZilwYXN0ZShkZiRNYXJjYSwgY29sbGFwc2UgPSAiLCIpKQoKIyBFbGltaW5hciBuw7ptZXJvIGRlIFRpY2tldApiYXNrZXQkRi5UaWNrZXQgPC0gTlVMTAoKIyBDYW1iaWFyIGVsIHTDrXR1bG8gZGUgbGEgY29sdW1uIGEgVjEgcG9yIE1hcmNhCmNvbG5hbWVzKGJhc2tldCkgPC0gYygiTWFyY2EiKQoKIyBFeHBvcnRhciBiYXNrZXQKd3JpdGUuY3N2KGJhc2tldCwgImJhc2tldC5jc3YiLCBxdW90ZSA9IEYsIHJvdy5uYW1lcyA9IEYpCmBgYAoKCiMgPHNwYW4gc3R5bGU9ImNvbG9yOiByZWQ7ID5NYXJrZXQgQmFza2V0IEFuYWx5c2lzPC9zcGFuPgoKYGBge3Igd2FybmluZz1GQUxTRX0KI2ZpbGUuY2hvb3NlKCkKdHIgPC0gcmVhZC50cmFuc2FjdGlvbnMoIi9Vc2Vycy9lc3RlYmFubG95by9EZXNrdG9wL0NvZGVzL1JTdHVkaW8vQm9vdGNhbXAvYmFza2V0LmNzdiIsIGZvcm1hdCA9ICJiYXNrZXQiLCBzZXAgPSAiLCIpCgpyZWdsYXMuYXNvY2lhY2lvbiA8LSBhcHJpb3JpKHRyLCBwYXJhbWV0ZXIgPSBsaXN0KHN1cHA9MC4wMDEsIGNvbmY9MC4yLCBtYXhsZW49MTApKQpzdW1tYXJ5KHJlZ2xhcy5hc29jaWFjaW9uKQppbnNwZWN0KHJlZ2xhcy5hc29jaWFjaW9uKQoKcmVnbGFzLmFzb2NpYWNpb24gPC0gc29ydChyZWdsYXMuYXNvY2lhY2lvbiwgYnk9ICJjb25maWRlbmNlIiwgZGVjcmVhc2luZyA9IFQpCnN1bW1hcnkocmVnbGFzLmFzb2NpYWNpb24pCmluc3BlY3QocmVnbGFzLmFzb2NpYWNpb24pCgp0b3AxMHJlZ2xhcyA8LSBoZWFkKHJlZ2xhcy5hc29jaWFjaW9uLCBuPTEwLCBieT0gImNvbmZpZGVuY2UiKQpwbG90KHRvcDEwcmVnbGFzLCBtZXRob2QgPSAiZ3JhcGgiLCBlbmdpbmUgPSAiaHRtbHdpZGdldCIpCmBgYAoK