Análisis de las Ventas de Abarrotes

Una empresa con 5 tiendas en el país solicita un análisis de sus ventas de abarrotes entre mayo y noviembre de 2020.

Paso 0. Instalar paquetes y librerías

#install.packages("dplyr")
#install.packages("tidyverse")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.4
## ✔ ggplot2   3.4.1     ✔ stringr   1.5.0
## ✔ lubridate 1.9.2     ✔ tibble    3.2.0
## ✔ purrr     1.0.1     ✔ tidyr     1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("janitor")
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
#install.packages("lubridate")
library(lubridate)

Paso 1. Importar la base de datos

# Load the raw sales export into `bd`. Uncomment file.choose() to locate the
# CSV interactively; the absolute path below points into one user's home
# directory.
#file.choose()
bd <- read.csv(file = "/Users/genarorodriguezalcantara/Desktop/Tec/R Files Manipulación de Datos/BDD/abarrotes.csv")

Paso 2. Entender la base de datos

# Per-column summary of the raw data.
bd %>% summary()
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 1.00   
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 1.00   
##                                        Mean   :5.950e+12   Mean   : 2.11   
##                                        3rd Qu.:7.501e+12   3rd Qu.: 1.00   
##                                        Max.   :1.750e+13   Max.   :30.00   
##                                                            NA's   :199188  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##                                                                        
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##                                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##                                                                         
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Optional frequency tables for the text columns (uncomment a line to run it).
#count(bd, vcClaveTienda, sort = TRUE)
#count(bd, DescGiro, sort = TRUE)
#count(bd, Marca, sort = TRUE)
#count(bd, Fabricante, sort = TRUE)
#count(bd, Producto, sort = TRUE)
#count(bd, NombreDepartamento, sort = TRUE)
#count(bd, NombreFamilia, sort = TRUE)
#count(bd, NombreCategoria, sort = TRUE)
#count(bd, Estado, sort = TRUE)
#count(bd, Tipo.ubicación, sort = TRUE)
#count(bd, Giro, sort = TRUE)
# Print the raw data as a tibble for a compact preview with column types.
bd %>% tibble()
## # A tibble: 200,625 × 22
##    vcClaveTienda DescGiro Codig…¹   PLU Fecha Hora  Marca Fabri…² Produ…³ Precio
##    <chr>         <chr>      <dbl> <int> <chr> <chr> <chr> <chr>   <chr>    <dbl>
##  1 MX001         Abarrot… 7.50e12    NA 19/0… 08:1… NUTR… MEXILAC Nutri …   16  
##  2 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… DAN … DANONE… DANUP …   14  
##  3 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BIMBO GRUPO … Rebana…    5  
##  4 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… PEPSI PEPSI-… Pepsi …    8  
##  5 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BLAN… FABRIC… Deterg…   19.5
##  6 MX001         Abarrot… 7.50e12    NA 19/0… 08:1… NUTR… MEXILAC Nutri …   16  
##  7 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… DAN … DANONE… DANUP …   14  
##  8 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BIMBO GRUPO … Rebana…    5  
##  9 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… PEPSI PEPSI-… Pepsi …    8  
## 10 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BLAN… FABRIC… Deterg…   19.5
## # … with 200,615 more rows, 12 more variables: Ult.Costo <dbl>, Unidades <dbl>,
## #   F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## #   NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## #   Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable
## #   names ¹​Codigo.Barras, ²​Fabricante, ³​Producto
# Show the first seven rows of the raw data.
bd %>% head(n = 7)
##   vcClaveTienda  DescGiro Codigo.Barras PLU      Fecha     Hora
## 1         MX001 Abarrotes   7.50102e+12  NA 19/06/2020 08:16:21
## 2         MX001 Abarrotes   7.50103e+12  NA 19/06/2020 08:23:33
## 3         MX001 Abarrotes   7.50100e+12  NA 19/06/2020 08:24:33
## 4         MX001 Abarrotes   7.50103e+12  NA 19/06/2020 08:24:33
## 5         MX001 Abarrotes   7.50103e+12  NA 19/06/2020 08:26:28
## 6         MX001 Abarrotes   7.50102e+12  NA 19/06/2020 08:16:21
## 7         MX001 Abarrotes   7.50103e+12  NA 19/06/2020 08:23:33
##                        Marca                 Fabricante
## 1                NUTRI LECHE                    MEXILAC
## 2                     DAN UP           DANONE DE MEXICO
## 3                      BIMBO                GRUPO BIMBO
## 4                      PEPSI        PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                NUTRI LECHE                    MEXILAC
## 7                     DAN UP           DANONE DE MEXICO
##                             Producto Precio Ult.Costo Unidades F.Ticket
## 1                Nutri Leche 1 Litro   16.0     12.31        1        1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 3                Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 4                   Pepsi N.R. 400Ml    8.0      8.00        1        3
## 5      Detergente Blanca Nieves 500G   19.5     15.00        1        4
## 6                Nutri Leche 1 Litro   16.0     12.31        1        1
## 7 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
##   NombreDepartamento          NombreFamilia           NombreCategoria
## 1          Abarrotes Lacteos y Refrigerados                     Leche
## 2          Abarrotes Lacteos y Refrigerados                    Yogurt
## 3          Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4          Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5          Abarrotes     Limpieza del Hogar                Lavandería
## 6          Abarrotes Lacteos y Refrigerados                     Leche
## 7          Abarrotes Lacteos y Refrigerados                    Yogurt
##       Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 1 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 2 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 3 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 4 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 5 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 6 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 7 Nuevo León    60        Esquina Abarrotes        8:00       22:00
#?tail
# Cross-tabulate store (rows) against department (columns).
bd %>% tabyl(vcClaveTienda, NombreDepartamento)
##  vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
##          MX001     95415                515      1      147        245       28
##          MX002      6590                 21      0        4         10        0
##          MX003      4026                 15      0        2          8        0
##          MX004     82234                932      0      102        114       16
##          MX005     10014                  0      0        0          0        0
##  Papelería Productos a Eliminar Vinos y Licores
##         35                    3              80
##          0                    0               4
##          0                    0               0
##         32                    5              20
##          7                    0               0
# Cross-tabulate product family (rows) against store (columns).
bd %>% tabyl(NombreFamilia, vcClaveTienda)
##              NombreFamilia MX001 MX002 MX003 MX004 MX005
##                 Accesorios    88     0     0    58     0
##                     Aceite   346    29    18  1088     2
##                   Aderezos   544    21    30   909     3
##                    Alcohol     6     2     0     8     0
##                 Alimentos    256     9    15   530     0
##         Alimentos a Granel     1     0     0     0     0
##    Alimentos para Mascotas   300     9    36   533     0
##                Analgésicos     0     0     1     0     0
##                  Antiácido     0     0     1     0     0
##                 Antigripal    17     0     0    40     0
##     Artículos de Escritura     0     0     0     6     0
##              Azúcar y Miel   349     0     0    38     4
##                    Bebidas 38511  3416  1460 21504    27
##       Bebidas Premezcladas     0     4     0    19     0
##                    Botanas 13051  1194   498  5724  1116
##  C. Frías y Salchichonería   451     1   143  1528     0
##                   Cereales   533     7    10   210     0
##                    Cerveza  4644   196    26  1041  8110
##                   Cigarros  3775   451    75  2237   279
##                  Cuadernos     7     0     0     8     0
##           Cuidado Personal  1940   117    40  3319    17
##             Dermatológicos    33     1     0    20     0
##                Desechables   809    38    25  2588     0
##                   Dulcería  1725    45   108   486   307
##                   Especias  1596    28    22  3249    26
##                   Galletas  3259   218   256  3754     0
##          Granos y Semillas  1138    18    19  1488     0
##     Harinas y Complementos   460    20    43  1237     0
##     Lacteos y Refrigerados  6795   139   503 10221     1
##                    Latería  1540    90   108  3365     4
##         Limpieza del Hogar  3771   295   172  4470    16
##                   Mantecas   203     7     6   581     0
##       Material de Curación    46     0     0    11     0
##    Materiales y Accesorios    28     0     0    18     7
##             Pan y Tortilla  5782    39   294  4387     0
##                    Pañales   114     8     0   215     0
##                 Pegamentos   104     8     6   102     0
##   Pilas para uso Doméstico   141     2     2    12     0
##                      Pollo     1     0     0     0     0
##                    Postres    29     0     2    56     0
##       Productos Higiénicos    57     4     0   129     0
##      Productos sin Familia     3     0     0     5     0
##                        Ron     1     0     0     0     0
##       Salsas y Sazonadores  1550    94    59  3527    90
##                    Sangría    13     0     0     0     0
##             Sopas y Pastas  1280    65    37  2749    10
##       Te, Chocolate y Café   454    42    27   906     2
##                    Tequila    62     0     0     1     0
##                     Varios    73     1     0    39     0
##          Velas y Veladoras   579    11     9  1039     0
##                     Whisky     4     0     0     0     0

Hallazgos

1. Fechas y Horas están en formato de carácter.

2. Precios negativos.

3. Falta una columna de ventas.

Paso 3. Limpiar la base de datos.

Existen 6 Técnicas para limpiar datos.

Técnica 1. Remover valores irrelevantes.

### Eliminar columnas

# Drop the PLU and Codigo.Barras columns; all other columns are kept as-is.
bd1 <- bd %>% select(-c(PLU, Codigo.Barras))

### Eliminar renglones

# Keep only the rows whose price is strictly positive, then re-check the
# column summaries.
bd2 <- bd1[bd1[["Precio"]] > 0, ]
bd2 %>% summary()
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200478      Length:200478      Length:200478      Length:200478     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200478      Length:200478      Length:200478      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200478     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33977   Class :character  
##  Median : 12.31   Median : 1.000   Median :106034   Mode  :character  
##  Mean   : 15.31   Mean   : 1.261   Mean   :194096                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383062                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200478      Length:200478      Length:200478      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200478      Length:200478      Length:200478      Length:200478     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Técnica 2. Remover valores duplicados.

### ¿Cuántos renglones duplicados tenemos?

# List the rows that repeat an earlier row in every column.
bd2[which(duplicated(bd2)), ]
##    vcClaveTienda  DescGiro      Fecha     Hora                      Marca
## 6          MX001 Abarrotes 19/06/2020 08:16:21                NUTRI LECHE
## 7          MX001 Abarrotes 19/06/2020 08:23:33                     DAN UP
## 8          MX001 Abarrotes 19/06/2020 08:24:33                      BIMBO
## 9          MX001 Abarrotes 19/06/2020 08:24:33                      PEPSI
## 10         MX001 Abarrotes 19/06/2020 08:26:28 BLANCA NIEVES (DETERGENTE)
##                    Fabricante                           Producto Precio
## 6                     MEXILAC                Nutri Leche 1 Litro   16.0
## 7            DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 8                 GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 9         PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 10 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
##    Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 6      12.31        1        1          Abarrotes Lacteos y Refrigerados
## 7      14.00        1        2          Abarrotes Lacteos y Refrigerados
## 8       5.00        1        3          Abarrotes         Pan y Tortilla
## 9       8.00        1        3          Abarrotes                Bebidas
## 10     15.00        1        4          Abarrotes     Limpieza del Hogar
##              NombreCategoria     Estado Mts.2 Tipo.ubicación      Giro
## 6                      Leche Nuevo León    60        Esquina Abarrotes
## 7                     Yogurt Nuevo León    60        Esquina Abarrotes
## 8      Pan Dulce Empaquetado Nuevo León    60        Esquina Abarrotes
## 9  Refrescos Plástico (N.R.) Nuevo León    60        Esquina Abarrotes
## 10                Lavandería Nuevo León    60        Esquina Abarrotes
##    Hora.inicio Hora.cierre
## 6         8:00       22:00
## 7         8:00       22:00
## 8         8:00       22:00
## 9         8:00       22:00
## 10        8:00       22:00
  # Count the fully duplicated rows.
  length(which(duplicated(bd2)))
## [1] 5

### Eliminar renglones duplicados

# Keep a single copy of each fully duplicated row.
bd3 <- bd2 %>% distinct()

Técnica 3. Resolver errores tipográficos y similares.

### Precios en absoluto

# Make every price non-negative by taking its absolute value.
# This starts from bd1, so the rows filtered out for bd2/bd3 are still present.
bd4 <- bd1 %>% mutate(Precio = abs(Precio))
bd4 %>% summary()
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200625      Length:200625      Length:200625      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200625     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33964   Class :character  
##  Median : 12.31   Median : 1.000   Median :105993   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193990                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383005                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200625      Length:200625      Length:200625      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

### Unidades en enteros

# Round fractional unit counts up to the next whole number.
bd5 <- bd4 %>% mutate(Unidades = ceiling(Unidades))
bd5 %>% summary()
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200625      Length:200625      Length:200625      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200625     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33964   Class :character  
##  Median : 12.31   Median : 1.000   Median :105993   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193990                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383005                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200625      Length:200625      Length:200625      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Técnica 4. Convertir los tipos de datos

### Convertir de carácter a fecha

# Parse the day/month/year text in Fecha into Date values.
# NOTE(review): this continues from bd3, so the bd4/bd5 adjustments (absolute
# prices, rounded-up units) are not carried forward — confirm that is intended.
bd6 <- bd3 %>% mutate(Fecha = as.Date(Fecha, format = "%d/%m/%Y"))
bd6 %>% summary()
##  vcClaveTienda        DescGiro             Fecha                Hora          
##  Length:200473      Length:200473      Min.   :2020-05-01   Length:200473     
##  Class :character   Class :character   1st Qu.:2020-06-06   Class :character  
##  Mode  :character   Mode  :character   Median :2020-07-11   Mode  :character  
##                                        Mean   :2020-07-18                     
##                                        3rd Qu.:2020-08-29                     
##                                        Max.   :2020-11-11                     
##     Marca            Fabricante          Producto             Precio       
##  Length:200473      Length:200473      Length:200473      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200473     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33978   Class :character  
##  Median : 12.31   Median : 1.000   Median :106035   Mode  :character  
##  Mean   : 15.31   Mean   : 1.261   Mean   :194101                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383065                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200473      Length:200473      Length:200473      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200473      Length:200473      Length:200473      Length:200473     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
  # Preview as a tibble to check the column type now reported for Fecha.
  bd6 %>% tibble()
## # A tibble: 200,473 × 20
##    vcCla…¹ DescG…² Fecha      Hora  Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>   <chr>   <date>     <chr> <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001   Abarro… 2020-06-19 08:1… NUTR… MEXILAC Nutri …   16     12.3        1
##  2 MX001   Abarro… 2020-06-19 08:2… DAN … DANONE… DANUP …   14     14          1
##  3 MX001   Abarro… 2020-06-19 08:2… BIMBO GRUPO … Rebana…    5      5          1
##  4 MX001   Abarro… 2020-06-19 08:2… PEPSI PEPSI-… Pepsi …    8      8          1
##  5 MX001   Abarro… 2020-06-19 08:2… BLAN… FABRIC… Deterg…   19.5   15          1
##  6 MX001   Abarro… 2020-06-19 08:2… FLASH ALEN    Flash …    9.5    7.31       1
##  7 MX001   Abarro… 2020-06-19 08:2… VARI… DANONE… Danone…   11     11          1
##  8 MX001   Abarro… 2020-06-19 08:2… ZOTE  FABRIC… Jabon …    9.5    7.31       1
##  9 MX001   Abarro… 2020-06-19 08:2… ALWA… PROCTE… T Feme…   23.5   18.1        1
## 10 MX001   Abarro… 2020-06-19 15:2… JUMEX JUMEX   Jugo D…   12     12          1
## # … with 200,463 more rows, 10 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto, ⁵​Ult.Costo, ⁶​Unidades

### Convertir de carácter a entero

# Keep only the hour component of the time text in Hora. Everything from the
# first colon onward is removed, so an hour written without a leading zero
# (e.g. "8:16:21") yields "8" rather than the "8:" that a fixed two-character
# cut would produce (and which as.integer() would later turn into NA).
bd7 <- bd6
bd7$Hora <- sub(":.*$", "", bd7$Hora)
# Hora is still character at this point; preview it before converting.
tibble(bd7)
## # A tibble: 200,473 × 20
##    vcCla…¹ DescG…² Fecha      Hora  Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>   <chr>   <date>     <chr> <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001   Abarro… 2020-06-19 08    NUTR… MEXILAC Nutri …   16     12.3        1
##  2 MX001   Abarro… 2020-06-19 08    DAN … DANONE… DANUP …   14     14          1
##  3 MX001   Abarro… 2020-06-19 08    BIMBO GRUPO … Rebana…    5      5          1
##  4 MX001   Abarro… 2020-06-19 08    PEPSI PEPSI-… Pepsi …    8      8          1
##  5 MX001   Abarro… 2020-06-19 08    BLAN… FABRIC… Deterg…   19.5   15          1
##  6 MX001   Abarro… 2020-06-19 08    FLASH ALEN    Flash …    9.5    7.31       1
##  7 MX001   Abarro… 2020-06-19 08    VARI… DANONE… Danone…   11     11          1
##  8 MX001   Abarro… 2020-06-19 08    ZOTE  FABRIC… Jabon …    9.5    7.31       1
##  9 MX001   Abarro… 2020-06-19 08    ALWA… PROCTE… T Feme…   23.5   18.1        1
## 10 MX001   Abarro… 2020-06-19 15    JUMEX JUMEX   Jugo D…   12     12          1
## # … with 200,463 more rows, 10 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto, ⁵​Ult.Costo, ⁶​Unidades
  # Convert the hour text to integer and inspect the resulting structure.
  bd7 <- bd7 %>% mutate(Hora = as.integer(Hora))
  bd7 %>% str()
## 'data.frame':    200473 obs. of  20 variables:
##  $ vcClaveTienda     : chr  "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Fecha             : Date, format: "2020-06-19" "2020-06-19" ...
##  $ Hora              : int  8 8 8 8 8 8 8 8 8 15 ...
##  $ Marca             : chr  "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr  "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr  "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num  16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
##  $ Ult.Costo         : num  12.3 14 5 8 15 ...
##  $ Unidades          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : int  1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr  "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr  "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts.2             : int  60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo.ubicación    : chr  "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora.inicio       : chr  "8:00" "8:00" "8:00" "8:00" ...
##  $ Hora.cierre       : chr  "22:00" "22:00" "22:00" "22:00" ...

Técnica 5. Tratar valores faltantes (NA)

### ¿Cuántos NA tengo en la base de datos?

# Total NA count across all cells: first for bd7, then for the raw bd.
bd7 %>% is.na() %>% sum()
## [1] 0
bd %>% is.na() %>% sum()
## [1] 199188

### ¿Cuántos NA tengo por variable?

# Count the NA values in each column of the raw data. vapply() pins the result
# to exactly one integer per column, so the return type cannot change with the
# input the way sapply()'s automatic simplification can.
vapply(bd, function(x) sum(is.na(x)), integer(1))
##      vcClaveTienda           DescGiro      Codigo.Barras                PLU 
##                  0                  0                  0             199188 
##              Fecha               Hora              Marca         Fabricante 
##                  0                  0                  0                  0 
##           Producto             Precio          Ult.Costo           Unidades 
##                  0                  0                  0                  0 
##           F.Ticket NombreDepartamento      NombreFamilia    NombreCategoria 
##                  0                  0                  0                  0 
##             Estado              Mts.2     Tipo.ubicación               Giro 
##                  0                  0                  0                  0 
##        Hora.inicio        Hora.cierre 
##                  0                  0

### Borrar todos los registros NA de una tabla

# Listwise deletion: keep only the rows of the raw table that have no NA in
# any column, then summarise what is left.
# The per-variable counts show PLU as the only column with NAs, so this keeps
# just the rows where PLU is recorded.
bd8 <- na.omit(bd)
summary(bd8)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:1437        Length:1437        Min.   :6.750e+08   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:6.750e+08   1st Qu.: 1.000  
##  Mode  :character   Mode  :character   Median :6.750e+08   Median : 1.000  
##                                        Mean   :2.616e+11   Mean   : 2.112  
##                                        3rd Qu.:6.750e+08   3rd Qu.: 1.000  
##                                        Max.   :7.501e+12   Max.   :30.000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:1437        Length:1437        Length:1437        Length:1437       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio        Ult.Costo        Unidades    
##  Length:1437        Min.   :30.00   Min.   : 1.00   Min.   :1.000  
##  Class :character   1st Qu.:90.00   1st Qu.:64.62   1st Qu.:1.000  
##  Mode  :character   Median :90.00   Median :64.62   Median :1.000  
##                     Mean   :87.94   Mean   :56.65   Mean   :1.124  
##                     3rd Qu.:90.00   3rd Qu.:64.62   3rd Qu.:1.000  
##                     Max.   :90.00   Max.   :64.62   Max.   :7.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :   772   Length:1437        Length:1437        Length:1437       
##  1st Qu.: 99955   Class :character   Class :character   Class :character  
##  Median :102493   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100595                                                           
##  3rd Qu.:106546                                                           
##  Max.   :118356                                                           
##     Estado              Mts.2       Tipo.ubicación         Giro          
##  Length:1437        Min.   :58.00   Length:1437        Length:1437       
##  Class :character   1st Qu.:58.00   Class :character   Class :character  
##  Mode  :character   Median :58.00   Mode  :character   Mode  :character  
##                     Mean   :58.07                                        
##                     3rd Qu.:58.00                                        
##                     Max.   :60.00                                        
##  Hora.inicio        Hora.cierre       
##  Length:1437        Length:1437       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

### Reemplazar los NA con CEROS

# Copy of the raw table in which every NA cell is replaced by 0.
bd9 <- replace(bd, is.na(bd), 0)
summary(bd9)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU          
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 0.00000  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 0.00000  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 0.00000  
##                                        Mean   :5.950e+12   Mean   : 0.01513  
##                                        3rd Qu.:7.501e+12   3rd Qu.: 0.00000  
##                                        Max.   :1.750e+13   Max.   :30.00000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

### Reemplazar los NA con el PROMEDIO

# Mean imputation on a copy of the raw table: the NA entries of PLU are filled
# with the mean of the observed PLU values; every other column is untouched.
bd10 <- bd
bd10[["PLU"]] <- replace(
  bd10[["PLU"]],
  is.na(bd10[["PLU"]]),
  mean(bd10[["PLU"]], na.rm = TRUE)
)
tibble(bd10)
## # A tibble: 200,625 × 22
##    vcClaveTienda DescGiro Codig…¹   PLU Fecha Hora  Marca Fabri…² Produ…³ Precio
##    <chr>         <chr>      <dbl> <dbl> <chr> <chr> <chr> <chr>   <chr>    <dbl>
##  1 MX001         Abarrot… 7.50e12  2.11 19/0… 08:1… NUTR… MEXILAC Nutri …   16  
##  2 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… DAN … DANONE… DANUP …   14  
##  3 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… BIMBO GRUPO … Rebana…    5  
##  4 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… PEPSI PEPSI-… Pepsi …    8  
##  5 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… BLAN… FABRIC… Deterg…   19.5
##  6 MX001         Abarrot… 7.50e12  2.11 19/0… 08:1… NUTR… MEXILAC Nutri …   16  
##  7 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… DAN … DANONE… DANUP …   14  
##  8 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… BIMBO GRUPO … Rebana…    5  
##  9 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… PEPSI PEPSI-… Pepsi …    8  
## 10 MX001         Abarrot… 7.50e12  2.11 19/0… 08:2… BLAN… FABRIC… Deterg…   19.5
## # … with 200,615 more rows, 12 more variables: Ult.Costo <dbl>, Unidades <dbl>,
## #   F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## #   NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## #   Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable
## #   names ¹​Codigo.Barras, ²​Fabricante, ³​Producto

Técnica 6. Verificar datos con métodos estadísticos.

# Work on a copy of the cleaned table and draw one horizontal boxplot each for
# unit price and units sold, to inspect their spread and extreme values.
bd11 <- bd7
boxplot(bd11[["Precio"]], horizontal = TRUE)

boxplot(bd11[["Unidades"]], horizontal = TRUE)

Paso 4. Manipular base de datos | Computación de Ventas

### Agregar columnas

# Add the weekday of each sale as a number (the summary below shows it ranging
# from 1 to 7). With lubridate's default week start, 1 corresponds to Sunday.
bd11[["diadelasemana"]] <- lubridate::wday(bd11[["Fecha"]])
summary(bd11)
##  vcClaveTienda        DescGiro             Fecha                 Hora      
##  Length:200473      Length:200473      Min.   :2020-05-01   Min.   : 0.00  
##  Class :character   Class :character   1st Qu.:2020-06-06   1st Qu.:13.00  
##  Mode  :character   Mode  :character   Median :2020-07-11   Median :17.00  
##                                        Mean   :2020-07-18   Mean   :16.23  
##                                        3rd Qu.:2020-08-29   3rd Qu.:20.00  
##                                        Max.   :2020-11-11   Max.   :23.00  
##     Marca            Fabricante          Producto             Precio       
##  Length:200473      Length:200473      Length:200473      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200473     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33978   Class :character  
##  Median : 12.31   Median : 1.000   Median :106035   Mode  :character  
##  Mean   : 15.31   Mean   : 1.261   Mean   :194101                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383065                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200473      Length:200473      Length:200473      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200473      Length:200473      Length:200473      Length:200473     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  diadelasemana  
##  Min.   :1.000  
##  1st Qu.:2.000  
##  Median :4.000  
##  Mean   :3.911  
##  3rd Qu.:6.000  
##  Max.   :7.000

### Agregar columna de ventas

# Sales amount per row: unit price multiplied by units sold.
bd11[["subtotal"]] <- with(bd11, Precio * Unidades)
summary(bd11)
##  vcClaveTienda        DescGiro             Fecha                 Hora      
##  Length:200473      Length:200473      Min.   :2020-05-01   Min.   : 0.00  
##  Class :character   Class :character   1st Qu.:2020-06-06   1st Qu.:13.00  
##  Mode  :character   Mode  :character   Median :2020-07-11   Median :17.00  
##                                        Mean   :2020-07-18   Mean   :16.23  
##                                        3rd Qu.:2020-08-29   3rd Qu.:20.00  
##                                        Max.   :2020-11-11   Max.   :23.00  
##     Marca            Fabricante          Producto             Precio       
##  Length:200473      Length:200473      Length:200473      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200473     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33978   Class :character  
##  Median : 12.31   Median : 1.000   Median :106035   Mode  :character  
##  Mean   : 15.31   Mean   : 1.261   Mean   :194101                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383065                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200473      Length:200473      Length:200473      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200473      Length:200473      Length:200473      Length:200473     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  diadelasemana      subtotal     
##  Min.   :1.000   Min.   :   1.0  
##  1st Qu.:2.000   1st Qu.:  12.0  
##  Median :4.000   Median :  18.0  
##  Mean   :3.911   Mean   :  24.3  
##  3rd Qu.:6.000   3rd Qu.:  27.0  
##  Max.   :7.000   Max.   :2496.0

Paso 5. Exportar base de datos limpia.

# Keep the cleaned table under its export name and write it to CSV without a
# row-name column. The file name is relative, so it resolves against the
# current working directory.
bd_abarrotes_limpia <- bd11
write.csv(
  x = bd_abarrotes_limpia,
  file = "bd_abarrotes_limpia.csv",
  row.names = FALSE
)

Market Basket Analysis

Instalar paquetes y llamar librerías

#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
## 
##     compact
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
#install.packages("Matrix")
library(Matrix)
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
#install.packages("arules")
library(arules)
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
#install.packages("arulesViz")
library(arulesViz)
#install.packages("datasets")
library(datasets)

Importar la base de datos

# Load the cleaned sales table from disk. file.choose() is kept, disabled, as
# the interactive way to look up the path.
# NOTE(review): the export step writes "bd_abarrotes_limpia.csv" relative to
# the working directory; confirm this absolute path points at that same file.
#file.choose()
bd_limpia <- read.csv(
  file = "/Users/genarorodriguezalcantara/Desktop/Tec/R Files Manipulación de Datos/BDD/bd_abarrotes_limpia.csv"
)

Ordenar de menor a mayor los tickets

# Sorting by ticket number is left disabled.
# NOTE(review): as written, the line below would overwrite bd_limpia with the
# integer permutation returned by order(), not with a sorted table; a sort
# would be bd_limpia[order(bd_limpia$F.Ticket), ].
#bd_limpia <- order(bd_limpia$F.Ticket)
# Preview the first rows of the imported table.
head(bd_limpia)
##   vcClaveTienda  DescGiro      Fecha Hora                      Marca
## 1         MX001 Abarrotes 2020-06-19    8                NUTRI LECHE
## 2         MX001 Abarrotes 2020-06-19    8                     DAN UP
## 3         MX001 Abarrotes 2020-06-19    8                      BIMBO
## 4         MX001 Abarrotes 2020-06-19    8                      PEPSI
## 5         MX001 Abarrotes 2020-06-19    8 BLANCA NIEVES (DETERGENTE)
## 6         MX001 Abarrotes 2020-06-19    8                      FLASH
##                   Fabricante                           Producto Precio
## 1                    MEXILAC                Nutri Leche 1 Litro   16.0
## 2           DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 3                GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 4        PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 5 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
## 6                       ALEN      Flash Xtra Brisa Marina 500Ml    9.5
##   Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 1     12.31        1        1          Abarrotes Lacteos y Refrigerados
## 2     14.00        1        2          Abarrotes Lacteos y Refrigerados
## 3      5.00        1        3          Abarrotes         Pan y Tortilla
## 4      8.00        1        3          Abarrotes                Bebidas
## 5     15.00        1        4          Abarrotes     Limpieza del Hogar
## 6      7.31        1        4          Abarrotes     Limpieza del Hogar
##             NombreCategoria     Estado Mts.2 Tipo.ubicación      Giro
## 1                     Leche Nuevo León    60        Esquina Abarrotes
## 2                    Yogurt Nuevo León    60        Esquina Abarrotes
## 3     Pan Dulce Empaquetado Nuevo León    60        Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León    60        Esquina Abarrotes
## 5                Lavandería Nuevo León    60        Esquina Abarrotes
## 6      Limpiadores Líquidos Nuevo León    60        Esquina Abarrotes
##   Hora.inicio Hora.cierre diadelasemana subtotal
## 1        8:00       22:00             6     16.0
## 2        8:00       22:00             6     14.0
## 3        8:00       22:00             6      5.0
## 4        8:00       22:00             6      8.0
## 5        8:00       22:00             6     19.5
## 6        8:00       22:00             6      9.5
# Preview the last six rows of the imported table.
tail(bd_limpia, n = 6L)
##        vcClaveTienda DescGiro      Fecha Hora             Marca    Fabricante
## 200468         MX005 Depósito 2020-07-12    1 TRIDENT XTRA CARE CADBURY ADAMS
## 200469         MX005 Depósito 2020-10-23   22 TRIDENT XTRA CARE CADBURY ADAMS
## 200470         MX005 Depósito 2020-10-10   20 TRIDENT XTRA CARE CADBURY ADAMS
## 200471         MX005 Depósito 2020-10-10   22 TRIDENT XTRA CARE CADBURY ADAMS
## 200472         MX005 Depósito 2020-06-27   22 TRIDENT XTRA CARE CADBURY ADAMS
## 200473         MX005 Depósito 2020-06-26   23 TRIDENT XTRA CARE CADBURY ADAMS
##                                 Producto Precio Ult.Costo Unidades F.Ticket
## 200468 Trident Xtracare Freshmint 16.32G      9      6.92        1   103100
## 200469 Trident Xtracare Freshmint 16.32G      9      6.92        1   116598
## 200470 Trident Xtracare Freshmint 16.32G      9      6.92        1   114886
## 200471 Trident Xtracare Freshmint 16.32G      9      6.92        1   114955
## 200472 Trident Xtracare Freshmint 16.32G      9      6.92        1   101121
## 200473 Trident Xtracare Freshmint 16.32G      9      6.92        1   100879
##        NombreDepartamento NombreFamilia NombreCategoria       Estado Mts.2
## 200468          Abarrotes      Dulcería Gomas de Mazcar Quintana Roo    58
## 200469          Abarrotes      Dulcería Gomas de Mazcar Quintana Roo    58
## 200470          Abarrotes      Dulcería Gomas de Mazcar Quintana Roo    58
## 200471          Abarrotes      Dulcería Gomas de Mazcar Quintana Roo    58
## 200472          Abarrotes      Dulcería Gomas de Mazcar Quintana Roo    58
## 200473          Abarrotes      Dulcería Gomas de Mazcar Quintana Roo    58
##        Tipo.ubicación       Giro Hora.inicio Hora.cierre diadelasemana subtotal
## 200468        Esquina Mini súper        8:00       21:00             1        9
## 200469        Esquina Mini súper        8:00       21:00             6        9
## 200470        Esquina Mini súper        8:00       21:00             7        9
## 200471        Esquina Mini súper        8:00       21:00             7        9
## 200472        Esquina Mini súper        8:00       21:00             7        9
## 200473        Esquina Mini súper        8:00       21:00             6        9

Extraer productos por ticket

# Collapse each ticket into one comma-separated string of the brands sold on
# it. ddply() splits bd_limpia by F.Ticket and returns one row per ticket, with
# the pasted string in column V1.
# NOTE(review): grouping uses F.Ticket alone; confirm ticket numbers are unique
# across stores (vcClaveTienda), otherwise tickets from different stores merge.
basket <- plyr::ddply(
  bd_limpia,
  "F.Ticket",
  function(renglones_ticket) paste(renglones_ticket[["Marca"]], collapse = ",")
)
head(bd_limpia)
##   vcClaveTienda  DescGiro      Fecha Hora                      Marca
## 1         MX001 Abarrotes 2020-06-19    8                NUTRI LECHE
## 2         MX001 Abarrotes 2020-06-19    8                     DAN UP
## 3         MX001 Abarrotes 2020-06-19    8                      BIMBO
## 4         MX001 Abarrotes 2020-06-19    8                      PEPSI
## 5         MX001 Abarrotes 2020-06-19    8 BLANCA NIEVES (DETERGENTE)
## 6         MX001 Abarrotes 2020-06-19    8                      FLASH
##                   Fabricante                           Producto Precio
## 1                    MEXILAC                Nutri Leche 1 Litro   16.0
## 2           DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 3                GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 4        PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 5 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
## 6                       ALEN      Flash Xtra Brisa Marina 500Ml    9.5
##   Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 1     12.31        1        1          Abarrotes Lacteos y Refrigerados
## 2     14.00        1        2          Abarrotes Lacteos y Refrigerados
## 3      5.00        1        3          Abarrotes         Pan y Tortilla
## 4      8.00        1        3          Abarrotes                Bebidas
## 5     15.00        1        4          Abarrotes     Limpieza del Hogar
## 6      7.31        1        4          Abarrotes     Limpieza del Hogar
##             NombreCategoria     Estado Mts.2 Tipo.ubicación      Giro
## 1                     Leche Nuevo León    60        Esquina Abarrotes
## 2                    Yogurt Nuevo León    60        Esquina Abarrotes
## 3     Pan Dulce Empaquetado Nuevo León    60        Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León    60        Esquina Abarrotes
## 5                Lavandería Nuevo León    60        Esquina Abarrotes
## 6      Limpiadores Líquidos Nuevo León    60        Esquina Abarrotes
##   Hora.inicio Hora.cierre diadelasemana subtotal
## 1        8:00       22:00             6     16.0
## 2        8:00       22:00             6     14.0
## 3        8:00       22:00             6      5.0
## 4        8:00       22:00             6      8.0
## 5        8:00       22:00             6     19.5
## 6        8:00       22:00             6      9.5
# Preview the first six tickets with their brand strings.
head(basket, n = 6L)
##   F.Ticket                                                         V1
## 1        1                                                NUTRI LECHE
## 2        2                                                     DAN UP
## 3        3                                                BIMBO,PEPSI
## 4        4 BLANCA NIEVES (DETERGENTE),FLASH,VARIOS DANONE,ZOTE,ALWAYS
## 5        5                                                JUMEX,PEPSI
## 6        6                                                 VALLE FRUT

Eliminar número de ticket

# Drop the ticket number so only the brand string of each ticket remains.
basket[["F.Ticket"]] <- NULL

Renombrar columnas

# Name the single remaining column "Marca".
names(basket) <- "Marca"

Exportar basket

#write.csv(basket,"basket.csv",quote = FALSE, row.names = FALSE)

Importar transacciones

# Read the exported baskets as arules transactions: one ticket per line, with
# brands separated by commas.
# quote = "" switches off quote parsing. The default treats both ' and " as
# quote characters, so a brand name containing an apostrophe or an unbalanced
# quote makes scan() warn "EOF within quoted string" and mis-tokenise the line.
# NOTE(review): if basket.csv was produced by the write.csv() call above, its
# first line is the column header "Marca" and is read as a one-item
# transaction; consider header = TRUE after confirming the file contents.
#file.choose()
tra <- read.transactions(
  "/Users/genarorodriguezalcantara/Desktop/Tec/R Files Manipulación de Datos/BDD/basket.csv",
  format = "basket",
  sep = ",",
  quote = ""
)
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in asMethod(object): removing duplicated items in transactions

Conclusiones Estratégicas

1. Ubicar productos Salvo junto con Fabuloso…

2. Promoción en Reyma, Pinol, Queso/Jamón/Mayonesa, Zero/Light/Fanta…

3. Realizar un Business Case para venta de sándwiches preparados…

Generar reglas de asociación

# Mine association rules from the transactions in `tra`.
# Thresholds: minimum support 0.001, minimum confidence 0.2, and at most
# 10 items per rule.
reglas_as <- apriori(
  tra,
  parameter = list(supp = 0.001, conf = 0.2, maxlen = 10)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 115 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115031 transaction(s)] done [0.02s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
# Print an overview of the mined rules: how many there are, the distribution
# of rule lengths, and summary statistics of the quality measures.
summary(reglas_as)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001017   Min.   :0.2069   Min.   :0.003564   Min.   : 1.326  
##  1st Qu.:0.001104   1st Qu.:0.2358   1st Qu.:0.004507   1st Qu.: 1.789  
##  Median :0.001417   Median :0.2442   Median :0.005807   Median : 3.972  
##  Mean   :0.001521   Mean   :0.2537   Mean   :0.006056   Mean   :17.558  
##  3rd Qu.:0.001652   3rd Qu.:0.2685   3rd Qu.:0.006894   3rd Qu.:21.808  
##  Max.   :0.002747   Max.   :0.3098   Max.   :0.010502   Max.   :65.862  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##   tra        115031   0.001        0.2
##                                                                          call
##  apriori(data = tra, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
# List every rule with its lhs, rhs, support, confidence, coverage, lift
# and count.
inspect(reglas_as)
##      lhs                  rhs         support     confidence coverage   
## [1]  {FANTA}           => {COCA COLA} 0.001051890 0.2439516  0.004311881
## [2]  {SALVO}           => {FABULOSO}  0.001104050 0.3097561  0.003564257
## [3]  {FABULOSO}        => {SALVO}     0.001104050 0.2347505  0.004703080
## [4]  {COCA COLA ZERO}  => {COCA COLA} 0.001417009 0.2969035  0.004772627
## [5]  {SPRITE}          => {COCA COLA} 0.001347463 0.2069426  0.006511288
## [6]  {PINOL}           => {CLORALEX}  0.001017117 0.2368421  0.004294495
## [7]  {BLUE HOUSE}      => {BIMBO}     0.001712582 0.2720994  0.006293956
## [8]  {HELLMANN´S}      => {BIMBO}     0.001538716 0.2649701  0.005807130
## [9]  {REYMA}           => {CONVERMEX} 0.002095087 0.2441743  0.008580296
## [10] {FUD}             => {BIMBO}     0.001590876 0.2186380  0.007276299
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894  0.010501517
##      lift      count
## [1]   1.562646 121  
## [2]  65.862391 127  
## [3]  65.862391 127  
## [4]   1.901832 163  
## [5]   1.325583 155  
## [6]  25.063647 117  
## [7]   4.078691 197  
## [8]   3.971823 177  
## [9]  18.551922 241  
## [10]  3.277319 183  
## [11]  1.675626 316
# Reorder the rules by confidence (descending) and overwrite `reglas_as`
# with the sorted set, then print its summary.
reglas_as <- sort(
  reglas_as,
  decreasing = TRUE,
  by = "confidence"
)
summary(reglas_as)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001017   Min.   :0.2069   Min.   :0.003564   Min.   : 1.326  
##  1st Qu.:0.001104   1st Qu.:0.2358   1st Qu.:0.004507   1st Qu.: 1.789  
##  Median :0.001417   Median :0.2442   Median :0.005807   Median : 3.972  
##  Mean   :0.001521   Mean   :0.2537   Mean   :0.006056   Mean   :17.558  
##  3rd Qu.:0.001652   3rd Qu.:0.2685   3rd Qu.:0.006894   3rd Qu.:21.808  
##  Max.   :0.002747   Max.   :0.3098   Max.   :0.010502   Max.   :65.862  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##   tra        115031   0.001        0.2
##                                                                          call
##  apriori(data = tra, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
# List the rules again; `reglas_as` now holds them in the order produced by
# the confidence sort above.
inspect(reglas_as)
##      lhs                  rhs         support     confidence coverage   
## [1]  {SALVO}           => {FABULOSO}  0.001104050 0.3097561  0.003564257
## [2]  {COCA COLA ZERO}  => {COCA COLA} 0.001417009 0.2969035  0.004772627
## [3]  {BLUE HOUSE}      => {BIMBO}     0.001712582 0.2720994  0.006293956
## [4]  {HELLMANN´S}      => {BIMBO}     0.001538716 0.2649701  0.005807130
## [5]  {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894  0.010501517
## [6]  {REYMA}           => {CONVERMEX} 0.002095087 0.2441743  0.008580296
## [7]  {FANTA}           => {COCA COLA} 0.001051890 0.2439516  0.004311881
## [8]  {PINOL}           => {CLORALEX}  0.001017117 0.2368421  0.004294495
## [9]  {FABULOSO}        => {SALVO}     0.001104050 0.2347505  0.004703080
## [10] {FUD}             => {BIMBO}     0.001590876 0.2186380  0.007276299
## [11] {SPRITE}          => {COCA COLA} 0.001347463 0.2069426  0.006511288
##      lift      count
## [1]  65.862391 127  
## [2]   1.901832 163  
## [3]   4.078691 197  
## [4]   3.971823 177  
## [5]   1.675626 316  
## [6]  18.551922 241  
## [7]   1.562646 121  
## [8]  25.063647 117  
## [9]  65.862391 127  
## [10]  3.277319 183  
## [11]  1.325583 155
# Take the first ten rules when ranked by confidence and draw them as a
# graph using the htmlwidget engine.
top10reglas <- head(reglas_as, by = "confidence", n = 10)
plot(
  top10reglas,
  engine = "htmlwidget",
  method = "graph"
)
LS0tCnRpdGxlOiAiQWJhcnJvdGVzIEFuYWx5c2lzIgphdXRob3I6ICJHZW5hcm8gUm9kcsOtZ3VleiBBbGPDoW50YXJhIC0gQTAwODMzMTcyIgpkYXRlOiAiMjAyMy0wMy0xNyIKb3V0cHV0OgogIGh0bWxfZG9jdW1lbnQ6CiAgICB0b2M6IHRydWUKICAgIHRvY19mbG9hdDogdHJ1ZQogICAgY29kZV9kb3dubG9hZDogdHJ1ZQotLS0KIVtdKC9Vc2Vycy9nZW5hcm9yb2RyaWd1ZXphbGNhbnRhcmEvRGVza3RvcC9jb21vLWFwbGljYXItdmVudGFzLXBvci1pbXB1bHNvLWVuLXRpZW5kYS1hYmFycm90ZXMucG5nKQoKCiMgQW5hbGlzaXMgZGUgbGFzIFZlbnRhcyBkZSBBYmFycm90ZXMKCiMjIyBVbmEgRW1wcmVzYSBjb24gNSB0aWVuZGFzIGVuIGVsIHBhaXMgc29saWNpdGEgdW4gYW5hbGlzaXMgZGUgc3VzIHZlbnRhcyBkZSAKIyMgYWJhcnJvdGVzIGVudHJlIG1heW8geSBub3ZpZW1icmUgZGUgMjAyMAoKIyMgUGFzbyAwLiBJbnN0YWxhciBwYXVxZXRlcyB5IGxpYnJlcmlhcwpgYGB7cn0KI2luc3RhbGwucGFja2FnZXMoImRwbHlyIikKI2luc3RhbGwucGFja2FnZXMoInRpZHl2ZXJzZSIpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkodGlkeXZlcnNlKQojaW5zdGFsbC5wYWNrYWdlcygiamFuaXRvciIpCmxpYnJhcnkoamFuaXRvcikKI2luc3RhbGwucGFja2FnZXMoImx1YnJpZGF0ZSIpCmxpYnJhcnkobHVicmlkYXRlKQpgYGAKCiMjIFBhc28gMS4gSW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcwpgYGB7cn0KI2ZpbGUuY2hvb3NlKCkKYmQgPC0gcmVhZC5jc3YoIi9Vc2Vycy9nZW5hcm9yb2RyaWd1ZXphbGNhbnRhcmEvRGVza3RvcC9UZWMvUiBGaWxlcyBNYW5pcHVsYWNpb8yBbiBkZSBEYXRvcy9CREQvYWJhcnJvdGVzLmNzdiIpCmBgYAoKIyMgUGFzbyAyLiBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zCmBgYHtyfQpzdW1tYXJ5KGJkKQpgYGAKCmBgYHtyfQojY291bnQoYmQsIHZjQ2xhdmVUaWVuZGEsIHNvcnQ9VFJVRSkKI2NvdW50KGJkLCBEZXNjR2lybywgc29ydD1UUlVFKQojY291bnQoYmQsIE1hcmNhLCBzb3J0PVRSVUUpCiNjb3VudChiZCwgRmFicmljYW50ZSwgc29ydD1UUlVFKQojY291bnQoYmQsIFByb2R1Y3RvLCBzb3J0PVRSVUUpCiNjb3VudChiZCwgTm9tYnJlRGVwYXJ0YW1lbnRvLCBzb3J0PVRSVUUpCiNjb3VudChiZCwgTm9tYnJlRmFtaWxpYSwgc29ydD1UUlVFKQojY291bnQoYmQsIE5vbWJyZUNhdGVnb3JpYSwgc29ydD1UUlVFKQojb3VudChiZCwgRXN0YWRvLCBzb3J0PVRSVUUpCiNjb3VudChiZCwgVGlwby51YmljYWNpw7NuLCBzb3J0PVRSVUUpCiNjb3VudChiZCwgR2lybywgc29ydD1UUlVFKQpgYGAKCmBgYHtyfQp0aWJibGUoYmQpCmhlYWQoYmQsIG49NykKIz90YWlsCmBgYAoKYGBge3J9CnRhYnlsKGJkLCB2Y0NsYXZlVGllbmRhLCBOb21icmVEZXBhcnRhbWVudG8pCnRhYnlsKGJkLCBOb21icmVGYW1pbGlhLCB2Y0NsYXZlVGllbmRhKQoKYGBgCgojIyBIYWxsYXpnb3MKIyMjIDEuIEZlY2hhcyB5IEhvcmFz
IGVzdMOhbiBlbiBmb3JtYXRvIGRlIGNhcmFjdGVyLgojIyMgMi4gUHJlY2lvcyBuZWdhdGl2b3MuCiMjIyAzLiBGYWx0YSB1bmEgY29sdW1uYSBkZSB2ZW50YXMuCgojIyBQYXNvIDMuIExpbXBpYXIgbGEgYmFzZSBkZSBkYXRvcy4KCiMjIyBFeGlzdGVuIDYgVMOpY25pY2FzIHBhcmEgbGltcGlhciBkYXRvcy4KCiMjIyBUw6ljbmljYSAxLiBSZW1vdmVyIHZhbG9yZXMgaXJyZWxldmFudGVzLgogIAogICMjIyBFbGltaW5hciBjb2x1bW5hcwpgYGB7cn0KYmQxIDwtYmQKICBiZDEgPC0gc3Vic2V0KGJkMSwgc2VsZWN0ID0gLWMoUExVLCBDb2RpZ28uQmFycmFzKSkgCmBgYAogIAogICMjIyBFbGltaW5hciByZW5nbG9uZXMKYGBge3J9CmJkMiA8LSBiZDEKICBiZDIgPC0gYmQyW2JkMiRQcmVjaW8gPjAsXQogIHN1bW1hcnkoYmQyKQpgYGAKCiMjIyBUw6ljbmljYSAyLiBSZW1vdmVyIHZhbG9yZXMgZHVwbGljYWRvcy4KICAKICAjIyMgwr9DdcOhbnRvcyByZW5nbG9uZXMgZHVwbGljYWRvcyB0ZW5lbW9zPwpgYGB7cn0KYmQyW2R1cGxpY2F0ZWQoYmQyKSxdCiAgc3VtKGR1cGxpY2F0ZWQoYmQyKSkgIApgYGAKCiAgIyBFbGltaW5hciByZW5nbG9uZXMgZHVwbGljYWRvcy4KYGBge3J9CmJkMyA8LSBiZDIKICBiZDMgPC0gZGlzdGluY3QoYmQzKQpgYGAKCiMjIyBUw6ljbmljYSAzLiBSZXNvbHZlciBlcnJvcmVzIHRpcG9ncsOhZmljb3MgeSBzaW1pbGFyZXMuCiAgCiAgIyMjIFByZWNpb3MgZW4gYWJzb2x1dG8KYGBge3J9CmJkNCA8LSBiZDEKICBiZDQkUHJlY2lvIDwtIGFicyhiZDQkUHJlY2lvKSAgCiAgc3VtbWFyeShiZDQpCmBgYAoKICAjIyMgVW5pZGFkZXMgZW4gZW50ZXJvcwpgYGB7cn0KYmQ1IDwtIGJkNAogIGJkNSRVbmlkYWRlcyA8LSBjZWlsaW5nKGJkNSRVbmlkYWRlcykgIAogIHN1bW1hcnkoYmQ1KSAgCmBgYAoKIyMjIFTDqWNhbmljYSA0LiBDb252ZXJ0aXIgbG9zIHRpcG9zIGRlIGRhdG9zCiAgCiAgIyMjIENvbnZlcnRpciBkZSBjYXJhY3RlciBhIGZlY2hhCmBgYHtyfQpiZDYgPC0gYmQzCiAgYmQ2JEZlY2hhIDwtIGFzLkRhdGUoYmQ2JEZlY2hhLCBmb3JtYXQgPSAiJWQvJW0vJVkiKSAgCiAgc3VtbWFyeShiZDYpICAKICB0aWJibGUoYmQ2KSAgCmBgYAogIAogICMjIyBDb252ZXJ0aXIgZGUgY2FyYWN0ZXIgYSBlbnRlcm8KYGBge3J9CmJkNyA8LSBiZDYKICBiZDckSG9yYSA8LSBzdWJzdHIoYmQ3JEhvcmEsIHN0YXJ0ID0gMSwgc3RvcCA9IDIpCiAgdGliYmxlKGJkNykgICAgCiAgYmQ3JEhvcmEgPC0gYXMuaW50ZWdlcihiZDckSG9yYSkKICBzdHIoYmQ3KQpgYGAKCiMjIyBUw6ljbmljYSA1LiBUcmF0YXIgdmFsb3JlcyBmYWx0YW50ZXMgICAoTkEpCiAgCiAgIyMjIMK/Q3XDoW50b3MgTkEgdGVuZ28gZW4gbGEgYmFzZSBkZSBkYXRvcz8KYGBge3J9CnN1bShpcy5uYShiZDcpKQogIHN1bShpcy5uYShiZCkpCmBgYAoKICAjIyMgwr9DdcOhbnRvcyBOQSB0ZW5nbyBwb3IgdmFyaWFibGU/CmBgYHtyfQpzYXBwbHkoYmQs
IGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkpCmBgYAoKICAjIyMgQm9ycmFyIHRvZG9zIGxvcyByZWdpc3Ryb3MgTkEgZGUgdW5hIHRhYmxhCmBgYHtyfQpiZDggPC0gYmQKICBiZDggPC0gbmEub21pdChiZDgpICAgCiAgc3VtbWFyeShiZDgpICAKYGBgCgogICMjIyBSZWVtcGxhemFyIGxvcyBOQSBjb24gQ0VST1MKYGBge3J9CmJkOSA8LSBiZAogIGJkOVtpcy5uYShiZDkpXSA8LTAgIAogIHN1bW1hcnkoYmQ5KSAgCmBgYAoKICAjIyNSZWVtcGxhemFyIGxvcyBOQSBjb24gZWwgUFJPTUVESU8KYGBge3J9CmJkMTAgPC0gYmQKICBiZDEwJFBMVVtpcy5uYShiZDEwJFBMVSldIDwtIG1lYW4oYmQxMCRQTFUsIG5hLnJtID0gVFJVRSkKICB0aWJibGUoYmQxMCkgIApgYGAKCiMjIyBUw6ljbmljYSA2LiBWZXJpZmljYXIgZGF0b3MgY29uIG3DqXRvZG9zIGVzdGFkw61zdGljb3MuCmBgYHtyfQpiZDExIDwtIGJkNwogIGJveHBsb3QoYmQxMSRQcmVjaW8sIGhvcml6b250YWwgPSBUUlVFKSAgCiAgYm94cGxvdChiZDExJFVuaWRhZGVzLCBob3Jpem9udGFsID0gVFJVRSkKYGBgCiAgCiAgCiMjIyBQYXNvIDQuIE1hbmlwdWxhciBiYXNlIGRlIGRhdG9zIHwgQ29tcHV0YWNpw7NuIGRlIFZlbnRhcwogIAogICMjIyBBZ3JlZ2FyIGNvbHVtbmFzCmBgYHtyfQpiZDExJGRpYWRlbGFzZW1hbmEgPC0gd2RheShiZDExJEZlY2hhKQogIHN1bW1hcnkoYmQxMSkgIApgYGAKICAKICAjIyMgQWdyZWdhciBjb2x1bW5hIGRlIHZlbnRhcwpgYGB7cn0KYmQxMSRzdWJ0b3RhbCA8LSBiZDExJFByZWNpbyAqIGJkMTEkVW5pZGFkZXMKICBzdW1tYXJ5KGJkMTEpCmBgYAoKIyMjIFBhc28gNS4gRXhwb3J0YXIgYmFzZSBkZSBkYXRvcyBsaW1waWEuCmBgYHtyfQpiZF9hYmFycm90ZXNfbGltcGlhIDwtIGJkMTEKICB3cml0ZS5jc3YoYmRfYWJhcnJvdGVzX2xpbXBpYSwgZmlsZSA9ICJiZF9hYmFycm90ZXNfbGltcGlhLmNzdiIsIHJvdy5uYW1lcyA9IEZBTFNFKQpgYGAKCiMgTWFya2V0IEJhc2tldCBBbmFseXNpcyAKCiMjIEluc3RhbGFyIHBhcXVldGVzIHkgbGxhbWFyIGxpYnJlcsOtYXMgCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygicGx5ciIpCmxpYnJhcnkocGx5cikKI2luc3RhbGwucGFja2FnZXMoIk1hdHJpeCIpCmxpYnJhcnkoTWF0cml4KQojaW5zdGFsbC5wYWNrYWdlcygiYXJ1bGVzIikKbGlicmFyeShhcnVsZXMpCiNpbnN0YWxsLnBhY2thZ2VzKCJhcnVsZXNWaXoiKQpsaWJyYXJ5KGFydWxlc1ZpeikKI2luc3RhbGwucGFja2FnZXMoImRhdGFzZXRzIikKbGlicmFyeShkYXRhc2V0cykKYGBgCgojIyBJbXBvcnRhciBsYSBiYXNlIGRlIGRhdG9zCmBgYHtyfQojZmlsZS5jaG9vc2UoKQpiZF9saW1waWEgPC0gcmVhZC5jc3YoIi9Vc2Vycy9nZW5hcm9yb2RyaWd1ZXphbGNhbnRhcmEvRGVza3RvcC9UZWMvUiBGaWxlcyBNYW5pcHVsYWNpb8yBbiBkZSBEYXRvcy9CREQvYmRfYWJhcnJvdGVzX2xpbXBpYS5jc3YiKQpgYGAKCiMjIE9yZGVuYXMgZGUgbWVu
b3IgYSBtYXlvciBsb3MgdGlja2V0cwpgYGB7cn0KI2JkX2xpbXBpYSA8LSBvcmRlcihiZF9saW1waWEkRi5UaWNrZXQpCmhlYWQoYmRfbGltcGlhKQp0YWlsKGJkX2xpbXBpYSkKYGBgCgojIyBFeHRyYWVyIHByb2R1Y3RvcyBwb3IgdGlja2V0CmBgYHtyfQpiYXNrZXQgPC0gIGRkcGx5KGJkX2xpbXBpYSwgYygiRi5UaWNrZXQiKSxmdW5jdGlvbihiZF9saW1waWEpcGFzdGUoYmRfbGltcGlhJE1hcmNhLGNvbGxhcHNlPSIsIikpCmhlYWQoYmRfbGltcGlhKQpoZWFkKGJhc2tldCkKYGBgCgojIyBFbGltaW5hciBuw7ptZXJvIGRlIHRpY2tldApgYGB7cn0KYmFza2V0JEYuVGlja2V0IDwtIE5VTEwKYGBgCgojIyBSZW5vbWJyYXIgY29sdW1uYXMKYGBge3J9CmNvbG5hbWVzKGJhc2tldCkgPC0gYygiTWFyY2EiKQpgYGAKCiMjIEV4cG9ydGFyIGJhc2tldApgYGB7cn0KI3dyaXRlLmNzdihiYXNrZXQsImJhc2tldC5jc3YiLHF1b3RlID0gRkFMU0UsIHJvdy5uYW1lcyA9IEZBTFNFKQpgYGAKCiMjIEltcG9ydGFyIHRyYW5zYWNjaW9uZXMKYGBge3J9CiNmaWxlLmNob29zZSgpCnRyYSA8LSByZWFkLnRyYW5zYWN0aW9ucygiL1VzZXJzL2dlbmFyb3JvZHJpZ3VlemFsY2FudGFyYS9EZXNrdG9wL1RlYy9SIEZpbGVzIE1hbmlwdWxhY2lvzIFuIGRlIERhdG9zL0JERC9iYXNrZXQuY3N2IiwgZm9ybWF0ID0gImJhc2tldCIsc2VwID0gIiwiKQpgYGAKCiMjQ29uY2x1c2lvbmVzIEVzdHJhdMOpZ2ljYXMKIyMjIDEuIFViaWNhciBwcm9kdWN0b3MgU2Fsdm8ganVudG8gY29uIEZhYnVsb3NvLi4uCiMjIyAyLiBQcm9tb2Npw7NuIGVuIFJleW1hLCBQaW5vbCwgUXVlc28vSmFtw7NuL01heW9uZXNhLCBaZXJvL0xpZ2h0L0ZhbnRhLi4uCiMjIyAzLiBSZWFsaXphciB1biBCdXNpbmVzcyBDYXNlIHBhcmEgdmVudGEgZGUgc2FuZHdpY2hlcyBwcmVwYXJhZG9zLi4uCgojIyBHZW5lcmFyIHJlZ2xhcyBkZSBhc29jaWFjacOzbgpgYGB7cn0KcmVnbGFzX2FzIDwtIGFwcmlvcmkodHJhLCBwYXJhbWV0ZXIgPSBsaXN0KHN1cHA9MC4wMDEsIGNvbmY9MC4yLCBtYXhsZW49MTApKQpzdW1tYXJ5KHJlZ2xhc19hcykKaW5zcGVjdChyZWdsYXNfYXMpCmBgYApgYGB7cn0KcmVnbGFzX2FzIDwtIHNvcnQocmVnbGFzX2FzLCBieT0iY29uZmlkZW5jZSIsIGRlY3JlYXNpbmcgPSBUUlVFKQpzdW1tYXJ5KHJlZ2xhc19hcykKaW5zcGVjdChyZWdsYXNfYXMpCmBgYApgYGB7cn0KdG9wMTByZWdsYXMgPC0gaGVhZChyZWdsYXNfYXMsIG49MTAsIGJ5PSJjb25maWRlbmNlIikKcGxvdCh0b3AxMHJlZ2xhcywgbWV0aG9kID0gImdyYXBoIiwgZW5naW5lID0gImh0bWx3aWRnZXQiKQpgYGAKCiAg