JuveYell

Observaciones

#A la base de datos se le hicieron los siguientes cambios:
#Se cambió el formato a fecha corta
#Se duplicaron los primeros 5 registros
#Se cambió el formato a Hora (Español México)
#Se cambió el formato a Código de Barras (para que salga completo)
#Se guardó CSV UTF-8 (Delimitado por comas)

Importar la base de datos

file.choose()
## [1] "C:\\Users\\Migue\\OneDrive\\Documentos\\R\\Primer bloque\\abarrotes\\abarrotes.Rmd"
bd<-read.csv("C:\\Users\\Migue\\OneDrive\\Documentos\\R\\Primer bloque\\abarrotes\\abarrotes_ventas-2_07.csv")

Entender la base de datos

summary(bd)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 1.00   
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 1.00   
##                                        Mean   :5.950e+12   Mean   : 2.11   
##                                        3rd Qu.:7.501e+12   3rd Qu.: 1.00   
##                                        Max.   :1.750e+13   Max.   :30.00   
##                                                            NA's   :199188  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##                                                                        
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##                                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##                                                                         
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
#install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#count(bd, vcClaveTienda, sort = TRUE)
#count(bd, DescGiro, sort = TRUE)
#count(bd, Marca, sort = TRUE)
#count(bd, Fabricante, sort = TRUE)
#count(bd, Producto, sort = TRUE)
#count(bd, NombreDepartamento, sort = TRUE)
#count(bd, NombreFamilia, sort = TRUE)
#count(bd, NombreCategoria, sort = TRUE)
#count(bd, Estado, sort = TRUE)
#count(bd, Mts.2, sort = TRUE)
#count(bd, Tipo.ubicaci?n, sort = TRUE)
#count(bd, Giro, sort = TRUE)
#count(bd, Hora.inicio, sort = TRUE)
#count(bd, Hora.cierre, sort = TRUE)

#install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ stringr 1.4.1
## ✔ tidyr   1.2.0     ✔ forcats 0.5.2
## ✔ readr   2.1.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
tibble(bd)
## # A tibble: 200,625 × 22
##    vcClaveTienda DescGiro Codig…¹   PLU Fecha Hora  Marca Fabri…² Produ…³ Precio
##    <chr>         <chr>      <dbl> <int> <chr> <chr> <chr> <chr>   <chr>    <dbl>
##  1 MX001         Abarrot… 7.50e12    NA 19/0… 08:1… NUTR… MEXILAC Nutri …   16  
##  2 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… DAN … DANONE… DANUP …   14  
##  3 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BIMBO GRUPO … Rebana…    5  
##  4 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… PEPSI PEPSI-… Pepsi …    8  
##  5 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BLAN… FABRIC… Deterg…   19.5
##  6 MX001         Abarrot… 7.50e12    NA 19/0… 08:1… NUTR… MEXILAC Nutri …   16  
##  7 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… DAN … DANONE… DANUP …   14  
##  8 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BIMBO GRUPO … Rebana…    5  
##  9 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… PEPSI PEPSI-… Pepsi …    8  
## 10 MX001         Abarrot… 7.50e12    NA 19/0… 08:2… BLAN… FABRIC… Deterg…   19.5
## # … with 200,615 more rows, 12 more variables: Ult.Costo <dbl>, Unidades <dbl>,
## #   F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## #   NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## #   Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable
## #   names ¹​Codigo.Barras, ²​Fabricante, ³​Producto
str(bd)
## 'data.frame':    200625 obs. of  22 variables:
##  $ vcClaveTienda     : chr  "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Codigo.Barras     : num  7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
##  $ PLU               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Fecha             : chr  "19/06/2020" "19/06/2020" "19/06/2020" "19/06/2020" ...
##  $ Hora              : chr  "08:16:21 a. m." "08:23:33 a. m." "08:24:33 a. m." "08:24:33 a. m." ...
##  $ Marca             : chr  "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr  "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr  "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num  16 14 5 8 19.5 16 14 5 8 19.5 ...
##  $ Ult.Costo         : num  12.3 14 5 8 15 ...
##  $ Unidades          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : int  1 2 3 3 4 1 2 3 3 4 ...
##  $ NombreDepartamento: chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr  "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr  "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts.2             : int  60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo.ubicación    : chr  "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora.inicio       : chr  "8:00" "8:00" "8:00" "8:00" ...
##  $ Hora.cierre       : chr  "22:00" "22:00" "22:00" "22:00" ...
head(bd)
##   vcClaveTienda  DescGiro Codigo.Barras PLU      Fecha           Hora
## 1         MX001 Abarrotes  7.501021e+12  NA 19/06/2020 08:16:21 a. m.
## 2         MX001 Abarrotes  7.501032e+12  NA 19/06/2020 08:23:33 a. m.
## 3         MX001 Abarrotes  7.501000e+12  NA 19/06/2020 08:24:33 a. m.
## 4         MX001 Abarrotes  7.501031e+12  NA 19/06/2020 08:24:33 a. m.
## 5         MX001 Abarrotes  7.501026e+12  NA 19/06/2020 08:26:28 a. m.
## 6         MX001 Abarrotes  7.501021e+12  NA 19/06/2020 08:16:21 a. m.
##                        Marca                 Fabricante
## 1                NUTRI LECHE                    MEXILAC
## 2                     DAN UP           DANONE DE MEXICO
## 3                      BIMBO                GRUPO BIMBO
## 4                      PEPSI        PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                NUTRI LECHE                    MEXILAC
##                             Producto Precio Ult.Costo Unidades F.Ticket
## 1                Nutri Leche 1 Litro   16.0     12.31        1        1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 3                Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 4                   Pepsi N.R. 400Ml    8.0      8.00        1        3
## 5      Detergente Blanca Nieves 500G   19.5     15.00        1        4
## 6                Nutri Leche 1 Litro   16.0     12.31        1        1
##   NombreDepartamento          NombreFamilia           NombreCategoria
## 1          Abarrotes Lacteos y Refrigerados                     Leche
## 2          Abarrotes Lacteos y Refrigerados                    Yogurt
## 3          Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4          Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5          Abarrotes     Limpieza del Hogar                Lavandería
## 6          Abarrotes Lacteos y Refrigerados                     Leche
##       Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 1 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 2 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 3 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 4 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 5 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 6 Nuevo León    60        Esquina Abarrotes        8:00       22:00
head(bd, n=7)
##   vcClaveTienda  DescGiro Codigo.Barras PLU      Fecha           Hora
## 1         MX001 Abarrotes  7.501021e+12  NA 19/06/2020 08:16:21 a. m.
## 2         MX001 Abarrotes  7.501032e+12  NA 19/06/2020 08:23:33 a. m.
## 3         MX001 Abarrotes  7.501000e+12  NA 19/06/2020 08:24:33 a. m.
## 4         MX001 Abarrotes  7.501031e+12  NA 19/06/2020 08:24:33 a. m.
## 5         MX001 Abarrotes  7.501026e+12  NA 19/06/2020 08:26:28 a. m.
## 6         MX001 Abarrotes  7.501021e+12  NA 19/06/2020 08:16:21 a. m.
## 7         MX001 Abarrotes  7.501032e+12  NA 19/06/2020 08:23:33 a. m.
##                        Marca                 Fabricante
## 1                NUTRI LECHE                    MEXILAC
## 2                     DAN UP           DANONE DE MEXICO
## 3                      BIMBO                GRUPO BIMBO
## 4                      PEPSI        PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                NUTRI LECHE                    MEXILAC
## 7                     DAN UP           DANONE DE MEXICO
##                             Producto Precio Ult.Costo Unidades F.Ticket
## 1                Nutri Leche 1 Litro   16.0     12.31        1        1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 3                Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 4                   Pepsi N.R. 400Ml    8.0      8.00        1        3
## 5      Detergente Blanca Nieves 500G   19.5     15.00        1        4
## 6                Nutri Leche 1 Litro   16.0     12.31        1        1
## 7 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
##   NombreDepartamento          NombreFamilia           NombreCategoria
## 1          Abarrotes Lacteos y Refrigerados                     Leche
## 2          Abarrotes Lacteos y Refrigerados                    Yogurt
## 3          Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4          Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5          Abarrotes     Limpieza del Hogar                Lavandería
## 6          Abarrotes Lacteos y Refrigerados                     Leche
## 7          Abarrotes Lacteos y Refrigerados                    Yogurt
##       Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 1 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 2 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 3 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 4 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 5 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 6 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 7 Nuevo León    60        Esquina Abarrotes        8:00       22:00
tail(bd)
##        vcClaveTienda DescGiro Codigo.Barras PLU      Fecha           Hora
## 200620         MX005 Depósito   7.62221e+12  NA 12/07/2020 01:08:25 a. m.
## 200621         MX005 Depósito   7.62221e+12  NA 23/10/2020 10:17:37 p. m.
## 200622         MX005 Depósito   7.62221e+12  NA 10/10/2020 08:30:20 p. m.
## 200623         MX005 Depósito   7.62221e+12  NA 10/10/2020 10:40:43 p. m.
## 200624         MX005 Depósito   7.62221e+12  NA 27/06/2020 10:30:19 p. m.
## 200625         MX005 Depósito   7.62221e+12  NA 26/06/2020 11:43:34 p. m.
##                    Marca    Fabricante                          Producto Precio
## 200620 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200621 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200622 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200623 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200624 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200625 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
##        Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 200620      6.92        1   103100          Abarrotes      Dulcería
## 200621      6.92        1   116598          Abarrotes      Dulcería
## 200622      6.92        1   114886          Abarrotes      Dulcería
## 200623      6.92        1   114955          Abarrotes      Dulcería
## 200624      6.92        1   101121          Abarrotes      Dulcería
## 200625      6.92        1   100879          Abarrotes      Dulcería
##        NombreCategoria       Estado Mts.2 Tipo.ubicación       Giro Hora.inicio
## 200620 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200621 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200622 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200623 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200624 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200625 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
##        Hora.cierre
## 200620       21:00
## 200621       21:00
## 200622       21:00
## 200623       21:00
## 200624       21:00
## 200625       21:00
#install.packages("janitor")
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
tabyl(bd, vcClaveTienda, NombreDepartamento)
##  vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
##          MX001     95415                515      1      147        245       28
##          MX002      6590                 21      0        4         10        0
##          MX003      4026                 15      0        2          8        0
##          MX004     82234                932      0      102        114       16
##          MX005     10014                  0      0        0          0        0
##  Papelería Productos a Eliminar Vinos y Licores
##         35                    3              80
##          0                    0               4
##          0                    0               0
##         32                    5              20
##          7                    0               0
#### Observaciones
# 1. Casi ningún registro cuenta con PLU
# 2. Cambiar formato de fecha
# 3. Cambiar formato de hora
# 4. Hay precios negativos
# 5. Unidades menores a 1

La Herramienta “El Generador de Valor de Datos”

Paso 1. Definir el área del negocio que buscamos impactar o mejorar y su KPI.

Impulsar las ventas del giro de Carnicería.

Paso 2. Seleccionar plantilla(s) para crear valor a partir de los datos de los clientes. Visión | Segmentación | Personalización | Contextualización

Segmentación.

Paso 3. Generar ideas o conceptos específicos.

Promover la venta cruzada de este giro con productos de Abarrotes, debido a que es el giro con mayor rotación en la tienda.

Paso 4. Reunir los datos requeridos.

Hacer un análisis de datos para saber cuáles son los productos con mayores ventas del giro de Abarrotes y ver cómo promover una venta cruzada con algún producto de Carnicería.

Paso 5. Plan de ejecución.

Ofrecer una promoción de “Compra el Jamón Diamante y por $20 pesos más llévate las Medias Noches Bimbo”. De esta forma se promueve un producto con menor venta a través de la compra de un producto con mucho movimiento, en este caso, las Medias Noches Bimbo.

Técnicas para limpieza de datos

Técnica 1. Remover valores irrelevantes

#### Eliminar columnas
bd1<- bd
bd1<- subset (bd1, select = -c (PLU, Codigo.Barras))

#### Eliminar renglones
bd2<- bd1
bd2<- bd2[bd2$Precio > 0, ]
summary(bd1)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200625      Length:200625      Length:200625      Min.   :-147.00  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.42  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200625     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33964   Class :character  
##  Median : 12.31   Median : 1.000   Median :105993   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193990                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383005                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200625      Length:200625      Length:200625      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
summary(bd2)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200478      Length:200478      Length:200478      Length:200478     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200478      Length:200478      Length:200478      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200478     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33977   Class :character  
##  Median : 12.31   Median : 1.000   Median :106034   Mode  :character  
##  Mean   : 15.31   Mean   : 1.261   Mean   :194096                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383062                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200478      Length:200478      Length:200478      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200478      Length:200478      Length:200478      Length:200478     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
#### Esto no lo usaremos, pondremos precios negativos como absoluto

Técnica 2. Remover valores duplicados

#### ¿Cuántos renglones duplicados tenemos?
bd1[duplicated(bd1) ,]
##    vcClaveTienda  DescGiro      Fecha           Hora                      Marca
## 6          MX001 Abarrotes 19/06/2020 08:16:21 a. m.                NUTRI LECHE
## 7          MX001 Abarrotes 19/06/2020 08:23:33 a. m.                     DAN UP
## 8          MX001 Abarrotes 19/06/2020 08:24:33 a. m.                      BIMBO
## 9          MX001 Abarrotes 19/06/2020 08:24:33 a. m.                      PEPSI
## 10         MX001 Abarrotes 19/06/2020 08:26:28 a. m. BLANCA NIEVES (DETERGENTE)
##                    Fabricante                           Producto Precio
## 6                     MEXILAC                Nutri Leche 1 Litro   16.0
## 7            DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 8                 GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 9         PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 10 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
##    Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 6      12.31        1        1          Abarrotes Lacteos y Refrigerados
## 7      14.00        1        2          Abarrotes Lacteos y Refrigerados
## 8       5.00        1        3          Abarrotes         Pan y Tortilla
## 9       8.00        1        3          Abarrotes                Bebidas
## 10     15.00        1        4          Abarrotes     Limpieza del Hogar
##              NombreCategoria     Estado Mts.2 Tipo.ubicación      Giro
## 6                      Leche Nuevo León    60        Esquina Abarrotes
## 7                     Yogurt Nuevo León    60        Esquina Abarrotes
## 8      Pan Dulce Empaquetado Nuevo León    60        Esquina Abarrotes
## 9  Refrescos Plástico (N.R.) Nuevo León    60        Esquina Abarrotes
## 10                Lavandería Nuevo León    60        Esquina Abarrotes
##    Hora.inicio Hora.cierre
## 6         8:00       22:00
## 7         8:00       22:00
## 8         8:00       22:00
## 9         8:00       22:00
## 10        8:00       22:00
sum(duplicated(bd1))
## [1] 5
#### Eliminar renglones duplicados
bd3<- bd1
library(dplyr)
bd3<-distinct(bd3)

Técnica 3. Errores tipográficos y errores similares

#### Precios en absoluto
bd4 <- bd3
bd4$Precio <- abs(bd4$Precio)
summary(bd4)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
#### Cantidades en enteros
bd5<-bd4
bd5$Unidades <- ceiling(bd5$Unidades)
summary(bd5)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Técnica 4. Convertir tipos de datos

#### Convertir de caracter a fecha
bd6<-bd5
bd6$Fecha<-as.Date(bd6$Fecha, format= "%d%m%Y")
tibble(bd6)
## # A tibble: 200,620 × 20
##    vcCla…¹ DescG…² Fecha  Hora      Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>   <chr>   <date> <chr>     <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001   Abarro… NA     08:16:21… NUTR… MEXILAC Nutri …   16     12.3        1
##  2 MX001   Abarro… NA     08:23:33… DAN … DANONE… DANUP …   14     14          1
##  3 MX001   Abarro… NA     08:24:33… BIMBO GRUPO … Rebana…    5      5          1
##  4 MX001   Abarro… NA     08:24:33… PEPSI PEPSI-… Pepsi …    8      8          1
##  5 MX001   Abarro… NA     08:26:28… BLAN… FABRIC… Deterg…   19.5   15          1
##  6 MX001   Abarro… NA     08:26:28… FLASH ALEN    Flash …    9.5    7.31       1
##  7 MX001   Abarro… NA     08:26:28… VARI… DANONE… Danone…   11     11          1
##  8 MX001   Abarro… NA     08:26:28… ZOTE  FABRIC… Jabon …    9.5    7.31       1
##  9 MX001   Abarro… NA     08:26:28… ALWA… PROCTE… T Feme…   23.5   18.1        1
## 10 MX001   Abarro… NA     03:24:02… JUMEX JUMEX   Jugo D…   12     12          1
## # … with 200,610 more rows, 10 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto, ⁵​Ult.Costo, ⁶​Unidades
#### Convertir de caracter a entero
bd7<-bd6
bd7$Hora<- substr(bd7$Hora, start= 1, stop= 2)
tibble(bd7)
## # A tibble: 200,620 × 20
##    vcCla…¹ DescG…² Fecha  Hora  Marca     Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>   <chr>   <date> <chr> <chr>     <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001   Abarro… NA     08    NUTRI LE… MEXILAC Nutri …   16     12.3        1
##  2 MX001   Abarro… NA     08    DAN UP    DANONE… DANUP …   14     14          1
##  3 MX001   Abarro… NA     08    BIMBO     GRUPO … Rebana…    5      5          1
##  4 MX001   Abarro… NA     08    PEPSI     PEPSI-… Pepsi …    8      8          1
##  5 MX001   Abarro… NA     08    BLANCA N… FABRIC… Deterg…   19.5   15          1
##  6 MX001   Abarro… NA     08    FLASH     ALEN    Flash …    9.5    7.31       1
##  7 MX001   Abarro… NA     08    VARIOS D… DANONE… Danone…   11     11          1
##  8 MX001   Abarro… NA     08    ZOTE      FABRIC… Jabon …    9.5    7.31       1
##  9 MX001   Abarro… NA     08    ALWAYS    PROCTE… T Feme…   23.5   18.1        1
## 10 MX001   Abarro… NA     03    JUMEX     JUMEX   Jugo D…   12     12          1
## # … with 200,610 more rows, 10 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto, ⁵​Ult.Costo, ⁶​Unidades
bd7$Hora<-as.integer(bd7$Hora)
str(bd7)
## 'data.frame':    200620 obs. of  20 variables:
##  $ vcClaveTienda     : chr  "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Fecha             : Date, format: NA NA ...
##  $ Hora              : int  8 8 8 8 8 8 8 8 8 3 ...
##  $ Marca             : chr  "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr  "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr  "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num  16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
##  $ Ult.Costo         : num  12.3 14 5 8 15 ...
##  $ Unidades          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : int  1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr  "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr  "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts.2             : int  60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo.ubicación    : chr  "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora.inicio       : chr  "8:00" "8:00" "8:00" "8:00" ...
##  $ Hora.cierre       : chr  "22:00" "22:00" "22:00" "22:00" ...

Técnica 5. Valores faltantes

#### ¿Cuántos NA tengo en la base de datos?
sum(is.na(bd7))
## [1] 200620
sum(is.na(bd))
## [1] 199188
#### ¿Cuántos NA tengo por variable?
sapply(bd7, function(x) sum(is.na(x)))
##      vcClaveTienda           DescGiro              Fecha               Hora 
##                  0                  0             200620                  0 
##              Marca         Fabricante           Producto             Precio 
##                  0                  0                  0                  0 
##          Ult.Costo           Unidades           F.Ticket NombreDepartamento 
##                  0                  0                  0                  0 
##      NombreFamilia    NombreCategoria             Estado              Mts.2 
##                  0                  0                  0                  0 
##     Tipo.ubicación               Giro        Hora.inicio        Hora.cierre 
##                  0                  0                  0                  0
sapply(bd, function(x) sum(is.na(x)))
##      vcClaveTienda           DescGiro      Codigo.Barras                PLU 
##                  0                  0                  0             199188 
##              Fecha               Hora              Marca         Fabricante 
##                  0                  0                  0                  0 
##           Producto             Precio          Ult.Costo           Unidades 
##                  0                  0                  0                  0 
##           F.Ticket NombreDepartamento      NombreFamilia    NombreCategoria 
##                  0                  0                  0                  0 
##             Estado              Mts.2     Tipo.ubicación               Giro 
##                  0                  0                  0                  0 
##        Hora.inicio        Hora.cierre 
##                  0                  0
?sapply
## starting httpd help server ... done
#### Borrar todos los registros NA de una tabla
bd8<-bd7
bd8<-na.omit(bd8)
summary(bd8)
##  vcClaveTienda        DescGiro             Fecha          Hora    
##  Length:0           Length:0           Min.   :NA    Min.   : NA  
##  Class :character   Class :character   1st Qu.:NA    1st Qu.: NA  
##  Mode  :character   Mode  :character   Median :NA    Median : NA  
##                                        Mean   :NaN   Mean   :NaN  
##                                        3rd Qu.:NA    3rd Qu.: NA  
##                                        Max.   :NA    Max.   : NA  
##     Marca            Fabricante          Producto             Precio   
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##    Ult.Costo      Unidades      F.Ticket   NombreDepartamento
##  Min.   : NA   Min.   : NA   Min.   : NA   Length:0          
##  1st Qu.: NA   1st Qu.: NA   1st Qu.: NA   Class :character  
##  Median : NA   Median : NA   Median : NA   Mode  :character  
##  Mean   :NaN   Mean   :NaN   Mean   :NaN                     
##  3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA                     
##  Max.   : NA   Max.   : NA   Max.   : NA                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2    
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:0           Length:0           Length:0           Length:0          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
#### Reemplazar NA con CEROS
bd9<-bd8
bd9[is.na(bd9)]<-0
summary(bd9)
##  vcClaveTienda        DescGiro             Fecha          Hora    
##  Length:0           Length:0           Min.   :NA    Min.   : NA  
##  Class :character   Class :character   1st Qu.:NA    1st Qu.: NA  
##  Mode  :character   Mode  :character   Median :NA    Median : NA  
##                                        Mean   :NaN   Mean   :NaN  
##                                        3rd Qu.:NA    3rd Qu.: NA  
##                                        Max.   :NA    Max.   : NA  
##     Marca            Fabricante          Producto             Precio   
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##    Ult.Costo      Unidades      F.Ticket   NombreDepartamento
##  Min.   : NA   Min.   : NA   Min.   : NA   Length:0          
##  1st Qu.: NA   1st Qu.: NA   1st Qu.: NA   Class :character  
##  Median : NA   Median : NA   Median : NA   Mode  :character  
##  Mean   :NaN   Mean   :NaN   Mean   :NaN                     
##  3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA                     
##  Max.   : NA   Max.   : NA   Max.   : NA                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2    
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:0           Length:0           Length:0           Length:0          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
#### Reemplazar NA con el Promedio
bd10<-bd9
bd10$PLU[is.na(bd10$PLU)]<-mean(bd10$PLU, na.rm= TRUE)
## Warning in mean.default(bd10$PLU, na.rm = TRUE): argument is not numeric or
## logical: returning NA
summary(bd10)
##  vcClaveTienda        DescGiro             Fecha          Hora    
##  Length:0           Length:0           Min.   :NA    Min.   : NA  
##  Class :character   Class :character   1st Qu.:NA    1st Qu.: NA  
##  Mode  :character   Mode  :character   Median :NA    Median : NA  
##                                        Mean   :NaN   Mean   :NaN  
##                                        3rd Qu.:NA    3rd Qu.: NA  
##                                        Max.   :NA    Max.   : NA  
##     Marca            Fabricante          Producto             Precio   
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##    Ult.Costo      Unidades      F.Ticket   NombreDepartamento
##  Min.   : NA   Min.   : NA   Min.   : NA   Length:0          
##  1st Qu.: NA   1st Qu.: NA   1st Qu.: NA   Class :character  
##  Median : NA   Median : NA   Median : NA   Mode  :character  
##  Mean   :NaN   Mean   :NaN   Mean   :NaN                     
##  3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA                     
##  Max.   : NA   Max.   : NA   Max.   : NA                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2    
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:0           Length:0           Length:0           Length:0          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##       PLU     
##  Min.   : NA  
##  1st Qu.: NA  
##  Median : NA  
##  Mean   :NaN  
##  3rd Qu.: NA  
##  Max.   : NA
#### Reemplazar negativos con cero
bd11<-bd10
bd11[bd11<-0]
## data frame with 0 columns and 0 rows
summary(bd11)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       0       0       0       0       0       0

Técnica 6. Método estadístico

bd12<-bd7
boxplot(bd12$Precio, horizontal= TRUE)

boxplot(bd12$Unidades, horizontal= TRUE)

#### Agregar columnas

#install.packages("lubridate")
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
bd12$Dia_de_la_semana<- wday(bd12$Fecha)
summary(bd12)
##  vcClaveTienda        DescGiro             Fecha             Hora       
##  Length:200620      Length:200620      Min.   :NA       Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:NA       1st Qu.: 5.000  
##  Mode  :character   Mode  :character   Median :NA       Median : 8.000  
##                                        Mean   :NaN      Mean   : 7.299  
##                                        3rd Qu.:NA       3rd Qu.:10.000  
##                                        Max.   :NA       Max.   :12.000  
##                                        NA's   :200620                   
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##                                                                            
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##                                                                       
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##                                                                         
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Dia_de_la_semana
##  Min.   : NA     
##  1st Qu.: NA     
##  Median : NA     
##  Mean   :NaN     
##  3rd Qu.: NA     
##  Max.   : NA     
##  NA's   :200620
bd12$Subtotal<-bd12$Precio * bd12$Unidades
summary(bd12)
##  vcClaveTienda        DescGiro             Fecha             Hora       
##  Length:200620      Length:200620      Min.   :NA       Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:NA       1st Qu.: 5.000  
##  Mode  :character   Mode  :character   Median :NA       Median : 8.000  
##                                        Mean   :NaN      Mean   : 7.299  
##                                        3rd Qu.:NA       3rd Qu.:10.000  
##                                        Max.   :NA       Max.   :12.000  
##                                        NA's   :200620                   
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##                                                                            
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##                                                                       
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##                                                                         
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Dia_de_la_semana    Subtotal      
##  Min.   : NA      Min.   :   1.00  
##  1st Qu.: NA      1st Qu.:  12.00  
##  Median : NA      Median :  18.00  
##  Mean   :NaN      Mean   :  24.33  
##  3rd Qu.: NA      3rd Qu.:  27.00  
##  Max.   : NA      Max.   :2496.00  
##  NA's   :200620
bd12$Utilidad<-bd12$Precio - bd12$Ult.Costo
summary(bd12)
##  vcClaveTienda        DescGiro             Fecha             Hora       
##  Length:200620      Length:200620      Min.   :NA       Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:NA       1st Qu.: 5.000  
##  Mode  :character   Mode  :character   Median :NA       Median : 8.000  
##                                        Mean   :NaN      Mean   : 7.299  
##                                        3rd Qu.:NA       3rd Qu.:10.000  
##                                        Max.   :NA       Max.   :12.000  
##                                        NA's   :200620                   
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##                                                                            
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##                                                                       
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##                                                                         
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  Dia_de_la_semana    Subtotal          Utilidad      
##  Min.   : NA      Min.   :   1.00   Min.   :  0.000  
##  1st Qu.: NA      1st Qu.:  12.00   1st Qu.:  2.310  
##  Median : NA      Median :  18.00   Median :  3.230  
##  Mean   :NaN      Mean   :  24.33   Mean   :  4.142  
##  3rd Qu.: NA      3rd Qu.:  27.00   3rd Qu.:  5.420  
##  Max.   : NA      Max.   :2496.00   Max.   :230.770  
##  NA's   :200620
#### Exportar base de datos limpia
bd_limpia<-bd12
write.csv(bd_limpia, file="abarrotes_bd_limpia.csv", row.names= FALSE)

Market Basket Analysis

#install.packages("plyr")
#install.packages("Matrix")
#install.packages("arules")
#install.packages("arulesViz")
#install.packages("datasets")
library(Matrix)
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(arules)
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
library(datasets)

#### Ordenar de menor a mayor los tickets
bd_limpia<- bd_limpia[order(bd_limpia$F.Ticket),]
head(bd_limpia)
##   vcClaveTienda  DescGiro Fecha Hora                      Marca
## 1         MX001 Abarrotes  <NA>    8                NUTRI LECHE
## 2         MX001 Abarrotes  <NA>    8                     DAN UP
## 3         MX001 Abarrotes  <NA>    8                      BIMBO
## 4         MX001 Abarrotes  <NA>    8                      PEPSI
## 5         MX001 Abarrotes  <NA>    8 BLANCA NIEVES (DETERGENTE)
## 6         MX001 Abarrotes  <NA>    8                      FLASH
##                   Fabricante                           Producto Precio
## 1                    MEXILAC                Nutri Leche 1 Litro   16.0
## 2           DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 3                GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 4        PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 5 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
## 6                       ALEN      Flash Xtra Brisa Marina 500Ml    9.5
##   Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 1     12.31        1        1          Abarrotes Lacteos y Refrigerados
## 2     14.00        1        2          Abarrotes Lacteos y Refrigerados
## 3      5.00        1        3          Abarrotes         Pan y Tortilla
## 4      8.00        1        3          Abarrotes                Bebidas
## 5     15.00        1        4          Abarrotes     Limpieza del Hogar
## 6      7.31        1        4          Abarrotes     Limpieza del Hogar
##             NombreCategoria     Estado Mts.2 Tipo.ubicación      Giro
## 1                     Leche Nuevo León    60        Esquina Abarrotes
## 2                    Yogurt Nuevo León    60        Esquina Abarrotes
## 3     Pan Dulce Empaquetado Nuevo León    60        Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León    60        Esquina Abarrotes
## 5                Lavandería Nuevo León    60        Esquina Abarrotes
## 6      Limpiadores Líquidos Nuevo León    60        Esquina Abarrotes
##   Hora.inicio Hora.cierre Dia_de_la_semana Subtotal Utilidad
## 1        8:00       22:00               NA     16.0     3.69
## 2        8:00       22:00               NA     14.0     0.00
## 3        8:00       22:00               NA      5.0     0.00
## 4        8:00       22:00               NA      8.0     0.00
## 5        8:00       22:00               NA     19.5     4.50
## 6        8:00       22:00               NA      9.5     2.19
tail(bd_limpia)
##        vcClaveTienda   DescGiro Fecha Hora          Marca           Fabricante
## 107394         MX004 Carnicería  <NA>   11         YEMINA               HERDEZ
## 167771         MX004 Carnicería  <NA>   11     DEL FUERTE ALIMENTOS DEL FUERTE
## 149429         MX004 Carnicería  <NA>   11 COCA COLA ZERO            COCA COLA
## 168750         MX004 Carnicería  <NA>   11       DIAMANTE           EMPACADOS 
## 161193         MX004 Carnicería  <NA>   12          PEPSI  PEPSI-COLA MEXICANA
## 112970         MX004 Carnicería  <NA>   12      COCA COLA            COCA COLA
##                              Producto Precio Ult.Costo Unidades F.Ticket
## 107394    PASTA SPAGHETTI YEMINA 200G      7      5.38        2   450032
## 167771 PURE DE TOMATE DEL FUERTE 345G     12      9.23        1   450032
## 149429           COCA COLA ZERO 600ML     15     11.54        2   450034
## 168750             ARROZ DIAMANTE225G     11      8.46        1   450037
## 161193              PEPSI N. R. 500ML     10      7.69        1   450039
## 112970     COCA COLA RETORNABLE 500ML     10      7.69        8   450040
##        NombreDepartamento        NombreFamilia               NombreCategoria
## 107394          Abarrotes       Sopas y Pastas Fideos, Spaguetti, Tallarines
## 167771          Abarrotes Salsas y Sazonadores          Salsa para Spaguetti
## 149429          Abarrotes              Bebidas         Refrescos Retornables
## 168750          Abarrotes    Granos y Semillas                         Arroz
## 161193          Abarrotes              Bebidas     Refrescos Plástico (N.R.)
## 112970          Abarrotes              Bebidas         Refrescos Retornables
##         Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 107394 Sinaloa    53        Esquina Abarrotes        7:00       23:00
## 167771 Sinaloa    53        Esquina Abarrotes        7:00       23:00
## 149429 Sinaloa    53        Esquina Abarrotes        7:00       23:00
## 168750 Sinaloa    53        Esquina Abarrotes        7:00       23:00
## 161193 Sinaloa    53        Esquina Abarrotes        7:00       23:00
## 112970 Sinaloa    53        Esquina Abarrotes        7:00       23:00
##        Dia_de_la_semana Subtotal Utilidad
## 107394               NA       14     1.62
## 167771               NA       12     2.77
## 149429               NA       30     3.46
## 168750               NA       11     2.54
## 161193               NA       10     2.31
## 112970               NA       80     2.31

Market Basket Analysis: Cómo hacerlo

#### Generar basket
#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
## 
##     compact
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
basket<- ddply(bd_limpia,c("F.Ticket"), function(bd_limpia)paste(bd_limpia$Marca, collapse = ","))
    
  #### Eliminar número de ticket
    basket$F.Ticket<- NULL
    
  #### Renombramos el nombre de la columna
    colnames(basket)<- c("Marca")
    
  #### Exportar basket
  write.csv(basket,"basket.csv", quote = FALSE, row.names = FALSE)
  
#### Importar transacciones
  file.choose()
## [1] "C:\\Users\\Migue\\OneDrive\\Documentos\\R\\Primer bloque\\abarrotes\\abarrotes.Rmd"
  tr<- read.transactions("C:\\Users\\Migue\\OneDrive\\Documentos\\R\\Primer bloque\\jingle_bells\\basket.csv", format= "basket", sep=",")
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in asMethod(object): removing duplicated items in transactions
  reglas.asociacion<- apriori(tr, parameter = list(supp=0.001, conf= 0.2, maxlen=10))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 115 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115111 transaction(s)] done [0.03s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.00s].
  summary(reglas.asociacion)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001016   Min.   :0.2069   Min.   :0.003562   Min.   : 1.325  
##  1st Qu.:0.001103   1st Qu.:0.2356   1st Qu.:0.004504   1st Qu.: 1.787  
##  Median :0.001416   Median :0.2442   Median :0.005803   Median : 3.972  
##  Mean   :0.001519   Mean   :0.2536   Mean   :0.006054   Mean   :17.563  
##  3rd Qu.:0.001651   3rd Qu.:0.2685   3rd Qu.:0.006893   3rd Qu.:21.798  
##  Max.   :0.002745   Max.   :0.3098   Max.   :0.010503   Max.   :65.908  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##    tr        115111   0.001        0.2
##                                                                         call
##  apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
  inspect(reglas.asociacion)
##      lhs                  rhs         support     confidence coverage   
## [1]  {FANTA}           => {COCA COLA} 0.001051159 0.2439516  0.004308884
## [2]  {SALVO}           => {FABULOSO}  0.001103283 0.3097561  0.003561779
## [3]  {FABULOSO}        => {SALVO}     0.001103283 0.2347505  0.004699811
## [4]  {COCA COLA ZERO}  => {COCA COLA} 0.001416025 0.2969035  0.004769310
## [5]  {SPRITE}          => {COCA COLA} 0.001346526 0.2069426  0.006506763
## [6]  {PINOL}           => {CLORALEX}  0.001016410 0.2363636  0.004300197
## [7]  {BLUE HOUSE}      => {BIMBO}     0.001711392 0.2720994  0.006289581
## [8]  {HELLMANN´S}      => {BIMBO}     0.001537646 0.2649701  0.005803094
## [9]  {REYMA}           => {CONVERMEX} 0.002093631 0.2441743  0.008574333
## [10] {FUD}             => {BIMBO}     0.001589770 0.2183771  0.007279930
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730  0.010502906
##      lift      count
## [1]   1.561906 121  
## [2]  65.908196 127  
## [3]  65.908196 127  
## [4]   1.900932 163  
## [5]   1.324955 155  
## [6]  25.030409 117  
## [7]   4.078870 197  
## [8]   3.971997 177  
## [9]  18.564824 241  
## [10]  3.273552 183  
## [11]  1.673447 316
  reglas.asociacion<- sort(reglas.asociacion, by= 'confidence', decreasing = TRUE)
  summary(reglas.asociacion)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001016   Min.   :0.2069   Min.   :0.003562   Min.   : 1.325  
##  1st Qu.:0.001103   1st Qu.:0.2356   1st Qu.:0.004504   1st Qu.: 1.787  
##  Median :0.001416   Median :0.2442   Median :0.005803   Median : 3.972  
##  Mean   :0.001519   Mean   :0.2536   Mean   :0.006054   Mean   :17.563  
##  3rd Qu.:0.001651   3rd Qu.:0.2685   3rd Qu.:0.006893   3rd Qu.:21.798  
##  Max.   :0.002745   Max.   :0.3098   Max.   :0.010503   Max.   :65.908  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##    tr        115111   0.001        0.2
##                                                                         call
##  apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
  inspect(reglas.asociacion)
##      lhs                  rhs         support     confidence coverage   
## [1]  {SALVO}           => {FABULOSO}  0.001103283 0.3097561  0.003561779
## [2]  {COCA COLA ZERO}  => {COCA COLA} 0.001416025 0.2969035  0.004769310
## [3]  {BLUE HOUSE}      => {BIMBO}     0.001711392 0.2720994  0.006289581
## [4]  {HELLMANN´S}      => {BIMBO}     0.001537646 0.2649701  0.005803094
## [5]  {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730  0.010502906
## [6]  {REYMA}           => {CONVERMEX} 0.002093631 0.2441743  0.008574333
## [7]  {FANTA}           => {COCA COLA} 0.001051159 0.2439516  0.004308884
## [8]  {PINOL}           => {CLORALEX}  0.001016410 0.2363636  0.004300197
## [9]  {FABULOSO}        => {SALVO}     0.001103283 0.2347505  0.004699811
## [10] {FUD}             => {BIMBO}     0.001589770 0.2183771  0.007279930
## [11] {SPRITE}          => {COCA COLA} 0.001346526 0.2069426  0.006506763
##      lift      count
## [1]  65.908196 127  
## [2]   1.900932 163  
## [3]   4.078870 197  
## [4]   3.971997 177  
## [5]   1.673447 316  
## [6]  18.564824 241  
## [7]   1.561906 121  
## [8]  25.030409 117  
## [9]  65.908196 127  
## [10]  3.273552 183  
## [11]  1.324955 155
      top10reglas<- head(reglas.asociacion, n = 10, by = "confidence")
    plot(top10reglas, method = "graph", engine = "htmlwidget")

Conclusiones

Market Basket Analysis es una técnica de data mining utilizada por retailers para incrementar sus ventas a través de la comprensión y análisis de los patrones de compra del consumidor. En el ejercicio anterior se muestra la correlación entre ciertos productos y los productos con mayores ventas de acuerdo a las preferencias del cliente final. En resumen, se muestra que el giro de tienda con mayor rotación es “Abarrotes” y las marcas Nutri Leche y Dan Up son aquellas que más venden. Esto es un claro ejemplo de la implementación de este análisis y cómo le permite al vendedor conocer los patrones de compra de cada individuo dependiendo del segmento, estado o categoría de compra. A lo largo de este análisis se reafirman los productos con mayores ventas, ubicaciones de venta, hora de compra, entre otros.

Entre los factores destacados, se muestra que el Departamento con mayor movimiento es Abarrotes en la ubicación de esquina y sus ventas principales son a la hora de cierre de 22:00 horas.

LS0tDQp0aXRsZTogPHNwYW4gc3R5bGU9IkNvbG9yOkdyZWVuIj4gIkFiYXJyb3RlcyINCmF1dGhvcjogIkppbWVuYSBNaWd1ZWwgLSBBMDEzNjU4MTkiDQpkYXRlOiAiMjAyMi0wOS0wNyINCm91dHB1dDoNCiAgaHRtbF9kb2N1bWVudDoNCiAgICB0b2M6IHRydWUNCiAgICB0b2NfZmxvYXQ6IHRydWUNCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlDQotLS0NCg0KPGRpdj4NCjxwIHN0eWxlID0gJ3RleHQtYWxpZ246Y2VudGVyOyc+DQo8aW1nIHNyYz0iaHR0cHM6Ly93d3cuam9ybmFkYS5jb20ubXgvdWx0aW1hcy8yMDIwLzA4LzI0L2NyZWNlbi0xMC0yLXZlbnRhcy1kZS1hYmFycm90ZXMtZW4tanVsaW8tcGFzYWRvLTUzMDAuaHRtbC9hYmFycm90ZXMuanBnLTgzMDAuaHRtbC9pbWFnZV9sYXJnZSIgYWx0PSJKdXZlWWVsbCIgd2lkdGg9IjMwMHB4Ij4NCjwvcD4NCjwvZGl2Pg0KDQojIyBPYnNlcnZhY2lvbmVzDQoNCmBgYHtyfQ0KI0EgbGEgYmFzZSBkZSBkYXRvcyBzZSBsZSBoaWNpZXJvbiBsb3Mgc2lndWllbnRlcyBjYW1iaW9zOg0KI1NlIGNhbWJpw7MgZWwgZm9ybWF0byBhIGZlY2hhIGNvcnRhDQojU2UgZHVwbGljYXJvbiBsb3MgcHJpbWVyb3MgNSByZWdpc3Ryb3MNCiNTZSBjYW1iacOzIGVsIGZvcm1hdG8gYSBIb3JhIChFc3Bhw7FvbCBNw6l4aWNvKQ0KI1NlIGNhbWJpw7MgZWwgZm9ybWF0byBhIEPDs2RpZ28gZGUgQmFycmFzIChwYXJhIHF1ZSBzYWxnYSBjb21wbGV0bykNCiNTZSBndWFyZMOzIENTViBVVEYtOCAoRGVsaW1pdGFkbyBwb3IgY29tYXMpDQpgYGANCg0KIyMgSW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcw0KYGBge3J9DQpmaWxlLmNob29zZSgpDQpiZDwtcmVhZC5jc3YoIkM6XFxVc2Vyc1xcTWlndWVcXE9uZURyaXZlXFxEb2N1bWVudG9zXFxSXFxQcmltZXIgYmxvcXVlXFxhYmFycm90ZXNcXGFiYXJyb3Rlc192ZW50YXMtMl8wNy5jc3YiKQ0KYGBgDQoNCiMjIEVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3MNCmBgYHtyfQ0Kc3VtbWFyeShiZCkNCg0KI2luc3RhbGwucGFja2FnZXMoImRwbHlyIikNCmxpYnJhcnkoZHBseXIpDQoNCiNjb3VudChiZCwgdmNDbGF2ZVRpZW5kYSwgc29ydCA9IFRSVUUpDQojY291bnQoYmQsIERlc2NHaXJvLCBzb3J0ID0gVFJVRSkNCiNjb3VudChiZCwgTWFyY2EsIHNvcnQgPSBUUlVFKQ0KI2NvdW50KGJkLCBGYWJyaWNhbnRlLCBzb3J0ID0gVFJVRSkNCiNjb3VudChiZCwgUHJvZHVjdG8sIHNvcnQgPSBUUlVFKQ0KI2NvdW50KGJkLCBOb21icmVEZXBhcnRhbWVudG8sIHNvcnQgPSBUUlVFKQ0KI2NvdW50KGJkLCBOb21icmVGYW1pbGlhLCBzb3J0ID0gVFJVRSkNCiNjb3VudChiZCwgTm9tYnJlQ2F0ZWdvcmlhLCBzb3J0ID0gVFJVRSkNCiNjb3VudChiZCwgRXN0YWRvLCBzb3J0ID0gVFJVRSkNCiNjb3VudChiZCwgTXRzLjIsIHNvcnQgPSBUUlVFKQ0KI2NvdW50KGJkLCBUaXBvLnViaWNhY2k/biwgc29ydCA9IFRSVUUpDQojY291bnQoYmQsIEdpcm8sIHNvcnQgPSBUUlVFKQ0KI2NvdW50KGJkLCBIb3JhLmluaWNpbywgc29ydCA9IFRSVUUpDQojY291bnQoYmQsIEhvcmEuY2llcnJlLCBzb3J0ID0gVFJVRSkNCg0KI2luc3RhbGwucGFja2FnZXMoInRpZHl2ZXJzZSIpDQpsaWJyYXJ5KHRpZHl2ZXJzZSkNCg0KdGliYmxlKGJkKQ0KDQpzdHIoYmQpDQoNCmhlYWQoYmQpDQpoZWFkKGJkLCBuPTcpDQoNCnRhaWwoYmQpDQoNCiNpbnN0YWxsLnBhY2thZ2VzKCJqYW5pdG9yIikNCmxpYnJhcnkoamFuaXRvcikNCg0KdGFieWwoYmQsIHZjQ2xhdmVUaWVuZGEsIE5vbWJyZURlcGFydGFtZW50bykNCg0KIyMjIyBPYnNlcnZhY2lvbmVzDQojIDEuIENhc2kgbmluZ8O6biByZWdpc3RybyBjdWVudGEgY29uIFBMVQ0KIyAyLiBDYW1iaWFyIGZvcm1hdG8gZGUgZmVjaGENCiMgMy4gQ2FtYmlhciBmb3JtYXRvIGRlIGhvcmENCiMgNC4gSGF5IHByZWNpb3MgbmVnYXRpdm9zDQojIDUuIFVuaWRhZGVzIG1lbm9yZXMgYSAxDQpgYGANCg0KIyMjIExhIEhlcnJhbWllbnRhICJFbCBHZW5lcmFkb3IgZGUgVmFsb3IgZGUgRGF0b3MiDQoNCiMjIyMgUGFzbyAxLiBEZWZpbmlyIGVsIMOhcmVhIGRlbCBuZWdvY2lvIHF1ZSBidXNjYW1vcyBpbXBhY3RhciBvIG1lam9yYXIgeSBzdSBLUEkuDQpJbXB1bHNhciBsYXMgdmVudGFzIGRlbCBnaXJvIGRlIENhcm5pY2Vyw61hLg0KDQojIyMjIFBhc28gMi4gU2VsZWNjaW9uYXIgcGxhbnRpbGxhKHMpIHBhcmEgY3JlYXIgdmFsb3IgYSBwYXJ0aXIgZGUgbG9zIGRhdG9zIGRlIGxvcyBjbGllbnRlcy4gVmlzacOzbiB8IFNlZ21lbnRhY2nDs24gfCBQZXJzb25hbGl6YWNpw7NuIHwgQ29udGV4dHVhbGl6YWNpw7NuDQpTZWdtZW50YWNpw7NuLg0KDQojIyMjIFBhc28gMy4gR2VuZXJhciBpZGVhcyBvIGNvbmNlcHRvcyBlc3BlY8OtZmljb3MuDQpQcm9tb3ZlciBsYSB2ZW50YSBjcnV6YWRhIGRlIGVzdGUgZ2lybyBjb24gcHJvZHVjdG9zIGRlIEFiYXJyb3RlcywgZGViaWRvIGEgcXVlIGVzIGVsIGdpcm8gY29uIG1heW9yIHJvdGFjacOzbiBlbiBsYSB0aWVuZGEuDQoNCiMjIyMgUGFzbyA0LiBSZXVuaXIgbG9zIGRhdG9zIHJlcXVlcmlkb3MuDQpIYWNlciB1biBhbsOhbGlzaXMgZGUgZGF0b3MgcGFyYSBzYWJlciBjdcOhbGVzIHNvbiBsb3MgcHJvZHVjdG9zIGNvbiBtYXlvcmVzIHZlbnRhcyBkZWwgZ2lybyBkZSBBYmFycm90ZXMgeSB2ZXIgY8OzbW8gcHJvbW92ZXIgdW5hIHZlbnRhIGNydXphZGEgY29uIGFsZ8O6biBwcm9kdWN0byBkZSBDYXJuaWNlcsOtYS4NCg0KIyMjIyBQYXNvIDUuIFBsYW4gZGUgZWplY3VjacOzbi4NCk9mcmVjZXIgdW5hIHByb21vY2nDs24gZGUgIkNvbXByYSBlbCBKYW3Ds24gRGlhbWFudGUgeSBwb3IgJDIwIHBlc29zIG3DoXMgbGzDqXZhdGUgbGFzIE1lZGlhcyBOb2NoZXMgQmltYm8iLiBEZSBlc3RhIGZvcm1hIHNlIHByb211ZXZlIHVuIHByb2R1Y3RvIGNvbiBtZW5vciB2ZW50YSBhIHRyYXbDqXMgZGUgbGEgY29tcHJhIGRlIHVuIHByb2R1Y3RvIGNvbiBtdWNobyBtb3ZpbWllbnRvLCBlbiBlc3RlIGNhc28sIGxhcyBNZWRpYXMgTm9jaGVzIEJpbWJvLg0KDQoNCg0KIyMgVMOpY25pY2FzIHBhcmEgbGltcGllemEgZGUgZGF0b3MNCg0KIyMjIFTDqWNuaWNhIDEuIFJlbW92ZXIgdmFsb3JlcyBpcnJlbGV2YW50ZXMNCmBgYHtyfQ0KIyMjIyBFbGltaW5hciBjb2x1bW5hcw0KYmQxPC0gYmQNCmJkMTwtIHN1YnNldCAoYmQxLCBzZWxlY3QgPSAtYyAoUExVLCBDb2RpZ28uQmFycmFzKSkNCg0KIyMjIyBFbGltaW5hciByZW5nbG9uZXMNCmJkMjwtIGJkMQ0KYmQyPC0gYmQyW2JkMiRQcmVjaW8gPiAwLCBdDQpzdW1tYXJ5KGJkMSkNCnN1bW1hcnkoYmQyKQ0KIyMjIyBFc3RvIG5vIGxvIHVzYXJlbW9zLCBwb25kcmVtb3MgcHJlY2lvcyBuZWdhdGl2b3MgY29tbyBhYnNvbHV0bw0KYGBgDQoNCiMjIyBUw6ljbmljYSAyLiBSZW1vdmVyIHZhbG9yZXMgZHVwbGljYWRvcw0KYGBge3J9DQojIyMjIMK/Q3XDoW50b3MgcmVuZ2xvbmVzIGR1cGxpY2Fkb3MgdGVuZW1vcz8NCmJkMVtkdXBsaWNhdGVkKGJkMSkgLF0NCnN1bShkdXBsaWNhdGVkKGJkMSkpDQoNCiMjIyMgRWxpbWluYXIgcmVuZ2xvbmVzIGR1cGxpY2Fkb3MNCmJkMzwtIGJkMQ0KbGlicmFyeShkcGx5cikNCmJkMzwtZGlzdGluY3QoYmQzKQ0KYGBgDQoNCiMjIyBUw6ljbmljYSAzLiBFcnJvcmVzIHRpcG9ncsOhZmljb3MgeSBlcnJvcmVzIHNpbWlsYXJlcw0KYGBge3J9DQojIyMjIFByZWNpb3MgZW4gYWJzb2x1dG8NCmJkNCA8LSBiZDMNCmJkNCRQcmVjaW8gPC0gYWJzKGJkNCRQcmVjaW8pDQpzdW1tYXJ5KGJkNCkNCg0KIyMjIyBDYW50aWRhZGVzIGVuIGVudGVyb3MNCmJkNTwtYmQ0DQpiZDUkVW5pZGFkZXMgPC0gY2VpbGluZyhiZDUkVW5pZGFkZXMpDQpzdW1tYXJ5KGJkNSkNCmBgYA0KDQojIyMgVMOpY25pY2EgNC4gQ29udmVydGlyIHRpcG9zIGRlIGRhdG9zDQpgYGB7cn0NCiMjIyMgQ29udmVydGlyIGRlIGNhcmFjdGVyIGEgZmVjaGENCmJkNjwtYmQ1DQpiZDYkRmVjaGE8LWFzLkRhdGUoYmQ2JEZlY2hhLCBmb3JtYXQ9ICIlZCVtJVkiKQ0KdGliYmxlKGJkNikNCg0KIyMjIyBDb252ZXJ0aXIgZGUgY2FyYWN0ZXIgYSBlbnRlcm8NCmJkNzwtYmQ2DQpiZDckSG9yYTwtIHN1YnN0cihiZDckSG9yYSwgc3RhcnQ9IDEsIHN0b3A9IDIpDQp0aWJibGUoYmQ3KQ0KYmQ3JEhvcmE8LWFzLmludGVnZXIoYmQ3JEhvcmEpDQpzdHIoYmQ3KQ0KYGBgDQoNCiMjIyBUw6ljbmljYSA1LiBWYWxvcmVzIGZhbHRhbnRlcw0KYGBge3J9DQojIyMjIMK/Q3XDoW50b3MgTkEgdGVuZ28gZW4gbGEgYmFzZSBkZSBkYXRvcz8NCnN1bShpcy5uYShiZDcpKQ0Kc3VtKGlzLm5hKGJkKSkNCg0KIyMjIyDCv0N1w6FudG9zIE5BIHRlbmdvIHBvciB2YXJpYWJsZT8NCnNhcHBseShiZDcsIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkpDQpzYXBwbHkoYmQsIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkpDQoNCj9zYXBwbHkNCg0KIyMjIyBCb3JyYXIgdG9kb3MgbG9zIHJlZ2lzdHJvcyBOQSBkZSB1bmEgdGFibGENCmJkODwtYmQ3DQpiZDg8LW5hLm9taXQoYmQ4KQ0Kc3VtbWFyeShiZDgpDQoNCiMjIyMgUmVlbXBsYXphciBOQSBjb24gQ0VST1MNCmJkOTwtYmQ4DQpiZDlbaXMubmEoYmQ5KV08LTANCnN1bW1hcnkoYmQ5KQ0KDQojIyMjIFJlZW1wbGF6YXIgTkEgY29uIGVsIFByb21lZGlvDQpiZDEwPC1iZDkNCmJkMTAkUExVW2lzLm5hKGJkMTAkUExVKV08LW1lYW4oYmQxMCRQTFUsIG5hLnJtPSBUUlVFKQ0Kc3VtbWFyeShiZDEwKQ0KDQojIyMjIFJlZW1wbGF6YXIgbmVnYXRpdm9zIGNvbiBjZXJvDQpiZDExPC1iZDEwDQpiZDExW2JkMTE8LTBdDQpzdW1tYXJ5KGJkMTEpDQpgYGANCg0KIyMjIFTDqWNuaWNhIDYuIE3DqXRvZG8gZXN0YWTDrXN0aWNvDQpgYGB7cn0NCmJkMTI8LWJkNw0KYm94cGxvdChiZDEyJFByZWNpbywgaG9yaXpvbnRhbD0gVFJVRSkNCmJveHBsb3QoYmQxMiRVbmlkYWRlcywgaG9yaXpvbnRhbD0gVFJVRSkNCg0KIyMjIyBBZ3JlZ2FyIGNvbHVtbmFzDQoNCiNpbnN0YWxsLnBhY2thZ2VzKCJsdWJyaWRhdGUiKQ0KbGlicmFyeShsdWJyaWRhdGUpDQpiZDEyJERpYV9kZV9sYV9zZW1hbmE8LSB3ZGF5KGJkMTIkRmVjaGEpDQpzdW1tYXJ5KGJkMTIpDQoNCmJkMTIkU3VidG90YWw8LWJkMTIkUHJlY2lvICogYmQxMiRVbmlkYWRlcw0Kc3VtbWFyeShiZDEyKQ0KDQpiZDEyJFV0aWxpZGFkPC1iZDEyJFByZWNpbyAtIGJkMTIkVWx0LkNvc3RvDQpzdW1tYXJ5KGJkMTIpDQoNCiMjIyMgRXhwb3J0YXIgYmFzZSBkZSBkYXRvcyBsaW1waWENCmJkX2xpbXBpYTwtYmQxMg0Kd3JpdGUuY3N2KGJkX2xpbXBpYSwgZmlsZT0iYWJhcnJvdGVzX2JkX2xpbXBpYS5jc3YiLCByb3cubmFtZXM9IEZBTFNFKQ0KYGBgDQoNCiMjIyBNYXJrZXQgQmFza2V0IEFuYWx5c2lzDQpgYGB7cn0NCiNpbnN0YWxsLnBhY2thZ2VzKCJwbHlyIikNCiNpbnN0YWxsLnBhY2thZ2VzKCJNYXRyaXgiKQ0KI2luc3RhbGwucGFja2FnZXMoImFydWxlcyIpDQojaW5zdGFsbC5wYWNrYWdlcygiYXJ1bGVzVml6IikNCiNpbnN0YWxsLnBhY2thZ2VzKCJkYXRhc2V0cyIpDQpsaWJyYXJ5KE1hdHJpeCkNCmxpYnJhcnkoYXJ1bGVzKQ0KbGlicmFyeShhcnVsZXNWaXopDQpsaWJyYXJ5KGRhdGFzZXRzKQ0KDQojIyMjIE9yZGVuYXIgZGUgbWVub3IgYSBtYXlvciBsb3MgdGlja2V0cw0KYmRfbGltcGlhPC0gYmRfbGltcGlhW29yZGVyKGJkX2xpbXBpYSRGLlRpY2tldCksXQ0KaGVhZChiZF9saW1waWEpDQp0YWlsKGJkX2xpbXBpYSkNCmBgYA0KDQojIyMgTWFya2V0IEJhc2tldCBBbmFseXNpczogQ8OzbW8gaGFjZXJsbw0KYGBge3J9DQojIyMjIEdlbmVyYXIgYmFza2V0DQojaW5zdGFsbC5wYWNrYWdlcygicGx5ciIpDQpsaWJyYXJ5KHBseXIpDQpiYXNrZXQ8LSBkZHBseShiZF9saW1waWEsYygiRi5UaWNrZXQiKSwgZnVuY3Rpb24oYmRfbGltcGlhKXBhc3RlKGJkX2xpbXBpYSRNYXJjYSwgY29sbGFwc2UgPSAiLCIpKQ0KICAgIA0KICAjIyMjIEVsaW1pbmFyIG7Dum1lcm8gZGUgdGlja2V0DQogICAgYmFza2V0JEYuVGlja2V0PC0gTlVMTA0KICAgIA0KICAjIyMjIFJlbm9tYnJhbW9zIGVsIG5vbWJyZSBkZSBsYSBjb2x1bW5hDQogICAgY29sbmFtZXMoYmFza2V0KTwtIGMoIk1hcmNhIikNCiAgICANCiAgIyMjIyBFeHBvcnRhciBiYXNrZXQNCiAgd3JpdGUuY3N2KGJhc2tldCwiYmFza2V0LmNzdiIsIHF1b3RlID0gRkFMU0UsIHJvdy5uYW1lcyA9IEZBTFNFKQ0KICANCiMjIyMgSW1wb3J0YXIgdHJhbnNhY2Npb25lcw0KICBmaWxlLmNob29zZSgpDQogIHRyPC0gcmVhZC50cmFuc2FjdGlvbnMoIkM6XFxVc2Vyc1xcTWlndWVcXE9uZURyaXZlXFxEb2N1bWVudG9zXFxSXFxQcmltZXIgYmxvcXVlXFxqaW5nbGVfYmVsbHNcXGJhc2tldC5jc3YiLCBmb3JtYXQ9ICJiYXNrZXQiLCBzZXA9IiwiKQ0KICANCiAgcmVnbGFzLmFzb2NpYWNpb248LSBhcHJpb3JpKHRyLCBwYXJhbWV0ZXIgPSBsaXN0KHN1cHA9MC4wMDEsIGNvbmY9IDAuMiwgbWF4bGVuPTEwKSkNCiAgc3VtbWFyeShyZWdsYXMuYXNvY2lhY2lvbikNCiAgaW5zcGVjdChyZWdsYXMuYXNvY2lhY2lvbikNCg0KICByZWdsYXMuYXNvY2lhY2lvbjwtIHNvcnQocmVnbGFzLmFzb2NpYWNpb24sIGJ5PSAnY29uZmlkZW5jZScsIGRlY3JlYXNpbmcgPSBUUlVFKQ0KICBzdW1tYXJ5KHJlZ2xhcy5hc29jaWFjaW9uKQ0KICBpbnNwZWN0KHJlZ2xhcy5hc29jaWFjaW9uKQ0KDQogICAgICB0b3AxMHJlZ2xhczwtIGhlYWQocmVnbGFzLmFzb2NpYWNpb24sIG4gPSAxMCwgYnkgPSAiY29uZmlkZW5jZSIpDQogICAgcGxvdCh0b3AxMHJlZ2xhcywgbWV0aG9kID0gImdyYXBoIiwgZW5naW5lID0gImh0bWx3aWRnZXQiKQ0KYGBgDQoNCiMjIENvbmNsdXNpb25lcw0KX19NYXJrZXQgQmFza2V0IEFuYWx5c2lzX18gZXMgdW5hIHTDqWNuaWNhIGRlIF9kYXRhIG1pbmluZ18gdXRpbGl6YWRhIHBvciBfcmV0YWlsZXJzXyBwYXJhIGluY3JlbWVudGFyIHN1cyB2ZW50YXMgYSB0cmF2w6lzIGRlIGxhIGNvbXByZW5zacOzbiB5IGFuw6FsaXNpcyBkZSBsb3MgcGF0cm9uZXMgZGUgY29tcHJhIGRlbCBjb25zdW1pZG9yLiBFbiBlbCBlamVyY2ljaW8gYW50ZXJpb3Igc2UgbXVlc3RyYSBsYSBjb3JyZWxhY2nDs24gZW50cmUgY2llcnRvcyBwcm9kdWN0b3MgeSBsb3MgcHJvZHVjdG9zIGNvbiBtYXlvcmVzIHZlbnRhcyBkZSBhY3VlcmRvIGEgbGFzIHByZWZlcmVuY2lhcyBkZWwgY2xpZW50ZSBmaW5hbC4gRW4gcmVzdW1lbiwgc2UgbXVlc3RyYSBxdWUgZWwgZ2lybyBkZSB0aWVuZGEgY29uIG1heW9yIHJvdGFjacOzbiBlcyAiQWJhcnJvdGVzIiB5IGxhcyBtYXJjYXMgTnV0cmkgTGVjaGUgeSBEYW4gVXAgc29uIGFxdWVsbGFzIHF1ZSBtw6FzIHZlbmRlbi4gRXN0byBlcyB1biBjbGFybyBlamVtcGxvIGRlIGxhIGltcGxlbWVudGFjacOzbiBkZSBlc3RlIGFuw6FsaXNpcyB5IGPDs21vIGxlIHBlcm1pdGUgYWwgdmVuZGVkb3IgY29ub2NlciBsb3MgcGF0cm9uZXMgZGUgY29tcHJhIGRlIGNhZGEgaW5kaXZpZHVvIGRlcGVuZGllbmRvIGRlbCBzZWdtZW50bywgZXN0YWRvIG8gY2F0ZWdvcsOtYSBkZSBjb21wcmEuIEEgbG8gbGFyZ28gZGUgZXN0ZSBhbsOhbGlzaXMgc2UgcmVhZmlybWFuIGxvcyBwcm9kdWN0b3MgY29uIG1heW9yZXMgdmVudGFzLCB1YmljYWNpb25lcyBkZSB2ZW50YSwgaG9yYSBkZSBjb21wcmEsIGVudHJlIG90cm9zLg0KDQpFbnRyZSBsb3MgZmFjdG9yZXMgZGVzdGFjYWRvcywgc2UgbXVlc3RyYSBxdWUgZWwgRGVwYXJ0YW1lbnRvIGNvbiBtYXlvciBtb3ZpbWllbnRvIGVzIEFiYXJyb3RlcyBlbiBsYSB1YmljYWNpw7NuIGRlIGVzcXVpbmEgeSBzdXMgdmVudGFzIHByaW5jaXBhbGVzIHNvbiBhIGxhIGhvcmEgZGUgY2llcnJlIGRlIDIyOjAwIGhvcmFzLg==