library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.0     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.1     ✔ tibble    3.1.8
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.1     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
library(dplyr)
library(ggplot2)
library(readxl)

Importar la base de datos

Abarrotes_Ventas_2 <- read_excel("Downloads/Abarrotes_Ventas-2.xlsx")

#View(Abarrotes_Ventas_2)
#OTRA FORMA DE BUSCAR LA BASE DE DATOS
#file.choose()

Entender la base de datos

str(Abarrotes_Ventas_2)
## tibble [200,620 × 22] (S3: tbl_df/tbl/data.frame)
##  $ vcClaveTienda     : chr [1:200620] "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Codigo Barras     : num [1:200620] 7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
##  $ PLU               : logi [1:200620] NA NA NA NA NA NA ...
##  $ Fecha             : POSIXct[1:200620], format: "2020-06-19 08:16:20" "2020-06-19 08:23:32" ...
##  $ Hora              : POSIXct[1:200620], format: "1899-12-31 08:16:21" "1899-12-31 08:23:33" ...
##  $ Marca             : chr [1:200620] "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr [1:200620] "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr [1:200620] "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num [1:200620] 16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
##  $ Ult.Costo         : num [1:200620] 12.3 14 5 8 15 ...
##  $ Unidades          : num [1:200620] 1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : num [1:200620] 1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr [1:200620] "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr [1:200620] "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr [1:200620] "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts 2             : num [1:200620] 60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo ubicación    : chr [1:200620] "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora inicio       : POSIXct[1:200620], format: "1899-12-31 08:00:00" "1899-12-31 08:00:00" ...
##  $ Hora cierre       : POSIXct[1:200620], format: "1899-12-31 22:00:00" "1899-12-31 22:00:00" ...
summary(Abarrotes_Ventas_2)
##  vcClaveTienda        DescGiro         Codigo Barras         PLU         
##  Length:200620      Length:200620      Min.   :8.347e+05   Mode:logical  
##  Class :character   Class :character   1st Qu.:7.501e+12   TRUE:1437     
##  Mode  :character   Mode  :character   Median :7.501e+12   NA's:199183   
##                                        Mean   :5.950e+12                 
##                                        3rd Qu.:7.501e+12                 
##                                        Max.   :1.750e+13                 
##      Fecha                             Hora                       
##  Min.   :2020-05-01 00:00:31.08   Min.   :1899-12-31 00:00:00.00  
##  1st Qu.:2020-06-06 13:24:49.08   1st Qu.:1899-12-31 13:12:42.75  
##  Median :2020-07-11 14:10:21.46   Median :1899-12-31 17:35:59.00  
##  Mean   :2020-07-19 15:19:40.65   Mean   :1899-12-31 16:43:52.05  
##  3rd Qu.:2020-08-29 22:07:47.33   3rd Qu.:1899-12-31 20:47:06.00  
##  Max.   :2020-11-11 23:53:47.73   Max.   :1899-12-31 23:59:59.00  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :-147.00  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.42  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts 2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo ubicación         Giro            Hora inicio                    
##  Length:200620      Length:200620      Min.   :1899-12-31 07:00:00.00  
##  Class :character   Class :character   1st Qu.:1899-12-31 07:00:00.00  
##  Mode  :character   Mode  :character   Median :1899-12-31 08:00:00.00  
##                                        Mean   :1899-12-31 07:35:49.71  
##                                        3rd Qu.:1899-12-31 08:00:00.00  
##                                        Max.   :1899-12-31 09:00:00.00  
##   Hora cierre                    
##  Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 22:00:00.00  
##  Median :1899-12-31 22:00:00.00  
##  Mean   :1899-12-31 22:23:11.42  
##  3rd Qu.:1899-12-31 23:00:00.00  
##  Max.   :1899-12-31 23:00:00.00
#OBSERVACIONES:
#1.-La variable PLU tiene 199183 NA's
#2.-La variable Fecha está como caracter
#3.-La variable Hora esta como caracter
#4.-La variable Precio tiene negativos
#5.-La variable Unidades tiene decimales. 

Entender la base de datos

count(Abarrotes_Ventas_2, vcClaveTienda, sort = TRUE)
## # A tibble: 5 × 2
##   vcClaveTienda     n
##   <chr>         <int>
## 1 MX001         96464
## 2 MX004         83455
## 3 MX005         10021
## 4 MX002          6629
## 5 MX003          4051
count(Abarrotes_Ventas_2, DescGiro, sort = TRUE)
## # A tibble: 3 × 2
##   DescGiro        n
##   <chr>       <int>
## 1 Abarrotes  100515
## 2 Carnicería  83455
## 3 Depósito    16650
count(Abarrotes_Ventas_2, Marca, sort = TRUE)
## # A tibble: 540 × 2
##    Marca           n
##    <chr>       <int>
##  1 COCA COLA   18686
##  2 PEPSI       15966
##  3 TECATE      11674
##  4 BIMBO        8316
##  5 LALA         5866
##  6 MARINELA     3696
##  7 DORITOS      3142
##  8 CHEETOS      3130
##  9 NUTRI LECHE  3127
## 10 MARLBORO     2579
## # … with 530 more rows
count(Abarrotes_Ventas_2, Fabricante, sort = TRUE)
## # A tibble: 241 × 2
##    Fabricante                          n
##    <chr>                           <int>
##  1 COCA COLA                       27519
##  2 PEPSI-COLA MEXICANA             22415
##  3 SABRITAS                        14296
##  4 CERVECERIA CUAUHTEMOC MOCTEZUMA 13681
##  5 GRUPO BIMBO                     13077
##  6 SIGMA ALIMENTOS                  8014
##  7 GRUPO INDUSTRIAL LALA            5868
##  8 GRUPO GAMESA                     5527
##  9 NESTLE                           3698
## 10 JUGOS DEL VALLE S.A. DE C.V.     3581
## # … with 231 more rows
count(Abarrotes_Ventas_2, Producto, sort = TRUE)
## # A tibble: 3,404 × 2
##    Producto                        n
##    <chr>                       <int>
##  1 Pepsi N.R. 1.5L              5108
##  2 Coca Cola Retornable 2.5L    3771
##  3 Caguamon Tecate Light 1.2Lt  3471
##  4 Pepsi N. R. 2.5L             2899
##  5 Cerveza Tecate Light 340Ml   2619
##  6 Cerveza Tecate Light 16Oz    2315
##  7 Coca Cola Retornable 1.5L    2124
##  8 Pepsi N.R. 3L                1832
##  9 Coca Cola Retornable 500Ml   1659
## 10 PEPSI N.R. 1.5L              1631
## # … with 3,394 more rows
count(Abarrotes_Ventas_2, NombreDepartamento, sort = TRUE)
## # A tibble: 9 × 2
##   NombreDepartamento        n
##   <chr>                 <int>
## 1 Abarrotes            198274
## 2 Bebes e Infantiles     1483
## 3 Ferretería              377
## 4 Farmacia                255
## 5 Vinos y Licores         104
## 6 Papelería                74
## 7 Mercería                 44
## 8 Productos a Eliminar      8
## 9 Carnes                    1
count(Abarrotes_Ventas_2, NombreFamilia, sort = TRUE)
## # A tibble: 51 × 2
##    NombreFamilia              n
##    <chr>                  <int>
##  1 Bebidas                64917
##  2 Botanas                21583
##  3 Lacteos y Refrigerados 17657
##  4 Cerveza                14017
##  5 Pan y Tortilla         10501
##  6 Limpieza del Hogar      8723
##  7 Galletas                7487
##  8 Cigarros                6817
##  9 Cuidado Personal        5433
## 10 Salsas y Sazonadores    5320
## # … with 41 more rows
count(Abarrotes_Ventas_2, NombreCategoria, sort = TRUE)
## # A tibble: 174 × 2
##    NombreCategoria               n
##    <chr>                     <int>
##  1 Refrescos Plástico (N.R.) 32861
##  2 Refrescos Retornables     13880
##  3 Frituras                  11082
##  4 Lata                       8150
##  5 Leche                      7053
##  6 Cajetilla                  6329
##  7 Botella                    5867
##  8 Productos sin Categoria    5455
##  9 Papas Fritas               5344
## 10 Jugos y Néctares           5295
## # … with 164 more rows
count(Abarrotes_Ventas_2, Estado, sort = TRUE)
## # A tibble: 5 × 2
##   Estado           n
##   <chr>        <int>
## 1 Nuevo León   96464
## 2 Sinaloa      83455
## 3 Quintana Roo 10021
## 4 Jalisco       6629
## 5 Chiapas       4051
count(Abarrotes_Ventas_2, Giro, sort = TRUE)
## # A tibble: 2 × 2
##   Giro            n
##   <chr>       <int>
## 1 Abarrotes  183970
## 2 Mini súper  16650
tibble(Abarrotes_Ventas_2)
## # A tibble: 200,620 × 22
##    vcClave…¹ DescG…² Codig…³ PLU   Fecha               Hora                Marca
##    <chr>     <chr>     <dbl> <lgl> <dttm>              <dttm>              <chr>
##  1 MX001     Abarro… 7.50e12 NA    2020-06-19 08:16:20 1899-12-31 08:16:21 NUTR…
##  2 MX001     Abarro… 7.50e12 NA    2020-06-19 08:23:32 1899-12-31 08:23:33 DAN …
##  3 MX001     Abarro… 7.50e12 NA    2020-06-19 08:24:33 1899-12-31 08:24:33 BIMBO
##  4 MX001     Abarro… 7.50e12 NA    2020-06-19 08:24:33 1899-12-31 08:24:33 PEPSI
##  5 MX001     Abarro… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 BLAN…
##  6 MX001     Abarro… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 FLASH
##  7 MX001     Abarro… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 VARI…
##  8 MX001     Abarro… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 ZOTE 
##  9 MX001     Abarro… 7.51e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 ALWA…
## 10 MX001     Abarro… 3.22e10 NA    2020-06-19 15:24:02 1899-12-31 15:24:02 JUMEX
## # … with 200,610 more rows, 15 more variables: Fabricante <chr>,
## #   Producto <chr>, Precio <dbl>, Ult.Costo <dbl>, Unidades <dbl>,
## #   F.Ticket <dbl>, NombreDepartamento <chr>, NombreFamilia <chr>,
## #   NombreCategoria <chr>, Estado <chr>, `Mts 2` <dbl>, `Tipo ubicación` <chr>,
## #   Giro <chr>, `Hora inicio` <dttm>, `Hora cierre` <dttm>, and abbreviated
## #   variable names ¹​vcClaveTienda, ²​DescGiro, ³​`Codigo Barras`
#LOS PRIMEROS ENCABEZADOS DE LA BASE
head(Abarrotes_Ventas_2, n=7)
## # A tibble: 7 × 22
##   vcClaveT…¹ DescG…² Codig…³ PLU   Fecha               Hora                Marca
##   <chr>      <chr>     <dbl> <lgl> <dttm>              <dttm>              <chr>
## 1 MX001      Abarro… 7.50e12 NA    2020-06-19 08:16:20 1899-12-31 08:16:21 NUTR…
## 2 MX001      Abarro… 7.50e12 NA    2020-06-19 08:23:32 1899-12-31 08:23:33 DAN …
## 3 MX001      Abarro… 7.50e12 NA    2020-06-19 08:24:33 1899-12-31 08:24:33 BIMBO
## 4 MX001      Abarro… 7.50e12 NA    2020-06-19 08:24:33 1899-12-31 08:24:33 PEPSI
## 5 MX001      Abarro… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 BLAN…
## 6 MX001      Abarro… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 FLASH
## 7 MX001      Abarro… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 VARI…
## # … with 15 more variables: Fabricante <chr>, Producto <chr>, Precio <dbl>,
## #   Ult.Costo <dbl>, Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## #   NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, `Mts 2` <dbl>,
## #   `Tipo ubicación` <chr>, Giro <chr>, `Hora inicio` <dttm>,
## #   `Hora cierre` <dttm>, and abbreviated variable names ¹​vcClaveTienda,
## #   ²​DescGiro, ³​`Codigo Barras`
#Los ultimos
tail(Abarrotes_Ventas_2, n=8)
## # A tibble: 8 × 22
##   vcClaveT…¹ DescG…² Codig…³ PLU   Fecha               Hora                Marca
##   <chr>      <chr>     <dbl> <lgl> <dttm>              <dttm>              <chr>
## 1 MX005      Depósi… 7.62e12 NA    2020-07-18 22:45:58 1899-12-31 22:45:58 TRID…
## 2 MX005      Depósi… 7.62e12 NA    2020-07-12 00:36:33 1899-12-31 00:36:34 TRID…
## 3 MX005      Depósi… 7.62e12 NA    2020-07-12 01:08:24 1899-12-31 01:08:25 TRID…
## 4 MX005      Depósi… 7.62e12 NA    2020-10-23 22:17:37 1899-12-31 22:17:37 TRID…
## 5 MX005      Depósi… 7.62e12 NA    2020-10-10 20:30:20 1899-12-31 20:30:20 TRID…
## 6 MX005      Depósi… 7.62e12 NA    2020-10-10 22:40:42 1899-12-31 22:40:43 TRID…
## 7 MX005      Depósi… 7.62e12 NA    2020-06-27 22:30:19 1899-12-31 22:30:19 TRID…
## 8 MX005      Depósi… 7.62e12 NA    2020-06-26 23:43:33 1899-12-31 23:43:34 TRID…
## # … with 15 more variables: Fabricante <chr>, Producto <chr>, Precio <dbl>,
## #   Ult.Costo <dbl>, Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## #   NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, `Mts 2` <dbl>,
## #   `Tipo ubicación` <chr>, Giro <chr>, `Hora inicio` <dttm>,
## #   `Hora cierre` <dttm>, and abbreviated variable names ¹​vcClaveTienda,
## #   ²​DescGiro, ³​`Codigo Barras`

Instalar la base de datos

#install.packages("janitor")
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
tabyl(Abarrotes_Ventas_2, vcClaveTienda, NombreDepartamento)
##  vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
##          MX001     95410                515      1      147        245       28
##          MX002      6590                 21      0        4         10        0
##          MX003      4026                 15      0        2          8        0
##          MX004     82234                932      0      102        114       16
##          MX005     10014                  0      0        0          0        0
##  Papelería Productos a Eliminar Vinos y Licores
##         35                    3              80
##          0                    0               4
##          0                    0               0
##         32                    5              20
##          7                    0               0

Tecnica para remover datos

#tecnica 1
BD1 <- Abarrotes_Ventas_2
BD1 <- subset(BD1, select = -c(PLU))

#subset extraer de una base de datos
#-c es para borrar las columnas seleccionadas

Tecnica para Eliminar renglones

BD2 <- BD1
BD2 <- BD2 [BD2$Precio>0,]
summary(BD1$Precio)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
## -147.00   11.00   16.00   19.42   25.00 1000.00
summary(BD2$Precio)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##    0.50   11.00   16.00   19.45   25.00 1000.00

Técnica 2 Remover valores duplicados

#Técnica 2 Remover valores duplicados 
#¿Cuantos renglones/registros duplicados tenemos?
BD2[duplicated(BD2),] #Cuantos duplicados hay 
## # A tibble: 0 × 21
## # … with 21 variables: vcClaveTienda <chr>, DescGiro <chr>,
## #   Codigo Barras <dbl>, Fecha <dttm>, Hora <dttm>, Marca <chr>,
## #   Fabricante <chr>, Producto <chr>, Precio <dbl>, Ult.Costo <dbl>,
## #   Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## #   NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts 2 <dbl>,
## #   Tipo ubicación <chr>, Giro <chr>, Hora inicio <dttm>, Hora cierre <dttm>
sum(duplicated(BD2)) #que los sume
## [1] 0

Eliminar registros duplicados

#Eliminar registros duplicados 
BD3 <- BD2
library(dplyr) #Realizar operaciones comunes coo filtrar por fila, seleccionar columnas especificas, reordenar
BD3 <- distinct(BD3)

Tecnica 3. Errores tipografisoc y similares

#Tecnica 3 Errores tipografisoc y similares
 #Precios en absolutos 
BD4 <- BD1
BD4$Precio <- abs(BD4$Unidades)
summary(BD4$Precio)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.200   1.000   1.000   1.262   1.000  96.000
#Cantidades enteros 

BD5 <- BD4
BD5$Unidades <- ceiling(BD5$Unidades)
summary(BD5$Unidades)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.262   1.000  96.000
summary(Abarrotes_Ventas_2$Unidades)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   0.200   1.000   1.000   1.262   1.000  96.000

Convertir de caracter a fecha

#Convertir de caracter a fecha 
BD6 <- BD5
BD6$Fecha <- as.Date(BD6$Fecha, "%d/%m/%Y")
## Warning in as.POSIXlt.POSIXct(x, tz = tz): unknown timezone '%d/%m/%Y'
tibble(BD6)
## # A tibble: 200,620 × 21
##    vcClav…¹ DescG…² Codig…³ Fecha      Hora                Marca Fabri…⁴ Produ…⁵
##    <chr>    <chr>     <dbl> <date>     <dttm>              <chr> <chr>   <chr>  
##  1 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:16:21 NUTR… MEXILAC Nutri …
##  2 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:23:33 DAN … DANONE… DANUP …
##  3 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:24:33 BIMBO GRUPO … Rebana…
##  4 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:24:33 PEPSI PEPSI-… Pepsi …
##  5 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:26:28 BLAN… FABRIC… Deterg…
##  6 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:26:28 FLASH ALEN    Flash …
##  7 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:26:28 VARI… DANONE… Danone…
##  8 MX001    Abarro… 7.50e12 2020-06-19 1899-12-31 08:26:28 ZOTE  FABRIC… Jabon …
##  9 MX001    Abarro… 7.51e12 2020-06-19 1899-12-31 08:26:28 ALWA… PROCTE… T Feme…
## 10 MX001    Abarro… 3.22e10 2020-06-19 1899-12-31 15:24:02 JUMEX JUMEX   Jugo D…
## # … with 200,610 more rows, 13 more variables: Precio <dbl>, Ult.Costo <dbl>,
## #   Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## #   NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, `Mts 2` <dbl>,
## #   `Tipo ubicación` <chr>, Giro <chr>, `Hora inicio` <dttm>,
## #   `Hora cierre` <dttm>, and abbreviated variable names ¹​vcClaveTienda,
## #   ²​DescGiro, ³​`Codigo Barras`, ⁴​Fabricante, ⁵​Producto
#Convertir de caracter 
BD7 <- BD6
BD7$Hora <- substr(BD7$Hora, start = 1, stop = 2)
tibble(BD7)
## # A tibble: 200,620 × 21
##    vcCla…¹ DescG…² Codig…³ Fecha      Hora  Marca Fabri…⁴ Produ…⁵ Precio Ult.C…⁶
##    <chr>   <chr>     <dbl> <date>     <chr> <chr> <chr>   <chr>    <dbl>   <dbl>
##  1 MX001   Abarro… 7.50e12 2020-06-19 18    NUTR… MEXILAC Nutri …      1   12.3 
##  2 MX001   Abarro… 7.50e12 2020-06-19 18    DAN … DANONE… DANUP …      1   14   
##  3 MX001   Abarro… 7.50e12 2020-06-19 18    BIMBO GRUPO … Rebana…      1    5   
##  4 MX001   Abarro… 7.50e12 2020-06-19 18    PEPSI PEPSI-… Pepsi …      1    8   
##  5 MX001   Abarro… 7.50e12 2020-06-19 18    BLAN… FABRIC… Deterg…      1   15   
##  6 MX001   Abarro… 7.50e12 2020-06-19 18    FLASH ALEN    Flash …      1    7.31
##  7 MX001   Abarro… 7.50e12 2020-06-19 18    VARI… DANONE… Danone…      1   11   
##  8 MX001   Abarro… 7.50e12 2020-06-19 18    ZOTE  FABRIC… Jabon …      1    7.31
##  9 MX001   Abarro… 7.51e12 2020-06-19 18    ALWA… PROCTE… T Feme…      1   18.1 
## 10 MX001   Abarro… 3.22e10 2020-06-19 18    JUMEX JUMEX   Jugo D…      1   12   
## # … with 200,610 more rows, 11 more variables: Unidades <dbl>, F.Ticket <dbl>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, `Mts 2` <dbl>, `Tipo ubicación` <chr>, Giro <chr>,
## #   `Hora inicio` <dttm>, `Hora cierre` <dttm>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​`Codigo Barras`, ⁴​Fabricante, ⁵​Producto,
## #   ⁶​Ult.Costo
BD7$Hora <- as.integer(BD7$Hora)
str(BD7)
## tibble [200,620 × 21] (S3: tbl_df/tbl/data.frame)
##  $ vcClaveTienda     : chr [1:200620] "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Codigo Barras     : num [1:200620] 7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
##  $ Fecha             : Date[1:200620], format: "2020-06-19" "2020-06-19" ...
##  $ Hora              : int [1:200620] 18 18 18 18 18 18 18 18 18 18 ...
##  $ Marca             : chr [1:200620] "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr [1:200620] "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr [1:200620] "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num [1:200620] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Ult.Costo         : num [1:200620] 12.3 14 5 8 15 ...
##  $ Unidades          : num [1:200620] 1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : num [1:200620] 1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr [1:200620] "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr [1:200620] "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr [1:200620] "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts 2             : num [1:200620] 60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo ubicación    : chr [1:200620] "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora inicio       : POSIXct[1:200620], format: "1899-12-31 08:00:00" "1899-12-31 08:00:00" ...
##  $ Hora cierre       : POSIXct[1:200620], format: "1899-12-31 22:00:00" "1899-12-31 22:00:00" ...
BD7 <- BD6
BD7$Hora <- substr(BD7$Hora, start=1, stop=2)
tibble(BD7)
## # A tibble: 200,620 × 21
##    vcCla…¹ DescG…² Codig…³ Fecha      Hora  Marca Fabri…⁴ Produ…⁵ Precio Ult.C…⁶
##    <chr>   <chr>     <dbl> <date>     <chr> <chr> <chr>   <chr>    <dbl>   <dbl>
##  1 MX001   Abarro… 7.50e12 2020-06-19 18    NUTR… MEXILAC Nutri …      1   12.3 
##  2 MX001   Abarro… 7.50e12 2020-06-19 18    DAN … DANONE… DANUP …      1   14   
##  3 MX001   Abarro… 7.50e12 2020-06-19 18    BIMBO GRUPO … Rebana…      1    5   
##  4 MX001   Abarro… 7.50e12 2020-06-19 18    PEPSI PEPSI-… Pepsi …      1    8   
##  5 MX001   Abarro… 7.50e12 2020-06-19 18    BLAN… FABRIC… Deterg…      1   15   
##  6 MX001   Abarro… 7.50e12 2020-06-19 18    FLASH ALEN    Flash …      1    7.31
##  7 MX001   Abarro… 7.50e12 2020-06-19 18    VARI… DANONE… Danone…      1   11   
##  8 MX001   Abarro… 7.50e12 2020-06-19 18    ZOTE  FABRIC… Jabon …      1    7.31
##  9 MX001   Abarro… 7.51e12 2020-06-19 18    ALWA… PROCTE… T Feme…      1   18.1 
## 10 MX001   Abarro… 3.22e10 2020-06-19 18    JUMEX JUMEX   Jugo D…      1   12   
## # … with 200,610 more rows, 11 more variables: Unidades <dbl>, F.Ticket <dbl>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, `Mts 2` <dbl>, `Tipo ubicación` <chr>, Giro <chr>,
## #   `Hora inicio` <dttm>, `Hora cierre` <dttm>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​`Codigo Barras`, ⁴​Fabricante, ⁵​Producto,
## #   ⁶​Ult.Costo
BD7$Hora <- as.integer(BD7$Hora)
str(BD7)
## tibble [200,620 × 21] (S3: tbl_df/tbl/data.frame)
##  $ vcClaveTienda     : chr [1:200620] "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Codigo Barras     : num [1:200620] 7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
##  $ Fecha             : Date[1:200620], format: "2020-06-19" "2020-06-19" ...
##  $ Hora              : int [1:200620] 18 18 18 18 18 18 18 18 18 18 ...
##  $ Marca             : chr [1:200620] "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr [1:200620] "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr [1:200620] "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num [1:200620] 1 1 1 1 1 1 1 1 1 1 ...
##  $ Ult.Costo         : num [1:200620] 12.3 14 5 8 15 ...
##  $ Unidades          : num [1:200620] 1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : num [1:200620] 1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr [1:200620] "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr [1:200620] "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr [1:200620] "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts 2             : num [1:200620] 60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo ubicación    : chr [1:200620] "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr [1:200620] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora inicio       : POSIXct[1:200620], format: "1899-12-31 08:00:00" "1899-12-31 08:00:00" ...
##  $ Hora cierre       : POSIXct[1:200620], format: "1899-12-31 22:00:00" "1899-12-31 22:00:00" ...

Preguntas

#¿Cuantos NA's tengo en la base de datos? 
sum(is.na(BD7))
## [1] 0
sum(is.na(Abarrotes_Ventas_2))
## [1] 199183
#¿Cuantos NA's tengo por variable?
sapply(BD7, function(x) sum(is.na(x)))
##      vcClaveTienda           DescGiro      Codigo Barras              Fecha 
##                  0                  0                  0                  0 
##               Hora              Marca         Fabricante           Producto 
##                  0                  0                  0                  0 
##             Precio          Ult.Costo           Unidades           F.Ticket 
##                  0                  0                  0                  0 
## NombreDepartamento      NombreFamilia    NombreCategoria             Estado 
##                  0                  0                  0                  0 
##              Mts 2     Tipo ubicación               Giro        Hora inicio 
##                  0                  0                  0                  0 
##        Hora cierre 
##                  0
#Borrar todos los NA's de una base de datos 
BD8<- Abarrotes_Ventas_2
BD8 <- na.omit(BD8)
summary(BD8)
##  vcClaveTienda        DescGiro         Codigo Barras         PLU         
##  Length:1437        Length:1437        Min.   :6.750e+08   Mode:logical  
##  Class :character   Class :character   1st Qu.:6.750e+08   TRUE:1437     
##  Mode  :character   Mode  :character   Median :6.750e+08                 
##                                        Mean   :2.616e+11                 
##                                        3rd Qu.:6.750e+08                 
##                                        Max.   :7.501e+12                 
##      Fecha                             Hora                       
##  Min.   :2020-06-06 14:36:14.11   Min.   :1899-12-31 00:01:22.00  
##  1st Qu.:2020-06-20 21:48:46.75   1st Qu.:1899-12-31 15:57:22.00  
##  Median :2020-07-10 22:17:46.90   Median :1899-12-31 18:49:20.00  
##  Mean   :2020-07-16 11:50:19.00   Mean   :1899-12-31 17:46:04.46  
##  3rd Qu.:2020-08-08 21:42:17.13   3rd Qu.:1899-12-31 21:09:03.00  
##  Max.   :2020-11-11 20:37:03.69   Max.   :1899-12-31 23:58:14.00  
##     Marca            Fabricante          Producto             Precio     
##  Length:1437        Length:1437        Length:1437        Min.   :30.00  
##  Class :character   Class :character   Class :character   1st Qu.:90.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :90.00  
##                                                           Mean   :87.94  
##                                                           3rd Qu.:90.00  
##                                                           Max.   :90.00  
##    Ult.Costo        Unidades        F.Ticket      NombreDepartamento
##  Min.   : 1.00   Min.   :1.000   Min.   :   772   Length:1437       
##  1st Qu.:64.62   1st Qu.:1.000   1st Qu.: 99955   Class :character  
##  Median :64.62   Median :1.000   Median :102493   Mode  :character  
##  Mean   :56.65   Mean   :1.124   Mean   :100595                     
##  3rd Qu.:64.62   3rd Qu.:1.000   3rd Qu.:106546                     
##  Max.   :64.62   Max.   :7.000   Max.   :118356                     
##  NombreFamilia      NombreCategoria       Estado              Mts 2      
##  Length:1437        Length:1437        Length:1437        Min.   :58.00  
##  Class :character   Class :character   Class :character   1st Qu.:58.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :58.00  
##                                                           Mean   :58.07  
##                                                           3rd Qu.:58.00  
##                                                           Max.   :60.00  
##  Tipo ubicación         Giro            Hora inicio                 
##  Length:1437        Length:1437        Min.   :1899-12-31 08:00:00  
##  Class :character   Class :character   1st Qu.:1899-12-31 08:00:00  
##  Mode  :character   Mode  :character   Median :1899-12-31 08:00:00  
##                                        Mean   :1899-12-31 08:00:00  
##                                        3rd Qu.:1899-12-31 08:00:00  
##                                        Max.   :1899-12-31 08:00:00  
##   Hora cierre                    
##  Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 21:00:00.00  
##  Median :1899-12-31 21:00:00.00  
##  Mean   :1899-12-31 21:02:06.26  
##  3rd Qu.:1899-12-31 21:00:00.00  
##  Max.   :1899-12-31 22:00:00.00

Reemplazar NA’s con ceros

BD9 <- Abarrotes_Ventas_2
BD9 [is.na(BD9)]<-0
summary(BD9)
##  vcClaveTienda        DescGiro         Codigo Barras          PLU         
##  Length:200620      Length:200620      Min.   :8.347e+05   Mode :logical  
##  Class :character   Class :character   1st Qu.:7.501e+12   FALSE:199183   
##  Mode  :character   Mode  :character   Median :7.501e+12   TRUE :1437     
##                                        Mean   :5.950e+12                  
##                                        3rd Qu.:7.501e+12                  
##                                        Max.   :1.750e+13                  
##      Fecha                             Hora                       
##  Min.   :2020-05-01 00:00:31.08   Min.   :1899-12-31 00:00:00.00  
##  1st Qu.:2020-06-06 13:24:49.08   1st Qu.:1899-12-31 13:12:42.75  
##  Median :2020-07-11 14:10:21.46   Median :1899-12-31 17:35:59.00  
##  Mean   :2020-07-19 15:19:40.65   Mean   :1899-12-31 16:43:52.05  
##  3rd Qu.:2020-08-29 22:07:47.33   3rd Qu.:1899-12-31 20:47:06.00  
##  Max.   :2020-11-11 23:53:47.73   Max.   :1899-12-31 23:59:59.00  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :-147.00  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.42  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts 2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo ubicación         Giro            Hora inicio                    
##  Length:200620      Length:200620      Min.   :1899-12-31 07:00:00.00  
##  Class :character   Class :character   1st Qu.:1899-12-31 07:00:00.00  
##  Mode  :character   Mode  :character   Median :1899-12-31 08:00:00.00  
##                                        Mean   :1899-12-31 07:35:49.71  
##                                        3rd Qu.:1899-12-31 08:00:00.00  
##                                        Max.   :1899-12-31 09:00:00.00  
##   Hora cierre                    
##  Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 22:00:00.00  
##  Median :1899-12-31 22:00:00.00  
##  Mean   :1899-12-31 22:23:11.42  
##  3rd Qu.:1899-12-31 23:00:00.00  
##  Max.   :1899-12-31 23:00:00.00

Reemplazar NA con el promedio

BD10 <- Abarrotes_Ventas_2
BD10$PLU[is.na(BD10$PLU)] <- mean(BD10$PLU, na.rm=TRUE)
summary(BD10)
##  vcClaveTienda        DescGiro         Codigo Barras            PLU   
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   :1  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.:1  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median :1  
##                                        Mean   :5.950e+12   Mean   :1  
##                                        3rd Qu.:7.501e+12   3rd Qu.:1  
##                                        Max.   :1.750e+13   Max.   :1  
##      Fecha                             Hora                       
##  Min.   :2020-05-01 00:00:31.08   Min.   :1899-12-31 00:00:00.00  
##  1st Qu.:2020-06-06 13:24:49.08   1st Qu.:1899-12-31 13:12:42.75  
##  Median :2020-07-11 14:10:21.46   Median :1899-12-31 17:35:59.00  
##  Mean   :2020-07-19 15:19:40.65   Mean   :1899-12-31 16:43:52.05  
##  3rd Qu.:2020-08-29 22:07:47.33   3rd Qu.:1899-12-31 20:47:06.00  
##  Max.   :2020-11-11 23:53:47.73   Max.   :1899-12-31 23:59:59.00  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :-147.00  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.42  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts 2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo ubicación         Giro            Hora inicio                    
##  Length:200620      Length:200620      Min.   :1899-12-31 07:00:00.00  
##  Class :character   Class :character   1st Qu.:1899-12-31 07:00:00.00  
##  Mode  :character   Mode  :character   Median :1899-12-31 08:00:00.00  
##                                        Mean   :1899-12-31 07:35:49.71  
##                                        3rd Qu.:1899-12-31 08:00:00.00  
##                                        Max.   :1899-12-31 09:00:00.00  
##   Hora cierre                    
##  Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 22:00:00.00  
##  Median :1899-12-31 22:00:00.00  
##  Mean   :1899-12-31 22:23:11.42  
##  3rd Qu.:1899-12-31 23:00:00.00  
##  Max.   :1899-12-31 23:00:00.00

Reemplazar negativos con ceros

BD11 <- Abarrotes_Ventas_2
BD11[BD11 = O]<-0
summary(BD11)
##  vcClaveTienda    DescGiro Codigo Barras      PLU        Fecha        Hora  
##  Min.   :0     Min.   :0   Min.   :0     Min.   :0   Min.   :0   Min.   :0  
##  1st Qu.:0     1st Qu.:0   1st Qu.:0     1st Qu.:0   1st Qu.:0   1st Qu.:0  
##  Median :0     Median :0   Median :0     Median :0   Median :0   Median :0  
##  Mean   :0     Mean   :0   Mean   :0     Mean   :0   Mean   :0   Mean   :0  
##  3rd Qu.:0     3rd Qu.:0   3rd Qu.:0     3rd Qu.:0   3rd Qu.:0   3rd Qu.:0  
##  Max.   :0     Max.   :0   Max.   :0     Max.   :0   Max.   :0   Max.   :0  
##      Marca     Fabricante    Producto     Precio    Ult.Costo    Unidades
##  Min.   :0   Min.   :0    Min.   :0   Min.   :0   Min.   :0   Min.   :0  
##  1st Qu.:0   1st Qu.:0    1st Qu.:0   1st Qu.:0   1st Qu.:0   1st Qu.:0  
##  Median :0   Median :0    Median :0   Median :0   Median :0   Median :0  
##  Mean   :0   Mean   :0    Mean   :0   Mean   :0   Mean   :0   Mean   :0  
##  3rd Qu.:0   3rd Qu.:0    3rd Qu.:0   3rd Qu.:0   3rd Qu.:0   3rd Qu.:0  
##  Max.   :0   Max.   :0    Max.   :0   Max.   :0   Max.   :0   Max.   :0  
##     F.Ticket NombreDepartamento NombreFamilia NombreCategoria     Estado 
##  Min.   :0   Min.   :0          Min.   :0     Min.   :0       Min.   :0  
##  1st Qu.:0   1st Qu.:0          1st Qu.:0     1st Qu.:0       1st Qu.:0  
##  Median :0   Median :0          Median :0     Median :0       Median :0  
##  Mean   :0   Mean   :0          Mean   :0     Mean   :0       Mean   :0  
##  3rd Qu.:0   3rd Qu.:0          3rd Qu.:0     3rd Qu.:0       3rd Qu.:0  
##  Max.   :0   Max.   :0          Max.   :0     Max.   :0       Max.   :0  
##      Mts 2   Tipo ubicación      Giro    Hora inicio  Hora cierre
##  Min.   :0   Min.   :0      Min.   :0   Min.   :0    Min.   :0   
##  1st Qu.:0   1st Qu.:0      1st Qu.:0   1st Qu.:0    1st Qu.:0   
##  Median :0   Median :0      Median :0   Median :0    Median :0   
##  Mean   :0   Mean   :0      Mean   :0   Mean   :0    Mean   :0   
##  3rd Qu.:0   3rd Qu.:0      3rd Qu.:0   3rd Qu.:0    3rd Qu.:0   
##  Max.   :0   Max.   :0      Max.   :0   Max.   :0    Max.   :0

Tecnica 6 Metodo estadistico

#Grafica de caja y bigote
BD12 <- BD7
boxplot(BD12$Precio, horizontal = TRUE)

boxplot(BD12$Unidades, horizontal = TRUE)

#install.packages("lubridate")
library(lubridate)
BD12$Dia_de_la_semana <-wday(BD12$Fecha)
summary(BD12)
##  vcClaveTienda        DescGiro         Codigo Barras           Fecha           
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   :2020-05-01  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.:2020-06-06  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median :2020-07-11  
##                                        Mean   :5.950e+12   Mean   :2020-07-18  
##                                        3rd Qu.:7.501e+12   3rd Qu.:2020-08-29  
##                                        Max.   :1.750e+13   Max.   :2020-11-11  
##       Hora       Marca            Fabricante          Producto        
##  Min.   :18   Length:200620      Length:200620      Length:200620     
##  1st Qu.:18   Class :character   Class :character   Class :character  
##  Median :18   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :18                                                           
##  3rd Qu.:18                                                           
##  Max.   :18                                                           
##      Precio         Ult.Costo         Unidades         F.Ticket     
##  Min.   : 0.200   Min.   :  0.38   Min.   : 1.000   Min.   :     1  
##  1st Qu.: 1.000   1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967  
##  Median : 1.000   Median : 12.31   Median : 1.000   Median :105996  
##  Mean   : 1.262   Mean   : 15.31   Mean   : 1.262   Mean   :193994  
##  3rd Qu.: 1.000   3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008  
##  Max.   :96.000   Max.   :769.23   Max.   :96.000   Max.   :450040  
##  NombreDepartamento NombreFamilia      NombreCategoria       Estado         
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Mts 2      Tipo ubicación         Giro          
##  Min.   :47.0   Length:200620      Length:200620     
##  1st Qu.:53.0   Class :character   Class :character  
##  Median :60.0   Mode  :character   Mode  :character  
##  Mean   :56.6                                        
##  3rd Qu.:60.0                                        
##  Max.   :62.0                                        
##   Hora inicio                      Hora cierre                    
##  Min.   :1899-12-31 07:00:00.00   Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 07:00:00.00   1st Qu.:1899-12-31 22:00:00.00  
##  Median :1899-12-31 08:00:00.00   Median :1899-12-31 22:00:00.00  
##  Mean   :1899-12-31 07:35:49.71   Mean   :1899-12-31 22:23:11.42  
##  3rd Qu.:1899-12-31 08:00:00.00   3rd Qu.:1899-12-31 23:00:00.00  
##  Max.   :1899-12-31 09:00:00.00   Max.   :1899-12-31 23:00:00.00  
##  Dia_de_la_semana
##  Min.   :1.000   
##  1st Qu.:2.000   
##  Median :4.000   
##  Mean   :3.912   
##  3rd Qu.:6.000   
##  Max.   :7.000
BD12$Subtotal <- BD12$Precio * BD12$Unidades
summary(BD12)
##  vcClaveTienda        DescGiro         Codigo Barras           Fecha           
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   :2020-05-01  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.:2020-06-06  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median :2020-07-11  
##                                        Mean   :5.950e+12   Mean   :2020-07-18  
##                                        3rd Qu.:7.501e+12   3rd Qu.:2020-08-29  
##                                        Max.   :1.750e+13   Max.   :2020-11-11  
##       Hora       Marca            Fabricante          Producto        
##  Min.   :18   Length:200620      Length:200620      Length:200620     
##  1st Qu.:18   Class :character   Class :character   Class :character  
##  Median :18   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :18                                                           
##  3rd Qu.:18                                                           
##  Max.   :18                                                           
##      Precio         Ult.Costo         Unidades         F.Ticket     
##  Min.   : 0.200   Min.   :  0.38   Min.   : 1.000   Min.   :     1  
##  1st Qu.: 1.000   1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967  
##  Median : 1.000   Median : 12.31   Median : 1.000   Median :105996  
##  Mean   : 1.262   Mean   : 15.31   Mean   : 1.262   Mean   :193994  
##  3rd Qu.: 1.000   3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008  
##  Max.   :96.000   Max.   :769.23   Max.   :96.000   Max.   :450040  
##  NombreDepartamento NombreFamilia      NombreCategoria       Estado         
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Mts 2      Tipo ubicación         Giro          
##  Min.   :47.0   Length:200620      Length:200620     
##  1st Qu.:53.0   Class :character   Class :character  
##  Median :60.0   Mode  :character   Mode  :character  
##  Mean   :56.6                                        
##  3rd Qu.:60.0                                        
##  Max.   :62.0                                        
##   Hora inicio                      Hora cierre                    
##  Min.   :1899-12-31 07:00:00.00   Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 07:00:00.00   1st Qu.:1899-12-31 22:00:00.00  
##  Median :1899-12-31 08:00:00.00   Median :1899-12-31 22:00:00.00  
##  Mean   :1899-12-31 07:35:49.71   Mean   :1899-12-31 22:23:11.42  
##  3rd Qu.:1899-12-31 08:00:00.00   3rd Qu.:1899-12-31 23:00:00.00  
##  Max.   :1899-12-31 09:00:00.00   Max.   :1899-12-31 23:00:00.00  
##  Dia_de_la_semana    Subtotal       
##  Min.   :1.000    Min.   :   0.200  
##  1st Qu.:2.000    1st Qu.:   1.000  
##  Median :4.000    Median :   1.000  
##  Mean   :3.912    Mean   :   3.258  
##  3rd Qu.:6.000    3rd Qu.:   1.000  
##  Max.   :7.000    Max.   :9216.000
BD12$Utilidad <- BD12$Precio - BD12$Ult.Costo
summary(BD12)
##  vcClaveTienda        DescGiro         Codigo Barras           Fecha           
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   :2020-05-01  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.:2020-06-06  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median :2020-07-11  
##                                        Mean   :5.950e+12   Mean   :2020-07-18  
##                                        3rd Qu.:7.501e+12   3rd Qu.:2020-08-29  
##                                        Max.   :1.750e+13   Max.   :2020-11-11  
##       Hora       Marca            Fabricante          Producto        
##  Min.   :18   Length:200620      Length:200620      Length:200620     
##  1st Qu.:18   Class :character   Class :character   Class :character  
##  Median :18   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :18                                                           
##  3rd Qu.:18                                                           
##  Max.   :18                                                           
##      Precio         Ult.Costo         Unidades         F.Ticket     
##  Min.   : 0.200   Min.   :  0.38   Min.   : 1.000   Min.   :     1  
##  1st Qu.: 1.000   1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967  
##  Median : 1.000   Median : 12.31   Median : 1.000   Median :105996  
##  Mean   : 1.262   Mean   : 15.31   Mean   : 1.262   Mean   :193994  
##  3rd Qu.: 1.000   3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008  
##  Max.   :96.000   Max.   :769.23   Max.   :96.000   Max.   :450040  
##  NombreDepartamento NombreFamilia      NombreCategoria       Estado         
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##      Mts 2      Tipo ubicación         Giro          
##  Min.   :47.0   Length:200620      Length:200620     
##  1st Qu.:53.0   Class :character   Class :character  
##  Median :60.0   Mode  :character   Mode  :character  
##  Mean   :56.6                                        
##  3rd Qu.:60.0                                        
##  Max.   :62.0                                        
##   Hora inicio                      Hora cierre                    
##  Min.   :1899-12-31 07:00:00.00   Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 07:00:00.00   1st Qu.:1899-12-31 22:00:00.00  
##  Median :1899-12-31 08:00:00.00   Median :1899-12-31 22:00:00.00  
##  Mean   :1899-12-31 07:35:49.71   Mean   :1899-12-31 22:23:11.42  
##  3rd Qu.:1899-12-31 08:00:00.00   3rd Qu.:1899-12-31 23:00:00.00  
##  Max.   :1899-12-31 09:00:00.00   Max.   :1899-12-31 23:00:00.00  
##  Dia_de_la_semana    Subtotal           Utilidad      
##  Min.   :1.000    Min.   :   0.200   Min.   :-768.23  
##  1st Qu.:2.000    1st Qu.:   1.000   1st Qu.: -18.23  
##  Median :4.000    Median :   1.000   Median : -11.31  
##  Mean   :3.912    Mean   :   3.258   Mean   : -14.05  
##  3rd Qu.:6.000    3rd Qu.:   1.000   3rd Qu.:  -7.46  
##  Max.   :7.000    Max.   :9216.000   Max.   :  76.00

Exportar la base de datos limpia

BD_Limpia <- BD12
write.csv(BD_Limpia, file = "Practica_2_abarrotes_limpia.csv", row.names = FALSE)
#Market basket analysis

#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## The following object is masked from 'package:purrr':
## 
##     compact
#install.packages("Matrix")
library(Matrix)
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
#install.packages("arules")
library(arules)
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
#install.packages("arulesViz")
library(arulesViz)
#install.packages("dataset")
library(dataset)
## 
## Attaching package: 'dataset'
## The following object is masked from 'package:arules':
## 
##     size
## The following object is masked from 'package:base':
## 
##     as.data.frame

Ordenar de menor a mayor los tickets

BD_Limpia <- BD_Limpia[order(BD_Limpia$F.Ticket),]
head(BD_Limpia)
## # A tibble: 6 × 24
##   vcClav…¹ DescG…² Codig…³ Fecha       Hora Marca Fabri…⁴ Produ…⁵ Precio Ult.C…⁶
##   <chr>    <chr>     <dbl> <date>     <int> <chr> <chr>   <chr>    <dbl>   <dbl>
## 1 MX001    Abarro… 7.50e12 2020-06-19    18 NUTR… MEXILAC Nutri …      1   12.3 
## 2 MX001    Abarro… 7.50e12 2020-06-19    18 DAN … DANONE… DANUP …      1   14   
## 3 MX001    Abarro… 7.50e12 2020-06-19    18 BIMBO GRUPO … Rebana…      1    5   
## 4 MX001    Abarro… 7.50e12 2020-06-19    18 PEPSI PEPSI-… Pepsi …      1    8   
## 5 MX001    Abarro… 7.50e12 2020-06-19    18 BLAN… FABRIC… Deterg…      1   15   
## 6 MX001    Abarro… 7.50e12 2020-06-19    18 FLASH ALEN    Flash …      1    7.31
## # … with 14 more variables: Unidades <dbl>, F.Ticket <dbl>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, `Mts 2` <dbl>, `Tipo ubicación` <chr>, Giro <chr>,
## #   `Hora inicio` <dttm>, `Hora cierre` <dttm>, Dia_de_la_semana <dbl>,
## #   Subtotal <dbl>, Utilidad <dbl>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​`Codigo Barras`, ⁴​Fabricante, ⁵​Producto,
## #   ⁶​Ult.Costo
tail(BD_Limpia)
## # A tibble: 6 × 24
##   vcClav…¹ DescG…² Codig…³ Fecha       Hora Marca Fabri…⁴ Produ…⁵ Precio Ult.C…⁶
##   <chr>    <chr>     <dbl> <date>     <int> <chr> <chr>   <chr>    <dbl>   <dbl>
## 1 MX004    Carnic… 1.02e10 2020-10-15    18 YEMI… HERDEZ  PASTA …      2    5.38
## 2 MX004    Carnic… 7.50e12 2020-10-15    18 DEL … ALIMEN… PURE D…      1    9.23
## 3 MX004    Carnic… 7.50e12 2020-10-15    18 COCA… COCA C… COCA C…      2   11.5 
## 4 MX004    Carnic… 7.50e12 2020-10-15    18 DIAM… EMPACA… ARROZ …      1    8.46
## 5 MX004    Carnic… 7.50e12 2020-10-15    18 PEPSI PEPSI-… PEPSI …      1    7.69
## 6 MX004    Carnic… 7.50e 7 2020-10-15    18 COCA… COCA C… COCA C…      8    7.69
## # … with 14 more variables: Unidades <dbl>, F.Ticket <dbl>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, `Mts 2` <dbl>, `Tipo ubicación` <chr>, Giro <chr>,
## #   `Hora inicio` <dttm>, `Hora cierre` <dttm>, Dia_de_la_semana <dbl>,
## #   Subtotal <dbl>, Utilidad <dbl>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​`Codigo Barras`, ⁴​Fabricante, ⁵​Producto,
## #   ⁶​Ult.Costo

Generar bascket

basket <-ddply(BD_Limpia,c("F.Ticket"),function(BD_Limpia)paste(BD_Limpia$Marca, collapse=","))

Eliminar numeros de ticket

basket$F.Ticket <- NULL

Renombrar el nombre de la columna

colnames(basket)<- c("Marca")

Exportar basket

write.csv(basket, "basket.csv", quote=FALSE, row.names=FALSE)

Importar transacciones

#file.choose()
tr <- read.transactions("/Users/marianaguevara/basket.csv", format = "basket", sep=",")
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in asMethod(object): removing duplicated items in transactions

Generar reglas de asociación

reglas.asociacion<-apriori(tr,parameter=list(supp=0.001, conf=0.2, maxlen=10))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 115 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115111 transaction(s)] done [0.01s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
summary(reglas.asociacion)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001016   Min.   :0.2069   Min.   :0.003562   Min.   : 1.325  
##  1st Qu.:0.001103   1st Qu.:0.2356   1st Qu.:0.004504   1st Qu.: 1.787  
##  Median :0.001416   Median :0.2442   Median :0.005803   Median : 3.972  
##  Mean   :0.001519   Mean   :0.2536   Mean   :0.006054   Mean   :17.563  
##  3rd Qu.:0.001651   3rd Qu.:0.2685   3rd Qu.:0.006893   3rd Qu.:21.798  
##  Max.   :0.002745   Max.   :0.3098   Max.   :0.010503   Max.   :65.908  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##    tr        115111   0.001        0.2
##                                                                         call
##  apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas.asociacion)
##      lhs                  rhs         support     confidence coverage   
## [1]  {FANTA}           => {COCA COLA} 0.001051159 0.2439516  0.004308884
## [2]  {SALVO}           => {FABULOSO}  0.001103283 0.3097561  0.003561779
## [3]  {FABULOSO}        => {SALVO}     0.001103283 0.2347505  0.004699811
## [4]  {COCA COLA ZERO}  => {COCA COLA} 0.001416025 0.2969035  0.004769310
## [5]  {SPRITE}          => {COCA COLA} 0.001346526 0.2069426  0.006506763
## [6]  {PINOL}           => {CLORALEX}  0.001016410 0.2363636  0.004300197
## [7]  {BLUE HOUSE}      => {BIMBO}     0.001711392 0.2720994  0.006289581
## [8]  {HELLMANN´S}      => {BIMBO}     0.001537646 0.2649701  0.005803094
## [9]  {REYMA}           => {CONVERMEX} 0.002093631 0.2441743  0.008574333
## [10] {FUD}             => {BIMBO}     0.001589770 0.2183771  0.007279930
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730  0.010502906
##      lift      count
## [1]   1.561906 121  
## [2]  65.908196 127  
## [3]  65.908196 127  
## [4]   1.900932 163  
## [5]   1.324955 155  
## [6]  25.030409 117  
## [7]   4.078870 197  
## [8]   3.971997 177  
## [9]  18.564824 241  
## [10]  3.273552 183  
## [11]  1.673447 316

Visualizar reglas de asociacion

top10reglas <- head(reglas.asociacion, n=10, by="confidence")
plot(top10reglas, method="graph", engine ="htmlwidget")
LS0tCnRpdGxlOiAiQWN0IDIuMSBNYW5lamEgdW5hIGJhc2UgZGUgZGF0b3MiCmF1dGhvcjogIk1hcmlhbmEgR3VldmFyYSIKZGF0ZTogIjIwMjMtMDgtMTciCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogVFJVRQogICAgdG9jX2Zsb2F0OiBUUlVFCiAgICBjb2RlX2Rvd25sb2FkOiBUUlVFCi0tLQoKIVtdKC9Vc2Vycy9tYXJpYW5hZ3VldmFyYS9Eb3dubG9hZHMvb3h4by5naWYpCgpgYGB7cn0KbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkoZHBseXIpCmxpYnJhcnkoZ2dwbG90MikKbGlicmFyeShyZWFkeGwpCmBgYAoKIyMjIEltcG9ydGFyIGxhIGJhc2UgZGUgZGF0b3MKCmBgYHtyfQpBYmFycm90ZXNfVmVudGFzXzIgPC0gcmVhZF9leGNlbCgiRG93bmxvYWRzL0FiYXJyb3Rlc19WZW50YXMtMi54bHN4IikKCiNWaWV3KEFiYXJyb3Rlc19WZW50YXNfMikKYGBgCgpgYGB7cn0KI09UUkEgRk9STUEgREUgQlVTQ0FSIExBIEJBU0UgREUgREFUT1MKI2ZpbGUuY2hvb3NlKCkKYGBgCgojIyMgRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvcwoKYGBge3J9CnN0cihBYmFycm90ZXNfVmVudGFzXzIpCnN1bW1hcnkoQWJhcnJvdGVzX1ZlbnRhc18yKQoKI09CU0VSVkFDSU9ORVM6CiMxLi1MYSB2YXJpYWJsZSBQTFUgdGllbmUgMTk5MTgzIE5BJ3MKIzIuLUxhIHZhcmlhYmxlIEZlY2hhIGVzdMOhIGNvbW8gY2FyYWN0ZXIKIzMuLUxhIHZhcmlhYmxlIEhvcmEgZXN0YSBjb21vIGNhcmFjdGVyCiM0Li1MYSB2YXJpYWJsZSBQcmVjaW8gdGllbmUgbmVnYXRpdm9zCiM1Li1MYSB2YXJpYWJsZSBVbmlkYWRlcyB0aWVuZSBkZWNpbWFsZXMuIAoKYGBgCgojIyMgRW50ZW5kZXIgbGEgYmFzZSBkZSBkYXRvcwoKYGBge3J9CmNvdW50KEFiYXJyb3Rlc19WZW50YXNfMiwgdmNDbGF2ZVRpZW5kYSwgc29ydCA9IFRSVUUpCmNvdW50KEFiYXJyb3Rlc19WZW50YXNfMiwgRGVzY0dpcm8sIHNvcnQgPSBUUlVFKQpjb3VudChBYmFycm90ZXNfVmVudGFzXzIsIE1hcmNhLCBzb3J0ID0gVFJVRSkKY291bnQoQWJhcnJvdGVzX1ZlbnRhc18yLCBGYWJyaWNhbnRlLCBzb3J0ID0gVFJVRSkKY291bnQoQWJhcnJvdGVzX1ZlbnRhc18yLCBQcm9kdWN0bywgc29ydCA9IFRSVUUpCmNvdW50KEFiYXJyb3Rlc19WZW50YXNfMiwgTm9tYnJlRGVwYXJ0YW1lbnRvLCBzb3J0ID0gVFJVRSkKY291bnQoQWJhcnJvdGVzX1ZlbnRhc18yLCBOb21icmVGYW1pbGlhLCBzb3J0ID0gVFJVRSkKY291bnQoQWJhcnJvdGVzX1ZlbnRhc18yLCBOb21icmVDYXRlZ29yaWEsIHNvcnQgPSBUUlVFKQpjb3VudChBYmFycm90ZXNfVmVudGFzXzIsIEVzdGFkbywgc29ydCA9IFRSVUUpCmNvdW50KEFiYXJyb3Rlc19WZW50YXNfMiwgR2lybywgc29ydCA9IFRSVUUpCgpgYGAKCmBgYHtyfQoKdGliYmxlKEFiYXJyb3Rlc19WZW50YXNfMikKCiNMT1MgUFJJTUVST1MgRU5DQUJFWkFET1MgREUgTEEgQkFTRQpoZWFkKEFiYXJyb3Rlc19WZW50YXNfMiwgbj03KQoKI0xvcyB1bHRpbW9zCnRhaWwoQWJhcnJvdGVzX1ZlbnRhc18yLCBuPTgpCmBgYAoKIyMjIEluc3RhbGFyIGxhIGJhc2UgZGUgZGF0b3MKCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygiamFuaXRvciIpCmxpYnJhcnkoamFuaXRvcikKYGBgCgpgYGB7cn0KdGFieWwoQWJhcnJvdGVzX1ZlbnRhc18yLCB2Y0NsYXZlVGllbmRhLCBOb21icmVEZXBhcnRhbWVudG8pCmBgYAojIyMgVGVjbmljYSBwYXJhIHJlbW92ZXIgZGF0b3MgCgpgYGB7cn0KI3RlY25pY2EgMQpCRDEgPC0gQWJhcnJvdGVzX1ZlbnRhc18yCkJEMSA8LSBzdWJzZXQoQkQxLCBzZWxlY3QgPSAtYyhQTFUpKQoKI3N1YnNldCBleHRyYWVyIGRlIHVuYSBiYXNlIGRlIGRhdG9zCiMtYyBlcyBwYXJhIGJvcnJhciBsYXMgY29sdW1uYXMgc2VsZWNjaW9uYWRhcwpgYGAKCiMjIyBUZWNuaWNhIHBhcmEgRWxpbWluYXIgcmVuZ2xvbmVzCgpgYGB7cn0KQkQyIDwtIEJEMQpCRDIgPC0gQkQyIFtCRDIkUHJlY2lvPjAsXQpzdW1tYXJ5KEJEMSRQcmVjaW8pCnN1bW1hcnkoQkQyJFByZWNpbykKYGBgCiMjIyBUw6ljbmljYSAyIFJlbW92ZXIgdmFsb3JlcyBkdXBsaWNhZG9zIAoKYGBge3J9CiNUw6ljbmljYSAyIFJlbW92ZXIgdmFsb3JlcyBkdXBsaWNhZG9zIAojwr9DdWFudG9zIHJlbmdsb25lcy9yZWdpc3Ryb3MgZHVwbGljYWRvcyB0ZW5lbW9zPwpCRDJbZHVwbGljYXRlZChCRDIpLF0gI0N1YW50b3MgZHVwbGljYWRvcyBoYXkgCnN1bShkdXBsaWNhdGVkKEJEMikpICNxdWUgbG9zIHN1bWUKCmBgYAojIyMgRWxpbWluYXIgcmVnaXN0cm9zIGR1cGxpY2Fkb3MgCgpgYGB7cn0KI0VsaW1pbmFyIHJlZ2lzdHJvcyBkdXBsaWNhZG9zIApCRDMgPC0gQkQyCmxpYnJhcnkoZHBseXIpICNSZWFsaXphciBvcGVyYWNpb25lcyBjb211bmVzIGNvbyBmaWx0cmFyIHBvciBmaWxhLCBzZWxlY2Npb25hciBjb2x1bW5hcyBlc3BlY2lmaWNhcywgcmVvcmRlbmFyCkJEMyA8LSBkaXN0aW5jdChCRDMpCmBgYAoKCiMjIyBUZWNuaWNhIDMuIEVycm9yZXMgdGlwb2dyYWZpc29jIHkgc2ltaWxhcmVzCgpgYGB7cn0KI1RlY25pY2EgMyBFcnJvcmVzIHRpcG9ncmFmaXNvYyB5IHNpbWlsYXJlcwogI1ByZWNpb3MgZW4gYWJzb2x1dG9zIApCRDQgPC0gQkQxCkJENCRQcmVjaW8gPC0gYWJzKEJENCRVbmlkYWRlcykKc3VtbWFyeShCRDQkUHJlY2lvKQpgYGAKCmBgYHtyfQojQ2FudGlkYWRlcyBlbnRlcm9zIAoKQkQ1IDwtIEJENApCRDUkVW5pZGFkZXMgPC0gY2VpbGluZyhCRDUkVW5pZGFkZXMpCnN1bW1hcnkoQkQ1JFVuaWRhZGVzKQpzdW1tYXJ5KEFiYXJyb3Rlc19WZW50YXNfMiRVbmlkYWRlcykKYGBgCgojIyMgQ29udmVydGlyIGRlIGNhcmFjdGVyIGEgZmVjaGEgCgpgYGB7cn0KI0NvbnZlcnRpciBkZSBjYXJhY3RlciBhIGZlY2hhIApCRDYgPC0gQkQ1CkJENiRGZWNoYSA8LSBhcy5EYXRlKEJENiRGZWNoYSwgIiVkLyVtLyVZIikKdGliYmxlKEJENikKYGBgCgpgYGB7cn0KI0NvbnZlcnRpciBkZSBjYXJhY3RlciAKQkQ3IDwtIEJENgpCRDckSG9yYSA8LSBzdWJzdHIoQkQ3JEhvcmEsIHN0YXJ0ID0gMSwgc3RvcCA9IDIpCnRpYmJsZShCRDcpCkJENyRIb3JhIDwtIGFzLmludGVnZXIoQkQ3JEhvcmEpCnN0cihCRDcpCmBgYAoKYGBge3J9CkJENyA8LSBCRDYKQkQ3JEhvcmEgPC0gc3Vic3RyKEJENyRIb3JhLCBzdGFydD0xLCBzdG9wPTIpCnRpYmJsZShCRDcpCkJENyRIb3JhIDwtIGFzLmludGVnZXIoQkQ3JEhvcmEpCnN0cihCRDcpCmBgYAoKIyMjIFByZWd1bnRhcwoKYGBge3J9Cgojwr9DdWFudG9zIE5BJ3MgdGVuZ28gZW4gbGEgYmFzZSBkZSBkYXRvcz8gCnN1bShpcy5uYShCRDcpKQpzdW0oaXMubmEoQWJhcnJvdGVzX1ZlbnRhc18yKSkKCiPCv0N1YW50b3MgTkEncyB0ZW5nbyBwb3IgdmFyaWFibGU/CnNhcHBseShCRDcsIGZ1bmN0aW9uKHgpIHN1bShpcy5uYSh4KSkpCgojQm9ycmFyIHRvZG9zIGxvcyBOQSdzIGRlIHVuYSBiYXNlIGRlIGRhdG9zIApCRDg8LSBBYmFycm90ZXNfVmVudGFzXzIKQkQ4IDwtIG5hLm9taXQoQkQ4KQpzdW1tYXJ5KEJEOCkKCmBgYAoKIyMjIFJlZW1wbGF6YXIgTkEncyBjb24gY2Vyb3MKCmBgYHtyfQpCRDkgPC0gQWJhcnJvdGVzX1ZlbnRhc18yCkJEOSBbaXMubmEoQkQ5KV08LTAKc3VtbWFyeShCRDkpCgpgYGAKCgojIyMgUmVlbXBsYXphciBOQSBjb24gZWwgcHJvbWVkaW8gCgpgYGB7cn0KQkQxMCA8LSBBYmFycm90ZXNfVmVudGFzXzIKQkQxMCRQTFVbaXMubmEoQkQxMCRQTFUpXSA8LSBtZWFuKEJEMTAkUExVLCBuYS5ybT1UUlVFKQpzdW1tYXJ5KEJEMTApCmBgYAoKIyMjIFJlZW1wbGF6YXIgbmVnYXRpdm9zIGNvbiBjZXJvcyAKCmBgYHtyfQpCRDExIDwtIEFiYXJyb3Rlc19WZW50YXNfMgpCRDExW0JEMTEgPSBPXTwtMApzdW1tYXJ5KEJEMTEpCmBgYAoKIyMjIFRlY25pY2EgNiBNZXRvZG8gZXN0YWRpc3RpY28gCmBgYHtyfQojR3JhZmljYSBkZSBjYWphIHkgYmlnb3RlCkJEMTIgPC0gQkQ3CmJveHBsb3QoQkQxMiRQcmVjaW8sIGhvcml6b250YWwgPSBUUlVFKQpib3hwbG90KEJEMTIkVW5pZGFkZXMsIGhvcml6b250YWwgPSBUUlVFKQpgYGAKCgpgYGB7cn0KI2luc3RhbGwucGFja2FnZXMoImx1YnJpZGF0ZSIpCmxpYnJhcnkobHVicmlkYXRlKQpCRDEyJERpYV9kZV9sYV9zZW1hbmEgPC13ZGF5KEJEMTIkRmVjaGEpCnN1bW1hcnkoQkQxMikKYGBgCgpgYGB7cn0KQkQxMiRTdWJ0b3RhbCA8LSBCRDEyJFByZWNpbyAqIEJEMTIkVW5pZGFkZXMKc3VtbWFyeShCRDEyKQoKQkQxMiRVdGlsaWRhZCA8LSBCRDEyJFByZWNpbyAtIEJEMTIkVWx0LkNvc3RvCnN1bW1hcnkoQkQxMikKYGBgCgojIyMgRXhwb3J0YXIgbGEgYmFzZSBkZSBkYXRvcyBsaW1waWEgCmBgYHtyfQpCRF9MaW1waWEgPC0gQkQxMgp3cml0ZS5jc3YoQkRfTGltcGlhLCBmaWxlID0gIlByYWN0aWNhXzJfYWJhcnJvdGVzX2xpbXBpYS5jc3YiLCByb3cubmFtZXMgPSBGQUxTRSkKCmBgYAoKYGBge3J9CiNNYXJrZXQgYmFza2V0IGFuYWx5c2lzCgojaW5zdGFsbC5wYWNrYWdlcygicGx5ciIpCmxpYnJhcnkocGx5cikKI2luc3RhbGwucGFja2FnZXMoIk1hdHJpeCIpCmxpYnJhcnkoTWF0cml4KQojaW5zdGFsbC5wYWNrYWdlcygiYXJ1bGVzIikKbGlicmFyeShhcnVsZXMpCiNpbnN0YWxsLnBhY2thZ2VzKCJhcnVsZXNWaXoiKQpsaWJyYXJ5KGFydWxlc1ZpeikKI2luc3RhbGwucGFja2FnZXMoImRhdGFzZXQiKQpsaWJyYXJ5KGRhdGFzZXQpCmBgYAoKIyMjIE9yZGVuYXIgZGUgbWVub3IgYSBtYXlvciBsb3MgdGlja2V0cwpgYGB7cn0KQkRfTGltcGlhIDwtIEJEX0xpbXBpYVtvcmRlcihCRF9MaW1waWEkRi5UaWNrZXQpLF0KaGVhZChCRF9MaW1waWEpCnRhaWwoQkRfTGltcGlhKQpgYGAKCiMjIyBHZW5lcmFyIGJhc2NrZXQKYGBge3J9CmJhc2tldCA8LWRkcGx5KEJEX0xpbXBpYSxjKCJGLlRpY2tldCIpLGZ1bmN0aW9uKEJEX0xpbXBpYSlwYXN0ZShCRF9MaW1waWEkTWFyY2EsIGNvbGxhcHNlPSIsIikpCmBgYAoKIyMjIEVsaW1pbmFyIG51bWVyb3MgZGUgdGlja2V0CmBgYHtyfQpiYXNrZXQkRi5UaWNrZXQgPC0gTlVMTApgYGAKCiMjIyBSZW5vbWJyYXIgZWwgbm9tYnJlIGRlIGxhIGNvbHVtbmEgCmBgYHtyfQpjb2xuYW1lcyhiYXNrZXQpPC0gYygiTWFyY2EiKQpgYGAKCiMjIyBFeHBvcnRhciBiYXNrZXQKYGBge3J9CndyaXRlLmNzdihiYXNrZXQsICJiYXNrZXQuY3N2IiwgcXVvdGU9RkFMU0UsIHJvdy5uYW1lcz1GQUxTRSkKCmBgYAoKIyMjIEltcG9ydGFyIHRyYW5zYWNjaW9uZXMKYGBge3J9CiNmaWxlLmNob29zZSgpCnRyIDwtIHJlYWQudHJhbnNhY3Rpb25zKCIvVXNlcnMvbWFyaWFuYWd1ZXZhcmEvYmFza2V0LmNzdiIsIGZvcm1hdCA9ICJiYXNrZXQiLCBzZXA9IiwiKQpgYGAKCiMjIyBHZW5lcmFyIHJlZ2xhcyBkZSBhc29jaWFjacOzbgpgYGB7cn0KcmVnbGFzLmFzb2NpYWNpb248LWFwcmlvcmkodHIscGFyYW1ldGVyPWxpc3Qoc3VwcD0wLjAwMSwgY29uZj0wLjIsIG1heGxlbj0xMCkpCnN1bW1hcnkocmVnbGFzLmFzb2NpYWNpb24pCmluc3BlY3QocmVnbGFzLmFzb2NpYWNpb24pCmBgYAoKIyMjIFZpc3VhbGl6YXIgcmVnbGFzIGRlIGFzb2NpYWNpb24gCgpgYGB7cn0KdG9wMTByZWdsYXMgPC0gaGVhZChyZWdsYXMuYXNvY2lhY2lvbiwgbj0xMCwgYnk9ImNvbmZpZGVuY2UiKQpwbG90KHRvcDEwcmVnbGFzLCBtZXRob2Q9ImdyYXBoIiwgZW5naW5lID0iaHRtbHdpZGdldCIpCmBgYAoKCgoKCg==