Importar la base de datos

file.choose()
## [1] "C:\\Users\\jimen\\Downloads\\Market Basket.Rmd"
bd <- read.csv("C:\\Users\\jimen\\Downloads\\Abarrotes_Ventas-4 (1).csv")

Entender la base de datos

summary (bd)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   : 1.00   
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 1.00   
##                                        Mean   :5.950e+12   Mean   : 2.11   
##                                        3rd Qu.:7.501e+12   3rd Qu.: 1.00   
##                                        Max.   :1.750e+13   Max.   :30.00   
##                                                            NA's   :199183  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200620      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##                                                                        
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200620      Length:200620      Length:200620     
##  1st Qu.: 33967   Class :character   Class :character   Class :character  
##  Median :105996   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193994                                                           
##  3rd Qu.:383009                                                           
##  Max.   :450040                                                           
##                                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200620      Min.   :47.0   Length:200620      Length:200620     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##                                                                         
##  Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
#install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#count(bd, vcClaveTienda, sort= TRUE)
#count(bd, DescGiro, sort= TRUE)
#count(bd, Marca, sort= TRUE)
#count(bd, Fabricante, sort= TRUE)
#count(bd, NombreDepartamento, sort= TRUE)
#count(bd, Producto, sort= TRUE)
#count(bd, NombreCategoria, sort= TRUE)
#count(bd, Estado, sort= TRUE)
#count(bd, Mts.2, sort= TRUE)
#count(bd, Tipo.ubicaciC3n, sort= TRUE)
#count(bd, Giro, sort= TRUE)
#count(bd, Hora.inicio, sort= TRUE)
#count(bd, Hora.cierre, sort= TRUE)

#install.packages ("tidyverse")
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ stringr 1.4.1
## ✔ tidyr   1.2.0     ✔ forcats 0.5.2
## ✔ readr   2.1.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
tibble(bd)
## # A tibble: 200,620 × 22
##    vcClaveTienda DescGiro Codig…¹   PLU Fecha Hora  Marca Fabri…² Produ…³ Precio
##    <chr>         <chr>      <dbl> <int> <chr> <chr> <chr> <chr>   <chr>    <dbl>
##  1 MX001         Abarrot… 7.50e12    NA 19/0… 8:16… NUTR… MEXILAC Nutri …   16  
##  2 MX001         Abarrot… 7.50e12    NA 19/0… 8:23… DAN … DANONE… DANUP …   14  
##  3 MX001         Abarrot… 7.50e12    NA 19/0… 8:24… BIMBO GRUPO … Rebana…    5  
##  4 MX001         Abarrot… 7.50e12    NA 19/0… 8:24… PEPSI PEPSI-… Pepsi …    8  
##  5 MX001         Abarrot… 7.50e12    NA 19/0… 8:26… BLAN… FABRIC… Deterg…   19.5
##  6 MX001         Abarrot… 7.50e12    NA 19/0… 8:26… FLASH ALEN    Flash …    9.5
##  7 MX001         Abarrot… 7.50e12    NA 19/0… 8:26… VARI… DANONE… Danone…   11  
##  8 MX001         Abarrot… 7.50e12    NA 19/0… 8:26… ZOTE  FABRIC… Jabon …    9.5
##  9 MX001         Abarrot… 7.51e12    NA 19/0… 8:26… ALWA… PROCTE… T Feme…   23.5
## 10 MX001         Abarrot… 3.22e10    NA 19/0… 15:2… JUMEX JUMEX   Jugo D…   12  
## # … with 200,610 more rows, 12 more variables: Ult.Costo <dbl>, Unidades <dbl>,
## #   F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## #   NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## #   Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable
## #   names ¹​Codigo.Barras, ²​Fabricante, ³​Producto
str(bd)
## 'data.frame':    200620 obs. of  22 variables:
##  $ vcClaveTienda     : chr  "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Codigo.Barras     : num  7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
##  $ PLU               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Fecha             : chr  "19/06/2020" "19/06/2020" "19/06/2020" "19/06/2020" ...
##  $ Hora              : chr  "8:16:21" "8:23:33" "8:24:33" "8:24:33" ...
##  $ Marca             : chr  "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr  "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr  "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num  16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
##  $ Ult.Costo         : num  12.3 14 5 8 15 ...
##  $ Unidades          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : int  1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr  "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr  "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts.2             : int  60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo.ubicación    : chr  "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora.inicio       : chr  "8:00" "8:00" "8:00" "8:00" ...
##  $ Hora.cierre       : chr  "22:00" "22:00" "22:00" "22:00" ...
head(bd)
##   vcClaveTienda  DescGiro Codigo.Barras PLU      Fecha    Hora
## 1         MX001 Abarrotes  7.501021e+12  NA 19/06/2020 8:16:21
## 2         MX001 Abarrotes  7.501032e+12  NA 19/06/2020 8:23:33
## 3         MX001 Abarrotes  7.501000e+12  NA 19/06/2020 8:24:33
## 4         MX001 Abarrotes  7.501031e+12  NA 19/06/2020 8:24:33
## 5         MX001 Abarrotes  7.501026e+12  NA 19/06/2020 8:26:28
## 6         MX001 Abarrotes  7.501025e+12  NA 19/06/2020 8:26:28
##                        Marca                 Fabricante
## 1                NUTRI LECHE                    MEXILAC
## 2                     DAN UP           DANONE DE MEXICO
## 3                      BIMBO                GRUPO BIMBO
## 4                      PEPSI        PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                      FLASH                       ALEN
##                             Producto Precio Ult.Costo Unidades F.Ticket
## 1                Nutri Leche 1 Litro   16.0     12.31        1        1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 3                Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 4                   Pepsi N.R. 400Ml    8.0      8.00        1        3
## 5      Detergente Blanca Nieves 500G   19.5     15.00        1        4
## 6      Flash Xtra Brisa Marina 500Ml    9.5      7.31        1        4
##   NombreDepartamento          NombreFamilia           NombreCategoria
## 1          Abarrotes Lacteos y Refrigerados                     Leche
## 2          Abarrotes Lacteos y Refrigerados                    Yogurt
## 3          Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4          Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5          Abarrotes     Limpieza del Hogar                Lavandería
## 6          Abarrotes     Limpieza del Hogar      Limpiadores Líquidos
##       Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 1 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 2 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 3 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 4 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 5 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 6 Nuevo León    60        Esquina Abarrotes        8:00       22:00
head(bd,n=7)
##   vcClaveTienda  DescGiro Codigo.Barras PLU      Fecha    Hora
## 1         MX001 Abarrotes  7.501021e+12  NA 19/06/2020 8:16:21
## 2         MX001 Abarrotes  7.501032e+12  NA 19/06/2020 8:23:33
## 3         MX001 Abarrotes  7.501000e+12  NA 19/06/2020 8:24:33
## 4         MX001 Abarrotes  7.501031e+12  NA 19/06/2020 8:24:33
## 5         MX001 Abarrotes  7.501026e+12  NA 19/06/2020 8:26:28
## 6         MX001 Abarrotes  7.501025e+12  NA 19/06/2020 8:26:28
## 7         MX001 Abarrotes  7.501032e+12  NA 19/06/2020 8:26:28
##                        Marca                 Fabricante
## 1                NUTRI LECHE                    MEXILAC
## 2                     DAN UP           DANONE DE MEXICO
## 3                      BIMBO                GRUPO BIMBO
## 4                      PEPSI        PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                      FLASH                       ALEN
## 7              VARIOS DANONE           DANONE DE MEXICO
##                              Producto Precio Ult.Costo Unidades F.Ticket
## 1                 Nutri Leche 1 Litro   16.0     12.31        1        1
## 2  DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 3                 Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 4                    Pepsi N.R. 400Ml    8.0      8.00        1        3
## 5       Detergente Blanca Nieves 500G   19.5     15.00        1        4
## 6       Flash Xtra Brisa Marina 500Ml    9.5      7.31        1        4
## 7 Danone Bipack Fresa Chocoarroz 130G   11.0     11.00        1        4
##   NombreDepartamento          NombreFamilia           NombreCategoria
## 1          Abarrotes Lacteos y Refrigerados                     Leche
## 2          Abarrotes Lacteos y Refrigerados                    Yogurt
## 3          Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4          Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5          Abarrotes     Limpieza del Hogar                Lavandería
## 6          Abarrotes     Limpieza del Hogar      Limpiadores Líquidos
## 7          Abarrotes Lacteos y Refrigerados      Postres Refrigerados
##       Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 1 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 2 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 3 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 4 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 5 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 6 Nuevo León    60        Esquina Abarrotes        8:00       22:00
## 7 Nuevo León    60        Esquina Abarrotes        8:00       22:00
tail(bd)
##        vcClaveTienda DescGiro Codigo.Barras PLU      Fecha     Hora
## 200615         MX005 Depósito   7.62221e+12  NA 12/07/2020  1:08:25
## 200616         MX005 Depósito   7.62221e+12  NA 23/10/2020 22:17:37
## 200617         MX005 Depósito   7.62221e+12  NA 10/10/2020 20:30:20
## 200618         MX005 Depósito   7.62221e+12  NA 10/10/2020 22:40:43
## 200619         MX005 Depósito   7.62221e+12  NA 27/06/2020 22:30:19
## 200620         MX005 Depósito   7.62221e+12  NA 26/06/2020 23:43:34
##                    Marca    Fabricante                          Producto Precio
## 200615 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200616 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200617 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200618 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200619 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200620 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
##        Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 200615      6.92        1   103100          Abarrotes      Dulcería
## 200616      6.92        1   116598          Abarrotes      Dulcería
## 200617      6.92        1   114886          Abarrotes      Dulcería
## 200618      6.92        1   114955          Abarrotes      Dulcería
## 200619      6.92        1   101121          Abarrotes      Dulcería
## 200620      6.92        1   100879          Abarrotes      Dulcería
##        NombreCategoria       Estado Mts.2 Tipo.ubicación       Giro Hora.inicio
## 200615 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200616 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200617 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200618 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200619 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
## 200620 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper        8:00
##        Hora.cierre
## 200615       21:00
## 200616       21:00
## 200617       21:00
## 200618       21:00
## 200619       21:00
## 200620       21:00
#install.packages("janitor")
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test

Observaciones

1. Casi ningun registro cuenta con PLU.

2. Cambiar formato de fecha.

3. Cambiar formato de hora

4. Hay precios negativos.

5. Hay unidades menores a 1

Tecnicas para limpieza de datos

Tecnica 1 Remover valores irrelevantes

Eliminar columnas

bd1 <- bd

bd1 <- subset (bd1,select = -c (PLU, Codigo.Barras))

Eliminar renglones

bd2 <- bd1
bd2 <- bd2[bd2$precio > 0,]
summary (bd1)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :-147.00  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.42  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
summary (bd2)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:0           Length:0           Length:0           Length:0          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio   
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##    Ult.Costo      Unidades      F.Ticket   NombreDepartamento
##  Min.   : NA   Min.   : NA   Min.   : NA   Length:0          
##  1st Qu.: NA   1st Qu.: NA   1st Qu.: NA   Class :character  
##  Median : NA   Median : NA   Median : NA   Mode  :character  
##  Mean   :NaN   Mean   :NaN   Mean   :NaN                     
##  3rd Qu.: NA   3rd Qu.: NA   3rd Qu.: NA                     
##  Max.   : NA   Max.   : NA   Max.   : NA                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2    
##  Length:0           Length:0           Length:0           Min.   : NA  
##  Class :character   Class :character   Class :character   1st Qu.: NA  
##  Mode  :character   Mode  :character   Mode  :character   Median : NA  
##                                                           Mean   :NaN  
##                                                           3rd Qu.: NA  
##                                                           Max.   : NA  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:0           Length:0           Length:0           Length:0          
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Esto no lo usaremos, pondremos precios negativos como absolutos

Tecnica 2. Remover valores duplicados

Cuantos renglones duplicaados tenemos?

bd1 [duplicated (bd1),]
##  [1] vcClaveTienda      DescGiro           Fecha              Hora              
##  [5] Marca              Fabricante         Producto           Precio            
##  [9] Ult.Costo          Unidades           F.Ticket           NombreDepartamento
## [13] NombreFamilia      NombreCategoria    Estado             Mts.2             
## [17] Tipo.ubicación     Giro               Hora.inicio        Hora.cierre       
## <0 rows> (or 0-length row.names)
sum(duplicated(bd1))
## [1] 0

Eliminar renglones duplicados

bd3 <- bd1
library(dplyr)
bd3 <- distinct (bd3)

Tecnica 3. Errores tipograficos y errores similares

Precios en absoluto

bd4 <- bd3
bd4$Precio <- abs(bd4$Precio)
summary(bd4)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Cantidades en enteros

bd5 <- bd4
bd4$Unidades <- ceiling (bd5$Unidades)
summary(bd5)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Tecnica 4. Convertir tipos de datos

Convertir de caracter a fecha

bd6 <- bd5
bd6$fecha <- as.Date(bd6$Fecha, format = "%d/%m/%Y")
tibble(bd6)
## # A tibble: 200,620 × 21
##    vcClaveTie…¹ DescG…² Fecha Hora  Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>        <chr>   <chr> <chr> <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001        Abarro… 19/0… 8:16… NUTR… MEXILAC Nutri …   16     12.3        1
##  2 MX001        Abarro… 19/0… 8:23… DAN … DANONE… DANUP …   14     14          1
##  3 MX001        Abarro… 19/0… 8:24… BIMBO GRUPO … Rebana…    5      5          1
##  4 MX001        Abarro… 19/0… 8:24… PEPSI PEPSI-… Pepsi …    8      8          1
##  5 MX001        Abarro… 19/0… 8:26… BLAN… FABRIC… Deterg…   19.5   15          1
##  6 MX001        Abarro… 19/0… 8:26… FLASH ALEN    Flash …    9.5    7.31       1
##  7 MX001        Abarro… 19/0… 8:26… VARI… DANONE… Danone…   11     11          1
##  8 MX001        Abarro… 19/0… 8:26… ZOTE  FABRIC… Jabon …    9.5    7.31       1
##  9 MX001        Abarro… 19/0… 8:26… ALWA… PROCTE… T Feme…   23.5   18.1        1
## 10 MX001        Abarro… 19/0… 15:2… JUMEX JUMEX   Jugo D…   12     12          1
## # … with 200,610 more rows, 11 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, fecha <date>, and abbreviated
## #   variable names ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto,
## #   ⁵​Ult.Costo, ⁶​Unidades

Convertir a caracter entero

bd7 <- bd6
bd7$Hora <- substr(bd7$Hora, start = 1, stop = 2)
tibble(bd7)
## # A tibble: 200,620 × 21
##    vcClaveTie…¹ DescG…² Fecha Hora  Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>        <chr>   <chr> <chr> <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001        Abarro… 19/0… 8:    NUTR… MEXILAC Nutri …   16     12.3        1
##  2 MX001        Abarro… 19/0… 8:    DAN … DANONE… DANUP …   14     14          1
##  3 MX001        Abarro… 19/0… 8:    BIMBO GRUPO … Rebana…    5      5          1
##  4 MX001        Abarro… 19/0… 8:    PEPSI PEPSI-… Pepsi …    8      8          1
##  5 MX001        Abarro… 19/0… 8:    BLAN… FABRIC… Deterg…   19.5   15          1
##  6 MX001        Abarro… 19/0… 8:    FLASH ALEN    Flash …    9.5    7.31       1
##  7 MX001        Abarro… 19/0… 8:    VARI… DANONE… Danone…   11     11          1
##  8 MX001        Abarro… 19/0… 8:    ZOTE  FABRIC… Jabon …    9.5    7.31       1
##  9 MX001        Abarro… 19/0… 8:    ALWA… PROCTE… T Feme…   23.5   18.1        1
## 10 MX001        Abarro… 19/0… 15    JUMEX JUMEX   Jugo D…   12     12          1
## # … with 200,610 more rows, 11 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, fecha <date>, and abbreviated
## #   variable names ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto,
## #   ⁵​Ult.Costo, ⁶​Unidades
bd7$Hora <- as.integer(bd7$Hora)
## Warning: NAs introduced by coercion
str(bd7)
## 'data.frame':    200620 obs. of  21 variables:
##  $ vcClaveTienda     : chr  "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Fecha             : chr  "19/06/2020" "19/06/2020" "19/06/2020" "19/06/2020" ...
##  $ Hora              : int  NA NA NA NA NA NA NA NA NA 15 ...
##  $ Marca             : chr  "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr  "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr  "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num  16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
##  $ Ult.Costo         : num  12.3 14 5 8 15 ...
##  $ Unidades          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : int  1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr  "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr  "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts.2             : int  60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo.ubicación    : chr  "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora.inicio       : chr  "8:00" "8:00" "8:00" "8:00" ...
##  $ Hora.cierre       : chr  "22:00" "22:00" "22:00" "22:00" ...
##  $ fecha             : Date, format: "2020-06-19" "2020-06-19" ...

Tecnica #5 Valores Faltantes

Cuantos Na tengo en la base de datos?

sum(is.na(bd7))
## [1] 16440
sum(is.na(bd))
## [1] 199183

Cuantos NA tengo por variable

sapply(bd7,function(x) sum (is.na(x)))
##      vcClaveTienda           DescGiro              Fecha               Hora 
##                  0                  0                  0              16440 
##              Marca         Fabricante           Producto             Precio 
##                  0                  0                  0                  0 
##          Ult.Costo           Unidades           F.Ticket NombreDepartamento 
##                  0                  0                  0                  0 
##      NombreFamilia    NombreCategoria             Estado              Mts.2 
##                  0                  0                  0                  0 
##     Tipo.ubicación               Giro        Hora.inicio        Hora.cierre 
##                  0                  0                  0                  0 
##              fecha 
##                  0
sapply(bd, function(x) sum(is.na(x)))
##      vcClaveTienda           DescGiro      Codigo.Barras                PLU 
##                  0                  0                  0             199183 
##              Fecha               Hora              Marca         Fabricante 
##                  0                  0                  0                  0 
##           Producto             Precio          Ult.Costo           Unidades 
##                  0                  0                  0                  0 
##           F.Ticket NombreDepartamento      NombreFamilia    NombreCategoria 
##                  0                  0                  0                  0 
##             Estado              Mts.2     Tipo.ubicación               Giro 
##                  0                  0                  0                  0 
##        Hora.inicio        Hora.cierre 
##                  0                  0

Borrar todos los registros Na DE UNA TABLA

bd8 <- bd
bd8 <- na.omit(bd8)
summary(bd8)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:1437        Length:1437        Min.   :6.750e+08   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:6.750e+08   1st Qu.: 1.000  
##  Mode  :character   Mode  :character   Median :6.750e+08   Median : 1.000  
##                                        Mean   :2.616e+11   Mean   : 2.112  
##                                        3rd Qu.:6.750e+08   3rd Qu.: 1.000  
##                                        Max.   :7.501e+12   Max.   :30.000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:1437        Length:1437        Length:1437        Length:1437       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio        Ult.Costo        Unidades    
##  Length:1437        Min.   :30.00   Min.   : 1.00   Min.   :1.000  
##  Class :character   1st Qu.:90.00   1st Qu.:64.62   1st Qu.:1.000  
##  Mode  :character   Median :90.00   Median :64.62   Median :1.000  
##                     Mean   :87.94   Mean   :56.65   Mean   :1.124  
##                     3rd Qu.:90.00   3rd Qu.:64.62   3rd Qu.:1.000  
##                     Max.   :90.00   Max.   :64.62   Max.   :7.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :   772   Length:1437        Length:1437        Length:1437       
##  1st Qu.: 99955   Class :character   Class :character   Class :character  
##  Median :102493   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100595                                                           
##  3rd Qu.:106546                                                           
##  Max.   :118356                                                           
##     Estado              Mts.2       Tipo.ubicación         Giro          
##  Length:1437        Min.   :58.00   Length:1437        Length:1437       
##  Class :character   1st Qu.:58.00   Class :character   Class :character  
##  Mode  :character   Median :58.00   Mode  :character   Mode  :character  
##                     Mean   :58.07                                        
##                     3rd Qu.:58.00                                        
##                     Max.   :60.00                                        
##  Hora.inicio        Hora.cierre       
##  Length:1437        Length:1437       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

Reemplazar NA con 0

bd9 <- bd
bd9[is.na(bd9)]<- 0
summary(bd9)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU          
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   : 0.00000  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 0.00000  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 0.00000  
##                                        Mean   :5.950e+12   Mean   : 0.01513  
##                                        3rd Qu.:7.501e+12   3rd Qu.: 0.00000  
##                                        Max.   :1.750e+13   Max.   :30.00000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200620      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200620      Length:200620      Length:200620     
##  1st Qu.: 33967   Class :character   Class :character   Class :character  
##  Median :105996   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193994                                                           
##  3rd Qu.:383009                                                           
##  Max.   :450040                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200620      Min.   :47.0   Length:200620      Length:200620     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##  Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

Reemplazar NA con El Promedio

bd10 <- bd
bd10$PLU[is.na(bd10$PLU)]<-mean(bd10$PLU,na.rm = TRUE)
summary(bd10)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 2.112  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 2.112  
##                                        Mean   :5.950e+12   Mean   : 2.112  
##                                        3rd Qu.:7.501e+12   3rd Qu.: 2.112  
##                                        Max.   :1.750e+13   Max.   :30.000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200620      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200620      Length:200620      Length:200620     
##  1st Qu.: 33967   Class :character   Class :character   Class :character  
##  Median :105996   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193994                                                           
##  3rd Qu.:383009                                                           
##  Max.   :450040                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200620      Min.   :47.0   Length:200620      Length:200620     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##  Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 

Reemplazar negativos con cero

bd11 <- bd
bd11 [bd11 < 0] <- 0
summary(bd11)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200620      Length:200620      Min.   :8.347e+05   Min.   : 1.00   
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 1.00   
##                                        Mean   :5.950e+12   Mean   : 2.11   
##                                        3rd Qu.:7.501e+12   3rd Qu.: 1.00   
##                                        Max.   :1.750e+13   Max.   :30.00   
##                                                            NA's   :199183  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200620      Min.   :   0.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.44   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##                                                                        
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200620      Length:200620      Length:200620     
##  1st Qu.: 33967   Class :character   Class :character   Class :character  
##  Median :105996   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193994                                                           
##  3rd Qu.:383009                                                           
##  Max.   :450040                                                           
##                                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200620      Min.   :47.0   Length:200620      Length:200620     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##                                                                         
##  Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 

Tecnica 6 Metodo estadistico

bd12 <- bd7
boxplot(bd12$Precio, horizontal = TRUE)

boxplot(bd12$Unidades, horizontal = TRUE)

Agregar columnas

#install.packages("lubridate")
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
bd12$Dia_de_la_semana <- wday (bd12$Fecha)
summary(bd12)
##  vcClaveTienda        DescGiro            Fecha                Hora      
##  Length:200620      Length:200620      Length:200620      Min.   :10.00  
##  Class :character   Class :character   Class :character   1st Qu.:14.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :18.00  
##                                                           Mean   :17.12  
##                                                           3rd Qu.:20.00  
##                                                           Max.   :23.00  
##                                                           NA's   :16440  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##                                                                            
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##                                                                       
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##                                                                         
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      fecha            Dia_de_la_semana
##  Min.   :2020-05-01   Min.   :1.000   
##  1st Qu.:2020-06-06   1st Qu.:2.000   
##  Median :2020-07-11   Median :4.000   
##  Mean   :2020-07-18   Mean   :3.919   
##  3rd Qu.:2020-08-29   3rd Qu.:6.000   
##  Max.   :2020-11-11   Max.   :7.000   
## 
bd12$subtotal <- bd12$Precio * bd12$Unidades
summary(bd12)
##  vcClaveTienda        DescGiro            Fecha                Hora      
##  Length:200620      Length:200620      Length:200620      Min.   :10.00  
##  Class :character   Class :character   Class :character   1st Qu.:14.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :18.00  
##                                                           Mean   :17.12  
##                                                           3rd Qu.:20.00  
##                                                           Max.   :23.00  
##                                                           NA's   :16440  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##                                                                            
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##                                                                       
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##                                                                         
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      fecha            Dia_de_la_semana    subtotal      
##  Min.   :2020-05-01   Min.   :1.000    Min.   :   1.00  
##  1st Qu.:2020-06-06   1st Qu.:2.000    1st Qu.:  12.00  
##  Median :2020-07-11   Median :4.000    Median :  18.00  
##  Mean   :2020-07-18   Mean   :3.919    Mean   :  24.33  
##  3rd Qu.:2020-08-29   3rd Qu.:6.000    3rd Qu.:  27.00  
##  Max.   :2020-11-11   Max.   :7.000    Max.   :2496.00  
## 
bd12$utilidad <- bd12$Precio - bd12$Ult.Costo
summary (bd12)
##  vcClaveTienda        DescGiro            Fecha                Hora      
##  Length:200620      Length:200620      Length:200620      Min.   :10.00  
##  Class :character   Class :character   Class :character   1st Qu.:14.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :18.00  
##                                                           Mean   :17.12  
##                                                           3rd Qu.:20.00  
##                                                           Max.   :23.00  
##                                                           NA's   :16440  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##                                                                            
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383009                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##                                                                       
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##                                                                         
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##      fecha            Dia_de_la_semana    subtotal          utilidad      
##  Min.   :2020-05-01   Min.   :1.000    Min.   :   1.00   Min.   :  0.000  
##  1st Qu.:2020-06-06   1st Qu.:2.000    1st Qu.:  12.00   1st Qu.:  2.310  
##  Median :2020-07-11   Median :4.000    Median :  18.00   Median :  3.230  
##  Mean   :2020-07-18   Mean   :3.919    Mean   :  24.33   Mean   :  4.142  
##  3rd Qu.:2020-08-29   3rd Qu.:6.000    3rd Qu.:  27.00   3rd Qu.:  5.420  
##  Max.   :2020-11-11   Max.   :7.000    Max.   :2496.00   Max.   :230.770  
## 

Exportar base de datos limpia

bd_limpia <- bd12
write.csv(bd_limpia, file="abarrotes_bd_limpia.csv" , row.names = FALSE)

Market Basket analysis

#install.packages("arules")
#install.packages("arulesViz")
#install.packages("Matrix")
library(Matrix)
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
library(arules)
## 
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
## 
##     recode
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
library(arulesViz)
#install.packages("datasets")
library(datasets)

Ordenar de menor a mayor los tickets

bd_limpia <- bd_limpia[order(bd_limpia$F.Ticket),]
head(bd_limpia)
##   vcClaveTienda  DescGiro      Fecha Hora                      Marca
## 1         MX001 Abarrotes 19/06/2020   NA                NUTRI LECHE
## 2         MX001 Abarrotes 19/06/2020   NA                     DAN UP
## 3         MX001 Abarrotes 19/06/2020   NA                      BIMBO
## 4         MX001 Abarrotes 19/06/2020   NA                      PEPSI
## 5         MX001 Abarrotes 19/06/2020   NA BLANCA NIEVES (DETERGENTE)
## 6         MX001 Abarrotes 19/06/2020   NA                      FLASH
##                   Fabricante                           Producto Precio
## 1                    MEXILAC                Nutri Leche 1 Litro   16.0
## 2           DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 3                GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 4        PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 5 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
## 6                       ALEN      Flash Xtra Brisa Marina 500Ml    9.5
##   Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 1     12.31        1        1          Abarrotes Lacteos y Refrigerados
## 2     14.00        1        2          Abarrotes Lacteos y Refrigerados
## 3      5.00        1        3          Abarrotes         Pan y Tortilla
## 4      8.00        1        3          Abarrotes                Bebidas
## 5     15.00        1        4          Abarrotes     Limpieza del Hogar
## 6      7.31        1        4          Abarrotes     Limpieza del Hogar
##             NombreCategoria     Estado Mts.2 Tipo.ubicación      Giro
## 1                     Leche Nuevo León    60        Esquina Abarrotes
## 2                    Yogurt Nuevo León    60        Esquina Abarrotes
## 3     Pan Dulce Empaquetado Nuevo León    60        Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León    60        Esquina Abarrotes
## 5                Lavandería Nuevo León    60        Esquina Abarrotes
## 6      Limpiadores Líquidos Nuevo León    60        Esquina Abarrotes
##   Hora.inicio Hora.cierre      fecha Dia_de_la_semana subtotal utilidad
## 1        8:00       22:00 2020-06-19                5     16.0     3.69
## 2        8:00       22:00 2020-06-19                5     14.0     0.00
## 3        8:00       22:00 2020-06-19                5      5.0     0.00
## 4        8:00       22:00 2020-06-19                5      8.0     0.00
## 5        8:00       22:00 2020-06-19                5     19.5     4.50
## 6        8:00       22:00 2020-06-19                5      9.5     2.19
tail(bd_limpia)
##        vcClaveTienda   DescGiro      Fecha Hora          Marca
## 107394         MX004 Carnicería 15/10/2020   11         YEMINA
## 167771         MX004 Carnicería 15/10/2020   11     DEL FUERTE
## 149429         MX004 Carnicería 15/10/2020   11 COCA COLA ZERO
## 168750         MX004 Carnicería 15/10/2020   11       DIAMANTE
## 161193         MX004 Carnicería 15/10/2020   12          PEPSI
## 112970         MX004 Carnicería 15/10/2020   12      COCA COLA
##                  Fabricante                       Producto Precio Ult.Costo
## 107394               HERDEZ    PASTA SPAGHETTI YEMINA 200G      7      5.38
## 167771 ALIMENTOS DEL FUERTE PURE DE TOMATE DEL FUERTE 345G     12      9.23
## 149429            COCA COLA           COCA COLA ZERO 600ML     15     11.54
## 168750           EMPACADOS              ARROZ DIAMANTE225G     11      8.46
## 161193  PEPSI-COLA MEXICANA              PEPSI N. R. 500ML     10      7.69
## 112970            COCA COLA     COCA COLA RETORNABLE 500ML     10      7.69
##        Unidades F.Ticket NombreDepartamento        NombreFamilia
## 107394        2   450032          Abarrotes       Sopas y Pastas
## 167771        1   450032          Abarrotes Salsas y Sazonadores
## 149429        2   450034          Abarrotes              Bebidas
## 168750        1   450037          Abarrotes    Granos y Semillas
## 161193        1   450039          Abarrotes              Bebidas
## 112970        8   450040          Abarrotes              Bebidas
##                      NombreCategoria  Estado Mts.2 Tipo.ubicación      Giro
## 107394 Fideos, Spaguetti, Tallarines Sinaloa    53        Esquina Abarrotes
## 167771          Salsa para Spaguetti Sinaloa    53        Esquina Abarrotes
## 149429         Refrescos Retornables Sinaloa    53        Esquina Abarrotes
## 168750                         Arroz Sinaloa    53        Esquina Abarrotes
## 161193     Refrescos Plástico (N.R.) Sinaloa    53        Esquina Abarrotes
## 112970         Refrescos Retornables Sinaloa    53        Esquina Abarrotes
##        Hora.inicio Hora.cierre      fecha Dia_de_la_semana subtotal utilidad
## 107394        7:00       23:00 2020-10-15                3       14     1.62
## 167771        7:00       23:00 2020-10-15                3       12     2.77
## 149429        7:00       23:00 2020-10-15                3       30     3.46
## 168750        7:00       23:00 2020-10-15                3       11     2.54
## 161193        7:00       23:00 2020-10-15                3       10     2.31
## 112970        7:00       23:00 2020-10-15                3       80     2.31

Generar basket

#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
## 
##     compact
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
basket <- ddply(bd_limpia,c("F.Ticket"), function(bd_limpia)paste(bd_limpia$Marca, collapse = ","))

View(basket)

Eliminar numero de Ticket

basket$F.Ticket <- NULL

Renombramos el nombre de la columna

colnames(basket) <- c("Marca")

Exportar basket

write.csv (basket, "basket.csv", quote = FALSE, row.names = FALSE)

Importar transacciones

file.choose()
## [1] "C:\\Users\\jimen\\Downloads\\Market Basket.Rmd"
tr <- read.transactions("C:\\Users\\jimen\\Documents\\abarrotes\\basket.csv", format = "basket", sep= ",")
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string

## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in asMethod(object): removing duplicated items in transactions
reglas.asociacion <- apriori(tr, parameter = list (supp=0.001, conf=0.2, maxlen=10))
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 115 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115111 transaction(s)] done [0.03s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.05s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
#summary(reglas.asociacion)
inspect(reglas.asociacion)
##      lhs                  rhs         support     confidence coverage   
## [1]  {FANTA}           => {COCA COLA} 0.001051159 0.2439516  0.004308884
## [2]  {SALVO}           => {FABULOSO}  0.001103283 0.3097561  0.003561779
## [3]  {FABULOSO}        => {SALVO}     0.001103283 0.2347505  0.004699811
## [4]  {COCA COLA ZERO}  => {COCA COLA} 0.001416025 0.2969035  0.004769310
## [5]  {SPRITE}          => {COCA COLA} 0.001346526 0.2069426  0.006506763
## [6]  {PINOL}           => {CLORALEX}  0.001016410 0.2363636  0.004300197
## [7]  {BLUE HOUSE}      => {BIMBO}     0.001711392 0.2720994  0.006289581
## [8]  {HELLMANN´S}      => {BIMBO}     0.001537646 0.2649701  0.005803094
## [9]  {REYMA}           => {CONVERMEX} 0.002093631 0.2441743  0.008574333
## [10] {FUD}             => {BIMBO}     0.001589770 0.2183771  0.007279930
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730  0.010502906
##      lift      count
## [1]   1.561906 121  
## [2]  65.908196 127  
## [3]  65.908196 127  
## [4]   1.900932 163  
## [5]   1.324955 155  
## [6]  25.030409 117  
## [7]   4.078870 197  
## [8]   3.971997 177  
## [9]  18.564824 241  
## [10]  3.273552 183  
## [11]  1.673447 316
reglas.asociacion <- sort (reglas.asociacion, by ="confidence", decreasing = TRUE)
#summary(reglas.asociacion)
inspect(reglas.asociacion)
##      lhs                  rhs         support     confidence coverage   
## [1]  {SALVO}           => {FABULOSO}  0.001103283 0.3097561  0.003561779
## [2]  {COCA COLA ZERO}  => {COCA COLA} 0.001416025 0.2969035  0.004769310
## [3]  {BLUE HOUSE}      => {BIMBO}     0.001711392 0.2720994  0.006289581
## [4]  {HELLMANN´S}      => {BIMBO}     0.001537646 0.2649701  0.005803094
## [5]  {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730  0.010502906
## [6]  {REYMA}           => {CONVERMEX} 0.002093631 0.2441743  0.008574333
## [7]  {FANTA}           => {COCA COLA} 0.001051159 0.2439516  0.004308884
## [8]  {PINOL}           => {CLORALEX}  0.001016410 0.2363636  0.004300197
## [9]  {FABULOSO}        => {SALVO}     0.001103283 0.2347505  0.004699811
## [10] {FUD}             => {BIMBO}     0.001589770 0.2183771  0.007279930
## [11] {SPRITE}          => {COCA COLA} 0.001346526 0.2069426  0.006506763
##      lift      count
## [1]  65.908196 127  
## [2]   1.900932 163  
## [3]   4.078870 197  
## [4]   3.971997 177  
## [5]   1.673447 316  
## [6]  18.564824 241  
## [7]   1.561906 121  
## [8]  25.030409 117  
## [9]  65.908196 127  
## [10]  3.273552 183  
## [11]  1.324955 155
top10reglas <- head(reglas.asociacion, n = 10, by = 'confidence')
plot(top10reglas, method = "graph", engine = "htmlwidget")

Conclusiones

Se puede concluir que al ver estas relaciones de diferentes productos entrelazados por la frecuencia con la que son comprados juntos se pueden realaizar promociones las cuales se creen combos para que se pueda vender aun mas y aun precio mejor para la empresa y el cliente.