# <span style="color: red;">Instalar paquetes y llamar librerías</span>

#install.packages("tidyverse") #Paquete global para manipulación y análisis de los datos
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
#install.packages("dplyr") #Para filtrar base de datos
library(dplyr)

#install.packages("janitor") #Examinar y limpiar base de datos
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
#install.packages("Matrix") #Para trabajar con matrices 
library(Matrix)
## 
## Attaching package: 'Matrix'
## 
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
#install.packages("arules") #Generar reglas de asociación
library(arules)
## 
## Attaching package: 'arules'
## 
## The following object is masked from 'package:dplyr':
## 
##     recode
## 
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
#install.packages("arulesViz") #Visualizar reglas de asociación 
library(arulesViz)

#install.packages("datasets")
library(datasets)

#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## The following object is masked from 'package:purrr':
## 
##     compact

Importar la base de datos

# Load abarrotes.csv into `df` from its fixed location on disk.
# (Interactive alternative: file.choose() to pick the file by hand.)
df <- utils::read.csv(
  file = "/Users/daniafernandez/Documents/TEC/BOOTCAMP PROGRA/abarrotes.csv"
)

Análisis descriptivo

# Per-column summary of the freshly loaded data.
summary(object = df)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 1.00   
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 1.00   
##                                        Mean   :5.950e+12   Mean   : 2.11   
##                                        3rd Qu.:7.501e+12   3rd Qu.: 1.00   
##                                        Max.   :1.750e+13   Max.   :30.00   
##                                                            NA's   :199188  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##                                                                        
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##                                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##                                                                         
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# Column types and a preview of the first values of each column.
str(object = df)
## 'data.frame':    200625 obs. of  22 variables:
##  $ vcClaveTienda     : chr  "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Codigo.Barras     : num  7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
##  $ PLU               : int  NA NA NA NA NA NA NA NA NA NA ...
##  $ Fecha             : chr  "19/06/2020" "19/06/2020" "19/06/2020" "19/06/2020" ...
##  $ Hora              : chr  "08:16:21" "08:23:33" "08:24:33" "08:24:33" ...
##  $ Marca             : chr  "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr  "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr  "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num  16 14 5 8 19.5 16 14 5 8 19.5 ...
##  $ Ult.Costo         : num  12.3 14 5 8 15 ...
##  $ Unidades          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : int  1 2 3 3 4 1 2 3 3 4 ...
##  $ NombreDepartamento: chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr  "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr  "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts.2             : int  60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo.ubicación    : chr  "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora.inicio       : chr  "08:00" "08:00" "08:00" "08:00" ...
##  $ Hora.cierre       : chr  "22:00" "22:00" "22:00" "22:00" ...
#count(df, vcClaveTienda, sort = TRUE)
#count(df, DescGiro, sort = TRUE)
#count(df, Fecha, sort = TRUE)
#count(df, Hora, sort = TRUE)
#count(df, Marca, sort = TRUE)
#count(df, Fabricante, sort = TRUE)
#count(df, Producto, sort = TRUE)
#count(df, NombreDepartamento, sort = TRUE)
#count(df, NombreFamilia, sort = TRUE)
#count(df, NombreCategoria, sort = TRUE)
#count(df, Estado, sort = TRUE)
#count(df, Tipo.ubicación, sort = TRUE)
#count(df, Giro, sort = TRUE)
#count(df, Hora.inicio, sort = TRUE)
#count(df, Hora.cierre, sort = TRUE)

# First ten rows of the raw data.
head(df, n = 10L)
##    vcClaveTienda  DescGiro Codigo.Barras PLU      Fecha     Hora
## 1          MX001 Abarrotes  7.501021e+12  NA 19/06/2020 08:16:21
## 2          MX001 Abarrotes  7.501032e+12  NA 19/06/2020 08:23:33
## 3          MX001 Abarrotes  7.501000e+12  NA 19/06/2020 08:24:33
## 4          MX001 Abarrotes  7.501031e+12  NA 19/06/2020 08:24:33
## 5          MX001 Abarrotes  7.501026e+12  NA 19/06/2020 08:26:28
## 6          MX001 Abarrotes  7.501021e+12  NA 19/06/2020 08:16:21
## 7          MX001 Abarrotes  7.501032e+12  NA 19/06/2020 08:23:33
## 8          MX001 Abarrotes  7.501000e+12  NA 19/06/2020 08:24:33
## 9          MX001 Abarrotes  7.501031e+12  NA 19/06/2020 08:24:33
## 10         MX001 Abarrotes  7.501026e+12  NA 19/06/2020 08:26:28
##                         Marca                 Fabricante
## 1                 NUTRI LECHE                    MEXILAC
## 2                      DAN UP           DANONE DE MEXICO
## 3                       BIMBO                GRUPO BIMBO
## 4                       PEPSI        PEPSI-COLA MEXICANA
## 5  BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                 NUTRI LECHE                    MEXILAC
## 7                      DAN UP           DANONE DE MEXICO
## 8                       BIMBO                GRUPO BIMBO
## 9                       PEPSI        PEPSI-COLA MEXICANA
## 10 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
##                              Producto Precio Ult.Costo Unidades F.Ticket
## 1                 Nutri Leche 1 Litro   16.0     12.31        1        1
## 2  DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 3                 Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 4                    Pepsi N.R. 400Ml    8.0      8.00        1        3
## 5       Detergente Blanca Nieves 500G   19.5     15.00        1        4
## 6                 Nutri Leche 1 Litro   16.0     12.31        1        1
## 7  DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 8                 Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 9                    Pepsi N.R. 400Ml    8.0      8.00        1        3
## 10      Detergente Blanca Nieves 500G   19.5     15.00        1        4
##    NombreDepartamento          NombreFamilia           NombreCategoria
## 1           Abarrotes Lacteos y Refrigerados                     Leche
## 2           Abarrotes Lacteos y Refrigerados                    Yogurt
## 3           Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4           Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5           Abarrotes     Limpieza del Hogar                Lavandería
## 6           Abarrotes Lacteos y Refrigerados                     Leche
## 7           Abarrotes Lacteos y Refrigerados                    Yogurt
## 8           Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 9           Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 10          Abarrotes     Limpieza del Hogar                Lavandería
##        Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 1  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 2  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 3  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 4  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 5  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 6  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 7  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 8  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 9  Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 10 Nuevo León    60        Esquina Abarrotes       08:00       22:00
# Last ten rows of the raw data.
tail(df, n = 10L)
##        vcClaveTienda DescGiro Codigo.Barras PLU      Fecha     Hora
## 200616         MX005 Depósito   7.62221e+12  NA 07/08/2020 19:30:13
## 200617         MX005 Depósito   7.62221e+12  NA 25/07/2020 18:42:24
## 200618         MX005 Depósito   7.62221e+12  NA 18/07/2020 22:45:58
## 200619         MX005 Depósito   7.62221e+12  NA 12/07/2020 00:36:34
## 200620         MX005 Depósito   7.62221e+12  NA 12/07/2020 01:08:25
## 200621         MX005 Depósito   7.62221e+12  NA 23/10/2020 22:17:37
## 200622         MX005 Depósito   7.62221e+12  NA 10/10/2020 20:30:20
## 200623         MX005 Depósito   7.62221e+12  NA 10/10/2020 22:40:43
## 200624         MX005 Depósito   7.62221e+12  NA 27/06/2020 22:30:19
## 200625         MX005 Depósito   7.62221e+12  NA 26/06/2020 23:43:34
##                    Marca    Fabricante                          Producto Precio
## 200616 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200617 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200618 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200619 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200620 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200621 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200622 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200623 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200624 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
## 200625 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G      9
##        Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 200616      6.92        1   106411          Abarrotes      Dulcería
## 200617      6.92        1   104693          Abarrotes      Dulcería
## 200618      6.92        1   103856          Abarrotes      Dulcería
## 200619      6.92        1   103087          Abarrotes      Dulcería
## 200620      6.92        1   103100          Abarrotes      Dulcería
## 200621      6.92        1   116598          Abarrotes      Dulcería
## 200622      6.92        1   114886          Abarrotes      Dulcería
## 200623      6.92        1   114955          Abarrotes      Dulcería
## 200624      6.92        1   101121          Abarrotes      Dulcería
## 200625      6.92        1   100879          Abarrotes      Dulcería
##        NombreCategoria       Estado Mts.2 Tipo.ubicación       Giro Hora.inicio
## 200616 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200617 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200618 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200619 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200620 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200621 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200622 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200623 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200624 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
## 200625 Gomas de Mazcar Quintana Roo    58        Esquina Mini súper       08:00
##        Hora.cierre
## 200616       21:00
## 200617       21:00
## 200618       21:00
## 200619       21:00
## 200620       21:00
## 200621       21:00
## 200622       21:00
## 200623       21:00
## 200624       21:00
## 200625       21:00

Tablas

# Cross-tabulation: row counts per store (rows) and department (columns).
janitor::tabyl(df, vcClaveTienda, NombreDepartamento)
##  vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
##          MX001     95415                515      1      147        245       28
##          MX002      6590                 21      0        4         10        0
##          MX003      4026                 15      0        2          8        0
##          MX004     82234                932      0      102        114       16
##          MX005     10014                  0      0        0          0        0
##  Papelería Productos a Eliminar Vinos y Licores
##         35                    3              80
##          0                    0               4
##          0                    0               0
##         32                    5              20
##          7                    0               0
# Cross-tabulation: row counts per state (rows) and opening hour (columns).
janitor::tabyl(df, Estado, Hora.inicio)
##        Estado 07:00 08:00 09:00
##       Chiapas  4051     0     0
##       Jalisco     0     0  6629
##    Nuevo León     0 96469     0
##  Quintana Roo     0 10021     0
##       Sinaloa 83455     0     0

Limpieza de datos

Técnica 1. Eliminar valores irrelevantes

# Drop columns (disabled)
#df <- subset(df, select = -c(PLU))

# Drop rows whose price is not strictly positive.
# which() keeps only positions where the comparison is TRUE, so a missing
# Precio cannot turn into an all-NA row the way a bare logical index would.
df <- df[which(df$Precio > 0), ]

Técnica 2. Eliminar valores repetidos

# Remove fully duplicated rows, keeping the first occurrence of each.
df <- dplyr::distinct(df)

Técnica 3. Corregir errores tipográficos y similares

# Round fractional unit counts up to whole units, then re-check the range.
df[["Unidades"]] <- ceiling(df[["Unidades"]])
summary(df[["Unidades"]])
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   1.000   1.000   1.000   1.261   1.000  96.000

Técnica 4. Convertir los tipos de datos

# Convert Fecha from character to Date.
# The raw values carry a four-digit year (e.g. "19/06/2020"), so the format
# must use %Y. %y consumes only two year digits ("20") and ignores the rest,
# which yields the right date for 2020 purely by coincidence.
df$Fecha <- as.Date(df$Fecha, format = "%d/%m/%Y")
str(df$Fecha)
##  Date[1:200473], format: "2020-06-19" "2020-06-19" "2020-06-19" "2020-06-19" "2020-06-19" ...
summary(df$Fecha)
##         Min.      1st Qu.       Median         Mean      3rd Qu.         Max. 
## "2020-05-01" "2020-06-06" "2020-07-11" "2020-07-18" "2020-08-29" "2020-11-11"

Técnica 5. Tratar valores faltantes

#Borrar todos los NAs
#df <- na.omit(df)

#Reemplazar los NAs con ceros
#df[is.na(df)] <- 0

#Reemplazar los NAs con el promedio
#df$altura[is.na(df$altura)] <- mean(df$altura, na.rm=TRUE)

Técnica 6. Herramientas Estadísticas

# Horizontal boxplots to eyeball outliers in price and in units sold.
boxplot(df[["Precio"]], horizontal = TRUE)

boxplot(df[["Unidades"]], horizontal = TRUE)

Generar basket

# Sort the rows by ticket number, ascending, and preview the first rows.
df <- df[order(df[["F.Ticket"]]), ]
head(df, n = 6L)
##   vcClaveTienda  DescGiro Codigo.Barras PLU      Fecha     Hora
## 1         MX001 Abarrotes  7.501021e+12  NA 2020-06-19 08:16:21
## 2         MX001 Abarrotes  7.501032e+12  NA 2020-06-19 08:23:33
## 3         MX001 Abarrotes  7.501000e+12  NA 2020-06-19 08:24:33
## 4         MX001 Abarrotes  7.501031e+12  NA 2020-06-19 08:24:33
## 5         MX001 Abarrotes  7.501026e+12  NA 2020-06-19 08:26:28
## 6         MX001 Abarrotes  7.501025e+12  NA 2020-06-19 08:26:28
##                        Marca                 Fabricante
## 1                NUTRI LECHE                    MEXILAC
## 2                     DAN UP           DANONE DE MEXICO
## 3                      BIMBO                GRUPO BIMBO
## 4                      PEPSI        PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                      FLASH                       ALEN
##                             Producto Precio Ult.Costo Unidades F.Ticket
## 1                Nutri Leche 1 Litro   16.0     12.31        1        1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1        2
## 3                Rebanadas Bimbo 2Pz    5.0      5.00        1        3
## 4                   Pepsi N.R. 400Ml    8.0      8.00        1        3
## 5      Detergente Blanca Nieves 500G   19.5     15.00        1        4
## 6      Flash Xtra Brisa Marina 500Ml    9.5      7.31        1        4
##   NombreDepartamento          NombreFamilia           NombreCategoria
## 1          Abarrotes Lacteos y Refrigerados                     Leche
## 2          Abarrotes Lacteos y Refrigerados                    Yogurt
## 3          Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4          Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5          Abarrotes     Limpieza del Hogar                Lavandería
## 6          Abarrotes     Limpieza del Hogar      Limpiadores Líquidos
##       Estado Mts.2 Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 1 Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 2 Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 3 Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 4 Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 5 Nuevo León    60        Esquina Abarrotes       08:00       22:00
## 6 Nuevo León    60        Esquina Abarrotes       08:00       22:00
# Last rows after sorting by ticket number.
tail(df, n = 6L)
##        vcClaveTienda   DescGiro Codigo.Barras PLU      Fecha     Hora
## 107247         MX004 Carnicería  1.024877e+10  NA 2020-10-15 11:51:40
## 167624         MX004 Carnicería  7.501080e+12  NA 2020-10-15 11:51:40
## 149282         MX004 Carnicería  7.501055e+12  NA 2020-10-15 11:54:37
## 168603         MX004 Carnicería  7.501214e+12  NA 2020-10-15 11:56:52
## 161046         MX004 Carnicería  7.501031e+12  NA 2020-10-15 12:01:54
## 112823         MX004 Carnicería  7.500470e+07  NA 2020-10-15 12:02:36
##                 Marca           Fabricante                       Producto
## 107247         YEMINA               HERDEZ    PASTA SPAGHETTI YEMINA 200G
## 167624     DEL FUERTE ALIMENTOS DEL FUERTE PURE DE TOMATE DEL FUERTE 345G
## 149282 COCA COLA ZERO            COCA COLA           COCA COLA ZERO 600ML
## 168603       DIAMANTE           EMPACADOS              ARROZ DIAMANTE225G
## 161046          PEPSI  PEPSI-COLA MEXICANA              PEPSI N. R. 500ML
## 112823      COCA COLA            COCA COLA     COCA COLA RETORNABLE 500ML
##        Precio Ult.Costo Unidades F.Ticket NombreDepartamento
## 107247      7      5.38        2   450032          Abarrotes
## 167624     12      9.23        1   450032          Abarrotes
## 149282     15     11.54        2   450034          Abarrotes
## 168603     11      8.46        1   450037          Abarrotes
## 161046     10      7.69        1   450039          Abarrotes
## 112823     10      7.69        8   450040          Abarrotes
##               NombreFamilia               NombreCategoria  Estado Mts.2
## 107247       Sopas y Pastas Fideos, Spaguetti, Tallarines Sinaloa    53
## 167624 Salsas y Sazonadores          Salsa para Spaguetti Sinaloa    53
## 149282              Bebidas         Refrescos Retornables Sinaloa    53
## 168603    Granos y Semillas                         Arroz Sinaloa    53
## 161046              Bebidas     Refrescos Plástico (N.R.) Sinaloa    53
## 112823              Bebidas         Refrescos Retornables Sinaloa    53
##        Tipo.ubicación      Giro Hora.inicio Hora.cierre
## 107247        Esquina Abarrotes       07:00       23:00
## 167624        Esquina Abarrotes       07:00       23:00
## 149282        Esquina Abarrotes       07:00       23:00
## 168603        Esquina Abarrotes       07:00       23:00
## 161046        Esquina Abarrotes       07:00       23:00
## 112823        Esquina Abarrotes       07:00       23:00
# Build the basket: one row per ticket, holding that ticket's brands joined
# into a single comma-separated string.
# NOTE(review): rows are grouped by F.Ticket alone — confirm ticket numbers
# are unique across stores, otherwise tickets from different stores that
# share a number are merged into one basket.
basket <- ddply(
  df,
  "F.Ticket",
  function(ticket_rows) paste(ticket_rows$Marca, collapse = ",")
)

# Drop the ticket number so only the brand strings remain.
basket <- basket[setdiff(names(basket), "F.Ticket")]

# Rename the remaining column (V1) to Marca.
colnames(basket) <- "Marca"

# Export the basket, unquoted and without row names.
write.csv(basket, file = "basket.csv", row.names = FALSE, quote = FALSE)

Market Basket Analysis

# Read the exported baskets back as arules transactions (one ticket per
# line, brands separated by commas).
# - The file is read from the same relative path it was exported to, so the
#   analysis always uses the basket that was just written.
# - skip = 1 drops the "Marca" header line that write.csv() emits; without
#   it the header is read as an extra one-item transaction.
#file.choose()
tr <- read.transactions("basket.csv", format = "basket", sep = ",", skip = 1)

# Mine association rules with apriori: minimum support 0.1 %, minimum
# confidence 20 %, at most 10 items per rule. Parameter names are spelled
# out in full instead of relying on partial matching (supp / conf).
reglas.asociación <- apriori(
  tr,
  parameter = list(support = 0.001, confidence = 0.2, maxlen = 10)
)
## Apriori
## 
## Parameter specification:
##  confidence minval smax arem  aval originalSupport maxtime support minlen
##         0.2    0.1    1 none FALSE            TRUE       5   0.001      1
##  maxlen target  ext
##      10  rules TRUE
## 
## Algorithmic control:
##  filter tree heap memopt load sort verbose
##     0.1 TRUE TRUE  FALSE TRUE    2    TRUE
## 
## Absolute minimum support count: 115 
## 
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115031 transaction(s)] done [0.01s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object  ... done [0.01s].
# Overview of the mined rule set (sizes and quality measures).
summary(object = reglas.asociación)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001017   Min.   :0.2069   Min.   :0.003564   Min.   : 1.326  
##  1st Qu.:0.001104   1st Qu.:0.2358   1st Qu.:0.004507   1st Qu.: 1.789  
##  Median :0.001417   Median :0.2442   Median :0.005807   Median : 3.972  
##  Mean   :0.001521   Mean   :0.2537   Mean   :0.006056   Mean   :17.558  
##  3rd Qu.:0.001652   3rd Qu.:0.2685   3rd Qu.:0.006894   3rd Qu.:21.808  
##  Max.   :0.002747   Max.   :0.3098   Max.   :0.010502   Max.   :65.862  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##    tr        115031   0.001        0.2
##                                                                         call
##  apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
# Print every rule with its quality measures.
inspect(x = reglas.asociación)
##      lhs                  rhs         support     confidence coverage   
## [1]  {FANTA}           => {COCA COLA} 0.001051890 0.2439516  0.004311881
## [2]  {SALVO}           => {FABULOSO}  0.001104050 0.3097561  0.003564257
## [3]  {FABULOSO}        => {SALVO}     0.001104050 0.2347505  0.004703080
## [4]  {COCA COLA ZERO}  => {COCA COLA} 0.001417009 0.2969035  0.004772627
## [5]  {SPRITE}          => {COCA COLA} 0.001347463 0.2069426  0.006511288
## [6]  {PINOL}           => {CLORALEX}  0.001017117 0.2368421  0.004294495
## [7]  {BLUE HOUSE}      => {BIMBO}     0.001712582 0.2720994  0.006293956
## [8]  {HELLMANN´S}      => {BIMBO}     0.001538716 0.2649701  0.005807130
## [9]  {REYMA}           => {CONVERMEX} 0.002095087 0.2441743  0.008580296
## [10] {FUD}             => {BIMBO}     0.001590876 0.2186380  0.007276299
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894  0.010501517
##      lift      count
## [1]   1.562646 121  
## [2]  65.862391 127  
## [3]  65.862391 127  
## [4]   1.901832 163  
## [5]   1.325583 155  
## [6]  25.063647 117  
## [7]   4.078691 197  
## [8]   3.971823 177  
## [9]  18.551922 241  
## [10]  3.277319 183  
## [11]  1.675626 316
# Reorder the rules from highest to lowest confidence, then summarise again.
reglas.asociación <- sort(
  reglas.asociación,
  decreasing = TRUE,
  by = "confidence"
)
summary(object = reglas.asociación)
## set of 11 rules
## 
## rule length distribution (lhs + rhs):sizes
##  2 
## 11 
## 
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       2       2       2       2       2       2 
## 
## summary of quality measures:
##     support           confidence        coverage             lift       
##  Min.   :0.001017   Min.   :0.2069   Min.   :0.003564   Min.   : 1.326  
##  1st Qu.:0.001104   1st Qu.:0.2358   1st Qu.:0.004507   1st Qu.: 1.789  
##  Median :0.001417   Median :0.2442   Median :0.005807   Median : 3.972  
##  Mean   :0.001521   Mean   :0.2537   Mean   :0.006056   Mean   :17.558  
##  3rd Qu.:0.001652   3rd Qu.:0.2685   3rd Qu.:0.006894   3rd Qu.:21.808  
##  Max.   :0.002747   Max.   :0.3098   Max.   :0.010502   Max.   :65.862  
##      count      
##  Min.   :117.0  
##  1st Qu.:127.0  
##  Median :163.0  
##  Mean   :174.9  
##  3rd Qu.:190.0  
##  Max.   :316.0  
## 
## mining info:
##  data ntransactions support confidence
##    tr        115031   0.001        0.2
##                                                                         call
##  apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
# Print the rules in their new confidence order.
inspect(x = reglas.asociación)
##      lhs                  rhs         support     confidence coverage   
## [1]  {SALVO}           => {FABULOSO}  0.001104050 0.3097561  0.003564257
## [2]  {COCA COLA ZERO}  => {COCA COLA} 0.001417009 0.2969035  0.004772627
## [3]  {BLUE HOUSE}      => {BIMBO}     0.001712582 0.2720994  0.006293956
## [4]  {HELLMANN´S}      => {BIMBO}     0.001538716 0.2649701  0.005807130
## [5]  {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894  0.010501517
## [6]  {REYMA}           => {CONVERMEX} 0.002095087 0.2441743  0.008580296
## [7]  {FANTA}           => {COCA COLA} 0.001051890 0.2439516  0.004311881
## [8]  {PINOL}           => {CLORALEX}  0.001017117 0.2368421  0.004294495
## [9]  {FABULOSO}        => {SALVO}     0.001104050 0.2347505  0.004703080
## [10] {FUD}             => {BIMBO}     0.001590876 0.2186380  0.007276299
## [11] {SPRITE}          => {COCA COLA} 0.001347463 0.2069426  0.006511288
##      lift      count
## [1]  65.862391 127  
## [2]   1.901832 163  
## [3]   4.078691 197  
## [4]   3.971823 177  
## [5]   1.675626 316  
## [6]  18.551922 241  
## [7]   1.562646 121  
## [8]  25.063647 117  
## [9]  65.862391 127  
## [10]  3.277319 183  
## [11]  1.325583 155
# Keep the ten rules with the highest confidence and draw them as an
# interactive (htmlwidget) graph.
top10reglas <- head(reglas.asociación, by = "confidence", n = 10)
plot(top10reglas, engine = "htmlwidget", method = "graph")
LS0tCnRpdGxlOiAiTWFya2V0IEJhc2tldCBBbmFseXNpcyIKYXV0aG9yOiAiRGFuaWEgRmVybsOhbmRleiIKZGF0ZTogIjIwMjQtMDktMTAiCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDogCiAgICB0b2M6IFRSVUUKICAgIHRvY19mbG9hdDogVFJVRQogICAgY29kZV9kb3dubG9hZDogVFJVRQogICAgdGhlbWU6IGNvc21vCi0tLQoKIVtdKC9Vc2Vycy9kYW5pYWZlcm5hbmRlei9Eb2N1bWVudHMvVEVDL0JPT1RDQU1QIFBST0dSQS93YWxtYXJ0LmdpZikKIyA8c3BhbiBzdHlsZT0gImNvbG9yOiByZWQiOyA+SW5zdGFsYXIgcGFxdWV0ZXMgeSBsbGFtYXIgbGlicmVyw61hczwvc3Bhbj4gCmBgYHtyfQojaW5zdGFsbC5wYWNrYWdlcygidGlkeXZlcnNlIikgI1BhcXVldGUgZ2xvYmFsIHBhcmEgbWFuaXB1bGFjacOzbiB5IGHDoWxpc2lzIGRlIGxvcyBkYXRvcwpsaWJyYXJ5KHRpZHl2ZXJzZSkKCiNpbnN0YWxsLnBhY2thZ2VzKCJkcGx5ciIpICNQYXJhIGZpbHRyYXIgYmFzZSBkZSBkYXRvcwpsaWJyYXJ5KGRwbHlyKQoKI2luc3RhbGwucGFja2FnZXMoImphbml0b3IiKSAjRXhhbWluYXIgeSBsaW1waWFyIGJhc2UgZGUgZGF0b3MKbGlicmFyeShqYW5pdG9yKQoKI2luc3RhbGwucGFja2FnZXMoIk1hdHJpeCIpICNQYXJhIHRyYWJhamFyIGNvbiBtYXRyaWNlcyAKbGlicmFyeShNYXRyaXgpCgojaW5zdGFsbC5wYWNrYWdlcygiYXJ1bGVzIikgI0dlbmVyYXIgcmVnbGFzIGRlIGFzb2NpYWNpw7NuCmxpYnJhcnkoYXJ1bGVzKQoKI2luc3RhbGwucGFja2FnZXMoImFydWxlc1ZpeiIpICNWaXN1YWxpemFyIHJlZ2xhcyBkZSBhc29jaWFjacOzbiAKbGlicmFyeShhcnVsZXNWaXopCgojaW5zdGFsbC5wYWNrYWdlcygiZGF0YXNldHMiKQpsaWJyYXJ5KGRhdGFzZXRzKQoKI2luc3RhbGwucGFja2FnZXMoInBseXIiKQpsaWJyYXJ5KHBseXIpCgpgYGAKCiMgPHNwYW4gc3R5bGU9ICJjb2xvcjogcmVkOyI+SW1wb3J0YXIgbGEgYmFzZSBkZSBkYXRvcyA8L3NwYW4+CmBgYHtyfQojIGZpbGUuY2hvb3NlKCkKZGYgPC0gcmVhZC5jc3YoIi9Vc2Vycy9kYW5pYWZlcm5hbmRlei9Eb2N1bWVudHMvVEVDL0JPT1RDQU1QIFBST0dSQS9hYmFycm90ZXMuY3N2IikKYGBgCgojIDxzcGFuIHN0eWxlPSAiY29sb3I6IHJlZDsiPkFuw6FsaXNpcyBkZXNjcmlwdGl2byA8L3NwYW4+CmBgYHtyfQpzdW1tYXJ5KGRmKQpzdHIoZGYpCgojY291bnQoZGYsIHZjQ2xhdmVUaWVuZGEsIHNvcnQgPSBUUlVFKQojY291bnQoZGYsIERlc2NHaXJvLCBzb3J0ID0gVFJVRSkKI2NvdW50KGRmLCBGZWNoYSwgc29ydCA9IFRSVUUpCiNjb3VudChkZiwgSG9yYSwgc29ydCA9IFRSVUUpCiNjb3VudChkZiwgTWFyY2EsIHNvcnQgPSBUUlVFKQojY291bnQoZGYsIEZhYnJpY2FudGUsIHNvcnQgPSBUUlVFKQojY291bnQoZGYsIFByb2R1Y3RvLCBzb3J0ID0gVFJVRSkKI2NvdW50KGRmLCBOb21icmVEZXBhcnRhbWVudG8sIHNvcnQgPSBUUlVFKQojY291bnQoZGYsIE5vbWJyZUZhbWlsaWEs
IHNvcnQgPSBUUlVFKQojY291bnQoZGYsIE5vbWJyZUNhdGVnb3JpYSwgc29ydCA9IFRSVUUpCiNjb3VudChkZiwgRXN0YWRvLCBzb3J0ID0gVFJVRSkKI2NvdW50KGRmLCBUaXBvLnViaWNhY2nDs24sIHNvcnQgPSBUUlVFKQojY291bnQoZGYsIEdpcm8sIHNvcnQgPSBUUlVFKQojY291bnQoZGYsIEhvcmEuaW5pY2lvLCBzb3J0ID0gVFJVRSkKI2NvdW50KGRmLCBIb3JhLmNpZXJyZSwgc29ydCA9IFRSVUUpCgpoZWFkKGRmLCBuPTEwKQp0YWlsKGRmLCBuPTEwKQoKYGBgCgojIyA8c3BhbiBzdHlsZT0gImNvbG9yOiByZWQ7Ij5UYWJsYXMgPC9zcGFuPgpgYGB7cn0KIyBUYWJsYSBkZSBUaWVuZGEgeSBEZXBhcnRhbWVudG8KdGFieWwoZGYsIHZjQ2xhdmVUaWVuZGEsIE5vbWJyZURlcGFydGFtZW50bykKIyBUYWJsYSBkZSBFc3RhZG8geSBIb3JhIGRlIGluaWNpbwp0YWJ5bChkZiwgRXN0YWRvLCBIb3JhLmluaWNpbykKYGBgCgojIDxzcGFuIHN0eWxlPSAiY29sb3I6IHJlZDsiPkxpbXBpZXphIGRlIGRhdG9zIDwvc3Bhbj4KCiMjIDxzcGFuIHN0eWxlPSAiY29sb3I6IHJlZDsiPlTDqWNuaWNhIDEuIEVsaW1pbmFyIHZhbG9yZXMgaXJyZWxldmFudGVzIDwvc3Bhbj4KYGBge3J9CiNFbGltaW5hciBjb2x1bW5hcwojZGYgPC0gc3Vic2V0KGRmLCBzZWxlY3QgPSAtYyhQTFUpKQoKI0VsaW1pbmFyIHJlbmdsb25lcwpkZiA8LSBkZltkZiRQcmVjaW8gPjAsIF0KYGBgCgojIyA8c3BhbiBzdHlsZT0gImNvbG9yOiByZWQ7Ij5Uw6ljbmljYSAyLiBFbGltaW5hciB2YWxvcmVzIHJlcGV0aWRvcyA8L3NwYW4+CmBgYHtyfQpkZiA8LSBkaXN0aW5jdChkZikgCmBgYAoKIyMgPHNwYW4gc3R5bGU9ICJjb2xvcjogcmVkOyI+VMOpY25pY2EgMy4gQ29ycmVnaXIgZXJyb3JlcyB0aXBvZ3LDoWZpY29zIHkgc2ltaWxhcmVzIDwvc3Bhbj4KYGBge3J9CmRmJFVuaWRhZGVzIDwtIGNlaWxpbmcoZGYkVW5pZGFkZXMpCnN1bW1hcnkoZGYkVW5pZGFkZXMpCmBgYAoKIyMgPHNwYW4gc3R5bGU9ICJjb2xvcjogcmVkOyI+VMOpY25pY2EgNC4gQ29udmVydGlyIGxvcyB0aXBvcyBkZSBkYXRvcyA8L3NwYW4+CmBgYHtyfQojQ29udmVydGlyIGRlIGNhcmFjdGVyIGEgZmVjaGEKZGYkRmVjaGEgPC0gYXMuRGF0ZShkZiRGZWNoYSwgZm9ybWF0ID0gIiVkLyVtLyV5IikKc3RyKGRmJEZlY2hhKQpzdW1tYXJ5KGRmJEZlY2hhKQpgYGAKCiMjIDxzcGFuIHN0eWxlPSAiY29sb3I6IHJlZDsiPlTDqWNuaWNhIDUuIFRyYXRhciB2YWxvcmVzIGZhbHRhbnRlcyA8L3NwYW4+CmBgYHtyfQojQm9ycmFyIHRvZG9zIGxvcyBOQXMKI2RmIDwtIG5hLm9taXQoZGYpCgojUmVlbXBwbGF6YXIgbG9zIE5BcyBjb24gY2Vyb3MKI2RmW2lzLm5hKGRmKV0gPC0gMAoKI1JlZW1wbGF6YXIgbG9zIE5BcyBjb24gZWwgcHJvbWVkaW8KI2RmJGFsdHVyYVtpcy5uYShkZiRhbHR1cmEpXSA8LSBtZWFuKGRmJGFsdHVyYSwgbmEucm09VFJVRSkKYGBgCgojIyA8c3BhbiBzdHlsZT0gImNvbG9yOiByZWQ7
Ij5Uw6ljbmljYSA2LiBIZXJyYW1pZW50YXMgRXN0YWTDrXN0aWNhczwvc3Bhbj4KYGBge3J9CmJveHBsb3QoZGYkUHJlY2lvLCBob3Jpem9udGFsID0gVFJVRSkKYm94cGxvdChkZiRVbmlkYWRlcywgaG9yaXpvbnRhbCA9IFRSVUUpCmBgYAoKIyA8c3BhbiBzdHlsZT0gImNvbG9yOiByZWQ7Ij5HZW5lcmFyIGJhc2tldDwvc3Bhbj4KYGBge3J9CiNPcmRlbmFyIGRlIG1lbm9yIGEgbWF5b3IgbGEgY29sdW1uYSBUaWNrZXQKZGYgPC0gZGZbb3JkZXIoZGYkRi5UaWNrZXQpLCBdCmhlYWQoZGYpCnRhaWwoZGYpCgojIEdlbmVyYXIgZWwgYmFza2V0CmJhc2tldCA8LSBkZHBseShkZiwgYygiRi5UaWNrZXQiKSwgZnVuY3Rpb24oZGYpcGFzdGUoZGYkTWFyY2EsIGNvbGxhcHNlID0gIiwiKSkKCiNFbGltaW5hciBlbCBuw7ptZXJvIGRlIFRpY2tldApiYXNrZXQkRi5UaWNrZXQgPC0gTlVMTAoKI0NhbWJpYXIgZWwgdMOtdHVsbyBkZSBsYSBjb2x1bW5hIFYxIHBvciBNYXJjYSAKCmNvbG5hbWVzKGJhc2tldCkgPC0gYygiTWFyY2EiKQoKI0V4cG9ydGFyIGJhc2tldAp3cml0ZS5jc3YoYmFza2V0LCAiYmFza2V0LmNzdiIsIHF1b3RlID0gRkFMU0UsIHJvdy5uYW1lcyA9IEZBTFNFKQoKYGBgCgojIDxzcGFuIHN0eWxlPSAiY29sb3I6IHJlZDsiPk1hcmtldCBCYXNrZXQgQW5hbHlzaXM8L3NwYW4+CmBgYHtyIHdhcm5pbmc9RkFMU0V9CiNmaWxlLmNob29zZSgpCnRyIDwtIHJlYWQudHJhbnNhY3Rpb25zKCIvVXNlcnMvZGFuaWFmZXJuYW5kZXovRG9jdW1lbnRzL1RFQy9CT09UQ0FNUCBQUk9HUkEvYmFza2V0LmNzdiIsIGZvcm1hdCA9ICJiYXNrZXQiLCBzZXAgPSAiLCIpCgpyZWdsYXMuYXNvY2lhY2nDs24gPC0gYXByaW9yaSh0ciwgcGFyYW1ldGVyID0gbGlzdChzdXBwID0gMC4wMDEsIGNvbmYgPSAwLjIsIG1heGxlbiA9IDEwKSkKc3VtbWFyeShyZWdsYXMuYXNvY2lhY2nDs24pCmluc3BlY3QocmVnbGFzLmFzb2NpYWNpw7NuKQoKcmVnbGFzLmFzb2NpYWNpw7NuIDwtIHNvcnQocmVnbGFzLmFzb2NpYWNpw7NuLCBieSA9ICJjb25maWRlbmNlIiwgZGVjcmVhc2luZyA9IFRSVUUpCnN1bW1hcnkocmVnbGFzLmFzb2NpYWNpw7NuKQppbnNwZWN0KHJlZ2xhcy5hc29jaWFjacOzbikKCnRvcDEwcmVnbGFzIDwtIGhlYWQocmVnbGFzLmFzb2NpYWNpw7NuLCBuID0gMTAsIGJ5ID0gImNvbmZpZGVuY2UiKQpwbG90KHRvcDEwcmVnbGFzLCBtZXRob2QgPSAiZ3JhcGgiLCBlbmdpbmUgPSAiaHRtbHdpZGdldCIpCgpgYGAKCgo=