# Load the raw grocery sales export into `bd`.
# file.choose() opens an interactive file picker and its result was never
# used, so it is kept only as a commented-out hint for locating the file.
# file.choose()
## [1] "C:\\Users\\jimen\\Downloads\\Market Basket.Rmd"
ruta_bd <- "C:\\Users\\jimen\\Downloads\\Abarrotes_Ventas-4 (1).csv"
# Fail early with a clear message when the export is not where we expect it.
if (!file.exists(ruta_bd)) {
  stop("Sales file not found: ", ruta_bd, call. = FALSE)
}
bd <- read.csv(ruta_bd)
summary (bd)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.501e+12 Median : 1.00
## Mean :5.950e+12 Mean : 2.11
## 3rd Qu.:7.501e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199183
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383009
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
#install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#count(bd, vcClaveTienda, sort= TRUE)
#count(bd, DescGiro, sort= TRUE)
#count(bd, Marca, sort= TRUE)
#count(bd, Fabricante, sort= TRUE)
#count(bd, NombreDepartamento, sort= TRUE)
#count(bd, Producto, sort= TRUE)
#count(bd, NombreCategoria, sort= TRUE)
#count(bd, Estado, sort= TRUE)
#count(bd, Mts.2, sort= TRUE)
#count(bd, Tipo.ubicación, sort= TRUE)
#count(bd, Giro, sort= TRUE)
#count(bd, Hora.inicio, sort= TRUE)
#count(bd, Hora.cierre, sort= TRUE)
#install.packages ("tidyverse")
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ stringr 1.4.1
## ✔ tidyr 1.2.0 ✔ forcats 0.5.2
## ✔ readr 2.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
tibble(bd)
## # A tibble: 200,620 × 22
## vcClaveTienda DescGiro Codig…¹ PLU Fecha Hora Marca Fabri…² Produ…³ Precio
## <chr> <chr> <dbl> <int> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrot… 7.50e12 NA 19/0… 8:16… NUTR… MEXILAC Nutri … 16
## 2 MX001 Abarrot… 7.50e12 NA 19/0… 8:23… DAN … DANONE… DANUP … 14
## 3 MX001 Abarrot… 7.50e12 NA 19/0… 8:24… BIMBO GRUPO … Rebana… 5
## 4 MX001 Abarrot… 7.50e12 NA 19/0… 8:24… PEPSI PEPSI-… Pepsi … 8
## 5 MX001 Abarrot… 7.50e12 NA 19/0… 8:26… BLAN… FABRIC… Deterg… 19.5
## 6 MX001 Abarrot… 7.50e12 NA 19/0… 8:26… FLASH ALEN Flash … 9.5
## 7 MX001 Abarrot… 7.50e12 NA 19/0… 8:26… VARI… DANONE… Danone… 11
## 8 MX001 Abarrot… 7.50e12 NA 19/0… 8:26… ZOTE FABRIC… Jabon … 9.5
## 9 MX001 Abarrot… 7.51e12 NA 19/0… 8:26… ALWA… PROCTE… T Feme… 23.5
## 10 MX001 Abarrot… 3.22e10 NA 19/0… 15:2… JUMEX JUMEX Jugo D… 12
## # … with 200,610 more rows, 12 more variables: Ult.Costo <dbl>, Unidades <dbl>,
## # F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## # NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## # Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable
## # names ¹Codigo.Barras, ²Fabricante, ³Producto
str(bd)
## 'data.frame': 200620 obs. of 22 variables:
## $ vcClaveTienda : chr "MX001" "MX001" "MX001" "MX001" ...
## $ DescGiro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Codigo.Barras : num 7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
## $ PLU : int NA NA NA NA NA NA NA NA NA NA ...
## $ Fecha : chr "19/06/2020" "19/06/2020" "19/06/2020" "19/06/2020" ...
## $ Hora : chr "8:16:21" "8:23:33" "8:24:33" "8:24:33" ...
## $ Marca : chr "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
## $ Fabricante : chr "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
## $ Producto : chr "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
## $ Precio : num 16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
## $ Ult.Costo : num 12.3 14 5 8 15 ...
## $ Unidades : num 1 1 1 1 1 1 1 1 1 1 ...
## $ F.Ticket : int 1 2 3 3 4 4 4 4 4 5 ...
## $ NombreDepartamento: chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ NombreFamilia : chr "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
## $ NombreCategoria : chr "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
## $ Estado : chr "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
## $ Mts.2 : int 60 60 60 60 60 60 60 60 60 60 ...
## $ Tipo.ubicación : chr "Esquina" "Esquina" "Esquina" "Esquina" ...
## $ Giro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Hora.inicio : chr "8:00" "8:00" "8:00" "8:00" ...
## $ Hora.cierre : chr "22:00" "22:00" "22:00" "22:00" ...
head(bd)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 1 MX001 Abarrotes 7.501021e+12 NA 19/06/2020 8:16:21
## 2 MX001 Abarrotes 7.501032e+12 NA 19/06/2020 8:23:33
## 3 MX001 Abarrotes 7.501000e+12 NA 19/06/2020 8:24:33
## 4 MX001 Abarrotes 7.501031e+12 NA 19/06/2020 8:24:33
## 5 MX001 Abarrotes 7.501026e+12 NA 19/06/2020 8:26:28
## 6 MX001 Abarrotes 7.501025e+12 NA 19/06/2020 8:26:28
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 FLASH ALEN
## Producto Precio Ult.Costo Unidades F.Ticket
## 1 Nutri Leche 1 Litro 16.0 12.31 1 1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 1 3
## 4 Pepsi N.R. 400Ml 8.0 8.00 1 3
## 5 Detergente Blanca Nieves 500G 19.5 15.00 1 4
## 6 Flash Xtra Brisa Marina 500Ml 9.5 7.31 1 4
## NombreDepartamento NombreFamilia NombreCategoria
## 1 Abarrotes Lacteos y Refrigerados Leche
## 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 Abarrotes Limpieza del Hogar Lavandería
## 6 Abarrotes Limpieza del Hogar Limpiadores Líquidos
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre
## 1 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 2 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 3 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 4 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 5 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 6 Nuevo León 60 Esquina Abarrotes 8:00 22:00
head(bd,n=7)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 1 MX001 Abarrotes 7.501021e+12 NA 19/06/2020 8:16:21
## 2 MX001 Abarrotes 7.501032e+12 NA 19/06/2020 8:23:33
## 3 MX001 Abarrotes 7.501000e+12 NA 19/06/2020 8:24:33
## 4 MX001 Abarrotes 7.501031e+12 NA 19/06/2020 8:24:33
## 5 MX001 Abarrotes 7.501026e+12 NA 19/06/2020 8:26:28
## 6 MX001 Abarrotes 7.501025e+12 NA 19/06/2020 8:26:28
## 7 MX001 Abarrotes 7.501032e+12 NA 19/06/2020 8:26:28
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 FLASH ALEN
## 7 VARIOS DANONE DANONE DE MEXICO
## Producto Precio Ult.Costo Unidades F.Ticket
## 1 Nutri Leche 1 Litro 16.0 12.31 1 1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 1 3
## 4 Pepsi N.R. 400Ml 8.0 8.00 1 3
## 5 Detergente Blanca Nieves 500G 19.5 15.00 1 4
## 6 Flash Xtra Brisa Marina 500Ml 9.5 7.31 1 4
## 7 Danone Bipack Fresa Chocoarroz 130G 11.0 11.00 1 4
## NombreDepartamento NombreFamilia NombreCategoria
## 1 Abarrotes Lacteos y Refrigerados Leche
## 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 Abarrotes Limpieza del Hogar Lavandería
## 6 Abarrotes Limpieza del Hogar Limpiadores Líquidos
## 7 Abarrotes Lacteos y Refrigerados Postres Refrigerados
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre
## 1 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 2 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 3 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 4 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 5 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 6 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 7 Nuevo León 60 Esquina Abarrotes 8:00 22:00
tail(bd)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 200615 MX005 Depósito 7.62221e+12 NA 12/07/2020 1:08:25
## 200616 MX005 Depósito 7.62221e+12 NA 23/10/2020 22:17:37
## 200617 MX005 Depósito 7.62221e+12 NA 10/10/2020 20:30:20
## 200618 MX005 Depósito 7.62221e+12 NA 10/10/2020 22:40:43
## 200619 MX005 Depósito 7.62221e+12 NA 27/06/2020 22:30:19
## 200620 MX005 Depósito 7.62221e+12 NA 26/06/2020 23:43:34
## Marca Fabricante Producto Precio
## 200615 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200616 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200617 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200618 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200619 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## 200620 TRIDENT XTRA CARE CADBURY ADAMS Trident Xtracare Freshmint 16.32G 9
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 200615 6.92 1 103100 Abarrotes Dulcería
## 200616 6.92 1 116598 Abarrotes Dulcería
## 200617 6.92 1 114886 Abarrotes Dulcería
## 200618 6.92 1 114955 Abarrotes Dulcería
## 200619 6.92 1 101121 Abarrotes Dulcería
## 200620 6.92 1 100879 Abarrotes Dulcería
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro Hora.inicio
## 200615 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 8:00
## 200616 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 8:00
## 200617 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 8:00
## 200618 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 8:00
## 200619 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 8:00
## 200620 Gomas de Mazcar Quintana Roo 58 Esquina Mini súper 8:00
## Hora.cierre
## 200615 21:00
## 200616 21:00
## 200617 21:00
## 200618 21:00
## 200619 21:00
## 200620 21:00
#install.packages("janitor")
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
# bd1: working copy of bd without the PLU and Codigo.Barras columns
# (summary(bd) reports PLU as NA in 199,183 of the 200,620 rows).
bd1 <- subset(bd, select = -c(PLU, Codigo.Barras))
# bd2: rows of bd1 with a strictly positive price.
# The column is `Precio`; R names are case-sensitive, so `bd2$precio` is NULL
# and the previous filter selected zero rows. which() additionally drops NA
# comparisons so they cannot produce all-NA rows.
# NOTE: the summary(bd2) output captured further down (every column showing
# "Length:0") was produced by the old filter and will change on the next run.
bd2 <- bd1[which(bd1$Precio > 0), ]
summary (bd1)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. :-147.00
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.42
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383009
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
summary (bd2)
## vcClaveTienda DescGiro Fecha Hora
## Length:0 Length:0 Length:0 Length:0
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:0 Length:0 Length:0 Min. : NA
## Class :character Class :character Class :character 1st Qu.: NA
## Mode :character Mode :character Mode :character Median : NA
## Mean :NaN
## 3rd Qu.: NA
## Max. : NA
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : NA Min. : NA Min. : NA Length:0
## 1st Qu.: NA 1st Qu.: NA 1st Qu.: NA Class :character
## Median : NA Median : NA Median : NA Mode :character
## Mean :NaN Mean :NaN Mean :NaN
## 3rd Qu.: NA 3rd Qu.: NA 3rd Qu.: NA
## Max. : NA Max. : NA Max. : NA
## NombreFamilia NombreCategoria Estado Mts.2
## Length:0 Length:0 Length:0 Min. : NA
## Class :character Class :character Class :character 1st Qu.: NA
## Mode :character Mode :character Mode :character Median : NA
## Mean :NaN
## 3rd Qu.: NA
## Max. : NA
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:0 Length:0 Length:0 Length:0
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
bd1 [duplicated (bd1),]
## [1] vcClaveTienda DescGiro Fecha Hora
## [5] Marca Fabricante Producto Precio
## [9] Ult.Costo Unidades F.Ticket NombreDepartamento
## [13] NombreFamilia NombreCategoria Estado Mts.2
## [17] Tipo.ubicación Giro Hora.inicio Hora.cierre
## <0 rows> (or 0-length row.names)
sum(duplicated(bd1))
## [1] 0
# bd3: bd1 with exact duplicate rows collapsed to a single occurrence.
library(dplyr)
bd3 <- distinct(bd1)
# bd4: same rows as bd3, with every price replaced by its absolute value,
# so negative prices become positive.
bd4 <- bd3
bd4[["Precio"]] <- abs(bd4[["Precio"]])
summary(bd4)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383009
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
# bd5: bd4 with unit counts rounded up to whole units (e.g. 0.2 -> 1).
# The rounded values must be stored in bd5; they were previously written to
# bd4, which left bd5 (and every data frame derived from it) with fractional
# units.
# NOTE: the summary(bd5) output captured below (Unidades Min. 0.200) predates
# this fix and will change on the next run.
bd5 <- bd4
bd5$Unidades <- ceiling(bd5$Unidades)
summary(bd5)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383009
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
# bd6: bd5 plus a `fecha` column holding `Fecha` parsed as a Date.
# The source strings are day/month/four-digit year, e.g. "19/06/2020".
bd6 <- bd5
bd6[["fecha"]] <- as.Date(bd6[["Fecha"]], format = "%d/%m/%Y")
tibble(bd6)
## # A tibble: 200,620 × 21
## vcClaveTie…¹ DescG…² Fecha Hora Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 MX001 Abarro… 19/0… 8:16… NUTR… MEXILAC Nutri … 16 12.3 1
## 2 MX001 Abarro… 19/0… 8:23… DAN … DANONE… DANUP … 14 14 1
## 3 MX001 Abarro… 19/0… 8:24… BIMBO GRUPO … Rebana… 5 5 1
## 4 MX001 Abarro… 19/0… 8:24… PEPSI PEPSI-… Pepsi … 8 8 1
## 5 MX001 Abarro… 19/0… 8:26… BLAN… FABRIC… Deterg… 19.5 15 1
## 6 MX001 Abarro… 19/0… 8:26… FLASH ALEN Flash … 9.5 7.31 1
## 7 MX001 Abarro… 19/0… 8:26… VARI… DANONE… Danone… 11 11 1
## 8 MX001 Abarro… 19/0… 8:26… ZOTE FABRIC… Jabon … 9.5 7.31 1
## 9 MX001 Abarro… 19/0… 8:26… ALWA… PROCTE… T Feme… 23.5 18.1 1
## 10 MX001 Abarro… 19/0… 15:2… JUMEX JUMEX Jugo D… 12 12 1
## # … with 200,610 more rows, 11 more variables: F.Ticket <int>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <chr>, Hora.cierre <chr>, fecha <date>, and abbreviated
## # variable names ¹vcClaveTienda, ²DescGiro, ³Fabricante, ⁴Producto,
## # ⁵Ult.Costo, ⁶Unidades
# bd7: bd6 with `Hora` reduced to its hour part ("8:16:21" -> "8",
# "15:20:01" -> "15"); it is converted to integer further down.
# Taking the first two characters turned single-digit hours into "8:", which
# as.integer() converts to NA (16,440 rows in the captured output). Removing
# everything from the first colon onwards handles both widths.
# NOTE: the captured outputs below that show "8:" and NA hours predate this
# fix and will change on the next run.
bd7 <- bd6
bd7$Hora <- sub(":.*$", "", bd7$Hora)
tibble(bd7)
## # A tibble: 200,620 × 21
## vcClaveTie…¹ DescG…² Fecha Hora Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
## <chr> <chr> <chr> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 MX001 Abarro… 19/0… 8: NUTR… MEXILAC Nutri … 16 12.3 1
## 2 MX001 Abarro… 19/0… 8: DAN … DANONE… DANUP … 14 14 1
## 3 MX001 Abarro… 19/0… 8: BIMBO GRUPO … Rebana… 5 5 1
## 4 MX001 Abarro… 19/0… 8: PEPSI PEPSI-… Pepsi … 8 8 1
## 5 MX001 Abarro… 19/0… 8: BLAN… FABRIC… Deterg… 19.5 15 1
## 6 MX001 Abarro… 19/0… 8: FLASH ALEN Flash … 9.5 7.31 1
## 7 MX001 Abarro… 19/0… 8: VARI… DANONE… Danone… 11 11 1
## 8 MX001 Abarro… 19/0… 8: ZOTE FABRIC… Jabon … 9.5 7.31 1
## 9 MX001 Abarro… 19/0… 8: ALWA… PROCTE… T Feme… 23.5 18.1 1
## 10 MX001 Abarro… 19/0… 15 JUMEX JUMEX Jugo D… 12 12 1
## # … with 200,610 more rows, 11 more variables: F.Ticket <int>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <chr>, Hora.cierre <chr>, fecha <date>, and abbreviated
## # variable names ¹vcClaveTienda, ²DescGiro, ³Fabricante, ⁴Producto,
## # ⁵Ult.Costo, ⁶Unidades
bd7$Hora <- as.integer(bd7$Hora)
## Warning: NAs introduced by coercion
str(bd7)
## 'data.frame': 200620 obs. of 21 variables:
## $ vcClaveTienda : chr "MX001" "MX001" "MX001" "MX001" ...
## $ DescGiro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Fecha : chr "19/06/2020" "19/06/2020" "19/06/2020" "19/06/2020" ...
## $ Hora : int NA NA NA NA NA NA NA NA NA 15 ...
## $ Marca : chr "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
## $ Fabricante : chr "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
## $ Producto : chr "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
## $ Precio : num 16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
## $ Ult.Costo : num 12.3 14 5 8 15 ...
## $ Unidades : num 1 1 1 1 1 1 1 1 1 1 ...
## $ F.Ticket : int 1 2 3 3 4 4 4 4 4 5 ...
## $ NombreDepartamento: chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ NombreFamilia : chr "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
## $ NombreCategoria : chr "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
## $ Estado : chr "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
## $ Mts.2 : int 60 60 60 60 60 60 60 60 60 60 ...
## $ Tipo.ubicación : chr "Esquina" "Esquina" "Esquina" "Esquina" ...
## $ Giro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Hora.inicio : chr "8:00" "8:00" "8:00" "8:00" ...
## $ Hora.cierre : chr "22:00" "22:00" "22:00" "22:00" ...
## $ fecha : Date, format: "2020-06-19" "2020-06-19" ...
sum(is.na(bd7))
## [1] 16440
sum(is.na(bd))
## [1] 199183
sapply(bd7,function(x) sum (is.na(x)))
## vcClaveTienda DescGiro Fecha Hora
## 0 0 0 16440
## Marca Fabricante Producto Precio
## 0 0 0 0
## Ult.Costo Unidades F.Ticket NombreDepartamento
## 0 0 0 0
## NombreFamilia NombreCategoria Estado Mts.2
## 0 0 0 0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## 0 0 0 0
## fecha
## 0
sapply(bd, function(x) sum(is.na(x)))
## vcClaveTienda DescGiro Codigo.Barras PLU
## 0 0 0 199183
## Fecha Hora Marca Fabricante
## 0 0 0 0
## Producto Precio Ult.Costo Unidades
## 0 0 0 0
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## 0 0 0 0
## Estado Mts.2 Tipo.ubicación Giro
## 0 0 0 0
## Hora.inicio Hora.cierre
## 0 0
# bd8: only the rows of bd that contain no NA in any column.
bd8 <- na.omit(bd)
summary(bd8)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:1437 Length:1437 Min. :6.750e+08 Min. : 1.000
## Class :character Class :character 1st Qu.:6.750e+08 1st Qu.: 1.000
## Mode :character Mode :character Median :6.750e+08 Median : 1.000
## Mean :2.616e+11 Mean : 2.112
## 3rd Qu.:6.750e+08 3rd Qu.: 1.000
## Max. :7.501e+12 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:1437 Length:1437 Length:1437 Length:1437
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:1437 Min. :30.00 Min. : 1.00 Min. :1.000
## Class :character 1st Qu.:90.00 1st Qu.:64.62 1st Qu.:1.000
## Mode :character Median :90.00 Median :64.62 Median :1.000
## Mean :87.94 Mean :56.65 Mean :1.124
## 3rd Qu.:90.00 3rd Qu.:64.62 3rd Qu.:1.000
## Max. :90.00 Max. :64.62 Max. :7.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 772 Length:1437 Length:1437 Length:1437
## 1st Qu.: 99955 Class :character Class :character Class :character
## Median :102493 Mode :character Mode :character Mode :character
## Mean :100595
## 3rd Qu.:106546
## Max. :118356
## Estado Mts.2 Tipo.ubicación Giro
## Length:1437 Min. :58.00 Length:1437 Length:1437
## Class :character 1st Qu.:58.00 Class :character Class :character
## Mode :character Median :58.00 Mode :character Mode :character
## Mean :58.07
## 3rd Qu.:58.00
## Max. :60.00
## Hora.inicio Hora.cierre
## Length:1437 Length:1437
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# bd9: copy of bd in which every missing value (NA) is replaced by 0.
bd9 <- replace(bd, is.na(bd), 0)
summary(bd9)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 0.00000
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 0.00000
## Mode :character Mode :character Median :7.501e+12 Median : 0.00000
## Mean :5.950e+12 Mean : 0.01513
## 3rd Qu.:7.501e+12 3rd Qu.: 0.00000
## Max. :1.750e+13 Max. :30.00000
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383009
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# bd10: copy of bd where missing PLU values are filled with the mean of the
# PLU values that are present.
bd10 <- bd
bd10$PLU <- replace(
  bd10$PLU,
  is.na(bd10$PLU),
  mean(bd10$PLU, na.rm = TRUE)
)
summary(bd10)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 1.000
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 2.112
## Mode :character Mode :character Median :7.501e+12 Median : 2.112
## Mean :5.950e+12 Mean : 2.112
## 3rd Qu.:7.501e+12 3rd Qu.: 2.112
## Max. :1.750e+13 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383009
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# bd11: copy of bd with negative numeric values clamped to 0.
# Only numeric columns are touched: comparing the whole data frame with 0
# also compares the character columns against "0" as strings, which can
# overwrite text that happens to collate before "0".
# which() skips NA comparisons, and assigning 0L keeps integer columns
# integer while double columns stay double.
bd11 <- bd
bd11[] <- lapply(bd11, function(columna) {
  if (is.numeric(columna)) {
    replace(columna, which(columna < 0), 0L)
  } else {
    columna
  }
})
summary(bd11)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.501e+12 Median : 1.00
## Mean :5.950e+12 Mean : 2.11
## 3rd Qu.:7.501e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199183
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. : 0.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.44 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383009
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# bd12: working copy of bd7 that receives the derived columns added below.
bd12 <- bd7
# Horizontal box plots of price and of units sold, one plot per column.
invisible(
  lapply(bd12[c("Precio", "Unidades")], boxplot, horizontal = TRUE)
)
#install.packages("lubridate")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Day of the week as a number (lubridate default: 1 = Sunday ... 7 = Saturday),
# computed from the parsed Date column `fecha`.
# The raw `Fecha` column holds "dd/mm/yyyy" strings; given those directly,
# wday() does not read them as day/month/year. In the captured output below,
# 2020-06-19 (a Friday) was labelled 5 instead of 6.
# NOTE: the Dia_de_la_semana values captured below predate this fix.
bd12$Dia_de_la_semana <- wday(bd12$fecha)
summary(bd12)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Min. :10.00
## Class :character Class :character Class :character 1st Qu.:14.00
## Mode :character Mode :character Mode :character Median :18.00
## Mean :17.12
## 3rd Qu.:20.00
## Max. :23.00
## NA's :16440
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
##
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383009
## Max. :769.23 Max. :96.000 Max. :450040
##
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## fecha Dia_de_la_semana
## Min. :2020-05-01 Min. :1.000
## 1st Qu.:2020-06-06 1st Qu.:2.000
## Median :2020-07-11 Median :4.000
## Mean :2020-07-18 Mean :3.919
## 3rd Qu.:2020-08-29 3rd Qu.:6.000
## Max. :2020-11-11 Max. :7.000
##
# Line subtotal: unit price multiplied by the number of units on the row.
bd12[["subtotal"]] <- with(bd12, Precio * Unidades)
summary(bd12)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Min. :10.00
## Class :character Class :character Class :character 1st Qu.:14.00
## Mode :character Mode :character Mode :character Median :18.00
## Mean :17.12
## 3rd Qu.:20.00
## Max. :23.00
## NA's :16440
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
##
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383009
## Max. :769.23 Max. :96.000 Max. :450040
##
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## fecha Dia_de_la_semana subtotal
## Min. :2020-05-01 Min. :1.000 Min. : 1.00
## 1st Qu.:2020-06-06 1st Qu.:2.000 1st Qu.: 12.00
## Median :2020-07-11 Median :4.000 Median : 18.00
## Mean :2020-07-18 Mean :3.919 Mean : 24.33
## 3rd Qu.:2020-08-29 3rd Qu.:6.000 3rd Qu.: 27.00
## Max. :2020-11-11 Max. :7.000 Max. :2496.00
##
# Margin per row: selling price minus last recorded cost
# (not multiplied by the number of units).
bd12[["utilidad"]] <- with(bd12, Precio - Ult.Costo)
summary (bd12)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Min. :10.00
## Class :character Class :character Class :character 1st Qu.:14.00
## Mode :character Mode :character Mode :character Median :18.00
## Mean :17.12
## 3rd Qu.:20.00
## Max. :23.00
## NA's :16440
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
##
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383009
## Max. :769.23 Max. :96.000 Max. :450040
##
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## fecha Dia_de_la_semana subtotal utilidad
## Min. :2020-05-01 Min. :1.000 Min. : 1.00 Min. : 0.000
## 1st Qu.:2020-06-06 1st Qu.:2.000 1st Qu.: 12.00 1st Qu.: 2.310
## Median :2020-07-11 Median :4.000 Median : 18.00 Median : 3.230
## Mean :2020-07-18 Mean :3.919 Mean : 24.33 Mean : 4.142
## 3rd Qu.:2020-08-29 3rd Qu.:6.000 3rd Qu.: 27.00 3rd Qu.: 5.420
## Max. :2020-11-11 Max. :7.000 Max. :2496.00 Max. :230.770
##
# bd_limpia: the cleaned data set, also written to a CSV file in the current
# working directory without a row-name column.
bd_limpia <- bd12
write.csv(
  x = bd_limpia,
  file = "abarrotes_bd_limpia.csv",
  row.names = FALSE
)
#install.packages("arules")
#install.packages("arulesViz")
#install.packages("Matrix")
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(arules)
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
#install.packages("datasets")
library(datasets)
# Sort the rows by ticket number so that all lines of a ticket are
# contiguous, then preview the first rows.
bd_limpia <- bd_limpia[order(bd_limpia[["F.Ticket"]]), , drop = FALSE]
head(bd_limpia)
## vcClaveTienda DescGiro Fecha Hora Marca
## 1 MX001 Abarrotes 19/06/2020 NA NUTRI LECHE
## 2 MX001 Abarrotes 19/06/2020 NA DAN UP
## 3 MX001 Abarrotes 19/06/2020 NA BIMBO
## 4 MX001 Abarrotes 19/06/2020 NA PEPSI
## 5 MX001 Abarrotes 19/06/2020 NA BLANCA NIEVES (DETERGENTE)
## 6 MX001 Abarrotes 19/06/2020 NA FLASH
## Fabricante Producto Precio
## 1 MEXILAC Nutri Leche 1 Litro 16.0
## 2 DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL 14.0
## 3 GRUPO BIMBO Rebanadas Bimbo 2Pz 5.0
## 4 PEPSI-COLA MEXICANA Pepsi N.R. 400Ml 8.0
## 5 FABRICA DE JABON LA CORONA Detergente Blanca Nieves 500G 19.5
## 6 ALEN Flash Xtra Brisa Marina 500Ml 9.5
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 1 12.31 1 1 Abarrotes Lacteos y Refrigerados
## 2 14.00 1 2 Abarrotes Lacteos y Refrigerados
## 3 5.00 1 3 Abarrotes Pan y Tortilla
## 4 8.00 1 3 Abarrotes Bebidas
## 5 15.00 1 4 Abarrotes Limpieza del Hogar
## 6 7.31 1 4 Abarrotes Limpieza del Hogar
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 1 Leche Nuevo León 60 Esquina Abarrotes
## 2 Yogurt Nuevo León 60 Esquina Abarrotes
## 3 Pan Dulce Empaquetado Nuevo León 60 Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León 60 Esquina Abarrotes
## 5 Lavandería Nuevo León 60 Esquina Abarrotes
## 6 Limpiadores Líquidos Nuevo León 60 Esquina Abarrotes
## Hora.inicio Hora.cierre fecha Dia_de_la_semana subtotal utilidad
## 1 8:00 22:00 2020-06-19 5 16.0 3.69
## 2 8:00 22:00 2020-06-19 5 14.0 0.00
## 3 8:00 22:00 2020-06-19 5 5.0 0.00
## 4 8:00 22:00 2020-06-19 5 8.0 0.00
## 5 8:00 22:00 2020-06-19 5 19.5 4.50
## 6 8:00 22:00 2020-06-19 5 9.5 2.19
# Preview the last rows of bd_limpia (sorted by F.Ticket earlier in the
# script, so these are the highest ticket numbers).
tail(bd_limpia)
## vcClaveTienda DescGiro Fecha Hora Marca
## 107394 MX004 Carnicería 15/10/2020 11 YEMINA
## 167771 MX004 Carnicería 15/10/2020 11 DEL FUERTE
## 149429 MX004 Carnicería 15/10/2020 11 COCA COLA ZERO
## 168750 MX004 Carnicería 15/10/2020 11 DIAMANTE
## 161193 MX004 Carnicería 15/10/2020 12 PEPSI
## 112970 MX004 Carnicería 15/10/2020 12 COCA COLA
## Fabricante Producto Precio Ult.Costo
## 107394 HERDEZ PASTA SPAGHETTI YEMINA 200G 7 5.38
## 167771 ALIMENTOS DEL FUERTE PURE DE TOMATE DEL FUERTE 345G 12 9.23
## 149429 COCA COLA COCA COLA ZERO 600ML 15 11.54
## 168750 EMPACADOS ARROZ DIAMANTE225G 11 8.46
## 161193 PEPSI-COLA MEXICANA PEPSI N. R. 500ML 10 7.69
## 112970 COCA COLA COCA COLA RETORNABLE 500ML 10 7.69
## Unidades F.Ticket NombreDepartamento NombreFamilia
## 107394 2 450032 Abarrotes Sopas y Pastas
## 167771 1 450032 Abarrotes Salsas y Sazonadores
## 149429 2 450034 Abarrotes Bebidas
## 168750 1 450037 Abarrotes Granos y Semillas
## 161193 1 450039 Abarrotes Bebidas
## 112970 8 450040 Abarrotes Bebidas
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 107394 Fideos, Spaguetti, Tallarines Sinaloa 53 Esquina Abarrotes
## 167771 Salsa para Spaguetti Sinaloa 53 Esquina Abarrotes
## 149429 Refrescos Retornables Sinaloa 53 Esquina Abarrotes
## 168750 Arroz Sinaloa 53 Esquina Abarrotes
## 161193 Refrescos Plástico (N.R.) Sinaloa 53 Esquina Abarrotes
## 112970 Refrescos Retornables Sinaloa 53 Esquina Abarrotes
## Hora.inicio Hora.cierre fecha Dia_de_la_semana subtotal utilidad
## 107394 7:00 23:00 2020-10-15 3 14 1.62
## 167771 7:00 23:00 2020-10-15 3 12 2.77
## 149429 7:00 23:00 2020-10-15 3 30 3.46
## 168750 7:00 23:00 2020-10-15 3 11 2.54
## 161193 7:00 23:00 2020-10-15 3 10 2.31
## 112970 7:00 23:00 2020-10-15 3 80 2.31
#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
# Build one basket per ticket: every brand (Marca) sold under the same
# F.Ticket, collapsed into a single comma-separated string.
basket <- ddply(
  bd_limpia,
  c("F.Ticket"),
  function(ticket_rows) paste(ticket_rows$Marca, collapse = ",")
)

# View() opens a data viewer and only makes sense in an interactive session;
# skip it otherwise so a non-interactive run does not stop here.
if (interactive()) {
  View(basket)
}

# Keep only the basket strings, under a single column named "Marca".
basket$F.Ticket <- NULL
colnames(basket) <- c("Marca")

# Write and read the basket file through one path so the transactions always
# come from the file produced just above (the previous version wrote to the
# working directory but read from a user-specific absolute path).
basket_path <- "basket.csv"
write.csv(basket, basket_path, quote = FALSE, row.names = FALSE)

# Read the baskets back as arules transactions.
# - skip = 1 drops the "Marca" header line, which would otherwise be counted
#   as a transaction.
# - quote = "" disables quote handling so quote characters inside brand names
#   (presumably apostrophes) are read literally instead of triggering
#   scan()'s "EOF within quoted string" warnings.
# - rm.duplicates = TRUE explicitly collapses a brand that appears more than
#   once in the same ticket instead of relying on an implicit coercion warning.
tr <- read.transactions(
  basket_path,
  format = "basket",
  sep = ",",
  quote = "",
  skip = 1,
  rm.duplicates = TRUE
)
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in asMethod(object): removing duplicated items in transactions
# Mine association rules from the ticket transactions: minimum support 0.001,
# minimum confidence 0.2, and at most 10 items per rule.
reglas.asociacion <- apriori(
  tr,
  parameter = list(support = 0.001, confidence = 0.2, maxlen = 10)
)
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 115
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115111 transaction(s)] done [0.03s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.05s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
#summary(reglas.asociacion)
# Print every mined rule together with its quality measures.
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {FANTA} => {COCA COLA} 0.001051159 0.2439516 0.004308884
## [2] {SALVO} => {FABULOSO} 0.001103283 0.3097561 0.003561779
## [3] {FABULOSO} => {SALVO} 0.001103283 0.2347505 0.004699811
## [4] {COCA COLA ZERO} => {COCA COLA} 0.001416025 0.2969035 0.004769310
## [5] {SPRITE} => {COCA COLA} 0.001346526 0.2069426 0.006506763
## [6] {PINOL} => {CLORALEX} 0.001016410 0.2363636 0.004300197
## [7] {BLUE HOUSE} => {BIMBO} 0.001711392 0.2720994 0.006289581
## [8] {HELLMANN´S} => {BIMBO} 0.001537646 0.2649701 0.005803094
## [9] {REYMA} => {CONVERMEX} 0.002093631 0.2441743 0.008574333
## [10] {FUD} => {BIMBO} 0.001589770 0.2183771 0.007279930
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730 0.010502906
## lift count
## [1] 1.561906 121
## [2] 65.908196 127
## [3] 65.908196 127
## [4] 1.900932 163
## [5] 1.324955 155
## [6] 25.030409 117
## [7] 4.078870 197
## [8] 3.971997 177
## [9] 18.564824 241
## [10] 3.273552 183
## [11] 1.673447 316
# Rank the rules from highest to lowest confidence and print them again.
reglas.asociacion <- sort(
  x = reglas.asociacion,
  decreasing = TRUE,
  by = "confidence"
)
# summary(reglas.asociacion)
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {SALVO} => {FABULOSO} 0.001103283 0.3097561 0.003561779
## [2] {COCA COLA ZERO} => {COCA COLA} 0.001416025 0.2969035 0.004769310
## [3] {BLUE HOUSE} => {BIMBO} 0.001711392 0.2720994 0.006289581
## [4] {HELLMANN´S} => {BIMBO} 0.001537646 0.2649701 0.005803094
## [5] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730 0.010502906
## [6] {REYMA} => {CONVERMEX} 0.002093631 0.2441743 0.008574333
## [7] {FANTA} => {COCA COLA} 0.001051159 0.2439516 0.004308884
## [8] {PINOL} => {CLORALEX} 0.001016410 0.2363636 0.004300197
## [9] {FABULOSO} => {SALVO} 0.001103283 0.2347505 0.004699811
## [10] {FUD} => {BIMBO} 0.001589770 0.2183771 0.007279930
## [11] {SPRITE} => {COCA COLA} 0.001346526 0.2069426 0.006506763
## lift count
## [1] 65.908196 127
## [2] 1.900932 163
## [3] 4.078870 197
## [4] 3.971997 177
## [5] 1.673447 316
## [6] 18.564824 241
## [7] 1.561906 121
## [8] 25.030409 117
## [9] 65.908196 127
## [10] 3.273552 183
## [11] 1.324955 155
# Keep the ten rules with the highest confidence and draw them as an
# interactive (htmlwidget) graph.
top10reglas <- head(
  reglas.asociacion,
  by = "confidence",
  n = 10
)
plot(
  x = top10reglas,
  method = "graph",
  engine = "htmlwidget"
)
Se puede concluir que, al observar estas relaciones entre diferentes productos, entrelazados por la frecuencia con la que son comprados juntos, se pueden realizar promociones en las que se creen combos para vender aún más y a un precio mejor tanto para la empresa como para el cliente.