#install.packages("dplyr")
#install.packages("tidyverse")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.1 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.0
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
#install.packages("janitor")
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
#install.packages("lubridate")
library(lubridate)
#file.choose()
bd <- read.csv("/Users/genarorodriguezalcantara/Desktop/Tec/R Files Manipulación de Datos/BDD/abarrotes.csv")
summary(bd)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.501e+12 Median : 1.00
## Mean :5.950e+12 Mean : 2.11
## 3rd Qu.:7.501e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199188
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
#count(bd, vcClaveTienda, sort=TRUE)
#count(bd, DescGiro, sort=TRUE)
#count(bd, Marca, sort=TRUE)
#count(bd, Fabricante, sort=TRUE)
#count(bd, Producto, sort=TRUE)
#count(bd, NombreDepartamento, sort=TRUE)
#count(bd, NombreFamilia, sort=TRUE)
#count(bd, NombreCategoria, sort=TRUE)
#ount(bd, Estado, sort=TRUE)
#count(bd, Tipo.ubicación, sort=TRUE)
#count(bd, Giro, sort=TRUE)
tibble(bd)
## # A tibble: 200,625 × 22
## vcClaveTienda DescGiro Codig…¹ PLU Fecha Hora Marca Fabri…² Produ…³ Precio
## <chr> <chr> <dbl> <int> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrot… 7.50e12 NA 19/0… 08:1… NUTR… MEXILAC Nutri … 16
## 2 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… DAN … DANONE… DANUP … 14
## 3 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… BIMBO GRUPO … Rebana… 5
## 4 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… PEPSI PEPSI-… Pepsi … 8
## 5 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… BLAN… FABRIC… Deterg… 19.5
## 6 MX001 Abarrot… 7.50e12 NA 19/0… 08:1… NUTR… MEXILAC Nutri … 16
## 7 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… DAN … DANONE… DANUP … 14
## 8 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… BIMBO GRUPO … Rebana… 5
## 9 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… PEPSI PEPSI-… Pepsi … 8
## 10 MX001 Abarrot… 7.50e12 NA 19/0… 08:2… BLAN… FABRIC… Deterg… 19.5
## # … with 200,615 more rows, 12 more variables: Ult.Costo <dbl>, Unidades <dbl>,
## # F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## # NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## # Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable
## # names ¹Codigo.Barras, ²Fabricante, ³Producto
head(bd, n=7)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 1 MX001 Abarrotes 7.50102e+12 NA 19/06/2020 08:16:21
## 2 MX001 Abarrotes 7.50103e+12 NA 19/06/2020 08:23:33
## 3 MX001 Abarrotes 7.50100e+12 NA 19/06/2020 08:24:33
## 4 MX001 Abarrotes 7.50103e+12 NA 19/06/2020 08:24:33
## 5 MX001 Abarrotes 7.50103e+12 NA 19/06/2020 08:26:28
## 6 MX001 Abarrotes 7.50102e+12 NA 19/06/2020 08:16:21
## 7 MX001 Abarrotes 7.50103e+12 NA 19/06/2020 08:23:33
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 NUTRI LECHE MEXILAC
## 7 DAN UP DANONE DE MEXICO
## Producto Precio Ult.Costo Unidades F.Ticket
## 1 Nutri Leche 1 Litro 16.0 12.31 1 1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 1 3
## 4 Pepsi N.R. 400Ml 8.0 8.00 1 3
## 5 Detergente Blanca Nieves 500G 19.5 15.00 1 4
## 6 Nutri Leche 1 Litro 16.0 12.31 1 1
## 7 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## NombreDepartamento NombreFamilia NombreCategoria
## 1 Abarrotes Lacteos y Refrigerados Leche
## 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 Abarrotes Limpieza del Hogar Lavandería
## 6 Abarrotes Lacteos y Refrigerados Leche
## 7 Abarrotes Lacteos y Refrigerados Yogurt
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre
## 1 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 2 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 3 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 4 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 5 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 6 Nuevo León 60 Esquina Abarrotes 8:00 22:00
## 7 Nuevo León 60 Esquina Abarrotes 8:00 22:00
#?tail
tabyl(bd, vcClaveTienda, NombreDepartamento)
## vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
## MX001 95415 515 1 147 245 28
## MX002 6590 21 0 4 10 0
## MX003 4026 15 0 2 8 0
## MX004 82234 932 0 102 114 16
## MX005 10014 0 0 0 0 0
## Papelería Productos a Eliminar Vinos y Licores
## 35 3 80
## 0 0 4
## 0 0 0
## 32 5 20
## 7 0 0
tabyl(bd, NombreFamilia, vcClaveTienda)
## NombreFamilia MX001 MX002 MX003 MX004 MX005
## Accesorios 88 0 0 58 0
## Aceite 346 29 18 1088 2
## Aderezos 544 21 30 909 3
## Alcohol 6 2 0 8 0
## Alimentos 256 9 15 530 0
## Alimentos a Granel 1 0 0 0 0
## Alimentos para Mascotas 300 9 36 533 0
## Analgésicos 0 0 1 0 0
## Antiácido 0 0 1 0 0
## Antigripal 17 0 0 40 0
## Artículos de Escritura 0 0 0 6 0
## Azúcar y Miel 349 0 0 38 4
## Bebidas 38511 3416 1460 21504 27
## Bebidas Premezcladas 0 4 0 19 0
## Botanas 13051 1194 498 5724 1116
## C. Frías y Salchichonería 451 1 143 1528 0
## Cereales 533 7 10 210 0
## Cerveza 4644 196 26 1041 8110
## Cigarros 3775 451 75 2237 279
## Cuadernos 7 0 0 8 0
## Cuidado Personal 1940 117 40 3319 17
## Dermatológicos 33 1 0 20 0
## Desechables 809 38 25 2588 0
## Dulcería 1725 45 108 486 307
## Especias 1596 28 22 3249 26
## Galletas 3259 218 256 3754 0
## Granos y Semillas 1138 18 19 1488 0
## Harinas y Complementos 460 20 43 1237 0
## Lacteos y Refrigerados 6795 139 503 10221 1
## Latería 1540 90 108 3365 4
## Limpieza del Hogar 3771 295 172 4470 16
## Mantecas 203 7 6 581 0
## Material de Curación 46 0 0 11 0
## Materiales y Accesorios 28 0 0 18 7
## Pan y Tortilla 5782 39 294 4387 0
## Pañales 114 8 0 215 0
## Pegamentos 104 8 6 102 0
## Pilas para uso Doméstico 141 2 2 12 0
## Pollo 1 0 0 0 0
## Postres 29 0 2 56 0
## Productos Higiénicos 57 4 0 129 0
## Productos sin Familia 3 0 0 5 0
## Ron 1 0 0 0 0
## Salsas y Sazonadores 1550 94 59 3527 90
## Sangría 13 0 0 0 0
## Sopas y Pastas 1280 65 37 2749 10
## Te, Chocolate y Café 454 42 27 906 2
## Tequila 62 0 0 1 0
## Varios 73 1 0 39 0
## Velas y Veladoras 579 11 9 1039 0
## Whisky 4 0 0 0 0
### Eliminar columnas
bd1 <-bd
bd1 <- subset(bd1, select = -c(PLU, Codigo.Barras))
### Eliminar renglones
bd2 <- bd1
bd2 <- bd2[bd2$Precio >0,]
summary(bd2)
## vcClaveTienda DescGiro Fecha Hora
## Length:200478 Length:200478 Length:200478 Length:200478
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200478 Length:200478 Length:200478 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200478
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33977 Class :character
## Median : 12.31 Median : 1.000 Median :106034 Mode :character
## Mean : 15.31 Mean : 1.261 Mean :194096
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383062
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200478 Length:200478 Length:200478 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200478 Length:200478 Length:200478 Length:200478
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
### ¿Cuántos renglones duplicados tenemos?
bd2[duplicated(bd2),]
## vcClaveTienda DescGiro Fecha Hora Marca
## 6 MX001 Abarrotes 19/06/2020 08:16:21 NUTRI LECHE
## 7 MX001 Abarrotes 19/06/2020 08:23:33 DAN UP
## 8 MX001 Abarrotes 19/06/2020 08:24:33 BIMBO
## 9 MX001 Abarrotes 19/06/2020 08:24:33 PEPSI
## 10 MX001 Abarrotes 19/06/2020 08:26:28 BLANCA NIEVES (DETERGENTE)
## Fabricante Producto Precio
## 6 MEXILAC Nutri Leche 1 Litro 16.0
## 7 DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL 14.0
## 8 GRUPO BIMBO Rebanadas Bimbo 2Pz 5.0
## 9 PEPSI-COLA MEXICANA Pepsi N.R. 400Ml 8.0
## 10 FABRICA DE JABON LA CORONA Detergente Blanca Nieves 500G 19.5
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 6 12.31 1 1 Abarrotes Lacteos y Refrigerados
## 7 14.00 1 2 Abarrotes Lacteos y Refrigerados
## 8 5.00 1 3 Abarrotes Pan y Tortilla
## 9 8.00 1 3 Abarrotes Bebidas
## 10 15.00 1 4 Abarrotes Limpieza del Hogar
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 6 Leche Nuevo León 60 Esquina Abarrotes
## 7 Yogurt Nuevo León 60 Esquina Abarrotes
## 8 Pan Dulce Empaquetado Nuevo León 60 Esquina Abarrotes
## 9 Refrescos Plástico (N.R.) Nuevo León 60 Esquina Abarrotes
## 10 Lavandería Nuevo León 60 Esquina Abarrotes
## Hora.inicio Hora.cierre
## 6 8:00 22:00
## 7 8:00 22:00
## 8 8:00 22:00
## 9 8:00 22:00
## 10 8:00 22:00
sum(duplicated(bd2))
## [1] 5
# Eliminar renglones duplicados.
bd3 <- bd2
bd3 <- distinct(bd3)
### Precios en absoluto
bd4 <- bd1
bd4$Precio <- abs(bd4$Precio)
summary(bd4)
## vcClaveTienda DescGiro Fecha Hora
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200625 Length:200625 Length:200625 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200625
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33964 Class :character
## Median : 12.31 Median : 1.000 Median :105993 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193990
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383005
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200625 Length:200625 Length:200625 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
### Unidades en enteros
bd5 <- bd4
bd5$Unidades <- ceiling(bd5$Unidades)
summary(bd5)
## vcClaveTienda DescGiro Fecha Hora
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200625 Length:200625 Length:200625 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 1.000 Min. : 1 Length:200625
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33964 Class :character
## Median : 12.31 Median : 1.000 Median :105993 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193990
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383005
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200625 Length:200625 Length:200625 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
### Convertir de caracter a fecha
bd6 <- bd3
bd6$Fecha <- as.Date(bd6$Fecha, format = "%d/%m/%Y")
summary(bd6)
## vcClaveTienda DescGiro Fecha Hora
## Length:200473 Length:200473 Min. :2020-05-01 Length:200473
## Class :character Class :character 1st Qu.:2020-06-06 Class :character
## Mode :character Mode :character Median :2020-07-11 Mode :character
## Mean :2020-07-18
## 3rd Qu.:2020-08-29
## Max. :2020-11-11
## Marca Fabricante Producto Precio
## Length:200473 Length:200473 Length:200473 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200473
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33978 Class :character
## Median : 12.31 Median : 1.000 Median :106035 Mode :character
## Mean : 15.31 Mean : 1.261 Mean :194101
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383065
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200473 Length:200473 Length:200473 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200473 Length:200473 Length:200473 Length:200473
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
tibble(bd6)
## # A tibble: 200,473 × 20
## vcCla…¹ DescG…² Fecha Hora Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
## <chr> <chr> <date> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 MX001 Abarro… 2020-06-19 08:1… NUTR… MEXILAC Nutri … 16 12.3 1
## 2 MX001 Abarro… 2020-06-19 08:2… DAN … DANONE… DANUP … 14 14 1
## 3 MX001 Abarro… 2020-06-19 08:2… BIMBO GRUPO … Rebana… 5 5 1
## 4 MX001 Abarro… 2020-06-19 08:2… PEPSI PEPSI-… Pepsi … 8 8 1
## 5 MX001 Abarro… 2020-06-19 08:2… BLAN… FABRIC… Deterg… 19.5 15 1
## 6 MX001 Abarro… 2020-06-19 08:2… FLASH ALEN Flash … 9.5 7.31 1
## 7 MX001 Abarro… 2020-06-19 08:2… VARI… DANONE… Danone… 11 11 1
## 8 MX001 Abarro… 2020-06-19 08:2… ZOTE FABRIC… Jabon … 9.5 7.31 1
## 9 MX001 Abarro… 2020-06-19 08:2… ALWA… PROCTE… T Feme… 23.5 18.1 1
## 10 MX001 Abarro… 2020-06-19 15:2… JUMEX JUMEX Jugo D… 12 12 1
## # … with 200,463 more rows, 10 more variables: F.Ticket <int>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## # ¹vcClaveTienda, ²DescGiro, ³Fabricante, ⁴Producto, ⁵Ult.Costo, ⁶Unidades
### Convertir de caracter a entero
bd7 <- bd6
bd7$Hora <- substr(bd7$Hora, start = 1, stop = 2)
tibble(bd7)
## # A tibble: 200,473 × 20
## vcCla…¹ DescG…² Fecha Hora Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
## <chr> <chr> <date> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 MX001 Abarro… 2020-06-19 08 NUTR… MEXILAC Nutri … 16 12.3 1
## 2 MX001 Abarro… 2020-06-19 08 DAN … DANONE… DANUP … 14 14 1
## 3 MX001 Abarro… 2020-06-19 08 BIMBO GRUPO … Rebana… 5 5 1
## 4 MX001 Abarro… 2020-06-19 08 PEPSI PEPSI-… Pepsi … 8 8 1
## 5 MX001 Abarro… 2020-06-19 08 BLAN… FABRIC… Deterg… 19.5 15 1
## 6 MX001 Abarro… 2020-06-19 08 FLASH ALEN Flash … 9.5 7.31 1
## 7 MX001 Abarro… 2020-06-19 08 VARI… DANONE… Danone… 11 11 1
## 8 MX001 Abarro… 2020-06-19 08 ZOTE FABRIC… Jabon … 9.5 7.31 1
## 9 MX001 Abarro… 2020-06-19 08 ALWA… PROCTE… T Feme… 23.5 18.1 1
## 10 MX001 Abarro… 2020-06-19 15 JUMEX JUMEX Jugo D… 12 12 1
## # … with 200,463 more rows, 10 more variables: F.Ticket <int>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## # ¹vcClaveTienda, ²DescGiro, ³Fabricante, ⁴Producto, ⁵Ult.Costo, ⁶Unidades
bd7$Hora <- as.integer(bd7$Hora)
str(bd7)
## 'data.frame': 200473 obs. of 20 variables:
## $ vcClaveTienda : chr "MX001" "MX001" "MX001" "MX001" ...
## $ DescGiro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Fecha : Date, format: "2020-06-19" "2020-06-19" ...
## $ Hora : int 8 8 8 8 8 8 8 8 8 15 ...
## $ Marca : chr "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
## $ Fabricante : chr "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
## $ Producto : chr "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
## $ Precio : num 16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
## $ Ult.Costo : num 12.3 14 5 8 15 ...
## $ Unidades : num 1 1 1 1 1 1 1 1 1 1 ...
## $ F.Ticket : int 1 2 3 3 4 4 4 4 4 5 ...
## $ NombreDepartamento: chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ NombreFamilia : chr "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
## $ NombreCategoria : chr "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
## $ Estado : chr "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
## $ Mts.2 : int 60 60 60 60 60 60 60 60 60 60 ...
## $ Tipo.ubicación : chr "Esquina" "Esquina" "Esquina" "Esquina" ...
## $ Giro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Hora.inicio : chr "8:00" "8:00" "8:00" "8:00" ...
## $ Hora.cierre : chr "22:00" "22:00" "22:00" "22:00" ...
### ¿Cuántos NA tengo en la base de datos?
sum(is.na(bd7))
## [1] 0
sum(is.na(bd))
## [1] 199188
### ¿Cuántos NA tengo por variable?
sapply(bd, function(x) sum(is.na(x)))
## vcClaveTienda DescGiro Codigo.Barras PLU
## 0 0 0 199188
## Fecha Hora Marca Fabricante
## 0 0 0 0
## Producto Precio Ult.Costo Unidades
## 0 0 0 0
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## 0 0 0 0
## Estado Mts.2 Tipo.ubicación Giro
## 0 0 0 0
## Hora.inicio Hora.cierre
## 0 0
### Borrar todos los registros NA de una tabla
bd8 <- bd
bd8 <- na.omit(bd8)
summary(bd8)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:1437 Length:1437 Min. :6.750e+08 Min. : 1.000
## Class :character Class :character 1st Qu.:6.750e+08 1st Qu.: 1.000
## Mode :character Mode :character Median :6.750e+08 Median : 1.000
## Mean :2.616e+11 Mean : 2.112
## 3rd Qu.:6.750e+08 3rd Qu.: 1.000
## Max. :7.501e+12 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:1437 Length:1437 Length:1437 Length:1437
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:1437 Min. :30.00 Min. : 1.00 Min. :1.000
## Class :character 1st Qu.:90.00 1st Qu.:64.62 1st Qu.:1.000
## Mode :character Median :90.00 Median :64.62 Median :1.000
## Mean :87.94 Mean :56.65 Mean :1.124
## 3rd Qu.:90.00 3rd Qu.:64.62 3rd Qu.:1.000
## Max. :90.00 Max. :64.62 Max. :7.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 772 Length:1437 Length:1437 Length:1437
## 1st Qu.: 99955 Class :character Class :character Class :character
## Median :102493 Mode :character Mode :character Mode :character
## Mean :100595
## 3rd Qu.:106546
## Max. :118356
## Estado Mts.2 Tipo.ubicación Giro
## Length:1437 Min. :58.00 Length:1437 Length:1437
## Class :character 1st Qu.:58.00 Class :character Class :character
## Mode :character Median :58.00 Mode :character Mode :character
## Mean :58.07
## 3rd Qu.:58.00
## Max. :60.00
## Hora.inicio Hora.cierre
## Length:1437 Length:1437
## Class :character Class :character
## Mode :character Mode :character
##
##
##
### Reemplazar los NA con CEROS
bd9 <- bd
bd9[is.na(bd9)] <-0
summary(bd9)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 0.00000
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 0.00000
## Mode :character Mode :character Median :7.501e+12 Median : 0.00000
## Mean :5.950e+12 Mean : 0.01513
## 3rd Qu.:7.501e+12 3rd Qu.: 0.00000
## Max. :1.750e+13 Max. :30.00000
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class :character Class :character
## Mode :character Mode :character
##
##
##
###Reemplazar los NA con el PROMEDIO
bd10 <- bd
bd10$PLU[is.na(bd10$PLU)] <- mean(bd10$PLU, na.rm = TRUE)
tibble(bd10)
## # A tibble: 200,625 × 22
## vcClaveTienda DescGiro Codig…¹ PLU Fecha Hora Marca Fabri…² Produ…³ Precio
## <chr> <chr> <dbl> <dbl> <chr> <chr> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrot… 7.50e12 2.11 19/0… 08:1… NUTR… MEXILAC Nutri … 16
## 2 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… DAN … DANONE… DANUP … 14
## 3 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… BIMBO GRUPO … Rebana… 5
## 4 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… PEPSI PEPSI-… Pepsi … 8
## 5 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… BLAN… FABRIC… Deterg… 19.5
## 6 MX001 Abarrot… 7.50e12 2.11 19/0… 08:1… NUTR… MEXILAC Nutri … 16
## 7 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… DAN … DANONE… DANUP … 14
## 8 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… BIMBO GRUPO … Rebana… 5
## 9 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… PEPSI PEPSI-… Pepsi … 8
## 10 MX001 Abarrot… 7.50e12 2.11 19/0… 08:2… BLAN… FABRIC… Deterg… 19.5
## # … with 200,615 more rows, 12 more variables: Ult.Costo <dbl>, Unidades <dbl>,
## # F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## # NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## # Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable
## # names ¹Codigo.Barras, ²Fabricante, ³Producto
bd11 <- bd7
boxplot(bd11$Precio, horizontal = TRUE)
boxplot(bd11$Unidades, horizontal = TRUE)
### Agregar columnas
bd11$diadelasemana <- wday(bd11$Fecha)
summary(bd11)
## vcClaveTienda DescGiro Fecha Hora
## Length:200473 Length:200473 Min. :2020-05-01 Min. : 0.00
## Class :character Class :character 1st Qu.:2020-06-06 1st Qu.:13.00
## Mode :character Mode :character Median :2020-07-11 Median :17.00
## Mean :2020-07-18 Mean :16.23
## 3rd Qu.:2020-08-29 3rd Qu.:20.00
## Max. :2020-11-11 Max. :23.00
## Marca Fabricante Producto Precio
## Length:200473 Length:200473 Length:200473 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200473
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33978 Class :character
## Median : 12.31 Median : 1.000 Median :106035 Mode :character
## Mean : 15.31 Mean : 1.261 Mean :194101
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383065
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200473 Length:200473 Length:200473 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200473 Length:200473 Length:200473 Length:200473
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## diadelasemana
## Min. :1.000
## 1st Qu.:2.000
## Median :4.000
## Mean :3.911
## 3rd Qu.:6.000
## Max. :7.000
### Agregar columna de ventas
bd11$subtotal <- bd11$Precio * bd11$Unidades
summary(bd11)
## vcClaveTienda DescGiro Fecha Hora
## Length:200473 Length:200473 Min. :2020-05-01 Min. : 0.00
## Class :character Class :character 1st Qu.:2020-06-06 1st Qu.:13.00
## Mode :character Mode :character Median :2020-07-11 Median :17.00
## Mean :2020-07-18 Mean :16.23
## 3rd Qu.:2020-08-29 3rd Qu.:20.00
## Max. :2020-11-11 Max. :23.00
## Marca Fabricante Producto Precio
## Length:200473 Length:200473 Length:200473 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200473
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33978 Class :character
## Median : 12.31 Median : 1.000 Median :106035 Mode :character
## Mean : 15.31 Mean : 1.261 Mean :194101
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383065
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200473 Length:200473 Length:200473 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200473 Length:200473 Length:200473 Length:200473
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## diadelasemana subtotal
## Min. :1.000 Min. : 1.0
## 1st Qu.:2.000 1st Qu.: 12.0
## Median :4.000 Median : 18.0
## Mean :3.911 Mean : 24.3
## 3rd Qu.:6.000 3rd Qu.: 27.0
## Max. :7.000 Max. :2496.0
bd_abarrotes_limpia <- bd11
write.csv(bd_abarrotes_limpia, file = "bd_abarrotes_limpia.csv", row.names = FALSE)
#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
#install.packages("Matrix")
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
#install.packages("arules")
library(arules)
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
#install.packages("arulesViz")
library(arulesViz)
#install.packages("datasets")
library(datasets)
#file.choose()
bd_limpia <- read.csv("/Users/genarorodriguezalcantara/Desktop/Tec/R Files Manipulación de Datos/BDD/bd_abarrotes_limpia.csv")
#bd_limpia <- order(bd_limpia$F.Ticket)
head(bd_limpia)
## vcClaveTienda DescGiro Fecha Hora Marca
## 1 MX001 Abarrotes 2020-06-19 8 NUTRI LECHE
## 2 MX001 Abarrotes 2020-06-19 8 DAN UP
## 3 MX001 Abarrotes 2020-06-19 8 BIMBO
## 4 MX001 Abarrotes 2020-06-19 8 PEPSI
## 5 MX001 Abarrotes 2020-06-19 8 BLANCA NIEVES (DETERGENTE)
## 6 MX001 Abarrotes 2020-06-19 8 FLASH
## Fabricante Producto Precio
## 1 MEXILAC Nutri Leche 1 Litro 16.0
## 2 DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL 14.0
## 3 GRUPO BIMBO Rebanadas Bimbo 2Pz 5.0
## 4 PEPSI-COLA MEXICANA Pepsi N.R. 400Ml 8.0
## 5 FABRICA DE JABON LA CORONA Detergente Blanca Nieves 500G 19.5
## 6 ALEN Flash Xtra Brisa Marina 500Ml 9.5
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 1 12.31 1 1 Abarrotes Lacteos y Refrigerados
## 2 14.00 1 2 Abarrotes Lacteos y Refrigerados
## 3 5.00 1 3 Abarrotes Pan y Tortilla
## 4 8.00 1 3 Abarrotes Bebidas
## 5 15.00 1 4 Abarrotes Limpieza del Hogar
## 6 7.31 1 4 Abarrotes Limpieza del Hogar
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 1 Leche Nuevo León 60 Esquina Abarrotes
## 2 Yogurt Nuevo León 60 Esquina Abarrotes
## 3 Pan Dulce Empaquetado Nuevo León 60 Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León 60 Esquina Abarrotes
## 5 Lavandería Nuevo León 60 Esquina Abarrotes
## 6 Limpiadores Líquidos Nuevo León 60 Esquina Abarrotes
## Hora.inicio Hora.cierre diadelasemana subtotal
## 1 8:00 22:00 6 16.0
## 2 8:00 22:00 6 14.0
## 3 8:00 22:00 6 5.0
## 4 8:00 22:00 6 8.0
## 5 8:00 22:00 6 19.5
## 6 8:00 22:00 6 9.5
tail(bd_limpia)
## vcClaveTienda DescGiro Fecha Hora Marca Fabricante
## 200468 MX005 Depósito 2020-07-12 1 TRIDENT XTRA CARE CADBURY ADAMS
## 200469 MX005 Depósito 2020-10-23 22 TRIDENT XTRA CARE CADBURY ADAMS
## 200470 MX005 Depósito 2020-10-10 20 TRIDENT XTRA CARE CADBURY ADAMS
## 200471 MX005 Depósito 2020-10-10 22 TRIDENT XTRA CARE CADBURY ADAMS
## 200472 MX005 Depósito 2020-06-27 22 TRIDENT XTRA CARE CADBURY ADAMS
## 200473 MX005 Depósito 2020-06-26 23 TRIDENT XTRA CARE CADBURY ADAMS
## Producto Precio Ult.Costo Unidades F.Ticket
## 200468 Trident Xtracare Freshmint 16.32G 9 6.92 1 103100
## 200469 Trident Xtracare Freshmint 16.32G 9 6.92 1 116598
## 200470 Trident Xtracare Freshmint 16.32G 9 6.92 1 114886
## 200471 Trident Xtracare Freshmint 16.32G 9 6.92 1 114955
## 200472 Trident Xtracare Freshmint 16.32G 9 6.92 1 101121
## 200473 Trident Xtracare Freshmint 16.32G 9 6.92 1 100879
## NombreDepartamento NombreFamilia NombreCategoria Estado Mts.2
## 200468 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200469 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200470 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200471 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200472 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200473 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## Tipo.ubicación Giro Hora.inicio Hora.cierre diadelasemana subtotal
## 200468 Esquina Mini súper 8:00 21:00 1 9
## 200469 Esquina Mini súper 8:00 21:00 6 9
## 200470 Esquina Mini súper 8:00 21:00 7 9
## 200471 Esquina Mini súper 8:00 21:00 7 9
## 200472 Esquina Mini súper 8:00 21:00 7 9
## 200473 Esquina Mini súper 8:00 21:00 6 9
basket <- ddply(bd_limpia, c("F.Ticket"),function(bd_limpia)paste(bd_limpia$Marca,collapse=","))
head(bd_limpia)
## vcClaveTienda DescGiro Fecha Hora Marca
## 1 MX001 Abarrotes 2020-06-19 8 NUTRI LECHE
## 2 MX001 Abarrotes 2020-06-19 8 DAN UP
## 3 MX001 Abarrotes 2020-06-19 8 BIMBO
## 4 MX001 Abarrotes 2020-06-19 8 PEPSI
## 5 MX001 Abarrotes 2020-06-19 8 BLANCA NIEVES (DETERGENTE)
## 6 MX001 Abarrotes 2020-06-19 8 FLASH
## Fabricante Producto Precio
## 1 MEXILAC Nutri Leche 1 Litro 16.0
## 2 DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL 14.0
## 3 GRUPO BIMBO Rebanadas Bimbo 2Pz 5.0
## 4 PEPSI-COLA MEXICANA Pepsi N.R. 400Ml 8.0
## 5 FABRICA DE JABON LA CORONA Detergente Blanca Nieves 500G 19.5
## 6 ALEN Flash Xtra Brisa Marina 500Ml 9.5
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 1 12.31 1 1 Abarrotes Lacteos y Refrigerados
## 2 14.00 1 2 Abarrotes Lacteos y Refrigerados
## 3 5.00 1 3 Abarrotes Pan y Tortilla
## 4 8.00 1 3 Abarrotes Bebidas
## 5 15.00 1 4 Abarrotes Limpieza del Hogar
## 6 7.31 1 4 Abarrotes Limpieza del Hogar
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 1 Leche Nuevo León 60 Esquina Abarrotes
## 2 Yogurt Nuevo León 60 Esquina Abarrotes
## 3 Pan Dulce Empaquetado Nuevo León 60 Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León 60 Esquina Abarrotes
## 5 Lavandería Nuevo León 60 Esquina Abarrotes
## 6 Limpiadores Líquidos Nuevo León 60 Esquina Abarrotes
## Hora.inicio Hora.cierre diadelasemana subtotal
## 1 8:00 22:00 6 16.0
## 2 8:00 22:00 6 14.0
## 3 8:00 22:00 6 5.0
## 4 8:00 22:00 6 8.0
## 5 8:00 22:00 6 19.5
## 6 8:00 22:00 6 9.5
head(basket)
## F.Ticket V1
## 1 1 NUTRI LECHE
## 2 2 DAN UP
## 3 3 BIMBO,PEPSI
## 4 4 BLANCA NIEVES (DETERGENTE),FLASH,VARIOS DANONE,ZOTE,ALWAYS
## 5 5 JUMEX,PEPSI
## 6 6 VALLE FRUT
basket$F.Ticket <- NULL
colnames(basket) <- c("Marca")
#write.csv(basket,"basket.csv",quote = FALSE, row.names = FALSE)
#file.choose()
tra <- read.transactions("/Users/genarorodriguezalcantara/Desktop/Tec/R Files Manipulación de Datos/BDD/basket.csv", format = "basket",sep = ",")
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in asMethod(object): removing duplicated items in transactions
##Conclusiones Estratégicas ### 1. Ubicar productos Salvo junto con Fabuloso… ### 2. Promoción en Reyma, Pinol, Queso/Jamón/Mayonesa, Zero/Light/Fanta… ### 3. Realizar un Business Case para venta de sandwiches preparados…
reglas_as <- apriori(tra, parameter = list(supp=0.001, conf=0.2, maxlen=10))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 115
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115031 transaction(s)] done [0.02s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.02s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
summary(reglas_as)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.2069 Min. :0.003564 Min. : 1.326
## 1st Qu.:0.001104 1st Qu.:0.2358 1st Qu.:0.004507 1st Qu.: 1.789
## Median :0.001417 Median :0.2442 Median :0.005807 Median : 3.972
## Mean :0.001521 Mean :0.2537 Mean :0.006056 Mean :17.558
## 3rd Qu.:0.001652 3rd Qu.:0.2685 3rd Qu.:0.006894 3rd Qu.:21.808
## Max. :0.002747 Max. :0.3098 Max. :0.010502 Max. :65.862
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tra 115031 0.001 0.2
## call
## apriori(data = tra, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas_as)
## lhs rhs support confidence coverage
## [1] {FANTA} => {COCA COLA} 0.001051890 0.2439516 0.004311881
## [2] {SALVO} => {FABULOSO} 0.001104050 0.3097561 0.003564257
## [3] {FABULOSO} => {SALVO} 0.001104050 0.2347505 0.004703080
## [4] {COCA COLA ZERO} => {COCA COLA} 0.001417009 0.2969035 0.004772627
## [5] {SPRITE} => {COCA COLA} 0.001347463 0.2069426 0.006511288
## [6] {PINOL} => {CLORALEX} 0.001017117 0.2368421 0.004294495
## [7] {BLUE HOUSE} => {BIMBO} 0.001712582 0.2720994 0.006293956
## [8] {HELLMANN´S} => {BIMBO} 0.001538716 0.2649701 0.005807130
## [9] {REYMA} => {CONVERMEX} 0.002095087 0.2441743 0.008580296
## [10] {FUD} => {BIMBO} 0.001590876 0.2186380 0.007276299
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894 0.010501517
## lift count
## [1] 1.562646 121
## [2] 65.862391 127
## [3] 65.862391 127
## [4] 1.901832 163
## [5] 1.325583 155
## [6] 25.063647 117
## [7] 4.078691 197
## [8] 3.971823 177
## [9] 18.551922 241
## [10] 3.277319 183
## [11] 1.675626 316
reglas_as <- sort(reglas_as, by="confidence", decreasing = TRUE)
summary(reglas_as)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001017 Min. :0.2069 Min. :0.003564 Min. : 1.326
## 1st Qu.:0.001104 1st Qu.:0.2358 1st Qu.:0.004507 1st Qu.: 1.789
## Median :0.001417 Median :0.2442 Median :0.005807 Median : 3.972
## Mean :0.001521 Mean :0.2537 Mean :0.006056 Mean :17.558
## 3rd Qu.:0.001652 3rd Qu.:0.2685 3rd Qu.:0.006894 3rd Qu.:21.808
## Max. :0.002747 Max. :0.3098 Max. :0.010502 Max. :65.862
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tra 115031 0.001 0.2
## call
## apriori(data = tra, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas_as)
## lhs rhs support confidence coverage
## [1] {SALVO} => {FABULOSO} 0.001104050 0.3097561 0.003564257
## [2] {COCA COLA ZERO} => {COCA COLA} 0.001417009 0.2969035 0.004772627
## [3] {BLUE HOUSE} => {BIMBO} 0.001712582 0.2720994 0.006293956
## [4] {HELLMANN´S} => {BIMBO} 0.001538716 0.2649701 0.005807130
## [5] {COCA COLA LIGHT} => {COCA COLA} 0.002747086 0.2615894 0.010501517
## [6] {REYMA} => {CONVERMEX} 0.002095087 0.2441743 0.008580296
## [7] {FANTA} => {COCA COLA} 0.001051890 0.2439516 0.004311881
## [8] {PINOL} => {CLORALEX} 0.001017117 0.2368421 0.004294495
## [9] {FABULOSO} => {SALVO} 0.001104050 0.2347505 0.004703080
## [10] {FUD} => {BIMBO} 0.001590876 0.2186380 0.007276299
## [11] {SPRITE} => {COCA COLA} 0.001347463 0.2069426 0.006511288
## lift count
## [1] 65.862391 127
## [2] 1.901832 163
## [3] 4.078691 197
## [4] 3.971823 177
## [5] 1.675626 316
## [6] 18.551922 241
## [7] 1.562646 121
## [8] 25.063647 117
## [9] 65.862391 127
## [10] 3.277319 183
## [11] 1.325583 155
top10reglas <- head(reglas_as, n=10, by="confidence")
plot(top10reglas, method = "graph", engine = "htmlwidget")