file.choose()
bd <- read.csv("/Users/benjaminreyessanchez/Desktop/Analítica PN/Archivos R Studio/Abarrotes_Ventas.csv")
Esta base de datos contiene información sobre las ventas de 5 tiendas ubicadas en distintas ciudades de México.
summary(bd)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.501e+12 Median : 1.00
## Mean :5.950e+12 Mean : 2.11
## 3rd Qu.:7.501e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199183
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383008
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
Instalar paquetes = install,packages(“dplyr”) Llamar librería
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
count(bd)
## n
## 1 200620
count(bd, vcClaveTienda, sort = TRUE) # Sort para ordenar de mayor a menor
## vcClaveTienda n
## 1 MX001 96464
## 2 MX004 83455
## 3 MX005 10021
## 4 MX002 6629
## 5 MX003 4051
count(bd, DescGiro, sort = TRUE)
## DescGiro n
## 1 Abarrotes 100515
## 2 Carnicería 83455
## 3 Depósito 16650
#count(bd,Marca,sort = TRUE)
#count(bd, Fabricante, sort = TRUE)
#count(bd, Producto, sort = TRUE) (SE DESPLEGABA TODA LA LISTA COMPLETA)
count(bd, NombreDepartamento, sort = TRUE)
## NombreDepartamento n
## 1 Abarrotes 198274
## 2 Bebes e Infantiles 1483
## 3 Ferretería 377
## 4 Farmacia 255
## 5 Vinos y Licores 104
## 6 Papelería 74
## 7 Mercería 44
## 8 Productos a Eliminar 8
## 9 Carnes 1
count(bd, NombreFamilia, sort = TRUE)
## NombreFamilia n
## 1 Bebidas 64917
## 2 Botanas 21583
## 3 Lacteos y Refrigerados 17657
## 4 Cerveza 14017
## 5 Pan y Tortilla 10501
## 6 Limpieza del Hogar 8723
## 7 Galletas 7487
## 8 Cigarros 6817
## 9 Cuidado Personal 5433
## 10 Salsas y Sazonadores 5320
## 11 Latería 5107
## 12 Especias 4921
## 13 Sopas y Pastas 4141
## 14 Desechables 3460
## 15 Dulcería 2671
## 16 Granos y Semillas 2663
## 17 C. Frías y Salchichonería 2123
## 18 Harinas y Complementos 1760
## 19 Velas y Veladoras 1638
## 20 Aderezos 1507
## 21 Aceite 1483
## 22 Te, Chocolate y Café 1431
## 23 Alimentos para Mascotas 878
## 24 Alimentos 810
## 25 Mantecas 797
## 26 Cereales 760
## 27 Azúcar y Miel 391
## 28 Pañales 337
## 29 Pegamentos 220
## 30 Productos Higiénicos 190
## 31 Pilas para uso Doméstico 157
## 32 Accesorios 146
## 33 Varios 113
## 34 Postres 87
## 35 Tequila 63
## 36 Antigripal 57
## 37 Material de Curación 57
## 38 Dermatológicos 54
## 39 Materiales y Accesorios 53
## 40 Bebidas Premezcladas 23
## 41 Alcohol 16
## 42 Cuadernos 15
## 43 Sangría 13
## 44 Productos sin Familia 8
## 45 Artículos de Escritura 6
## 46 Whisky 4
## 47 Alimentos a Granel 1
## 48 Analgésicos 1
## 49 Antiácido 1
## 50 Pollo 1
## 51 Ron 1
count(bd, NombreCategoria, sort = TRUE)
## NombreCategoria n
## 1 Refrescos Plástico (N.R.) 32861
## 2 Refrescos Retornables 13880
## 3 Frituras 11082
## 4 Lata 8150
## 5 Leche 7053
## 6 Cajetilla 6329
## 7 Botella 5867
## 8 Productos sin Categoria 5455
## 9 Papas Fritas 5344
## 10 Jugos y Néctares 5295
## 11 Lavandería 4586
## 12 Pan Dulce Empaquetado 4465
## 13 Platos y Vasos 3271
## 14 Concentrados en Polvo 3211
## 15 Energizantes e Hidratantes 3077
## 16 Aguas 2985
## 17 Quesos 2912
## 18 Aguas Saborizadas 2630
## 19 Salsa para Spaguetti 2485
## 20 Cremas 2447
## 21 Yogurt 2373
## 22 Pastelillos 2162
## 23 Chiles Envasados 2125
## 24 Arroz 2047
## 25 Papel Higiénico 2035
## 26 Saladas 1812
## 27 Especias 1761
## 28 Consome 1604
## 29 Limpiadores Líquidos 1603
## 30 Leche Saborizada 1559
## 31 Tradicionales 1553
## 32 Sopas Instantáneas 1494
## 33 Fideos, Spaguetti, Tallarines 1404
## 34 Verduras y Vegetales 1366
## 35 Pastas de Figuritas 1243
## 36 Pan para Hamburguesa, HotDog y Tortas 1136
## 37 Blanqueador de Ropa 1123
## 38 Tostadas 1070
## 39 Café 1066
## 40 Higiene Femenina 999
## 41 Chocolate 963
## 42 Lavatrastes 962
## 43 Pan de Barra Blanco 954
## 44 Frijoles en Lata 949
## 45 Populares 915
## 46 Chocolates 897
## 47 Salsa Picante 874
## 48 Surtido 859
## 49 Empanizador 857
## 50 Mayonesa 820
## 51 Sal 806
## 52 Cacahuates 742
## 53 Sandwich 720
## 54 Comida para Perros 711
## 55 Sazonadores 706
## 56 Postres Refrigerados 686
## 57 Malvavisco 671
## 58 Cuidado del Cabello 657
## 59 Chicharrón de Harina 628
## 60 Mole 621
## 61 Salchicha para Asar 580
## 62 Pasta Dental 578
## 63 Trigo 569
## 64 Rellenas 537
## 65 Pan de Barra Integral 534
## 66 Maíz 528
## 67 Gomas de Mazcar 515
## 68 Refrescos en Lata 483
## 69 Nachos 480
## 70 Jamón de Pavo 471
## 71 Paletas 471
## 72 Cuidado de la Piel 444
## 73 Salchicha HotDog y Viena 431
## 74 Catsup 424
## 75 Atún 414
## 76 Semillas 411
## 77 Afeitado y Depilación 408
## 78 Té helado 381
## 79 Chile Seco 367
## 80 Chicharrón de Cerdo 339
## 81 Frijol Varios 336
## 82 Papillas 334
## 83 Palomitas para Microondas 329
## 84 Insecticidas y Venenos 326
## 85 Salsa Botanera 318
## 86 Azúcar 311
## 87 Cerillos 305
## 88 Cremas de Nieve 305
## 89 Chorizo 301
## 90 Margarina 298
## 91 Frijol Bayo 296
## 92 Pastillas 293
## 93 Pan Tostado y Crotones 282
## 94 Leche en Polvo 281
## 95 Palomitas 258
## 96 Barras Energéticas 254
## 97 Azucarados 191
## 98 Jamón de Cerdo 186
## 99 Encendedores 183
## 100 Comida para Gatos 167
## 101 Harina para Pastel 165
## 102 Mezclas para Cerveza 164
## 103 Bicarbonato de Sodio 159
## 104 Mantequilla 159
## 105 Polvo para Hornear 155
## 106 Atoles 151
## 107 Fruta Acidulada 148
## 108 Tocino 143
## 109 Saborizante para Leche 142
## 110 Cepillo Dental 134
## 111 Jugos 129
## 112 Achiotes 127
## 113 Desodorantes 119
## 114 Vinagre 118
## 115 Productos del mar Envasados 116
## 116 Hot Cakes 114
## 117 Tropicales 114
## 118 Varios 114
## 119 Leches y Cremas 112
## 120 Sal Combinada 103
## 121 Frutas secas 102
## 122 Servilletas 102
## 123 Helados 96
## 124 Tamarindos 94
## 125 Mostaza 90
## 126 Dulce Macizo 83
## 127 Mazapán 82
## 128 Avena 81
## 129 Mermeladas 80
## 130 Puré de Papa 78
## 131 Avena y Fibra 67
## 132 Miel 66
## 133 Gelatinas en Polvo y Ligth 61
## 134 Cucharas, Tenedores y Cuchillos 60
## 135 Tortillas de Harina e Integrales 58
## 136 Hojuelas de Maíz 57
## 137 Aderezo para Ensalada 55
## 138 Polvos 51
## 139 Cal Hidratada 50
## 140 Limpieza de Calzado 49
## 141 Antojitos Mexicanos 48
## 142 Aromatizantes de Ambiente 48
## 143 Chocolate Macizo 47
## 144 Finas 47
## 145 Limpieza de Cocina 41
## 146 Tortillas de Maíz 27
## 147 Artículos de Limpieza 26
## 148 Maíz Rocero 26
## 149 Pizzas 26
## 150 Repostería 26
## 151 Extractos 25
## 152 Frutas Envasadas 25
## 153 Palillos de Madera 25
## 154 Charal 23
## 155 Te 20
## 156 Azúcar Glass 19
## 157 Camarón Seco 19
## 158 Salsa Casera 17
## 159 Lentejas 15
## 160 Carne Seca 12
## 161 Mortadela 11
## 162 Cosméticos 9
## 163 Salsas Varias 8
## 164 Tendederos y Pinzas 8
## 165 Bombónes 7
## 166 Chiclosos 6
## 167 Gomas 6
## 168 Pistaches 6
## 169 Sustituto de Crema 5
## 170 Aluminios y Envolturas 2
## 171 Flan en Polvo 2
## 172 Fibras 1
## 173 Light 1
## 174 Pañal para Adulto 1
count(bd, Estado, sort = TRUE)
## Estado n
## 1 Nuevo León 96464
## 2 Sinaloa 83455
## 3 Quintana Roo 10021
## 4 Jalisco 6629
## 5 Chiapas 4051
count(bd, Tipo.ubicación, sort = TRUE)
## Tipo.ubicación n
## 1 Esquina 189940
## 2 Rotonda 6629
## 3 Entre calles 4051
count(bd, Giro, sort = TRUE)
## Giro n
## 1 Abarrotes 183970
## 2 Mini súper 16650
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ readr 2.1.4
## ✔ ggplot2 3.4.1 ✔ stringr 1.5.0
## ✔ lubridate 1.9.2 ✔ tibble 3.2.1
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
tibble(bd) #ver tabla 10 renglones y tipo de variable
## # A tibble: 200,620 × 22
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora Marca Fabricante
## <chr> <chr> <dbl> <int> <chr> <chr> <chr> <chr>
## 1 MX001 Abarrotes 7501020000000 NA Friday, 1… 08:1… NUTR… MEXILAC
## 2 MX001 Abarrotes 7501030000000 NA Friday, 1… 08:2… DAN … DANONE DE…
## 3 MX001 Abarrotes 7501000000000 NA Friday, 1… 08:2… BIMBO GRUPO BIM…
## 4 MX001 Abarrotes 7501030000000 NA Friday, 1… 08:2… PEPSI PEPSI-COL…
## 5 MX001 Abarrotes 7501030000000 NA Friday, 1… 08:2… BLAN… FABRICA D…
## 6 MX001 Abarrotes 7501030000000 NA Friday, 1… 08:2… FLASH ALEN
## 7 MX001 Abarrotes 7501030000000 NA Friday, 1… 08:2… VARI… DANONE DE…
## 8 MX001 Abarrotes 7501030000000 NA Friday, 1… 08:2… ZOTE FABRICA D…
## 9 MX001 Abarrotes 7506200000000 NA Friday, 1… 08:2… ALWA… PROCTER &…
## 10 MX001 Abarrotes 32239052017 NA Friday, 1… 15:2… JUMEX JUMEX
## # ℹ 200,610 more rows
## # ℹ 14 more variables: Producto <chr>, Precio <dbl>, Ult.Costo <dbl>,
## # Unidades <dbl>, F.Ticket <int>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts.2 <int>,
## # Tipo.ubicación <chr>, Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>
str(bd) #ver tabla, tipo dee variable y algunos ejemplos
## 'data.frame': 200620 obs. of 22 variables:
## $ vcClaveTienda : chr "MX001" "MX001" "MX001" "MX001" ...
## $ DescGiro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Codigo.Barras : num 7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
## $ PLU : int NA NA NA NA NA NA NA NA NA NA ...
## $ Fecha : chr "Friday, 19 June 2020" "Friday, 19 June 2020" "Friday, 19 June 2020" "Friday, 19 June 2020" ...
## $ Hora : chr "08:16:21" "08:23:33" "08:24:33" "08:24:33" ...
## $ Marca : chr "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
## $ Fabricante : chr "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
## $ Producto : chr "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
## $ Precio : num 16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
## $ Ult.Costo : num 12.3 14 5 8 15 ...
## $ Unidades : num 1 1 1 1 1 1 1 1 1 1 ...
## $ F.Ticket : int 1 2 3 3 4 4 4 4 4 5 ...
## $ NombreDepartamento: chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ NombreFamilia : chr "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
## $ NombreCategoria : chr "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
## $ Estado : chr "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
## $ Mts.2 : int 60 60 60 60 60 60 60 60 60 60 ...
## $ Tipo.ubicación : chr "Esquina" "Esquina" "Esquina" "Esquina" ...
## $ Giro : chr "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Hora.inicio : chr "08:00" "08:00" "08:00" "08:00" ...
## $ Hora.cierre : chr "22:00" "22:00" "22:00" "22:00" ...
head(bd, n=7)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 1 MX001 Abarrotes 7.50102e+12 NA Friday, 19 June 2020 08:16:21
## 2 MX001 Abarrotes 7.50103e+12 NA Friday, 19 June 2020 08:23:33
## 3 MX001 Abarrotes 7.50100e+12 NA Friday, 19 June 2020 08:24:33
## 4 MX001 Abarrotes 7.50103e+12 NA Friday, 19 June 2020 08:24:33
## 5 MX001 Abarrotes 7.50103e+12 NA Friday, 19 June 2020 08:26:28
## 6 MX001 Abarrotes 7.50103e+12 NA Friday, 19 June 2020 08:26:28
## 7 MX001 Abarrotes 7.50103e+12 NA Friday, 19 June 2020 08:26:28
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 FLASH ALEN
## 7 VARIOS DANONE DANONE DE MEXICO
## Producto Precio Ult.Costo Unidades F.Ticket
## 1 Nutri Leche 1 Litro 16.0 12.31 1 1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 2
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 1 3
## 4 Pepsi N.R. 400Ml 8.0 8.00 1 3
## 5 Detergente Blanca Nieves 500G 19.5 15.00 1 4
## 6 Flash Xtra Brisa Marina 500Ml 9.5 7.31 1 4
## 7 Danone Bipack Fresa Chocoarroz 130G 11.0 11.00 1 4
## NombreDepartamento NombreFamilia NombreCategoria
## 1 Abarrotes Lacteos y Refrigerados Leche
## 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 Abarrotes Limpieza del Hogar Lavandería
## 6 Abarrotes Limpieza del Hogar Limpiadores Líquidos
## 7 Abarrotes Lacteos y Refrigerados Postres Refrigerados
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre
## 1 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 2 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 3 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 4 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 5 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 6 Nuevo León 60 Esquina Abarrotes 08:00 22:00
## 7 Nuevo León 60 Esquina Abarrotes 08:00 22:00
tail(bd)
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha
## 200615 MX005 Depósito 7.62221e+12 NA Sunday, 12 July 2020
## 200616 MX005 Depósito 7.62221e+12 NA Friday, 23 October 2020
## 200617 MX005 Depósito 7.62221e+12 NA Saturday, 10 October 2020
## 200618 MX005 Depósito 7.62221e+12 NA Saturday, 10 October 2020
## 200619 MX005 Depósito 7.62221e+12 NA Saturday, 27 June 2020
## 200620 MX005 Depósito 7.62221e+12 NA Friday, 26 June 2020
## Hora Marca Fabricante
## 200615 01:08:25 TRIDENT XTRA CARE CADBURY ADAMS
## 200616 22:17:37 TRIDENT XTRA CARE CADBURY ADAMS
## 200617 20:30:20 TRIDENT XTRA CARE CADBURY ADAMS
## 200618 22:40:43 TRIDENT XTRA CARE CADBURY ADAMS
## 200619 22:30:19 TRIDENT XTRA CARE CADBURY ADAMS
## 200620 23:43:34 TRIDENT XTRA CARE CADBURY ADAMS
## Producto Precio Ult.Costo Unidades F.Ticket
## 200615 Trident Xtracare Freshmint 16.32G 9 6.92 1 103100
## 200616 Trident Xtracare Freshmint 16.32G 9 6.92 1 116598
## 200617 Trident Xtracare Freshmint 16.32G 9 6.92 1 114886
## 200618 Trident Xtracare Freshmint 16.32G 9 6.92 1 114955
## 200619 Trident Xtracare Freshmint 16.32G 9 6.92 1 101121
## 200620 Trident Xtracare Freshmint 16.32G 9 6.92 1 100879
## NombreDepartamento NombreFamilia NombreCategoria Estado Mts.2
## 200615 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200616 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200617 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200618 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200619 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## 200620 Abarrotes Dulcería Gomas de Mazcar Quintana Roo 58
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## 200615 Esquina Mini súper 08:00 21:00
## 200616 Esquina Mini súper 08:00 21:00
## 200617 Esquina Mini súper 08:00 21:00
## 200618 Esquina Mini súper 08:00 21:00
## 200619 Esquina Mini súper 08:00 21:00
## 200620 Esquina Mini súper 08:00 21:00
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(vctrs)
##
## Attaching package: 'vctrs'
##
## The following object is masked from 'package:tibble':
##
## data_frame
##
## The following object is masked from 'package:dplyr':
##
## data_frame
tabyl(bd, vcClaveTienda, NombreDepartamento)
## vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
## MX001 95410 515 1 147 245 28
## MX002 6590 21 0 4 10 0
## MX003 4026 15 0 2 8 0
## MX004 82234 932 0 102 114 16
## MX005 10014 0 0 0 0 0
## Papelería Productos a Eliminar Vinos y Licores
## 35 3 80
## 0 0 4
## 0 0 0
## 32 5 20
## 7 0 0
#Eliminar columnas
bd1 <- bd
bd1 <- subset(bd1,select = -c(PLU))
#subset extraer de una base de datos
#-c es para borrar las columnas seleccionadas
#Eliminar renglones
bd2 <- bd1
bd2 <- bd2 [bd2$Precio>0,]
summary(bd1$Precio)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -147.00 11.00 16.00 19.42 25.00 1000.00
summary(bd2$Precio)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.50 11.00 16.00 19.45 25.00 1000.00
#¿Cuántos renglones/registros duplicados tenemos?
bd2[duplicated(bd2),] #cuántos duplicados hay
## [1] vcClaveTienda DescGiro Codigo.Barras Fecha
## [5] Hora Marca Fabricante Producto
## [9] Precio Ult.Costo Unidades F.Ticket
## [13] NombreDepartamento NombreFamilia NombreCategoria Estado
## [17] Mts.2 Tipo.ubicación Giro Hora.inicio
## [21] Hora.cierre
## <0 rows> (or 0-length row.names)
sum(duplicated(bd2)) #que me los sume
## [1] 0
#Eliminar registros duplicados
bd3 <- bd2
library(dplyr) #realizar operaciones de manipulación de datos comunes como: filtrar por fila, seleccionar columnas específicas, reordenar filas, añadir nuevas filas y agregar datos
bd3 <- distinct(bd3)
#Precios en absolutos
bd4 <- bd1
bd4$Precio <- abs(bd4$Precio)
summary(bd4$Precio)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.50 11.00 16.00 19.45 25.00 1000.00
#Cantidades en enteros
bd5 <- bd4
bd5$Unidades <- -ceiling(bd5$Unidades)
summary(bd5$Unidades)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -96.000 -1.000 -1.000 -1.262 -1.000 -1.000
summary(bd$Unidades)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.200 1.000 1.000 1.262 1.000 96.000
# Cantidades
tibble(bd5)
## # A tibble: 200,620 × 21
## vcClaveTienda DescGiro Codigo.Barras Fecha Hora Marca Fabricante Producto
## <chr> <chr> <dbl> <chr> <chr> <chr> <chr> <chr>
## 1 MX001 Abarrotes 7501020000000 Friday… 08:1… NUTR… MEXILAC Nutri L…
## 2 MX001 Abarrotes 7501030000000 Friday… 08:2… DAN … DANONE DE… DANUP S…
## 3 MX001 Abarrotes 7501000000000 Friday… 08:2… BIMBO GRUPO BIM… Rebanad…
## 4 MX001 Abarrotes 7501030000000 Friday… 08:2… PEPSI PEPSI-COL… Pepsi N…
## 5 MX001 Abarrotes 7501030000000 Friday… 08:2… BLAN… FABRICA D… Deterge…
## 6 MX001 Abarrotes 7501030000000 Friday… 08:2… FLASH ALEN Flash X…
## 7 MX001 Abarrotes 7501030000000 Friday… 08:2… VARI… DANONE DE… Danone …
## 8 MX001 Abarrotes 7501030000000 Friday… 08:2… ZOTE FABRICA D… Jabon Z…
## 9 MX001 Abarrotes 7506200000000 Friday… 08:2… ALWA… PROCTER &… T Femen…
## 10 MX001 Abarrotes 32239052017 Friday… 15:2… JUMEX JUMEX Jugo De…
## # ℹ 200,610 more rows
## # ℹ 13 more variables: Precio <dbl>, Ult.Costo <dbl>, Unidades <dbl>,
## # F.Ticket <int>, NombreDepartamento <chr>, NombreFamilia <chr>,
## # NombreCategoria <chr>, Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>,
## # Giro <chr>, Hora.inicio <chr>, Hora.cierre <chr>
# Convertir caracter a fecha
#bd6 <- bd5
#bd6$Fecha <- as.Date(bd6$Fecha, "%d/%m/Y")
# Convertir de carcater a entero
# bd7 <- bd6
# bd7$Hora <- substr(bd7$Hora, start = 1, stop = 2)
# tibble(bd7)
# bd7$Hora <- as.integer(bd7$Hora)
# str (bd7)
sum(is.na(bd5))
## [1] 0
sum(is.na(bd))
## [1] 199183
# Cuantos NA tengo por variable
sapply(bd5, function(x) sum(is.na(x)))
## vcClaveTienda DescGiro Codigo.Barras Fecha
## 0 0 0 0
## Hora Marca Fabricante Producto
## 0 0 0 0
## Precio Ult.Costo Unidades F.Ticket
## 0 0 0 0
## NombreDepartamento NombreFamilia NombreCategoria Estado
## 0 0 0 0
## Mts.2 Tipo.ubicación Giro Hora.inicio
## 0 0 0 0
## Hora.cierre
## 0
sapply(bd, function(x) sum(is.na(x)))
## vcClaveTienda DescGiro Codigo.Barras PLU
## 0 0 0 199183
## Fecha Hora Marca Fabricante
## 0 0 0 0
## Producto Precio Ult.Costo Unidades
## 0 0 0 0
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## 0 0 0 0
## Estado Mts.2 Tipo.ubicación Giro
## 0 0 0 0
## Hora.inicio Hora.cierre
## 0 0
# Borrar todos los registros NA de una tabla
bd6 <- bd
bd6 <- na.omit(bd6)
summary(bd6)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:1437 Length:1437 Min. :6.750e+08 Min. : 1.000
## Class :character Class :character 1st Qu.:6.750e+08 1st Qu.: 1.000
## Mode :character Mode :character Median :6.750e+08 Median : 1.000
## Mean :2.616e+11 Mean : 2.112
## 3rd Qu.:6.750e+08 3rd Qu.: 1.000
## Max. :7.501e+12 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:1437 Length:1437 Length:1437 Length:1437
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:1437 Min. :30.00 Min. : 1.00 Min. :1.000
## Class :character 1st Qu.:90.00 1st Qu.:64.62 1st Qu.:1.000
## Mode :character Median :90.00 Median :64.62 Median :1.000
## Mean :87.94 Mean :56.65 Mean :1.124
## 3rd Qu.:90.00 3rd Qu.:64.62 3rd Qu.:1.000
## Max. :90.00 Max. :64.62 Max. :7.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 772 Length:1437 Length:1437 Length:1437
## 1st Qu.: 99955 Class :character Class :character Class :character
## Median :102493 Mode :character Mode :character Mode :character
## Mean :100595
## 3rd Qu.:106546
## Max. :118356
## Estado Mts.2 Tipo.ubicación Giro
## Length:1437 Min. :58.00 Length:1437 Length:1437
## Class :character 1st Qu.:58.00 Class :character Class :character
## Mode :character Median :58.00 Mode :character Mode :character
## Mean :58.07
## 3rd Qu.:58.00
## Max. :60.00
## Hora.inicio Hora.cierre
## Length:1437 Length:1437
## Class :character Class :character
## Mode :character Mode :character
##
##
##
#Reemplazar NA con CEROS
bd7 <- bd
bd7[is.na(bd7)]<-0
summary(bd7)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 0.00000
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 0.00000
## Mode :character Mode :character Median :7.501e+12 Median : 0.00000
## Mean :5.950e+12 Mean : 0.01513
## 3rd Qu.:7.501e+12 3rd Qu.: 0.00000
## Max. :1.750e+13 Max. :30.00000
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383008
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Reemplazar NA con eel PROMEDIO
bd8 <- bd
bd8$PLU[is.na(bd8$PLU)]<-mean(bd8$PLU, na.rm=TRUE)
summary(bd8)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 1.000
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 2.112
## Mode :character Mode :character Median :7.501e+12 Median : 2.112
## Mean :5.950e+12 Mean : 2.112
## 3rd Qu.:7.501e+12 3rd Qu.: 2.112
## Max. :1.750e+13 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383008
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Reemplazar negativos con CERO
bd9 <- bd
bd9[bd9<0]<-0
summary(bd9)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200620 Length:200620 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.501e+12 Median : 1.00
## Mean :5.950e+12 Mean : 2.11
## 3rd Qu.:7.501e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199183
## Fecha Hora Marca Fabricante
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200620 Min. : 0.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.44 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200620 Length:200620 Length:200620
## 1st Qu.: 33967 Class :character Class :character Class :character
## Median :105996 Mode :character Mode :character Mode :character
## Mean :193994
## 3rd Qu.:383008
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200620 Min. :47.0 Length:200620 Length:200620
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200620 Length:200620
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
# Diagrama de caja y bigotes
bd10 <- bd5
boxplot(bd10$Precio, horizontal = TRUE)
boxplot(bd10$Unidades, horizontal = TRUE)
# Agregar Columnas
library(lubridate)
#bd10$Dia_de_la_semana <- wday(bd10$Fecha)
#summary(bd10)
#?wday
bd10$Subtotal <- bd10$Precio * bd10$Unidades
summary(bd10)
## vcClaveTienda DescGiro Codigo.Barras Fecha
## Length:200620 Length:200620 Min. :8.347e+05 Length:200620
## Class :character Class :character 1st Qu.:7.501e+12 Class :character
## Mode :character Mode :character Median :7.501e+12 Mode :character
## Mean :5.950e+12
## 3rd Qu.:7.501e+12
## Max. :1.750e+13
## Hora Marca Fabricante Producto
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Precio Ult.Costo Unidades F.Ticket
## Min. : 0.50 Min. : 0.38 Min. :-96.000 Min. : 1
## 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: -1.000 1st Qu.: 33967
## Median : 16.00 Median : 12.31 Median : -1.000 Median :105996
## Mean : 19.45 Mean : 15.31 Mean : -1.262 Mean :193994
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: -1.000 3rd Qu.:383008
## Max. :1000.00 Max. :769.23 Max. : -1.000 Max. :450040
## NombreDepartamento NombreFamilia NombreCategoria Estado
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Mts.2 Tipo.ubicación Giro Hora.inicio
## Min. :47.0 Length:200620 Length:200620 Length:200620
## 1st Qu.:53.0 Class :character Class :character Class :character
## Median :60.0 Mode :character Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.cierre Subtotal
## Length:200620 Min. :-2496.00
## Class :character 1st Qu.: -27.00
## Mode :character Median : -18.00
## Mean : -24.33
## 3rd Qu.: -12.00
## Max. : -1.00
bd10$Utilidad <- bd10$Precio * bd10$Ult.Costo
summary(bd10)
## vcClaveTienda DescGiro Codigo.Barras Fecha
## Length:200620 Length:200620 Min. :8.347e+05 Length:200620
## Class :character Class :character 1st Qu.:7.501e+12 Class :character
## Mode :character Mode :character Median :7.501e+12 Mode :character
## Mean :5.950e+12
## 3rd Qu.:7.501e+12
## Max. :1.750e+13
## Hora Marca Fabricante Producto
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Precio Ult.Costo Unidades F.Ticket
## Min. : 0.50 Min. : 0.38 Min. :-96.000 Min. : 1
## 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: -1.000 1st Qu.: 33967
## Median : 16.00 Median : 12.31 Median : -1.000 Median :105996
## Mean : 19.45 Mean : 15.31 Mean : -1.262 Mean :193994
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: -1.000 3rd Qu.:383008
## Max. :1000.00 Max. :769.23 Max. : -1.000 Max. :450040
## NombreDepartamento NombreFamilia NombreCategoria Estado
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Mts.2 Tipo.ubicación Giro Hora.inicio
## Min. :47.0 Length:200620 Length:200620 Length:200620
## 1st Qu.:53.0 Class :character Class :character Class :character
## Median :60.0 Mode :character Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.cierre Subtotal Utilidad
## Length:200620 Min. :-2496.00 Min. : 0.2
## Class :character 1st Qu.: -27.00 1st Qu.: 93.1
## Mode :character Median : -18.00 Median : 197.0
## Mean : -24.33 Mean : 459.6
## 3rd Qu.: -12.00 3rd Qu.: 480.8
## Max. : -1.00 Max. :769230.0
bd_limpia <- bd10
write.csv(bd_limpia, file="abarrotes_limpia.csv", row.names = FALSE)
#install.packages("plyr")
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
library(dplyr)
#install.packages("Matrix")
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
#install.packages("arules")
library(arules)
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
#install.packages("arulesViz")
library(arulesViz)
#install.packages("datasets")
library(datasets)
#install.packages("ddply")
#library(ddply)
# Ordenar de menor a mayor los tickets
bd_limpia <- bd_limpia[order(bd_limpia$F.Ticket),]
head(bd_limpia)
## vcClaveTienda DescGiro Codigo.Barras Fecha Hora
## 1 MX001 Abarrotes 7.50102e+12 Friday, 19 June 2020 08:16:21
## 2 MX001 Abarrotes 7.50103e+12 Friday, 19 June 2020 08:23:33
## 3 MX001 Abarrotes 7.50100e+12 Friday, 19 June 2020 08:24:33
## 4 MX001 Abarrotes 7.50103e+12 Friday, 19 June 2020 08:24:33
## 5 MX001 Abarrotes 7.50103e+12 Friday, 19 June 2020 08:26:28
## 6 MX001 Abarrotes 7.50103e+12 Friday, 19 June 2020 08:26:28
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 FLASH ALEN
## Producto Precio Ult.Costo Unidades F.Ticket
## 1 Nutri Leche 1 Litro 16.0 12.31 -1 1
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 -1 2
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 -1 3
## 4 Pepsi N.R. 400Ml 8.0 8.00 -1 3
## 5 Detergente Blanca Nieves 500G 19.5 15.00 -1 4
## 6 Flash Xtra Brisa Marina 500Ml 9.5 7.31 -1 4
## NombreDepartamento NombreFamilia NombreCategoria
## 1 Abarrotes Lacteos y Refrigerados Leche
## 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 Abarrotes Limpieza del Hogar Lavandería
## 6 Abarrotes Limpieza del Hogar Limpiadores Líquidos
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre Subtotal
## 1 Nuevo León 60 Esquina Abarrotes 08:00 22:00 -16.0
## 2 Nuevo León 60 Esquina Abarrotes 08:00 22:00 -14.0
## 3 Nuevo León 60 Esquina Abarrotes 08:00 22:00 -5.0
## 4 Nuevo León 60 Esquina Abarrotes 08:00 22:00 -8.0
## 5 Nuevo León 60 Esquina Abarrotes 08:00 22:00 -19.5
## 6 Nuevo León 60 Esquina Abarrotes 08:00 22:00 -9.5
## Utilidad
## 1 196.960
## 2 196.000
## 3 25.000
## 4 64.000
## 5 292.500
## 6 69.445
tail(bd_limpia)
## vcClaveTienda DescGiro Codigo.Barras Fecha
## 107394 MX004 Carnicería 1.024877e+10 Thursday, 15 October 2020
## 167771 MX004 Carnicería 7.501080e+12 Thursday, 15 October 2020
## 149429 MX004 Carnicería 7.501060e+12 Thursday, 15 October 2020
## 168750 MX004 Carnicería 7.501210e+12 Thursday, 15 October 2020
## 161193 MX004 Carnicería 7.501030e+12 Thursday, 15 October 2020
## 112970 MX004 Carnicería 7.500470e+07 Thursday, 15 October 2020
## Hora Marca Fabricante
## 107394 11:51:40 YEMINA HERDEZ
## 167771 11:51:40 DEL FUERTE ALIMENTOS DEL FUERTE
## 149429 11:54:37 COCA COLA ZERO COCA COLA
## 168750 11:56:52 DIAMANTE EMPACADOS
## 161193 12:01:54 PEPSI PEPSI-COLA MEXICANA
## 112970 12:02:36 COCA COLA COCA COLA
## Producto Precio Ult.Costo Unidades F.Ticket
## 107394 PASTA SPAGHETTI YEMINA 200G 7 5.38 -2 450032
## 167771 PURE DE TOMATE DEL FUERTE 345G 12 9.23 -1 450032
## 149429 COCA COLA ZERO 600ML 15 11.54 -2 450034
## 168750 ARROZ DIAMANTE225G 11 8.46 -1 450037
## 161193 PEPSI N. R. 500ML 10 7.69 -1 450039
## 112970 COCA COLA RETORNABLE 500ML 10 7.69 -8 450040
## NombreDepartamento NombreFamilia NombreCategoria
## 107394 Abarrotes Sopas y Pastas Fideos, Spaguetti, Tallarines
## 167771 Abarrotes Salsas y Sazonadores Salsa para Spaguetti
## 149429 Abarrotes Bebidas Refrescos Retornables
## 168750 Abarrotes Granos y Semillas Arroz
## 161193 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 112970 Abarrotes Bebidas Refrescos Retornables
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio Hora.cierre Subtotal
## 107394 Sinaloa 53 Esquina Abarrotes 07:00 23:00 -14
## 167771 Sinaloa 53 Esquina Abarrotes 07:00 23:00 -12
## 149429 Sinaloa 53 Esquina Abarrotes 07:00 23:00 -30
## 168750 Sinaloa 53 Esquina Abarrotes 07:00 23:00 -11
## 161193 Sinaloa 53 Esquina Abarrotes 07:00 23:00 -10
## 112970 Sinaloa 53 Esquina Abarrotes 07:00 23:00 -80
## Utilidad
## 107394 37.66
## 167771 110.76
## 149429 173.10
## 168750 93.06
## 161193 76.90
## 112970 76.90
# Generar Basket
basket <- ddply(bd_limpia, c("F.Ticket"), function(bd_limpia)paste(bd_limpia$Marca, collapse = ","))
# Eliminar número de ticket
basket$F.Ticket <- NULL
# Renombrar el nombre de la columna
colnames(basket) <- c("Marca")
# Exportar Basket
write.csv(basket, "basket.csv", quote = FALSE, row.names = FALSE)
#importar Transacciones
#file.choose()
tr <- read.transactions("basket.csv", format = "basket", sep = ",")
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in scan(text = l, what = "character", sep = sep, quote = quote, : EOF
## within quoted string
## Warning in asMethod(object): removing duplicated items in transactions
# Reglas de asociación
reglas.asociacion <- apriori(tr,parameter= list(supp=0.001, conf=0.2, maxlen=10))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.2 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 115
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[604 item(s), 115111 transaction(s)] done [0.03s].
## sorting and recoding items ... [207 item(s)] done [0.00s].
## creating transaction tree ... done [0.03s].
## checking subsets of size 1 2 3 done [0.00s].
## writing ... [11 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
summary(reglas.asociacion)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001016 Min. :0.2069 Min. :0.003562 Min. : 1.325
## 1st Qu.:0.001103 1st Qu.:0.2356 1st Qu.:0.004504 1st Qu.: 1.787
## Median :0.001416 Median :0.2442 Median :0.005803 Median : 3.972
## Mean :0.001519 Mean :0.2536 Mean :0.006054 Mean :17.563
## 3rd Qu.:0.001651 3rd Qu.:0.2685 3rd Qu.:0.006893 3rd Qu.:21.798
## Max. :0.002745 Max. :0.3098 Max. :0.010503 Max. :65.908
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tr 115111 0.001 0.2
## call
## apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {FANTA} => {COCA COLA} 0.001051159 0.2439516 0.004308884
## [2] {SALVO} => {FABULOSO} 0.001103283 0.3097561 0.003561779
## [3] {FABULOSO} => {SALVO} 0.001103283 0.2347505 0.004699811
## [4] {COCA COLA ZERO} => {COCA COLA} 0.001416025 0.2969035 0.004769310
## [5] {SPRITE} => {COCA COLA} 0.001346526 0.2069426 0.006506763
## [6] {PINOL} => {CLORALEX} 0.001016410 0.2363636 0.004300197
## [7] {BLUE HOUSE} => {BIMBO} 0.001711392 0.2720994 0.006289581
## [8] {HELLMANN´S} => {BIMBO} 0.001537646 0.2649701 0.005803094
## [9] {REYMA} => {CONVERMEX} 0.002093631 0.2441743 0.008574333
## [10] {FUD} => {BIMBO} 0.001589770 0.2183771 0.007279930
## [11] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730 0.010502906
## lift count
## [1] 1.561906 121
## [2] 65.908196 127
## [3] 65.908196 127
## [4] 1.900932 163
## [5] 1.324955 155
## [6] 25.030409 117
## [7] 4.078870 197
## [8] 3.971997 177
## [9] 18.564824 241
## [10] 3.273552 183
## [11] 1.673447 316
# Ordenar Reglas de Asociación
reglas.asociacion <- sort(reglas.asociacion, by= "confidence", deecreasing=TRUE)
summary(reglas.asociacion)
## set of 11 rules
##
## rule length distribution (lhs + rhs):sizes
## 2
## 11
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2 2 2 2 2 2
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001016 Min. :0.2069 Min. :0.003562 Min. : 1.325
## 1st Qu.:0.001103 1st Qu.:0.2356 1st Qu.:0.004504 1st Qu.: 1.787
## Median :0.001416 Median :0.2442 Median :0.005803 Median : 3.972
## Mean :0.001519 Mean :0.2536 Mean :0.006054 Mean :17.563
## 3rd Qu.:0.001651 3rd Qu.:0.2685 3rd Qu.:0.006893 3rd Qu.:21.798
## Max. :0.002745 Max. :0.3098 Max. :0.010503 Max. :65.908
## count
## Min. :117.0
## 1st Qu.:127.0
## Median :163.0
## Mean :174.9
## 3rd Qu.:190.0
## Max. :316.0
##
## mining info:
## data ntransactions support confidence
## tr 115111 0.001 0.2
## call
## apriori(data = tr, parameter = list(supp = 0.001, conf = 0.2, maxlen = 10))
inspect(reglas.asociacion)
## lhs rhs support confidence coverage
## [1] {SALVO} => {FABULOSO} 0.001103283 0.3097561 0.003561779
## [2] {COCA COLA ZERO} => {COCA COLA} 0.001416025 0.2969035 0.004769310
## [3] {BLUE HOUSE} => {BIMBO} 0.001711392 0.2720994 0.006289581
## [4] {HELLMANN´S} => {BIMBO} 0.001537646 0.2649701 0.005803094
## [5] {COCA COLA LIGHT} => {COCA COLA} 0.002745176 0.2613730 0.010502906
## [6] {REYMA} => {CONVERMEX} 0.002093631 0.2441743 0.008574333
## [7] {FANTA} => {COCA COLA} 0.001051159 0.2439516 0.004308884
## [8] {PINOL} => {CLORALEX} 0.001016410 0.2363636 0.004300197
## [9] {FABULOSO} => {SALVO} 0.001103283 0.2347505 0.004699811
## [10] {FUD} => {BIMBO} 0.001589770 0.2183771 0.007279930
## [11] {SPRITE} => {COCA COLA} 0.001346526 0.2069426 0.006506763
## lift count
## [1] 65.908196 127
## [2] 1.900932 163
## [3] 4.078870 197
## [4] 3.971997 177
## [5] 1.673447 316
## [6] 18.564824 241
## [7] 1.561906 121
## [8] 25.030409 117
## [9] 65.908196 127
## [10] 3.273552 183
## [11] 1.324955 155
# Visualizar reglas de asociación
top10reglas <- head(reglas.asociacion, n=10, by="confidence")
plot(top10reglas, method="graph", engine="htmlwidget")