library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.5 ✔ purrr 0.3.4
## ✔ tibble 3.1.8 ✔ stringr 1.4.0
## ✔ tidyr 1.2.0 ✔ forcats 0.5.1
## ✔ readr 2.1.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(janitor)
##
## Attaching package: 'janitor'
##
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
library(ggplot2)
getwd()
#file.choose()
bd <- read_csv("/Users/danieltrevino/Documents/Quinto Semestre TEC/Bootcamp de Programación/abarrotes.csv")
## Rows: 200625 Columns: 22
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (12): vcClaveTienda, DescGiro, Fecha, Marca, Fabricante, Producto, Nomb...
## dbl (7): Codigo.Barras, PLU, Precio, Ult.Costo, Unidades, F.Ticket, Mts.2
## time (3): Hora, Hora.inicio, Hora.cierre
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
summary(bd)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.500e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.500e+12 Median : 1.00
## Mean :5.949e+12 Mean : 2.11
## 3rd Qu.:7.500e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199188
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class1:hms Class :character Class :character
## Mode :character Class2:difftime Mode :character Mode :character
## Mode :numeric
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class1:hms Class1:hms
## Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
##
library(dplyr)
count(bd, vcClaveTienda, sort = TRUE)
## # A tibble: 5 × 2
## vcClaveTienda n
## <chr> <int>
## 1 MX001 96469
## 2 MX004 83455
## 3 MX005 10021
## 4 MX002 6629
## 5 MX003 4051
count(bd, DescGiro, sort = TRUE)
## # A tibble: 3 × 2
## DescGiro n
## <chr> <int>
## 1 Abarrotes 100520
## 2 Carnicería 83455
## 3 Depósito 16650
count(bd, Marca, sort = TRUE)
## # A tibble: 540 × 2
## Marca n
## <chr> <int>
## 1 COCA COLA 18686
## 2 PEPSI 15967
## 3 TECATE 11674
## 4 BIMBO 8317
## 5 LALA 5866
## 6 MARINELA 3696
## 7 DORITOS 3142
## 8 CHEETOS 3130
## 9 NUTRI LECHE 3128
## 10 MARLBORO 2579
## # … with 530 more rows
count(bd, Fabricante, sort = TRUE)
## # A tibble: 241 × 2
## Fabricante n
## <chr> <int>
## 1 COCA COLA 27519
## 2 PEPSI-COLA MEXICANA 22416
## 3 SABRITAS 14296
## 4 CERVECERIA CUAUHTEMOC MOCTEZUMA 13681
## 5 GRUPO BIMBO 13078
## 6 SIGMA ALIMENTOS 8014
## 7 GRUPO INDUSTRIAL LALA 5868
## 8 GRUPO GAMESA 5527
## 9 NESTLE 3698
## 10 JUGOS DEL VALLE S.A. DE C.V. 3581
## # … with 231 more rows
count(bd, Producto, sort = TRUE)
## # A tibble: 3,404 × 2
## Producto n
## <chr> <int>
## 1 Pepsi N.R. 1.5L 5108
## 2 Coca Cola Retornable 2.5L 3771
## 3 Caguamon Tecate Light 1.2Lt 3471
## 4 Pepsi N. R. 2.5L 2899
## 5 Cerveza Tecate Light 340Ml 2619
## 6 Cerveza Tecate Light 16Oz 2315
## 7 Coca Cola Retornable 1.5L 2124
## 8 Pepsi N.R. 3L 1832
## 9 Coca Cola Retornable 500Ml 1659
## 10 PEPSI N.R. 1.5L 1631
## # … with 3,394 more rows
count(bd, NombreDepartamento, sort = TRUE)
## # A tibble: 9 × 2
## NombreDepartamento n
## <chr> <int>
## 1 Abarrotes 198279
## 2 Bebes e Infantiles 1483
## 3 Ferretería 377
## 4 Farmacia 255
## 5 Vinos y Licores 104
## 6 Papelería 74
## 7 Mercería 44
## 8 Productos a Eliminar 8
## 9 Carnes 1
count(bd, NombreFamilia, sort = TRUE)
## # A tibble: 51 × 2
## NombreFamilia n
## <chr> <int>
## 1 Bebidas 64918
## 2 Botanas 21583
## 3 Lacteos y Refrigerados 17659
## 4 Cerveza 14017
## 5 Pan y Tortilla 10502
## 6 Limpieza del Hogar 8724
## 7 Galletas 7487
## 8 Cigarros 6817
## 9 Cuidado Personal 5433
## 10 Salsas y Sazonadores 5320
## # … with 41 more rows
count(bd, NombreCategoria, sort = TRUE)
## # A tibble: 174 × 2
## NombreCategoria n
## <chr> <int>
## 1 Refrescos Plástico (N.R.) 32862
## 2 Refrescos Retornables 13880
## 3 Frituras 11082
## 4 Lata 8150
## 5 Leche 7054
## 6 Cajetilla 6329
## 7 Botella 5867
## 8 Productos sin Categoria 5455
## 9 Papas Fritas 5344
## 10 Jugos y Néctares 5295
## # … with 164 more rows
count(bd, Estado, sort = TRUE)
## # A tibble: 5 × 2
## Estado n
## <chr> <int>
## 1 Nuevo León 96469
## 2 Sinaloa 83455
## 3 Quintana Roo 10021
## 4 Jalisco 6629
## 5 Chiapas 4051
count(bd, Mts.2, sort = TRUE)
## # A tibble: 5 × 2
## Mts.2 n
## <dbl> <int>
## 1 60 96469
## 2 53 83455
## 3 58 10021
## 4 47 6629
## 5 62 4051
count(bd, Tipo.ubicación , sort = TRUE)
## # A tibble: 3 × 2
## Tipo.ubicación n
## <chr> <int>
## 1 Esquina 189945
## 2 Rotonda 6629
## 3 Entre calles 4051
count(bd, Giro , sort = TRUE)
## # A tibble: 2 × 2
## Giro n
## <chr> <int>
## 1 Abarrotes 183975
## 2 Mini súper 16650
count(bd, Hora.inicio , sort = TRUE)
## # A tibble: 3 × 2
## Hora.inicio n
## <time> <int>
## 1 08:00 106490
## 2 07:00 87506
## 3 09:00 6629
count(bd, Hora.cierre , sort = TRUE)
## # A tibble: 3 × 2
## Hora.cierre n
## <time> <int>
## 1 22:00 103098
## 2 23:00 87506
## 3 21:00 10021
library(tidyverse)
tibble(bd)
## # A tibble: 200,625 × 22
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora Marca Fabricante
## <chr> <chr> <dbl> <dbl> <chr> <time> <chr> <chr>
## 1 MX001 Abarrotes 7500000000000 NA 19/06/… 08:16:21 NUTR… MEXILAC
## 2 MX001 Abarrotes 7500000000000 NA 19/06/… 08:23:33 DAN … DANONE DE…
## 3 MX001 Abarrotes 7500000000000 NA 19/06/… 08:24:33 BIMBO GRUPO BIM…
## 4 MX001 Abarrotes 7500000000000 NA 19/06/… 08:24:33 PEPSI PEPSI-COL…
## 5 MX001 Abarrotes 7500000000000 NA 19/06/… 08:26:28 BLAN… FABRICA D…
## 6 MX001 Abarrotes 7500000000000 NA 19/06/… 08:16:21 NUTR… MEXILAC
## 7 MX001 Abarrotes 7500000000000 NA 19/06/… 08:23:33 DAN … DANONE DE…
## 8 MX001 Abarrotes 7500000000000 NA 19/06/… 08:24:33 BIMBO GRUPO BIM…
## 9 MX001 Abarrotes 7500000000000 NA 19/06/… 08:24:33 PEPSI PEPSI-COL…
## 10 MX001 Abarrotes 7500000000000 NA 19/06/… 08:26:28 BLAN… FABRICA D…
## # … with 200,615 more rows, and 14 more variables: Producto <chr>,
## # Precio <dbl>, Ult.Costo <dbl>, Unidades <dbl>, F.Ticket <dbl>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <dbl>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <time>, Hora.cierre <time>
str(bd)
## spec_tbl_df [200,625 × 22] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
## $ vcClaveTienda : chr [1:200625] "MX001" "MX001" "MX001" "MX001" ...
## $ DescGiro : chr [1:200625] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Codigo.Barras : num [1:200625] 7.5e+12 7.5e+12 7.5e+12 7.5e+12 7.5e+12 ...
## $ PLU : num [1:200625] NA NA NA NA NA NA NA NA NA NA ...
## $ Fecha : chr [1:200625] "19/06/20" "19/06/20" "19/06/20" "19/06/20" ...
## $ Hora : 'hms' num [1:200625] 08:16:21 08:23:33 08:24:33 08:24:33 ...
## ..- attr(*, "units")= chr "secs"
## $ Marca : chr [1:200625] "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
## $ Fabricante : chr [1:200625] "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
## $ Producto : chr [1:200625] "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
## $ Precio : num [1:200625] 16 14 5 8 19.5 16 14 5 8 19.5 ...
## $ Ult.Costo : num [1:200625] 12.3 14 5 8 15 ...
## $ Unidades : num [1:200625] 1 1 1 1 1 1 1 1 1 1 ...
## $ F.Ticket : num [1:200625] 1 2 3 3 4 1 2 3 3 4 ...
## $ NombreDepartamento: chr [1:200625] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ NombreFamilia : chr [1:200625] "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
## $ NombreCategoria : chr [1:200625] "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
## $ Estado : chr [1:200625] "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
## $ Mts.2 : num [1:200625] 60 60 60 60 60 60 60 60 60 60 ...
## $ Tipo.ubicación : chr [1:200625] "Esquina" "Esquina" "Esquina" "Esquina" ...
## $ Giro : chr [1:200625] "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
## $ Hora.inicio : 'hms' num [1:200625] 08:00:00 08:00:00 08:00:00 08:00:00 ...
## ..- attr(*, "units")= chr "secs"
## $ Hora.cierre : 'hms' num [1:200625] 22:00:00 22:00:00 22:00:00 22:00:00 ...
## ..- attr(*, "units")= chr "secs"
## - attr(*, "spec")=
## .. cols(
## .. vcClaveTienda = col_character(),
## .. DescGiro = col_character(),
## .. Codigo.Barras = col_double(),
## .. PLU = col_double(),
## .. Fecha = col_character(),
## .. Hora = col_time(format = ""),
## .. Marca = col_character(),
## .. Fabricante = col_character(),
## .. Producto = col_character(),
## .. Precio = col_double(),
## .. Ult.Costo = col_double(),
## .. Unidades = col_double(),
## .. F.Ticket = col_double(),
## .. NombreDepartamento = col_character(),
## .. NombreFamilia = col_character(),
## .. NombreCategoria = col_character(),
## .. Estado = col_character(),
## .. Mts.2 = col_double(),
## .. Tipo.ubicación = col_character(),
## .. Giro = col_character(),
## .. Hora.inicio = col_time(format = ""),
## .. Hora.cierre = col_time(format = "")
## .. )
## - attr(*, "problems")=<externalptr>
head(bd, 7)
## # A tibble: 7 × 22
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora Marca Fabricante
## <chr> <chr> <dbl> <dbl> <chr> <time> <chr> <chr>
## 1 MX001 Abarrotes 7500000000000 NA 19/06/20 08:16:21 NUTR… MEXILAC
## 2 MX001 Abarrotes 7500000000000 NA 19/06/20 08:23:33 DAN … DANONE DE…
## 3 MX001 Abarrotes 7500000000000 NA 19/06/20 08:24:33 BIMBO GRUPO BIM…
## 4 MX001 Abarrotes 7500000000000 NA 19/06/20 08:24:33 PEPSI PEPSI-COL…
## 5 MX001 Abarrotes 7500000000000 NA 19/06/20 08:26:28 BLAN… FABRICA D…
## 6 MX001 Abarrotes 7500000000000 NA 19/06/20 08:16:21 NUTR… MEXILAC
## 7 MX001 Abarrotes 7500000000000 NA 19/06/20 08:23:33 DAN … DANONE DE…
## # … with 14 more variables: Producto <chr>, Precio <dbl>, Ult.Costo <dbl>,
## # Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts.2 <dbl>,
## # Tipo.ubicación <chr>, Giro <chr>, Hora.inicio <time>, Hora.cierre <time>
tail(bd)
## # A tibble: 6 × 22
## vcClaveTienda DescGiro Codigo.Barras PLU Fecha Hora Marca Fabricante
## <chr> <chr> <dbl> <dbl> <chr> <time> <chr> <chr>
## 1 MX005 Depósito 7620000000000 NA 12/07/20 01:08:25 TRIDE… CADBURY A…
## 2 MX005 Depósito 7620000000000 NA 23/10/20 22:17:37 TRIDE… CADBURY A…
## 3 MX005 Depósito 7620000000000 NA 10/10/20 20:30:20 TRIDE… CADBURY A…
## 4 MX005 Depósito 7620000000000 NA 10/10/20 22:40:43 TRIDE… CADBURY A…
## 5 MX005 Depósito 7620000000000 NA 27/06/20 22:30:19 TRIDE… CADBURY A…
## 6 MX005 Depósito 7620000000000 NA 26/06/20 23:43:34 TRIDE… CADBURY A…
## # … with 14 more variables: Producto <chr>, Precio <dbl>, Ult.Costo <dbl>,
## # Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts.2 <dbl>,
## # Tipo.ubicación <chr>, Giro <chr>, Hora.inicio <time>, Hora.cierre <time>
library(janitor)
tabyl(bd, vcClaveTienda, NombreDepartamento)
## vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
## MX001 95415 515 1 147 245 28
## MX002 6590 21 0 4 10 0
## MX003 4026 15 0 2 8 0
## MX004 82234 932 0 102 114 16
## MX005 10014 0 0 0 0 0
## Papelería Productos a Eliminar Vinos y Licores
## 35 3 80
## 0 0 4
## 0 0 0
## 32 5 20
## 7 0 0
1 - En producto, no hay un catalogo especifico. Ejemplo: Pepsi N.R. 1.5L y PEPSI N.R. 1.5L 2 - Casi ningun registro cuenta con PLU 3 - Cambiar formato de fecha 4 - Cambiar formato de hora 5 - Hay precios negativos 6 - Hay unidades menores a 1
# Eliminar columnas PLU y codigo de barras
bd1 <- bd
bd1 <- subset(bd1, select = -c (PLU, Codigo.Barras))
#Eliminar renglones
bd2 <- bd1
bd2 <- bd2[bd2$Precio > 0,]
summary(bd2)
## vcClaveTienda DescGiro Fecha Hora
## Length:200478 Length:200478 Length:200478 Length:200478
## Class :character Class :character Class :character Class1:hms
## Mode :character Mode :character Mode :character Class2:difftime
## Mode :numeric
##
##
## Marca Fabricante Producto Precio
## Length:200478 Length:200478 Length:200478 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200478
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33977 Class :character
## Median : 12.31 Median : 1.000 Median :106034 Mode :character
## Mean : 15.31 Mean : 1.261 Mean :194096
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383062
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200478 Length:200478 Length:200478 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200478 Length:200478 Length:200478 Length:200478
## Class :character Class :character Class1:hms Class1:hms
## Mode :character Mode :character Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
# Eliminar renglones de acurdo a alguna variable
bd2 <- bd2[bd2$NombreDepartamento != "Productos a Eliminar", ]
count(bd, NombreDepartamento, sort = TRUE)
## # A tibble: 9 × 2
## NombreDepartamento n
## <chr> <int>
## 1 Abarrotes 198279
## 2 Bebes e Infantiles 1483
## 3 Ferretería 377
## 4 Farmacia 255
## 5 Vinos y Licores 104
## 6 Papelería 74
## 7 Mercería 44
## 8 Productos a Eliminar 8
## 9 Carnes 1
# Conocer cuantos renglones hay duplicados
bd1[duplicated(bd1),]
## # A tibble: 5 × 20
## vcClaveTienda DescGiro Fecha Hora Marca Fabricante Producto Precio
## <chr> <chr> <chr> <time> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrotes 19/06/20 08:16:21 NUTRI LE… MEXILAC Nutri L… 16
## 2 MX001 Abarrotes 19/06/20 08:23:33 DAN UP DANONE DE… DANUP S… 14
## 3 MX001 Abarrotes 19/06/20 08:24:33 BIMBO GRUPO BIM… Rebanad… 5
## 4 MX001 Abarrotes 19/06/20 08:24:33 PEPSI PEPSI-COL… Pepsi N… 8
## 5 MX001 Abarrotes 19/06/20 08:26:28 BLANCA N… FABRICA D… Deterge… 19.5
## # … with 12 more variables: Ult.Costo <dbl>, Unidades <dbl>, F.Ticket <dbl>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <dbl>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <time>, Hora.cierre <time>
sum(duplicated(bd1))
## [1] 5
# Eliminar renglones duplicados
bd3 <- bd1
bd3 <- distinct(bd3)
# Precios en absoluto
bd4 <- bd3
bd4$Precio <- abs(bd4$Precio)
summary(bd4)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class1:hms
## Mode :character Mode :character Mode :character Class2:difftime
## Mode :numeric
##
##
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383008
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class1:hms Class1:hms
## Mode :character Mode :character Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
# Cantidades en enteros
bd5 <- bd4
bd5$Unidades <- ceiling(bd5$Unidades)
tibble(bd5)
## # A tibble: 200,620 × 20
## vcClaveTienda DescGiro Fecha Hora Marca Fabricante Producto Precio
## <chr> <chr> <chr> <time> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrotes 19/06/20 08:16:21 NUTRI L… MEXILAC Nutri L… 16
## 2 MX001 Abarrotes 19/06/20 08:23:33 DAN UP DANONE DE… DANUP S… 14
## 3 MX001 Abarrotes 19/06/20 08:24:33 BIMBO GRUPO BIM… Rebanad… 5
## 4 MX001 Abarrotes 19/06/20 08:24:33 PEPSI PEPSI-COL… Pepsi N… 8
## 5 MX001 Abarrotes 19/06/20 08:26:28 BLANCA … FABRICA D… Deterge… 19.5
## 6 MX001 Abarrotes 19/06/20 08:26:28 FLASH ALEN Flash X… 9.5
## 7 MX001 Abarrotes 19/06/20 08:26:28 VARIOS … DANONE DE… Danone … 11
## 8 MX001 Abarrotes 19/06/20 08:26:28 ZOTE FABRICA D… Jabon Z… 9.5
## 9 MX001 Abarrotes 19/06/20 08:26:28 ALWAYS PROCTER &… T Femen… 23.5
## 10 MX001 Abarrotes 19/06/20 15:24:02 JUMEX JUMEX Jugo De… 12
## # … with 200,610 more rows, and 12 more variables: Ult.Costo <dbl>,
## # Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts.2 <dbl>,
## # Tipo.ubicación <chr>, Giro <chr>, Hora.inicio <time>, Hora.cierre <time>
# Convertir de caracter a fecha
bd6 <- bd5
bd6$Fecha <- as.Date(bd6$Fecha, format = "%d/%m/%y")
tibble(bd6)
## # A tibble: 200,620 × 20
## vcClaveTienda DescGiro Fecha Hora Marca Fabricante Producto Precio
## <chr> <chr> <date> <time> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrotes 2020-06-19 08:16:21 NUTRI… MEXILAC Nutri L… 16
## 2 MX001 Abarrotes 2020-06-19 08:23:33 DAN UP DANONE DE… DANUP S… 14
## 3 MX001 Abarrotes 2020-06-19 08:24:33 BIMBO GRUPO BIM… Rebanad… 5
## 4 MX001 Abarrotes 2020-06-19 08:24:33 PEPSI PEPSI-COL… Pepsi N… 8
## 5 MX001 Abarrotes 2020-06-19 08:26:28 BLANC… FABRICA D… Deterge… 19.5
## 6 MX001 Abarrotes 2020-06-19 08:26:28 FLASH ALEN Flash X… 9.5
## 7 MX001 Abarrotes 2020-06-19 08:26:28 VARIO… DANONE DE… Danone … 11
## 8 MX001 Abarrotes 2020-06-19 08:26:28 ZOTE FABRICA D… Jabon Z… 9.5
## 9 MX001 Abarrotes 2020-06-19 08:26:28 ALWAYS PROCTER &… T Femen… 23.5
## 10 MX001 Abarrotes 2020-06-19 15:24:02 JUMEX JUMEX Jugo De… 12
## # … with 200,610 more rows, and 12 more variables: Ult.Costo <dbl>,
## # Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts.2 <dbl>,
## # Tipo.ubicación <chr>, Giro <chr>, Hora.inicio <time>, Hora.cierre <time>
# Convertir de caracter a entero
bd7 <- bd6
bd7$Hora <- substr(bd7$Hora, start = 1, stop = 2)
tibble(bd7)
## # A tibble: 200,620 × 20
## vcClaveTienda DescGiro Fecha Hora Marca Fabricante Producto Precio
## <chr> <chr> <date> <chr> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrotes 2020-06-19 08 NUTRI LE… MEXILAC Nutri L… 16
## 2 MX001 Abarrotes 2020-06-19 08 DAN UP DANONE DE… DANUP S… 14
## 3 MX001 Abarrotes 2020-06-19 08 BIMBO GRUPO BIM… Rebanad… 5
## 4 MX001 Abarrotes 2020-06-19 08 PEPSI PEPSI-COL… Pepsi N… 8
## 5 MX001 Abarrotes 2020-06-19 08 BLANCA N… FABRICA D… Deterge… 19.5
## 6 MX001 Abarrotes 2020-06-19 08 FLASH ALEN Flash X… 9.5
## 7 MX001 Abarrotes 2020-06-19 08 VARIOS D… DANONE DE… Danone … 11
## 8 MX001 Abarrotes 2020-06-19 08 ZOTE FABRICA D… Jabon Z… 9.5
## 9 MX001 Abarrotes 2020-06-19 08 ALWAYS PROCTER &… T Femen… 23.5
## 10 MX001 Abarrotes 2020-06-19 15 JUMEX JUMEX Jugo De… 12
## # … with 200,610 more rows, and 12 more variables: Ult.Costo <dbl>,
## # Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts.2 <dbl>,
## # Tipo.ubicación <chr>, Giro <chr>, Hora.inicio <time>, Hora.cierre <time>
bd7$Hora <- as.integer(bd7$Hora)
tibble(bd7)
## # A tibble: 200,620 × 20
## vcClaveTienda DescGiro Fecha Hora Marca Fabricante Producto Precio
## <chr> <chr> <date> <int> <chr> <chr> <chr> <dbl>
## 1 MX001 Abarrotes 2020-06-19 8 NUTRI LE… MEXILAC Nutri L… 16
## 2 MX001 Abarrotes 2020-06-19 8 DAN UP DANONE DE… DANUP S… 14
## 3 MX001 Abarrotes 2020-06-19 8 BIMBO GRUPO BIM… Rebanad… 5
## 4 MX001 Abarrotes 2020-06-19 8 PEPSI PEPSI-COL… Pepsi N… 8
## 5 MX001 Abarrotes 2020-06-19 8 BLANCA N… FABRICA D… Deterge… 19.5
## 6 MX001 Abarrotes 2020-06-19 8 FLASH ALEN Flash X… 9.5
## 7 MX001 Abarrotes 2020-06-19 8 VARIOS D… DANONE DE… Danone … 11
## 8 MX001 Abarrotes 2020-06-19 8 ZOTE FABRICA D… Jabon Z… 9.5
## 9 MX001 Abarrotes 2020-06-19 8 ALWAYS PROCTER &… T Femen… 23.5
## 10 MX001 Abarrotes 2020-06-19 15 JUMEX JUMEX Jugo De… 12
## # … with 200,610 more rows, and 12 more variables: Ult.Costo <dbl>,
## # Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, Mts.2 <dbl>,
## # Tipo.ubicación <chr>, Giro <chr>, Hora.inicio <time>, Hora.cierre <time>
#Cuantos NA hay en total
sum(is.na(bd7))
## [1] 0
sum(is.na(bd))
## [1] 199188
# Cuanto NA hay por columna
colSums(is.na(bd7))
## vcClaveTienda DescGiro Fecha Hora
## 0 0 0 0
## Marca Fabricante Producto Precio
## 0 0 0 0
## Ult.Costo Unidades F.Ticket NombreDepartamento
## 0 0 0 0
## NombreFamilia NombreCategoria Estado Mts.2
## 0 0 0 0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## 0 0 0 0
colSums(is.na(bd))
## vcClaveTienda DescGiro Codigo.Barras PLU
## 0 0 0 199188
## Fecha Hora Marca Fabricante
## 0 0 0 0
## Producto Precio Ult.Costo Unidades
## 0 0 0 0
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## 0 0 0 0
## Estado Mts.2 Tipo.ubicación Giro
## 0 0 0 0
## Hora.inicio Hora.cierre
## 0 0
# Borrar todos los registros NA de una tabla - borra toda la fila donde hay NA
bd8 <- bd
bd8 <- na.omit(bd8)
summary(bd8)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:1437 Length:1437 Min. :6.750e+08 Min. : 1.000
## Class :character Class :character 1st Qu.:6.750e+08 1st Qu.: 1.000
## Mode :character Mode :character Median :6.750e+08 Median : 1.000
## Mean :2.616e+11 Mean : 2.112
## 3rd Qu.:6.750e+08 3rd Qu.: 1.000
## Max. :7.500e+12 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:1437 Length:1437 Length:1437 Length:1437
## Class :character Class1:hms Class :character Class :character
## Mode :character Class2:difftime Mode :character Mode :character
## Mode :numeric
##
##
## Producto Precio Ult.Costo Unidades
## Length:1437 Min. :30.00 Min. : 1.00 Min. :1.000
## Class :character 1st Qu.:90.00 1st Qu.:64.62 1st Qu.:1.000
## Mode :character Median :90.00 Median :64.62 Median :1.000
## Mean :87.94 Mean :56.65 Mean :1.124
## 3rd Qu.:90.00 3rd Qu.:64.62 3rd Qu.:1.000
## Max. :90.00 Max. :64.62 Max. :7.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 772 Length:1437 Length:1437 Length:1437
## 1st Qu.: 99955 Class :character Class :character Class :character
## Median :102493 Mode :character Mode :character Mode :character
## Mean :100595
## 3rd Qu.:106546
## Max. :118356
## Estado Mts.2 Tipo.ubicación Giro
## Length:1437 Min. :58.00 Length:1437 Length:1437
## Class :character 1st Qu.:58.00 Class :character Class :character
## Mode :character Median :58.00 Mode :character Mode :character
## Mean :58.07
## 3rd Qu.:58.00
## Max. :60.00
## Hora.inicio Hora.cierre
## Length:1437 Length:1437
## Class1:hms Class1:hms
## Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
# Reemplazar NA con ceros 0
bd9 <- bd
bd9[is.na(bd9)] <- 0
summary(bd9)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 0.00000
## Class :character Class :character 1st Qu.:7.500e+12 1st Qu.: 0.00000
## Mode :character Mode :character Median :7.500e+12 Median : 0.00000
## Mean :5.949e+12 Mean : 0.01513
## 3rd Qu.:7.500e+12 3rd Qu.: 0.00000
## Max. :1.750e+13 Max. :30.00000
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class1:hms Class :character Class :character
## Mode :character Class2:difftime Mode :character Mode :character
## Mode :numeric
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class1:hms Class1:hms
## Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
# Reemplazar NA con el Promedio
bd10 <- bd
bd10$PLU[is.na(bd10$PLU)] <- mean(bd10$PLU, na.rm = TRUE)
summary(bd10)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 1.000
## Class :character Class :character 1st Qu.:7.500e+12 1st Qu.: 2.112
## Mode :character Mode :character Median :7.500e+12 Median : 2.112
## Mean :5.949e+12 Mean : 2.112
## 3rd Qu.:7.500e+12 3rd Qu.: 2.112
## Max. :1.750e+13 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class1:hms Class :character Class :character
## Mode :character Class2:difftime Mode :character Mode :character
## Mode :numeric
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class1:hms Class1:hms
## Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
# Reemplazar negativos con 0
bd11 <- bd
bd11[bd11 < 0] <- 0
summary(bd11)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.500e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.500e+12 Median : 1.00
## Mean :5.949e+12 Mean : 2.11
## 3rd Qu.:7.500e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199188
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class1:hms Class :character Class :character
## Mode :character Class2:difftime Mode :character Mode :character
## Mode :numeric
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. : 0.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.44 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class1:hms Class1:hms
## Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
##
bd12 <- bd7
boxplot(bd12$Precio, horizontal = TRUE)
boxplot(bd12$Unidades, horizontal = TRUE)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Dia de la semana
bd12$Dia_de_la_Semana <- wday(bd12$Fecha)
summary(bd12)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Min. :2020-05-01 Min. : 0.00
## Class :character Class :character 1st Qu.:2020-06-06 1st Qu.:13.00
## Mode :character Mode :character Median :2020-07-11 Median :17.00
## Mean :2020-07-18 Mean :16.23
## 3rd Qu.:2020-08-29 3rd Qu.:20.00
## Max. :2020-11-11 Max. :23.00
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 1.000 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383008
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class1:hms Class1:hms
## Mode :character Mode :character Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
## Dia_de_la_Semana
## Min. :1.000
## 1st Qu.:2.000
## Median :4.000
## Mean :3.912
## 3rd Qu.:6.000
## Max. :7.000
# Subtotal de la compra
bd12$Subtotal <- bd12$Precio * bd12$Unidades
summary(bd12)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Min. :2020-05-01 Min. : 0.00
## Class :character Class :character 1st Qu.:2020-06-06 1st Qu.:13.00
## Mode :character Mode :character Median :2020-07-11 Median :17.00
## Mean :2020-07-18 Mean :16.23
## 3rd Qu.:2020-08-29 3rd Qu.:20.00
## Max. :2020-11-11 Max. :23.00
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 1.000 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383008
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class1:hms Class1:hms
## Mode :character Mode :character Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
## Dia_de_la_Semana Subtotal
## Min. :1.000 Min. : 1.00
## 1st Qu.:2.000 1st Qu.: 12.00
## Median :4.000 Median : 18.00
## Mean :3.912 Mean : 24.33
## 3rd Qu.:6.000 3rd Qu.: 27.00
## Max. :7.000 Max. :2496.00
# Utilidad por producto
bd12$Utilidad <- bd12$Precio - bd12$Ult.Costo
summary(bd12)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Min. :2020-05-01 Min. : 0.00
## Class :character Class :character 1st Qu.:2020-06-06 1st Qu.:13.00
## Mode :character Mode :character Median :2020-07-11 Median :17.00
## Mean :2020-07-18 Mean :16.23
## 3rd Qu.:2020-08-29 3rd Qu.:20.00
## Max. :2020-11-11 Max. :23.00
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 1.000 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383008
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class1:hms Class1:hms
## Mode :character Mode :character Class2:difftime Class2:difftime
## Mode :numeric Mode :numeric
##
##
## Dia_de_la_Semana Subtotal Utilidad
## Min. :1.000 Min. : 1.00 Min. : 0.000
## 1st Qu.:2.000 1st Qu.: 12.00 1st Qu.: 2.310
## Median :4.000 Median : 18.00 Median : 3.230
## Mean :3.912 Mean : 24.33 Mean : 4.142
## 3rd Qu.:6.000 3rd Qu.: 27.00 3rd Qu.: 5.420
## Max. :7.000 Max. :2496.00 Max. :230.770
#bd_limpia <- bd12
#write.csv(bd_limpia, file = "abarrote bd limpia.csv", row.names = FALSE)
#install.packages("Matrix")
library(Matrix)
##
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
library(arules)
##
## Attaching package: 'arules'
## The following object is masked from 'package:dplyr':
##
## recode
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
library(datasets)
#file.choose()
abarrotes_limpia <- read.csv("/Users/danieltrevino/Documents/Quinto Semestre TEC/Bootcamp de Programación/abarrote bd limpia.csv")
abarrotes_limpia <- abarrotes_limpia[order(abarrotes_limpia$F.Ticket),]
head(abarrotes_limpia)
## vcClaveTienda DescGiro Fecha Hora Marca
## 1 MX001 Abarrotes 2020-06-19 8 NUTRI LECHE
## 2 MX001 Abarrotes 2020-06-19 8 DAN UP
## 3 MX001 Abarrotes 2020-06-19 8 BIMBO
## 4 MX001 Abarrotes 2020-06-19 8 PEPSI
## 5 MX001 Abarrotes 2020-06-19 8 BLANCA NIEVES (DETERGENTE)
## 6 MX001 Abarrotes 2020-06-19 8 FLASH
## Fabricante Producto Precio
## 1 MEXILAC Nutri Leche 1 Litro 16.0
## 2 DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL 14.0
## 3 GRUPO BIMBO Rebanadas Bimbo 2Pz 5.0
## 4 PEPSI-COLA MEXICANA Pepsi N.R. 400Ml 8.0
## 5 FABRICA DE JABON LA CORONA Detergente Blanca Nieves 500G 19.5
## 6 ALEN Flash Xtra Brisa Marina 500Ml 9.5
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 1 12.31 1 1 Abarrotes Lacteos y Refrigerados
## 2 14.00 1 2 Abarrotes Lacteos y Refrigerados
## 3 5.00 1 3 Abarrotes Pan y Tortilla
## 4 8.00 1 3 Abarrotes Bebidas
## 5 15.00 1 4 Abarrotes Limpieza del Hogar
## 6 7.31 1 4 Abarrotes Limpieza del Hogar
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 1 Leche Nuevo León 60 Esquina Abarrotes
## 2 Yogurt Nuevo León 60 Esquina Abarrotes
## 3 Pan Dulce Empaquetado Nuevo León 60 Esquina Abarrotes
## 4 Refrescos Plástico (N.R.) Nuevo León 60 Esquina Abarrotes
## 5 Lavandería Nuevo León 60 Esquina Abarrotes
## 6 Limpiadores Líquidos Nuevo León 60 Esquina Abarrotes
## Hora.inicio Hora.cierre Dia_de_la_Semana Subtotal Utilidad
## 1 08:00:00 22:00:00 6 16.0 3.69
## 2 08:00:00 22:00:00 6 14.0 0.00
## 3 08:00:00 22:00:00 6 5.0 0.00
## 4 08:00:00 22:00:00 6 8.0 0.00
## 5 08:00:00 22:00:00 6 19.5 4.50
## 6 08:00:00 22:00:00 6 9.5 2.19
tail(abarrotes_limpia)
## vcClaveTienda DescGiro Fecha Hora Marca
## 107394 MX004 Carnicería 2020-10-15 11 YEMINA
## 167771 MX004 Carnicería 2020-10-15 11 DEL FUERTE
## 149429 MX004 Carnicería 2020-10-15 11 COCA COLA ZERO
## 168750 MX004 Carnicería 2020-10-15 11 DIAMANTE
## 161193 MX004 Carnicería 2020-10-15 12 PEPSI
## 112970 MX004 Carnicería 2020-10-15 12 COCA COLA
## Fabricante Producto Precio Ult.Costo
## 107394 HERDEZ PASTA SPAGHETTI YEMINA 200G 7 5.38
## 167771 ALIMENTOS DEL FUERTE PURE DE TOMATE DEL FUERTE 345G 12 9.23
## 149429 COCA COLA COCA COLA ZERO 600ML 15 11.54
## 168750 EMPACADOS ARROZ DIAMANTE225G 11 8.46
## 161193 PEPSI-COLA MEXICANA PEPSI N. R. 500ML 10 7.69
## 112970 COCA COLA COCA COLA RETORNABLE 500ML 10 7.69
## Unidades F.Ticket NombreDepartamento NombreFamilia
## 107394 2 450032 Abarrotes Sopas y Pastas
## 167771 1 450032 Abarrotes Salsas y Sazonadores
## 149429 2 450034 Abarrotes Bebidas
## 168750 1 450037 Abarrotes Granos y Semillas
## 161193 1 450039 Abarrotes Bebidas
## 112970 8 450040 Abarrotes Bebidas
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 107394 Fideos, Spaguetti, Tallarines Sinaloa 53 Esquina Abarrotes
## 167771 Salsa para Spaguetti Sinaloa 53 Esquina Abarrotes
## 149429 Refrescos Retornables Sinaloa 53 Esquina Abarrotes
## 168750 Arroz Sinaloa 53 Esquina Abarrotes
## 161193 Refrescos Plástico (N.R.) Sinaloa 53 Esquina Abarrotes
## 112970 Refrescos Retornables Sinaloa 53 Esquina Abarrotes
## Hora.inicio Hora.cierre Dia_de_la_Semana Subtotal Utilidad
## 107394 07:00:00 23:00:00 5 14 1.62
## 167771 07:00:00 23:00:00 5 12 2.77
## 149429 07:00:00 23:00:00 5 30 3.46
## 168750 07:00:00 23:00:00 5 11 2.54
## 161193 07:00:00 23:00:00 5 10 2.31
## 112970 07:00:00 23:00:00 5 80 2.31
library(plyr)
## ------------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## ------------------------------------------------------------------------------
##
## Attaching package: 'plyr'
## The following object is masked from 'package:purrr':
##
## compact
## The following objects are masked from 'package:dplyr':
##
## arrange, count, desc, failwith, id, mutate, rename, summarise,
## summarize
basket <- ddply(abarrotes_limpia, c("F.Ticket"), function(abarrotes_limpia)paste(abarrotes_limpia$Marca, collapse = ","))
basket$F.Ticket <- NULL
colnames(basket) <- c("Marca")
write.csv(basket, "basket.csv", quote = FALSE, row.names = FALSE)
#file.choose()
tr <- read.transactions("basket.csv", format = c("basket", "single"), sep = "",
cols = NULL, rm.duplicates = FALSE,
quote = "\"'", skip = 0,
encoding = "unknown")
## Warning in asMethod(object): removing duplicated items in transactions
reglas.asociacion <- apriori(tr, parameter = list(supp= 0.001, conf=0.02, maxlen= 10))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.02 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 10 rules TRUE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 115
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[27332 item(s), 115111 transaction(s)] done [0.16s].
## sorting and recoding items ... [191 item(s)] done [0.00s].
## creating transaction tree ... done [0.04s].
## checking subsets of size 1 2 3 4 done [0.00s].
## writing ... [172 rule(s)] done [0.00s].
## creating S4 object ... done [0.01s].
summary(reglas.asociacion)
## set of 172 rules
##
## rule length distribution (lhs + rhs):sizes
## 1 2 3 4
## 6 126 36 4
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 2.000 2.000 2.221 2.000 4.000
##
## summary of quality measures:
## support confidence coverage lift
## Min. :0.001008 Min. :0.02059 Min. :0.001008 Min. : 0.3269
## 1st Qu.:0.001446 1st Qu.:0.37681 1st Qu.:0.002128 1st Qu.: 14.2036
## Median :0.002042 Median :0.61845 Median :0.004148 Median : 86.2174
## Mean :0.005772 Mean :0.58457 Mean :0.047977 Mean :143.7319
## 3rd Qu.:0.004020 3rd Qu.:0.86837 3rd Qu.:0.010312 3rd Qu.:184.2080
## Max. :0.119172 Max. :1.00000 Max. :1.000000 Max. :828.1367
## count
## Min. : 116.0
## 1st Qu.: 166.5
## Median : 235.0
## Mean : 664.4
## 3rd Qu.: 462.8
## Max. :13718.0
##
## mining info:
## data ntransactions support confidence
## tr 115111 0.001 0.02
## call
## apriori(data = tr, parameter = list(supp = 0.001, conf = 0.02, maxlen = 10))
inspect(reglas.asociacion)
## lhs rhs support confidence
## [1] {} => {TECATE} 0.043670892 0.04367089
## [2] {} => {DEL} 0.023777050 0.02377705
## [3] {} => {PEPSI} 0.060333070 0.06033307
## [4] {} => {LA} 0.035452737 0.03545274
## [5] {} => {COCA} 0.113003970 0.11300397
## [6] {} => {COLA} 0.119171930 0.11917193
## [7] {ABSOR} => {SEC} 0.001138032 0.90972222
## [8] {SEC} => {ABSOR} 0.001138032 0.68947368
## [9] {COLA,JOYA} => {COCA} 0.001033785 0.72121212
## [10] {%} => {100} 0.001007723 1.00000000
## [11] {100} => {%} 0.001007723 0.83453237
## [12] {ZONA} => {SABOR} 0.001077221 0.96875000
## [13] {SABOR} => {ZONA} 0.001077221 0.53218884
## [14] {ZONA} => {DEL} 0.001111970 1.00000000
## [15] {DEL} => {ZONA} 0.001111970 0.04676653
## [16] {SABORES} => {PEÑAFIEL} 0.001033785 0.65384615
## [17] {PEÑAFIEL} => {SABORES} 0.001033785 0.46484375
## [18] {DOS} => {EQUIS} 0.001181468 1.00000000
## [19] {EQUIS} => {DOS} 0.001181468 0.46101695
## [20] {ROSA} => {TIA} 0.001059847 0.63874346
## [21] {TIA} => {ROSA} 0.001059847 0.42214533
## [22] {MARIA} => {DOÑA} 0.001450774 0.81862745
## [23] {DOÑA} => {MARIA} 0.001450774 0.41959799
## [24] {FIOR} => {DI} 0.001085908 1.00000000
## [25] {DI} => {FIOR} 0.001085908 0.56818182
## [26] {DART} => {(PLAST} 0.001181468 1.00000000
## [27] {(PLAST} => {DART} 0.001181468 0.71957672
## [28] {COLA,PEPSI} => {COCA} 0.001250966 0.66666667
## [29] {MODELO)} => {(CERV.} 0.001372588 1.00000000
## [30] {(CERV.} => {MODELO)} 0.001372588 0.80612245
## [31] {CREMAX} => {DE} 0.001372588 1.00000000
## [32] {DE} => {CREMAX} 0.001372588 0.13310868
## [33] {CHICO,COCA} => {TOPO} 0.001320465 0.91017964
## [34] {TOPO} => {CHICO,COCA} 0.001320465 0.10764873
## [35] {FORTILECHE} => {LECHE} 0.001598457 0.69696970
## [36] {LECHE} => {FORTILECHE} 0.001598457 0.09913793
## [37] {BIMBO,COCA} => {COLA} 0.001633206 0.71482890
## [38] {SIERRA} => {LA} 0.001268341 0.54275093
## [39] {LA} => {SIERRA} 0.001268341 0.03577555
## [40] {LIGHT,COCA} => {COLA} 0.001416025 0.98787879
## [41] {SEVEN} => {UP} 0.001450774 0.60507246
## [42] {UP} => {SEVEN} 0.001450774 0.41750000
## [43] {FINA} => {LA} 0.002223940 0.83116883
## [44] {LA} => {FINA} 0.002223940 0.06272972
## [45] {CARTA} => {BLANCA} 0.002223940 0.80757098
## [46] {BLANCA} => {CARTA} 0.002223940 0.25728643
## [47] {FRUTO)} => {BARRILITOS} 0.001494210 0.76785714
## [48] {BARRILITOS} => {FRUTO)} 0.001494210 0.60139860
## [49] {FRUTO)} => {(DEL} 0.001945948 1.00000000
## [50] {(DEL} => {FRUTO)} 0.001945948 0.60704607
## [51] {BLUE} => {HOUSE} 0.001433399 0.55369128
## [52] {HOUSE} => {BLUE} 0.001433399 0.47687861
## [53] {PEPSI,COCA} => {COLA} 0.002024133 0.76143791
## [54] {LAGER} => {EQUIS} 0.002058882 1.00000000
## [55] {EQUIS} => {LAGER} 0.002058882 0.80338983
## [56] {BENSON} => {HEDGES} 0.001476835 0.69672131
## [57] {HEDGES} => {BENSON} 0.001476835 0.67460317
## [58] {BENSON} => {&} 0.002119693 1.00000000
## [59] {&} => {BENSON} 0.002119693 0.60847880
## [60] {SABOR} => {DEL} 0.002024133 1.00000000
## [61] {DEL} => {SABOR} 0.002024133 0.08512970
## [62] {HEDGES} => {&} 0.002189191 1.00000000
## [63] {&} => {HEDGES} 0.002189191 0.62842893
## [64] {MANZANITA} => {SOL} 0.002362937 0.67830424
## [65] {SOL} => {MANZANITA} 0.002362937 0.40840841
## [66] {BARRILITOS} => {(DEL} 0.002484558 1.00000000
## [67] {(DEL} => {BARRILITOS} 0.002484558 0.77506775
## [68] {(DETERGENTE)} => {BLANCA} 0.001945948 0.68292683
## [69] {BLANCA} => {(DETERGENTE)} 0.001945948 0.22512563
## [70] {(DETERGENTE)} => {NIEVES} 0.002849424 1.00000000
## [71] {NIEVES} => {(DETERGENTE)} 0.002849424 0.39375750
## [72] {ARDILLA,LA} => {ARDILLA} 0.002319500 0.87828947
## [73] {ARDILLA} => {ARDILLA,LA} 0.002319500 0.17717319
## [74] {ARDILLA,LA} => {LA} 0.001320465 0.50000000
## [75] {LA} => {ARDILLA,LA} 0.001320465 0.03724577
## [76] {COLA,TECATE} => {COCA} 0.003605216 0.76851852
## [77] {COCA} => {COLA,TECATE} 0.003605216 0.03190344
## [78] {MEXICO} => {VELADORA} 0.004552128 0.95795247
## [79] {VELADORA} => {MEXICO} 0.004552128 0.46412755
## [80] {EL} => {ORO} 0.001859075 0.65045593
## [81] {ORO} => {EL} 0.001859075 0.38214286
## [82] {EL} => {GALLO} 0.002128380 0.74468085
## [83] {GALLO} => {EL} 0.002128380 0.36082474
## [84] {EL} => {DE} 0.002128380 0.74468085
## [85] {DE} => {EL} 0.002128380 0.20640270
## [86] {NUESTRA} => {LA} 0.002310813 0.50474383
## [87] {LA} => {NUESTRA} 0.002310813 0.06518010
## [88] {CHICO} => {TOPO} 0.004751935 0.84283513
## [89] {TOPO} => {CHICO} 0.004751935 0.38739377
## [90] {ZERO} => {COCA} 0.001824326 0.51980198
## [91] {ZERO} => {COLA} 0.003509656 1.00000000
## [92] {COLA} => {ZERO} 0.003509656 0.02945036
## [93] {(GAMESA)} => {SALADITAS} 0.004143826 0.68338109
## [94] {SALADITAS} => {(GAMESA)} 0.004143826 0.34970674
## [95] {PALL} => {MALL} 0.005759658 0.86666667
## [96] {MALL} => {PALL} 0.005759658 0.63688761
## [97] {MONTE} => {DEL} 0.003978768 0.52342857
## [98] {DEL} => {MONTE} 0.003978768 0.16733650
## [99] {FRUT} => {VALLE} 0.004526066 0.51380671
## [100] {VALLE} => {FRUT} 0.004526066 0.31826512
## [101] {ORO} => {GALLO} 0.004864870 1.00000000
## [102] {GALLO} => {ORO} 0.004864870 0.82474227
## [103] {ORO} => {DE} 0.004864870 1.00000000
## [104] {DE} => {ORO} 0.004864870 0.47177759
## [105] {COLA,COCA} => {COCA} 0.004725873 0.74215553
## [106] {COCA} => {COLA,COCA} 0.004725873 0.04182042
## [107] {COLA,COCA} => {COLA} 0.005151549 0.80900409
## [108] {COLA} => {COLA,COCA} 0.005151549 0.04322788
## [109] {BLANCA} => {NIEVES} 0.004343634 0.50251256
## [110] {NIEVES} => {BLANCA} 0.004343634 0.60024010
## [111] {FUERTE} => {DEL} 0.004890931 0.47510549
## [112] {DEL} => {FUERTE} 0.004890931 0.20569967
## [113] {COSTEÑA} => {LA} 0.005811782 0.56743003
## [114] {LA} => {COSTEÑA} 0.005811782 0.16393041
## [115] {GALLO} => {DE} 0.005898654 1.00000000
## [116] {DE} => {GALLO} 0.005898654 0.57203033
## [117] {LIGHT} => {PEPSI} 0.001242279 0.13414634
## [118] {PEPSI} => {LIGHT} 0.001242279 0.02059035
## [119] {LIGHT} => {COCA} 0.003952707 0.42682927
## [120] {COCA} => {LIGHT} 0.003952707 0.03497847
## [121] {LIGHT} => {COLA} 0.006385141 0.68949343
## [122] {COLA} => {LIGHT} 0.006385141 0.05357924
## [123] {TOPO} => {COLA} 0.001303090 0.10623229
## [124] {VALLE} => {DEL} 0.002936296 0.20647526
## [125] {DEL} => {VALLE} 0.002936296 0.12349288
## [126] {ARDILLA} => {LA} 0.007210432 0.55076311
## [127] {LA} => {ARDILLA} 0.007210432 0.20338152
## [128] {LECHE} => {NUTRI} 0.009521245 0.59051724
## [129] {NUTRI} => {LECHE} 0.009521245 0.53203883
## [130] {LA} => {COLA} 0.001381275 0.03896104
## [131] {COCA} => {COLA} 0.080574402 0.71302276
## [132] {COLA} => {COCA} 0.080574402 0.67611897
## [133] {SABOR, ZONA} => {DEL} 0.001077221 1.00000000
## [134] {DEL, ZONA} => {SABOR} 0.001077221 0.96875000
## [135] {DEL, SABOR} => {ZONA} 0.001077221 0.53218884
## [136] {BARRILITOS, FRUTO)} => {(DEL} 0.001494210 1.00000000
## [137] {(DEL, FRUTO)} => {BARRILITOS} 0.001494210 0.76785714
## [138] {(DEL, BARRILITOS} => {FRUTO)} 0.001494210 0.60139860
## [139] {BENSON, HEDGES} => {&} 0.001476835 1.00000000
## [140] {&, BENSON} => {HEDGES} 0.001476835 0.69672131
## [141] {&, HEDGES} => {BENSON} 0.001476835 0.67460317
## [142] {(DETERGENTE), BLANCA} => {NIEVES} 0.001945948 1.00000000
## [143] {(DETERGENTE), NIEVES} => {BLANCA} 0.001945948 0.68292683
## [144] {BLANCA, NIEVES} => {(DETERGENTE)} 0.001945948 0.44800000
## [145] {ARDILLA, ARDILLA,LA} => {LA} 0.001216217 0.52434457
## [146] {ARDILLA,LA, LA} => {ARDILLA} 0.001216217 0.92105263
## [147] {ARDILLA, LA} => {ARDILLA,LA} 0.001216217 0.16867470
## [148] {EL, ORO} => {GALLO} 0.001859075 1.00000000
## [149] {EL, GALLO} => {ORO} 0.001859075 0.87346939
## [150] {GALLO, ORO} => {EL} 0.001859075 0.38214286
## [151] {EL, ORO} => {DE} 0.001859075 1.00000000
## [152] {DE, EL} => {ORO} 0.001859075 0.87346939
## [153] {DE, ORO} => {EL} 0.001859075 0.38214286
## [154] {EL, GALLO} => {DE} 0.002128380 1.00000000
## [155] {DE, EL} => {GALLO} 0.002128380 1.00000000
## [156] {DE, GALLO} => {EL} 0.002128380 0.36082474
## [157] {COCA, ZERO} => {COLA} 0.001824326 1.00000000
## [158] {COLA, ZERO} => {COCA} 0.001824326 0.51980198
## [159] {COCA, COLA} => {ZERO} 0.001824326 0.02264151
## [160] {GALLO, ORO} => {DE} 0.004864870 1.00000000
## [161] {DE, ORO} => {GALLO} 0.004864870 1.00000000
## [162] {DE, GALLO} => {ORO} 0.004864870 0.82474227
## [163] {COCA, COLA,COCA} => {COLA} 0.003900583 0.82536765
## [164] {COLA, COLA,COCA} => {COCA} 0.003900583 0.75716695
## [165] {COCA, COLA} => {COLA,COCA} 0.003900583 0.04840970
## [166] {COCA, LIGHT} => {COLA} 0.003848459 0.97362637
## [167] {COLA, LIGHT} => {COCA} 0.003848459 0.60272109
## [168] {COCA, COLA} => {LIGHT} 0.003848459 0.04776280
## [169] {EL, GALLO, ORO} => {DE} 0.001859075 1.00000000
## [170] {DE, EL, ORO} => {GALLO} 0.001859075 1.00000000
## [171] {DE, EL, GALLO} => {ORO} 0.001859075 0.87346939
## [172] {DE, GALLO, ORO} => {EL} 0.001859075 0.38214286
## coverage lift count
## [1] 1.000000000 1.0000000 5027
## [2] 1.000000000 1.0000000 2737
## [3] 1.000000000 1.0000000 6945
## [4] 1.000000000 1.0000000 4081
## [5] 1.000000000 1.0000000 13008
## [6] 1.000000000 1.0000000 13718
## [7] 0.001250966 551.1528143 131
## [8] 0.001650581 551.1528143 131
## [9] 0.001433399 6.3821839 119
## [10] 0.001007723 828.1366906 116
## [11] 0.001207530 828.1366906 116
## [12] 0.001111970 478.5999195 124
## [13] 0.002024133 478.5999195 124
## [14] 0.001111970 42.0573621 128
## [15] 0.023777050 42.0573621 128
## [16] 0.001581083 294.0034555 119
## [17] 0.002223940 294.0034555 119
## [18] 0.001181468 390.2067797 136
## [19] 0.002562744 390.2067797 136
## [20] 0.001659268 254.4166018 122
## [21] 0.002510620 254.4166018 122
## [22] 0.001772202 236.7663932 167
## [23] 0.003457532 236.7663932 167
## [24] 0.001085908 523.2318182 125
## [25] 0.001911199 523.2318182 125
## [26] 0.001181468 609.0529101 136
## [27] 0.001641893 609.0529101 136
## [28] 0.001876450 5.8994977 144
## [29] 0.001372588 587.3010204 158
## [30] 0.001702704 587.3010204 158
## [31] 0.001372588 96.9764111 158
## [32] 0.010311786 96.9764111 158
## [33] 0.001450774 74.2009126 152
## [34] 0.012266421 74.2009126 152
## [35] 0.002293439 43.2267666 184
## [36] 0.016123568 43.2267666 184
## [37] 0.002284751 5.9982993 188
## [38] 0.002336875 15.3091405 146
## [39] 0.035452737 15.3091405 146
## [40] 0.001433399 8.2895258 163
## [41] 0.002397686 174.1262409 167
## [42] 0.003474907 174.1262409 167
## [43] 0.002675678 23.4444193 256
## [44] 0.035452737 23.4444193 256
## [45] 0.002753864 93.4274400 256
## [46] 0.008643831 93.4274400 256
## [47] 0.001945948 309.0517607 172
## [48] 0.002484558 309.0517607 172
## [49] 0.001945948 311.9539295 224
## [50] 0.003205602 311.9539295 224
## [51] 0.002588806 184.2079664 165
## [52] 0.003005794 184.2079664 165
## [53] 0.002658304 6.3894066 233
## [54] 0.002058882 390.2067797 237
## [55] 0.002562744 390.2067797 237
## [56] 0.002119693 318.2551067 170
## [57] 0.002189191 318.2551067 170
## [58] 0.002119693 287.0598504 244
## [59] 0.003483594 287.0598504 244
## [60] 0.002024133 42.0573621 233
## [61] 0.023777050 42.0573621 233
## [62] 0.002189191 287.0598504 252
## [63] 0.003483594 287.0598504 252
## [64] 0.003483594 117.2376566 272
## [65] 0.005785720 117.2376566 272
## [66] 0.002484558 311.9539295 286
## [67] 0.003205602 311.9539295 286
## [68] 0.002849424 79.0074274 224
## [69] 0.008643831 79.0074274 224
## [70] 0.002849424 138.1884754 328
## [71] 0.007236493 138.1884754 328
## [72] 0.002640929 67.0874450 267
## [73] 0.013091711 67.0874450 267
## [74] 0.002640929 14.1032835 152
## [75] 0.035452737 14.1032835 152
## [76] 0.004691124 6.8008099 415
## [77] 0.113003970 6.8008099 415
## [78] 0.004751935 97.6712724 524
## [79] 0.009807925 97.6712724 524
## [80] 0.002858111 133.7047004 214
## [81] 0.004864870 133.7047004 214
## [82] 0.002858111 126.2458873 245
## [83] 0.005898654 126.2458873 245
## [84] 0.002858111 72.2164764 245
## [85] 0.010311786 72.2164764 245
## [86] 0.004578190 14.2370908 266
## [87] 0.035452737 14.2370908 266
## [88] 0.005638036 68.7107612 547
## [89] 0.012266421 68.7107612 547
## [90] 0.003509656 4.5998559 210
## [91] 0.003509656 8.3912378 404
## [92] 0.119171930 8.3912378 404
## [93] 0.006063712 57.6720532 477
## [94] 0.011849432 57.6720532 477
## [95] 0.006645759 95.8336856 663
## [96] 0.009043445 95.8336856 663
## [97] 0.007601359 22.0140249 458
## [98] 0.023777050 22.0140249 458
## [99] 0.008808889 36.1299962 521
## [100] 0.014221056 36.1299962 521
## [101] 0.004864870 169.5301915 560
## [102] 0.005898654 169.5301915 560
## [103] 0.004864870 96.9764111 560
## [104] 0.010311786 96.9764111 560
## [105] 0.006367767 6.5675173 544
## [106] 0.113003970 6.5675173 544
## [107] 0.006367767 6.7885457 593
## [108] 0.119171930 6.7885457 593
## [109] 0.008643831 69.4414449 500
## [110] 0.007236493 69.4414449 500
## [111] 0.010294411 19.9816834 563
## [112] 0.023777050 19.9816834 563
## [113] 0.010242288 16.0052530 669
## [114] 0.035452737 16.0052530 669
## [115] 0.005898654 96.9764111 679
## [116] 0.010311786 96.9764111 679
## [117] 0.009260627 2.2234297 143
## [118] 0.060333070 2.2234297 143
## [119] 0.009260627 3.7771175 455
## [120] 0.113003970 3.7771175 455
## [121] 0.009260627 5.7857034 735
## [122] 0.119171930 5.7857034 735
## [123] 0.012266421 0.8914204 150
## [124] 0.014221056 8.6838048 338
## [125] 0.023777050 8.6838048 338
## [126] 0.013091711 15.5351364 830
## [127] 0.035452737 15.5351364 830
## [128] 0.016123568 32.9975875 1096
## [129] 0.017895770 32.9975875 1096
## [130] 0.035452737 0.3269313 159
## [131] 0.113003970 5.9831435 9275
## [132] 0.119171930 5.9831435 9275
## [133] 0.001077221 42.0573621 124
## [134] 0.001111970 478.5999195 124
## [135] 0.002024133 478.5999195 124
## [136] 0.001494210 311.9539295 172
## [137] 0.001945948 309.0517607 172
## [138] 0.002484558 309.0517607 172
## [139] 0.001476835 287.0598504 170
## [140] 0.002119693 318.2551067 170
## [141] 0.002189191 318.2551067 170
## [142] 0.001945948 138.1884754 224
## [143] 0.002849424 79.0074274 224
## [144] 0.004343634 157.2247805 224
## [145] 0.002319500 14.7899602 140
## [146] 0.001320465 70.3538749 140
## [147] 0.007210432 63.8694515 140
## [148] 0.001859075 169.5301915 214
## [149] 0.002128380 179.5463120 214
## [150] 0.004864870 133.7047004 214
## [151] 0.001859075 96.9764111 214
## [152] 0.002128380 179.5463120 214
## [153] 0.004864870 133.7047004 214
## [154] 0.002128380 96.9764111 245
## [155] 0.002128380 169.5301915 245
## [156] 0.005898654 126.2458873 245
## [157] 0.001824326 8.3912378 210
## [158] 0.003509656 4.5998559 210
## [159] 0.080574402 6.4512049 210
## [160] 0.004864870 96.9764111 560
## [161] 0.004864870 169.5301915 560
## [162] 0.005898654 169.5301915 560
## [163] 0.004725873 6.9258562 449
## [164] 0.005151549 6.7003571 449
## [165] 0.080574402 7.6023047 449
## [166] 0.003952707 8.1699304 443
## [167] 0.006385141 5.3336276 443
## [168] 0.080574402 5.1576211 443
## [169] 0.001859075 96.9764111 214
## [170] 0.001859075 169.5301915 214
## [171] 0.002128380 179.5463120 214
## [172] 0.004864870 133.7047004 214
top10reglas <- head(reglas.asociacion, n = 10, by = "confidence")
plot(top10reglas, method = "graph", engine = "htmlwidget")
En esta actividad, observamos 6 tecnicas diferentes para hacer una limpieza de datos. Como eliminar datos innecesarios, convertir variables, borrar repetidos, etc. También, se programo un basket, el cual nos permite conocer que variables provocan o influyen en otras variables de nuestra base de datos. En este caso, se filtro una base de datos con información sobre tiendas de abarrotes y se consiguio una grafica que nos muestra que productos provocan la compra de otros productos de la tienda.