A la base de datos se le hicieron los siguientes cambios:

1. Se cambió el formato de la fecha a Fecha corta.

2. Se duplicaron los primeros 5 registros.

3. Se cambió el formato a Hora (Español México).

4. Se cambió el formato a código de Barras (para que salga completo).

5. Se guardó como CSV UTF-8 (Delimitado por comas).

Importar la base de datos

# Load the raw export into `bd`; read.csv() defaults apply (header row, comma separator).
bd <- read.csv(
  file = "/Users/georginamartinez/Documents/Tec/Séptimo Semestre/Analítica para negocios, de los datos a decisiones/abarrotes.csv"
)

Entender la base de datos

# Per-column overview of the raw data: quantiles for numeric columns,
# length/class for character columns, plus NA counts where present.
summary(bd)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 1.00   
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 1.00   
##                                        Mean   :5.950e+12   Mean   : 2.11   
##                                        3rd Qu.:7.501e+12   3rd Qu.: 1.00   
##                                        Max.   :1.750e+13   Max.   :30.00   
##                                                            NA's   :199188  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##                                                                        
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##                                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##                                                                         
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 
# install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
# count(bd, vcClaveTienda, sort = TRUE)
# count(bd, DescGiro, sort = TRUE)
# count(bd, Marca, sort = TRUE)
# count(bd, Fabricante, sort = TRUE)
# count(bd, Producto, sort = TRUE)
# count(bd, NombreFamilia, sort = TRUE)
# count(bd, NombreCategoria, sort = TRUE)
# count(bd, Estado, sort = TRUE)
# count(bd, Mts.2, sort = TRUE)
# count(bd, Tipo.ubicación, sort = TRUE)
# count(bd, Giro, sort = TRUE)
# count(bd, Hora.inicio, sort = TRUE)
# count(bd, Hora.cierre, sort = TRUE)

# install.packages("tidyverse")
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ ggplot2 3.3.6     ✔ purrr   0.3.4
## ✔ tibble  3.1.8     ✔ stringr 1.4.1
## ✔ tidyr   1.2.0     ✔ forcats 0.5.2
## ✔ readr   2.1.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
# tibble(bd)

# str(bd)

# head(bd)
# head(bd, n=7)

# tail(bd)

# install.packages("janitor")
library(janitor)
## 
## Attaching package: 'janitor'
## 
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
# Cross-tabulate record counts: one row per store, one column per department
bd %>%
  tabyl(vcClaveTienda, NombreDepartamento)
##  vcClaveTienda Abarrotes Bebes e Infantiles Carnes Farmacia Ferretería Mercería
##          MX001     95415                515      1      147        245       28
##          MX002      6590                 21      0        4         10        0
##          MX003      4026                 15      0        2          8        0
##          MX004     82234                932      0      102        114       16
##          MX005     10014                  0      0        0          0        0
##  Papelería Productos a Eliminar Vinos y Licores
##         35                    3              80
##          0                    0               4
##          0                    0               0
##         32                    5              20
##          7                    0               0

Observaciones

1. Casi ningún registro cuenta con PLU.
2. Cambiar formato de fecha.
3. Cambiar formato de hora.
4. Hay precios negativos.
5. Hay unidades menores a 1.

Técnicas para limpieza de datos

Técnica 1. Remover valores irrelevantes

# Drop columns: remove PLU and Codigo.Barras on a copy, so the original
# `bd` stays untouched.
bd1 <- subset(bd, select = -c(PLU, Codigo.Barras))

# Drop rows: keep only records with a strictly positive price. The logical
# mask is computed from `bd1` itself (not from the raw `bd`), so it is
# guaranteed to line up with the rows being subset.
bd2 <- bd1[bd1$Precio > 0, ]
summary(bd1)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200625      Length:200625      Length:200625      Min.   :-147.00  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.42  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200625     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33964   Class :character  
##  Median : 12.31   Median : 1.000   Median :105993   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193990                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383005                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200625      Length:200625      Length:200625      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
# Overview of `bd2`, the copy restricted to rows with Precio > 0
summary (bd2)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200478      Length:200478      Length:200478      Length:200478     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200478      Length:200478      Length:200478      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200478     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33977   Class :character  
##  Median : 12.31   Median : 1.000   Median :106034   Mode  :character  
##  Mean   : 15.31   Mean   : 1.261   Mean   :194096                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383062                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200478      Length:200478      Length:200478      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200478      Length:200478      Length:200478      Length:200478     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
# Esto no lo usaremos, pondremos precios negativos como absoluto 

Técnica 2. Remover valores duplicados

# Which rows are exact repeats of an earlier row?
bd1[duplicated(bd1), ]
##    vcClaveTienda  DescGiro      Fecha           Hora                      Marca
## 6          MX001 Abarrotes 19/06/2020 08:16:21 a. m.                NUTRI LECHE
## 7          MX001 Abarrotes 19/06/2020 08:23:33 a. m.                     DAN UP
## 8          MX001 Abarrotes 19/06/2020 08:24:33 a. m.                      BIMBO
## 9          MX001 Abarrotes 19/06/2020 08:24:33 a. m.                      PEPSI
## 10         MX001 Abarrotes 19/06/2020 08:26:28 a. m. BLANCA NIEVES (DETERGENTE)
##                    Fabricante                           Producto Precio
## 6                     MEXILAC                Nutri Leche 1 Litro   16.0
## 7            DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL   14.0
## 8                 GRUPO BIMBO                Rebanadas Bimbo 2Pz    5.0
## 9         PEPSI-COLA MEXICANA                   Pepsi N.R. 400Ml    8.0
## 10 FABRICA DE JABON LA CORONA      Detergente Blanca Nieves 500G   19.5
##    Ult.Costo Unidades F.Ticket NombreDepartamento          NombreFamilia
## 6      12.31        1        1          Abarrotes Lacteos y Refrigerados
## 7      14.00        1        2          Abarrotes Lacteos y Refrigerados
## 8       5.00        1        3          Abarrotes         Pan y Tortilla
## 9       8.00        1        3          Abarrotes                Bebidas
## 10     15.00        1        4          Abarrotes     Limpieza del Hogar
##              NombreCategoria     Estado Mts.2 Tipo.ubicación      Giro
## 6                      Leche Nuevo León    60        Esquina Abarrotes
## 7                     Yogurt Nuevo León    60        Esquina Abarrotes
## 8      Pan Dulce Empaquetado Nuevo León    60        Esquina Abarrotes
## 9  Refrescos Plástico (N.R.) Nuevo León    60        Esquina Abarrotes
## 10                Lavandería Nuevo León    60        Esquina Abarrotes
##    Hora.inicio Hora.cierre
## 6         8:00       22:00
## 7         8:00       22:00
## 8         8:00       22:00
## 9         8:00       22:00
## 10        8:00       22:00
# Number of rows flagged as duplicates (TRUE values counted as 1)
sum(duplicated(bd1))
## [1] 5
# Remove duplicated rows, keeping the first occurrence of each
library(dplyr)
bd3 <- distinct(bd1)

Técnica 3. Errores tipográficos y errores similares

# Negative prices are handled as sign errors: keep their absolute value
bd4 <- bd3 %>%
  mutate(Precio = abs(Precio))
summary(bd4)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 
# Round unit counts up to whole numbers (ceiling() keeps the column numeric)
bd5 <- bd4 %>%
  mutate(Unidades = ceiling(Unidades))
summary(bd5)
##  vcClaveTienda        DescGiro            Fecha               Hora          
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
## 

Técnica 4. Convertir tipos de datos

# Convert Fecha from character to Date.
# The values carry a four-digit year (e.g. "19/06/2020"), so the year must be
# parsed with %Y. With %y only the first two digits of the year are read
# ("20" -> 2020), which silently assigns the wrong year to any other date.
bd6 <- bd5
bd6$Fecha <- as.Date(bd6$Fecha, format = "%d/%m/%Y")
tibble(bd6)
## # A tibble: 200,620 × 20
##    vcCla…¹ DescG…² Fecha      Hora  Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>   <chr>   <date>     <chr> <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001   Abarro… 2020-06-19 08:1… NUTR… MEXILAC Nutri …   16     12.3        1
##  2 MX001   Abarro… 2020-06-19 08:2… DAN … DANONE… DANUP …   14     14          1
##  3 MX001   Abarro… 2020-06-19 08:2… BIMBO GRUPO … Rebana…    5      5          1
##  4 MX001   Abarro… 2020-06-19 08:2… PEPSI PEPSI-… Pepsi …    8      8          1
##  5 MX001   Abarro… 2020-06-19 08:2… BLAN… FABRIC… Deterg…   19.5   15          1
##  6 MX001   Abarro… 2020-06-19 08:2… FLASH ALEN    Flash …    9.5    7.31       1
##  7 MX001   Abarro… 2020-06-19 08:2… VARI… DANONE… Danone…   11     11          1
##  8 MX001   Abarro… 2020-06-19 08:2… ZOTE  FABRIC… Jabon …    9.5    7.31       1
##  9 MX001   Abarro… 2020-06-19 08:2… ALWA… PROCTE… T Feme…   23.5   18.1        1
## 10 MX001   Abarro… 2020-06-19 03:2… JUMEX JUMEX   Jugo D…   12     12          1
## # … with 200,610 more rows, 10 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto, ⁵​Ult.Costo, ⁶​Unidades
# Reduce Hora to the hour of day, kept as a two-character 24-hour string.
# The raw values are 12-hour clock times such as "08:16:21 a. m."; taking the
# first two characters alone would make 8 a. m. and 8 p. m. indistinguishable,
# so the meridiem marker is folded into the hour before the text is trimmed.
bd7 <- bd6
bd7$Hora <- local({
  hora_12 <- as.integer(substr(bd7$Hora, start = 1, stop = 2))
  # NOTE(review): assumes the only letters in Hora belong to the "a. m." /
  # "p. m." marker -- confirm against the full set of values.
  es_am <- grepl("a", bd7$Hora, ignore.case = TRUE)
  es_pm <- grepl("p", bd7$Hora, ignore.case = TRUE)
  hora_24 <- hora_12
  # which() drops NA positions, so unparseable hours stay NA instead of
  # breaking the subscripted assignment.
  medianoche <- which(es_am & hora_12 == 12L) # 12 a. m. is hour 0
  tarde <- which(es_pm & hora_12 < 12L)       # 1-11 p. m. are hours 13-23
  hora_24[medianoche] <- 0L
  hora_24[tarde] <- hora_12[tarde] + 12L
  # Values without a marker are left as they are (treated as 24-hour already).
  ifelse(is.na(hora_24), NA_character_, sprintf("%02d", hora_24))
})
tibble(bd7)
## # A tibble: 200,620 × 20
##    vcCla…¹ DescG…² Fecha      Hora  Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
##    <chr>   <chr>   <date>     <chr> <chr> <chr>   <chr>    <dbl>   <dbl>   <dbl>
##  1 MX001   Abarro… 2020-06-19 08    NUTR… MEXILAC Nutri …   16     12.3        1
##  2 MX001   Abarro… 2020-06-19 08    DAN … DANONE… DANUP …   14     14          1
##  3 MX001   Abarro… 2020-06-19 08    BIMBO GRUPO … Rebana…    5      5          1
##  4 MX001   Abarro… 2020-06-19 08    PEPSI PEPSI-… Pepsi …    8      8          1
##  5 MX001   Abarro… 2020-06-19 08    BLAN… FABRIC… Deterg…   19.5   15          1
##  6 MX001   Abarro… 2020-06-19 08    FLASH ALEN    Flash …    9.5    7.31       1
##  7 MX001   Abarro… 2020-06-19 08    VARI… DANONE… Danone…   11     11          1
##  8 MX001   Abarro… 2020-06-19 08    ZOTE  FABRIC… Jabon …    9.5    7.31       1
##  9 MX001   Abarro… 2020-06-19 08    ALWA… PROCTE… T Feme…   23.5   18.1        1
## 10 MX001   Abarro… 2020-06-19 03    JUMEX JUMEX   Jugo D…   12     12          1
## # … with 200,610 more rows, 10 more variables: F.Ticket <int>,
## #   NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## #   Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## #   Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## #   ¹​vcClaveTienda, ²​DescGiro, ³​Fabricante, ⁴​Producto, ⁵​Ult.Costo, ⁶​Unidades
# Cast the two-character hour strings to integer, then inspect column types
bd7[["Hora"]] <- as.integer(bd7[["Hora"]])
str(bd7)
## 'data.frame':    200620 obs. of  20 variables:
##  $ vcClaveTienda     : chr  "MX001" "MX001" "MX001" "MX001" ...
##  $ DescGiro          : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Fecha             : Date, format: "2020-06-19" "2020-06-19" ...
##  $ Hora              : int  8 8 8 8 8 8 8 8 8 3 ...
##  $ Marca             : chr  "NUTRI LECHE" "DAN UP" "BIMBO" "PEPSI" ...
##  $ Fabricante        : chr  "MEXILAC" "DANONE DE MEXICO" "GRUPO BIMBO" "PEPSI-COLA MEXICANA" ...
##  $ Producto          : chr  "Nutri Leche 1 Litro" "DANUP STRAWBERRY P/BEBER 350GR NAL" "Rebanadas Bimbo 2Pz" "Pepsi N.R. 400Ml" ...
##  $ Precio            : num  16 14 5 8 19.5 9.5 11 9.5 23.5 12 ...
##  $ Ult.Costo         : num  12.3 14 5 8 15 ...
##  $ Unidades          : num  1 1 1 1 1 1 1 1 1 1 ...
##  $ F.Ticket          : int  1 2 3 3 4 4 4 4 4 5 ...
##  $ NombreDepartamento: chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ NombreFamilia     : chr  "Lacteos y Refrigerados" "Lacteos y Refrigerados" "Pan y Tortilla" "Bebidas" ...
##  $ NombreCategoria   : chr  "Leche" "Yogurt" "Pan Dulce Empaquetado" "Refrescos Plástico (N.R.)" ...
##  $ Estado            : chr  "Nuevo León" "Nuevo León" "Nuevo León" "Nuevo León" ...
##  $ Mts.2             : int  60 60 60 60 60 60 60 60 60 60 ...
##  $ Tipo.ubicación    : chr  "Esquina" "Esquina" "Esquina" "Esquina" ...
##  $ Giro              : chr  "Abarrotes" "Abarrotes" "Abarrotes" "Abarrotes" ...
##  $ Hora.inicio       : chr  "8:00" "8:00" "8:00" "8:00" ...
##  $ Hora.cierre       : chr  "22:00" "22:00" "22:00" "22:00" ...

Técnica 5. Valores faltantes

# How many NA cells are there in the whole cleaned data frame?
sum(is.na(bd7))
## [1] 0
# Same total for the raw data frame, for comparison
sum(is.na(bd))
## [1] 199188
# NA count for each column of the cleaned data (named integer vector)
vapply(bd7, function(columna) sum(is.na(columna)), integer(1))
##      vcClaveTienda           DescGiro              Fecha               Hora 
##                  0                  0                  0                  0 
##              Marca         Fabricante           Producto             Precio 
##                  0                  0                  0                  0 
##          Ult.Costo           Unidades           F.Ticket NombreDepartamento 
##                  0                  0                  0                  0 
##      NombreFamilia    NombreCategoria             Estado              Mts.2 
##                  0                  0                  0                  0 
##     Tipo.ubicación               Giro        Hora.inicio        Hora.cierre 
##                  0                  0                  0                  0
# NA count for each column of the raw data (named integer vector)
vapply(bd, function(columna) sum(is.na(columna)), integer(1))
##      vcClaveTienda           DescGiro      Codigo.Barras                PLU 
##                  0                  0                  0             199188 
##              Fecha               Hora              Marca         Fabricante 
##                  0                  0                  0                  0 
##           Producto             Precio          Ult.Costo           Unidades 
##                  0                  0                  0                  0 
##           F.Ticket NombreDepartamento      NombreFamilia    NombreCategoria 
##                  0                  0                  0                  0 
##             Estado              Mts.2     Tipo.ubicación               Giro 
##                  0                  0                  0                  0 
##        Hora.inicio        Hora.cierre 
##                  0                  0
# Keep only the rows of the raw data that have no NA in any column
bd8 <- na.omit(bd)
summary(bd8)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:1437        Length:1437        Min.   :6.750e+08   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:6.750e+08   1st Qu.: 1.000  
##  Mode  :character   Mode  :character   Median :6.750e+08   Median : 1.000  
##                                        Mean   :2.616e+11   Mean   : 2.112  
##                                        3rd Qu.:6.750e+08   3rd Qu.: 1.000  
##                                        Max.   :7.501e+12   Max.   :30.000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:1437        Length:1437        Length:1437        Length:1437       
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio        Ult.Costo        Unidades    
##  Length:1437        Min.   :30.00   Min.   : 1.00   Min.   :1.000  
##  Class :character   1st Qu.:90.00   1st Qu.:64.62   1st Qu.:1.000  
##  Mode  :character   Median :90.00   Median :64.62   Median :1.000  
##                     Mean   :87.94   Mean   :56.65   Mean   :1.124  
##                     3rd Qu.:90.00   3rd Qu.:64.62   3rd Qu.:1.000  
##                     Max.   :90.00   Max.   :64.62   Max.   :7.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :   772   Length:1437        Length:1437        Length:1437       
##  1st Qu.: 99955   Class :character   Class :character   Class :character  
##  Median :102493   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :100595                                                           
##  3rd Qu.:106546                                                           
##  Max.   :118356                                                           
##     Estado              Mts.2       Tipo.ubicación         Giro          
##  Length:1437        Min.   :58.00   Length:1437        Length:1437       
##  Class :character   1st Qu.:58.00   Class :character   Class :character  
##  Mode  :character   Median :58.00   Mode  :character   Mode  :character  
##                     Mean   :58.07                                        
##                     3rd Qu.:58.00                                        
##                     Max.   :60.00                                        
##  Hora.inicio        Hora.cierre       
##  Length:1437        Length:1437       
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Fill every NA cell of the raw data with 0 (on a copy)
bd9 <- replace(bd, is.na(bd), 0)
summary(bd9)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU          
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 0.00000  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 0.00000  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 0.00000  
##                                        Mean   :5.950e+12   Mean   : 0.01513  
##                                        3rd Qu.:7.501e+12   3rd Qu.: 0.00000  
##                                        Max.   :1.750e+13   Max.   :30.00000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Replace missing PLU values with the mean of the observed PLU values.
# The raw data frame `bd` is copied first so it stays untouched.
bd10 <- bd
bd10[["PLU"]] <- replace(
  bd10[["PLU"]],
  is.na(bd10[["PLU"]]),
  mean(bd10[["PLU"]], na.rm = TRUE)
)
summary(bd10)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 2.112  
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 2.112  
##                                        Mean   :5.950e+12   Mean   : 2.112  
##                                        3rd Qu.:7.501e+12   3rd Qu.: 2.112  
##                                        Max.   :1.750e+13   Max.   :30.000  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :-147.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.42   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
## 
# Replace negative values with zero, working on a copy of `bd`.
# Only numeric columns are inspected. Comparing the whole data frame
# (`bd11 < 0`) would also compare the character columns against "0" as
# text, so strings that sort before "0" (for example empty strings) would
# be overwritten, and the outcome would depend on the locale's collation.
bd11 <- bd
bd11[] <- lapply(bd11, function(x) {
  if (is.numeric(x)) {
    negative <- !is.na(x) & x < 0  # NA entries are left as NA
    # Assign only when needed so columns without negatives keep their type.
    if (any(negative)) x[negative] <- 0
  }
  x
})
summary(bd11)
##  vcClaveTienda        DescGiro         Codigo.Barras            PLU        
##  Length:200625      Length:200625      Min.   :8.347e+05   Min.   : 1.00   
##  Class :character   Class :character   1st Qu.:7.501e+12   1st Qu.: 1.00   
##  Mode  :character   Mode  :character   Median :7.501e+12   Median : 1.00   
##                                        Mean   :5.950e+12   Mean   : 2.11   
##                                        3rd Qu.:7.501e+12   3rd Qu.: 1.00   
##                                        Max.   :1.750e+13   Max.   :30.00   
##                                                            NA's   :199188  
##     Fecha               Hora              Marca            Fabricante       
##  Length:200625      Length:200625      Length:200625      Length:200625     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##    Producto             Precio          Ult.Costo         Unidades     
##  Length:200625      Min.   :   0.00   Min.   :  0.38   Min.   : 0.200  
##  Class :character   1st Qu.:  11.00   1st Qu.:  8.46   1st Qu.: 1.000  
##  Mode  :character   Median :  16.00   Median : 12.31   Median : 1.000  
##                     Mean   :  19.44   Mean   : 15.31   Mean   : 1.262  
##                     3rd Qu.:  25.00   3rd Qu.: 19.23   3rd Qu.: 1.000  
##                     Max.   :1000.00   Max.   :769.23   Max.   :96.000  
##                                                                        
##     F.Ticket      NombreDepartamento NombreFamilia      NombreCategoria   
##  Min.   :     1   Length:200625      Length:200625      Length:200625     
##  1st Qu.: 33964   Class :character   Class :character   Class :character  
##  Median :105993   Mode  :character   Mode  :character   Mode  :character  
##  Mean   :193990                                                           
##  3rd Qu.:383005                                                           
##  Max.   :450040                                                           
##                                                                           
##     Estado              Mts.2      Tipo.ubicación         Giro          
##  Length:200625      Min.   :47.0   Length:200625      Length:200625     
##  Class :character   1st Qu.:53.0   Class :character   Class :character  
##  Mode  :character   Median :60.0   Mode  :character   Mode  :character  
##                     Mean   :56.6                                        
##                     3rd Qu.:60.0                                        
##                     Max.   :62.0                                        
##                                                                         
##  Hora.inicio        Hora.cierre       
##  Length:200625      Length:200625     
##  Class :character   Class :character  
##  Mode  :character   Mode  :character  
##                                       
##                                       
##                                       
## 

Técnica 6. Método estadístico

# Start from a copy of bd7 and draw one horizontal boxplot each for the
# price and units columns as a visual check of their distributions.
bd12 <- bd7
boxplot(bd12[["Precio"]], horizontal = TRUE)

boxplot(bd12[["Unidades"]], horizontal = TRUE)

# Add derived columns
# lubridate provides wday(); install it once if it is not available:
# install.packages("lubridate")
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
# Day of the week of each sale as a number (1-7).
# NOTE(review): with lubridate's default week start, 1 should be Sunday;
# confirm if the `lubridate.week.start` option is set elsewhere.
bd12[["Dia_de_la_semana"]] <- wday(bd12[["Fecha"]])
summary(bd12)
##  vcClaveTienda        DescGiro             Fecha                 Hora       
##  Length:200620      Length:200620      Min.   :2020-05-01   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:2020-06-06   1st Qu.: 5.000  
##  Mode  :character   Mode  :character   Median :2020-07-11   Median : 8.000  
##                                        Mean   :2020-07-18   Mean   : 7.299  
##                                        3rd Qu.:2020-08-29   3rd Qu.:10.000  
##                                        Max.   :2020-11-11   Max.   :12.000  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Dia_de_la_semana
##  Min.   :1.000   
##  1st Qu.:2.000   
##  Median :4.000   
##  Mean   :3.912   
##  3rd Qu.:6.000   
##  Max.   :7.000
# Line subtotal: unit price times units sold.
bd12[["Subtotal"]] <- with(bd12, Precio * Unidades)
summary(bd12)
##  vcClaveTienda        DescGiro             Fecha                 Hora       
##  Length:200620      Length:200620      Min.   :2020-05-01   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:2020-06-06   1st Qu.: 5.000  
##  Mode  :character   Mode  :character   Median :2020-07-11   Median : 8.000  
##                                        Mean   :2020-07-18   Mean   : 7.299  
##                                        3rd Qu.:2020-08-29   3rd Qu.:10.000  
##                                        Max.   :2020-11-11   Max.   :12.000  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Dia_de_la_semana    Subtotal      
##  Min.   :1.000    Min.   :   1.00  
##  1st Qu.:2.000    1st Qu.:  12.00  
##  Median :4.000    Median :  18.00  
##  Mean   :3.912    Mean   :  24.33  
##  3rd Qu.:6.000    3rd Qu.:  27.00  
##  Max.   :7.000    Max.   :2496.00
# Margin: unit price minus last unit cost.
# NOTE(review): this is a per-unit figure; it is not multiplied by
# Unidades, unlike Subtotal. Confirm that this is intended.
bd12[["Utilidad"]] <- with(bd12, Precio - Ult.Costo)
summary(bd12)
##  vcClaveTienda        DescGiro             Fecha                 Hora       
##  Length:200620      Length:200620      Min.   :2020-05-01   Min.   : 1.000  
##  Class :character   Class :character   1st Qu.:2020-06-06   1st Qu.: 5.000  
##  Mode  :character   Mode  :character   Median :2020-07-11   Median : 8.000  
##                                        Mean   :2020-07-18   Mean   : 7.299  
##                                        3rd Qu.:2020-08-29   3rd Qu.:10.000  
##                                        Max.   :2020-11-11   Max.   :12.000  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :   0.50  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.45  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 1.000   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro           Hora.inicio        Hora.cierre       
##  Length:200620      Length:200620      Length:200620      Length:200620     
##  Class :character   Class :character   Class :character   Class :character  
##  Mode  :character   Mode  :character   Mode  :character   Mode  :character  
##                                                                             
##                                                                             
##                                                                             
##  Dia_de_la_semana    Subtotal          Utilidad      
##  Min.   :1.000    Min.   :   1.00   Min.   :  0.000  
##  1st Qu.:2.000    1st Qu.:  12.00   1st Qu.:  2.310  
##  Median :4.000    Median :  18.00   Median :  3.230  
##  Mean   :3.912    Mean   :  24.33   Mean   :  4.142  
##  3rd Qu.:6.000    3rd Qu.:  27.00   3rd Qu.:  5.420  
##  Max.   :7.000    Max.   :2496.00   Max.   :230.770

Exportar base de datos limpia

# Keep the final data frame under a descriptive name and write it to a
# CSV file in the working directory, without the row-name column.
bd_limpia <- bd12
write.csv(
  x = bd_limpia,
  file = "abarrotes_bd_limpia.csv",
  row.names = FALSE
)

Conclusiones

Es muy común que las bases de datos se encuentren “sucias” o “incompletas”; por eso acudimos a la limpieza de datos, para que al momento de realizar el análisis no se entorpezcan los resultados ni pierdan validez.

Existen distintas técnicas que nos ayudan a tener nuestra base de datos limpia y en este caso utilizamos:

  • Técnica 1. Remover valores irrelevantes

  • Técnica 2. Remover valores duplicados

  • Técnica 3. Errores tipográficos y errores similares

  • Técnica 4. Convertir tipos de datos

  • Técnica 5. Valores faltantes

  • Técnica 6. Método estadístico

En este caso nos dimos cuenta de que necesitábamos eliminar columnas que no tuvieran relevancia, convertir los precios negativos a valores absolutos, eliminar valores faltantes, etc., para terminar creando una nueva base de datos limpia y descargarla ya modificada.

En conclusión, R es un gran programa que nos facilitó este proceso, pues permite no hacerlo de manera manual sino con funciones que lo realizan de mejor manera y más rápido.

LS0tCnRpdGxlOiA8c3BhbiBzdHlsZT0iY29sb3I6UHVycGxlIj4iTGltcGllemEgQmFzZSBkZSBEYXRvcyBBYmFycm90ZXMiPC9zcGFuPgphdXRob3I6ICJLYXJsYSBHZW9yZ2luYSBNYXJ0w61uZXogR29uesOhbGV6IEEwMDgyNzUwMCIKZGF0ZTogIjIwMjItMDktMDYiCm91dHB1dDogCiAgaHRtbF9kb2N1bWVudDoKICAgIHRvYzogdHJ1ZQogICAgdG9jX2Zsb2F0OiB0cnVlCiAgICBjb2RlX2Rvd25sb2FkOiB0cnVlCi0tLQoKPGltZyBzcmM9ICIvVXNlcnMvZ2VvcmdpbmFtYXJ0aW5lei9Eb2N1bWVudHMvVGVjL1NlzIFwdGltbyBTZW1lc3RyZS9BbmFsacyBdGljYSBwYXJhIG5lZ29jaW9zLCBkZSBsb3MgZGF0b3MgYSBkZWNpc2lvbmVzL2xpbXBpZXphIGRhdG9zLndlYnAiPgoKIyMjIDxzcGFuIHN0eWxlPSJjb2xvcjpkYXJrYmx1ZSI+QSBsYSBiYXNlIGRlIGRhdG9zIHNlIGxlIGhpY2llcm9uIGxvcyBzaWd1aWVudGVzIGNhbWJpb3M6PC9zcGFuPgojIyMjICAgIDEuIFNlIGNhbWJpw7MgZWwgZm9ybWF0byBhIEZvcm1hdG8gYSBmZWNoYSBjb3J0YS4gIAojIyMjICAgIDIuIFNlIGR1cGxpY2Fyb24gbG9zIHByaW1lcm9zIDUgcmVnaXN0cm9zLiAgCiMjIyMgICAgMy4gU2UgY2FtYmnDsyBlbCBmb3JtYXRvIGEgSG9yYSAoRXNwYcOxb2wgTcOpeGljbykuICAKIyMjIyAgICA0LiBTZSBjYW1iacOzIGVsIGZvcm1hdG8gYSBjw7NkaWdvIGRlIEJhcnJhcyAocGFyYSBxdWUgc2FsZ2EgY29tcGxldG8pLiAgCiMjIyMgICAgNS4gU2UgZ3VhcmTDsyBjb21vIENTViBVVEYtOCAoRGVsaW1pdGFkbyBwb3IgY29tYXMpLiAgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6ZGFya2JsdWUiPkltcG9ydGFyIGxhIGJhc2UgZGUgZGF0b3M8L3NwYW4+CmBgYHtyfQpiZCA8LSByZWFkLmNzdigiL1VzZXJzL2dlb3JnaW5hbWFydGluZXovRG9jdW1lbnRzL1RlYy9TZcyBcHRpbW8gU2VtZXN0cmUvQW5hbGnMgXRpY2EgcGFyYSBuZWdvY2lvcywgZGUgbG9zIGRhdG9zIGEgZGVjaXNpb25lcy9hYmFycm90ZXMuY3N2IikKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6ZGFya2JsdWUiPkVudGVuZGVyIGxhIGJhc2UgZGUgZGF0b3M8L3NwYW4+CmBgYHtyfQpzdW1tYXJ5KGJkKQojIGluc3RhbGwucGFja2FnZXMoImRwbHlyIikKbGlicmFyeShkcGx5cikKCiMgY291bnQoYmQsIHZjQ2xhdmVUaWVuZGEsIHNvcnQgPSBUUlVFKQojIGNvdW50KGJkLCBEZXNjR2lybywgc29ydCA9IFRSVUUpCiMgY291bnQoYmQsIE1hcmNhLCBzb3J0ID0gVFJVRSkKIyBjb3VudChiZCwgRmFicmljYW50ZSwgc29ydCA9IFRSVUUpCiMgY291bnQoYmQsIFByb2R1Y3RvLCBzb3J0ID0gVFJVRSkKIyBjb3VudChiZCwgTm9tYnJlRmFtaWxpYSwgc29ydCA9IFRSVUUpCiMgY291bnQoYmQsIE5vbWJyZUNhdGVnb3JpYSwgc29ydCA9IFRSVUUpCiMgY291bnQoYmQsIEVzdGFkbywgc29ydCA9IFRSVUUpCiMgY291bnQoYmQsIE10cy4yLCBzb3J0ID0gVFJVRSkKIyBjb3VudChiZCwgVGlwby51Ymlj
YWNpw7NuLCBzb3J0ID0gVFJVRSkKIyBjb3VudChiZCwgR2lybywgc29ydCA9IFRSVUUpCiMgY291bnQoYmQsIEhvcmEuaW5pY2lvLCBzb3J0ID0gVFJVRSkKIyBjb3VudChiZCwgSG9yYS5jaWVycmUsIHNvcnQgPSBUUlVFKQoKIyBpbnN0YWxsLnBhY2thZ2VzKCJ0aWR5dmVyc2UiKQpsaWJyYXJ5KHRpZHl2ZXJzZSkKCiMgdGliYmxlKGJkKQoKIyBzdHIoYmQpCgojIGhlYWQoYmQpCiMgaGVhZChiZCwgbj03KQoKIyB0YWlsKGJkKQoKIyBpbnN0YWxsLnBhY2thZ2VzKCJqYW5pdG9yIikKbGlicmFyeShqYW5pdG9yKQoKdGFieWwoYmQsIHZjQ2xhdmVUaWVuZGEsIE5vbWJyZURlcGFydGFtZW50bykKYGBgCgoKIyMjIyBPYnNlcnZhY2lvbmVzCiMjIyMjIDEuIENhc2kgbmluZ8O6biByZWdpc3RybyBjdWVudGEgY29uIFBMVS4gIAojIyMjIyAyLiBDYW1iaWFyIGZvcm1hdG8gZGUgZmVjaGEuICAKIyMjIyMgMy4gQ2FtYmlhciBmb3JtYXRvIGRlIGhvcmEuICAKIyMjIyMgNC4gSGF5IHByZWNpb3MgbmVnYXRpdm9zLiAgCiMjIyMjIDUuIEhheSB1bmlkYWRlcyBtZW5vcmVzIGEgMS4gIAoKIyA8c3BhbiBzdHlsZT0iY29sb3I6Ymx1ZSI+VMOpY25pY2FzIHBhcmEgbGltcGllemEgZGUgZGF0b3M8L3NwYW4+ICAgIAoKIyMgPHNwYW4gc3R5bGU9ImNvbG9yOmRhcmtibHVlIj5Uw6ljbmljYSAxLiBSZW1vdmVyIHZhbG9yZXMgaXJyZWxldmFudGVzPC9zcGFuPiAgICAKYGBge3J9CiMgRWxpbWluYXIgY29sdW1uYXMgIAojIFByaW1lcm8gY3JlYXIgdW5hIHNlZ3VuZGEgY29waWEgcGFyYSBubyBlbGltaW5hciBsYSBvcmlnaW5hbCAgCmJkMSA8LSBiZApiZDEgPC0gc3Vic2V0IChiZDEsIHNlbGVjdCA9IC1jIChQTFUsIENvZGlnby5CYXJyYXMpKQoKIyBFbGltaW5hciByZW5nbG9uZXMKYmQyIDwtIGJkMQpiZDIgPC0gYmQyW2JkJFByZWNpbyA+IDAsIF0Kc3VtbWFyeSAoYmQxKQpzdW1tYXJ5IChiZDIpCiMgRXN0byBubyBsbyB1c2FyZW1vcywgcG9uZHJlbW9zIHByZWNpb3MgbmVnYXRpdm9zIGNvbW8gYWJzb2x1dG8gCmBgYAoKIyMgPHNwYW4gc3R5bGU9ImNvbG9yOmRhcmtibHVlIj5Uw6ljbmljYSAyLiBSZW1vdmVyIHZhbG9yZXMgZHVwbGljYWRvczwvc3Bhbj4KYGBge3J9CiMgwr9DdcOhbnRvcyByZW5nbG9uZXMgZHVwbGljYWRvcyB0ZW5lbW9zPwpiZDFbZHVwbGljYXRlZChiZDEpLF0Kc3VtKGR1cGxpY2F0ZWQoYmQxKSkKCiMgRWxpbWluYXIgcmVuZ2xvbmVzIGR1cGxpY2Fkb3MKYmQzIDwtIGJkMQpsaWJyYXJ5KGRwbHlyKQpiZDMgPC0gZGlzdGluY3QoYmQzKQoKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6ZGFya2JsdWUiPlTDqWNuaWNhIDMuIEVycm9yZXMgdGlwb2dyw6FmaWNvcyB5IGVycm9yZXMgc2ltaWxhcmVzPC9zcGFuPgpgYGB7cn0KIyBQcmVjaW9zIGVuIGFic29sdXRvCmJkNCA8LSBiZDMKYmQ0JFByZWNpbyA8LSBhYnMoYmQ0JFByZWNpbykKc3VtbWFyeShiZDQpICAKCiMgQ2FudGlkYWRlcyBlbiBlbnRlcm9zCmJkNSA8LSBi
ZDQKYmQ1JFVuaWRhZGVzIDwtIGNlaWxpbmcoYmQ1JFVuaWRhZGVzKQpzdW1tYXJ5KGJkNSkgIApgYGAKCiMjIDxzcGFuIHN0eWxlPSJjb2xvcjpkYXJrYmx1ZSI+VMOpY25pY2EgNC4gQ29udmVydGlyIHRpcG9zIGRlIGRhdG9zPC9zcGFuPgpgYGB7cn0KIyBDb252ZXJ0aXIgZGUgY2FyYWN0ZXIgYSBmZWNoYQpiZDYgPC0gYmQ1CmJkNiRGZWNoYSA8LSBhcy5EYXRlKGJkNiRGZWNoYSwgZm9ybWF0ID0iJWQvJW0vJXkiKQp0aWJibGUoYmQ2KSAgCgojIENvbnZlcnRpciBkZSBjYXJhY3RlciBhIGVudGVybwpiZDcgPC0gYmQ2CmJkNyRIb3JhIDwtIHN1YnN0cihiZDckSG9yYSwgc3RhcnQgPSAxLCBzdG9wID0gMikKdGliYmxlKGJkNykKYmQ3JEhvcmEgPC0gYXMuaW50ZWdlcihiZDckSG9yYSkgIApzdHIoYmQ3KSAKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6ZGFya2JsdWUiPlTDqWNuaWNhIDUuIFZhbG9yZXMgZmFsdGFudGVzPC9zcGFuPgpgYGB7cn0KIyDCv0N1w6FudG9zIE5BIHRlbmdvIGVuIGxhIGJhc2UgZGUgZGF0b3M/CnN1bShpcy5uYShiZDcpKQpzdW0oaXMubmEoYmQpKQoKIyDCv0N1w6FudG9zIE5BIHRlbmdvIHBvciB2YXJpYWJsZT8Kc2FwcGx5KGJkNywgZnVuY3Rpb24oeCkgc3VtKGlzLm5hKHgpKSkKc2FwcGx5KGJkLCBmdW5jdGlvbih4KSBzdW0oaXMubmEoeCkpKQoKIyBCb3JyYXIgdG9kb3MgbG9zIHJlZ2lzdHJvcyBOQSBkZSB1bmEgdGFibGEKYmQ4IDwtIGJkCmJkOCA8LSBuYS5vbWl0KGJkOCkKc3VtbWFyeShiZDgpCgojIFJlbXBsYXphciBOQSBjb24gQ0VST1MKYmQ5IDwtIGJkCmJkOVtpcy5uYShiZDkpXTwtMApzdW1tYXJ5KGJkOSkKCiMgUmVtcGxhemFyIE5BIGNvbiBlbCBQcm9tZWRpbwpiZDEwIDwtIGJkCmJkMTAkUExVW2lzLm5hKGJkMTAkUExVKV08LW1lYW4oYmQxMCRQTFUsIG5hLnJtID0gVFJVRSkKc3VtbWFyeShiZDEwKQoKIyBSZWVtcGxhemFyIG5lZ2F0aXZvcyBjb24gY2VybwpiZDExIDwtIGJkCmJkMTFbYmQxMSA8IDBdIDwtIDAKc3VtbWFyeShiZDExKQpgYGAKCiMjIDxzcGFuIHN0eWxlPSJjb2xvcjpkYXJrYmx1ZSI+VMOpY25pY2EgNi4gTcOpdG9kbyBlc3RhZMOtc3RpY288L3NwYW4+CmBgYHtyfQpiZDEyIDwtYmQ3CmJveHBsb3QoYmQxMiRQcmVjaW8sIGhvcml6b250YWwgPSBUUlVFKQpib3hwbG90KGJkMTIkVW5pZGFkZXMsIGhvcml6b250YWwgPSBUUlVFKQoKIyBBZ3JlZ2FyIGNvbHVtbmFzCiMgaW5zdGFsbC5wYWNrYWdlcygibHVicmlkYXRlIikKbGlicmFyeShsdWJyaWRhdGUpCmJkMTIkRGlhX2RlX2xhX3NlbWFuYSA8LSB3ZGF5IChiZDEyJEZlY2hhKQpzdW1tYXJ5KGJkMTIpCgpiZDEyJFN1YnRvdGFsIDwtIGJkMTIkUHJlY2lvICogYmQxMiRVbmlkYWRlcyAgCnN1bW1hcnkoYmQxMikgIAoKYmQxMiRVdGlsaWRhZCA8LSBiZDEyJFByZWNpbyAtIGJkMTIkVWx0LkNvc3RvCnN1bW1hcnkoYmQxMikKYGBgCgojIyA8c3BhbiBzdHlsZT0iY29sb3I6ZGFya2JsdWUiPkV4cG9ydGFyIGJh
c2UgZGUgZGF0b3MgbGltcGlhPC9zcGFuPgpgYGB7cn0KYmRfbGltcGlhIDwtIGJkMTIKd3JpdGUuY3N2KGJkX2xpbXBpYSwgZmlsZT0iYWJhcnJvdGVzX2JkX2xpbXBpYS5jc3YiLCByb3cubmFtZXMgPSBGQUxTRSkKYGBgCgojIDxzcGFuIHN0eWxlPSJjb2xvcjpibHVlIj5Db25jbHVzaW9uZXM8L3NwYW4+CkRlbnRybyBkZSBsYXMgYmFzZXMgZGUgZGF0b3MgZXMgbXV5IGNvbcO6biBxdWUgc2UgZW5jdWVudHJlbiAqInN1Y2lhcyIqLCAqImluY29tcGxldGFzIiogeSBlcyBwb3IgZXNvIHF1ZSBhY3VkaW1vcyBhIGxpbXBpZXphIGRlIGRhdG9zIHBhcmEgcXVlIGFsIG1vbWVudG8gZGUgcmVhbGl6YXIgYW7DoWxpc2lzIG5vIGVudG9ycGV6Y2EgbG9zIHJlc3VsdGFkb3MgbyBxdWUgbm8gdGVuZ2FuIHZhbGlkZXouICAKCkV4aXN0ZW4gZGlzdGludGFzIHTDqWNuaWNhcyBxdWUgbm9zIGF5dWRhbiBhIHRlbmVyIG51ZXN0cmEgYmFzZSBkZSBkYXRvcyBsaW1waWEgeSBlbiBlc3RlIGNhc28gdXRpbGl6YW1vczoKCiogVMOpY25pY2EgMS4gUmVtb3ZlciB2YWxvcmVzIGlycmVsZXZhbnRlcyAgCgoqIFTDqWNuaWNhIDIuIFJlbW92ZXIgdmFsb3JlcyBkdXBsaWNhZG9zICAKCiogVMOpY25pY2EgMy4gRXJyb3JlcyB0aXBvZ3LDoWZpY29zIHkgZXJyb3JlcyBzaW1pbGFyZXMgIAoKKiBUw6ljbmljYSA0LiBDb252ZXJ0aXIgdGlwb3MgZGUgZGF0b3MKCiogVMOpY25pY2EgNS4gVmFsb3JlcyBmYWx0YW50ZXMKCiogVMOpY25pY2EgNi4gTcOpdG9kbyBlc3RhZMOtc3RpY28gIAoKRW4gZXN0ZSBjYXNvIG5vcyBkaW1vcyBjdWVudGEgcXVlIG5lY2VzaXRhYmFtb3MgZWxpbWluYXIgY29sdW1uYXMgcXVlIG5vIHR1dmllcmFuIHJlbGV2YW5jaWEsIGNvbnZlcnRpciBsb3MgcHJlY2lvcyBuZWdhdGl2b3MgYSBhYnNvbHV0b3MsIGVsaW1pbmFyIHZhbG9yZXMgZmFsdGFudGVzLCBldGMuLCBwYXJhIHRlcm1pbmFyIGNyZWFuZG8gdW5hIG51ZXZhIGJhc2UgZGUgZGF0b3MgbGltcGlhIHkgZGVzY2FyZ2FybGEgeWEgbW9kaWZpY2FkYS4gIAoKRW4gY29uY2x1c2nDs24sIDxzcGFuIHN0eWxlPSJjb2xvcjpyZWQiPioqUioqPC9zcGFuPiBlcyB1biBncmFuIHByb2dyYW1hIHF1ZSBub3MgZmFjaWxpdG8gZXN0ZSBwcm9jZXNvIHBhcmEgbm8gaGFjZXJsbyBkZSBtYW5lcmEgbWFudWFsIHNpIG5vIGNvbiBmdW5jaW9uZXMgcXVlIHRlIHBlcm1pdGVuIGhhY2VybG8gZGUgbWVqb3IgbWFuZXJhIHkgbcOhcyByw6FwaWRhLiAgCg==