Entender la base de datos
#summary(bd)
#count(bd, vcClaveTienda, sort = TRUE)
#count(bd, DescGiro, sort = TRUE)
#count(bd, Marca, sort = TRUE)
#count(bd, Fabricante, sort = TRUE)
#count(bd, Producto, sort = TRUE)
#count(bd, NombreDepartamento, sort = TRUE)
#count(bd, NombreFamilia, sort = TRUE)
#count(bd, NombreCategoria, sort = TRUE)
#count(bd, Estado, sort = TRUE)
#count(bd, Mts.2, sort = TRUE)
#count(bd, Tipo.ubicación, sort = TRUE)
#count(bd, Giro, sort = TRUE)
#count(bd, Hora.inicio, sort = TRUE)
#count(bd, Hora.cierre, sort = TRUE)
#tibble(bd)
#str(bd)
#head(bd)
#head(bd,n=7)
#tail(bd)
#tabyl(bd, vcClaveTienda, NombreDepartamento)
Técnicas para limpieza de datos
Técnica 1. Remover valores irrelevantes
# Eliminar columnas PLU y codigo de bararas
bd1 <- bd
bd1 <- subset(bd1, select=-c(PLU, Codigo.Barras))
# Eliminar renglones
bd2 <- bd1
bd2 <- bd2[bd2$Precio > 0, ]
summary(bd1)
## vcClaveTienda DescGiro Fecha Hora
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200625 Length:200625 Length:200625 Min. :-147.00
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.42
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200625
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33964 Class :character
## Median : 12.31 Median : 1.000 Median :105993 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193990
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383005
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200625 Length:200625 Length:200625 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
summary(bd2)
## vcClaveTienda DescGiro Fecha Hora
## Length:200478 Length:200478 Length:200478 Length:200478
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200478 Length:200478 Length:200478 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200478
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33977 Class :character
## Median : 12.31 Median : 1.000 Median :106034 Mode :character
## Mean : 15.31 Mean : 1.261 Mean :194096
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383062
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200478 Length:200478 Length:200478 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200478 Length:200478 Length:200478 Length:200478
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
#Esto no lo usaremos, pondremos precios negativos como ab
Tecnica 2. Remover valores duplicados
# ¿Cuantos renglones duplicados tenemos?
bd1[duplicated(bd1),]
## vcClaveTienda DescGiro Fecha Hora Marca
## 6 MX001 Abarrotes 19/06/2020 08:16:21 NUTRI LECHE
## 7 MX001 Abarrotes 19/06/2020 08:23:33 DAN UP
## 8 MX001 Abarrotes 19/06/2020 08:24:33 BIMBO
## 9 MX001 Abarrotes 19/06/2020 08:24:33 PEPSI
## 10 MX001 Abarrotes 19/06/2020 08:26:28 BLANCA NIEVES (DETERGENTE)
## Fabricante Producto Precio
## 6 MEXILAC Nutri Leche 1 Litro 16.0
## 7 DANONE DE MEXICO DANUP STRAWBERRY P/BEBER 350GR NAL 14.0
## 8 GRUPO BIMBO Rebanadas Bimbo 2Pz 5.0
## 9 PEPSI-COLA MEXICANA Pepsi N.R. 400Ml 8.0
## 10 FABRICA DE JABON LA CORONA Detergente Blanca Nieves 500G 19.5
## Ult.Costo Unidades F.Ticket NombreDepartamento NombreFamilia
## 6 12.31 1 1 Abarrotes Lacteos y Refrigerados
## 7 14.00 1 2 Abarrotes Lacteos y Refrigerados
## 8 5.00 1 3 Abarrotes Pan y Tortilla
## 9 8.00 1 3 Abarrotes Bebidas
## 10 15.00 1 4 Abarrotes Limpieza del Hogar
## NombreCategoria Estado Mts.2 Tipo.ubicación Giro
## 6 Leche Nuevo León 60 Esquina Abarrotes
## 7 Yogurt Nuevo León 60 Esquina Abarrotes
## 8 Pan Dulce Empaquetado Nuevo León 60 Esquina Abarrotes
## 9 Refrescos Plástico (N.R.) Nuevo León 60 Esquina Abarrotes
## 10 Lavandería Nuevo León 60 Esquina Abarrotes
## Hora.inicio Hora.cierre
## 6 08:00 22:00
## 7 08:00 22:00
## 8 08:00 22:00
## 9 08:00 22:00
## 10 08:00 22:00
sum(duplicated(bd1))
## [1] 5
# Eliminar renglones duplicados
bd3 <- bd1
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bd3 <- distinct(bd3)
Técnica 3. Errores tipográficos y errores similares
# Precios en absoluto
bd4 <- bd3
bd4$Precio <- abs(bd4$Precio)
summary(bd4)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 0.200 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383008
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
# Cantidades en enteros
bd5 <- bd4
bd5$Unidades <- ceiling(bd5$Unidades)
summary(bd5)
## vcClaveTienda DescGiro Fecha Hora
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Marca Fabricante Producto Precio
## Length:200620 Length:200620 Length:200620 Min. : 0.50
## Class :character Class :character Class :character 1st Qu.: 11.00
## Mode :character Mode :character Mode :character Median : 16.00
## Mean : 19.45
## 3rd Qu.: 25.00
## Max. :1000.00
## Ult.Costo Unidades F.Ticket NombreDepartamento
## Min. : 0.38 Min. : 1.000 Min. : 1 Length:200620
## 1st Qu.: 8.46 1st Qu.: 1.000 1st Qu.: 33967 Class :character
## Median : 12.31 Median : 1.000 Median :105996 Mode :character
## Mean : 15.31 Mean : 1.262 Mean :193994
## 3rd Qu.: 19.23 3rd Qu.: 1.000 3rd Qu.:383008
## Max. :769.23 Max. :96.000 Max. :450040
## NombreFamilia NombreCategoria Estado Mts.2
## Length:200620 Length:200620 Length:200620 Min. :47.0
## Class :character Class :character Class :character 1st Qu.:53.0
## Mode :character Mode :character Mode :character Median :60.0
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## Length:200620 Length:200620 Length:200620 Length:200620
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
Técnica 4. Convertir tipos de datos
# Convertir de caracter a fecha
bd6 <- bd5
bd6$Fecha <- as.Date(bd6$Fecha, format = "%d/%m/%y")
tibble(bd6)
## # A tibble: 200,620 × 20
## vcCla…¹ DescG…² Fecha Hora Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
## <chr> <chr> <date> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 MX001 Abarro… 2020-06-19 08:1… NUTR… MEXILAC Nutri … 16 12.3 1
## 2 MX001 Abarro… 2020-06-19 08:2… DAN … DANONE… DANUP … 14 14 1
## 3 MX001 Abarro… 2020-06-19 08:2… BIMBO GRUPO … Rebana… 5 5 1
## 4 MX001 Abarro… 2020-06-19 08:2… PEPSI PEPSI-… Pepsi … 8 8 1
## 5 MX001 Abarro… 2020-06-19 08:2… BLAN… FABRIC… Deterg… 19.5 15 1
## 6 MX001 Abarro… 2020-06-19 08:2… FLASH ALEN Flash … 9.5 7.31 1
## 7 MX001 Abarro… 2020-06-19 08:2… VARI… DANONE… Danone… 11 11 1
## 8 MX001 Abarro… 2020-06-19 08:2… ZOTE FABRIC… Jabon … 9.5 7.31 1
## 9 MX001 Abarro… 2020-06-19 08:2… ALWA… PROCTE… T Feme… 23.5 18.1 1
## 10 MX001 Abarro… 2020-06-19 15:2… JUMEX JUMEX Jugo D… 12 12 1
## # … with 200,610 more rows, 10 more variables: F.Ticket <int>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## # ¹vcClaveTienda, ²DescGiro, ³Fabricante, ⁴Producto, ⁵Ult.Costo, ⁶Unidades
# Convertir de caracter a entero
bd7 <- bd6
bd7$Hora <- substr(bd7$Hora, start = 1, stop = 2)
tibble(bd7)
## # A tibble: 200,620 × 20
## vcCla…¹ DescG…² Fecha Hora Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
## <chr> <chr> <date> <chr> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 MX001 Abarro… 2020-06-19 08 NUTR… MEXILAC Nutri … 16 12.3 1
## 2 MX001 Abarro… 2020-06-19 08 DAN … DANONE… DANUP … 14 14 1
## 3 MX001 Abarro… 2020-06-19 08 BIMBO GRUPO … Rebana… 5 5 1
## 4 MX001 Abarro… 2020-06-19 08 PEPSI PEPSI-… Pepsi … 8 8 1
## 5 MX001 Abarro… 2020-06-19 08 BLAN… FABRIC… Deterg… 19.5 15 1
## 6 MX001 Abarro… 2020-06-19 08 FLASH ALEN Flash … 9.5 7.31 1
## 7 MX001 Abarro… 2020-06-19 08 VARI… DANONE… Danone… 11 11 1
## 8 MX001 Abarro… 2020-06-19 08 ZOTE FABRIC… Jabon … 9.5 7.31 1
## 9 MX001 Abarro… 2020-06-19 08 ALWA… PROCTE… T Feme… 23.5 18.1 1
## 10 MX001 Abarro… 2020-06-19 15 JUMEX JUMEX Jugo D… 12 12 1
## # … with 200,610 more rows, 10 more variables: F.Ticket <int>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## # ¹vcClaveTienda, ²DescGiro, ³Fabricante, ⁴Producto, ⁵Ult.Costo, ⁶Unidades
bd7$Hora <- as.integer(bd7$Hora)
tibble(bd7)
## # A tibble: 200,620 × 20
## vcCla…¹ DescG…² Fecha Hora Marca Fabri…³ Produ…⁴ Precio Ult.C…⁵ Unida…⁶
## <chr> <chr> <date> <int> <chr> <chr> <chr> <dbl> <dbl> <dbl>
## 1 MX001 Abarro… 2020-06-19 8 NUTR… MEXILAC Nutri … 16 12.3 1
## 2 MX001 Abarro… 2020-06-19 8 DAN … DANONE… DANUP … 14 14 1
## 3 MX001 Abarro… 2020-06-19 8 BIMBO GRUPO … Rebana… 5 5 1
## 4 MX001 Abarro… 2020-06-19 8 PEPSI PEPSI-… Pepsi … 8 8 1
## 5 MX001 Abarro… 2020-06-19 8 BLAN… FABRIC… Deterg… 19.5 15 1
## 6 MX001 Abarro… 2020-06-19 8 FLASH ALEN Flash … 9.5 7.31 1
## 7 MX001 Abarro… 2020-06-19 8 VARI… DANONE… Danone… 11 11 1
## 8 MX001 Abarro… 2020-06-19 8 ZOTE FABRIC… Jabon … 9.5 7.31 1
## 9 MX001 Abarro… 2020-06-19 8 ALWA… PROCTE… T Feme… 23.5 18.1 1
## 10 MX001 Abarro… 2020-06-19 15 JUMEX JUMEX Jugo D… 12 12 1
## # … with 200,610 more rows, 10 more variables: F.Ticket <int>,
## # NombreDepartamento <chr>, NombreFamilia <chr>, NombreCategoria <chr>,
## # Estado <chr>, Mts.2 <int>, Tipo.ubicación <chr>, Giro <chr>,
## # Hora.inicio <chr>, Hora.cierre <chr>, and abbreviated variable names
## # ¹vcClaveTienda, ²DescGiro, ³Fabricante, ⁴Producto, ⁵Ult.Costo, ⁶Unidades
Técnica 5. Valores faltantes
# ¿Cuántos NA tengo en la base de datos?
sum(is.na(bd7))
## [1] 0
sum(is.na(bd))
## [1] 199188
# ¿Cuántos NA tengo por variable?
sapply(bd7, function(x) sum(is.na(x)))
## vcClaveTienda DescGiro Fecha Hora
## 0 0 0 0
## Marca Fabricante Producto Precio
## 0 0 0 0
## Ult.Costo Unidades F.Ticket NombreDepartamento
## 0 0 0 0
## NombreFamilia NombreCategoria Estado Mts.2
## 0 0 0 0
## Tipo.ubicación Giro Hora.inicio Hora.cierre
## 0 0 0 0
sapply(bd, function(x) sum(is.na(x)))
## vcClaveTienda DescGiro Codigo.Barras PLU
## 0 0 0 199188
## Fecha Hora Marca Fabricante
## 0 0 0 0
## Producto Precio Ult.Costo Unidades
## 0 0 0 0
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## 0 0 0 0
## Estado Mts.2 Tipo.ubicación Giro
## 0 0 0 0
## Hora.inicio Hora.cierre
## 0 0
?sapply
# Borrar todos los registros NA de una tabla
bd8 <- bd
bd8 <- na.omit(bd8)
summary(bd8)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:1437 Length:1437 Min. :6.750e+08 Min. : 1.000
## Class :character Class :character 1st Qu.:6.750e+08 1st Qu.: 1.000
## Mode :character Mode :character Median :6.750e+08 Median : 1.000
## Mean :2.616e+11 Mean : 2.112
## 3rd Qu.:6.750e+08 3rd Qu.: 1.000
## Max. :7.501e+12 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:1437 Length:1437 Length:1437 Length:1437
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:1437 Min. :30.00 Min. : 1.00 Min. :1.000
## Class :character 1st Qu.:90.00 1st Qu.:64.62 1st Qu.:1.000
## Mode :character Median :90.00 Median :64.62 Median :1.000
## Mean :87.94 Mean :56.65 Mean :1.124
## 3rd Qu.:90.00 3rd Qu.:64.62 3rd Qu.:1.000
## Max. :90.00 Max. :64.62 Max. :7.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 772 Length:1437 Length:1437 Length:1437
## 1st Qu.: 99955 Class :character Class :character Class :character
## Median :102493 Mode :character Mode :character Mode :character
## Mean :100595
## 3rd Qu.:106546
## Max. :118356
## Estado Mts.2 Tipo.ubicación Giro
## Length:1437 Min. :58.00 Length:1437 Length:1437
## Class :character 1st Qu.:58.00 Class :character Class :character
## Mode :character Median :58.00 Mode :character Mode :character
## Mean :58.07
## 3rd Qu.:58.00
## Max. :60.00
## Hora.inicio Hora.cierre
## Length:1437 Length:1437
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Reemplazar NA con CEROS
bd9 <- bd
bd9[is.na(bd9)] <- 0
summary(bd9)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 0.00000
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 0.00000
## Mode :character Mode :character Median :7.501e+12 Median : 0.00000
## Mean :5.950e+12 Mean : 0.01513
## 3rd Qu.:7.501e+12 3rd Qu.: 0.00000
## Max. :1.750e+13 Max. :30.00000
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Reemplazar NA con el Promedio
bd10 <- bd
bd10$PLU[is.na(bd10$PLU)] <- mean(bd10$PLU, na.rm = TRUE)
summary(bd10)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 1.000
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 2.112
## Mode :character Mode :character Median :7.501e+12 Median : 2.112
## Mean :5.950e+12 Mean : 2.112
## 3rd Qu.:7.501e+12 3rd Qu.: 2.112
## Max. :1.750e+13 Max. :30.000
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. :-147.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.42 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class :character Class :character
## Mode :character Mode :character
##
##
##
# Reemplazar negativos conCEROs
bd11 <- bd
bd11[bd11<0] <- 0
summary(bd11)
## vcClaveTienda DescGiro Codigo.Barras PLU
## Length:200625 Length:200625 Min. :8.347e+05 Min. : 1.00
## Class :character Class :character 1st Qu.:7.501e+12 1st Qu.: 1.00
## Mode :character Mode :character Median :7.501e+12 Median : 1.00
## Mean :5.950e+12 Mean : 2.11
## 3rd Qu.:7.501e+12 3rd Qu.: 1.00
## Max. :1.750e+13 Max. :30.00
## NA's :199188
## Fecha Hora Marca Fabricante
## Length:200625 Length:200625 Length:200625 Length:200625
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
##
## Producto Precio Ult.Costo Unidades
## Length:200625 Min. : 0.00 Min. : 0.38 Min. : 0.200
## Class :character 1st Qu.: 11.00 1st Qu.: 8.46 1st Qu.: 1.000
## Mode :character Median : 16.00 Median : 12.31 Median : 1.000
## Mean : 19.44 Mean : 15.31 Mean : 1.262
## 3rd Qu.: 25.00 3rd Qu.: 19.23 3rd Qu.: 1.000
## Max. :1000.00 Max. :769.23 Max. :96.000
##
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## Min. : 1 Length:200625 Length:200625 Length:200625
## 1st Qu.: 33964 Class :character Class :character Class :character
## Median :105993 Mode :character Mode :character Mode :character
## Mean :193990
## 3rd Qu.:383005
## Max. :450040
##
## Estado Mts.2 Tipo.ubicación Giro
## Length:200625 Min. :47.0 Length:200625 Length:200625
## Class :character 1st Qu.:53.0 Class :character Class :character
## Mode :character Median :60.0 Mode :character Mode :character
## Mean :56.6
## 3rd Qu.:60.0
## Max. :62.0
##
## Hora.inicio Hora.cierre
## Length:200625 Length:200625
## Class :character Class :character
## Mode :character Mode :character
##
##
##
##
Técnica 6. M+etodo estadístico
#bd12 <- bd7
#boxplot(bd12$Precio, horizontal = TRUE)
#boxplot(bd12$Unidades, horizontal = TRUE)
## Agregar Columnas
# install.packages("lubridate")
# library(lubridate)
# bd12$Dia_de_la_Semana <- wday(bd12$Fecha)
# summary(bd12)
# bd12$Subtotal <- bd12$Precio * bd12$Unidades
# summary(bd12)
# summary(bd12)
# bd12$Utilidad <- bd12$Precio * bd12$Ult.Costo