#install.packages("dplyr")
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
#install.packages("lubridate")
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
#install.packages("janitor")
library(janitor)
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0 ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1 ✔ tibble 3.1.8
## ✔ purrr 1.0.1 ✔ tidyr 1.3.0
## ✔ readr 2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
#install.packages("readxl")
library(readxl)
library(visdat)
#install.packages("kableExtra")
library(kableExtra)
##
## Attaching package: 'kableExtra'
##
## The following object is masked from 'package:dplyr':
##
## group_rows
#install.packages("ggplot2")
library(ggplot2)
#install.packages("esquisse")
library(esquisse)
1. Export data base to R
db <- read_xlsx("~/Desktop/R_DATABASES/Abarrotes-Ventas-1.xlsx")
abarrotes <- data.frame(db)
The excel database has been uploaded in the variable ‘db’
head(db)
## # A tibble: 6 × 22
## CveTienda DescGiro Codig…¹ PLU Fecha Hora Marca
## <chr> <chr> <dbl> <lgl> <dttm> <dttm> <chr>
## 1 MX001 Abarrot… 7.50e12 NA 2020-06-19 08:16:20 1899-12-31 08:16:21 NUTR…
## 2 MX001 Abarrot… 7.50e12 NA 2020-06-19 08:23:32 1899-12-31 08:23:33 DAN …
## 3 MX001 Abarrot… 7.50e12 NA 2020-06-19 08:24:33 1899-12-31 08:24:33 BIMBO
## 4 MX001 Abarrot… 7.50e12 NA 2020-06-19 08:24:33 1899-12-31 08:24:33 PEPSI
## 5 MX001 Abarrot… 7.50e12 NA 2020-06-19 08:26:28 1899-12-31 08:26:28 BLAN…
## 6 MX001 Abarrot… 7.50e12 NA 2020-06-19 08:26:28 1899-12-31 08:26:28 FLASH
## # … with 15 more variables: Fabricante <chr>, Producto <chr>, Precio <dbl>,
## # Ult.Costo <dbl>, Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## # NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, `Mts 2` <dbl>,
## # `Tipo ubicación` <chr>, Giro <chr>, `Hora inicio` <dttm>,
## # `Hora cierre` <dttm>, and abbreviated variable name ¹`Codigo Barras`
2. Adding ‘MontoTotal’ column
abarrotes <- abarrotes%>%
mutate(MontoTotal = Precio * Unidades)%>%
select(CveTienda,DescGiro,Codigo.Barras,PLU,Fecha,Hora,
Marca,Fabricante,Producto,Precio,Ult.Costo,Unidades,MontoTotal,F.Ticket,
NombreDepartamento,NombreFamilia,NombreCategoria,Estado,Mts.2,Tipo.ubicación,Giro,
Hora.inicio,Hora.cierre)
head(abarrotes)
## CveTienda DescGiro Codigo.Barras PLU Fecha Hora
## 1 MX001 Abarrotes 7.501021e+12 NA 2020-06-19 08:16:20 1899-12-31 08:16:21
## 2 MX001 Abarrotes 7.501032e+12 NA 2020-06-19 08:23:32 1899-12-31 08:23:33
## 3 MX001 Abarrotes 7.501000e+12 NA 2020-06-19 08:24:33 1899-12-31 08:24:33
## 4 MX001 Abarrotes 7.501031e+12 NA 2020-06-19 08:24:33 1899-12-31 08:24:33
## 5 MX001 Abarrotes 7.501026e+12 NA 2020-06-19 08:26:28 1899-12-31 08:26:28
## 6 MX001 Abarrotes 7.501025e+12 NA 2020-06-19 08:26:28 1899-12-31 08:26:28
## Marca Fabricante
## 1 NUTRI LECHE MEXILAC
## 2 DAN UP DANONE DE MEXICO
## 3 BIMBO GRUPO BIMBO
## 4 PEPSI PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6 FLASH ALEN
## Producto Precio Ult.Costo Unidades MontoTotal
## 1 Nutri Leche 1 Litro 16.0 12.31 1 16.0
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL 14.0 14.00 1 14.0
## 3 Rebanadas Bimbo 2Pz 5.0 5.00 1 5.0
## 4 Pepsi N.R. 400Ml 8.0 8.00 1 8.0
## 5 Detergente Blanca Nieves 500G 19.5 15.00 1 19.5
## 6 Flash Xtra Brisa Marina 500Ml 9.5 7.31 1 9.5
## F.Ticket NombreDepartamento NombreFamilia NombreCategoria
## 1 1 Abarrotes Lacteos y Refrigerados Leche
## 2 2 Abarrotes Lacteos y Refrigerados Yogurt
## 3 3 Abarrotes Pan y Tortilla Pan Dulce Empaquetado
## 4 3 Abarrotes Bebidas Refrescos Plástico (N.R.)
## 5 4 Abarrotes Limpieza del Hogar Lavandería
## 6 4 Abarrotes Limpieza del Hogar Limpiadores Líquidos
## Estado Mts.2 Tipo.ubicación Giro Hora.inicio
## 1 Nuevo León 60 Esquina Abarrotes 1899-12-31 08:00:00
## 2 Nuevo León 60 Esquina Abarrotes 1899-12-31 08:00:00
## 3 Nuevo León 60 Esquina Abarrotes 1899-12-31 08:00:00
## 4 Nuevo León 60 Esquina Abarrotes 1899-12-31 08:00:00
## 5 Nuevo León 60 Esquina Abarrotes 1899-12-31 08:00:00
## 6 Nuevo León 60 Esquina Abarrotes 1899-12-31 08:00:00
## Hora.cierre
## 1 1899-12-31 22:00:00
## 2 1899-12-31 22:00:00
## 3 1899-12-31 22:00:00
## 4 1899-12-31 22:00:00
## 5 1899-12-31 22:00:00
## 6 1899-12-31 22:00:00
3. Destandardized code bars
a) How many records do not meet that parameter?
abarrotes %>%
count(nchar(Codigo.Barras) != 13)
## nchar(Codigo.Barras) != 13 n
## 1 FALSE 158134
## 2 TRUE 42486
42486 records do not meet the parameter.
b) What do you suggest to correct these erroneous data?
The best way to solve this problem would be to ask for those code
bars and replace them.
4. PLU codes
a) What percentage of records have a PLU and how many do not?

b) What do you suggest doing about this variable?
I would join another database of the IFPS in order to complete those
missing variables, or if the column is not needed, it can be
deleted.
5. Wrong prices
a) How many records have a negative price?
## There are: 147 records with negative value.
b) Correct those negative values indicating the criteria used to do
so.
abarrotes$Precio <- abs(abarrotes$Precio)
## There are: 0 records with negative value.
6. ‘F.Ticket’ variable
to_answer6 <- abarrotes %>%
group_by(F.Ticket) %>%
summarize(Unidades=sum(Unidades))%>%
ungroup()
a) The minimum number of items purchased per ticket.
min_items <- which.min(to_answer6$Unidades)
## The minimum amount of items purchased per ticket was: 1
b) The maximum number of items purchased per ticket.
max_items <- which.max(to_answer6$Unidades)
## The maximum amount of items purchased per ticket was: 16042
c) The average number of items purchased per ticket.
average_items <- mean(to_answer6$Unidades)
## The average amount of items purchased per ticket was: 2.200212
7. Stores analysis
to_answer7 <- abarrotes%>%
group_by(CveTienda, Producto)%>%
summarize(Unidades = sum(Unidades))
## `summarise()` has grouped output by 'CveTienda'. You can override using the
## `.groups` argument.
to_answer7bc <- abarrotes%>%
group_by(CveTienda, Producto)%>%
select(CveTienda,Producto,Precio)
a) The product that sells the most in units.
a7 <- to_answer7%>%
group_by(CveTienda)%>%
slice_max(Unidades, n=1)
CveTienda
|
Producto
|
Unidades
|
MX001
|
Cerveza Tecate Light 340Ml
|
7476
|
MX002
|
Coca Cola Retornable 2.5L
|
329
|
MX003
|
Pepsi N.R. 1.5L
|
150
|
MX004
|
TECATE LIGHT LATA 340ML
|
1987
|
MX005
|
Cerveza Tecate Light 340Ml
|
7336
|
b) The product sold with the lowest unit price.
b7 <- to_answer7bc%>%
group_by(CveTienda)%>%
slice_min(Precio, n=1)%>%
ungroup()%>%
group_by(Producto, CveTienda)%>%
summarize(Precio = mean(Precio))%>%
ungroup()%>%
arrange(CveTienda)
## `summarise()` has grouped output by 'Producto'. You can override using the
## `.groups` argument.
Producto
|
CveTienda
|
Precio
|
Limoncho Sal y Limon 25pz 1.5g
|
MX001
|
0.5
|
Trident Yerbabuena 4´s 5.2gr
|
MX002
|
2.0
|
Canels Sabor Menta 5g
|
MX003
|
1.0
|
bubalo
|
MX004
|
1.5
|
bubalo 5.5g chicle
|
MX004
|
1.5
|
Clorets Cajita 2Pz
|
MX005
|
1.0
|
c) The product sold with the highest unit price.
c7 <- to_answer7bc%>%
group_by(CveTienda)%>%
slice_max(Precio, n=1)%>%
ungroup()%>%
group_by(Producto, CveTienda)%>%
summarize(Precio = mean(Precio))%>%
ungroup()%>%
arrange(CveTienda)
## `summarise()` has grouped output by 'Producto'. You can override using the
## `.groups` argument.
Producto
|
CveTienda
|
Precio
|
Whisky Buchanan´s 1L
|
MX001
|
1000
|
NIDO KINDER 1 MAS 800G
|
MX002
|
145
|
Tocino Fud 250Grs
|
MX003
|
72
|
TEQUILA CAMPO AZUL 1 LTO
|
MX004
|
197
|
Six Tecate Light 355ML
|
MX005
|
90
|
Six Tecate Rojo 355ML
|
MX005
|
90
|
8. Monthly Sales per store
abarrotes_monthly <- abarrotes%>%
select(CveTienda, Fecha, MontoTotal)%>%
group_by(CveTienda, month(Fecha))%>%
summarize(MontoTotal = sum(MontoTotal))
## `summarise()` has grouped output by 'CveTienda'. You can override using the
## `.groups` argument.

Main Learnings
During this course our main learnings were the importance of data
management in business analytics and with that, certain technics that
help us to achieve a correct management of data and being able to
visualize it to share it in an understandable way. After identifying the
incoherences in the database we were able to: - Clean and organize the
data - Understand the numbers - Visualize the situation - In the future,
make business strategies and make the best strategic desicions for the
business