#install.packages("dplyr")
library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages("lubridate")
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
#install.packages("janitor")
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
#install.packages("tidyverse")
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats 1.0.0     ✔ stringr 1.5.0
## ✔ ggplot2 3.4.1     ✔ tibble  3.1.8
## ✔ purrr   1.0.1     ✔ tidyr   1.3.0
## ✔ readr   2.1.4
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the ]8;;http://conflicted.r-lib.org/conflicted package]8;; to force all conflicts to become errors
#install.packages("readxl")
library(readxl)
library(visdat)
#install.packages("kableExtra")
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
#install.packages("ggplot2")
library(ggplot2)
#install.packages("esquisse")
library(esquisse)

1. Export data base to R

db <- read_xlsx("~/Desktop/R_DATABASES/Abarrotes-Ventas-1.xlsx")
abarrotes <- data.frame(db)

The excel database has been uploaded in the variable ‘db’

head(db)
## # A tibble: 6 × 22
##   CveTienda DescGiro Codig…¹ PLU   Fecha               Hora                Marca
##   <chr>     <chr>      <dbl> <lgl> <dttm>              <dttm>              <chr>
## 1 MX001     Abarrot… 7.50e12 NA    2020-06-19 08:16:20 1899-12-31 08:16:21 NUTR…
## 2 MX001     Abarrot… 7.50e12 NA    2020-06-19 08:23:32 1899-12-31 08:23:33 DAN …
## 3 MX001     Abarrot… 7.50e12 NA    2020-06-19 08:24:33 1899-12-31 08:24:33 BIMBO
## 4 MX001     Abarrot… 7.50e12 NA    2020-06-19 08:24:33 1899-12-31 08:24:33 PEPSI
## 5 MX001     Abarrot… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 BLAN…
## 6 MX001     Abarrot… 7.50e12 NA    2020-06-19 08:26:28 1899-12-31 08:26:28 FLASH
## # … with 15 more variables: Fabricante <chr>, Producto <chr>, Precio <dbl>,
## #   Ult.Costo <dbl>, Unidades <dbl>, F.Ticket <dbl>, NombreDepartamento <chr>,
## #   NombreFamilia <chr>, NombreCategoria <chr>, Estado <chr>, `Mts 2` <dbl>,
## #   `Tipo ubicación` <chr>, Giro <chr>, `Hora inicio` <dttm>,
## #   `Hora cierre` <dttm>, and abbreviated variable name ¹​`Codigo Barras`

2. Adding ‘MontoTotal’ column

abarrotes <- abarrotes%>%
  mutate(MontoTotal = Precio * Unidades)%>%
  select(CveTienda,DescGiro,Codigo.Barras,PLU,Fecha,Hora,
         Marca,Fabricante,Producto,Precio,Ult.Costo,Unidades,MontoTotal,F.Ticket,
         NombreDepartamento,NombreFamilia,NombreCategoria,Estado,Mts.2,Tipo.ubicación,Giro,
         Hora.inicio,Hora.cierre)
head(abarrotes)
##   CveTienda  DescGiro Codigo.Barras PLU               Fecha                Hora
## 1     MX001 Abarrotes  7.501021e+12  NA 2020-06-19 08:16:20 1899-12-31 08:16:21
## 2     MX001 Abarrotes  7.501032e+12  NA 2020-06-19 08:23:32 1899-12-31 08:23:33
## 3     MX001 Abarrotes  7.501000e+12  NA 2020-06-19 08:24:33 1899-12-31 08:24:33
## 4     MX001 Abarrotes  7.501031e+12  NA 2020-06-19 08:24:33 1899-12-31 08:24:33
## 5     MX001 Abarrotes  7.501026e+12  NA 2020-06-19 08:26:28 1899-12-31 08:26:28
## 6     MX001 Abarrotes  7.501025e+12  NA 2020-06-19 08:26:28 1899-12-31 08:26:28
##                        Marca                 Fabricante
## 1                NUTRI LECHE                    MEXILAC
## 2                     DAN UP           DANONE DE MEXICO
## 3                      BIMBO                GRUPO BIMBO
## 4                      PEPSI        PEPSI-COLA MEXICANA
## 5 BLANCA NIEVES (DETERGENTE) FABRICA DE JABON LA CORONA
## 6                      FLASH                       ALEN
##                             Producto Precio Ult.Costo Unidades MontoTotal
## 1                Nutri Leche 1 Litro   16.0     12.31        1       16.0
## 2 DANUP STRAWBERRY P/BEBER 350GR NAL   14.0     14.00        1       14.0
## 3                Rebanadas Bimbo 2Pz    5.0      5.00        1        5.0
## 4                   Pepsi N.R. 400Ml    8.0      8.00        1        8.0
## 5      Detergente Blanca Nieves 500G   19.5     15.00        1       19.5
## 6      Flash Xtra Brisa Marina 500Ml    9.5      7.31        1        9.5
##   F.Ticket NombreDepartamento          NombreFamilia           NombreCategoria
## 1        1          Abarrotes Lacteos y Refrigerados                     Leche
## 2        2          Abarrotes Lacteos y Refrigerados                    Yogurt
## 3        3          Abarrotes         Pan y Tortilla     Pan Dulce Empaquetado
## 4        3          Abarrotes                Bebidas Refrescos Plástico (N.R.)
## 5        4          Abarrotes     Limpieza del Hogar                Lavandería
## 6        4          Abarrotes     Limpieza del Hogar      Limpiadores Líquidos
##       Estado Mts.2 Tipo.ubicación      Giro         Hora.inicio
## 1 Nuevo León    60        Esquina Abarrotes 1899-12-31 08:00:00
## 2 Nuevo León    60        Esquina Abarrotes 1899-12-31 08:00:00
## 3 Nuevo León    60        Esquina Abarrotes 1899-12-31 08:00:00
## 4 Nuevo León    60        Esquina Abarrotes 1899-12-31 08:00:00
## 5 Nuevo León    60        Esquina Abarrotes 1899-12-31 08:00:00
## 6 Nuevo León    60        Esquina Abarrotes 1899-12-31 08:00:00
##           Hora.cierre
## 1 1899-12-31 22:00:00
## 2 1899-12-31 22:00:00
## 3 1899-12-31 22:00:00
## 4 1899-12-31 22:00:00
## 5 1899-12-31 22:00:00
## 6 1899-12-31 22:00:00

3. Destandardized code bars

a) How many records do not meet that parameter?

abarrotes %>%
  count(nchar(Codigo.Barras) != 13)
##   nchar(Codigo.Barras) != 13      n
## 1                      FALSE 158134
## 2                       TRUE  42486

42486 records do not meet the parameter.

b) What do you suggest to correct these erroneous data?

The best way to solve this problem would be to ask for those code bars and replace them.

4. PLU codes

a) What percentage of records have a PLU and how many do not?

b) What do you suggest doing about this variable?

I would join another database of the IFPS in order to complete those missing variables, or if the column is not needed, it can be deleted.

5. Wrong prices

a) How many records have a negative price?

## There are: 147 records with negative value.

b) Correct those negative values indicating the criteria used to do so.

abarrotes$Precio <- abs(abarrotes$Precio)
## There are: 0 records with negative value.

6. ‘F.Ticket’ variable

to_answer6 <- abarrotes %>%
  group_by(F.Ticket) %>%
  summarize(Unidades=sum(Unidades))%>%
  ungroup()

a) The minimum number of items purchased per ticket.

min_items <- which.min(to_answer6$Unidades)
## The minimum amount of items purchased per ticket was: 1

b) The maximum number of items purchased per ticket.

max_items <- which.max(to_answer6$Unidades)
## The maximum amount of items purchased per ticket was: 16042

c) The average number of items purchased per ticket.

average_items <- mean(to_answer6$Unidades)
## The average amount of items purchased per ticket was: 2.200212

7. Stores analysis

to_answer7 <- abarrotes%>%
  group_by(CveTienda, Producto)%>%
  summarize(Unidades = sum(Unidades))
## `summarise()` has grouped output by 'CveTienda'. You can override using the
## `.groups` argument.
to_answer7bc <- abarrotes%>%
  group_by(CveTienda, Producto)%>%
  select(CveTienda,Producto,Precio)

a) The product that sells the most in units.

a7 <- to_answer7%>%
  group_by(CveTienda)%>%
  slice_max(Unidades, n=1)
CveTienda Producto Unidades
MX001 Cerveza Tecate Light 340Ml 7476
MX002 Coca Cola Retornable 2.5L 329
MX003 Pepsi N.R. 1.5L 150
MX004 TECATE LIGHT LATA 340ML 1987
MX005 Cerveza Tecate Light 340Ml 7336

b) The product sold with the lowest unit price.

b7 <- to_answer7bc%>%
  group_by(CveTienda)%>%
  slice_min(Precio, n=1)%>%
  ungroup()%>%
  group_by(Producto, CveTienda)%>%
  summarize(Precio = mean(Precio))%>%
  ungroup()%>%
  arrange(CveTienda)
## `summarise()` has grouped output by 'Producto'. You can override using the
## `.groups` argument.
Producto CveTienda Precio
Limoncho Sal y Limon 25pz 1.5g MX001 0.5
Trident Yerbabuena 4´s 5.2gr MX002 2.0
Canels Sabor Menta 5g MX003 1.0
bubalo MX004 1.5
bubalo 5.5g chicle MX004 1.5
Clorets Cajita 2Pz MX005 1.0

c) The product sold with the highest unit price.

c7 <- to_answer7bc%>%
  group_by(CveTienda)%>%
  slice_max(Precio, n=1)%>%
  ungroup()%>%
  group_by(Producto, CveTienda)%>%
  summarize(Precio = mean(Precio))%>%
  ungroup()%>%
  arrange(CveTienda)
## `summarise()` has grouped output by 'Producto'. You can override using the
## `.groups` argument.
Producto CveTienda Precio
Whisky Buchanan´s 1L MX001 1000
NIDO KINDER 1 MAS 800G MX002 145
Tocino Fud 250Grs MX003 72
TEQUILA CAMPO AZUL 1 LTO MX004 197
Six Tecate Light 355ML MX005 90
Six Tecate Rojo 355ML MX005 90

8. Monthly Sales per store

abarrotes_monthly <- abarrotes%>%
  select(CveTienda, Fecha, MontoTotal)%>%
  group_by(CveTienda, month(Fecha))%>%
  summarize(MontoTotal = sum(MontoTotal))
## `summarise()` has grouped output by 'CveTienda'. You can override using the
## `.groups` argument.

Main Learnings

During this course our main learnings were the importance of data management in business analytics and with that, certain technics that help us to achieve a correct management of data and being able to visualize it to share it in an understandable way. After identifying the incoherences in the database we were able to: - Clean and organize the data - Understand the numbers - Visualize the situation - In the future, make business strategies and make the best strategic desicions for the business