R Markdown

This is an R Markdown document. Markdown is a simple formatting syntax for authoring HTML, PDF, and MS Word documents. For more details on using R Markdown see http://rmarkdown.rstudio.com.

When you click the Knit button a document will be generated that includes both content as well as the output of any embedded R code chunks within the document. You can embed an R code chunk like this:

summary(cars)
##      speed           dist       
##  Min.   : 4.0   Min.   :  2.00  
##  1st Qu.:12.0   1st Qu.: 26.00  
##  Median :15.0   Median : 36.00  
##  Mean   :15.4   Mean   : 42.98  
##  3rd Qu.:19.0   3rd Qu.: 56.00  
##  Max.   :25.0   Max.   :120.00

Including Plots

You can also embed plots, for example:

Note that the echo = FALSE parameter was added to the code chunk to prevent printing of the R code that generated the plot.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.2     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.2     ✔ tibble    3.2.1
## ✔ lubridate 1.9.2     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(nycflights13)
library(dplyr)
library(readxl)
data <-  read_xlsx("/Users/enrique/Downloads/Semestre 7- 1 Bloque R/Abarrotes_Ventas.xlsx")
summary(data)
##  vcClaveTienda        DescGiro         Codigo Barras         PLU         
##  Length:200620      Length:200620      Min.   :8.347e+05   Mode:logical  
##  Class :character   Class :character   1st Qu.:7.501e+12   TRUE:1437     
##  Mode  :character   Mode  :character   Median :7.501e+12   NA's:199183   
##                                        Mean   :5.950e+12                 
##                                        3rd Qu.:7.501e+12                 
##                                        Max.   :1.750e+13                 
##      Fecha                             Hora                       
##  Min.   :2020-05-01 00:00:31.08   Min.   :1899-12-31 00:00:00.00  
##  1st Qu.:2020-06-06 13:24:49.08   1st Qu.:1899-12-31 13:12:42.75  
##  Median :2020-07-11 14:10:21.46   Median :1899-12-31 17:35:59.00  
##  Mean   :2020-07-19 15:19:40.65   Mean   :1899-12-31 16:43:52.05  
##  3rd Qu.:2020-08-29 22:07:47.33   3rd Qu.:1899-12-31 20:47:06.00  
##  Max.   :2020-11-11 23:53:47.73   Max.   :1899-12-31 23:59:59.00  
##     Marca            Fabricante          Producto             Precio       
##  Length:200620      Length:200620      Length:200620      Min.   :-147.00  
##  Class :character   Class :character   Class :character   1st Qu.:  11.00  
##  Mode  :character   Mode  :character   Mode  :character   Median :  16.00  
##                                                           Mean   :  19.42  
##                                                           3rd Qu.:  25.00  
##                                                           Max.   :1000.00  
##    Ult.Costo         Unidades         F.Ticket      NombreDepartamento
##  Min.   :  0.38   Min.   : 0.200   Min.   :     1   Length:200620     
##  1st Qu.:  8.46   1st Qu.: 1.000   1st Qu.: 33967   Class :character  
##  Median : 12.31   Median : 1.000   Median :105996   Mode  :character  
##  Mean   : 15.31   Mean   : 1.262   Mean   :193994                     
##  3rd Qu.: 19.23   3rd Qu.: 1.000   3rd Qu.:383008                     
##  Max.   :769.23   Max.   :96.000   Max.   :450040                     
##  NombreFamilia      NombreCategoria       Estado              Mts.2     
##  Length:200620      Length:200620      Length:200620      Min.   :47.0  
##  Class :character   Class :character   Class :character   1st Qu.:53.0  
##  Mode  :character   Mode  :character   Mode  :character   Median :60.0  
##                                                           Mean   :56.6  
##                                                           3rd Qu.:60.0  
##                                                           Max.   :62.0  
##  Tipo.ubicación         Giro            Hora.inicio                    
##  Length:200620      Length:200620      Min.   :1899-12-31 07:00:00.00  
##  Class :character   Class :character   1st Qu.:1899-12-31 07:00:00.00  
##  Mode  :character   Mode  :character   Median :1899-12-31 08:00:00.00  
##                                        Mean   :1899-12-31 07:35:49.71  
##                                        3rd Qu.:1899-12-31 08:00:00.00  
##                                        Max.   :1899-12-31 09:00:00.00  
##   Hora.cierre                    
##  Min.   :1899-12-31 21:00:00.00  
##  1st Qu.:1899-12-31 22:00:00.00  
##  Median :1899-12-31 22:00:00.00  
##  Mean   :1899-12-31 22:23:11.42  
##  3rd Qu.:1899-12-31 23:00:00.00  
##  Max.   :1899-12-31 23:00:00.00
library(dplyr)

count(data,vcClaveTienda, sort=TRUE)
## # A tibble: 5 × 2
##   vcClaveTienda     n
##   <chr>         <int>
## 1 MX001         96464
## 2 MX004         83455
## 3 MX005         10021
## 4 MX002          6629
## 5 MX003          4051
count(data,DescGiro, sort=TRUE)
## # A tibble: 3 × 2
##   DescGiro        n
##   <chr>       <int>
## 1 Abarrotes  100515
## 2 Carnicería  83455
## 3 Depósito    16650
count(data,Marca, sort=TRUE)
## # A tibble: 540 × 2
##    Marca           n
##    <chr>       <int>
##  1 COCA COLA   18686
##  2 PEPSI       15966
##  3 TECATE      11674
##  4 BIMBO        8316
##  5 LALA         5866
##  6 MARINELA     3696
##  7 DORITOS      3142
##  8 CHEETOS      3130
##  9 NUTRI LECHE  3127
## 10 MARLBORO     2579
## # ℹ 530 more rows
count(data,Fabricante, sort=TRUE)
## # A tibble: 241 × 2
##    Fabricante                          n
##    <chr>                           <int>
##  1 COCA COLA                       27519
##  2 PEPSI-COLA MEXICANA             22415
##  3 SABRITAS                        14296
##  4 CERVECERIA CUAUHTEMOC MOCTEZUMA 13681
##  5 GRUPO BIMBO                     13077
##  6 SIGMA ALIMENTOS                  8014
##  7 GRUPO INDUSTRIAL LALA            5868
##  8 GRUPO GAMESA                     5527
##  9 NESTLE                           3698
## 10 JUGOS DEL VALLE S.A. DE C.V.     3581
## # ℹ 231 more rows
count(data,Producto, sort=TRUE)
## # A tibble: 3,404 × 2
##    Producto                        n
##    <chr>                       <int>
##  1 Pepsi N.R. 1.5L              5108
##  2 Coca Cola Retornable 2.5L    3771
##  3 Caguamon Tecate Light 1.2Lt  3471
##  4 Pepsi N. R. 2.5L             2899
##  5 Cerveza Tecate Light 340Ml   2619
##  6 Cerveza Tecate Light 16Oz    2315
##  7 Coca Cola Retornable 1.5L    2124
##  8 Pepsi N.R. 3L                1832
##  9 Coca Cola Retornable 500Ml   1659
## 10 PEPSI N.R. 1.5L              1631
## # ℹ 3,394 more rows
count(data,NombreDepartamento, sort=TRUE)
## # A tibble: 9 × 2
##   NombreDepartamento        n
##   <chr>                 <int>
## 1 Abarrotes            198274
## 2 Bebes e Infantiles     1483
## 3 Ferretería              377
## 4 Farmacia                255
## 5 Vinos y Licores         104
## 6 Papelería                74
## 7 Mercería                 44
## 8 Productos a Eliminar      8
## 9 Carnes                    1
count(data,NombreFamilia, sort=TRUE)
## # A tibble: 51 × 2
##    NombreFamilia              n
##    <chr>                  <int>
##  1 Bebidas                64917
##  2 Botanas                21583
##  3 Lacteos y Refrigerados 17657
##  4 Cerveza                14017
##  5 Pan y Tortilla         10501
##  6 Limpieza del Hogar      8723
##  7 Galletas                7487
##  8 Cigarros                6817
##  9 Cuidado Personal        5433
## 10 Salsas y Sazonadores    5320
## # ℹ 41 more rows
count(data,NombreCategoria, sort=TRUE)
## # A tibble: 174 × 2
##    NombreCategoria               n
##    <chr>                     <int>
##  1 Refrescos Plástico (N.R.) 32861
##  2 Refrescos Retornables     13880
##  3 Frituras                  11082
##  4 Lata                       8150
##  5 Leche                      7053
##  6 Cajetilla                  6329
##  7 Botella                    5867
##  8 Productos sin Categoria    5455
##  9 Papas Fritas               5344
## 10 Jugos y Néctares           5295
## # ℹ 164 more rows
count(data,Estado, sort=TRUE)
## # A tibble: 5 × 2
##   Estado           n
##   <chr>        <int>
## 1 Nuevo León   96464
## 2 Sinaloa      83455
## 3 Quintana Roo 10021
## 4 Jalisco       6629
## 5 Chiapas       4051
count(data,Mts.2, sort=TRUE)
## # A tibble: 5 × 2
##   Mts.2     n
##   <dbl> <int>
## 1    60 96464
## 2    53 83455
## 3    58 10021
## 4    47  6629
## 5    62  4051
count(data,Tipo.ubicación, sort=TRUE)
## # A tibble: 3 × 2
##   Tipo.ubicación      n
##   <chr>           <int>
## 1 Esquina        189940
## 2 Rotonda          6629
## 3 Entre calles     4051
count(data,Giro, sort=TRUE)
## # A tibble: 2 × 2
##   Giro            n
##   <chr>       <int>
## 1 Abarrotes  183970
## 2 Mini súper  16650
count(data,Hora.inicio, sort=TRUE)
## # A tibble: 3 × 2
##   Hora.inicio              n
##   <dttm>               <int>
## 1 1899-12-31 08:00:00 106485
## 2 1899-12-31 07:00:00  87506
## 3 1899-12-31 09:00:00   6629
count(data,Hora.cierre, sort=TRUE)
## # A tibble: 3 × 2
##   Hora.cierre              n
##   <dttm>               <int>
## 1 1899-12-31 22:00:00 103093
## 2 1899-12-31 23:00:00  87506
## 3 1899-12-31 21:00:00  10021
table(data$NombreDepartamento)
## 
##            Abarrotes   Bebes e Infantiles               Carnes 
##               198274                 1483                    1 
##             Farmacia           Ferretería             Mercería 
##                  255                  377                   44 
##            Papelería Productos a Eliminar      Vinos y Licores 
##                   74                    8                  104
table(data$Estado)
## 
##      Chiapas      Jalisco   Nuevo León Quintana Roo      Sinaloa 
##         4051         6629        96464        10021        83455
library(janitor)
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
tabla1<-tabyl(data, vcClaveTienda, NombreDepartamento)
library(ggplot2)
data$Subtotal<- data$Precio*data$Unidades

ggplot(data, aes(x=vcClaveTienda, y=Subtotal)) +
  geom_bar(stat="identity")

library(qcc)
## Package 'qcc' version 2.7
## Type 'citation("qcc")' for citing this R package in publications.
tabla1 <- subset(tabla1, select = c(vcClaveTienda, Abarrotes))
pareto.chart (tabla1$Abarrotes)

##    
## Pareto chart analysis for tabla1$Abarrotes
##        Frequency    Cum.Freq.   Percentage Cum.Percent.
##   A 9.541000e+04 9.541000e+04 4.812028e+01 4.812028e+01
##   D 8.223400e+04 1.776440e+05 4.147493e+01 8.959521e+01
##   E 1.001400e+04 1.876580e+05 5.050587e+00 9.464579e+01
##   B 6.590000e+03 1.942480e+05 3.323683e+00 9.796948e+01
##   C 4.026000e+03 1.982740e+05 2.030523e+00 1.000000e+02
plot(data$Precio, data$Unidades, main="Relacion entre Precio y Unidades", xlab="Precio", ylab="Unidades")

boxplot(data$Precio, horizontal = TRUE)

boxplot(data$Unidades, horizontal = TRUE)