Importar base de datos

#Se limpio la base previamente
#file.choose()
performance <- read.csv("/Users/Karen/Documents/FORM.Performance.limpia")

Importar librerias

#install.packages("foreign")
library(foreign)
#install.packages("dplyr")
library(dplyr)        # data manipulation 
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
#install.packages("forcats")
library(forcats)      # to work with categorical variables
#install.packages ("ggplot2")
library(ggplot2)      # data visualization 
#install.packages ("janitor")
library(janitor)      # data exploration and cleaning 
## 
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
## 
##     chisq.test, fisher.test
#install.packages("Hmisc")
library(Hmisc)        # several useful functions for data analysis 
## Loading required package: lattice
## Loading required package: survival
## Loading required package: Formula
## 
## Attaching package: 'Hmisc'
## The following objects are masked from 'package:dplyr':
## 
##     src, summarize
## The following objects are masked from 'package:base':
## 
##     format.pval, units
#install.packages ("psych")
library(psych)        # functions for multivariate analysis 
## 
## Attaching package: 'psych'
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following objects are masked from 'package:ggplot2':
## 
##     %+%, alpha
#install.packages("naniar")
library(naniar)       # summaries and visualization of missing values NAs
#install.packages("dlookr")
library(dlookr)       # summaries and visualization of missing values NAs
## 
## Attaching package: 'dlookr'
## The following object is masked from 'package:psych':
## 
##     describe
## The following object is masked from 'package:Hmisc':
## 
##     describe
## The following object is masked from 'package:base':
## 
##     transform
#install.packages ("kableExtra")
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## The following object is masked from 'package:dplyr':
## 
##     group_rows
#install.packages ("readr")
library(readr)
#install.packages ("corrplot")
library(corrplot)
## corrplot 0.92 loaded
#install.packages ("jtools")
library(jtools) 
## 
## Attaching package: 'jtools'
## The following object is masked from 'package:Hmisc':
## 
##     %nin%
#install.packages ("lmtest")
library(lmtest) 
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
#install.packages ("car")
library(car) 
## Loading required package: carData
## 
## Attaching package: 'car'
## The following object is masked from 'package:psych':
## 
##     logit
## The following object is masked from 'package:dplyr':
## 
##     recode
#install.packages ("olsrr")
library(olsrr) 
## 
## Attaching package: 'olsrr'
## The following object is masked from 'package:datasets':
## 
##     rivers
#install.packages ("gmodels")
library(gmodels)

Analisis estadistico descriptivo

describe (performance)
## # A tibble: 6 × 26
##   described_…¹     n    na  mean    sd se_mean   IQR skewn…² kurto…³   p00   p01
##   <chr>        <int> <int> <dbl> <dbl>   <dbl> <dbl>   <dbl>   <dbl> <dbl> <dbl>
## 1 Target        1440     0 1     0      0       0    NaN     NaN       1       1
## 2 Vueltas       1440     0 1.75  0.829  0.0219  1.25   0.494  -1.37    1       1
## 3 Plan.arrival  1440     0 6.62  7.50   0.198  10.8    0.614  -1.12    0       0
## 4 Real.arrival  1440     0 3.82  6.51   0.171   8      1.51    0.965   0       0
## 5 Real.depart…  1440     0 4.14  6.95   0.183   9      1.44    0.706   0       0
## 6 Diference     1440     0 0.316 0.922  0.0243  0.8    2.73  211.    -14.4     0
## # … with 15 more variables: p05 <dbl>, p10 <dbl>, p20 <dbl>, p25 <dbl>,
## #   p30 <dbl>, p40 <dbl>, p50 <dbl>, p60 <dbl>, p70 <dbl>, p75 <dbl>,
## #   p80 <dbl>, p90 <dbl>, p95 <dbl>, p99 <dbl>, p100 <dbl>, and abbreviated
## #   variable names ¹​described_variables, ²​skewness, ³​kurtosis

Tabla de frecuencia

Se realizó una tabla de frecuencia de la variable plan arrival con el objetivo de validar las frecuencias en el estimado de llegada para cada uno de los clientes.

proportion <- prop.table(table(performance$Cliente,performance$Plan.arrival))
proportion %>%
  kbl() %>%
  kable_styling()
0 8 9 16 20
MAGNA 0.125 0.000 0.000 0.000 0.000
MAHLE 0.000 0.125 0.125 0.000 0.125
PRINTEL 0.000 0.000 0.000 0.125 0.000
VARROC 0.375 0.000 0.000 0.000 0.000

Tabla cruzada

Posterior a la tabla de frecuencia, se realizo una tabla cruzada donde se visualizo como es la proporcion respecto a la variable de las estimaciones de llegada de pedidos por cada cliente.

proportion %>%
  kbl() %>%
  kable_material (c("striped","hover"))
0 8 9 16 20
MAGNA 0.125 0.000 0.000 0.000 0.000
MAHLE 0.000 0.125 0.125 0.000 0.125
PRINTEL 0.000 0.000 0.000 0.125 0.000
VARROC 0.375 0.000 0.000 0.000 0.000
location <- CrossTable(performance$Cliente, performance$Plan.arrival, prop.t=TRUE, prop.r=TRUE, prop.c=TRUE)
## 
##  
##    Cell Contents
## |-------------------------|
## |                       N |
## | Chi-square contribution |
## |           N / Row Total |
## |           N / Col Total |
## |         N / Table Total |
## |-------------------------|
## 
##  
## Total Observations in Table:  1440 
## 
##  
##                     | performance$Plan.arrival 
## performance$Cliente |         0 |         8 |         9 |        16 |        20 | Row Total | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|
##               MAGNA |       180 |         0 |         0 |         0 |         0 |       180 | 
##                     |    90.000 |    22.500 |    22.500 |    22.500 |    22.500 |           | 
##                     |     1.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.125 | 
##                     |     0.250 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
##                     |     0.125 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|
##               MAHLE |         0 |       180 |       180 |         0 |       180 |       540 | 
##                     |   270.000 |   187.500 |   187.500 |    67.500 |   187.500 |           | 
##                     |     0.000 |     0.333 |     0.333 |     0.000 |     0.333 |     0.375 | 
##                     |     0.000 |     1.000 |     1.000 |     0.000 |     1.000 |           | 
##                     |     0.000 |     0.125 |     0.125 |     0.000 |     0.125 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|
##            PRINTEL  |         0 |         0 |         0 |       180 |         0 |       180 | 
##                     |    90.000 |    22.500 |    22.500 |  1102.500 |    22.500 |           | 
##                     |     0.000 |     0.000 |     0.000 |     1.000 |     0.000 |     0.125 | 
##                     |     0.000 |     0.000 |     0.000 |     1.000 |     0.000 |           | 
##                     |     0.000 |     0.000 |     0.000 |     0.125 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|
##              VARROC |       540 |         0 |         0 |         0 |         0 |       540 | 
##                     |   270.000 |    67.500 |    67.500 |    67.500 |    67.500 |           | 
##                     |     1.000 |     0.000 |     0.000 |     0.000 |     0.000 |     0.375 | 
##                     |     0.750 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
##                     |     0.375 |     0.000 |     0.000 |     0.000 |     0.000 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|
##        Column Total |       720 |       180 |       180 |       180 |       180 |      1440 | 
##                     |     0.500 |     0.125 |     0.125 |     0.125 |     0.125 |           | 
## --------------------|-----------|-----------|-----------|-----------|-----------|-----------|
## 
## 
kbl(location) %>%
  kable_classic()
0 8 9 16 20
MAGNA 180 0 0 0 0
MAHLE 0 180 180 0 180
PRINTEL 0 0 0 180 0
VARROC 540 0 0 0 0
0 8 9 16 20
MAGNA 1 0.0000000 0.0000000 0 0.0000000
MAHLE 0 0.3333333 0.3333333 0 0.3333333
PRINTEL 0 0.0000000 0.0000000 1 0.0000000
VARROC 1 0.0000000 0.0000000 0 0.0000000
0 8 9 16 20
MAGNA 0.25 0 0 0 0
MAHLE 0.00 1 1 0 1
PRINTEL 0.00 0 0 1 0
VARROC 0.75 0 0 0 0
0 8 9 16 20
MAGNA 0.125 0.000 0.000 0.000 0.000
MAHLE 0.000 0.125 0.125 0.000 0.125
PRINTEL 0.000 0.000 0.000 0.125 0.000
VARROC 0.375 0.000 0.000 0.000 0.000

Graficos de datos cualitativos y cuantitativos

#Boxplot 
#En dicha grafica de boxplot se presenta como se encuentran posicionados los datos de la base, tanto el estimado (Plan arrival), como la llegado real (real arrival) y la salida real (real departure) de los pedidos realizados por todos los clientes
Delivery <- data.frame(performance$Cliente, performance$Plan.arrival, performance$Real.arrival, performance$Real.departure, performance$Date)
colnames(Delivery)<-c('Cliente', 'Plan.arrival', 'Real.arrival', 'Real.departure', 'Date')
Realarrival_boxplot = subset(Delivery, select =-c (Cliente, Date))
boxplot(Realarrival_boxplot,data=data,main="Deliverys", 
        col=rainbow(ncol(trees)))

#Bar plots 1
#En la siguiente grafica se presenta la llegada real de los pedidos a lo largo del primer semestre del 2022
Realarrival <- data.frame(performance$Date,performance$Real.arrival)
colnames(Realarrival)<-c('Date','Real.arrival')

ggplot(data = Realarrival, aes (x=Date, y=Real.arrival)) +
  geom_bar(stat = "identity", fill="#FFAEB9") + scale_fill_grey() + # Add bars to the plot
  labs(title = "Real arrival", # Add a title
       x = "2022")

#Bar plots 2
#A modo de comparacion, en la siguiente grafica se presenta la salida real de los pedidos a lo largo del primer semestre del 2022
Realdeparture <- data.frame(performance$Date,performance$Real.departure)
colnames(Realdeparture)<-c('Date','Real.departure')

ggplot(data = Realdeparture, aes (x=Date, y=Real.departure)) +
  geom_bar(stat = "identity", fill="#7AC5CD") + scale_fill_grey() + # Add bars to the plot
  labs(title = "Real departure", # Add a title
       x = "2022")

Graficos de dispersión

Validando las diversas variables en la base de datos y observando su distribucion, se decidio utilizar la variable de diferencia (entre la llegada real y la salida real) para observar su comportamiento y ver la comparación.

#Histograma 
hist(performance$Diference,main="Histograma",xlab="Delivery diference",col='#FF7F24')

#Diagrama de dispersion 
qqnorm(performance$Diference, main="Grafica de dispersion", ylab="Delivery diference",col='#FF7F24')
qqline(performance$Diference, col='#FF7F24')

#Graficos de normalidad
plot_normality(performance,Diference, col='#FF7F24')

Insights

-MAHLE es el unico cliente con la mayor frecuencia de 20 como estimado de llegada para los pedidos

-El estimado real de los pedidos tiene una mediana mucho mayor que la llegada real.

-La salida real vs la llegada real tiene una diferencia observable en los graficos.

-Entre Febrero-Marzo 2022 fue el pico con mayores pedidos para los clientes.

-La diferencia entre la llegada y salida de pedidos sigue una distribucion normal