#file.choose()
base_de_datos<-read.csv("/Users/ricardogc/Desktop/Entregable 2.4 Reto/Formdeliveryfinal3.csv" )
library(foreign)
library(dplyr) # data manipulation
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(forcats) # to work with categorical variables
library(ggplot2) # data visualization
library(janitor) # data exploration and cleaning
##
## Attaching package: 'janitor'
## The following objects are masked from 'package:stats':
##
## chisq.test, fisher.test
#install.packages("Hmisc")
#library(Hmisc) # several useful functions for data analysis
#install.packages("psych")
#library(psych) # functions for multivariate analysis
#install.packages("naniar")
#library(naniar) # summaries and visualization of missing values NAs
#install.packages("dlookr")
#library(dlookr) # summaries and visualization of missing values NAs
#install.packages("corrplot")
#library(corrplot) # correlation plots
#install.packages("jtools")
#library(jtools) # presentation of regression analysis
#install.packages("lmtest")
#library(lmtest) # diagnostic checks - linear regression analysis
#install.packages("car")
#library(car) # diagnostic checks - linear regression analysis
#install.packages("olsrr")
#library(olsrr) # diagnostic checks - linear regression analysis
#install.packages("kableExtra")
#library(kableExtra) # HTML table attributes
library(tidyverse)
## ── Attaching packages
## ───────────────────────────────────────
## tidyverse 1.3.2 ──
## ✔ tibble 3.1.8 ✔ purrr 0.3.4
## ✔ tidyr 1.2.0 ✔ stringr 1.4.1
## ✔ readr 2.1.3
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
bd<-base_de_datos
summary(bd)
## Target Cliente Vueltas Plan.arrival Real.arrival
## Min. :1 MAGNA :180 Min. :1.00 Min. : 0.000 Min. : 0.000
## 1st Qu.:1 MAHLE :540 1st Qu.:1.00 1st Qu.: 0.000 1st Qu.: 0.000
## Median :1 PRINTEL :180 Median :1.50 Median : 4.000 Median : 0.000
## Mean :1 VARROC :540 Mean :1.75 Mean : 6.625 Mean : 3.823
## 3rd Qu.:1 3rd Qu.:2.25 3rd Qu.:10.750 3rd Qu.: 8.000
## Max. :1 Max. :3.00 Max. :20.000 Max. :23.500
##
## Real.departure Diference Fecha
## Min. : 0.000 Min. :-14.3500 01/02/22: 8
## 1st Qu.: 0.000 1st Qu.: 0.0000 01/03/22: 8
## Median : 0.000 Median : 0.0000 01/04/22: 8
## Mean : 4.142 Mean : 0.3155 01/05/22: 8
## 3rd Qu.: 9.000 3rd Qu.: 0.8000 01/06/22: 8
## Max. :24.500 Max. : 20.0000 01/07/22: 8
## (Other) :1392
str(bd)
## 'data.frame': 1440 obs. of 8 variables:
## $ Target : int 1 1 1 1 1 1 1 1 1 1 ...
## $ Cliente : Factor w/ 4 levels "MAGNA","MAHLE",..: 3 2 2 2 1 4 4 4 3 2 ...
## $ Vueltas : int 1 1 2 3 1 1 2 3 1 1 ...
## $ Plan.arrival : int 16 8 9 20 0 0 0 0 16 8 ...
## $ Real.arrival : num 16 8 9 20 0 0 0 0 16 8 ...
## $ Real.departure: num 19.3 8.55 10 21 0 0 0 0 18.1 9 ...
## $ Diference : num 3.3 0.55 1 1 0 0 0 0 2.1 1 ...
## $ Fecha : Factor w/ 180 levels "01/02/22","01/03/22",..: 7 7 7 7 7 7 7 7 13 13 ...
Variable<-c("`Target`","`Cliente`","`Vueltas`","`Plan.arrival`","`Real.arrival`","`Real.departure`","`Diference`","`Fecha`")
Type<-c("quantiative (discrete)", "quantiative (continous)", "quantitative (continous)", "quantitative (continous)")
table<-data.frame(Variable,Type)
knitr::kable(table)
| Variable | Type |
|---|---|
Target |
quantiative (discrete) |
Cliente |
quantiative (continous) |
Vueltas |
quantitative (continous) |
Plan.arrival |
quantitative (continous) |
Real.arrival |
quantiative (discrete) |
Real.departure |
quantiative (continous) |
Diference |
quantitative (continous) |
Fecha |
quantitative (continous) |
variables <- c("`Target`","`Cliente`","`Vueltas`","`Plan.arrival`","`Real.arrival`","`Real.departure`","`Diference`","`Fecha`")
tipos <- c("cuantitativo (discreto) ", "cualitativo ", "cuantitativo (discreto) ", "cuantitativo (continuo) ", "cuantitativo (continuo) ", "cuantitativo (continuo) ", "cuantitativo (continuo) ", "cualitativo ")
escalas <- c("intervalo", "nominal", "razon", "razon", "razon", "razon", "razon", "ordinal")
table1 <- data.frame (variables, tipos, escalas)
knitr::kable(table1)
| variables | tipos | escalas |
|---|---|---|
Target |
cuantitativo (discreto) | intervalo |
Cliente |
cualitativo | nominal |
Vueltas |
cuantitativo (discreto) | razon |
Plan.arrival |
cuantitativo (continuo) | razon |
Real.arrival |
cuantitativo (continuo) | razon |
Real.departure |
cuantitativo (continuo) | razon |
Diference |
cuantitativo (continuo) | razon |
Fecha |
cualitativo | ordinal |
Las técnicas elegidas en nuestra limpia de datos se eligieron por las necesidades que presentaba la base y la utilidad o no que tenían nuestras variables
bd1<-bd
bd1$Fecha<- as.Date(bd1$Fecha, format= "%d/%m/%y")
summary(bd1)
## Target Cliente Vueltas Plan.arrival Real.arrival
## Min. :1 MAGNA :180 Min. :1.00 Min. : 0.000 Min. : 0.000
## 1st Qu.:1 MAHLE :540 1st Qu.:1.00 1st Qu.: 0.000 1st Qu.: 0.000
## Median :1 PRINTEL :180 Median :1.50 Median : 4.000 Median : 0.000
## Mean :1 VARROC :540 Mean :1.75 Mean : 6.625 Mean : 3.823
## 3rd Qu.:1 3rd Qu.:2.25 3rd Qu.:10.750 3rd Qu.: 8.000
## Max. :1 Max. :3.00 Max. :20.000 Max. :23.500
## Real.departure Diference Fecha
## Min. : 0.000 Min. :-14.3500 Min. :2022-01-02
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:2022-02-23
## Median : 0.000 Median : 0.0000 Median :2022-04-17
## Mean : 4.142 Mean : 0.3155 Mean :2022-04-16
## 3rd Qu.: 9.000 3rd Qu.: 0.8000 3rd Qu.:2022-06-08
## Max. :24.500 Max. : 20.0000 Max. :2022-07-23
tibble(bd1)
## # A tibble: 1,440 × 8
## Target Cliente Vueltas Plan.arrival Real.arri…¹ Real.…² Difer…³ Fecha
## <int> <fct> <int> <int> <dbl> <dbl> <dbl> <date>
## 1 1 "PRINTEL " 1 16 16 19.3 3.3 2022-01-02
## 2 1 "MAHLE" 1 8 8 8.55 0.55 2022-01-02
## 3 1 "MAHLE" 2 9 9 10 1 2022-01-02
## 4 1 "MAHLE" 3 20 20 21 1 2022-01-02
## 5 1 "MAGNA" 1 0 0 0 0 2022-01-02
## 6 1 "VARROC" 1 0 0 0 0 2022-01-02
## 7 1 "VARROC" 2 0 0 0 0 2022-01-02
## 8 1 "VARROC" 3 0 0 0 0 2022-01-02
## 9 1 "PRINTEL " 1 16 16 18.1 2.1 2022-01-03
## 10 1 "MAHLE" 1 8 8 9 1 2022-01-03
## # … with 1,430 more rows, and abbreviated variable names ¹Real.arrival,
## # ²Real.departure, ³Diference
sum(is.na(bd1))
## [1] 0
bd2<-bd1
bd2 <- subset(bd2, select =-c (Target))
Se elimina el target ya que no nos muestra una información detallada al darnos datos nulos de tener un 1 en todos lados.
bd3<-bd2
boxplot(bd3$Plan.arrival, horizontal=TRUE)
boxplot(bd3$Real.arrival, horizontal=TRUE)
boxplot(bd3$Real.departure, horizontal=TRUE)
boxplot(bd3$Diference, horizontal=TRUE)
bd3$Promedio_real_arrival<- mean(bd3$Real.arrival)
bd3$Vueltas<-as.numeric(bd3$Vueltas) ### missing values are converted to NA's
bd3$Plan.arrival<-as.numeric(bd3$Plan.arrival)
bd3$Real.arrival<-as.numeric(bd3$Real.arrival)
bd3$Real.departure<-as.numeric(bd3$Real.departure)
bd3$Diference<-as.numeric(bd3$Diference)
bd3$Promedio_real_arrival<-as.numeric(bd3$Promedio_real_arrival)
bd3<-as.data.frame(bd3)
bd3$Fecha<-as.Date(bd3$Fecha,format="%m/%d/%Y")
bd3$Cliente<-as.factor(bd3$Cliente)
tibble(bd3)
## # A tibble: 1,440 × 8
## Cliente Vueltas Plan.arrival Real.arr…¹ Real.…² Difer…³ Fecha Prome…⁴
## <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <date> <dbl>
## 1 "PRINTEL " 1 16 16 19.3 3.3 2022-01-02 3.82
## 2 "MAHLE" 1 8 8 8.55 0.55 2022-01-02 3.82
## 3 "MAHLE" 2 9 9 10 1 2022-01-02 3.82
## 4 "MAHLE" 3 20 20 21 1 2022-01-02 3.82
## 5 "MAGNA" 1 0 0 0 0 2022-01-02 3.82
## 6 "VARROC" 1 0 0 0 0 2022-01-02 3.82
## 7 "VARROC" 2 0 0 0 0 2022-01-02 3.82
## 8 "VARROC" 3 0 0 0 0 2022-01-02 3.82
## 9 "PRINTEL " 1 16 16 18.1 2.1 2022-01-03 3.82
## 10 "MAHLE" 1 8 8 9 1 2022-01-03 3.82
## # … with 1,430 more rows, and abbreviated variable names ¹Real.arrival,
## # ²Real.departure, ³Diference, ⁴Promedio_real_arrival
Se muestra que la diferencia de nuestros arrivals no es mucha, pero habría que revisar si el promedio total nos ayudaría. se agrego una columna de promedio general de nuestros arrivals reales.
#Seleccionar variables
bd4 <- bd3 %>% select (one_of("Cliente","Vueltas","Plan.arrival","Real.arrival","Real.departure", "Diference", "Fecha","Promedio_real_arrival"))
#Renombrar variabes
colnames(bd3)<-c("Cle", "Vueltas", "P.arrival", "R.arrival", "R.dep", "Dif", "Fecha", "Prom.R.arrival")
bd_limpia<-bd3
write.csv(bd_limpia, file="formbaselimpia.csv", row.names = FALSE)
Para esta actividad, pudimos limpiar la base de datos que tiene Form al modificarla al 100% para poder acomodar los requerimientos que tiene R para insertar las bases y que hagan sentido a la hora de trabajarlo.
Para esta base de datos ahora lo que nos permitirá lograr es hacer gráficas y tablas para detallar y mostrar la información importante de nuestra base.
#file.choose()
bd_limpia<-read.csv("/Users/ricardogc/Desktop/Entregable 2.4 Reto/formbaselimpia.csv")
bd_limpia <- bd3
summary(bd_limpia)
## Cle Vueltas P.arrival R.arrival
## MAGNA :180 Min. :1.00 Min. : 0.000 Min. : 0.000
## MAHLE :540 1st Qu.:1.00 1st Qu.: 0.000 1st Qu.: 0.000
## PRINTEL :180 Median :1.50 Median : 4.000 Median : 0.000
## VARROC :540 Mean :1.75 Mean : 6.625 Mean : 3.823
## 3rd Qu.:2.25 3rd Qu.:10.750 3rd Qu.: 8.000
## Max. :3.00 Max. :20.000 Max. :23.500
## R.dep Dif Fecha Prom.R.arrival
## Min. : 0.000 Min. :-14.3500 Min. :2022-01-02 Min. :3.823
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:2022-02-23 1st Qu.:3.823
## Median : 0.000 Median : 0.0000 Median :2022-04-17 Median :3.823
## Mean : 4.142 Mean : 0.3155 Mean :2022-04-16 Mean :3.823
## 3rd Qu.: 9.000 3rd Qu.: 0.8000 3rd Qu.:2022-06-08 3rd Qu.:3.823
## Max. :24.500 Max. : 20.0000 Max. :2022-07-23 Max. :3.823
sd(bd_limpia$Vueltas, na.rm = FALSE)
## [1] 0.8294442
sd(bd_limpia$P.arrival, na.rm = FALSE)
## [1] 7.501563
sd(bd_limpia$R.arrival, na.rm = FALSE)
## [1] 6.505902
sd(bd_limpia$R.dep, na.rm = FALSE)
## [1] 6.948665
sd(bd_limpia$Dif, na.rm = FALSE)
## [1] 0.9218758
Variable<-c("Vueltas","P.arrival", "R.arrival", "R.dep", "Dif")
Mediana<-c("1.50 ","4.000","0.000 ","0.000 ","0.0000 ")
Media <- c("1.75","6.625","3.823","4.142","4.142")
Moda <- c("1","0","0","0","0")
Desviación_estandar<-c("0.8294442"," 7.501563","6.505902","6.948665","6.948665")
table1 <- data.frame (Variable, Mediana, Media, Moda, Desviación_estandar)
knitr::kable(table1)
| Variable | Mediana | Media | Moda | Desviación_estandar |
|---|---|---|---|---|
| Vueltas | 1.50 | 1.75 | 1 | 0.8294442 |
| P.arrival | 4.000 | 6.625 | 0 | 7.501563 |
| R.arrival | 0.000 | 3.823 | 0 | 6.505902 |
| R.dep | 0.000 | 4.142 | 0 | 6.948665 |
| Dif | 0.0000 | 4.142 | 0 | 6.948665 |
Se ocupo summary para poder obtener la moda, mediana y media de las variables y posteriormente se calcula la desviación estandar con la función sd()
#install.packages('epiDisplay')
library(epiDisplay)
## Loading required package: survival
## Loading required package: MASS
##
## Attaching package: 'MASS'
## The following object is masked from 'package:dplyr':
##
## select
## Loading required package: nnet
##
## Attaching package: 'epiDisplay'
## The following object is masked from 'package:ggplot2':
##
## alpha
tab1(bd_limpia$Dif, sort.group = "decreasing", cum.percent = TRUE)
## bd_limpia$Dif :
## Frequency Percent Cum. percent
## 0 1014 70.4 70.4
## 1 192 13.3 83.8
## 1.1 20 1.4 85.1
## 1.05 17 1.2 86.3
## 1.2 16 1.1 87.4
## 0.5 14 1.0 88.4
## 1.15 11 0.8 89.2
## 0.9 8 0.6 89.7
## 0.4 8 0.6 90.3
## 1.5 7 0.5 90.8
## 0.8 7 0.5 91.2
## 0.55 7 0.5 91.7
## 2 6 0.4 92.2
## 1.8 6 0.4 92.6
## 1.3 6 0.4 93.0
## 0.3 6 0.4 93.4
## 1.4 5 0.3 93.8
## 1.25 5 0.3 94.1
## 0.95 5 0.3 94.4
## 0.2 5 0.3 94.8
## 1.45 4 0.3 95.1
## 1.35 3 0.2 95.3
## 0.45 3 0.2 95.5
## 0.35 3 0.2 95.7
## 0.1 3 0.2 95.9
## 3.1 2 0.1 96.0
## 2.6 2 0.1 96.2
## 2.38 2 0.1 96.3
## 2.15 2 0.1 96.5
## 1.6 2 0.1 96.6
## 1.03 2 0.1 96.7
## 0.85 2 0.1 96.9
## 0.7 2 0.1 97.0
## 0.6 2 0.1 97.2
## 0.15 2 0.1 97.3
## 4.4 1 0.1 97.4
## 4 1 0.1 97.4
## 3.3 1 0.1 97.5
## 3.05 1 0.1 97.6
## 20 1 0.1 97.6
## 2.85 1 0.1 97.7
## 2.52 1 0.1 97.8
## 2.5 1 0.1 97.8
## 2.4 1 0.1 97.9
## 2.35 1 0.1 98.0
## 2.3 1 0.1 98.1
## 2.27 1 0.1 98.1
## 2.2 1 0.1 98.2
## 2.1 1 0.1 98.3
## 2.05 1 0.1 98.3
## 1.95 1 0.1 98.4
## 1.9 1 0.1 98.5
## 1.88 1 0.1 98.5
## 1.85 1 0.1 98.6
## 1.74 1 0.1 98.7
## 1.73 1 0.1 98.8
## 1.7 1 0.1 98.8
## 1.66 1 0.1 98.9
## 1.65 1 0.1 99.0
## 1.55 1 0.1 99.0
## 1.14 1 0.1 99.1
## 1.13 1 0.1 99.2
## 1.07 1 0.1 99.2
## 1.04 1 0.1 99.3
## 1.01 1 0.1 99.4
## 0.99 1 0.1 99.4
## 0.98 1 0.1 99.5
## 0.75 1 0.1 99.6
## 0.47 1 0.1 99.7
## 0.43 1 0.1 99.7
## 0.34 1 0.1 99.8
## 0.25 1 0.1 99.9
## -14.35 1 0.1 99.9
## -12.15 1 0.1 100.0
## Total 1440 100.0 100.0
tab1(bd_limpia$R.arrival, sort.group = "decreasing", cum.percent = TRUE)
## bd_limpia$R.arrival :
## Frequency Percent Cum. percent
## 0 1009 70.1 70.1
## 8 100 6.9 77.0
## 20 83 5.8 82.8
## 9 81 5.6 88.4
## 18 11 0.8 89.2
## 9.4 10 0.7 89.9
## 15 9 0.6 90.5
## 18.2 8 0.6 91.0
## 16 7 0.5 91.5
## 9.5 6 0.4 91.9
## 9.2 6 0.4 92.4
## 9.1 6 0.4 92.8
## 10 6 0.4 93.2
## 9.15 5 0.3 93.5
## 15.1 5 0.3 93.9
## 16.2 4 0.3 94.2
## 9.3 3 0.2 94.4
## 15.2 3 0.2 94.6
## 9.45 2 0.1 94.7
## 9.26 2 0.1 94.9
## 9.12 2 0.1 95.0
## 9.08 2 0.1 95.1
## 8.1 2 0.1 95.3
## 8.05 2 0.1 95.4
## 7.35 2 0.1 95.6
## 7.3 2 0.1 95.7
## 6 2 0.1 95.8
## 23.3 2 0.1 96.0
## 20.2 2 0.1 96.1
## 20.1 2 0.1 96.2
## 19.2 2 0.1 96.4
## 18.4 2 0.1 96.5
## 18.3 2 0.1 96.7
## 16.4 2 0.1 96.8
## 15.3 2 0.1 96.9
## 13 2 0.1 97.1
## 9.35 1 0.1 97.2
## 9.25 1 0.1 97.2
## 9.17 1 0.1 97.3
## 9.11 1 0.1 97.4
## 9.05 1 0.1 97.4
## 8.52 1 0.1 97.5
## 8.45 1 0.1 97.6
## 7.45 1 0.1 97.6
## 7.34 1 0.1 97.7
## 23.5 1 0.1 97.8
## 23.2 1 0.1 97.8
## 23.15 1 0.1 97.9
## 23 1 0.1 98.0
## 22.4 1 0.1 98.1
## 22.08 1 0.1 98.1
## 21.15 1 0.1 98.2
## 21.1 1 0.1 98.3
## 21.05 1 0.1 98.3
## 20.4 1 0.1 98.4
## 20.3 1 0.1 98.5
## 20.15 1 0.1 98.5
## 20.05 1 0.1 98.6
## 19.48 1 0.1 98.7
## 19.15 1 0.1 98.8
## 19.1 1 0.1 98.8
## 18.5 1 0.1 98.9
## 18.35 1 0.1 99.0
## 18.15 1 0.1 99.0
## 18.1 1 0.1 99.1
## 18.05 1 0.1 99.2
## 17.3 1 0.1 99.2
## 17 1 0.1 99.3
## 15.45 1 0.1 99.4
## 15.4 1 0.1 99.4
## 15.16 1 0.1 99.5
## 14 1 0.1 99.6
## 13.4 1 0.1 99.7
## 11.37 1 0.1 99.7
## 11 1 0.1 99.8
## 10.42 1 0.1 99.9
## 10.4 1 0.1 99.9
## 10.05 1 0.1 100.0
## Total 1440 100.0 100.0
Viendo el gráfico de distribución se pueden ver las frecuencias en las diferencias, se ve una mayor parte en el número 0, lo cual nos indica un buen desarrollo en delivery.
#Checar que la base de datos esta estructurada y un resumen de la descripción estadistica
summary(bd_limpia)
## Cle Vueltas P.arrival R.arrival
## MAGNA :180 Min. :1.00 Min. : 0.000 Min. : 0.000
## MAHLE :540 1st Qu.:1.00 1st Qu.: 0.000 1st Qu.: 0.000
## PRINTEL :180 Median :1.50 Median : 4.000 Median : 0.000
## VARROC :540 Mean :1.75 Mean : 6.625 Mean : 3.823
## 3rd Qu.:2.25 3rd Qu.:10.750 3rd Qu.: 8.000
## Max. :3.00 Max. :20.000 Max. :23.500
## R.dep Dif Fecha Prom.R.arrival
## Min. : 0.000 Min. :-14.3500 Min. :2022-01-02 Min. :3.823
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:2022-02-23 1st Qu.:3.823
## Median : 0.000 Median : 0.0000 Median :2022-04-17 Median :3.823
## Mean : 4.142 Mean : 0.3155 Mean :2022-04-16 Mean :3.823
## 3rd Qu.: 9.000 3rd Qu.: 0.8000 3rd Qu.:2022-06-08 3rd Qu.:3.823
## Max. :24.500 Max. : 20.0000 Max. :2022-07-23 Max. :3.823
#Graficar
ggplot(bd_limpia,aes(x=Fecha, y=Dif,color=Cle))+
geom_line()+
labs(x="Fecha",y="Delay in Minutes", color="Legend")+
ggtitle("Delays in Performance by Client")
### Checar la presencia de missing values
bd_limpia1 <- bd_limpia
bd_limpia1<-bd_limpia1[-c(300,292),] # Quitar las filas 300 y 292 por que presentan datos negativos y que no sirven para el analisis.
summary(bd_limpia1) # no missing values
## Cle Vueltas P.arrival R.arrival
## MAGNA :180 Min. :1.000 Min. : 0.000 Min. : 0.000
## MAHLE :538 1st Qu.:1.000 1st Qu.: 0.000 1st Qu.: 0.000
## PRINTEL :180 Median :1.000 Median : 0.000 Median : 0.000
## VARROC :540 Mean :1.748 Mean : 6.606 Mean : 3.796
## 3rd Qu.:2.000 3rd Qu.: 9.000 3rd Qu.: 8.000
## Max. :3.000 Max. :20.000 Max. :23.500
## R.dep Dif Fecha Prom.R.arrival
## Min. : 0.000 Min. : 0.0000 Min. :2022-01-02 Min. :3.823
## 1st Qu.: 0.000 1st Qu.: 0.0000 1st Qu.:2022-02-24 1st Qu.:3.823
## Median : 0.000 Median : 0.0000 Median :2022-04-18 Median :3.823
## Mean : 4.134 Mean : 0.3344 Mean :2022-04-16 Mean :3.823
## 3rd Qu.: 9.000 3rd Qu.: 0.8000 3rd Qu.:2022-06-08 3rd Qu.:3.823
## Max. :24.500 Max. :20.0000 Max. :2022-07-23 Max. :3.823
bd_limpia1<-bd_limpia1[bd_limpia1$Cle!="MAGNA",]
bd_limpia1<-bd_limpia1[bd_limpia1$Cle!="VARROC",]
summary(bd_limpia1)
## Cle Vueltas P.arrival R.arrival
## MAGNA : 0 Min. :1.000 Min. : 8.00 Min. : 0.000
## MAHLE :538 1st Qu.:1.000 1st Qu.: 8.25 1st Qu.: 0.000
## PRINTEL :180 Median :1.000 Median : 9.00 Median : 8.000
## VARROC : 0 Mean :1.747 Mean :13.23 Mean : 7.603
## 3rd Qu.:2.000 3rd Qu.:16.00 3rd Qu.:10.000
## Max. :3.000 Max. :20.00 Max. :23.500
## R.dep Dif Fecha Prom.R.arrival
## Min. : 0.00 Min. : 0.0000 Min. :2022-01-02 Min. :3.823
## 1st Qu.: 0.00 1st Qu.: 0.0000 1st Qu.:2022-02-24 1st Qu.:3.823
## Median : 9.00 Median : 0.8000 Median :2022-04-18 Median :3.823
## Mean : 8.28 Mean : 0.6697 Mean :2022-04-16 Mean :3.823
## 3rd Qu.:11.50 3rd Qu.: 1.0000 3rd Qu.:2022-06-08 3rd Qu.:3.823
## Max. :24.50 Max. :20.0000 Max. :2022-07-23 Max. :3.823
ggplot(bd_limpia1,aes(x=Fecha, y=Dif,color=Cle))+
geom_line()+ geom_hline(yintercept=0.8000,linetype="dashed",color="black")+
labs(x="Fecha",y="Diferencia por día", color="Legend")+
ggtitle("Diferencia de entrega por Cliente")
### Nota (Hallazgo): Encontramos gracias a la grafica realizada de la
Diferencia de tiempo en la entrega de producto por cliente, que los
clientes con mayor Delay son MAHLE Y PRINTEL, al mismo tiempo
encontramos que entre estos dos clientes, el que tiene mayor Delay es
MAHLE, por lo que sería bueno para la empresa el tener un mayor enfoque,
obvio en ambos, pero tambien un poco más en Mahle, para que tenga mejor
referencia de este cliente y mayor confianza con la empresa en las
entregas.