bd_limpia <- read.csv("/Users/ivannagarza/Ventas limpia.csv")Paso 1. Definir el área de negocio que buscamos impactar o
mejorar y su KPI.
El departamento de administración del supermercado con el indicador
de Número de visitas y gasto por cliente
Paso 2. Seleccionar plantilla (-s) para crear valor a partir
de los datos de los clientes.
Visión / Segmentación / Personalización /
Contextualización
Paso 3. Generar ideas o conceptos específicos
Elaborar los clusters para obtener visualización de Total de Compra
por cliente.
Paso 4. Reunir los datos requeridos
Elaborar un modelo visual que ayude a visualizar y conocer el
consumo y comportamiento del cliente.
Paso 5. Plan de ejecución
Dirección y mercadotecnia realizarán una estratégia que incite la
visita del cliente al supermercado más frecuente
##Convertir de caracter a fecha
bd1 <- bd_limpia
bd1$Date <- as.Date(bd1$Date, format = "%d/%m/%Y")
# install.packages ("tibble")
library (tibble)
tibble (bd1)## # A tibble: 514,266 × 8
## BillNo Itemname Quant…¹ Date Hour Price Custo…² Country
## <chr> <chr> <int> <date> <chr> <dbl> <int> <chr>
## 1 536365 WHITE HANGING HEART T-… 6 2010-12-01 08:2… 2.55 17850 United…
## 2 536365 WHITE METAL LANTERN 6 2010-12-01 08:2… 3.39 17850 United…
## 3 536365 CREAM CUPID HEARTS COA… 8 2010-12-01 08:2… 2.75 17850 United…
## 4 536365 KNITTED UNION FLAG HOT… 6 2010-12-01 08:2… 3.39 17850 United…
## 5 536365 RED WOOLLY HOTTIE WHIT… 6 2010-12-01 08:2… 3.39 17850 United…
## 6 536365 SET 7 BABUSHKA NESTING… 2 2010-12-01 08:2… 7.65 17850 United…
## 7 536365 GLASS STAR FROSTED T-L… 6 2010-12-01 08:2… 4.25 17850 United…
## 8 536366 HAND WARMER UNION JACK 6 2010-12-01 08:2… 1.85 17850 United…
## 9 536366 HAND WARMER RED POLKA … 6 2010-12-01 08:2… 1.85 17850 United…
## 10 536367 ASSORTED COLOUR BIRD O… 32 2010-12-01 08:3… 1.69 13047 United…
## # … with 514,256 more rows, and abbreviated variable names ¹Quantity,
## # ²CustomerID
##Convertir de caracter a entero
bd2 <- bd1
bd2$Hour <- substr(bd2$Hour, start = 1, stop = 2)
bd2$Hour <- as.integer(bd2$Hour)
str(bd2) ## 'data.frame': 514266 obs. of 8 variables:
## $ BillNo : chr "536365" "536365" "536365" "536365" ...
## $ Itemname : chr "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
## $ Quantity : int 6 6 8 6 6 2 6 6 6 32 ...
## $ Date : Date, format: "2010-12-01" "2010-12-01" ...
## $ Hour : int 8 8 8 8 8 8 8 8 8 8 ...
## $ Price : num 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID: int 17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
## $ Country : chr "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
attach(bd2)
summary(bd2)## BillNo Itemname Quantity Date
## Length:514266 Length:514266 Min. : 1.00 Min. :2010-12-01
## Class :character Class :character 1st Qu.: 1.00 1st Qu.:2011-03-28
## Mode :character Mode :character Median : 3.00 Median :2011-07-19
## Mean : 10.47 Mean :2011-07-03
## 3rd Qu.: 10.00 3rd Qu.:2011-10-19
## Max. :80995.00 Max. :2011-12-09
##
## Hour Price CustomerID Country
## Min. : 6.0 Min. : 0.040 Min. :12346 Length:514266
## 1st Qu.:11.0 1st Qu.: 1.250 1st Qu.:13930 Class :character
## Median :13.0 Median : 2.080 Median :15249 Mode :character
## Mean :13.1 Mean : 3.903 Mean :15310
## 3rd Qu.:15.0 3rd Qu.: 4.130 3rd Qu.:16820
## Max. :20.0 Max. :13541.330 Max. :18287
## NA's :131497
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
bd2$TotalCompra <- bd2$Quantity * bd2$Price
str(bd2)## 'data.frame': 514266 obs. of 9 variables:
## $ BillNo : chr "536365" "536365" "536365" "536365" ...
## $ Itemname : chr "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
## $ Quantity : int 6 6 8 6 6 2 6 6 6 32 ...
## $ Date : Date, format: "2010-12-01" "2010-12-01" ...
## $ Hour : int 8 8 8 8 8 8 8 8 8 8 ...
## $ Price : num 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID : int 17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
## $ Country : chr "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
## $ TotalCompra: num 15.3 20.3 22 20.3 20.3 ...
bd3<- bd2
bd3<-aggregate(TotalCompra ~ CustomerID + BillNo, data = bd3 ,sum)
summary(bd3)## CustomerID BillNo TotalCompra
## Min. :12346 Length:18159 Min. : 0.38
## 1st Qu.:13767 Class :character 1st Qu.: 155.62
## Median :15232 Mode :character Median : 301.32
## Mean :15285 Mean : 470.32
## 3rd Qu.:16813 3rd Qu.: 460.69
## Max. :18287 Max. :168469.60
bd4<- bd3
bd4<- aggregate(TotalCompra ~ CustomerID, data = bd4, mean)bd5<- bd3
bd5 <- group_by(bd3,CustomerID) %>% summarise(Visitas = n_distinct(BillNo))bd6<- merge(bd4,bd5, by= "CustomerID")
summary(bd6)## CustomerID TotalCompra Visitas
## Min. :12346 Min. : 3.45 Min. : 1.000
## 1st Qu.:13832 1st Qu.: 177.46 1st Qu.: 1.000
## Median :15322 Median : 290.17 Median : 2.000
## Mean :15316 Mean : 414.09 Mean : 4.227
## 3rd Qu.:16790 3rd Qu.: 425.47 3rd Qu.: 5.000
## Max. :18287 Max. :84236.25 Max. :209.000
write.csv(bd6, "datos_clientes.csv" , row.names = FALSE)install.packages(“factoextra”) library(factoextra) fviz_cluster (segmentos, data = bd6, palette=c(“red”, “blue”, “black”, “darkgreen”), ellipse.type = “euclid”, star.plot = T, repel = T, ggtheme = theme())
library(cluster)
# install.packages("data.table")
library(data.table)##
## Attaching package: 'data.table'
## The following objects are masked from 'package:dplyr':
##
## between, first, last
# install.packages("clusGap")
set.seed(123)optimizacion <- clusGap(bd6, FUN = kmeans, nstart = 25, K.max = 10, B = 50)## Warning: Quick-TRANSfer stage steps exceeded maximum (= 214800)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 214800)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 214800)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 214800)
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
## Warning: did not converge in 10 iterations
plot(optimizacion, xlab = "Numero de clusters k")