Teoría
# 1. Build the toy data set: eight observations with two numeric columns.
df <- data.frame(
  x = c(2, 2, 8, 5, 7, 6, 1, 4),
  y = c(10, 5, 4, 8, 5, 4, 2, 9)
)
# 2. Number of groups (clusters) to fit.
grupos <- 3
# 3. Classification: run k-means on both columns with `grupos` centers.
# NOTE(review): no seed is set before this call, so cluster numbering can
# differ between runs.
segmentos <- kmeans(df, centers = grupos)
# Print the fitted object (sizes, centers, assignments, sums of squares).
segmentos
## K-means clustering with 3 clusters of sizes 2, 3, 3
##
## Cluster means:
## x y
## 1 1.500000 3.500000
## 2 7.000000 4.333333
## 3 3.666667 9.000000
##
## Clustering vector:
## [1] 3 1 2 3 2 2 1 3
##
## Within cluster sum of squares by cluster:
## [1] 5.000000 2.666667 6.666667
## (between_SS / total_SS = 85.8 %)
##
## Available components:
##
## [1] "cluster" "centers" "totss" "withinss" "tot.withinss"
## [6] "betweenss" "size" "iter" "ifault"
# 4. Group assignment: append each observation's cluster label to the data.
asignacion <- cbind(df, cluster = segmentos[["cluster"]])
# Print the data together with the new `cluster` column.
asignacion
## x y cluster
## 1 2 10 3
## 2 2 5 1
## 3 8 4 2
## 4 5 8 3
## 5 7 5 2
## 6 6 4 2
## 7 1 2 1
## 8 4 9 3
# 5. Graficar resultados
# install.packages("ggplot2")
# install.packages("factoextra")
library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the k-means partition of `df`, one palette colour per cluster.
# Logical flags are spelled TRUE rather than T: T is an ordinary variable
# that can be reassigned, TRUE is a reserved constant.
# NOTE(review): the rendered output reports "Too few points to calculate an
# ellipse" for every cluster, so the requested "euclid" ellipses are
# presumably not drawn for this tiny data set -- confirm that is acceptable.
fviz_cluster(
  segmentos,
  data = df,
  palette = c("gray", "lightblue", "darkgreen"),
  ellipse.type = "euclid",
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme()
)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

# Optimizar
library(cluster)
library(data.table)
# Fix the random seed so the resampling-based result below is repeatable.
set.seed(123)
# Gap statistic for k-means on `df`, trying up to 7 clusters. The clustering
# function argument is written out in full (FUNcluster) instead of relying on
# partial matching of `FUN`; `nstart = 1` is forwarded to kmeans.
optimizacion <- clusGap(df, FUNcluster = kmeans, K.max = 7, nstart = 1)
plot(optimizacion, xlab = "numero de clusters k")

# The highest point of the plot indicates the optimal number of groups.
# NOTE(review): clusGap's documentation also describes standard-error based
# selection rules -- confirm which criterion is intended here.
Ejercicio
Importar base de datos
# Tip: run file.choose() interactively to locate the CSV on another machine.
# NOTE(review): the path below is absolute and user-specific.
ventas <- read.csv(file = "/Users/davidcavazos/Desktop/ventas.csv")
Entender la base de datos
## BillNo Itemname Quantity Date
## Length:522064 Length:522064 Min. :-9600.00 Length:522064
## Class :character Class :character 1st Qu.: 1.00 Class :character
## Mode :character Mode :character Median : 3.00 Mode :character
## Mean : 10.09
## 3rd Qu.: 10.00
## Max. :80995.00
##
## Hour Price CustomerID Country
## Length:522064 Min. :-11062.060 Min. :12346 Length:522064
## Class :character 1st Qu.: 1.250 1st Qu.:13950 Class :character
## Mode :character Median : 2.080 Median :15265 Mode :character
## Mean : 3.827 Mean :15317
## 3rd Qu.: 4.130 3rd Qu.:16837
## Max. : 13541.330 Max. :18287
## NA's :134041
## Total
## Min. :-11062.06
## 1st Qu.: 3.75
## Median : 9.78
## Mean : 19.69
## 3rd Qu.: 17.40
## Max. :168469.60
##
## 'data.frame': 522064 obs. of 9 variables:
## $ BillNo : chr "536365" "536365" "536365" "536365" ...
## $ Itemname : chr "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
## $ Quantity : int 6 6 8 6 6 2 6 6 6 32 ...
## $ Date : chr "01/12/2010" "01/12/2010" "01/12/2010" "01/12/2010" ...
## $ Hour : chr "08:26:00" "08:26:00" "08:26:00" "08:26:00" ...
## $ Price : num 2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
## $ CustomerID: int 17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
## $ Country : chr "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
## $ Total : num 15.3 20.3 22 20.3 20.3 ...
LS0tCnRpdGxlOiAiQWN0IDQuMSIKYXV0aG9yOiAiRGF2aWQgQ2F2YXpvcyAtIEEwMTE3NzIzOCIKZGF0ZTogIjIwMjMtMDktMTciCm91dHB1dDogCiBodG1sX2RvY3VtZW50OgogICAgdG9jOiBUUlVFCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIHRoZW1lOiAieWV0aSIKICAgIGhpZ2hsaWdodDogInRhbmdvIgotLS0KCiFbIF0oL1VzZXJzL2RhdmlkY2F2YXpvcy9EZXNrdG9wL211ZWJsZXMucG5nKQoKIyMgIDxzcGFuIHN0eWxlID0gImNvbG9yOiBibHVlOyI+ICoqVGVvcsOtYSoqCgpgYGB7cn0KCiMgMS4gQ3JlYXIgYmFzZSBkZSBkYXRvcy4gCgpkZjwtZGF0YS5mcmFtZSh4PWMoMiwyLDgsNSw3LDYsMSw0KSx5PWMoMTAsNSw0LDgsNSw0LDIsOSkpCgojIDIuIE51bWVybyBkZSBncnVwb3MgCgpncnVwb3MgPC0gMyAKCiMgMy4gQ2xhc2lmaWNhY2lvbgoKc2VnbWVudG9zIDwtIGttZWFucyhkZixncnVwb3MpCnNlZ21lbnRvcwoKIyA0LiBBc2lnbmFjaW9uIGRlIGdydXBvcwoKYXNpZ25hY2lvbiA8LSBjYmluZChkZixjbHVzdGVyPXNlZ21lbnRvcyRjbHVzdGVyKQphc2lnbmFjaW9uCgojIDUuIEdyYWZpY2FyIHJlc3VsdGFkb3MgCgojIGluc3RhbGwucGFja2FnZXMoImdncGxvdDIiKQojIGluc3RhbGwucGFja2FnZXMoImZhY3RvZXh0cmEiKQoKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGZhY3RvZXh0cmEpCgpmdml6X2NsdXN0ZXIoc2VnbWVudG9zLCBkYXRhPWRmLAogICAgICAgICAgICAgcGFsZXR0ZT1jKCJncmF5IiwibGlnaHRibHVlIiwiZGFya2dyZWVuIiksCiAgICAgICAgICAgICBlbGxpcHNlLnR5cGUgPSAiZXVjbGlkIiwKICAgICAgICAgICAgIHN0YXIucGxvdCA9IFQsCiAgICAgICAgICAgICByZXBlbD0gVCwKICAgICAgICAgICAgIGdndGhlbWUgPSB0aGVtZSgpCiAgICAgICAgICAgICApCgojIE9wdGltaXphciAKCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShkYXRhLnRhYmxlKQpzZXQuc2VlZCgxMjMpCm9wdGltaXphY2lvbiA8LWNsdXNHYXAoZGYsRlVOID0ga21lYW5zLCBuc3RhcnQ9MSxLLm1heCA9IDcpCnBsb3Qob3B0aW1pemFjaW9uLCB4bGFiPSAibnVtZXJvIGRlIGNsdXN0ZXJzIGsiKQoKI0VsIHB1bnRvIG1hcyBhbHRvIGRlIGxhIGdyYWZpY2EgaW5kaWNhIGxhIGNhbnRpZGFkIGRlIGdydXBvcyBvcHRpbW9zCmBgYAoKCiMjIDxzcGFuIHN0eWxlID0gImNvbG9yOiBibHVlOyI+ICoqRWplcmNpY2lvKioKCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogYmx1ZTsiPiBJbXBvcnRhciBiYXNlIGRlIGRhdG9zCgpgYGB7cn0KIyNmaWxlLmNob29zZSgpCnZlbnRhcyA8LSByZWFkLmNzdigiL1VzZXJzL2RhdmlkY2F2YXpvcy9EZXNrdG9wL3ZlbnRhcy5jc3YiKQpgYGAKCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogYmx1ZTsiPiBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zIAoKYGBge3J9CnN1bW1hcnkodmVudGFzKQpgYGAKCmBgYHtyfQpzdHIodmVudGFzKQpgYGAKCgo=