Teoría

# 1. Create the data set: eight observations with two numeric features.

df <- data.frame(
  x = c(2, 2, 8, 5, 7, 6, 1, 4),
  y = c(10, 5, 4, 8, 5, 4, 2, 9)
)

# 2. Number of clusters

grupos <- 3

# 3. Clustering
# kmeans() chooses its initial centers at random. Fix the seed so the
# partition is reproducible, and try several random starts (nstart) so one
# unlucky initialization cannot leave the result in a poor local optimum.
# Cluster labels (1, 2, 3) are arbitrary identifiers: the same partition may
# be numbered differently than in an earlier, unseeded run.

set.seed(123)
segmentos <- kmeans(df, centers = grupos, nstart = 25)
segmentos
## K-means clustering with 3 clusters of sizes 2, 3, 3
## 
## Cluster means:
##          x        y
## 1 1.500000 3.500000
## 2 7.000000 4.333333
## 3 3.666667 9.000000
## 
## Clustering vector:
## [1] 3 1 2 3 2 2 1 3
## 
## Within cluster sum of squares by cluster:
## [1] 5.000000 2.666667 6.666667
##  (between_SS / total_SS =  85.8 %)
## 
## Available components:
## 
## [1] "cluster"      "centers"      "totss"        "withinss"     "tot.withinss"
## [6] "betweenss"    "size"         "iter"         "ifault"
# 4. Cluster assignment
# Put each observation's cluster label next to its original coordinates.

asignacion <- data.frame(df, cluster = segmentos$cluster, check.names = FALSE)
asignacion
##   x  y cluster
## 1 2 10       3
## 2 2  5       1
## 3 8  4       2
## 4 5  8       3
## 5 7  5       2
## 6 6  4       2
## 7 1  2       1
## 8 4  9       3
# 5. Graficar resultados 

# install.packages("ggplot2")
# install.packages("factoextra")

library(ggplot2)
library(factoextra)
## Welcome! Want to learn more? See two factoextra-related books at https://goo.gl/ve3WBa
# Plot the k-means partition of df, one palette color per cluster.
fviz_cluster(
  segmentos,
  data = df,
  palette = c("gray", "lightblue", "darkgreen"),
  ellipse.type = "euclid",
  # Use the reserved constants TRUE/FALSE: T and F are ordinary variables
  # that can be reassigned elsewhere in a session.
  star.plot = TRUE,
  repel = TRUE,
  ggtheme = theme()
)
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse
## Too few points to calculate an ellipse

# Optimize: estimate a suitable number of clusters with the gap statistic.

library(cluster)
library(data.table)
# Seed the RNG so the gap-statistic run below is repeatable.
set.seed(123)
# Spell out FUNcluster in full: the abbreviated `FUN` only worked through
# partial argument matching. nstart is passed on to kmeans() through `...`.
optimizacion <- clusGap(df, FUNcluster = kmeans, nstart = 1, K.max = 7)
plot(optimizacion, xlab = "numero de clusters k")

# The highest point of the plotted curve is read as the optimal number of
# clusters.
# NOTE(review): printing `optimizacion` reports the k chosen by clusGap's
# default "firstSEmax" rule, which may differ from the global maximum of the
# curve -- confirm which criterion is intended.

Ejercicio

Importar base de datos

# Load the sales records from a local CSV file.
# Tip: run file.choose() interactively to look up the path on another machine.
ventas <- read.csv(file = "/Users/davidcavazos/Desktop/ventas.csv")

Entender la base de datos

# Per-column summary of the sales data.
summary(object = ventas)
##     BillNo            Itemname            Quantity            Date          
##  Length:522064      Length:522064      Min.   :-9600.00   Length:522064     
##  Class :character   Class :character   1st Qu.:    1.00   Class :character  
##  Mode  :character   Mode  :character   Median :    3.00   Mode  :character  
##                                        Mean   :   10.09                     
##                                        3rd Qu.:   10.00                     
##                                        Max.   :80995.00                     
##                                                                             
##      Hour               Price              CustomerID       Country         
##  Length:522064      Min.   :-11062.060   Min.   :12346    Length:522064     
##  Class :character   1st Qu.:     1.250   1st Qu.:13950    Class :character  
##  Mode  :character   Median :     2.080   Median :15265    Mode  :character  
##                     Mean   :     3.827   Mean   :15317                      
##                     3rd Qu.:     4.130   3rd Qu.:16837                      
##                     Max.   : 13541.330   Max.   :18287                      
##                                          NA's   :134041                     
##      Total          
##  Min.   :-11062.06  
##  1st Qu.:     3.75  
##  Median :     9.78  
##  Mean   :    19.69  
##  3rd Qu.:    17.40  
##  Max.   :168469.60  
## 
# Compact overview of the structure of the sales data.
str(object = ventas)
## 'data.frame':    522064 obs. of  9 variables:
##  $ BillNo    : chr  "536365" "536365" "536365" "536365" ...
##  $ Itemname  : chr  "WHITE HANGING HEART T-LIGHT HOLDER" "WHITE METAL LANTERN" "CREAM CUPID HEARTS COAT HANGER" "KNITTED UNION FLAG HOT WATER BOTTLE" ...
##  $ Quantity  : int  6 6 8 6 6 2 6 6 6 32 ...
##  $ Date      : chr  "01/12/2010" "01/12/2010" "01/12/2010" "01/12/2010" ...
##  $ Hour      : chr  "08:26:00" "08:26:00" "08:26:00" "08:26:00" ...
##  $ Price     : num  2.55 3.39 2.75 3.39 3.39 7.65 4.25 1.85 1.85 1.69 ...
##  $ CustomerID: int  17850 17850 17850 17850 17850 17850 17850 17850 17850 13047 ...
##  $ Country   : chr  "United Kingdom" "United Kingdom" "United Kingdom" "United Kingdom" ...
##  $ Total     : num  15.3 20.3 22 20.3 20.3 ...
LS0tCnRpdGxlOiAiQWN0IDQuMSIKYXV0aG9yOiAiRGF2aWQgQ2F2YXpvcyAtIEEwMTE3NzIzOCIKZGF0ZTogIjIwMjMtMDktMTciCm91dHB1dDogCiBodG1sX2RvY3VtZW50OgogICAgdG9jOiBUUlVFCiAgICB0b2NfZmxvYXQ6IFRSVUUKICAgIGNvZGVfZG93bmxvYWQ6IFRSVUUKICAgIHRoZW1lOiAieWV0aSIKICAgIGhpZ2hsaWdodDogInRhbmdvIgotLS0KCiFbIF0oL1VzZXJzL2RhdmlkY2F2YXpvcy9EZXNrdG9wL211ZWJsZXMucG5nKQoKIyMgIDxzcGFuIHN0eWxlID0gImNvbG9yOiBibHVlOyI+ICoqVGVvcsOtYSoqCgpgYGB7cn0KCiMgMS4gQ3JlYXIgYmFzZSBkZSBkYXRvcy4gCgpkZjwtZGF0YS5mcmFtZSh4PWMoMiwyLDgsNSw3LDYsMSw0KSx5PWMoMTAsNSw0LDgsNSw0LDIsOSkpCgojIDIuIE51bWVybyBkZSBncnVwb3MgCgpncnVwb3MgPC0gMyAKCiMgMy4gQ2xhc2lmaWNhY2lvbgoKc2VnbWVudG9zIDwtIGttZWFucyhkZixncnVwb3MpCnNlZ21lbnRvcwoKIyA0LiBBc2lnbmFjaW9uIGRlIGdydXBvcwoKYXNpZ25hY2lvbiA8LSBjYmluZChkZixjbHVzdGVyPXNlZ21lbnRvcyRjbHVzdGVyKQphc2lnbmFjaW9uCgojIDUuIEdyYWZpY2FyIHJlc3VsdGFkb3MgCgojIGluc3RhbGwucGFja2FnZXMoImdncGxvdDIiKQojIGluc3RhbGwucGFja2FnZXMoImZhY3RvZXh0cmEiKQoKbGlicmFyeShnZ3Bsb3QyKQpsaWJyYXJ5KGZhY3RvZXh0cmEpCgpmdml6X2NsdXN0ZXIoc2VnbWVudG9zLCBkYXRhPWRmLAogICAgICAgICAgICAgcGFsZXR0ZT1jKCJncmF5IiwibGlnaHRibHVlIiwiZGFya2dyZWVuIiksCiAgICAgICAgICAgICBlbGxpcHNlLnR5cGUgPSAiZXVjbGlkIiwKICAgICAgICAgICAgIHN0YXIucGxvdCA9IFQsCiAgICAgICAgICAgICByZXBlbD0gVCwKICAgICAgICAgICAgIGdndGhlbWUgPSB0aGVtZSgpCiAgICAgICAgICAgICApCgojIE9wdGltaXphciAKCmxpYnJhcnkoY2x1c3RlcikKbGlicmFyeShkYXRhLnRhYmxlKQpzZXQuc2VlZCgxMjMpCm9wdGltaXphY2lvbiA8LWNsdXNHYXAoZGYsRlVOID0ga21lYW5zLCBuc3RhcnQ9MSxLLm1heCA9IDcpCnBsb3Qob3B0aW1pemFjaW9uLCB4bGFiPSAibnVtZXJvIGRlIGNsdXN0ZXJzIGsiKQoKI0VsIHB1bnRvIG1hcyBhbHRvIGRlIGxhIGdyYWZpY2EgaW5kaWNhIGxhIGNhbnRpZGFkIGRlIGdydXBvcyBvcHRpbW9zCmBgYAoKCiMjIDxzcGFuIHN0eWxlID0gImNvbG9yOiBibHVlOyI+ICoqRWplcmNpY2lvKioKCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogYmx1ZTsiPiBJbXBvcnRhciBiYXNlIGRlIGRhdG9zCgpgYGB7cn0KIyNmaWxlLmNob29zZSgpCnZlbnRhcyA8LSByZWFkLmNzdigiL1VzZXJzL2RhdmlkY2F2YXpvcy9EZXNrdG9wL3ZlbnRhcy5jc3YiKQpgYGAKCiMjIyA8c3BhbiBzdHlsZSA9ICJjb2xvcjogYmx1ZTsiPiBFbnRlbmRlciBsYSBiYXNlIGRlIGRhdG9zIAoKYGBge3J9CnN1bW1hcnkodmVudGFzKQpgYGAKCmBgYHtyfQpzdHIodmVudGFzKQpgYGAKCgo=